fix(ingestion/lookml): drop hive. from CLL (#11210)

This commit is contained in:
sid-acryl 2024-08-21 14:56:37 +05:30 committed by GitHub
parent be7b3439fd
commit 6b3c06ae8c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 417 additions and 1 deletions

View File

@ -385,7 +385,7 @@ class SqlBasedDerivedViewUpstream(AbstractViewUpstream, ABC):
config=self.config,
)
return upstreams_column_refs
return _drop_hive_dot_from_upstream(upstreams_column_refs)
def get_upstream_dataset_urn(self) -> List[Urn]:
return self._get_upstream_dataset_urn()

View File

@ -0,0 +1,6 @@
connection: "my_connection"
include: "top_10_employee_income_source.view.lkml"
explore: top_10_employee_income_source {
}

View File

@ -0,0 +1,26 @@
view: top_10_employee_income_source {
derived_table: {
sql: SELECT id,
name,
source
FROM hive.employee_db.income_source
ORDER BY source desc
LIMIT 10
;;
}
dimension: id {
type: number
sql: ${TABLE}.id ;;
}
dimension: name {
type: string
sql: ${TABLE}.name ;;
}
dimension: source {
type: string
sql: ${TABLE}.source ;;
}
}

View File

@ -0,0 +1,357 @@
[
{
"entityType": "container",
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
"changeType": "UPSERT",
"aspectName": "containerProperties",
"aspect": {
"json": {
"customProperties": {
"platform": "looker",
"env": "PROD",
"project_name": "lkml_samples"
},
"name": "lkml_samples"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
"json": {
"platform": "urn:li:dataPlatform:looker"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"LookML Project"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "container",
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "Folders"
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
"View"
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
"changeType": "UPSERT",
"aspectName": "viewProperties",
"aspect": {
"json": {
"materialized": false,
"viewLogic": "SELECT id,\n name,\n source\n FROM hive.employee_db.income_source\n ORDER BY source desc\n LIMIT 10",
"viewLanguage": "sql"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
"container": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/Develop/lkml_samples/"
]
}
},
{
"com.linkedin.pegasus2avro.common.Status": {
"removed": false
}
},
{
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
"upstreams": [
{
"auditStamp": {
"time": 1586847600000,
"actor": "urn:li:corpuser:datahub"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD),id)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),id)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD),name)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),name)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,employee_db.income_source,PROD),source)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD),source)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "top_10_employee_income_source",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "id",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "name",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "source",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
}
],
"primaryKeys": []
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
"looker.file.path": "top_10_employee_income_source.view.lkml",
"looker.model": "data"
},
"name": "top_10_employee_income_source",
"tags": []
}
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.top_10_employee_income_source,PROD)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "Develop"
},
{
"id": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
"urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
}
]
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "tag",
"entityUrn": "urn:li:tag:Dimension",
"changeType": "UPSERT",
"aspectName": "tagKey",
"aspect": {
"json": {
"name": "Dimension"
}
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "lookml-test",
"lastRunId": "no-run-id-provided"
}
}
]

View File

@ -1032,3 +1032,30 @@ def test_field_tag_ingest(pytestconfig, tmp_path, mock_time):
output_path=tmp_path / mce_out_file,
golden_path=golden_path,
)
@freeze_time(FROZEN_TIME)
def test_drop_hive(pytestconfig, tmp_path, mock_time):
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
mce_out_file = "drop_hive_dot.json"
new_recipe = get_default_recipe(
f"{tmp_path}/{mce_out_file}",
f"{test_resources_dir}/drop_hive_dot",
)
new_recipe["source"]["config"]["connection_to_platform_map"] = {
"my_connection": "hive"
}
pipeline = Pipeline.create(new_recipe)
pipeline.run()
pipeline.pretty_print_summary()
pipeline.raise_from_status(raise_warnings=True)
golden_path = test_resources_dir / "drop_hive_dot_golden.json"
mce_helpers.check_golden_file(
pytestconfig,
output_path=tmp_path / mce_out_file,
golden_path=golden_path,
)