From 3e4749624f34b177fee95feb1e587d687fe39a56 Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Mon, 1 Jul 2024 23:45:51 +0530 Subject: [PATCH] feat(ingest/lookml): ingest field tags (#10792) Co-authored-by: Harshal Sheth --- .../ingestion/source/looker/looker_common.py | 26 +- .../ingestion/source/looker/lookml_source.py | 1 + .../looker/golden_looker_mces.json | 12 - .../looker/golden_test_allow_ingest.json | 6 - ...olden_test_external_project_view_mces.json | 6 - .../looker/golden_test_file_path_ingest.json | 6 - .../golden_test_independent_look_ingest.json | 6 - .../looker/golden_test_ingest.json | 6 - .../looker/golden_test_ingest_joins.json | 6 - .../golden_test_ingest_unaliased_joins.json | 6 - .../looker_mces_golden_deleted_stateful.json | 74 ++- .../looker/looker_mces_usage_history.json | 6 - .../duplicate_field_ingestion_golden.json | 21 +- .../integration/lookml/expected_output.json | 9 - .../lookml/field_tag_ingestion_golden.json | 567 ++++++++++++++++++ .../dataset_lineages.view.lkml | 1 + .../lookml/lookml_mces_api_bigquery.json | 9 - .../lookml/lookml_mces_api_hive2.json | 9 - .../lookml/lookml_mces_offline.json | 9 - ...lookml_mces_offline_platform_instance.json | 9 - .../lookml_mces_with_external_urls.json | 9 - .../lookml/refinements_ingestion_golden.json | 9 - .../tests/integration/lookml/test_lookml.py | 26 + 23 files changed, 667 insertions(+), 172 deletions(-) create mode 100644 metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index b53fe27745..44b6fcdf6d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -244,6 +244,7 @@ class ViewField: view_name: Optional[str] = None is_primary_key: bool = False upstream_fields: List[str] = dataclasses_field(default_factory=list) + tags: List[str] = dataclasses_field(default_factory=list) @dataclass @@ -561,21 +562,30 @@ class LookerUtil: @staticmethod def _get_tags_from_field_type( - field_type: ViewFieldType, reporter: SourceReport + field: ViewField, reporter: SourceReport ) -> Optional[GlobalTagsClass]: - if field_type in LookerUtil.type_to_tag_map: - return GlobalTagsClass( - tags=[ + schema_field_tags: List[TagAssociationClass] = [ + TagAssociationClass(tag=builder.make_tag_urn(tag_name)) + for tag_name in field.tags + ] + + if field.field_type in LookerUtil.type_to_tag_map: + schema_field_tags.extend( + [ TagAssociationClass(tag=tag_name) - for tag_name in LookerUtil.type_to_tag_map[field_type] + for tag_name in LookerUtil.type_to_tag_map[field.field_type] ] ) else: reporter.report_warning( "lookml", - f"Failed to map view field type {field_type}. Won't emit tags for it", + f"Failed to map view field type {field.field_type}. Won't emit tags for measure and dimension", ) - return None + + if schema_field_tags: + return GlobalTagsClass(tags=schema_field_tags) + + return None @staticmethod def get_tag_mces() -> Iterable[MetadataChangeEvent]: @@ -602,7 +612,7 @@ class LookerUtil: else f"{field.field_type.value}. {field.description}" ), globalTags=( - LookerUtil._get_tags_from_field_type(field.field_type, reporter) + LookerUtil._get_tags_from_field_type(field, reporter) if tag_measures_and_dimensions is True else None ), diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 4a872f8b1a..0c9b3ae869 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -1090,6 +1090,7 @@ class LookerView: is_primary_key=is_primary_key, field_type=type_cls, upstream_fields=upstream_fields, + tags=field_dict.get("tags") or [], ) fields.append(field) return fields diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index 9ec1a69c6f..1ce1b4b475 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -78,9 +78,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -140,9 +137,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } @@ -178,9 +172,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -222,9 +213,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index 875d5a7356..87430ef106 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -76,9 +76,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -137,9 +134,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index 3ba4f1fbb5..3f11798d0a 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index be49879f4e..ec2c46c5da 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 465244c492..bb3c3ccb4e 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -324,9 +324,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -386,9 +383,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index ab1c671960..a7b8abed02 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index 0f55e26a2b..a1a7747c74 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -312,9 +312,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -373,9 +370,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index fcad66ce61..4d8e2e79ea 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -76,9 +76,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -137,9 +134,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index 15f1758aa4..e3cbf43059 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -324,9 +324,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -386,9 +383,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } }, @@ -812,23 +806,6 @@ "pipelineName": "stateful-looker-pipeline" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:621eb6e00da9abece0f64522f81be0e7", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "looker-test", - "lastRunId": "no-run-id-provided", - "pipelineName": "stateful-looker-pipeline" - } -}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", @@ -846,23 +823,6 @@ "pipelineName": "stateful-looker-pipeline" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "looker-test", - "lastRunId": "no-run-id-provided", - "pipelineName": "stateful-looker-pipeline" - } -}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", @@ -879,5 +839,39 @@ "lastRunId": "no-run-id-provided", "pipelineName": "stateful-looker-pipeline" } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:621eb6e00da9abece0f64522f81be0e7", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": true + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": true + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index e94089d9b4..f3da2dbfac 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -76,9 +76,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } @@ -113,9 +110,6 @@ }, "nativeDataType": "string", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } } diff --git a/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json index b06b59ba43..149610768a 100644 --- a/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json +++ b/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json @@ -116,7 +116,7 @@ "aspect": { "json": { "materialized": false, - "viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n tags: [\"softVersion\"]\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", "viewLanguage": "lookml" } }, @@ -338,6 +338,9 @@ "recursive": false, "globalTags": { "tags": [ + { + "tag": "urn:li:tag:softVersion" + }, { "tag": "urn:li:tag:Dimension" } @@ -484,5 +487,21 @@ "runId": "lookml-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:softVersion", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "softVersion" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/expected_output.json b/metadata-ingestion/tests/integration/lookml/expected_output.json index 931830eecf..1a789af60a 100644 --- a/metadata-ingestion/tests/integration/lookml/expected_output.json +++ b/metadata-ingestion/tests/integration/lookml/expected_output.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json new file mode 100644 index 0000000000..fdd3713988 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/field_tag_ingestion_golden.json @@ -0,0 +1,567 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "project_name": "lkml_samples" + }, + "name": "lkml_samples" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n tags: [\"softVersion\"]\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),entity)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),entity)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),metadata)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),metadata)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),urn)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),version)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),version)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),createdon)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),createdon)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),count)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "dataset_lineages", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "entity", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "metadata", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "urn", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "version", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:softVersion" + }, + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "createdon", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "time", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + }, + { + "tag": "urn:li:tag:Temporal" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "dataset_lineages.view.lkml", + "looker.model": "data" + }, + "name": "dataset_lineages", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Dimension", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Dimension", + "description": "A tag that is applied to all dimension fields." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Temporal", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Temporal", + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Measure", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Measure", + "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Measure", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Temporal", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:softVersion", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "softVersion" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml index 6062993f32..c29c477198 100644 --- a/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml +++ b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml @@ -40,6 +40,7 @@ view: dataset_lineages { dimension: version { type: number + tags: ["softVersion"] sql: ${TABLE}."VERSION" ;; } diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index a846e2ca84..05c950f9e1 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index 47d536fb82..23384d6070 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index caefb7b9bc..579a984b88 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json index c0cec6c261..d1487a62e9 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -1388,9 +1388,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1405,9 +1402,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1422,9 +1416,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index 31aec97293..d7793fbed8 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -1395,9 +1395,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1412,9 +1409,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1429,9 +1423,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json index 16e901125e..25d6511d17 100644 --- a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json @@ -1409,9 +1409,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1426,9 +1423,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false }, { @@ -1443,9 +1437,6 @@ }, "nativeDataType": "unknown", "recursive": false, - "globalTags": { - "tags": [] - }, "isPartOfKey": false } ], diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index 1c1f0fec3e..1099a29ba3 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -868,6 +868,7 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None: @freeze_time(FROZEN_TIME) def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" mce_out_file = "duplicate_ingest_mces_output.json" @@ -887,3 +888,28 @@ def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time): output_path=tmp_path / mce_out_file, golden_path=golden_path, ) + + +@freeze_time(FROZEN_TIME) +def test_field_tag_ingest(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" + mce_out_file = "field_tag_mces_output.json" + + new_recipe = get_default_recipe( + f"{tmp_path}/{mce_out_file}", + f"{test_resources_dir}/lkml_samples_duplicate_field", + ) + + new_recipe["source"]["config"]["tag_measures_and_dimensions"] = True + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status(raise_warnings=True) + + golden_path = test_resources_dir / "field_tag_ingestion_golden.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / mce_out_file, + golden_path=golden_path, + )