mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-03 04:10:43 +00:00
fix(ingestion/looker): deduplicate the view field (#10482)
Co-authored-by: Pedro Silva <pedro@acryl.io>
This commit is contained in:
parent
8e5f17b131
commit
c55c12c918
@ -112,6 +112,34 @@ _VIEW_FILE_EXTENSION = ".view.lkml"
|
||||
_MODEL_FILE_EXTENSION = ".model.lkml"
|
||||
|
||||
|
||||
def deduplicate_fields(fields: List[ViewField]) -> List[ViewField]:
|
||||
# Remove duplicates filed from self.fields
|
||||
# Logic is: If more than a field has same ViewField.name then keep only one filed where ViewField.field_type
|
||||
# is DIMENSION_GROUP.
|
||||
# Looker Constraint:
|
||||
# - Any field declared as dimension or measure can be redefined as dimension_group.
|
||||
# - Any field declared in dimension can't be redefined in measure and vice-versa.
|
||||
|
||||
dimension_group_field_names: List[str] = [
|
||||
field.name
|
||||
for field in fields
|
||||
if field.field_type == ViewFieldType.DIMENSION_GROUP
|
||||
]
|
||||
|
||||
new_fields: List[ViewField] = []
|
||||
|
||||
for field in fields:
|
||||
if (
|
||||
field.name in dimension_group_field_names
|
||||
and field.field_type != ViewFieldType.DIMENSION_GROUP
|
||||
):
|
||||
continue
|
||||
|
||||
new_fields.append(field)
|
||||
|
||||
return new_fields
|
||||
|
||||
|
||||
def _get_bigquery_definition(
|
||||
looker_connection: DBConnection,
|
||||
) -> Tuple[str, Optional[str], Optional[str]]:
|
||||
@ -1155,6 +1183,8 @@ class LookerView:
|
||||
)
|
||||
fields: List[ViewField] = dimensions + dimension_groups + measures
|
||||
|
||||
fields = deduplicate_fields(fields)
|
||||
|
||||
# Prep "default" values for the view, which will be overridden by the logic below.
|
||||
view_logic = looker_viewfile.raw_file_content[:max_file_snippet_length]
|
||||
sql_table_names: List[str] = []
|
||||
|
||||
@ -0,0 +1,488 @@
|
||||
[
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "containerProperties",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"customProperties": {
|
||||
"platform": "looker",
|
||||
"env": "PROD",
|
||||
"project_name": "lkml_samples"
|
||||
},
|
||||
"name": "lkml_samples"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "status",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "dataPlatformInstance",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"platform": "urn:li:dataPlatform:looker"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "subTypes",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"typeNames": [
|
||||
"LookML Project"
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "container",
|
||||
"entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "browsePathsV2",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"path": [
|
||||
{
|
||||
"id": "Folders"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "subTypes",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"typeNames": [
|
||||
"View"
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "viewProperties",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"materialized": false,
|
||||
"viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n",
|
||||
"viewLanguage": "lookml"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "container",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"container": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"proposedSnapshot": {
|
||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)",
|
||||
"aspects": [
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
||||
"paths": [
|
||||
"/Develop/lkml_samples/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.common.Status": {
|
||||
"removed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
|
||||
"upstreams": [
|
||||
{
|
||||
"auditStamp": {
|
||||
"time": 1586847600000,
|
||||
"actor": "urn:li:corpuser:datahub"
|
||||
},
|
||||
"dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD)",
|
||||
"type": "VIEW"
|
||||
}
|
||||
],
|
||||
"fineGrainedLineages": [
|
||||
{
|
||||
"upstreamType": "FIELD_SET",
|
||||
"upstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),entity)"
|
||||
],
|
||||
"downstreamType": "FIELD",
|
||||
"downstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),entity)"
|
||||
],
|
||||
"confidenceScore": 1.0
|
||||
},
|
||||
{
|
||||
"upstreamType": "FIELD_SET",
|
||||
"upstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),metadata)"
|
||||
],
|
||||
"downstreamType": "FIELD",
|
||||
"downstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),metadata)"
|
||||
],
|
||||
"confidenceScore": 1.0
|
||||
},
|
||||
{
|
||||
"upstreamType": "FIELD_SET",
|
||||
"upstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),urn)"
|
||||
],
|
||||
"downstreamType": "FIELD",
|
||||
"downstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),urn)"
|
||||
],
|
||||
"confidenceScore": 1.0
|
||||
},
|
||||
{
|
||||
"upstreamType": "FIELD_SET",
|
||||
"upstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),version)"
|
||||
],
|
||||
"downstreamType": "FIELD",
|
||||
"downstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),version)"
|
||||
],
|
||||
"confidenceScore": 1.0
|
||||
},
|
||||
{
|
||||
"upstreamType": "FIELD_SET",
|
||||
"upstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),createdon)"
|
||||
],
|
||||
"downstreamType": "FIELD",
|
||||
"downstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),createdon)"
|
||||
],
|
||||
"confidenceScore": 1.0
|
||||
},
|
||||
{
|
||||
"upstreamType": "FIELD_SET",
|
||||
"upstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),count)"
|
||||
],
|
||||
"downstreamType": "FIELD",
|
||||
"downstreams": [
|
||||
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),count)"
|
||||
],
|
||||
"confidenceScore": 1.0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
|
||||
"schemaName": "dataset_lineages",
|
||||
"platform": "urn:li:dataPlatform:looker",
|
||||
"version": 0,
|
||||
"created": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"lastModified": {
|
||||
"time": 0,
|
||||
"actor": "urn:li:corpuser:unknown"
|
||||
},
|
||||
"hash": "",
|
||||
"platformSchema": {
|
||||
"com.linkedin.pegasus2avro.schema.OtherSchema": {
|
||||
"rawSchema": ""
|
||||
}
|
||||
},
|
||||
"fields": [
|
||||
{
|
||||
"fieldPath": "entity",
|
||||
"nullable": false,
|
||||
"description": "",
|
||||
"label": "",
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "string",
|
||||
"recursive": false,
|
||||
"globalTags": {
|
||||
"tags": [
|
||||
{
|
||||
"tag": "urn:li:tag:Dimension"
|
||||
}
|
||||
]
|
||||
},
|
||||
"isPartOfKey": false
|
||||
},
|
||||
{
|
||||
"fieldPath": "metadata",
|
||||
"nullable": false,
|
||||
"description": "",
|
||||
"label": "",
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "string",
|
||||
"recursive": false,
|
||||
"globalTags": {
|
||||
"tags": [
|
||||
{
|
||||
"tag": "urn:li:tag:Dimension"
|
||||
}
|
||||
]
|
||||
},
|
||||
"isPartOfKey": false
|
||||
},
|
||||
{
|
||||
"fieldPath": "urn",
|
||||
"nullable": false,
|
||||
"description": "",
|
||||
"label": "",
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.StringType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "string",
|
||||
"recursive": false,
|
||||
"globalTags": {
|
||||
"tags": [
|
||||
{
|
||||
"tag": "urn:li:tag:Dimension"
|
||||
}
|
||||
]
|
||||
},
|
||||
"isPartOfKey": false
|
||||
},
|
||||
{
|
||||
"fieldPath": "version",
|
||||
"nullable": false,
|
||||
"description": "",
|
||||
"label": "",
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.NumberType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "number",
|
||||
"recursive": false,
|
||||
"globalTags": {
|
||||
"tags": [
|
||||
{
|
||||
"tag": "urn:li:tag:Dimension"
|
||||
}
|
||||
]
|
||||
},
|
||||
"isPartOfKey": false
|
||||
},
|
||||
{
|
||||
"fieldPath": "createdon",
|
||||
"nullable": false,
|
||||
"description": "",
|
||||
"label": "",
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.TimeType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "time",
|
||||
"recursive": false,
|
||||
"globalTags": {
|
||||
"tags": [
|
||||
{
|
||||
"tag": "urn:li:tag:Dimension"
|
||||
},
|
||||
{
|
||||
"tag": "urn:li:tag:Temporal"
|
||||
}
|
||||
]
|
||||
},
|
||||
"isPartOfKey": false
|
||||
},
|
||||
{
|
||||
"fieldPath": "count",
|
||||
"nullable": false,
|
||||
"description": "",
|
||||
"label": "",
|
||||
"type": {
|
||||
"type": {
|
||||
"com.linkedin.pegasus2avro.schema.NumberType": {}
|
||||
}
|
||||
},
|
||||
"nativeDataType": "count",
|
||||
"recursive": false,
|
||||
"globalTags": {
|
||||
"tags": [
|
||||
{
|
||||
"tag": "urn:li:tag:Measure"
|
||||
}
|
||||
]
|
||||
},
|
||||
"isPartOfKey": false
|
||||
}
|
||||
],
|
||||
"primaryKeys": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
|
||||
"customProperties": {
|
||||
"looker.file.path": "dataset_lineages.view.lkml",
|
||||
"looker.model": "data"
|
||||
},
|
||||
"name": "dataset_lineages",
|
||||
"tags": []
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "dataset",
|
||||
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "browsePathsV2",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"path": [
|
||||
{
|
||||
"id": "Develop"
|
||||
},
|
||||
{
|
||||
"id": "urn:li:container:78f22c19304954b15e8adb1d9809975e",
|
||||
"urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "tag",
|
||||
"entityUrn": "urn:li:tag:Dimension",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "tagKey",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"name": "Dimension"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "tag",
|
||||
"entityUrn": "urn:li:tag:Measure",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "tagKey",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"name": "Measure"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
},
|
||||
{
|
||||
"entityType": "tag",
|
||||
"entityUrn": "urn:li:tag:Temporal",
|
||||
"changeType": "UPSERT",
|
||||
"aspectName": "tagKey",
|
||||
"aspect": {
|
||||
"json": {
|
||||
"name": "Temporal"
|
||||
}
|
||||
},
|
||||
"systemMetadata": {
|
||||
"lastObserved": 1586847600000,
|
||||
"runId": "lookml-test",
|
||||
"lastRunId": "no-run-id-provided"
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -0,0 +1,7 @@
|
||||
connection: "my_connection"
|
||||
|
||||
include: "dataset_lineages.view.lkml"
|
||||
|
||||
explore: explore_dataset_lineage {
|
||||
from: dataset_lineages
|
||||
}
|
||||
@ -0,0 +1,50 @@
|
||||
# File was added to check duplicate field issue
|
||||
|
||||
view: dataset_lineages {
|
||||
sql_table_name: "PUBLIC"."DATASET_LINEAGES"
|
||||
;;
|
||||
|
||||
dimension: createdon {
|
||||
type: date
|
||||
sql: ${TABLE}."CREATEDON" ;;
|
||||
}
|
||||
|
||||
dimension_group: createdon {
|
||||
type: time
|
||||
timeframes: [
|
||||
raw,
|
||||
time,
|
||||
date,
|
||||
week,
|
||||
month,
|
||||
quarter,
|
||||
year
|
||||
]
|
||||
sql: ${TABLE}."CREATEDON" ;;
|
||||
}
|
||||
|
||||
dimension: entity {
|
||||
type: string
|
||||
sql: ${TABLE}."ENTITY" ;;
|
||||
}
|
||||
|
||||
dimension: metadata {
|
||||
type: string
|
||||
sql: ${TABLE}."METADATA" ;;
|
||||
}
|
||||
|
||||
dimension: urn {
|
||||
type: string
|
||||
sql: ${TABLE}."URN" ;;
|
||||
}
|
||||
|
||||
dimension: version {
|
||||
type: number
|
||||
sql: ${TABLE}."VERSION" ;;
|
||||
}
|
||||
|
||||
measure: count {
|
||||
type: count
|
||||
drill_fields: []
|
||||
}
|
||||
}
|
||||
@ -864,3 +864,26 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None:
|
||||
|
||||
manifest = load_lkml(manifest_file)
|
||||
assert manifest
|
||||
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time):
|
||||
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
|
||||
mce_out_file = "duplicate_ingest_mces_output.json"
|
||||
|
||||
new_recipe = get_default_recipe(
|
||||
f"{tmp_path}/{mce_out_file}",
|
||||
f"{test_resources_dir}/lkml_samples_duplicate_field",
|
||||
)
|
||||
|
||||
pipeline = Pipeline.create(new_recipe)
|
||||
pipeline.run()
|
||||
pipeline.pretty_print_summary()
|
||||
pipeline.raise_from_status(raise_warnings=True)
|
||||
|
||||
golden_path = test_resources_dir / "duplicate_field_ingestion_golden.json"
|
||||
mce_helpers.check_golden_file(
|
||||
pytestconfig,
|
||||
output_path=tmp_path / mce_out_file,
|
||||
golden_path=golden_path,
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user