From 3668a56df7b0a5fbba18f1dc7647b3b0eb3ef1a7 Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Mon, 22 Apr 2024 20:15:58 +0530 Subject: [PATCH] fix(ingest/transformer): avoid duplicating terms (#10348) --- .../transformer/add_dataset_schema_terms.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py index a7502eb1c9..047252a5ee 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py @@ -71,6 +71,15 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer): if len(terms_to_add) == 0: terms_to_add = all_terms + new_glossary_terms = [] + new_glossary_terms.extend(server_terms) + new_glossary_terms.extend(terms_to_add) + + unique_gloseary_terms = [] + for term in new_glossary_terms: + if term not in unique_gloseary_terms: + unique_gloseary_terms.append(term) + new_glossary_term = GlossaryTermsClass( terms=[], auditStamp=schema_field.glossaryTerms.auditStamp @@ -79,11 +88,9 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer): time=builder.get_sys_time(), actor="urn:li:corpUser:restEmitter" ), ) - new_glossary_term.terms.extend(terms_to_add) - new_glossary_term.terms.extend(server_terms) + new_glossary_term.terms.extend(unique_gloseary_terms) schema_field.glossaryTerms = new_glossary_term - return schema_field def transform_aspect(