mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-09 16:03:31 +00:00
fix(ingest/transformer): avoid duplicating terms (#10348)
This commit is contained in:
parent
08731055ba
commit
3668a56df7
@ -71,6 +71,15 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer):
|
|||||||
if len(terms_to_add) == 0:
|
if len(terms_to_add) == 0:
|
||||||
terms_to_add = all_terms
|
terms_to_add = all_terms
|
||||||
|
|
||||||
|
new_glossary_terms = []
|
||||||
|
new_glossary_terms.extend(server_terms)
|
||||||
|
new_glossary_terms.extend(terms_to_add)
|
||||||
|
|
||||||
|
unique_gloseary_terms = []
|
||||||
|
for term in new_glossary_terms:
|
||||||
|
if term not in unique_gloseary_terms:
|
||||||
|
unique_gloseary_terms.append(term)
|
||||||
|
|
||||||
new_glossary_term = GlossaryTermsClass(
|
new_glossary_term = GlossaryTermsClass(
|
||||||
terms=[],
|
terms=[],
|
||||||
auditStamp=schema_field.glossaryTerms.auditStamp
|
auditStamp=schema_field.glossaryTerms.auditStamp
|
||||||
@ -79,11 +88,9 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer):
|
|||||||
time=builder.get_sys_time(), actor="urn:li:corpUser:restEmitter"
|
time=builder.get_sys_time(), actor="urn:li:corpUser:restEmitter"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
new_glossary_term.terms.extend(terms_to_add)
|
new_glossary_term.terms.extend(unique_gloseary_terms)
|
||||||
new_glossary_term.terms.extend(server_terms)
|
|
||||||
|
|
||||||
schema_field.glossaryTerms = new_glossary_term
|
schema_field.glossaryTerms = new_glossary_term
|
||||||
|
|
||||||
return schema_field
|
return schema_field
|
||||||
|
|
||||||
def transform_aspect(
|
def transform_aspect(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user