refactor(demo): Add empty global tags to BigQuery demo data (#2258)

This commit is contained in:
Dexter Lee 2021-03-18 16:03:05 -07:00 committed by GitHub
parent 7c5d8cb719
commit 61c37aa04d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11549 additions and 289 deletions

File diff suppressed because it is too large Load Diff

View File

@ -23,6 +23,8 @@ from datahub.metadata.schema_classes import (
UpstreamLineageClass,
UpstreamClass,
DatasetLineageTypeClass,
GlobalTagsClass,
EditableSchemaMetadataClass,
)
DEMO_DATA_DIR = pathlib.Path("./examples/demo_data")
@ -143,6 +145,30 @@ def create_lineage_aspect_mce(directive: Directive) -> MetadataChangeEventClass:
)
)
def create_global_tags_aspect_mce(directive: Directive) -> MetadataChangeEventClass:
    """Build an MCE that attaches a GlobalTags aspect with no tags to a dataset.

    Args:
        directive: Directive whose ``table`` is converted to the dataset URN
            via ``dataset_name_to_urn``.

    Returns:
        A ``MetadataChangeEventClass`` whose proposed snapshot is a
        ``DatasetSnapshotClass`` for that URN carrying a single
        ``GlobalTagsClass`` aspect with an empty ``tags`` list.
    """
    dataset_urn = dataset_name_to_urn(directive.table)
    no_tags_aspect = GlobalTagsClass(tags=[])
    snapshot = DatasetSnapshotClass(
        urn=dataset_urn,
        aspects=[no_tags_aspect],
    )
    return MetadataChangeEventClass(proposedSnapshot=snapshot)
def create_editable_schema_info_aspect_mce(directive: Directive) -> MetadataChangeEventClass:
    """Build an MCE that attaches an empty EditableSchemaMetadata aspect to a dataset.

    Args:
        directive: Directive whose ``table`` is converted to the dataset URN
            via ``dataset_name_to_urn``.

    Returns:
        A ``MetadataChangeEventClass`` whose proposed snapshot is a
        ``DatasetSnapshotClass`` for that URN carrying a single
        ``EditableSchemaMetadataClass`` aspect with an empty
        ``editableSchemaFieldInfo`` list.
    """
    dataset_urn = dataset_name_to_urn(directive.table)
    blank_schema_aspect = EditableSchemaMetadataClass(editableSchemaFieldInfo=[])
    snapshot = DatasetSnapshotClass(
        urn=dataset_urn,
        aspects=[blank_schema_aspect],
    )
    return MetadataChangeEventClass(proposedSnapshot=snapshot)
if __name__ == "__main__":
datasets = read_mces(INPUT_ALL_DATASETS)
@ -180,10 +206,24 @@ if __name__ == "__main__":
if directive.depends_on
]
# One GlobalTags MCE for every directive that is not flagged `drop`.
global_tags_aspect_mces = [
create_global_tags_aspect_mce(directive)
for directive in directives
if not directive.drop
]
# One EditableSchemaMetadata MCE for every directive that is not flagged `drop`.
editable_schema_info_aspect_mces = [
create_editable_schema_info_aspect_mce(directive)
for directive in directives
if not directive.drop
]
# Concatenate all MCE lists, in this order, into the final enriched list.
enriched_mces = (
filtered_dataset_mces
+ owner_entity_mces
+ ownership_aspect_mces
+ lineage_aspect_mces
+ global_tags_aspect_mces
+ editable_schema_info_aspect_mces
)
# Pass the combined list to write_mces along with OUTPUT_ENRICHED.
write_mces(OUTPUT_ENRICHED, enriched_mces)

View File

@ -5,9 +5,11 @@
set -euxo pipefail
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Fetch public COVID-19 datasets from BigQuery.
datahub ingest -c $DIR/bigquery_covid19_to_file.yml
FILE="$DIR/bigquery_covid19_to_file.yml"
# NOTE(review): this guard runs the ingestion only when the config file at
# $FILE is absent, yet that same path is then passed to `datahub ingest -c`.
# Confirm whether the check was meant to target the ingestion output instead.
if [ ! -f "$FILE" ]; then
    # Fetch public COVID-19 datasets from BigQuery.
    # Quote the path so a checkout directory containing spaces or glob
    # characters is passed to the command as a single argument.
    datahub ingest -c "$FILE"
fi
# Pull the directives CSV from Google sheets.
# See https://docs.google.com/spreadsheets/d/17c5SBiXEw5PuV7oEkC2uQnX55C6TPZTnr6XRQ6X-Qy0/edit#gid=0.