datahub/metadata-ingestion/tests/unit/test_bigquery_lineage.py

88 lines
3.1 KiB
Python

import datetime
from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config
from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report
from datahub.ingestion.source.bigquery_v2.bigquery_schema import BigqueryView
from datahub.ingestion.source.bigquery_v2.lineage import BigqueryLineageExtractor
def test_parse_view_lineage():
    """Check lineage parsing of a view over one fully qualified table.

    The view DDL uses ``SELECT * REPLACE(...)`` and reads from a single
    backtick-quoted ``project.dataset.table`` reference. The test expects
    exactly that table to be reported, with its name unchanged.
    """
    lineage_extractor = BigqueryLineageExtractor(
        BigQueryV2Config(),
        BigQueryV2Report(),
    )
    view_sql = """CREATE VIEW `my-project.my-dataset.test_table`
AS SELECT
* REPLACE(
myrandom(something) AS something)
FROM
`my-project2.my-dataset2.test_physical_table`;
"""
    replace_view = BigqueryView(
        name="test",
        created=datetime.datetime.now(),
        last_altered=datetime.datetime.now(),
        comment="",
        view_definition=view_sql,
    )

    upstreams = lineage_extractor.parse_view_lineage(
        "my_project", "my_dataset", replace_view
    )

    # One list comparison checks both the count and the resolved name.
    upstream_names = [table.get_table_name() for table in upstreams]
    assert upstream_names == ["my-project2.my-dataset2.test_physical_table"]
def test_parse_view_lineage_with_two_part_table_name():
    """Check lineage parsing when the source table is ``dataset.table``.

    The DDL omits the project, so the test expects the resulting name to
    start with the project passed to ``parse_view_lineage``
    (``my_project``) while keeping the dataset written in the query.
    """
    lineage_extractor = BigqueryLineageExtractor(
        BigQueryV2Config(),
        BigQueryV2Report(),
    )
    two_part_view = BigqueryView(
        name="test",
        created=datetime.datetime.now(),
        last_altered=datetime.datetime.now(),
        comment="",
        view_definition="CREATE VIEW my_view as select * from some_dataset.sometable as a",
    )

    upstreams = lineage_extractor.parse_view_lineage(
        "my_project", "my_dataset", two_part_view
    )

    # One list comparison checks both the count and the resolved name.
    upstream_names = [table.get_table_name() for table in upstreams]
    assert upstream_names == ["my_project.some_dataset.sometable"]
def test_one_part_table():
    """Check lineage parsing when the source table is a bare table name.

    The DDL gives neither project nor dataset, so the test expects both
    to come from the arguments passed to ``parse_view_lineage``
    (``my_project`` and ``my_dataset``).
    """
    lineage_extractor = BigqueryLineageExtractor(
        BigQueryV2Config(),
        BigQueryV2Report(),
    )
    bare_name_view = BigqueryView(
        name="test",
        created=datetime.datetime.now(),
        last_altered=datetime.datetime.now(),
        comment="",
        view_definition="CREATE VIEW my_view as select * from sometable as a",
    )

    upstreams = lineage_extractor.parse_view_lineage(
        "my_project", "my_dataset", bare_name_view
    )

    # One list comparison checks both the count and the resolved name.
    upstream_names = [table.get_table_name() for table in upstreams]
    assert upstream_names == ["my_project.my_dataset.sometable"]
def test_create_statement_with_multiple_table():
    """Check lineage parsing of a view that unions two source tables.

    Both sides of the ``union`` reference fully qualified tables. The
    test expects both to be reported and compares them by sorted name,
    so it does not depend on the order the extractor returns them in.
    """
    lineage_extractor = BigqueryLineageExtractor(
        BigQueryV2Config(),
        BigQueryV2Report(),
    )
    union_view = BigqueryView(
        name="test",
        created=datetime.datetime.now(),
        last_altered=datetime.datetime.now(),
        comment="",
        view_definition="CREATE VIEW my_view as select * from my_project_2.my_dataset_2.sometable union select * from my_project_2.my_dataset_2.sometable2 as a",
    )

    upstreams = lineage_extractor.parse_view_lineage(
        "my_project", "my_dataset", union_view
    )

    # Sorting the names gives a deterministic order to compare against.
    upstream_names = sorted(table.get_table_name() for table in upstreams)
    assert upstream_names == [
        "my_project_2.my_dataset_2.sometable",
        "my_project_2.my_dataset_2.sometable2",
    ]