From 561c04bcf85aead93dbff3a19d23aca96ba07861 Mon Sep 17 00:00:00 2001 From: AndreasTA-AW <85551349+AndreasTA-AW@users.noreply.github.com> Date: Tue, 26 Oct 2021 07:31:19 +0200 Subject: [PATCH] feat(dbt-ingestion): add ability to skip specific models (#3340) --- metadata-ingestion/source_docs/dbt.md | 6 + .../src/datahub/ingestion/source/dbt.py | 22 +- .../tests/integration/dbt/dbt_manifest.json | 13 +- .../dbt/dbt_with_schemas_mces_golden.json | 9 +- .../dbt/dbt_without_schemas_mces_golden.json | 7 +- ...thout_schemas_with_filter_mces_golden.json | 781 ++++++++++++++++++ .../tests/integration/dbt/test_dbt.py | 13 + 7 files changed, 831 insertions(+), 20 deletions(-) create mode 100644 metadata-ingestion/tests/integration/dbt/dbt_without_schemas_with_filter_mces_golden.json diff --git a/metadata-ingestion/source_docs/dbt.md b/metadata-ingestion/source_docs/dbt.md index 620fd592f3..27478edc09 100644 --- a/metadata-ingestion/source_docs/dbt.md +++ b/metadata-ingestion/source_docs/dbt.md @@ -30,6 +30,8 @@ This plugin pulls metadata from dbt's artifact files: - Prefix added to tags during ingestion. - node_type_pattern: - Use this filter to exclude and include node types using allow or deny method +- node_name_pattern: + - Use this filter to include or exclude individual nodes using allow or deny patterns. Patterns are matched against the node's dbt unique id (e.g. `source.sample_dbt.pagila.payment_p2020_06`), not its bare name. ## Quickstart recipe @@ -71,6 +73,10 @@ Note that a `.` is used to denote nested fields in the YAML recipe. | `node_type_pattern.allow` | | | List of regex patterns for dbt nodes to include in ingestion. | | `node_type_pattern.deny` | | | List of regex patterns for dbt nodes to exclude from ingestion. | | `node_type_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. | +| `node_name_pattern.allow` | | | List of regex patterns, matched against each dbt node's unique id (e.g. `source.sample_dbt.pagila.payment_p2020_06`), for nodes to include in ingestion. | +| `node_name_pattern.deny` | | | List of regex patterns, matched against each dbt node's unique id, for nodes to exclude from ingestion. 
| +| `node_name_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. | + ## Compatibility diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt.py b/metadata-ingestion/src/datahub/ingestion/source/dbt.py index 4763f11fb0..87992484eb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt.py @@ -59,6 +59,7 @@ class DBTConfig(ConfigModel): use_identifiers: bool = False node_type_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() tag_prefix: str = "dbt:" + node_name_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() @dataclass @@ -146,6 +147,7 @@ def extract_dbt_entities( environment: str, node_type_pattern: AllowDenyPattern, report: SourceReport, + node_name_pattern: AllowDenyPattern, ) -> List[DBTNode]: sources_by_id = {x["unique_id"]: x for x in sources_results} @@ -157,13 +159,15 @@ def extract_dbt_entities( continue name = manifest_node["name"] - if "identifier" in manifest_node and use_identifiers: name = manifest_node["identifier"] if manifest_node.get("alias") is not None: name = manifest_node["alias"] + if not node_name_pattern.allowed(key): + continue + # initialize comment to "" for consistency with descriptions # (since dbt null/undefined descriptions as "") comment = "" @@ -201,7 +205,10 @@ def extract_dbt_entities( catalog_type = all_catalog_entities[key]["metadata"]["type"] meta = manifest_node.get("meta", {}) + owner = meta.get("owner") + if owner is None: + owner = manifest_node.get("config", {}).get("meta", {}).get("owner") tags = manifest_node.get("tags", []) tags = [tag_prefix + tag for tag in tags] @@ -210,10 +217,10 @@ def extract_dbt_entities( dbt_name=key, database=manifest_node["database"], schema=manifest_node["schema"], + name=name, dbt_file_path=manifest_node["original_file_path"], node_type=manifest_node["resource_type"], max_loaded_at=sources_by_id.get(key, {}).get("max_loaded_at"), - name=name, 
comment=comment, description=manifest_node.get("description", ""), upstream_urns=upstream_urns, @@ -266,6 +273,7 @@ def loadManifestAndCatalog( environment: str, node_type_pattern: AllowDenyPattern, report: SourceReport, + node_name_pattern: AllowDenyPattern, ) -> Tuple[List[DBTNode], Optional[str], Optional[str], Optional[str], Optional[str]]: with open(manifest_path, "r") as manifest: dbt_manifest_json = json.load(manifest) @@ -307,16 +315,20 @@ def loadManifestAndCatalog( environment, node_type_pattern, report, + node_name_pattern, ) return nodes, manifest_schema, manifest_version, catalog_schema, catalog_version +def get_db_fqn(database: str, schema: str, name: str) -> str: + return f"{database}.{schema}.{name}".replace('"', "") + + def get_urn_from_dbtNode( database: str, schema: str, name: str, target_platform: str, env: str ) -> str: - - db_fqn = f"{database}.{schema}.{name}".replace('"', "") + db_fqn = get_db_fqn(database, schema, name) return f"urn:li:dataset:(urn:li:dataPlatform:{target_platform},{db_fqn},{env})" @@ -351,7 +363,6 @@ def get_upstreams( upstream_urns = [] for upstream in upstreams: - if "identifier" in all_nodes[upstream] and use_identifiers: name = all_nodes[upstream]["identifier"] else: @@ -517,6 +528,7 @@ class DBTSource(Source): self.config.env, self.config.node_type_pattern, self.report, + self.config.node_name_pattern, ) additional_custom_props = { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_manifest.json b/metadata-ingestion/tests/integration/dbt/dbt_manifest.json index 0e1599e44e..e9c1c8f266 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_manifest.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_manifest.json @@ -2998,7 +2998,10 @@ "quoting": {}, "schema": null, "tags": [], - "vars": {} + "vars": {}, + "meta": { + "owner": "@alice2" + } }, "database": "pagila", "deferred": false, @@ -3021,9 +3024,7 @@ "transform", "customer_details" ], - "meta": { - "owner": "@alice" - }, + "meta": {}, "name": 
"customer_details", "original_file_path": "models/transform/customer_details.sql", "package_name": "sample_dbt", @@ -3390,8 +3391,8 @@ "loader": "", "meta": { "model_maturity": "in dev", - "owner": "@alice", - "some_other_property": "test 1" + "some_other_property": "test 1", + "owner": "@alice1" }, "name": "actor", "original_file_path": "models/base.yml", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json index e3fc36d66f..df5c487453 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json @@ -8,7 +8,6 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "owner": "@alice", "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", @@ -29,7 +28,7 @@ "com.linkedin.pegasus2avro.common.Ownership": { "owners": [ { - "owner": "urn:li:corpuser:@alice", + "owner": "urn:li:corpuser:@alice2", "type": "DATAOWNER", "source": null } @@ -644,8 +643,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "model_maturity": "in dev", - "owner": "@alice", "some_other_property": "test 1", + "owner": "@alice1", "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", @@ -664,7 +663,7 @@ "com.linkedin.pegasus2avro.common.Ownership": { "owners": [ { - "owner": "urn:li:corpuser:@alice", + "owner": "urn:li:corpuser:@alice1", "type": "DATAOWNER", "source": null } @@ -2614,4 +2613,4 @@ "properties": null } } -] +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json index d6548fc9dc..37a39e8606 100644 --- 
a/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json @@ -8,7 +8,6 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "owner": "@alice", "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", @@ -29,7 +28,7 @@ "com.linkedin.pegasus2avro.common.Ownership": { "owners": [ { - "owner": "urn:li:corpuser:@alice", + "owner": "urn:li:corpuser:@alice2", "type": "DATAOWNER", "source": null } @@ -313,8 +312,8 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "model_maturity": "in dev", - "owner": "@alice", "some_other_property": "test 1", + "owner": "@alice1", "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", @@ -333,7 +332,7 @@ "com.linkedin.pegasus2avro.common.Ownership": { "owners": [ { - "owner": "urn:li:corpuser:@alice", + "owner": "urn:li:corpuser:@alice1", "type": "DATAOWNER", "source": null } diff --git a/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_with_filter_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_with_filter_mces_golden.json new file mode 100644 index 0000000000..5cdca6bc2f --- /dev/null +++ b/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_with_filter_mces_golden.json @@ -0,0 +1,781 @@ +[ +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.customer_details,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "ephemeral", + "dbt_file_path": "models/transform/customer_details.sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + 
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [ + "dbt:test_tag" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@alice2", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.common.GlobalTags": { + "tags": [ + { + "tag": "urn:li:tag:dbt:test_tag" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD)", + "type": "TRANSFORMED" + } + ] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "table", + "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", + "catalog_type": "BASE 
TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.customer_details,PROD)", + "type": "TRANSFORMED" + } + ] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "view", + "dbt_file_path": "models/base/payments_base.sql", + "catalog_type": "VIEW", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD)", + "type": "TRANSFORMED" + } + ] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + 
"node_type": "model", + "materialization": "table", + "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD)", + "type": "TRANSFORMED" + } + ] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in dev", + "some_other_property": "test 1", + "owner": "@alice1", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": "postgres comment: Actors table \u2013 from postgres\n\ndbt model description: description for actor table from dbt", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@alice1", + "type": "DATAOWNER", + "source": null + } 
+ ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": "a user's address", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.category,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": "a user's 
category", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.country,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "model_maturity": "in prod", + "owner": "@bob", + "some_other_property": "test 2", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + 
"uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@bob", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": "description for customer table from dbt", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": 
"https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "an_array_property": "['alpha', 'beta', 'charlie']", + "model_maturity": "in prod", + "owner": "@charles", + "some_other_property": "test 3", + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:@charles", + "type": "DATAOWNER", + "source": null + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/base.yml", + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" + }, + "externalUrl": null, + "description": "a payment", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [] + } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "dbt-test-without-schemas-with-filter", + "properties": null + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index e52c2a3756..f0f5470482 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -75,6 +75,19 @@ def test_dbt_ingest(pytestconfig, tmp_path, mock_time): "dbt_without_schemas_mces_golden.json", source_config_modifiers={"load_schemas": False}, ), + DbtTestConfig( + "dbt-test-without-schemas-with-filter", + test_resources_dir, + tmp_path, + "dbt_without_schemas_with_filter_mces.json", + "dbt_without_schemas_with_filter_mces_golden.json", + source_config_modifiers={ + "load_schemas": False, + "node_name_pattern": { + "deny": ["source.sample_dbt.pagila.payment_p2020_06"] + }, + }, + ), ] for config in config_variants: