2023-02-22 20:39:37 +05:30
|
|
|
"""
|
|
|
|
Test dbt
|
|
|
|
"""
|
|
|
|
|
|
|
|
import json
|
2023-02-23 19:17:54 +05:30
|
|
|
import uuid
|
|
|
|
from pathlib import Path
|
2023-02-22 20:39:37 +05:30
|
|
|
from unittest import TestCase
|
2023-03-29 12:41:44 +05:30
|
|
|
from unittest.mock import MagicMock, patch
|
2023-02-22 20:39:37 +05:30
|
|
|
|
2025-02-04 11:57:39 +05:30
|
|
|
from collate_dbt_artifacts_parser.parser import (
|
|
|
|
parse_catalog,
|
|
|
|
parse_manifest,
|
|
|
|
parse_run_results,
|
|
|
|
)
|
2023-02-22 20:39:37 +05:30
|
|
|
from pydantic import AnyUrl
|
|
|
|
|
2023-02-23 19:17:54 +05:30
|
|
|
from metadata.generated.schema.entity.data.table import Column, DataModel, Table
|
2023-02-22 20:39:37 +05:30
|
|
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
|
|
OpenMetadataWorkflowConfig,
|
|
|
|
)
|
|
|
|
from metadata.generated.schema.type.entityReference import EntityReference
|
2024-07-29 23:06:39 -07:00
|
|
|
from metadata.generated.schema.type.entityReferenceList import EntityReferenceList
|
2023-03-08 11:07:34 +05:30
|
|
|
from metadata.generated.schema.type.tagLabel import (
|
|
|
|
LabelType,
|
|
|
|
State,
|
|
|
|
TagLabel,
|
|
|
|
TagSource,
|
|
|
|
)
|
2023-08-30 15:49:42 +02:00
|
|
|
from metadata.ingestion.api.models import Either
|
2023-10-04 09:14:03 +02:00
|
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
2023-06-23 13:31:22 +05:30
|
|
|
from metadata.ingestion.source.database.dbt.dbt_utils import (
|
|
|
|
generate_entity_link,
|
|
|
|
get_corrected_name,
|
|
|
|
get_data_model_path,
|
|
|
|
get_dbt_compiled_query,
|
|
|
|
get_dbt_raw_query,
|
|
|
|
)
|
2023-02-22 20:39:37 +05:30
|
|
|
from metadata.ingestion.source.database.dbt.metadata import DbtSource
|
2023-06-26 11:30:35 +05:30
|
|
|
from metadata.ingestion.source.database.dbt.models import DbtFiles, DbtObjects
|
2023-08-30 15:49:42 +02:00
|
|
|
from metadata.utils.logger import ingestion_logger, set_loggers_level
|
2023-06-09 10:45:53 +05:30
|
|
|
from metadata.utils.tag_utils import get_tag_labels
|
2023-02-22 20:39:37 +05:30
|
|
|
|
2023-08-30 15:49:42 +02:00
|
|
|
logger = ingestion_logger()

# Value of the "jwtToken" security setting used by the mocked workflow.
# It is written as adjacent string literals purely to keep lines short.
_MOCK_JWT_TOKEN = (
    "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGc"
    "iOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE"
    "2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXB"
    "iEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fN"
    "r3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3u"
    "d-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
)

# Paths of the local dbt artifact files referenced by the mocked source.
_MOCK_DBT_CONFIG_SOURCE = {
    "dbtConfigType": "local",
    "dbtCatalogFilePath": "sample/dbt_files/catalog.json",
    "dbtManifestFilePath": "sample/dbt_files/manifest.json",
    "dbtRunResultsFilePath": "sample/dbt_files/run_results.json",
    "dbtSourcesFilePath": "sample/dbt_files/sources.json",
}

# Workflow configuration used to build the DbtSource under test: a dbt
# source reading local artifact files, a REST sink and the server settings.
mock_dbt_config = {
    "source": {
        "type": "dbt",
        "serviceName": "dbt_test",
        "sourceConfig": {
            "config": {
                "type": "DBT",
                "dbtConfigSource": _MOCK_DBT_CONFIG_SOURCE,
                "dbtUpdateOwners": True,
            }
        },
    },
    "sink": {"type": "metadata-rest", "config": {}},
    "workflowConfig": {
        "loggerLevel": "DEBUG",
        "openMetadataServerConfig": {
            "hostPort": "http://localhost:8585/api",
            "authProvider": "openmetadata",
            "securityConfig": {"jwtToken": _MOCK_JWT_TOKEN},
        },
    },
}
|
|
|
|
|
2023-02-23 19:17:54 +05:30
|
|
|
# Directory (relative to this test module) holding the sample manifests.
_MANIFEST_DIR = "resources/datasets"

# One sample manifest per supported dbt manifest flavour / scenario.
MOCK_SAMPLE_MANIFEST_V4_V5_V6 = f"{_MANIFEST_DIR}/manifest_v4_v5_v6.json"

MOCK_SAMPLE_MANIFEST_V7 = f"{_MANIFEST_DIR}/manifest_v7.json"

MOCK_SAMPLE_MANIFEST_V8 = f"{_MANIFEST_DIR}/manifest_v8.json"

MOCK_SAMPLE_MANIFEST_VERSIONLESS = f"{_MANIFEST_DIR}/manifest_versionless.json"

MOCK_SAMPLE_MANIFEST_NULL_DB = f"{_MANIFEST_DIR}/manifest_null_db.json"

MOCK_SAMPLE_MANIFEST_TEST_NODE = f"{_MANIFEST_DIR}/manifest_test_node.json"


# Fully qualified names that a yielded data model's table may carry.
EXPECTED_DATA_MODEL_FQNS = [
    f"dbt_test.dev.dbt_jaffle.{table_name}"
    for table_name in (
        "customers",
        "orders",
        "stg_customers",
        "customers_null_db",
    )
]
|
|
|
|
|
|
|
|
# Data model expected for the "customers" node of the v4/v5/v6, v7 and v8
# sample manifests (all three tests compare against this list).
EXPECTED_DATA_MODELS = [
    DataModel(
        modelType="DBT",
        description=(
            "This table has basic information about a customer, "
            "as well as some derived facts based on a customer's orders"
        ),
        path="sample/customers/root/path/models/customers.sql",
        rawSql="sample customers raw code",
        resourceType="model",
        sql="sample customers compile code",
        upstream=[],
        owners=EntityReferenceList(
            root=[
                EntityReference(
                    id="cb2a92f5-e935-4ad7-911c-654280046538",
                    type="user",
                    name=None,
                    fullyQualifiedName="aaron_johnson0",
                    description=None,
                    displayName=None,
                    deleted=None,
                    href=AnyUrl(
                        "http://localhost:8585/api/v1/users/cb2a92f5-e935-4ad7-911c-654280046538",
                    ),
                )
            ]
        ),
        # Every tag shares the same attributes; only the FQN varies.
        tags=[
            TagLabel(
                tagFQN=tag_fqn,
                description=None,
                source="Classification",
                labelType="Automated",
                state="Suggested",
                href=None,
            )
            for tag_fqn in (
                "dbtTags.model_tag_one",
                "dbtTags.model_tag_two",
                'dbtTags."22.8.5.1"',
            )
        ],
        # Every column shares type and length; name and description vary.
        columns=[
            Column(
                name=column_name,
                dataType="UNKNOWN",
                dataLength=1,
                description=column_description,
            )
            for column_name, column_description in (
                ("customer_id", "This is a unique identifier for a customer"),
                ("first_name", "Customer's first name. PII."),
                ("last_name", "Customer's last name. PII."),
            )
        ],
        generatedAt=None,
    )
]
|
|
|
|
|
2023-02-23 19:17:54 +05:30
|
|
|
# Owner list expected on the null-database data model.
_NULL_DB_EXPECTED_OWNERS = EntityReferenceList(
    root=[
        EntityReference(
            id="cb2a92f5-e935-4ad7-911c-654280046538",
            type="user",
            name=None,
            fullyQualifiedName="aaron_johnson0",
            description=None,
            displayName=None,
            deleted=None,
            href=AnyUrl(
                "http://localhost:8585/api/v1/users/cb2a92f5-e935-4ad7-911c-654280046538",
            ),
        )
    ]
)

# The single column expected on the null-database data model.
_NULL_DB_EXPECTED_COLUMNS = [
    Column(
        name="customer_id",
        displayName=None,
        dataType="UNKNOWN",
        dataLength=1,
        description="This is a unique identifier for an customer",
    )
]

# Data model expected for the sample manifest used by the null-db test.
EXPECTED_DATA_MODEL_NULL_DB = [
    DataModel(
        modelType="DBT",
        description=None,
        path="sample/customers_null_db/root/path/models/staging/customers_null_db.sql",
        rawSql="sample customers_null_db raw_code",
        resourceType="model",
        sql="sample customers_null_db compiled code",
        upstream=[],
        owners=_NULL_DB_EXPECTED_OWNERS,
        tags=[],
        columns=_NULL_DB_EXPECTED_COLUMNS,
        generatedAt=None,
    ),
]
|
|
|
|
|
2024-09-27 19:53:27 +05:30
|
|
|
# Data model expected for the "customers" node of the versionless manifest.
EXPECTED_DATA_MODEL_VERSIONLESS = [
    DataModel(
        modelType="DBT",
        resourceType="model",
        description=(
            "This table has basic information about a customer, "
            "as well as some derived facts based on a customer's orders"
        ),
        path="models/customers.sql",
        rawSql="sample customers raw code",
        sql="sample customers compile code",
        # The same table FQN is expected three times.
        upstream=["dbt_test.dev.dbt_jaffle.customers"] * 3,
        owners=EntityReferenceList(
            root=[
                EntityReference(
                    id="cb2a92f5-e935-4ad7-911c-654280046538",
                    type="user",
                    fullyQualifiedName="aaron_johnson0",
                    href=AnyUrl(
                        "http://localhost:8585/api/v1/users/cb2a92f5-e935-4ad7-911c-654280046538"
                    ),
                )
            ]
        ),
        tags=[
            TagLabel(
                tagFQN=tag_fqn,
                source="Classification",
                labelType="Automated",
                state="Suggested",
            )
            for tag_fqn in ("dbtTags.model_tag_one", "dbtTags.model_tag_two")
        ],
        # Every column shares type and length; name and description vary.
        columns=[
            Column(
                name=column_name,
                dataType="UNKNOWN",
                dataLength=1,
                description=column_description,
            )
            for column_name, column_description in (
                ("customer_id", "This is a unique identifier for a customer"),
                ("first_name", "Customer's first name. PII."),
                ("last_name", "Customer's last name. PII."),
                ("first_order", "Date (UTC) of a customer's first order"),
                (
                    "most_recent_order",
                    "Date (UTC) of a customer's most recent order",
                ),
                (
                    "number_of_orders",
                    "Count of the number of orders a customer has placed",
                ),
                ("total_order_amount", "Total value (AUD) of a customer's orders"),
            )
        ],
    )
]
|
|
|
|
|
2024-07-29 23:06:39 -07:00
|
|
|
# Identifier used both as the mocked owner's ``id`` and in its ``href``.
_MOCK_OWNER_ID = "cb2a92f5-e935-4ad7-911c-654280046538"

# Owner list returned by the patched DbtSource.get_dbt_owner in the tests.
MOCK_OWNER = EntityReferenceList(
    root=[
        EntityReference(
            id=_MOCK_OWNER_ID,
            type="user",
            name=None,
            fullyQualifiedName="aaron_johnson0",
            description=None,
            displayName=None,
            deleted=None,
            href=AnyUrl(
                f"http://localhost:8585/api/v1/users/{_MOCK_OWNER_ID}",
            ),
        )
    ]
)

# User reference returned by the patched get_reference_by_name lookup.
# Note that the id embedded in ``href`` is not the same as ``id``.
MOCK_USER = EntityReference(
    id="70064aef-f085-4658-a11a-b5f46568e980",
    type="user",
    name="aaron_johnson0",
    fullyQualifiedName="aaron_johnson0",
    href="http://localhost:8585/api/v1/users/d96eccb9-9a9b-40ad-9585-0a8a71665c51",
)
|
2023-03-08 11:07:34 +05:30
|
|
|
|
|
|
|
|
|
|
|
def _build_mock_table(table_name):
    """Return a column-less Table named *table_name* under dbt_test.dev.dbt_jaffle.

    Both the table id and the schema reference id are freshly generated
    random UUIDs.
    """
    return Table(
        id=uuid.uuid4(),
        name=table_name,
        databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
        fullyQualifiedName=f"dbt_test.dev.dbt_jaffle.{table_name}",
        columns=[],
    )


# Table entities handed back by the patched es_search_from_fqn lookup.
MOCK_TABLE_ENTITIES = [_build_mock_table("customers")]

MOCK_NULL_DB_TABLE = [_build_mock_table("customers_null_db")]
|
|
|
|
|
|
|
|
# Classification tag labels expected from get_tag_labels for the
# "dbtTags" classification; the dotted tag name appears quoted in its FQN.
MOCK_TAG_LABELS = [
    TagLabel(
        tagFQN=tag_fqn,
        labelType=LabelType.Automated,
        state=State.Suggested,
        source=TagSource.Classification,
    )
    for tag_fqn in ("dbtTags.tag1", 'dbtTags."tag2.name"', "dbtTags.tag3")
]

# Glossary term labels expected from DbtSource.process_dbt_meta.
# (The misspelled name is kept because other code refers to it.)
MOCK_GLOASSARY_LABELS = [
    TagLabel(
        tagFQN=term_fqn,
        labelType=LabelType.Automated,
        state=State.Suggested,
        source=TagSource.Glossary,
    )
    for term_fqn in (
        "Test_Glossary.term_one",
        "Test_Glossary.term_two.nested_term.more_nested_term",
    )
]
|
|
|
|
|
2023-02-22 20:39:37 +05:30
|
|
|
|
|
|
|
class DbtUnitTest(TestCase):
    """
    Unit tests for the dbt ingestion source.

    Sample manifest files stored next to this module are loaded, parsed and
    passed through a ``DbtSource`` instance; the yielded data models, tag
    labels, owners, queries and upstream nodes are compared with the
    expected values defined at module level.
    """

    # Manifest node inspected by most of the single-node tests.
    _CUSTOMERS_NODE = "model.jaffle_shop.customers"

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.test_connection")
    def __init__(self, methodName, test_connection) -> None:
        """
        Build the ``DbtSource`` shared by every test method.

        ``DbtSource.test_connection`` is patched for the duration of the
        constructor and made to return ``False`` (presumably so creating the
        source does not attempt a real connection check).
        """
        super().__init__(methodName)
        test_connection.return_value = False
        self.config = OpenMetadataWorkflowConfig.model_validate(mock_dbt_config)
        self.dbt_source_obj = DbtSource.create(
            mock_dbt_config["source"],
            OpenMetadata(self.config.workflowConfig.openMetadataServerConfig),
        )
        # check_dbt_validate counts DEBUG records, so DEBUG must be enabled.
        set_loggers_level("DEBUG")

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    @patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
    def test_dbt_manifest_v4_v5_v6(self, es_search_from_fqn, get_dbt_owner):
        """Validate and yield data models from the v4/v5/v6 sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        # NOTE(review): ``side_effect`` with a list hands out one element per
        # call, whereas the v8/versionless tests use ``return_value`` (the
        # whole list on every call) — confirm the difference is intended.
        es_search_from_fqn.side_effect = MOCK_TABLE_ENTITIES
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_V4_V5_V6,
            expected_records=2,
            expected_data_models=EXPECTED_DATA_MODELS,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    @patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
    def test_dbt_manifest_v7(self, es_search_from_fqn, get_dbt_owner):
        """Validate and yield data models from the v7 sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        # NOTE(review): see test_dbt_manifest_v4_v5_v6 about ``side_effect``.
        es_search_from_fqn.side_effect = MOCK_TABLE_ENTITIES
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_V7,
            expected_records=2,
            expected_data_models=EXPECTED_DATA_MODELS,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    @patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
    @patch("metadata.utils.tag_utils.get_tag_label")
    def test_dbt_manifest_v8(self, get_tag_label, es_search_from_fqn, get_dbt_owner):
        """Validate and yield data models from the v8 sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        es_search_from_fqn.return_value = MOCK_TABLE_ENTITIES
        get_tag_label.side_effect = self._automated_labels(
            [
                "dbtTags.model_tag_one",
                "dbtTags.model_tag_two",
                'dbtTags."22.8.5.1"',
            ],
            TagSource.Classification,
        )
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_V8,
            expected_records=2,
            expected_data_models=EXPECTED_DATA_MODELS,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    @patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
    @patch("metadata.utils.tag_utils.get_tag_label")
    def test_dbt_manifest_versionless(
        self, get_tag_label, es_search_from_fqn, get_dbt_owner
    ):
        """Validate and yield data models from the versionless sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        es_search_from_fqn.return_value = MOCK_TABLE_ENTITIES
        get_tag_label.side_effect = self._automated_labels(
            ["dbtTags.model_tag_one", "dbtTags.model_tag_two"],
            TagSource.Classification,
        )
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_VERSIONLESS,
            expected_records=9,
            expected_data_models=EXPECTED_DATA_MODEL_VERSIONLESS,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    @patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
    def test_dbt_manifest_null_db(self, es_search_from_fqn, get_dbt_owner):
        """Validate and yield data models from the null-db sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        es_search_from_fqn.return_value = MOCK_NULL_DB_TABLE
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_NULL_DB,
            expected_records=2,
            expected_data_models=EXPECTED_DATA_MODEL_NULL_DB,
        )

    def test_dbt_get_corrected_name(self):
        """``get_corrected_name`` keeps real names and maps "None"/"null" to None."""
        self.assertEqual("dbt_jaffle", get_corrected_name(name="dbt_jaffle"))
        self.assertIsNone(get_corrected_name(name="None"))
        self.assertIsNone(get_corrected_name(name="null"))
        self.assertIsNotNone(get_corrected_name(name="dev"))

    @patch("metadata.utils.tag_utils.get_tag_label")
    def test_dbt_get_dbt_tag_labels(self, get_tag_label):
        """``get_tag_labels`` returns one label per requested tag, in order."""
        get_tag_label.side_effect = self._automated_labels(
            ["dbtTags.tag1", 'dbtTags."tag2.name"', "dbtTags.tag3"],
            TagSource.Classification,
        )

        mocked_metadata = MagicMock()
        result = get_tag_labels(
            metadata=mocked_metadata,
            classification_name="dbtTags",
            tags=["tag1", "tag2.name", "tag3"],
            include_tags=True,
        )
        self.assertListEqual(result, MOCK_TAG_LABELS)

    def test_dbt_get_data_model_path(self):
        """``get_data_model_path`` returns the model path for the v8 customers node."""
        manifest_node = self._get_manifest_node(MOCK_SAMPLE_MANIFEST_V8)
        result = get_data_model_path(manifest_node=manifest_node)
        self.assertEqual("sample/customers/root/path/models/customers.sql", result)

    def test_dbt_generate_entity_link(self):
        """``generate_entity_link`` builds a column entity link for a dbt test node."""
        manifest_node = self._get_manifest_node(
            MOCK_SAMPLE_MANIFEST_TEST_NODE,
            node_id="test.jaffle_shop.unique_orders_order_id.fed79b3a6e",
        )
        dbt_test = {
            "manifest_node": manifest_node,
            "upstream": ["local_redshift_dbt2.dev.dbt_jaffle.stg_customers"],
            "results": "",
        }
        result = generate_entity_link(dbt_test=dbt_test)
        self.assertListEqual(
            [
                "<#E::table::local_redshift_dbt2.dev.dbt_jaffle.stg_customers::columns::order_id>"
            ],
            result,
        )

    def test_dbt_compiled_query(self):
        """The compiled query is read from the v8 and the v4/v5/v6 manifests."""
        expected_query = "sample customers compile code"

        for mock_manifest in (
            MOCK_SAMPLE_MANIFEST_V8,
            MOCK_SAMPLE_MANIFEST_V4_V5_V6,
        ):
            manifest_node = self._get_manifest_node(mock_manifest)
            result = get_dbt_compiled_query(mnode=manifest_node)
            # ``msg`` names the manifest so a failure is easy to attribute.
            self.assertEqual(expected_query, result, msg=mock_manifest)

    def test_dbt_raw_query(self):
        """The raw query is read from the v8, v4/v5/v6 and versionless manifests."""
        expected_query = "sample customers raw code"

        for mock_manifest in (
            MOCK_SAMPLE_MANIFEST_V8,
            MOCK_SAMPLE_MANIFEST_V4_V5_V6,
            MOCK_SAMPLE_MANIFEST_VERSIONLESS,
        ):
            manifest_node = self._get_manifest_node(mock_manifest)
            result = get_dbt_raw_query(mnode=manifest_node)
            # ``msg`` names the manifest so a failure is easy to attribute.
            self.assertEqual(expected_query, result, msg=mock_manifest)

    @patch(
        "metadata.ingestion.ometa.mixins.user_mixin.OMetaUserMixin.get_reference_by_name"
    )
    def test_dbt_owner(self, get_reference_by_name):
        """
        ``get_dbt_owner`` returns the reference supplied by the user lookup.

        NOTE(review): the previous docstring said this test requires the
        sample data to be properly indexed; ``get_reference_by_name`` is
        mocked here, so confirm whether that requirement still applies.
        """
        get_reference_by_name.return_value = MOCK_USER
        manifest_node = self._get_manifest_node(MOCK_SAMPLE_MANIFEST_V8)
        result = self.dbt_source_obj.get_dbt_owner(
            manifest_node=manifest_node, catalog_node=None
        )
        self.assertEqual(
            "70064aef-f085-4658-a11a-b5f46568e980", str(result.id.root)
        )

    def execute_test(self, mock_manifest, expected_records, expected_data_models):
        """
        Run the validation and data-model checks for one sample manifest.

        Args:
            mock_manifest: manifest path relative to this module's directory.
            expected_records: number of log records validation must emit.
            expected_data_models: data models the source is expected to yield.
        """
        dbt_files, dbt_objects = self.get_dbt_object_files(mock_manifest)
        self.check_dbt_validate(dbt_files=dbt_files, expected_records=expected_records)
        self.check_yield_datamodel(
            dbt_objects=dbt_objects, expected_data_models=expected_data_models
        )

    def get_dbt_object_files(self, mock_manifest):
        """
        Load a sample manifest and wrap it in the dbt file/object models.

        Args:
            mock_manifest: manifest path relative to this module's directory.

        Returns:
            A ``(DbtFiles, DbtObjects)`` tuple. Only the manifest is supplied,
            so the catalog and run results are parsed only when ``DbtFiles``
            reports a truthy value for them and are ``None`` otherwise.
        """
        mock_file_path = Path(__file__).parent / mock_manifest
        # Explicit encoding: the default depends on the platform locale.
        with open(mock_file_path, encoding="utf-8") as file:
            mock_data: dict = json.load(file)
        self.dbt_source_obj.remove_manifest_non_required_keys(manifest_dict=mock_data)
        dbt_files = DbtFiles(dbt_manifest=mock_data)
        dbt_objects = DbtObjects(
            dbt_catalog=parse_catalog(dbt_files.dbt_catalog)
            if dbt_files.dbt_catalog
            else None,
            dbt_manifest=parse_manifest(dbt_files.dbt_manifest),
            dbt_run_results=[parse_run_results(dbt_files.dbt_run_results)]
            if dbt_files.dbt_run_results
            else None,
        )
        return dbt_files, dbt_objects

    def _get_manifest_node(self, mock_manifest, node_id=_CUSTOMERS_NODE):
        """Return node *node_id* (or None if absent) from the parsed *mock_manifest*."""
        _, dbt_objects = self.get_dbt_object_files(mock_manifest=mock_manifest)
        return dbt_objects.dbt_manifest.nodes.get(node_id)

    @staticmethod
    def _automated_labels(tag_fqns, source):
        """Build automated/suggested TagLabels for *tag_fqns* with the given *source*."""
        return [
            TagLabel(
                tagFQN=tag_fqn,
                labelType=LabelType.Automated.value,
                state=State.Suggested.value,
                source=source.value,
            )
            for tag_fqn in tag_fqns
        ]

    def check_dbt_validate(self, dbt_files, expected_records):
        """
        Validate the dbt files and inspect the log records produced.

        Exactly ``expected_records`` records must be captured at DEBUG level
        or above, and none may contain "Error" or "Unable".
        """
        with self.assertLogs(level="DEBUG", logger=logger) as captured:
            self.dbt_source_obj.validate_dbt_files(dbt_files=dbt_files)
        self.assertEqual(len(captured.records), expected_records)
        for record in captured.records:
            self.assertNotIn("Error", record.getMessage())
            self.assertNotIn("Unable", record.getMessage())

    def check_yield_datamodel(self, dbt_objects, expected_data_models):
        """
        Compare the data models yielded by the source with the expected ones.

        Each successful result must refer to a table listed in
        ``EXPECTED_DATA_MODEL_FQNS`` and pass the owner check. Models are then
        compared pairwise in yield order; ``zip`` stops at the shorter list,
        so surplus items on either side are not compared.
        """
        data_model_list = []
        yield_data_models = self.dbt_source_obj.yield_data_models(
            dbt_objects=dbt_objects
        )
        for data_model_link in yield_data_models:
            if isinstance(data_model_link, Either) and data_model_link.right:
                self.assertIn(
                    data_model_link.right.table_entity.fullyQualifiedName.root,
                    EXPECTED_DATA_MODEL_FQNS,
                )
                self.check_process_dbt_owners(data_model_link.right)
                data_model_list.append(data_model_link.right.datamodel)

        for expected, original in zip(expected_data_models, data_model_list):
            self.assertEqual(expected, original)

    def check_process_dbt_owners(self, data_model_link):
        """Every entity yielded by ``process_dbt_owners`` must carry ``MOCK_OWNER``."""
        process_dbt_owners = self.dbt_source_obj.process_dbt_owners(data_model_link)
        for entity in process_dbt_owners:
            entity_owner = entity.right.new_entity.owners
            self.assertEqual(entity_owner, MOCK_OWNER)

    @patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
    def test_upstream_nodes_for_lineage(self, es_search_from_fqn):
        """The customers node lists the same upstream nodes in every manifest flavour."""
        expected_upstream_nodes = [
            "model.jaffle_shop.stg_customers",
            "model.jaffle_shop.stg_orders",
            "model.jaffle_shop.stg_payments",
        ]
        es_search_from_fqn.return_value = MOCK_TABLE_ENTITIES

        # Check the upstream nodes with the V4/V5/V6, V7 and versionless manifests.
        for mock_manifest in (
            MOCK_SAMPLE_MANIFEST_V4_V5_V6,
            MOCK_SAMPLE_MANIFEST_V7,
            MOCK_SAMPLE_MANIFEST_VERSIONLESS,
        ):
            upstream_nodes = self._get_manifest_node(mock_manifest).depends_on.nodes
            # ``msg`` names the manifest so a failure is easy to attribute.
            self.assertEqual(
                expected_upstream_nodes, upstream_nodes, msg=mock_manifest
            )

    @patch("metadata.utils.tag_utils.get_tag_label")
    def test_dbt_glossary_tiers(self, get_tag_label):
        """``process_dbt_meta`` returns the glossary labels for the node's meta."""
        get_tag_label.side_effect = self._automated_labels(
            [
                "Test_Glossary.term_one",
                "Test_Glossary.term_two.nested_term.more_nested_term",
            ],
            TagSource.Glossary,
        )

        manifest_node = self._get_manifest_node(MOCK_SAMPLE_MANIFEST_V8)
        dbt_meta_tags = self.dbt_source_obj.process_dbt_meta(
            manifest_meta=manifest_node.meta
        )

        self.assertEqual(dbt_meta_tags, MOCK_GLOASSARY_LABELS)
|