2023-02-22 20:39:37 +05:30
|
|
|
"""
|
|
|
|
Test dbt
|
|
|
|
"""
|
|
|
|
|
|
|
|
import json
|
2023-02-23 19:17:54 +05:30
|
|
|
import uuid
|
|
|
|
from pathlib import Path
|
2023-02-22 20:39:37 +05:30
|
|
|
from unittest import TestCase
|
|
|
|
from unittest.mock import patch
|
|
|
|
|
|
|
|
from dbt_artifacts_parser.parser import parse_catalog, parse_manifest, parse_run_results
|
|
|
|
from pydantic import AnyUrl
|
|
|
|
|
2023-02-23 19:17:54 +05:30
|
|
|
from metadata.generated.schema.entity.data.table import Column, DataModel, Table
|
2023-02-22 20:39:37 +05:30
|
|
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
|
|
OpenMetadataWorkflowConfig,
|
|
|
|
)
|
|
|
|
from metadata.generated.schema.type.entityReference import EntityReference
|
|
|
|
from metadata.generated.schema.type.tagLabel import TagLabel
|
|
|
|
from metadata.ingestion.source.database.database_service import DataModelLink
|
|
|
|
from metadata.ingestion.source.database.dbt.metadata import DbtSource
|
|
|
|
from metadata.utils.dbt_config import DbtFiles, DbtObjects
|
|
|
|
|
|
|
|
# Workflow configuration used to build the DbtSource under test.
# The dbt file paths are placeholders: the tests load manifests themselves
# and hand the parsed content to the source directly.
mock_dbt_config = {
    "source": {
        "type": "dbt",
        "serviceName": "dbt_test",
        "sourceConfig": {
            "config": {
                "type": "DBT",
                "dbtConfigSource": {
                    "dbtCatalogFilePath": "sample/dbt_files/catalog.json",
                    "dbtManifestFilePath": "sample/dbt_files/manifest.json",
                    "dbtRunResultsFilePath": "sample/dbt_files/run_results.json",
                },
            }
        },
    },
    "sink": {"type": "metadata-rest", "config": {}},
    "workflowConfig": {
        "openMetadataServerConfig": {
            "hostPort": "http://localhost:8585/api",
            "authProvider": "openmetadata",
            "securityConfig": {
                # One token split across adjacent literals; the parentheses make
                # it explicit that this is a single concatenated string.
                "jwtToken": (
                    "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGc"
                    "iOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE"
                    "2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXB"
                    "iEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fN"
                    "r3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3u"
                    "d-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
                ),
            },
        }
    },
}
|
|
|
|
|
2023-02-23 19:17:54 +05:30
|
|
|
# Manifest fixtures; the tests join these paths onto this file's directory
# before opening them.
MOCK_SAMPLE_MANIFEST_V4_V5_V6 = "resources/datasets/manifest_v4_v5_v6.json"

MOCK_SAMPLE_MANIFEST_V7 = "resources/datasets/manifest_v7.json"

MOCK_SAMPLE_MANIFEST_V8 = "resources/datasets/manifest_v8.json"

MOCK_SAMPLE_MANIFEST_NULL_DB = "resources/datasets/manifest_null_db.json"
|
2023-02-22 20:39:37 +05:30
|
|
|
|
|
|
|
|
|
|
|
# Fully qualified names that a yielded data model link is allowed to carry.
EXPECTED_DATA_MODEL_FQNS = [
    "dbt_test.dev.dbt_jaffle.customers",
    "dbt_test.dev.dbt_jaffle.orders",
    "dbt_test.dev.dbt_jaffle.stg_customers",
    "dbt_test.dev.dbt_jaffle.customers_null_db",
]
|
|
|
|
|
|
|
|
# Owner handed back by the patched DbtSource.get_dbt_owner in every test.
# It is defined ahead of the expected fixtures so they can reuse it instead
# of repeating the same literal for each model.
MOCK_OWNER = EntityReference(
    id="cb2a92f5-e935-4ad7-911c-654280046538",
    type="user",
    name=None,
    fullyQualifiedName="aaron_johnson0",
    description=None,
    displayName=None,
    deleted=None,
    href=AnyUrl(
        "http://localhost:8585/api/v1/users/cb2a92f5-e935-4ad7-911c-654280046538",
        scheme="http",
        host="localhost",
        host_type="int_domain",
        port="8585",
        path="/api/v1/users/cb2a92f5-e935-4ad7-911c-654280046538",
    ),
)

# Data models expected from the v4/v5/v6, v7 and v8 sample manifests,
# compared pairwise (in this order) with what the source yields.
EXPECTED_DATA_MODELS = [
    DataModel(
        modelType="DBT",
        description="This table has basic information about a customer, as well as some derived facts based on a customer's orders",
        path="sample/customers/root/path/models/customers.sql",
        rawSql="sample customers raw code",
        sql="sample customers compile code",
        upstream=["dbt_test.dev.dbt_jaffle.stg_customers"],
        owner=MOCK_OWNER,
        tags=[
            TagLabel(
                tagFQN="dbtTags.model_tag_one",
                description=None,
                source="Tag",
                labelType="Automated",
                state="Confirmed",
                href=None,
            ),
            TagLabel(
                tagFQN="dbtTags.model_tag_two",
                description=None,
                source="Tag",
                labelType="Automated",
                state="Confirmed",
                href=None,
            ),
        ],
        columns=[
            Column(
                name="customer_id",
                dataType="VARCHAR",
                dataLength=1,
                description="This is a unique identifier for a customer",
            ),
            Column(
                name="first_name",
                dataType="VARCHAR",
                dataLength=1,
                description="Customer's first name. PII.",
            ),
            Column(
                name="last_name",
                dataType="VARCHAR",
                dataLength=1,
                description="Customer's last name. PII.",
            ),
        ],
        generatedAt=None,
    ),
    DataModel(
        modelType="DBT",
        description="This table has basic information about orders, as well as some derived facts based on payments",
        path="sample/orders/root/path/models/orders.sql",
        rawSql="sample raw orders code",
        sql="sample compiled code",
        upstream=[],
        owner=MOCK_OWNER,
        tags=[
            TagLabel(
                tagFQN="dbtTags.single_tag",
                description=None,
                source="Tag",
                labelType="Automated",
                state="Confirmed",
                href=None,
            )
        ],
        columns=[
            Column(
                name="order_id",
                displayName=None,
                dataType="VARCHAR",
                dataLength=1,
                description="This is a unique identifier for an order",
            ),
            Column(
                name="customer_id",
                displayName=None,
                dataType="VARCHAR",
                dataLength=1,
                description="Foreign key to the customers table",
            ),
        ],
        generatedAt=None,
    ),
    DataModel(
        modelType="DBT",
        description=None,
        path="sample/stg_customers/root/path/models/staging/stg_customers.sql",
        rawSql="sample stg_customers raw_code",
        sql="sample stg_customers compiled code",
        upstream=[],
        owner=MOCK_OWNER,
        tags=None,
        columns=[
            Column(
                name="customer_id",
                displayName=None,
                dataType="VARCHAR",
                dataLength=1,
                description="This is a unique identifier for an customer",
            )
        ],
        generatedAt=None,
    ),
]

# Data model expected from the manifest used by the null-database test.
EXPECTED_DATA_MODEL_NULL_DB = [
    DataModel(
        modelType="DBT",
        description=None,
        path="sample/customers_null_db/root/path/models/staging/customers_null_db.sql",
        rawSql="sample customers_null_db raw_code",
        sql="sample customers_null_db compiled code",
        upstream=[],
        owner=MOCK_OWNER,
        tags=None,
        columns=[
            Column(
                name="customer_id",
                displayName=None,
                dataType="VARCHAR",
                dataLength=1,
                description="This is a unique identifier for an customer",
            )
        ],
        generatedAt=None,
    ),
]
|
|
|
|
|
2023-02-23 19:17:54 +05:30
|
|
|
# Search result returned by the patched ESMixin.es_search_from_fqn in the
# null-database test. Ids are random; only the fully qualified name is fixed.
MOCK_NULL_DB_TABLE = [
    Table(
        id=uuid.uuid4(),
        name="test",
        databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
        fullyQualifiedName="dbt_test.dev.dbt_jaffle.customers_null_db",
        columns=[],
    )
]
|
|
|
|
|
2023-02-22 20:39:37 +05:30
|
|
|
|
|
|
|
class DbtUnitTest(TestCase):
    """
    Unit tests for the dbt source.

    Each test loads a sample manifest, validates it through
    ``DbtSource.validate_dbt_files`` and compares the data models produced
    by ``DbtSource.yield_data_models`` with the expected fixtures.
    """

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.test_connection")
    def __init__(self, methodName, test_connection) -> None:
        """
        Build the workflow config and the dbt source used by every test.

        ``DbtSource.test_connection`` is patched for the duration of this
        constructor, i.e. while ``DbtSource.create`` runs.
        """
        super().__init__(methodName)
        test_connection.return_value = False
        self.config = OpenMetadataWorkflowConfig.parse_obj(mock_dbt_config)
        self.dbt_source_obj = DbtSource.create(
            mock_dbt_config["source"],
            self.config.workflowConfig.openMetadataServerConfig,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    def test_dbt_manifest_v4_v5_v6(self, get_dbt_owner):
        """Run the shared checks against the v4/v5/v6 sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_V4_V5_V6,
            expected_records=4,
            expected_data_models=EXPECTED_DATA_MODELS,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    def test_dbt_manifest_v7(self, get_dbt_owner):
        """Run the shared checks against the v7 sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_V7,
            expected_records=4,
            expected_data_models=EXPECTED_DATA_MODELS,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    def test_dbt_manifest_v8(self, get_dbt_owner):
        """Run the shared checks against the v8 sample manifest."""
        get_dbt_owner.return_value = MOCK_OWNER
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_V8,
            expected_records=4,
            expected_data_models=EXPECTED_DATA_MODELS,
        )

    @patch("metadata.ingestion.source.database.dbt.metadata.DbtSource.get_dbt_owner")
    @patch("metadata.ingestion.ometa.mixins.es_mixin.ESMixin.es_search_from_fqn")
    def test_dbt_manifest_null_db(self, es_search_from_fqn, get_dbt_owner):
        """
        Run the shared checks against the null-database sample manifest,
        with the search lookup patched to return ``MOCK_NULL_DB_TABLE``.
        """
        get_dbt_owner.return_value = MOCK_OWNER
        es_search_from_fqn.return_value = MOCK_NULL_DB_TABLE
        self.execute_test(
            MOCK_SAMPLE_MANIFEST_NULL_DB,
            expected_records=2,
            expected_data_models=EXPECTED_DATA_MODEL_NULL_DB,
        )

    def execute_test(self, mock_manifest, expected_records, expected_data_models):
        """
        Load a manifest fixture and run both checks on it.

        Args:
            mock_manifest: manifest path relative to this file's directory.
            expected_records: number of log records validation must emit.
            expected_data_models: data models to compare with the yielded ones.
        """
        mock_file_path = Path(__file__).parent / mock_manifest
        # Read the JSON fixture as UTF-8 explicitly rather than relying on
        # the platform's default locale encoding.
        with open(mock_file_path, encoding="utf-8") as file:
            mock_data: dict = json.load(file)
        dbt_files = DbtFiles(dbt_manifest=mock_data)
        # Only the manifest is supplied above, so catalog and run results are
        # parsed only when DbtFiles actually carries them.
        dbt_objects = DbtObjects(
            dbt_catalog=parse_catalog(dbt_files.dbt_catalog)
            if dbt_files.dbt_catalog
            else None,
            dbt_manifest=parse_manifest(dbt_files.dbt_manifest),
            dbt_run_results=parse_run_results(dbt_files.dbt_run_results)
            if dbt_files.dbt_run_results
            else None,
        )
        self.check_dbt_validate(dbt_files=dbt_files, expected_records=expected_records)
        self.check_yield_datamodel(
            dbt_objects=dbt_objects, expected_data_models=expected_data_models
        )

    def check_dbt_validate(self, dbt_files, expected_records):
        """
        Validate the dbt files and inspect the log output: exactly
        ``expected_records`` records, none mentioning "Error" or "Unable".
        """
        with self.assertLogs() as captured:
            self.dbt_source_obj.validate_dbt_files(dbt_files=dbt_files)
        self.assertEqual(len(captured.records), expected_records)
        for record in captured.records:
            self.assertNotIn("Error", record.getMessage())
            self.assertNotIn("Unable", record.getMessage())

    def check_yield_datamodel(self, dbt_objects, expected_data_models):
        """
        Collect the data models yielded by the source and compare them,
        position by position, with ``expected_data_models``.
        """
        data_model_list = []
        yield_data_models = self.dbt_source_obj.yield_data_models(
            dbt_objects=dbt_objects
        )
        for data_model_link in yield_data_models:
            # Anything that is not a DataModelLink is skipped.
            if isinstance(data_model_link, DataModelLink):
                self.assertIn(data_model_link.fqn.__root__, EXPECTED_DATA_MODEL_FQNS)
                data_model_list.append(data_model_link.datamodel)

        # NOTE(review): zip stops at the shorter sequence, so a source that
        # yields fewer (or no) data models than expected still passes here.
        # Consider asserting on the lengths once the fixture counts are confirmed.
        for expected, original in zip(expected_data_models, data_model_list):
            self.assertEqual(expected, original)
|