mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-19 04:41:02 +00:00
Fix: dbtcloud CL errors (#21685)
(cherry picked from commit 0df058a53d4202b0c6c50d1a33de0f8d77c5d516)
This commit is contained in:
parent
113dae708d
commit
c676735660
@ -113,6 +113,7 @@ class OMetaLineageMixin(Generic[T]):
|
|||||||
Add lineage relationship between two entities and returns
|
Add lineage relationship between two entities and returns
|
||||||
the entity information of the origin node
|
the entity information of the origin node
|
||||||
"""
|
"""
|
||||||
|
data = deepcopy(data)
|
||||||
try:
|
try:
|
||||||
patch_op_success = False
|
patch_op_success = False
|
||||||
if check_patch and data.edge.lineageDetails:
|
if check_patch and data.edge.lineageDetails:
|
||||||
|
@ -168,13 +168,6 @@ class DbtcloudSource(PipelineServiceSource):
|
|||||||
entity=Pipeline, fqn=pipeline_fqn
|
entity=Pipeline, fqn=pipeline_fqn
|
||||||
)
|
)
|
||||||
|
|
||||||
lineage_details = LineageDetails(
|
|
||||||
pipeline=EntityReference(
|
|
||||||
id=pipeline_entity.id.root, type="pipeline"
|
|
||||||
),
|
|
||||||
source=LineageSource.PipelineLineage,
|
|
||||||
)
|
|
||||||
|
|
||||||
dbt_models = self.client.get_model_details(
|
dbt_models = self.client.get_model_details(
|
||||||
job_id=pipeline_details.id, run_id=self.context.get().latest_run_id
|
job_id=pipeline_details.id, run_id=self.context.get().latest_run_id
|
||||||
)
|
)
|
||||||
@ -222,6 +215,13 @@ class DbtcloudSource(PipelineServiceSource):
|
|||||||
if from_entity is None:
|
if from_entity is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
lineage_details = LineageDetails(
|
||||||
|
pipeline=EntityReference(
|
||||||
|
id=pipeline_entity.id.root, type="pipeline"
|
||||||
|
),
|
||||||
|
source=LineageSource.PipelineLineage,
|
||||||
|
)
|
||||||
|
|
||||||
yield Either(
|
yield Either(
|
||||||
right=AddLineageRequest(
|
right=AddLineageRequest(
|
||||||
edge=EntitiesEdge(
|
edge=EntitiesEdge(
|
||||||
|
@ -19,6 +19,10 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
|
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
|
||||||
from metadata.generated.schema.entity.data.pipeline import Pipeline, Task
|
from metadata.generated.schema.entity.data.pipeline import Pipeline, Task
|
||||||
|
from metadata.generated.schema.entity.data.table import Table
|
||||||
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
||||||
|
OpenMetadataConnection,
|
||||||
|
)
|
||||||
from metadata.generated.schema.entity.services.pipelineService import (
|
from metadata.generated.schema.entity.services.pipelineService import (
|
||||||
PipelineConnection,
|
PipelineConnection,
|
||||||
PipelineService,
|
PipelineService,
|
||||||
@ -41,6 +45,7 @@ from metadata.ingestion.source.pipeline.dbtcloud.metadata import DbtcloudSource
|
|||||||
from metadata.ingestion.source.pipeline.dbtcloud.models import (
|
from metadata.ingestion.source.pipeline.dbtcloud.models import (
|
||||||
DBTJob,
|
DBTJob,
|
||||||
DBTJobList,
|
DBTJobList,
|
||||||
|
DBTModel,
|
||||||
DBTSchedule,
|
DBTSchedule,
|
||||||
)
|
)
|
||||||
from metadata.ingestion.source.pipeline.pipeline_service import PipelineUsage
|
from metadata.ingestion.source.pipeline.pipeline_service import PipelineUsage
|
||||||
@ -549,6 +554,11 @@ class DBTCloudUnitTest(TestCase):
|
|||||||
self.dbtcloud.metadata = OpenMetadata(
|
self.dbtcloud.metadata = OpenMetadata(
|
||||||
config.workflowConfig.openMetadataServerConfig
|
config.workflowConfig.openMetadataServerConfig
|
||||||
)
|
)
|
||||||
|
self.metadata = OpenMetadata(
|
||||||
|
OpenMetadataConnection.model_validate(
|
||||||
|
mock_dbtcloud_config["workflowConfig"]["openMetadataServerConfig"]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
@patch("metadata.ingestion.source.pipeline.dbtcloud.client.DBTCloudClient.get_jobs")
|
@patch("metadata.ingestion.source.pipeline.dbtcloud.client.DBTCloudClient.get_jobs")
|
||||||
def test_get_pipelines_list(self, get_jobs):
|
def test_get_pipelines_list(self, get_jobs):
|
||||||
@ -567,8 +577,19 @@ class DBTCloudUnitTest(TestCase):
|
|||||||
assert self.dbtcloud.client.project_ids == EXPECTED_PROJECT_FILTERS
|
assert self.dbtcloud.client.project_ids == EXPECTED_PROJECT_FILTERS
|
||||||
|
|
||||||
def test_pipelines(self):
|
def test_pipelines(self):
|
||||||
|
"""
|
||||||
|
Test pipeline creation
|
||||||
|
"""
|
||||||
pipeline = list(self.dbtcloud.yield_pipeline(EXPECTED_JOB_DETAILS))[0].right
|
pipeline = list(self.dbtcloud.yield_pipeline(EXPECTED_JOB_DETAILS))[0].right
|
||||||
assert pipeline == EXPECTED_CREATED_PIPELINES
|
|
||||||
|
# Compare individual fields instead of entire objects
|
||||||
|
self.assertEqual(pipeline.name, EXPECTED_CREATED_PIPELINES.name)
|
||||||
|
self.assertEqual(pipeline.description, EXPECTED_CREATED_PIPELINES.description)
|
||||||
|
self.assertEqual(pipeline.sourceUrl, EXPECTED_CREATED_PIPELINES.sourceUrl)
|
||||||
|
self.assertEqual(
|
||||||
|
pipeline.scheduleInterval, EXPECTED_CREATED_PIPELINES.scheduleInterval
|
||||||
|
)
|
||||||
|
self.assertEqual(pipeline.service, EXPECTED_CREATED_PIPELINES.service)
|
||||||
|
|
||||||
def test_yield_pipeline_usage(self):
|
def test_yield_pipeline_usage(self):
|
||||||
"""
|
"""
|
||||||
@ -783,3 +804,269 @@ class DBTCloudUnitTest(TestCase):
|
|||||||
self.assertIsNotNone(
|
self.assertIsNotNone(
|
||||||
list(self.dbtcloud.yield_pipeline_usage(EXPECTED_JOB_DETAILS))[0].left
|
list(self.dbtcloud.yield_pipeline_usage(EXPECTED_JOB_DETAILS))[0].left
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_get_model_details(self):
|
||||||
|
"""
|
||||||
|
Test getting model details from DBT Cloud
|
||||||
|
"""
|
||||||
|
# Mock the graphql client's post method
|
||||||
|
with patch.object(self.dbtcloud.client.graphql_client, "post") as mock_post:
|
||||||
|
# Set up mock return value
|
||||||
|
mock_post.return_value = {
|
||||||
|
"data": {
|
||||||
|
"job": {
|
||||||
|
"models": [
|
||||||
|
{
|
||||||
|
"uniqueId": "model.dbt_test_new.model_32",
|
||||||
|
"name": "model_32",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
"dependsOn": [
|
||||||
|
"model.dbt_test_new.model_15",
|
||||||
|
"model.dbt_test_new.model_11",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"uniqueId": "model.dbt_test_new.model_15",
|
||||||
|
"name": "model_15",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
"dependsOn": None,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"uniqueId": "model.dbt_test_new.model_11",
|
||||||
|
"name": "model_11",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
"dependsOn": None,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Call the method
|
||||||
|
models = self.dbtcloud.client.get_model_details(
|
||||||
|
70403103936332, 70403110257794
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify we got the expected models
|
||||||
|
self.assertEqual(len(models), 3)
|
||||||
|
|
||||||
|
# Verify the first model (model_32)
|
||||||
|
model_32 = next(m for m in models if m.name == "model_32")
|
||||||
|
self.assertEqual(model_32.database, "dev")
|
||||||
|
self.assertEqual(model_32.dbtschema, "dbt_test_new")
|
||||||
|
self.assertEqual(len(model_32.dependsOn), 2)
|
||||||
|
self.assertIn("model.dbt_test_new.model_15", model_32.dependsOn)
|
||||||
|
self.assertIn("model.dbt_test_new.model_11", model_32.dependsOn)
|
||||||
|
|
||||||
|
# Test error case
|
||||||
|
mock_post.side_effect = Exception("Test error")
|
||||||
|
error_models = self.dbtcloud.client.get_model_details(
|
||||||
|
70403103936332, 70403110257794
|
||||||
|
)
|
||||||
|
self.assertIsNone(error_models)
|
||||||
|
|
||||||
|
def test_get_models_and_seeds_details(self):
|
||||||
|
"""
|
||||||
|
Test getting models and seeds details from DBT Cloud
|
||||||
|
"""
|
||||||
|
# Mock the graphql client's post method
|
||||||
|
with patch.object(self.dbtcloud.client.graphql_client, "post") as mock_post:
|
||||||
|
# Set up mock return value
|
||||||
|
mock_post.return_value = {
|
||||||
|
"data": {
|
||||||
|
"job": {
|
||||||
|
"models": [
|
||||||
|
{
|
||||||
|
"uniqueId": "model.dbt_test_new.model_32",
|
||||||
|
"name": "model_32",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
"dependsOn": [
|
||||||
|
"model.dbt_test_new.model_15",
|
||||||
|
"model.dbt_test_new.model_11",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"uniqueId": "model.dbt_test_new.model_15",
|
||||||
|
"name": "model_15",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
"dependsOn": None,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"uniqueId": "model.dbt_test_new.model_11",
|
||||||
|
"name": "model_11",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
"dependsOn": None,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"seeds": [
|
||||||
|
{
|
||||||
|
"uniqueId": "seed.dbt_test_new.raw_payments",
|
||||||
|
"name": "raw_payments",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"uniqueId": "seed.dbt_test_new.raw_orders",
|
||||||
|
"name": "raw_orders",
|
||||||
|
"schema": "dbt_test_new",
|
||||||
|
"database": "dev",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Call the method
|
||||||
|
models_and_seeds = self.dbtcloud.client.get_models_and_seeds_details(
|
||||||
|
70403103936332, 70403110257794
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify we got the expected models and seeds
|
||||||
|
self.assertEqual(len(models_and_seeds), 5)
|
||||||
|
|
||||||
|
# Verify the first model (model_32)
|
||||||
|
model_32 = next(m for m in models_and_seeds if m.name == "model_32")
|
||||||
|
self.assertEqual(model_32.database, "dev")
|
||||||
|
self.assertEqual(model_32.dbtschema, "dbt_test_new")
|
||||||
|
self.assertEqual(len(model_32.dependsOn), 2)
|
||||||
|
self.assertIn("model.dbt_test_new.model_15", model_32.dependsOn)
|
||||||
|
self.assertIn("model.dbt_test_new.model_11", model_32.dependsOn)
|
||||||
|
|
||||||
|
# Verify seeds
|
||||||
|
seeds = [m for m in models_and_seeds if m.uniqueId.startswith("seed.")]
|
||||||
|
self.assertEqual(len(seeds), 2)
|
||||||
|
self.assertIn("raw_payments", [s.name for s in seeds])
|
||||||
|
self.assertIn("raw_orders", [s.name for s in seeds])
|
||||||
|
|
||||||
|
# Test error case
|
||||||
|
mock_post.side_effect = Exception("Test error")
|
||||||
|
error_models = self.dbtcloud.client.get_models_and_seeds_details(
|
||||||
|
70403103936332, 70403110257794
|
||||||
|
)
|
||||||
|
self.assertIsNone(error_models)
|
||||||
|
|
||||||
|
def test_error_handling_in_lineage(self):
|
||||||
|
"""
|
||||||
|
Test error handling in lineage generation
|
||||||
|
"""
|
||||||
|
# Mock the context with latest run ID
|
||||||
|
self.dbtcloud.context.get().__dict__["latest_run_id"] = 70403110257794
|
||||||
|
|
||||||
|
# Mock metadata.get_by_name to raise an exception
|
||||||
|
with patch.object(
|
||||||
|
OpenMetadata, "get_by_name", side_effect=Exception("Test error")
|
||||||
|
):
|
||||||
|
# Get the lineage details
|
||||||
|
lineage_details = list(
|
||||||
|
self.dbtcloud.yield_pipeline_lineage_details(EXPECTED_JOB_DETAILS)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify we got an error
|
||||||
|
self.assertEqual(len(lineage_details), 1)
|
||||||
|
self.assertIsNotNone(lineage_details[0].left)
|
||||||
|
self.assertIn("Test error", lineage_details[0].left.error)
|
||||||
|
|
||||||
|
def test_yield_pipeline_lineage_details(self):
|
||||||
|
"""
|
||||||
|
Test the lineage details generation from DBT Cloud models
|
||||||
|
"""
|
||||||
|
# Mock the context with latest run ID
|
||||||
|
self.dbtcloud.context.get().__dict__["latest_run_id"] = 70403110257794
|
||||||
|
self.dbtcloud.context.get().__dict__["pipeline"] = "New job"
|
||||||
|
self.dbtcloud.context.get().__dict__[
|
||||||
|
"pipeline_service"
|
||||||
|
] = "dbtcloud_pipeline_test"
|
||||||
|
|
||||||
|
# Mock the source config for lineage
|
||||||
|
self.dbtcloud.source_config.lineageInformation = type(
|
||||||
|
"obj", (object,), {"dbServiceNames": ["local_redshift"]}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create mock entities
|
||||||
|
mock_pipeline = Pipeline(
|
||||||
|
id=uuid.uuid4(),
|
||||||
|
name="New job",
|
||||||
|
fullyQualifiedName="dbtcloud_pipeline_test.New job",
|
||||||
|
service=EntityReference(id=uuid.uuid4(), type="pipelineService"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create source and target tables
|
||||||
|
mock_source_table = Table(
|
||||||
|
id=uuid.uuid4(),
|
||||||
|
name="model_15",
|
||||||
|
fullyQualifiedName="local_redshift.dev.dbt_test_new.model_15",
|
||||||
|
database=EntityReference(id=uuid.uuid4(), type="database"),
|
||||||
|
columns=[],
|
||||||
|
databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_target_table = Table(
|
||||||
|
id=uuid.uuid4(),
|
||||||
|
name="model_32",
|
||||||
|
fullyQualifiedName="local_redshift.dev.dbt_test_new.model_32",
|
||||||
|
database=EntityReference(id=uuid.uuid4(), type="database"),
|
||||||
|
columns=[],
|
||||||
|
databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Patch the metadata's get_by_name method
|
||||||
|
with patch.object(self.dbtcloud.metadata, "get_by_name") as mock_get_by_name:
|
||||||
|
|
||||||
|
def get_by_name_side_effect(entity, fqn):
|
||||||
|
if entity == Pipeline:
|
||||||
|
# Handle both string FQN and FullyQualifiedEntityName
|
||||||
|
if isinstance(fqn, str):
|
||||||
|
if fqn == "dbtcloud_pipeline_test.New job":
|
||||||
|
return mock_pipeline
|
||||||
|
elif isinstance(fqn, FullyQualifiedEntityName):
|
||||||
|
if fqn.root == "dbtcloud_pipeline_test.New job":
|
||||||
|
return mock_pipeline
|
||||||
|
elif entity == Table:
|
||||||
|
if "model_15" in fqn:
|
||||||
|
return mock_source_table
|
||||||
|
elif "model_32" in fqn:
|
||||||
|
return mock_target_table
|
||||||
|
return "None data testing"
|
||||||
|
|
||||||
|
mock_get_by_name.side_effect = get_by_name_side_effect
|
||||||
|
|
||||||
|
# Mock the client's get_models_and_seeds_details and get_model_details methods
|
||||||
|
with patch.object(
|
||||||
|
self.dbtcloud.client, "get_models_and_seeds_details"
|
||||||
|
) as mock_get_parents, patch.object(
|
||||||
|
self.dbtcloud.client, "get_model_details"
|
||||||
|
) as mock_get_models:
|
||||||
|
|
||||||
|
mock_get_parents.return_value = [
|
||||||
|
DBTModel(
|
||||||
|
uniqueId="model.dbt_test_new.model_15",
|
||||||
|
name="model_15",
|
||||||
|
dbtschema="dbt_test_new",
|
||||||
|
database="dev",
|
||||||
|
dependsOn=None,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_get_models.return_value = [
|
||||||
|
DBTModel(
|
||||||
|
uniqueId="model.dbt_test_new.model_32",
|
||||||
|
name="model_32",
|
||||||
|
dbtschema="dbt_test_new",
|
||||||
|
database="dev",
|
||||||
|
dependsOn=["model.dbt_test_new.model_15"],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Get the lineage details
|
||||||
|
lineage_details = list(
|
||||||
|
self.dbtcloud.yield_pipeline_lineage_details(EXPECTED_JOB_DETAILS)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify we got exactly one lineage edge
|
||||||
|
self.assertEqual(len(lineage_details), 1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user