Fix: dbtcloud CL errors (#21685)

(cherry picked from commit 0df058a53d4202b0c6c50d1a33de0f8d77c5d516)
This commit is contained in:
Suman Maharana 2025-06-10 21:45:07 +05:30 committed by OpenMetadata Release Bot
parent 113dae708d
commit c676735660
3 changed files with 296 additions and 8 deletions

View File

@ -113,6 +113,7 @@ class OMetaLineageMixin(Generic[T]):
Add lineage relationship between two entities and returns Add lineage relationship between two entities and returns
the entity information of the origin node the entity information of the origin node
""" """
data = deepcopy(data)
try: try:
patch_op_success = False patch_op_success = False
if check_patch and data.edge.lineageDetails: if check_patch and data.edge.lineageDetails:

View File

@ -168,13 +168,6 @@ class DbtcloudSource(PipelineServiceSource):
entity=Pipeline, fqn=pipeline_fqn entity=Pipeline, fqn=pipeline_fqn
) )
lineage_details = LineageDetails(
pipeline=EntityReference(
id=pipeline_entity.id.root, type="pipeline"
),
source=LineageSource.PipelineLineage,
)
dbt_models = self.client.get_model_details( dbt_models = self.client.get_model_details(
job_id=pipeline_details.id, run_id=self.context.get().latest_run_id job_id=pipeline_details.id, run_id=self.context.get().latest_run_id
) )
@ -222,6 +215,13 @@ class DbtcloudSource(PipelineServiceSource):
if from_entity is None: if from_entity is None:
continue continue
lineage_details = LineageDetails(
pipeline=EntityReference(
id=pipeline_entity.id.root, type="pipeline"
),
source=LineageSource.PipelineLineage,
)
yield Either( yield Either(
right=AddLineageRequest( right=AddLineageRequest(
edge=EntitiesEdge( edge=EntitiesEdge(

View File

@ -19,6 +19,10 @@ from unittest.mock import patch
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
from metadata.generated.schema.entity.data.pipeline import Pipeline, Task from metadata.generated.schema.entity.data.pipeline import Pipeline, Task
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.generated.schema.entity.services.pipelineService import ( from metadata.generated.schema.entity.services.pipelineService import (
PipelineConnection, PipelineConnection,
PipelineService, PipelineService,
@ -41,6 +45,7 @@ from metadata.ingestion.source.pipeline.dbtcloud.metadata import DbtcloudSource
from metadata.ingestion.source.pipeline.dbtcloud.models import ( from metadata.ingestion.source.pipeline.dbtcloud.models import (
DBTJob, DBTJob,
DBTJobList, DBTJobList,
DBTModel,
DBTSchedule, DBTSchedule,
) )
from metadata.ingestion.source.pipeline.pipeline_service import PipelineUsage from metadata.ingestion.source.pipeline.pipeline_service import PipelineUsage
@ -549,6 +554,11 @@ class DBTCloudUnitTest(TestCase):
self.dbtcloud.metadata = OpenMetadata( self.dbtcloud.metadata = OpenMetadata(
config.workflowConfig.openMetadataServerConfig config.workflowConfig.openMetadataServerConfig
) )
self.metadata = OpenMetadata(
OpenMetadataConnection.model_validate(
mock_dbtcloud_config["workflowConfig"]["openMetadataServerConfig"]
)
)
@patch("metadata.ingestion.source.pipeline.dbtcloud.client.DBTCloudClient.get_jobs") @patch("metadata.ingestion.source.pipeline.dbtcloud.client.DBTCloudClient.get_jobs")
def test_get_pipelines_list(self, get_jobs): def test_get_pipelines_list(self, get_jobs):
@ -567,8 +577,19 @@ class DBTCloudUnitTest(TestCase):
assert self.dbtcloud.client.project_ids == EXPECTED_PROJECT_FILTERS assert self.dbtcloud.client.project_ids == EXPECTED_PROJECT_FILTERS
def test_pipelines(self): def test_pipelines(self):
"""
Test pipeline creation
"""
pipeline = list(self.dbtcloud.yield_pipeline(EXPECTED_JOB_DETAILS))[0].right pipeline = list(self.dbtcloud.yield_pipeline(EXPECTED_JOB_DETAILS))[0].right
assert pipeline == EXPECTED_CREATED_PIPELINES
# Compare individual fields instead of entire objects
self.assertEqual(pipeline.name, EXPECTED_CREATED_PIPELINES.name)
self.assertEqual(pipeline.description, EXPECTED_CREATED_PIPELINES.description)
self.assertEqual(pipeline.sourceUrl, EXPECTED_CREATED_PIPELINES.sourceUrl)
self.assertEqual(
pipeline.scheduleInterval, EXPECTED_CREATED_PIPELINES.scheduleInterval
)
self.assertEqual(pipeline.service, EXPECTED_CREATED_PIPELINES.service)
def test_yield_pipeline_usage(self): def test_yield_pipeline_usage(self):
""" """
@ -783,3 +804,269 @@ class DBTCloudUnitTest(TestCase):
self.assertIsNotNone( self.assertIsNotNone(
list(self.dbtcloud.yield_pipeline_usage(EXPECTED_JOB_DETAILS))[0].left list(self.dbtcloud.yield_pipeline_usage(EXPECTED_JOB_DETAILS))[0].left
) )
def test_get_model_details(self):
"""
Test getting model details from DBT Cloud
"""
# Mock the graphql client's post method
with patch.object(self.dbtcloud.client.graphql_client, "post") as mock_post:
# Set up mock return value
mock_post.return_value = {
"data": {
"job": {
"models": [
{
"uniqueId": "model.dbt_test_new.model_32",
"name": "model_32",
"schema": "dbt_test_new",
"database": "dev",
"dependsOn": [
"model.dbt_test_new.model_15",
"model.dbt_test_new.model_11",
],
},
{
"uniqueId": "model.dbt_test_new.model_15",
"name": "model_15",
"schema": "dbt_test_new",
"database": "dev",
"dependsOn": None,
},
{
"uniqueId": "model.dbt_test_new.model_11",
"name": "model_11",
"schema": "dbt_test_new",
"database": "dev",
"dependsOn": None,
},
]
}
}
}
# Call the method
models = self.dbtcloud.client.get_model_details(
70403103936332, 70403110257794
)
# Verify we got the expected models
self.assertEqual(len(models), 3)
# Verify the first model (model_32)
model_32 = next(m for m in models if m.name == "model_32")
self.assertEqual(model_32.database, "dev")
self.assertEqual(model_32.dbtschema, "dbt_test_new")
self.assertEqual(len(model_32.dependsOn), 2)
self.assertIn("model.dbt_test_new.model_15", model_32.dependsOn)
self.assertIn("model.dbt_test_new.model_11", model_32.dependsOn)
# Test error case
mock_post.side_effect = Exception("Test error")
error_models = self.dbtcloud.client.get_model_details(
70403103936332, 70403110257794
)
self.assertIsNone(error_models)
def test_get_models_and_seeds_details(self):
"""
Test getting models and seeds details from DBT Cloud
"""
# Mock the graphql client's post method
with patch.object(self.dbtcloud.client.graphql_client, "post") as mock_post:
# Set up mock return value
mock_post.return_value = {
"data": {
"job": {
"models": [
{
"uniqueId": "model.dbt_test_new.model_32",
"name": "model_32",
"schema": "dbt_test_new",
"database": "dev",
"dependsOn": [
"model.dbt_test_new.model_15",
"model.dbt_test_new.model_11",
],
},
{
"uniqueId": "model.dbt_test_new.model_15",
"name": "model_15",
"schema": "dbt_test_new",
"database": "dev",
"dependsOn": None,
},
{
"uniqueId": "model.dbt_test_new.model_11",
"name": "model_11",
"schema": "dbt_test_new",
"database": "dev",
"dependsOn": None,
},
],
"seeds": [
{
"uniqueId": "seed.dbt_test_new.raw_payments",
"name": "raw_payments",
"schema": "dbt_test_new",
"database": "dev",
},
{
"uniqueId": "seed.dbt_test_new.raw_orders",
"name": "raw_orders",
"schema": "dbt_test_new",
"database": "dev",
},
],
}
}
}
# Call the method
models_and_seeds = self.dbtcloud.client.get_models_and_seeds_details(
70403103936332, 70403110257794
)
# Verify we got the expected models and seeds
self.assertEqual(len(models_and_seeds), 5)
# Verify the first model (model_32)
model_32 = next(m for m in models_and_seeds if m.name == "model_32")
self.assertEqual(model_32.database, "dev")
self.assertEqual(model_32.dbtschema, "dbt_test_new")
self.assertEqual(len(model_32.dependsOn), 2)
self.assertIn("model.dbt_test_new.model_15", model_32.dependsOn)
self.assertIn("model.dbt_test_new.model_11", model_32.dependsOn)
# Verify seeds
seeds = [m for m in models_and_seeds if m.uniqueId.startswith("seed.")]
self.assertEqual(len(seeds), 2)
self.assertIn("raw_payments", [s.name for s in seeds])
self.assertIn("raw_orders", [s.name for s in seeds])
# Test error case
mock_post.side_effect = Exception("Test error")
error_models = self.dbtcloud.client.get_models_and_seeds_details(
70403103936332, 70403110257794
)
self.assertIsNone(error_models)
def test_error_handling_in_lineage(self):
"""
Test error handling in lineage generation
"""
# Mock the context with latest run ID
self.dbtcloud.context.get().__dict__["latest_run_id"] = 70403110257794
# Mock metadata.get_by_name to raise an exception
with patch.object(
OpenMetadata, "get_by_name", side_effect=Exception("Test error")
):
# Get the lineage details
lineage_details = list(
self.dbtcloud.yield_pipeline_lineage_details(EXPECTED_JOB_DETAILS)
)
# Verify we got an error
self.assertEqual(len(lineage_details), 1)
self.assertIsNotNone(lineage_details[0].left)
self.assertIn("Test error", lineage_details[0].left.error)
def test_yield_pipeline_lineage_details(self):
"""
Test the lineage details generation from DBT Cloud models
"""
# Mock the context with latest run ID
self.dbtcloud.context.get().__dict__["latest_run_id"] = 70403110257794
self.dbtcloud.context.get().__dict__["pipeline"] = "New job"
self.dbtcloud.context.get().__dict__[
"pipeline_service"
] = "dbtcloud_pipeline_test"
# Mock the source config for lineage
self.dbtcloud.source_config.lineageInformation = type(
"obj", (object,), {"dbServiceNames": ["local_redshift"]}
)
# Create mock entities
mock_pipeline = Pipeline(
id=uuid.uuid4(),
name="New job",
fullyQualifiedName="dbtcloud_pipeline_test.New job",
service=EntityReference(id=uuid.uuid4(), type="pipelineService"),
)
# Create source and target tables
mock_source_table = Table(
id=uuid.uuid4(),
name="model_15",
fullyQualifiedName="local_redshift.dev.dbt_test_new.model_15",
database=EntityReference(id=uuid.uuid4(), type="database"),
columns=[],
databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
)
mock_target_table = Table(
id=uuid.uuid4(),
name="model_32",
fullyQualifiedName="local_redshift.dev.dbt_test_new.model_32",
database=EntityReference(id=uuid.uuid4(), type="database"),
columns=[],
databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
)
# Patch the metadata's get_by_name method
with patch.object(self.dbtcloud.metadata, "get_by_name") as mock_get_by_name:
def get_by_name_side_effect(entity, fqn):
if entity == Pipeline:
# Handle both string FQN and FullyQualifiedEntityName
if isinstance(fqn, str):
if fqn == "dbtcloud_pipeline_test.New job":
return mock_pipeline
elif isinstance(fqn, FullyQualifiedEntityName):
if fqn.root == "dbtcloud_pipeline_test.New job":
return mock_pipeline
elif entity == Table:
if "model_15" in fqn:
return mock_source_table
elif "model_32" in fqn:
return mock_target_table
return "None data testing"
mock_get_by_name.side_effect = get_by_name_side_effect
# Mock the graphql client's post method
with patch.object(
self.dbtcloud.client, "get_models_and_seeds_details"
) as mock_get_parents, patch.object(
self.dbtcloud.client, "get_model_details"
) as mock_get_models:
mock_get_parents.return_value = [
DBTModel(
uniqueId="model.dbt_test_new.model_15",
name="model_15",
dbtschema="dbt_test_new",
database="dev",
dependsOn=None,
)
]
mock_get_models.return_value = [
DBTModel(
uniqueId="model.dbt_test_new.model_32",
name="model_32",
dbtschema="dbt_test_new",
database="dev",
dependsOn=["model.dbt_test_new.model_15"],
)
]
# Get the lineage details
lineage_details = list(
self.dbtcloud.yield_pipeline_lineage_details(EXPECTED_JOB_DETAILS)
)
# Verify we got exactly one lineage edge
self.assertEqual(len(lineage_details), 1)