mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-30 18:17:53 +00:00
MINOR: fix Kafka connect CDC lineage (#23836)
This commit is contained in:
parent
d4a9381473
commit
a90cacc93b
@ -39,6 +39,7 @@ from metadata.ingestion.models.custom_pydantic import BaseModel
|
|||||||
from metadata.ingestion.ometa.client import REST, APIError
|
from metadata.ingestion.ometa.client import REST, APIError
|
||||||
from metadata.ingestion.ometa.utils import quote
|
from metadata.ingestion.ometa.utils import quote
|
||||||
from metadata.ingestion.source.models import TableView
|
from metadata.ingestion.source.models import TableView
|
||||||
|
from metadata.utils import fqn
|
||||||
from metadata.utils.elasticsearch import ES_INDEX_MAP, get_entity_from_es_result
|
from metadata.utils.elasticsearch import ES_INDEX_MAP, get_entity_from_es_result
|
||||||
from metadata.utils.execution_time_tracker import calculate_execution_time_generator
|
from metadata.utils.execution_time_tracker import calculate_execution_time_generator
|
||||||
from metadata.utils.logger import ometa_logger
|
from metadata.utils.logger import ometa_logger
|
||||||
@ -521,10 +522,13 @@ class ESMixin(Generic[T]):
|
|||||||
fetch table from es when with/without `db_service_name`
|
fetch table from es when with/without `db_service_name`
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
prepended_fqn = fqn.prefix_entity_for_wildcard_search(
|
||||||
|
entity_type=entity_type, fqn=fqn_search_string
|
||||||
|
)
|
||||||
entity_result = get_entity_from_es_result(
|
entity_result = get_entity_from_es_result(
|
||||||
entity_list=self.es_search_from_fqn(
|
entity_list=self.es_search_from_fqn(
|
||||||
entity_type=entity_type,
|
entity_type=entity_type,
|
||||||
fqn_search_string=fqn_search_string,
|
fqn_search_string=prepended_fqn,
|
||||||
),
|
),
|
||||||
fetch_multiple_entities=fetch_multiple_entities,
|
fetch_multiple_entities=fetch_multiple_entities,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -110,16 +110,16 @@ class ConnectorConfigKeys:
|
|||||||
"db.name",
|
"db.name",
|
||||||
"snowflake.database.name",
|
"snowflake.database.name",
|
||||||
"database.include.list",
|
"database.include.list",
|
||||||
"database.hostname",
|
# "database.hostname",
|
||||||
"connection.url",
|
# "connection.url",
|
||||||
"database.dbname",
|
"database.dbname",
|
||||||
"topic.prefix",
|
"topic.prefix",
|
||||||
"database.server.name", # Debezium V1
|
# "database.server.name", # This maps the server name, not the actual database
|
||||||
"databases.include",
|
"databases.include",
|
||||||
"database.names",
|
"database.names",
|
||||||
"snowflake.database",
|
"snowflake.database",
|
||||||
"connection.host",
|
# "connection.host",
|
||||||
"database.exclude.list",
|
# "database.exclude.list",
|
||||||
]
|
]
|
||||||
|
|
||||||
CONTAINER_KEYS = [
|
CONTAINER_KEYS = [
|
||||||
|
|||||||
@ -182,7 +182,7 @@ class KafkaconnectSource(PipelineServiceSource):
|
|||||||
)
|
)
|
||||||
# Build search string: schema.table format
|
# Build search string: schema.table format
|
||||||
search_string = (
|
search_string = (
|
||||||
f"{dataset_details.database}.{dataset_details.table}"
|
f"{fqn.quote_name(dataset_details.database)}.{fqn.quote_name(dataset_details.table)}"
|
||||||
if dataset_details.database
|
if dataset_details.database
|
||||||
else dataset_details.table
|
else dataset_details.table
|
||||||
)
|
)
|
||||||
|
|||||||
@ -13,10 +13,12 @@ Handle FQN building and splitting logic.
|
|||||||
Filter information has been taken from the
|
Filter information has been taken from the
|
||||||
ES indexes definitions
|
ES indexes definitions
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Dict, List, Optional, Type, TypeVar, Union
|
from typing import TYPE_CHECKING, Dict, List, Optional, Type, TypeVar, Union
|
||||||
|
|
||||||
from antlr4.CommonTokenStream import CommonTokenStream
|
from antlr4.CommonTokenStream import CommonTokenStream
|
||||||
from antlr4.error.ErrorStrategy import BailErrorStrategy
|
from antlr4.error.ErrorStrategy import BailErrorStrategy
|
||||||
@ -51,11 +53,13 @@ from metadata.generated.schema.entity.teams.team import Team
|
|||||||
from metadata.generated.schema.entity.teams.user import User
|
from metadata.generated.schema.entity.teams.user import User
|
||||||
from metadata.generated.schema.tests.testCase import TestCase
|
from metadata.generated.schema.tests.testCase import TestCase
|
||||||
from metadata.generated.schema.tests.testSuite import TestSuite
|
from metadata.generated.schema.tests.testSuite import TestSuite
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
||||||
from metadata.utils.dispatch import class_register
|
from metadata.utils.dispatch import class_register
|
||||||
from metadata.utils.elasticsearch import get_entity_from_es_result
|
from metadata.utils.elasticsearch import get_entity_from_es_result
|
||||||
from metadata.utils.logger import utils_logger
|
from metadata.utils.logger import utils_logger
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
|
|
||||||
logger = utils_logger()
|
logger = utils_logger()
|
||||||
|
|
||||||
T = TypeVar("T", bound=BaseModel)
|
T = TypeVar("T", bound=BaseModel)
|
||||||
@ -866,3 +870,56 @@ def get_query_checksum(query: str) -> str:
|
|||||||
The checksum is used as the query's name.
|
The checksum is used as the query's name.
|
||||||
"""
|
"""
|
||||||
return hashlib.md5(query.encode()).hexdigest()
|
return hashlib.md5(query.encode()).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
# Number of dot-separated FQN levels ("slots") for each supported entity
# type, keyed by the entity class name.
# Container is left out on purpose: its children can nest recursively
# (service.container1.container2...), so it has no fixed slot count.
FQN_ENTITY_SLOTS = {
    entity.__name__: slot_count
    for entity, slot_count in (
        (Table, 4),
        (DatabaseSchema, 3),
        (Database, 2),
        (Dashboard, 2),
        (APICollection, 2),
        (Chart, 2),
        (MlModel, 2),
        (Topic, 2),
        (SearchIndex, 2),
        (Tag, 2),
        (DataModel, 2),
        (StoredProcedure, 4),
        (Pipeline, 2),
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
def prefix_entity_for_wildcard_search(entity_type: Type[T], fqn: str) -> str:
    """
    Given an entity type and a (possibly partial) FQN, return the FQN
    prefixed with ``*`` wildcards for every missing parent level.

    The number of levels for each entity type is looked up in
    ``FQN_ENTITY_SLOTS``. For example, for a Topic with FQN "potato",
    return "*.potato" to match the topic in any service. For a Table with
    FQN "schema.table", return "*.*.schema.table" to match the table in any
    service and database. When the FQN already fills every slot, no
    wildcard is added.

    Args:
        entity_type: The entity type to match.
        fqn: The FQN to prefix.

    Returns:
        The FQN rebuilt with one leading "*" part per missing parent level.

    Raises:
        FQNBuildingException: If the entity type has no entry in
            ``FQN_ENTITY_SLOTS``, or if the FQN has more parts than the
            entity type has slots.
    """
    slots = FQN_ENTITY_SLOTS.get(entity_type.__name__)
    if slots is None:
        raise FQNBuildingException(
            f"Entity type {entity_type.__name__} not supported for wildcard search"
        )

    parts = split(fqn)
    if len(parts) > slots:
        # Adjacent f-strings are concatenated verbatim, so the separating
        # space has to be part of the first literal.
        raise FQNBuildingException(
            f"FQN {fqn} has too many parts ({len(parts)}) "
            f"for entity type {entity_type.__name__} (expected {slots} or fewer)"
        )

    # Add wildcards for missing parent levels
    wildcards_needed = slots - len(parts)
    prefixed_parts = ["*"] * wildcards_needed + parts
    return _build(*prefixed_parts, quote=True)
|
||||||
|
|||||||
@ -16,7 +16,18 @@ from unittest.mock import MagicMock
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from metadata.generated.schema.entity.data.table import Column, Table
|
from metadata.generated.schema.entity.classification.tag import Tag
|
||||||
|
from metadata.generated.schema.entity.data.apiCollection import APICollection
|
||||||
|
from metadata.generated.schema.entity.data.chart import Chart
|
||||||
|
from metadata.generated.schema.entity.data.dashboard import Dashboard
|
||||||
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
|
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
||||||
|
from metadata.generated.schema.entity.data.mlmodel import MlModel
|
||||||
|
from metadata.generated.schema.entity.data.pipeline import Pipeline
|
||||||
|
from metadata.generated.schema.entity.data.searchIndex import SearchIndex
|
||||||
|
from metadata.generated.schema.entity.data.storedProcedure import StoredProcedure
|
||||||
|
from metadata.generated.schema.entity.data.table import Column, DataModel, Table
|
||||||
|
from metadata.generated.schema.entity.data.topic import Topic
|
||||||
from metadata.generated.schema.type.basic import FullyQualifiedEntityName
|
from metadata.generated.schema.type.basic import FullyQualifiedEntityName
|
||||||
from metadata.ingestion.models.custom_basemodel_validation import (
|
from metadata.ingestion.models.custom_basemodel_validation import (
|
||||||
RESERVED_ARROW_KEYWORD,
|
RESERVED_ARROW_KEYWORD,
|
||||||
@ -288,3 +299,130 @@ class TestFqn(TestCase):
|
|||||||
)
|
)
|
||||||
expected3 = f"bigquery.my-project.dataset.events_2024${RESERVED_QUOTE_KEYWORD}daily{RESERVED_QUOTE_KEYWORD}"
|
expected3 = f"bigquery.my-project.dataset.events_2024${RESERVED_QUOTE_KEYWORD}daily{RESERVED_QUOTE_KEYWORD}"
|
||||||
self.assertEqual(result3, expected3)
|
self.assertEqual(result3, expected3)
|
||||||
|
|
||||||
|
def test_prefix_entity_for_wildcard_search(self):
    """Check wildcard prefixing for each supported entity type and both error paths"""

    # (entity type, input FQN, expected wildcard-prefixed FQN)
    expectations = (
        # Table (4 slots: service.database.schema.table)
        (
            Table,
            "my_service.my_db.my_schema.my_table",
            "my_service.my_db.my_schema.my_table",
        ),
        (Table, "my_schema.my_table", "*.*.my_schema.my_table"),
        (Table, "my_table", "*.*.*.my_table"),
        # Quoted parts keep their quotes
        (Table, '"my.schema".my_table', '*.*."my.schema".my_table'),
        # DatabaseSchema (3 slots: service.database.schema)
        (DatabaseSchema, "public", "*.*.public"),
        (
            DatabaseSchema,
            "postgres_service.analytics_db.public",
            "postgres_service.analytics_db.public",
        ),
        # Database (2 slots: service.database)
        (Database, "production_db", "*.production_db"),
        (Database, "mysql_service.production_db", "mysql_service.production_db"),
        # Dashboard (2 slots: service.dashboard)
        (Dashboard, "sales_dashboard", "*.sales_dashboard"),
        # APICollection (2 slots: service.collection)
        (APICollection, "users_api", "*.users_api"),
        # Chart (2 slots: service.chart)
        (Chart, "revenue_chart", "*.revenue_chart"),
        # MlModel (2 slots: service.model)
        (MlModel, "fraud_detection_model", "*.fraud_detection_model"),
        # Topic (2 slots: service.topic)
        (Topic, "potato", "*.potato"),
        (Topic, "kafka.user_events", "kafka.user_events"),
        # SearchIndex (2 slots: service.index)
        (SearchIndex, "product_index", "*.product_index"),
        # Tag (2 slots: classification.tag)
        (Tag, "Sensitive", "*.Sensitive"),
        (Tag, "PII.Sensitive", "PII.Sensitive"),
        # DataModel (2 slots: service.model)
        (DataModel, "customer_model", "*.customer_model"),
        # StoredProcedure (4 slots: service.database.schema.procedure)
        (StoredProcedure, "calculate_revenue", "*.*.*.calculate_revenue"),
        (
            StoredProcedure,
            "public.calculate_revenue",
            "*.*.public.calculate_revenue",
        ),
        (
            StoredProcedure,
            "oracle.sales_db.public.calculate_revenue",
            "oracle.sales_db.public.calculate_revenue",
        ),
        # Pipeline (2 slots: service.pipeline)
        (Pipeline, "daily_ingestion", "*.daily_ingestion"),
    )

    for entity_type, given_fqn, expected in expectations:
        actual = fqn.prefix_entity_for_wildcard_search(entity_type, given_fqn)
        self.assertEqual(actual, expected)

    # Error case: more parts than the entity type has slots
    with pytest.raises(fqn.FQNBuildingException) as too_many_parts:
        fqn.prefix_entity_for_wildcard_search(Table, "service.db.schema.table.extra")
    assert "has too many parts" in str(too_many_parts.value)

    # Error case: unsupported entity type (Column doesn't have slots defined)
    with pytest.raises(fqn.FQNBuildingException) as unsupported_type:
        fqn.prefix_entity_for_wildcard_search(Column, "column")
    assert "not supported for wildcard search" in str(unsupported_type.value)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user