mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-27 01:45:32 +00:00
Moved view lineage to post process (#6585)
Co-authored-by: Onkar Ravgan <onkarravgan@Onkars-MacBook-Pro.local>
This commit is contained in:
parent
b37bca5471
commit
3d93aaad53
@ -90,9 +90,10 @@ class TopologyRunnerMixin(Generic[C]):
|
|||||||
|
|
||||||
if node.post_process:
|
if node.post_process:
|
||||||
logger.debug(f"Post processing node {node}")
|
logger.debug(f"Post processing node {node}")
|
||||||
node_post_process = getattr(self, node.post_process)
|
for process in node.post_process:
|
||||||
for entity_request in node_post_process():
|
node_post_process = getattr(self, process)
|
||||||
yield entity_request
|
for entity_request in node_post_process():
|
||||||
|
yield entity_request
|
||||||
|
|
||||||
def next_record(self) -> Iterable[Entity]:
|
def next_record(self) -> Iterable[Entity]:
|
||||||
"""
|
"""
|
||||||
|
@ -66,7 +66,7 @@ class TopologyNode(BaseModel):
|
|||||||
|
|
||||||
children: Optional[List[str]] = None # nodes to call execute next
|
children: Optional[List[str]] = None # nodes to call execute next
|
||||||
post_process: Optional[
|
post_process: Optional[
|
||||||
str
|
List[str]
|
||||||
] = None # Method to be run after the node has been fully processed
|
] = None # Method to be run after the node has been fully processed
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,6 +52,7 @@ from metadata.ingestion.source.database.database_service import (
|
|||||||
)
|
)
|
||||||
from metadata.ingestion.source.database.sql_column_handler import SqlColumnHandlerMixin
|
from metadata.ingestion.source.database.sql_column_handler import SqlColumnHandlerMixin
|
||||||
from metadata.ingestion.source.database.sqlalchemy_source import SqlAlchemySource
|
from metadata.ingestion.source.database.sqlalchemy_source import SqlAlchemySource
|
||||||
|
from metadata.utils import fqn
|
||||||
from metadata.utils.connections import get_connection, test_connection
|
from metadata.utils.connections import get_connection, test_connection
|
||||||
from metadata.utils.filters import filter_by_schema, filter_by_table
|
from metadata.utils.filters import filter_by_schema, filter_by_table
|
||||||
from metadata.utils.logger import ingestion_logger
|
from metadata.utils.logger import ingestion_logger
|
||||||
@ -90,6 +91,7 @@ class CommonDbSourceService(
|
|||||||
self.data_models = {}
|
self.data_models = {}
|
||||||
self.table_constraints = None
|
self.table_constraints = None
|
||||||
self.database_source_state = set()
|
self.database_source_state = set()
|
||||||
|
self.context.table_views = []
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def set_inspector(self, database_name: str) -> None:
|
def set_inspector(self, database_name: str) -> None:
|
||||||
@ -276,6 +278,13 @@ class CommonDbSourceService(
|
|||||||
inspector=self.inspector,
|
inspector=self.inspector,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
view_definition = self.get_view_definition(
|
||||||
|
table_type=table_type,
|
||||||
|
table_name=table_name,
|
||||||
|
schema_name=schema_name,
|
||||||
|
inspector=self.inspector,
|
||||||
|
)
|
||||||
|
|
||||||
table_request = CreateTableRequest(
|
table_request = CreateTableRequest(
|
||||||
name=table_name,
|
name=table_name,
|
||||||
tableType=table_type,
|
tableType=table_type,
|
||||||
@ -285,12 +294,7 @@ class CommonDbSourceService(
|
|||||||
inspector=self.inspector,
|
inspector=self.inspector,
|
||||||
),
|
),
|
||||||
columns=columns,
|
columns=columns,
|
||||||
viewDefinition=self.get_view_definition(
|
viewDefinition=view_definition,
|
||||||
table_type=table_type,
|
|
||||||
table_name=table_name,
|
|
||||||
schema_name=schema_name,
|
|
||||||
inspector=self.inspector,
|
|
||||||
),
|
|
||||||
tableConstraints=table_constraints if table_constraints else None,
|
tableConstraints=table_constraints if table_constraints else None,
|
||||||
databaseSchema=EntityReference(
|
databaseSchema=EntityReference(
|
||||||
id=self.context.database_schema.id,
|
id=self.context.database_schema.id,
|
||||||
@ -301,6 +305,15 @@ class CommonDbSourceService(
|
|||||||
), # Pick tags from context info, if any
|
), # Pick tags from context info, if any
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if table_type == TableType.View or view_definition:
|
||||||
|
table_view = {
|
||||||
|
"table_name": table_name,
|
||||||
|
"table_type": table_type,
|
||||||
|
"schema_name": schema_name,
|
||||||
|
"db_name": db_name,
|
||||||
|
}
|
||||||
|
self.context.table_views.append(table_view)
|
||||||
|
|
||||||
yield table_request
|
yield table_request
|
||||||
|
|
||||||
self.register_record(table_request=table_request)
|
self.register_record(table_request=table_request)
|
||||||
@ -312,54 +325,64 @@ class CommonDbSourceService(
|
|||||||
"{}.{}".format(self.config.serviceName, table_name)
|
"{}.{}".format(self.config.serviceName, table_name)
|
||||||
)
|
)
|
||||||
|
|
||||||
def yield_view_lineage(
|
def yield_view_lineage(self) -> Optional[Iterable[AddLineageRequest]]:
|
||||||
self, table_name_and_type: Tuple[str, str]
|
logger.info(f"Processing Lineage for Views")
|
||||||
) -> Optional[Iterable[AddLineageRequest]]:
|
for view in self.context.table_views:
|
||||||
table_name, table_type = table_name_and_type
|
table_name = view.get("table_name")
|
||||||
table_entity: Table = self.context.table
|
table_type = view.get("table_type")
|
||||||
schema_name = self.context.database_schema.name.__root__
|
schema_name = view.get("schema_name")
|
||||||
db_name = self.context.database.name.__root__
|
db_name = view.get("db_name")
|
||||||
view_definition = self.get_view_definition(
|
table_fqn = fqn.build(
|
||||||
table_type=table_type,
|
self.metadata,
|
||||||
table_name=table_name,
|
entity_type=Table,
|
||||||
schema_name=schema_name,
|
service_name=self.context.database_service.name.__root__,
|
||||||
inspector=self.inspector,
|
database_name=db_name,
|
||||||
)
|
schema_name=schema_name,
|
||||||
if table_type != TableType.View or not view_definition:
|
table_name=table_name,
|
||||||
return
|
)
|
||||||
# Prevent sqllineage from modifying the logger config
|
table_entity = self.metadata.get_by_name(
|
||||||
# Disable the DictConfigurator.configure method while importing LineageRunner
|
entity=Table,
|
||||||
configure = DictConfigurator.configure
|
fqn=table_fqn,
|
||||||
DictConfigurator.configure = lambda _: None
|
)
|
||||||
from sqllineage.runner import LineageRunner
|
view_definition = self.get_view_definition(
|
||||||
|
table_type=table_type,
|
||||||
|
table_name=table_name,
|
||||||
|
schema_name=schema_name,
|
||||||
|
inspector=self.inspector,
|
||||||
|
)
|
||||||
|
# Prevent sqllineage from modifying the logger config
|
||||||
|
# Disable the DictConfigurator.configure method while importing LineageRunner
|
||||||
|
configure = DictConfigurator.configure
|
||||||
|
DictConfigurator.configure = lambda _: None
|
||||||
|
from sqllineage.runner import LineageRunner
|
||||||
|
|
||||||
# Reverting changes after import is done
|
# Reverting changes after import is done
|
||||||
DictConfigurator.configure = configure
|
DictConfigurator.configure = configure
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = LineageRunner(view_definition)
|
result = LineageRunner(view_definition)
|
||||||
if result.source_tables and result.target_tables:
|
if result.source_tables and result.target_tables:
|
||||||
yield from get_lineage_by_query(
|
yield from get_lineage_by_query(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
query=view_definition,
|
query=view_definition,
|
||||||
service_name=self.context.database_service.name.__root__,
|
service_name=self.context.database_service.name.__root__,
|
||||||
database_name=db_name,
|
database_name=db_name,
|
||||||
schema_name=schema_name,
|
schema_name=schema_name,
|
||||||
) or []
|
) or []
|
||||||
|
|
||||||
else:
|
else:
|
||||||
yield from get_lineage_via_table_entity(
|
yield from get_lineage_via_table_entity(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
table_entity=table_entity,
|
table_entity=table_entity,
|
||||||
service_name=self.context.database_service.name.__root__,
|
service_name=self.context.database_service.name.__root__,
|
||||||
database_name=db_name,
|
database_name=db_name,
|
||||||
schema_name=schema_name,
|
schema_name=schema_name,
|
||||||
query=view_definition,
|
query=view_definition,
|
||||||
) or []
|
) or []
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
logger.debug(f"Query : {view_definition}")
|
logger.debug(f"Query : {view_definition}")
|
||||||
logger.warning("Could not parse query: Ingesting lineage failed")
|
logger.warning("Could not parse query: Ingesting lineage failed")
|
||||||
|
|
||||||
def test_connection(self) -> None:
|
def test_connection(self) -> None:
|
||||||
"""
|
"""
|
||||||
|
@ -120,7 +120,7 @@ class DatabaseServiceTopology(ServiceTopology):
|
|||||||
),
|
),
|
||||||
],
|
],
|
||||||
children=["database"],
|
children=["database"],
|
||||||
post_process="create_dbt_lineage",
|
post_process=["create_dbt_lineage", "yield_view_lineage"],
|
||||||
)
|
)
|
||||||
database = TopologyNode(
|
database = TopologyNode(
|
||||||
producer="get_database_names",
|
producer="get_database_names",
|
||||||
@ -133,7 +133,7 @@ class DatabaseServiceTopology(ServiceTopology):
|
|||||||
)
|
)
|
||||||
],
|
],
|
||||||
children=["databaseSchema"],
|
children=["databaseSchema"],
|
||||||
post_process="mark_tables_as_deleted",
|
post_process=["mark_tables_as_deleted"],
|
||||||
)
|
)
|
||||||
databaseSchema = TopologyNode(
|
databaseSchema = TopologyNode(
|
||||||
producer="get_database_schema_names",
|
producer="get_database_schema_names",
|
||||||
@ -171,13 +171,6 @@ class DatabaseServiceTopology(ServiceTopology):
|
|||||||
consumer=["storage_service"],
|
consumer=["storage_service"],
|
||||||
nullable=True,
|
nullable=True,
|
||||||
),
|
),
|
||||||
NodeStage(
|
|
||||||
type_=AddLineageRequest,
|
|
||||||
context="view_lineage",
|
|
||||||
processor="yield_view_lineage",
|
|
||||||
ack_sink=False,
|
|
||||||
nullable=True,
|
|
||||||
),
|
|
||||||
NodeStage(
|
NodeStage(
|
||||||
type_=DataModelLink,
|
type_=DataModelLink,
|
||||||
processor="yield_datamodel",
|
processor="yield_datamodel",
|
||||||
@ -317,9 +310,7 @@ class DatabaseServiceSource(DBTMixin, TopologyRunnerMixin, Source, ABC):
|
|||||||
yield from self.yield_tag(schema_name) or []
|
yield from self.yield_tag(schema_name) or []
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def yield_view_lineage(
|
def yield_view_lineage(self) -> Optional[Iterable[AddLineageRequest]]:
|
||||||
self, table_name_and_type: Tuple[str, TableType]
|
|
||||||
) -> Optional[Iterable[AddLineageRequest]]:
|
|
||||||
"""
|
"""
|
||||||
From topology.
|
From topology.
|
||||||
Parses view definition to get lineage information
|
Parses view definition to get lineage information
|
||||||
|
@ -322,10 +322,8 @@ class DatalakeSource(DatabaseServiceSource):
|
|||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
logger.error(err)
|
logger.error(err)
|
||||||
|
|
||||||
def yield_view_lineage(
|
def yield_view_lineage(self) -> Optional[Iterable[AddLineageRequest]]:
|
||||||
self, table_name_and_type: Tuple[str, str]
|
yield from []
|
||||||
) -> Optional[Iterable[AddLineageRequest]]:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
||||||
pass
|
pass
|
||||||
|
@ -351,10 +351,8 @@ class DeltalakeSource(DatabaseServiceSource):
|
|||||||
|
|
||||||
return parsed_columns
|
return parsed_columns
|
||||||
|
|
||||||
def yield_view_lineage(
|
def yield_view_lineage(self) -> Optional[Iterable[AddLineageRequest]]:
|
||||||
self, table_name_and_type: Tuple[str, str]
|
yield from []
|
||||||
) -> Optional[Iterable[AddLineageRequest]]:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
||||||
pass
|
pass
|
||||||
|
@ -189,10 +189,8 @@ class DynamodbSource(DatabaseServiceSource):
|
|||||||
"{}.{}".format(self.config.serviceName, table_name)
|
"{}.{}".format(self.config.serviceName, table_name)
|
||||||
)
|
)
|
||||||
|
|
||||||
def yield_view_lineage(
|
def yield_view_lineage(self) -> Optional[Iterable[AddLineageRequest]]:
|
||||||
self, table_name_and_type: Tuple[str, str]
|
yield from []
|
||||||
) -> Optional[Iterable[AddLineageRequest]]:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
||||||
pass
|
pass
|
||||||
|
@ -341,10 +341,8 @@ class GlueSource(DatabaseServiceSource):
|
|||||||
def standardize_table_name(self, schema: str, table: str) -> str:
|
def standardize_table_name(self, schema: str, table: str) -> str:
|
||||||
return table[:128]
|
return table[:128]
|
||||||
|
|
||||||
def yield_view_lineage(
|
def yield_view_lineage(self) -> Optional[Iterable[AddLineageRequest]]:
|
||||||
self, table_name_and_type: Tuple[str, str]
|
yield from []
|
||||||
) -> Optional[Iterable[AddLineageRequest]]:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
||||||
pass
|
pass
|
||||||
|
@ -219,10 +219,8 @@ class SalesforceSource(DatabaseServiceSource):
|
|||||||
return "INT"
|
return "INT"
|
||||||
return "VARCHAR"
|
return "VARCHAR"
|
||||||
|
|
||||||
def yield_view_lineage(
|
def yield_view_lineage(self) -> Optional[Iterable[AddLineageRequest]]:
|
||||||
self, table_name_and_type: Tuple[str, str]
|
yield from []
|
||||||
) -> Optional[Iterable[AddLineageRequest]]:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
def yield_tag(self, schema_name: str) -> Iterable[OMetaTagAndCategory]:
|
||||||
pass
|
pass
|
||||||
|
Loading…
x
Reference in New Issue
Block a user