mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-12 17:26:43 +00:00
parent
cde3a7dd1e
commit
f4ff43c24c
@ -17,15 +17,27 @@ import traceback
|
|||||||
from abc import ABC
|
from abc import ABC
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from typing import Callable, Iterable, Iterator, Union
|
from typing import Callable, Iterable, Iterator, List, Optional, Union
|
||||||
|
|
||||||
from metadata.generated.schema.api.data.createQuery import CreateQueryRequest
|
from metadata.generated.schema.api.data.createQuery import CreateQueryRequest
|
||||||
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
||||||
from metadata.generated.schema.type.basic import FullyQualifiedEntityName, SqlQuery
|
from metadata.generated.schema.entity.data.table import Table
|
||||||
|
from metadata.generated.schema.type.basic import (
|
||||||
|
FullyQualifiedEntityName,
|
||||||
|
SqlQuery,
|
||||||
|
Uuid,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.type.entityLineage import (
|
||||||
|
ColumnLineage,
|
||||||
|
EntitiesEdge,
|
||||||
|
LineageDetails,
|
||||||
|
Source,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.type.entityReference import EntityReference
|
||||||
from metadata.generated.schema.type.tableQuery import TableQuery
|
from metadata.generated.schema.type.tableQuery import TableQuery
|
||||||
from metadata.ingestion.api.models import Either
|
from metadata.ingestion.api.models import Either
|
||||||
from metadata.ingestion.lineage.models import ConnectionTypeDialectMapper, Dialect
|
from metadata.ingestion.lineage.models import ConnectionTypeDialectMapper, Dialect
|
||||||
from metadata.ingestion.lineage.sql_lineage import get_lineage_by_query
|
from metadata.ingestion.lineage.sql_lineage import get_column_fqn, get_lineage_by_query
|
||||||
from metadata.ingestion.models.ometa_lineage import OMetaLineageRequest
|
from metadata.ingestion.models.ometa_lineage import OMetaLineageRequest
|
||||||
from metadata.ingestion.source.database.query_parser_source import QueryParserSource
|
from metadata.ingestion.source.database.query_parser_source import QueryParserSource
|
||||||
from metadata.ingestion.source.models import TableView
|
from metadata.ingestion.source.models import TableView
|
||||||
@ -239,6 +251,62 @@ class LineageSource(QueryParserSource, ABC):
|
|||||||
f"Processing Procedure Lineage not supported for {str(self.service_connection.type.value)}"
|
f"Processing Procedure Lineage not supported for {str(self.service_connection.type.value)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_column_lineage(
    self, from_table: Table, to_table: Table
) -> List[ColumnLineage]:
    """
    Build column level lineage between two tables by matching column names.

    Every column name of `from_table` is resolved on both tables through
    `get_column_fqn`. A `ColumnLineage` entry is added only when the name
    resolves to a truthy value on both sides.

    Args:
        from_table: upstream table whose columns drive the iteration
        to_table: downstream table in which the same column names are looked up

    Returns:
        The matched column lineages. An empty list is returned when nothing
        matches or when an error is raised while building the lineage, so the
        caller always receives a list as the annotation promises.
    """
    try:
        column_lineage = []
        for column in from_table.columns:
            field = column.name.root
            from_column = get_column_fqn(table_entity=from_table, column=field)
            to_column = get_column_fqn(table_entity=to_table, column=field)
            if from_column and to_column:
                column_lineage.append(
                    ColumnLineage(fromColumns=[from_column], toColumn=to_column)
                )

        return column_lineage
    except Exception as exc:
        # Best effort: column lineage is optional, so log and carry on
        logger.debug(f"Error to get column lineage: {exc}")
        logger.debug(traceback.format_exc())

    # Previously the error path fell off the end and returned None
    return []
|
||||||
|
|
||||||
|
def get_add_cross_database_lineage_request(
    self,
    from_entity: Table,
    to_entity: Table,
    column_lineage: List[ColumnLineage] = None,
) -> Optional[Either[AddLineageRequest]]:
    """
    Wrap a table-to-table edge in an AddLineageRequest flagged as
    cross database lineage.

    Args:
        from_entity: upstream table of the edge
        to_entity: downstream table of the edge
        column_lineage: optional column level lineage attached to the edge

    Returns:
        Either holding the request on its right side, or None when one of
        the two entities is falsy.
    """
    # An edge needs both ends
    if not (from_entity and to_entity):
        return None

    upstream_ref = EntityReference(id=Uuid(from_entity.id.root), type="table")
    downstream_ref = EntityReference(id=Uuid(to_entity.id.root), type="table")
    details = LineageDetails(
        source=Source.CrossDatabaseLineage,
        columnsLineage=column_lineage,
    )
    edge = EntitiesEdge(
        fromEntity=upstream_ref,
        toEntity=downstream_ref,
        lineageDetails=details,
    )
    return Either(right=AddLineageRequest(edge=edge))
|
||||||
|
|
||||||
|
def yield_cross_database_lineage(self) -> Iterable[Either[AddLineageRequest]]:
    """
    Default hook for cross database lineage.

    The base implementation yields nothing: it only logs that the feature
    is not supported for the type of the current service connection.
    """
    message = f"Processing Cross Database Lineage not supported for {str(self.service_connection.type.value)}"
    logger.info(message)
|
||||||
|
|
||||||
def _iter(
|
def _iter(
|
||||||
self, *_, **__
|
self, *_, **__
|
||||||
) -> Iterable[Either[Union[AddLineageRequest, CreateQueryRequest]]]:
|
) -> Iterable[Either[Union[AddLineageRequest, CreateQueryRequest]]]:
|
||||||
@ -257,3 +325,8 @@ class LineageSource(QueryParserSource, ABC):
|
|||||||
logger.warning(
|
logger.warning(
|
||||||
f"Lineage extraction is not supported for {str(self.service_connection.type.value)} connection"
|
f"Lineage extraction is not supported for {str(self.service_connection.type.value)} connection"
|
||||||
)
|
)
|
||||||
|
if (
|
||||||
|
self.source_config.processCrossDatabaseLineage
|
||||||
|
and self.source_config.crossDatabaseServiceNames
|
||||||
|
):
|
||||||
|
yield from self.yield_cross_database_lineage() or []
|
||||||
|
@ -11,7 +11,16 @@
|
|||||||
"""
|
"""
|
||||||
Trino lineage module
|
Trino lineage module
|
||||||
"""
|
"""
|
||||||
|
import traceback
|
||||||
|
from typing import Iterable, List
|
||||||
|
|
||||||
|
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
||||||
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
|
from metadata.generated.schema.entity.data.table import Table
|
||||||
|
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
||||||
|
StackTraceError,
|
||||||
|
)
|
||||||
|
from metadata.ingestion.api.models import Either
|
||||||
from metadata.ingestion.source.database.lineage_source import LineageSource
|
from metadata.ingestion.source.database.lineage_source import LineageSource
|
||||||
from metadata.ingestion.source.database.trino.queries import TRINO_SQL_STATEMENT
|
from metadata.ingestion.source.database.trino.queries import TRINO_SQL_STATEMENT
|
||||||
from metadata.ingestion.source.database.trino.query_parser import TrinoQueryParserSource
|
from metadata.ingestion.source.database.trino.query_parser import TrinoQueryParserSource
|
||||||
@ -32,3 +41,89 @@ class TrinoLineageSource(TrinoQueryParserSource, LineageSource):
|
|||||||
OR lower("query") LIKE '%%merge%%'
|
OR lower("query") LIKE '%%merge%%'
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def get_cross_database_fqn_from_service_names(self) -> List[str]:
    """
    Collect the fully qualified name of every database that belongs to the
    services listed in `crossDatabaseServiceNames` of the source config.

    Returns:
        The database FQNs, in service order and then in listing order.
    """
    database_fqns = []
    for service_name in self.source_config.crossDatabaseServiceNames:
        service_databases = self.metadata.list_all_entities(
            entity=Database, params={"service": service_name}
        )
        for service_database in service_databases:
            database_fqns.append(service_database.fullyQualifiedName.root)
    return database_fqns
|
||||||
|
|
||||||
|
def check_same_table(self, table1: Table, table2: Table) -> bool:
    """
    Tell whether two table entities describe the same table.

    Two tables are considered the same when their names are equal and
    they expose exactly the same set of column names, regardless of order.
    """
    # Different names can never match, no need to look at the columns
    if table1.name.root != table2.name.root:
        return False

    first_columns = {column.name.root for column in table1.columns}
    second_columns = {column.name.root for column in table2.columns}
    return first_columns == second_columns
|
||||||
|
|
||||||
|
def get_cross_database_lineage(
    self, from_table: Table, to_table: Table
) -> Either[AddLineageRequest]:
    """
    Build the cross database lineage request for a pair of tables.

    Column level lineage is computed only when both tables are truthy and
    both carry columns; otherwise the request is built with no column lineage.
    """
    both_have_columns = (
        from_table and from_table.columns and to_table and to_table.columns
    )
    if both_have_columns:
        columns = self.get_column_lineage(from_table=from_table, to_table=to_table)
    else:
        columns = None

    return self.get_add_cross_database_lineage_request(
        from_entity=from_table, to_entity=to_table, column_lineage=columns
    )
|
||||||
|
|
||||||
|
def yield_cross_database_lineage(self) -> Iterable[Either[AddLineageRequest]]:
    """
    Yield lineage from tables of the configured cross database services to
    the matching tables of this Trino service.

    For every table of every database of the Trino service, the table FQN
    is rewritten against each cross database FQN and looked up. The first
    candidate that `check_same_table` accepts produces one lineage request
    (cross database table -> Trino table) and ends the search for that
    Trino table.

    Yields:
        Either with the lineage request on the right, or a single Either
        with a StackTraceError on the left if anything raises along the way.
    """
    try:
        all_cross_database_fqns = self.get_cross_database_fqn_from_service_names()
        # Cross database table FQN -> looked up entity. Falsy lookups are
        # cached too, so a missing table is only requested once.
        cross_database_table_fqn_mapping = {}

        # Get all databases for the specified Trino service
        trino_databases = self.metadata.list_all_entities(
            entity=Database, params={"service": self.config.serviceName}
        )
        for trino_database in trino_databases:
            trino_database_fqn = trino_database.fullyQualifiedName.root

            # Get all tables for the specified Trino database schema
            trino_tables = self.metadata.list_all_entities(
                entity=Table, params={"database": trino_database_fqn}
            )
            # NOTE: Currently, tables in system-defined schemas will also be checked for lineage.
            for trino_table in trino_tables:
                trino_table_fqn = trino_table.fullyQualifiedName.root
                for cross_database_fqn in all_cross_database_fqns:
                    # Construct the FQN for cross-database tables. Only the
                    # first occurrence (the database prefix) is swapped, so a
                    # schema or table name that repeats the database FQN
                    # text is left untouched.
                    cross_database_table_fqn = trino_table_fqn.replace(
                        trino_database_fqn, cross_database_fqn, 1
                    )
                    # Look the table up only on a cache miss. Passing the
                    # lookup as the default of dict.get() would evaluate it
                    # on every iteration and defeat the cache.
                    if cross_database_table_fqn not in cross_database_table_fqn_mapping:
                        cross_database_table_fqn_mapping[
                            cross_database_table_fqn
                        ] = self.metadata.get_by_name(
                            Table, fqn=cross_database_table_fqn
                        )
                    cross_database_table = cross_database_table_fqn_mapping[
                        cross_database_table_fqn
                    ]
                    # Create cross database lineage request if both tables are same
                    if cross_database_table and self.check_same_table(
                        trino_table, cross_database_table
                    ):
                        yield self.get_cross_database_lineage(
                            cross_database_table, trino_table
                        )
                        break
    except Exception as exc:
        yield Either(
            left=StackTraceError(
                name=f"{self.config.serviceName} Cross Database Lineage",
                error=(
                    "Error to yield cross database lineage details "
                    f"service name [{self.config.serviceName}]: {exc}"
                ),
                stackTrace=traceback.format_exc(),
            )
        )
|
||||||
|
@ -29,7 +29,7 @@ slug: /main-concepts/metadata-standard/schemas/type/entitylineage
|
|||||||
- **Items**: Refer to *#/definitions/columnLineage*.
|
- **Items**: Refer to *#/definitions/columnLineage*.
|
||||||
- **`pipeline`**: Pipeline where the sqlQuery is periodically run. Refer to *../type/entityReference.json*.
|
- **`pipeline`**: Pipeline where the sqlQuery is periodically run. Refer to *../type/entityReference.json*.
|
||||||
- **`description`** *(string)*: description of lineage.
|
- **`description`** *(string)*: description of lineage.
|
||||||
- **`source`** *(string)*: Lineage type describes how a lineage was created. Must be one of: `['Manual', 'ViewLineage', 'QueryLineage', 'PipelineLineage', 'DashboardLineage', 'DbtLineage']`. Default: `Manual`.
|
- **`source`** *(string)*: Lineage type describes how a lineage was created. Must be one of: `['Manual', 'ViewLineage', 'QueryLineage', 'PipelineLineage', 'DashboardLineage', 'DbtLineage', 'SparkLineage', 'OpenLineage', 'ExternalTableLineage', 'CrossDatabaseLineage']`. Default: `Manual`.
|
||||||
- **`edge`** *(object)*: Edge in the lineage graph from one entity to another by entity IDs. Cannot contain additional properties.
|
- **`edge`** *(object)*: Edge in the lineage graph from one entity to another by entity IDs. Cannot contain additional properties.
|
||||||
- **`fromEntity`**: From entity that is upstream of lineage edge. Refer to *basic.json#/definitions/uuid*.
|
- **`fromEntity`**: From entity that is upstream of lineage edge. Refer to *basic.json#/definitions/uuid*.
|
||||||
- **`toEntity`**: To entity that is downstream of lineage edge. Refer to *basic.json#/definitions/uuid*.
|
- **`toEntity`**: To entity that is downstream of lineage edge. Refer to *basic.json#/definitions/uuid*.
|
||||||
|
@ -91,6 +91,20 @@
|
|||||||
"default": 1,
|
"default": 1,
|
||||||
"title": "Number of Threads",
|
"title": "Number of Threads",
|
||||||
"minimum": 1
|
"minimum": 1
|
||||||
|
},
|
||||||
|
"processCrossDatabaseLineage": {
|
||||||
|
"title": "Process Cross Database Lineage",
|
||||||
|
"description": "Set the 'Process Cross Database Lineage' toggle to control whether to process table lineage across different databases.",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false
|
||||||
|
},
|
||||||
|
"crossDatabaseServiceNames": {
|
||||||
|
"title": "Cross Database Service Names",
|
||||||
|
"description": "Set 'Cross Database Service Names' to process lineage with the database.",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
|
@ -52,7 +52,7 @@
|
|||||||
"source": {
|
"source": {
|
||||||
"description": "Lineage type describes how a lineage was created.",
|
"description": "Lineage type describes how a lineage was created.",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["Manual", "ViewLineage", "QueryLineage", "PipelineLineage", "DashboardLineage", "DbtLineage", "SparkLineage", "OpenLineage", "ExternalTableLineage"],
|
"enum": ["Manual", "ViewLineage", "QueryLineage", "PipelineLineage", "DashboardLineage", "DbtLineage", "SparkLineage", "OpenLineage", "ExternalTableLineage", "CrossDatabaseLineage"],
|
||||||
"default": "Manual"
|
"default": "Manual"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -99,6 +99,7 @@ export const LINEAGE_SOURCE: { [key in Source]: string } = {
|
|||||||
[Source.SparkLineage]: 'Spark Lineage',
|
[Source.SparkLineage]: 'Spark Lineage',
|
||||||
[Source.ViewLineage]: 'View Lineage',
|
[Source.ViewLineage]: 'View Lineage',
|
||||||
[Source.OpenLineage]: 'OpenLineage',
|
[Source.OpenLineage]: 'OpenLineage',
|
||||||
|
[Source.CrossDatabaseLineage]: 'Cross Database Lineage',
|
||||||
[Source.ExternalTableLineage]: 'External Table Lineage',
|
[Source.ExternalTableLineage]: 'External Table Lineage',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add lineage details between two entities
|
* Add lineage details between two entities
|
||||||
*/
|
*/
|
||||||
export interface AddLineage {
|
export interface AddLineage {
|
||||||
@ -150,6 +150,7 @@ export interface ColumnLineage {
|
|||||||
* Lineage type describes how a lineage was created.
|
* Lineage type describes how a lineage was created.
|
||||||
*/
|
*/
|
||||||
export enum Source {
|
export enum Source {
|
||||||
|
CrossDatabaseLineage = "CrossDatabaseLineage",
|
||||||
DashboardLineage = "DashboardLineage",
|
DashboardLineage = "DashboardLineage",
|
||||||
DbtLineage = "DbtLineage",
|
DbtLineage = "DbtLineage",
|
||||||
ExternalTableLineage = "ExternalTableLineage",
|
ExternalTableLineage = "ExternalTableLineage",
|
||||||
|
@ -148,6 +148,7 @@ export interface EntityReference {
|
|||||||
* Lineage type describes how a lineage was created.
|
* Lineage type describes how a lineage was created.
|
||||||
*/
|
*/
|
||||||
export enum Source {
|
export enum Source {
|
||||||
|
CrossDatabaseLineage = "CrossDatabaseLineage",
|
||||||
DashboardLineage = "DashboardLineage",
|
DashboardLineage = "DashboardLineage",
|
||||||
DbtLineage = "DbtLineage",
|
DbtLineage = "DbtLineage",
|
||||||
ExternalTableLineage = "ExternalTableLineage",
|
ExternalTableLineage = "ExternalTableLineage",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user