mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-04 23:28:16 +00:00
Added elastic search in usage (#4226)
* Added elastic search in usage * optimized conditions
This commit is contained in:
parent
2860258b27
commit
cc6683beed
@ -12,6 +12,7 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from typing import List
|
||||||
|
|
||||||
from metadata.config.common import ConfigModel
|
from metadata.config.common import ConfigModel
|
||||||
from metadata.generated.schema.entity.data.database import Database
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
@ -79,56 +80,69 @@ class MetadataUsageBulkSink(BulkSink):
|
|||||||
table_usage_map = {}
|
table_usage_map = {}
|
||||||
for record in usage_records:
|
for record in usage_records:
|
||||||
table_usage = TableUsageCount(**json.loads(record))
|
table_usage = TableUsageCount(**json.loads(record))
|
||||||
|
table_entities = []
|
||||||
if "." in table_usage.table:
|
if "." in table_usage.table:
|
||||||
(
|
(
|
||||||
table_usage.database_schema,
|
table_usage.database_schema,
|
||||||
table_usage.table,
|
table_usage.table,
|
||||||
) = table_usage.table.split(".")[-2:]
|
) = table_usage.table.split(".")[-2:]
|
||||||
self.service_name = table_usage.service_name
|
table_entities = self.__get_table_entity(
|
||||||
table_entity = self.__get_table_entity(
|
table_usage.database, table_usage.database_schema, table_usage.table
|
||||||
table_usage.database, table_usage.database_schema, table_usage.table
|
|
||||||
)
|
|
||||||
if table_entity is not None:
|
|
||||||
if not table_usage_map.get(table_entity.id.__root__):
|
|
||||||
table_usage_map[table_entity.id.__root__] = {
|
|
||||||
"table_entity": table_entity,
|
|
||||||
"usage_count": table_usage.count,
|
|
||||||
"sql_queries": table_usage.sql_queries,
|
|
||||||
"usage_date": table_usage.date,
|
|
||||||
"database": table_usage.database,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
table_usage_map[table_entity.id.__root__][
|
|
||||||
"usage_count"
|
|
||||||
] += table_usage.count
|
|
||||||
table_usage_map[table_entity.id.__root__]["sql_queries"].extend(
|
|
||||||
table_usage.sql_queries
|
|
||||||
)
|
|
||||||
table_join_request = self.__get_table_joins(table_usage)
|
|
||||||
logger.debug("table join request {}".format(table_join_request))
|
|
||||||
try:
|
|
||||||
if (
|
|
||||||
table_join_request is not None
|
|
||||||
and len(table_join_request.columnJoins) > 0
|
|
||||||
):
|
|
||||||
self.metadata.publish_frequently_joined_with(
|
|
||||||
table_entity, table_join_request
|
|
||||||
)
|
|
||||||
except APIError as err:
|
|
||||||
self.status.failures.append(table_join_request)
|
|
||||||
logger.error(
|
|
||||||
"Failed to update query join for {}, {}".format(
|
|
||||||
table_usage.table, err
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"Table does not exist, skipping usage publish {}, {}".format(
|
|
||||||
table_usage.table, table_usage.database
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
self.status.warnings.append(f"Table: {table_usage.table}")
|
else:
|
||||||
|
es_result = self.metadata.search_entities_using_es(
|
||||||
|
service_name=self.service_name,
|
||||||
|
table_obj={
|
||||||
|
"database": table_usage.database,
|
||||||
|
"database_schema": None,
|
||||||
|
"name": table_usage.table,
|
||||||
|
},
|
||||||
|
search_index="table_search_index",
|
||||||
|
)
|
||||||
|
table_entities = es_result
|
||||||
|
self.service_name = table_usage.service_name
|
||||||
|
for table_entity in table_entities:
|
||||||
|
if table_entity is not None:
|
||||||
|
if not table_usage_map.get(table_entity.id.__root__):
|
||||||
|
table_usage_map[table_entity.id.__root__] = {
|
||||||
|
"table_entity": table_entity,
|
||||||
|
"usage_count": table_usage.count,
|
||||||
|
"sql_queries": table_usage.sql_queries,
|
||||||
|
"usage_date": table_usage.date,
|
||||||
|
"database": table_usage.database,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
table_usage_map[table_entity.id.__root__][
|
||||||
|
"usage_count"
|
||||||
|
] += table_usage.count
|
||||||
|
table_usage_map[table_entity.id.__root__]["sql_queries"].extend(
|
||||||
|
table_usage.sql_queries
|
||||||
|
)
|
||||||
|
table_join_request = self.__get_table_joins(table_usage)
|
||||||
|
logger.debug("table join request {}".format(table_join_request))
|
||||||
|
try:
|
||||||
|
if (
|
||||||
|
table_join_request is not None
|
||||||
|
and len(table_join_request.columnJoins) > 0
|
||||||
|
):
|
||||||
|
self.metadata.publish_frequently_joined_with(
|
||||||
|
table_entity, table_join_request
|
||||||
|
)
|
||||||
|
except APIError as err:
|
||||||
|
self.status.failures.append(table_join_request)
|
||||||
|
logger.error(
|
||||||
|
"Failed to update query join for {}, {}".format(
|
||||||
|
table_usage.table, err
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"Table does not exist, skipping usage publish {}, {}".format(
|
||||||
|
table_usage.table, table_usage.database
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.status.warnings.append(f"Table: {table_usage.table}")
|
||||||
|
|
||||||
for table_id, value_dict in table_usage_map.items():
|
for table_id, value_dict in table_usage_map.items():
|
||||||
self.metadata.ingest_table_queries_data(
|
self.metadata.ingest_table_queries_data(
|
||||||
@ -212,24 +226,36 @@ class MetadataUsageBulkSink(BulkSink):
|
|||||||
def __get_column_fqdn(
    self, database: str, database_schema: str, table_column: TableColumn
):
    """
    Resolve the fully qualified name of the column referenced by
    ``table_column``.

    The candidate tables are obtained from ``__get_table_entity`` and are
    scanned in order; the first column whose name equals
    ``table_column.column`` (compared case-insensitively) wins.

    :param database: database name forwarded to the table lookup
    :param database_schema: schema name forwarded to the table lookup
    :param table_column: object exposing ``table`` and ``column`` names
    :return: the matching column's fully qualified name, or ``None`` when
        no table is found or none of the tables has a matching column
    """
    table_entities = self.__get_table_entity(
        database, database_schema, table_column.table
    )
    # Truthiness covers None and every kind of empty result, not only the
    # literal empty list.
    if not table_entities:
        return None
    for table_entity in table_entities:
        for tbl_column in table_entity.columns:
            if table_column.column.lower() == tbl_column.name.__root__.lower():
                return tbl_column.fullyQualifiedName.__root__.__root__
    # No column matched in any candidate table.
    return None
|
||||||
|
|
||||||
def __get_table_entity(
    self, database_name: str, database_schema: str, table_name: str
) -> List[Table]:
    """
    Find the table entities matching a database / schema / table name.

    The table is first fetched by the fully qualified name built from
    ``self.service_name`` and the three arguments. When that lookup yields
    nothing, the search falls back to an Elasticsearch query against
    ``table_search_index`` with the same service, database, schema and
    table name.

    :param database_name: name of the database holding the table
    :param database_schema: name of the schema holding the table
    :param table_name: name of the table
    :return: a one-element list for an exact match, otherwise whatever the
        search returned; an empty list when the search yields nothing
    """
    table_fqn = get_fqdn(
        Table, self.service_name, database_name, database_schema, table_name
    )
    table_fqn = _get_formmated_table_name(table_fqn)
    table_entity = self.metadata.get_by_name(Table, fqdn=table_fqn)
    if table_entity:
        return [table_entity]
    es_result = self.metadata.search_entities_using_es(
        service_name=self.service_name,
        table_obj={
            "database": database_name,
            "database_schema": database_schema,
            "name": table_name,
        },
        search_index="table_search_index",
    )
    # NOTE(review): the return contract of search_entities_using_es is not
    # visible here. Normalising a falsy result to [] keeps the declared
    # List[Table] contract so callers can iterate without a None check.
    return es_result or []
|
||||||
|
|
||||||
def get_status(self):
|
def get_status(self):
|
||||||
return self.status
|
return self.status
|
||||||
|
@ -144,6 +144,8 @@ class OMetaLineageMixin(Generic[T]):
|
|||||||
|
|
||||||
def _separate_fqn(self, database, fqn):
    """
    Split a dotted table name into the pieces used to search for a table.

    Only the last two dot-separated components of ``fqn`` are used: the
    last one is the table name and the one before it is the schema. A
    missing or empty schema component is reported as ``None``.

    :param database: database name, passed through unchanged
    :param fqn: dotted name such as ``service.database.schema.table``
    :return: dict with the keys ``database``, ``database_schema`` and
        ``name``
    """
    parts = fqn.split(".")
    table = parts[-1]
    # A bare table name has no schema component; unpacking a two-element
    # slice would raise ValueError in that case.
    database_schema = parts[-2] if len(parts) > 1 else None
    if not database_schema:
        database_schema = None
    return {"database": database, "database_schema": database_schema, "name": table}
|
||||||
|
|
||||||
def _create_lineage_by_table_name(
|
def _create_lineage_by_table_name(
|
||||||
@ -153,6 +155,8 @@ class OMetaLineageMixin(Generic[T]):
|
|||||||
This method is to create a lineage between two tables
|
This method is to create a lineage between two tables
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
from_table = str(from_table).replace("<default>", "")
|
||||||
|
to_table = str(to_table).replace("<default>", "")
|
||||||
from_fqdn = get_fqdn(
|
from_fqdn = get_fqdn(
|
||||||
AddLineageRequest,
|
AddLineageRequest,
|
||||||
service_name,
|
service_name,
|
||||||
@ -185,7 +189,7 @@ class OMetaLineageMixin(Generic[T]):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
multiple_to_fqns = [to_entity]
|
multiple_to_fqns = [to_entity]
|
||||||
if not from_entity or not to_entity:
|
if not multiple_to_fqns or not multiple_from_fqns:
|
||||||
return None
|
return None
|
||||||
for from_entity in multiple_from_fqns:
|
for from_entity in multiple_from_fqns:
|
||||||
for to_entity in multiple_to_fqns:
|
for to_entity in multiple_to_fqns:
|
||||||
@ -201,6 +205,7 @@ class OMetaLineageMixin(Generic[T]):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
created_lineage = self.add_lineage(lineage)
|
created_lineage = self.add_lineage(lineage)
|
||||||
logger.info(f"Successfully added Lineage {created_lineage}")
|
logger.info(f"Successfully added Lineage {created_lineage}")
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user