mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-13 09:48:19 +00:00
Metadata to ES Pipeline Fix (#4150)
This commit is contained in:
parent
0c27f16582
commit
eec9cb05d1
@ -0,0 +1,67 @@
|
|||||||
|
{
|
||||||
|
"$id": "https://open-metadata.org/schema/entity/services/connections/metadata/metadataESConnection.json",
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"title": "MetadataESConnection",
|
||||||
|
"description": "Metadata to Elasticsearch Connection Config",
|
||||||
|
"type": "object",
|
||||||
|
"javaType": "org.openmetadata.catalog.services.connections.metadata.MetadataESConnection",
|
||||||
|
"definitions": {
|
||||||
|
"metadataESType": {
|
||||||
|
"description": "Metadata to Elasticsearch type",
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["MetadataES"],
|
||||||
|
"default": "MetadataES"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"description": "Service Type",
|
||||||
|
"$ref": "#/definitions/metadataESType",
|
||||||
|
"default": "MetadataES"
|
||||||
|
},
|
||||||
|
"includeTopics": {
|
||||||
|
"description": "Include Topics for Indexing",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": "true"
|
||||||
|
},
|
||||||
|
"includeTables": {
|
||||||
|
"description": "Include Tables for Indexing",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": "true"
|
||||||
|
},
|
||||||
|
"includeDashboards": {
|
||||||
|
"description": "Include Dashboards for Indexing",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": "true"
|
||||||
|
},
|
||||||
|
"includePipelines": {
|
||||||
|
"description": "Include Pipelines for Indexing",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": "true"
|
||||||
|
},
|
||||||
|
"includeUsers": {
|
||||||
|
"description": "Include Users for Indexing",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": "true"
|
||||||
|
},
|
||||||
|
"includeTeams": {
|
||||||
|
"description": "Include Teams for Indexing",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": "true"
|
||||||
|
},
|
||||||
|
"includeGlossaryTerms": {
|
||||||
|
"description": "Include Glossary Terms for Indexing",
|
||||||
|
"type": "boolean",
|
||||||
|
"default": "true"
|
||||||
|
},
|
||||||
|
"limitRecords": {
|
||||||
|
"description": "Limit the number of records for Indexing.",
|
||||||
|
"type": "integer",
|
||||||
|
"default": "1000"
|
||||||
|
},
|
||||||
|
"supportsMetadataExtraction": {
|
||||||
|
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
@ -8,10 +8,13 @@
|
|||||||
"metadataServiceType": {
|
"metadataServiceType": {
|
||||||
"description": "Type of database service such as Amundsen, Atlas...",
|
"description": "Type of database service such as Amundsen, Atlas...",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["Amundsen"],
|
"enum": ["Amundsen", "MetadataES"],
|
||||||
"javaEnums": [
|
"javaEnums": [
|
||||||
{
|
{
|
||||||
"name": "Amundsen"
|
"name": "Amundsen"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "MetadataES"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -23,6 +26,9 @@
|
|||||||
"oneOf": [
|
"oneOf": [
|
||||||
{
|
{
|
||||||
"$ref": "./connections/metadata/amundsenConnection.json"
|
"$ref": "./connections/metadata/amundsenConnection.json"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "./connections/metadata/metadataESConnection.json"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -2,12 +2,17 @@
|
|||||||
"source": {
|
"source": {
|
||||||
"type": "metadata",
|
"type": "metadata",
|
||||||
"serviceName": "openMetadata",
|
"serviceName": "openMetadata",
|
||||||
"config": {
|
"serviceConnection": {
|
||||||
"include_tables": "true",
|
"config":{
|
||||||
"include_topics": "true",
|
"type":"MetadataES",
|
||||||
"include_dashboards": "true",
|
"includeTables": "true",
|
||||||
"limit_records": 10
|
"includeUsers": "true",
|
||||||
}
|
"includeTopics": "true",
|
||||||
|
"includeDashboards": "true",
|
||||||
|
"limitRecords": 10
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sourceConfig":{"config":{}}
|
||||||
},
|
},
|
||||||
"sink": {
|
"sink": {
|
||||||
"type": "elasticsearch",
|
"type": "elasticsearch",
|
||||||
|
@ -12,39 +12,31 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Iterable, List, Optional
|
from typing import Iterable, List
|
||||||
|
|
||||||
from metadata.config.common import ConfigModel
|
|
||||||
from metadata.generated.schema.entity.data.dashboard import Dashboard
|
from metadata.generated.schema.entity.data.dashboard import Dashboard
|
||||||
from metadata.generated.schema.entity.data.glossaryTerm import GlossaryTerm
|
from metadata.generated.schema.entity.data.glossaryTerm import GlossaryTerm
|
||||||
from metadata.generated.schema.entity.data.pipeline import Pipeline
|
from metadata.generated.schema.entity.data.pipeline import Pipeline
|
||||||
from metadata.generated.schema.entity.data.table import Table
|
from metadata.generated.schema.entity.data.table import Table
|
||||||
from metadata.generated.schema.entity.data.topic import Topic
|
from metadata.generated.schema.entity.data.topic import Topic
|
||||||
|
from metadata.generated.schema.entity.services.connections.metadata.metadataESConnection import (
|
||||||
|
MetadataESConnection,
|
||||||
|
)
|
||||||
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
||||||
OpenMetadataConnection,
|
OpenMetadataConnection,
|
||||||
)
|
)
|
||||||
from metadata.generated.schema.entity.teams.team import Team
|
from metadata.generated.schema.entity.teams.team import Team
|
||||||
from metadata.generated.schema.entity.teams.user import User
|
from metadata.generated.schema.entity.teams.user import User
|
||||||
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||||
|
Source as WorkflowSource,
|
||||||
|
)
|
||||||
from metadata.ingestion.api.common import Entity
|
from metadata.ingestion.api.common import Entity
|
||||||
from metadata.ingestion.api.source import Source, SourceStatus
|
from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class MetadataTablesRestSourceConfig(ConfigModel):
|
|
||||||
"""Metadata Table Rest pydantic config model"""
|
|
||||||
|
|
||||||
include_tables: Optional[bool] = True
|
|
||||||
include_topics: Optional[bool] = True
|
|
||||||
include_dashboards: Optional[bool] = True
|
|
||||||
include_pipelines: Optional[bool] = True
|
|
||||||
include_users: Optional[bool] = True
|
|
||||||
include_teams: Optional[bool] = True
|
|
||||||
include_glossary_terms: Optional[bool] = True
|
|
||||||
limit_records: int = 1000
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class MetadataSourceStatus(SourceStatus):
|
class MetadataSourceStatus(SourceStatus):
|
||||||
"""Metadata Source class -- extends SourceStatus class
|
"""Metadata Source class -- extends SourceStatus class
|
||||||
@ -145,16 +137,17 @@ class MetadataSource(Source[Entity]):
|
|||||||
topics:
|
topics:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
config: MetadataTablesRestSourceConfig
|
config: WorkflowSource
|
||||||
report: SourceStatus
|
report: SourceStatus
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
config: MetadataTablesRestSourceConfig,
|
config: WorkflowSource,
|
||||||
metadata_config: OpenMetadataConnection,
|
metadata_config: OpenMetadataConnection,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.config = config
|
self.config = config
|
||||||
|
self.service_connection = config.serviceConnection.__root__.config
|
||||||
self.metadata_config = metadata_config
|
self.metadata_config = metadata_config
|
||||||
self.status = MetadataSourceStatus()
|
self.status = MetadataSourceStatus()
|
||||||
self.wrote_something = False
|
self.wrote_something = False
|
||||||
@ -166,8 +159,13 @@ class MetadataSource(Source[Entity]):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, config_dict: dict, metadata_config: OpenMetadataConnection):
|
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
||||||
config = MetadataTablesRestSourceConfig.parse_obj(config_dict)
|
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
||||||
|
connection: MetadataESConnection = config.serviceConnection.__root__.config
|
||||||
|
if not isinstance(connection, MetadataESConnection):
|
||||||
|
raise InvalidSourceException(
|
||||||
|
f"Expected MetadataESConnection, but got {connection}"
|
||||||
|
)
|
||||||
return cls(config, metadata_config)
|
return cls(config, metadata_config)
|
||||||
|
|
||||||
def next_record(self) -> Iterable[Entity]:
|
def next_record(self) -> Iterable[Entity]:
|
||||||
@ -185,7 +183,7 @@ class MetadataSource(Source[Entity]):
|
|||||||
Returns:
|
Returns:
|
||||||
Table
|
Table
|
||||||
"""
|
"""
|
||||||
if self.config.include_tables:
|
if self.service_connection.includeTables:
|
||||||
after = None
|
after = None
|
||||||
while True:
|
while True:
|
||||||
table_entities = self.metadata.list_entities(
|
table_entities = self.metadata.list_entities(
|
||||||
@ -199,7 +197,7 @@ class MetadataSource(Source[Entity]):
|
|||||||
"followers",
|
"followers",
|
||||||
],
|
],
|
||||||
after=after,
|
after=after,
|
||||||
limit=self.config.limit_records,
|
limit=self.service_connection.limitRecords,
|
||||||
)
|
)
|
||||||
for table in table_entities.entities:
|
for table in table_entities.entities:
|
||||||
self.status.scanned_table(table.name.__root__)
|
self.status.scanned_table(table.name.__root__)
|
||||||
@ -214,14 +212,14 @@ class MetadataSource(Source[Entity]):
|
|||||||
Returns:
|
Returns:
|
||||||
Topic
|
Topic
|
||||||
"""
|
"""
|
||||||
if self.config.include_topics:
|
if self.service_connection.includeTopics:
|
||||||
after = None
|
after = None
|
||||||
while True:
|
while True:
|
||||||
topic_entities = self.metadata.list_entities(
|
topic_entities = self.metadata.list_entities(
|
||||||
entity=Topic,
|
entity=Topic,
|
||||||
fields=["owner", "tags", "followers"],
|
fields=["owner", "tags", "followers"],
|
||||||
after=after,
|
after=after,
|
||||||
limit=self.config.limit_records,
|
limit=self.service_connection.limitRecords,
|
||||||
)
|
)
|
||||||
for topic in topic_entities.entities:
|
for topic in topic_entities.entities:
|
||||||
self.status.scanned_topic(topic.name.__root__)
|
self.status.scanned_topic(topic.name.__root__)
|
||||||
@ -236,7 +234,7 @@ class MetadataSource(Source[Entity]):
|
|||||||
Returns:
|
Returns:
|
||||||
Dashboard:
|
Dashboard:
|
||||||
"""
|
"""
|
||||||
if self.config.include_dashboards:
|
if self.service_connection.includeDashboards:
|
||||||
after = None
|
after = None
|
||||||
while True:
|
while True:
|
||||||
dashboard_entities = self.metadata.list_entities(
|
dashboard_entities = self.metadata.list_entities(
|
||||||
@ -249,7 +247,7 @@ class MetadataSource(Source[Entity]):
|
|||||||
"usageSummary",
|
"usageSummary",
|
||||||
],
|
],
|
||||||
after=after,
|
after=after,
|
||||||
limit=self.config.limit_records,
|
limit=self.service_connection.limitRecords,
|
||||||
)
|
)
|
||||||
for dashboard in dashboard_entities.entities:
|
for dashboard in dashboard_entities.entities:
|
||||||
self.status.scanned_dashboard(dashboard.name)
|
self.status.scanned_dashboard(dashboard.name)
|
||||||
@ -264,14 +262,14 @@ class MetadataSource(Source[Entity]):
|
|||||||
Returns:
|
Returns:
|
||||||
Pipeline:
|
Pipeline:
|
||||||
"""
|
"""
|
||||||
if self.config.include_pipelines:
|
if self.service_connection.includePipelines:
|
||||||
after = None
|
after = None
|
||||||
while True:
|
while True:
|
||||||
pipeline_entities = self.metadata.list_entities(
|
pipeline_entities = self.metadata.list_entities(
|
||||||
entity=Pipeline,
|
entity=Pipeline,
|
||||||
fields=["owner", "tags", "followers", "tasks"],
|
fields=["owner", "tags", "followers", "tasks"],
|
||||||
after=after,
|
after=after,
|
||||||
limit=self.config.limit_records,
|
limit=self.service_connection.limitRecords,
|
||||||
)
|
)
|
||||||
for pipeline in pipeline_entities.entities:
|
for pipeline in pipeline_entities.entities:
|
||||||
self.status.scanned_dashboard(pipeline.name)
|
self.status.scanned_dashboard(pipeline.name)
|
||||||
@ -286,14 +284,14 @@ class MetadataSource(Source[Entity]):
|
|||||||
Returns:
|
Returns:
|
||||||
User:
|
User:
|
||||||
"""
|
"""
|
||||||
if self.config.include_users:
|
if self.service_connection.includeUsers:
|
||||||
after = None
|
after = None
|
||||||
while True:
|
while True:
|
||||||
user_entities = self.metadata.list_entities(
|
user_entities = self.metadata.list_entities(
|
||||||
entity=User,
|
entity=User,
|
||||||
fields=["teams", "roles"],
|
fields=["teams", "roles"],
|
||||||
after=after,
|
after=after,
|
||||||
limit=self.config.limit_records,
|
limit=self.service_connection.limitRecords,
|
||||||
)
|
)
|
||||||
for user in user_entities.entities:
|
for user in user_entities.entities:
|
||||||
self.status.scanned_user(user.name)
|
self.status.scanned_user(user.name)
|
||||||
@ -308,14 +306,14 @@ class MetadataSource(Source[Entity]):
|
|||||||
Returns:
|
Returns:
|
||||||
Team:
|
Team:
|
||||||
"""
|
"""
|
||||||
if self.config.include_teams:
|
if self.service_connection.includeTeams:
|
||||||
after = None
|
after = None
|
||||||
while True:
|
while True:
|
||||||
team_entities = self.metadata.list_entities(
|
team_entities = self.metadata.list_entities(
|
||||||
entity=Team,
|
entity=Team,
|
||||||
fields=["users", "owns"],
|
fields=["users", "owns"],
|
||||||
after=after,
|
after=after,
|
||||||
limit=self.config.limit_records,
|
limit=self.service_connection.limitRecords,
|
||||||
)
|
)
|
||||||
for team in team_entities.entities:
|
for team in team_entities.entities:
|
||||||
self.status.scanned_team(team.name)
|
self.status.scanned_team(team.name)
|
||||||
@ -330,14 +328,14 @@ class MetadataSource(Source[Entity]):
|
|||||||
Returns:
|
Returns:
|
||||||
GlossaryTerm:
|
GlossaryTerm:
|
||||||
"""
|
"""
|
||||||
if self.config.include_glossary_terms:
|
if self.service_connection.includeGlossaryTerms:
|
||||||
after = None
|
after = None
|
||||||
while True:
|
while True:
|
||||||
glossary_term_entities = self.metadata.list_entities(
|
glossary_term_entities = self.metadata.list_entities(
|
||||||
entity=GlossaryTerm,
|
entity=GlossaryTerm,
|
||||||
fields=[],
|
fields=[],
|
||||||
after=after,
|
after=after,
|
||||||
limit=self.config.limit_records,
|
limit=self.service_connection.limitRecords,
|
||||||
)
|
)
|
||||||
for glossary_term in glossary_term_entities.entities:
|
for glossary_term in glossary_term_entities.entities:
|
||||||
self.status.scanned_team(glossary_term.name)
|
self.status.scanned_team(glossary_term.name)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user