mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-13 08:37:03 +00:00
WIP: Fixed amundsen ingestion (#8544)
This commit is contained in:
parent
3c6e467829
commit
4ed088f55e
@ -28,6 +28,9 @@ from metadata.generated.schema.api.data.createDatabaseSchema import (
|
|||||||
CreateDatabaseSchemaRequest,
|
CreateDatabaseSchemaRequest,
|
||||||
)
|
)
|
||||||
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
||||||
|
from metadata.generated.schema.api.services.createDatabaseService import (
|
||||||
|
CreateDatabaseServiceRequest,
|
||||||
|
)
|
||||||
from metadata.generated.schema.api.tags.createTag import CreateTagRequest
|
from metadata.generated.schema.api.tags.createTag import CreateTagRequest
|
||||||
from metadata.generated.schema.api.tags.createTagCategory import (
|
from metadata.generated.schema.api.tags.createTagCategory import (
|
||||||
CreateTagCategoryRequest,
|
CreateTagCategoryRequest,
|
||||||
@ -64,6 +67,7 @@ from metadata.ingestion.ometa.client_utils import get_chart_entities_from_id
|
|||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser
|
from metadata.ingestion.source.database.column_type_parser import ColumnTypeParser
|
||||||
from metadata.utils import fqn
|
from metadata.utils import fqn
|
||||||
|
from metadata.utils.amundsen_helper import SERVICE_TYPE_MAPPER
|
||||||
from metadata.utils.helpers import get_standard_chart_type
|
from metadata.utils.helpers import get_standard_chart_type
|
||||||
from metadata.utils.logger import ingestion_logger
|
from metadata.utils.logger import ingestion_logger
|
||||||
from metadata.utils.sql_queries import (
|
from metadata.utils.sql_queries import (
|
||||||
@ -259,21 +263,24 @@ class AmundsenSource(Source[Entity]):
|
|||||||
def _yield_create_database(self, table):
|
def _yield_create_database(self, table):
|
||||||
try:
|
try:
|
||||||
service_entity = self.get_database_service(table["database"])
|
service_entity = self.get_database_service(table["database"])
|
||||||
|
table_name = ""
|
||||||
|
if hasattr(service_entity.connection.config, "supportsDatabase"):
|
||||||
|
table_name = table["cluster"]
|
||||||
|
else:
|
||||||
|
table_name = "default"
|
||||||
|
|
||||||
database_request = CreateDatabaseRequest(
|
database_request = CreateDatabaseRequest(
|
||||||
name=table["cluster"]
|
name=table_name
|
||||||
if hasattr(service_entity.connection.config, "supportsDatabase")
|
if hasattr(service_entity.connection.config, "supportsDatabase")
|
||||||
else "default",
|
else "default",
|
||||||
service=EntityReference(id=service_entity.id, type="databaseService"),
|
service=EntityReference(id=service_entity.id, type="databaseService"),
|
||||||
)
|
)
|
||||||
|
|
||||||
yield database_request
|
yield database_request
|
||||||
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
service_name=table["database"],
|
service_name=table["database"],
|
||||||
database_name=table["cluster"],
|
database_name=table_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.database_object = self.metadata.get_by_name(
|
self.database_object = self.metadata.get_by_name(
|
||||||
@ -285,17 +292,17 @@ class AmundsenSource(Source[Entity]):
|
|||||||
|
|
||||||
def _yield_create_database_schema(self, table):
|
def _yield_create_database_schema(self, table):
|
||||||
try:
|
try:
|
||||||
|
|
||||||
database_schema_request = CreateDatabaseSchemaRequest(
|
database_schema_request = CreateDatabaseSchemaRequest(
|
||||||
name=table["schema"],
|
name=table["schema"],
|
||||||
database=EntityReference(id=self.database_object.id, type="database"),
|
database=EntityReference(id=self.database_object.id, type="database"),
|
||||||
)
|
)
|
||||||
yield database_schema_request
|
yield database_schema_request
|
||||||
|
|
||||||
database_schema_fqn = fqn.build(
|
database_schema_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=DatabaseSchema,
|
entity_type=DatabaseSchema,
|
||||||
service_name=table["database"],
|
service_name=table["database"],
|
||||||
database_name=table["cluster"],
|
database_name=self.database_object.name.__root__,
|
||||||
schema_name=database_schema_request.name.__root__,
|
schema_name=database_schema_request.name.__root__,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -495,7 +502,22 @@ class AmundsenSource(Source[Entity]):
|
|||||||
return data_type
|
return data_type
|
||||||
|
|
||||||
def get_database_service(self, service_name: str) -> DatabaseService:
|
def get_database_service(self, service_name: str) -> DatabaseService:
|
||||||
service = self.metadata.get_by_name(entity=DatabaseService, fqn=service_name)
|
"""
|
||||||
|
Method to get and create Database Service
|
||||||
|
"""
|
||||||
|
service = self.metadata.create_or_update(
|
||||||
|
CreateDatabaseServiceRequest(
|
||||||
|
name=service_name,
|
||||||
|
displayName=service_name,
|
||||||
|
connection=SERVICE_TYPE_MAPPER.get(
|
||||||
|
service_name, SERVICE_TYPE_MAPPER["mysql"]["connection"]
|
||||||
|
)["connection"],
|
||||||
|
serviceType=SERVICE_TYPE_MAPPER.get(
|
||||||
|
service_name, SERVICE_TYPE_MAPPER["mysql"]["service_name"]
|
||||||
|
)["service_name"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
if service is not None:
|
if service is not None:
|
||||||
return service
|
return service
|
||||||
logger.error(f"Please create a service with name {service_name}")
|
logger.error(f"Please create a service with name {service_name}")
|
||||||
|
|||||||
89
ingestion/src/metadata/utils/amundsen_helper.py
Normal file
89
ingestion/src/metadata/utils/amundsen_helper.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
"""
|
||||||
|
Amundsen helper file for service mapping
|
||||||
|
"""
|
||||||
|
|
||||||
|
SERVICE_TYPE_MAPPER = {
|
||||||
|
"hive": {
|
||||||
|
"service_name": "Hive",
|
||||||
|
"connection": {"config": {"hostPort": "http://nohost:6000", "type": "Hive"}},
|
||||||
|
},
|
||||||
|
"delta": {
|
||||||
|
"service_name": "DeltaLake",
|
||||||
|
"connection": {
|
||||||
|
"config": {
|
||||||
|
"metastoreConnection": {"metastoreHostPort": "http://localhost:9083"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"dynamo": {
|
||||||
|
"service_name": "DynamoDB",
|
||||||
|
"connection": {
|
||||||
|
"config": {"awsConfig": {"awsRegion": "aws_region"}, "type": "DynamoDB"}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"mysql": {
|
||||||
|
"service_name": "Mysql",
|
||||||
|
"connection": {
|
||||||
|
"config": {"hostPort": "http://nohost:6000", "username": "randomName"}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"athena": {
|
||||||
|
"service_name": "Athena",
|
||||||
|
"connection": {
|
||||||
|
"config": {
|
||||||
|
"s3StagingDir": "s3 staging dir",
|
||||||
|
"awsConfig": "aws_config",
|
||||||
|
"workgroup": "work_group",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"bigquery": {
|
||||||
|
"service_name": "BigQuery",
|
||||||
|
"connection": {"config": {"credentials": "credentials"}},
|
||||||
|
},
|
||||||
|
"db2": {
|
||||||
|
"service_name": "Db2",
|
||||||
|
"connection": {
|
||||||
|
"config": {"hostPort": "http://nohost:6000", "username": "username"}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"druid": {
|
||||||
|
"service_name": "Druid",
|
||||||
|
"connection": {"config": {"hostPort": "http://nohost:6000"}},
|
||||||
|
},
|
||||||
|
"salesforce": {
|
||||||
|
"service_name": "Salesforce",
|
||||||
|
"connection": {"config": {"username": "randomName"}},
|
||||||
|
},
|
||||||
|
"oracle": {
|
||||||
|
"service_name": "Oracle",
|
||||||
|
"connection": {
|
||||||
|
"config": {
|
||||||
|
"hostPort": "http://nohost:6000",
|
||||||
|
"username": "randomName",
|
||||||
|
"oracleConnectionType": {
|
||||||
|
"oracleServiceName": {"title": "orcale_ser_name"}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"glue": {
|
||||||
|
"service_name": "Glue",
|
||||||
|
"connection": {
|
||||||
|
"config": {
|
||||||
|
"awsConfig": "aws_config",
|
||||||
|
"storageServiceName": "glue_stroage_name",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"snowflake": {
|
||||||
|
"service_nmae": "Snowflake",
|
||||||
|
"connection": {
|
||||||
|
"config": {
|
||||||
|
"username": "randomName",
|
||||||
|
"account": "snow_fl_acco",
|
||||||
|
"warehouse": "compute",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
208
ingestion/tests/unit/topology/metadata/test_amundsen.py
Normal file
208
ingestion/tests/unit/topology/metadata/test_amundsen.py
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
"""
|
||||||
|
TestCase for Amundsen using the topology
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
from unittest import TestCase
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from pydantic import AnyUrl
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.services.connections.database.deltaLakeConnection import (
|
||||||
|
DeltaLakeConnection,
|
||||||
|
MetastoreHostPortConnection,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.entity.services.connections.database.dynamoDBConnection import (
|
||||||
|
DynamoDBConnection,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.entity.services.connections.database.hiveConnection import (
|
||||||
|
HiveConnection,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.entity.services.databaseService import (
|
||||||
|
DatabaseConnection,
|
||||||
|
DatabaseService,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||||
|
OpenMetadataWorkflowConfig,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
|
||||||
|
from metadata.generated.schema.type.basic import Href
|
||||||
|
from metadata.ingestion.source.metadata.amundsen import AmundsenSource
|
||||||
|
|
||||||
|
mock_amundsen_config = {
|
||||||
|
"source": {
|
||||||
|
"type": "amundsen",
|
||||||
|
"serviceName": "local_amundsen",
|
||||||
|
"serviceConnection": {
|
||||||
|
"config": {
|
||||||
|
"type": "Amundsen",
|
||||||
|
"username": "neo4j",
|
||||||
|
"password": "test",
|
||||||
|
"hostPort": "bolt://192.168.1.8:7687",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sourceConfig": {"config": {"type": "DatabaseMetadata"}},
|
||||||
|
},
|
||||||
|
"sink": {"type": "metadata-rest", "config": {}},
|
||||||
|
"workflowConfig": {
|
||||||
|
"openMetadataServerConfig": {
|
||||||
|
"hostPort": "http://localhost:8585/api",
|
||||||
|
"authProvider": "openmetadata",
|
||||||
|
"securityConfig": {
|
||||||
|
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGc"
|
||||||
|
"iOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE"
|
||||||
|
"2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXB"
|
||||||
|
"iEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fN"
|
||||||
|
"r3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3u"
|
||||||
|
"d-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECTED_SERVICE = [
|
||||||
|
DatabaseService(
|
||||||
|
id="05f98ea5-1a30-480c-9bfc-55d1eabc45c7",
|
||||||
|
name="hive",
|
||||||
|
fullyQualifiedName="hive",
|
||||||
|
displayName="hive",
|
||||||
|
serviceType="Hive",
|
||||||
|
description=None,
|
||||||
|
connection=DatabaseConnection(
|
||||||
|
config=HiveConnection(
|
||||||
|
type="Hive",
|
||||||
|
scheme="hive",
|
||||||
|
username=None,
|
||||||
|
password=None,
|
||||||
|
hostPort="http://nohost:6000",
|
||||||
|
databaseSchema=None,
|
||||||
|
authOptions=None,
|
||||||
|
connectionOptions=None,
|
||||||
|
connectionArguments=None,
|
||||||
|
supportsMetadataExtraction=True,
|
||||||
|
supportsProfiler=True,
|
||||||
|
)
|
||||||
|
),
|
||||||
|
pipelines=None,
|
||||||
|
version=2.5,
|
||||||
|
updatedAt=1667892646744,
|
||||||
|
updatedBy="admin",
|
||||||
|
owner=None,
|
||||||
|
href=Href(
|
||||||
|
__root__=AnyUrl(
|
||||||
|
"http://localhost:8585/api/v1/services/databaseServices/05f98ea5-1a30-480c-9bfc-55d1eabc45c7",
|
||||||
|
scheme="http",
|
||||||
|
host="localhost",
|
||||||
|
host_type="int_domain",
|
||||||
|
port="8585",
|
||||||
|
path="/api/v1/services/databaseServices/05f98ea5-1a30-480c-9bfc-55d1eabc45c7",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
changeDescription=None,
|
||||||
|
deleted=False,
|
||||||
|
),
|
||||||
|
DatabaseService(
|
||||||
|
id="e856d239-4e74-4a7d-844b-d61c3e73b81d",
|
||||||
|
name="delta",
|
||||||
|
fullyQualifiedName="delta",
|
||||||
|
displayName="delta",
|
||||||
|
serviceType="DeltaLake",
|
||||||
|
description=None,
|
||||||
|
connection=DatabaseConnection(
|
||||||
|
config=DeltaLakeConnection(
|
||||||
|
type="DeltaLake",
|
||||||
|
metastoreConnection=MetastoreHostPortConnection(
|
||||||
|
metastoreHostPort="http://localhost:9083"
|
||||||
|
),
|
||||||
|
connectionArguments=None,
|
||||||
|
supportsMetadataExtraction=True,
|
||||||
|
)
|
||||||
|
),
|
||||||
|
pipelines=None,
|
||||||
|
version=2.5,
|
||||||
|
updatedAt=1667892646744,
|
||||||
|
updatedBy="admin",
|
||||||
|
owner=None,
|
||||||
|
href=Href(
|
||||||
|
__root__=AnyUrl(
|
||||||
|
"http://localhost:8585/api/v1/services/databaseServices/e856d239-4e74-4a7d-844b-d61c3e73b81d",
|
||||||
|
scheme="http",
|
||||||
|
host="localhost",
|
||||||
|
host_type="int_domain",
|
||||||
|
port="8585",
|
||||||
|
path="/api/v1/services/databaseServices/e856d239-4e74-4a7d-844b-d61c3e73b81d",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
changeDescription=None,
|
||||||
|
deleted=False,
|
||||||
|
),
|
||||||
|
DatabaseService(
|
||||||
|
id="836ff98d-a241-4d06-832d-745f96ac88fc",
|
||||||
|
name="dynamo",
|
||||||
|
fullyQualifiedName="dynamo",
|
||||||
|
displayName="dynamo",
|
||||||
|
serviceType="DynamoDB",
|
||||||
|
description=None,
|
||||||
|
connection=DatabaseConnection(
|
||||||
|
config=DynamoDBConnection(
|
||||||
|
type="DynamoDB",
|
||||||
|
awsConfig=AWSCredentials(awsRegion="aws_region"),
|
||||||
|
connectionArguments=None,
|
||||||
|
supportsMetadataExtraction=True,
|
||||||
|
supportsProfiler=True,
|
||||||
|
)
|
||||||
|
),
|
||||||
|
pipelines=None,
|
||||||
|
version=2.5,
|
||||||
|
updatedAt=1667892646744,
|
||||||
|
updatedBy="admin",
|
||||||
|
owner=None,
|
||||||
|
href=Href(
|
||||||
|
__root__=AnyUrl(
|
||||||
|
"http://localhost:8585/api/v1/services/databaseServices/836ff98d-a241-4d06-832d-745f96ac88fc",
|
||||||
|
scheme="http",
|
||||||
|
host="localhost",
|
||||||
|
host_type="int_domain",
|
||||||
|
port="8585",
|
||||||
|
path="/api/v1/services/databaseServices/836ff98d-a241-4d06-832d-745f96ac88fc",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
changeDescription=None,
|
||||||
|
deleted=False,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
SERVICE_NAME = ["hive", "delta", "dynamo"]
|
||||||
|
|
||||||
|
|
||||||
|
class AmundsenUnitTest(TestCase):
|
||||||
|
"""
|
||||||
|
Implements the necessary methods to extract
|
||||||
|
Amundsen Unit Test
|
||||||
|
"""
|
||||||
|
|
||||||
|
@patch("metadata.ingestion.source.pipeline.pipeline_service.test_connection")
|
||||||
|
def __init__(self, methodName, test_connection) -> None:
|
||||||
|
super().__init__(methodName)
|
||||||
|
test_connection.return_value = False
|
||||||
|
self.config = OpenMetadataWorkflowConfig.parse_obj(mock_amundsen_config)
|
||||||
|
self.amundsen = AmundsenSource.create(
|
||||||
|
mock_amundsen_config["source"],
|
||||||
|
self.config.workflowConfig.openMetadataServerConfig,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_database_service(self):
|
||||||
|
database_service_list = []
|
||||||
|
for service_name in SERVICE_NAME:
|
||||||
|
service_entity = self.amundsen.get_database_service(service_name)
|
||||||
|
database_service_list.append(service_entity)
|
||||||
|
|
||||||
|
for _, (expected, original) in enumerate(
|
||||||
|
zip(EXPECTED_SERVICE, database_service_list)
|
||||||
|
):
|
||||||
|
original.id = expected.id = "836ff98d-a241-4d06-832d-745f96ac88fc"
|
||||||
|
original.href = expected.href = None
|
||||||
|
original.updatedAt = expected.updatedAt = datetime.datetime.now()
|
||||||
|
original.version = expected.version = 2.5
|
||||||
|
original.changeDescription = None
|
||||||
|
self.assertEqual(expected, original)
|
||||||
Loading…
x
Reference in New Issue
Block a user