mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-01 19:25:56 +00:00
Fix for wrong containers on Athena (#4167)
This commit is contained in:
parent
2e7f3ae6f3
commit
585aad1aac
@ -6,6 +6,8 @@ from pyathena.common import BaseCursor
|
||||
from pyathena.model import AthenaTableMetadata
|
||||
from sqlalchemy.engine.reflection import Inspector
|
||||
|
||||
from datahub.emitter.mcp_builder import DatabaseKey, gen_containers
|
||||
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||
from datahub.ingestion.source.sql.sql_common import (
|
||||
SQLAlchemyConfig,
|
||||
SQLAlchemySource,
|
||||
@ -95,6 +97,32 @@ class AthenaSource(SQLAlchemySource):
|
||||
return [schema for schema in schemas if schema == athena_config.database]
|
||||
return schemas
|
||||
|
||||
def gen_database_containers(
|
||||
self, database: str
|
||||
) -> typing.Iterable[MetadataWorkUnit]:
|
||||
# In Athena the schema is the database and database is not existing
|
||||
return []
|
||||
|
||||
def gen_schema_key(self, db_name: str, schema: str) -> DatabaseKey:
|
||||
return DatabaseKey(
|
||||
platform=self.platform, instance=self.config.env, database=schema
|
||||
)
|
||||
|
||||
def gen_schema_containers(
|
||||
self, schema: str, db_name: str
|
||||
) -> typing.Iterable[MetadataWorkUnit]:
|
||||
database_container_key = self.gen_database_key(database=schema)
|
||||
|
||||
container_workunits = gen_containers(
|
||||
database_container_key,
|
||||
schema,
|
||||
["Database"],
|
||||
)
|
||||
|
||||
for wu in container_workunits:
|
||||
self.report.report_workunit(wu)
|
||||
yield wu
|
||||
|
||||
def close(self):
|
||||
if self.cursor:
|
||||
self.cursor.close()
|
||||
|
||||
@ -585,7 +585,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
|
||||
) -> Iterable[MetadataWorkUnit]:
|
||||
schema_container_key = self.gen_schema_key(db_name, schema)
|
||||
|
||||
database_container_key = self.gen_database_key(database=db_name)
|
||||
database_container_key: Optional[PlatformKey] = None
|
||||
if db_name is not None:
|
||||
database_container_key = self.gen_database_key(database=db_name)
|
||||
|
||||
container_workunits = gen_containers(
|
||||
schema_container_key,
|
||||
@ -625,8 +627,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
|
||||
self.report.report_dropped(f"{schema}.*")
|
||||
continue
|
||||
|
||||
if db_name:
|
||||
yield from self.gen_schema_containers(schema, db_name)
|
||||
yield from self.gen_schema_containers(schema, db_name)
|
||||
|
||||
if sql_config.include_tables:
|
||||
yield from self.loop_tables(inspector, schema, sql_config)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user