mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-09 16:03:31 +00:00
Fix for wrong containers on Athena (#4167)
This commit is contained in:
parent
2e7f3ae6f3
commit
585aad1aac
@ -6,6 +6,8 @@ from pyathena.common import BaseCursor
|
|||||||
from pyathena.model import AthenaTableMetadata
|
from pyathena.model import AthenaTableMetadata
|
||||||
from sqlalchemy.engine.reflection import Inspector
|
from sqlalchemy.engine.reflection import Inspector
|
||||||
|
|
||||||
|
from datahub.emitter.mcp_builder import DatabaseKey, gen_containers
|
||||||
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
||||||
from datahub.ingestion.source.sql.sql_common import (
|
from datahub.ingestion.source.sql.sql_common import (
|
||||||
SQLAlchemyConfig,
|
SQLAlchemyConfig,
|
||||||
SQLAlchemySource,
|
SQLAlchemySource,
|
||||||
@ -95,6 +97,32 @@ class AthenaSource(SQLAlchemySource):
|
|||||||
return [schema for schema in schemas if schema == athena_config.database]
|
return [schema for schema in schemas if schema == athena_config.database]
|
||||||
return schemas
|
return schemas
|
||||||
|
|
||||||
|
def gen_database_containers(
|
||||||
|
self, database: str
|
||||||
|
) -> typing.Iterable[MetadataWorkUnit]:
|
||||||
|
# In Athena the schema is the database and database is not existing
|
||||||
|
return []
|
||||||
|
|
||||||
|
def gen_schema_key(self, db_name: str, schema: str) -> DatabaseKey:
|
||||||
|
return DatabaseKey(
|
||||||
|
platform=self.platform, instance=self.config.env, database=schema
|
||||||
|
)
|
||||||
|
|
||||||
|
def gen_schema_containers(
|
||||||
|
self, schema: str, db_name: str
|
||||||
|
) -> typing.Iterable[MetadataWorkUnit]:
|
||||||
|
database_container_key = self.gen_database_key(database=schema)
|
||||||
|
|
||||||
|
container_workunits = gen_containers(
|
||||||
|
database_container_key,
|
||||||
|
schema,
|
||||||
|
["Database"],
|
||||||
|
)
|
||||||
|
|
||||||
|
for wu in container_workunits:
|
||||||
|
self.report.report_workunit(wu)
|
||||||
|
yield wu
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.cursor:
|
if self.cursor:
|
||||||
self.cursor.close()
|
self.cursor.close()
|
||||||
|
|||||||
@ -585,7 +585,9 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
|
|||||||
) -> Iterable[MetadataWorkUnit]:
|
) -> Iterable[MetadataWorkUnit]:
|
||||||
schema_container_key = self.gen_schema_key(db_name, schema)
|
schema_container_key = self.gen_schema_key(db_name, schema)
|
||||||
|
|
||||||
database_container_key = self.gen_database_key(database=db_name)
|
database_container_key: Optional[PlatformKey] = None
|
||||||
|
if db_name is not None:
|
||||||
|
database_container_key = self.gen_database_key(database=db_name)
|
||||||
|
|
||||||
container_workunits = gen_containers(
|
container_workunits = gen_containers(
|
||||||
schema_container_key,
|
schema_container_key,
|
||||||
@ -625,8 +627,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase):
|
|||||||
self.report.report_dropped(f"{schema}.*")
|
self.report.report_dropped(f"{schema}.*")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if db_name:
|
yield from self.gen_schema_containers(schema, db_name)
|
||||||
yield from self.gen_schema_containers(schema, db_name)
|
|
||||||
|
|
||||||
if sql_config.include_tables:
|
if sql_config.include_tables:
|
||||||
yield from self.loop_tables(inspector, schema, sql_config)
|
yield from self.loop_tables(inspector, schema, sql_config)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user