diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index afc5000220..3f3c08919a 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -313,6 +313,7 @@ Extracts: - List of databases, schema, and tables - Column types associated with each table - Also supports PostGIS extensions +- database_alias (optional) can be used to change the name of database to be ingested ```yml source: @@ -322,6 +323,7 @@ source: password: pass host_port: localhost:5432 database: DemoDatabase + database_alias: DatabaseNameToBeIngested include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above # options is same as above diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 1e651bb381..99efb299b1 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -151,6 +151,7 @@ base_dev_requirements = { "looker", "glue", "oracle", + "postgres", "sagemaker", "datahub-kafka", "datahub-rest", diff --git a/metadata-ingestion/src/datahub/ingestion/source/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/postgres.py index c8654c23a0..097c931057 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/postgres.py @@ -29,6 +29,8 @@ class PostgresConfig(BasicSQLAlchemyConfig): def get_identifier(self, schema: str, table: str) -> str: regular = f"{schema}.{table}" + if self.database_alias: + return f"{self.database_alias}.{regular}" if self.database: return f"{self.database}.{regular}" return regular diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py index fe52c4cace..f7050439ef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py @@ -120,6 +120,7 @@ class BasicSQLAlchemyConfig(SQLAlchemyConfig): password: Optional[pydantic.SecretStr] = None host_port: str database: Optional[str] = None + database_alias: Optional[str] = None scheme: str def get_sql_alchemy_url(self, uri_opts=None): diff --git a/metadata-ingestion/tests/unit/test_postgres_source.py b/metadata-ingestion/tests/unit/test_postgres_source.py new file mode 100644 index 0000000000..a146d13a9a --- /dev/null +++ b/metadata-ingestion/tests/unit/test_postgres_source.py @@ -0,0 +1,21 @@ +from datahub.ingestion.source.postgres import PostgresConfig + + +def _base_config(): + return {"username": "user", "password": "password", "host_port": "host:1521"} + + +def test_database_alias_takes_precendence(): + config = PostgresConfig.parse_obj( + { + **_base_config(), + "database_alias": "ops_database", + "database": "postgres", + } + ) + assert config.get_identifier("superset", "logs") == "ops_database.superset.logs" + + +def test_database_in_identifier(): + config = PostgresConfig.parse_obj({**_base_config(), "database": "postgres"}) + assert config.get_identifier("superset", "logs") == "postgres.superset.logs"