From 988baeb467fe7f09bb8ed298bbb6be7e1a12e1e6 Mon Sep 17 00:00:00 2001 From: iasoon Date: Wed, 12 Jan 2022 09:52:55 +0100 Subject: [PATCH] feat(ingest): metabase - allow configuring how database engines get mapped to platforms (#3869) --- metadata-ingestion/source_docs/metabase.md | 10 +++++++++- .../src/datahub/ingestion/source/metabase.py | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/source_docs/metabase.md b/metadata-ingestion/source_docs/metabase.md index b627656187..b01cf9ec44 100644 --- a/metadata-ingestion/source_docs/metabase.md +++ b/metadata-ingestion/source_docs/metabase.md @@ -81,10 +81,18 @@ sink: | `username` | ✅ | | Metabase username. | | `password` | ✅ | | Metabase password. | | `database_alias_map` | | | Database name map to use when constructing dataset URN. | +| `engine_platform_map`| | | Custom mappings between Metabase database engines and DataHub platforms. | | `default_schema` | | `public` | Default schema name to use when schema is not provided in an SQL query | | `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | - +Metabase databases will be mapped to a DataHub platform based on the engine listed in the +[api/database](https://www.metabase.com/docs/latest/api-documentation.html#database) response. This mapping can be +customized by using the `engine_platform_map` config option. For example, to map databases using the `athena` engine to +the underlying datasets in the `glue` platform, the following snippet can be used: +```yml + engine_platform_map: + athena: glue +``` DataHub will try to determine database name from Metabase [api/database](https://www.metabase.com/docs/latest/api-documentation.html#database) payload. However, the name can be overridden from `database_alias_map` for a given database connected to Metabase. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index b3ff03e487..b686f9741f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -42,6 +42,7 @@ class MetabaseConfig(ConfigModel): username: Optional[str] = None password: Optional[str] = None database_alias_map: Optional[dict] = None + engine_platform_map: Optional[dict] = None default_schema: str = "public" env: str = builder.DEFAULT_ENV @@ -477,6 +478,10 @@ class MetabaseSource(Source): "sqlserver": "mssql", "bigquery-cloud-sdk": "bigquery", } + + if self.config.engine_platform_map is not None: + engine_mapping.update(self.config.engine_platform_map) + if engine in engine_mapping: platform = engine_mapping[engine] else: