feat(ingest): add option to specify source platform database in lookml ingestion (#2749)

This commit is contained in:
Remi 2021-06-23 17:16:20 -06:00 committed by GitHub
parent 22a2ed81e4
commit 91f5d4f59a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 8 deletions

View File

@ -620,15 +620,15 @@ Extracts:
source:
type: "lookml"
config:
base_folder: /path/to/model/files # Where the *.model.lkml and *.view.lkml files are stored.
connection_to_platform_map: # mapping between connection names in the model files to platform names.
my_snowflake_conn: snowflake
platform_name: looker_views # Optional, default is "looker_views"
actor: "urn:li:corpuser:etl" # Optional, "urn:li:corpuser:etl"
base_folder: /path/to/model/files # where the *.model.lkml and *.view.lkml files are stored
connection_to_platform_map: # maps connection names in the model files to platform names
connection_name: platform_name (or platform_name.database_name) # for ex. my_snowflake_conn: snowflake.my_database
platform_name: "looker" # optional, default is "looker"
actor: "urn:li:corpuser:etl" # optional, default is "urn:li:corpuser:etl"
model_pattern: {}
view_pattern: {}
env: "PROD" # Optional, default is "PROD"
parse_table_names_from_sql: False # See note below.
env: "PROD" # optional, default is "PROD"
parse_table_names_from_sql: False # see note below
```
Note! The integration can use [`sql-metadata`](https://pypi.org/project/sql-metadata/) to try to parse the tables the

View File

@ -382,7 +382,15 @@ class LookMLSource(Source): # pragma: no cover
def _construct_datalineage_urn(self, sql_table_name: str, connection: str) -> str:
platform = self._get_platform_based_on_connection(connection)
return f"urn:li:dataset:(urn:li:dataPlatform:{platform},{sql_table_name},{self.source_config.env})"
if "." in platform:
platform_name, database_name = platform.lower().split(".", maxsplit=1)
sql_table_name = f"{database_name}.{sql_table_name}".lower()
else:
platform_name = platform.lower()
sql_table_name = sql_table_name.lower()
return f"urn:li:dataset:(urn:li:dataPlatform:{platform_name},{sql_table_name},{self.source_config.env})"
def _get_platform_based_on_connection(self, connection: str) -> str:
if connection in self.source_config.connection_to_platform_map: