diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 139da9cf02..3f8f1b6e4c 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -620,15 +620,15 @@ Extracts: source: type: "lookml" config: - base_folder: /path/to/model/files # Where the *.model.lkml and *.view.lkml files are stored. - connection_to_platform_map: # mapping between connection names in the model files to platform names. - my_snowflake_conn: snowflake - platform_name: looker_views # Optional, default is "looker_views" - actor: "urn:li:corpuser:etl" # Optional, "urn:li:corpuser:etl" + base_folder: /path/to/model/files # where the *.model.lkml and *.view.lkml files are stored + connection_to_platform_map: # mapping from connection names in the model files to platform names + connection_name: platform_name (or platform_name.database_name) # e.g. my_snowflake_conn: snowflake.my_database + platform_name: "looker" # optional, default is "looker" + actor: "urn:li:corpuser:etl" # optional, default is "urn:li:corpuser:etl" model_pattern: {} view_pattern: {} - env: "PROD" # optional, default is "PROD" - parse_table_names_from_sql: False # See note below. + env: "PROD" # optional, default is "PROD" + parse_table_names_from_sql: False # see note below ``` Note! 
The integration can use [`sql-metadata`](https://pypi.org/project/sql-metadata/) to try to parse the tables the diff --git a/metadata-ingestion/src/datahub/ingestion/source/lookml.py b/metadata-ingestion/src/datahub/ingestion/source/lookml.py index b875e5904d..f270c1aa82 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/lookml.py +++ b/metadata-ingestion/src/datahub/ingestion/source/lookml.py @@ -382,7 +382,15 @@ class LookMLSource(Source): # pragma: no cover def _construct_datalineage_urn(self, sql_table_name: str, connection: str) -> str: platform = self._get_platform_based_on_connection(connection) - return f"urn:li:dataset:(urn:li:dataPlatform:{platform},{sql_table_name},{self.source_config.env})" + + if "." in platform: + platform_name, database_name = platform.lower().split(".", maxsplit=1) + sql_table_name = f"{database_name}.{sql_table_name}".lower() + else: + platform_name = platform.lower() + sql_table_name = sql_table_name.lower() + + return f"urn:li:dataset:(urn:li:dataPlatform:{platform_name},{sql_table_name},{self.source_config.env})" def _get_platform_based_on_connection(self, connection: str) -> str: if connection in self.source_config.connection_to_platform_map: