diff --git a/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json b/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json index 7a5b4f83053..bf523db48a5 100644 --- a/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json +++ b/catalog-rest-service/src/main/resources/json/schema/metadataIngestion/databaseServiceMetadataPipeline.json @@ -28,16 +28,38 @@ }, "additionalProperties": false }, + "dbtCloudConfig": { + "title": "DBT Cloud Config Source", + "description": "DBT Catalog and Manifest HTTP path configuration.", + "type": "object", + "properties": { + "dbtCloudAuthToken": { + "title": "DBT Cloud Authentication Token", + "description": "DBT cloud account authentication token", + "type": "string", + "format": "password" + }, + "dbtCloudAccountId": { + "title": "DBT Cloud Account Id", + "description": "DBT cloud account Id", + "type": "string" + } + }, + "additionalProperties": false, + "required": ["dbtCloudAuthToken", "dbtCloudAccountId"] + }, "dbtLocalConfig": { "title": "DBT Local Config Source", "description": "DBT Catalog and Manifest file path config.", "type": "object", "properties": { "dbtCatalogFilePath": { + "title": "DBT Catalog File Path", "description": "DBT catalog file path to extract dbt models with their column schemas.", "type": "string" }, "dbtManifestFilePath": { + "title": "DBT Manifest File Path", "description": "DBT manifest file path to extract dbt models and associate with tables.", "type": "string" } @@ -51,10 +73,12 @@ "type": "object", "properties": { "dbtCatalogHttpPath": { + "title": "DBT Catalog HTTP File Path", "description": "DBT catalog http file path to extract dbt models with their column schemas.", "type": "string" }, "dbtManifestHttpPath": { + "title": "DBT Manifest HTTP File Path", "description": "DBT manifest http file path to extract dbt models and associate with tables.", "type": "string" } @@ -133,6 +157,9 @@ "title": "DBT Configuration Source", "description": "Available sources to fetch DBT catalog and manifest files.", "oneOf": [ + { + "$ref": "#/definitions/dbtCloudConfig" + }, { "$ref": "#/definitions/dbtLocalConfig" }, diff --git a/ingestion/src/metadata/utils/dbt_config.py b/ingestion/src/metadata/utils/dbt_config.py index 734f98f9743..aef41400fae 100644 --- a/ingestion/src/metadata/utils/dbt_config.py +++ b/ingestion/src/metadata/utils/dbt_config.py @@ -18,6 +18,7 @@ from functools import singledispatch from typing import Optional, Tuple from metadata.generated.schema.metadataIngestion.databaseServiceMetadataPipeline import ( + DbtCloudConfig, DbtGCSConfig, DbtHttpConfig, DbtLocalConfig, @@ -70,6 +71,41 @@ def _(config: DbtHttpConfig): return None +@get_dbt_details.register +def _(config: DbtCloudConfig): + try: + from metadata.ingestion.ometa.client import REST, ClientConfig + + expiry = 0 + auth_token = config.dbtCloudAuthToken.get_secret_value(), expiry + client_config = ClientConfig( + base_url="https://cloud.getdbt.com", + api_version="api/v2", + auth_token=lambda: auth_token, + auth_header="Authorization", + allow_redirects=True, + ) + client = REST(client_config) + account_id = config.dbtCloudAccountId + response = client.get( + f"/accounts/{account_id}/runs/?order_by=-finished_at&limit=1" + ) + runs_data = response.get("data") + if runs_data: + run_id = runs_data[0]["id"] + dbt_catalog = client.get( + f"/accounts/{account_id}/runs/{run_id}/artifacts/{DBT_CATALOG_FILE_NAME}" + ) + dbt_manifest = client.get( + f"/accounts/{account_id}/runs/{run_id}/artifacts/{DBT_MANIFEST_FILE_NAME}" + ) + return dbt_catalog, dbt_manifest + except Exception as exc: + logger.error(traceback.format_exc()) + logger.error(f"Error fetching dbt files from DBT Cloud {repr(exc)}") + return None + + @get_dbt_details.register def _(config: DbtS3Config): try: