diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 2dfbe12735..ae34a647dc 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -242,8 +242,7 @@ usage_common = { "sqlparse", } -databricks_cli = { - "databricks-cli>=0.17.7", +databricks = { "databricks-sdk>=0.1.1", "pyspark", "requests", @@ -375,7 +374,7 @@ plugins: Dict[str, Set[str]] = { "powerbi": microsoft_common | {"lark[regex]==1.1.4", "sqlparse"}, "powerbi-report-server": powerbi_report_server, "vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.1"}, - "unity-catalog": databricks_cli | sqllineage_lib, + "unity-catalog": databricks | sqllineage_lib, } # This is mainly used to exclude plugins from the Docker image. diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index 4828f2171f..e92f4ff07b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -3,7 +3,7 @@ Manage the communication with DataBricks Server and provide equivalent dataclass """ import dataclasses import logging -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Dict, Iterable, List, Optional, Union from unittest.mock import patch @@ -277,7 +277,7 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin): def _create_catalog(self, metastore: Metastore, obj: CatalogInfo) -> Catalog: return Catalog( name=obj.name, - id="{}.{}".format(metastore.id, self._escape_sequence(obj.name)), + id=f"{metastore.id}.{self._escape_sequence(obj.name)}", metastore=metastore, comment=obj.comment, owner=obj.owner, @@ -287,7 +287,7 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin): def _create_schema(self, catalog: Catalog, obj: SchemaInfo) -> Schema: return Schema( name=obj.name, - id="{}.{}".format(catalog.id, self._escape_sequence(obj.name)), + id=f"{catalog.id}.{self._escape_sequence(obj.name)}", catalog=catalog, comment=obj.comment, owner=obj.owner, @@ -296,7 +296,7 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin): def _create_column(self, table_id: str, obj: ColumnInfo) -> Column: return Column( name=obj.name, - id="{}.{}".format(table_id, self._escape_sequence(obj.name)), + id=f"{table_id}.{self._escape_sequence(obj.name)}", type_text=obj.type_text, type_name=obj.type_name, type_scale=obj.type_scale, @@ -307,7 +307,7 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin): ) def _create_table(self, schema: Schema, obj: TableInfoWithGeneration) -> Table: - table_id: str = "{}.{}".format(schema.id, self._escape_sequence(obj.name)) + table_id = f"{schema.id}.{self._escape_sequence(obj.name)}" return Table( name=obj.name, id=table_id, @@ -322,9 +322,9 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin): properties=obj.properties or {}, owner=obj.owner, generation=obj.generation, - created_at=datetime.utcfromtimestamp(obj.created_at / 1000), + created_at=datetime.fromtimestamp(obj.created_at / 1000, tz=timezone.utc), created_by=obj.created_by, - updated_at=datetime.utcfromtimestamp(obj.updated_at / 1000) + updated_at=datetime.fromtimestamp(obj.updated_at / 1000, tz=timezone.utc) if obj.updated_at else None, updated_by=obj.updated_by, @@ -336,7 +336,7 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin): self, obj: DatabricksServicePrincipal ) -> ServicePrincipal: return ServicePrincipal( - id="{}.{}".format(obj.id, self._escape_sequence(obj.display_name)), + id=f"{obj.id}.{self._escape_sequence(obj.display_name)}", display_name=obj.display_name, application_id=obj.application_id, active=obj.active, @@ -348,8 +348,12 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin): query_id=info.query_id, query_text=info.query_text, statement_type=info.statement_type, - start_time=datetime.utcfromtimestamp(info.query_start_time_ms / 1000), - end_time=datetime.utcfromtimestamp(info.query_end_time_ms / 1000), + start_time=datetime.fromtimestamp( + info.query_start_time_ms / 1000, tz=timezone.utc + ), + end_time=datetime.fromtimestamp( + info.query_end_time_ms / 1000, tz=timezone.utc + ), user_id=info.user_id, user_name=info.user_name, executed_as_user_id=info.executed_as_user_id, diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py index fe74a2f4c0..ab38119d01 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py @@ -13,7 +13,6 @@ from databricks.sdk.service.sql import ( StatementState, StatementStatus, ) -from databricks_cli.unity_catalog.api import UnityCatalogApi from datahub.ingestion.source.unity.proxy_types import ( ColumnProfile, @@ -29,7 +28,6 @@ logger: logging.Logger = logging.getLogger(__name__) # TODO: Move to separate proxy/ directory with rest of proxy code class UnityCatalogProxyProfilingMixin: _workspace_client: WorkspaceClient - _unity_catalog_api: UnityCatalogApi report: UnityCatalogReport warehouse_id: str diff --git a/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json b/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json index 1b84d3132f..f37170bdae 100644 --- a/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json +++ b/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json @@ -335,7 +335,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -343,7 +343,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main/default/quickstart_table", "name": "quickstart_table", @@ -621,7 +621,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -629,7 +629,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main/information_schema/quickstart_table", "name": "quickstart_table", @@ -907,7 +907,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -915,7 +915,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main/quickstart_schema/quickstart_table", "name": "quickstart_table", @@ -1301,7 +1301,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -1309,7 +1309,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/default/quickstart_table", "name": "quickstart_table", @@ -1587,7 +1587,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -1595,7 +1595,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/information_schema/quickstart_table", "name": "quickstart_table", @@ -1873,7 +1873,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -1881,7 +1881,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/quickstart_schema/quickstart_table", "name": "quickstart_table", @@ -2267,7 +2267,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -2275,7 +2275,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system/default/quickstart_table", "name": "quickstart_table", @@ -2553,7 +2553,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -2561,7 +2561,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system/information_schema/quickstart_table", "name": "quickstart_table", @@ -2839,7 +2839,7 @@ "generation": "2", "table_type": "MANAGED", "created_by": "abc@acryl.io", - "created_at": "2022-10-19 13:21:38.688000", + "created_at": "2022-10-19 13:21:38.688000+00:00", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", "delta.minReaderVersion": "1", @@ -2847,7 +2847,7 @@ "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", "owner": "account users", "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000" + "updated_at": "2022-10-19 13:27:29.633000+00:00" }, "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system/quickstart_schema/quickstart_table", "name": "quickstart_table",