From 05e6a56b41edabed2bd915b58a5a3cb706312e31 Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Mon, 9 Jun 2025 14:18:35 +0530 Subject: [PATCH] Add Databricks Sampler, Refactor Unity Catalog Sampler (#21612) --- .../database/databricks/service_spec.py | 2 ++ .../database/unitycatalog/service_spec.py | 2 +- .../sampler/sqlalchemy/databricks/sampler.py | 26 +++++++++++++++++++ .../sqlalchemy/unitycatalog/sampler.py} | 0 4 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 ingestion/src/metadata/sampler/sqlalchemy/databricks/sampler.py rename ingestion/src/metadata/{profiler/interface/sqlalchemy/unity_catalog/sampler_interface.py => sampler/sqlalchemy/unitycatalog/sampler.py} (100%) diff --git a/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py b/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py index 36f6c42cb4b..741981be244 100644 --- a/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py +++ b/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py @@ -9,6 +9,7 @@ from metadata.ingestion.source.database.databricks.usage import DatabricksUsageS from metadata.profiler.interface.sqlalchemy.databricks.profiler_interface import ( DatabricksProfilerInterface, ) +from metadata.sampler.sqlalchemy.databricks.sampler import DatabricksSamplerInterface from metadata.utils.service_spec.default import DefaultDatabaseSpec ServiceSpec = DefaultDatabaseSpec( @@ -17,4 +18,5 @@ ServiceSpec = DefaultDatabaseSpec( usage_source_class=DatabricksUsageSource, profiler_class=DatabricksProfilerInterface, test_suite_class=DatabricksTestSuiteInterface, + sampler_class=DatabricksSamplerInterface, ) diff --git a/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py b/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py index 892f88ef7b7..4dff12fb3c5 100644 --- a/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py 
+++ b/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py @@ -11,7 +11,7 @@ from metadata.ingestion.source.database.unitycatalog.usage import ( from metadata.profiler.interface.sqlalchemy.unity_catalog.profiler_interface import ( UnityCatalogProfilerInterface, ) -from metadata.profiler.interface.sqlalchemy.unity_catalog.sampler_interface import ( +from metadata.sampler.sqlalchemy.unitycatalog.sampler import ( UnityCatalogSamplerInterface, ) from metadata.utils.service_spec.default import DefaultDatabaseSpec diff --git a/ingestion/src/metadata/sampler/sqlalchemy/databricks/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/databricks/sampler.py new file mode 100644 index 00000000000..3f610acc0fe --- /dev/null +++ b/ingestion/src/metadata/sampler/sqlalchemy/databricks/sampler.py @@ -0,0 +1,26 @@ +# Copyright 2025 Collate +# Licensed under the Collate Community License, Version 1.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Helper module to handle data sampling for the profiler +""" +from metadata.ingestion.source.database.databricks.connection import ( + get_connection as databricks_get_connection, +) +from metadata.sampler.sqlalchemy.sampler import SQASampler + + +class DatabricksSamplerInterface(SQASampler): + def get_client(self): + """Return the client (the SQA session) after calling set_catalog on it""" + self.connection = databricks_get_connection(self.service_connection_config) + client = super().get_client() + self.set_catalog(client) + return client diff --git a/ingestion/src/metadata/profiler/interface/sqlalchemy/unity_catalog/sampler_interface.py b/ingestion/src/metadata/sampler/sqlalchemy/unitycatalog/sampler.py similarity index 100% rename from ingestion/src/metadata/profiler/interface/sqlalchemy/unity_catalog/sampler_interface.py rename to ingestion/src/metadata/sampler/sqlalchemy/unitycatalog/sampler.py