diff --git a/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py b/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py index 36f6c42cb4b..741981be244 100644 --- a/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py +++ b/ingestion/src/metadata/ingestion/source/database/databricks/service_spec.py @@ -9,6 +9,7 @@ from metadata.ingestion.source.database.databricks.usage import DatabricksUsageS from metadata.profiler.interface.sqlalchemy.databricks.profiler_interface import ( DatabricksProfilerInterface, ) +from metadata.sampler.sqlalchemy.databricks.sampler import DatabricksSamplerInterface from metadata.utils.service_spec.default import DefaultDatabaseSpec ServiceSpec = DefaultDatabaseSpec( @@ -17,4 +18,5 @@ ServiceSpec = DefaultDatabaseSpec( usage_source_class=DatabricksUsageSource, profiler_class=DatabricksProfilerInterface, test_suite_class=DatabricksTestSuiteInterface, + sampler_class=DatabricksSamplerInterface, ) diff --git a/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py b/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py index 892f88ef7b7..4dff12fb3c5 100644 --- a/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py +++ b/ingestion/src/metadata/ingestion/source/database/unitycatalog/service_spec.py @@ -11,7 +11,7 @@ from metadata.ingestion.source.database.unitycatalog.usage import ( from metadata.profiler.interface.sqlalchemy.unity_catalog.profiler_interface import ( UnityCatalogProfilerInterface, ) -from metadata.profiler.interface.sqlalchemy.unity_catalog.sampler_interface import ( +from metadata.sampler.sqlalchemy.unitycatalog.sampler import ( UnityCatalogSamplerInterface, ) from metadata.utils.service_spec.default import DefaultDatabaseSpec diff --git a/ingestion/src/metadata/sampler/sqlalchemy/databricks/sampler.py b/ingestion/src/metadata/sampler/sqlalchemy/databricks/sampler.py new file mode 100644 
class DatabricksSamplerInterface(SQASampler):
    """SQLAlchemy sampler that builds its client on a Databricks connection."""

    def get_client(self):
        """Return the SQA session used as the sampling client.

        The Databricks connection is created from the service connection
        config and stored on the instance first, so that the parent
        implementation builds the session on top of it. The resulting
        session is then passed to ``set_catalog`` before being returned.
        """
        # Must be assigned before delegating to the parent's get_client().
        self.connection = databricks_get_connection(self.service_connection_config)
        session = super().get_client()
        self.set_catalog(session)
        return session