fix(ingest): avoid requiring sqlalchemy for dynamodb classification (#10213)

This commit is contained in:
Harshal Sheth 2024-04-08 15:13:25 -07:00 committed by GitHub
parent 6c66e955ba
commit 29bf0e96c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 7 additions and 8 deletions

View File

@@ -15,7 +15,6 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.glossary.classifier import ClassificationConfig, Classifier
 from datahub.ingestion.glossary.classifier_registry import classifier_registry
 from datahub.ingestion.source.common.data_reader import DataReader
-from datahub.ingestion.source.sql.sqlalchemy_data_reader import SAMPLE_SIZE_MULTIPLIER
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
     AuditStamp,
     GlossaryTermAssociation,
@@ -26,6 +25,9 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata
 from datahub.utilities.lossy_collections import LossyDict, LossyList
 from datahub.utilities.perf_timer import PerfTimer

+SAMPLE_SIZE_MULTIPLIER = 1.2
+
 logger: logging.Logger = logging.getLogger(__name__)
@@ -289,7 +291,7 @@ def classification_workunit_processor(
     classification_handler: ClassificationHandler,
     data_reader: Optional[DataReader],
     table_id: List[str],
-    data_reader_kwargs: dict = {},
+    data_reader_kwargs: Optional[dict] = None,
 ) -> Iterable[MetadataWorkUnit]:
     """
     Classification handling for a particular table.
@@ -317,7 +319,7 @@ def classification_workunit_processor(
                     table_id,
                     classification_handler.config.classification.sample_size
                     * SAMPLE_SIZE_MULTIPLIER,
-                    **data_reader_kwargs,
+                    **(data_reader_kwargs or {}),
                 )
                 if data_reader
                 else dict()

View File

@@ -38,6 +38,7 @@ from datahub.ingestion.api.source import (
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.glossary.classification_mixin import (
+    SAMPLE_SIZE_MULTIPLIER,
     ClassificationHandler,
     classification_workunit_processor,
 )
@@ -77,7 +78,6 @@ from datahub.ingestion.source.sql.sql_utils import (
     gen_schema_container,
     get_domain_wu,
 )
-from datahub.ingestion.source.sql.sqlalchemy_data_reader import SAMPLE_SIZE_MULTIPLIER
 from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
 from datahub.ingestion.source.state.redundant_run_skip_handler import (
     RedundantLineageRunSkipHandler,

View File

@@ -48,6 +48,7 @@ from datahub.ingestion.api.source import (
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.glossary.classification_mixin import (
+    SAMPLE_SIZE_MULTIPLIER,
     ClassificationHandler,
     ClassificationReportMixin,
 )
@@ -68,7 +69,6 @@ from datahub.ingestion.source.sql.sql_utils import (
     schema_requires_v2,
 )
 from datahub.ingestion.source.sql.sqlalchemy_data_reader import (
-    SAMPLE_SIZE_MULTIPLIER,
     SqlAlchemyTableDataReader,
 )
 from datahub.ingestion.source.state.stale_entity_removal_handler import (

View File

@@ -72,6 +72,3 @@ class SqlAlchemyTableDataReader(DataReader):
     def close(self) -> None:
         self.connection.close()
-
-
-SAMPLE_SIZE_MULTIPLIER = 1.2