From ac9f803b46d9df641d05965329e0c2430daf1b18 Mon Sep 17 00:00:00 2001 From: Pere Menal-Ferrer Date: Tue, 27 May 2025 14:24:28 +0200 Subject: [PATCH] Make presidio_analyzer a lazy import in the PII processor (#21408) Co-authored-by: Pere Menal --- ingestion/src/metadata/pii/processor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ingestion/src/metadata/pii/processor.py b/ingestion/src/metadata/pii/processor.py index bd7b3d8a5cd..1bd4f5a6eb7 100644 --- a/ingestion/src/metadata/pii/processor.py +++ b/ingestion/src/metadata/pii/processor.py @@ -26,7 +26,6 @@ from metadata.generated.schema.type.tagLabel import ( TagSource, ) from metadata.ingestion.ometa.ometa_api import OpenMetadata -from metadata.pii.algorithms.classifiers import ColumnClassifier, PIISensitiveClassifier from metadata.pii.algorithms.tags import PIISensitivityTag from metadata.pii.algorithms.utils import get_top_classes, normalize_scores from metadata.pii.base_processor import AutoClassificationProcessor @@ -48,6 +47,12 @@ class PIIProcessor(AutoClassificationProcessor): metadata: OpenMetadata, ): super().__init__(config, metadata) + + from metadata.pii.algorithms.classifiers import ( # pylint: disable=import-outside-toplevel + ColumnClassifier, + PIISensitiveClassifier, + ) + self._classifier: ColumnClassifier[PIISensitivityTag] = PIISensitiveClassifier() self.confidence_threshold = self.source_config.confidence / 100