From ac9f803b46d9df641d05965329e0c2430daf1b18 Mon Sep 17 00:00:00 2001
From: Pere Menal-Ferrer
Date: Tue, 27 May 2025 14:24:28 +0200
Subject: [PATCH] Make presidio_analyzer a lazy import in the PII processor (#21408)
Co-authored-by: Pere Menal
---
ingestion/src/metadata/pii/processor.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/ingestion/src/metadata/pii/processor.py b/ingestion/src/metadata/pii/processor.py
index bd7b3d8a5cd..1bd4f5a6eb7 100644
--- a/ingestion/src/metadata/pii/processor.py
+++ b/ingestion/src/metadata/pii/processor.py
@@ -26,7 +26,6 @@ from metadata.generated.schema.type.tagLabel import (
TagSource,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.pii.algorithms.classifiers import ColumnClassifier, PIISensitiveClassifier
from metadata.pii.algorithms.tags import PIISensitivityTag
from metadata.pii.algorithms.utils import get_top_classes, normalize_scores
from metadata.pii.base_processor import AutoClassificationProcessor
@@ -48,6 +47,12 @@ class PIIProcessor(AutoClassificationProcessor):
metadata: OpenMetadata,
):
super().__init__(config, metadata)
+
+ from metadata.pii.algorithms.classifiers import ( # pylint: disable=import-outside-toplevel
+ ColumnClassifier,
+ PIISensitiveClassifier,
+ )
+
self._classifier: ColumnClassifier[PIISensitivityTag] = PIISensitiveClassifier()
self.confidence_threshold = self.source_config.confidence / 100