MINOR - Keep presidio logger at ERROR (#22124)

* MINOR - Keep presidio logger at ERROR, even when the metadata logger is at DEBUG

* test

---------

Co-authored-by: Keshav Mohta <68001229+keshavmohta09@users.noreply.github.com>
Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
Pere Miquel Brull 2025-07-14 11:55:02 +02:00 committed by GitHub
parent 707a3b5d2d
commit a86c51e82d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 23 deletions

View File

@ -73,9 +73,6 @@ class ColumnClassifier(ABC, Generic[T]):
""" """
# Implementations
@final @final
class HeuristicPIIClassifier(ColumnClassifier[PIITag]): class HeuristicPIIClassifier(ColumnClassifier[PIITag]):
""" """

View File

@ -13,7 +13,7 @@ Utilities for working with the Presidio Library.
""" """
import inspect import inspect
import logging import logging
from typing import Iterable, Optional, Type from typing import Iterable, Type, Union
import spacy import spacy
from presidio_analyzer import ( from presidio_analyzer import (
@ -26,7 +26,7 @@ from presidio_analyzer.nlp_engine import SpacyNlpEngine
from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType] from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType]
from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG
from metadata.utils.logger import METADATA_LOGGER, pii_logger from metadata.utils.logger import pii_logger
logger = pii_logger() logger = pii_logger()
@ -64,17 +64,10 @@ def build_analyzer_engine(
return analyzer_engine return analyzer_engine
def set_presidio_logger_level(log_level: Optional[int] = None) -> None: def set_presidio_logger_level(log_level: Union[int, str] = logging.ERROR) -> None:
""" """
Set the presidio logger to talk less about internal entities unless we are debugging. Set the presidio logger to talk less about internal entities unless we are debugging.
""" """
if log_level is None:
log_level = (
logging.INFO
if logging.getLogger(METADATA_LOGGER).level == logging.DEBUG
else logging.ERROR
)
logging.getLogger(PRESIDIO_LOGGER).setLevel(log_level) logging.getLogger(PRESIDIO_LOGGER).setLevel(log_level)
@ -87,7 +80,6 @@ def _load_spacy_model(model_name: str) -> None:
try: try:
_ = spacy.load(model_name) _ = spacy.load(model_name)
except OSError: except OSError:
logger.warning(f"Downloading {model_name} language model for the spaCy") logger.warning(f"Downloading {model_name} language model for the spaCy")
download(model_name) download(model_name)
_ = spacy.load(model_name) _ = spacy.load(model_name)

View File

@ -22,6 +22,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
from metadata.generated.schema.entity.classification.tag import Tag from metadata.generated.schema.entity.classification.tag import Tag
from metadata.pii.algorithms.presidio_utils import _load_spacy_model
from metadata.pii.constants import PII, SPACY_EN_MODEL from metadata.pii.constants import PII, SPACY_EN_MODEL
from metadata.pii.models import TagAndConfidence from metadata.pii.models import TagAndConfidence
from metadata.pii.ner import NEREntity from metadata.pii.ner import NEREntity
@ -56,18 +57,10 @@ class NERScanner(BaseScanner):
"""Based on https://microsoft.github.io/presidio/""" """Based on https://microsoft.github.io/presidio/"""
def __init__(self): def __init__(self):
import spacy
from presidio_analyzer import AnalyzerEngine from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine.spacy_nlp_engine import SpacyNlpEngine from presidio_analyzer.nlp_engine.spacy_nlp_engine import SpacyNlpEngine
try: _load_spacy_model(SPACY_EN_MODEL)
spacy.load(SPACY_EN_MODEL)
except OSError:
logger.warning("Downloading en_core_web_md language model for the spaCy")
from spacy.cli import download
download(SPACY_EN_MODEL)
spacy.load(SPACY_EN_MODEL)
nlp_engine_model = NLPEngineModel( nlp_engine_model = NLPEngineModel(
lang_code=SUPPORTED_LANG, model_name=SPACY_EN_MODEL lang_code=SUPPORTED_LANG, model_name=SPACY_EN_MODEL