MINOR - Keep presidio logger at ERROR (#22124)

* MINOR - Keep presidio logger at ERROR

* test

---------

Co-authored-by: Keshav Mohta <68001229+keshavmohta09@users.noreply.github.com>
Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
Pere Miquel Brull 2025-07-14 11:55:02 +02:00 committed by GitHub
parent 707a3b5d2d
commit a86c51e82d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 23 deletions

View File

@@ -73,9 +73,6 @@ class ColumnClassifier(ABC, Generic[T]):
"""
# Implementations
@final
class HeuristicPIIClassifier(ColumnClassifier[PIITag]):
"""

View File

@@ -13,7 +13,7 @@ Utilities for working with the Presidio Library.
"""
import inspect
import logging
from typing import Iterable, Optional, Type
from typing import Iterable, Type, Union
import spacy
from presidio_analyzer import (
@@ -26,7 +26,7 @@ from presidio_analyzer.nlp_engine import SpacyNlpEngine
from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType]
from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG
from metadata.utils.logger import METADATA_LOGGER, pii_logger
from metadata.utils.logger import pii_logger
logger = pii_logger()
@@ -64,17 +64,10 @@ def build_analyzer_engine(
return analyzer_engine
def set_presidio_logger_level(log_level: Optional[int] = None) -> None:
def set_presidio_logger_level(log_level: Union[int, str] = logging.ERROR) -> None:
"""
Set the presidio logger to talk less about internal entities unless we are debugging.
"""
if log_level is None:
log_level = (
logging.INFO
if logging.getLogger(METADATA_LOGGER).level == logging.DEBUG
else logging.ERROR
)
logging.getLogger(PRESIDIO_LOGGER).setLevel(log_level)
@@ -87,7 +80,6 @@ def _load_spacy_model(model_name: str) -> None:
try:
_ = spacy.load(model_name)
except OSError:
logger.warning(f"Downloading {model_name} language model for the spaCy")
download(model_name)
_ = spacy.load(model_name)

View File

@@ -22,6 +22,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import BaseModel, ConfigDict
from metadata.generated.schema.entity.classification.tag import Tag
from metadata.pii.algorithms.presidio_utils import _load_spacy_model
from metadata.pii.constants import PII, SPACY_EN_MODEL
from metadata.pii.models import TagAndConfidence
from metadata.pii.ner import NEREntity
@@ -56,18 +57,10 @@ class NERScanner(BaseScanner):
"""Based on https://microsoft.github.io/presidio/"""
def __init__(self):
import spacy
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine.spacy_nlp_engine import SpacyNlpEngine
try:
spacy.load(SPACY_EN_MODEL)
except OSError:
logger.warning("Downloading en_core_web_md language model for the spaCy")
from spacy.cli import download
download(SPACY_EN_MODEL)
spacy.load(SPACY_EN_MODEL)
_load_spacy_model(SPACY_EN_MODEL)
nlp_engine_model = NLPEngineModel(
lang_code=SUPPORTED_LANG, model_name=SPACY_EN_MODEL