mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-09 17:48:29 +00:00
MINOR - Keep presidio logger at ERROR (#22124)
* MINOR - Keep presidio debug at ERROR * test --------- Co-authored-by: Keshav Mohta <68001229+keshavmohta09@users.noreply.github.com> Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
parent
707a3b5d2d
commit
a86c51e82d
@ -73,9 +73,6 @@ class ColumnClassifier(ABC, Generic[T]):
|
||||
"""
|
||||
|
||||
|
||||
# Implementations
|
||||
|
||||
|
||||
@final
|
||||
class HeuristicPIIClassifier(ColumnClassifier[PIITag]):
|
||||
"""
|
||||
|
@ -13,7 +13,7 @@ Utilities for working with the Presidio Library.
|
||||
"""
|
||||
import inspect
|
||||
import logging
|
||||
from typing import Iterable, Optional, Type
|
||||
from typing import Iterable, Type, Union
|
||||
|
||||
import spacy
|
||||
from presidio_analyzer import (
|
||||
@ -26,7 +26,7 @@ from presidio_analyzer.nlp_engine import SpacyNlpEngine
|
||||
from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType]
|
||||
|
||||
from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG
|
||||
from metadata.utils.logger import METADATA_LOGGER, pii_logger
|
||||
from metadata.utils.logger import pii_logger
|
||||
|
||||
logger = pii_logger()
|
||||
|
||||
@ -64,17 +64,10 @@ def build_analyzer_engine(
|
||||
return analyzer_engine
|
||||
|
||||
|
||||
def set_presidio_logger_level(log_level: Optional[int] = None) -> None:
|
||||
def set_presidio_logger_level(log_level: Union[int, str] = logging.ERROR) -> None:
|
||||
"""
|
||||
Set the presidio logger to talk less about internal entities unless we are debugging.
|
||||
"""
|
||||
if log_level is None:
|
||||
log_level = (
|
||||
logging.INFO
|
||||
if logging.getLogger(METADATA_LOGGER).level == logging.DEBUG
|
||||
else logging.ERROR
|
||||
)
|
||||
|
||||
logging.getLogger(PRESIDIO_LOGGER).setLevel(log_level)
|
||||
|
||||
|
||||
@ -87,7 +80,6 @@ def _load_spacy_model(model_name: str) -> None:
|
||||
try:
|
||||
_ = spacy.load(model_name)
|
||||
except OSError:
|
||||
|
||||
logger.warning(f"Downloading {model_name} language model for the spaCy")
|
||||
download(model_name)
|
||||
_ = spacy.load(model_name)
|
||||
|
@ -22,6 +22,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from metadata.generated.schema.entity.classification.tag import Tag
|
||||
from metadata.pii.algorithms.presidio_utils import _load_spacy_model
|
||||
from metadata.pii.constants import PII, SPACY_EN_MODEL
|
||||
from metadata.pii.models import TagAndConfidence
|
||||
from metadata.pii.ner import NEREntity
|
||||
@ -56,18 +57,10 @@ class NERScanner(BaseScanner):
|
||||
"""Based on https://microsoft.github.io/presidio/"""
|
||||
|
||||
def __init__(self):
|
||||
import spacy
|
||||
from presidio_analyzer import AnalyzerEngine
|
||||
from presidio_analyzer.nlp_engine.spacy_nlp_engine import SpacyNlpEngine
|
||||
|
||||
try:
|
||||
spacy.load(SPACY_EN_MODEL)
|
||||
except OSError:
|
||||
logger.warning("Downloading en_core_web_md language model for the spaCy")
|
||||
from spacy.cli import download
|
||||
|
||||
download(SPACY_EN_MODEL)
|
||||
spacy.load(SPACY_EN_MODEL)
|
||||
_load_spacy_model(SPACY_EN_MODEL)
|
||||
|
||||
nlp_engine_model = NLPEngineModel(
|
||||
lang_code=SUPPORTED_LANG, model_name=SPACY_EN_MODEL
|
||||
|
Loading…
x
Reference in New Issue
Block a user