mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-11 10:36:54 +00:00
MINOR - Keep presidio logger at ERROR (#22124)
* MINOR - Keep presidio debug at ERROR * test --------- Co-authored-by: Keshav Mohta <68001229+keshavmohta09@users.noreply.github.com> Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
parent
707a3b5d2d
commit
a86c51e82d
@ -73,9 +73,6 @@ class ColumnClassifier(ABC, Generic[T]):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
# Implementations
|
|
||||||
|
|
||||||
|
|
||||||
@final
|
@final
|
||||||
class HeuristicPIIClassifier(ColumnClassifier[PIITag]):
|
class HeuristicPIIClassifier(ColumnClassifier[PIITag]):
|
||||||
"""
|
"""
|
||||||
|
@ -13,7 +13,7 @@ Utilities for working with the Presidio Library.
|
|||||||
"""
|
"""
|
||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
from typing import Iterable, Optional, Type
|
from typing import Iterable, Type, Union
|
||||||
|
|
||||||
import spacy
|
import spacy
|
||||||
from presidio_analyzer import (
|
from presidio_analyzer import (
|
||||||
@ -26,7 +26,7 @@ from presidio_analyzer.nlp_engine import SpacyNlpEngine
|
|||||||
from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType]
|
from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType]
|
||||||
|
|
||||||
from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG
|
from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG
|
||||||
from metadata.utils.logger import METADATA_LOGGER, pii_logger
|
from metadata.utils.logger import pii_logger
|
||||||
|
|
||||||
logger = pii_logger()
|
logger = pii_logger()
|
||||||
|
|
||||||
@ -64,17 +64,10 @@ def build_analyzer_engine(
|
|||||||
return analyzer_engine
|
return analyzer_engine
|
||||||
|
|
||||||
|
|
||||||
def set_presidio_logger_level(log_level: Optional[int] = None) -> None:
|
def set_presidio_logger_level(log_level: Union[int, str] = logging.ERROR) -> None:
|
||||||
"""
|
"""
|
||||||
Set the presidio logger to talk less about internal entities unless we are debugging.
|
Set the presidio logger to talk less about internal entities unless we are debugging.
|
||||||
"""
|
"""
|
||||||
if log_level is None:
|
|
||||||
log_level = (
|
|
||||||
logging.INFO
|
|
||||||
if logging.getLogger(METADATA_LOGGER).level == logging.DEBUG
|
|
||||||
else logging.ERROR
|
|
||||||
)
|
|
||||||
|
|
||||||
logging.getLogger(PRESIDIO_LOGGER).setLevel(log_level)
|
logging.getLogger(PRESIDIO_LOGGER).setLevel(log_level)
|
||||||
|
|
||||||
|
|
||||||
@ -87,7 +80,6 @@ def _load_spacy_model(model_name: str) -> None:
|
|||||||
try:
|
try:
|
||||||
_ = spacy.load(model_name)
|
_ = spacy.load(model_name)
|
||||||
except OSError:
|
except OSError:
|
||||||
|
|
||||||
logger.warning(f"Downloading {model_name} language model for the spaCy")
|
logger.warning(f"Downloading {model_name} language model for the spaCy")
|
||||||
download(model_name)
|
download(model_name)
|
||||||
_ = spacy.load(model_name)
|
_ = spacy.load(model_name)
|
||||||
|
@ -22,6 +22,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
|||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
from metadata.generated.schema.entity.classification.tag import Tag
|
from metadata.generated.schema.entity.classification.tag import Tag
|
||||||
|
from metadata.pii.algorithms.presidio_utils import _load_spacy_model
|
||||||
from metadata.pii.constants import PII, SPACY_EN_MODEL
|
from metadata.pii.constants import PII, SPACY_EN_MODEL
|
||||||
from metadata.pii.models import TagAndConfidence
|
from metadata.pii.models import TagAndConfidence
|
||||||
from metadata.pii.ner import NEREntity
|
from metadata.pii.ner import NEREntity
|
||||||
@ -56,18 +57,10 @@ class NERScanner(BaseScanner):
|
|||||||
"""Based on https://microsoft.github.io/presidio/"""
|
"""Based on https://microsoft.github.io/presidio/"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
import spacy
|
|
||||||
from presidio_analyzer import AnalyzerEngine
|
from presidio_analyzer import AnalyzerEngine
|
||||||
from presidio_analyzer.nlp_engine.spacy_nlp_engine import SpacyNlpEngine
|
from presidio_analyzer.nlp_engine.spacy_nlp_engine import SpacyNlpEngine
|
||||||
|
|
||||||
try:
|
_load_spacy_model(SPACY_EN_MODEL)
|
||||||
spacy.load(SPACY_EN_MODEL)
|
|
||||||
except OSError:
|
|
||||||
logger.warning("Downloading en_core_web_md language model for the spaCy")
|
|
||||||
from spacy.cli import download
|
|
||||||
|
|
||||||
download(SPACY_EN_MODEL)
|
|
||||||
spacy.load(SPACY_EN_MODEL)
|
|
||||||
|
|
||||||
nlp_engine_model = NLPEngineModel(
|
nlp_engine_model = NLPEngineModel(
|
||||||
lang_code=SUPPORTED_LANG, model_name=SPACY_EN_MODEL
|
lang_code=SUPPORTED_LANG, model_name=SPACY_EN_MODEL
|
||||||
|
Loading…
x
Reference in New Issue
Block a user