mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-02 02:26:00 +00:00
Removed en_core_web_md from setup (#10839)
* Removed en_core_web_md from setup
* Use constant
---------
Co-authored-by: ulixius9 <mayursingal9@gmail.com>
This commit is contained in:
parent
2e2c6a0cdf
commit
3406c8c868
@ -67,10 +67,6 @@ COMMONS = {
|
||||
# required library for pii tagging
|
||||
pii_requirements = {
|
||||
"spacy==3.5.0",
|
||||
(
|
||||
"en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/"
|
||||
"en_core_web_md-3.5.0/en_core_web_md-3.5.0-py3-none-any.whl"
|
||||
),
|
||||
VERSIONS["pandas"],
|
||||
"presidio-analyzer==2.2.32",
|
||||
}
|
||||
|
||||
@ -25,6 +25,8 @@ from metadata.utils import fqn
|
||||
|
||||
PII = "PII"
|
||||
|
||||
SPACY_EN_MODEL = "en_core_web_md"
|
||||
|
||||
|
||||
class PiiTypes(Enum):
|
||||
"""PiiTypes enumerates the different types of PII data"""
|
||||
@ -115,6 +117,7 @@ class NERScanner:
|
||||
"""A scanner that uses Spacy NER for entity recognition"""
|
||||
|
||||
def __init__(self, metadata: OpenMetadata):
|
||||
import spacy # pylint: disable=import-outside-toplevel
|
||||
from presidio_analyzer import ( # pylint: disable=import-outside-toplevel
|
||||
AnalyzerEngine,
|
||||
)
|
||||
@ -122,10 +125,19 @@ class NERScanner:
|
||||
SpacyNlpEngine,
|
||||
)
|
||||
|
||||
try:
|
||||
spacy.load(SPACY_EN_MODEL)
|
||||
except OSError:
|
||||
logging.warning("Downloading en_core_web_md language model for the spaCy")
|
||||
from spacy.cli import download # pylint: disable=import-outside-toplevel
|
||||
|
||||
download(SPACY_EN_MODEL)
|
||||
spacy.load(SPACY_EN_MODEL)
|
||||
|
||||
self.metadata = metadata
|
||||
self.text = ""
|
||||
self.analyzer = AnalyzerEngine(
|
||||
nlp_engine=SpacyNlpEngine(models={"en": "en_core_web_md"})
|
||||
nlp_engine=SpacyNlpEngine(models={"en": SPACY_EN_MODEL})
|
||||
)
|
||||
|
||||
def get_highest_score_label(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user