mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-05 03:54:23 +00:00
Removed en_core_web_md from setup (#10839)
* Removed en_core_web_md from setup
* Use Constant

---------

Co-authored-by: ulixius9 <mayursingal9@gmail.com>
This commit is contained in:
parent
2e2c6a0cdf
commit
3406c8c868
@ -67,10 +67,6 @@ COMMONS = {
|
|||||||
# required library for pii tagging
|
# required library for pii tagging
|
||||||
pii_requirements = {
|
pii_requirements = {
|
||||||
"spacy==3.5.0",
|
"spacy==3.5.0",
|
||||||
(
|
|
||||||
"en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/"
|
|
||||||
"en_core_web_md-3.5.0/en_core_web_md-3.5.0-py3-none-any.whl"
|
|
||||||
),
|
|
||||||
VERSIONS["pandas"],
|
VERSIONS["pandas"],
|
||||||
"presidio-analyzer==2.2.32",
|
"presidio-analyzer==2.2.32",
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,6 +25,8 @@ from metadata.utils import fqn
|
|||||||
|
|
||||||
PII = "PII"
|
PII = "PII"
|
||||||
|
|
||||||
|
SPACY_EN_MODEL = "en_core_web_md"
|
||||||
|
|
||||||
|
|
||||||
class PiiTypes(Enum):
|
class PiiTypes(Enum):
|
||||||
"""PiiTypes enumerates the different types of PII data"""
|
"""PiiTypes enumerates the different types of PII data"""
|
||||||
@ -115,6 +117,7 @@ class NERScanner:
|
|||||||
"""A scanner that uses Spacy NER for entity recognition"""
|
"""A scanner that uses Spacy NER for entity recognition"""
|
||||||
|
|
||||||
def __init__(self, metadata: OpenMetadata):
|
def __init__(self, metadata: OpenMetadata):
|
||||||
|
import spacy # pylint: disable=import-outside-toplevel
|
||||||
from presidio_analyzer import ( # pylint: disable=import-outside-toplevel
|
from presidio_analyzer import ( # pylint: disable=import-outside-toplevel
|
||||||
AnalyzerEngine,
|
AnalyzerEngine,
|
||||||
)
|
)
|
||||||
@ -122,10 +125,19 @@ class NERScanner:
|
|||||||
SpacyNlpEngine,
|
SpacyNlpEngine,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
spacy.load(SPACY_EN_MODEL)
|
||||||
|
except OSError:
|
||||||
|
logging.warning("Downloading en_core_web_md language model for the spaCy")
|
||||||
|
from spacy.cli import download # pylint: disable=import-outside-toplevel
|
||||||
|
|
||||||
|
download(SPACY_EN_MODEL)
|
||||||
|
spacy.load(SPACY_EN_MODEL)
|
||||||
|
|
||||||
self.metadata = metadata
|
self.metadata = metadata
|
||||||
self.text = ""
|
self.text = ""
|
||||||
self.analyzer = AnalyzerEngine(
|
self.analyzer = AnalyzerEngine(
|
||||||
nlp_engine=SpacyNlpEngine(models={"en": "en_core_web_md"})
|
nlp_engine=SpacyNlpEngine(models={"en": SPACY_EN_MODEL})
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_highest_score_label(
|
def get_highest_score_label(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user