#14943 - Check tags before PII processor (#15622)

This commit is contained in:
Pere Miquel Brull 2024-03-21 09:45:28 +01:00 committed by GitHub
parent c641a57818
commit b778bc7968
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 15 additions and 9 deletions

View File

@ -46,7 +46,7 @@ from metadata.ingestion.ometa.mixins.patch_mixin_utils import (
) )
from metadata.ingestion.ometa.utils import model_str from metadata.ingestion.ometa.utils import model_str
from metadata.utils.deprecation import deprecated from metadata.utils.deprecation import deprecated
from metadata.utils.logger import ometa_logger from metadata.utils.logger import get_log_name, ometa_logger
logger = ometa_logger() logger = ometa_logger()
@ -155,9 +155,7 @@ class OMetaPatchMixin(OMetaPatchMixinBase):
except Exception as exc: except Exception as exc:
logger.debug(traceback.format_exc()) logger.debug(traceback.format_exc())
logger.error( logger.error(f"Error trying to PATCH {get_log_name(source)}: {exc}")
f"Error trying to PATCH {entity.__name__} [{source.id.__root__}]: {exc}"
)
return None return None

View File

@ -21,7 +21,7 @@ from pydantic import BaseModel
from metadata.generated.schema.entity.classification.tag import Tag from metadata.generated.schema.entity.classification.tag import Tag
from metadata.pii.constants import PII, SPACY_EN_MODEL from metadata.pii.constants import PII, SPACY_EN_MODEL
from metadata.pii.models import TagAndConfidence, TagType from metadata.pii.models import TagAndConfidence
from metadata.pii.ner import NEREntity from metadata.pii.ner import NEREntity
from metadata.utils import fqn from metadata.utils import fqn
from metadata.utils.logger import pii_logger from metadata.utils.logger import pii_logger
@ -119,13 +119,15 @@ class NERScanner:
if entities_score: if entities_score:
label, score = self.get_highest_score_label(entities_score) label, score = self.get_highest_score_label(entities_score)
tag_type = NEREntity.__members__.get(label, TagType.NONSENSITIVE).value tag_type = NEREntity.__members__.get(label)
if not tag_type:
return None
return TagAndConfidence( return TagAndConfidence(
tag_fqn=fqn.build( tag_fqn=fqn.build(
metadata=None, metadata=None,
entity_type=Tag, entity_type=Tag,
classification_name=PII, classification_name=PII,
tag_name=tag_type, tag_name=tag_type.value,
), ),
confidence=score, confidence=score,
) )

View File

@ -43,6 +43,10 @@ from metadata.utils.logger import profiler_logger
logger = profiler_logger() logger = profiler_logger()
TABLE_FIELDS = ["tableProfilerConfig", "columns", "customMetrics"]
TAGS_FIELD = ["tags"]
class ProfilerSourceAndEntity(BaseModel): class ProfilerSourceAndEntity(BaseModel):
"""Return class for the OpenMetadata Profiler Source""" """Return class for the OpenMetadata Profiler Source"""
@ -278,7 +282,9 @@ class OpenMetadataSource(Source):
""" """
tables = self.metadata.list_all_entities( tables = self.metadata.list_all_entities(
entity=Table, entity=Table,
fields=["tableProfilerConfig", "columns", "customMetrics"], fields=TABLE_FIELDS
if not self.source_config.processPiiSensitive
else TABLE_FIELDS + TAGS_FIELD,
params={ params={
"service": self.config.source.serviceName, "service": self.config.source.serviceName,
"database": fqn.build( "database": fqn.build(

View File

@ -108,7 +108,7 @@ class BaseWorkflow(ABC, WorkflowStatusMixin):
@property @property
def ingestion_pipeline(self): def ingestion_pipeline(self):
"""Get or create the Ingestion Pipeline from the configuration""" """Get or create the Ingestion Pipeline from the configuration"""
if not self._ingestion_pipeline: if not self._ingestion_pipeline and self.config.ingestionPipelineFQN:
self._ingestion_pipeline = self.get_or_create_ingestion_pipeline() self._ingestion_pipeline = self.get_or_create_ingestion_pipeline()
return self._ingestion_pipeline return self._ingestion_pipeline