mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-12 09:18:20 +00:00
Fix: Added changes for Pii sensitive (#10119)
* Fix: added changes for pii sensitive * Fix: removed comments * Fix: python checkstyle * differentiate between sensitive and non sensitive tag * fix: python test * fix: added tests * fix: maven CI
This commit is contained in:
parent
fe03e51cfe
commit
34a0cc147e
@ -52,6 +52,7 @@ from metadata.ingestion.source.database.database_service import (
|
||||
DatabaseServiceSource,
|
||||
SQLSourceStatus,
|
||||
)
|
||||
from metadata.ingestion.source.database.processor import PiiProcessor
|
||||
from metadata.ingestion.source.database.sql_column_handler import SqlColumnHandlerMixin
|
||||
from metadata.ingestion.source.database.sqlalchemy_source import SqlAlchemySource
|
||||
from metadata.ingestion.source.models import TableView
|
||||
@ -372,6 +373,12 @@ class CommonDbSourceService(
|
||||
table_name=table_name
|
||||
), # Pick tags from context info, if any
|
||||
)
|
||||
|
||||
# Process pii sensitive column and append tags
|
||||
if self.source_config.processPiiSensitive:
|
||||
processor = PiiProcessor(metadata_config=self.metadata)
|
||||
processor.process(table_request)
|
||||
|
||||
is_partitioned, partition_details = self.get_table_partition_details(
|
||||
table_name=table_name, schema_name=schema_name, inspector=self.inspector
|
||||
)
|
||||
|
@ -67,6 +67,7 @@ from metadata.ingestion.models.topology import (
|
||||
create_source_context,
|
||||
)
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.source.database.processor import PiiProcessor
|
||||
from metadata.utils import fqn
|
||||
from metadata.utils.filters import filter_by_schema
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
@ -490,3 +491,10 @@ class DatabaseServiceSource(
|
||||
)
|
||||
|
||||
yield from self.delete_schema_tables(schema_fqn)
|
||||
|
||||
def process_pii_sensitive_column(
    self, metadata_config: OpenMetadata, table_request: CreateTableRequest
):
    """
    Run the PII processor on the table request when the source
    configuration has processPiiSensitive enabled. Does nothing otherwise.
    """
    if not self.source_config.processPiiSensitive:
        return
    PiiProcessor(metadata_config=metadata_config).process(
        table_request=table_request
    )
|
||||
|
@ -432,6 +432,9 @@ class DatalakeSource(DatabaseServiceSource): # pylint: disable=too-many-public-
|
||||
type="databaseSchema",
|
||||
),
|
||||
)
|
||||
self.process_pii_sensitive_column(
|
||||
metadata_config=self.metadata, table_request=table_request
|
||||
)
|
||||
yield table_request
|
||||
self.register_record(table_request=table_request)
|
||||
except Exception as exc:
|
||||
|
@ -272,7 +272,9 @@ class DeltalakeSource(DatabaseServiceSource):
|
||||
),
|
||||
viewDefinition=view_definition,
|
||||
)
|
||||
|
||||
self.process_pii_sensitive_column(
|
||||
metadata_config=self.metadata, table_request=table_request
|
||||
)
|
||||
yield table_request
|
||||
self.register_record(table_request=table_request)
|
||||
|
||||
|
@ -161,6 +161,9 @@ class DomodatabaseSource(DatabaseServiceSource):
|
||||
type="databaseSchema",
|
||||
),
|
||||
)
|
||||
self.process_pii_sensitive_column(
|
||||
metadata_config=self.metadata, table_request=table_request
|
||||
)
|
||||
yield table_request
|
||||
self.register_record(table_request=table_request)
|
||||
except Exception as exc:
|
||||
|
@ -205,7 +205,9 @@ class DynamodbSource(DatabaseServiceSource):
|
||||
type="databaseSchema",
|
||||
),
|
||||
)
|
||||
|
||||
self.process_pii_sensitive_column(
|
||||
metadata_config=self.metadata, table_request=table_request
|
||||
)
|
||||
yield table_request
|
||||
self.register_record(table_request=table_request)
|
||||
|
||||
|
@ -287,6 +287,9 @@ class GlueSource(DatabaseServiceSource):
|
||||
type="databaseSchema",
|
||||
),
|
||||
)
|
||||
self.process_pii_sensitive_column(
|
||||
metadata_config=self.metadata, table_request=table_request
|
||||
)
|
||||
yield table_request
|
||||
self.register_record(table_request=table_request)
|
||||
except Exception as exc:
|
||||
|
197
ingestion/src/metadata/ingestion/source/database/processor.py
Normal file
197
ingestion/src/metadata/ingestion/source/database/processor.py
Normal file
@ -0,0 +1,197 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Processor util to fetch pii sensitive columns
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum, auto
|
||||
from typing import List, Optional
|
||||
|
||||
from commonregex import CommonRegex
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
||||
from metadata.generated.schema.type.tagLabel import (
|
||||
LabelType,
|
||||
State,
|
||||
TagLabel,
|
||||
TagSource,
|
||||
)
|
||||
from metadata.ingestion.api.processor import Processor, ProcessorStatus
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata, OpenMetadataConnection
|
||||
|
||||
PII = "PII"
|
||||
|
||||
|
||||
class PiiTypes(Enum):
    """Categories of PII data that the scanners can report"""

    # Placeholders
    NONE = auto()
    UNSUPPORTED = auto()

    # Contact and payment details
    PHONE = auto()
    EMAIL = auto()
    CREDIT_CARD = auto()
    ADDRESS = auto()
    ADDRESS_LOCATION = auto()

    # Personal details
    PERSON = auto()
    LOCATION = auto()
    BIRTH_DATE = auto()
    GENDER = auto()
    NATIONALITY = auto()
    IP_ADDRESS = auto()

    # Identifiers and credentials
    SSN = auto()
    USER_NAME = auto()
    PASSWORD = auto()
    ETHNICITY = auto()
    TAX_ID = auto()
    KEY = auto()
    BANKACC = auto()
|
||||
|
||||
|
||||
class TagType(Enum):
    """Names of the two PII tags a column can be labelled with"""

    SENSITIVE = "Sensitive"
    NONSENSITIVE = "NonSensitive"
|
||||
|
||||
|
||||
class ColumnPIIType(BaseModel):
    """Outcome of a column name scan: the PII category and its tag type"""

    pii_types: PiiTypes
    tag_type: TagType
|
||||
|
||||
|
||||
class Scanner(ABC):
    """Base interface for the PII scanners"""

    @abstractmethod
    def scan(self, text):
        """Inspect the text and report the PII that was found in it"""
|
||||
|
||||
|
||||
class RegexScanner(Scanner):
    """Scanner that looks for PII in a text through the CommonRegex matchers"""

    def scan(self, text):
        """Return the names of the PiiTypes whose CommonRegex matcher found something"""
        matches = CommonRegex(text)
        # CommonRegex result attribute -> PiiTypes member reported for it,
        # in the order the results are returned.
        attribute_to_type = (
            ("phones", PiiTypes.PHONE),
            ("emails", PiiTypes.EMAIL),
            ("credit_cards", PiiTypes.CREDIT_CARD),
            ("street_addresses", PiiTypes.ADDRESS),
        )
        return [
            pii_type.name
            for attribute, pii_type in attribute_to_type
            if getattr(matches, attribute)
        ]
|
||||
|
||||
|
||||
class ColumnNameScanner(Scanner):
    """
    Scanner that classifies a column as PII by looking only at its name.

    The sensitive patterns are checked before the non sensitive ones and
    the first pattern that matches decides the result.
    """

    # Every pattern is wrapped in ".*", so a keyword is found anywhere in
    # the column name, ignoring case.
    # NOTE(review): "user(id|name|)" ends with an empty alternative, so any
    # name containing "user" matches; "acc", "key" and "mail" are broad too.
    # Confirm this is intended before tightening the patterns.
    sensitive_regex = {
        PiiTypes.PASSWORD: re.compile(r"^.*password.*$", re.IGNORECASE),
        PiiTypes.USER_NAME: re.compile(r"^.*user(id|name|).*$", re.IGNORECASE),
        PiiTypes.KEY: re.compile(r"^.*(key).*$", re.IGNORECASE),
        PiiTypes.SSN: re.compile(r"^.*(ssn|social).*$", re.IGNORECASE),
        PiiTypes.CREDIT_CARD: re.compile(r"^.*(card).*$", re.IGNORECASE),
        PiiTypes.BANKACC: re.compile(r"^.*(bank|acc|amount).*$", re.IGNORECASE),
        PiiTypes.EMAIL: re.compile(r"^.*(email|e-mail|mail).*$", re.IGNORECASE),
    }
    non_sensitive_regex = {
        PiiTypes.PERSON: re.compile(
            r"^.*(firstname|fname|lastname|lname|"
            r"fullname|maidenname|_name|"
            r"nickname|name_suffix|name).*$",
            re.IGNORECASE,
        ),
        PiiTypes.BIRTH_DATE: re.compile(
            r"^.*(date_of_birth|dateofbirth|dob|"
            r"birthday|date_of_death|dateofdeath).*$",
            re.IGNORECASE,
        ),
        PiiTypes.GENDER: re.compile(r"^.*(gender).*$", re.IGNORECASE),
        PiiTypes.NATIONALITY: re.compile(r"^.*(nationality).*$", re.IGNORECASE),
        PiiTypes.ADDRESS: re.compile(
            r"^.*(address|city|state|county|country|"
            r"zipcode|zip|postal|zone|borough).*$",
            re.IGNORECASE,
        ),
        PiiTypes.PHONE: re.compile(r"^.*(phone).*$", re.IGNORECASE),
    }

    def scan(self, text) -> Optional[ColumnPIIType]:
        """
        Classify a column name.

        Returns the ColumnPIIType of the first pattern that matches the
        name, or None when no pattern matches.
        """
        # Order matters: a name that hits both groups is reported as sensitive
        pattern_groups = (
            (self.sensitive_regex, TagType.SENSITIVE),
            (self.non_sensitive_regex, TagType.NONSENSITIVE),
        )
        for patterns, tag_type in pattern_groups:
            for pii_type, pattern in patterns.items():
                if pattern.match(text) is not None:
                    return ColumnPIIType(pii_types=pii_type, tag_type=tag_type)

        logging.debug("No PII pattern matched column name [%s]", text)
        return None
|
||||
|
||||
|
||||
class PiiProcessor(Processor):
    """
    Processor that tags the columns of a table request as PII
    based on the column names.
    """

    metadata_config: OpenMetadata
    status: ProcessorStatus
    metadata: OpenMetadata

    def __init__(self, metadata_config: OpenMetadata):
        """Keep the OpenMetadata client and prepare the status and the scanner"""
        super().__init__()
        self.metadata = metadata_config
        self.status = ProcessorStatus()
        self.column_scanner = ColumnNameScanner()

    @classmethod
    def create(cls, config_dict: dict):  # pylint: disable=arguments-differ
        """Build the processor from a raw OpenMetadata connection dictionary"""
        connection = OpenMetadataConnection.parse_obj(config_dict)
        # __init__ expects an OpenMetadata client, not the connection model
        return cls(OpenMetadata(connection))

    def process(  # pylint: disable=arguments-differ
        self, table_request: CreateTableRequest
    ) -> Optional[CreateTableRequest]:
        """
        Add a suggested PII tag to every column whose name is recognised
        by the column name scanner.

        The columns are updated in place and the same request is returned.
        Every scanned column name is appended to the status records.
        """
        for column in table_request.columns:
            column_name = column.name.__root__
            pii_type: Optional[ColumnPIIType] = self.column_scanner.scan(column_name)
            if pii_type is not None:
                pii_tag = TagLabel(
                    tagFQN=f"{PII}.{pii_type.tag_type.value}",
                    labelType=LabelType.Automated.value,
                    state=State.Suggested.value,
                    source=TagSource.Tag.value,
                )
                # Keep the tags the column already has
                if column.tags:
                    column.tags.append(pii_tag)
                else:
                    column.tags = [pii_tag]
            self.status.records.append(column_name)
        return table_request

    def close(self):
        """Nothing to release"""

    def get_status(self) -> ProcessorStatus:
        """Return the status tracked by this processor"""
        return self.status
|
@ -206,6 +206,9 @@ class SalesforceSource(DatabaseServiceSource):
|
||||
type="databaseSchema",
|
||||
),
|
||||
)
|
||||
self.process_pii_sensitive_column(
|
||||
metadata_config=self.metadata, table_request=table_request
|
||||
)
|
||||
yield table_request
|
||||
self.register_record(table_request=table_request)
|
||||
|
||||
|
397
ingestion/tests/integration/utils/test_processor.py
Normal file
397
ingestion/tests/integration/utils/test_processor.py
Normal file
@ -0,0 +1,397 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Test Processor Class
|
||||
"""
|
||||
|
||||
from unittest import TestCase
|
||||
|
||||
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
||||
from metadata.generated.schema.entity.data.table import Column, DataType, TableType
|
||||
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
||||
OpenMetadataConnection,
|
||||
)
|
||||
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
|
||||
OpenMetadataJWTClientConfig,
|
||||
)
|
||||
from metadata.generated.schema.type.entityReference import EntityReference
|
||||
from metadata.generated.schema.type.tagLabel import TagFQN, TagLabel
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.source.database.processor import PiiProcessor
|
||||
|
||||
def _suggested_tag(tag_fqn) -> TagLabel:
    """Build a fresh automated, suggested tag label for the given FQN"""
    return TagLabel(
        tagFQN=tag_fqn,
        description=None,
        source="Tag",
        labelType="Automated",
        state="Suggested",
        href=None,
    )


def _column(name, data_type, position, description="", tags=None) -> Column:
    """Build a column where only the fields relevant to the test vary"""
    return Column(
        name=name,
        displayName=None,
        dataType=data_type,
        arrayDataType=None,
        dataLength=None,
        precision=None,
        scale=None,
        dataTypeDisplay=None,
        description=description,
        fullyQualifiedName=None,
        tags=tags,
        constraint=None,
        ordinalPosition=position,
        jsonSchema=None,
        children=None,
        customMetrics=None,
        profile=None,
    )


# Table sent to the processor: only two columns carry a tag beforehand
MOCK_TABLE: CreateTableRequest = CreateTableRequest(
    name="DataSet Input",
    displayName="DataSet Input",
    description="this is a description for dataset input",
    tableType=TableType.Regular.value,
    columns=[
        _column(
            "amount",
            DataType.DOUBLE.value,
            1,
            description="This is description for amount",
        ),
        _column("bank_transfer_amount", DataType.DOUBLE.value, 2),
        _column("coupon_amount", DataType.DOUBLE.value, 3),
        _column(
            "credit_card_amount",
            DataType.DOUBLE.value,
            4,
            tags=[_suggested_tag("PersonalData.Personal")],
        ),
        _column("FirstName", DataType.STRING.value, 4),
        _column(
            "is_customer",
            DataType.BOOLEAN.value,
            4,
            tags=[_suggested_tag("PersonalData.Personal")],
        ),
    ],
    tableConstraints=None,
    tablePartition=None,
    tableProfilerConfig=None,
    owner=None,
    databaseSchema=EntityReference(
        id="c3eb265f-5445-4ad3-ba5e-797d3a3071bb",
        type="databaseSchema",
        name=None,
        fullyQualifiedName=None,
        description=None,
        displayName=None,
        deleted=None,
        href=None,
    ),
    tags=None,
    viewDefinition=None,
    extension=None,
)

# Columns of MOCK_TABLE as they must look once the PII tags are attached
EXPECTED_COLUMNS = [
    _column(
        "amount",
        "DOUBLE",
        1,
        description="This is description for amount",
        tags=[_suggested_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _column(
        "bank_transfer_amount",
        "DOUBLE",
        2,
        tags=[_suggested_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _column(
        "coupon_amount",
        "DOUBLE",
        3,
        tags=[_suggested_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _column(
        "credit_card_amount",
        "DOUBLE",
        4,
        tags=[
            _suggested_tag("PersonalData.Personal"),
            _suggested_tag(TagFQN(__root__="PII.Sensitive")),
        ],
    ),
    _column(
        "FirstName",
        "STRING",
        4,
        tags=[_suggested_tag(TagFQN(__root__="PII.NonSensitive"))],
    ),
    _column(
        "is_customer",
        "BOOLEAN",
        4,
        tags=[_suggested_tag("PersonalData.Personal")],
    ),
]
|
||||
|
||||
|
||||
class PiiProcessorTest(TestCase):
    """
    Run this integration test with different types of column names
    to attach PII tags
    """

    def setUp(self) -> None:
        """
        Build the OpenMetadata client and the processor under test.

        Done here instead of in __init__ so that nothing is created
        while the tests are only being collected.
        """
        server_config = OpenMetadataConnection(
            hostPort="http://localhost:8585/api",
            authProvider="openmetadata",
            securityConfig=OpenMetadataJWTClientConfig(
                jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJh"
                "bGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vc"
                "mciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7Hgz"
                "GBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUx"
                "huv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakL"
                "Lzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM"
                "5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
            ),
        )
        metadata = OpenMetadata(server_config)
        self.processor = PiiProcessor(metadata_config=metadata)

    def test_process(self):
        """Each column must end up with the PII tag that matches its name"""
        # The processor updates the columns of the request in place
        self.processor.process(MOCK_TABLE)
        self.assertEqual(MOCK_TABLE.columns, EXPECTED_COLUMNS)
|
@ -43,6 +43,11 @@
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"processPiiSensitive": {
|
||||
"description": "Optional configuration to automatically tag columns that might contain sensitive information",
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"useFqnForFiltering": {
|
||||
"description": "Regex will be applied on fully qualified name (e.g service_name.db_name.schema_name.table_name) instead of raw name (e.g. table_name)",
|
||||
"type": "boolean",
|
||||
|
@ -141,6 +141,7 @@ const AddIngestion = ({
|
||||
data?.name ?? getIngestionName(serviceData.name, pipelineType),
|
||||
ingestSampleData: sourceConfig?.generateSampleData ?? true,
|
||||
useFqnFilter: sourceConfig?.useFqnForFiltering ?? false,
|
||||
processPii: sourceConfig?.processPiiSensitive ?? false,
|
||||
databaseServiceNames: sourceConfig?.dbServiceNames ?? [],
|
||||
description: data?.description ?? '',
|
||||
repeatFrequency:
|
||||
@ -327,6 +328,7 @@ const AddIngestion = ({
|
||||
tableFilterPattern,
|
||||
topicFilterPattern,
|
||||
useFqnFilter,
|
||||
processPii,
|
||||
} = state;
|
||||
|
||||
switch (serviceCategory) {
|
||||
@ -335,6 +337,7 @@ const AddIngestion = ({
|
||||
useFqnForFiltering: useFqnFilter,
|
||||
includeViews: includeView,
|
||||
includeTags: includeTags,
|
||||
processPiiSensitive: processPii,
|
||||
databaseFilterPattern: getFilterPatternData(
|
||||
databaseFilterPattern,
|
||||
showDatabaseFilter
|
||||
|
@ -132,6 +132,6 @@ describe('Test ConfigureIngestion component', () => {
|
||||
expect(backButton).toBeInTheDocument();
|
||||
expect(nextButton).toBeInTheDocument();
|
||||
expect(filterPatternComponents).toHaveLength(3);
|
||||
expect(toggleSwitchs).toHaveLength(5);
|
||||
expect(toggleSwitchs).toHaveLength(6);
|
||||
});
|
||||
});
|
||||
|
@ -85,6 +85,7 @@ const ConfigureIngestion = ({
|
||||
timeoutSeconds,
|
||||
topicFilterPattern,
|
||||
useFqnFilter,
|
||||
processPii,
|
||||
} = useMemo(
|
||||
() => ({
|
||||
chartFilterPattern: data.chartFilterPattern,
|
||||
@ -121,6 +122,7 @@ const ConfigureIngestion = ({
|
||||
timeoutSeconds: data.timeoutSeconds,
|
||||
topicFilterPattern: data.topicFilterPattern,
|
||||
useFqnFilter: data.useFqnFilter,
|
||||
processPii: data.processPii,
|
||||
}),
|
||||
[data]
|
||||
);
|
||||
@ -185,6 +187,8 @@ const ConfigureIngestion = ({
|
||||
|
||||
const handleFqnFilter = () => toggleField('useFqnFilter');
|
||||
|
||||
const handleProcessPii = () => toggleField('processPii');
|
||||
|
||||
const handleQueryLogDuration = handleValueParseInt('queryLogDuration');
|
||||
|
||||
const handleResultLimit = handleValueParseInt('resultLimit');
|
||||
@ -455,6 +459,25 @@ const ConfigureIngestion = ({
|
||||
);
|
||||
};
|
||||
|
||||
// Renders the "Auto Tag PII" toggle together with its helper text.
const getProcessPiiToggles = () => {
  return (
    <Field>
      <div className="tw-flex tw-gap-1">
        <label>{t('label.process-pii-sensitive-column')}</label>
        {/* testId was a copy-paste of the lineage toggle id */}
        <ToggleSwitchV1
          checked={processPii}
          handleCheck={handleProcessPii}
          testId="process-pii"
        />
      </div>
      <p className="tw-text-grey-muted tw-mt-3">
        {t('message.process-pii-sensitive-column-message')}
      </p>
      {getSeparator('')}
    </Field>
  );
};
|
||||
|
||||
const getDashboardDBServiceName = () => {
|
||||
return (
|
||||
<Field>
|
||||
@ -527,6 +550,7 @@ const ConfigureIngestion = ({
|
||||
{getFilterPatterns()}
|
||||
{getSeparator('')}
|
||||
{getFqnForFilteringToggles()}
|
||||
{getProcessPiiToggles()}
|
||||
{getDatabaseFieldToggles()}
|
||||
</Fragment>
|
||||
);
|
||||
|
@ -133,6 +133,7 @@ export interface AddIngestionState {
|
||||
timeoutSeconds: number;
|
||||
topicFilterPattern: FilterPattern;
|
||||
useFqnFilter: boolean;
|
||||
processPii: boolean;
|
||||
}
|
||||
|
||||
export enum ShowFilter {
|
||||
|
@ -501,6 +501,7 @@
|
||||
"primary-key": "Primary Key",
|
||||
"private-key": "PrivateKey",
|
||||
"private-key-id": "Private Key ID",
|
||||
"process-pii-sensitive-column": "Auto Tag PII",
|
||||
"profile": "Profile",
|
||||
"profile-lowercase": "profile",
|
||||
"profile-sample-type": "Profile Sample {{type}}",
|
||||
@ -956,6 +957,7 @@
|
||||
"pipeline-description-message": "Description of the pipeline.",
|
||||
"pipeline-trigger-success-message": "Pipeline triggered successfully!",
|
||||
"pipeline-will-trigger-manually": "Pipeline will only be triggered manually.",
|
||||
"process-pii-sensitive-column-message": "Check column names to auto tag PII Sensitive/NonSensitive columns.",
|
||||
"profile-sample-percentage-message": "Set the Profiler value as percentage",
|
||||
"profile-sample-row-count-message": " Set the Profiler value as row count",
|
||||
"profiler-ingestion-description": "A profiler workflow can be configured and deployed after a metadata ingestion has been set up. Multiple profiler pipelines can be set up for the same database service. The pipeline feeds the Profiler tab of the Table entity, and also runs the tests configured for that entity. Add a Name, FQN, and define the filter pattern to start.",
|
||||
|
Loading…
x
Reference in New Issue
Block a user