398 lines
12 KiB
Python
Raw Normal View History

# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Tests for the PiiProcessor class
"""
from unittest import TestCase
from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.entity.data.table import Column, DataType, TableType
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
OpenMetadataJWTClientConfig,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.generated.schema.type.tagLabel import TagFQN, TagLabel
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.database.processor import PiiProcessor
def _mock_tag(tag_fqn):
    """Return a ``Tag``-sourced, automated, suggested label for *tag_fqn*."""
    return TagLabel(
        tagFQN=tag_fqn,
        description=None,
        source="Tag",
        labelType="Automated",
        state="Suggested",
        href=None,
    )


def _mock_column(name, data_type, position, description="", tags=None):
    """Return an input ``Column``; all remaining fields are passed as None."""
    return Column(
        name=name,
        displayName=None,
        dataType=data_type,
        arrayDataType=None,
        dataLength=None,
        precision=None,
        scale=None,
        dataTypeDisplay=None,
        description=description,
        fullyQualifiedName=None,
        tags=tags,
        constraint=None,
        ordinalPosition=position,
        jsonSchema=None,
        children=None,
        customMetrics=None,
        profile=None,
    )


# Table-creation request handed to the processor in PiiProcessorTest.
# Two of its columns already carry a "PersonalData.Personal" label; the
# others start with no tags at all.
MOCK_TABLE: CreateTableRequest = CreateTableRequest(
    name="DataSet Input",
    displayName="DataSet Input",
    description="this is a description for dataset input",
    tableType=TableType.Regular.value,
    columns=[
        _mock_column(
            name="amount",
            data_type=DataType.DOUBLE.value,
            position=1,
            description="This is description for amount",
        ),
        _mock_column(
            name="bank_transfer_amount",
            data_type=DataType.DOUBLE.value,
            position=2,
        ),
        _mock_column(
            name="coupon_amount",
            data_type=DataType.DOUBLE.value,
            position=3,
        ),
        _mock_column(
            name="credit_card_amount",
            data_type=DataType.DOUBLE.value,
            position=4,
            tags=[_mock_tag("PersonalData.Personal")],
        ),
        # NOTE(review): ordinal position 4 is reused by the next two columns.
        # EXPECTED_COLUMNS mirrors these values, so they are kept as-is here.
        _mock_column(
            name="FirstName",
            data_type=DataType.STRING.value,
            position=4,
        ),
        _mock_column(
            name="is_customer",
            data_type=DataType.BOOLEAN.value,
            position=4,
            tags=[_mock_tag("PersonalData.Personal")],
        ),
    ],
    tableConstraints=None,
    tablePartition=None,
    tableProfilerConfig=None,
    owner=None,
    databaseSchema=EntityReference(
        id="c3eb265f-5445-4ad3-ba5e-797d3a3071bb",
        type="databaseSchema",
        name=None,
        fullyQualifiedName=None,
        description=None,
        displayName=None,
        deleted=None,
        href=None,
    ),
    tags=None,
    viewDefinition=None,
    extension=None,
)
def _expected_tag(tag_fqn):
    """Return a ``Tag``-sourced, automated, suggested label for *tag_fqn*."""
    return TagLabel(
        tagFQN=tag_fqn,
        description=None,
        source="Tag",
        labelType="Automated",
        state="Suggested",
        href=None,
    )


def _expected_column(name, data_type, position, tags, description=""):
    """Return an expected ``Column``; all remaining fields are passed as None."""
    return Column(
        name=name,
        displayName=None,
        dataType=data_type,
        arrayDataType=None,
        dataLength=None,
        precision=None,
        scale=None,
        dataTypeDisplay=None,
        description=description,
        fullyQualifiedName=None,
        tags=tags,
        constraint=None,
        ordinalPosition=position,
        jsonSchema=None,
        children=None,
        customMetrics=None,
        profile=None,
    )


# Columns that PiiProcessorTest compares against MOCK_TABLE.columns once the
# processor has run. Labels that were already present on the input stay first
# in each tag list.
EXPECTED_COLUMNS = [
    _expected_column(
        name="amount",
        data_type="DOUBLE",
        position=1,
        description="This is description for amount",
        tags=[_expected_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _expected_column(
        name="bank_transfer_amount",
        data_type="DOUBLE",
        position=2,
        tags=[_expected_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _expected_column(
        name="coupon_amount",
        data_type="DOUBLE",
        position=3,
        tags=[_expected_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _expected_column(
        name="credit_card_amount",
        data_type="DOUBLE",
        position=4,
        tags=[
            _expected_tag("PersonalData.Personal"),
            _expected_tag(TagFQN(__root__="PII.Sensitive")),
        ],
    ),
    _expected_column(
        name="FirstName",
        data_type="STRING",
        position=4,
        tags=[_expected_tag(TagFQN(__root__="PII.NonSensitive"))],
    ),
    # The only column expected to keep just the label it started with.
    _expected_column(
        name="is_customer",
        data_type="BOOLEAN",
        position=4,
        tags=[_expected_tag("PersonalData.Personal")],
    ),
]
class PiiProcessorTest(TestCase):
    """
    Run this integration test with different type of column name
    to attach PII Tags
    """

    def setUp(self) -> None:
        """Create the ``PiiProcessor`` under test before each test method.

        The fixture lives in ``setUp`` rather than an overridden ``__init__``
        so the class keeps ``TestCase``'s own constructor signature and any
        failure while building the client is reported by the test runner as
        an error of the test itself.
        """
        # NOTE(review): hard-coded JWT aimed at a localhost server; presumably
        # a development-only credential -- confirm it is not a real secret.
        server_config = OpenMetadataConnection(
            hostPort="http://localhost:8585/api",
            authProvider="openmetadata",
            securityConfig=OpenMetadataJWTClientConfig(
                jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJh"
                "bGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vc"
                "mciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7Hgz"
                "GBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUx"
                "huv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakL"
                "Lzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM"
                "5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
            ),
        )
        metadata = OpenMetadata(server_config)
        self.processor = PiiProcessor(metadata_config=metadata)

    def test_process(self):
        """Process ``MOCK_TABLE`` and compare its columns with the expectation."""
        # The comparison below reads MOCK_TABLE itself after the call, so the
        # test relies on process() updating the request's columns in place.
        self.processor.process(MOCK_TABLE)
        # assertEqual (unlike a bare assert) survives ``python -O`` and
        # reports which elements differ when the lists do not match.
        self.assertEqual(MOCK_TABLE.columns, EXPECTED_COLUMNS)