mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-25 02:04:39 +00:00

* added query as an entity * changed name of the variables and methods * Added Resource Descriptors * testcase bug fix * addressing comments * added script for table query migration * added script for table query migration postgresql * bug fix * db change for script test * added current timestamp * change db config from postgresql to mysql * added extension to use function gen_random_uuid() * solving maven ci * added queryUsage and change is migration script * addressing comments * addressing comments * added queryUsage relation and testcase * added api to insert queries in bulk * . * fix a test case which was failing due to latest changes * Ingestion Changes for Query as Entity * move query changes to latest sqls * added tags and owner * update PR for Query as Entity * update type * fixed pagination * fix path param * fix TestCases * add validation criteria * removed existing query apis * checkstyle fix * remove vote from put * remove vote from put * Query As Entity Ingestion Changes * Remove unused func * update Review Comments * update Review Comments * remove previous changes for Query and Update Tests * moved Checksum to Query Util Class * update python api * fix python checkstyle * Fixed Tests * Fix pytest * remove space changes * remove space changes * Fixed put_addFollowerDeleteEntity_200 * Fix usage ingestion * Update Python SDK and tests * pylint fix --------- Co-authored-by: Himank Mehta <himankmehta@Himanks-MacBook-Air.local> Co-authored-by: ulixius9 <mayursingal9@gmail.com> Co-authored-by: Mayur Singal <39544459+ulixius9@users.noreply.github.com>
903 lines
28 KiB
Python
903 lines
28 KiB
Python
# Copyright 2021 Collate
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
Test Processor Class
|
|
"""
|
|
|
|
import datetime
|
|
from unittest import TestCase
|
|
|
|
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
|
from metadata.generated.schema.api.data.createDatabaseSchema import (
|
|
CreateDatabaseSchemaRequest,
|
|
)
|
|
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
|
from metadata.generated.schema.api.services.createDatabaseService import (
|
|
CreateDatabaseServiceRequest,
|
|
)
|
|
from metadata.generated.schema.entity.data.table import (
|
|
Column,
|
|
ColumnName,
|
|
DataType,
|
|
Table,
|
|
TableData,
|
|
TableType,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
|
|
MysqlConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
|
OpenMetadataConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.databaseService import (
|
|
DatabaseConnection,
|
|
DatabaseService,
|
|
DatabaseServiceType,
|
|
)
|
|
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
|
|
OpenMetadataJWTClientConfig,
|
|
)
|
|
from metadata.generated.schema.type.basic import AnyUrl, Href
|
|
from metadata.generated.schema.type.entityReference import EntityReference
|
|
from metadata.generated.schema.type.tagLabel import (
|
|
LabelType,
|
|
State,
|
|
TagFQN,
|
|
TagLabel,
|
|
TagSource,
|
|
)
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
from metadata.ingestion.processor.pii import NERScanner
|
|
|
|
def _mock_personal_tag():
    """
    Return a new suggested, automated ``PersonalData.Personal`` label.

    A separate instance is created per call so no two columns share a tag object.
    """
    return TagLabel(
        tagFQN="PersonalData.Personal",
        description=None,
        source="Classification",
        labelType="Automated",
        state="Suggested",
        href=None,
    )


def _mock_column(name, data_type, position, description="", tags=None):
    """
    Build one column of MOCK_TABLE.

    Only name, data type, ordinal position, description and tags vary between
    the columns; every other Column field is still passed explicitly as None.
    """
    return Column(
        name=name,
        displayName=None,
        dataType=data_type.value,
        arrayDataType=None,
        dataLength=None,
        precision=None,
        scale=None,
        dataTypeDisplay=None,
        description=description,
        fullyQualifiedName=None,
        tags=tags,
        constraint=None,
        ordinalPosition=position,
        jsonSchema=None,
        children=None,
        customMetrics=None,
        profile=None,
    )


# Create-table request used as scanner input: four DOUBLE "amount" columns,
# one STRING column and one BOOLEAN column.
# Note: ordinal position 4 is used by each of the last three columns.
MOCK_TABLE: CreateTableRequest = CreateTableRequest(
    name="DataSet Input",
    displayName="DataSet Input",
    description="this is a description for dataset input",
    tableType=TableType.Regular.value,
    columns=[
        _mock_column(
            "amount",
            DataType.DOUBLE,
            1,
            description="This is description for amount",
        ),
        _mock_column("bank_transfer_amount", DataType.DOUBLE, 2),
        _mock_column("coupon_amount", DataType.DOUBLE, 3),
        _mock_column(
            "credit_card_amount",
            DataType.DOUBLE,
            4,
            tags=[_mock_personal_tag()],
        ),
        _mock_column("FirstName", DataType.STRING, 4),
        _mock_column(
            "is_customer",
            DataType.BOOLEAN,
            4,
            tags=[_mock_personal_tag()],
        ),
    ],
    tableConstraints=None,
    tablePartition=None,
    tableProfilerConfig=None,
    owner=None,
    databaseSchema="default.default.schema",
    tags=None,
    viewDefinition=None,
    extension=None,
)
|
|
|
|
def _expected_tag(tag_fqn):
    """
    Return a new suggested, automated Classification label for ``tag_fqn``.

    ``tag_fqn`` is handed to TagLabel unchanged, so callers may pass either a
    plain string or a ``TagFQN`` instance.
    """
    return TagLabel(
        tagFQN=tag_fqn,
        description=None,
        source="Classification",
        labelType="Automated",
        state="Suggested",
        href=None,
    )


def _expected_column(name, data_type, position, tags, description=""):
    """
    Build one entry of EXPECTED_COLUMNS.

    All Column fields that do not vary between entries are passed explicitly
    as None.
    """
    return Column(
        name=name,
        displayName=None,
        dataType=data_type,
        arrayDataType=None,
        dataLength=None,
        precision=None,
        scale=None,
        dataTypeDisplay=None,
        description=description,
        fullyQualifiedName=None,
        tags=tags,
        constraint=None,
        ordinalPosition=position,
        jsonSchema=None,
        children=None,
        customMetrics=None,
        profile=None,
    )


# Columns of the mock table together with the tags expected on each of them.
# Note: ordinal position 4 is used by each of the last three columns.
EXPECTED_COLUMNS = [
    _expected_column(
        "amount",
        "DOUBLE",
        1,
        tags=[_expected_tag(TagFQN(__root__="PII.Sensitive"))],
        description="This is description for amount",
    ),
    _expected_column(
        "bank_transfer_amount",
        "DOUBLE",
        2,
        tags=[_expected_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _expected_column(
        "coupon_amount",
        "DOUBLE",
        3,
        tags=[_expected_tag(TagFQN(__root__="PII.Sensitive"))],
    ),
    _expected_column(
        "credit_card_amount",
        "DOUBLE",
        4,
        tags=[
            _expected_tag("PersonalData.Personal"),
            _expected_tag(TagFQN(__root__="PII.Sensitive")),
        ],
    ),
    _expected_column(
        "FirstName",
        "STRING",
        4,
        tags=[_expected_tag(TagFQN(__root__="PII.NonSensitive"))],
    ),
    _expected_column(
        "is_customer",
        "BOOLEAN",
        4,
        tags=[_expected_tag("PersonalData.Personal")],
    ),
]
|
|
|
|
# Header of the sample data, in the same order as the values of each row.
_SAMPLE_COLUMN_NAMES = (
    "customer_id",
    "first_name",
    "last_name",
    "first_order",
    "customer_email",
    "number_of_orders",
)

# Sample rows; None marks a missing first_order / customer_email /
# number_of_orders value.
_SAMPLE_ROWS = (
    (30, "Christina", "W.", datetime.date(2018, 3, 2), "christina@hotmail.com", 2),
    (73, "Alan", "B.", None, "joshua.alan@yahoo.com", None),
    (71, "Gerald", "C.", datetime.date(2018, 1, 18), "geraldc@gmail.com", 3),
    (35, "Sara", "T.", datetime.date(2018, 2, 21), "saratimithi@godesign.com", 2),
    (22, "Sean", "H.", datetime.date(2018, 1, 26), "heroldsean@google.com", 3),
    (50, "Billy", "L.", datetime.date(2018, 1, 5), "bliam@random.com", 2),
    (76, "Barbara", "W.", datetime.date(2018, 3, 23), "bmwastin@gmail.co.in", 1),
    (5, "Katherine", "R.", None, None, None),
    (31, "Jane", "G.", datetime.date(2018, 2, 17), "gg34jane@hammer.com", 1),
    (45, "Scott", "B.", None, None, None),
    (21, "Willie", "H.", datetime.date(2018, 3, 28), "12hwilliejose@gmail.com", 1),
    (18, "Johnny", "K.", datetime.date(2018, 2, 27), "johnnykk@dexter.com", 1),
    (6, "Sarah", "R.", datetime.date(2018, 2, 19), "rrsarah@britinia.com", 1),
    (56, "Joshua", "K.", None, None, None),
    (79, "Jack", "R.", datetime.date(2018, 2, 28), "jack.mm@people.co.in", 2),
    (94, "Gregory", "H.", datetime.date(2018, 1, 4), "peter.gregory@japer.com", 2),
    (83, "Virginia", "R.", None, None, None),
    (17, "Kimberly", "R.", None, None, None),
    (2, "Shawn", "M.", datetime.date(2018, 1, 11), "shawn344@gmail.com", 1),
    (60, "Norma", "W.", None, None, None),
    (87, "Phillip", "B.", None, None, None),
)

# Sample data handed to the NER scanner in the test below.
table_data = TableData(
    columns=[ColumnName(__root__=column_name) for column_name in _SAMPLE_COLUMN_NAMES],
    # Each row is passed to the model as a plain list.
    rows=[list(sample_row) for sample_row in _SAMPLE_ROWS],
)
|
|
|
|
_CUSTOMERS_TABLE_ID = "c6e75645-62e3-4110-8040-faa0e1ae3289"


def _localhost_href(path):
    """Return an Href for ``path`` on ``http://localhost:8585``."""
    return Href(
        __root__=AnyUrl(
            "http://localhost:8585" + path,
            scheme="http",
            host="localhost",
            host_type="int_domain",
            port="8585",
            path=path,
        )
    )


def _parent_reference(entity_id, entity_type, name, fqn, collection):
    """
    Build the EntityReference of one parent of the table.

    ``collection`` is the path segment between ``/api/v1/`` and the entity id
    in the reference's href.
    """
    return EntityReference(
        id=entity_id,
        type=entity_type,
        name=name,
        fullyQualifiedName=fqn,
        description=None,
        displayName=None,
        deleted=False,
        href=_localhost_href("/api/v1/" + collection + "/" + entity_id),
    )


def _customers_column(
    name, data_type, data_length, type_display, description=None, tags=None
):
    """
    Build one column of TABLE_ENTITY.

    Every column carries the "NULL" constraint and a fully qualified name
    under ``aws_redshift1.dev.dbt_jaffle.customers``.
    """
    return Column(
        name=ColumnName(__root__=name),
        displayName=None,
        dataType=data_type,
        arrayDataType=None,
        dataLength=data_length,
        precision=None,
        scale=None,
        dataTypeDisplay=type_display,
        description=description,
        fullyQualifiedName="aws_redshift1.dev.dbt_jaffle.customers." + name,
        tags=tags,
        constraint="NULL",
        ordinalPosition=None,
        jsonSchema=None,
        children=None,
        customMetrics=None,
        profile=None,
    )


# Table entity passed to the scanner; only customer_id starts with a tag.
TABLE_ENTITY = Table(
    id=_CUSTOMERS_TABLE_ID,
    name="customers",
    displayName=None,
    fullyQualifiedName="aws_redshift1.dev.dbt_jaffle.customers",
    description=None,
    version=0.7,
    updatedAt=1676984225597,
    updatedBy="admin",
    href=_localhost_href("/api/v1/tables/" + _CUSTOMERS_TABLE_ID),
    tableType="Local",
    columns=[
        _customers_column(
            "customer_id",
            "INT",
            1,
            "int",
            description="This is an ID identifing a unique customer",
            tags=[
                TagLabel(
                    tagFQN="PII.Sensitive",
                    source=TagSource.Classification.value,
                    labelType=LabelType.Automated.value,
                    state=State.Suggested.value,
                )
            ],
        ),
        _customers_column("first_name", "VARCHAR", 10, "varchar(10)"),
        _customers_column("last_name", "VARCHAR", 2, "varchar(2)"),
        _customers_column("first_order", "DATE", 1, "date"),
        # NOTE(review): declared as DATE although the sample rows hold e-mail
        # strings for this column - confirm whether that is intentional.
        _customers_column("customer_email", "DATE", 1, "date"),
        _customers_column("number_of_orders", "BIGINT", 1, "bigint"),
    ],
    tableConstraints=None,
    tablePartition=None,
    owner=None,
    databaseSchema=_parent_reference(
        "9db326f8-c23c-49c5-bc75-865cb8e87981",
        "databaseSchema",
        "dbt_jaffle",
        "aws_redshift1.dev.dbt_jaffle",
        "databaseSchemas",
    ),
    database=_parent_reference(
        "f74772d0-2827-442a-8aa4-3dfd136f0c53",
        "database",
        "dev",
        "aws_redshift1.dev",
        "databases",
    ),
    service=_parent_reference(
        "31964ed7-8b76-468f-8f1d-d3839792a3b0",
        "databaseService",
        "aws_redshift1",
        "aws_redshift1",
        "services/databaseServices",
    ),
    serviceType="Redshift",
    location=None,
    viewDefinition=None,
    tags=None,
    usageSummary=None,
    followers=None,
    joins=None,
    sampleData=None,
    tableProfilerConfig=None,
    profile=None,
    dataModel=None,
    changeDescription=None,
    deleted=False,
    extension=None,
)
|
|
|
|
# Descriptions carried by the expected PII tags.
_PII_SENSITIVE_DESCRIPTION = (
    "PII which if lost, compromised, or disclosed without authorization, could result in "
    "substantial harm, embarrassment, inconvenience, or unfairness to an individual."
)
_PII_NON_SENSITIVE_DESCRIPTION = (
    "PII which is easily accessible from public sources and can include zip code, "
    "race, gender, and date of birth."
)


def _suggested_pii_tag(tag_name, description):
    """Return a new suggested, automated Classification label for ``tag_name``."""
    return TagLabel(
        tagFQN=TagFQN(__root__=tag_name),
        description=description,
        source="Classification",
        labelType="Automated",
        state="Suggested",
        href=None,
    )


def _updated_column(name, data_type, type_display, tags, data_length=None):
    """
    Build one entry of UPDATED_TABLE_ENTITY.

    The fully qualified name is placed under
    ``test-service-table-patch.test-db.test-schema.customers``.
    """
    return Column(
        name=ColumnName(__root__=name),
        displayName=None,
        dataType=data_type,
        arrayDataType=None,
        dataLength=data_length,
        precision=None,
        scale=None,
        dataTypeDisplay=type_display,
        description=None,
        fullyQualifiedName="test-service-table-patch.test-db.test-schema.customers."
        + name,
        tags=tags,
        constraint=None,
        ordinalPosition=None,
        jsonSchema=None,
        children=None,
        customMetrics=None,
        profile=None,
    )


# Columns whose tags the test compares, position by position, with the columns
# read back from the server.
UPDATED_TABLE_ENTITY = [
    _updated_column("customer_id", "INT", "int", tags=[]),
    _updated_column(
        "first_name",
        "VARCHAR",
        "varchar",
        tags=[_suggested_pii_tag("PII.Sensitive", _PII_SENSITIVE_DESCRIPTION)],
        data_length=20,
    ),
    _updated_column("last_name", "VARCHAR", "varchar", tags=[], data_length=20),
    _updated_column(
        "first_order",
        "DATE",
        "date",
        tags=[_suggested_pii_tag("PII.NonSensitive", _PII_NON_SENSITIVE_DESCRIPTION)],
    ),
    _updated_column(
        "customer_email",
        "VARCHAR",
        "date",
        tags=[_suggested_pii_tag("PII.Sensitive", _PII_SENSITIVE_DESCRIPTION)],
        data_length=20,
    ),
    _updated_column("number_of_orders", "BIGINT", "bigint", tags=[]),
]
|
|
|
|
|
|
class PiiProcessorTest(TestCase):
    """
    Integration test for the NER scanner PII processor.

    The test creates a service / database / schema / table hierarchy through
    the OpenMetadata client, runs the scanner over sample data and checks the
    tags found on the table's columns afterwards.
    """

    def __init__(
        self,
        methodName="runTest",
    ) -> None:
        """
        Create the OpenMetadata client and the scanner under test.

        Args:
            methodName: name of the test method to run; defaults to
                "runTest", the same default ``TestCase.__init__`` uses.
        """
        super().__init__(methodName)
        # NOTE(review): host and JWT are hard-coded for a server on
        # localhost:8585.
        server_config = OpenMetadataConnection(
            hostPort="http://localhost:8585/api",
            authProvider="openmetadata",
            securityConfig=OpenMetadataJWTClientConfig(
                jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJh"
                "bGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vc"
                "mciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7Hgz"
                "GBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUx"
                "huv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakL"
                "Lzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM"
                "5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
            ),
        )
        self.metadata = OpenMetadata(server_config)
        self.nerscanner_processor = NERScanner(self.metadata)

    def test_nerscanner_process(self):
        """
        Run the NER scanner on the sample data and verify the column tags.
        """
        service = CreateDatabaseServiceRequest(
            name="test-service-table-patch",
            serviceType=DatabaseServiceType.Mysql,
            connection=DatabaseConnection(
                config=MysqlConnection(
                    username="username",
                    password="password",
                    hostPort="http://localhost:1234",
                )
            ),
        )
        service_entity = self.metadata.create_or_update(data=service)
        # Register the cleanup right away: the recursive hard delete must run
        # even when a later step or assertion fails, otherwise the service and
        # everything created under it stay on the server.
        self.addCleanup(
            self.metadata.delete,
            entity=DatabaseService,
            entity_id=service_entity.id,
            recursive=True,
            hard_delete=True,
        )

        create_db = CreateDatabaseRequest(
            name="test-db",
            service=service_entity.fullyQualifiedName,
        )
        create_db_entity = self.metadata.create_or_update(data=create_db)

        create_schema = CreateDatabaseSchemaRequest(
            name="test-schema",
            database=create_db_entity.fullyQualifiedName,
        )
        create_schema_entity = self.metadata.create_or_update(data=create_schema)

        created_table = CreateTableRequest(
            name="customers",
            columns=[
                Column(name="customer_id", dataType=DataType.INT),
                Column(name="first_name", dataType=DataType.VARCHAR, dataLength=20),
                Column(name="last_name", dataType=DataType.VARCHAR, dataLength=20),
                Column(name="first_order", dataType=DataType.DATE),
                Column(name="customer_email", dataType=DataType.VARCHAR, dataLength=20),
                Column(name="number_of_orders", dataType=DataType.BIGINT),
            ],
            databaseSchema=create_schema_entity.fullyQualifiedName,
        )
        table_entity = self.metadata.create_or_update(data=created_table)

        # Point a copy of the fixture at the freshly created table instead of
        # overwriting the id of the shared module-level TABLE_ENTITY.
        scanned_table = TABLE_ENTITY.copy(update={"id": table_entity.id})

        self.nerscanner_processor.process(
            table_data=table_data, table_entity=scanned_table, client=self.metadata
        )
        updated_table_entity = self.metadata.get_by_id(
            entity=Table, entity_id=table_entity.id, fields=["tags"]
        )
        self.assertIsNotNone(updated_table_entity)

        # zip() stops at the shorter sequence, so check the lengths first to
        # avoid silently skipping columns.
        self.assertEqual(
            len(UPDATED_TABLE_ENTITY), len(updated_table_entity.columns)
        )
        for expected, actual in zip(
            UPDATED_TABLE_ENTITY, updated_table_entity.columns
        ):
            # One sub-test per column so a failure names the offending column.
            with self.subTest(column=expected.name.__root__):
                self.assertEqual(expected.tags, actual.tags)
|