Fixes GEN-1260: Add Validators while creating table to escape special characters (#18456)

This commit is contained in:
Ayush Shah 2024-11-18 15:02:57 +05:30 committed by GitHub
parent 231025df67
commit 6f1df37ba1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 1233 additions and 62 deletions

View File

@ -1,5 +1,732 @@
{
"tables": [
{
"name": "dim_::>address",
"description": "This dimension table contains the billing and shipping addresses of customers. You can join this table with the sales table to generate lists of the billing and shipping addresses. Customers can enter their addresses more than once, so the same address can appear in more than one row in this table. This table contains one row per customer address.",
"version": 0.1,
"updatedAt": 1638354087391,
"updatedBy": "anonymous",
"href": "http://localhost:8585/api/v1/tables/3cda8ecb-f4c6-4ed4-8506-abe965b54b86",
"tableType": "Regular",
"schemaDefinition": "CREATE TABLE dim_address(address_id NUMERIC PRIMARY KEY, shop_id NUMERIC)",
"retentionPeriod": "10D",
"columns": [
{
"name": "address_::>id",
"dataType": "NUMERIC",
"dataTypeDisplay": "numeric",
"description": "Unique identifier for the address.",
"tags": [],
"ordinalPosition": 1
},
{
"name": "shop_id",
"dataType": "NUMERIC",
"dataTypeDisplay": "numeric",
"description": "The ID of the store. This column is a foreign key reference to the shop_id column in the dim_shop table.",
"tags": [],
"ordinalPosition": 2
},
{
"name": "first_name",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "First name of the customer.",
"tags": [],
"ordinalPosition": 3
},
{
"name": "last_name",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "Last name of the customer.",
"tags": [],
"ordinalPosition": 4
},
{
"name": "address1",
"dataType": "VARCHAR",
"dataLength": 500,
"dataTypeDisplay": "varchar",
"description": "The first address line. For example, 150 Elgin St.",
"tags": [],
"ordinalPosition": 5
},
{
"name": "address2",
"dataType": "VARCHAR",
"dataLength": 500,
"dataTypeDisplay": "varchar",
"description": "The second address line. For example, Suite 800.",
"tags": [],
"ordinalPosition": 6
},
{
"name": "company",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "The name of the customer's business, if one exists.",
"tags": [],
"ordinalPosition": 7
},
{
"name": "city",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "The name of the city. For example, Palo Alto.",
"tags": [],
"ordinalPosition": 8
},
{
"name": "region",
"dataType": "VARCHAR",
"dataLength": 512,
"dataTypeDisplay": "varchar",
"description": "The name of the region, such as a province or state, where the customer is located. For example, Ontario or New York. This column is the same as CustomerAddress.province in the Admin API.",
"tags": [],
"ordinalPosition": 9
},
{
"name": "zip",
"dataType": "VARCHAR",
"dataLength": 10,
"dataTypeDisplay": "varchar",
"description": "The ZIP or postal code. For example, 90210.",
"tags": [],
"ordinalPosition": 10,
"customMetrics": [
{
"name": "CountOfLAZipCode",
"columnName": "zip",
"expression": "SELECT COUNT(zip) FROM dim_address WHERE zip LIKE '900%'"
},
{
"name": "CountOfOrangeCountyZipCode",
"columnName": "zip",
"expression": "SELECT COUNT(zip) FROM dim_address WHERE zip LIKE '92%'"
}
]
},
{
"name": "country",
"dataType": "VARCHAR",
"dataLength": 50,
"dataTypeDisplay": "varchar",
"description": "The full name of the country. For example, Canada.",
"tags": [],
"ordinalPosition": 11
},
{
"name": "phone",
"dataType": "VARCHAR",
"dataLength": 15,
"dataTypeDisplay": "varchar",
"description": "The phone number of the customer.",
"tags": [],
"ordinalPosition": 12,
"customMetrics": [
{
"name": "CountOfNonUsPhoneNumbers",
"columnName": "phone",
"expression": "SELECT COUNT(phone) FROM dim_address WHERE phone NOT LIKE '1%'"
}
]
}
],
"tableConstraints": [
{
"constraintType": "PRIMARY_KEY",
"columns": [
"address_::>id",
"shop_id"
]
}
],
"database": {
"id": "50da1ff8-4e1d-4967-8931-45edbf4dd908",
"type": "database",
"name": "sample_data.ecommerce_db",
"description": "This **mock** database contains tables related to shopify sales and orders with related dimension tables.",
"href": "http://localhost:8585/api/v1/databases/50da1ff8-4e1d-4967-8931-45edbf4dd908"
},
"tags": [],
"usageSummary": {
"dailyStats": {
"count": 73,
"percentileRank": 45
},
"weeklyStats": {
"count": 73,
"percentileRank": 45
},
"monthlyStats": {
"count": 67,
"percentileRank": 45
},
"date": "2021-12-01"
},
"followers": [],
"joins": {
"startDate": "2021-11-01",
"dayCount": 30,
"columnJoins": [
{
"columnName": "shop_id",
"joinedWith": [
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.\"dim.shop\".shop_id",
"joinCount": 35
}
]
},
{
"columnName": "address_::>id",
"joinedWith": [
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.api_client_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.billing_address_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.customer_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.location_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.order_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.product_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.product_variant_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.shipping_address_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.shop_id",
"joinCount": 49
},
{
"fullyQualifiedName": "sample_data.ecommerce_db.shopify.fact_sale.user_id",
"joinCount": 49
}
]
}
]
},
"sampleData": {
"columns": [
"address_::>id",
"shop_id",
"first_name",
"last_name",
"address1",
"address2",
"company",
"city",
"region",
"zip",
"country",
"phone"
],
"rows": [
[
"bc35100e-2da5-48bb-bfc8-667dafe66532",
"70424951-bc97-4b20-9ce7-be37c4619361",
"Zachary",
"Brett",
"9054 Maria Circle Apt. 296",
"48348 Victoria Valleys Suite 144",
"Robinson Inc",
"Stephanieport",
"048 Moore Turnpike Apt. 061",
"30839",
"Latvia",
"(381)575-6692"
],
[
"d9a5b1a0-bf92-4b71-884d-9818308d040d",
"ffdc81e4-b0dc-47b2-8240-8540ba6c3029",
"Melissa",
"Amy",
"6212 William Plaza Suite 362",
"741 Eric Bypass",
"Todd, Henderson and Jenkins",
"Petersonstad",
"363 Gary Mills",
"91794",
"Lesotho",
"828-855-9883"
],
[
"facf92d7-05ea-43d2-ba2a-067d63dee60c",
"a8d30187-1409-4606-9259-322a4f6caf74",
"Amber",
"Albert",
"3170 Warren Orchard Apt. 834",
"3204 Brewer Shoal Suite 324",
"Davila-Snyder",
"Nicoleland",
"023 Paul Course",
"14086",
"Sweden",
"438-959-1151"
],
[
"bab9a506-e23d-4c53-9402-d070e7704376",
"e02e1fac-b650-4db8-8c9d-5fa5edf5d863",
"Heidi",
"Kelly",
"30942 Gonzalez Stravenue",
"3158 Watts Green",
"Moore PLC",
"West Erica",
"6294 Elliott Ville",
"07418",
"Saint Martin",
"(830)112-9566x8681"
],
[
"67ff12ae-5d40-4795-9a82-3afa62a4aea1",
"bf45dc22-8d23-49d9-9d87-7b4bd2dd1762",
"Adam",
"Anna",
"986 Walker Unions",
"5798 Ross Spur Apt. 510",
"Schultz, Allen and Price",
"West Brandyton",
"07379 Alejandra Manors",
"09192",
"Cambodia",
"(859)222-9872x9188"
],
[
"97831db3-23df-424e-b2e3-19f196874002",
"cfc3c305-748b-4992-9fef-ddd0aa7ce927",
"Tracey",
"Kaitlin",
"1810 Fletcher Islands",
"6988 Steven Vista Apt. 738",
"Patel-Evans",
"Powellbury",
"8856 Lambert Forest Apt. 010",
"49826",
"Turks and Caicos Islands",
"195.384.1264x84605"
],
[
"a405990b-732f-431e-b3ad-0ce05a484671",
"f53ff51f-3365-4029-ae67-a679e1dca626",
"Carmen",
"Jessica",
"58361 Audrey Parks Apt. 128",
"864 Phillips Squares",
"Perez Inc",
"Laurieshire",
"407 Nicole Greens",
"06175",
"Kenya",
"(565)853-4978x18062"
],
[
"d3be5a1e-10f0-46e6-8033-d15766e73047",
"ffeea1b4-eb28-452a-8b06-2408e36b1a94",
"Melinda",
"Paul",
"09590 Gonzales Camp Apt. 092",
"146 Carlson Manor Suite 961",
"Mills Ltd",
"South Wendy",
"6273 Victor Springs Apt. 655",
"68154",
"United Arab Emirates",
"(209)678-7748x53000"
],
[
"52636d10-1505-4f0a-894a-52a194cdba26",
"8c021bf4-f33a-4d41-b55f-78d801a45d5e",
"Jacob",
"Jamie",
"0952 Colon Greens",
"00913 Natalie Tunnel Apt. 769",
"Sullivan Group",
"Longbury",
"03714 April Landing",
"74874",
"Turkey",
"(038)859-4310x9693"
],
[
"b2a1279b-392f-404f-88e0-f755d2bd3c14",
"2351209a-94b2-4b20-a517-1eef888c0d5b",
"Allen",
"Dean",
"8492 Smith Plains Apt. 986",
"420 Hodges Inlet Suite 088",
"Thornton-Mcknight",
"Gregoryborough",
"56904 Tony Branch Suite 945",
"88544",
"Mauritania",
"001-198-032-8011x00683"
],
[
"3a7cc826-4e7d-46de-a13d-895d5319a881",
"1a03bf40-3f40-460e-b170-2fadea8bdbee",
"Jill",
"Diana",
"092 Jose Unions",
"36896 Serrano Plaza",
"Wilson and Sons",
"North Justinchester",
"750 Contreras Bridge Apt. 456",
"77859",
"Kazakhstan",
"311.331.9899x4838"
],
[
"d4891b70-a002-41a9-a072-a1d3ef99c4d9",
"e49ad3aa-2c30-4148-81f8-4e2980467d1c",
"Jonathan",
"Lauren",
"4416 Christina Ville",
"44992 Katie Inlet Suite 965",
"Bennett, Hicks and Davidson",
"Lake Kendra",
"67281 Lee Drive Suite 987",
"27227",
"Jersey",
"950.930.9189x591"
],
[
"fbc3eeda-75b3-41f6-91a3-d671e3cc17d6",
"c3c73e91-020d-4fee-a1a9-e9781d684d9a",
"Gary",
"Jonathan",
"71309 Theresa Trail",
"29432 Brenda Overpass",
"Mccarthy-Huang",
"Lake Amanda",
"000 William Ranch",
"93218",
"Kazakhstan",
"001-109-449-4210x33965"
],
[
"684360f0-48c0-4bf7-9476-6231b7861c0b",
"18d728f1-8ec6-4e94-9725-fcd2ed719723",
"Karen",
"Paul",
"2867 Hunter Center Apt. 091",
"46974 Peter Drives Suite 073",
"Stephens, Wall and Johnson",
"Mcdowellfurt",
"33302 Mcintosh Orchard Apt. 884",
"17017",
"Gambia",
"484-039-6675"
],
[
"ec768cba-fc73-484a-bfc2-792e1bf29342",
"9c85fb32-1109-4409-a87d-75112fc3708f",
"Edgar",
"Hailey",
"70196 Moore Canyon",
"885 Jimenez Parkways",
"Gomez Group",
"Marktown",
"44624 Lawrence Springs Apt. 502",
"71815",
"Hungary",
"(777)526-6200"
],
[
"128eae16-fc8d-4d0e-9f07-78b6e4825709",
"530c610e-ab24-4f28-8d7f-684bb05817c8",
"Don",
"Edgar",
"040 Jessica Mews Apt. 149",
"3454 Lopez Neck",
"Clark, Simon and Griffith",
"Rickborough",
"549 Jeffrey Stream",
"71662",
"Korea",
"4911281340"
],
[
"341c94b3-cac4-46f0-bcba-33ccb52eacec",
"9f80457c-dd0e-4c4d-ab1f-cf76f734ad1c",
"Kevin",
"Kimberly",
"29504 Moore Grove",
"0079 Williams Pine Suite 274",
"Gregory-Mcgrath",
"Cantufort",
"5849 Paige River",
"72224",
"Zambia",
"(112)074-5447"
],
[
"d05acf85-8f05-40e8-bd8b-00a751ed4b16",
"a8214997-3076-4732-be67-66e0169f894d",
"Candace",
"Donna",
"1110 Powers Neck Suite 716",
"50101 Pamela Orchard Apt. 576",
"Brown, Hunter and Adkins",
"Adrianaville",
"4326 Orr Extensions",
"94064",
"Haiti",
"001-843-960-7631x732"
],
[
"44886c82-0e01-4b65-bf46-235d4d1e4e17",
"c0055521-b8d3-4ad8-86c3-cf310b5719c9",
"Holly",
"Tony",
"562 Shaw Burgs",
"568 Reed Vista",
"Ramirez Group",
"West Aaronfurt",
"6008 Brianna Lock Suite 088",
"46723",
"Uganda",
"343.960.6582"
],
[
"3ba1f5ff-fcd9-4315-8e72-14b716496a3c",
"3dc80460-e52c-4e94-a3dc-e9234f73e016",
"Melissa",
"Jessica",
"4076 Lisa Unions",
"46781 Brittany Route",
"Williams Inc",
"Harmonmouth",
"4349 Hughes Grove Suite 834",
"94032",
"South Georgia and the South Sandwich Islands",
"195-206-6214x45451"
],
[
"f757641c-963a-4696-9314-7f19f2df79fa",
"8d3afcc6-3662-4d23-899a-b9c37108f522",
"Larry",
"Brian",
"1698 Brittany Ridges",
"00442 Terry Via",
"Gallagher Group",
"Smithton",
"4835 Long Hills",
"96036",
"Japan",
"(592)844-2557x28538"
],
[
"a2c68229-75c5-43a5-b18b-8a8b504d4f9d",
"d78f0b90-d3a9-4568-b7de-8bb45423b25b",
"Gary",
"Luis",
"909 Carolyn Highway Apt. 717",
"549 Steve Walks Suite 058",
"Serrano, Harrison and Beard",
"South Kimberly",
"18324 Briana Point",
"36285",
"Macao",
"392.711.2369x14285"
],
[
"2a89cec7-1cf5-4c9c-ad79-745a1984e36f",
"4035c32c-7a03-409f-a8f4-bb3ff3d7c91d",
"Brittany",
"Brenda",
"2691 Brandon Plaza",
"13209 Alexis Cove Suite 182",
"Durham LLC",
"New Christyton",
"080 Peters Track",
"97880",
"Gabon",
"+1-604-197-7676x09064"
],
[
"a391bf33-6665-4caa-a2d7-de3132bf60d3",
"88156561-0c36-4053-a1d7-b58fac121003",
"Debbie",
"Sharon",
"600 Keller Trafficway",
"18809 Peter Village",
"Vazquez, Allen and Washington",
"Singhmouth",
"5628 Terrence Locks Suite 798",
"20287",
"Anguilla",
"001-151-880-6914x699"
],
[
"19bcef03-483c-4225-bb61-aafe48a2f981",
"8a35b2d9-8f65-484f-b735-8eb42c2bbfec",
"Mark",
"Ashley",
"3124 Kenneth Mountains Suite 321",
"111 Carter Drives Apt. 942",
"Hill PLC",
"Port Shannonville",
"6035 Calderon Ramp",
"25829",
"Angola",
"441.553.9413x20451"
]
]
},
"tableProfile": {
"timestamp": 1634366539,
"columnCount": 12,
"rowCount": 725,
"customMetrics": [
{
"name": "CountOfUSAddress",
"value": 15467
},
{
"name": "CountOfFRAddress",
"value": 1467
}
],
"columnProfile": [
{
"name": "address_::>id",
"uniqueCount": 9,
"uniqueProportion": 0.52805369,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "shop_id",
"uniqueCount": 29,
"uniqueProportion": 0.2290940013,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "first_name",
"uniqueCount": 13,
"uniqueProportion": 0.1327792,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "last_name",
"uniqueCount": 23,
"uniqueProportion": 0.177126,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "address1",
"uniqueCount": 53,
"uniqueProportion": 0.0396391,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "address2",
"uniqueCount": 76,
"uniqueProportion": 0.0690369,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "company",
"uniqueCount": 23,
"uniqueProportion": 0.1210963,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "city",
"uniqueCount": 6,
"uniqueProportion": 0.024072,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "region",
"uniqueCount": 3,
"uniqueProportion": 0.1478114,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "zip",
"uniqueCount": 11,
"uniqueProportion": 0.1383472,
"distinctCount": 0,
"distinctProportion": 0,
"customMetrics": [
{
"name": "CountOfLAZipCode",
"value": 3456
},
{
"name": "CountOfOrangeCountyZipCode",
"value": 2345
}
]
},
{
"name": "country",
"uniqueCount": 13,
"uniqueProportion": 0.1601013,
"distinctCount": 0,
"distinctProportion": 0
},
{
"name": "phone",
"uniqueCount": 8,
"uniqueProportion": 0.1297079,
"distinctCount": 0,
"distinctProportion": 0
}
]
},
"databaseSchema": {
"id": "d7be1e2c-b3dc-11ec-b909-0242ac120002",
"type": "databaseSchema",
"name": "sample_data.ecommerce_db.shopify",
"description": "This **mock** Schema contains tables related to shopify sales and orders with related dimension tables.",
"href": "http://localhost:8585/api/v1/databaseSchemas/d7be1e2c-b3dc-11ec-b909-0242ac120002"
},
"customMetrics": [
{
"name": "CountOfUSAddress",
"expression": "SELECT COUNT(address_id) FROM dim_address WHERE country = 'US'"
},
{
"name": "CountOfFRAddress",
"expression": "SELECT COUNT(address_id) FROM dim_address WHERE country = 'FR'"
}
]
},
{
"name": "dim_address",
"description": "This dimension table contains the billing and shipping addresses of customers. You can join this table with the sales table to generate lists of the billing and shipping addresses. Customers can enter their addresses more than once, so the same address can appear in more than one row in this table. This table contains one row per customer address.",

View File

@ -158,6 +158,51 @@
"ordinalPosition": 5
}
]
},
{
"name": "\"orders_view\" || \"operations_view\"",
"displayName": "Orders View and Operations View",
"description": "Orders View and Operations View from Sample Data",
"dataModelType": "LookMlView",
"serviceType": "Looker",
"sql": "view: orders {\n sql_table_name: orders ;;\n\n dimension: \"1. Phase I\" {\n type: string\n sql: ${TABLE}.status ;;\n }\n\n dimension: \"4. Authorized\" {\n type: int\n sql: ${TABLE}.amount ;;\n }\n}",
"columns": [
{
"name": "0. Pre-clinical",
"dataType": "NUMERIC",
"dataTypeDisplay": "numeric",
"description": "Vaccine Candidates in phase: 'Pre-clinical'",
"ordinalPosition": 1
},
{
"name": "2. Phase II or Combined I/II",
"dataType": "NUMERIC",
"dataTypeDisplay": "numeric",
"description": "Vaccine Candidates in phase: 'Phase II or Combined I/II'",
"ordinalPosition": 2
},
{
"name": "1. Phase I",
"dataType": "NUMERIC",
"dataTypeDisplay": "numeric",
"description": "Vaccine Candidates in phase: 'Phase I'",
"ordinalPosition": 3
},
{
"name": "3. Phase III",
"dataType": "NUMERIC",
"dataTypeDisplay": "numeric",
"description": "Vaccine Candidates in phase: 'Phase III'",
"ordinalPosition": 4
},
{
"name": "4. Authorized",
"dataType": "NUMERIC",
"dataTypeDisplay": "numeric",
"description": "Vaccine Candidates in phase: 'Authorized'",
"ordinalPosition": 5
}
]
}
]

View File

@ -87,6 +87,87 @@
]
}
},
{
"name": "table_column_count_equals",
"description": "Test the number of columns in the table",
"entityLink": "<#E::table::sample_data.ecommerce_db.shopify.dim___reserved__colon____reserved__arrow__address>",
"testDefinitionName": "tableColumnCountToEqual",
"parameterValues": [
{
"name": "columnCount",
"value": "10"
}
],
"resolutions": {
"sequenceOne": [
{
"testCaseResolutionStatusType": "Ack",
"severity": "Severity1"
},
{
"testCaseResolutionStatusType": "Assigned",
"severity": "Severity1",
"assignee": "aaron_johnson0"
},
{
"testCaseResolutionStatusType": "Resolved",
"severity": "Severity1",
"resolver": "aaron_johnson0"
}
],
"sequenceTwo": [
{
"testCaseResolutionStatusType": "New",
"severity": "Severity1"
},
{
"testCaseResolutionStatusType": "Ack",
"severity": "Severity1"
},
{
"testCaseResolutionStatusType": "Assigned",
"severity": "Severity1",
"assignee": "christopher_campbell7"
},
{
"testCaseResolutionStatusType": "Resolved",
"severity": "Severity1",
"resolver": "christopher_campbell7"
}
],
"sequenceThree": [
{
"testCaseResolutionStatusType": "New",
"severity": "Severity3"
},
{
"testCaseResolutionStatusType": "Ack",
"severity": "Severity3"
},
{
"testCaseResolutionStatusType": "Assigned",
"severity": "Severity3",
"assignee": "christopher_campbell7"
},
{
"testCaseResolutionStatusType": "Assigned",
"severity": "Severity3",
"assignee": "aaron_johnson0"
},
{
"testCaseResolutionStatusType": "Resolved",
"severity": "Severity3",
"resolver": "aaron_johnson0"
}
],
"sequenceFour": [
{
"testCaseResolutionStatusType": "New",
"severity": "Severity5"
}
]
}
},
{
"name": "table_column_count_between",
"description": "Test that the number of columns in the table is between x and y",

View File

@ -19,7 +19,9 @@ from abc import ABC, abstractmethod
from typing import IO, Any, Optional
import yaml
from pydantic import BaseModel, ConfigDict
from pydantic import ConfigDict
from metadata.ingestion.models.custom_pydantic import BaseModel
class ConfigModel(BaseModel):

View File

@ -18,13 +18,14 @@ multiple test cases per workflow.
from typing import List, Optional
from pydantic import BaseModel, Field
from pydantic import Field
from metadata.config.common import ConfigModel
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.tests.basic import TestCaseResult
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
from metadata.ingestion.models.custom_pydantic import BaseModel
class TestCaseDefinition(ConfigModel):

View File

@ -0,0 +1,80 @@
# Copyright 2022 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Validation logic for Custom Pydantic BaseModel
"""
import logging
# Logger registered under the "metadata" name.
logger = logging.getLogger("metadata")
# The two character sequences that replace_separators rewrites into placeholders.
RESTRICTED_KEYWORDS = ["::", ">"]
# Placeholder written in place of "::" and turned back into "::" by revert_separators.
RESERVED_COLON_KEYWORD = "__reserved__colon__"
# Placeholder written in place of ">" and turned back into ">" by revert_separators.
RESERVED_ARROW_KEYWORD = "__reserved__arrow__"
# Model class names treated like "Create*" models by the custom BaseModel validator,
# i.e. their names go through replace_separators.
CREATE_ADJACENT_MODELS = {"ProfilerResponse", "SampleData"}
# Class names of attribute values that validate_name_and_transform descends into.
NAME_FIELDS = {"EntityName", "str", "ColumnName", "TableData"}
# Model class names whose names go through revert_separators in the custom BaseModel validator.
FETCH_MODELS = {"Table", "CustomColumnName"}
# Field names whose string values validate_name_and_transform is allowed to rewrite.
FIELD_NAMES = {"name", "columns", "root"}
def revert_separators(value):
    """
    Turn the reserved placeholders in ``value`` back into the original separators.

    Every RESERVED_COLON_KEYWORD occurrence becomes "::" first, then every
    RESERVED_ARROW_KEYWORD occurrence becomes ">".
    """
    colons_restored = value.replace(RESERVED_COLON_KEYWORD, "::")
    return colons_restored.replace(RESERVED_ARROW_KEYWORD, ">")
def replace_separators(value):
    """
    Swap the separators in ``value`` for their reserved placeholders.

    Every "::" becomes RESERVED_COLON_KEYWORD first, then every ">" becomes
    RESERVED_ARROW_KEYWORD.
    """
    colons_encoded = value.replace("::", RESERVED_COLON_KEYWORD)
    return colons_encoded.replace(">", RESERVED_ARROW_KEYWORD)
def validate_name_and_transform(values, modification_method, field_name: str = None):
    """
    Apply ``modification_method`` to the name-like strings reachable from ``values``.

    Args:
        values: a plain string, an object exposing a string ``root``, or an object
            exposing ``model_fields``. Anything else is handed back untouched.
        modification_method: callable that takes a string and returns the rewritten string.
        field_name: name of the field ``values`` was read from. Strings are only
            rewritten when this is one of FIELD_NAMES.

    Returns:
        The rewritten string for string input; otherwise the same object, updated in place.
    """
    # Plain string read from one of the tracked fields: rewrite it directly.
    if isinstance(values, str) and field_name in FIELD_NAMES:
        return modification_method(values)

    # Wrapper object holding a string ``root`` read from one of the tracked fields.
    if (
        hasattr(values, "root")
        and isinstance(values.root, str)
        and field_name in FIELD_NAMES
    ):
        values.root = modification_method(values.root)
        return values

    # Nothing to walk through: leave the value as it is.
    if not hasattr(values, "model_fields"):
        return values

    for attr_name in type(values).model_fields:
        current = getattr(values, attr_name)
        if not current:
            continue

        if current.__class__.__name__ in NAME_FIELDS:
            # Name-like attribute: recurse with the attribute name as the field name.
            updated = validate_name_and_transform(
                current,
                modification_method=modification_method,
                field_name=attr_name,
            )
        elif isinstance(current, list):
            # List attribute: every item is processed under the list's field name.
            updated = [
                validate_name_and_transform(
                    item,
                    modification_method=modification_method,
                    field_name=attr_name,
                )
                for item in current
            ]
        else:
            continue

        setattr(values, attr_name, updated)

    return values

View File

@ -20,11 +20,19 @@ import logging
from typing import Any, Dict, Literal, Optional, Union
from pydantic import BaseModel as PydanticBaseModel
from pydantic import PlainSerializer
from pydantic import PlainSerializer, model_validator
from pydantic.main import IncEx
from pydantic.types import SecretStr
from typing_extensions import Annotated
from metadata.ingestion.models.custom_basemodel_validation import (
CREATE_ADJACENT_MODELS,
FETCH_MODELS,
replace_separators,
revert_separators,
validate_name_and_transform,
)
logger = logging.getLogger("metadata")
SECRET = "secret:"
@ -37,6 +45,30 @@ class BaseModel(PydanticBaseModel):
Specified as `--base-class BASE_CLASS` in the generator.
"""
@model_validator(mode="after")
@classmethod
def parse_name(cls, values):
    """
    Primary entry point to process values based on their class.

    Runs after model validation and dispatches on the class name:
    classes listed in CREATE_ADJACENT_MODELS, or whose name starts with
    "Create", go through replace_separators; classes listed in FETCH_MODELS
    go through revert_separators. Every other class is returned untouched.

    Args:
        values: the validated model instance.

    Returns:
        The instance that was passed in, possibly with its names rewritten in place.

    Raises:
        Exception: anything raised by validate_name_and_transform is logged
            as a warning and re-raised.
    """
    if not values:
        # An after-validator is expected to hand back the instance; a bare
        # ``return`` would yield None in place of the validated model.
        return values

    model_name = cls.__name__
    try:
        if model_name in CREATE_ADJACENT_MODELS or model_name.startswith("Create"):
            values = validate_name_and_transform(values, replace_separators)
        elif model_name in FETCH_MODELS:
            values = validate_name_and_transform(values, revert_separators)
    except Exception as exc:
        logger.warning("Exception while parsing Basemodel: %s", exc)
        # Bare raise keeps the original traceback intact.
        raise
    return values
def model_dump_json( # pylint: disable=too-many-arguments
self,
*,

View File

@ -136,6 +136,7 @@ from metadata.parsers.schema_parsers import (
InvalidSchemaTypeException,
schema_parser_config_registry,
)
from metadata.profiler.api.models import ProfilerResponse, SampleData
from metadata.utils import entity_link, fqn
from metadata.utils.constants import UTF_8
from metadata.utils.fqn import FQN_SEPARATOR
@ -898,10 +899,24 @@ class SampleDataSource(
self.metadata.ingest_table_sample_data(
table_entity,
TableData(
rows=table["sampleData"]["rows"],
columns=table["sampleData"]["columns"],
),
ProfilerResponse(
table=table_entity,
profile=CreateTableProfileRequest(
tableProfile=TableProfile(
timestamp=Timestamp(
int(datetime.now().timestamp() * 1000)
),
columnCount=1.0,
rowCount=3.0,
)
),
sample_data=SampleData(
data=TableData(
rows=table["sampleData"]["rows"],
columns=table["sampleData"]["columns"],
)
),
).sample_data.data,
)
if table.get("customMetrics"):
@ -1331,12 +1346,14 @@ class SampleDataSource(
description=model["description"],
algorithm=model["algorithm"],
dashboard=dashboard.fullyQualifiedName.root,
mlStore=MlStore(
storage=model["mlStore"]["storage"],
imageRepository=model["mlStore"]["imageRepository"],
)
if model.get("mlStore")
else None,
mlStore=(
MlStore(
storage=model["mlStore"]["storage"],
imageRepository=model["mlStore"]["imageRepository"],
)
if model.get("mlStore")
else None
),
server=model.get("server"),
target=model.get("target"),
mlFeatures=self.get_ml_features(model),
@ -1375,9 +1392,11 @@ class SampleDataSource(
name=container["name"],
displayName=container["displayName"],
description=container["description"],
parent=EntityReference(id=parent_container.id, type="container")
if parent_container_fqn
else None,
parent=(
EntityReference(id=parent_container.id, type="container")
if parent_container_fqn
else None
),
prefix=container["prefix"],
dataModel=container.get("dataModel"),
numberOfObjects=container.get("numberOfObjects"),
@ -1415,11 +1434,13 @@ class SampleDataSource(
yield Either(
right=CreateContainerRequest(
name=name,
parent=EntityReference(
id=parent_container.id, type="container"
)
if parent_container
else None,
parent=(
EntityReference(
id=parent_container.id, type="container"
)
if parent_container
else None
),
service=self.storage_service.fullyQualifiedName,
)
)

View File

@ -142,7 +142,8 @@ def ometa_to_sqa_orm(
"extend_existing": True, # Recreates the table ORM object if it already exists. Useful for testing
"quote": check_snowflake_case_sensitive(
table.serviceType, table.name.root
),
)
or None,
},
**cols,
"metadata": sqa_metadata_obj or Base.metadata,

View File

@ -19,19 +19,23 @@ from antlr4.CommonTokenStream import CommonTokenStream
from antlr4.error.ErrorStrategy import BailErrorStrategy
from antlr4.InputStream import InputStream
from antlr4.tree.Tree import ParseTreeWalker
from pydantic import BaseModel
from requests.compat import unquote_plus
from metadata.antlr.split_listener import EntityLinkSplitListener
from metadata.generated.antlr.EntityLinkLexer import EntityLinkLexer
from metadata.generated.antlr.EntityLinkParser import EntityLinkParser
from metadata.generated.schema.entity.data.table import Table
from metadata.ingestion.models.custom_pydantic import BaseModel
from metadata.utils.constants import ENTITY_REFERENCE_TYPE_MAP
from metadata.utils.dispatch import class_register
T = TypeVar("T", bound=BaseModel)
class CustomColumnName(BaseModel):
    """
    Single-field model wrapping a decoded column name.

    get_decoded_column builds it from the decoded entity-link column and reads
    ``root`` back, so the value passes through the custom BaseModel validation.
    """

    # The column name string.
    root: str
class EntityLinkBuildingException(Exception):
"""
Raise for inconsistencies when building the EntityLink
@ -69,7 +73,9 @@ def get_decoded_column(entity_link: str) -> str:
entity_link: entity link
"""
return unquote_plus(entity_link.split("::")[-1].replace(">", ""))
return CustomColumnName(
root=unquote_plus(entity_link.split("::")[-1].replace(">", ""))
).root
def get_table_fqn(entity_link: str) -> str:

View File

@ -105,6 +105,13 @@ EXPECTED_SCHEMA_REQUEST = CreateSchemaRequest(
)
EXPECTED_TABLES = [
CreateTableRequest(
key="34.shopify.dim_::>address",
title="dim_::>address",
description="This dimension table contains the billing and shipping addresses of customers. You can join this table with the sales table to generate lists of the billing and shipping addresses. Customers can enter their addresses more than once, so the same address can appear in more than one row in this table. This table contains one row per customer address.",
table_type="TABLE",
sql=None,
),
CreateTableRequest(
key="34.shopify.dim_address",
title="dim_address",

View File

@ -34,6 +34,10 @@ from metadata.utils.entity_link import get_decoded_column, get_table_or_column_f
"<#E::table::rds.dev.dbt_jaffle.column_w_space::columns::随机的>",
"随机的",
),
(
"<#E::table::rds.dev.dbt_jaffle.column_w_space::columns::test__reserved__colon____reserved__arrow__test>",
"test::>test",
),
],
)
def test_get_decoded_column(entity_link, expected):
@ -94,6 +98,21 @@ def test_get_decoded_column(entity_link, expected):
"rds.dev.dbt_jaffle>.customers",
id="valid_entity_link10",
),
pytest.param(
'<#E::dashboard::"rds.dev.dbt_jaffle.customers">',
'"rds.dev.dbt_jaffle.customers"',
id="valid_entity_link11",
),
pytest.param(
"<#E::table::rds.dev.:dbt_jaffle.customers::columns::阿>",
"rds.dev.:dbt_jaffle.customers.阿",
id="valid_entity_link12",
),
pytest.param(
"<#E::table::rds.dev.dbt_jaffle.customers::columns::grea:>hdfwsd>",
"rds.dev.dbt_jaffle.customers.grea:>hdfwsd",
id="valid_entity_link13",
),
],
)
def test_valid_get_table_or_column_fqn(entity_link, fqn):
@ -114,16 +133,6 @@ def test_valid_get_table_or_column_fqn(entity_link, fqn):
ParseCancellationException,
id="invalid_entity_link1",
),
pytest.param(
"<#E::table::rds.dev.dbt_jaffle.customers::columns::grea:>hdfwsd>",
ParseCancellationException,
id="invalid_entity_link2",
),
pytest.param(
"<#E::table::rds.dev.:dbt_jaffle.customers::columns::阿>",
ParseCancellationException,
id="invalid_entity_link3",
),
pytest.param(
"<#E::table::rds.dev.dbt_jaffle.customers::columns>",
ValueError,

View File

@ -0,0 +1,143 @@
import uuid
from unittest import TestCase
from metadata.generated.schema.api.data.createTable import CreateTableRequest
from metadata.generated.schema.entity.data.table import (
Column,
ColumnName,
DataType,
Table,
TableConstraint,
)
from metadata.generated.schema.type.basic import (
EntityExtension,
EntityName,
FullyQualifiedEntityName,
Markdown,
)
from metadata.generated.schema.type.entityReference import EntityReference
class CustomPydanticValidationTest(TestCase):
create_request = CreateTableRequest(
name=EntityName("Sales::>Territory"),
displayName="SalesTerritory",
description=Markdown(root="Sales territory lookup table."),
tableType="Regular",
columns=[
Column(
name=ColumnName(root="Sales::Last>Year"),
displayName="SalesLastYear",
dataType="NUMBER",
arrayDataType=None,
dataLength=None,
precision=None,
scale=None,
dataTypeDisplay="NUMBER",
description=Markdown(root="Sales total of previous year."),
fullyQualifiedName=None,
tags=None,
constraint="NOT_NULL",
ordinalPosition=7,
jsonSchema=None,
children=None,
profile=None,
customMetrics=None,
),
Column(
name=ColumnName(root="Bonus"),
displayName="Bonus",
dataType="NUMBER",
arrayDataType=None,
dataLength=None,
precision=None,
scale=None,
dataTypeDisplay="NUMBER",
description=Markdown(root="Bonus due if quota is met."),
fullyQualifiedName=None,
tags=None,
constraint="NOT_NULL",
ordinalPosition=4,
jsonSchema=None,
children=None,
profile=None,
customMetrics=None,
),
Column(
name=ColumnName(root="ModifiedDate"),
displayName="ModifiedDate",
dataType="DATETIME",
arrayDataType=None,
dataLength=None,
precision=None,
scale=None,
dataTypeDisplay="DATETIME",
description=Markdown(root="Date and time the record was last updated."),
fullyQualifiedName=None,
tags=None,
constraint="NOT_NULL",
ordinalPosition=9,
jsonSchema=None,
children=None,
profile=None,
customMetrics=None,
),
],
dataModel=None,
tableConstraints=[
TableConstraint(constraintType="PRIMARY_KEY", columns=["Sales::Last>Year"])
],
tablePartition=None,
tableProfilerConfig=None,
owners=None,
databaseSchema=FullyQualifiedEntityName(
root='New Gyro 360.New Gyro 360."AdventureWorks2017.HumanResources"'
),
tags=None,
schemaDefinition=None,
retentionPeriod=None,
extension=EntityExtension(
root={
"DataQuality": '<div><p><b>Last evaluation:</b> 07/24/2023<br><b>Interval: </b>30 days <br><b>Next run:</b> 08/23/2023, 10:44:20<br><b>Measurement unit:</b> percent [%]</p><br><table><tbody><tr><th>Metric</th><th>Target</th><th>Latest result</th></tr><tr><td><p class="text-success">Completeness</p></td><td>90%</td><td><div class="bar fabric" style="width: 100%;"><strong>100%</strong></div></td></tr><tr><td><p class="text-success">Integrity</p></td><td>90%</td><td><div class="bar fabric" style="width: 100%;"><strong>100%</strong></div></td></tr><tr><td><p class="text-warning">Timeliness</p></td><td>90%</td><td><div class="bar fabric" style="width: 25%;"><strong>25%</strong></div></td></tr><tr><td><p class="text-warning">Uniqueness</p></td><td>90%</td><td><div class="bar fabric" style="width: 60%;"><strong>60%</strong></div></td></tr><tr><td><p class="text-success">Validity</p></td><td>90%</td><td><div class="bar fabric" style="width: 100%;"><strong>100%</strong></div></td></tr></tbody></table><h3>Overall score of the table is: 77%</h3><hr style="border-width: 5px;"></div>'
}
),
sourceUrl=None,
domain=None,
dataProducts=None,
fileFormat=None,
lifeCycle=None,
sourceHash=None,
)
def test_replace_separator(self):
assert (
self.create_request.name.root
== "Sales__reserved__colon____reserved__arrow__Territory"
)
assert (
self.create_request.columns[0].name.root
== "Sales__reserved__colon__Last__reserved__arrow__Year"
)
assert (
self.create_request.tableConstraints[0].columns[0]
== "Sales__reserved__colon__Last__reserved__arrow__Year"
)
def test_revert_separator(self):
    """Check that encoded separator tokens are turned back into ``::`` and ``>``.

    Two ``Table`` objects are built with encoded names; their ``name.root``
    must expose the original reserved characters.
    """

    def build_table(encoded_name):
        # Both tables share every field except the name.
        return Table(
            id=uuid.uuid4(),
            name=encoded_name,
            databaseSchema=EntityReference(id=uuid.uuid4(), type="databaseSchema"),
            fullyQualifiedName="test-service-table.test-db.test-schema.test",
            columns=[Column(name="id", dataType=DataType.BIGINT)],
        )

    colon_only = build_table("test__reserved__colon__table")
    colon_and_arrow = build_table("test__reserved__colon__table__reserved__arrow__")

    assert colon_only.name.root == "test::table"
    # A trailing arrow token must be restored as well.
    assert colon_and_arrow.name.root == "test::table>"

View File

@ -87,7 +87,7 @@ class TestFqn(TestCase):
def test_invalid(self):
with self.assertRaises(Exception):
fqn.split('a"')
fqn.split('a.."')
def test_build_table(self):
"""

View File

@ -162,12 +162,16 @@ public final class MessageParser {
// Entity links also have support for fallback texts with "|"
// example: <#E::user::user1|[@User One](http://localhost:8585/user/user1)>
// Extract the entity link alone if the string has a fallback text
if (link.contains("|")) {
link = link.substring(0, link.indexOf("|")) + ">";
}
Matcher matcher = ENTITY_LINK_PATTERN.matcher(link);
EntityLink entityLink = null;
Matcher matcher = ENTITY_LINK_PATTERN.matcher(link);
while (matcher.find()) {
if (link.contains("|") && matcher.group(1).contains("user")) {
link = link.substring(0, link.indexOf("|")) + ">";
matcher = ENTITY_LINK_PATTERN.matcher(link);
matcher.find();
}
if (entityLink == null) {
String entityType = matcher.group(1);
String entityFQN = matcher.group(2);

View File

@ -160,6 +160,11 @@ public class FullyQualifiedName {
if (!unquotedName.contains("\"")) {
return unquotedName.contains(".") ? "\"" + name + "\"" : unquotedName;
}
// Allow names with quotes
else if (unquotedName.contains("\"")) {
return unquotedName.replace("\"", "\\\"");
}
throw new IllegalArgumentException(CatalogExceptionMessage.invalidName(name));
}

View File

@ -131,16 +131,32 @@ class EntityUtilTest {
expected.put("fullyQualifiedFieldValue", "special!@#$%^&*()_+[]{};:\\'\",./?");
verifyEntityLinkParser(expected);
// Invalid entity link
expected.clear();
expected.put("entityLink", "<#E::table::special!@#$%^&*()_+[]{}|;:\\'\",./?>");
// EntityLink with `|` character will not be parsed correctly and everything after `|` will be
// ignored
expected.put("entityLink", "<#E::table::special!@#$%^&*()_+[]{}|;\\'\",./?>");
expected.put("entityType", "table");
expected.put("entityFQN", "special!@#$%^&*()_+[]{}|;\\'\",./?");
expected.put("linkType", "ENTITY");
expected.put("fullyQualifiedFieldType", "table");
expected.put("fullyQualifiedFieldValue", "special!@#$%^&*()_+[]{}|;\\'\",./?");
verifyEntityLinkParser(expected);
expected.clear();
expected.put("entityLink", "<#E::table::special!@:#$%^&*()_+[]{}|;\\'\",./?>");
expected.put("entityType", "table");
expected.put("entityFQN", "special!@:#$%^&*()_+[]{}|;\\'\",./?");
expected.put("linkType", "ENTITY");
expected.put("fullyQualifiedFieldType", "table");
expected.put("fullyQualifiedFieldValue", "special!@:#$%^&*()_+[]{}|;\\'\",./?");
verifyEntityLinkParser(expected);
expected.clear();
expected.put("entityLink", "<#E::table::spec::>ial!@:#$%^&*()_+[]{}|;\\'\",./?>");
org.opentest4j.AssertionFailedError exception =
assertThrows(
org.opentest4j.AssertionFailedError.class, () -> verifyEntityLinkParser(expected));
assertEquals(
"expected: <<#E::table::special!@#$%^&*()_+[]{}|;:\\'\",./?>> but was: <<#E::table::special!@#$%^&*()_+[]{}>>",
"expected: <<#E::table::spec::>ial!@:#$%^&*()_+[]{}|;\\'\",./?>> but was: <<#E::table::spec::>>",
exception.getMessage());
expected.clear();

View File

@ -51,20 +51,10 @@ class FullyQualifiedNameTest {
assertEquals("\"a.b\"", FullyQualifiedName.quoteName("a.b")); // Add quotes when "." in the name
assertEquals("\"a.b\"", FullyQualifiedName.quoteName("\"a.b\"")); // Leave existing valid quotes
assertEquals("a", FullyQualifiedName.quoteName("\"a\"")); // Remove quotes when not needed
assertThrows(
IllegalArgumentException.class,
() -> FullyQualifiedName.quoteName("\"a")); // Error when ending quote is missing
assertThrows(
IllegalArgumentException.class,
() -> FullyQualifiedName.quoteName("a\"")); // Error when beginning quote is missing
assertThrows(
IllegalArgumentException.class,
() ->
FullyQualifiedName.quoteName(
"a\"b")); // Error when invalid quote is present in the middle of the string
// we now allow quotes
assertEquals("\\\"a", FullyQualifiedName.quoteName("\"a"));
assertEquals("a\\\"", FullyQualifiedName.quoteName("a\""));
assertEquals("a\\\"b", FullyQualifiedName.quoteName("a\"b"));
}
@Test
@ -75,7 +65,8 @@ class FullyQualifiedNameTest {
@Test
void test_invalid() {
assertThrows(ParseCancellationException.class, () -> FullyQualifiedName.split("a\""));
assertThrows(ParseCancellationException.class, () -> FullyQualifiedName.split("..a"));
assertThrows(ParseCancellationException.class, () -> FullyQualifiedName.split("a.."));
}
@Test

View File

@ -132,5 +132,5 @@ ENTITY_FIELD
NAME_OR_FQN
: ~(':')* ~(':'|'>')+
: ( ~[:>] | ':' ~[:] | . '>' . )+
;

View File

@ -14,7 +14,7 @@ NAME
;
NAME_WITH_RESERVED
: QUOTE NON_RESERVED* (RESERVED NON_RESERVED*)+ QUOTE
: QUOTE NON_RESERVED* (RESERVED)* NON_RESERVED* QUOTE
;
QUOTE
@ -22,7 +22,7 @@ QUOTE
;
NON_RESERVED
: ~[".]
: ~[.]
;
RESERVED