diff --git a/ingestion/examples/sample_data/datasets/tables.json b/ingestion/examples/sample_data/datasets/tables.json index acddd81788c..98cfcbc216b 100644 --- a/ingestion/examples/sample_data/datasets/tables.json +++ b/ingestion/examples/sample_data/datasets/tables.json @@ -95,7 +95,19 @@ "dataTypeDisplay": "varchar", "description": "The ZIP or postal code. For example, 90210.", "tags": [], - "ordinalPosition": 10 + "ordinalPosition": 10, + "customMetrics": [ + { + "name": "CountOfLAZipCode", + "columnName": "zip", + "expression": "SELECT COUNT(zip) FROM dim_address WHERE zip LIKE '900%'" + }, + { + "name": "CountOfOrangeCountyZipCode", + "columnName": "zip", + "expression": "SELECT COUNT(zip) FROM dim_address WHERE zip LIKE '92%'" + } + ] }, { "name": "country", @@ -113,7 +125,14 @@ "dataTypeDisplay": "varchar", "description": "The phone number of the customer.", "tags": [], - "ordinalPosition": 12 + "ordinalPosition": 12, + "customMetrics": [ + { + "name": "CountOfNonUsPhoneNumbers", + "columnName": "phone", + "expression": "SELECT COUNT(phone) FROM dim_address WHERE phone NOT LIKE '1%'" + } + ] } ], "tableConstraints": [ @@ -581,6 +600,16 @@ "timestamp": 1634366539, "columnCount": 12, "rowCount": 725, + "customMetrics": [ + { + "name": "CountOfUSAddress", + "value": 15467 + }, + { + "name": "CountOfFRAddress", + "value": 1467 + } + ], "columnProfile": [ { "name": "address_id", @@ -650,7 +679,17 @@ "uniqueCount": 11, "uniqueProportion": 0.1383472, "distinctCount": 0, - "distinctProportion": 0 + "distinctProportion": 0, + "customMetrics": [ + { + "name": "CountOfLAZipCode", + "value": 3456 + }, + { + "name": "CountOfOrangeCountyZipCode", + "value": 2345 + } + ] }, { "name": "country", @@ -674,7 +713,17 @@ "name": "sample_data.ecommerce_db.shopify", "description": "This **mock** Schema contains tables related to shopify sales and orders with related dimension tables.", "href": "http://localhost:8585/api/v1/databaseSchemas/d7be1e2c-b3dc-11ec-b909-0242ac120002" - } + 
}, + "customMetrics": [ + { + "name": "CountOfUSAddress", + "expression": "SELECT COUNT(address_id) FROM dim_address WHERE country = 'US'" + }, + { + "name": "CountOfFRAddress", + "expression": "SELECT COUNT(address_id) FROM dim_address WHERE country = 'FR'" + } + ] }, { "id": "1cda9ecb-f4c6-4ed4-8506-abe965b64c87", diff --git a/ingestion/examples/sample_data/profiler/tableProfile.json b/ingestion/examples/sample_data/profiler/tableProfile.json index b9e38d1e735..0dea2772e7b 100644 --- a/ingestion/examples/sample_data/profiler/tableProfile.json +++ b/ingestion/examples/sample_data/profiler/tableProfile.json @@ -8,6 +8,16 @@ "rowCount": 14567.0, "sizeInByte": 16890, "createDateTime": "2023-07-24T07:00:48.000750Z", + "customMetrics": [ + { + "name": "CountOfUSAddress", + "value": 15467 + }, + { + "name": "CountOfFRAddress", + "value": 1467 + } + ], "columnProfile": [ { "name": "shop_id", @@ -151,7 +161,17 @@ "distinctProportion": 0.10, "minLength": 6.0, "maxLength": 156.0, - "mean": 98.0 + "mean": 98.0, + "customMetrics": [ + { + "name": "CountOfLAZipCode", + "value": 3041 + }, + { + "name": "CountOfOrangeCountyZipCode", + "value": 2076 + } + ] }, { "name": "country", @@ -208,6 +228,16 @@ "rowCount": 13256.0, "sizeInByte": 163290, "createDateTime": "2023-07-24T07:00:48.000750Z", + "customMetrics": [ + { + "name": "CountOfUSAddress", + "value": 15098 + }, + { + "name": "CountOfFRAddress", + "value": 1402 + } + ], "columnProfile": [ { "name": "shop_id", @@ -351,7 +381,17 @@ "distinctProportion": 0.10, "minLength": 6.0, "maxLength": 156.0, - "mean": 98.0 + "mean": 98.0, + "customMetrics": [ + { + "name": "CountOfLAZipCode", + "value": 2987 + }, + { + "name": "CountOfOrangeCountyZipCode", + "value": 2005 + } + ] }, { "name": "country", @@ -408,6 +448,16 @@ "rowCount": 10256.0, "sizeInByte": 16890, "createDateTime": "2023-07-24T07:00:48.000750Z", + "customMetrics": [ + { + "name": "CountOfUSAddress", + "value": 14998 + }, + { + "name": "CountOfFRAddress", + "value": 1387 
+ } + ], "columnProfile": [ { "name": "shop_id", @@ -551,7 +601,17 @@ "distinctProportion": 0.10, "minLength": 6.0, "maxLength": 156.0, - "mean": 98.0 + "mean": 98.0, + "customMetrics": [ + { + "name": "CountOfLAZipCode", + "value": 3109 + }, + { + "name": "CountOfOrangeCountyZipCode", + "value": 2178 + } + ] }, { "name": "country", @@ -600,6 +660,16 @@ "rowCount": 8945.0, "sizeInByte": 16890521, "createDateTime": "2023-07-24T07:00:48.000750Z", + "customMetrics": [ + { + "name": "CountOfUSAddress", + "value": 13458 + }, + { + "name": "CountOfFRAddress", + "value": 1278 + } + ], "columnProfile": [ { "name": "shop_id", @@ -743,7 +813,17 @@ "distinctProportion": 0.10, "minLength": 6.0, "maxLength": 156.0, - "mean": 98.0 + "mean": 98.0, + "customMetrics": [ + { + "name": "CountOfLAZipCode", + "value": 3389 + }, + { + "name": "CountOfOrangeCountyZipCode", + "value": 2165 + } + ] }, { "name": "country", @@ -788,6 +868,16 @@ "rowCount": 5461.0, "sizeInByte": 1572301627719.68, "createDateTime": "2023-07-24T07:00:48.000750Z", + "customMetrics": [ + { + "name": "CountOfUSAddress", + "value": 13092 + }, + { + "name": "CountOfFRAddress", + "value": 1293 + } + ], "columnProfile": [ { "name": "shop_id", @@ -931,7 +1021,17 @@ "distinctProportion": 0.10, "minLength": 6.0, "maxLength": 156.0, - "mean": 98.0 + "mean": 98.0, + "customMetrics": [ + { + "name": "CountOfLAZipCode", + "value": 3456 + }, + { + "name": "CountOfOrangeCountyZipCode", + "value": 2345 + } + ] }, { "name": "country", diff --git a/ingestion/pyproject.toml b/ingestion/pyproject.toml index c5fb5c572eb..37f603ea256 100644 --- a/ingestion/pyproject.toml +++ b/ingestion/pyproject.toml @@ -32,7 +32,7 @@ namespaces = true "metadata.examples" = ["workflows/*.yaml"] [project.scripts] -metadata-cli = "metadata.cmd:metadata" +metadata = "metadata.cmd:metadata" [project.entry-points.apache_airflow_provider] provider_info = "airflow_provider_openmetadata:get_provider_config" diff --git 
a/ingestion/src/metadata/ingestion/source/database/sample_data.py b/ingestion/src/metadata/ingestion/source/database/sample_data.py index a4742295a6b..26c0147c4c6 100644 --- a/ingestion/src/metadata/ingestion/source/database/sample_data.py +++ b/ingestion/src/metadata/ingestion/source/database/sample_data.py @@ -1292,6 +1292,7 @@ class SampleDataSource( rowCount=profile["rowCount"], createDateTime=profile.get("createDateTime"), sizeInByte=profile.get("sizeInByte"), + customMetrics=profile.get("customMetrics"), timestamp=int( ( datetime.now(tz=timezone.utc) - timedelta(days=days)