Added Row Data to Sample Tables

This commit is contained in:
Ayush Shah 2021-08-20 01:21:14 +05:30
parent 2b4a0a275b
commit c542ffd41d
6 changed files with 753 additions and 15 deletions

View File

@ -91,6 +91,234 @@
"ordinalPosition": 12
}
],
"sampleData": {
"columns": [
"shop_id",
"address1",
"city",
"company",
"last_name",
"country",
"phone",
"first_name",
"address2",
"region",
"zip",
"address_id"
],
"rows": [
[
"cc79f7b0-e5ed-4223-8ab8-ab4dd3e60ecc",
"67fc33a4-67f8-42fc-8693-f756de908e8d",
"Production option.",
"White officer staff.",
"Sell explain best.",
"Meeting agreement.",
"Design already.",
"Matter television.",
"Miss training.",
"Cup yourself.",
"Determine in air.",
"Give marriage day."
],
[
"378631ba-5b51-4a88-a78d-d8ca9fde5ffc",
"7819c541-32c7-4641-b1db-7e1c1bc540d7",
"Five occur deal.",
"Republican class.",
"Senior before ten.",
"Peace paper final.",
"Impact director.",
"Choose sometimes.",
"Current race.",
"Service yet series.",
"Agency together.",
"Score store class."
],
[
"57434be2-fae1-4101-9dff-f181d54fd229",
"0c1d1850-906f-4820-9e8e-7864a3d19af5",
"Machine consumer.",
"Cause explain.",
"Some wide cut.",
"Hope sit amount.",
"Protect senior.",
"Explain mission a.",
"Seven relate.",
"Local read grow.",
"Management happy.",
"Level share fund."
],
[
"fff72b3f-9184-43d4-bffa-920495175c22",
"0c918475-d684-492a-bb51-d3a17fccc114",
"Spend national dog.",
"Watch least arm.",
"Whole form chance.",
"Real hospital.",
"Television data.",
"Reveal growth.",
"Present glass it.",
"Possible free.",
"Shake message place.",
"Draw smile history."
],
[
"bec1e09b-4c7d-4822-af07-89ea05f64186",
"c8ae8d2f-7b21-4afb-a94d-10cf0ee9df2c",
"Hour option threat.",
"Blood public health.",
"Clear place set.",
"Claim realize.",
"Can tell pattern.",
"Hour capital ball.",
"Home often majority.",
"Issue American.",
"Little store she.",
"Really follow few."
],
[
"f61ed11c-8948-40e9-8ab7-5fcaeebb32c0",
"d1874f2e-3cd9-44ae-a737-e0d9b574f790",
"Product say manager.",
"Attack relationship.",
"Old court.",
"Room writer draw.",
"Ability group short.",
"Whole age purpose.",
"Either section.",
"International.",
"Young world you.",
"Start treatment."
],
[
"8cdf1b2d-8817-434e-bc9e-07f61023490d",
"2fe692a8-d75b-4a0b-bbd5-7a2cb97822cf",
"Quickly bag.",
"Agree value single.",
"Produce operation.",
"Capital consider.",
"Ball benefit health.",
"Behavior exist.",
"Control rather.",
"Room fund half rule.",
"Woman next run.",
"It body explain."
],
[
"f2a8a1d8-5471-4597-a676-0a3d10f08281",
"ab078153-f27b-41bb-bdcd-09bf62e4cee9",
"But PM air wear.",
"Seven ok subject.",
"Trade offer book.",
"Central bank will.",
"Keep Congress.",
"Only father baby.",
"Debate item every.",
"Floor interest.",
"Month professor.",
"Stage peace history."
],
[
"336fb3f3-19e1-49f9-94b6-00bc7098aec3",
"c28355de-a6b3-4236-b7ad-947143351c1d",
"Bank lawyer history.",
"Win sit join.",
"Especially effort.",
"Energy service.",
"Avoid forward must.",
"Report about head.",
"Try reality people.",
"Change clear wait.",
"Difference certain.",
"Specific resource."
],
[
"a5914de1-4fb8-4912-adc7-2f6253de2d41",
"13d59c87-9450-4c4a-b2aa-5472556a80f0",
"Half me power.",
"Inside magazine.",
"Game product voice.",
"Congress son accept.",
"Security discover.",
"Everyone quality.",
"Way admit nice save.",
"Within civil.",
"Strong west.",
"Least simply."
],
[
"516ad154-95a1-49ed-a5bf-7304080338f7",
"c392fc0e-a96a-4cf9-b4cf-1df41fa0dd1f",
"Response radio.",
"Quickly table.",
"Health.",
"Include ago police.",
"Prepare there.",
"Type safe range.",
"Feeling strong less.",
"For fast nothing.",
"Decade skill.",
"They money spend."
],
[
"e8f42c8b-28d5-472f-8380-6c21e997874a",
"9ca18289-53e6-44ec-aa3d-3d1acc9ad464",
"Stage bad property.",
"Over phone Mr foot.",
"From water hot.",
"Participant parent.",
"Move theory whether.",
"Face his child.",
"Much ago sound fact.",
"Else morning.",
"Suggest win nation.",
"Meeting respond."
],
[
"90a7e5c9-ee17-4fac-9405-a9b34e416513",
"09b1e988-bfc2-4dc4-8bae-1fe99eea2c76",
"Remain myself guess.",
"Article weight.",
"Marriage official.",
"Hundred source.",
"Animal parent great.",
"Him peace science.",
"Need run Democrat.",
"Pick according.",
"Safe state teacher.",
"Bag cell four keep."
],
[
"ee0866ba-2177-4fa5-80c3-f58801dd910e",
"9f924e39-6571-4925-a9bc-fea33002009d",
"Read team record.",
"Treat where hotel.",
"Soon police allow.",
"Strategy Mrs beat.",
"Condition sister.",
"Blue growth exactly.",
"Young science side.",
"Citizen hospital.",
"News air charge.",
"Senior Democrat."
],
[
"281c55b0-d7b8-4d06-84ed-060ffa346bf5",
"19f84462-c6dc-493d-b82a-0d66d3762e4d",
"Type most recently.",
"Hundred country.",
"Indicate must.",
"Air health job.",
"Themselves owner.",
"Life beyond central.",
"Note involve.",
"Beyond story look.",
"System weight ever.",
"Less mother key."
]
]
},
"database": {
"id": "89229e26-7f74-4443-a568-85512eaeaa07",
"type": "database"
@ -118,6 +346,26 @@
"ordinalPosition": 2
}
],
"sampleData": {
"columns": ["api_client_id", "title"],
"rows": [
["1fbff7bc-2321-4e4c-9e99-0f4f93793069", "Certainly."],
["62c732dd-80b6-4905-92f8-1b5f7957009a", "Push family stand."],
["f8a3ff22-ff1e-4e4c-b9cc-6f405e84417e", "Usually perhaps."],
["63c83b4e-433a-474c-9d3e-64c8db212c17", "Trouble money."],
["750d1e45-a512-4876-8d7e-18eb342630a7", "Speech three."],
["182e21d9-706c-464b-9ffa-3e290b66c1a8", "Difference example."],
["1ac0e9f5-bd8f-444a-ba42-c546c7c0354c", "Hour open before."],
["b35f6284-7915-48ac-8612-41bf38a45d95", "Serious decision."],
["c728b0fb-2dd1-4490-8426-518b53490109", "Cause until policy."],
["a899c3f2-64c0-4763-89f6-84fcd36efd1d", "Ago wall than."],
["6718b242-75ac-4122-8bce-031fe1ea14a4", "Entire memory note."],
["9e2d7a80-6417-4e21-a169-00c25e58e246", "Matter bad feeling."],
["97732da7-ada5-4f98-9621-b6b494d482ea", "Agree interview."],
["0e700838-1237-4748-97df-ee5b3b93cb98", "His military."],
["5e6be1aa-ab8c-49ef-8c94-23916ba36f3e", "Audience issue."]
]
},
"database": {
"id": "89229e26-7f74-4443-a568-85512eaeaa07",
"type": "database"
@ -314,6 +562,458 @@
"ordinalPosition": 26
}
],
"sampleData": {
"columns": [
"customer_id",
"accepts_marketing",
"expected_purchase_value_in_next_30_days",
"returning",
"predicted_average_number_of_days_between_orders",
"average_order_size",
"first_order_date",
"created_at",
"email",
"shop_id",
"last_name",
"promising",
"region",
"verified_email",
"at_risk",
"dormant",
"new",
"last_order_date",
"phone",
"country",
"total_order_count",
"city",
"rank",
"total_order_value",
"first_name",
"loyal"
],
"rows": [
[
"2571bc3a-186e-4745-a140-4699001d8a3c",
"6f08d500-3148-4322-bb20-93ff81e7e92e",
2653,
6622,
9846,
845325730,
907425617,
2585,
false,
true,
true,
false,
false,
true,
7050,
3616,
"Cell thought argue.",
"Decision use may.",
"Central class PM.",
"Teach body pressure.",
"Enough never.",
"Establish standard.",
"Mother news.",
false,
1166404411,
false
],
[
"62171523-fca4-486d-9364-8b09bffe418f",
"10f9fcbe-8387-49e6-a6f7-39651d818378",
3208,
6680,
7749,
920479881,
1020365255,
2572,
true,
false,
true,
false,
true,
false,
7579,
5351,
"Assume Mrs.",
"Dream behind check.",
"Effort many exactly.",
"Particularly son.",
"Seek prepare myself.",
"Hard us phone very.",
"Check next marriage.",
true,
695249818,
true
],
[
"feec3c47-1f22-4df9-b2df-f1009908e6c0",
"4b2ecb4c-50d9-4e72-a461-c201c1509967",
7352,
2699,
4669,
64378196,
532979642,
7281,
true,
true,
true,
false,
false,
false,
5933,
6493,
"Environment.",
"Information control.",
"Least white along.",
"Use value another.",
"Sister member month.",
"Different live it.",
"Increase still nice.",
true,
231196109,
true
],
[
"299ebddb-c428-4280-8185-5bcf1feb404d",
"ae9cca8c-49ca-4605-b050-c6a5165a959d",
1077,
501,
9836,
481247239,
370597405,
3481,
false,
true,
false,
true,
true,
false,
7332,
5344,
"Top leader yeah car.",
"Somebody specific.",
"Strong anything.",
"Notice everybody.",
"Scientist event.",
"Main difficult.",
"Decision leg boy.",
false,
1522006114,
true
],
[
"09f21d9c-8bf4-4a40-bee6-860c98acb26e",
"419750ef-5ff7-4396-8be9-5810b19a1ff8",
1336,
3691,
2111,
158279442,
791785452,
6688,
false,
true,
true,
false,
true,
true,
3825,
3404,
"Give glass follow.",
"All type worry.",
"Have involve high.",
"Friend material.",
"Consider give give.",
"Myself south level.",
"Tend growth dinner.",
false,
894742403,
true
],
[
"a0ffbe34-938d-4864-8911-36a587b73d15",
"189b62f3-e100-49ff-accf-f450fef33e88",
5605,
5436,
152,
1039660118,
1221443794,
7293,
true,
true,
true,
true,
true,
false,
1178,
758,
"Than these live pay.",
"Lawyer interest all.",
"Film beyond.",
"Industry keep story.",
"Article despite.",
"Role voice whole.",
"Wall current bar.",
true,
1312422513,
false
],
[
"45f30d62-8bbb-41aa-9746-9c004b4f109a",
"f995ca51-8bdc-4ccd-abff-3f207a351a45",
6808,
5276,
680,
1623386094,
98073116,
6507,
true,
false,
false,
true,
true,
false,
6995,
59,
"Into enjoy two late.",
"Number writer gas.",
"War think positive.",
"Around establish.",
"Need consumer piece.",
"Fish American all.",
"Morning president.",
false,
676462489,
false
],
[
"f9363436-19a3-4e6f-813c-b2a196ff7889",
"0d9513ad-60d1-449e-87a9-e2519c66b849",
7706,
6727,
9272,
900824121,
234234835,
707,
true,
false,
false,
true,
false,
false,
4587,
7674,
"You dark this day.",
"Night wear these to.",
"Floor degree this.",
"Very board blood.",
"Form total poor.",
"Particularly.",
"Stock continue see.",
false,
773691910,
true
],
[
"09127b56-1293-4187-b7a2-c2188c930ce6",
"45aec9da-853e-4690-88aa-0f8d004aa4ed",
9852,
5978,
3153,
471658497,
66214494,
4448,
false,
true,
false,
false,
true,
false,
5750,
3172,
"Entire home.",
"Product street red.",
"Window all way low.",
"Sense option.",
"Practice pressure.",
"Here computer.",
"Scene make left.",
true,
1569647649,
false
],
[
"4b388728-3d90-41f2-b921-5e02aa1b74ee",
"049583e0-be4a-4b7a-aa0b-0576e3ab9250",
6246,
5759,
146,
175428254,
737310444,
4242,
true,
false,
false,
false,
true,
false,
5224,
2763,
"Per leg close sign.",
"Eat individual lose.",
"Admit who man home.",
"Entire listen my.",
"Land wife picture.",
"Police two part.",
"Meeting stand.",
true,
1384260351,
true
],
[
"8b9942c8-1937-400c-975c-97a9d568a899",
"a9bcf781-3f19-428b-948f-53513b03855c",
7462,
4590,
3999,
505880429,
1328722228,
6372,
true,
true,
true,
false,
false,
true,
6613,
3033,
"Whom miss reflect.",
"Outside democratic.",
"Training tough none.",
"Why analysis still.",
"Thus share sport.",
"Behind ahead.",
"Agency huge picture.",
false,
916919845,
false
],
[
"205748be-7a5a-4e10-b8f4-fd511134c337",
"8ddb4850-a0b3-4378-b9a7-027b175f787d",
3143,
4283,
7924,
207010479,
799065176,
3048,
true,
true,
true,
false,
false,
false,
454,
5276,
"Spend such offer.",
"Even evidence law.",
"Benefit five assume.",
"Away it activity.",
"Scientist why many.",
"Hundred close.",
"One but price.",
true,
1067003109,
true
],
[
"7e241e85-703c-4edf-9cd5-38d77620174e",
"44d712f4-b250-451c-849a-832e10af6aa3",
3389,
4526,
1641,
438970549,
1217830635,
4821,
true,
true,
false,
false,
false,
false,
7061,
9167,
"Half but.",
"Put suddenly effort.",
"Coach stop company.",
"Actually box agent.",
"Animal region.",
"Dark suggest effect.",
"Discover somebody.",
false,
871071029,
true
],
[
"75190851-34e0-4d73-8748-bacfb8cee770",
"38eaee14-f3d6-4a76-be80-2981e703665e",
9884,
7043,
193,
106705399,
991535360,
4827,
true,
true,
false,
false,
false,
false,
5792,
7674,
"People different.",
"Purpose land key.",
"Which million test.",
"Technology suddenly.",
"Computer positive.",
"Always quickly.",
"Team huge long.",
false,
767294086,
false
],
[
"0cbf19a0-172a-49a2-a2b2-2d4c48cdaf40",
"50c20fc9-d5b7-419a-90b7-4d2bcfff99c5",
9154,
5986,
9567,
541734621,
925425989,
329,
true,
true,
false,
true,
false,
true,
7972,
5818,
"Seek discover sign.",
"Sister early blue.",
"Pass Mrs call civil.",
"Government.",
"Carry item over.",
"Raise treat travel.",
"Chance challenge.",
false,
1319102928,
false
]
]
},
"database": {
"id": "89229e26-7f74-4443-a568-85512eaeaa07",
"type": "database"

View File

@ -10,13 +10,11 @@
},
"processor": {
"type": "pii",
"config": {
}
"config": {}
},
"sink": {
"type": "metadata-rest-tables",
"config": {
}
"config": {}
},
"metadata_server": {
"type": "metadata-server",

View File

@ -64,7 +64,7 @@ class Workflow:
self.ctx = WorkflowContext(workflow_id=self.config.run_id)
source_type = self.config.source.type
source_class = self.get('metadata.ingestion.source.{}.{}Source'.format(
source_type.replace('-', '_'), ''.join([i.title() for i in source_type.replace('-', '_').split('_')])))
self.replaceRegistry(source_type, True), self.replaceRegistry(source_type, False)))
metadata_config = self.config.metadata_server.dict().get("config", {})
self.source: Source = source_class.create(
self.config.source.dict().get("config", {}), metadata_config, self.ctx
@ -76,7 +76,7 @@ class Workflow:
if self.config.processor:
processor_type = self.config.processor.type
processor_class = self.get('metadata.ingestion.processor.{}.{}Processor'.format(
processor_type.replace('-', '_'), ''.join([i.title() for i in processor_type.replace('-', '_').split('_')])))
self.replaceRegistry(processor_type, True), self.replaceRegistry(processor_type, False)))
processor_config = self.config.processor.dict().get("config", {})
self.processor: Processor = processor_class.create(processor_config, metadata_config, self.ctx)
logger.debug(f"Processor Type: {processor_type}, {processor_class} configured")
@ -84,7 +84,7 @@ class Workflow:
if self.config.stage:
stage_type = self.config.stage.type
stage_class = self.get('metadata.ingestion.stage.{}.{}Stage'.format(
stage_type.replace('-', '_'), ''.join([i.title() for i in stage_type.replace('-', '_').split('_')])))
self.replaceRegistry(stage_type, True), self.replaceRegistry(stage_type, False)))
stage_config = self.config.stage.dict().get("config", {})
self.stage: Stage = stage_class.create(stage_config, metadata_config, self.ctx)
logger.debug(f"Stage Type: {stage_type}, {stage_class} configured")
@ -92,7 +92,7 @@ class Workflow:
if self.config.sink:
sink_type = self.config.sink.type
sink_class = self.get('metadata.ingestion.sink.{}.{}Sink'.format(
sink_type.replace('-', '_'), ''.join([i.title() for i in sink_type.replace('-', '_').split('_')])))
self.replaceRegistry(sink_type, True), self.replaceRegistry(sink_type, False)))
sink_config = self.config.sink.dict().get("config", {})
self.sink: Sink = sink_class.create(sink_config, metadata_config, self.ctx)
logger.debug(f"Sink type:{self.config.sink.type},{sink_class} configured")
@ -100,11 +100,17 @@ class Workflow:
if self.config.bulk_sink:
bulk_sink_type = self.config.bulk_sink.type
bulk_sink_class = self.get('metadata.ingestion.bulksink.{}.{}BulkSink'.format(
bulk_sink_type.replace('-', '_'), ''.join([i.title() for i in bulk_sink_type.replace('-', '_').split('_')])))
self.replaceRegistry(bulk_sink_type, True), self.replaceRegistry(bulk_sink_type, False)))
bulk_sink_config = self.config.bulk_sink.dict().get("config", {})
self.bulk_sink: BulkSink = bulk_sink_class.create(bulk_sink_config, metadata_config, self.ctx)
logger.info(f"BulkSink type:{self.config.bulk_sink.type},{bulk_sink_class} configured")
def replaceRegistry(self, type: str, isFile: bool):
    """Convert a dashed plugin type into the name used for dynamic import.

    Dashes are first normalised to underscores. With ``isFile`` true the
    result is that snake_case string (the module file name); otherwise the
    underscore-separated parts are title-cased and concatenated to form the
    class-name prefix, e.g. ``metadata-rest-tables`` -> ``MetadataRestTables``.
    """
    snake_name = type.replace('-', '_')
    if not isFile:
        return ''.join(map(str.title, snake_name.split('_')))
    return snake_name
def get(self, key: str) -> Type[T]:
if key.find(".") >= 0:
# If the key contains a dot, we treat it as a import path and attempt
@ -113,7 +119,7 @@ class Workflow:
MyClass = getattr(importlib.import_module(module_name), class_name)
return MyClass
@classmethod
@ classmethod
def create(cls, config_dict: dict) -> "Workflow":
"""Build a Workflow from a plain configuration dictionary.

The dictionary is parsed with ``WorkflowConfig.parse_obj`` and the
resulting config object is passed to the class constructor.
"""
config = WorkflowConfig.parse_obj(config_dict)
return cls(config)

View File

@ -31,7 +31,7 @@ from metadata.ingestion.models.table_queries import TableUsageRequest, ColumnJoi
from metadata.ingestion.ometa.auth_provider import MetadataServerConfig, AuthenticationProvider, \
GoogleAuthenticationProvider, NoOpAuthenticationProvider, OktaAuthenticationProvider
from metadata.ingestion.ometa.credentials import URL, get_api_version
from metadata.generated.schema.entity.data.table import Table, TableJoins
from metadata.generated.schema.entity.data.table import Table, TableJoins, TableData
from metadata.generated.schema.entity.data.database import Database
logger = logging.getLogger(__name__)
@ -41,8 +41,6 @@ TableEntities = List[Table]
Tags = List[Tag]
class RetryException(Exception):
pass
@ -266,12 +264,16 @@ class REST(object):
if fields is None:
resp = self.get('/tables')
else:
resp = self.get('/tables?fields={}&offset={}&limit={}'.format(fields,offset, limit))
resp = self.get('/tables?fields={}&offset={}&limit={}'.format(fields, offset, limit))
if self._use_raw_data:
return resp
else:
return [Table(**t) for t in resp['data']]
def ingest_sample_data(self, id, sample_data):
    """Upload sample rows for one table and return what the server stored.

    ``id`` must expose the raw identifier as ``__root__`` and ``sample_data``
    must provide ``.json()``. The payload is sent with PUT to
    ``/tables/<id>/sampleData`` and the ``sampleData`` entry of the response
    is unpacked into a ``TableData``.
    """
    endpoint = '/tables/{}/sampleData'.format(id.__root__)
    payload = sample_data.json()
    resp = self.put(endpoint, data=payload)
    return TableData(**resp['sampleData'])
def get_table_by_id(self, table_id: str, fields: [] = ['columns']) -> Table:
"""Get Table By ID"""
params = {}
@ -309,7 +311,7 @@ class REST(object):
resp = self.get('/tags/{}'.format(category))
return [Tag(**d) for d in resp['children']]
def compute_percentile(self, entity_type:str, date:str):
def compute_percentile(self, entity_type: str, date: str):
resp = self.post('/usage/compute.percentile/{}/{}'.format(entity_type, date))
logger.debug("published compute percentile {}".format(resp))

View File

@ -63,6 +63,8 @@ class MetadataRestTablesSink(Sink):
description=table_and_db.table.description,
database=db.id)
created_table = self.rest.create_or_update_table(table_request)
if hasattr(table_and_db.table,'sampleData'):
self.rest.ingest_sample_data(id=created_table.id,sample_data=table_and_db.table.sampleData)
logger.info(
'Successfully ingested {}.{}'.format(table_and_db.database.name.__root__, created_table.name.__root__))
self.status.records_written(

View File

@ -18,6 +18,7 @@ import pandas as pd
import uuid
import os
import json
from faker import Faker
from collections import namedtuple
from dataclasses import dataclass, field
from typing import Iterable, List, Dict, Any, Union
@ -169,6 +170,33 @@ class SampleTableMetadataGenerator:
return sorted_row_dict
class GenerateFakeSampleData:
    """Builds placeholder sample rows for tables that define no sampleData."""

    def __init__(self) -> None:
        pass

    @classmethod
    def checkColumns(cls, columns, row_count: int = 15):
        """Generate fake sample data for the given column definitions.

        Args:
            columns: iterable of column dicts. Each needs a 'name'; columns
                whose name does not contain "id" also need 'columnDataType'.
            row_count: number of rows to produce. Defaults to 15, the value
                that was previously hard-coded.

        Returns:
            A dict with "columns" (the column names, in the order given) and
            "rows" (a list of rows, each holding exactly one value per
            column in that same order).
        """
        columns = list(columns)
        # Keep the names in an ordered list: collecting them in a set lost the
        # ordering, so the header no longer lined up with the row values.
        column_names = [column['name'] for column in columns]
        # Only build the generator when there is something to generate.
        fake = Faker() if row_count > 0 else None
        rows = [
            [cls._fake_value(fake, column) for column in columns]
            for _ in range(row_count)
        ]
        return {"columns": column_names, "rows": rows}

    @staticmethod
    def _fake_value(fake, column):
        """Return one fake cell value appropriate for ``column``."""
        # NOTE(review): this substring match also hits names such as "width"
        # or "paid"; kept unchanged so existing output categories stay the same.
        if "id" in column['name']:
            # Stored as text so the row holds plain JSON-compatible values.
            return str(uuid.uuid4())
        data_type = column['columnDataType']
        if data_type == 'VARCHAR':
            return fake.text(max_nb_chars=20)
        if data_type == 'NUMERIC':
            return fake.pyint()
        if data_type == 'BOOLEAN':
            return fake.pybool()
        if data_type == 'TIMESTAMP':
            return fake.unix_time()
        # Unknown type: emit a placeholder so the row keeps one value per
        # column instead of silently becoming shorter than the header.
        return None
class SampleTablesSource(Source):
def __init__(self, config: SampleTableSourceConfig, metadata_config: MetadataServerConfig, ctx):
@ -197,6 +225,8 @@ class SampleTablesSource(Source):
description=self.database['description'],
service=EntityReference(id=self.service.id, type=self.config.service_type))
for table in self.tables['tables']:
if not table.get('sampleData'):
table['sampleData'] = GenerateFakeSampleData.checkColumns(table['columns'])
table_metadata = Table(**table)
table_and_db = OMetaDatabaseAndTable(table=table_metadata, database=db)
self.status.scanned(table_metadata.name.__root__)