ISSUE-243: Generate sample data

This commit is contained in:
Suresh Srinivas 2021-08-19 19:39:57 -07:00
parent 1ce36a3f64
commit 6b5dd36672
3 changed files with 8 additions and 12 deletions

View File

@@ -2,9 +2,9 @@
"source": { "source": {
"type": "redshift", "type": "redshift",
"config": { "config": {
"host_port": "cluster.name.region.redshift.amazonaws.com:5439", "host_port": "redshift-cluster-1.clot5cqn1cnb.us-west-2.redshift.amazonaws.com:5439",
"username": "username", "username": "awsuser",
"password": "strong_password", "password": "focguC-kaqqe5-nepsok",
"database": "warehouse", "database": "warehouse",
"service_name": "aws_redshift", "service_name": "aws_redshift",
"service_type": "Redshift", "service_type": "Redshift",

View File

@@ -119,7 +119,7 @@ class ElasticsearchBulkSink(BulkSink):
table_name=table.name.__root__, table_name=table.name.__root__,
suggest=suggest, suggest=suggest,
description=table.description, description=table.description,
table_type=table.tableType, table_type=table.tableType.name,
last_updated_timestamp=timestamp, last_updated_timestamp=timestamp,
column_names=column_names, column_names=column_names,
column_descriptions=column_descriptions, column_descriptions=column_descriptions,

View File

@@ -176,9 +176,9 @@ class SQLSource(Source):
if self.connection is None: if self.connection is None:
self.connection = self.engine.connect() self.connection = self.engine.connect()
def fetch_sample_data(self, table: str): def fetch_sample_data(self, schema: str, table: str):
try: try:
query = f"select * from {table} limit 50" query = f"select * from {schema}.{table} limit 50"
logger.info("Fetching sample data, this may take a while {}".format(query)) logger.info("Fetching sample data, this may take a while {}".format(query))
results = self.connection.execute(query) results = self.connection.execute(query)
cols = list(results.keys()) cols = list(results.keys())
@@ -231,7 +231,7 @@ class SQLSource(Source):
columns=table_columns) columns=table_columns)
if self.sql_config.generate_sample_data: if self.sql_config.generate_sample_data:
self._get_connection() self._get_connection()
table_data = self.fetch_sample_data(table_name) table_data = self.fetch_sample_data(schema, table_name)
table.sampleData = table_data table.sampleData = table_data
table_and_db = OMetaDatabaseAndTable(table=table, database=self._get_database(schema)) table_and_db = OMetaDatabaseAndTable(table=table, database=self._get_database(schema))
@@ -259,10 +259,6 @@ class SQLSource(Source):
description = self._get_table_description(schema, view_name, inspector) description = self._get_table_description(schema, view_name, inspector)
table_columns = self._get_columns(schema, view_name, inspector) table_columns = self._get_columns(schema, view_name, inspector)
if self.sql_config.generate_sample_data:
self._get_connection()
self.fetch_sample_data(view_name)
table = Table(id=uuid.uuid4(), table = Table(id=uuid.uuid4(),
name=view_name, name=view_name,
tableType='View', tableType='View',
@@ -271,7 +267,7 @@ class SQLSource(Source):
viewDefinition=view_definition) viewDefinition=view_definition)
if self.sql_config.generate_sample_data: if self.sql_config.generate_sample_data:
self._get_connection() self._get_connection()
table_data = self.fetch_sample_data(view_name) table_data = self.fetch_sample_data(schema, view_name)
table.sampleData = table_data table.sampleData = table_data
table_and_db = OMetaDatabaseAndTable(table=table, database=self._get_database(schema)) table_and_db = OMetaDatabaseAndTable(table=table, database=self._get_database(schema))