SampleData Usage Fix (#4398)

* SampleData Test Connection & Usage Fix

* Fixed Pytest
This commit is contained in:
Mayur Singal 2022-05-04 20:15:49 +05:30 committed by GitHub
parent 6bd587be8d
commit 450fb2b132
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 14168 additions and 14786 deletions

View File

@@ -822,4 +822,194 @@ query
"select fact_sale.gross_sale, dim_staff.first_name, dim_staff.last_name from shopify.fact_sale join shopify.dim_staff on fact_sale.user_id = dim_staff.user_id"
"select fact_sale.gross_sale, dim_staff.first_name, dim_staff.last_name from shopify.fact_sale join shopify.dim_staff on fact_sale.user_id = dim_staff.user_id"
"select fact_sale.gross_sale, dim_staff.first_name, dim_staff.last_name from shopify.fact_sale join shopify.dim_staff on fact_sale.user_id = dim_staff.user_id"
"select fact_sale.gross_sale, dim_staff.first_name, dim_staff.last_name from shopify.fact_sale join shopify.dim_staff on fact_sale.user_id = dim_staff.user_id"
"select fact_sale.gross_sale, dim_staff.first_name, dim_staff.last_name from shopify.fact_sale join shopify.dim_staff on fact_sale.user_id = dim_staff.user_id"
"select address_id from dim_address"
"select shop_id from dim_address"
"select first_name from dim_address"
"select last_name from dim_address"
"select address1 from dim_address"
"select address2 from dim_address"
"select company from dim_address"
"select city from dim_address"
"select region from dim_address"
"select zip from dim_address"
"select country from dim_address"
"select phone from dim_address"
"select api_client_id from dim_api_client"
"select title from dim_api_client"
"select customer_id from dim_customer"
"select shop_id from dim_customer"
"select average_order_size from dim_customer"
"select total_order_count from dim_customer"
"select total_order_value from dim_customer"
"select first_order_date from dim_customer"
"select last_order_date from dim_customer"
"select rank from dim_customer"
"select new from dim_customer"
"select returning from dim_customer"
"select loyal from dim_customer"
"select at_risk from dim_customer"
"select dormant from dim_customer"
"select promising from dim_customer"
"select predicted_average_number_of_days_between_orders from dim_customer"
"select expected_purchase_value_in_next_30_days from dim_customer"
"select first_name from dim_customer"
"select last_name from dim_customer"
"select city from dim_customer"
"select region from dim_customer"
"select country from dim_customer"
"select email from dim_customer"
"select phone from dim_customer"
"select verified_email from dim_customer"
"select created_at from dim_customer"
"select accepts_marketing from dim_customer"
"select customer from dim_customer"
"select shipping_address from dim_customer"
"select orders from dim_customer"
"select location_id from dim_location"
"select shop_id from dim_location"
"select name from dim_location"
"select address from dim_location"
"select product_id from dim.product"
"select shop_id from dim.product"
"select title from dim.product"
"select vendor from dim.product"
"select created_at from dim.product"
"select deleted_at from dim.product"
"select product_variant_id from dim.product_variant"
"select product_id from dim.product_variant"
"select shop_id from dim.product_variant"
"select title from dim.product_variant"
"select barcode from dim.product_variant"
"select sku from dim.product_variant"
"select price from dim.product_variant"
"select grams from dim.product_variant"
"select created_at from dim.product_variant"
"select deleted_at from dim.product_variant"
"select shop_id from dim.shop"
"select name from dim.shop"
"select domain from dim.shop"
"select user_id from dim_staff"
"select shop_id from dim_staff"
"select first_name from dim_staff"
"select last_name from dim_staff"
"select email from dim_staff"
"select line_item_id from fact_line_item"
"select billing_address_id from fact_line_item"
"select order_id from fact_line_item"
"select product_id from fact_line_item"
"select product_variant_id from fact_line_item"
"select shop_id from fact_line_item"
"select name from fact_line_item"
"select product_title from fact_line_item"
"select price from fact_line_item"
"select quantity from fact_line_item"
"select requires_shipping from fact_line_item"
"select taxable from fact_line_item"
"select gift_card from fact_line_item"
"select grams from fact_line_item"
"select product_vendor from fact_line_item"
"select fulfillable_quantity from fact_line_item"
"select fulfillment_service from fact_line_item"
"select order_id from fact_order"
"select api_client_id from fact_order"
"select billing_address_id from fact_order"
"select customer_id from fact_order"
"select location_id from fact_order"
"select shipping_address_id from fact_order"
"select shop_id from fact_order"
"select user_id from fact_order"
"select name from fact_order"
"select total_price from fact_order"
"select discount_code from fact_order"
"select processed_at from fact_order"
"select canceled_at from fact_order"
"select deleted_at from fact_order"
"select test from fact_order"
"select sale_id from fact_sale"
"select billing_address_id from fact_sale"
"select api_client_id from fact_sale"
"select customer_id from fact_sale"
"select line_item_id from fact_sale"
"select location_id from fact_sale"
"select order_id from fact_sale"
"select product_id from fact_sale"
"select product_variant_id from fact_sale"
"select shipping_address_id from fact_sale"
"select shop_id from fact_sale"
"select user_id from fact_sale"
"select gross_sales from fact_sale"
"select net_sales from fact_sale"
"select total_sales from fact_sale"
"select returns from fact_sale"
"select discounts from fact_sale"
"select shipping from fact_sale"
"select taxes from fact_sale"
"select gift_card_discounts from fact_sale"
"select gift_card_gross_sales from fact_sale"
"select gift_cards_issued from fact_sale"
"select quantity from fact_sale"
"select currency from fact_sale"
"select is_deleted from fact_sale"
"select test from fact_sale"
"select happened_at from fact_sale"
"select derived_session_token from fact_session"
"select shop_id from fact_session"
"select session_duration from fact_session"
"select count_of_pageviews from fact_session"
"select session_started_at from fact_session"
"select session_token from fact_session"
"select user_token from fact_session"
"select landing_page_url from fact_session"
"select exit_page_path from fact_session"
"select exit_page_url from fact_session"
"select referrer_tld from fact_session"
"select ua_browser from fact_session"
"select ua_raw from fact_session"
"select count_of_orders_completed from fact_session"
"select completed_first_order_at from fact_session"
"select hit_first_checkout_at from fact_session"
"select started_first_checkout_at from fact_session"
"select count_of_cart_additions from fact_session"
"select count_of_distinct_products_added_to_cart from fact_session"
"select count_of_distinct_product_variants_added_to_cart from fact_session"
"select had_error from fact_session"
"select had_payment_error from fact_session"
"select had_out_of_stock_warning from fact_session"
"select had_credit_card_info_error from fact_session"
"select had_discount from fact_session"
"select had_free_shipping from fact_session"
"select location_city from fact_session"
"select location_region from fact_session"
"select location_region_code from fact_session"
"select location_country from fact_session"
"select location_country_code from fact_session"
"select comments from raw_customer"
"select creditcard from raw_customer"
"select membership from raw_customer"
"select orders from raw_customer"
"select platform from raw_customer"
"select preference from raw_customer"
"select shipping_address from raw_customer"
"select shipping_date from raw_customer"
"select transaction_date from raw_customer"
"select customer from raw_customer"
"select comments from raw_order"
"select creditcard from raw_order"
"select membership from raw_order"
"select orders from raw_order"
"select platform from raw_order"
"select preference from raw_order"
"select shipping_address from raw_order"
"select shipping_date from raw_order"
"select transaction_date from raw_order"
"select total_order_count from raw_order"
"select total_order_value from raw_order"
"select first_order_date from raw_order"
"select last_order_date from raw_order"
"select comments from raw_product_catalog"
"select products from raw_product_catalog"
"select platform from raw_product_catalog"
"select store_address from raw_product_catalog"
"select first_order_date from raw_product_catalog"
"select last_order_date from raw_product_catalog"

File diff suppressed because it is too large. Load Diff

View File

@@ -81,6 +81,7 @@ class MetadataUsageBulkSink(BulkSink):
for record in usage_records:
table_usage = TableUsageCount(**json.loads(record))
table_entities = []
self.service_name = table_usage.service_name
if "." in table_usage.table:
(
table_usage.database_schema,
@@ -100,7 +101,6 @@ class MetadataUsageBulkSink(BulkSink):
search_index="table_search_index",
)
table_entities = es_result
self.service_name = table_usage.service_name
for table_entity in table_entities:
if table_entity is not None:
if not table_usage_map.get(table_entity.id.__root__):

View File

@@ -66,5 +66,5 @@ class ESMixin(Generic[T]):
)
)
except Exception as err:
logger.error(f"Elasticsearch failed for query: {generate_es_string}")
logger.warning(f"Elasticsearch failed for query: {generate_es_string}")
return multiple_entities

View File

@@ -244,5 +244,5 @@ class OMetaLineageMixin(Generic[T]):
return True
except Exception as err:
logger.debug(str(err))
logger.error(f"Ingesting lineage failed")
logger.warning(f"Ingesting lineage failed")
return False

View File

@@ -12,7 +12,6 @@
import csv
import json
import os
import random
import sys
import traceback
import uuid
@@ -457,24 +456,7 @@ class SampleDataSource(Source[Entity]):
)
resp = self.metadata.list_entities(entity=User, limit=5)
self.user_entity = resp.entities
user_entity_len = min(len(self.user_entity), 5)
for table in self.tables["tables"]:
try:
for sql_object in table["tableQueries"]:
user_entity = self.user_entity[
random.choice(range(user_entity_len))
]
user_dict = {
"id": user_entity.id.__root__,
"name": user_entity.name.__root__,
"displayName": user_entity.displayName,
"href": user_entity.href,
"description": user_entity.description,
}
sql_object["user"] = EntityReference(**user_dict, type="user")
except Exception as err:
logger.debug(traceback.format_exc())
logger.debug(err)
table_metadata = Table(**table)
table_and_db = OMetaDatabaseAndTable(
table=table_metadata, database=db, database_schema=schema

View File

@@ -101,7 +101,7 @@ def create_generic_connection(connection, verbose: bool = False):
@singledispatch
def get_connection(
connection, verbose: bool = False
) -> Union[Engine, DynamoClient, GlueClient]:
) -> Union[Engine, DynamoClient, GlueClient, SalesforceClient]:
"""
Given an SQL configuration, build the SQLAlchemy Engine
"""

View File

@@ -67,7 +67,7 @@ class QueryParserTest(TestCase):
"shopify.dim_address": 100,
"shopify.dim_shop": 190,
"shopify.dim_customer": 125,
"dim_customer": 9,
"dim_customer": 38,
"shopify.dim_location": 75,
"dim_location.shop_id": 25,
"dim_shop.shop_id": 105,