diff --git a/ingestion/examples/sample_data/datasets/query_log b/ingestion/examples/sample_data/datasets/query_log index 2a62e1093ee..6d98dc2032a 100644 --- a/ingestion/examples/sample_data/datasets/query_log +++ b/ingestion/examples/sample_data/datasets/query_log @@ -1,18 +1,24 @@ -query -"select * from shopify.raw_product_catalog" -"select comments, products from shopify.raw_product_catalog" -"select cust.customer_id, fact_order.order_id from dim_customer cust join fact_order on cust.customer_id = fact_order.customer_id" -"select sale.sale_id, cust.customer_id, fact_order.order_ir from shopify.fact_sale sale join dim_customer cust on sale.customer_id = cust.customer_id join fact_order on fact_order.order_id = sale.order_id" -"select case when net_sales > 100 then 'high' else 'low' end as map_sales from shopify.fact_sale" -"select ROW_NUMBER() OVER (PARTITION BY sale_id, customer_id) AS sale_row from shopify.fact_sale" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"select * from shopify.raw_customer" -"create table shopify.dim_address_clean as select address_id, shop_id, first_name, last_name, address1 as address, company, city, region, zip, country, phone from shopify.dim_address" \ No newline at end of file +query,cost +"select * from shopify.raw_product_catalog",10 +"select * from shopify.raw_product_catalog",12 +"select comments, products from shopify.raw_product_catalog",2 +"select comments, products from shopify.raw_product_catalog",3 +"select comments, products from shopify.raw_product_catalog",9 +"select cust.customer_id, fact_order.order_id from dim_customer cust join fact_order on cust.customer_id = fact_order.customer_id",0.222 +"select sale.sale_id, 
cust.customer_id, fact_order.order_id from shopify.fact_sale sale join dim_customer cust on sale.customer_id = cust.customer_id join fact_order on fact_order.order_id = sale.order_id",0.234 +"select sale.sale_id, cust.customer_id, fact_order.order_id from shopify.fact_sale sale join dim_customer cust on sale.customer_id = cust.customer_id join fact_order on fact_order.order_id = sale.order_id",0.5 +"select sale.sale_id, cust.customer_id, fact_order.order_id from shopify.fact_sale sale join dim_customer cust on sale.customer_id = cust.customer_id join fact_order on fact_order.order_id = sale.order_id",0.65 +"select case when net_sales > 100 then 'high' else 'low' end as map_sales from shopify.fact_sale",2 +"select ROW_NUMBER() OVER (PARTITION BY sale_id, customer_id) AS sale_row from shopify.fact_sale",5 +"select * from shopify.raw_customer",19 +"select * from shopify.raw_customer",19 +"select * from shopify.raw_customer",18 +"select * from shopify.raw_customer",17 +"select * from shopify.raw_customer",16 +"select * from shopify.raw_customer",20 +"select * from shopify.raw_customer",21 +"select * from shopify.raw_customer",22 +"select * from shopify.raw_customer",15 +"select * from shopify.raw_customer",12 +"create table shopify.dim_address_clean as select address_id, shop_id, first_name, last_name, address1 as address, company, city, region, zip, country, phone from shopify.dim_address",0.5 +"create table shopify.dim_address_clean as select address_id, shop_id, first_name, last_name, address1 as address, company, city, region, zip, country, phone from shopify.dim_address",0.5 \ No newline at end of file diff --git a/ingestion/src/metadata/ingestion/source/database/sample_usage.py b/ingestion/src/metadata/ingestion/source/database/sample_usage.py index 41b30ca181f..8e98d72d87b 100644 --- a/ingestion/src/metadata/ingestion/source/database/sample_usage.py +++ b/ingestion/src/metadata/ingestion/source/database/sample_usage.py @@ -92,6 +92,7 @@ class 
SampleUsageSource(UsageSource): databaseName="ecommerce_db", serviceName=self.config.serviceName, databaseSchema="shopify", + cost=row.get("cost"), ) for row in self.query_logs ]