From 8dd613caa58c016b26262028425519dac29a033a Mon Sep 17 00:00:00 2001 From: juntao Date: Thu, 23 May 2024 23:45:47 +0800 Subject: [PATCH] Fixes #16235: need quote fullyQualifiedName in Ingestion Framework (#16273) * Fixes #16235: need quote fullyQualifiedName in Ingestion Framework * MINOR: fix UT issue * revert: fix UT issue * revert code * revert code * format code --- .../reports/web_analytic_report_data_processor.py | 4 +++- .../metadata/ingestion/ometa/mixins/table_mixin.py | 2 +- .../orm_profiler/test_orm_profiler_e2e.py | 13 ++++++++++--- .../test_web_analytic_report_processor.py | 10 +++++----- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/ingestion/src/metadata/data_insight/processor/reports/web_analytic_report_data_processor.py b/ingestion/src/metadata/data_insight/processor/reports/web_analytic_report_data_processor.py index 9ab1175f829..bbd78780cd5 100644 --- a/ingestion/src/metadata/data_insight/processor/reports/web_analytic_report_data_processor.py +++ b/ingestion/src/metadata/data_insight/processor/reports/web_analytic_report_data_processor.py @@ -108,7 +108,9 @@ class WebAnalyticEntityViewReportDataProcessor(DataProcessor): entity_obj = EntityObj(split_url[0], split_url[1]) entity_type = entity_obj.entity_type - re_pattern = re.compile(f"(.*{entity_type}/{entity_obj.fqn})") + re_pattern = re.compile( + f"(.*{re.escape(entity_type)}/{re.escape(entity_obj.fqn)})" + ) if ( entity_obj.fqn in refined_data diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py index 16c968ba0c1..0096c150f74 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/table_mixin.py @@ -290,7 +290,7 @@ class OMetaTableMixin: Returns: Optional[Table]: OM table object """ - return self._get(Table, f"{quote(model_str(fqn))}/tableProfile/latest") + return self._get(Table, f"{quote(model_str(fqn), safe='')}/tableProfile/latest") def create_or_update_custom_metric( self, custom_metric: CreateCustomMetricRequest, table_id: str diff --git a/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py b/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py index 0c80fdd5305..7fb37cc00c6 100644 --- a/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py +++ b/ingestion/tests/integration/orm_profiler/test_orm_profiler_e2e.py @@ -89,8 +89,9 @@ class User(Base): signedup = Column(DateTime) +# with weird characters of fqn class NewUser(Base): - __tablename__ = "new_users" + __tablename__ = "new/users" id = Column(Integer, primary_key=True) name = Column(String(256)) fullname = Column(String(256)) @@ -303,7 +304,7 @@ class ProfilerWorkflowTest(TestCase): { "type": "Profiler", "profileSample": 50, - "tableFilterPattern": {"includes": ["new_users"]}, + "tableFilterPattern": {"includes": ["new/users"]}, } ) workflow_config["processor"] = {"type": "orm-profiler", "config": {}} @@ -315,13 +316,19 @@ class ProfilerWorkflowTest(TestCase): table = self.metadata.get_by_name( entity=Table, - fqn="test_sqlite.main.main.new_users", + fqn="test_sqlite.main.main.new/users", fields=["tableProfilerConfig"], ) # setting sampleProfile from config has been temporarly removed # up until we split tests and profiling assert table.tableProfilerConfig is None + profile = self.metadata.get_latest_table_profile( + table.fullyQualifiedName + ).profile + + assert profile is not None + def test_workflow_datetime_partition(self): """test workflow with partition""" workflow_config = deepcopy(ingestion_config) diff --git a/ingestion/tests/unit/data_insight/test_web_analytic_report_processor.py b/ingestion/tests/unit/data_insight/test_web_analytic_report_processor.py index cbbf3c0a666..b2292a21275 100644 --- a/ingestion/tests/unit/data_insight/test_web_analytic_report_processor.py +++ b/ingestion/tests/unit/data_insight/test_web_analytic_report_processor.py @@ -42,8 +42,8 @@ WEB_ANALYTIC_EVENTS = [ timestamp=1667475458757, eventType=WebAnalyticEventType.PageView.value, eventData=PageViewData( - fullUrl="http://localhost:8585/table/sample_data.ecommerce_db.shopify.dim_address", - url="/table/sample_data.ecommerce_db.shopify.dim_address", + fullUrl="http://localhost:8585/table/sample_data.ecommerce_db.shopify.dim(address)", + url="/table/sample_data.ecommerce_db.shopify.dim(address)", hostname="localhost", language="en-US", screenSize="2140x1273", @@ -58,8 +58,8 @@ WEB_ANALYTIC_EVENTS = [ timestamp=1667475458757, eventType=WebAnalyticEventType.PageView.value, eventData=PageViewData( - fullUrl="http://localhost:8585/table/sample_data.ecommerce_db.shopify.dim_address", - url="/table/sample_data.ecommerce_db.shopify.dim_address", + fullUrl="http://localhost:8585/table/sample_data.ecommerce_db.shopify.dim(address)", + url="/table/sample_data.ecommerce_db.shopify.dim(address)", hostname="localhost", language="en-US", screenSize="2140x1273", @@ -111,7 +111,7 @@ class WebAnalyticEntityViewReportDataProcessorTest(unittest.TestCase): assert ( web_analytic_entity_report_data[ - "sample_data.ecommerce_db.shopify.dim_address" + "sample_data.ecommerce_db.shopify.dim(address)" ].views == 2 )