From 17fb2cabca5bf1d7f05f691ca0f4a3ff3f7d16c3 Mon Sep 17 00:00:00 2001 From: Mayur Singal <39544459+ulixius9@users.noreply.github.com> Date: Thu, 25 Jan 2024 10:15:32 +0530 Subject: [PATCH] MINOR: Lineage handle copy queries being skipped (#14855) --- .../src/metadata/ingestion/lineage/parser.py | 28 +++++++++++---- ingestion/tests/unit/test_query_parser.py | 35 ++++++++++++++++--- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/ingestion/src/metadata/ingestion/lineage/parser.py b/ingestion/src/metadata/ingestion/lineage/parser.py index 6bd4386ee0a..5b74bfba2d7 100644 --- a/ingestion/src/metadata/ingestion/lineage/parser.py +++ b/ingestion/src/metadata/ingestion/lineage/parser.py @@ -98,24 +98,30 @@ class LineageParser: """ Get a list of intermediate tables """ - # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning - return self.retrieve_tables(self.parser.intermediate_tables) + if self.parser: + # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning + return self.retrieve_tables(self.parser.intermediate_tables) + return [] @cached_property def source_tables(self) -> List[Table]: """ Get a list of source tables """ - # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning - return self.retrieve_tables(self.parser.source_tables) + if self.parser: + # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning + return self.retrieve_tables(self.parser.source_tables) + return [] @cached_property def target_tables(self) -> List[Table]: """ Get a list of target tables """ - # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning - return self.retrieve_tables(self.parser.target_tables) + if self.parser: + # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning + return self.retrieve_tables(self.parser.target_tables) + return [] # pylint: disable=protected-access @cached_property @@ -124,6 +130,8 @@ class LineageParser: Get a list of tuples of column lineage """ column_lineage = [] + if self.parser is None: + return [] try: if self.parser._dialect == SQLPARSE_DIALECT: return self.parser.get_column_lineage() @@ -331,6 +339,8 @@ class LineageParser: :return: for each table name, list all joins against other tables """ join_data = defaultdict(list) + if self.parser is None: + return join_data # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning for statement in self.parser.statements(): self.stateful_add_joins_from_statement(join_data, sql_statement=statement) @@ -378,7 +388,11 @@ class LineageParser: @staticmethod def _evaluate_best_parser( query: str, dialect: Dialect, timeout_seconds: int - ) -> LineageRunner: + ) -> Optional[LineageRunner]: + + if query is None: + return None + @timeout(seconds=timeout_seconds) def get_sqlfluff_lineage_runner(qry: str, dlct: str) -> LineageRunner: lr_dialect = LineageRunner(qry, dialect=dlct) diff --git a/ingestion/tests/unit/test_query_parser.py b/ingestion/tests/unit/test_query_parser.py index 4b2baa61796..12fdc806b64 100644 --- a/ingestion/tests/unit/test_query_parser.py +++ b/ingestion/tests/unit/test_query_parser.py @@ -206,7 +206,7 @@ class QueryParserTests(TestCase): """ Validate we obtain information from Comon Table Expressions """ - query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS + query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS WITH cte_table AS ( SELECT USERS.ID, @@ -226,7 +226,6 @@ class QueryParserTests(TestCase): ; """ - expected_tables = {"testdb.public.users"} expected_lineage = [ ( Column("testdb.public.users.id"), @@ -258,7 +257,7 @@ class QueryParserTests(TestCase): """ Validate we obtain information from Comon Table Expressions """ - query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS + query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS SELECT ID, -- A comment here @@ -295,7 +294,7 @@ class QueryParserTests(TestCase): """ Validate we obtain information from Comon Table Expressions """ - query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS + query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS SELECT ID AS new_identifier, NAME new_name @@ -329,3 +328,31 @@ class QueryParserTests(TestCase): parser.column_lineage, expected_lineage, ) + + def test_copy_query(self): + """ + Validate Copy query is skipped appropriately without any errors + """ + query = """COPY MY_TABLE col1,col2,col3 + FROM 's3://bucket/schema/table.csv' + WITH CREDENTIALS '' + REGION 'US-east-2' + """ + expected_lineage = [] + expected_tables = set() + + parser = LineageParser(query) + tables = {str(table) for table in parser.involved_tables} + self.assertEqual(tables, expected_tables) + self.assertEqual( + parser.column_lineage, + expected_lineage, + ) + + parser = LineageParser(query, Dialect.MYSQL) + tables = {str(table) for table in parser.involved_tables} + self.assertEqual(tables, expected_tables) + self.assertEqual( + parser.column_lineage, + expected_lineage, + )