diff --git a/ingestion/src/metadata/ingestion/lineage/parser.py b/ingestion/src/metadata/ingestion/lineage/parser.py index 929a3f5a122..23b88a38ed7 100644 --- a/ingestion/src/metadata/ingestion/lineage/parser.py +++ b/ingestion/src/metadata/ingestion/lineage/parser.py @@ -15,7 +15,7 @@ import traceback from collections import defaultdict from copy import deepcopy from logging.config import DictConfigurator -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple import sqlparse from cached_property import cached_property @@ -118,14 +118,20 @@ class LineageParser: return self.retrieve_tables(self.parser.target_tables) @cached_property - def column_lineage(self) -> List[Union[Tuple[Column, Column]]]: + def column_lineage(self) -> List[Tuple[Column, Column]]: """ Get a list of tuples of column lineage """ if self.parser._dialect == SQLPARSE_DIALECT: # pylint: disable=protected-access return self.parser.get_column_lineage() column_lineage = [] - for src_column, tgt_column in self.parser.get_column_lineage(): + for col_lineage in self.parser.get_column_lineage(): + # In case of column level lineage it is possible that we get + # two or more columns as there might be some intermediate columns + # but the source columns will be the first value and + # the target column always will be the last columns + src_column = col_lineage[0] + tgt_column = col_lineage[-1] src_col = Column(src_column.raw_name) src_col._parent = src_column._parent # pylint: disable=protected-access tgt_col = Column(tgt_column.raw_name)