NOTE(review): line breaks were reconstructed from the hunk line counts; the leading
indentation of context lines is inferred from Python block nesting - confirm against the target files.

diff --git a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py
index 3b1f9fc2bb..9743cef48a 100644
--- a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py
+++ b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py
@@ -52,6 +52,7 @@ class ParserState(Enum):
     STRING = 2
     COMMENT = 3
     MULTILINE_COMMENT = 4
+    BRACKETED_IDENTIFIER = 5
 
 
 class _StatementSplitter:
@@ -141,6 +142,10 @@ class _StatementSplitter:
                     self.state = ParserState.STRING
                     self.current_statement.append(c)
                     prev_real_char = c
+                elif c == "[":
+                    self.state = ParserState.BRACKETED_IDENTIFIER
+                    self.current_statement.append(c)
+                    prev_real_char = c
                 elif c == "-" and next_char == "-":
                     self.state = ParserState.COMMENT
                     self.current_statement.append(c)
@@ -172,6 +177,14 @@ class _StatementSplitter:
                 elif c == "'":
                     self.state = ParserState.NORMAL
 
+            elif self.state == ParserState.BRACKETED_IDENTIFIER:
+                self.current_statement.append(c)
+                if c == "]" and next_char == "]":
+                    self.current_statement.append(next_char)
+                    self.i += 1
+                elif c == "]":
+                    self.state = ParserState.NORMAL
+
             elif self.state == ParserState.COMMENT:
                 self.current_statement.append(c)
                 if c == "\n":
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py
index eea5609b86..c23b2ae0eb 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py
@@ -192,3 +192,55 @@ WHEN NOT MATCHED THEN
     statements = [statement.strip() for statement in split_statements(test_sql)]
     expected = [test_sql]
     assert statements == expected
+
+
+def test_split_statement_with_end_keyword_in_string():
+    test_sql = """
+    SELECT
+        [Id],
+        'End Date' as category
+    INTO myprodtable
+    FROM myrawtable
+    """
+    statements = [statement.strip() for statement in split_statements(test_sql)]
+    expected = [test_sql.strip()]
+    assert statements == expected
+
+
+def test_split_statement_with_end_keyword_in_string_with_escape():
+    test_sql = """
+    SELECT
+        [Id],
+        '''Escaped Part'' End Date' as category
+    INTO myprodtable
+    FROM myrawtable
+    """
+    statements = [statement.strip() for statement in split_statements(test_sql)]
+    expected = [test_sql.strip()]
+    assert statements == expected
+
+
+def test_split_statement_with_end_keyword_in_bracketed_identifier():
+    test_sql = """
+    SELECT
+        [Id],
+        [End Date]
+    INTO myprodtable
+    FROM myrawtable
+    """
+    statements = [statement.strip() for statement in split_statements(test_sql)]
+    expected = [test_sql.strip()]
+    assert statements == expected
+
+
+def test_split_statement_with_end_keyword_in_bracketed_identifier_with_escapes():
+    test_sql = """
+    SELECT
+        [Id],
+        [[Escaped Part]] End Date]
+    INTO myprodtable
+    FROM myrawtable
+    """
+    statements = [statement.strip() for statement in split_statements(test_sql)]
+    expected = [test_sql.strip()]
+    assert statements == expected