fix(ingest/mssql): don't split_statements on keywords inside bracketed identifiers (#14863)

This commit is contained in:
Michael Maltese 2025-09-25 12:29:38 -04:00 committed by GitHub
parent 72ee770ae3
commit 55d714e0cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 65 additions and 0 deletions

View File

@ -52,6 +52,7 @@ class ParserState(Enum):
STRING = 2
COMMENT = 3
MULTILINE_COMMENT = 4
BRACKETED_IDENTIFIER = 5
class _StatementSplitter:
@ -141,6 +142,10 @@ class _StatementSplitter:
self.state = ParserState.STRING
self.current_statement.append(c)
prev_real_char = c
elif c == "[":
self.state = ParserState.BRACKETED_IDENTIFIER
self.current_statement.append(c)
prev_real_char = c
elif c == "-" and next_char == "-":
self.state = ParserState.COMMENT
self.current_statement.append(c)
@ -172,6 +177,14 @@ class _StatementSplitter:
elif c == "'":
self.state = ParserState.NORMAL
elif self.state == ParserState.BRACKETED_IDENTIFIER:
self.current_statement.append(c)
if c == "]" and next_char == "]":
self.current_statement.append(next_char)
self.i += 1
elif c == "]":
self.state = ParserState.NORMAL
elif self.state == ParserState.COMMENT:
self.current_statement.append(c)
if c == "\n":

View File

@ -192,3 +192,55 @@ WHEN NOT MATCHED THEN
statements = [statement.strip() for statement in split_statements(test_sql)]
expected = [test_sql]
assert statements == expected
def test_split_statement_with_end_keyword_in_string():
    """A quoted string containing the word 'End' must not split the statement."""
    query = """
SELECT
[Id],
'End Date' as category
INTO myprodtable
FROM myrawtable
"""
    # Collect every chunk the splitter produces, ignoring surrounding whitespace.
    chunks = []
    for chunk in split_statements(query):
        chunks.append(chunk.strip())

    # The whole input is expected to come back as one single statement.
    assert chunks == [query.strip()]
def test_split_statement_with_end_keyword_in_string_with_escape():
    """A string literal with doubled-quote escapes and the word 'End' stays in one statement."""
    query = """
SELECT
[Id],
'''Escaped Part'' End Date' as category
INTO myprodtable
FROM myrawtable
"""
    # Only one statement is expected: the input itself, minus outer whitespace.
    wanted = [query.strip()]
    got = [part.strip() for part in split_statements(query)]
    assert got == wanted
def test_split_statement_with_end_keyword_in_bracketed_identifier():
    """A bracketed identifier containing the word 'End' must not split the statement."""
    query = """
SELECT
[Id],
[End Date]
INTO myprodtable
FROM myrawtable
"""
    # Materialise the splitter output first, then normalise whitespace.
    raw_parts = list(split_statements(query))
    trimmed = [part.strip() for part in raw_parts]

    # The query should survive as exactly one statement.
    assert trimmed == [query.strip()]
def test_split_statement_with_end_keyword_in_bracketed_identifier_with_escapes():
    """A bracketed identifier with a doubled `]]` and the word 'End' stays in one statement."""
    query = """
SELECT
[Id],
[[Escaped Part]] End Date]
INTO myprodtable
FROM myrawtable
"""
    # Expected result: the input comes back whole, minus outer whitespace.
    wanted = [query.strip()]

    got = []
    for part in split_statements(query):
        got.append(part.strip())

    assert got == wanted