Fix splitter dropping sub-tables that are only one row (or one column) long (#8839)

This commit is contained in:
Sebastian Husch Lee 2025-02-11 10:55:35 +01:00 committed by GitHub
parent f189a1c349
commit 2c0a72844f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 1 deletions

View File

@ -195,7 +195,7 @@ class CSVDocumentSplitter:
df_length = df.shape[0] if axis == "row" else df.shape[1]
for empty_start_idx, empty_end_idx in split_indices + [(df_length, df_length)]:
# Avoid empty splits
if empty_start_idx - table_start_idx > 1:
if empty_start_idx - table_start_idx >= 1:
if axis == "row":
sub_table = df.iloc[table_start_idx:empty_start_idx]
else:

View File

@ -227,6 +227,12 @@ E,F,,,G,H
assert table.content == expected_tables[i]
assert table.meta == expected_meta[i]
def test_sub_table_with_one_row(self) -> None:
    """Regression test: a sub-table consisting of a single row must be kept.

    The CSV content holds a two-row table (``A,B,C`` / ``1,2,3``), one empty
    row (``,,``), and then a single-row table (``4,5,6``). The splitter is
    configured with ``row_split_threshold=1`` and is expected to return two
    documents, i.e. the trailing one-row table must not be discarded.
    """
    splitter = CSVDocumentSplitter(row_split_threshold=1)
    doc = Document(content="A,B,C\n1,2,3\n,,\n4,5,6")
    split_result = splitter.run([doc])
    # Two tables are expected: the two-row block and the single-row block.
    assert len(split_result["documents"]) == 2
def test_threshold_no_effect(self, two_tables_sep_by_two_empty_rows: str) -> None:
splitter = CSVDocumentSplitter(row_split_threshold=3)
doc = Document(content=two_tables_sep_by_two_empty_rows)