mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-29 07:59:27 +00:00
Fix splitter when table is only one row wide (#8839)
This commit is contained in:
parent
f189a1c349
commit
2c0a72844f
@@ -195,7 +195,7 @@ class CSVDocumentSplitter:
|
||||
df_length = df.shape[0] if axis == "row" else df.shape[1]
|
||||
for empty_start_idx, empty_end_idx in split_indices + [(df_length, df_length)]:
|
||||
# Avoid empty splits
|
||||
- if empty_start_idx - table_start_idx > 1:
|
||||
+ if empty_start_idx - table_start_idx >= 1:
|
||||
if axis == "row":
|
||||
sub_table = df.iloc[table_start_idx:empty_start_idx]
|
||||
else:
|
||||
|
||||
@@ -227,6 +227,12 @@ E,F,,,G,H
|
||||
assert table.content == expected_tables[i]
|
||||
assert table.meta == expected_meta[i]
|
||||
|
||||
def test_sub_table_with_one_row(self):
|
||||
splitter = CSVDocumentSplitter(row_split_threshold=1)
|
||||
doc = Document(content="""A,B,C\n1,2,3\n,,\n4,5,6""")
|
||||
split_result = splitter.run([doc])
|
||||
assert len(split_result["documents"]) == 2
|
||||
|
||||
def test_threshold_no_effect(self, two_tables_sep_by_two_empty_rows: str) -> None:
|
||||
splitter = CSVDocumentSplitter(row_split_threshold=3)
|
||||
doc = Document(content=two_tables_sep_by_two_empty_rows)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user