mirror of
https://github.com/langgenius/dify.git
synced 2025-06-27 05:30:04 +00:00
fix(document_extractor): xlsx file column int type error (#21408)
This commit is contained in:
parent
973b3854b4
commit
45146edb31
@ -451,7 +451,7 @@ def _extract_text_from_excel(file_content: bytes) -> str:
|
|||||||
df = df.applymap(lambda x: " ".join(str(x).splitlines()) if isinstance(x, str) else x) # type: ignore
|
df = df.applymap(lambda x: " ".join(str(x).splitlines()) if isinstance(x, str) else x) # type: ignore
|
||||||
|
|
||||||
# Combine multi-line text in column names into a single line
|
# Combine multi-line text in column names into a single line
|
||||||
df.columns = pd.Index([" ".join(col.splitlines()) for col in df.columns])
|
df.columns = pd.Index([" ".join(str(col).splitlines()) for col in df.columns])
|
||||||
|
|
||||||
# Manually construct the Markdown table
|
# Manually construct the Markdown table
|
||||||
markdown_table += _construct_markdown_table(df) + "\n\n"
|
markdown_table += _construct_markdown_table(df) + "\n\n"
|
||||||
|
@ -342,3 +342,26 @@ def test_extract_text_from_excel_all_sheets_fail(mock_excel_file):
|
|||||||
assert result == ""
|
assert result == ""
|
||||||
|
|
||||||
assert mock_excel_instance.parse.call_count == 2
|
assert mock_excel_instance.parse.call_count == 2
|
||||||
|
|
||||||
|
|
||||||
|
@patch("pandas.ExcelFile")
|
||||||
|
def test_extract_text_from_excel_numeric_type_column(mock_excel_file):
|
||||||
|
"""Test extracting text from Excel file with numeric column names."""
|
||||||
|
|
||||||
|
# Test numeric type column
|
||||||
|
data = {1: ["Test"], 1.1: ["Test"]}
|
||||||
|
|
||||||
|
df = pd.DataFrame(data)
|
||||||
|
|
||||||
|
# Mock ExcelFile
|
||||||
|
mock_excel_instance = Mock()
|
||||||
|
mock_excel_instance.sheet_names = ["Sheet1"]
|
||||||
|
mock_excel_instance.parse.return_value = df
|
||||||
|
mock_excel_file.return_value = mock_excel_instance
|
||||||
|
|
||||||
|
file_content = b"fake_excel_content"
|
||||||
|
result = _extract_text_from_excel(file_content)
|
||||||
|
|
||||||
|
expected_manual = "| 1.0 | 1.1 |\n| --- | --- |\n| Test | Test |\n\n"
|
||||||
|
|
||||||
|
assert expected_manual == result
|
||||||
|
Loading…
x
Reference in New Issue
Block a user