mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-20 12:03:38 +00:00
fix: Fix JSONConverter to properly skip files that are not utf-8 encoded (#8775)
* Small fix * Add reno * Trying out license header fix here
This commit is contained in:
parent
e3dc164625
commit
bba84e5517
@ -194,6 +194,7 @@ class JSONConverter:
|
|||||||
source=source.meta["file_path"],
|
source=source.meta["file_path"],
|
||||||
error=exc,
|
error=exc,
|
||||||
)
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
meta_fields = self._meta_fields or set()
|
meta_fields = self._meta_fields or set()
|
||||||
|
|
||||||
|
|||||||
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixed JSONConverter to properly skip converting JSON files that are not utf-8 encoded.
|
||||||
@ -236,6 +236,25 @@ def test_run_with_bad_filter(tmpdir, caplog):
|
|||||||
assert result == {"documents": []}
|
assert result == {"documents": []}
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_with_bad_encoding(tmpdir, caplog):
|
||||||
|
test_file = Path(tmpdir / "test_file.json")
|
||||||
|
test_file.write_text(json.dumps(test_data[0]), "utf-16")
|
||||||
|
|
||||||
|
sources = [test_file]
|
||||||
|
converter = JSONConverter(".laureates")
|
||||||
|
|
||||||
|
caplog.clear()
|
||||||
|
with caplog.at_level(logging.WARNING):
|
||||||
|
result = converter.run(sources=sources)
|
||||||
|
|
||||||
|
records = caplog.records
|
||||||
|
assert len(records) == 1
|
||||||
|
assert records[0].msg.startswith(
|
||||||
|
f"Failed to extract text from {test_file}. Skipping it. Error: 'utf-8' codec can't decode byte"
|
||||||
|
)
|
||||||
|
assert result == {"documents": []}
|
||||||
|
|
||||||
|
|
||||||
def test_run_with_single_meta(tmpdir):
|
def test_run_with_single_meta(tmpdir):
|
||||||
first_test_file = Path(tmpdir / "first_test_file.json")
|
first_test_file = Path(tmpdir / "first_test_file.json")
|
||||||
second_test_file = Path(tmpdir / "second_test_file.json")
|
second_test_file = Path(tmpdir / "second_test_file.json")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user