mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-09 05:37:25 +00:00
fix: make pypdf converter more robust (#8427)
* fix: make `from_dict` of `PyPDFToDocument` more robust
* chore: drop trailing space
* converting method to static and making the comment shorter
* reverting method to static

---------

Co-authored-by: David S. Batista <dsbatista@gmail.com>
This commit is contained in:
parent
651244225b
commit
d234c75168
@ -108,8 +108,10 @@ class PyPDFToDocument:
|
||||
:returns:
|
||||
Deserialized component.
|
||||
"""
|
||||
converter_class = deserialize_type(data["init_parameters"]["converter"]["type"])
|
||||
data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"])
|
||||
# the converter default is `None`, check if it was defined before deserializing
|
||||
if "converter" in data["init_parameters"]:
|
||||
converter_class = deserialize_type(data["init_parameters"]["converter"]["type"])
|
||||
data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"])
|
||||
return default_from_dict(cls, data)
|
||||
|
||||
@component.output_types(documents=List[Document])
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Make the `from_dict` method of the `PyPDFToDocument` component more robust to cases where the converter is
|
||||
not provided in the dictionary.
|
||||
@ -40,6 +40,12 @@ class TestPyPDFToDocument:
|
||||
assert isinstance(instance, PyPDFToDocument)
|
||||
assert isinstance(instance.converter, DefaultConverter)
|
||||
|
||||
def test_from_dict_no_converter(self):
|
||||
data = {"type": "haystack.components.converters.pypdf.PyPDFToDocument", "init_parameters": {}}
|
||||
instance = PyPDFToDocument.from_dict(data)
|
||||
assert isinstance(instance, PyPDFToDocument)
|
||||
assert isinstance(instance.converter, DefaultConverter)
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_run(self, test_files_path, pypdf_converter):
|
||||
"""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user