fix: make pypdf converter more robust (#8427)

* fix: make `from_dict` of `PyPDFToDocument` more robust

* chore: drop trailing space

* converting method to static and making the comment shorter

* reverting the conversion of the method to static

---------

Co-authored-by: David S. Batista <dsbatista@gmail.com>
This commit is contained in:
Tobias Wochinger 2024-09-30 18:47:23 +02:00 committed by GitHub
parent 651244225b
commit d234c75168
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 15 additions and 2 deletions

View File

@ -108,8 +108,10 @@ class PyPDFToDocument:
:returns:
Deserialized component.
"""
converter_class = deserialize_type(data["init_parameters"]["converter"]["type"])
data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"])
# the converter default is `None`, check if it was defined before deserializing
if "converter" in data["init_parameters"]:
converter_class = deserialize_type(data["init_parameters"]["converter"]["type"])
data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"])
return default_from_dict(cls, data)
@component.output_types(documents=List[Document])

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Make the `from_dict` method of the `PyPDFToDocument` component more robust when the converter is
not provided in the dictionary.

View File

@ -40,6 +40,12 @@ class TestPyPDFToDocument:
assert isinstance(instance, PyPDFToDocument)
assert isinstance(instance.converter, DefaultConverter)
def test_from_dict_no_converter(self):
    """
    Deserialize a component dictionary whose `init_parameters` has no `converter` entry.

    The resulting instance must be a `PyPDFToDocument` whose `converter` attribute
    is a `DefaultConverter`.
    """
    # Empty init parameters: the "converter" key is deliberately absent.
    init_params = {}
    serialized = {
        "type": "haystack.components.converters.pypdf.PyPDFToDocument",
        "init_parameters": init_params,
    }

    component = PyPDFToDocument.from_dict(serialized)

    assert isinstance(component, PyPDFToDocument)
    assert isinstance(component.converter, DefaultConverter)
@pytest.mark.integration
def test_run(self, test_files_path, pypdf_converter):
"""