fix: make PyPDF backward compatible (#7996)

* fix: make PyPDF backward compatible

* Add release note

---------

Co-authored-by: Vladimir Blagojevic <dovlex@gmail.com>
This commit is contained in:
Tobias Wochinger 2024-07-09 10:08:37 +02:00 committed by GitHub
parent cd8a5b98fe
commit 58b48e36eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 18 additions and 2 deletions

View File

@ -106,8 +106,14 @@ class PyPDFToDocument:
:returns:
Deserialized component.
"""
converter_class = deserialize_type(data["init_parameters"]["converter"]["type"])
if converter := data["init_parameters"].get("converter"):
converter_class = deserialize_type(converter["type"])
data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"])
else:
# Ensures backwards compatibility with Pipelines dumped with < 2.3.0
data["init_parameters"]["converter"] = DefaultConverter()
return default_from_dict(cls, data)
@component.output_types(documents=List[Document])

View File

@ -0,0 +1,4 @@
---
enhancements:
- |
Improved the `PyPDFToDocument` converter to ensure backwards compatibility with Pipelines dumped with versions older than 2.3.0. When the serialized data does not specify a converter, `from_dict` now falls back to the `DefaultConverter` instead of failing, improving the component's robustness and ease of use.

View File

@ -40,6 +40,12 @@ class TestPyPDFToDocument:
assert isinstance(instance, PyPDFToDocument)
assert isinstance(instance.converter, DefaultConverter)
def test_from_dict_no_converter(self):
    """Check that `from_dict` works when `init_parameters` has no converter entry.

    The deserialized component is expected to use a `DefaultConverter`.
    """
    serialized = {
        "type": "haystack.components.converters.pypdf.PyPDFToDocument",
        "init_parameters": {},
    }
    component = PyPDFToDocument.from_dict(serialized)
    assert isinstance(component, PyPDFToDocument)
    assert isinstance(component.converter, DefaultConverter)
@pytest.mark.integration
def test_run(self, test_files_path, pypdf_converter):
"""