diff --git a/haystack/components/converters/pypdf.py b/haystack/components/converters/pypdf.py index 8f5aa5ce4..9f559d088 100644 --- a/haystack/components/converters/pypdf.py +++ b/haystack/components/converters/pypdf.py @@ -106,8 +106,14 @@ class PyPDFToDocument: :returns: Deserialized component. """ - converter_class = deserialize_type(data["init_parameters"]["converter"]["type"]) - data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"]) + + if converter := data["init_parameters"].get("converter"): + converter_class = deserialize_type(converter["type"]) + data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"]) + else: + # Ensures backwards compatibility with Pipelines dumped with < 2.3.0 + data["init_parameters"]["converter"] = DefaultConverter() + return default_from_dict(cls, data) @component.output_types(documents=List[Document]) diff --git a/releasenotes/notes/improve-pypdf-backwards-compatibility-bcc75871005e9aba.yaml b/releasenotes/notes/improve-pypdf-backwards-compatibility-bcc75871005e9aba.yaml new file mode 100644 index 000000000..48a46dfbc --- /dev/null +++ b/releasenotes/notes/improve-pypdf-backwards-compatibility-bcc75871005e9aba.yaml @@ -0,0 +1,4 @@ +--- +enhancements: + - | + Enhanced the PyPDF converter to ensure backwards compatibility with Pipelines dumped with versions older than 2.3.0. When the serialized data does not include a `converter`, `PyPDFToDocument.from_dict` now falls back to the `DefaultConverter` instead of failing during deserialization. 
diff --git a/test/components/converters/test_pypdf_to_document.py b/test/components/converters/test_pypdf_to_document.py index 005301925..ecaba4577 100644 --- a/test/components/converters/test_pypdf_to_document.py +++ b/test/components/converters/test_pypdf_to_document.py @@ -40,6 +40,12 @@ class TestPyPDFToDocument: assert isinstance(instance, PyPDFToDocument) assert isinstance(instance.converter, DefaultConverter) + def test_from_dict_no_converter(self): + data = {"type": "haystack.components.converters.pypdf.PyPDFToDocument", "init_parameters": {}} + instance = PyPDFToDocument.from_dict(data) + assert isinstance(instance, PyPDFToDocument) + assert isinstance(instance.converter, DefaultConverter) + @pytest.mark.integration def test_run(self, test_files_path, pypdf_converter): """