From d234c75168dcb49866a6714aa232f37d56f72cab Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Mon, 30 Sep 2024 18:47:23 +0200 Subject: [PATCH] fix: make pypdf converter more robust (#8427) * fix: make `from_dict` of `PyPDFToDocument` more robust * chore: drop trailing space * converting method to static and making the comment shorter * reverting method to static --------- Co-authored-by: David S. Batista --- haystack/components/converters/pypdf.py | 6 ++++-- .../pypdf-converter-robust-from-dict-35ebe6aaab944b19.yaml | 5 +++++ test/components/converters/test_pypdf_to_document.py | 6 ++++++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/pypdf-converter-robust-from-dict-35ebe6aaab944b19.yaml diff --git a/haystack/components/converters/pypdf.py b/haystack/components/converters/pypdf.py index 30800cb5d..69962c543 100644 --- a/haystack/components/converters/pypdf.py +++ b/haystack/components/converters/pypdf.py @@ -108,8 +108,10 @@ class PyPDFToDocument: :returns: Deserialized component. """ - converter_class = deserialize_type(data["init_parameters"]["converter"]["type"]) - data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"]) + # the converter default is `None`, check if it was defined before deserializing + if "converter" in data["init_parameters"]: + converter_class = deserialize_type(data["init_parameters"]["converter"]["type"]) + data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"]) return default_from_dict(cls, data) @component.output_types(documents=List[Document]) diff --git a/releasenotes/notes/pypdf-converter-robust-from-dict-35ebe6aaab944b19.yaml b/releasenotes/notes/pypdf-converter-robust-from-dict-35ebe6aaab944b19.yaml new file mode 100644 index 000000000..bdde3f68e --- /dev/null +++ b/releasenotes/notes/pypdf-converter-robust-from-dict-35ebe6aaab944b19.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Make the `from_dict` method of the `PyPDFToDocument` more robust to cases when the converter is + not provided in the dictionary. diff --git a/test/components/converters/test_pypdf_to_document.py b/test/components/converters/test_pypdf_to_document.py index 005301925..ecaba4577 100644 --- a/test/components/converters/test_pypdf_to_document.py +++ b/test/components/converters/test_pypdf_to_document.py @@ -40,6 +40,12 @@ class TestPyPDFToDocument: assert isinstance(instance, PyPDFToDocument) assert isinstance(instance.converter, DefaultConverter) + def test_from_dict_no_converter(self): + data = {"type": "haystack.components.converters.pypdf.PyPDFToDocument", "init_parameters": {}} + instance = PyPDFToDocument.from_dict(data) + assert isinstance(instance, PyPDFToDocument) + assert isinstance(instance.converter, DefaultConverter) + @pytest.mark.integration def test_run(self, test_files_path, pypdf_converter): """