From 1b63cfc8b3becceabebd20cda0af96e7eb2ce296 Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Thu, 9 Nov 2023 20:02:22 +0100 Subject: [PATCH] fix: make types work without installing pypdf (#6269) * make types work without installing pypdf * make pylint happy, keep pyright happy, hope mypy doesn't care --- haystack/preview/components/file_converters/pypdf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/haystack/preview/components/file_converters/pypdf.py b/haystack/preview/components/file_converters/pypdf.py index 6ed9c1193..2568b9bc4 100644 --- a/haystack/preview/components/file_converters/pypdf.py +++ b/haystack/preview/components/file_converters/pypdf.py @@ -19,8 +19,8 @@ class PyPDFConverter(Protocol): A protocol that defines a converter which takes a PdfReader object and converts it into a Document object. """ - def convert(self, reader: PdfReader) -> Document: - """Convert a PdfReader instance to a Document instance.""" + def convert(self, reader: "PdfReader") -> Document: + ... class DefaultConverter: @@ -28,7 +28,7 @@ class DefaultConverter: The default converter class that extracts text from a PdfReader object's pages and returns a Document. """ - def convert(self, reader: PdfReader) -> Document: + def convert(self, reader: "PdfReader") -> Document: """Extract text from the PDF and return a Document object with the text content.""" text = "".join(page.extract_text() for page in reader.pages if page.extract_text()) return Document(content=text) @@ -71,7 +71,7 @@ class PyPDFToDocument: return {"documents": documents} - def _get_pdf_reader(self, source: Union[str, Path, ByteStream]) -> PdfReader: + def _get_pdf_reader(self, source: Union[str, Path, ByteStream]) -> "PdfReader": """ Creates a PdfReader object from a given source, which can be a file path or a ByteStream object.