fix: make types work without installing pypdf (#6269)

* make types work without installing pypdf

* make pylint happy, keep pyright happy, hope mypy doesn't care
This commit is contained in:
Massimiliano Pippi 2023-11-09 20:02:22 +01:00 committed by GitHub
parent b4d8d1c904
commit 1b63cfc8b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -19,8 +19,8 @@ class PyPDFConverter(Protocol):
A protocol that defines a converter which takes a PdfReader object and converts it into a Document object.
"""
def convert(self, reader: PdfReader) -> Document:
"""Convert a PdfReader instance to a Document instance."""
def convert(self, reader: "PdfReader") -> Document:
...
class DefaultConverter:
@@ -28,7 +28,7 @@ class DefaultConverter:
The default converter class that extracts text from a PdfReader object's pages and returns a Document.
"""
def convert(self, reader: PdfReader) -> Document:
def convert(self, reader: "PdfReader") -> Document:
"""Extract text from the PDF and return a Document object with the text content."""
text = "".join(page.extract_text() for page in reader.pages if page.extract_text())
return Document(content=text)
@@ -71,7 +71,7 @@ class PyPDFToDocument:
return {"documents": documents}
def _get_pdf_reader(self, source: Union[str, Path, ByteStream]) -> PdfReader:
def _get_pdf_reader(self, source: Union[str, Path, ByteStream]) -> "PdfReader":
"""
Creates a PdfReader object from a given source, which can be a file path or a ByteStream object.