mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-14 09:03:34 +00:00
fix: make types work without installing pypdf (#6269)
* make types work without installing pypdf
* make pylint happy, keep pyright happy, hope mypy doesn't care
This commit is contained in:
parent
b4d8d1c904
commit
1b63cfc8b3
@ -19,8 +19,8 @@ class PyPDFConverter(Protocol):
|
|||||||
A protocol that defines a converter which takes a PdfReader object and converts it into a Document object.
|
A protocol that defines a converter which takes a PdfReader object and converts it into a Document object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def convert(self, reader: PdfReader) -> Document:
|
def convert(self, reader: "PdfReader") -> Document:
|
||||||
"""Convert a PdfReader instance to a Document instance."""
|
...
|
||||||
|
|
||||||
|
|
||||||
class DefaultConverter:
|
class DefaultConverter:
|
||||||
@ -28,7 +28,7 @@ class DefaultConverter:
|
|||||||
The default converter class that extracts text from a PdfReader object's pages and returns a Document.
|
The default converter class that extracts text from a PdfReader object's pages and returns a Document.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def convert(self, reader: PdfReader) -> Document:
|
def convert(self, reader: "PdfReader") -> Document:
|
||||||
"""Extract text from the PDF and return a Document object with the text content."""
|
"""Extract text from the PDF and return a Document object with the text content."""
|
||||||
text = "".join(page.extract_text() for page in reader.pages if page.extract_text())
|
text = "".join(page.extract_text() for page in reader.pages if page.extract_text())
|
||||||
return Document(content=text)
|
return Document(content=text)
|
||||||
@ -71,7 +71,7 @@ class PyPDFToDocument:
|
|||||||
|
|
||||||
return {"documents": documents}
|
return {"documents": documents}
|
||||||
|
|
||||||
def _get_pdf_reader(self, source: Union[str, Path, ByteStream]) -> PdfReader:
|
def _get_pdf_reader(self, source: Union[str, Path, ByteStream]) -> "PdfReader":
|
||||||
"""
|
"""
|
||||||
Creates a PdfReader object from a given source, which can be a file path or a ByteStream object.
|
Creates a PdfReader object from a given source, which can be a file path or a ByteStream object.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user