fix: fitz import switcher (#5012)

* fix pymupdf import switcher

* install pdf

* check after the import

* revert workflow change

* pylint

* pylint

* pylint again
This commit is contained in:
ZanSara 2023-05-24 18:58:40 +02:00 committed by GitHub
parent 929b8d1fb0
commit 44fd0cff7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,3 +1,4 @@
from haystack import is_imported
from haystack.nodes.file_converter.base import BaseConverter
from haystack.utils.import_utils import safe_import
@@ -10,6 +11,12 @@ from haystack.nodes.file_converter.txt import TextConverter
from haystack.nodes.file_converter.azure import AzureConverter
from haystack.nodes.file_converter.parsr import ParsrConverter
# Try to use PyMuPDF, if not available fall back to xpdf
from haystack.nodes.file_converter.pdf import PDFToTextConverter
if not is_imported("fitz"):
from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter # type: ignore # pylint: disable=reimported
MarkdownConverter = safe_import(
"haystack.nodes.file_converter.markdown", "MarkdownConverter", "preprocessing"
@@ -17,9 +24,3 @@ MarkdownConverter = safe_import(
ImageToTextConverter = safe_import(
"haystack.nodes.file_converter.image", "ImageToTextConverter", "ocr"
) # Has optional dependencies
# Try to use PyMuPDF, if not available fall back to xpdf
try:
from haystack.nodes.file_converter.pdf import PDFToTextConverter
except ImportError:
from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter # type: ignore