From 44fd0cff7a923752f5a9ee71e348af4df2bea87c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 24 May 2023 18:58:40 +0200 Subject: [PATCH] fix: `fitz` import switcher (#5012) * fix pymupdf import switcher * install pdf * check after the import * revert workflow change * pylint * pylint * pylint again --- haystack/nodes/file_converter/__init__.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/haystack/nodes/file_converter/__init__.py b/haystack/nodes/file_converter/__init__.py index f7664d84c..f8c51b5ac 100644 --- a/haystack/nodes/file_converter/__init__.py +++ b/haystack/nodes/file_converter/__init__.py @@ -1,3 +1,4 @@ +from haystack import is_imported from haystack.nodes.file_converter.base import BaseConverter from haystack.utils.import_utils import safe_import @@ -10,6 +11,12 @@ from haystack.nodes.file_converter.txt import TextConverter from haystack.nodes.file_converter.azure import AzureConverter from haystack.nodes.file_converter.parsr import ParsrConverter +# Try to use PyMuPDF, if not available fall back to xpdf +from haystack.nodes.file_converter.pdf import PDFToTextConverter + +if not is_imported("fitz"): + from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter # type: ignore # pylint: disable=reimported + MarkdownConverter = safe_import( "haystack.nodes.file_converter.markdown", "MarkdownConverter", "preprocessing" @@ -17,9 +24,3 @@ MarkdownConverter = safe_import( ImageToTextConverter = safe_import( "haystack.nodes.file_converter.image", "ImageToTextConverter", "ocr" ) # Has optional dependencies - -# Try to use PyMuPDF, if not available fall back to xpdf -try: - from haystack.nodes.file_converter.pdf import PDFToTextConverter -except ImportError: - from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter # type: ignore