From adc982a624c7f45ff85ce5b8adae194a405327ff Mon Sep 17 00:00:00 2001
From: Sara Zan
Date: Mon, 31 Oct 2022 12:48:43 +0100
Subject: [PATCH] fix: do not reference package directory in `PDFToTextOCRConverter.convert()` (#3478)

* remove weird temp path from PDFToTextOCRConverter.convert()
* remove debug lines
* remove os import
---
 haystack/nodes/file_converter/pdf.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/haystack/nodes/file_converter/pdf.py b/haystack/nodes/file_converter/pdf.py
index 038927470..a1f3e16b0 100644
--- a/haystack/nodes/file_converter/pdf.py
+++ b/haystack/nodes/file_converter/pdf.py
@@ -1,6 +1,5 @@
 from typing import List, Optional, Dict, Any
 
-import os
 import logging
 import tempfile
 import subprocess
@@ -252,7 +251,7 @@ class PDFToTextOCRConverter(BaseConverter):
         try:
             images = convert_from_path(file_path)
             for image in images:
-                temp_img = tempfile.NamedTemporaryFile(dir=os.path.dirname(os.path.realpath(__file__)), suffix=".jpeg")
+                temp_img = tempfile.NamedTemporaryFile(suffix=".jpeg")
                 image.save(temp_img.name)
                 pages.append(self.image_2_text.convert(file_path=temp_img.name)[0].content)
         except Exception as exception: