fix: do not reference package directory in PDFToTextOCRConverter.convert() (#3478)

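* remove the package-directory temp path (`dir=os.path.dirname(os.path.realpath(__file__))`) from PDFToTextOCRConverter.convert(); temporary page images are now created in the default temp directory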

* remove debug lines

* remove os import
This commit is contained in:
Sara Zan 2022-10-31 12:48:43 +01:00 committed by GitHub
parent 17cd79e2c8
commit adc982a624
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -1,6 +1,5 @@
from typing import List, Optional, Dict, Any
import os
import logging
import tempfile
import subprocess
@@ -252,7 +251,7 @@ class PDFToTextOCRConverter(BaseConverter):
try:
images = convert_from_path(file_path)
for image in images:
temp_img = tempfile.NamedTemporaryFile(dir=os.path.dirname(os.path.realpath(__file__)), suffix=".jpeg")
temp_img = tempfile.NamedTemporaryFile(suffix=".jpeg")
image.save(temp_img.name)
pages.append(self.image_2_text.convert(file_path=temp_img.name)[0].content)
except Exception as exception: