From e4c3c3d4233d079ff9496a90b6b19d5c767e3790 Mon Sep 17 00:00:00 2001 From: Malte Pietsch Date: Wed, 1 Sep 2021 17:16:05 +0200 Subject: [PATCH] Fix CI (introduced by OCR PR #1349) (#1399) * satisfy mypy * add import --- haystack/file_converter/image.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/haystack/file_converter/image.py b/haystack/file_converter/image.py index 43f2dff83..3c4e3a9ae 100644 --- a/haystack/file_converter/image.py +++ b/haystack/file_converter/image.py @@ -1,7 +1,7 @@ import logging import subprocess from pathlib import Path -from typing import List, Optional, Dict, Any +from typing import List, Optional, Dict, Any, Union import pytesseract from PIL.PpmImagePlugin import PpmImageFile @@ -78,7 +78,7 @@ class ImageToTextConverter(BaseConverter): def convert( self, - file_path: Path, + file_path: Union[Path,str], meta: Optional[Dict[str, str]] = None, remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None, @@ -101,6 +101,7 @@ class ImageToTextConverter(BaseConverter): not one of the valid languages, then it might likely be encoding error resulting in garbled text. """ + file_path = Path(file_path) image = Image.open(file_path) pages = self._image_to_text(image) if remove_numeric_tables is None: