OCRmyPDF/tests/plugins/tesseract_crash.py

64 lines
2.4 KiB
Python
Raw Normal View History

2020-06-02 02:36:41 -07:00
# © 2020 James R. Barlow: github.com/jbarlow83
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import signal
from subprocess import CalledProcessError
from unittest.mock import patch
from ocrmypdf import hookimpl
from ocrmypdf.builtin_plugins.tesseract_ocr import TesseractOcrEngine
def raise_crash(*args, **kwargs):
    """Unconditionally fail the way an aborted tesseract process would.

    Any positional or keyword arguments are accepted and ignored, so this
    function can stand in for a callable with an arbitrary signature.

    Raises:
        CalledProcessError: always. The return code is ``128 + SIGABRT``,
            the command is ``'tesseract'``, captured stdout is empty, and
            stderr carries a ``std::bad_alloc`` termination message.
    """
    raise CalledProcessError(
        128 + signal.SIGABRT,
        'tesseract',
        output=b"",
        stderr=b"libc++abi.dylib: terminating with uncaught exception of type "
        + b"std::bad_alloc: std::bad_alloc",
    )
class CrashOcrEngine(TesseractOcrEngine):
    """Tesseract OCR engine variant that fails as if tesseract had crashed.

    Each overridden entry point delegates to the matching
    ``TesseractOcrEngine`` static method while ``ocrmypdf._exec.tesseract.run``
    is replaced by ``raise_crash``; any call the parent makes through that
    name therefore raises instead of launching tesseract.
    """

    @staticmethod
    def _crashing_tesseract():
        """Return a patcher that swaps tesseract's ``run`` for ``raise_crash``.

        The patch is only active while the returned object is used as a
        context manager.
        """
        return patch('ocrmypdf._exec.tesseract.run', new=raise_crash)

    @staticmethod
    def get_orientation(input_file, options):
        """Call the parent ``get_orientation`` with tesseract's ``run`` patched.

        Returns whatever the parent returns if it completes; propagates the
        exception from ``raise_crash`` if the parent reaches the patched call.
        """
        with CrashOcrEngine._crashing_tesseract():
            return TesseractOcrEngine.get_orientation(input_file, options)

    @staticmethod
    def generate_hocr(input_file, output_hocr, output_text, options):
        """Call the parent ``generate_hocr`` with tesseract's ``run`` patched.

        The parent's return value is intentionally not forwarded.
        """
        with CrashOcrEngine._crashing_tesseract():
            TesseractOcrEngine.generate_hocr(
                input_file, output_hocr, output_text, options
            )

    @staticmethod
    def generate_pdf(input_file, output_pdf, output_text, options):
        """Call the parent ``generate_pdf`` with tesseract's ``run`` patched.

        The parent's return value is intentionally not forwarded.
        """
        with CrashOcrEngine._crashing_tesseract():
            TesseractOcrEngine.generate_pdf(
                input_file, output_pdf, output_text, options
            )
@hookimpl
def get_ocr_engine():
    """Hook implementation that supplies the crash-simulating OCR engine.

    Returns:
        CrashOcrEngine: a newly constructed instance on every call.
    """
    return CrashOcrEngine()