Add tesseract version check

This commit is contained in:
Jim Barlow 2015-07-23 17:06:00 -07:00
parent cffd4623ca
commit 68ecaac9cc
2 changed files with 46 additions and 0 deletions

View File

@ -28,6 +28,7 @@ import ruffus.cmdline as cmdline
from .hocrtransform import HocrTransform
from .pageinfo import pdf_get_all_pageinfo
from .pdfa import generate_pdfa_def
from .tesseract import TESS_VERSION
warnings.simplefilter('ignore', pypdf.utils.PdfReadWarning)
@ -38,6 +39,30 @@ JHOVE_PATH = os.path.realpath(os.path.join(BASEDIR, '..', 'jhove'))
# Locations of the JHOVE jar and its configuration file, both resolved
# relative to JHOVE_PATH.
JHOVE_JAR = os.path.join(JHOVE_PATH, 'bin', 'JhoveApp.jar')
JHOVE_CFG = os.path.join(JHOVE_PATH, 'conf', 'jhove.conf')

# Process exit statuses, one per failure category.
EXIT_BAD_ARGS = 1
EXIT_BAD_INPUT_FILE = 2
EXIT_MISSING_DEPENDENCY = 3
EXIT_INVALID_OUTPUT_PDFA = 4
EXIT_FILE_ACCESS_ERROR = 5
EXIT_OTHER_ERROR = 15
# -------------
# External dependencies
MINIMUM_TESS_VERSION = '3.02.02'


def _version_tuple(version):
    """Convert a dotted version string into a tuple of integers.

    Only the leading decimal digits of each dot-separated field are used, so
    a suffixed field such as ``'00dev'`` is read as ``0``; a field with no
    leading digits also counts as ``0``.  Comparing the resulting tuples
    orders versions numerically, whereas comparing the raw strings orders
    them lexicographically (for example ``'10.0.0' < '3.02.02'`` is true
    for strings).
    """
    fields = []
    for field in version.strip().split('.'):
        digits = ''
        for ch in field:
            if ch not in '0123456789':
                break
            digits += ch
        fields.append(int(digits) if digits else 0)
    return tuple(fields)


# Refuse to start when the installed tesseract is older than the minimum.
if _version_tuple(TESS_VERSION) < _version_tuple(MINIMUM_TESS_VERSION):
    print(
        "Please install tesseract {0} or newer "
        "(currently installed version is {1})".format(
            MINIMUM_TESS_VERSION, TESS_VERSION),
        file=sys.stderr)
    sys.exit(EXIT_MISSING_DEPENDENCY)
# -------------
# Parser
# Create the command-line argument parser through cmdline.get_argparse,
# supplying the program name and the one-line description.
parser = cmdline.get_argparse(
    description="Generate searchable PDF file from an image-only PDF file.",
    prog="OCRmyPDF",
)

21
src/tesseract.py Normal file
View File

@ -0,0 +1,21 @@
#!/usr/bin/env python3
from cffi import FFI
from ctypes.util import find_library
import sys
ffi = FFI()


def _load_libtesseract():
    """Locate and open the tesseract shared library.

    Returns the cffi library handle, or None when no candidate name can be
    located or opened.

    ctypes.util.find_library expects the bare library name (no ``lib``
    prefix), so 'tesseract' is tried first; the prefixed name that was
    originally used here is kept as a fallback.  A lookup that returns None
    is skipped explicitly, because ffi.dlopen(None) does not fail -- it
    opens the standard C library, which has no TessVersion symbol.
    """
    for name in ('tesseract', 'libtesseract'):
        path = find_library(name)
        if path is None:
            continue
        try:
            return ffi.dlopen(path)
        except Exception:
            # Best effort: fall through to the next candidate name.
            continue
    return None


libtess = _load_libtesseract()
if libtess is None:
    print("Could not find Tesseract 3.02.02", file=sys.stderr)
    sys.exit(1)

# Declare the single C API function this module calls.
ffi.cdef('''
const char* TessVersion();
''')

# TessVersion() returns a C string; copy it into a Python str.
cstr_version = libtess.TessVersion()
TESS_VERSION = ffi.string(cstr_version).decode('ascii')

__all__ = ['TESS_VERSION']