#!/usr/bin/env python3
from subprocess import STDOUT, CalledProcessError, check_output
import sys
import os
import re
from functools import lru_cache


@lru_cache(maxsize=1)
def version():
    """Return the version string reported by ``tesseract --version``.

    The command's stdout and stderr are captured together as text, and the
    text following the leading ``tesseract`` word on the first line is
    returned. The result is memoized, so the external program is run at most
    once per successful call.

    Returns:
        The text matched after ``tesseract`` and one whitespace character.

    Exits:
        Prints a message and calls ``sys.exit(1)`` when the executable cannot
        be launched, when it exits with a non-zero status, or when its output
        does not start with the expected ``tesseract <version>`` prefix.
    """
    args_tess = ['tesseract', '--version']
    try:
        versions = check_output(
            args_tess,
            close_fds=True,
            universal_newlines=True,
            stderr=STDOUT,
        )
    except (CalledProcessError, FileNotFoundError):
        # A missing binary surfaces as FileNotFoundError, not
        # CalledProcessError; both must lead to the same diagnostic.
        print("Could not find Tesseract executable on system PATH.")
        sys.exit(1)

    found = re.match(r'tesseract\s(.+)', versions)
    if found is None:
        # Avoid an opaque AttributeError on ``None.group`` when the output
        # has an unexpected shape.
        print("Could not determine Tesseract version from its output.")
        sys.exit(1)
    return found.group(1)


@lru_cache(maxsize=1)
def languages():
    """Return the set of names printed by ``tesseract --list-langs``.

    The first line of the command's output is skipped (presumably a header
    line; confirm against the installed Tesseract) and every remaining line
    is stripped of surrounding whitespace. The result is memoized.

    Returns:
        A ``set`` of the stripped output lines after the first one.

    Raises:
        FileNotFoundError: If the ``tesseract`` executable cannot be found.
        CalledProcessError: If the command exits with a non-zero status.
    """
    args_tess = ['tesseract', '--list-langs']
    langs = check_output(
        args_tess,
        close_fds=True,
        universal_newlines=True,
        stderr=STDOUT,
    )
    return {lang.strip() for lang in langs.splitlines()[1:]}


# NOTE(review): the template body visible here is only blank lines; confirm
# that no markup was lost from this literal.
HOCR_TEMPLATE = '''

'''