mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-01-05 11:41:19 +00:00
Environment variables can now override default programs
This commit is contained in:
parent
276f421c44
commit
1731ce2a44
@ -6,6 +6,18 @@ Please always read this file before installing the package
|
||||
Download software here: https://github.com/jbarlow83/OCRmyPDF/tags
|
||||
|
||||
|
||||
Latest:
|
||||
=====
|
||||
|
||||
Changes
|
||||
-------
|
||||
|
||||
- Fixed bug where a successful repair of PDF file errors led to an abort (thanks to @shemgp)
|
||||
- You can now override the location of external programs OCRmyPDF uses by setting
|
||||
environment variables of the form OCRMYPDF_PROGRAMNAME, e.g. OCRMYPDF_TESSERACT will
|
||||
override system Tesseract.
|
||||
|
||||
|
||||
v3.1:
|
||||
=====
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
from enum import IntEnum
|
||||
import os
|
||||
|
||||
|
||||
class ExitCode(IntEnum):
|
||||
@ -10,3 +11,8 @@ class ExitCode(IntEnum):
|
||||
file_access_error = 5
|
||||
already_done_ocr = 6
|
||||
other_error = 15
|
||||
|
||||
|
||||
def get_program(name):
    """Return the executable to invoke for the external program *name*.

    The environment variable ``OCRMYPDF_<NAME>`` (the program name
    upper-cased, e.g. ``OCRMYPDF_TESSERACT``) overrides the default.  When
    that variable is unset or set to an empty string, *name* itself is
    returned unchanged.
    """
    envvar = 'OCRMYPDF_' + name.upper()
    # An empty override can never name a runnable program, so treat it the
    # same as "not set" and fall back to the default name.
    return os.environ.get(envvar) or name
|
||||
|
||||
@ -4,12 +4,13 @@
|
||||
from tempfile import NamedTemporaryFile
|
||||
from subprocess import Popen, PIPE, check_call
|
||||
from shutil import copy
|
||||
from . import get_program
|
||||
|
||||
|
||||
def rasterize_pdf(input_file, output_file, xres, yres, raster_device, log):
|
||||
with NamedTemporaryFile(delete=True) as tmp:
|
||||
args_gs = [
|
||||
'gs',
|
||||
get_program('gs'),
|
||||
'-dQUIET',
|
||||
'-dBATCH',
|
||||
'-dNOPAUSE',
|
||||
@ -36,7 +37,7 @@ def rasterize_pdf(input_file, output_file, xres, yres, raster_device, log):
|
||||
def generate_pdfa(pdf_pages, output_file, threads=1):
|
||||
with NamedTemporaryFile(delete=True) as gs_pdf:
|
||||
args_gs = [
|
||||
"gs",
|
||||
get_program("gs"),
|
||||
"-dQUIET",
|
||||
"-dBATCH",
|
||||
"-dNOPAUSE",
|
||||
|
||||
@ -8,6 +8,7 @@ from string import Template
|
||||
from subprocess import Popen, PIPE
|
||||
import os
|
||||
import codecs
|
||||
from . import get_program
|
||||
|
||||
|
||||
# This is a template written in PostScript which is needed to create PDF/A
|
||||
@ -95,7 +96,8 @@ def _get_pdfa_def(icc_profile, icc_identifier, pdfmark):
|
||||
def _get_postscript_icc_path():
|
||||
"Parse Ghostscript's help message to find where iccprofiles are stored"
|
||||
|
||||
p_gs = Popen(['gs', '--help'], close_fds=True, universal_newlines=True,
|
||||
p_gs = Popen([get_program('gs'), '--help'], close_fds=True,
|
||||
universal_newlines=True,
|
||||
stdout=PIPE, stderr=PIPE)
|
||||
out, _ = p_gs.communicate()
|
||||
lines = out.splitlines()
|
||||
|
||||
@ -5,12 +5,12 @@ from subprocess import CalledProcessError, check_output, STDOUT, check_call
|
||||
import sys
|
||||
import os
|
||||
|
||||
from . import ExitCode
|
||||
from . import ExitCode, get_program
|
||||
|
||||
|
||||
def check(input_file, log):
|
||||
args_qpdf = [
|
||||
'qpdf',
|
||||
get_program('qpdf'),
|
||||
'--check',
|
||||
input_file
|
||||
]
|
||||
@ -34,7 +34,7 @@ def check(input_file, log):
|
||||
|
||||
def repair(input_file, output_file, log):
|
||||
args_qpdf = [
|
||||
'qpdf', input_file, output_file
|
||||
get_program('qpdf'), input_file, output_file
|
||||
]
|
||||
try:
|
||||
check_output(args_qpdf, stderr=STDOUT, universal_newlines=True)
|
||||
@ -60,7 +60,7 @@ def repair(input_file, output_file, log):
|
||||
|
||||
def get_npages(input_file):
|
||||
pages = check_output(
|
||||
['qpdf', '--show-npages', input_file],
|
||||
[get_program('qpdf'), '--show-npages', input_file],
|
||||
universal_newlines=True, close_fds=True)
|
||||
return int(pages)
|
||||
|
||||
@ -73,7 +73,7 @@ def split_pages(input_file, work_folder, npages):
|
||||
"""
|
||||
for n in range(int(npages)):
|
||||
args_qpdf = [
|
||||
'qpdf', input_file,
|
||||
get_program('qpdf'), input_file,
|
||||
'--pages', input_file, '{0}'.format(n + 1), '--',
|
||||
os.path.join(work_folder, '{0:06d}.page.pdf'.format(n + 1))
|
||||
]
|
||||
|
||||
@ -6,7 +6,7 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
from functools import lru_cache
|
||||
from . import ExitCode
|
||||
from . import ExitCode, get_program
|
||||
|
||||
from subprocess import Popen, PIPE, CalledProcessError, \
|
||||
TimeoutExpired, check_output, STDOUT
|
||||
@ -42,7 +42,7 @@ HOCR_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
|
||||
@lru_cache(maxsize=1)
|
||||
def version():
|
||||
args_tess = [
|
||||
'tesseract',
|
||||
get_program('tesseract'),
|
||||
'--version'
|
||||
]
|
||||
try:
|
||||
@ -60,7 +60,7 @@ def version():
|
||||
@lru_cache(maxsize=1)
|
||||
def languages():
|
||||
args_tess = [
|
||||
'tesseract',
|
||||
get_program('tesseract'),
|
||||
'--list-langs'
|
||||
]
|
||||
try:
|
||||
@ -82,7 +82,7 @@ def generate_hocr(input_file, output_hocr, language: list, tessconfig: list,
|
||||
badxml = os.path.splitext(output_hocr)[0] + '.badxml'
|
||||
|
||||
args_tesseract = [
|
||||
'tesseract',
|
||||
get_program('tesseract'),
|
||||
'-l', '+'.join(language),
|
||||
input_file,
|
||||
badxml,
|
||||
@ -147,7 +147,7 @@ def generate_pdf(input_image, skip_pdf, output_pdf, language: list,
|
||||
'''
|
||||
|
||||
args_tesseract = [
|
||||
'tesseract',
|
||||
get_program('tesseract'),
|
||||
'-l', '+'.join(language),
|
||||
input_image,
|
||||
os.path.splitext(output_pdf)[0], # Tesseract appends suffix
|
||||
|
||||
@ -8,13 +8,13 @@ from tempfile import NamedTemporaryFile
|
||||
import sys
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from . import ExitCode
|
||||
from . import ExitCode, get_program
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def version():
|
||||
args_unpaper = [
|
||||
'unpaper',
|
||||
get_program('unpaper'),
|
||||
'--version'
|
||||
]
|
||||
p_unpaper = Popen(args_unpaper, close_fds=True, universal_newlines=True,
|
||||
@ -33,7 +33,7 @@ except ImportError:
|
||||
|
||||
def run(input_file, output_file, dpi, log, mode_args):
|
||||
args_unpaper = [
|
||||
'unpaper',
|
||||
get_program('unpaper'),
|
||||
'-v',
|
||||
'--dpi', str(dpi)
|
||||
] + mode_args
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user