Refactor qpdf subprocess calls into module

This commit is contained in:
James R. Barlow 2015-12-17 08:19:53 -08:00
parent 4ca243e490
commit 53a7c0e668
2 changed files with 35 additions and 22 deletions

View File

@ -35,6 +35,7 @@ from .pageinfo import pdf_get_all_pageinfo
from .pdfa import generate_pdfa_def
from . import ghostscript
from . import tesseract
from . import qpdf
from . import ExitCode
warnings.simplefilter('ignore', pypdf.utils.PdfReadWarning)
@ -352,29 +353,8 @@ def repair_pdf(
log,
pdfinfo,
pdfinfo_lock):
args_qpdf = [
'qpdf', input_file, output_file
]
try:
out = check_output(args_qpdf, stderr=STDOUT, universal_newlines=True)
except CalledProcessError as e:
exit_with_error = True
if e.returncode == 2:
print("{0}: not a valid PDF, and could not repair it.".format(
options.input_file))
print("Details:")
print(e.output)
elif e.returncode == 3 and e.output.find("operation succeeded"):
exit_with_error = False
out = e.output
print(e.output)
else:
print(e.output)
if exit_with_error:
sys.exit(ExitCode.input_file)
log.debug(out)
qpdf.repair(input_file, output_file, log)
with pdfinfo_lock:
pdfinfo.extend(pdf_get_all_pageinfo(output_file))
log.info(pdfinfo)

33
ocrmypdf/qpdf.py Normal file
View File

@ -0,0 +1,33 @@
#!/usr/bin/env python3
# © 2015 James R. Barlow: github.com/jbarlow83
import sys
from subprocess import CalledProcessError, check_output, STDOUT

from . import ExitCode
def repair(input_file, output_file, log):
    """Run ``qpdf`` to rewrite *input_file* as *output_file*.

    ``qpdf input_file output_file`` is executed with stderr merged into
    stdout.  A zero exit status is a silent success.  Exit status 3 whose
    output contains "operation succeeded" is also accepted: the output is
    logged at debug level, echoed to stdout, and the function returns.

    Args:
        input_file: Path of the PDF handed to qpdf.
        output_file: Path qpdf is asked to write.
        log: Logger-like object; only ``log.debug`` is used.

    Raises:
        SystemExit: With ``ExitCode.input_file`` when qpdf exits with
            status 2, and with ``ExitCode.unknown`` for any other
            unaccepted non-zero exit status.
    """
    args_qpdf = ['qpdf', input_file, output_file]

    try:
        check_output(args_qpdf, stderr=STDOUT, universal_newlines=True)
    except CalledProcessError as e:
        # Use substring membership rather than str.find(): find() returns -1
        # (truthy) when the text is missing and 0 (falsy) when the output
        # starts with it, which inverts the intended check in both cases.
        if e.returncode == 3 and "operation succeeded" in e.output:
            log.debug('qpdf found and fixed errors:')
            log.debug(e.output)
            print(e.output)
            return

        if e.returncode == 2:
            print("{0}: not a valid PDF, and could not repair it.".format(
                input_file))
            print("Details:")
            print(e.output)
            sys.exit(ExitCode.input_file)
        else:
            print("{0}: unknown error".format(
                input_file))
            print(e.output)
            sys.exit(ExitCode.unknown)