mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-01-04 11:12:29 +00:00
Add leptonica deskew
This commit is contained in:
parent
b92f8e43f2
commit
4dc0370c57
@ -13,8 +13,11 @@ import argparse
|
||||
import ctypes as C
|
||||
import sys
|
||||
import os
|
||||
import logging
|
||||
from tempfile import TemporaryFile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def stderr(*objs):
|
||||
"""Python 2/3 compatible print to stderr.
|
||||
@ -115,13 +118,13 @@ class LeptonicaErrorTrap(object):
|
||||
|
||||
# If there are Python errors, let them bubble up
|
||||
if exc_type:
|
||||
stderr(leptonica_output)
|
||||
logger.warning(leptonica_output)
|
||||
return False
|
||||
|
||||
# If there are Leptonica errors, wrap them in Python exceptions
|
||||
if 'Error' in leptonica_output:
|
||||
if 'image file not found' in leptonica_output:
|
||||
raise LeptonicaIOError()
|
||||
raise FileNotFoundError()
|
||||
if 'pixWrite: stream not opened' in leptonica_output:
|
||||
raise LeptonicaIOError()
|
||||
raise LeptonicaError(leptonica_output)
|
||||
@ -231,24 +234,22 @@ def getLeptonicaVersion():
|
||||
return lept.getLeptonicaVersion().decode()
|
||||
|
||||
|
||||
def deskew(args):
|
||||
def deskew(infile, outfile, dpi):
|
||||
try:
|
||||
pix_source = pixRead(args.infile)
|
||||
pix_source = pixRead(infile)
|
||||
except LeptonicaIOError:
|
||||
stderr("Failed to open file: %s" % args.infile)
|
||||
sys.exit(2)
|
||||
raise LeptonicaIOError("Failed to open file: %s" % infile)
|
||||
|
||||
if args.dpi < 150:
|
||||
if dpi < 150:
|
||||
reduction_factor = 1 # Don't downsample too much if DPI is already low
|
||||
else:
|
||||
reduction_factor = 0 # Use default
|
||||
pix_deskewed = pixDeskew(pix_source, reduction_factor)
|
||||
|
||||
try:
|
||||
pixWriteImpliedFormat(args.outfile, pix_deskewed)
|
||||
pixWriteImpliedFormat(outfile, pix_deskewed)
|
||||
except LeptonicaIOError:
|
||||
stderr("Failed to open destination file: %s" % args.outfile)
|
||||
sys.exit(5)
|
||||
raise LeptonicaIOError("Failed to open destination file: %s" % outfile)
|
||||
pixDestroy(pix_source)
|
||||
pixDestroy(pix_deskewed)
|
||||
|
||||
|
||||
@ -71,6 +71,9 @@ parser.add_argument(
|
||||
parser.add_argument(
|
||||
'tess_cfg_files', default='', nargs='*',
|
||||
help="Tesseract configuration")
|
||||
parser.add_argument(
|
||||
'--deskew-provider', choices=['imagemagick', 'leptonica'],
|
||||
default='leptonica')
|
||||
|
||||
|
||||
options = parser.parse_args()
|
||||
@ -246,7 +249,8 @@ def convert_to_tiff(input_file, output_file):
|
||||
check_call(args_convert)
|
||||
|
||||
|
||||
@active_if(options.preprocess_deskew != 0)
|
||||
@active_if(options.preprocess_deskew != 0
|
||||
and options.deskew_provider == 'imagemagick')
|
||||
@transform(convert_to_tiff, suffix(".tif"), ".deskewed.tif")
|
||||
def deskew_imagemagick(input_file, output_file):
|
||||
args_convert = [
|
||||
@ -273,6 +277,16 @@ def deskew_imagemagick(input_file, output_file):
|
||||
raise CalledProcessError(p.returncode, args_convert)
|
||||
|
||||
|
||||
@active_if(options.preprocess_deskew != 0
|
||||
and options.deskew_provider == 'leptonica')
|
||||
@transform(convert_to_tiff, suffix(".tif"), ".deskewed.tif")
|
||||
def deskew_leptonica(input_file, output_file):
|
||||
from .leptonica import deskew
|
||||
with logger_mutex:
|
||||
deskew(input_file, output_file,
|
||||
min(pageinfo['xres'], pageinfo['yres']))
|
||||
|
||||
|
||||
def clean_unpaper(pageinfo, infile, prefix, output_folder):
|
||||
args_unpaper = [
|
||||
'unpaper',
|
||||
@ -294,7 +308,7 @@ def clean_unpaper(pageinfo, infile, prefix, output_folder):
|
||||
return tmpfile.name
|
||||
|
||||
|
||||
@merge([convert_to_tiff, deskew_imagemagick],
|
||||
@merge([convert_to_tiff, deskew_imagemagick, deskew_leptonica],
|
||||
os.path.join(options.tmp_fld, "%04i.for_ocr.tif" % pageno))
|
||||
def select_ocr_image(infiles, output_file):
|
||||
re_symlink(infiles[-1], output_file, logger, logger_mutex)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user