Fixed case where page image was not converted to JPEG

If a preprocessing option was used,
and all original images on the page were JPEGs,
and --output-type=pdf was set, then
images would be saved as Flate instead
of being converted to JPEG.
This commit is contained in:
James R. Barlow 2019-12-10 01:07:59 -08:00
parent c5571388e2
commit f34130d193

View File

@ -41,7 +41,7 @@ from .helpers import safe_symlink
from .hocrtransform import HocrTransform
from .optimize import optimize
from .pdfa import generate_pdfa_ps
from .pdfinfo import Colorspace, PdfInfo
from .pdfinfo import Colorspace, PdfInfo, Encoding
VECTOR_PAGE_DPI = 400
@ -557,7 +557,7 @@ def ocr_tesseract_hocr(input_file, page_context):
def should_visible_page_image_use_jpg(pageinfo):
# If all images were JPEGs originally, produce a JPEG as output
return pageinfo.images and all(im.enc == 'jpeg' for im in pageinfo.images)
return pageinfo.images and all(im.enc == Encoding.jpeg for im in pageinfo.images)
def create_visible_page_jpg(image, page_context):