Move optimize into a builtin plugin

This commit is contained in:
James R. Barlow 2022-06-12 02:23:13 -07:00
parent 6cdf68363a
commit 685a06c93d
No known key found for this signature in database
GPG Key ID: E54A300D567E1260
4 changed files with 72 additions and 9 deletions

View File

@ -35,7 +35,6 @@ from ocrmypdf.exceptions import (
)
from ocrmypdf.helpers import IMG2PDF_KWARGS, Resolution, safe_symlink
from ocrmypdf.hocrtransform import HocrTransform
from ocrmypdf.optimize import optimize
from ocrmypdf.pdfa import generate_pdfa_ps
from ocrmypdf.pdfinfo import Colorspace, Encoding, PdfInfo
@ -833,12 +832,18 @@ def metadata_fixup(working_file: Path, context: PdfContext):
def optimize_pdf(input_file: Path, context: PdfContext, executor: Executor):
    """Optimize ``input_file`` by delegating to the ``optimize_pdf`` plugin hook.

    The hook is asked to write its result to the context's ``optimize.pdf``
    path. The path the hook returns is the one that is measured and returned.

    Arguments:
        input_file: The PDF to optimize.
        context: Supplies the working file path and the plugin manager.
        executor: Passed through to the hook unchanged.

    Returns:
        The path returned by the hook.
    """
    output_file = context.get_path('optimize.pdf')
    output_pdf = context.plugin_manager.hook.optimize_pdf(
        input_pdf=input_file, output_pdf=output_file, context=context, executor=executor
    )

    input_size = input_file.stat().st_size
    # Measure the file the hook actually returned rather than the requested
    # output path: the hook specification allows returning the input path, in
    # which case the requested output file may never have been created.
    output_size = Path(output_pdf).stat().st_size
    # Both sizes are used as divisors below, so both must be non-zero.
    if input_size > 0 and output_size > 0:
        ratio = input_size / output_size
        savings = 1 - output_size / input_size
        log.info(f"Optimize ratio: {ratio:.2f} savings: {(savings):.1%}")
    return output_pdf
def enumerate_compress_ranges(iterable):

View File

@ -0,0 +1,27 @@
# © 2022 James R. Barlow: github.com/jbarlow83
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Built-in plugin to implement PDF page optimization."""
from pathlib import Path
from ocrmypdf import PdfContext, hookimpl
from ocrmypdf._concurrent import Executor
from ocrmypdf._pipeline import get_pdf_save_settings, should_linearize
from ocrmypdf.optimize import optimize
@hookimpl
def optimize_pdf(
    input_pdf: Path,
    output_pdf: Path,
    context: PdfContext,
    executor: Executor,
) -> Path:
    """Run the built-in optimizer on ``input_pdf``, targeting ``output_pdf``.

    Arguments:
        input_pdf: The PDF to optimize.
        output_pdf: The path handed to ``optimize`` as its output file.
        context: The current context; its ``options.output_type`` selects the
            PDF save settings.
        executor: Forwarded to ``optimize``.

    Returns:
        ``output_pdf``, unconditionally.
    """
    linearize = should_linearize(input_pdf, context)
    base_settings = get_pdf_save_settings(context.options.output_type)
    # Built via keyword expansion so a clashing 'linearize' key still raises.
    save_settings = dict(linearize=linearize, **base_settings)

    optimize(input_pdf, output_pdf, context, save_settings, executor)
    return output_pdf

View File

@ -625,7 +625,7 @@ def optimize(
context,
save_settings,
executor: Executor = DEFAULT_EXECUTOR,
) -> None:
) -> Path:
options = context.options
if options.optimize == 0:
safe_symlink(input_file, output_file)
@ -664,9 +664,7 @@ def optimize(
f"Output file not created after optimizing. We probably ran "
f"out of disk space in the temporary folder: {tempfile.gettempdir()}."
)
ratio = input_size / output_size
savings = 1 - output_size / input_size
log.info(f"Optimize ratio: {ratio:.2f} savings: {(savings):.1%}")
if savings < 0:
log.info(
@ -680,6 +678,8 @@ def optimize(
else:
safe_symlink(target_file, output_file)
return output_file
def main(infile, outfile, level, jobs=1):
from shutil import copy # pylint: disable=import-outside-toplevel

View File

@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, AbstractSet, List, NamedTuple, Optional
import pluggy
from ocrmypdf import PdfContext
from ocrmypdf._concurrent import Executor
from ocrmypdf.helpers import Resolution
@ -457,3 +458,33 @@ def generate_pdfa(
See also:
https://github.com/tqdm/tqdm
"""
@hookspec(firstresult=True)
def optimize_pdf(
    input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor
) -> Path:
    """Optimize a PDF after image, OCR and metadata processing.

    If the input_pdf is a PDF/A, the plugin must only modify input_pdf in a way
    that preserves the PDF/A status.

    If the implementation fails to produce a smaller file than the input file, it
    should return input_pdf instead.

    Arguments:
        input_pdf: The input PDF, which has OCR added.
        output_pdf: The requested filename of the output PDF which should be created
            by this optimization hook.
        context: The current context.
        executor: An initialized executor which may be used during optimization,
            to distribute optimization tasks.

    Returns:
        Path: If optimization is successful, the hook should return ``output_pdf``.
            If optimization does not produce a smaller file, the hook should return
            ``input_pdf``.

    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """