mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2025-12-29 16:10:06 +00:00
Move optimize into a builtin plugin
This commit is contained in:
parent
6cdf68363a
commit
685a06c93d
@ -35,7 +35,6 @@ from ocrmypdf.exceptions import (
|
||||
)
|
||||
from ocrmypdf.helpers import IMG2PDF_KWARGS, Resolution, safe_symlink
|
||||
from ocrmypdf.hocrtransform import HocrTransform
|
||||
from ocrmypdf.optimize import optimize
|
||||
from ocrmypdf.pdfa import generate_pdfa_ps
|
||||
from ocrmypdf.pdfinfo import Colorspace, Encoding, PdfInfo
|
||||
|
||||
@ -833,12 +832,18 @@ def metadata_fixup(working_file: Path, context: PdfContext):
|
||||
|
||||
def optimize_pdf(input_file: Path, context: PdfContext, executor: Executor):
    """Run the ``optimize_pdf`` plugin hook and log the resulting size change.

    Args:
        input_file: The PDF to be optimized.
        context: Provides the working path for ``optimize.pdf`` and the plugin
            manager whose ``optimize_pdf`` hook does the actual work.
        executor: Passed through to the hook unchanged.

    Returns:
        The path returned by the hook.
    """
    output_file = context.get_path('optimize.pdf')
    output_pdf = context.plugin_manager.hook.optimize_pdf(
        input_pdf=input_file, output_pdf=output_file, context=context, executor=executor
    )

    # Measure the file the hook actually returned. The hook specification
    # allows an implementation to hand back the input path when it cannot
    # produce a smaller file, in which case the requested output file may
    # never have been created.
    input_size = input_file.stat().st_size
    output_size = output_pdf.stat().st_size
    # Both sizes are used as divisors below, so both must be non-zero.
    if input_size > 0 and output_size > 0:
        ratio = input_size / output_size
        savings = 1 - output_size / input_size
        log.info(f"Optimize ratio: {ratio:.2f} savings: {(savings):.1%}")

    return output_pdf
|
||||
|
||||
|
||||
def enumerate_compress_ranges(iterable):
|
||||
|
||||
27
src/ocrmypdf/builtin_plugins/optimize.py
Normal file
27
src/ocrmypdf/builtin_plugins/optimize.py
Normal file
@ -0,0 +1,27 @@
|
||||
# © 2022 James R. Barlow: github.com/jbarlow83
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
"""Built-in plugin to implement PDF page optimization."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from ocrmypdf import PdfContext, hookimpl
|
||||
from ocrmypdf._concurrent import Executor
|
||||
from ocrmypdf._pipeline import get_pdf_save_settings, should_linearize
|
||||
from ocrmypdf.optimize import optimize
|
||||
|
||||
|
||||
@hookimpl
def optimize_pdf(
    input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor
) -> Path:
    """Built-in implementation of the ``optimize_pdf`` hook.

    Assembles the PDF save settings (the linearization decision plus the
    settings for the configured output type), hands them to ``optimize`` and
    returns ``output_pdf``.
    """
    # Evaluate the linearization decision first, then the per-output-type
    # settings, and merge them through keyword expansion.
    linearize = should_linearize(input_pdf, context)
    type_settings = get_pdf_save_settings(context.options.output_type)
    save_settings = dict(linearize=linearize, **type_settings)

    optimize(input_pdf, output_pdf, context, save_settings, executor)
    return output_pdf
|
||||
@ -625,7 +625,7 @@ def optimize(
|
||||
context,
|
||||
save_settings,
|
||||
executor: Executor = DEFAULT_EXECUTOR,
|
||||
) -> None:
|
||||
) -> Path:
|
||||
options = context.options
|
||||
if options.optimize == 0:
|
||||
safe_symlink(input_file, output_file)
|
||||
@ -664,9 +664,7 @@ def optimize(
|
||||
f"Output file not created after optimizing. We probably ran "
|
||||
f"out of disk space in the temporary folder: {tempfile.gettempdir()}."
|
||||
)
|
||||
ratio = input_size / output_size
|
||||
savings = 1 - output_size / input_size
|
||||
log.info(f"Optimize ratio: {ratio:.2f} savings: {(savings):.1%}")
|
||||
|
||||
if savings < 0:
|
||||
log.info(
|
||||
@ -680,6 +678,8 @@ def optimize(
|
||||
else:
|
||||
safe_symlink(target_file, output_file)
|
||||
|
||||
return output_file
|
||||
|
||||
|
||||
def main(infile, outfile, level, jobs=1):
|
||||
from shutil import copy # pylint: disable=import-outside-toplevel
|
||||
|
||||
@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, AbstractSet, List, NamedTuple, Optional
|
||||
|
||||
import pluggy
|
||||
|
||||
from ocrmypdf import PdfContext
|
||||
from ocrmypdf._concurrent import Executor
|
||||
from ocrmypdf.helpers import Resolution
|
||||
|
||||
@ -457,3 +458,33 @@ def generate_pdfa(
|
||||
See also:
|
||||
https://github.com/tqdm/tqdm
|
||||
"""
|
||||
|
||||
|
||||
@hookspec(firstresult=True)
def optimize_pdf(
    input_pdf: Path, output_pdf: Path, context: PdfContext, executor: Executor
) -> Path:
    """Optimize a PDF after image, OCR and metadata processing.

    When ``input_pdf`` is a PDF/A, the implementation must only make changes
    that preserve its PDF/A status.

    Arguments:
        input_pdf: The input PDF, which has OCR added.
        output_pdf: The requested filename of the output PDF, which this
            optimization hook should create.
        context: The current context.
        executor: An initialized executor which may be used during
            optimization, to distribute optimization tasks.

    Returns:
        Path: ``output_pdf`` if optimization is successful. If optimization
        does not produce a file smaller than the input, the hook should
        return ``input_pdf`` instead.

    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user