mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2025-06-26 23:49:59 +00:00
Accept most of ruff's delinting
This commit is contained in:
parent
b7eb93eb79
commit
9b8d14d16e
@ -6,7 +6,6 @@ from __future__ import annotations
|
||||
|
||||
# This script must be edited to meet your needs.
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -1,8 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R Barlow: https://github.com/jbarlow83
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
An example of an OCRmyPDF plugin.
|
||||
"""An example of an OCRmyPDF plugin.
|
||||
|
||||
This plugin adds two new command line arguments
|
||||
--grayscale-ocr: converts the image to grayscale before performing OCR on it
|
||||
|
@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: 2017 Enantiomerie
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""Example OCRmyPDF for Synology NAS"""
|
||||
"""Example OCRmyPDF for Synology NAS."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: 2019 James R. Barlow
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
"""This is a simple web service/HTTP wrapper for OCRmyPDF
|
||||
"""This is a simple web service/HTTP wrapper for OCRmyPDF.
|
||||
|
||||
This may be more convenient than the command line tool for some Docker users.
|
||||
Note that OCRmyPDF uses Ghostscript, which is licensed under AGPLv3+. While
|
||||
@ -15,7 +15,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shlex
|
||||
from subprocess import PIPE, run
|
||||
from subprocess import run
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
from flask import Flask, Response, request, send_from_directory
|
||||
|
@ -184,14 +184,19 @@ module = [
|
||||
]
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.pylint.basic]
|
||||
good-names = ["i", "j", "k", "ex", "Run", "_", "e", "p", "im", "w", "h", "m", "x", "y", "a", "b", "fp", "n", "f", "s", "v", "q", "dx", "dy"]
|
||||
logging-format-style = "old"
|
||||
disable = ["raw-checker-failed", "bad-inline-option", "locally-disabled", "file-ignored", "suppressed-message", "useless-suppression", "deprecated-pragma", "use-symbolic-message-instead", "logging-fstring-interpolation", "missing-function-docstring", "too-few-public-methods"]
|
||||
|
||||
[tool.ruff]
|
||||
src = ["src"]
|
||||
select = ["E"]
|
||||
select = [
|
||||
"D", # pydocstyle
|
||||
"E", # pycodestyle
|
||||
"W", # pycodestyle
|
||||
"F", # pyflakes
|
||||
"I001", # isort
|
||||
"UP", # pyupgrade
|
||||
]
|
||||
target-version = "py38"
|
||||
|
||||
[tool.ruff.isort]
|
||||
known-first-party = ["ocrmypdf"]
|
||||
|
||||
[tool.ruff.pydocstyle]
|
||||
convention = "google"
|
@ -11,7 +11,6 @@ import os
|
||||
import signal
|
||||
import sys
|
||||
from contextlib import suppress
|
||||
from multiprocessing import set_start_method
|
||||
|
||||
from ocrmypdf import __version__
|
||||
from ocrmypdf._plugin_manager import get_parser_options_plugins
|
||||
|
@ -51,8 +51,7 @@ class Executor(ABC):
|
||||
task_arguments: Iterable | None = None,
|
||||
task_finished: Callable | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Set up parallel execution and progress reporting.
|
||||
"""Set up parallel execution and progress reporting.
|
||||
|
||||
Args:
|
||||
use_threads: If ``False``, the workload is the sort that will benefit from
|
||||
@ -73,7 +72,6 @@ class Executor(ABC):
|
||||
task. This runs in the parent's context, but the parameters must be
|
||||
marshallable to the worker.
|
||||
"""
|
||||
|
||||
if not task_arguments:
|
||||
return # Nothing to do!
|
||||
if not worker_initializer:
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""Manage third party executables"""
|
||||
"""Manage third party executables."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""Interface to Ghostscript executable"""
|
||||
"""Interface to Ghostscript executable."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""Interface to jbig2 executable"""
|
||||
"""Interface to jbig2 executable."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""Interface to pngquant executable"""
|
||||
"""Interface to pngquant executable."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""Interface to Tesseract executable"""
|
||||
"""Interface to Tesseract executable."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@ -54,7 +54,7 @@ TESSERACT_THRESHOLDING_METHODS: dict[str, int] = {
|
||||
|
||||
|
||||
class TesseractLoggerAdapter(logging.LoggerAdapter):
|
||||
"Prepend [tesseract] to messages emitted from tesseract"
|
||||
"Prepend [tesseract] to messages emitted from tesseract."
|
||||
|
||||
def process(self, msg, kwargs):
|
||||
kwargs['extra'] = self.extra
|
||||
@ -283,7 +283,8 @@ def page_timedout(timeout: float) -> None:
|
||||
|
||||
def _generate_null_hocr(output_hocr: Path, output_text: Path, image: Path) -> None:
|
||||
"""Produce a .hocr file that reports no text detected on a page that is
|
||||
the same size as the input image."""
|
||||
the same size as the input image.
|
||||
"""
|
||||
with Image.open(image) as im:
|
||||
w, h = im.size
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""Interface to unpaper executable"""
|
||||
"""Interface to unpaper executable."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
@ -37,7 +37,6 @@ def _update_resources(*, obj, font, font_key, procset):
|
||||
|
||||
obj can be a page or Form XObject.
|
||||
"""
|
||||
|
||||
resources = _ensure_dictionary(obj, Name.Resources)
|
||||
fonts = _ensure_dictionary(resources, Name.Font)
|
||||
if font_key is not None and font_key not in fonts:
|
||||
@ -167,7 +166,6 @@ class OcrGrafter:
|
||||
the font to page 1 even if page 1 doesn't use it, so we have a way to get it
|
||||
back.
|
||||
"""
|
||||
|
||||
page0 = self.pdf_base.pages[0]
|
||||
_update_resources(
|
||||
obj=page0, font=self.font, font_key=self.font_key, procset=self.procset
|
||||
@ -200,8 +198,7 @@ class OcrGrafter:
|
||||
return self.output_file
|
||||
|
||||
def _find_font(self, text):
|
||||
"""Copy a font from the filename text into pdf_base"""
|
||||
|
||||
"""Copy a font from the filename text into pdf_base."""
|
||||
font, font_key = None, None
|
||||
possible_font_names = ('/f-0-0', '/F1')
|
||||
try:
|
||||
@ -234,8 +231,7 @@ class OcrGrafter:
|
||||
text_rotation: int,
|
||||
strip_old_text: bool,
|
||||
):
|
||||
"""Insert the text layer from text page 0 on to pdf_base at page_num"""
|
||||
|
||||
"""Insert the text layer from text page 0 on to pdf_base at page_num."""
|
||||
# pylint: disable=invalid-name
|
||||
|
||||
log.debug("Grafting")
|
||||
|
@ -24,7 +24,7 @@ class PageNumberFilter(logging.Filter):
|
||||
|
||||
|
||||
class TqdmConsole:
|
||||
"""Wrapper to log messages in a way that is compatible with tqdm progress bar
|
||||
"""Wrapper to log messages in a way that is compatible with tqdm progress bar.
|
||||
|
||||
This routes log messages through tqdm so that it can print them above the
|
||||
progress bar, and then refresh the progress bar, rather than overwriting
|
||||
|
@ -121,7 +121,6 @@ def _pdf_guess_version(input_file: Path, search_window=1024) -> str:
|
||||
|
||||
Returns empty string if not found, indicating file is probably not PDF.
|
||||
"""
|
||||
|
||||
with open(input_file, 'rb') as f:
|
||||
signature = f.read(search_window)
|
||||
m = re.search(br'%PDF-(\d\.\d)', signature)
|
||||
@ -222,7 +221,7 @@ def _vector_page_dpi(pageinfo: PageInfo) -> int:
|
||||
|
||||
|
||||
def get_page_dpi(pageinfo: PageInfo, options) -> Resolution:
|
||||
"Get the DPI when nonsquare DPI is tolerable"
|
||||
"Get the DPI when nonsquare DPI is tolerable."
|
||||
xres = max(
|
||||
pageinfo.dpi.x or VECTOR_PAGE_DPI,
|
||||
options.oversample or 0.0,
|
||||
@ -237,7 +236,7 @@ def get_page_dpi(pageinfo: PageInfo, options) -> Resolution:
|
||||
|
||||
|
||||
def get_page_square_dpi(pageinfo: PageInfo, options) -> Resolution:
|
||||
"Get the DPI when we require xres == yres, scaled to physical units"
|
||||
"Get the DPI when we require xres == yres, scaled to physical units."
|
||||
xres = pageinfo.dpi.x or 0.0
|
||||
yres = pageinfo.dpi.y or 0.0
|
||||
userunit = float(pageinfo.userunit) or 1.0
|
||||
@ -253,7 +252,7 @@ def get_page_square_dpi(pageinfo: PageInfo, options) -> Resolution:
|
||||
|
||||
|
||||
def get_canvas_square_dpi(pageinfo: PageInfo, options) -> Resolution:
|
||||
"""Get the DPI when we require xres == yres, in Postscript units"""
|
||||
"""Get the DPI when we require xres == yres, in Postscript units."""
|
||||
units = float(
|
||||
max(
|
||||
(pageinfo.dpi.x) or VECTOR_PAGE_DPI,
|
||||
@ -358,9 +357,7 @@ def rasterize_preview(input_file: Path, page_context: PageContext) -> Path:
|
||||
|
||||
|
||||
def describe_rotation(page_context: PageContext, orient_conf, correction: int) -> str:
|
||||
"""
|
||||
Describe the page rotation we are going to perform.
|
||||
"""
|
||||
"""Describe the page rotation we are going to perform."""
|
||||
direction = {0: '⇧', 90: '⇨', 180: '⇩', 270: '⇦'}
|
||||
turns = {0: ' ', 90: '⬏', 180: '↻', 270: '⬑'}
|
||||
|
||||
@ -401,7 +398,6 @@ def get_orientation_correction(preview: Path, page_context: PageContext) -> int:
|
||||
which points it (hopefully) upright. _graft.py takes care of orienting
|
||||
the image and text layers.
|
||||
"""
|
||||
|
||||
orient_conf = page_context.plugin_manager.hook.get_ocr_engine().get_orientation(
|
||||
preview, page_context.options
|
||||
)
|
||||
@ -514,10 +510,11 @@ def preprocess_clean(input_file: Path, page_context: PageContext) -> Path:
|
||||
|
||||
|
||||
def create_ocr_image(image: Path, page_context: PageContext) -> Path:
|
||||
"""Create the image we send for OCR. May not be the same as the display
|
||||
image depending on preprocessing. This image will never be shown to the
|
||||
user."""
|
||||
"""Create the image we send for OCR.
|
||||
|
||||
Might not be the same as the display image depending on preprocessing.
|
||||
This image will never be shown to the user.
|
||||
"""
|
||||
output_file = page_context.get_path('ocr.png')
|
||||
options = page_context.options
|
||||
with Image.open(image) as im:
|
||||
|
@ -251,8 +251,7 @@ def worker_init(max_pixels: int) -> None:
|
||||
|
||||
|
||||
def exec_concurrent(context: PdfContext, executor: Executor) -> Sequence[str]:
|
||||
"""Execute the pipeline concurrently"""
|
||||
|
||||
"""Execute the pipeline concurrently."""
|
||||
# Run exec_page_sync on every page context
|
||||
options = context.options
|
||||
max_workers = min(len(context.pdfinfo), options.jobs)
|
||||
@ -316,8 +315,7 @@ def exec_concurrent(context: PdfContext, executor: Executor) -> Sequence[str]:
|
||||
def configure_debug_logging(
|
||||
log_filename: Path, prefix: str = ''
|
||||
) -> logging.FileHandler:
|
||||
"""
|
||||
Create a debug log file at a specified location.
|
||||
"""Create a debug log file at a specified location.
|
||||
|
||||
Arguments:
|
||||
log_filename: Where to put the log file.
|
||||
|
@ -86,7 +86,6 @@ def configure_logging(
|
||||
Returns:
|
||||
The toplevel logger for ocrmypdf (or the root logger, if we are managing it).
|
||||
"""
|
||||
|
||||
prefix = '' if manage_root_logger else 'ocrmypdf'
|
||||
|
||||
log = logging.getLogger(prefix)
|
||||
@ -277,6 +276,8 @@ def ocr( # pylint: disable=unused-argument
|
||||
When a stream is used as output, whether via a writable object or
|
||||
``"-"``, some final validation steps are not performed (we do not read
|
||||
back the stream after it is written).
|
||||
|
||||
|
||||
Raises:
|
||||
ocrmypdf.MissingDependencyError: If a required dependency program is missing or
|
||||
was not found on PATH.
|
||||
|
@ -30,7 +30,7 @@ WorkerInit = Callable[[Queue, UserInit, int], None]
|
||||
|
||||
|
||||
def log_listener(q: Queue):
|
||||
"""Listen to the worker processes and forward the messages to logging
|
||||
"""Listen to the worker processes and forward the messages to logging.
|
||||
|
||||
For simplicity this is a thread rather than a process. Only one process
|
||||
should actually write to sys.stderr or whatever we're using, so if this is
|
||||
@ -39,7 +39,6 @@ def log_listener(q: Queue):
|
||||
See:
|
||||
https://docs.python.org/3/howto/logging-cookbook.html#logging-to-a-single-file-from-multiple-processes
|
||||
"""
|
||||
|
||||
while True:
|
||||
try:
|
||||
record = q.get()
|
||||
@ -59,8 +58,7 @@ def process_sigbus(*args):
|
||||
|
||||
|
||||
def process_init(q: Queue, user_init: UserInit, loglevel) -> None:
|
||||
"""Initialize a process pool worker"""
|
||||
|
||||
"""Initialize a process pool worker."""
|
||||
# Ignore SIGINT (our parent process will kill us gracefully)
|
||||
signal.signal(signal.SIGINT, signal.SIG_IGN)
|
||||
|
||||
|
@ -8,7 +8,6 @@ import logging
|
||||
|
||||
from ocrmypdf import hookimpl
|
||||
from ocrmypdf._exec import ghostscript
|
||||
from ocrmypdf._validation import HOCR_OK_LANGS
|
||||
from ocrmypdf.exceptions import MissingDependencyError
|
||||
from ocrmypdf.subprocess import check_external_program
|
||||
|
||||
|
@ -15,7 +15,7 @@ T = TypeVar('T', int, float)
|
||||
|
||||
|
||||
def numeric(basetype: Callable[[Any], T], min_: T | None = None, max_: T | None = None):
|
||||
"""Validator for numeric params"""
|
||||
"""Validator for numeric params."""
|
||||
min_ = basetype(min_) if min_ is not None else None
|
||||
max_ = basetype(max_) if max_ is not None else None
|
||||
|
||||
@ -46,7 +46,7 @@ def str_to_int(mapping: Mapping[str, int]):
|
||||
|
||||
|
||||
class ArgumentParser(argparse.ArgumentParser):
|
||||
"""Override parser's default behavior of calling sys.exit()
|
||||
"""Override parser's default behavior of calling sys.exit().
|
||||
|
||||
https://stackoverflow.com/questions/5943249/python-argparse-and-controlling-overriding-the-exit-status-code
|
||||
|
||||
|
@ -73,8 +73,7 @@ class ConnectionLogHandler(logging.handlers.QueueHandler):
|
||||
def process_loop(
|
||||
conn: Connection, user_init: Callable[[], None], loglevel, task, task_args
|
||||
):
|
||||
"""Initialize a process pool worker"""
|
||||
|
||||
"""Initialize a process pool worker."""
|
||||
# Install SIGBUS handler (so our parent process can abort somewhat gracefully)
|
||||
with suppress(AttributeError): # Windows and Cygwin do not have SIGBUS
|
||||
# Windows and Cygwin do not have pthread_sigmask or SIGBUS
|
||||
|
@ -108,7 +108,7 @@ class Resolution(Generic[T]):
|
||||
|
||||
|
||||
class NeverRaise(Exception):
|
||||
"""An exception that is never raised"""
|
||||
"""An exception that is never raised."""
|
||||
|
||||
|
||||
def safe_symlink(input_file: os.PathLike, soft_link_name: os.PathLike):
|
||||
@ -170,7 +170,7 @@ def monotonic(seq: Sequence) -> bool:
|
||||
|
||||
|
||||
def page_number(input_file: os.PathLike) -> int:
|
||||
"""Get one-based page number implied by filename (000002.pdf -> 2)"""
|
||||
"""Get one-based page number implied by filename (000002.pdf -> 2)."""
|
||||
return int(os.path.basename(os.fspath(input_file))[0:6])
|
||||
|
||||
|
||||
|
@ -99,11 +99,9 @@ class HocrTransformError(Exception):
|
||||
|
||||
|
||||
class HocrTransform:
|
||||
|
||||
"""
|
||||
A class for converting documents from the hOCR format.
|
||||
"""A class for converting documents from the hOCR format.
|
||||
For details of the hOCR format, see:
|
||||
http://kba.cloud/hocr-spec/
|
||||
http://kba.cloud/hocr-spec/.
|
||||
"""
|
||||
|
||||
box_pattern = re.compile(r'bbox((\s+\d+){4})')
|
||||
@ -143,9 +141,7 @@ class HocrTransform:
|
||||
raise HocrTransformError("hocr file is missing page dimensions")
|
||||
|
||||
def __str__(self): # pragma: no cover
|
||||
"""
|
||||
Return the textual content of the HTML body
|
||||
"""
|
||||
"""Return the textual content of the HTML body."""
|
||||
if self.hocr is None:
|
||||
return ''
|
||||
body = self.hocr.find(self._child_xpath('body'))
|
||||
@ -155,9 +151,7 @@ class HocrTransform:
|
||||
return ''
|
||||
|
||||
def _get_element_text(self, element: Element):
|
||||
"""
|
||||
Return the textual content of the element and its children
|
||||
"""
|
||||
"""Return the textual content of the element and its children."""
|
||||
text = ''
|
||||
if element.text is not None:
|
||||
text += element.text
|
||||
@ -169,10 +163,7 @@ class HocrTransform:
|
||||
|
||||
@classmethod
|
||||
def element_coordinates(cls, element: Element) -> Rect:
|
||||
"""
|
||||
Returns a tuple containing the coordinates of the bounding box around
|
||||
an element
|
||||
"""
|
||||
"""Get coordinates of the bounding box around an element."""
|
||||
out = Rect._make(0 for _ in range(4))
|
||||
if 'title' in element.attrib:
|
||||
matches = cls.box_pattern.search(element.attrib['title'])
|
||||
@ -183,9 +174,7 @@ class HocrTransform:
|
||||
|
||||
@classmethod
|
||||
def baseline(cls, element: Element) -> tuple[float, float]:
|
||||
"""
|
||||
Returns a tuple containing the baseline slope and intercept.
|
||||
"""
|
||||
"""Get baseline's slope and intercept."""
|
||||
if 'title' in element.attrib:
|
||||
matches = cls.baseline_pattern.search(element.attrib['title'])
|
||||
if matches:
|
||||
@ -193,9 +182,7 @@ class HocrTransform:
|
||||
return (0.0, 0.0)
|
||||
|
||||
def pt_from_pixel(self, pxl) -> Rect:
|
||||
"""
|
||||
Returns the quantity in PDF units (pt) given quantity in pixels
|
||||
"""
|
||||
"""Returns the quantity in PDF units (pt) given quantity in pixels."""
|
||||
return Rect._make((c / self.dpi * inch) for c in pxl)
|
||||
|
||||
def _child_xpath(self, html_tag: str, html_class: str | None = None) -> str:
|
||||
@ -206,11 +193,7 @@ class HocrTransform:
|
||||
|
||||
@classmethod
|
||||
def replace_unsupported_chars(cls, s: str) -> str:
|
||||
"""
|
||||
Given an input string, returns the corresponding string that:
|
||||
* is available in the Helvetica facetype
|
||||
* does not contain any ligature (to allow easy search in the PDF file)
|
||||
"""
|
||||
"""Replaces characters with those available in the Helvetica typeface."""
|
||||
return s.translate(cls.ligatures)
|
||||
|
||||
def topdown_position(self, element):
|
||||
@ -231,8 +214,8 @@ class HocrTransform:
|
||||
invisible_text: bool = False,
|
||||
interword_spaces: bool = False,
|
||||
) -> None:
|
||||
"""
|
||||
Creates a PDF file with an image superimposed on top of the text.
|
||||
"""Creates a PDF file with an image superimposed on top of the text.
|
||||
|
||||
Text is positioned according to the bounding box of the lines in
|
||||
the hOCR file.
|
||||
The image need not be identical to the image used to create the hOCR
|
||||
|
@ -230,7 +230,7 @@ def extract_images(
|
||||
options,
|
||||
extract_fn: Callable[..., XrefExt | None],
|
||||
) -> Iterator[tuple[int, XrefExt]]:
|
||||
"""Extract image using extract_fn
|
||||
"""Extract image using extract_fn.
|
||||
|
||||
Enumerate images on each page, look up their xref/ID number in the PDF.
|
||||
Exclude images that are soft masks (i.e. alpha transparency related).
|
||||
@ -244,7 +244,6 @@ def extract_images(
|
||||
it does a tuple should be returned: (xref, ext) where .ext is the file
|
||||
extension. extract_fn must also extract the file it finds interesting.
|
||||
"""
|
||||
|
||||
include_xrefs: MutableSet[Xref] = set()
|
||||
exclude_xrefs: MutableSet[Xref] = set()
|
||||
pageno_for_xref = {}
|
||||
@ -289,8 +288,7 @@ def extract_images(
|
||||
def extract_images_generic(
|
||||
pike: Pdf, root: Path, options
|
||||
) -> tuple[list[Xref], list[Xref]]:
|
||||
"""Extract any >=2bpp image we think we can improve"""
|
||||
|
||||
"""Extract any >=2bpp image we think we can improve."""
|
||||
jpegs = []
|
||||
pngs = []
|
||||
for _, xref_ext in extract_images(pike, root, options, extract_image_generic):
|
||||
@ -304,8 +302,7 @@ def extract_images_generic(
|
||||
|
||||
|
||||
def extract_images_jbig2(pike: Pdf, root: Path, options) -> dict[int, list[XrefExt]]:
|
||||
"""Extract any bitonal image that we think we can improve as JBIG2"""
|
||||
|
||||
"""Extract any bitonal image that we think we can improve as JBIG2."""
|
||||
jbig2_groups = defaultdict(list)
|
||||
for pageno, xref_ext in extract_images(pike, root, options, extract_image_jbig2):
|
||||
group = pageno // options.jbig2_page_group_size
|
||||
@ -318,7 +315,7 @@ def extract_images_jbig2(pike: Pdf, root: Path, options) -> dict[int, list[XrefE
|
||||
def _produce_jbig2_images(
|
||||
jbig2_groups: dict[int, list[XrefExt]], root: Path, options, executor: Executor
|
||||
) -> None:
|
||||
"""Produce JBIG2 images from their groups"""
|
||||
"""Produce JBIG2 images from their groups."""
|
||||
|
||||
def jbig2_group_args(root: Path, groups: dict[int, list[XrefExt]]):
|
||||
for group, xref_exts in groups.items():
|
||||
@ -674,7 +671,7 @@ def main(infile, outfile, level, jobs=1):
|
||||
from tempfile import TemporaryDirectory # pylint: disable=import-outside-toplevel
|
||||
|
||||
class OptimizeOptions:
|
||||
"""Emulate ocrmypdf's options"""
|
||||
"""Emulate ocrmypdf's options."""
|
||||
|
||||
def __init__(
|
||||
self, input_file, jobs, optimize_, jpeg_quality, png_quality, jb2lossy
|
||||
|
@ -1,9 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""
|
||||
Utilities for PDF/A production and confirmation with Ghostscript.
|
||||
"""
|
||||
"""Utilities for PDF/A production and confirmation with Ghostspcript."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@ -75,7 +73,7 @@ def _make_postscript(icc_name: str, icc_data: bytes, colors: int) -> Iterator[st
|
||||
|
||||
|
||||
def generate_pdfa_ps(target_filename: Path, icc: str = 'sRGB'):
|
||||
"""Create a Postscript PDFMARK file for Ghostscript PDF/A conversion
|
||||
"""Create a Postscript PDFMARK file for Ghostscript PDF/A conversion.
|
||||
|
||||
pdfmark is an extension to the Postscript language that describes some PDF
|
||||
features like bookmarks and annotations. It was originally specified Adobe
|
||||
@ -118,7 +116,6 @@ def file_claims_pdfa(filename: Path):
|
||||
This only checks if the XMP metadata contains a PDF/A marker. It does not
|
||||
do full PDF/A validation.
|
||||
"""
|
||||
|
||||
with pikepdf.open(filename) as pdf:
|
||||
pdfmeta = pdf.open_metadata()
|
||||
if not pdfmeta.pdfa_status:
|
||||
|
@ -164,7 +164,7 @@ class TextMarker:
|
||||
|
||||
|
||||
def _normalize_stack(graphobjs):
|
||||
"""Convert runs of qQ's in the stack into single graphobjs"""
|
||||
"""Convert runs of qQ's in the stack into single graphobjs."""
|
||||
for operands, operator in graphobjs:
|
||||
operator = str(operator)
|
||||
if re.match(r'Q*q+$', operator): # Zero or more Q, one or more q
|
||||
@ -200,7 +200,6 @@ def _interpret_contents(contentstream: Object, initial_shorthand=UNIT_SQUARE):
|
||||
undefined in the spec, but we just pretend nothing happened and leave the
|
||||
CTM unchanged.
|
||||
"""
|
||||
|
||||
stack = []
|
||||
ctm = PdfMatrix(initial_shorthand)
|
||||
xobject_settings: list[XobjectSettings] = []
|
||||
@ -307,7 +306,6 @@ def _get_dpi(ctm_shorthand, image_size) -> Resolution:
|
||||
/MediaBox.
|
||||
|
||||
"""
|
||||
|
||||
a, b, c, d, _, _ = ctm_shorthand # pylint: disable=invalid-name
|
||||
|
||||
# Calculate the width and height of the image in PDF units
|
||||
@ -451,8 +449,7 @@ class ImageInfo:
|
||||
|
||||
|
||||
def _find_inline_images(contentsinfo: ContentsInfo) -> Iterator[ImageInfo]:
|
||||
"Find inline images in the contentstream"
|
||||
|
||||
"Find inline images in the contentstream."
|
||||
for n, inline in enumerate(contentsinfo.inline_images):
|
||||
yield ImageInfo(
|
||||
name=f'inline-{n:02d}', shorthand=inline.shorthand, inline=inline.iimage
|
||||
@ -460,7 +457,7 @@ def _find_inline_images(contentsinfo: ContentsInfo) -> Iterator[ImageInfo]:
|
||||
|
||||
|
||||
def _image_xobjects(container) -> Iterator[tuple[Object, str]]:
|
||||
"""Search for all XObject-based images in the container
|
||||
"""Search for all XObject-based images in the container.
|
||||
|
||||
Usually the container is a page, but it could also be a Form XObject
|
||||
that contains images. Filter out the Form XObjects which are dealt with
|
||||
@ -471,7 +468,6 @@ def _image_xobjects(container) -> Iterator[tuple[Object, str]]:
|
||||
since the object does not know its own name.
|
||||
|
||||
"""
|
||||
|
||||
if '/Resources' not in container:
|
||||
return
|
||||
resources = container['/Resources']
|
||||
@ -488,14 +484,13 @@ def _image_xobjects(container) -> Iterator[tuple[Object, str]]:
|
||||
def _find_regular_images(
|
||||
container: Object, contentsinfo: ContentsInfo
|
||||
) -> Iterator[ImageInfo]:
|
||||
"""Find images stored in the container's /Resources /XObject
|
||||
"""Find images stored in the container's /Resources /XObject.
|
||||
|
||||
Usually the container is a page, but it could also be a Form XObject
|
||||
that contains images.
|
||||
|
||||
Generates images with their DPI at time of drawing.
|
||||
"""
|
||||
|
||||
for pdfimage, xobj in _image_xobjects(container):
|
||||
if xobj not in contentsinfo.name_index:
|
||||
continue
|
||||
@ -512,7 +507,7 @@ def _find_regular_images(
|
||||
|
||||
|
||||
def _find_form_xobject_images(pdf: Pdf, container: Object, contentsinfo: ContentsInfo):
|
||||
"""Find any images that are in Form XObjects in the container
|
||||
"""Find any images that are in Form XObjects in the container.
|
||||
|
||||
The container may be a page, or a parent Form XObject.
|
||||
|
||||
@ -546,7 +541,7 @@ def _find_form_xobject_images(pdf: Pdf, container: Object, contentsinfo: Content
|
||||
def _process_content_streams(
|
||||
*, pdf: Pdf, container: Object, shorthand=None
|
||||
) -> Iterator[VectorMarker | TextMarker | ImageInfo]:
|
||||
"""Find all individual instances of images drawn in the container
|
||||
"""Find all individual instances of images drawn in the container.
|
||||
|
||||
Usually the container is a page, but it may also be a Form XObject.
|
||||
|
||||
@ -563,7 +558,6 @@ def _process_content_streams(
|
||||
downsampling.
|
||||
|
||||
"""
|
||||
|
||||
if container.get('/Type') == '/Page' and '/Contents' in container:
|
||||
initial_shorthand = shorthand or UNIT_SQUARE
|
||||
elif container.get('/Type') == '/XObject' and container['/Subtype'] == '/Form':
|
||||
@ -595,8 +589,7 @@ def _process_content_streams(
|
||||
|
||||
|
||||
def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_height) -> bool:
|
||||
"""Smarter text detection that ignores text in margins"""
|
||||
|
||||
"""Smarter text detection that ignores text in margins."""
|
||||
pw, ph = float(page_width), float(page_height) # pylint: disable=invalid-name
|
||||
|
||||
margin_ratio = 0.125
|
||||
@ -608,10 +601,9 @@ def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_height) ->
|
||||
)
|
||||
|
||||
def rects_intersect(a: FloatRect, b: FloatRect) -> bool:
|
||||
"""
|
||||
Where (a,b) are 4-tuple rects (left-0, top-1, right-2, bottom-3)
|
||||
"""Where (a,b) are 4-tuple rects (left-0, top-1, right-2, bottom-3)
|
||||
https://stackoverflow.com/questions/306316/determine-if-two-rectangles-overlap-each-other
|
||||
Formula assumes all boxes are in first quadrant
|
||||
Formula assumes all boxes are in first quadrant.
|
||||
"""
|
||||
return a[0] < b[2] and a[2] > b[0] and a[1] > b[3] and a[3] < b[1]
|
||||
|
||||
@ -624,7 +616,7 @@ def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_height) ->
|
||||
|
||||
|
||||
def simplify_textboxes(miner, textbox_getter) -> Iterator[TextboxInfo]:
|
||||
"""Extract only limited content from text boxes
|
||||
"""Extract only limited content from text boxes.
|
||||
|
||||
We do this to save memory and ensure that our objects are pickleable.
|
||||
"""
|
||||
@ -910,7 +902,7 @@ DEFAULT_EXECUTOR = SerialExecutor()
|
||||
|
||||
|
||||
class PdfInfo:
|
||||
"""Get summary information about a PDF"""
|
||||
"""Get summary information about a PDF."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -63,7 +63,7 @@ def pdftype3font__pscript5_get_ascent(self):
|
||||
|
||||
|
||||
class LTStateAwareChar(LTChar):
|
||||
"""A subclass of LTChar that tracks text render mode at time of drawing"""
|
||||
"""A subclass of LTChar that tracks text render mode at time of drawing."""
|
||||
|
||||
__slots__ = (
|
||||
'rendermode',
|
||||
@ -111,7 +111,7 @@ class LTStateAwareChar(LTChar):
|
||||
self.rendermode = textstate.render
|
||||
|
||||
def is_compatible(self, obj):
|
||||
"""Check if characters can be combined into a textline
|
||||
"""Check if characters can be combined into a textline.
|
||||
|
||||
We consider characters compatible if:
|
||||
- the Unicode mapping is known, and both have the same render mode
|
||||
@ -146,7 +146,7 @@ class LTStateAwareChar(LTChar):
|
||||
|
||||
|
||||
class TextPositionTracker(PDFLayoutAnalyzer):
|
||||
"""A page layout analyzer that pays attention to text visibility"""
|
||||
"""A page layout analyzer that pays attention to text visibility."""
|
||||
|
||||
def __init__(self, rsrcmgr, pageno=1, laparams=None):
|
||||
super().__init__(rsrcmgr, pageno, laparams)
|
||||
|
@ -99,6 +99,8 @@ def check_options(options: Namespace) -> None:
|
||||
ocrmypdf.exceptions.ExitCodeException: If options are not acceptable
|
||||
and the application should terminate gracefully with an informative
|
||||
message and error code.
|
||||
|
||||
|
||||
Note:
|
||||
This hook will be called from the main process, and may modify global state
|
||||
before child worker processes are forked.
|
||||
@ -127,6 +129,8 @@ def get_executor(progressbar_class) -> Executor:
|
||||
Note:
|
||||
This hook will be called from the main process, and may modify global state
|
||||
before child worker processes are forked.
|
||||
|
||||
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
"""
|
||||
@ -159,7 +163,6 @@ def get_progressbar_class():
|
||||
Here is how OCRmyPDF will use the progress bar:
|
||||
|
||||
Example:
|
||||
|
||||
pbar_class = pm.hook.get_progressbar_class()
|
||||
with pbar_class(**tqdm_kwargs) as pbar:
|
||||
...
|
||||
@ -181,6 +184,8 @@ def validate(pdfinfo: PdfInfo, options: Namespace) -> None:
|
||||
ocrmypdf.exceptions.ExitCodeException: If options or pdfinfo are not acceptable
|
||||
and the application should terminate gracefully with an informative
|
||||
message and error code.
|
||||
|
||||
|
||||
Note:
|
||||
This hook will be called from the main process, and may modify global state
|
||||
before child worker processes are forked.
|
||||
@ -218,6 +223,8 @@ def rasterize_pdf_page(
|
||||
Note:
|
||||
This hook will be called from child processes. Modifying global state
|
||||
will not affect the main process or other child processes.
|
||||
|
||||
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
"""
|
||||
@ -245,6 +252,8 @@ def filter_ocr_image(page: PageContext, image: Image.Image) -> Image.Image:
|
||||
Note:
|
||||
This hook will be called from child processes. Modifying global state
|
||||
will not affect the main process or other child processes.
|
||||
|
||||
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
"""
|
||||
@ -281,6 +290,8 @@ def filter_page_image(page: PageContext, image_filename: Path) -> Path:
|
||||
Note:
|
||||
This hook will be called from child processes. Modifying global state
|
||||
will not affect the main process or other child processes.
|
||||
|
||||
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
"""
|
||||
@ -323,6 +334,8 @@ def filter_pdf_page(page: PageContext, image_filename: Path, output_pdf: Path) -
|
||||
Note:
|
||||
This hook will be called from child processes. Modifying global state
|
||||
will not affect the main process or other child processes.
|
||||
|
||||
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
"""
|
||||
@ -381,7 +394,8 @@ class OcrEngine(ABC):
|
||||
"""Returns the set of all languages that are supported by the engine.
|
||||
|
||||
Languages are typically given in 3-letter ISO 639-2 codes, but actually
|
||||
can be any value understood by the OCR engine."""
|
||||
can be any value understood by the OCR engine.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
@ -474,7 +488,7 @@ def generate_pdfa(
|
||||
Note:
|
||||
This is a :ref:`firstresult hook<firstresult>`.
|
||||
|
||||
See also:
|
||||
See Also:
|
||||
https://github.com/tqdm/tqdm
|
||||
"""
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
"""Utilities to measure OCR quality"""
|
||||
"""Utilities to measure OCR quality."""
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MPL-2.0
|
||||
"""Wrappers to manage subprocess calls"""
|
||||
"""Wrappers to manage subprocess calls."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@ -34,7 +34,7 @@ def run(
|
||||
check: bool = False,
|
||||
**kwargs,
|
||||
) -> CompletedProcess:
|
||||
"""Wrapper around :py:func:`subprocess.run`
|
||||
"""Wrapper around :py:func:`subprocess.run`.
|
||||
|
||||
The main purpose of this wrapper is to log subprocess output in an orderly
|
||||
fashion that identifies the responsible subprocess. An additional
|
||||
@ -141,7 +141,7 @@ def get_version(
|
||||
regex=r'(\d+(\.\d+)*)',
|
||||
env: OsEnviron | None = None,
|
||||
) -> str:
|
||||
"""Get the version of the specified program
|
||||
"""Get the version of the specified program.
|
||||
|
||||
Arguments:
|
||||
program: The program to version check.
|
||||
@ -323,7 +323,6 @@ def check_external_program(
|
||||
version_parser: A class that should be used to parse and compare version
|
||||
numbers. Used when version numbers do not follow standard conventions.
|
||||
"""
|
||||
|
||||
try:
|
||||
found_version = version_checker()
|
||||
except (CalledProcessError, FileNotFoundError) as e:
|
||||
|
@ -169,8 +169,7 @@ SHIMS = [
|
||||
|
||||
|
||||
def fix_windows_args(program: str, args, env):
|
||||
"""Adjust our desired program and command line arguments for use on Windows"""
|
||||
|
||||
"""Adjust our desired program and command line arguments for use on Windows."""
|
||||
# If we are running a .py on Windows, ensure we call it with this Python
|
||||
# (to support test suite shims)
|
||||
if program.lower().endswith('.py'):
|
||||
|
@ -3,12 +3,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from subprocess import PIPE, CompletedProcess, run
|
||||
from typing import List
|
||||
from subprocess import CompletedProcess, run
|
||||
|
||||
import pytest
|
||||
|
||||
@ -71,10 +69,13 @@ def outtxt(tmp_path) -> Path:
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def no_outpdf(tmp_path) -> Path:
|
||||
"""This just documents the fact that a test is not expected to produce
|
||||
"""Document fact that a test is not expected to produce output.
|
||||
|
||||
This just documents the fact that a test is not expected to produce
|
||||
output. Unfortunately an assertion failure inside a test fixture produces
|
||||
an error rather than a test failure, so no testing is done. It's up to
|
||||
the test to confirm that no output file was created."""
|
||||
the test to confirm that no output file was created.
|
||||
"""
|
||||
return tmp_path / 'no_output.pdf'
|
||||
|
||||
|
||||
@ -110,7 +111,6 @@ def run_ocrmypdf_api(input_file: Path, output_file: Path, *args) -> ExitCode:
|
||||
The return code must always be checked or the test may declare a failure
|
||||
to be a pass.
|
||||
"""
|
||||
|
||||
api_args = [str(input_file), str(output_file)] + [
|
||||
str(arg) for arg in args if arg is not None
|
||||
]
|
||||
@ -128,7 +128,6 @@ def run_ocrmypdf(
|
||||
If an exception is thrown this fact will be returned as part of the result
|
||||
text and return code rather than exception objects.
|
||||
"""
|
||||
|
||||
p_args = (
|
||||
[sys.executable, '-m', 'ocrmypdf']
|
||||
+ [str(arg) for arg in args if arg is not None]
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""Tesseract bad utf8
|
||||
"""Tesseract bad utf8.
|
||||
|
||||
In some cases, some versions of Tesseract can output binary gibberish or data
|
||||
that is not UTF-8 compatible, so we are forced to check that we can convert it
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MIT
|
||||
"""Cache output of tesseract to speed up test suite
|
||||
"""Cache output of tesseract to speed up test suite.
|
||||
|
||||
The cache is keyed by the input test file. The input arguments are slugged
|
||||
into a hideous filename that more or less represents them literally. Joined
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MIT
|
||||
"""Tesseract no-op/fixed rotate plugin
|
||||
"""Tesseract no-op/fixed rotate plugin.
|
||||
|
||||
To quickly run tests where getting OCR output is not necessary and we want to test
|
||||
the rotation pipeline.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SPDX-FileCopyrightText: 2022 James R. Barlow
|
||||
# SPDX-License-Identifier: MIT
|
||||
"""Tesseract no-op plugin
|
||||
"""Tesseract no-op plugin.
|
||||
|
||||
To quickly run tests where getting OCR output is not necessary.
|
||||
|
||||
|
@ -3,8 +3,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from ocrmypdf.helpers import check_pdf
|
||||
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from subprocess import PIPE, run
|
||||
from subprocess import run
|
||||
|
||||
import pytest
|
||||
|
||||
|
@ -6,7 +6,6 @@ from __future__ import annotations
|
||||
from unittest.mock import patch
|
||||
|
||||
import pikepdf
|
||||
import pytest
|
||||
|
||||
import ocrmypdf
|
||||
|
||||
|
@ -5,8 +5,6 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
from ocrmypdf._sync import configure_debug_logging
|
||||
|
||||
|
||||
|
@ -446,7 +446,7 @@ def test_linearized_pdf_and_indirect_object(resources, outpdf):
|
||||
|
||||
|
||||
def test_very_high_dpi(resources, outpdf):
|
||||
"Checks for a Decimal quantize error with high DPI, etc"
|
||||
"Checks for a Decimal quantize error with high DPI, etc."
|
||||
check_ocrmypdf(
|
||||
resources / '2400dpi.pdf',
|
||||
outpdf,
|
||||
|
@ -231,17 +231,6 @@ def test_xml_metadata_preserved(
|
||||
'pdf:keywords',
|
||||
]
|
||||
acquired_properties = ['dc:format']
|
||||
might_change_properties = [
|
||||
'dc:date',
|
||||
'pdf:pdfversion',
|
||||
'pdf:Producer',
|
||||
'xmp:CreateDate',
|
||||
'xmp:ModifyDate',
|
||||
'xmp:MetadataDate',
|
||||
'xmp:CreatorTool',
|
||||
'xmpMM:DocumentId',
|
||||
'xmpMM:DnstanceId',
|
||||
]
|
||||
|
||||
# Cleanup messy data structure
|
||||
# Top level is key-value mapping of namespaces to keys under namespace,
|
||||
|
@ -3,8 +3,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from ocrmypdf import quality as qual
|
||||
|
||||
|
||||
|
@ -11,12 +11,13 @@ from os import fspath
|
||||
import img2pdf
|
||||
import pikepdf
|
||||
import pytest
|
||||
from PIL import Image, ImageChops
|
||||
from reportlab.pdfgen.canvas import Canvas
|
||||
|
||||
from ocrmypdf._exec import ghostscript
|
||||
from ocrmypdf._plugin_manager import get_plugin_manager
|
||||
from ocrmypdf.helpers import IMG2PDF_KWARGS, Resolution
|
||||
from ocrmypdf.pdfinfo import PdfInfo
|
||||
from PIL import Image, ImageChops
|
||||
from reportlab.pdfgen.canvas import Canvas
|
||||
|
||||
from .conftest import check_ocrmypdf, run_ocrmypdf
|
||||
|
||||
@ -152,13 +153,14 @@ def test_autorotate_threshold(threshold, op, comparison_threshold, resources, ou
|
||||
|
||||
|
||||
def test_rotated_skew_timeout(resources, outpdf):
|
||||
"""This document contains an image that is rotated 90 into place with a
|
||||
"""Check rotated skew timeout.
|
||||
|
||||
This document contains an image that is rotated 90 into place with a
|
||||
/Rotate tag and intentionally skewed by altering the transformation matrix.
|
||||
|
||||
This tests for a bug where the combination of preprocessing and a tesseract
|
||||
timeout produced a page whose dimensions did not match the original's.
|
||||
"""
|
||||
|
||||
input_file = resources / 'rotated_skew.pdf'
|
||||
in_pageinfo = PdfInfo(input_file)[0]
|
||||
|
||||
|
@ -4,13 +4,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from subprocess import DEVNULL, PIPE, Popen, run
|
||||
from subprocess import DEVNULL, PIPE, run
|
||||
|
||||
import pytest
|
||||
|
||||
from ocrmypdf.exceptions import ExitCode
|
||||
from ocrmypdf.helpers import check_pdf
|
||||
|
||||
from .conftest import run_ocrmypdf
|
||||
|
@ -8,14 +8,13 @@ from os import fspath
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
from ocrmypdf._exec import unpaper
|
||||
from ocrmypdf._plugin_manager import get_parser_options_plugins
|
||||
from ocrmypdf._validation import check_options
|
||||
from ocrmypdf.exceptions import ExitCode, MissingDependencyError
|
||||
|
||||
from .conftest import check_ocrmypdf, have_unpaper, ocrmypdf_exec, run_ocrmypdf
|
||||
from .conftest import check_ocrmypdf, have_unpaper, run_ocrmypdf
|
||||
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user