Accept most of ruff's delinting

2025-06-26 23:49:59 +00:00 · 2023-04-14 00:38:34 -07:00 · 2023-04-14 00:38:34 -07:00 · 9b8d14d16e
commit 9b8d14d16e
parent b7eb93eb79
47 changed files with 121 additions and 172 deletions
--- a/misc/batch.py
+++ b/misc/batch.py
@ -6,7 +6,6 @@ from __future__ import annotations

 # This script must be edited to meet your needs.
 import logging
-import os
 import sys
 from pathlib import Path

--- a/misc/example_plugin.py
+++ b/misc/example_plugin.py
@ -1,8 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R Barlow: https://github.com/jbarlow83
 # SPDX-License-Identifier: MIT

-"""
-An example of an OCRmyPDF plugin.
+"""An example of an OCRmyPDF plugin.

 This plugin adds two new command line arguments
    --grayscale-ocr: converts the image to grayscale before performing OCR on it
--- a/misc/synology.py
+++ b/misc/synology.py
@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: 2017 Enantiomerie
 # SPDX-License-Identifier: MIT

-"""Example OCRmyPDF for Synology NAS"""
+"""Example OCRmyPDF for Synology NAS."""

 from __future__ import annotations

--- a/misc/webservice.py
+++ b/misc/webservice.py
@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: 2019 James R. Barlow
 # SPDX-License-Identifier: AGPL-3.0-or-later

-"""This is a simple web service/HTTP wrapper for OCRmyPDF
+"""This is a simple web service/HTTP wrapper for OCRmyPDF.

 This may be more convenient than the command line tool for some Docker users.
 Note that OCRmyPDF uses Ghostscript, which is licensed under AGPLv3+. While
@ -15,7 +15,7 @@ from __future__ import annotations

 import os
 import shlex
-from subprocess import PIPE, run
+from subprocess import run
 from tempfile import TemporaryDirectory

 from flask import Flask, Response, request, send_from_directory
--- a/pyproject.toml
+++ b/pyproject.toml
@ -184,14 +184,19 @@ module = [
 ]
 ignore_missing_imports = true

-[tool.pylint.basic]
-good-names = ["i", "j", "k", "ex", "Run", "_", "e", "p", "im", "w", "h", "m", "x", "y", "a", "b", "fp", "n", "f", "s", "v", "q", "dx", "dy"]
-logging-format-style = "old"
-disable = ["raw-checker-failed", "bad-inline-option", "locally-disabled", "file-ignored", "suppressed-message", "useless-suppression", "deprecated-pragma", "use-symbolic-message-instead", "logging-fstring-interpolation", "missing-function-docstring", "too-few-public-methods"]
-
 [tool.ruff]
-src = ["src"]
-select = ["E"]
+select = [
+  "D", # pydocstyle
+  "E", # pycodestyle
+  "W", # pycodestyle
+  "F", # pyflakes
+  "I001", # isort
+  "UP", # pyupgrade
+]
+target-version = "py38"
+
+[tool.ruff.isort]
+known-first-party = ["ocrmypdf"]

 [tool.ruff.pydocstyle]
 convention = "google"
--- a/src/ocrmypdf/__main__.py
+++ b/src/ocrmypdf/__main__.py
@ -11,7 +11,6 @@ import os
 import signal
 import sys
 from contextlib import suppress
-from multiprocessing import set_start_method

 from ocrmypdf import __version__
 from ocrmypdf._plugin_manager import get_parser_options_plugins
--- a/src/ocrmypdf/_concurrent.py
+++ b/src/ocrmypdf/_concurrent.py
@ -51,8 +51,7 @@ class Executor(ABC):
        task_arguments: Iterable | None = None,
        task_finished: Callable | None = None,
    ) -> None:
-        """
-        Set up parallel execution and progress reporting.
+        """Set up parallel execution and progress reporting.

        Args:
            use_threads: If ``False``, the workload is the sort that will benefit from
@ -73,7 +72,6 @@ class Executor(ABC):
                task. This runs in the parent's context, but the parameters must be
                marshallable to the worker.
        """
-
        if not task_arguments:
            return  # Nothing to do!
        if not worker_initializer:
--- a/src/ocrmypdf/_exec/__init__.py
+++ b/src/ocrmypdf/_exec/__init__.py
@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""Manage third party executables"""
+"""Manage third party executables."""

 from __future__ import annotations
--- a/src/ocrmypdf/_exec/ghostscript.py
+++ b/src/ocrmypdf/_exec/ghostscript.py
@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""Interface to Ghostscript executable"""
+"""Interface to Ghostscript executable."""

 from __future__ import annotations

--- a/src/ocrmypdf/_exec/jbig2enc.py
+++ b/src/ocrmypdf/_exec/jbig2enc.py
@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""Interface to jbig2 executable"""
+"""Interface to jbig2 executable."""

 from __future__ import annotations

--- a/src/ocrmypdf/_exec/pngquant.py
+++ b/src/ocrmypdf/_exec/pngquant.py
@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""Interface to pngquant executable"""
+"""Interface to pngquant executable."""

 from __future__ import annotations

--- a/src/ocrmypdf/_exec/tesseract.py
+++ b/src/ocrmypdf/_exec/tesseract.py
@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""Interface to Tesseract executable"""
+"""Interface to Tesseract executable."""

 from __future__ import annotations

@ -54,7 +54,7 @@ TESSERACT_THRESHOLDING_METHODS: dict[str, int] = {


 class TesseractLoggerAdapter(logging.LoggerAdapter):
-    "Prepend [tesseract] to messages emitted from tesseract"
+    "Prepend [tesseract] to messages emitted from tesseract."

    def process(self, msg, kwargs):
        kwargs['extra'] = self.extra
@ -283,7 +283,8 @@ def page_timedout(timeout: float) -> None:

 def _generate_null_hocr(output_hocr: Path, output_text: Path, image: Path) -> None:
    """Produce a .hocr file that reports no text detected on a page that is
-    the same size as the input image."""
+    the same size as the input image.
+    """
    with Image.open(image) as im:
        w, h = im.size

--- a/src/ocrmypdf/_exec/unpaper.py
+++ b/src/ocrmypdf/_exec/unpaper.py
@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""Interface to unpaper executable"""
+"""Interface to unpaper executable."""

 from __future__ import annotations

--- a/src/ocrmypdf/_graft.py
+++ b/src/ocrmypdf/_graft.py
@ -37,7 +37,6 @@ def _update_resources(*, obj, font, font_key, procset):

    obj can be a page or Form XObject.
    """
-
    resources = _ensure_dictionary(obj, Name.Resources)
    fonts = _ensure_dictionary(resources, Name.Font)
    if font_key is not None and font_key not in fonts:
@ -167,7 +166,6 @@ class OcrGrafter:
        the font to page 1 even if page 1 doesn't use it, so we have a way to get it
        back.
        """
-
        page0 = self.pdf_base.pages[0]
        _update_resources(
            obj=page0, font=self.font, font_key=self.font_key, procset=self.procset
@ -200,8 +198,7 @@ class OcrGrafter:
        return self.output_file

    def _find_font(self, text):
-        """Copy a font from the filename text into pdf_base"""
-
+        """Copy a font from the filename text into pdf_base."""
        font, font_key = None, None
        possible_font_names = ('/f-0-0', '/F1')
        try:
@ -234,8 +231,7 @@ class OcrGrafter:
        text_rotation: int,
        strip_old_text: bool,
    ):
-        """Insert the text layer from text page 0 on to pdf_base at page_num"""
-
+        """Insert the text layer from text page 0 on to pdf_base at page_num."""
        # pylint: disable=invalid-name

        log.debug("Grafting")
--- a/src/ocrmypdf/_logging.py
+++ b/src/ocrmypdf/_logging.py
@ -24,7 +24,7 @@ class PageNumberFilter(logging.Filter):


 class TqdmConsole:
-    """Wrapper to log messages in a way that is compatible with tqdm progress bar
+    """Wrapper to log messages in a way that is compatible with tqdm progress bar.

    This routes log messages through tqdm so that it can print them above the
    progress bar, and then refresh the progress bar, rather than overwriting
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@ -121,7 +121,6 @@ def _pdf_guess_version(input_file: Path, search_window=1024) -> str:

    Returns empty string if not found, indicating file is probably not PDF.
    """
-
    with open(input_file, 'rb') as f:
        signature = f.read(search_window)
    m = re.search(br'%PDF-(\d\.\d)', signature)
@ -222,7 +221,7 @@ def _vector_page_dpi(pageinfo: PageInfo) -> int:


 def get_page_dpi(pageinfo: PageInfo, options) -> Resolution:
-    "Get the DPI when nonsquare DPI is tolerable"
+    "Get the DPI when nonsquare DPI is tolerable."
    xres = max(
        pageinfo.dpi.x or VECTOR_PAGE_DPI,
        options.oversample or 0.0,
@ -237,7 +236,7 @@ def get_page_dpi(pageinfo: PageInfo, options) -> Resolution:


 def get_page_square_dpi(pageinfo: PageInfo, options) -> Resolution:
-    "Get the DPI when we require xres == yres, scaled to physical units"
+    "Get the DPI when we require xres == yres, scaled to physical units."
    xres = pageinfo.dpi.x or 0.0
    yres = pageinfo.dpi.y or 0.0
    userunit = float(pageinfo.userunit) or 1.0
@ -253,7 +252,7 @@ def get_page_square_dpi(pageinfo: PageInfo, options) -> Resolution:


 def get_canvas_square_dpi(pageinfo: PageInfo, options) -> Resolution:
-    """Get the DPI when we require xres == yres, in Postscript units"""
+    """Get the DPI when we require xres == yres, in Postscript units."""
    units = float(
        max(
            (pageinfo.dpi.x) or VECTOR_PAGE_DPI,
@ -358,9 +357,7 @@ def rasterize_preview(input_file: Path, page_context: PageContext) -> Path:


 def describe_rotation(page_context: PageContext, orient_conf, correction: int) -> str:
-    """
-    Describe the page rotation we are going to perform.
-    """
+    """Describe the page rotation we are going to perform."""
    direction = {0: '⇧', 90: '⇨', 180: '⇩', 270: '⇦'}
    turns = {0: ' ', 90: '⬏', 180: '↻', 270: '⬑'}

@ -401,7 +398,6 @@ def get_orientation_correction(preview: Path, page_context: PageContext) -> int:
    which points it (hopefully) upright. _graft.py takes care of the orienting
    the image and text layers.
    """
-
    orient_conf = page_context.plugin_manager.hook.get_ocr_engine().get_orientation(
        preview, page_context.options
    )
@ -514,10 +510,11 @@ def preprocess_clean(input_file: Path, page_context: PageContext) -> Path:


 def create_ocr_image(image: Path, page_context: PageContext) -> Path:
-    """Create the image we send for OCR. May not be the same as the display
-    image depending on preprocessing. This image will never be shown to the
-    user."""
+    """Create the image we send for OCR.

+    Might not be the same as the display image depending on preprocessing.
+    This image will never be shown to the user.
+    """
    output_file = page_context.get_path('ocr.png')
    options = page_context.options
    with Image.open(image) as im:
--- a/src/ocrmypdf/_sync.py
+++ b/src/ocrmypdf/_sync.py
@ -251,8 +251,7 @@ def worker_init(max_pixels: int) -> None:


 def exec_concurrent(context: PdfContext, executor: Executor) -> Sequence[str]:
-    """Execute the pipeline concurrently"""
-
+    """Execute the pipeline concurrently."""
    # Run exec_page_sync on every page context
    options = context.options
    max_workers = min(len(context.pdfinfo), options.jobs)
@ -316,8 +315,7 @@ def exec_concurrent(context: PdfContext, executor: Executor) -> Sequence[str]:
 def configure_debug_logging(
    log_filename: Path, prefix: str = ''
 ) -> logging.FileHandler:
-    """
-    Create a debug log file at a specified location.
+    """Create a debug log file at a specified location.

    Arguments:
        log_filename: Where to the put the log file.
--- a/src/ocrmypdf/api.py
+++ b/src/ocrmypdf/api.py
@ -86,7 +86,6 @@ def configure_logging(
    Returns:
        The toplevel logger for ocrmypdf (or the root logger, if we are managing it).
    """
-
    prefix = '' if manage_root_logger else 'ocrmypdf'

    log = logging.getLogger(prefix)
@ -277,6 +276,8 @@ def ocr(  # pylint: disable=unused-argument
            When a stream is used as output, whether via a writable object or
            ``"-"``, some final validation steps are not performed (we do not read
            back the stream after it is written).
+
+
    Raises:
        ocrmypdf.MissingDependencyError: If a required dependency program is missing or
            was not found on PATH.
--- a/src/ocrmypdf/builtin_plugins/concurrency.py
+++ b/src/ocrmypdf/builtin_plugins/concurrency.py
@ -30,7 +30,7 @@ WorkerInit = Callable[[Queue, UserInit, int], None]


 def log_listener(q: Queue):
-    """Listen to the worker processes and forward the messages to logging
+    """Listen to the worker processes and forward the messages to logging.

    For simplicity this is a thread rather than a process. Only one process
    should actually write to sys.stderr or whatever we're using, so if this is
@ -39,7 +39,6 @@ def log_listener(q: Queue):
    See:
    https://docs.python.org/3/howto/logging-cookbook.html#logging-to-a-single-file-from-multiple-processes
    """
-
    while True:
        try:
            record = q.get()
@ -59,8 +58,7 @@ def process_sigbus(*args):


 def process_init(q: Queue, user_init: UserInit, loglevel) -> None:
-    """Initialize a process pool worker"""
-
+    """Initialize a process pool worker."""
    # Ignore SIGINT (our parent process will kill us gracefully)
    signal.signal(signal.SIGINT, signal.SIG_IGN)

--- a/src/ocrmypdf/builtin_plugins/ghostscript.py
+++ b/src/ocrmypdf/builtin_plugins/ghostscript.py
@ -8,7 +8,6 @@ import logging

 from ocrmypdf import hookimpl
 from ocrmypdf._exec import ghostscript
-from ocrmypdf._validation import HOCR_OK_LANGS
 from ocrmypdf.exceptions import MissingDependencyError
 from ocrmypdf.subprocess import check_external_program

--- a/src/ocrmypdf/cli.py
+++ b/src/ocrmypdf/cli.py
@ -15,7 +15,7 @@ T = TypeVar('T', int, float)


 def numeric(basetype: Callable[[Any], T], min_: T | None = None, max_: T | None = None):
-    """Validator for numeric params"""
+    """Validator for numeric params."""
    min_ = basetype(min_) if min_ is not None else None
    max_ = basetype(max_) if max_ is not None else None

@ -46,7 +46,7 @@ def str_to_int(mapping: Mapping[str, int]):


 class ArgumentParser(argparse.ArgumentParser):
-    """Override parser's default behavior of calling sys.exit()
+    """Override parser's default behavior of calling sys.exit().

    https://stackoverflow.com/questions/5943249/python-argparse-and-controlling-overriding-the-exit-status-code

--- a/src/ocrmypdf/extra_plugins/semfree.py
+++ b/src/ocrmypdf/extra_plugins/semfree.py
@ -73,8 +73,7 @@ class ConnectionLogHandler(logging.handlers.QueueHandler):
 def process_loop(
    conn: Connection, user_init: Callable[[], None], loglevel, task, task_args
 ):
-    """Initialize a process pool worker"""
-
+    """Initialize a process pool worker."""
    # Install SIGBUS handler (so our parent process can abort somewhat gracefully)
    with suppress(AttributeError):  # Windows and Cygwin do not have SIGBUS
        # Windows and Cygwin do not have pthread_sigmask or SIGBUS
--- a/src/ocrmypdf/helpers.py
+++ b/src/ocrmypdf/helpers.py
@ -108,7 +108,7 @@ class Resolution(Generic[T]):


 class NeverRaise(Exception):
-    """An exception that is never raised"""
+    """An exception that is never raised."""


 def safe_symlink(input_file: os.PathLike, soft_link_name: os.PathLike):
@ -170,7 +170,7 @@ def monotonic(seq: Sequence) -> bool:


 def page_number(input_file: os.PathLike) -> int:
-    """Get one-based page number implied by filename (000002.pdf -> 2)"""
+    """Get one-based page number implied by filename (000002.pdf -> 2)."""
    return int(os.path.basename(os.fspath(input_file))[0:6])


--- a/src/ocrmypdf/hocrtransform.py
+++ b/src/ocrmypdf/hocrtransform.py
@ -99,11 +99,9 @@ class HocrTransformError(Exception):


 class HocrTransform:
-
-    """
-    A class for converting documents from the hOCR format.
+    """A class for converting documents from the hOCR format.
    For details of the hOCR format, see:
-    http://kba.cloud/hocr-spec/
+    http://kba.cloud/hocr-spec/.
    """

    box_pattern = re.compile(r'bbox((\s+\d+){4})')
@ -143,9 +141,7 @@ class HocrTransform:
            raise HocrTransformError("hocr file is missing page dimensions")

    def __str__(self):  # pragma: no cover
-        """
-        Return the textual content of the HTML body
-        """
+        """Return the textual content of the HTML body."""
        if self.hocr is None:
            return ''
        body = self.hocr.find(self._child_xpath('body'))
@ -155,9 +151,7 @@ class HocrTransform:
            return ''

    def _get_element_text(self, element: Element):
-        """
-        Return the textual content of the element and its children
-        """
+        """Return the textual content of the element and its children."""
        text = ''
        if element.text is not None:
            text += element.text
@ -169,10 +163,7 @@ class HocrTransform:

    @classmethod
    def element_coordinates(cls, element: Element) -> Rect:
-        """
-        Returns a tuple containing the coordinates of the bounding box around
-        an element
-        """
+        """Get coordinates of the bounding box around an element."""
        out = Rect._make(0 for _ in range(4))
        if 'title' in element.attrib:
            matches = cls.box_pattern.search(element.attrib['title'])
@ -183,9 +174,7 @@ class HocrTransform:

    @classmethod
    def baseline(cls, element: Element) -> tuple[float, float]:
-        """
-        Returns a tuple containing the baseline slope and intercept.
-        """
+        """Get baseline's slope and intercept."""
        if 'title' in element.attrib:
            matches = cls.baseline_pattern.search(element.attrib['title'])
            if matches:
@ -193,9 +182,7 @@ class HocrTransform:
        return (0.0, 0.0)

    def pt_from_pixel(self, pxl) -> Rect:
-        """
-        Returns the quantity in PDF units (pt) given quantity in pixels
-        """
+        """Returns the quantity in PDF units (pt) given quantity in pixels."""
        return Rect._make((c / self.dpi * inch) for c in pxl)

    def _child_xpath(self, html_tag: str, html_class: str | None = None) -> str:
@ -206,11 +193,7 @@ class HocrTransform:

    @classmethod
    def replace_unsupported_chars(cls, s: str) -> str:
-        """
-        Given an input string, returns the corresponding string that:
-        * is available in the Helvetica facetype
-        * does not contain any ligature (to allow easy search in the PDF file)
-        """
+        """Replaces characters with those available in the Helvetica typeface."""
        return s.translate(cls.ligatures)

    def topdown_position(self, element):
@ -231,8 +214,8 @@ class HocrTransform:
        invisible_text: bool = False,
        interword_spaces: bool = False,
    ) -> None:
-        """
-        Creates a PDF file with an image superimposed on top of the text.
+        """Creates a PDF file with an image superimposed on top of the text.
+
        Text is positioned according to the bounding box of the lines in
        the hOCR file.
        The image need not be identical to the image used to create the hOCR
--- a/src/ocrmypdf/optimize.py
+++ b/src/ocrmypdf/optimize.py
@ -230,7 +230,7 @@ def extract_images(
    options,
    extract_fn: Callable[..., XrefExt | None],
 ) -> Iterator[tuple[int, XrefExt]]:
-    """Extract image using extract_fn
+    """Extract image using extract_fn.

    Enumerate images on each page, lookup their xref/ID number in the PDF.
    Exclude images that are soft masks (i.e. alpha transparency related).
@ -244,7 +244,6 @@ def extract_images(
    it does a tuple should be returned: (xref, ext) where .ext is the file
    extension. extract_fn must also extract the file it finds interesting.
    """
-
    include_xrefs: MutableSet[Xref] = set()
    exclude_xrefs: MutableSet[Xref] = set()
    pageno_for_xref = {}
@ -289,8 +288,7 @@ def extract_images(
 def extract_images_generic(
    pike: Pdf, root: Path, options
 ) -> tuple[list[Xref], list[Xref]]:
-    """Extract any >=2bpp image we think we can improve"""
-
+    """Extract any >=2bpp image we think we can improve."""
    jpegs = []
    pngs = []
    for _, xref_ext in extract_images(pike, root, options, extract_image_generic):
@ -304,8 +302,7 @@ def extract_images_generic(


 def extract_images_jbig2(pike: Pdf, root: Path, options) -> dict[int, list[XrefExt]]:
-    """Extract any bitonal image that we think we can improve as JBIG2"""
-
+    """Extract any bitonal image that we think we can improve as JBIG2."""
    jbig2_groups = defaultdict(list)
    for pageno, xref_ext in extract_images(pike, root, options, extract_image_jbig2):
        group = pageno // options.jbig2_page_group_size
@ -318,7 +315,7 @@ def extract_images_jbig2(pike: Pdf, root: Path, options) -> dict[int, list[XrefE
 def _produce_jbig2_images(
    jbig2_groups: dict[int, list[XrefExt]], root: Path, options, executor: Executor
 ) -> None:
-    """Produce JBIG2 images from their groups"""
+    """Produce JBIG2 images from their groups."""

    def jbig2_group_args(root: Path, groups: dict[int, list[XrefExt]]):
        for group, xref_exts in groups.items():
@ -674,7 +671,7 @@ def main(infile, outfile, level, jobs=1):
    from tempfile import TemporaryDirectory  # pylint: disable=import-outside-toplevel

    class OptimizeOptions:
-        """Emulate ocrmypdf's options"""
+        """Emulate ocrmypdf's options."""

        def __init__(
            self, input_file, jobs, optimize_, jpeg_quality, png_quality, jb2lossy
--- a/src/ocrmypdf/pdfa.py
+++ b/src/ocrmypdf/pdfa.py
@ -1,9 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""
-Utilities for PDF/A production and confirmation with Ghostspcript.
-"""
+"""Utilities for PDF/A production and confirmation with Ghostspcript."""

 from __future__ import annotations

@ -75,7 +73,7 @@ def _make_postscript(icc_name: str, icc_data: bytes, colors: int) -> Iterator[st


 def generate_pdfa_ps(target_filename: Path, icc: str = 'sRGB'):
-    """Create a Postscript PDFMARK file for Ghostscript PDF/A conversion
+    """Create a Postscript PDFMARK file for Ghostscript PDF/A conversion.

    pdfmark is an extension to the Postscript language that describes some PDF
    features like bookmarks and annotations. It was originally specified Adobe
@ -118,7 +116,6 @@ def file_claims_pdfa(filename: Path):
    This only checks if the XMP metadata contains a PDF/A marker. It does not
    do full PDF/A validation.
    """
-
    with pikepdf.open(filename) as pdf:
        pdfmeta = pdf.open_metadata()
        if not pdfmeta.pdfa_status:
--- a/src/ocrmypdf/pdfinfo/info.py
+++ b/src/ocrmypdf/pdfinfo/info.py
@ -164,7 +164,7 @@ class TextMarker:


 def _normalize_stack(graphobjs):
-    """Convert runs of qQ's in the stack into single graphobjs"""
+    """Convert runs of qQ's in the stack into single graphobjs."""
    for operands, operator in graphobjs:
        operator = str(operator)
        if re.match(r'Q*q+$', operator):  # Zero or more Q, one or more q
@ -200,7 +200,6 @@ def _interpret_contents(contentstream: Object, initial_shorthand=UNIT_SQUARE):
    undefined in the spec, but we just pretend nothing happened and leave the
    CTM unchanged.
    """
-
    stack = []
    ctm = PdfMatrix(initial_shorthand)
    xobject_settings: list[XobjectSettings] = []
@ -307,7 +306,6 @@ def _get_dpi(ctm_shorthand, image_size) -> Resolution:
    /MediaBox.

    """
-
    a, b, c, d, _, _ = ctm_shorthand  # pylint: disable=invalid-name

    # Calculate the width and height of the image in PDF units
@ -451,8 +449,7 @@ class ImageInfo:


 def _find_inline_images(contentsinfo: ContentsInfo) -> Iterator[ImageInfo]:
-    "Find inline images in the contentstream"
-
+    "Find inline images in the contentstream."
    for n, inline in enumerate(contentsinfo.inline_images):
        yield ImageInfo(
            name=f'inline-{n:02d}', shorthand=inline.shorthand, inline=inline.iimage
@ -460,7 +457,7 @@ def _find_inline_images(contentsinfo: ContentsInfo) -> Iterator[ImageInfo]:


 def _image_xobjects(container) -> Iterator[tuple[Object, str]]:
-    """Search for all XObject-based images in the container
+    """Search for all XObject-based images in the container.

    Usually the container is a page, but it could also be a Form XObject
    that contains images. Filter out the Form XObjects which are dealt with
@ -471,7 +468,6 @@ def _image_xobjects(container) -> Iterator[tuple[Object, str]]:
    since the object does not know its own name.

    """
-
    if '/Resources' not in container:
        return
    resources = container['/Resources']
@ -488,14 +484,13 @@ def _image_xobjects(container) -> Iterator[tuple[Object, str]]:
 def _find_regular_images(
    container: Object, contentsinfo: ContentsInfo
 ) -> Iterator[ImageInfo]:
-    """Find images stored in the container's /Resources /XObject
+    """Find images stored in the container's /Resources /XObject.

    Usually the container is a page, but it could also be a Form XObject
    that contains images.

    Generates images with their DPI at time of drawing.
    """
-
    for pdfimage, xobj in _image_xobjects(container):
        if xobj not in contentsinfo.name_index:
            continue
@ -512,7 +507,7 @@ def _find_regular_images(


 def _find_form_xobject_images(pdf: Pdf, container: Object, contentsinfo: ContentsInfo):
-    """Find any images that are in Form XObjects in the container
+    """Find any images that are in Form XObjects in the container.

    The container may be a page, or a parent Form XObject.

@ -546,7 +541,7 @@ def _find_form_xobject_images(pdf: Pdf, container: Object, contentsinfo: Content
 def _process_content_streams(
    *, pdf: Pdf, container: Object, shorthand=None
 ) -> Iterator[VectorMarker | TextMarker | ImageInfo]:
-    """Find all individual instances of images drawn in the container
+    """Find all individual instances of images drawn in the container.

    Usually the container is a page, but it may also be a Form XObject.

@ -563,7 +558,6 @@ def _process_content_streams(
    downsampling.

    """
-
    if container.get('/Type') == '/Page' and '/Contents' in container:
        initial_shorthand = shorthand or UNIT_SQUARE
    elif container.get('/Type') == '/XObject' and container['/Subtype'] == '/Form':
@ -595,8 +589,7 @@ def _process_content_streams(


 def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_height) -> bool:
-    """Smarter text detection that ignores text in margins"""
-
+    """Smarter text detection that ignores text in margins."""
    pw, ph = float(page_width), float(page_height)  # pylint: disable=invalid-name

    margin_ratio = 0.125
@ -608,10 +601,9 @@ def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_height) ->
    )

    def rects_intersect(a: FloatRect, b: FloatRect) -> bool:
-        """
-        Where (a,b) are 4-tuple rects (left-0, top-1, right-2, bottom-3)
+        """Where (a,b) are 4-tuple rects (left-0, top-1, right-2, bottom-3)
        https://stackoverflow.com/questions/306316/determine-if-two-rectangles-overlap-each-other
-        Formula assumes all boxes are in first quadrant
+        Formula assumes all boxes are in first quadrant.
        """
        return a[0] < b[2] and a[2] > b[0] and a[1] > b[3] and a[3] < b[1]

@ -624,7 +616,7 @@ def _page_has_text(text_blocks: Iterable[FloatRect], page_width, page_height) ->


 def simplify_textboxes(miner, textbox_getter) -> Iterator[TextboxInfo]:
-    """Extract only limited content from text boxes
+    """Extract only limited content from text boxes.

    We do this to save memory and ensure that our objects are pickleable.
    """
@ -910,7 +902,7 @@ DEFAULT_EXECUTOR = SerialExecutor()


 class PdfInfo:
-    """Get summary information about a PDF"""
+    """Get summary information about a PDF."""

    def __init__(
        self,
--- a/src/ocrmypdf/pdfinfo/layout.py
+++ b/src/ocrmypdf/pdfinfo/layout.py
@ -63,7 +63,7 @@ def pdftype3font__pscript5_get_ascent(self):


 class LTStateAwareChar(LTChar):
-    """A subclass of LTChar that tracks text render mode at time of drawing"""
+    """A subclass of LTChar that tracks text render mode at time of drawing."""

    __slots__ = (
        'rendermode',
@ -111,7 +111,7 @@ class LTStateAwareChar(LTChar):
        self.rendermode = textstate.render

    def is_compatible(self, obj):
-        """Check if characters can be combined into a textline
+        """Check if characters can be combined into a textline.

        We consider characters compatible if:
            - the Unicode mapping is known, and both have the same render mode
@ -146,7 +146,7 @@ class LTStateAwareChar(LTChar):


 class TextPositionTracker(PDFLayoutAnalyzer):
-    """A page layout analyzer that pays attention to text visibility"""
+    """A page layout analyzer that pays attention to text visibility."""

    def __init__(self, rsrcmgr, pageno=1, laparams=None):
        super().__init__(rsrcmgr, pageno, laparams)
--- a/src/ocrmypdf/pluginspec.py
+++ b/src/ocrmypdf/pluginspec.py
@ -99,6 +99,8 @@ def check_options(options: Namespace) -> None:
        ocrmypdf.exceptions.ExitCodeException: If options are not acceptable
            and the application should terminate gracefully with an informative
            message and error code.
+
+
    Note:
        This hook will be called from the main process, and may modify global state
        before child worker processes are forked.
@ -127,6 +129,8 @@ def get_executor(progressbar_class) -> Executor:
    Note:
        This hook will be called from the main process, and may modify global state
        before child worker processes are forked.
+
+
    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """
@ -159,7 +163,6 @@ def get_progressbar_class():
    Here is how OCRmyPDF will use the progress bar:

    Example:
-
        pbar_class = pm.hook.get_progressbar_class()
        with pbar_class(**tqdm_kwargs) as pbar:
            ...
@ -181,6 +184,8 @@ def validate(pdfinfo: PdfInfo, options: Namespace) -> None:
        ocrmypdf.exceptions.ExitCodeException: If options or pdfinfo are not acceptable
            and the application should terminate gracefully with an informative
            message and error code.
+
+
    Note:
        This hook will be called from the main process, and may modify global state
        before child worker processes are forked.
@ -218,6 +223,8 @@ def rasterize_pdf_page(
    Note:
        This hook will be called from child processes. Modifying global state
        will not affect the main process or other child processes.
+
+
    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """
@ -245,6 +252,8 @@ def filter_ocr_image(page: PageContext, image: Image.Image) -> Image.Image:
    Note:
        This hook will be called from child processes. Modifying global state
        will not affect the main process or other child processes.
+
+
    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """
@ -281,6 +290,8 @@ def filter_page_image(page: PageContext, image_filename: Path) -> Path:
    Note:
        This hook will be called from child processes. Modifying global state
        will not affect the main process or other child processes.
+
+
    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """
@ -323,6 +334,8 @@ def filter_pdf_page(page: PageContext, image_filename: Path, output_pdf: Path) -
    Note:
        This hook will be called from child processes. Modifying global state
        will not affect the main process or other child processes.
+
+
    Note:
        This is a :ref:`firstresult hook<firstresult>`.
    """
@ -381,7 +394,8 @@ class OcrEngine(ABC):
        """Returns the set of all languages that are supported by the engine.

        Languages are typically given in 3-letter ISO 639-2 codes, but actually
-        can be any value understood by the OCR engine."""
+        can be any value understood by the OCR engine.
+        """

    @staticmethod
    @abstractmethod
@ -474,7 +488,7 @@ def generate_pdfa(
    Note:
        This is a :ref:`firstresult hook<firstresult>`.

-    See also:
+    See Also:
        https://github.com/tqdm/tqdm
    """

--- a/src/ocrmypdf/quality.py
+++ b/src/ocrmypdf/quality.py
@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0

-"""Utilities to measure OCR quality"""
+"""Utilities to measure OCR quality."""


 from __future__ import annotations
--- a/src/ocrmypdf/subprocess/__init__.py
+++ b/src/ocrmypdf/subprocess/__init__.py
@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MPL-2.0
-"""Wrappers to manage subprocess calls"""
+"""Wrappers to manage subprocess calls."""

 from __future__ import annotations

@ -34,7 +34,7 @@ def run(
    check: bool = False,
    **kwargs,
 ) -> CompletedProcess:
-    """Wrapper around :py:func:`subprocess.run`
+    """Wrapper around :py:func:`subprocess.run`.

    The main purpose of this wrapper is to log subprocess output in an orderly
    fashion that identifies the responsible subprocess. An additional
@ -141,7 +141,7 @@ def get_version(
    regex=r'(\d+(\.\d+)*)',
    env: OsEnviron | None = None,
 ) -> str:
-    """Get the version of the specified program
+    """Get the version of the specified program.

    Arguments:
        program: The program to version check.
@ -323,7 +323,6 @@ def check_external_program(
        version_parser: A class that should be used to parse and compare version
            numbers. Used when version numbers do not follow standard conventions.
    """
-
    try:
        found_version = version_checker()
    except (CalledProcessError, FileNotFoundError) as e:
--- a/src/ocrmypdf/subprocess/_windows.py
+++ b/src/ocrmypdf/subprocess/_windows.py
@ -169,8 +169,7 @@ SHIMS = [


 def fix_windows_args(program: str, args, env):
-    """Adjust our desired program and command line arguments for use on Windows"""
-
+    """Adjust our desired program and command line arguments for use on Windows."""
    # If we are running a .py on Windows, ensure we call it with this Python
    # (to support test suite shims)
    if program.lower().endswith('.py'):
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -3,12 +3,10 @@

 from __future__ import annotations

-import os
 import platform
 import sys
 from pathlib import Path
-from subprocess import PIPE, CompletedProcess, run
-from typing import List
+from subprocess import CompletedProcess, run

 import pytest

@ -71,10 +69,13 @@ def outtxt(tmp_path) -> Path:

@pytest.fixture(scope="function")
 def no_outpdf(tmp_path) -> Path:
-    """This just documents the fact that a test is not expected to produce
+    """Document fact that a test is not expected to produce output.
+
+    This just documents the fact that a test is not expected to produce
    output. Unfortunately an assertion failure inside a test fixture produces
    an error rather than a test failure, so no testing is done. It's up to
-    the test to confirm that no output file was created."""
+    the test to confirm that no output file was created.
+    """
    return tmp_path / 'no_output.pdf'


@ -110,7 +111,6 @@ def run_ocrmypdf_api(input_file: Path, output_file: Path, *args) -> ExitCode:
    The return code must always be checked or the test may report a failure
    as a pass.
    """
-
    api_args = [str(input_file), str(output_file)] + [
        str(arg) for arg in args if arg is not None
    ]
@ -128,7 +128,6 @@ def run_ocrmypdf(
    If an exception is thrown this fact will be returned as part of the result
    text and return code rather than exception objects.
    """
-
    p_args = (
        [sys.executable, '-m', 'ocrmypdf']
        + [str(arg) for arg in args if arg is not None]
--- a/tests/plugins/tesseract_badutf8.py
+++ b/tests/plugins/tesseract_badutf8.py
@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MIT

-"""Tesseract bad utf8
+"""Tesseract bad utf8.

 In some cases, some versions of Tesseract can output binary gibberish or data
 that is not UTF-8 compatible, so we are forced to check that we can convert it
--- a/tests/plugins/tesseract_cache.py
+++ b/tests/plugins/tesseract_cache.py
@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MIT
-"""Cache output of tesseract to speed up test suite
+"""Cache output of tesseract to speed up test suite.

 The cache is keyed by the input test file. The input arguments are slugged
 into a hideous filename that more or less represents them literally.  Joined
--- a/tests/plugins/tesseract_debug_rotate.py
+++ b/tests/plugins/tesseract_debug_rotate.py
@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MIT
-"""Tesseract no-op/fixed rotate plugin
+"""Tesseract no-op/fixed rotate plugin.

 To quickly run tests where getting OCR output is not necessary and we want to test
 the rotation pipeline.
--- a/tests/plugins/tesseract_noop.py
+++ b/tests/plugins/tesseract_noop.py
@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: 2022 James R. Barlow
 # SPDX-License-Identifier: MIT
-"""Tesseract no-op plugin
+"""Tesseract no-op plugin.

 To quickly run tests where getting OCR output is not necessary.

--- a/tests/test_check_pdf.py
+++ b/tests/test_check_pdf.py
@ -3,8 +3,6 @@

 from __future__ import annotations

-import pytest
-
 from ocrmypdf.helpers import check_pdf


--- a/tests/test_completion.py
+++ b/tests/test_completion.py
@ -4,7 +4,7 @@
 from __future__ import annotations

 import os
-from subprocess import PIPE, run
+from subprocess import run

 import pytest

--- a/tests/test_graft.py
+++ b/tests/test_graft.py
@ -6,7 +6,6 @@ from __future__ import annotations
 from unittest.mock import patch

 import pikepdf
-import pytest

 import ocrmypdf

--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@ -5,8 +5,6 @@ from __future__ import annotations

 import logging

-import pytest
-
 from ocrmypdf._sync import configure_debug_logging


--- a/tests/test_main.py
+++ b/tests/test_main.py
@ -446,7 +446,7 @@ def test_linearized_pdf_and_indirect_object(resources, outpdf):


 def test_very_high_dpi(resources, outpdf):
-    "Checks for a Decimal quantize error with high DPI, etc"
+    "Checks for a Decimal quantize error with high DPI, etc."
    check_ocrmypdf(
        resources / '2400dpi.pdf',
        outpdf,
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@ -231,17 +231,6 @@ def test_xml_metadata_preserved(
        'pdf:keywords',
    ]
    acquired_properties = ['dc:format']
-    might_change_properties = [
-        'dc:date',
-        'pdf:pdfversion',
-        'pdf:Producer',
-        'xmp:CreateDate',
-        'xmp:ModifyDate',
-        'xmp:MetadataDate',
-        'xmp:CreatorTool',
-        'xmpMM:DocumentId',
-        'xmpMM:DnstanceId',
-    ]

    # Cleanup messy data structure
    # Top level is key-value mapping of namespaces to keys under namespace,
--- a/tests/test_quality.py
+++ b/tests/test_quality.py
@ -3,8 +3,6 @@

 from __future__ import annotations

-import pytest
-
 from ocrmypdf import quality as qual


--- a/tests/test_rotation.py
+++ b/tests/test_rotation.py
@ -11,12 +11,13 @@ from os import fspath
 import img2pdf
 import pikepdf
 import pytest
+from PIL import Image, ImageChops
+from reportlab.pdfgen.canvas import Canvas
+
 from ocrmypdf._exec import ghostscript
 from ocrmypdf._plugin_manager import get_plugin_manager
 from ocrmypdf.helpers import IMG2PDF_KWARGS, Resolution
 from ocrmypdf.pdfinfo import PdfInfo
-from PIL import Image, ImageChops
-from reportlab.pdfgen.canvas import Canvas

 from .conftest import check_ocrmypdf, run_ocrmypdf

@ -152,13 +153,14 @@ def test_autorotate_threshold(threshold, op, comparison_threshold, resources, ou


 def test_rotated_skew_timeout(resources, outpdf):
-    """This document contains an image that is rotated 90 into place with a
+    """Check rotated skew timeout.
+
+    This document contains an image that is rotated 90 into place with a
    /Rotate tag and intentionally skewed by altering the transformation matrix.

    This tests for a bug where the combination of preprocessing and a tesseract
    timeout produced a page whose dimensions did not match the original's.
    """
-
    input_file = resources / 'rotated_skew.pdf'
    in_pageinfo = PdfInfo(input_file)[0]

--- a/tests/test_stdio.py
+++ b/tests/test_stdio.py
@ -4,13 +4,10 @@
 from __future__ import annotations

 import os
-import sys
-from pathlib import Path
-from subprocess import DEVNULL, PIPE, Popen, run
+from subprocess import DEVNULL, PIPE, run

 import pytest

-from ocrmypdf.exceptions import ExitCode
 from ocrmypdf.helpers import check_pdf

 from .conftest import run_ocrmypdf
--- a/tests/test_unpaper.py
+++ b/tests/test_unpaper.py
@ -8,14 +8,13 @@ from os import fspath
 from unittest.mock import patch

 import pytest
-from PIL import Image

 from ocrmypdf._exec import unpaper
 from ocrmypdf._plugin_manager import get_parser_options_plugins
 from ocrmypdf._validation import check_options
 from ocrmypdf.exceptions import ExitCode, MissingDependencyError

-from .conftest import check_ocrmypdf, have_unpaper, ocrmypdf_exec, run_ocrmypdf
+from .conftest import check_ocrmypdf, have_unpaper, run_ocrmypdf

 # pylint: disable=redefined-outer-name