Remove some obsolete parameters

2026-01-08 05:02:39 +00:00 · 2023-11-20 00:10:55 -08:00 · 2023-11-20 00:10:55 -08:00 · db2e5132e6
commit db2e5132e6
parent b14f6f778a
3 changed files with 14 additions and 14 deletions
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -747,9 +747,7 @@ def render_hocr_page(hocr: Path, page_context: PageContext) -> Path:
    hocrtransform.to_pdf(
        out_filename=output_file,
        image_filename=None,
-        show_bounding_boxes=False if not debug_mode else True,
        invisible_text=True if not debug_mode else False,
-        interword_spaces=True,
    )
    return output_file

--- a/src/ocrmypdf/hocrtransform/__init__.py
+++ b/src/ocrmypdf/hocrtransform/__init__.py
@@ -153,10 +153,8 @@ class HocrTransform:
        *,
        out_filename: Path,
        image_filename: Path | None = None,
-        show_bounding_boxes: bool = False,
        fontname: str = "Helvetica",
        invisible_text: bool = False,
-        interword_spaces: bool = False,
    ) -> None:
        """Creates a PDF file with an image superimposed on top of the text.

@@ -242,6 +240,11 @@ class HocrTransform:
        fontname: str,
        invisible_text: bool,
    ):
+        """Render the text for a given line.
+
+        The canvas's coordinate system must be configured so that hOCR pixel
+        coordinates are mapped to PDF coordinates.
+        """
        if line is None:
            return
        line_box = self.element_coordinates(line)
@@ -249,19 +252,21 @@ class HocrTransform:

        self._debug_draw_line_bbox(canvas, line_box)

-        # Baseline is a polynomial (usually straight line) in the coordinate system
-        # of the line
+        # Baseline is a polynomial (usually straight line) that describes the
+        # text baseline relative to the bottom left corner of the line bounding
+        # box.
+        bottom_left_corner = line_box.llx, line_box.ury
        slope, intercept = self.baseline(line)
        if abs(slope) < 0.005:
            slope = 0.0
        angle = atan(slope)

        # Setup a new coordinate system on the line box's intercept and rotated by
-        # its slope
+        # its slope.
        canvas.push()
        line_matrix = (
            Matrix()
-            .translated(line_box.llx, line_box.ury)
+            .translated(*bottom_left_corner)
            .translated(0, intercept)
            .rotated(angle / pi * 180)
        )
@@ -279,15 +284,14 @@ class HocrTransform:
            text.set_render_mode(3)  # Invisible (indicates OCR text)

        self._debug_draw_baseline(canvas, line_matrix.inverse().transform(line_box), 0)
-        canvas.set_fill_color(BLACK)  # text in black

+        canvas.set_fill_color(BLACK)  # text in black
        elements = line.findall(self._child_xpath('span', elemclass))
        for elem, next_elem in pairwise(elements + [None]):
            self._do_line_word(
                canvas,
                fontname,
                line_matrix,
-                line_box_height,
                line_box,
                text,
                fontsize,
@@ -302,15 +306,13 @@ class HocrTransform:
        canvas: Canvas,
        fontname,
        line_matrix: Matrix,
-        line_height: float,
        line_box: Rectangle,
        text: PikepdfText,
        fontsize: float,
        elem: Element,
        next_elem: Element | None,
    ):
-        elemtxt = self._get_element_text(elem).strip()
-        elemtxt = self.normalize_text(elemtxt)
+        elemtxt = self.normalize_text(self._get_element_text(elem).strip())
        if elemtxt == '':
            return

--- a/tests/test_hocrtransform.py
+++ b/tests/test_hocrtransform.py
@@ -64,7 +64,7 @@ def test_mono_image(blank_hocr, outdir):
        im.putpixel((n, n), 1)
    im.save(outdir / 'mono.tif', format='TIFF')

-    hocr = hocrtransform.HocrTransform(hocr_filename=str(blank_hocr), dpi=300)
+    hocr = hocrtransform.HocrTransform(hocr_filename=str(blank_hocr), dpi=8)
    hocr.to_pdf(
        out_filename=str(outdir / 'mono.pdf'), image_filename=str(outdir / 'mono.tif')
    )