mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-01-08 05:02:39 +00:00
Adjust page orientation parsing to deal with a change in Tesseract 3.04.01
This commit is contained in:
parent
9b79b4a7c8
commit
71fbda8bf6
@ -524,9 +524,9 @@ def orient_page(
|
||||
|
||||
direction = {
|
||||
0: '⇧',
|
||||
90: '⇦',
|
||||
90: '⇨',
|
||||
180: '⇩',
|
||||
270: '⇨'
|
||||
270: '⇦'
|
||||
}
|
||||
|
||||
log.info(
|
||||
@ -544,8 +544,8 @@ def orient_page(
|
||||
reader = pypdf.PdfFileReader(page_pdf)
|
||||
page = reader.pages[0]
|
||||
|
||||
# Rotate opposite of orientation
|
||||
rotated_page = page.rotateClockwise(orient_conf.angle)
|
||||
# angle is a clockwise angle, so rotating ccw will correct the error
|
||||
rotated_page = page.rotateCounterClockwise(orient_conf.angle)
|
||||
writer.addPage(rotated_page)
|
||||
with open(output_file, 'wb') as out:
|
||||
writer.write(out)
|
||||
@ -788,11 +788,12 @@ def add_text_layer(
|
||||
|
||||
page_text = pdf_text.getPage(0)
|
||||
|
||||
# The text page always will be oriented up
|
||||
# The text page always will be oriented up by this stage
|
||||
# but if lossless_reconstruction, pdf_image may have a rotation applied
|
||||
# we can't just merge the pages, because a page can only have one /Rotate
|
||||
# tag, so the differential rotation must be corrected.
|
||||
# Also, pdf_image may not have its mediabox nailed to (0, 0)
|
||||
# We have to eliminate the /Rotate tag (because it applies to the whole
|
||||
# page) and rotate the image layer to match the text page
|
||||
# Also, pdf_image may not have its mediabox nailed to (0, 0), so may need
|
||||
# translation
|
||||
page_image = pdf_image.getPage(0)
|
||||
rotation = page_image.get('/Rotate', 0)
|
||||
|
||||
|
||||
@ -107,8 +107,20 @@ def get_orientation(input_file, language: list, timeout: float, log):
|
||||
if len(parts) == 2:
|
||||
osd[parts[0].strip()] = parts[1].strip()
|
||||
|
||||
angle = int(osd.get('Orientation in degrees', 0))
|
||||
if 'Orientation' in osd:
|
||||
# Tesseract < 3.04.01
|
||||
# reports "Orientation in degrees" as a counterclockwise angle
|
||||
# We keep it clockwise
|
||||
assert 'Rotate' not in osd
|
||||
angle = -angle % 360
|
||||
else:
|
||||
# Tesseract == 3.04.01, hopefully also Tesseract > 3.04.01
|
||||
# reports "Orientation in degrees" as a clockwise angle
|
||||
assert 'Rotate' in osd
|
||||
|
||||
oc = OrientationConfidence(
|
||||
angle=int(osd.get('Orientation in degrees', 0)),
|
||||
angle=angle,
|
||||
confidence=float(osd.get('Orientation confidence', 0)))
|
||||
return oc
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user