OCRmyPDF/tests/test_pageinfo.py

118 lines
3.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2015-07-28 04:36:58 -07:00
# © 2015 James R. Barlow: github.com/jbarlow83
from ocrmypdf import pageinfo
from reportlab.pdfgen.canvas import Canvas
from PIL import Image
from tempfile import NamedTemporaryFile
from math import isclose
from contextlib import suppress
import os
import shutil
import pytest
import img2pdf
2015-09-10 07:01:14 -07:00
import pytest
2016-01-11 17:40:44 -08:00
import sys
def test_single_page_text(outdir):
    """A one-page PDF containing only a line of text has text, no images."""
    pdf_path = outdir / 'text.pdf'
    page_size = (8*72, 6*72)  # 8" x 6" expressed in points
    canvas = Canvas(str(pdf_path), pagesize=page_size)

    # Emit a single line of 12 pt Helvetica with its origin at (1", 3").
    line = canvas.beginText()
    line.setFont('Helvetica', 12)
    line.setTextOrigin(1*72, 3*72)
    line.textLine("Methink'st thou art a general offence and every"
                  " man should beat thee.")
    canvas.drawText(line)
    canvas.showPage()
    canvas.save()

    info = pageinfo.PdfInfo(pdf_path)
    assert len(info) == 1

    first_page = info[0]
    assert first_page.has_text
    assert len(first_page.images) == 0
def test_single_page_image(outdir):
    """An 8x8 1-bit PNG wrapped by img2pdf is seen as one 8 DPI gray image."""
    pdf_path = outdir / 'image-mono.pdf'
    png_path = outdir / 'tmp.png'

    # 8x8 1-bit image, all zero except for the main diagonal.
    bitmap = Image.new('1', (8, 8), 0)
    for i in range(8):
        bitmap.putpixel((i, i), 1)
    bitmap.save(str(png_path), format='PNG')

    # Request a layout of 8 DPI on each axis, then wrap the PNG in a PDF.
    dpi_spec = (img2pdf.ImgSize.dpi, 8)
    layout = img2pdf.get_layout_fun(
        None, (dpi_spec, dpi_spec), None, None, None)
    pdf_path.write_bytes(
        img2pdf.convert(
            png_path.read_bytes(), producer="img2pdf", with_pdfrw=False,
            layout_fun=layout))

    info = pageinfo.PdfInfo(pdf_path)
    assert len(info) == 1

    first_page = info[0]
    assert not first_page.has_text
    assert len(first_page.images) == 1

    image = first_page.images[0]
    assert image.width == 8
    assert image.color == 'gray'

    # Resolution on both axes should match the 8 DPI requested above.
    assert isclose(image.xres, 8)
    assert isclose(image.yres, 8)
def test_single_page_inline_image(outdir):
    """An inline image drawn by reportlab is detected with the right size.

    An 8x8 1-bit PNG is drawn as an inline image into a 72x72 pt (1"x1")
    area, so the reported horizontal resolution should be 8 DPI.
    """
    filename = outdir / 'image-mono-inline.pdf'
    # Keep the scratch PNG in outdir instead of a NamedTemporaryFile:
    # reopening a still-open named temporary file by its name is not
    # portable (it fails on Windows).
    im_path = outdir / 'image-mono-inline.png'

    pdf = Canvas(str(filename), pagesize=(8*72, 6*72))

    # 8x8 1-bit image, all zero except for the main diagonal.
    im = Image.new('1', (8, 8), 0)
    for n in range(8):
        im.putpixel((n, n), 1)
    im.save(str(im_path), format='PNG')

    # Draw image in a 72x72 pt or 1"x1" area
    pdf.drawInlineImage(str(im_path), 0, 0, width=72, height=72)
    pdf.showPage()
    pdf.save()

    pdfinfo = pageinfo.PdfInfo(filename)
    print(pdfinfo)  # visible in pytest's captured output when the test fails
    pdfimage = pdfinfo[0].images[0]
    assert isclose(pdfimage.xres, 8)
    assert pdfimage.color != '-'
    assert pdfimage.width == 8
2015-07-27 00:25:24 -07:00
def test_jpeg(resources, outdir):
    """The first image in c02-22.pdf is reported as a 150 DPI JPEG."""
    info = pageinfo.PdfInfo(resources / 'c02-22.pdf')
    first_image = info[0].images[0]
    assert first_image.enc == 'jpeg'
    assert isclose(first_image.xres, 150)
2015-07-27 00:25:24 -07:00
2017-02-14 12:51:15 -08:00
def test_form_xobject(resources):
    """The first image reported for formxobject.pdf has a width of 50."""
    info = pageinfo.PdfInfo(resources / 'formxobject.pdf')
    first_page = info[0]
    assert first_page.images[0].width == 50
def test_no_contents(resources):
    """The first page of no_contents.pdf reports no images and no text."""
    filename = resources / 'no_contents.pdf'
    pdfinfo = pageinfo.PdfInfo(filename)
    page = pdfinfo[0]
    assert len(page.images) == 0
    # Truthiness check rather than `== False`, as the other tests here do.
    assert not page.has_text