2015-07-26 15:24:42 -07:00
|
|
|
#!/usr/bin/env python3
|
2015-07-28 04:36:58 -07:00
|
|
|
# © 2015 James R. Barlow: github.com/jbarlow83
|
2015-07-26 15:24:42 -07:00
|
|
|
|
|
|
|
from ocrmypdf import pageinfo
|
|
|
|
from reportlab.pdfgen.canvas import Canvas
|
|
|
|
from PIL import Image
|
|
|
|
from tempfile import NamedTemporaryFile
|
|
|
|
from contextlib import suppress
|
|
|
|
import os
|
|
|
|
import shutil
|
2015-07-28 00:43:22 -07:00
|
|
|
import pytest
|
2015-07-30 03:35:56 -07:00
|
|
|
import img2pdf
|
2015-09-10 07:01:14 -07:00
|
|
|
import pytest
|
2016-01-11 17:40:44 -08:00
|
|
|
import sys
|
2015-07-26 15:24:42 -07:00
|
|
|
|
|
|
|
|
2017-01-26 16:38:59 -08:00
|
|
|
def test_single_page_text(outdir):
    """Render a one-page, text-only PDF and check how pageinfo reports it.

    ``outdir`` is used as a path-like directory (joined with ``/``) into
    which ``text.pdf`` is written.  The generated page must be reported as
    a single page that has text and contains no images.
    """
    pdf_path = outdir / 'text.pdf'
    page_size = (8 * 72, 6 * 72)
    quote = ("Methink'st thou art a general offence and every"
             " man should beat thee.")

    # Build the document: one line of Helvetica text on a single page.
    canvas = Canvas(str(pdf_path), pagesize=page_size)
    text_obj = canvas.beginText()
    text_obj.setFont('Helvetica', 12)
    text_obj.setTextOrigin(1 * 72, 3 * 72)
    text_obj.textLine(quote)
    canvas.drawText(text_obj)
    canvas.showPage()
    canvas.save()

    # Inspect the file we just wrote.
    pages = pageinfo.pdf_get_all_pageinfo(str(pdf_path))
    assert len(pages) == 1

    only_page = pages[0]
    assert only_page['has_text']
    assert len(only_page['images']) == 0
|
|
|
|
|
|
|
|
|
2017-01-26 17:53:27 -08:00
|
|
|
@pytest.mark.skipif(sys.version_info < (3, 5), reason="needs Path.read_bytes")
def test_single_page_image(outdir):
    """Wrap an 8x8 bilevel PNG in a PDF via img2pdf and check pageinfo.

    ``outdir`` is used as a path-like directory; both the intermediate
    ``tmp.png`` and the resulting ``image-mono.pdf`` are written there.
    The page must be reported as having no text and exactly one image,
    8 pixels wide, with color ``'gray'`` and 8 DPI on both axes.
    """
    pdf_path = outdir / 'image-mono.pdf'
    png_path = outdir / 'tmp.png'

    # 8x8 mode-'1' image, all zeros except the main diagonal.
    bitmap = Image.new('1', (8, 8), 0)
    for idx in range(8):
        bitmap.putpixel((idx, idx), 1)
    bitmap.save(str(png_path), format='PNG')

    # Same (ImgSize.dpi, 8) spec for both axes.
    # NOTE(review): presumably this makes the 8 px image 1"x1" - confirm
    # against the img2pdf layout documentation.
    axis_spec = (img2pdf.ImgSize.dpi, 8)
    imgsize = (axis_spec, axis_spec)
    layout = img2pdf.get_layout_fun(None, imgsize, None, None, None)

    png_data = png_path.read_bytes()
    pdf_data = img2pdf.convert(
        png_data,
        producer="img2pdf",
        with_pdfrw=False,
        layout_fun=layout,
    )
    pdf_path.write_bytes(pdf_data)

    pages = pageinfo.pdf_get_all_pageinfo(str(pdf_path))
    assert len(pages) == 1

    only_page = pages[0]
    assert not only_page['has_text']
    assert len(only_page['images']) == 1

    image_info = only_page['images'][0]
    assert image_info['width'] == 8
    assert image_info['color'] == 'gray'

    # DPI in a 1"x1" is the image width
    assert abs(image_info['dpi_w'] - 8) < 1e-5
    assert abs(image_info['dpi_h'] - 8) < 1e-5
|
2015-07-26 15:24:42 -07:00
|
|
|
|
|
|
|
|
2017-01-26 16:38:59 -08:00
|
|
|
def test_single_page_inline_image(outdir):
    """Draw an 8x8 bilevel image inline on a PDF page and check pageinfo.

    ``outdir`` is used as a path-like directory into which
    ``image-mono-inline.pdf`` is written.  The image is saved to a named
    temporary file, drawn into a 72x72 pt area, and the first image that
    pageinfo reports for the first page must be 8 pixels wide, have a
    color other than ``'-'``, and have a horizontal DPI of 8.
    """
    filename = outdir / 'image-mono-inline.pdf'
    pdf = Canvas(str(filename), pagesize=(8*72, 6*72))
    with NamedTemporaryFile() as im_tmp:
        # 8x8 mode-'1' image, all zeros except the main diagonal.
        im = Image.new('1', (8, 8), 0)
        for n in range(8):
            im.putpixel((n, n), 1)
        im.save(im_tmp.name, format='PNG')
        # Draw image in a 72x72 pt or 1"x1" area
        pdf.drawInlineImage(im_tmp.name, 0, 0, width=72, height=72)
        pdf.showPage()
        # Save while the temporary image file still exists.
        pdf.save()

    pdfinfo = pageinfo.pdf_get_all_pageinfo(str(filename))
    print(pdfinfo)
    pdfimage = pdfinfo[0]['images'][0]
    # Use abs() so the tolerance is two-sided; without it any DPI at or
    # below 8 (even 0 or a negative value) would satisfy the assertion.
    assert abs(pdfimage['dpi_w'] - 8) < 1e-5
    assert pdfimage['color'] != '-'
    assert pdfimage['width'] == 8
|
2015-07-26 15:24:42 -07:00
|
|
|
|
2015-07-27 00:25:24 -07:00
|
|
|
|
2017-01-26 16:38:59 -08:00
|
|
|
def test_jpeg(resources, outdir):
    """Check that pageinfo identifies the JPEG image in ``c02-22.pdf``.

    ``resources`` is used as a path-like directory containing the input
    file.  ``outdir`` is accepted but not used by this test.  The first
    image on the first page must be reported with encoding ``'jpeg'`` and
    a horizontal DPI of 150.
    """
    filename = resources / 'c02-22.pdf'

    pdfinfo = pageinfo.pdf_get_all_pageinfo(str(filename))

    pdfimage = pdfinfo[0]['images'][0]
    assert pdfimage['enc'] == 'jpeg'
    # Use abs() so the tolerance is two-sided; without it any DPI at or
    # below 150 would satisfy the assertion.
    assert abs(pdfimage['dpi_w'] - 150) < 1e-5
|
2015-07-27 00:25:24 -07:00
|
|
|
|
2017-02-14 12:51:15 -08:00
|
|
|
|
|
|
|
def test_form_xobject(resources):
    """Check the image width pageinfo reports for ``formxobject.pdf``.

    ``resources`` is used as a path-like directory containing the input
    file.  The first image listed for the first page must be 50 pixels
    wide.
    """
    pdf_path = resources / 'formxobject.pdf'
    pages = pageinfo.pdf_get_all_pageinfo(str(pdf_path))

    first_page = pages[0]
    first_image = first_page['images'][0]
    assert first_image['width'] == 50
|