OCRmyPDF/tests/test_pdfinfo.py

# © 2015 James R. Barlow: github.com/jbarlow83
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import pickle
from io import BytesIO
from math import isclose

import img2pdf
import pikepdf
import pytest
from PIL import Image
from reportlab.lib.units import inch
from reportlab.pdfgen.canvas import Canvas

from ocrmypdf import pdfinfo
from ocrmypdf.exceptions import InputFileError
from ocrmypdf.helpers import Resolution
from ocrmypdf.pdfinfo import Colorspace, Encoding
from ocrmypdf.pdfinfo.layout import PDFPage

# pylint: disable=protected-access


def test_single_page_text(outdir):
    """A generated page holding a single line of text reports text and no images."""
    pdf_path = outdir / 'text.pdf'
    canvas = Canvas(str(pdf_path), pagesize=(8 * inch, 6 * inch))

    # One line of 12 pt Helvetica with its origin at (1 inch, 3 inch).
    textobj = canvas.beginText()
    textobj.setFont('Helvetica', 12)
    textobj.setTextOrigin(1 * inch, 3 * inch)
    textobj.textLine(
        "Methink'st thou art a general offence and every man should beat thee."
    )
    canvas.drawText(textobj)
    canvas.showPage()
    canvas.save()

    info = pdfinfo.PdfInfo(pdf_path)
    assert len(info) == 1

    only_page = info[0]
    assert only_page.has_text
    assert len(only_page.images) == 0


@pytest.fixture(scope='session')
def eight_by_eight():
    """Session-scoped 8x8 mode-'1' image filled with 0, with the (n, n) diagonal set to 1."""
    image = Image.new('1', (8, 8), 0)
    for diagonal_xy in zip(range(8), range(8)):
        image.putpixel(diagonal_xy, 1)
    return image


def test_single_page_image(eight_by_eight, outpdf):
    """A PDF built by img2pdf from the 8x8 image has one gray image at 8 DPI."""
    # Serialize the fixture image to an in-memory PNG and rewind it for reading.
    png_buffer = BytesIO()
    eight_by_eight.save(png_buffer, format='PNG')
    png_buffer.seek(0)

    # Both image-size components are specified as (ImgSize.dpi, 8).
    size_component = (img2pdf.ImgSize.dpi, 8)
    layout_fun = img2pdf.get_layout_fun(
        None, (size_component, size_component), None, None, None
    )

    with outpdf.open('wb') as pdf_stream:
        img2pdf.convert(
            png_buffer,
            producer="img2pdf",
            with_pdfrw=False,
            layout_fun=layout_fun,
            outputstream=pdf_stream,
        )

    info = pdfinfo.PdfInfo(outpdf)
    assert len(info) == 1

    only_page = info[0]
    assert not only_page.has_text
    assert len(only_page.images) == 1

    image = only_page.images[0]
    assert image.width == 8
    assert image.color == Colorspace.gray

    # DPI in a 1"x1" is the image width
    assert isclose(image.dpi.x, 8)
    assert isclose(image.dpi.y, 8)


def test_single_page_inline_image(eight_by_eight, outdir):
    """An inline image drawn into a 72x72 pt square is reported as 8 wide, gray, 8 DPI."""
    pdf_path = outdir / 'image-mono-inline.pdf'
    canvas = Canvas(str(pdf_path), pagesize=(8 * 72, 6 * 72))

    # 72 pt is 1", so the image occupies a 1"x1" area at the page origin
    canvas.drawInlineImage(eight_by_eight, 0, 0, width=72, height=72)
    canvas.showPage()
    canvas.save()

    info = pdfinfo.PdfInfo(pdf_path)
    print(info)

    inline_image = info[0].images[0]
    assert isclose(inline_image.dpi.x, 8)
    assert inline_image.color == Colorspace.gray
    assert inline_image.width == 8


def test_jpeg(resources):
    """The first image on page one of c02-22.pdf is JPEG-encoded at about 150 DPI (x)."""
    info = pdfinfo.PdfInfo(resources / 'c02-22.pdf')
    first_image = info[0].images[0]

    assert first_image.enc == Encoding.jpeg
    assert isclose(first_image.dpi.x, 150)


def test_form_xobject(resources):
    """The first image reported for page one of formxobject.pdf has width 50."""
    info = pdfinfo.PdfInfo(resources / 'formxobject.pdf')
    first_image = info[0].images[0]

    assert first_image.width == 50


def test_no_contents(resources):
    """Page one of no_contents.pdf is analyzed as having no images and no text."""
    info = pdfinfo.PdfInfo(resources / 'no_contents.pdf')

    assert len(info[0].images) == 0
    assert not info[0].has_text


def test_oversized_page(resources):
    """Sanity-check the first image of poster.pdf against the 'oversized' threshold."""
    info = pdfinfo.PdfInfo(resources / 'poster.pdf')
    poster_image = info[0].images[0]

    # NOTE(review): this multiplies width by DPI; a physical size would be
    # width / DPI. Confirm which quantity is meant to exceed 200.
    scaled_width = poster_image.width * poster_image.dpi.x
    assert scaled_width > 200, "this is supposed to be oversized"


def test_pickle(resources):
    """PdfInfo for graph_ocred.pdf can be serialized with pickle.dumps.

    For multiprocessing we must be able to pickle our information - if
    this fails then we are probably storing some unpickleable pikepdf or
    other external data around.
    """
    info = pdfinfo.PdfInfo(resources / 'graph_ocred.pdf')
    pickle.dumps(info)


def test_vector(resources):
    """Page one of vector.pdf is flagged as having vector content but no text."""
    info = pdfinfo.PdfInfo(resources / 'vector.pdf')

    assert info[0].has_vector
    assert not info[0].has_text


def test_ocr_detection(resources):
    """Page one of graph_ocred.pdf is flagged as having text but no vector content."""
    info = pdfinfo.PdfInfo(resources / 'graph_ocred.pdf')

    assert not info[0].has_vector
    assert info[0].has_text


@pytest.mark.parametrize(
    'testfile',
    (
        'truetype_font_nomapping.pdf',
        'type3_font_nomapping.pdf',
    ),
)
def test_corrupt_font_detection(resources, testfile):
    """With detailed analysis on, page one of each test file reports corrupt text."""
    info = pdfinfo.PdfInfo(resources / testfile, detailed_analysis=True)

    assert info[0].has_corrupt_text


def test_stack_abuse():
    """Content streams with unbalanced ``q``/``Q`` operators warn or raise.

    Three streams are fed to ``pdfinfo.info._interpret_contents``:

    * 35 ``q`` operators: a warning mentioning "overflowed" is expected.
    * one ``q`` followed by four ``Q``: a warning mentioning "underflowed"
      is expected.
    * 135 ``q`` operators: ``RuntimeError`` is expected; any warnings issued
      on the way are tolerated but not required.

    ``pytest.warns(None)`` is not used because it is deprecated in pytest 7
    and rejected by pytest 8.
    """
    # Function-scope import: the stdlib module is only needed by this test.
    import warnings

    p = pikepdf.Pdf.new()

    stream = pikepdf.Stream(p, b'q ' * 35)
    with pytest.warns(Warning, match='overflowed'):
        pdfinfo.info._interpret_contents(stream)

    stream = pikepdf.Stream(p, b'q Q Q Q Q')
    with pytest.warns(Warning, match='underflowed'):
        pdfinfo.info._interpret_contents(stream)

    stream = pikepdf.Stream(p, b'q ' * 135)
    # Only the exception is asserted here, so silence warnings rather than
    # demanding one.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        with pytest.raises(RuntimeError):
            pdfinfo.info._interpret_contents(stream)


def test_pages_issue700(monkeypatch, resources):
    """If PDFPage.get_pages yields no pages, PdfInfo raises InputFileError citing pdfminer."""
    # Replacement that accepts any arguments and produces an empty iterator.
    monkeypatch.setattr(PDFPage, 'get_pages', lambda *args, **kwargs: iter([]))

    analysis_options = dict(detailed_analysis=True, progbar=False, max_workers=1)
    with pytest.raises(InputFileError, match="pdfminer"):
        pdfinfo.PdfInfo(resources / 'cardinal.pdf', **analysis_options)


def test_image_scale0(resources, outpdf):
    """An XObject drawn under an all-zero ``cm`` matrix yields non-finite image DPI.

    The first page of cmyk.pdf is converted to a form XObject, copied into a
    new 72x72 single-page PDF and drawn with ``0 0 0 0 0 0 cm``. The analyzed
    page must report DPI ``Resolution(0, 0)``.
    """
    with pikepdf.open(resources / 'cmyk.pdf') as source_pdf:
        form = source_pdf.pages[0].as_form_xobject()

        target = pikepdf.Pdf.new()
        target.add_blank_page(page_size=(72, 72))
        resource_name = target.pages[0].add_resource(
            target.copy_foreign(form), pikepdf.Name.XObject, pikepdf.Name.Im0
        )
        print(resource_name)

        # Draw the XObject under a transformation matrix of all zeros.
        content = b"q 0 0 0 0 0 0 cm %s Do Q" % bytes(resource_name)
        target.pages[0].Contents = pikepdf.Stream(target, content)
        target.save(outpdf)

    info = pdfinfo.PdfInfo(
        outpdf, detailed_analysis=True, progbar=False, max_workers=1
    )
    assert not info.pages[0]._images[0].dpi.is_finite
    assert info.pages[0].dpi == Resolution(0, 0)
Update release notes, add copyrights 2015-07-28 04:36:58 -07:00			`# © 2015 James R. Barlow: github.com/jbarlow83`
Add license notice to all files Source files to GPL3 Exceptions: -tests/spoof/* to MIT -hocrtransform.py -_unicodefun.py Test resources to CC BY-SA 4.0 except when otherwise noted. Add GPL license. 2018-03-14 14:40:48 -07:00			`#`
Change license of all GPLv3 files to MPL-2.0 https://github.com/jbarlow83/OCRmyPDF/issues/600 2020-08-05 00:44:42 -07:00			`# This Source Code Form is subject to the terms of the Mozilla Public`
			`# License, v. 2.0. If a copy of the MPL was not distributed with this`
			`# file, You can obtain one at http://mozilla.org/MPL/2.0/.`

Sort imports with isort 2018-12-30 01:28:15 -08:00			`import pickle`
tests: tidy pdfinfo 2021-01-08 15:04:52 -08:00			`from io import BytesIO`
Sort imports with isort 2018-12-30 01:28:15 -08:00			`from math import isclose`

			`import img2pdf`
Sort imports 2019-12-19 15:29:56 -08:00			`import pikepdf`
Sort imports with isort 2018-12-30 01:28:15 -08:00			`import pytest`
			`from PIL import Image`
tests: tidy pdfinfo 2021-01-08 15:04:52 -08:00			`from reportlab.lib.units import inch`
Sort imports with isort 2018-12-30 01:28:15 -08:00			`from reportlab.pdfgen.canvas import Canvas`

			`from ocrmypdf import pdfinfo`
Partial fix crash on 'userunit' None (#700) Our method of getting data from pdfminer would silently consume a StopIteration if pdfminer returned no processed pages, leading to odd error message. We improve an error from pdfminer properly, and returning a more descriptive error of our own. It would be possible for ocrmypdf to repair the file before sending it to pdfminer, but this seems to be rare enough that we won't do that yet. 2021-01-01 01:11:32 -08:00			`from ocrmypdf.exceptions import InputFileError`
Fix ZeroDivisionError on files containing images drawn at scale 0 Fixes #761 2021-04-15 23:26:14 -07:00			`from ocrmypdf.helpers import Resolution`
Sort imports with isort 2018-12-30 01:28:15 -08:00			`from ocrmypdf.pdfinfo import Colorspace, Encoding`
Partial fix crash on 'userunit' None (#700) Our method of getting data from pdfminer would silently consume a StopIteration if pdfminer returned no processed pages, leading to odd error message. We improve an error from pdfminer properly, and returning a more descriptive error of our own. It would be possible for ocrmypdf to repair the file before sending it to pdfminer, but this seems to be rare enough that we won't do that yet. 2021-01-01 01:11:32 -08:00			`from ocrmypdf.pdfinfo.layout import PDFPage`

Delinting 2019-01-02 13:34:45 -08:00			`# pylint: disable=protected-access`

Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`def test_single_page_text(outdir):`
			`filename = outdir / 'text.pdf'`
tests: tidy pdfinfo 2021-01-08 15:04:52 -08:00			`pdf = Canvas(str(filename), pagesize=(8 * inch, 6 * inch))`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00			`text = pdf.beginText()`
			`text.setFont('Helvetica', 12)`
tests: tidy pdfinfo 2021-01-08 15:04:52 -08:00			`text.setTextOrigin(1 * inch, 3 * inch)`
Reformat with black 2018-12-30 01:27:49 -08:00			`text.textLine(`
			`"Methink'st thou art a general offence and every" " man should beat thee."`
			`)`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00			`pdf.drawText(text)`
			`pdf.showPage()`
			`pdf.save()`

pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`info = pdfinfo.PdfInfo(filename)`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`assert len(info) == 1`
			`page = info[0]`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
Access PageInfo instance variables instead of dictionary 2017-05-18 17:12:04 -07:00			`assert page.has_text`
			`assert len(page.images) == 0`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00

tests: tidy pdfinfo 2021-01-08 15:04:52 -08:00			`@pytest.fixture(scope='session')`
			`def eight_by_eight():`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`im = Image.new('1', (8, 8), 0)`
			`for n in range(8):`
			`im.putpixel((n, n), 1)`
tests: tidy pdfinfo 2021-01-08 15:04:52 -08:00			`return im`


			`def test_single_page_image(eight_by_eight, outpdf):`
			`im = eight_by_eight`
			`bio = BytesIO()`
			`im.save(bio, format='PNG')`
			`bio.seek(0)`
Fix img2pdf usage in test case (to make Travis CI happy again) 2016-02-06 23:41:32 -08:00
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`imgsize = ((img2pdf.ImgSize.dpi, 8), (img2pdf.ImgSize.dpi, 8))`
			`layout_fun = img2pdf.get_layout_fun(None, imgsize, None, None, None)`
Fix img2pdf usage in test case (to make Travis CI happy again) 2016-02-06 23:41:32 -08:00
tests: tidy pdfinfo 2021-01-08 15:04:52 -08:00			`with outpdf.open('wb') as f:`
			`img2pdf.convert(`
			`bio,`
			`producer="img2pdf",`
			`with_pdfrw=False,`
			`layout_fun=layout_fun,`
			`outputstream=f,`
			`)`
			`info = pdfinfo.PdfInfo(outpdf)`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`assert len(info) == 1`
			`page = info[0]`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
Access PageInfo instance variables instead of dictionary 2017-05-18 17:12:04 -07:00			`assert not page.has_text`
			`assert len(page.images) == 1`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
Access PageInfo instance variables instead of dictionary 2017-05-18 17:12:04 -07:00			`pdfimage = page.images[0]`
Refactor from ImageInfo index to attribute accessing 2017-05-18 18:39:14 -07:00			`assert pdfimage.width == 8`
Replace magic strings colorspace and encoding with Enums 2017-05-18 22:32:27 -07:00			`assert pdfimage.color == Colorspace.gray`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
			`# DPI in a 1"x1" is the image width`
Refactor 'xyres' into Resolution 2020-04-24 04:12:05 -07:00			`assert isclose(pdfimage.dpi.x, 8)`
			`assert isclose(pdfimage.dpi.y, 8)`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00

Revert "Remove apparently unused portion of a test" This reverts commit d89a633ba73af4a6bdacda6b9a4c0638b39167bd. 2021-04-16 00:21:11 -07:00			`def test_single_page_inline_image(eight_by_eight, outdir):`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`filename = outdir / 'image-mono-inline.pdf'`
Revert "Remove apparently unused portion of a test" This reverts commit d89a633ba73af4a6bdacda6b9a4c0638b39167bd. 2021-04-16 00:21:11 -07:00			`pdf = Canvas(str(filename), pagesize=(8 * 72, 6 * 72))`

			`# Draw image in a 72x72 pt or 1"x1" area`
			`pdf.drawInlineImage(eight_by_eight, 0, 0, width=72, height=72)`
			`pdf.showPage()`
			`pdf.save()`
Fix test_single_page_inline_image - remove temp file 2019-11-27 02:26:13 -08:00
			`info = pdfinfo.PdfInfo(filename)`
			`print(info)`
			`pdfimage = info[0].images[0]`
Refactor 'xyres' into Resolution 2020-04-24 04:12:05 -07:00			`assert isclose(pdfimage.dpi.x, 8)`
Fix test_single_page_inline_image - remove temp file 2019-11-27 02:26:13 -08:00			`assert pdfimage.color == Colorspace.gray`
Refactor from ImageInfo index to attribute accessing 2017-05-18 18:39:14 -07:00			`assert pdfimage.width == 8`
Add some pageinfo test cases; found problem with inline images 2015-07-26 15:24:42 -07:00
More testing: JPEG 2015-07-27 00:25:24 -07:00
Pre-release delinting 2020-06-09 15:27:14 -07:00			`def test_jpeg(resources):`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`filename = resources / 'c02-22.pdf'`
More testing: JPEG 2015-07-27 00:25:24 -07:00
pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`pdf = pdfinfo.PdfInfo(filename)`
More testing: JPEG 2015-07-27 00:25:24 -07:00
pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`pdfimage = pdf[0].images[0]`
Replace magic strings colorspace and encoding with Enums 2017-05-18 22:32:27 -07:00			`assert pdfimage.enc == Encoding.jpeg`
Refactor 'xyres' into Resolution 2020-04-24 04:12:05 -07:00			`assert isclose(pdfimage.dpi.x, 150)`
More testing: JPEG 2015-07-27 00:25:24 -07:00
Create test case for Form XObjects 2017-02-14 12:51:15 -08:00
			`def test_form_xobject(resources):`
			`filename = resources / 'formxobject.pdf'`

pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`pdf = pdfinfo.PdfInfo(filename)`
			`pdfimage = pdf[0].images[0]`
Refactor from ImageInfo index to attribute accessing 2017-05-18 18:39:14 -07:00			`assert pdfimage.width == 50`
Fix #156 - NoneType has no ‘getObject’ for pages with no /Contents 2017-05-01 15:46:15 -07:00

			`def test_no_contents(resources):`
			`filename = resources / 'no_contents.pdf'`

pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`pdf = pdfinfo.PdfInfo(filename)`
			`assert len(pdf[0].images) == 0`
Pre-release delinting 2020-06-09 15:27:14 -07:00			`assert not pdf[0].has_text`
—output-type=pdf now outputs /UserUnit PDFs at the correct size This currently distorts the output size because Tesseract assumes it knows the DPI better than we do. Does not work for Ghostscript, because it emerges that Ghostscript honors /UserUnit for rasterizing but not in pdfwrite (resolve/wontfix). https://bugs.ghostscript.com/show_bug.cgi?id=690781 Ghostscript’s output would need to be patched in a PDF/A safe way for this to work. Temporary route may be to block Ghostscript if /UserUnit. 2017-05-24 23:26:07 -07:00

			`def test_oversized_page(resources):`
			`pdf = pdfinfo.PdfInfo(resources / 'poster.pdf')`
			`image = pdf[0].images[0]`
Refactor 'xyres' into Resolution 2020-04-24 04:12:05 -07:00			`assert image.width * image.dpi.x > 200, "this is supposed to be oversized"`
Document need for pdfinfo to be pickleable 2018-05-24 22:24:13 -07:00

			`def test_pickle(resources):`
			`# For multiprocessing we must be able to pickle our information - if`
			`# this fails then we are probably storing some unpickleable pikepdf or`
			`# other external data around`
Reorganize around getting bboxes for visible/invisible text 2018-10-26 01:07:02 -07:00			`filename = resources / 'graph_ocred.pdf'`
Document need for pdfinfo to be pickleable 2018-05-24 22:24:13 -07:00			`pdf = pdfinfo.PdfInfo(filename)`
			`pickle.dumps(pdf)`
pdfinfo: improve the regex 2018-07-04 00:59:32 -07:00

pdfinfo: learn to detect vector graphic objects 2018-10-18 01:21:51 -07:00			`def test_vector(resources):`
			`filename = resources / 'vector.pdf'`
			`pdf = pdfinfo.PdfInfo(filename)`
			`assert pdf[0].has_vector`
Add functional "redo OCR" feature Needs argument validation and some other changes. Needs testing with mixed-content PDFs. Only really works for pure invisible text at the moment. 2018-10-19 00:02:19 -07:00			`assert not pdf[0].has_text`


			`def test_ocr_detection(resources):`
			`filename = resources / 'graph_ocred.pdf'`
			`pdf = pdfinfo.PdfInfo(filename)`
			`assert not pdf[0].has_vector`
			`assert pdf[0].has_text`
Test case: true type font without Unicode mapping 2018-11-15 16:22:53 -08:00

Add test case for Type3 fonts with no Unicode mapping 2018-11-15 21:54:26 -08:00			`@pytest.mark.parametrize(`
			`'testfile', ('truetype_font_nomapping.pdf', 'type3_font_nomapping.pdf')`
			`)`
			`def test_corrupt_font_detection(resources, testfile):`
			`filename = resources / testfile`
Reinstate quick test for text/no text Partial revert of commit 991db17 2020-06-10 11:53:04 -07:00			`pdf = pdfinfo.PdfInfo(filename, detailed_analysis=True)`
Test case: true type font without Unicode mapping 2018-11-15 16:22:53 -08:00			`assert pdf[0].has_corrupt_text`
pdfinfo: tolerate PDFs that overflow and underflow the graphics stack 2018-12-15 14:55:11 -08:00

			`def test_stack_abuse():`
			`p = pikepdf.Pdf.new()`

			`stream = pikepdf.Stream(p, b'q ' * 35)`
			`with pytest.warns(None) as record:`
Restructure ocrmypdf.pdfinfo 2019-06-20 03:10:41 -07:00			`pdfinfo.info._interpret_contents(stream)`
pdfinfo: tolerate PDFs that overflow and underflow the graphics stack 2018-12-15 14:55:11 -08:00			`assert 'overflowed' in str(record[0].message)`

			`stream = pikepdf.Stream(p, b'q Q Q Q Q')`
			`with pytest.warns(None) as record:`
Restructure ocrmypdf.pdfinfo 2019-06-20 03:10:41 -07:00			`pdfinfo.info._interpret_contents(stream)`
pdfinfo: tolerate PDFs that overflow and underflow the graphics stack 2018-12-15 14:55:11 -08:00			`assert 'underflowed' in str(record[0].message)`

			`stream = pikepdf.Stream(p, b'q ' * 135)`
			`with pytest.warns(None):`
			`with pytest.raises(RuntimeError):`
Restructure ocrmypdf.pdfinfo 2019-06-20 03:10:41 -07:00			`pdfinfo.info._interpret_contents(stream)`
Partial fix crash on 'userunit' None (#700) Our method of getting data from pdfminer would silently consume a StopIteration if pdfminer returned no processed pages, leading to odd error message. We improve an error from pdfminer properly, and returning a more descriptive error of our own. It would be possible for ocrmypdf to repair the file before sending it to pdfminer, but this seems to be rare enough that we won't do that yet. 2021-01-01 01:11:32 -08:00

			`def test_pages_issue700(monkeypatch, resources):`
			`def get_no_pages(*args, **kwargs):`
			`return iter([])`

			`monkeypatch.setattr(PDFPage, 'get_pages', get_no_pages)`

			`with pytest.raises(InputFileError, match="pdfminer"):`
			`pdfinfo.PdfInfo(`
			`resources / 'cardinal.pdf',`
			`detailed_analysis=True,`
			`progbar=False,`
			`max_workers=1,`
			`)`
Fix ZeroDivisionError on files containing images drawn at scale 0 Fixes #761 2021-04-15 23:26:14 -07:00

			`def test_image_scale0(resources, outpdf):`
			`with pikepdf.open(resources / 'cmyk.pdf') as cmyk:`
Remove shims to support for old versions of pikepdf < 4 2021-11-13 00:41:36 -08:00			`xobj = cmyk.pages[0].as_form_xobject()`
Fix ZeroDivisionError on files containing images drawn at scale 0 Fixes #761 2021-04-15 23:26:14 -07:00
			`p = pikepdf.Pdf.new()`
			`p.add_blank_page(page_size=(72, 72))`
Remove shims to support for old versions of pikepdf < 4 2021-11-13 00:41:36 -08:00			`objname = p.pages[0].add_resource(`
Fix ZeroDivisionError on files containing images drawn at scale 0 Fixes #761 2021-04-15 23:26:14 -07:00			`p.copy_foreign(xobj), pikepdf.Name.XObject, pikepdf.Name.Im0`
			`)`
			`print(objname)`
			`p.pages[0].Contents = pikepdf.Stream(`
			`p, b"q 0 0 0 0 0 0 cm %s Do Q" % bytes(objname)`
			`)`
			`p.save(outpdf)`

			`pi = pdfinfo.PdfInfo(outpdf, detailed_analysis=True, progbar=False, max_workers=1)`
			`assert not pi.pages[0]._images[0].dpi.is_finite`
			`assert pi.pages[0].dpi == Resolution(0, 0)`