OCRmyPDF/tests/test_optimize.py

# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

from __future__ import annotations

from io import BytesIO
from os import fspath
from pathlib import Path
from unittest.mock import MagicMock, patch

import img2pdf
import pikepdf
import pytest
from pikepdf import Array, Dictionary, Name
from PIL import Image, ImageDraw

from ocrmypdf import optimize as opt
from ocrmypdf._exec import jbig2enc, pngquant
from ocrmypdf._exec.ghostscript import rasterize_pdf
from ocrmypdf.helpers import IMG2PDF_KWARGS, Resolution
from ocrmypdf.optimize import PdfImage, extract_image_filter
from tests.conftest import check_ocrmypdf

# Marker that skips a test when pngquant.available() reports it is missing.
needs_pngquant = pytest.mark.skipif(
    not pngquant.available(), reason="pngquant not installed"
)
# Marker that skips a test when jbig2enc.available() reports it is missing.
needs_jbig2enc = pytest.mark.skipif(
    not jbig2enc.available(), reason="jbig2enc not installed"
)


# pylint:disable=redefined-outer-name


@pytest.fixture(scope="session")
def palette(resources):
    """Provide the path of the ``palette.pdf`` file in the resources directory."""
    palette_pdf = resources / 'palette.pdf'
    return palette_pdf


@needs_pngquant
@pytest.mark.parametrize('pdf', ['multipage', 'palette'])
def test_basic(multipage, palette, pdf, outpdf):
    """Optimize a sample PDF at level 3 and bound the resulting file size.

    The ``pdf`` parameter selects which fixture supplies the input file. The
    output passes as long as 98% of its size does not exceed the input size.
    """
    if pdf == 'multipage':
        infile = multipage
    else:
        infile = palette

    opt.main(infile, outpdf, level=3)

    output_size = Path(outpdf).stat().st_size
    input_size = Path(infile).stat().st_size
    assert 0.98 * output_size <= input_size


@needs_pngquant
def test_mono_not_inverted(resources, outdir):
    """Optimize ``2400dpi.pdf`` and check the top-left pixel stays near white.

    The optimized PDF is rasterized to a grayscale PNG at 10x10 DPI and the
    pixel at (0, 0) must be brighter than 240.
    """
    optimized_pdf = outdir / 'out.pdf'
    raster_png = outdir / 'im.png'

    opt.main(resources / '2400dpi.pdf', optimized_pdf, level=3)
    rasterize_pdf(
        optimized_pdf,
        raster_png,
        raster_device='pnggray',
        raster_dpi=Resolution(10, 10),
    )

    with Image.open(fspath(raster_png)) as raster:
        corner_pixel = raster.getpixel((0, 0))
    assert corner_pixel > 240, "Expected white background"


@needs_pngquant
def test_jpg_png_params(resources, outpdf):
    """Run ``crom.png`` through optimize level 3 with explicit quality options.

    Both ``--jpg-quality`` and ``--png-quality`` are supplied; the run is
    delegated to ``check_ocrmypdf`` with the no-op Tesseract plugin.
    """
    options = [
        '--image-dpi',
        '200',
        '--optimize',
        '3',
        '--jpg-quality',
        '50',
        '--png-quality',
        '20',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    ]
    check_ocrmypdf(resources / 'crom.png', outpdf, *options)


@needs_jbig2enc
@pytest.mark.parametrize('lossy', [False, True])
def test_jbig2_lossy(lossy, resources, outpdf):
    """Check the JBIG2 output produced with and without ``--jbig2-lossy``.

    In both cases the first image on the first page must use
    ``/JBIG2Decode``. With the lossy flag its first decode parameters must
    contain ``/JBIG2Globals``; without it there must be no decode parameters.
    """
    lossy_option = ['--jbig2-lossy'] if lossy else []

    check_ocrmypdf(
        resources / 'ccitt.pdf',
        outpdf,
        '--image-dpi',
        '200',
        '--optimize',
        '3',
        '--jpg-quality',
        '50',
        '--png-quality',
        '20',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        '--jbig2-threshold',
        '0.7',
        *lossy_option,
    )

    with pikepdf.open(outpdf) as pdf:
        first_image = next(iter(pdf.pages[0].images.values()))
        pim = pikepdf.PdfImage(first_image)
        assert pim.filters[0] == '/JBIG2Decode'

        if not lossy:
            assert len(pim.decode_parms) == 0
        else:
            assert '/JBIG2Globals' in pim.decode_parms[0]


@needs_pngquant
@needs_jbig2enc
def test_flate_to_jbig2(resources, outdir):
    """Check that an 8-bit grayscale PNG ends up as a JBIG2 image at level 3."""
    # This test requires an image that pngquant is capable of converting
    # to 1bpp - so use an existing 1bpp image, convert up, confirm it can
    # convert down
    gray_png = outdir / 'type8.png'
    result_pdf = outdir / 'out.pdf'

    with Image.open(fspath(resources / 'typewriter.png')) as source:
        assert source.mode in ('1', 'P')
        source.convert('L').save(fspath(gray_png))

    options = (
        '--image-dpi',
        '100',
        '--png-quality',
        '50',
        '--optimize',
        '3',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    check_ocrmypdf(gray_png, result_pdf, *options)

    with pikepdf.open(result_pdf) as pdf:
        first_image = next(iter(pdf.pages[0].images.values()))
        pim = pikepdf.PdfImage(first_image)
        assert pim.filters[0] == '/JBIG2Decode'


@needs_pngquant
def test_multiple_pngs(resources, outdir):
    """Check that the image on every page shrinks, not only the first one.

    A two-image PDF is built with img2pdf and optimized at level 3 while
    ``pngquant.quantize`` is patched with a stand-in. Afterwards the first
    image of each output page must have fewer raw bytes than the
    corresponding input image.
    """
    input_pdf = outdir / 'in.pdf'
    output_pdf = outdir / 'out.pdf'

    with input_pdf.open('wb') as inpdf:
        img2pdf.convert(
            fspath(resources / 'baiona_colormapped.png'),
            fspath(resources / 'baiona_gray.png'),
            outputstream=inpdf,
            **IMG2PDF_KWARGS,
        )

    def mockquant(input_file, output_file, *_args):
        # Stand-in for pngquant.quantize: paint the whole image with a single
        # fill value and save it to the requested output path.
        # NOTE(review): presumably the flat fill is what makes the stored
        # image smaller than the input - confirm.
        with Image.open(input_file) as im:
            draw = ImageDraw.Draw(im)
            draw.rectangle((0, 0, im.width, im.height), fill=128)
            im.save(output_file)

    with patch('ocrmypdf.optimize.pngquant.quantize') as mock:
        mock.side_effect = mockquant
        check_ocrmypdf(
            input_pdf,
            output_pdf,
            '--optimize',
            '3',
            '--jobs',
            '1',
            '--use-threads',
            '--output-type',
            'pdf',
            '--plugin',
            'tests/plugins/tesseract_noop.py',
        )
        mock.assert_called()

    with (
        pikepdf.open(input_pdf) as inpdf,
        pikepdf.open(output_pdf) as outpdf,
    ):
        # zip() stops at the shorter sequence, so check the page counts
        # first; otherwise a missing output page would go unnoticed.
        assert len(outpdf.pages) == len(inpdf.pages)
        for n, (in_page, out_page) in enumerate(zip(inpdf.pages, outpdf.pages)):
            inim = next(iter(in_page.images.values()))
            outim = next(iter(out_page.images.values()))
            assert len(outim.read_raw_bytes()) < len(inim.read_raw_bytes()), n


def test_optimize_off(resources, outpdf):
    """Run ``trivial.pdf`` with ``--optimize=0`` and PDF output type."""
    options = (
        '--optimize=0',
        '--output-type',
        'pdf',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    check_ocrmypdf(resources / 'trivial.pdf', outpdf, *options)


def test_group3(resources):
    """Check that ``extract_image_filter`` rejects the image once /K is 0.

    The ``/Im1`` image of ``ccitt.pdf`` is accepted as stored; after its
    ``/K`` decode parameter is set to 0 the same image must be rejected.
    """
    with pikepdf.open(resources / 'ccitt.pdf') as pdf:
        im = pdf.pages[0].Resources.XObject['/Im1']

        as_stored = opt.extract_image_filter(im, im.objgen[0])
        assert as_stored is not None, "Group 4 should be allowed"

        im.DecodeParms['/K'] = 0
        after_change = opt.extract_image_filter(im, im.objgen[0])
        assert after_change is None, "Group 3 should be disallowed"


def test_find_formx(resources):
    """Check that exactly one image xref is found in ``formxobject.pdf``.

    The single xref reported by ``_find_image_xrefs`` must map to page
    number 0.
    """
    with pikepdf.open(resources / 'formxobject.pdf') as pdf:
        image_xrefs, page_by_xref = opt._find_image_xrefs(pdf)

        assert len(image_xrefs) == 1
        only_xref = next(iter(image_xrefs))
        assert page_by_xref[only_xref] == 0


def test_extract_image_filter_with_pdf_image():
    """Check the result for a mock image filtered with Flate then DCT.

    The expected result is a tuple of an equal ``PdfImage`` and the second
    entry of that image's ``filter_decodeparms``.
    """
    image = MagicMock(
        Subtype=Name.Image,
        Length=200,
        Width=10,
        Height=10,
        Filter=[Name.FlateDecode, Name.DCTDecode],
    )
    # The PdfImage is built before BitsPerComponent is assigned on the mock;
    # keep this ordering.
    pdf_image = PdfImage(image)
    image.BitsPerComponent = 8

    result = extract_image_filter(image, None)
    expected = (pdf_image, pdf_image.filter_decodeparms[1])
    assert result == expected


def test_extract_image_filter_with_non_image():
    """Check that an object whose Subtype is Form yields ``None``."""
    form_object = MagicMock(Subtype=Name.Form)
    result = extract_image_filter(form_object, None)
    assert result is None


def test_extract_image_filter_with_small_stream_size():
    """Check that an image with a stream Length of 50 yields ``None``."""
    small_stream = MagicMock(Subtype=Name.Image, Length=50)
    result = extract_image_filter(small_stream, None)
    assert result is None


def test_extract_image_filter_with_small_dimensions():
    """Check that a 5x5 image yields ``None``."""
    tiny_image = MagicMock(
        Subtype=Name.Image,
        Length=200,
        Width=5,
        Height=5,
    )
    result = extract_image_filter(tiny_image, None)
    assert result is None


def test_extract_image_filter_with_multiple_compression_filters():
    """Check that an image with a three-filter chain yields ``None``."""
    filter_chain = [Name.ASCII85Decode, Name.FlateDecode, Name.DCTDecode]
    multi_filter_image = MagicMock(
        Subtype=Name.Image,
        Length=200,
        Width=10,
        Height=10,
        BitsPerComponent=8,
        Filter=filter_chain,
    )
    result = extract_image_filter(multi_filter_image, None)
    assert result is None


def test_extract_image_filter_with_wide_gamut_image():
    """Check that an image with 16 bits per component yields ``None``."""
    deep_image = MagicMock(
        Subtype=Name.Image,
        Length=200,
        Width=10,
        Height=10,
        BitsPerComponent=16,
        Filter=Name.FlateDecode,
    )
    result = extract_image_filter(deep_image, None)
    assert result is None


def test_extract_image_filter_with_jpeg2000_image():
    """Check that a real stream using the JPXDecode filter yields ``None``.

    A 10x10 RGB image is encoded as JPEG2000 in memory and wrapped in a
    stream of a newly created PDF.
    """
    encoded = BytesIO()
    Image.new('RGB', (10, 10)).save(encoded, format='JPEG2000')

    scratch_pdf = pikepdf.new()
    jpx_stream = scratch_pdf.make_stream(
        data=encoded.getvalue(),
        Subtype=Name.Image,
        Length=200,
        Width=10,
        Height=10,
        BitsPerComponent=8,
        Filter=Name.JPXDecode,
    )

    result = extract_image_filter(jpx_stream, None)
    assert result is None


def test_extract_image_filter_with_ccitt_group_3_image():
    """Check that a CCITTFaxDecode image whose decode parms have K=1 yields ``None``."""
    decode_parms = Array([Dictionary(K=1)])
    fax_image = MagicMock(
        Subtype=Name.Image,
        Length=200,
        Width=10,
        Height=10,
        BitsPerComponent=1,
        Filter=Name.CCITTFaxDecode,
        DecodeParms=decode_parms,
    )
    result = extract_image_filter(fax_image, None)
    assert result is None


# Disabled: the following test triggers a pikepdf bug.
# def test_extract_image_filter_with_decode_table():
#     image = MagicMock()
#     image.Subtype = Name.Image
#     image.Length = 200
#     image.Width = 10
#     image.Height = 10
#     image.Filter = Name.FlateDecode
#     image.BitsPerComponent = 8
#     image.ColorSpace = Name.DeviceGray
#     image.Decode = [42, 0]
#     assert extract_image_filter(image, None) is None
Change to SPDX license tracking 2022-07-28 01:06:46 -07:00			`# SPDX-FileCopyrightText: 2022 James R. Barlow`
			`# SPDX-License-Identifier: MPL-2.0`
optimize: move a lot of image scanning code to pikepdf 2018-05-14 22:21:53 -07:00
Modernize type annotations 2022-07-23 00:39:24 -07:00			`from __future__ import annotations`

optimize: better coverage 2023-10-17 02:41:40 -07:00			`from io import BytesIO`
Drop support for Python 3.5 2018-12-30 00:23:26 -08:00			`from os import fspath`
Sort imports with isort 2018-12-30 01:28:15 -08:00			`from pathlib import Path`
optimize: better coverage 2023-10-17 02:41:40 -07:00			`from unittest.mock import MagicMock, patch`
Make optimize test do a little more 2018-05-18 17:50:39 -07:00
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`import img2pdf`
Sort imports 2019-12-19 15:29:56 -08:00			`import pikepdf`
optimize: move a lot of image scanning code to pikepdf 2018-05-14 22:21:53 -07:00			`import pytest`
optimize: better coverage 2023-10-17 02:41:40 -07:00			`from pikepdf import Array, Dictionary, Name`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`from PIL import Image, ImageDraw`
optimize: move a lot of image scanning code to pikepdf 2018-05-14 22:21:53 -07:00
Rename _optimize to optimize.py 2018-06-22 17:51:57 -07:00			`from ocrmypdf import optimize as opt`
Rename ocrmypdf.exec -> ocrmypdf._exec 2020-06-09 14:55:54 -07:00			`from ocrmypdf._exec import jbig2enc, pngquant`
			`from ocrmypdf._exec.ghostscript import rasterize_pdf`
Use better img2pdf settings where possible while supporting old versions Fixes #894 2022-01-14 11:55:54 -08:00			`from ocrmypdf.helpers import IMG2PDF_KWARGS, Resolution`
optimize: better coverage 2023-10-17 02:41:40 -07:00			`from ocrmypdf.optimize import PdfImage, extract_image_filter`
			`from tests.conftest import check_ocrmypdf`
Add test to optimize if jbig2 is present 2018-07-02 23:49:11 -07:00
tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00			`needs_pngquant = pytest.mark.skipif(`
			`not pngquant.available(), reason="pngquant not installed"`
			`)`
			`needs_jbig2enc = pytest.mark.skipif(`
			`not jbig2enc.available(), reason="jbig2enc not installed"`
			`)`
Add test to optimize if jbig2 is present 2018-07-02 23:49:11 -07:00
tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00
Address some linter warnings 2022-09-21 00:05:12 -07:00			`# pylint:disable=redefined-outer-name`


tests: convert all uses of multipage.pdf to fixture 2022-08-11 01:13:10 -07:00			`@pytest.fixture(scope="session")`
			`def palette(resources):`
			`return resources / 'palette.pdf'`


tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00			`@needs_pngquant`
tests: convert all uses of multipage.pdf to fixture 2022-08-11 01:13:10 -07:00			`@pytest.mark.parametrize('pdf', ['multipage', 'palette'])`
			`def test_basic(multipage, palette, pdf, outpdf):`
			`infile = multipage if pdf == 'multipage' else palette`
Make optimize test do a little more 2018-05-18 17:50:39 -07:00			`opt.main(infile, outpdf, level=3)`
optimize: move a lot of image scanning code to pikepdf 2018-05-14 22:21:53 -07:00
Ensure test_optimize passes Linearization sends it over the edge 2019-07-27 16:47:53 -07:00			`assert 0.98 * Path(outpdf).stat().st_size <= Path(infile).stat().st_size`
Add test case to ensure mono is not inverted 2018-06-29 00:25:11 -07:00

tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00			`@needs_pngquant`
Add test case to ensure mono is not inverted 2018-06-29 00:25:11 -07:00			`def test_mono_not_inverted(resources, outdir):`
			`infile = resources / '2400dpi.pdf'`
			`opt.main(infile, outdir / 'out.pdf', level=3)`

			`rasterize_pdf(`
Reformat with black 2018-12-30 01:27:49 -08:00			`outdir / 'out.pdf',`
			`outdir / 'im.png',`
			`raster_device='pnggray',`
Refactor 'xyres' into Resolution 2020-04-24 04:12:05 -07:00			`raster_dpi=Resolution(10, 10),`
Add test case to ensure mono is not inverted 2018-06-29 00:25:11 -07:00			`)`

Use context managers to ensure Pillow images are closed 2019-09-03 17:19:12 -07:00			`with Image.open(fspath(outdir / 'im.png')) as im:`
Turning on Ghostscript interpolation changes this test Seems acceptable. We don't normally use Ghostscript to downsample PDFs like is happening in this test. 2021-11-15 16:36:24 -08:00			`assert im.getpixel((0, 0)) > 240, "Expected white background"`
Add test to optimize if jbig2 is present 2018-07-02 23:49:11 -07:00

tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00			`@needs_pngquant`
Abolish spoof_tesseract_noop 2020-06-01 03:06:40 -07:00			`def test_jpg_png_params(resources, outpdf):`
Add test to optimize if jbig2 is present 2018-07-02 23:49:11 -07:00			`check_ocrmypdf(`
Reformat with black 2018-12-30 01:27:49 -08:00			`resources / 'crom.png',`
			`outpdf,`
			`'--image-dpi',`
			`'200',`
			`'--optimize',`
			`'3',`
			`'--jpg-quality',`
			`'50',`
			`'--png-quality',`
			`'20',`
Abolish spoof_tesseract_noop 2020-06-01 03:06:40 -07:00			`'--plugin',`
			`'tests/plugins/tesseract_noop.py',`
optimize: only enable lossy JBIG2 for -O3 2018-10-03 00:38:58 -07:00			`)`


tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00			`@needs_jbig2enc`
Change JBIG2 lossy mode to require --jbig2-lossy 2018-10-04 01:20:49 -07:00			`@pytest.mark.parametrize('lossy', [False, True])`
Abolish spoof_tesseract_noop 2020-06-01 03:06:40 -07:00			`def test_jbig2_lossy(lossy, resources, outpdf):`
Change JBIG2 lossy mode to require --jbig2-lossy 2018-10-04 01:20:49 -07:00			`args = [`
Reformat with black 2018-12-30 01:27:49 -08:00			`resources / 'ccitt.pdf',`
			`outpdf,`
			`'--image-dpi',`
			`'200',`
			`'--optimize',`
optimize: recognize and produce [/FlateDecode /DCTDecode] images 2022-02-08 00:38:08 -08:00			`'3',`
Reformat with black 2018-12-30 01:27:49 -08:00			`'--jpg-quality',`
			`'50',`
			`'--png-quality',`
			`'20',`
Abolish spoof_tesseract_noop 2020-06-01 03:06:40 -07:00			`'--plugin',`
			`'tests/plugins/tesseract_noop.py',`
Add support for adjusting jbig2 threshold 2023-09-17 14:44:40 -07:00			`'--jbig2-threshold',`
			`'0.7',`
Change JBIG2 lossy mode to require --jbig2-lossy 2018-10-04 01:20:49 -07:00			`]`
			`if lossy:`
			`args.append('--jbig2-lossy')`

Abolish spoof_tesseract_noop 2020-06-01 03:06:40 -07:00			`check_ocrmypdf(*args)`
optimize: only enable lossy JBIG2 for -O3 2018-10-03 00:38:58 -07:00
Tidy use of pikepdf.open without with 2023-04-15 20:17:44 -07:00			`with pikepdf.open(outpdf) as pdf:`
			`pim = pikepdf.PdfImage(next(iter(pdf.pages[0].images.values())))`
			`assert pim.filters[0] == '/JBIG2Decode'`
optimize: only enable lossy JBIG2 for -O3 2018-10-03 00:38:58 -07:00
Tidy use of pikepdf.open without with 2023-04-15 20:17:44 -07:00			`if lossy:`
			`assert '/JBIG2Globals' in pim.decode_parms[0]`
			`else:`
			`assert len(pim.decode_parms) == 0`
optimize: Reorganize so JBIG2 can be performed on images reduced to 1bpp Closes #297 2018-10-04 11:53:11 -07:00

tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00			`@needs_pngquant`
			`@needs_jbig2enc`
Abolish spoof_tesseract_noop 2020-06-01 03:06:40 -07:00			`def test_flate_to_jbig2(resources, outdir):`
optimize: Reorganize so JBIG2 can be performed on images reduced to 1bpp Closes #297 2018-10-04 11:53:11 -07:00			`# This test requires an image that pngquant is capable of converting to`
			`# to 1bpp - so use an existing 1bpp image, convert up, confirm it can`
			`# convert down`
Use context managers to ensure Pillow images are closed 2019-09-03 17:19:12 -07:00			`with Image.open(fspath(resources / 'typewriter.png')) as im:`
			`assert im.mode in ('1', 'P')`
			`im = im.convert('L')`
			`im.save(fspath(outdir / 'type8.png'))`
optimize: Reorganize so JBIG2 can be performed on images reduced to 1bpp Closes #297 2018-10-04 11:53:11 -07:00
			`check_ocrmypdf(`
Reformat with black 2018-12-30 01:27:49 -08:00			`outdir / 'type8.png',`
			`outdir / 'out.pdf',`
			`'--image-dpi',`
			`'100',`
			`'--png-quality',`
optimize: use Decode to invert 1bpp PNGs for now 2019-03-03 17:50:12 -08:00			`'50',`
Reformat with black 2018-12-30 01:27:49 -08:00			`'--optimize',`
			`'3',`
Abolish spoof_tesseract_noop 2020-06-01 03:06:40 -07:00			`'--plugin',`
			`'tests/plugins/tesseract_noop.py',`
optimize: Reorganize so JBIG2 can be performed on images reduced to 1bpp Closes #297 2018-10-04 11:53:11 -07:00			`)`

Tidy use of pikepdf.open without with 2023-04-15 20:17:44 -07:00			`with pikepdf.open(outdir / 'out.pdf') as pdf:`
			`pim = pikepdf.PdfImage(next(iter(pdf.pages[0].images.values())))`
			`assert pim.filters[0] == '/JBIG2Decode'`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00

tests: tag tests that need pngquant, jbig2enc 2020-12-30 01:58:57 -08:00			`@needs_pngquant`
Merge branch 'release/v10' into trialmerge 2020-06-09 15:12:40 -07:00			`def test_multiple_pngs(resources, outdir):`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`with Path.open(outdir / 'in.pdf', 'wb') as inpdf:`
			`img2pdf.convert(`
			`fspath(resources / 'baiona_colormapped.png'),`
			`fspath(resources / 'baiona_gray.png'),`
			`outputstream=inpdf,`
Use better img2pdf settings where possible while supporting old versions Fixes #894 2022-01-14 11:55:54 -08:00			`**IMG2PDF_KWARGS,`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`)`

Delinting 2021-04-07 02:09:45 -07:00			`def mockquant(input_file, output_file, *_args):`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`with Image.open(input_file) as im:`
			`draw = ImageDraw.Draw(im)`
			`draw.rectangle((0, 0, im.width, im.height), fill=128)`
			`im.save(output_file)`

tests: assert that most patched functions are called We were not actually checking if functions we patched we called when expected. 2020-12-28 23:51:55 -08:00			`with patch('ocrmypdf.optimize.pngquant.quantize') as mock:`
			`mock.side_effect = mockquant`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`check_ocrmypdf(`
			`outdir / 'in.pdf',`
			`outdir / 'out.pdf',`
			`'--optimize',`
			`'3',`
			`'--jobs',`
			`'1',`
			`'--use-threads',`
			`'--output-type',`
			`'pdf',`
Merge branch 'release/v10' into trialmerge 2020-06-09 15:12:40 -07:00			`'--plugin',`
			`'tests/plugins/tesseract_noop.py',`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`)`
tests: assert that most patched functions are called We were not actually checking if functions we patched we called when expected. 2020-12-28 23:51:55 -08:00			`mock.assert_called()`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00
Fix handling of pages that are restored to correct orientation with /Rotate Appears inversion of CTM was incorrect, introduced in commit 9898904 2024-02-12 01:32:26 -08:00			`with (`
			`pikepdf.open(outdir / 'in.pdf') as inpdf,`
			`pikepdf.open(outdir / 'out.pdf') as outpdf,`
			`):`
Fix issue where only first PNG-style image would be optimized 2020-04-25 03:50:11 -07:00			`for n in range(len(inpdf.pages)):`
			`inim = next(iter(inpdf.pages[n].images.values()))`
			`outim = next(iter(outpdf.pages[n].images.values()))`
			`assert len(outim.read_raw_bytes()) < len(inim.read_raw_bytes()), n`
tests: confirm that we produce pdf when optimization is off 2021-01-24 01:53:36 -08:00

			`def test_optimize_off(resources, outpdf):`
			`check_ocrmypdf(`
			`resources / 'trivial.pdf',`
			`outpdf,`
			`'--optimize=0',`
			`'--output-type',`
			`'pdf',`
			`'--plugin',`
			`'tests/plugins/tesseract_noop.py',`
			`)`
Exclude Group 3 images from optimization 2021-03-20 23:28:21 -07:00

Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`def test_group3(resources):`
Exclude Group 3 images from optimization 2021-03-20 23:28:21 -07:00			`with pikepdf.open(resources / 'ccitt.pdf') as pdf:`
			`im = pdf.pages[0].Resources.XObject['/Im1']`
			`assert (`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`opt.extract_image_filter(im, im.objgen[0]) is not None`
Exclude Group 3 images from optimization 2021-03-20 23:28:21 -07:00			`), "Group 4 should be allowed"`

			`im.DecodeParms['/K'] = 0`
			`assert (`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`opt.extract_image_filter(im, im.objgen[0]) is None`
Exclude Group 3 images from optimization 2021-03-20 23:28:21 -07:00			`), "Group 3 should be disallowed"`
Add test to confirm we can find formx optimizable images 2023-05-23 00:33:54 -07:00

Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`def test_find_formx(resources):`
Add test to confirm we can find formx optimizable images 2023-05-23 00:33:54 -07:00			`with pikepdf.open(resources / 'formxobject.pdf') as pdf:`
			`working, pagenos = opt._find_image_xrefs(pdf)`
			`assert len(working) == 1`
			`xref = next(iter(working))`
			`assert pagenos[xref] == 0`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`def test_extract_image_filter_with_pdf_image():`
			`image = MagicMock()`
			`image.Subtype = Name.Image`
			`image.Length = 200`
			`image.Width = 10`
			`image.Height = 10`
			`image.Filter = [Name.FlateDecode, Name.DCTDecode]`
			`pdf_image = PdfImage(image)`
			`image.BitsPerComponent = 8`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(image, None) == (`
optimize: better coverage 2023-10-17 02:41:40 -07:00			`pdf_image,`
			`pdf_image.filter_decodeparms[1],`
			`)`


			`def test_extract_image_filter_with_non_image():`
			`image = MagicMock()`
			`image.Subtype = Name.Form`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(image, None) is None`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`def test_extract_image_filter_with_small_stream_size():`
			`image = MagicMock()`
			`image.Subtype = Name.Image`
			`image.Length = 50`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(image, None) is None`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`def test_extract_image_filter_with_small_dimensions():`
			`image = MagicMock()`
			`image.Subtype = Name.Image`
			`image.Length = 200`
			`image.Width = 5`
			`image.Height = 5`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(image, None) is None`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`def test_extract_image_filter_with_multiple_compression_filters():`
			`image = MagicMock()`
			`image.Subtype = Name.Image`
			`image.Length = 200`
			`image.Width = 10`
			`image.Height = 10`
			`image.BitsPerComponent = 8`
			`image.Filter = [Name.ASCII85Decode, Name.FlateDecode, Name.DCTDecode]`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(image, None) is None`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`def test_extract_image_filter_with_wide_gamut_image():`
			`image = MagicMock()`
			`image.Subtype = Name.Image`
			`image.Length = 200`
			`image.Width = 10`
			`image.Height = 10`
			`image.BitsPerComponent = 16`
			`image.Filter = Name.FlateDecode`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(image, None) is None`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`def test_extract_image_filter_with_jpeg2000_image():`
			`im = Image.new('RGB', (10, 10))`
			`bio = BytesIO()`
			`im.save(bio, format='JPEG2000')`
			`pdf = pikepdf.new()`
			`stream = pdf.make_stream(`
			`data=bio.getvalue(),`
			`Subtype=Name.Image,`
			`Length=200,`
			`Width=10,`
			`Height=10,`
			`BitsPerComponent=8,`
			`Filter=Name.JPXDecode,`
			`)`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(stream, None) is None`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`def test_extract_image_filter_with_ccitt_group_3_image():`
			`image = MagicMock()`
			`image.Subtype = Name.Image`
			`image.Length = 200`
			`image.Width = 10`
			`image.Height = 10`
			`image.BitsPerComponent = 1`
			`image.Filter = Name.CCITTFaxDecode`
			`image.DecodeParms = Array([Dictionary(K=1)])`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`assert extract_image_filter(image, None) is None`
optimize: better coverage 2023-10-17 02:41:40 -07:00

			`# Triggers pikepdf bug`
			`# def test_extract_image_filter_with_decode_table():`
			`# image = MagicMock()`
			`# image.Subtype = Name.Image`
			`# image.Length = 200`
			`# image.Width = 10`
			`# image.Height = 10`
			`# image.Filter = Name.FlateDecode`
			`# image.BitsPerComponent = 8`
			`# image.ColorSpace = Name.DeviceGray`
			`# image.Decode = [42, 0]`
Simplify function signature of extract_image_filter 2023-10-17 03:06:13 -07:00			`# assert extract_image_filter(image, None) is None`