diff --git a/src/ocrmypdf/optimize.py b/src/ocrmypdf/optimize.py index f67b6e4d..8d9322f6 100644 --- a/src/ocrmypdf/optimize.py +++ b/src/ocrmypdf/optimize.py @@ -126,6 +126,13 @@ def extract_image_filter( if Name.Decode in image: log.debug(f"xref {xref}: skipping image with Decode table") return None # Don't mess with custom Decode tables + if image.get(Name.SMask, Dictionary()).get(Name.Matte, None) is not None: + # https://github.com/ocrmypdf/OCRmyPDF/issues/1536 + # Do not attempt to optimize images that have a SMask with a Matte. + # That means alpha channel pre-blending is used, and we're not prepared + # to deal with the complexities of that. + log.debug(f"xref {xref}: skipping image whose SMask has Matte") + return None return pim, filtdp diff --git a/tests/test_optimize.py b/tests/test_optimize.py index 4ba136bf..afdf4a0e 100644 --- a/tests/test_optimize.py +++ b/tests/test_optimize.py @@ -6,7 +6,7 @@ from __future__ import annotations from io import BytesIO from os import fspath from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import patch import img2pdf import pikepdf @@ -220,7 +220,7 @@ def test_find_formx(resources): def test_extract_image_filter_with_pdf_image(): - image = MagicMock() + image = Dictionary() image.Subtype = Name.Image image.Length = 200 image.Width = 10 @@ -235,20 +235,20 @@ def test_extract_image_filter_with_pdf_image(): def test_extract_image_filter_with_non_image(): - image = MagicMock() + image = Dictionary() image.Subtype = Name.Form assert extract_image_filter(image, None) is None def test_extract_image_filter_with_small_stream_size(): - image = MagicMock() + image = Dictionary() image.Subtype = Name.Image image.Length = 50 assert extract_image_filter(image, None) is None def test_extract_image_filter_with_small_dimensions(): - image = MagicMock() + image = Dictionary() image.Subtype = Name.Image image.Length = 200 image.Width = 5 @@ -257,7 +257,7 @@ def test_extract_image_filter_with_small_dimensions(): def test_extract_image_filter_with_multiple_compression_filters(): - image = MagicMock() + image = Dictionary() image.Subtype = Name.Image image.Length = 200 image.Width = 10 @@ -268,7 +268,7 @@ def test_extract_image_filter_with_multiple_compression_filters(): def test_extract_image_filter_with_wide_gamut_image(): - image = MagicMock() + image = Dictionary() image.Subtype = Name.Image image.Length = 200 image.Width = 10 @@ -296,7 +296,7 @@ def test_extract_image_filter_with_jpeg2000_image(): def test_extract_image_filter_with_ccitt_group_3_image(): - image = MagicMock() + image = Dictionary() image.Subtype = Name.Image image.Length = 200 image.Width = 10 @@ -309,7 +309,7 @@ def test_extract_image_filter_with_ccitt_group_3_image(): # Triggers pikepdf bug # def test_extract_image_filter_with_decode_table(): -# image = MagicMock() +# image = Dictionary() # image.Subtype = Name.Image # image.Length = 200 # image.Width = 10 @@ -319,3 +319,26 @@ def test_extract_image_filter_with_ccitt_group_3_image(): # image.ColorSpace = Name.DeviceGray # image.Decode = [42, 0] # assert extract_image_filter(image, None) is None + + +def test_extract_image_filter_with_rgb_smask_matte(): + image = Dictionary() + image.Subtype = Name.Image + image.Length = 200 + image.Width = 10 + image.Height = 10 + image.Filter = Name.FlateDecode + image.BitsPerComponent = 8 + image.ColorSpace = Name.DeviceRGB + image.SMask = Dictionary( + Type=Name.Image, + Subtype=Name.Image, + Length=200, + Width=10, + Height=10, + Filter=Name.FlateDecode, + BitsPerComponent=8, + ColorSpace=Name.DeviceGray, + Matte=Array([1, 2, 3]), + ) + assert extract_image_filter(image, None) is None