Skip optimizing images with pre-blended soft masks

Fixes issue #1536, "[Bug]: Optimized pdf not rendering with Quartz / Core Graphics".
This commit is contained in:
James R. Barlow 2025-06-12 23:58:43 -07:00
parent 6f6448f286
commit 3beabf55e7
No known key found for this signature in database
GPG Key ID: E54A300D567E1260
2 changed files with 39 additions and 9 deletions

View File

@ -126,6 +126,13 @@ def extract_image_filter(
if Name.Decode in image: if Name.Decode in image:
log.debug(f"xref {xref}: skipping image with Decode table") log.debug(f"xref {xref}: skipping image with Decode table")
return None # Don't mess with custom Decode tables return None # Don't mess with custom Decode tables
if image.get(Name.SMask, Dictionary()).get(Name.Matte, None) is not None:
# https://github.com/ocrmypdf/OCRmyPDF/issues/1536
# Do not attempt to optimize images whose SMask has a Matte entry.
# A Matte means the image data was pre-blended with a matte color using the
# alpha channel, and we're not prepared to deal with the complexities of that.
log.debug(f"xref {xref}: skipping image whose SMask has Matte")
return None
return pim, filtdp return pim, filtdp

View File

@ -6,7 +6,7 @@ from __future__ import annotations
from io import BytesIO from io import BytesIO
from os import fspath from os import fspath
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch from unittest.mock import patch
import img2pdf import img2pdf
import pikepdf import pikepdf
@ -220,7 +220,7 @@ def test_find_formx(resources):
def test_extract_image_filter_with_pdf_image(): def test_extract_image_filter_with_pdf_image():
image = MagicMock() image = Dictionary()
image.Subtype = Name.Image image.Subtype = Name.Image
image.Length = 200 image.Length = 200
image.Width = 10 image.Width = 10
@ -235,20 +235,20 @@ def test_extract_image_filter_with_pdf_image():
def test_extract_image_filter_with_non_image(): def test_extract_image_filter_with_non_image():
image = MagicMock() image = Dictionary()
image.Subtype = Name.Form image.Subtype = Name.Form
assert extract_image_filter(image, None) is None assert extract_image_filter(image, None) is None
def test_extract_image_filter_with_small_stream_size(): def test_extract_image_filter_with_small_stream_size():
image = MagicMock() image = Dictionary()
image.Subtype = Name.Image image.Subtype = Name.Image
image.Length = 50 image.Length = 50
assert extract_image_filter(image, None) is None assert extract_image_filter(image, None) is None
def test_extract_image_filter_with_small_dimensions(): def test_extract_image_filter_with_small_dimensions():
image = MagicMock() image = Dictionary()
image.Subtype = Name.Image image.Subtype = Name.Image
image.Length = 200 image.Length = 200
image.Width = 5 image.Width = 5
@ -257,7 +257,7 @@ def test_extract_image_filter_with_small_dimensions():
def test_extract_image_filter_with_multiple_compression_filters(): def test_extract_image_filter_with_multiple_compression_filters():
image = MagicMock() image = Dictionary()
image.Subtype = Name.Image image.Subtype = Name.Image
image.Length = 200 image.Length = 200
image.Width = 10 image.Width = 10
@ -268,7 +268,7 @@ def test_extract_image_filter_with_multiple_compression_filters():
def test_extract_image_filter_with_wide_gamut_image(): def test_extract_image_filter_with_wide_gamut_image():
image = MagicMock() image = Dictionary()
image.Subtype = Name.Image image.Subtype = Name.Image
image.Length = 200 image.Length = 200
image.Width = 10 image.Width = 10
@ -296,7 +296,7 @@ def test_extract_image_filter_with_jpeg2000_image():
def test_extract_image_filter_with_ccitt_group_3_image(): def test_extract_image_filter_with_ccitt_group_3_image():
image = MagicMock() image = Dictionary()
image.Subtype = Name.Image image.Subtype = Name.Image
image.Length = 200 image.Length = 200
image.Width = 10 image.Width = 10
@ -309,7 +309,7 @@ def test_extract_image_filter_with_ccitt_group_3_image():
# Triggers pikepdf bug # Triggers pikepdf bug
# def test_extract_image_filter_with_decode_table(): # def test_extract_image_filter_with_decode_table():
# image = MagicMock() # image = Dictionary()
# image.Subtype = Name.Image # image.Subtype = Name.Image
# image.Length = 200 # image.Length = 200
# image.Width = 10 # image.Width = 10
@ -319,3 +319,26 @@ def test_extract_image_filter_with_ccitt_group_3_image():
# image.ColorSpace = Name.DeviceGray # image.ColorSpace = Name.DeviceGray
# image.Decode = [42, 0] # image.Decode = [42, 0]
# assert extract_image_filter(image, None) is None # assert extract_image_filter(image, None) is None
def test_extract_image_filter_with_rgb_smask_matte():
    """An RGB image whose soft mask carries a Matte entry is not extracted."""
    # Build the soft mask first; its Matte entry is what should trigger
    # the skip in extract_image_filter.
    soft_mask = Dictionary(
        Type=Name.Image,
        Subtype=Name.Image,
        Length=200,
        Width=10,
        Height=10,
        Filter=Name.FlateDecode,
        BitsPerComponent=8,
        ColorSpace=Name.DeviceGray,
        Matte=Array([1, 2, 3]),
    )
    # The parent image is otherwise an ordinary Flate-compressed RGB image.
    rgb_image = Dictionary(
        Subtype=Name.Image,
        Length=200,
        Width=10,
        Height=10,
        Filter=Name.FlateDecode,
        BitsPerComponent=8,
        ColorSpace=Name.DeviceRGB,
        SMask=soft_mask,
    )
    result = extract_image_filter(rgb_image, None)
    assert result is None