mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2025-06-26 23:49:59 +00:00
Raise exception if resulting PDF might appear blank, a known issue in some PDF viewers
Fixes #1187
This commit is contained in:
parent
e7fa97731f
commit
a596ccf844
@ -70,6 +70,7 @@ Files: tests/resources/linn.png
|
||||
tests/resources/ccitt.pdf
|
||||
tests/resources/cardinal.pdf
|
||||
tests/resources/jbig2.pdf
|
||||
tests/resources/jbig2_baddevicen.pdf
|
||||
tests/resources/skew.pdf
|
||||
tests/resources/rotated_skew.pdf
|
||||
tests/resources/poster.pdf
|
||||
|
@ -17,7 +17,7 @@ from subprocess import PIPE, CalledProcessError
|
||||
from packaging.version import Version
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
|
||||
from ocrmypdf.exceptions import SubprocessOutputError
|
||||
from ocrmypdf.exceptions import ColorConversionNeededError, SubprocessOutputError
|
||||
from ocrmypdf.helpers import Resolution
|
||||
from ocrmypdf.subprocess import get_version, run, run_polling_stderr
|
||||
|
||||
@ -64,10 +64,6 @@ class DuplicateFilter(logging.Filter):
|
||||
log.addFilter(DuplicateFilter(log))
|
||||
|
||||
|
||||
# Ghostscript executable - gswin32c is not supported
|
||||
GS = 'gswin64c' if os.name == 'nt' else 'gs'
|
||||
|
||||
|
||||
def version() -> Version:
    """Return the version of the ``GS`` executable as a ``Version`` object.

    The version string comes from ``get_version(GS)`` and is parsed with
    ``packaging.version.Version`` so callers can compare versions.
    """
    return Version(get_version(GS))
|
||||
|
||||
@ -77,6 +73,15 @@ def _gs_error_reported(stream) -> bool:
|
||||
return bool(match)
|
||||
|
||||
|
||||
def _gs_devicen_reported(stream) -> bool:
|
||||
match = re.search(
|
||||
r'DeviceN.*inappropriate alternate',
|
||||
stream,
|
||||
flags=re.IGNORECASE | re.MULTILINE,
|
||||
)
|
||||
return bool(match)
|
||||
|
||||
|
||||
def rasterize_pdf(
|
||||
input_file: os.PathLike,
|
||||
output_file: os.PathLike,
|
||||
@ -250,7 +255,6 @@ def generate_pdfa(
|
||||
]
|
||||
)
|
||||
args_gs.extend(fspath(s) for s in pdf_pages) # Stringify Path objs
|
||||
|
||||
try:
|
||||
with Path(output_file).open('wb') as output:
|
||||
p = run_polling_stderr(
|
||||
@ -279,3 +283,5 @@ def generate_pdfa(
|
||||
# the **** pattern to split the stderr into parts.
|
||||
for part in stderr.split('****'):
|
||||
log.error(part)
|
||||
if _gs_devicen_reported(stderr):
|
||||
raise ColorConversionNeededError()
|
||||
|
@ -137,3 +137,16 @@ class TaggedPDFError(InputFileError):
|
||||
override this error.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
class ColorConversionNeededError(BadArgsError):
    """PDF needs color conversion.

    Raised by the Ghostscript PDF/A generation step when Ghostscript's
    stderr reports a DeviceN color space with an inappropriate alternate.
    The user-facing ``message`` lists the two ways out: convert colors
    with --color-conversion-strategy, or skip PDF/A conversion with
    --output-type pdf.
    """

    # dedent() strips the common leading indentation, so the message text
    # starts at column 0 regardless of how this literal is indented.
    message = dedent(
        """\
        The input PDF has an unusual color space. Use
        --color-conversion-strategy to convert to a common color space
        such as RGB, or use --output-type pdf to skip PDF/A conversion
        and retain the original color space.
        """
    )
|
||||
|
BIN
tests/resources/jbig2_baddevicen.pdf
Normal file
BIN
tests/resources/jbig2_baddevicen.pdf
Normal file
Binary file not shown.
@ -13,7 +13,7 @@ import pytest
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
|
||||
from ocrmypdf._exec.ghostscript import DuplicateFilter, rasterize_pdf
|
||||
from ocrmypdf.exceptions import ExitCode
|
||||
from ocrmypdf.exceptions import ColorConversionNeededError, ExitCode
|
||||
from ocrmypdf.helpers import Resolution
|
||||
|
||||
from .conftest import check_ocrmypdf, run_ocrmypdf_api
|
||||
@ -126,6 +126,16 @@ def test_ghostscript_feature_elision(resources, outpdf):
|
||||
)
|
||||
|
||||
|
||||
def test_ghostscript_mandatory_color_conversion(resources, outpdf):
    """Processing jbig2_baddevicen.pdf must raise ColorConversionNeededError.

    The run uses the tesseract_noop plugin, so the expected error has to
    come from the rest of the pipeline rather than from OCR itself.
    """
    with pytest.raises(ColorConversionNeededError):
        check_ocrmypdf(
            resources / 'jbig2_baddevicen.pdf',
            outpdf,
            '--plugin',
            'tests/plugins/tesseract_noop.py',
        )
|
||||
|
||||
|
||||
def test_rasterize_pdf_errors(resources, no_outpdf, caplog):
|
||||
with patch('ocrmypdf._exec.ghostscript.run') as mock:
|
||||
# ghostscript can produce
|
||||
|
Loading…
x
Reference in New Issue
Block a user