mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2025-12-29 16:10:06 +00:00
Fix #156 - NoneType has no ‘getObject’ for pages with no /Contents
This commit is contained in:
parent
b9b12e2879
commit
aa859a4139
@@ -430,7 +430,7 @@ def _find_images(pdf, container, shorthand=None):
|
||||
|
||||
"""
|
||||
|
||||
if container.get('/Type') == '/Page':
|
||||
if container.get('/Type') == '/Page' and '/Contents' in container:
|
||||
# For a /Page the content stream is attached to the page's /Contents
|
||||
page = container
|
||||
contentstream = pypdf.pdf.ContentStream(page.getContents(), pdf)
|
||||
|
||||
@@ -85,6 +85,9 @@ under the terms of the license in LICENSE.rst.
|
||||
* - overlay.pdf
|
||||
- @maxandersen
|
||||
- PDF file generated by PDFPen pro that triggered content stream parse errors
|
||||
* - no_contents.pdf
|
||||
- @jbarlow83
|
||||
- synthetic PDF with a blank page that has no /Contents entry
|
||||
|
||||
Assemblies
|
||||
==========
|
||||
|
||||
21
tests/resources/no_contents.pdf
Normal file
21
tests/resources/no_contents.pdf
Normal file
@@ -0,0 +1,21 @@
|
||||
%PDF-1.3
|
||||
%¿÷¢þ
|
||||
1 0 obj
|
||||
<< /Pages 2 0 R /Type /Catalog >>
|
||||
endobj
|
||||
2 0 obj
|
||||
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
|
||||
endobj
|
||||
3 0 obj
|
||||
<< /MediaBox [ 0 0 720 720 ] /Parent 2 0 R /Resources << >> /Type /Page >>
|
||||
endobj
|
||||
xref
|
||||
0 4
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000064 00000 n
|
||||
0000000123 00000 n
|
||||
trailer << /Root 1 0 R /Size 4 /ID [<52bba3c78160d0c6e851b59110e5d076><52bba3c78160d0c6e851b59110e5d076>] >>
|
||||
startxref
|
||||
213
|
||||
%%EOF
|
||||
@@ -870,4 +870,8 @@ def test_gs_raster_failure(spoof_no_tess_gs_raster_fail, resources, outpdf):
|
||||
env=spoof_no_tess_gs_raster_fail)
|
||||
print(err)
|
||||
assert p.returncode == ExitCode.child_process_error
|
||||
|
||||
|
||||
def test_no_contents(spoof_tesseract_noop, resources, outpdf):
|
||||
check_ocrmypdf(resources / 'no_contents.pdf', outpdf, '--force-ocr',
|
||||
env=spoof_tesseract_noop)
|
||||
@@ -108,3 +108,11 @@ def test_form_xobject(resources):
|
||||
pdfinfo = pageinfo.pdf_get_all_pageinfo(str(filename))
|
||||
pdfimage = pdfinfo[0]['images'][0]
|
||||
assert pdfimage['width'] == 50
|
||||
|
||||
|
||||
def test_no_contents(resources):
|
||||
filename = resources / 'no_contents.pdf'
|
||||
|
||||
pdfinfo = pageinfo.pdf_get_all_pageinfo(str(filename))
|
||||
assert len(pdfinfo[0]['images']) == 0
|
||||
assert pdfinfo[0]['has_text'] == False
|
||||
Loading…
x
Reference in New Issue
Block a user