Add argument to override digital signatures

This commit is contained in:
James R. Barlow 2023-08-12 01:31:36 -07:00
parent 45added738
commit a6ce35b13a
No known key found for this signature in database
GPG Key ID: E54A300D567E1260
6 changed files with 24 additions and 3 deletions

View File

@ -31,6 +31,7 @@ __ocrmypdf_arguments()
--force-ocr (OCR documents that already have printable text)
--skip-text (skip OCR on any pages that already contain text)
--redo-ocr (redo OCR on any pages that seem to have OCR already)
--invalidate-digital-signatures (remove digital signatures from PDF)
--skip-big (skip OCR on pages larger than this many MPixels)
--optimize (select optimization level)
--jpeg-quality (JPEG quality [0..100])

View File

@ -16,6 +16,7 @@ complete -c ocrmypdf -l remove-vectors -d "don't send vector objects to OCR"
complete -c ocrmypdf -s f -l force-ocr -d "OCR documents that already have printable text"
# The long form of -s is --skip-text; there is no --skip-ocr option.
complete -c ocrmypdf -s s -l skip-text -d "skip OCR on pages that have text, otherwise try OCR"
complete -c ocrmypdf -l redo-ocr -d "redo OCR on any pages that seem to have OCR already"
complete -c ocrmypdf -l invalidate-digital-signatures -d "invalidate digital signatures and allow OCR to proceed"
complete -c ocrmypdf -s k -l keep-temporary-files -d "keep temporary files (debug)"

View File

@ -197,9 +197,12 @@ def validate_pdfinfo_options(context: PdfContext) -> None:
"This PDF contains dynamic XFA forms created by Adobe LiveCycle "
"Designer and can only be read by Adobe Acrobat or Adobe Reader."
)
if pdfinfo.has_acroform:
if pdfinfo.has_signature:
if pdfinfo.has_signature:
if options.invalidate_digital_signatures:
log.warning("All digital signatures will be invalidated")
else:
raise DigitalSignatureError()
if pdfinfo.has_acroform:
if options.redo_ocr:
raise InputFileError(
"This PDF has a user fillable form. --redo-ocr is not "

View File

@ -359,6 +359,13 @@ Online documentation is located at:
help="Skip OCR on pages larger than the specified amount of megapixels, "
"but include skipped pages in final output",
)
ocrsettings.add_argument(
'--invalidate-digital-signatures',
action='store_true',
help="Normally, OCRmyPDF will refuse to OCR a PDF that has a digital "
"signature. This option allows OCR to proceed, but the digital signature "
"will be invalidated.",
)
advanced = parser.add_argument_group(
"Advanced", "Advanced options to control OCRmyPDF"

View File

@ -954,6 +954,10 @@ DEFAULT_EXECUTOR = SerialExecutor()
class PdfInfo:
"""Get summary information about a PDF."""
_has_acroform: bool = False
_has_signature: bool = False
_needs_rendering: bool = False
def __init__(
self,
infile,
@ -982,7 +986,6 @@ class PdfInfo:
detailed_analysis=detailed_analysis,
)
self._needs_rendering = pdf.Root.get(Name.NeedsRendering, False)
self._has_acroform = False
if Name.AcroForm in pdf.Root:
if len(pdf.Root.AcroForm.get(Name.Fields, [])) > 0:
self._has_acroform = True

View File

@ -46,3 +46,9 @@ def digitally_signed(acroform, outdir):
def test_digital_signature(digitally_signed, no_outpdf):
    """Running on the signed fixture without any override must raise."""
    expected_error = ocrmypdf.exceptions.DigitalSignatureError
    with pytest.raises(expected_error):
        check_ocrmypdf(digitally_signed, no_outpdf)
def test_digital_signature_invalidate(digitally_signed, no_outpdf):
    """Run the signed fixture with --invalidate-digital-signatures set."""
    # --force-ocr is passed alongside the override flag, as in the original call.
    override_args = ('--force-ocr', '--invalidate-digital-signatures')
    check_ocrmypdf(digitally_signed, no_outpdf, *override_args)