Add --invalidate-digital-signatures argument to allow OCR of digitally signed PDFs

2025-06-26 23:49:59 +00:00 · 2023-08-12 01:31:36 -07:00 · 2023-08-12 01:31:36 -07:00 · a6ce35b13a
commit a6ce35b13a
parent 45added738
6 changed files with 24 additions and 3 deletions
--- a/misc/completion/ocrmypdf.bash
+++ b/misc/completion/ocrmypdf.bash
@@ -31,6 +31,7 @@ __ocrmypdf_arguments()
 --force-ocr              (OCR documents that already have printable text)
 --skip-text              (skip OCR on any pages that already contain text)
 --redo-ocr               (redo OCR on any pages that seem to have OCR already)
+--invalidate-digital-signatures (remove digital signatures from PDF)
 --skip-big               (skip OCR on pages larger than this many MPixels)
 --optimize               (select optimization level)
 --jpeg-quality           (JPEG quality [0..100])
--- a/misc/completion/ocrmypdf.fish
+++ b/misc/completion/ocrmypdf.fish
@@ -16,6 +16,7 @@ complete -c ocrmypdf -l remove-vectors -d "don't send vector objects to OCR"
 complete -c ocrmypdf -s f -l force-ocr -d "OCR documents that already have printable text"
 complete -c ocrmypdf -s s -l skip-ocr -d "skip OCR on pages that text, otherwise try OCR"
 complete -c ocrmypdf -l redo-ocr -d "redo OCR on any pages that seem to have OCR already"
+complete -c ocrmypdf -l invalidate-digital-signatures -d "invalidate digital signatures and allow OCR to proceed"

 complete -c ocrmypdf -s k -l keep-temporary-files -d "keep temporary files (debug)"

--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -197,9 +197,12 @@ def validate_pdfinfo_options(context: PdfContext) -> None:
            "This PDF contains dynamic XFA forms created by Adobe LiveCycle "
            "Designer and can only be read by Adobe Acrobat or Adobe Reader."
        )
-    if pdfinfo.has_acroform:
-        if pdfinfo.has_signature:
+    if pdfinfo.has_signature:
+        if options.invalidate_digital_signatures:
+            log.warning("All digital signatures will be invalidated")
+        else:
            raise DigitalSignatureError()
+    if pdfinfo.has_acroform:
        if options.redo_ocr:
            raise InputFileError(
                "This PDF has a user fillable form. --redo-ocr is not "
--- a/src/ocrmypdf/cli.py
+++ b/src/ocrmypdf/cli.py
@@ -359,6 +359,13 @@ Online documentation is located at:
        help="Skip OCR on pages larger than the specified amount of megapixels, "
        "but include skipped pages in final output",
    )
+    ocrsettings.add_argument(
+        '--invalidate-digital-signatures',
+        action='store_true',
+        help="Normally, OCRmyPDF will refuse to OCR a PDF that has a digital "
+        "signature. This option allows OCR to proceed, but the digital signature "
+        "will be invalidated.",
+    )

    advanced = parser.add_argument_group(
        "Advanced", "Advanced options to control OCRmyPDF"
--- a/src/ocrmypdf/pdfinfo/info.py
+++ b/src/ocrmypdf/pdfinfo/info.py
@@ -954,6 +954,10 @@ DEFAULT_EXECUTOR = SerialExecutor()
 class PdfInfo:
    """Get summary information about a PDF."""

+    _has_acroform: bool = False
+    _has_signature: bool = False
+    _needs_rendering: bool = False
+
    def __init__(
        self,
        infile,
@@ -982,7 +986,6 @@ class PdfInfo:
                detailed_analysis=detailed_analysis,
            )
            self._needs_rendering = pdf.Root.get(Name.NeedsRendering, False)
-            self._has_acroform = False
            if Name.AcroForm in pdf.Root:
                if len(pdf.Root.AcroForm.get(Name.Fields, [])) > 0:
                    self._has_acroform = True
--- a/tests/test_acroform.py
+++ b/tests/test_acroform.py
@@ -46,3 +46,9 @@ def digitally_signed(acroform, outdir):
 def test_digital_signature(digitally_signed, no_outpdf):
    with pytest.raises(ocrmypdf.exceptions.DigitalSignatureError):
        check_ocrmypdf(digitally_signed, no_outpdf)
+
+
+def test_digital_signature_invalidate(digitally_signed, no_outpdf):
+    check_ocrmypdf(
+        digitally_signed, no_outpdf, '--force-ocr', '--invalidate-digital-signatures'
+    )