OCRmyPDF/misc/completion/ocrmypdf.bash

# ocrmypdf completion                                     -*- shell-script -*-

# Copyright 2019, 2021 Frank Pille
# Copyright 2020 Alex Willner
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

set -o errexit

__ocrmypdf_arguments()
{
    local arguments="--help                   (show help message)
--language               (language(s) of the file to be OCRed)
--image-dpi              (assume this DPI if input image DPI is unknown)
--output-type            (select PDF output options)
--sidecar                (write OCR to text file)
--version                (print program version and exit)
--jobs                   (how many worker processes to use)
--quiet                  (suppress INFO messages)
--verbose                (set verbosity level)
--title                  (set metadata)
--author                 (set metadata)
--subject                (set metadata)
--keywords               (set metadata)
--rotate-pages           (rotate pages to correct orientation)
--remove-background      (attempt to remove background from pages)
--deskew                 (fix small horizontal alignment skew)
--clean                  (clean document images before OCR)
--clean-final            (clean document images and keep result)
--unpaper-args           (a quoted string of arguments to pass to unpaper)
--oversample             (oversample images to this DPI)
--remove-vectors         (don\'t send vector objects to OCR)
--threshold              (threshold images before OCR)
--force-ocr              (OCR documents that already have printable text)
--skip-text              (skip OCR on any pages that already contain text)
--redo-ocr               (redo OCR on any pages that seem to have OCR already)
--skip-big               (skip OCR on pages larger than this many MPixels)
--optimize               (select optimization level)
--jpeg-quality           (JPEG quality [0..100])
--png-quality            (PNG quality [0..100])
--jbig2-lossy            (enable lossy JBIG2 (see docs))
--pages                  (apply OCR to only the specified pages)
--max-image-mpixels      (image decompression bomb threshold)
--pdf-renderer           (select PDF renderer options)
--rotate-pages-threshold (page rotation confidence)
--pdfa-image-compression (set PDF/A image compression options)
--fast-web-view          (if file size if above this amount in MB linearize PDF)
--plugin                 (name of plugin to import)
--keep-temporary-files   (keep temporary files (debug)
--tesseract-config       (set custom tesseract config file)
--tesseract-pagesegmode  (set tesseract --psm)
--tesseract-oem          (set tesseract --oem)
--tesseract-thresholding (set tesseract image thresholding)
--tesseract-timeout      (maximum number of seconds to wait for OCR)
--user-words             (specify location of user words file)
--user-patterns          (specify location of user patterns file)
--no-progress-bar        (disable the progress bar)
"

    COMPREPLY=( $( compgen -W "$arguments" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_output-type()
{
    local choices="pdfa   (output a PDF/A (default))
pdf    (output a standard PDF)
pdfa-1 (output a PDF/A-1b)
pdfa-2 (output a PDF/A-2b)
pdfa-3 (output a PDF/A-3b)
none   (do not produce an output PDF (for example, if you only care about --sidecar))"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_verbose()
{
    local choices="0  (standard output messages)
1  (troubleshooting output messages)
2  (debugging output messages)"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_optimize()
{
    local choices="0  (do not optimize)
1  (do safe, lossless optimizations (default))
2  (do some lossy optimizations)
3  (do aggressive lossy optimizations (including lossy JBIG2))"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_pdf-renderer()
{
    local choices="auto      (auto select PDF renderer)
hocr      (use hOCR renderer)
hocrdebug (uses hOCR renderer in debug mode, showing recognized text)
sandwich  (use sandwich renderer)"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_pdfa-image-compression()
{
    local choices="auto     (let Ghostscript decide how to compress images)
jpeg     (convert color and grayscale images to JPEG)
lossless (convert color and grayscale images to lossless (PNG))"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_tesseract-pagesegmode()
{
    local choices="0  (orientation and script detection (OSD) only)
1  (automatic page segmentation with OSD)
2  (automatic page segmentation, but no OSD, or OCR)
3  (fully automatic page segmentation, but no OSD (default))
4  (assume a single column of text of variable sizes)
5  (assume a single uniform block of vertically aligned text)
6  (assume a single uniform block of text)
7  (treat the image as a single text line)
8  (treat the image as a single word)
9  (treat the image as a single word in a circle)
10 (treat the image as a single character)
11 (sparse text - find as much text as possible in no particular order)
12 (sparse text with OSD)
13 (raw line - treat the image as a single text line)"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_tesseract-oem()
{
    local choices="0 (legacy engine only)
1 (neural nets LSTM engine only)
2 (legacy + LSTM engines)
3 (default, based on what is available)"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}

__ocrmypdf_tesseract-thresholding()
{
    local choices="auto          (let OCRmyPDF pick thresholding - current always uses otsu)
otsu          (use hOCR renderer)
adaptive-otsu (use adaptive Otsu thresholding)
sauvola       (use Sauvola thresholding)"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )
    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}


__ocrmypdf_check_previous()
{
    case $prev in
        -h|--help|--version)
            return 0
            ;;
        -l|--language)
            COMPREPLY=$( command tesseract --list-langs 2>/dev/null )
            COMPREPLY=( $( compgen -W '${COMPREPLY[@]##*:}' -- "$cur" ) )
            return 0
            ;;
        --output-type)
            __ocrmypdf_output-type
            return 0
            ;;
        -j|--jobs)
            COMPREPLY=( $( compgen -W '{1..'$( _ncpus )'}' -- "$cur" ) )
            return 0
            ;;
        -v|--verbose)
            __ocrmypdf_verbose
            return 0
            ;;
        -O|--optimize)
            __ocrmypdf_optimize
            return 0
            ;;
        --pdf-renderer)
            __ocrmypdf_pdf-renderer
            return 0
            ;;
        --pdfa-image-compression)
            __ocrmypdf_pdfa-image-compression
            return 0
            ;;
        --tesseract-pagesegmode)
            __ocrmypdf_tesseract-pagesegmode
            return 0
            ;;
        --tesseract-oem)
            __ocrmypdf_tesseract-oem
            return 0
            ;;
        --tesseract-thresholding)
            __ocrmypdf_tesseract-thresholding
            return 0
            ;;

        --title|--author|--subject|--keywords|--unpaper-args|--pages|--plugin|\
        --jpeg-quality|--png-quality|--image-dpi|--oversample|--skip-big|--max-image-mpixels|\
        --tesseract-timeout|--rotate-pages-threshold|--fast-web-view)
            # argument required but no completions available
            return 0
            ;;
        --tesseract-config|--user-words|--user-patterns|--sidecar)
            _filedir
            return 0
            ;;
    esac

    return 1
}

_ocrmypdf()
{
    local OLDIFS="$IFS"
    local IFS=$'\n'

    local cur prev

    # Homebrew on Macs have version 1.3 of bash-completion which doesn't include - see #502
    if declare -F _init_completion >/dev/null 2>&1; then
      _init_completion  || return
    else
        COMPREPLY=()
        _get_comp_words_by_ref cur prev
    fi

    if __ocrmypdf_check_previous -ne 0; then
        return
    fi

    if [[ "$cur" == -* ]]; then
        __ocrmypdf_arguments
    else
        _filedir
    fi

    IFS="$OLDIFS"

    return
} &&
complete -F _ocrmypdf ocrmypdf

set +o errexit

# ex: filetype=sh