mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2026-01-20 11:09:03 +00:00
Squashed commit of the following:
commit 974de2e8ccad7fd34694f2c3a7a17c64bb52cdab
Merge: a8d7f969 ee04aa72
Author: James R. Barlow <james@purplerock.ca>
Date: Sat Dec 4 20:22:50 2021 -0800
Merge branch 'update_bash-completion' of git://github.com/FPille/OCRmyPDF into FPille-update_bash-completion
commit ee04aa722504272891d8c74171f1de9bc954ca09
Author: FPille <f.pille@gmail.com>
Date: Thu Oct 14 11:09:23 2021 +0200
update
commit 76f64537aa5549278483ce338fe03764d0ce8065
Author: FPille <f.pille@gmail.com>
Date: Thu Oct 14 11:04:10 2021 +0200
updated and descriptions for arguments and choices added
deprecated arguments removed
bug fix: typo "_init_completion" instead of "_init_completions"
commit de9b93e852b3a6aca29b77ff7bdf433a07b42794
Merge: c23374de 42713b77
Author: Frank <50119297+FPille@users.noreply.github.com>
Date: Thu Oct 14 08:08:11 2021 +0200
Merge branch 'jbarlow83:master' into master
commit c23374de818edddb789073251386e5ee1cfaef84
Merge: 40b2ebcb c409fa58
Author: Frank <50119297+FPille@users.noreply.github.com>
Date: Wed May 26 20:31:00 2021 +0200
Merge branch 'jbarlow83:master' into master
commit 40b2ebcb37b6a21845e2733d4ad8078c09d08d0a
Merge: 79c84eef 7e388f59
Author: Frank <50119297+FPille@users.noreply.github.com>
Date: Sat Jun 1 11:09:07 2019 +0200
Merge pull request #1 from jbarlow83/master
update master
311 lines
10 KiB
Bash
311 lines
10 KiB
Bash
# ocrmypdf completion -*- shell-script -*-
|
|
|
|
# Copyright 2019, 2021 Frank Pille
|
|
# Copyright 2020 Alex Willner
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in all
|
|
# copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
# SOFTWARE.
|
|
|
|
# Enable errexit while the definitions in this file are evaluated; it is
# switched off again by the `set +o errexit` at the end of the file.
# NOTE(review): completion files are normally sourced into the user's
# shell, so a failing command below would presumably terminate that shell -
# confirm this is intended.
set -o errexit
|
|
|
|
# Complete ocrmypdf's long options, each annotated with a short description.
#
# Reads:  $cur - the word being completed (set by the caller).
#         $IFS - the caller (_ocrmypdf) sets it to a newline so that every
#                "option (description)" line is treated as a single word.
# Writes: COMPREPLY - the matching "option (description)" entries. When
#         exactly one entry matches, only the bare option is kept so that
#         the description is not inserted on the command line.
__ocrmypdf_arguments()
{
    # One "option (description)" entry per line. compgen -W honours shell
    # quoting inside the word list, hence the escaped apostrophe in "don\'t".
    local arguments="--help (show help message)
--language (language(s) of the file to be OCRed)
--image-dpi (assume this DPI if input image DPI is unknown)
--output-type (select PDF output options)
--sidecar (write OCR to text file)
--version (print program version and exit)
--jobs (how many worker processes to use)
--quiet (suppress INFO messages)
--verbose (set verbosity level)
--title (set metadata)
--author (set metadata)
--subject (set metadata)
--keywords (set metadata)
--rotate-pages (rotate pages to correct orientation)
--remove-background (attempt to remove background from pages)
--deskew (fix small horizontal alignment skew)
--clean (clean document images before OCR)
--clean-final (clean document images and keep result)
--unpaper-args (a quoted string of arguments to pass to unpaper)
--oversample (oversample images to this DPI)
--remove-vectors (don\'t send vector objects to OCR)
--threshold (threshold images before OCR)
--force-ocr (OCR documents that already have printable text)
--skip-text (skip OCR on any pages that already contain text)
--redo-ocr (redo OCR on any pages that seem to have OCR already)
--skip-big (skip OCR on pages larger than this many MPixels)
--optimize (select optimization level)
--jpeg-quality (JPEG quality [0..100])
--png-quality (PNG quality [0..100])
--jbig2-lossy (enable lossy JBIG2 (see docs))
--pages (apply OCR to only the specified pages)
--max-image-mpixels (image decompression bomb threshold)
--pdf-renderer (select PDF renderer options)
--rotate-pages-threshold (page rotation confidence)
--pdfa-image-compression (set PDF/A image compression options)
--fast-web-view (if file size is above this amount in MB linearize PDF)
--plugin (name of plugin to import)
--keep-temporary-files (keep temporary files (debug))
--tesseract-config (set custom tesseract config file)
--tesseract-pagesegmode (set tesseract --psm)
--tesseract-oem (set tesseract --oem)
--tesseract-thresholding (set tesseract image thresholding)
--tesseract-timeout (maximum number of seconds to wait for OCR)
--user-words (specify location of user words file)
--user-patterns (specify location of user patterns file)
--no-progress-bar (disable the progress bar)
"
    local entry

    # Read compgen's output line by line instead of word-splitting an
    # unquoted $(...): descriptions such as "[0..100]" are glob patterns and
    # must not undergo pathname expansion (with nullglob set those entries
    # would silently disappear).
    COMPREPLY=()
    while IFS= read -r entry; do
        COMPREPLY+=( "$entry" )
    done < <( compgen -W "$arguments" -- "$cur" )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}
|
|
|
|
# Offer the values accepted by --output-type.
#
# Uses $cur from the caller and fills COMPREPLY. Entries are written one per
# line as "value (description)"; _ocrmypdf runs this with IFS set to a
# newline so each line stays a single completion word.
__ocrmypdf_output-type()
{
    local candidates
    candidates="pdfa (output a PDF/A (default))
pdf (output a standard PDF)
pdfa-1 (output a PDF/A-1b)
pdfa-2 (output a PDF/A-2b)
pdfa-3 (output a PDF/A-3b)
none (do not produce an output PDF (for example, if you only care about --sidecar))"

    COMPREPLY=( $( compgen -W "$candidates" -- "$cur" ) )

    # Several matches: keep the descriptions so they are shown in the list.
    (( ${#COMPREPLY[@]} == 1 )) || return 0

    # A single match is inserted verbatim, so cut everything after the value.
    COMPREPLY=( ${COMPREPLY[0]%% *} )
}
|
|
|
|
# Offer the verbosity levels accepted by -v/--verbose.
#
# Uses $cur from the caller and fills COMPREPLY. Entries are written one per
# line as "level (description)"; _ocrmypdf runs this with IFS set to a
# newline so each line stays a single completion word.
__ocrmypdf_verbose()
{
    local levels
    levels="0 (standard output messages)
1 (troubleshooting output messages)
2 (debugging output messages)"

    COMPREPLY=( $( compgen -W "$levels" -- "$cur" ) )

    # Several matches: keep the descriptions so they are shown in the list.
    (( ${#COMPREPLY[@]} == 1 )) || return 0

    # A single match is inserted verbatim, so cut everything after the level.
    COMPREPLY=( ${COMPREPLY[0]%% *} )
}
|
|
|
|
# Offer the optimization levels accepted by -O/--optimize.
#
# Uses $cur from the caller and fills COMPREPLY. Entries are written one per
# line as "level (description)"; _ocrmypdf runs this with IFS set to a
# newline so each line stays a single completion word.
__ocrmypdf_optimize()
{
    local levels
    levels="0 (do not optimize)
1 (do safe, lossless optimizations (default))
2 (do some lossy optimizations)
3 (do aggressive lossy optimizations (including lossy JBIG2))"

    COMPREPLY=( $( compgen -W "$levels" -- "$cur" ) )

    # Several matches: keep the descriptions so they are shown in the list.
    (( ${#COMPREPLY[@]} == 1 )) || return 0

    # A single match is inserted verbatim, so cut everything after the level.
    COMPREPLY=( ${COMPREPLY[0]%% *} )
}
|
|
|
|
# Offer the renderer names accepted by --pdf-renderer.
#
# Uses $cur from the caller and fills COMPREPLY. Entries are written one per
# line as "name (description)"; _ocrmypdf runs this with IFS set to a
# newline so each line stays a single completion word.
__ocrmypdf_pdf-renderer()
{
    local renderers
    renderers="auto (auto select PDF renderer)
hocr (use hOCR renderer)
hocrdebug (uses hOCR renderer in debug mode, showing recognized text)
sandwich (use sandwich renderer)"

    COMPREPLY=( $( compgen -W "$renderers" -- "$cur" ) )

    # Several matches: keep the descriptions so they are shown in the list.
    (( ${#COMPREPLY[@]} == 1 )) || return 0

    # A single match is inserted verbatim, so cut everything after the name.
    COMPREPLY=( ${COMPREPLY[0]%% *} )
}
|
|
|
|
# Offer the modes accepted by --pdfa-image-compression.
#
# Uses $cur from the caller and fills COMPREPLY. Entries are written one per
# line as "mode (description)"; _ocrmypdf runs this with IFS set to a
# newline so each line stays a single completion word.
__ocrmypdf_pdfa-image-compression()
{
    local modes
    modes="auto (let Ghostscript decide how to compress images)
jpeg (convert color and grayscale images to JPEG)
lossless (convert color and grayscale images to lossless (PNG))"

    COMPREPLY=( $( compgen -W "$modes" -- "$cur" ) )

    # Several matches: keep the descriptions so they are shown in the list.
    (( ${#COMPREPLY[@]} == 1 )) || return 0

    # A single match is inserted verbatim, so cut everything after the mode.
    COMPREPLY=( ${COMPREPLY[0]%% *} )
}
|
|
|
|
# Offer the page segmentation modes accepted by --tesseract-pagesegmode.
#
# Uses $cur from the caller and fills COMPREPLY. Entries are written one per
# line as "mode (description)"; _ocrmypdf runs this with IFS set to a
# newline so each line stays a single completion word.
__ocrmypdf_tesseract-pagesegmode()
{
    local modes
    modes="0 (orientation and script detection (OSD) only)
1 (automatic page segmentation with OSD)
2 (automatic page segmentation, but no OSD, or OCR)
3 (fully automatic page segmentation, but no OSD (default))
4 (assume a single column of text of variable sizes)
5 (assume a single uniform block of vertically aligned text)
6 (assume a single uniform block of text)
7 (treat the image as a single text line)
8 (treat the image as a single word)
9 (treat the image as a single word in a circle)
10 (treat the image as a single character)
11 (sparse text - find as much text as possible in no particular order)
12 (sparse text with OSD)
13 (raw line - treat the image as a single text line)"

    COMPREPLY=( $( compgen -W "$modes" -- "$cur" ) )

    # Several matches: keep the descriptions so they are shown in the list.
    (( ${#COMPREPLY[@]} == 1 )) || return 0

    # A single match is inserted verbatim, so cut everything after the mode.
    COMPREPLY=( ${COMPREPLY[0]%% *} )
}
|
|
|
|
# Offer the OCR engine modes accepted by --tesseract-oem.
#
# Uses $cur from the caller and fills COMPREPLY. Entries are written one per
# line as "mode (description)"; _ocrmypdf runs this with IFS set to a
# newline so each line stays a single completion word.
__ocrmypdf_tesseract-oem()
{
    local engines
    engines="0 (legacy engine only)
1 (neural nets LSTM engine only)
2 (legacy + LSTM engines)
3 (default, based on what is available)"

    COMPREPLY=( $( compgen -W "$engines" -- "$cur" ) )

    # Several matches: keep the descriptions so they are shown in the list.
    (( ${#COMPREPLY[@]} == 1 )) || return 0

    # A single match is inserted verbatim, so cut everything after the mode.
    COMPREPLY=( ${COMPREPLY[0]%% *} )
}
|
|
|
|
# Offer the thresholding methods accepted by --tesseract-thresholding.
#
# Reads:  $cur - the word being completed (set by the caller).
#         $IFS - the caller (_ocrmypdf) sets it to a newline so that every
#                "method (description)" line is treated as a single word.
# Writes: COMPREPLY - the matching entries; a single match is reduced to the
#         bare method name so the description is not inserted.
__ocrmypdf_tesseract-thresholding()
{
    # The "otsu" entry used to carry the hOCR renderer description copied
    # from the --pdf-renderer list; it now describes the thresholding method.
    local choices="auto (let OCRmyPDF pick thresholding - currently always uses otsu)
otsu (use legacy Otsu thresholding)
adaptive-otsu (use adaptive Otsu thresholding)
sauvola (use Sauvola thresholding)"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur") )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( ${COMPREPLY[0]%% *} )
    fi
}
|
|
|
|
|
|
# Complete the value of the option held in $prev, if it is one handled here.
#
# Reads $prev and $cur (set by the caller) and may fill COMPREPLY.
# Returns 0 when $prev was recognised, so the caller should stop, and 1 when
# it was not, leaving completion of $cur to the caller.
__ocrmypdf_check_previous()
{
    case $prev in
        -h|--help|--version)
            # nothing to complete after these
            ;;
        -l|--language)
            # Keep only what follows the last ':' of tesseract's output.
            COMPREPLY=$( command tesseract --list-langs 2>/dev/null )
            COMPREPLY=( $( compgen -W '${COMPREPLY[@]##*:}' -- "$cur" ) )
            ;;
        -j|--jobs)
            COMPREPLY=( $( compgen -W '{1..'$( _ncpus )'}' -- "$cur" ) )
            ;;
        -v|--verbose)
            __ocrmypdf_verbose
            ;;
        -O|--optimize)
            __ocrmypdf_optimize
            ;;
        --output-type|--pdf-renderer|--pdfa-image-compression|\
        --tesseract-pagesegmode|--tesseract-oem|--tesseract-thresholding)
            # The helper for each of these is named after the option itself,
            # e.g. --output-type is served by __ocrmypdf_output-type.
            "__ocrmypdf_${prev#--}"
            ;;
        --title|--author|--subject|--keywords|--unpaper-args|--pages|--plugin|\
        --jpeg-quality|--png-quality|--image-dpi|--oversample|--skip-big|--max-image-mpixels|\
        --tesseract-timeout|--rotate-pages-threshold|--fast-web-view)
            # argument required but no completions available
            ;;
        --tesseract-config|--user-words|--user-patterns|--sidecar)
            _filedir
            ;;
        *)
            # not an option handled here
            return 1
            ;;
    esac

    # Every recognised option reports success, whatever its helper returned.
    return 0
}
|
|
|
|
# Completion entry point registered for the `ocrmypdf` command.
#
# Sets up $cur/$prev, lets __ocrmypdf_check_previous complete the value of
# the preceding option, and otherwise offers option names (when $cur starts
# with "-") or file names. Results are delivered through COMPREPLY.
_ocrmypdf()
{
    # Candidates are "option (description)" lines, so split on newlines only.
    # IFS is local: bash restores the caller's value on every return path,
    # including the early ones, so no manual save/restore is needed.
    local IFS=$'\n'

    # words/cword are assigned by _init_completion; declaring them here
    # keeps them from leaking into the interactive shell as globals.
    local cur prev words cword

    # Homebrew on Macs ships bash-completion 1.3, which doesn't include
    # _init_completion - see #502
    if declare -F _init_completion >/dev/null 2>&1; then
        _init_completion || return
    else
        COMPREPLY=()
        _get_comp_words_by_ref cur prev
    fi

    # Exit status 0 means the previous word was an option handled there,
    # so there is nothing more to offer for the current word.
    if __ocrmypdf_check_previous; then
        return
    fi

    if [[ "$cur" == -* ]]; then
        __ocrmypdf_arguments
    else
        _filedir
    fi

    return 0
} &&
complete -F _ocrmypdf ocrmypdf
|
|
|
|
# Turn errexit back off now that the completion function has been registered.
# NOTE(review): this disables errexit unconditionally rather than restoring
# whatever setting was in effect before the file was read - confirm that is
# acceptable.
set +o errexit
|
|
|
|
# ex: filetype=sh
|