From b332d76782b5f110ed1f5d39f457c936400aef68 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Tue, 22 Oct 2019 01:49:38 -0700 Subject: [PATCH] Mention when we default to English and the system locale is not English Closes #337 --- src/ocrmypdf/_validation.py | 9 ++++++++- tests/test_validation.py | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py index c5dcb6a4..5a533fa9 100644 --- a/src/ocrmypdf/_validation.py +++ b/src/ocrmypdf/_validation.py @@ -17,6 +17,7 @@ # along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>. +import locale import logging import os import sys @@ -47,6 +48,7 @@ from .helpers import is_file_writable, is_iterable_notstr, monotonic, re_symlink # External dependencies HOCR_OK_LANGS = frozenset(['eng', 'deu', 'spa', 'ita', 'por']) +DEFAULT_LANGUAGE = 'eng' # Enforce English hegemony log = logging.getLogger(__name__) @@ -58,7 +60,12 @@ verify_python3_env() def check_options_languages(options): if not options.language: - options.language = ['eng'] # Enforce English hegemony + options.language = [DEFAULT_LANGUAGE] + system_lang = locale.getlocale()[0] + if system_lang and not system_lang.startswith('en'): + log.debug( + "No language specified; assuming --language %s" % DEFAULT_LANGUAGE + ) # Support v2.x "eng+deu" language syntax if '+' in options.language[0]: diff --git a/tests/test_validation.py b/tests/test_validation.py index 5413e375..5465a580 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -15,6 +15,8 @@ # You should have received a copy of the GNU General Public License # along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
+import locale +import logging import os from unittest.mock import patch @@ -27,9 +29,9 @@ from ocrmypdf.pdfinfo import PdfInfo def make_opts(input_file='a.pdf', output_file='b.pdf', language='eng', **kwargs): - return create_options( - input_file=input_file, output_file=output_file, language=language, **kwargs - ) + if language is not None: + kwargs['language'] = language + return create_options(input_file=input_file, output_file=output_file, **kwargs) def test_hocr_notlatin_warning(caplog): @@ -139,3 +141,21 @@ def test_no_progress_bar(progress_bar, resources): assert tqdmpatch.called _args, kwargs = tqdmpatch.call_args assert kwargs['disable'] != progress_bar + + +def test_language_warning(caplog): + opts = make_opts(language=None) + caplog.set_level(logging.DEBUG) + with patch( + 'ocrmypdf._validation.locale.getlocale', return_value=('en_US', 'UTF-8') + ): + vd.check_options_languages(opts) + assert opts.language == ['eng'] + assert '' in caplog.text + + with patch( + 'ocrmypdf._validation.locale.getlocale', return_value=('fr_FR', 'UTF-8') + ): + vd.check_options_languages(opts) + assert opts.language == ['eng'] + assert 'assuming --language' in caplog.text