Mention when we default to English and the system locale is not English

Closes #337
This commit is contained in:
James R. Barlow 2019-10-22 01:49:38 -07:00
parent 3660007fc8
commit b332d76782
2 changed files with 31 additions and 4 deletions

View File

@ -17,6 +17,7 @@
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
import locale
import logging
import os
import sys
@ -47,6 +48,7 @@ from .helpers import is_file_writable, is_iterable_notstr, monotonic, re_symlink
# External dependencies
HOCR_OK_LANGS = frozenset(['eng', 'deu', 'spa', 'ita', 'por'])
DEFAULT_LANGUAGE = 'eng' # Enforce English hegemony
log = logging.getLogger(__name__)
@ -58,7 +60,12 @@ verify_python3_env()
def check_options_languages(options):
if not options.language:
options.language = ['eng'] # Enforce English hegemony
options.language = [DEFAULT_LANGUAGE]
system_lang = locale.getlocale()[0]
if system_lang and not system_lang.startswith('en'):
log.debug(
"No language specified; assuming --language %s" % DEFAULT_LANGUAGE
)
# Support v2.x "eng+deu" language syntax
if '+' in options.language[0]:

View File

@ -15,6 +15,8 @@
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
import locale
import logging
import os
from unittest.mock import patch
@ -27,9 +29,9 @@ from ocrmypdf.pdfinfo import PdfInfo
def make_opts(input_file='a.pdf', output_file='b.pdf', language='eng', **kwargs):
    """Build test options by forwarding the arguments to ``create_options``.

    ``language`` is only forwarded when it is not ``None``. Passing
    ``language=None`` therefore lets a test omit the language entirely, so
    that ``create_options`` sees no ``language`` keyword at all.

    Args:
        input_file: Value passed as ``input_file`` to ``create_options``.
        output_file: Value passed as ``output_file`` to ``create_options``.
        language: Language to request, or ``None`` to leave it unspecified.
        **kwargs: Any further keyword arguments, forwarded unchanged.

    Returns:
        Whatever ``create_options`` returns for these arguments.
    """
    # Only add the key when a language was actually requested; an explicit
    # language=None must not be forwarded.
    if language is not None:
        kwargs['language'] = language
    return create_options(input_file=input_file, output_file=output_file, **kwargs)
def test_hocr_notlatin_warning(caplog):
@ -139,3 +141,21 @@ def test_no_progress_bar(progress_bar, resources):
assert tqdmpatch.called
_args, kwargs = tqdmpatch.call_args
assert kwargs['disable'] != progress_bar
def test_language_warning(caplog):
    """Check that the default-language notice depends on the system locale.

    With no language given, ``check_options_languages`` must fall back to
    ``['eng']``. The "assuming --language" debug message is expected only
    when the (patched) system locale does not start with ``en``.
    """
    caplog.set_level(logging.DEBUG)

    # English locale: the default is applied without any notice.
    opts = make_opts(language=None)
    with patch(
        'ocrmypdf._validation.locale.getlocale', return_value=('en_US', 'UTF-8')
    ):
        vd.check_options_languages(opts)
        assert opts.language == ['eng']
        # A plain `'' in caplog.text` is always true; assert the notice is absent.
        assert 'assuming --language' not in caplog.text

    # Start from fresh options and an empty log: the first call already set
    # opts.language, so reusing it would not exercise the "no language
    # specified" path again, and old records could mask a missing notice.
    caplog.clear()
    opts = make_opts(language=None)
    with patch(
        'ocrmypdf._validation.locale.getlocale', return_value=('fr_FR', 'UTF-8')
    ):
        vd.check_options_languages(opts)
        assert opts.language == ['eng']
        assert 'assuming --language' in caplog.text