Drop support for Python 3.5

2025-12-27 15:10:22 +00:00 · 2018-12-30 00:23:26 -08:00 · 2018-12-30 00:23:26 -08:00 · 72b920eb16
commit 72b920eb16
parent b4a51907d6
17 changed files with 34 additions and 86 deletions
--- a/.gitignore
+++ b/.gitignore
@ -39,3 +39,5 @@ log/
 tests/output/
 tests/resources/private/
 tmp/
+/debug_tests.py
+*.traineddata
--- a/.travis.yml
+++ b/.travis.yml
@ -9,7 +9,7 @@ matrix:
      dist: trusty
      sudo: required
      language: python
-      python: "3.5"
+      python: "3.6"
      env:
        - DIST=trusty
      addons: &trusty_apt
@ -33,14 +33,6 @@ matrix:
          - tesseract-ocr-deu
          - tesseract-ocr-eng
          - tesseract-ocr-fra
-    - os: linux
-      dist: trusty
-      sudo: required
-      language: python
-      python: "3.6"
-      env:
-        - DIST=trusty
-      addons: *trusty_apt
    - os: linux
      dist: xenial
      sudo: required
--- a/docs/installation.rst
+++ b/docs/installation.rst
@ -493,14 +493,12 @@ Requirements for pip and HEAD install

 OCRmyPDF currently requires these external programs and libraries to be installed, and must be satisfied using the operating system package manager. ``pip`` cannot provide them.

- Python 3.5 or newer
+- Python 3.6 or newer
 - Ghostscript 9.15 or newer
 - libexempi3 2.2.0 or newer
 - qpdf 8.1.0 or newer
 - Tesseract 3.04 or newer

-Using Python 3.5 in production is discouraged. Python 3.6 and 3.7 give much better performance.
-
 As of ocrmypdf 7.2.1, the following versions are recommended:

 - Python 3.7
@ -526,7 +524,7 @@ These are in addition to the Python packaging dependencies, meaning that unfortu
 Installing HEAD revision from sources
 -------------------------------------

-If you have ``git`` and Python 3.5 or newer installed, you can install from source. When the ``pip`` installer runs, it will alert you if dependencies are missing.
+If you have ``git`` and Python 3.6 or newer installed, you can install from source. When the ``pip`` installer runs, it will alert you if dependencies are missing.

 If you prefer to build everything from source, you will need to `build pikepdf from source <https://pikepdf.readthedocs.io/en/latest/installation.html#building-from-source>`_. First ensure you can build and install pikepdf.

--- a/setup.py
+++ b/setup.py
@ -20,8 +20,8 @@
 from __future__ import print_function, unicode_literals

 import sys
-if sys.version_info < (3, 5):
-    print("Python 3.5 or newer is required", file=sys.stderr)
+if sys.version_info < (3, 6):
+    print("Python 3.6 or newer is required", file=sys.stderr)
    sys.exit(1)

 from setuptools import setup, find_packages  # nopep8
@ -219,7 +219,6 @@ setup(
    package_dir={'': 'src'},
    keywords=['PDF', 'OCR', 'optical character recognition', 'PDF/A', 'scanning'],
    classifiers=[
-        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Development Status :: 5 - Production/Stable",
@ -236,7 +235,7 @@ setup(
        "Topic :: Text Processing :: Indexing",
        "Topic :: Text Processing :: Linguistic",
        ],
-    python_requires=' >= 3.5',
+    python_requires=' >= 3.6',
    setup_requires=[
        'cffi >= 1.9.1',        # to build the leptonica module
        'pytest-runner',        # to enable python setup.py test
@ -252,7 +251,7 @@ setup(
        'cffi >= 1.9.1',          # must be a setup and install requirement
        'img2pdf >= 0.3.0, < 0.4',       # pure Python, so track HEAD closely
        'pdfminer.six == 20181108',
-        'pikepdf >= 0.9.1',
+        'pikepdf >= 0.10.0, < 0.11.0',
        'Pillow >= 4.0.0, != 5.1.0 ; sys_platform == "darwin"',
                                  # Pillow < 4 has BytesIO/TIFF bug w/img2pdf 0.2.3
                                  # block 5.1.0, broken wheels
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@ -193,12 +193,6 @@ def repair_and_parse_pdf(
        )
        raise InputFileError()

-    if len(pdfinfo.pages) > 2000 and sys.version_info[0:2] <= (3, 5):
-        log.warning(
-            "Performance regressions are known occur with Python 3.5 for "
-            "high page count files.  Python 3.6 or newer is recommended."
-        )
-
    if pdfinfo.has_acroform:
        if options.redo_ocr:
            log.error(
--- a/src/ocrmypdf/exec/ghostscript.py
+++ b/src/ocrmypdf/exec/ghostscript.py
@ -23,7 +23,7 @@ import re
 from PIL import Image
 from . import get_version
 from ..exceptions import SubprocessOutputError
-from ..helpers import fspath
+from os import fspath


@lru_cache(maxsize=1)
--- a/src/ocrmypdf/exec/qpdf.py
+++ b/src/ocrmypdf/exec/qpdf.py
@ -19,7 +19,7 @@ from subprocess import CalledProcessError, STDOUT, PIPE, run
 from functools import lru_cache

 from . import  get_version
-from ..helpers import fspath
+from os import fspath


@lru_cache(maxsize=1)
--- a/src/ocrmypdf/exec/tesseract.py
+++ b/src/ocrmypdf/exec/tesseract.py
@ -23,9 +23,10 @@ from collections import namedtuple
 from textwrap import dedent
 from subprocess import CalledProcessError, TimeoutExpired, check_output, STDOUT, run, PIPE
 from contextlib import suppress
+from os import fspath

 from ..exceptions import MissingDependencyError, TesseractConfigError
-from ..helpers import page_number, fspath
+from ..helpers import page_number
 from . import get_version

 OrientationConfidence = namedtuple(
--- a/src/ocrmypdf/helpers.py
+++ b/src/ocrmypdf/helpers.py
@ -29,8 +29,8 @@ def re_symlink(input_file, soft_link_name, log=None):
    """
    Helper function: relinks soft symbolic link if necessary
    """
-    input_file = fspath(input_file)  # For Py3.5
-    soft_link_name = fspath(soft_link_name)
+    input_file = os.fspath(input_file)
+    soft_link_name = os.fspath(soft_link_name)
    if log is None:
        prdebug = partial(print, file=sys.stderr)
    else:
@ -72,7 +72,7 @@ def is_iterable_notstr(thing):

 def page_number(input_file):
    """Get one-based page number implied by filename (000002.pdf -> 2)"""
-    return int(os.path.basename(fspath(input_file))[0:6])
+    return int(os.path.basename(os.fspath(input_file))[0:6])


 def available_cpu_count():
@ -103,19 +103,12 @@ def is_file_writable(test_file):
    p = Path(test_file)

    if p.is_symlink():
-        # Python 3.5 does not accept parameters for Path.resolve() and behaves
-        # as if strict=True (throws an exception on failure). Python 3.6
-        # defaults to strict=False. This implements strict=False like behavior
-        # for Python 3.5.
-        if sys.version_info[0:2] <= (3, 5):
-            p = Path(os.path.realpath(fspath(p)))
-        else:
-            p = p.resolve(strict=False)
+        p = p.resolve(strict=False)

    # p.is_file() throws an exception in some cases
    if p.exists() and p.is_file():
        return os.access(
-            fspath(p), os.W_OK,
+            os.fspath(p), os.W_OK,
            effective_ids=(os.access in os.supports_effective_ids))
    else:
        try:
@ -129,39 +122,6 @@ def is_file_writable(test_file):
        return True


-if sys.version_info[0:2] <= (3, 5):
-    def fspath(path):
-        """https://www.python.org/dev/peps/pep-0519/#os"""
-        import pathlib
-        if isinstance(path, (str, bytes)):
-            return path
-
-        # Work from the object's type to match method resolution of other magic
-        # methods.
-        path_type = type(path)
-        try:
-            path = path_type.__fspath__(path)
-        except AttributeError:
-            # Added for Python 3.5 support.
-            if isinstance(path, pathlib.Path):
-                return str(path)
-            elif hasattr(path_type, '__fspath__'):
-                raise
-        else:
-            if isinstance(path, (str, bytes)):
-                return path
-            else:
-                raise TypeError("expected __fspath__() to return str or bytes, "
-                                "not " + type(path).__name__)
-
-        raise TypeError(
-            "expected str, bytes, pathlib.Path or os.PathLike object, not "
-            + path_type.__name__)
-
-else:
-    fspath = os.fspath
-
-
 def flatten_groups(groups):
    for obj in groups:
        if is_iterable_notstr(obj):
--- a/src/ocrmypdf/leptonica.py
+++ b/src/ocrmypdf/leptonica.py
@ -33,7 +33,7 @@ import sys
 import warnings

 from .lib._leptonica import ffi
-from .helpers import fspath
+from os import fspath

 # pylint: disable=protected-access

--- a/src/ocrmypdf/optimize.py
+++ b/src/ocrmypdf/optimize.py
@ -20,6 +20,7 @@ import concurrent.futures
 from collections import defaultdict
 import logging
 import sys
+from os import fspath

 from PIL import Image

@ -27,7 +28,7 @@ import pikepdf

 from ._jobcontext import JobContext
 from . import leptonica
-from .helpers import re_symlink, fspath
+from .helpers import re_symlink
 from .exec import pngquant, jbig2enc

 DEFAULT_JPEG_QUALITY = 75
--- a/src/ocrmypdf/pdfinfo/init.py
+++ b/src/ocrmypdf/pdfinfo/init.py
@ -20,6 +20,7 @@ from collections import namedtuple
 from decimal import Decimal
 from enum import Enum
 from math import hypot, isclose
+from os import fspath
 from pathlib import Path
 from unittest.mock import Mock
 from warnings import warn
@ -32,7 +33,6 @@ from . import ghosttext
 from .layout import get_page_analysis, get_text_boxes

 from ..exceptions import EncryptedPdfError
-from ..helpers import fspath


 Colorspace = Enum('Colorspace',
--- a/tests/test_lept.py
+++ b/tests/test_lept.py
@ -19,13 +19,13 @@
 import os
 import shutil
 import sys
+from os import fspath
 from pickle import dumps, loads

 import pytest
 from PIL import Image, ImageChops

 import ocrmypdf.leptonica as lept
-from ocrmypdf.helpers import fspath


 def test_colormap_backgroundnorm(resources):
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@ -23,13 +23,12 @@ from pathlib import Path
 from shutil import copyfile
 from unittest.mock import patch, MagicMock
 import datetime
-from shutil import copyfile
+from os import fspath

 import pikepdf
 from pikepdf.models.metadata import decode_pdf_date

 from ocrmypdf.exceptions import ExitCode
-from ocrmypdf.helpers import fspath
 from ocrmypdf.pdfa import (
    file_claims_pdfa,
    generate_pdfa_ps,
--- a/tests/test_optimize.py
+++ b/tests/test_optimize.py
@ -16,6 +16,7 @@
 # along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.

 from pathlib import Path
+from os import fspath

 import pytest
 import logging
@ -27,7 +28,6 @@ import pikepdf
 from ocrmypdf import optimize as opt
 from ocrmypdf.exec.ghostscript import rasterize_pdf
 from ocrmypdf.exec import jbig2enc, pngquant
-from ocrmypdf.helpers import fspath


 check_ocrmypdf = pytest.helpers.check_ocrmypdf
--- a/tests/test_rotation.py
+++ b/tests/test_rotation.py
@ -18,6 +18,7 @@
 import logging
 from io import BytesIO
 from unittest.mock import Mock
+from os import fspath

 from PIL import Image
 import pytest
@ -27,7 +28,6 @@ import pikepdf
 from ocrmypdf import leptonica
 from ocrmypdf.pdfinfo import PdfInfo
 from ocrmypdf.exec import ghostscript, tesseract
-from ocrmypdf.helpers import fspath


 # pytest.helpers is dynamic
--- a/tests/test_tess4.py
+++ b/tests/test_tess4.py
@ -15,17 +15,19 @@
 # You should have received a copy of the GNU General Public License
 # along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.

-import pytest
-from ocrmypdf.exceptions import ExitCode, MissingDependencyError
-from ocrmypdf.exec import tesseract
-from ocrmypdf.helpers import fspath
-from ocrmypdf import pdfinfo
+from os import fspath
 import sys
 import os
-import PyPDF2 as pypdf
 from contextlib import contextmanager
 from pathlib import Path

+import PyPDF2 as pypdf
+import pytest
+
+from ocrmypdf import pdfinfo
+from ocrmypdf.exceptions import ExitCode, MissingDependencyError
+from ocrmypdf.exec import tesseract
+
 # pylint: disable=no-member
 spoof = pytest.helpers.spoof