Drop support for pdfminer.six 20181108

This version required a patch that has since been mainlined, and also did not declare its dependency on chardet correctly. We can remove both hacks now.
2025-11-02 10:50:29 +00:00 · 2020-01-05 17:51:09 -08:00 · 2020-01-05 17:51:09 -08:00 · 4581027246
commit 4581027246
parent 31b5f63f85
2 changed files with 1 addition and 50 deletions
--- a/setup.py
+++ b/setup.py
@@ -95,11 +95,10 @@ setup(
    use_scm_version={'version_scheme': 'post-release'},
    cffi_modules=['src/ocrmypdf/lib/compile_leptonica.py:ffibuilder'],
    install_requires=[
-        'chardet >= 3.0.4, < 4',  # unlisted requirement of pdfminer.six 20181108
        'cffi >= 1.9.1',  # must be a setup and install requirement
        'coloredlogs >= 14.0',  # strictly optional
        'img2pdf >= 0.3.0, < 0.4',  # pure Python, so track HEAD closely
-        'pdfminer.six >= 20181108, <= 20200124',
+        'pdfminer.six >= 20191110, <= 20200124',
        'pikepdf >= 1.8.1, < 2',
        'Pillow >= 6.2.0',
        'reportlab >= 3.3.0',  # oldest released version with sane image handling
--- a/src/ocrmypdf/pdfinfo/layout.py
+++ b/src/ocrmypdf/pdfinfo/layout.py
@@ -36,54 +36,6 @@ from ..exceptions import EncryptedPdfError

 STRIP_NAME = re.compile(r'[0-9]+')

-#
-#  pdfminer 20181108 patches
-#
-
-if pdfminer.__version__ == '20181108':
-
-    def name2unicode(name):
-        """Fix pdfminer's name2unicode function
-
-        Font cids that are mapped to names of the form /g123 seem to be, by convention
-        characters with no corresponding Unicode entry. These can be subsetted fonts
-        or symbolic fonts. There seems to be no way to map /g123 fonts to Unicode,
-        barring a ToUnicode data structure.
-        """
-        if name in glyphname2unicode:
-            return glyphname2unicode[name]
-        if name.startswith('g') or name.startswith('a'):
-            raise KeyError(name)
-        if name.startswith('uni'):
-            try:
-                return chr(int(name[3:], 16))
-            except ValueError:  # Not hexadecimal
-                raise KeyError(name)
-        m = STRIP_NAME.search(name)
-        if not m:
-            raise KeyError(name)
-        return chr(int(m.group(0)))
-
-    pdfminer.encodingdb.name2unicode = name2unicode
-
-    original_PDFFont_init = PDFFont.__init__
-
-    def PDFFont__init__(self, descriptor, widths, default_width=None):
-        original_PDFFont_init(self, descriptor, widths, default_width)
-        # PDF spec says descent should be negative
-        # A font with a positive descent implies it floats entirely above the
-        # baseline, i.e. it's not really a baseline anymore. I have fonts that
-        # claim a positive descent, but treating descent as positive always seems
-        # to misposition text.
-        if self.descent > 0:
-            self.descent = -self.descent
-
-    PDFFont.__init__ = PDFFont__init__
-
-#
-# end of pdfminer 20181108 patches
-#
-

 original_PDFSimpleFont_init = PDFSimpleFont.__init__