OCRmyPDF/setup.py

279 lines
8.8 KiB
Python
Raw Normal View History

2015-07-25 23:45:13 -07:00
#!/usr/bin/env python3
2016-02-05 02:34:49 -08:00
# -*- coding: utf-8 -*-
2015-07-28 04:36:58 -07:00
# © 2015 James R. Barlow: github.com/jbarlow83
#
# This file is part of OCRmyPDF.
#
# OCRmyPDF is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OCRmyPDF is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
2015-07-25 23:45:13 -07:00
2015-12-21 09:38:38 -08:00
from __future__ import print_function, unicode_literals
2016-02-05 02:34:49 -08:00
import sys
2018-12-30 00:23:26 -08:00
# Stop immediately, with a readable message on stderr, when the running
# interpreter is older than Python 3.6.
if sys.version_info[:2] < (3, 6):
    print("Python 3.6 or newer is required", file=sys.stderr)
    raise SystemExit(1)
2019-01-02 13:34:45 -08:00
from setuptools import setup, find_packages
from subprocess import STDOUT, check_output, CalledProcessError
from collections.abc import Mapping
import re
2016-02-05 02:34:49 -08:00
2019-01-02 13:34:45 -08:00
# pylint: disable=w0613
2016-02-05 02:34:49 -08:00
# Message templates printed to stderr by the error_* helpers below.
# Placeholders in braces are filled in with str.format().

# Printed when the external program cannot be run at all.
missing_program = '''
The program '{program}' could not be executed or was not found on your
system PATH.
'''
# Printed when the program ran but no version number could be scraped
# from its output.
unknown_version = '''
OCRmyPDF requires '{program}' {need_version} or higher. Your system has
'{program}' but we cannot tell what version is installed. Contact the
package maintainer.
'''
# Printed when the detected version is lower than the required one.
old_version = '''
OCRmyPDF requires '{program}' {need_version} or higher. Your system appears
to have {found_version}. Please update this program.
'''
# Trailer used when the failing dependency was declared optional.
okay_its_optional = '''
This program is OPTIONAL, so installation of OCRmyPDF can proceed, but
some functionality may be missing.
'''
# Trailer used when the failing dependency is mandatory.
not_okay_its_required = '''
This program is REQUIRED for OCRmyPDF to work. Installation will abort.
'''
# Installation hint shown when get_platform() reports 'darwin'.
osx_install_advice = '''
If you have homebrew installed, try these command to install the missing
packages:
brew update
brew upgrade
brew install {package}
'''
# Installation hint shown when get_platform() reports 'linux'.
linux_install_advice = '''
On systems with the aptitude package manager (Debian, Ubuntu), try these
commands:
sudo apt-get update
sudo apt-get install {package}
On RPM-based systems (Red Hat, Fedora), search for instructions on
installing the RPM for {program}.
'''
def get_platform():
    """Return a normalized name for the current platform.

    Any ``sys.platform`` value that begins with ``freebsd`` or ``linux`` is
    collapsed to that bare prefix; every other value is returned unchanged.
    """
    for family in ('freebsd', 'linux'):
        if sys.platform.startswith(family):
            return family
    return sys.platform
2015-07-28 13:05:23 -07:00
def _error_trailer(program, package, optional, **kwargs):
    """Print the closing part of a dependency error message to stderr.

    First states whether the failing dependency is optional or required,
    then prints platform-specific installation advice when the platform is
    'darwin' or 'linux'. Nothing further is printed on other platforms.

    Args:
        program: Name of the external executable that failed its check.
        package: Name of the package that provides ``program``, either a
            plain string or a mapping from platform name (as returned by
            ``get_platform()``) to package name.
        optional: True if OCRmyPDF can be installed without ``program``.
        **kwargs: Accepted and ignored, so callers may forward extra
            context such as version numbers.
    """
    verdict = okay_its_optional if optional else not_okay_its_required
    print(verdict.format(program=program, package=package), file=sys.stderr)

    platform = get_platform()
    if isinstance(package, Mapping):
        # Use .get() instead of indexing: a mapping that lacks an entry for
        # this platform must not turn the error report into a KeyError
        # traceback. The program name is a reasonable stand-in.
        package = package.get(platform, program)

    if platform == 'darwin':
        print(
            osx_install_advice.format(program=program, package=package),
            file=sys.stderr)
    elif platform == 'linux':
        print(
            linux_install_advice.format(program=program, package=package),
            file=sys.stderr)
def error_missing_program(program, package, optional):
    """Report on stderr that ``program`` could not be executed or found.

    Prints the ``missing_program`` message, then delegates to
    ``_error_trailer`` for the optional/required notice and install advice.
    """
    message = missing_program.format(
        program=program, package=package, optional=optional)
    print(message, file=sys.stderr)
    _error_trailer(program=program, package=package, optional=optional)
def error_unknown_version(program, package, optional, need_version):
    """Report on stderr that the version of ``program`` is unknown.

    Prints the ``unknown_version`` message, then delegates to
    ``_error_trailer`` for the optional/required notice and install advice.
    """
    details = {
        'program': program,
        'package': package,
        'optional': optional,
        'need_version': need_version,
    }
    print(unknown_version.format(**details), file=sys.stderr)
    _error_trailer(**details)
def error_old_version(program, package, optional, need_version,
                      found_version):
    """Report on stderr that the installed ``program`` is too old.

    Prints the ``old_version`` message with the required and detected
    versions, then delegates to ``_error_trailer`` for the optional/required
    notice and install advice.
    """
    details = {
        'program': program,
        'package': package,
        'optional': optional,
        'need_version': need_version,
        'found_version': found_version,
    }
    print(old_version.format(**details), file=sys.stderr)
    _error_trailer(**details)
def _version_key(version):
    """Turn a dotted version string into a tuple of ints for comparison.

    Trailing zero components are dropped so that '4.0' and '4.0.0' compare
    equal. Any non-digit characters act as separators.
    """
    numbers = [int(digits) for digits in re.findall(r'\d+', version)]
    while numbers and numbers[-1] == 0:
        numbers.pop()
    return tuple(numbers)


def check_external_program(
        program,
        need_version,
        package,
        version_check_args=None,
        version_scrape_regex=re.compile(r'(\d+\.\d+(?:\.\d+)?)'),
        optional=False):
    """Verify that an external program is installed and recent enough.

    Runs ``program`` with ``version_check_args``, scrapes a version number
    from its combined stdout/stderr, and compares it numerically against
    ``need_version``. Problems are reported on stderr through the
    ``error_*`` helpers.

    Args:
        program: Executable name to run.
        need_version: Minimum acceptable version as a dotted string.
        package: Package name providing ``program``, or a mapping from
            platform name to package name; only used in error messages.
        version_check_args: Arguments that make ``program`` print its
            version. Defaults to ``['--version']``.
        version_scrape_regex: Compiled pattern whose first group captures
            the version number in the program's output.
        optional: If True, a failed check is reported but never aborts.

    Raises:
        SystemExit: With status 1 when a non-optional program is missing,
            has an unrecognizable version, or is older than
            ``need_version``.
    """
    if not version_check_args:
        version_check_args = ['--version']

    # str.format rather than f-strings: keeps this file parseable by old
    # interpreters, so the version guard at the top of the file can print
    # its message instead of the import dying with a SyntaxError.
    print('Checking for {} >= {}...'.format(program, need_version))
    try:
        result = check_output(
            [program] + list(version_check_args),
            universal_newlines=True, stderr=STDOUT)
    except (CalledProcessError, OSError):
        # OSError covers "not found" as well as "found but not executable".
        error_missing_program(program, package, optional)
        if not optional:
            sys.exit(1)
        print('Continuing install without {}'.format(program))
        return

    match = version_scrape_regex.search(result)
    if match is None:
        error_unknown_version(program, package, optional, need_version)
        if not optional:
            sys.exit(1)
        # The message already said installation can proceed for optional
        # programs, so do not abort here.
        return
    found_version = match.group(1)

    # Compare numerically; plain string comparison ranks '10.0' below '9.15'.
    if _version_key(found_version) < _version_key(need_version):
        error_old_version(program, package, optional, need_version,
                          found_version)
        if not optional:
            # The message just printed promises that installation aborts.
            sys.exit(1)

    print('Found {} {}'.format(program, found_version))
# The first command-line argument that is not an option, or '' if none.
positional_args = [arg for arg in sys.argv[1:] if not arg.startswith('-')]
command = positional_args[0] if positional_args else ''
forced = '--force' in sys.argv

# NOTE(review): as written, --force only suppresses the checks for
# install-like commands; 'check', 'test', 'nosetests' and 'easy_install'
# always run them. Confirm that this precedence is intended.
if (command.startswith('install') and not forced) or command in (
        'check', 'test', 'nosetests', 'easy_install'):
    for requirement in (
        dict(
            program='tesseract',
            need_version='4.0.0',  # using backport for Travis CI
            package={'darwin': 'tesseract', 'linux': 'tesseract-ocr'},
        ),
        dict(
            program='gs',
            need_version='9.15',  # limited by Travis CI / Ubuntu 14.04 backports
            package='ghostscript',
        ),
        dict(
            program='unpaper',
            need_version='6.1',  # latest sane version
            package='unpaper',
            optional=True,
        ),
        dict(
            program='qpdf',
            need_version='8.0.2',  # test suite known to fail on 5.1.1
            package='qpdf',
        ),
        dict(
            program='pngquant',
            need_version='2.0.0',
            package='pngquant',
            optional=True,
        ),
    ):
        check_external_program(**requirement)

# Refuse 'setup.py upload' and point the user at twine instead.
if 'upload' in sys.argv[1:]:
    print('Use twine to upload the package - setup.py upload is insecure')
    raise SystemExit(1)
# Test-only requirements, one entry per line of requirements/test.txt.
# The with-block closes the file promptly instead of leaking the handle.
with open('requirements/test.txt', encoding='utf-8') as requirements_file:
    tests_require = requirements_file.read().splitlines()
def readme():
    """Return the full text of README.md, decoded as UTF-8.

    The path is relative to the current working directory.
    """
    with open('README.md', encoding='utf-8') as readme_file:
        contents = readme_file.read()
    return contents
2015-07-25 23:45:13 -07:00
setup(
    # ---- Identity and descriptive metadata ----
    name='ocrmypdf',
    description='OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched',
    long_description=readme(),
    long_description_content_type='text/markdown',
    keywords=['PDF', 'OCR', 'optical character recognition', 'PDF/A', 'scanning'],
    author='James R. Barlow',
    author_email='jim@purplerock.ca',
    url='https://github.com/jbarlow83/OCRmyPDF',
    project_urls={
        'Documentation': 'https://ocrmypdf.readthedocs.io/',
        'Source': 'https://github.com/jbarlow83/ocrmypdf',
        'Tracker': 'https://github.com/jbarlow83/ocrmypdf/issues',
    },
    classifiers=[
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
        "Intended Audience :: End Users/Desktop",
        "Intended Audience :: Science/Research",
        "Intended Audience :: System Administrators",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: MacOS :: MacOS X",
        "Operating System :: POSIX",
        "Operating System :: POSIX :: BSD",
        "Operating System :: POSIX :: Linux",
        "Topic :: Scientific/Engineering :: Image Recognition",
        "Topic :: Text Processing :: Indexing",
        "Topic :: Text Processing :: Linguistic",
    ],

    # ---- Package layout and build configuration ----
    packages=find_packages('src', exclude=["tests", "tests.*"]),
    package_dir={'': 'src'},
    package_data={'ocrmypdf': ['data/sRGB.icc']},
    include_package_data=True,
    zip_safe=False,
    use_scm_version={'version_scheme': 'post-release'},
    cffi_modules=[
        'src/ocrmypdf/lib/compile_leptonica.py:ffibuilder',
    ],
    entry_points={
        'console_scripts': [
            'ocrmypdf = ocrmypdf.__main__:run_pipeline',
        ],
    },

    # ---- Dependencies ----
    python_requires=' >= 3.6',
    setup_requires=[
        'cffi >= 1.9.1',  # to build the leptonica module
        'pytest-runner',  # to enable python setup.py test
        'setuptools_scm',  # so that version will work
        'setuptools_scm_git_archive',  # enable version from github tarballs
    ],
    # NOTE(review): the environment markers below make pdfminer.six apply
    # only off darwin and Pillow only on darwin - confirm this is intended.
    install_requires=[
        'chardet >= 3.0.4, < 4',  # unlisted requirement of pdfminer.six 20181108
        'cffi >= 1.9.1',  # must be a setup and install requirement
        'img2pdf >= 0.3.0, < 0.4',  # pure Python, so track HEAD closely
        'pdfminer.six == 20181108 ; sys_platform != "darwin"',
        'pikepdf >= 1.0.5, < 2',
        'Pillow >= 4.0.0, != 5.1.0 ; sys_platform == "darwin"',
        # Pillow < 4 has BytesIO/TIFF bug w/img2pdf 0.2.3
        # block 5.1.0, broken wheels
        'reportlab >= 3.3.0',  # oldest released version with sane image handling
        'ruffus >= 2.7.0',
    ],
    extras_require={
        'pdfminer': ['pdfminer.six == 20181108'],
    },
    tests_require=tests_require,
)