2015-07-25 23:45:13 -07:00
|
|
|
#!/usr/bin/env python3
|
2016-02-05 02:34:49 -08:00
|
|
|
# -*- coding: utf-8 -*-
|
2015-07-28 04:36:58 -07:00
|
|
|
# © 2015 James R. Barlow: github.com/jbarlow83
#
# This file is part of OCRmyPDF.
#
# OCRmyPDF is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OCRmyPDF is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
|
2015-07-25 23:45:13 -07:00
|
|
|
|
2015-12-21 09:38:38 -08:00
|
|
|
from __future__ import print_function, unicode_literals
|
2015-07-27 01:45:17 -07:00
|
|
|
|
2016-02-05 02:34:49 -08:00
|
|
|
import sys
|
2017-05-11 23:51:45 -07:00
|
|
|
# Stop early with a readable message when run under an interpreter older
# than 3.5, before any of the code below is reached.
if sys.version_info < (3, 5):
    print("Python 3.5 or newer is required", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
2017-01-27 13:13:14 -08:00
|
|
|
from setuptools import setup, find_packages # nopep8
|
2016-02-05 02:34:49 -08:00
|
|
|
from subprocess import STDOUT, check_output, CalledProcessError # nopep8
|
|
|
|
from collections.abc import Mapping # nopep8
|
|
|
|
import re # nopep8
|
|
|
|
|
|
|
|
|
2015-07-27 01:45:17 -07:00
|
|
|
# Message templates printed to stderr when an external program is missing,
# cannot be identified, or is too old. Each one is filled in with
# str.format() using the named fields that appear in it.

missing_program = '''
The program '{program}' could not be executed or was not found on your
system PATH.
'''

unknown_version = '''
OCRmyPDF requires '{program}' {need_version} or higher. Your system has
'{program}' but we cannot tell what version is installed. Contact the
package maintainer.
'''

old_version = '''
OCRmyPDF requires '{program}' {need_version} or higher. Your system appears
to have {found_version}. Please update this program.
'''

okay_its_optional = '''
This program is OPTIONAL, so installation of OCRmyPDF can proceed, but
some functionality may be missing.
'''

not_okay_its_required = '''
This program is REQUIRED for OCRmyPDF to work. Installation will abort.
'''

osx_install_advice = '''
If you have homebrew installed, try these commands to install the missing
packages:
    brew update
    brew upgrade
    brew install {package}
'''

linux_install_advice = '''
On systems with the APT package manager (Debian, Ubuntu), try these
commands:
    sudo apt-get update
    sudo apt-get install {package}

On RPM-based systems (Red Hat, Fedora), search for instructions on
installing the RPM for {program}.
'''
|
|
|
|
|
|
|
|
|
2015-08-15 15:12:05 -07:00
|
|
|
def get_platform():
    """Return a normalized platform name derived from sys.platform.

    Any value starting with 'freebsd' or 'linux' is collapsed to exactly
    that bare name; every other value of sys.platform is returned unchanged.
    """
    for family in ('freebsd', 'linux'):
        if sys.platform.startswith(family):
            return family
    return sys.platform
|
|
|
|
|
|
|
|
|
2015-07-28 13:05:23 -07:00
|
|
|
def _error_trailer(program, package, optional, **kwargs):
    """Print the closing part of a dependency error message to stderr.

    First states whether the affected program is optional or required, then
    prints installation advice when running on macOS ('darwin') or Linux.
    No advice is printed on any other platform.

    program: name of the executable that was checked
    package: name of the package providing it; either a string or a mapping
        from platform name (as returned by get_platform()) to package name
    optional: true if OCRmyPDF can be installed without the program
    **kwargs: accepted and ignored, so callers can forward extra details
        such as need_version without this function needing to know them
    """
    status = okay_its_optional if optional else not_okay_its_required
    print(status.format(program=program, package=package, optional=optional),
          file=sys.stderr)

    platform = get_platform()
    if isinstance(package, Mapping):
        # The mapping need not list every platform. Fall back to the program
        # name instead of raising KeyError in the middle of an error report.
        package = package.get(platform, program)

    if platform == 'darwin':
        advice = osx_install_advice
    elif platform == 'linux':
        advice = linux_install_advice
    else:
        return
    print(advice.format(program=program, package=package), file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
def error_missing_program(
        program,
        package,
        optional
):
    """Report on stderr that `program` could not be run or was not found.

    Prints the missing_program message followed by the common trailer
    produced by _error_trailer(). This function only reports; it does not
    exit.
    """
    # Explicit arguments instead of **locals(): adding a local variable here
    # can no longer change what gets formatted or forwarded.
    print(missing_program.format(program=program), file=sys.stderr)
    _error_trailer(program=program, package=package, optional=optional)
|
|
|
|
|
|
|
|
|
|
|
|
def error_unknown_version(
        program,
        package,
        optional,
        need_version
):
    """Report on stderr that the installed version of `program` is unknown.

    Prints the unknown_version message, which names the required
    `need_version`, followed by the common trailer produced by
    _error_trailer(). This function only reports; it does not exit.
    """
    # Explicit arguments instead of **locals(): adding a local variable here
    # can no longer change what gets formatted or forwarded.
    print(
        unknown_version.format(program=program, need_version=need_version),
        file=sys.stderr)
    _error_trailer(
        program=program, package=package, optional=optional,
        need_version=need_version)
|
|
|
|
|
|
|
|
|
|
|
|
def error_old_version(
        program,
        package,
        optional,
        need_version,
        found_version
):
    """Report on stderr that the installed `program` is older than required.

    Prints the old_version message, which names both the required
    `need_version` and the detected `found_version`, followed by the common
    trailer produced by _error_trailer(). This function only reports; it
    does not exit.
    """
    # Explicit arguments instead of **locals(): adding a local variable here
    # can no longer change what gets formatted or forwarded.
    print(
        old_version.format(
            program=program, need_version=need_version,
            found_version=found_version),
        file=sys.stderr)
    _error_trailer(
        program=program, package=package, optional=optional,
        need_version=need_version, found_version=found_version)
|
|
|
|
|
|
|
|
|
|
|
|
def check_external_program(
        program,
        need_version,
        package,
        version_check_args=['--version'],
        version_scrape_regex=re.compile(r'(\d+\.\d+(?:\.\d+)?)'),
        optional=False):
    """Check that an external program is installed and recent enough.

    Runs `program` with `version_check_args`, extracts a version number from
    its combined stdout/stderr using group 1 of `version_scrape_regex`, and
    compares it with `need_version`.

    program: name of the executable to run
    need_version: minimum acceptable version, as a dotted string ('9.15')
    package: package that provides the program; a string, or a mapping from
        platform name to package name (used only in error messages)
    version_check_args: arguments that make the program print its version;
        the default list is never modified
    version_scrape_regex: compiled pattern whose first group is the version
    optional: if true, problems are reported but installation continues

    Exits the interpreter with status 1 when a required program is missing,
    reports no recognizable version, or is older than `need_version`.
    Returns None otherwise.
    """

    def version_key(version):
        """Split '9.15' into [9, 15] so versions compare numerically."""
        return [int(digits) for digits in re.findall(r'\d+', version)]

    print('Checking for {program} >= {need_version}...'.format(
        program=program, need_version=need_version))

    try:
        result = check_output(
            [program] + list(version_check_args),
            universal_newlines=True, stderr=STDOUT)
    except (CalledProcessError, OSError):
        # OSError covers FileNotFoundError as well as a file that exists but
        # cannot be executed; both mean "could not be executed".
        error_missing_program(program, package, optional)
        if not optional:
            sys.exit(1)
        print('Continuing install without {program}'.format(program=program))
        return

    match = version_scrape_regex.search(result)
    if match is None:
        error_unknown_version(program, package, optional, need_version)
        # The message printed for optional programs says installation can
        # proceed, so only abort when the program is required.
        if not optional:
            sys.exit(1)
        print('Continuing install without {program}'.format(program=program))
        return
    found_version = match.group(1)

    # Compare numerically: as plain strings, '10.0' sorts before '9.15'.
    found_key = version_key(found_version)
    need_key = version_key(need_version)
    # Pad to equal length so that '7.0' satisfies a requirement of '7.0.0'.
    width = max(len(found_key), len(need_key))
    found_key += [0] * (width - len(found_key))
    need_key += [0] * (width - len(need_key))

    if found_key < need_key:
        error_old_version(program, package, optional, need_version,
                          found_version)
        # The message printed for required programs says installation will
        # abort, so do that.
        if not optional:
            sys.exit(1)

    print('Found {program} {found_version}'.format(
        program=program, found_version=found_version))
|
2015-07-27 01:45:17 -07:00
|
|
|
|
2015-08-09 14:16:30 -07:00
|
|
|
|
2015-07-27 02:14:51 -07:00
|
|
|
# The first argument that does not start with '-' is taken to be the
# setup.py command; it is '' when there is no such argument.
command = next((arg for arg in sys.argv[1:] if not arg.startswith('-')), '')
forced = ('--force' in sys.argv)

# The parentheses only make the existing operator precedence explicit:
# --force suppresses the checks for install* commands, while 'check', 'test',
# 'nosetests' and 'easy_install' always run them.
# NOTE(review): confirm whether --force was meant to skip the checks for
# those commands as well.
if (not forced and command.startswith('install')) or \
        command in ['check', 'test', 'nosetests', 'easy_install']:
    check_external_program(
        program='tesseract',
        need_version='3.04',  # using backport for Travis CI
        package={'darwin': 'tesseract', 'linux': 'tesseract-ocr'}
    )
    check_external_program(
        program='gs',
        need_version='9.15',  # limited by Travis CI / Ubuntu 14.04 backports
        package='ghostscript'
    )
    check_external_program(
        program='unpaper',
        need_version='6.1',  # latest sane version
        package='unpaper',
        optional=True
    )
    check_external_program(
        program='qpdf',
        need_version='7.0.0',  # test suite known to fail on 5.1.1
        package='qpdf',
        version_check_args=['--version']
    )
|
2015-08-09 14:16:30 -07:00
|
|
|
|
|
|
|
# Refuse the 'upload' command outright; the message directs the user to
# twine instead.
if 'upload' in sys.argv[1:]:
    print('Use twine to upload the package - setup.py upload is insecure')
    sys.exit(1)
|
|
|
|
|
2015-08-19 13:43:32 -07:00
|
|
|
# One requirement per line of test_requirements.txt. The file is opened in a
# with-block so the handle is closed as soon as it has been read.
with open('test_requirements.txt') as requirements_file:
    tests_require = requirements_file.read().splitlines()
|
|
|
|
|
2017-08-23 23:29:21 -07:00
|
|
|
|
|
|
|
def readme():
    """Return the full text of README.rst from the current directory.

    The file is decoded as UTF-8 explicitly so that the result does not
    depend on the locale's default encoding.
    """
    with open('README.rst', encoding='utf-8') as f:
        return f.read()
|
|
|
|
|
2015-07-25 23:45:13 -07:00
|
|
|
# Package metadata and build configuration. The version is not hard-coded;
# it is derived by setuptools_scm (see use_scm_version below).
setup(
    name='ocrmypdf',
    description='OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched',
    long_description=readme(),
    url='https://github.com/jbarlow83/OCRmyPDF',
    author='James R. Barlow',
    author_email='jim@purplerock.ca',
    # Packages live under src/ rather than at the repository root.
    packages=find_packages('src', exclude=["tests", "tests.*"]),
    package_dir={'': 'src'},
    keywords=['PDF', 'OCR', 'optical character recognition', 'PDF/A', 'scanning'],
    classifiers=[
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
        "Intended Audience :: End Users/Desktop",
        "Intended Audience :: Science/Research",
        "Intended Audience :: System Administrators",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: MacOS :: MacOS X",
        "Operating System :: POSIX",
        "Operating System :: POSIX :: BSD",
        "Operating System :: POSIX :: Linux",
        "Topic :: Scientific/Engineering :: Image Recognition",
        "Topic :: Text Processing :: Indexing",
        "Topic :: Text Processing :: Linguistic",
    ],
    python_requires=' >= 3.5',
    setup_requires=[
        'cffi >= 1.9.1',    # to build the leptonica module
        'pytest-runner',    # to enable python setup.py test
        'setuptools_scm',   # so that version will work
    ],
    use_scm_version={'version_scheme': 'post-release'},
    cffi_modules=[
        'src/ocrmypdf/lib/compile_leptonica.py:ffibuilder'
    ],
    install_requires=[
        'cffi >= 1.9.1',         # must be a setup and install requirement
        'defusedxml >= 0.5.0',   # pure Python, so track HEAD closely
        'img2pdf >= 0.2.4',      # pure Python, so track HEAD closely
        # NOTE(review): the environment marker makes this whole Pillow
        # requirement apply on darwin only - confirm that is intended.
        'Pillow >= 4.0.0, != 5.1.0 ; sys_platform == "darwin"',
                                 # Pillow < 4 has BytesIO/TIFF bug w/img2pdf 0.2.3
                                 # block 5.1.0, broken wheels
        'PyPDF2 >= 1.26',        # pure Python, so track HEAD closely
        'reportlab >= 3.3.0',    # oldest released version with sane image handling
        'ruffus == 2.6.3',       # pinned - ocrmypdf implements a 2.6.3 workaround
    ],
    extras_require={
        'fitz': ['PyMuPDF >= 1.12.5']   # for table of contents bug
    },
    tests_require=tests_require,
    entry_points={
        'console_scripts': [
            'ocrmypdf = ocrmypdf.__main__:run_pipeline'
        ],
    },
    package_data={'ocrmypdf': ['data/sRGB.icc']},
    include_package_data=True,
    zip_safe=False)
|