OCRmyPDF/setup.py

215 lines
6.1 KiB
Python
Raw Normal View History

2015-07-25 23:45:13 -07:00
#!/usr/bin/env python3
2015-07-28 04:36:58 -07:00
# © 2015 James R. Barlow: github.com/jbarlow83
2015-07-25 23:45:13 -07:00
from setuptools import setup
from subprocess import Popen, STDOUT, check_output, CalledProcessError
from string import Template
import re
import sys
# User-facing message templates. Each one is filled in with str.format()
# and printed to stderr by the error helpers defined in this file.
#
# Shown when the program could not be run. Placeholder: {program}.
missing_program = '''
The program '{program}' could not be executed or was not found on your
system PATH.
'''
# Shown when the program ran but no version number could be extracted from
# its output. Placeholders: {program}, {need_version}.
unknown_version = '''
OCRmyPDF requires '{program}' {need_version} or higher. Your system has
'{program}' but we cannot tell what version is installed. Contact the
package maintainer.
'''
# Shown when the detected version is lower than the required one.
# Placeholders: {program}, {need_version}, {found_version}.
old_version = '''
OCRmyPDF requires '{program}' {need_version} or higher. Your system appears
to have {found_version}. Please update this program.
'''
# Trailer for an optional dependency. No placeholders.
okay_its_optional = '''
This program is OPTIONAL, so installation of OCRmyPDF can proceed, but
some functionality may be missing.
'''
# Trailer for a required dependency. No placeholders.
not_okay_its_required = '''
This program is REQUIRED for OCRmyPDF to work. Installation will abort.
'''
# Installation hint printed on macOS (sys.platform starting with 'darwin').
# Placeholder: {package}.
osx_install_advice = '''
If you have homebrew installed, try these command to install the missing
packages:
brew update
brew upgrade
brew install {package}
'''
# Installation hint printed on Linux (sys.platform starting with 'linux').
# Placeholder: {package}.
linux_install_advice = '''
On systems with the aptitude package manager (Debian, Ubuntu), try these
commands:
sudo apt-get update
sudo apt-get install {package}
On RPM-based systems (Red Hat, Fedora), search for instructions on
installing the RPM for {package}.
'''
2015-07-28 13:05:23 -07:00
def _error_trailer(program, package, optional, **kwargs):
    """Print the closing part of a dependency error report to stderr.

    First states whether the dependency is optional or required, then
    prints platform-specific installation advice for ``package`` on macOS
    and Linux. Other platforms get no advice.

    Nothing at all is printed when ``program`` is ``'java'``. Any extra
    keyword arguments are accepted and ignored, so callers can forward
    their whole argument set.
    """
    if program == 'java':
        # No trailer or package-manager advice is emitted for Java.
        return

    fields = dict(program=program, package=package, optional=optional)

    severity = okay_its_optional if optional else not_okay_its_required
    print(severity.format(**fields), file=sys.stderr)

    platform = sys.platform
    if platform.startswith('darwin'):
        advice = osx_install_advice
    elif platform.startswith('linux'):
        advice = linux_install_advice
    else:
        return
    print(advice.format(**fields), file=sys.stderr)
def error_missing_program(program, package, optional):
    """Report on stderr that ``program`` is missing or cannot be executed.

    Prints the ``missing_program`` message followed by the common error
    trailer (optional/required notice and installation advice).
    """
    report = missing_program.format(
        program=program, package=package, optional=optional)
    print(report, file=sys.stderr)
    _error_trailer(program=program, package=package, optional=optional)
def error_unknown_version(program, package, optional, need_version):
    """Report on stderr that the version of ``program`` is undetermined.

    Prints the ``unknown_version`` message, which names the required
    ``need_version``, followed by the common error trailer.
    """
    report = unknown_version.format(
        program=program,
        package=package,
        optional=optional,
        need_version=need_version)
    print(report, file=sys.stderr)
    _error_trailer(
        program=program,
        package=package,
        optional=optional,
        need_version=need_version)
def error_old_version(program, package, optional, need_version,
                      found_version):
    """Report on stderr that the installed ``program`` is too old.

    Prints the ``old_version`` message, which shows both the required
    ``need_version`` and the detected ``found_version``, followed by the
    common error trailer.
    """
    details = dict(
        program=program,
        package=package,
        optional=optional,
        need_version=need_version,
        found_version=found_version)
    print(old_version.format(**details), file=sys.stderr)
    _error_trailer(**details)
def _version_is_older(found_version, need_version):
    """Return True if ``found_version`` is numerically below ``need_version``.

    Both arguments are dotted version strings such as ``'9.14'`` or
    ``'3.02.02'``. Each run of digits is compared as an integer, and the
    shorter version is padded with zeros, so ``'10.0'`` is newer than
    ``'9.14'`` and ``'5.0'`` equals ``'5.0.0'``.
    """
    found = [int(part) for part in re.findall(r'\d+', found_version)]
    need = [int(part) for part in re.findall(r'\d+', need_version)]
    width = max(len(found), len(need))
    found += [0] * (width - len(found))
    need += [0] * (width - len(need))
    return found < need


def check_external_program(
        program,
        need_version,
        package,
        version_check_args=('--version',),
        version_scrape_regex=re.compile(r'(\d+\.\d+(?:\.\d+)?)'),
        optional=False):
    """Verify that an external program is installed and recent enough.

    Runs ``program`` with ``version_check_args``, extracts a version number
    from its combined stdout/stderr using group 1 of
    ``version_scrape_regex``, and compares it numerically with
    ``need_version``.

    Args:
        program: Executable name to run, looked up on PATH.
        need_version: Minimum acceptable version as a dotted string.
        package: Package name shown in the installation advice.
        version_check_args: Arguments that make the program print its
            version. Any sequence of strings is accepted.
        version_scrape_regex: Compiled pattern whose first group captures
            the version number in the program's output.
        optional: When true, problems are reported but never abort.

    Returns:
        None. Progress is printed to stdout and problems to stderr.

    Raises:
        SystemExit: With status 1 when a non-optional program is missing,
            has an undeterminable version, or is older than required.
    """
    print('Checking for {program} >= {need_version}...'.format(
        program=program, need_version=need_version))

    try:
        result = check_output(
            [program] + list(version_check_args),
            universal_newlines=True, stderr=STDOUT)
    except (CalledProcessError, OSError):
        # OSError covers both "not found" and "found but not executable".
        error_missing_program(program, package, optional)
        if not optional:
            sys.exit(1)
        print('Continuing install without {program}'.format(program=program))
        return

    match = version_scrape_regex.search(result)
    if match is None:
        error_unknown_version(program, package, optional, need_version)
        # The report says optional programs do not block installation.
        if not optional:
            sys.exit(1)
        print('Continuing install with unverified {program}'.format(
            program=program))
        return
    found_version = match.group(1)

    # Compare numerically: as plain strings '10.0' sorts before '9.14'.
    if _version_is_older(found_version, need_version):
        error_old_version(program, package, optional, need_version,
                          found_version)
        # The report says installation will abort for required programs.
        if not optional:
            sys.exit(1)

    print('Found {program} {found_version}'.format(
        program=program, found_version=found_version))
# The first command-line argument that is not an option is taken as the
# setup.py command; it is the empty string when there is none.
command = ''
for _arg in sys.argv[1:]:
    if not _arg.startswith('-'):
        command = _arg
        break

# External programs are only probed for install-like and test-like commands.
_checked_commands = ('check', 'test', 'nosetests', 'easy_install', 'egg_info')

if command.startswith('install') or command in _checked_commands:
    # Each entry holds the keyword arguments for one check, run in order.
    for _requirement in (
        dict(program='tesseract',
             need_version='3.02.02',
             package='tesseract'),
        dict(program='gs',
             need_version='9.14',
             package='ghostscript'),
        dict(program='unpaper',
             need_version='6.1',
             package='unpaper',
             optional=True),
        dict(program='java',
             need_version='1.5.0',
             package='Java Runtime Environment',
             version_check_args=['-version']),
        dict(program='qpdf',
             need_version='5.0.0',
             package='qpdf',
             version_check_args=['--version']),
    ):
        check_external_program(**_requirement)
2015-07-25 23:45:13 -07:00
# Package metadata and build configuration handed to setuptools.
setup(
    name='ocrmypdf',
    version='3.0rc4',  # also update: release notes, main.py
    description='OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched',
    url='https://github.com/fritz-hh/OCRmyPDF',
    author='James. R. Barlow',
    author_email='jim@purplerock.ca',
    license='Public Domain',
    packages=['ocrmypdf'],
    keywords=['PDF', 'OCR', 'optical character recognition', 'PDF/A', 'scanning'],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Development Status :: 4 - Beta",
        "Environment :: Console",
        "Intended Audience :: End Users/Desktop",
        "Intended Audience :: Science/Research",
        "Intended Audience :: System Administrators",
        "License :: Public Domain",
        "Operating System :: MacOS :: MacOS X",
        "Operating System :: POSIX",
        "Operating System :: POSIX :: BSD",
        "Operating System :: POSIX :: Linux",
        "Topic :: Scientific/Engineering :: Image Recognition",
        "Topic :: Text Processing :: Indexing",
        "Topic :: Text Processing :: Linguistic",
    ],
    # Python packages required at run time.
    install_requires=[
        'ruffus>=2.6.3',
        'Pillow>=2.7.0',
        'lxml>=3.4.2',
        'reportlab>=3.1.44',
        'PyPDF2>=1.25.1'
    ],
    # Python packages needed only to run the test suite. The setuptools
    # keyword is 'tests_require'; 'test_requires' is not recognized.
    tests_require=[
        'img2pdf>=0.1.5',
        'pytest>=2.7.2'
    ],
    # Installs an 'ocrmypdf' command that calls ocrmypdf.main.run_pipeline.
    entry_points={
        'console_scripts': [
            'ocrmypdf = ocrmypdf.main:run_pipeline'
        ],
    },
    eager_resources=[
        'ocrmypdf/jhove/bin/*.jar',
        'ocrmypdf/jhove/conf/*.conf',
        'ocrmypdf/jhove/lib/*.jar'
    ],
    include_package_data=True,
    zip_safe=False)