2015-07-25 23:45:13 -07:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
from setuptools import setup
|
2015-07-27 01:45:17 -07:00
|
|
|
from subprocess import Popen, STDOUT, check_output, CalledProcessError
|
|
|
|
from string import Template
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
# User-facing message templates.  Each one is filled in with str.format()
# using the named fields that appear in braces, and is printed to stderr by
# the error-reporting helpers in this file.

# Shown when the external program cannot be run at all.
missing_program = (
    "\n"
    "The program '{program}' could not be executed or was not found on your\n"
    "system PATH.\n"
)

# Shown when the program runs but no version number can be scraped from
# its output.
unknown_version = (
    "\n"
    "OCRmyPDF requires '{program}' {need_version} or higher. Your system has\n"
    "'{program}' but we cannot tell what version is installed. Contact the\n"
    "package maintainer.\n"
)

# Shown when the scraped version is lower than the required one.
old_version = (
    "\n"
    "OCRmyPDF requires '{program}' {need_version} or higher. Your system appears\n"
    "to have {found_version}. Please update this program.\n"
)

# Trailer for dependencies that are optional.
okay_its_optional = (
    "\n"
    "This program is OPTIONAL, so installation of OCRmyPDF can proceed, but\n"
    "some functionality may be missing.\n"
)

# Trailer for dependencies that are mandatory.
not_okay_its_required = (
    "\n"
    "This program is REQUIRED for OCRmyPDF to work. Installation will abort.\n"
)

# Installation hint printed when sys.platform starts with 'darwin'.
osx_install_advice = (
    "\n"
    "If you have homebrew installed, try these command to install the missing\n"
    "packages:\n"
    "brew update\n"
    "brew upgrade\n"
    "brew install {package}\n"
)

# Installation hint printed when sys.platform starts with 'linux'.
linux_install_advice = (
    "\n"
    "On systems with the aptitude package manager (Debian, Ubuntu), try these\n"
    "commands:\n"
    "sudo apt-get update\n"
    "sudo apt-get install {package}\n"
    "\n"
    "On RPM-based systems (Red Hat, Fedora), search for instructions on\n"
    "installing the RPM for {package}.\n"
)
|
|
|
|
|
|
|
|
|
|
|
|
def _error_trailer(program, package, optional):
    """Print the closing part of a dependency error report to stderr.

    The trailer states whether the dependency is optional or required and,
    on macOS ('darwin') and Linux, adds a package-manager hint naming
    *package*.

    Args:
        program: name of the external executable that failed its check.
        package: package name substituted into the installation hint.
        optional: truthy when OCRmyPDF can be installed without *program*.
    """
    # NOTE(review): the original indentation was lost in this copy of the
    # file; the platform hint is assumed to be printed for optional and
    # required programs alike -- confirm against the repository.
    if program == 'java':
        # Nothing at all is printed for Java.
        return

    fields = dict(program=program, package=package, optional=optional)

    status_template = okay_its_optional if optional else not_okay_its_required
    print(status_template.format(**fields), file=sys.stderr)

    platform_name = sys.platform
    if platform_name.startswith('darwin'):
        print(osx_install_advice.format(**fields), file=sys.stderr)
    elif platform_name.startswith('linux'):
        print(linux_install_advice.format(**fields), file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
def error_missing_program(program, package, optional):
    """Report on stderr that *program* could not be executed.

    Prints the `missing_program` message followed by the common trailer
    produced by `_error_trailer`.

    Args:
        program: name of the executable that could not be run.
        package: package name passed on to the trailer's installation hint.
        optional: truthy when the program is not strictly required.
    """
    notice = missing_program.format(
        program=program, package=package, optional=optional)
    print(notice, file=sys.stderr)
    _error_trailer(program=program, package=package, optional=optional)
|
|
|
|
|
|
|
|
|
|
|
|
def error_unknown_version(program, package, optional,
                          need_version='(unknown)'):
    """Report on stderr that the installed version of *program* is unknown.

    Prints the `unknown_version` message followed by the common trailer
    produced by `_error_trailer`.

    The `unknown_version` template contains a ``{need_version}`` field, so
    the required version has to be supplied here; without it str.format()
    raises KeyError.  The parameter is defaulted so that three-argument
    calls keep working.

    Args:
        program: name of the executable whose version could not be parsed.
        package: package name passed on to the trailer's installation hint.
        optional: truthy when the program is not strictly required.
        need_version: minimum version OCRmyPDF requires, as text.
    """
    print(
        unknown_version.format(program=program, need_version=need_version),
        file=sys.stderr)
    # The trailer accepts exactly these three values; need_version must not
    # be forwarded to it.
    _error_trailer(program, package, optional)
|
|
|
|
|
|
|
|
|
|
|
|
def error_old_version(program, package, optional, need_version,
                      found_version='an older version'):
    """Report on stderr that the installed *program* is too old.

    Prints the `old_version` message followed by the common trailer
    produced by `_error_trailer`.

    Args:
        program: name of the executable that was checked.
        package: package name passed on to the trailer's installation hint.
        optional: truthy when the program is not strictly required.
        need_version: minimum version OCRmyPDF requires, as text.
        found_version: version detected on this system.  Defaulted so that
            four-argument calls still produce a readable sentence.
    """
    print(
        old_version.format(
            program=program,
            need_version=need_version,
            found_version=found_version),
        file=sys.stderr)
    # The trailer accepts exactly these three values; the version fields
    # must not be forwarded to it.
    _error_trailer(program, package, optional)


def _version_key(version):
    """Return a tuple that orders version strings component by component.

    Runs of digits compare as integers and runs of letters compare as
    text, so '9.9' < '9.14' < '10.0' and '1.7' < '1.7a'.  Each component is
    tagged so that a number is never compared directly with a string.
    """
    return tuple(
        (0, int(part)) if part.isdigit() else (1, part)
        for part in re.findall(r'[0-9]+|[A-Za-z]+', version))


def check_external_program(
        program,
        minimum_version,
        package,
        version_check_args=['--version'],
        version_scrape_regex=re.compile(r'(\d+\.\d+(?:\.\d+)?)'),
        optional=False):
    """Verify that an external program is installed and recent enough.

    Runs ``program`` with ``version_check_args``, scrapes a version number
    from its combined stdout/stderr and compares it with
    ``minimum_version``.  Progress and the detected version go to stdout;
    problems are reported on stderr through the ``error_*`` helpers.

    Args:
        program: executable to run.
        minimum_version: lowest acceptable version, as text.
        package: package name used in installation hints.
        version_check_args: arguments that make the program print its
            version.  The default list is never modified.
        version_scrape_regex: compiled pattern whose first group captures
            the version in the program's output.
        optional: when true, a failed check is reported but does not abort.

    Raises:
        SystemExit: with status 1 when a non-optional program is missing,
            has an unrecognisable version, or is older than required.
    """
    print('Checking for {program} >= {minimum_version}...'.format(
        program=program, minimum_version=minimum_version))

    try:
        result = check_output(
            [program] + list(version_check_args),
            universal_newlines=True, stderr=STDOUT)
    except (CalledProcessError, OSError):
        # OSError (FileNotFoundError, PermissionError) is what is raised
        # when the executable cannot be started at all; CalledProcessError
        # only covers a non-zero exit status.
        error_missing_program(program, package, optional)
        if not optional:
            sys.exit(1)
        return  # optional: nothing left to inspect

    found = version_scrape_regex.search(result)
    if found is None:
        error_unknown_version(program, package, optional, minimum_version)
        if not optional:
            sys.exit(1)
        return  # optional: no version to compare
    version = found.group(1)

    # Compare numerically; plain string comparison ranks '10.0' below
    # '9.14' and '9.9' above it.
    if _version_key(version) < _version_key(minimum_version):
        error_old_version(
            program, package, optional, minimum_version, version)
        if not optional:
            # The trailer has just announced that installation will abort.
            sys.exit(1)
        return

    print('Found {program} {version}'.format(
        program=program, version=version))
|
|
|
|
|
2015-07-27 02:14:51 -07:00
|
|
|
# First command-line argument that is not an option (does not start with
# '-'); empty string when there is none.
command = next((arg for arg in sys.argv[1:] if not arg.startswith('-')), '')

# Only probe for the external programs when the command is an install
# variant or one of the listed check/test commands.
if command.startswith('install') or \
        command in ['check', 'test', 'nosetests', 'easy_install']:
    check_external_program(
        program='tesseract',
        minimum_version='3.02.02',
        package='tesseract'
    )
    check_external_program(
        program='gs',
        minimum_version='9.14',
        package='ghostscript'
    )
    check_external_program(
        program='unpaper',
        minimum_version='6.1',
        package='unpaper',
        optional=True
    )
    # Deprecated
    check_external_program(
        program='pdfseparate',
        minimum_version='0.29.0',
        package='poppler',
        version_check_args=['-v']
    )
    check_external_program(
        program='java',
        minimum_version='1.5.0',
        package='Java Runtime Environment',
        version_check_args=['-version']
    )
    check_external_program(
        program='mutool',
        minimum_version='1.7a',
        version_check_args=['-v'],
        # The letter suffix is optional ('[a-z]*'): requiring it would make
        # a version without a suffix, such as '1.8', unrecognisable and
        # abort the installation with an "unknown version" error.
        version_scrape_regex=re.compile(r'(\d+\.\d+[a-z]*)'),
        package='mupdf-tools'
    )
|
2015-07-25 23:45:13 -07:00
|
|
|
|
|
|
|
# Trove classifiers describing the distribution.
_classifiers = [
    "Programming Language :: Python :: 3",
    "Development Status :: 4 - Beta",
    "Environment :: Console",
    "Intended Audience :: End Users/Desktop",
    "Intended Audience :: Science/Research",
    "Intended Audience :: System Administrators",
    "License :: Public Domain",
    "Operating System :: MacOS :: MacOS X",
    "Operating System :: POSIX",
    "Operating System :: POSIX :: BSD",
    "Operating System :: POSIX :: Linux",
    "Topic :: Scientific/Engineering :: Image Recognition",
    "Topic :: Text Processing :: Indexing",
    "Topic :: Text Processing :: Linguistic",
]

# Python packages required at run time.
_requirements = [
    'ruffus>=2.6.3',
    'Pillow>=2.7.0',
    'lxml>=3.4.2',
    'reportlab>=3.1.44',
    'PyPDF2>=1.25.1',
]

# Bundled JHOVE files, declared as eager resources.
_jhove_resources = [
    'ocrmypdf/jhove/bin/*.jar',
    'ocrmypdf/jhove/conf/*.conf',
    'ocrmypdf/jhove/lib/*.jar',
]

# Command-line entry point: the `ocrmypdf` script runs
# ocrmypdf.main:run_pipeline.
_entry_points = {
    'console_scripts': [
        'ocrmypdf = ocrmypdf.main:run_pipeline',
    ],
}

setup(
    name='ocrmypdf',
    version='3.0rc1',
    description=(
        'OCRmyPDF adds an OCR text layer to scanned PDF files, '
        'allowing them to be searched'
    ),
    url='https://github.com/fritz-hh/OCRmyPDF',
    author='J. R. Barlow',
    author_email='jim@purplerock.ca',
    license='Public Domain',
    packages=['ocrmypdf'],
    keywords=[
        'PDF',
        'OCR',
        'optical character recognition',
        'PDF/A',
        'scanning',
    ],
    classifiers=_classifiers,
    install_requires=_requirements,
    entry_points=_entry_points,
    eager_resources=_jhove_resources,
    include_package_data=True,
    zip_safe=False,
)
|