Merge branch 'develop'

This commit is contained in:
James R. Barlow 2016-02-06 18:18:49 -08:00
commit 2d15c09cca
6 changed files with 36 additions and 22 deletions

View File

@ -1,6 +1,6 @@
# OCRmyPDF
#
# VERSION 3.0.2
# VERSION 3.2
FROM debian:stretch
MAINTAINER James R. Barlow <jim@purplerock.ca>
@ -47,13 +47,11 @@ RUN rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /root/*
RUN pyvenv /appenv \
&& pyvenv --system-site-packages /appenv
COPY . /application/
# Install application and dependencies
# In this arrangement Pillow and reportlab will be provided by the system
RUN . /appenv/bin/activate; \
pip install --upgrade pip \
&& pip install --no-cache-dir /application \
&& pip install ocrmypdf \
&& pip install --no-cache-dir -r /application/test_requirements.txt
USER docker

View File

@ -1,6 +1,6 @@
# OCRmyPDF polyglot
#
# VERSION 3.0.2
# VERSION 3.2
FROM jbarlow83/ocrmypdf:latest
MAINTAINER James R. Barlow <jim@purplerock.ca>

View File

@ -6,8 +6,18 @@ Please always read this file before installing the package
Download software here: https://github.com/jbarlow83/OCRmyPDF/tags
v3.2.1:
=======
Changes
-------
- Fixed issue #47 "convert() got an unexpected keyword argument 'dpi'" by upgrading to img2pdf 0.2
- Tweaked the Dockerfiles
v3.2:
=========
=====
New features
------------

View File

@ -165,9 +165,6 @@ parser.add_argument(
'--skip-big', type=float, metavar='MPixels',
help="skip OCR on pages larger than the specified amount of megapixels, "
"but include skipped pages in final output")
# parser.add_argument(
# '--exact-image', action='store_true',
# help="Use original page from PDF without re-rendering")
advanced = parser.add_argument_group(
"Advanced",
@ -592,9 +589,18 @@ def select_image_layer(
re_symlink(page_pdf, output_file)
else:
pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)
dpi = round(max(pageinfo['xres'], pageinfo['yres'], options.oversample))
with open(output_file, 'wb') as pdf:
img2pdf.convert([image], dpi=dpi, outputstream=pdf)
dpi = round(max(pageinfo['xres'], pageinfo['yres'],
options.oversample))
imgsize = ((img2pdf.ImgSize.dpi, dpi), (img2pdf.ImgSize.dpi, dpi))
layout_fun = img2pdf.get_layout_fun(None, imgsize, None, None, None)
with open(image, 'rb') as imfile, \
open(output_file, 'wb') as pdf:
rawdata = imfile.read()
pdf.write(img2pdf.convert(
rawdata, producer="img2pdf", with_pdfrw=False,
layout_fun=layout_fun))
@active_if(options.pdf_renderer == 'hocr')

View File

@ -1,5 +1,5 @@
ruffus>=2.6.3
Pillow>=2.4.0
reportlab>=3.1.44
PyPDF2>=1.25.1
git+https://github.com/jbarlow83/img2pdf.git@e9bcce0afc3720752ca53a991db93f911a1df709#egg=img2pdf-0.1.5.dev
ruffus==2.6.3
Pillow==3.1.1
reportlab==3.2.0
PyPDF2==1.25.1
img2pdf==0.2

View File

@ -212,11 +212,11 @@ setup(
],
use_scm_version={'version_scheme': 'post-release'},
install_requires=[
'ruffus',
'Pillow',
'reportlab',
'PyPDF2',
'img2pdf'
'ruffus>=2.6.3',
'Pillow>=2.4.0',
'reportlab>=3.1.44',
'PyPDF2>=1.25.1',
'img2pdf>=0.2.0'
],
tests_require=tests_require,
entry_points={