2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# OCRmyPDF
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-05 16:08:26 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# VERSION               3.2
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								FROM      debian:stretch
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								MAINTAINER James R. Barlow <jim@purplerock.ca>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Add unprivileged user
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN useradd docker \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  && mkdir /home/docker \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  && chown docker:docker /home/docker
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Update system and install our dependencies
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-05 01:38:59 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# If this command takes too Docker hub's automated build will timeout,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# so try it in portions
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN apt-get update && apt-get install -y --no-install-recommends \
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:44:30 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  locales \
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  python3 \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  python3-pip \
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  python3-venv \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  python3-reportlab \
							 | 
						
					
						
							
								
									
										
										
										
											2016-01-04 15:07:12 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  python3-pil \
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-05 01:38:59 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  python3-wheel
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN apt-get install -y --no-install-recommends \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  unpaper \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  qpdf \
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-21 04:24:21 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  poppler-utils \
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-05 01:38:59 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  tesseract-ocr \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-eng tesseract-ocr-fra
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-17 00:13:45 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								RUN apt-get install -qy --no-install-recommends \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  libffi-dev \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  libpython3-dev \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  gcc
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-21 04:24:21 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Install Ghostscript from Debian sid to work around JPEG 2000 issue in
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Debian stretch libgs9 or gs 9.16~dfsg-2.1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								COPY ./share/etc-apt-sources.list /etc/apt/sources.list
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN apt-get update && apt-get install -y ghostscript/sid
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:44:30 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Enforce UTF-8
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Borrowed from https://index.docker.io/u/crosbymichael/python/ 
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN dpkg-reconfigure locales && \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  locale-gen C.UTF-8 && \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /usr/sbin/update-locale LANG=C.UTF-8
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								ENV LC_ALL C.UTF-8
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-22 00:30:39 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Set up a Python virtualenv and take all of the system packages, so we can
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# rely on the platform packages rather than importing GCC and compiling them
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN pyvenv /appenv \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  && pyvenv --system-site-packages /appenv
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 05:38:05 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-17 00:13:45 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								COPY . /application/
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-06 23:13:16 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-16 21:45:44 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Replace stock Tesseract 3.04.00 font with improved sharp2.ttf that resolves
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# issues in many PDF viewers.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Discussion is in https://github.com/tesseract-ocr/tesseract/issues/182
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								COPY ./share/sharp2.ttf /usr/share/tesseract-ocr/tessdata/pdf.ttf
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN chmod 644 /usr/share/tesseract-ocr/tessdata/pdf.ttf
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-17 01:11:41 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Set this here to force a docker version, allowing non-tagged versions to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# be built
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# ENV SETUPTOOLS_SCM_PRETEND_VERSION=v3.3.0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Install application and dependencies
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# In this arrangement Pillow and reportlab will be provided by the system
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-17 15:14:23 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Even though ocrmypdf is locally present, pull from PyPI because
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Dockerhub and setuptools_scm clash
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 05:38:05 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								RUN . /appenv/bin/activate; \
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  pip install --upgrade pip \
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-17 15:14:23 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  && pip install ocrmypdf \
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  && pip install --no-cache-dir -r /application/test_requirements.txt
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-17 00:13:45 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Remove the junk
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN apt-get remove -qy gcc
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN apt-get autoremove -y && apt-get clean -y
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								RUN rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /root/*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								USER docker
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								WORKDIR /home/docker
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 23:30:06 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-20 01:25:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								ENV OCRMYPDF_TEST_OUTPUT=/tmp/test-output
							 | 
						
					
						
							
								
									
										
										
										
											2016-02-17 01:03:20 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								ENV OCRMYPDF_SHARP_TTF=1
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 04:46:25 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-18 23:41:41 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Must use array form of ENTRYPOINT
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Non-array form does not append other arguments, because that is "intuitive"
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-19 13:10:58 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								ENTRYPOINT ["/application/docker-wrapper.sh"]
							 |