# syntax=docker/dockerfile:experimental
# NOTE: the parser directive above has to stay on the very first line; Docker
# stops looking for directives once any other comment or instruction is seen.

FROM quay.io/unstructured-io/base-images:centos7.9

# pip version to pin inside the image. No default is declared here, so it is
# expected to be passed at build time: --build-arg PIP_VERSION=<version>.
ARG PIP_VERSION

# Set up environment
ENV HOME=/home/
WORKDIR ${HOME}

# Create an owner-only .ssh directory and record GitHub's RSA host key in
# known_hosts (same directory as the one created by mkdir).
RUN mkdir ${HOME}/.ssh && chmod go-rwx ${HOME}/.ssh \
  && ssh-keyscan -t rsa github.com >> ${HOME}/.ssh/known_hosts

# Make code copied under ${HOME} importable and expose user-level scripts.
# NOTE(review): if the base image does not define PYTHONPATH, the value below
# starts with an empty entry (":/home/") - confirm that is acceptable.
ENV PYTHONPATH="${PYTHONPATH}:${HOME}"
ENV PATH="/home/usr/.local/bin:${PATH}"

# Copy and install Unstructured
# Only the requirements directory is copied before installing, so the
# dependency layer below does not depend on the source directories copied later.
COPY requirements requirements

# Pin pip, then install every requirements set in a single layer.
# detectron2 is installed last through `scl enable devtoolset-9 '<command>'`
# so that the pip command itself runs inside the devtoolset-9 collection.
# (`scl enable devtoolset-9 bash && pip ...` only starts a child shell; the
# command after `&&` runs outside the collection.)
RUN python3.8 -m pip install pip==${PIP_VERSION} && \
  pip install --no-cache-dir -r requirements/base.txt && \
  pip install --no-cache-dir -r requirements/test.txt && \
  pip install --no-cache-dir -r requirements/huggingface.txt && \
  pip install --no-cache-dir -r requirements/dev.txt && \
  pip install --no-cache-dir -r requirements/ingest-azure.txt && \
  pip install --no-cache-dir -r requirements/ingest-github.txt && \
  pip install --no-cache-dir -r requirements/ingest-gitlab.txt && \
  pip install --no-cache-dir -r requirements/ingest-google-drive.txt && \
  pip install --no-cache-dir -r requirements/ingest-reddit.txt && \
  pip install --no-cache-dir -r requirements/ingest-s3.txt && \
  pip install --no-cache-dir -r requirements/ingest-slack.txt && \
  pip install --no-cache-dir -r requirements/ingest-wikipedia.txt && \
  pip install --no-cache-dir -r requirements/local-inference.txt && \
  scl enable devtoolset-9 'pip install --no-cache-dir "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2"'

# Sample documents and the library source, copied after dependency install.
COPY example-docs example-docs
COPY unstructured unstructured

# Download the NLTK "punkt" and "averaged_perceptron_tagger" resources at
# build time, then call the ingest doc processor's initialize().
# NOTE(review): presumably initialize() pre-loads models/resources so they are
# baked into the image - confirm against unstructured.ingest.doc_processor.
RUN python3.8 -c "import nltk; nltk.download('punkt')" && \
  python3.8 -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
  python3.8 -c "from unstructured.ingest.doc_processor.generalized import initialize; initialize()"

# Default to a shell when the container is started without a command.
CMD ["/bin/bash"]