mirror of
https://github.com/docling-project/docling.git
synced 2025-06-27 05:20:05 +00:00

* docs: remove old code from custom_convert.py Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * docs: update example Dockerfile Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
28 lines
1.0 KiB
Docker
28 lines
1.0 KiB
Docker
# Base image: Python 3.11 on Debian bookworm, slim variant.
FROM python:3.11-slim-bookworm
|
|
|
|
# Make git-over-SSH skip host key verification, so git operations never stop
# at an interactive "unknown host" prompt.
# NOTE(review): this also removes protection against a spoofed host; confirm
# that trade-off is acceptable before reusing this image outside of examples.
ENV GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no"
|
|
|
|
# Install the system packages: two shared libraries (libgl1, libglib2.0-0;
# presumably required by imaging dependencies of docling - confirm) and the
# curl, wget, git and procps command-line tools.
# `apt-get update` and `apt-get install` stay in one RUN so the package index
# is never served stale from the build cache. The index under
# /var/lib/apt/lists is deleted in the same RUN: `apt-get clean` only empties
# the .deb archive cache, and files removed in a later layer would still be
# stored in this one.
# Recommended packages are intentionally still installed, so the resulting
# package set is the same as before.
RUN apt-get update \
    && apt-get install -y libgl1 libglib2.0-0 curl wget git procps \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# This will install torch with *only* cpu support
|
|
# Remove the --extra-index-url part if you want to install all the gpu requirements
|
|
# For more details on the different torch distributions, visit https://pytorch.org/.
|
|
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
        --extra-index-url https://download.pytorch.org/whl/cpu \
        docling
|
|
|
|
# Point both HF_HOME and TORCH_HOME at /tmp/ (presumably the locations the
# Hugging Face and torch libraries use for downloaded files - confirm).
ENV HF_HOME=/tmp/ \
    TORCH_HOME=/tmp/
|
|
|
|
# Run the model loading/download helpers during the build so that, judging by
# their names, the model files are already present in the image when a
# container starts.
# NOTE(review): both helpers are imported from whatever docling release the
# unpinned `pip install` resolved; confirm they still exist in that release.
RUN python -c 'from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models; load_pretrained_nlp_models(verbose=True);'
RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; StandardPdfPipeline.download_models_hf(force=True);'

# Copy the example script last. The two download steps above do not read it,
# so placing the COPY after them lets an edit to minimal.py reuse their cached
# layers instead of triggering the downloads again.
COPY examples/minimal.py /root/minimal.py
|
|
|
|
# In container environments, always set a thread budget to avoid undesired thread congestion.
|
|
# Default value baked into the image; it can be overridden per container with
# `docker run -e OMP_NUM_THREADS=<n>`.
ENV OMP_NUM_THREADS=4
|
|
|
|
# In the container shell:
|
|
# > cd /root/
|
|
# > python minimal.py
|