mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-24 21:48:52 +00:00
Add models to demo docker image (#1978)
* Add utility to cache models and nltk data & modify Dockerfiles to use it * Fix punkt data not being cached
This commit is contained in:
parent
192e03be33
commit
9c3d9b4885
@ -18,11 +18,10 @@ COPY haystack /home/user/haystack
|
||||
|
||||
# install as a package
|
||||
COPY setup.py requirements.txt README.md /home/user/
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install -r requirements.txt
|
||||
RUN pip install -e .
|
||||
|
||||
# download punkt tokenizer to be included in image
|
||||
RUN python3 -c "import nltk;nltk.download('punkt', download_dir='/usr/nltk_data')"
|
||||
RUN python3 -c "from haystack.utils.docker import cache_models;cache_models()"
|
||||
|
||||
# create folder for the /file-upload API endpoint with write permissions; adjust this path if FILE_UPLOAD_PATH is changed
|
||||
RUN mkdir -p /home/user/file-upload
|
||||
|
||||
@ -37,15 +37,13 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1
|
||||
# Copy package setup files
|
||||
COPY setup.py requirements.txt README.md /home/user/
|
||||
|
||||
RUN pip install --upgrade pip
|
||||
RUN echo "Install required packages" && \
|
||||
# Install PyTorch for CUDA 11
|
||||
pip3 install torch==1.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html && \
|
||||
# Install from requirements.txt
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
# download punkt tokenizer to be included in image
|
||||
RUN python3 -c "import nltk;nltk.download('punkt', download_dir='/usr/nltk_data')"
|
||||
|
||||
# copy saved models
|
||||
COPY README.md models* /home/user/models/
|
||||
|
||||
@ -58,6 +56,9 @@ COPY haystack /home/user/haystack
|
||||
# Install package
|
||||
RUN pip3 install -e .
|
||||
|
||||
# Cache Roberta and NLTK data
|
||||
RUN python3 -c "from haystack.utils.docker import cache_models;cache_models()"
|
||||
|
||||
# optional : copy sqlite db if needed for testing
|
||||
#COPY qa.db /home/user/
|
||||
|
||||
|
||||
18
haystack/utils/docker.py
Normal file
18
haystack/utils/docker.py
Normal file
@ -0,0 +1,18 @@
|
||||
import logging
|
||||
|
||||
def cache_models(models=None, nltk_data_dir='/root/nltk_data'):
    """
    Download models and tokenizer data so that they end up in the local caches.

    Used only in the Dockerfiles to include these caches in the images, so
    that containers do not have to download them at startup.

    :param models: Name of a single model, or a list of model names, to fetch
                   with `transformers`. Defaults to
                   `['deepset/roberta-base-squad2']`.
    :param nltk_data_dir: Directory the NLTK `punkt` tokenizer data is
                          downloaded to. Defaults to `/root/nltk_data`.
    """
    if models is None:
        models = ['deepset/roberta-base-squad2']
    elif isinstance(models, str):
        # Accept a bare model name as a convenience for one-off calls.
        models = [models]

    # Third-party imports are kept function-local, as in the original helper,
    # so importing this module does not load nltk or transformers.

    # download punkt tokenizer
    logging.info("Caching punkt data")
    import nltk
    # NOTE(review): the default presumably targets the home directory of the
    # root user the image is built as -- confirm if the image user changes.
    nltk.download('punkt', download_dir=nltk_data_dir)

    # Cache the tokenizer and the weights of every requested model
    import transformers
    for model_to_cache in models:
        logging.info("Caching %s", model_to_cache)
        transformers.AutoTokenizer.from_pretrained(model_to_cache)
        transformers.AutoModel.from_pretrained(model_to_cache)
|
||||
Loading…
x
Reference in New Issue
Block a user