From 8a616dae7556f814b41aa43e35d72cebb3b8d5bb Mon Sep 17 00:00:00 2001
From: Guillim
Date: Tue, 7 Jul 2020 16:25:36 +0200
Subject: [PATCH] Adjust Docker and REST API to allow TransformersReader Class
 (#180)

---
 Dockerfile                      |  5 ++++-
 Dockerfile-GPU                  |  3 +++
 docker-compose.yml              | 33 ++++++++++++++++++++++++--------
 haystack/reader/transformers.py |  2 +-
 rest_api/config.py              |  3 +++
 rest_api/controller/search.py   | 34 +++++++++++++++++++++------------
 6 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 395bc1d50..5ff008dd2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,9 +14,12 @@ COPY rest_api /home/user/rest_api
 # copy saved FARM models
 COPY models /home/user/models
 
-# copy sqlite db if needed for testing
+# Optional: copy sqlite db if needed for testing
 #COPY qa.db /home/user/
 
+# Optional: copy data directory containing docs for indexing
+#COPY data /home/user/data
+
 EXPOSE 8000
 
 # cmd for running the API
diff --git a/Dockerfile-GPU b/Dockerfile-GPU
index 9541259f2..802d8318a 100644
--- a/Dockerfile-GPU
+++ b/Dockerfile-GPU
@@ -22,6 +22,9 @@ COPY models /home/user/models
 # Optional: copy sqlite db if needed for testing
 #COPY qa.db /home/user/
 
+# Optional: copy data directory containing docs for indexing
+#COPY data /home/user/data
+
 EXPOSE 8000
 
 ENV LC_ALL=C.UTF-8
diff --git a/docker-compose.yml b/docker-compose.yml
index f6bb4285a..4edc36938 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,19 +1,36 @@
 version: '3'
 services:
   haystack-api:
-    image: "deepset/haystack-cpu:0.2.0"
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: "deepset/haystack-cpu:latest"
     ports:
       - 8000:8000
+    volumes:
+      # Optional: mount your own models from disk into the container
+      - "./models:/home/user/models"
     environment:
-      # see haystack/api/config.py for additional variables to configure.
-      # load reader model from transformers' model hub.
-      - READER_MODEL_PATH=deepset/roberta-base-squad2
+      # See rest_api/config.py for more variables that you can configure here.
       - DB_HOST=elasticsearch
+      - USE_GPU=False
+      # Load a model from transformers' model hub or a local path into the FARMReader.
+      - READER_MODEL_PATH=deepset/roberta-base-squad2
+      # - READER_MODEL_PATH=/home/user/models/roberta-base-squad2
+      # Alternative: If you want to use the TransformersReader (e.g. for loading a local model in transformers format):
+      # - READER_USE_TRANSFORMERS=True
+      # - READER_MODEL_PATH=/home/user/models/roberta-base-squad2
+      # - READER_TOKENIZER=/home/user/models/roberta-base-squad2
     restart: always
-
+    depends_on:
+      - elasticsearch
+    command: "/bin/bash -c 'sleep 15 && gunicorn rest_api.application:app -b 0.0.0.0 -k uvicorn.workers.UvicornWorker --workers 1 --timeout 180'"
   elasticsearch:
-    # This demo image contains Game of Thrones Wikipedia articles indexed.
-    # For starting a new Elasticsearch instance, replace image with "elasticsearch:7.6.1".
- image: "deepset/elasticsearch-game-of-thrones" + # This will start an empty elasticsearch instance (so you have to add your documents yourself) + image: "elasticsearch:7.6.1" + # If you want a demo image instead that is "ready-to-query" with some indexed Game of Thrones articles: + # image: "deepset/elasticsearch-game-of-thrones" + ports: + - 9200:9200 environment: - discovery.type=single-node \ No newline at end of file diff --git a/haystack/reader/transformers.py b/haystack/reader/transformers.py index ec780e6c0..db8c15363 100644 --- a/haystack/reader/transformers.py +++ b/haystack/reader/transformers.py @@ -37,7 +37,7 @@ class TransformersReader(BaseReader): :param tokenizer: name of the tokenizer (usually the same as model) :param context_window_size: num of chars (before and after the answer) to return as "context" for each answer. The context usually helps users to understand if the answer really makes sense. - :param use_gpu: < 1 -> use cpu + :param use_gpu: < 0 -> use cpu >= 0 -> ordinal of the gpu to use """ self.model = pipeline("question-answering", model=model, tokenizer=tokenizer, device=use_gpu) diff --git a/rest_api/config.py b/rest_api/config.py index f0b2670d2..4ee0c4705 100644 --- a/rest_api/config.py +++ b/rest_api/config.py @@ -6,6 +6,7 @@ PROJECT_NAME = os.getenv("PROJECT_NAME", "FastAPI") # Resources / Computation USE_GPU = os.getenv("USE_GPU", "True").lower() == "true" +GPU_NUMBER = int(os.getenv("GPU_NUMBER", 1)) MAX_PROCESSES = int(os.getenv("MAX_PROCESSES", 4)) BATCHSIZE = int(os.getenv("BATCHSIZE", 50)) CONCURRENT_REQUEST_PER_WORKER = int(os.getenv("CONCURRENT_REQUEST_PER_WORKER", 4)) @@ -26,6 +27,8 @@ EMBEDDING_DIM = os.getenv("EMBEDDING_DIM", None) # Reader READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", None) +READER_USE_TRANSFORMERS = os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true" +READER_TOKENIZER = os.getenv("READER_TOKENIZER", None) CONTEXT_WINDOW_SIZE = int(os.getenv("CONTEXT_WINDOW_SIZE", 500)) DEFAULT_TOP_K_READER = int(os.getenv("DEFAULT_TOP_K_READER", 5)) TOP_K_PER_CANDIDATE = int(os.getenv("TOP_K_PER_CANDIDATE", 3)) diff --git a/rest_api/controller/search.py b/rest_api/controller/search.py index 628d8da0b..b87e71bfa 100644 --- a/rest_api/controller/search.py +++ b/rest_api/controller/search.py @@ -13,10 +13,11 @@ from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_ EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \ BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \ DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \ - EMBEDDING_MODEL_FORMAT + EMBEDDING_MODEL_FORMAT, READER_USE_TRANSFORMERS, READER_TOKENIZER, GPU_NUMBER from rest_api.controller.utils import RequestLimiter from haystack.database.elasticsearch import ElasticsearchDocumentStore from haystack.reader.farm import FARMReader +from haystack.reader.transformers import TransformersReader from haystack.retriever.base import BaseRetriever from haystack.retriever.sparse import ElasticsearchRetriever from haystack.retriever.dense import EmbeddingRetriever @@ -54,17 +55,26 @@ else: retriever = ElasticsearchRetriever(document_store=document_store) if READER_MODEL_PATH: # for extractive doc-qa - reader = FARMReader( - model_name_or_path=str(READER_MODEL_PATH), - batch_size=BATCHSIZE, - use_gpu=USE_GPU, - context_window_size=CONTEXT_WINDOW_SIZE, - 
top_k_per_candidate=TOP_K_PER_CANDIDATE, - no_ans_boost=NO_ANS_BOOST, - num_processes=MAX_PROCESSES, - max_seq_len=MAX_SEQ_LEN, - doc_stride=DOC_STRIDE, - ) # type: Optional[FARMReader] + if READER_USE_TRANSFORMERS: + use_gpu = -1 if not USE_GPU else GPU_NUMBER + reader = TransformersReader( + model=str(READER_MODEL_PATH), + use_gpu=use_gpu, + context_window_size=CONTEXT_WINDOW_SIZE, + tokenizer=str(READER_TOKENIZER) + ) # type: Optional[FARMReader] + else: + reader = FARMReader( + model_name_or_path=str(READER_MODEL_PATH), + batch_size=BATCHSIZE, + use_gpu=USE_GPU, + context_window_size=CONTEXT_WINDOW_SIZE, + top_k_per_candidate=TOP_K_PER_CANDIDATE, + no_ans_boost=NO_ANS_BOOST, + num_processes=MAX_PROCESSES, + max_seq_len=MAX_SEQ_LEN, + doc_stride=DOC_STRIDE, + ) # type: Optional[FARMReader] else: reader = None # don't need one for pure FAQ matching
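For reference, a minimal sketch of how the new settings resolve to a reader at API
startup, following the selection logic this patch adds to rest_api/controller/search.py.
The default model path, tokenizer path, and context window value below are illustrative
placeholders (rest_api/config.py defaults READER_MODEL_PATH and READER_TOKENIZER to None
and CONTEXT_WINDOW_SIZE to 500); adjust them to your environment.

import os

from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader

# Same environment-variable parsing as rest_api/config.py
USE_GPU = os.getenv("USE_GPU", "True").lower() == "true"
GPU_NUMBER = int(os.getenv("GPU_NUMBER", 1))
READER_USE_TRANSFORMERS = os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true"
# Placeholder defaults for this sketch only; config.py defaults these to None.
READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", "deepset/roberta-base-squad2")
READER_TOKENIZER = os.getenv("READER_TOKENIZER", READER_MODEL_PATH)

if READER_USE_TRANSFORMERS:
    # TransformersReader follows the transformers pipeline device convention:
    # device < 0 -> CPU, device >= 0 -> ordinal of the GPU to use.
    reader = TransformersReader(
        model=READER_MODEL_PATH,
        tokenizer=READER_TOKENIZER,
        context_window_size=500,
        use_gpu=-1 if not USE_GPU else GPU_NUMBER,
    )
else:
    # FARMReader takes a plain boolean for use_gpu.
    reader = FARMReader(
        model_name_or_path=READER_MODEL_PATH,
        use_gpu=USE_GPU,
        context_window_size=500,
    )

With docker-compose, the same effect is achieved by setting READER_USE_TRANSFORMERS=True
together with READER_MODEL_PATH and READER_TOKENIZER in the haystack-api environment
block, as shown in the docker-compose.yml changes above.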