Adjust Docker and REST API to allow TransformersReader Class (#180)

Guillim 2020-07-07 16:25:36 +02:00 committed by GitHub
parent fe33a481ad
commit 8a616dae75
6 changed files with 58 additions and 22 deletions

View File: Dockerfile

@@ -14,9 +14,12 @@ COPY rest_api /home/user/rest_api
 # copy saved FARM models
 COPY models /home/user/models
-# copy sqlite db if needed for testing
+# optional: copy sqlite db if needed for testing
 #COPY qa.db /home/user/
+# optional: copy data directory containing docs for indexing
+#COPY data /home/user/data
 EXPOSE 8000
 # cmd for running the API

View File: Dockerfile-GPU

@@ -22,6 +22,9 @@ COPY models /home/user/models
 # Optional: copy sqlite db if needed for testing
 #COPY qa.db /home/user/
+# Optional: copy data directory containing docs for indexing
+#COPY data /home/user/data
 EXPOSE 8000
 ENV LC_ALL=C.UTF-8

View File: docker-compose.yml

@@ -1,19 +1,36 @@
 version: '3'
 services:
   haystack-api:
-    image: "deepset/haystack-cpu:0.2.0"
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: "deepset/haystack-cpu:latest"
     ports:
       - 8000:8000
+    volumes:
+      # Optional: mount your own models from disk into the container
+      - "./models:/home/user/models"
     environment:
-      # see haystack/api/config.py for additional variables to configure.
-      # load reader model from transformers' model hub.
-      - READER_MODEL_PATH=deepset/roberta-base-squad2
+      # See rest_api/config.py for more variables that you can configure here.
+      - DB_HOST=elasticsearch
+      - USE_GPU=False
+      # Load a model from transformers' model hub or a local path into the FARMReader.
+      - READER_MODEL_PATH=deepset/roberta-base-squad2
+      # - READER_MODEL_PATH=/home/user/models/roberta-base-squad2
+      # Alternative: If you want to use the TransformersReader (e.g. for loading a local model in transformers format):
+      # - READER_USE_TRANSFORMERS=True
+      # - READER_MODEL_PATH=/home/user/models/roberta-base-squad2
+      # - READER_TOKENIZER=/home/user/models/roberta-base-squad2
     restart: always
     depends_on:
       - elasticsearch
     command: "/bin/bash -c 'sleep 15 && gunicorn rest_api.application:app -b 0.0.0.0 -k uvicorn.workers.UvicornWorker --workers 1 --timeout 180'"
   elasticsearch:
-    # This demo image contains Game of Thrones Wikipedia articles indexed.
-    # For starting a new Elasticsearch instance, replace image with "elasticsearch:7.6.1".
-    image: "deepset/elasticsearch-game-of-thrones"
+    # This will start an empty elasticsearch instance (so you have to add your documents yourself)
+    image: "elasticsearch:7.6.1"
+    # If you want a demo image instead that is "ready-to-query" with some indexed Game of Thrones articles:
+    # image: "deepset/elasticsearch-game-of-thrones"
     ports:
       - 9200:9200
+    environment:
+      - discovery.type=single-node
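
Because the new default Elasticsearch image starts empty, documents have to be indexed before the API can answer anything. A minimal indexing sketch, assuming the haystack 0.x document-store API imported elsewhere in this commit (the local "data" directory and the "document" index name are illustrative):

from pathlib import Path

from haystack.database.elasticsearch import ElasticsearchDocumentStore

# Use host="elasticsearch" instead when running inside the compose network.
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")

# Build one dict per file; {"name": ..., "text": ...} was the 0.x document format.
docs = [{"name": p.name, "text": p.read_text()} for p in Path("data").glob("*.txt")]
document_store.write_documents(docs)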

View File: haystack/reader/transformers.py

@@ -37,7 +37,7 @@ class TransformersReader(BaseReader):
         :param tokenizer: name of the tokenizer (usually the same as model)
         :param context_window_size: num of chars (before and after the answer) to return as "context" for each answer.
                                     The context usually helps users to understand if the answer really makes sense.
-        :param use_gpu: < 1 -> use cpu
+        :param use_gpu: < 0 -> use cpu
                         >= 0 -> ordinal of the gpu to use
         """
         self.model = pipeline("question-answering", model=model, tokenizer=tokenizer, device=use_gpu)
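
The corrected bounds mirror the device convention of transformers' pipeline(), into which use_gpu is passed unchanged: a negative device runs on CPU, a non-negative one selects that GPU ordinal. For example:

from transformers import pipeline

# use_gpu < 0  -> CPU
reader_cpu = pipeline("question-answering", model="deepset/roberta-base-squad2", device=-1)
# use_gpu >= 0 -> ordinal of the GPU to use (here: the first GPU)
reader_gpu = pipeline("question-answering", model="deepset/roberta-base-squad2", device=0)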

View File: rest_api/config.py

@@ -6,6 +6,7 @@ PROJECT_NAME = os.getenv("PROJECT_NAME", "FastAPI")

 # Resources / Computation
 USE_GPU = os.getenv("USE_GPU", "True").lower() == "true"
+GPU_NUMBER = int(os.getenv("GPU_NUMBER", 1))
 MAX_PROCESSES = int(os.getenv("MAX_PROCESSES", 4))
 BATCHSIZE = int(os.getenv("BATCHSIZE", 50))
 CONCURRENT_REQUEST_PER_WORKER = int(os.getenv("CONCURRENT_REQUEST_PER_WORKER", 4))
@@ -26,6 +27,8 @@ EMBEDDING_DIM = os.getenv("EMBEDDING_DIM", None)

 # Reader
 READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", None)
+READER_USE_TRANSFORMERS = os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true"
+READER_TOKENIZER = os.getenv("READER_TOKENIZER", None)
 CONTEXT_WINDOW_SIZE = int(os.getenv("CONTEXT_WINDOW_SIZE", 500))
 DEFAULT_TOP_K_READER = int(os.getenv("DEFAULT_TOP_K_READER", 5))
 TOP_K_PER_CANDIDATE = int(os.getenv("TOP_K_PER_CANDIDATE", 3))
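
Note how the new READER_USE_TRANSFORMERS flag (like USE_GPU above) is parsed: only the literal string "true", in any casing, enables it. A quick illustration of the pattern:

import os

os.environ["READER_USE_TRANSFORMERS"] = "True"
assert os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true"

os.environ["READER_USE_TRANSFORMERS"] = "1"  # beware: "1" or "yes" still parse as False
assert not (os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true")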

View File: rest_api/controller/search.py

@@ -13,10 +13,11 @@
 from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, \
     EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \
     BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
     DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \
-    EMBEDDING_MODEL_FORMAT
+    EMBEDDING_MODEL_FORMAT, READER_USE_TRANSFORMERS, READER_TOKENIZER, GPU_NUMBER
 from rest_api.controller.utils import RequestLimiter
 from haystack.database.elasticsearch import ElasticsearchDocumentStore
 from haystack.reader.farm import FARMReader
+from haystack.reader.transformers import TransformersReader
 from haystack.retriever.base import BaseRetriever
 from haystack.retriever.sparse import ElasticsearchRetriever
 from haystack.retriever.dense import EmbeddingRetriever
@@ -54,17 +55,26 @@ else:
     retriever = ElasticsearchRetriever(document_store=document_store)

 if READER_MODEL_PATH:  # for extractive doc-qa
-    reader = FARMReader(
-        model_name_or_path=str(READER_MODEL_PATH),
-        batch_size=BATCHSIZE,
-        use_gpu=USE_GPU,
-        context_window_size=CONTEXT_WINDOW_SIZE,
-        top_k_per_candidate=TOP_K_PER_CANDIDATE,
-        no_ans_boost=NO_ANS_BOOST,
-        num_processes=MAX_PROCESSES,
-        max_seq_len=MAX_SEQ_LEN,
-        doc_stride=DOC_STRIDE,
-    )  # type: Optional[FARMReader]
+    if READER_USE_TRANSFORMERS:
+        use_gpu = -1 if not USE_GPU else GPU_NUMBER
+        reader = TransformersReader(
+            model=str(READER_MODEL_PATH),
+            use_gpu=use_gpu,
+            context_window_size=CONTEXT_WINDOW_SIZE,
+            tokenizer=str(READER_TOKENIZER)
+        )  # type: Optional[FARMReader]
+    else:
+        reader = FARMReader(
+            model_name_or_path=str(READER_MODEL_PATH),
+            batch_size=BATCHSIZE,
+            use_gpu=USE_GPU,
+            context_window_size=CONTEXT_WINDOW_SIZE,
+            top_k_per_candidate=TOP_K_PER_CANDIDATE,
+            no_ans_boost=NO_ANS_BOOST,
+            num_processes=MAX_PROCESSES,
+            max_seq_len=MAX_SEQ_LEN,
+            doc_stride=DOC_STRIDE,
+        )  # type: Optional[FARMReader]
 else:
     reader = None  # don't need one for pure FAQ matching
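
With either reader wired in, the running container can be smoke-tested over HTTP. A sketch, with the route and payload shape assumed from the rest_api of this period (adjust to your version):

import requests

# /models/0/doc-qa and the "questions" payload are assumptions, not confirmed by this diff.
response = requests.post(
    "http://localhost:8000/models/0/doc-qa",
    json={"questions": ["Who is the father of Arya Stark?"], "top_k_reader": 3},
)
print(response.json())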