Mirror of https://github.com/deepset-ai/haystack.git (synced 2026-01-07 20:46:31 +00:00)
Adjust Docker and REST API to allow TransformersReader class (#180)

parent fe33a481ad
commit 8a616dae75
@@ -14,9 +14,12 @@ COPY rest_api /home/user/rest_api
 # copy saved FARM models
 COPY models /home/user/models
 
-# copy sqlite db if needed for testing
+# optional : copy sqlite db if needed for testing
 #COPY qa.db /home/user/
 
+# optional: copy data directory containing docs for indexing
+#COPY data /home/user/data
+
 EXPOSE 8000
 
 # cmd for running the API
@@ -22,6 +22,9 @@ COPY models /home/user/models
 # Optional: copy sqlite db if needed for testing
 #COPY qa.db /home/user/
 
+# Optional: copy data directory containing docs for indexing
+#COPY data /home/user/data
+
 EXPOSE 8000
 
 ENV LC_ALL=C.UTF-8
@@ -1,19 +1,36 @@
 version: '3'
 services:
   haystack-api:
-    image: "deepset/haystack-cpu:0.2.0"
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: "deepset/haystack-cpu:latest"
     ports:
       - 8000:8000
+    volumes:
+      # Optional: mount your own models from disk into the container
+      - "./models:/home/user/models"
     environment:
-      # see haystack/api/config.py for additional variables to configure.
-      # load reader model from transformers' model hub.
-      - READER_MODEL_PATH=deepset/roberta-base-squad2
+      # See rest_api/config.py for more variables that you can configure here.
       - DB_HOST=elasticsearch
+      - USE_GPU=False
+      # Load a model from transformers' model hub or a local path into the FARMReader.
+      - READER_MODEL_PATH=deepset/roberta-base-squad2
+      # - READER_MODEL_PATH=home/user/models/roberta-base-squad2
+      # Alternative: If you want to use the TransformersReader (e.g. for loading a local model in transformers format):
+      # - READER_USE_TRANSFORMERS=True
+      # - READER_MODEL_PATH=/home/user/models/roberta-base-squad2
+      # - READER_TOKENIZER=/home/user/models/roberta-base-squad2
     restart: always
 
     depends_on:
       - elasticsearch
     command: "/bin/bash -c 'sleep 15 && gunicorn rest_api.application:app -b 0.0.0.0 -k uvicorn.workers.UvicornWorker --workers 1 --timeout 180'"
   elasticsearch:
-    # This demo image contains Game of Thrones Wikipedia articles indexed.
-    # For starting a new Elasticsearch instance, replace image with "elasticsearch:7.6.1".
-    image: "deepset/elasticsearch-game-of-thrones"
+    # This will start an empty elasticsearch instance (so you have to add your documents yourself)
+    image: "elasticsearch:7.6.1"
+    # If you want a demo image instead that is "ready-to-query" with some indexed Game of Thrones articles:
+    # image: "deepset/elasticsearch-game-of-thrones"
     ports:
       - 9200:9200
     environment:
       - discovery.type=single-node
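For reference, the REST API turns these string-valued variables into a reader device roughly as follows — a minimal Python sketch mirroring rest_api/config.py and the controller change further down, not part of the commit itself:

import os

# Env vars arrive as strings; rest_api/config.py coerces them like this.
USE_GPU = os.getenv("USE_GPU", "True").lower() == "true"
GPU_NUMBER = int(os.getenv("GPU_NUMBER", 1))
READER_USE_TRANSFORMERS = os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true"

# The controller then maps this onto the device convention of transformers'
# pipeline(): -1 selects CPU, 0 and upwards select a GPU ordinal.
use_gpu = -1 if not USE_GPU else GPU_NUMBER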
@@ -37,7 +37,7 @@ class TransformersReader(BaseReader):
         :param tokenizer: name of the tokenizer (usually the same as model)
         :param context_window_size: num of chars (before and after the answer) to return as "context" for each answer.
                                     The context usually helps users to understand if the answer really makes sense.
-        :param use_gpu: < 1 -> use cpu
+        :param use_gpu: < 0 -> use cpu
                         >= 0 -> ordinal of the gpu to use
         """
         self.model = pipeline("question-answering", model=model, tokenizer=tokenizer, device=use_gpu)
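A minimal usage sketch of the corrected device contract (the model and tokenizer values are illustrative):

from haystack.reader.transformers import TransformersReader

# use_gpu < 0 keeps the underlying transformers pipeline on CPU;
# use_gpu >= 0 is the ordinal of the GPU to run on.
cpu_reader = TransformersReader(
    model="deepset/roberta-base-squad2",
    tokenizer="deepset/roberta-base-squad2",
    use_gpu=-1,  # CPU
)
gpu_reader = TransformersReader(
    model="deepset/roberta-base-squad2",
    tokenizer="deepset/roberta-base-squad2",
    use_gpu=0,   # first GPU
)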
@@ -6,6 +6,7 @@ PROJECT_NAME = os.getenv("PROJECT_NAME", "FastAPI")
 
 # Resources / Computation
 USE_GPU = os.getenv("USE_GPU", "True").lower() == "true"
+GPU_NUMBER = int(os.getenv("GPU_NUMBER", 1))
 MAX_PROCESSES = int(os.getenv("MAX_PROCESSES", 4))
 BATCHSIZE = int(os.getenv("BATCHSIZE", 50))
 CONCURRENT_REQUEST_PER_WORKER = int(os.getenv("CONCURRENT_REQUEST_PER_WORKER", 4))
@@ -26,6 +27,8 @@ EMBEDDING_DIM = os.getenv("EMBEDDING_DIM", None)
 
 # Reader
 READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", None)
+READER_USE_TRANSFORMERS = os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true"
+READER_TOKENIZER = os.getenv("READER_TOKENIZER", None)
 CONTEXT_WINDOW_SIZE = int(os.getenv("CONTEXT_WINDOW_SIZE", 500))
 DEFAULT_TOP_K_READER = int(os.getenv("DEFAULT_TOP_K_READER", 5))
 TOP_K_PER_CANDIDATE = int(os.getenv("TOP_K_PER_CANDIDATE", 3))
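An illustrative check of how the two new variables behave (not part of the commit):

import os

# Boolean env vars are compared case-insensitively against "true".
os.environ["READER_USE_TRANSFORMERS"] = "True"
assert os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true"

# READER_TOKENIZER stays None when unset; since the controller below wraps it
# in str(), it should be set explicitly whenever READER_USE_TRANSFORMERS is on.
READER_TOKENIZER = os.getenv("READER_TOKENIZER", None)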
@@ -13,10 +13,11 @@ from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_
     EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \
     BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
     DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \
-    EMBEDDING_MODEL_FORMAT
+    EMBEDDING_MODEL_FORMAT, READER_USE_TRANSFORMERS, READER_TOKENIZER, GPU_NUMBER
 from rest_api.controller.utils import RequestLimiter
 from haystack.database.elasticsearch import ElasticsearchDocumentStore
 from haystack.reader.farm import FARMReader
+from haystack.reader.transformers import TransformersReader
 from haystack.retriever.base import BaseRetriever
 from haystack.retriever.sparse import ElasticsearchRetriever
 from haystack.retriever.dense import EmbeddingRetriever
@@ -54,17 +55,26 @@ else:
     retriever = ElasticsearchRetriever(document_store=document_store)
 
 if READER_MODEL_PATH:  # for extractive doc-qa
-    reader = FARMReader(
-        model_name_or_path=str(READER_MODEL_PATH),
-        batch_size=BATCHSIZE,
-        use_gpu=USE_GPU,
-        context_window_size=CONTEXT_WINDOW_SIZE,
-        top_k_per_candidate=TOP_K_PER_CANDIDATE,
-        no_ans_boost=NO_ANS_BOOST,
-        num_processes=MAX_PROCESSES,
-        max_seq_len=MAX_SEQ_LEN,
-        doc_stride=DOC_STRIDE,
-    )  # type: Optional[FARMReader]
+    if READER_USE_TRANSFORMERS:
+        use_gpu = -1 if not USE_GPU else GPU_NUMBER
+        reader = TransformersReader(
+            model=str(READER_MODEL_PATH),
+            use_gpu=use_gpu,
+            context_window_size=CONTEXT_WINDOW_SIZE,
+            tokenizer=str(READER_TOKENIZER)
+        )  # type: Optional[FARMReader]
+    else:
+        reader = FARMReader(
+            model_name_or_path=str(READER_MODEL_PATH),
+            batch_size=BATCHSIZE,
+            use_gpu=USE_GPU,
+            context_window_size=CONTEXT_WINDOW_SIZE,
+            top_k_per_candidate=TOP_K_PER_CANDIDATE,
+            no_ans_boost=NO_ANS_BOOST,
+            num_processes=MAX_PROCESSES,
+            max_seq_len=MAX_SEQ_LEN,
+            doc_stride=DOC_STRIDE,
+        )  # type: Optional[FARMReader]
 else:
     reader = None  # don't need one for pure FAQ matching
 
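Condensed, the controller's new selection logic reads as below. The helper name and the BaseReader import path are assumptions for illustration, not part of the commit:

from typing import Optional

from haystack.reader.base import BaseReader  # assumed location of the common base class
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader


def build_reader(model_path: Optional[str], use_transformers: bool,
                 use_gpu: bool, gpu_number: int) -> Optional[BaseReader]:
    """Hypothetical helper mirroring the branching introduced above."""
    if not model_path:
        return None  # pure FAQ matching needs no reader
    if use_transformers:
        device = -1 if not use_gpu else gpu_number  # -1 = CPU, >= 0 = GPU ordinal
        return TransformersReader(model=model_path, tokenizer=model_path, use_gpu=device)
    return FARMReader(model_name_or_path=model_path, use_gpu=use_gpu)

Either branch yields an object with the same reader interface (both classes extend BaseReader), which is why the rest of the controller can use them interchangeably.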