Move out REST API from PyPI package (#160)

This commit is contained in:
Tanay Soni 2020-06-22 12:07:12 +02:00 committed by GitHub
parent 03acb1ee32
commit ec433a5ed6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 21 additions and 22 deletions

View File

@@ -226,7 +226,7 @@ A simple REST API based on `FastAPI <https://fastapi.tiangolo.com/>`_ is provide
 To serve the API, run::
-gunicorn haystack.api.application:app -b 0.0.0.0:80 -k uvicorn.workers.UvicornWorker
+gunicorn rest_api.application:app -b 0.0.0.0:80 -k uvicorn.workers.UvicornWorker
 You will find the Swagger API documentation at http://127.0.0.1:80/docs

View File

@@ -8,9 +8,10 @@ class InMemoryDocumentStore(BaseDocumentStore):
 In-memory document store
 """
-def __init__(self):
-self.docs = {}
-self.doc_tags = {}
+def __init__(self, embedding_field: Optional[str] = None):
+self.docs = {} # type: Dict[str, Any]
+self.doc_tags = {} # type: Dict[str, Any]
+self.embedding_field = embedding_field
 def write_documents(self, documents: List[dict]):
 import hashlib
@@ -64,19 +65,17 @@ class InMemoryDocumentStore(BaseDocumentStore):
 return document
 def query_by_embedding(self, query_emb: List[float], top_k: int = 10, candidate_doc_ids: Optional[List[str]] = None) -> List[Document]:
-from haystack.api import config
 from numpy import dot
 from numpy.linalg import norm
-embedding_field_name = config.EMBEDDING_FIELD_NAME
-if embedding_field_name is None:
+if self.embedding_field is None:
 return []
 if query_emb is None:
 return []
 candidate_docs = [self._convert_memory_hit_to_document(
-(doc, dot(query_emb, doc[embedding_field_name]) / (norm(query_emb) * norm(doc[embedding_field_name]))), doc_id=idx) for idx, doc in self.docs.items()
+(doc, dot(query_emb, doc[self.embedding_field]) / (norm(query_emb) * norm(doc[self.embedding_field]))), doc_id=idx) for idx, doc in self.docs.items()
 ]
 return sorted(candidate_docs, key=lambda x: x.query_score, reverse=True)[0:top_k]

View File

@@ -6,9 +6,9 @@ from elasticsearch import Elasticsearch
 from fastapi import FastAPI, HTTPException
 from starlette.middleware.cors import CORSMiddleware
-from haystack.api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME, APM_SERVER, APM_SERVICE_NAME
-from haystack.api.controller.errors.http_error import http_error_handler
-from haystack.api.controller.router import router as api_router
+from rest_api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME, APM_SERVER, APM_SERVICE_NAME
+from rest_api.controller.errors.http_error import http_error_handler
+from rest_api.controller.router import router as api_router
 logging.basicConfig(format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p")
 logger = logging.getLogger(__name__)

View File

@@ -6,7 +6,7 @@ from fastapi import APIRouter, status
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
-from haystack.api.config import (
+from rest_api.config import (
 DB_HOST,
 DB_PORT,
 DB_USER,
@@ -19,8 +19,8 @@ from haystack.api.config import (
 EMBEDDING_FIELD_NAME,
 EXCLUDE_META_DATA_FIELDS,
 )
-from haystack.api.config import DB_INDEX_FEEDBACK
-from haystack.api.elasticsearch_client import elasticsearch_client
+from rest_api.config import DB_INDEX_FEEDBACK
+from rest_api.elasticsearch_client import elasticsearch_client
 from haystack.database.elasticsearch import ElasticsearchDocumentStore
 router = APIRouter()

View File

@@ -8,7 +8,7 @@ from fastapi import APIRouter
 from fastapi import HTTPException
 from fastapi import UploadFile, File, Form
-from haystack.api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, \
+from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, \
 SEARCH_FIELD_NAME, FILE_UPLOAD_PATH, EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, VALID_LANGUAGES, \
 FAQ_QUESTION_FIELD_NAME, REMOVE_NUMERIC_TABLES, REMOVE_WHITESPACE, REMOVE_EMPTY_LINES, REMOVE_HEADER_FOOTER
 from haystack.database.elasticsearch import ElasticsearchDocumentStore

View File

@@ -1,6 +1,7 @@
 from fastapi import APIRouter
-from haystack.api.controller import search, feedback, file_upload
+from rest_api.controller import file_upload
+from rest_api.controller import search, feedback
 router = APIRouter()

View File

@@ -8,12 +8,12 @@ from fastapi import HTTPException
 from pydantic import BaseModel
 from haystack import Finder
-from haystack.api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, SEARCH_FIELD_NAME, \
+from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, SEARCH_FIELD_NAME, \
 EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \
 BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
 DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \
 EMBEDDING_MODEL_FORMAT
-from haystack.api.controller.utils import RequestLimiter
+from rest_api.controller.utils import RequestLimiter
 from haystack.database.elasticsearch import ElasticsearchDocumentStore
 from haystack.reader.farm import FARMReader
 from haystack.retriever.base import BaseRetriever

View File

@@ -1,6 +1,6 @@
 from elasticsearch import Elasticsearch
-from haystack.api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME
+from rest_api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME
 elasticsearch_client = Elasticsearch(
 hosts=[{"host": DB_HOST, "port": DB_PORT}], http_auth=(DB_USER, DB_PW), scheme=ES_CONN_SCHEME, ca_certs=False, verify_certs=False

View File

@@ -1,13 +1,12 @@
 from haystack import Finder
-def test_faq_retriever_in_memory_store(monkeypatch):
-monkeypatch.setenv("EMBEDDING_FIELD_NAME", "embedding")
+def test_faq_retriever_in_memory_store():
 from haystack.database.memory import InMemoryDocumentStore
 from haystack.retriever.elasticsearch import EmbeddingRetriever
-document_store = InMemoryDocumentStore()
+document_store = InMemoryDocumentStore(embedding_field="embedding")
 documents = [
 {'name': 'How to test this library?', 'text': 'By running tox in the command line!', 'meta': {'question': 'How to test this library?'}},