From ec433a5ed61a46f08dceb42aed728fd161355cb1 Mon Sep 17 00:00:00 2001 From: Tanay Soni Date: Mon, 22 Jun 2020 12:07:12 +0200 Subject: [PATCH] Move out REST API from PyPI package (#160) --- README.rst | 2 +- haystack/database/memory.py | 13 ++++++------- {haystack/api => rest_api}/__init__.py | 0 {haystack/api => rest_api}/application.py | 6 +++--- {haystack/api => rest_api}/config.py | 0 {haystack/api => rest_api}/controller/__init__.py | 0 .../api => rest_api}/controller/errors/__init__.py | 0 .../controller/errors/http_error.py | 0 {haystack/api => rest_api}/controller/feedback.py | 6 +++--- .../api => rest_api}/controller/file_upload.py | 2 +- {haystack/api => rest_api}/controller/router.py | 3 ++- {haystack/api => rest_api}/controller/search.py | 4 ++-- {haystack/api => rest_api}/controller/utils.py | 0 {haystack/api => rest_api}/elasticsearch_client.py | 2 +- test/test_faq_retriever.py | 5 ++--- 15 files changed, 21 insertions(+), 22 deletions(-) rename {haystack/api => rest_api}/__init__.py (100%) rename {haystack/api => rest_api}/application.py (86%) rename {haystack/api => rest_api}/config.py (100%) rename {haystack/api => rest_api}/controller/__init__.py (100%) rename {haystack/api => rest_api}/controller/errors/__init__.py (100%) rename {haystack/api => rest_api}/controller/errors/http_error.py (100%) rename {haystack/api => rest_api}/controller/feedback.py (96%) rename {haystack/api => rest_api}/controller/file_upload.py (96%) rename {haystack/api => rest_api}/controller/router.py (71%) rename {haystack/api => rest_api}/controller/search.py (96%) rename {haystack/api => rest_api}/controller/utils.py (100%) rename {haystack/api => rest_api}/elasticsearch_client.py (72%) diff --git a/README.rst b/README.rst index c7ff0d060..37ede785d 100644 --- a/README.rst +++ b/README.rst @@ -226,7 +226,7 @@ A simple REST API based on `FastAPI `_ is provide To serve the API, run:: - gunicorn haystack.api.application:app -b 0.0.0.0:80 -k uvicorn.workers.UvicornWorker + gunicorn rest_api.application:app -b 0.0.0.0:80 -k uvicorn.workers.UvicornWorker You will find the Swagger API documentation at http://127.0.0.1:80/docs diff --git a/haystack/database/memory.py b/haystack/database/memory.py index 313f29cb2..7f0ec311f 100644 --- a/haystack/database/memory.py +++ b/haystack/database/memory.py @@ -8,9 +8,10 @@ class InMemoryDocumentStore(BaseDocumentStore): In-memory document store """ - def __init__(self): - self.docs = {} - self.doc_tags = {} + def __init__(self, embedding_field: Optional[str] = None): + self.docs = {} # type: Dict[str, Any] + self.doc_tags = {} # type: Dict[str, Any] + self.embedding_field = embedding_field def write_documents(self, documents: List[dict]): import hashlib @@ -64,19 +65,17 @@ class InMemoryDocumentStore(BaseDocumentStore): return document def query_by_embedding(self, query_emb: List[float], top_k: int = 10, candidate_doc_ids: Optional[List[str]] = None) -> List[Document]: - from haystack.api import config from numpy import dot from numpy.linalg import norm - embedding_field_name = config.EMBEDDING_FIELD_NAME - if embedding_field_name is None: + if self.embedding_field is None: return [] if query_emb is None: return [] candidate_docs = [self._convert_memory_hit_to_document( - (doc, dot(query_emb, doc[embedding_field_name]) / (norm(query_emb) * norm(doc[embedding_field_name]))), doc_id=idx) for idx, doc in self.docs.items() + (doc, dot(query_emb, doc[self.embedding_field]) / (norm(query_emb) * norm(doc[self.embedding_field]))), doc_id=idx) for idx, doc in self.docs.items() ] return sorted(candidate_docs, key=lambda x: x.query_score, reverse=True)[0:top_k] diff --git a/haystack/api/__init__.py b/rest_api/__init__.py similarity index 100% rename from haystack/api/__init__.py rename to rest_api/__init__.py diff --git a/haystack/api/application.py b/rest_api/application.py similarity index 86% rename from haystack/api/application.py rename to rest_api/application.py index 8d8273e64..e58b4e1f2 100644 --- a/haystack/api/application.py +++ b/rest_api/application.py @@ -6,9 +6,9 @@ from elasticsearch import Elasticsearch from fastapi import FastAPI, HTTPException from starlette.middleware.cors import CORSMiddleware -from haystack.api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME, APM_SERVER, APM_SERVICE_NAME -from haystack.api.controller.errors.http_error import http_error_handler -from haystack.api.controller.router import router as api_router +from rest_api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME, APM_SERVER, APM_SERVICE_NAME +from rest_api.controller.errors.http_error import http_error_handler +from rest_api.controller.router import router as api_router logging.basicConfig(format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p") logger = logging.getLogger(__name__) diff --git a/haystack/api/config.py b/rest_api/config.py similarity index 100% rename from haystack/api/config.py rename to rest_api/config.py diff --git a/haystack/api/controller/__init__.py b/rest_api/controller/__init__.py similarity index 100% rename from haystack/api/controller/__init__.py rename to rest_api/controller/__init__.py diff --git a/haystack/api/controller/errors/__init__.py b/rest_api/controller/errors/__init__.py similarity index 100% rename from haystack/api/controller/errors/__init__.py rename to rest_api/controller/errors/__init__.py diff --git a/haystack/api/controller/errors/http_error.py b/rest_api/controller/errors/http_error.py similarity index 100% rename from haystack/api/controller/errors/http_error.py rename to rest_api/controller/errors/http_error.py diff --git a/haystack/api/controller/feedback.py b/rest_api/controller/feedback.py similarity index 96% rename from haystack/api/controller/feedback.py rename to rest_api/controller/feedback.py index a53910bac..167489a06 100644 --- a/haystack/api/controller/feedback.py +++ b/rest_api/controller/feedback.py @@ -6,7 +6,7 @@ from fastapi import APIRouter, status from fastapi.responses import JSONResponse from pydantic import BaseModel, Field -from haystack.api.config import ( +from rest_api.config import ( DB_HOST, DB_PORT, DB_USER, @@ -19,8 +19,8 @@ from haystack.api.config import ( EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, ) -from haystack.api.config import DB_INDEX_FEEDBACK -from haystack.api.elasticsearch_client import elasticsearch_client +from rest_api.config import DB_INDEX_FEEDBACK +from rest_api.elasticsearch_client import elasticsearch_client from haystack.database.elasticsearch import ElasticsearchDocumentStore router = APIRouter() diff --git a/haystack/api/controller/file_upload.py b/rest_api/controller/file_upload.py similarity index 96% rename from haystack/api/controller/file_upload.py rename to rest_api/controller/file_upload.py index 9ffecc1b6..ed17f4955 100644 --- a/haystack/api/controller/file_upload.py +++ b/rest_api/controller/file_upload.py @@ -8,7 +8,7 @@ from fastapi import APIRouter from fastapi import HTTPException from fastapi import UploadFile, File, Form -from haystack.api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, \ +from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, \ SEARCH_FIELD_NAME, FILE_UPLOAD_PATH, EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, VALID_LANGUAGES, \ FAQ_QUESTION_FIELD_NAME, REMOVE_NUMERIC_TABLES, REMOVE_WHITESPACE, REMOVE_EMPTY_LINES, REMOVE_HEADER_FOOTER from haystack.database.elasticsearch import ElasticsearchDocumentStore diff --git a/haystack/api/controller/router.py b/rest_api/controller/router.py similarity index 71% rename from haystack/api/controller/router.py rename to rest_api/controller/router.py index 725bd5238..f0df4b065 100644 --- a/haystack/api/controller/router.py +++ b/rest_api/controller/router.py @@ -1,6 +1,7 @@ from fastapi import APIRouter -from haystack.api.controller import search, feedback, file_upload +from rest_api.controller import file_upload +from rest_api.controller import search, feedback router = APIRouter() diff --git a/haystack/api/controller/search.py b/rest_api/controller/search.py similarity index 96% rename from haystack/api/controller/search.py rename to rest_api/controller/search.py index 95bf50cb4..1c5c8423e 100644 --- a/haystack/api/controller/search.py +++ b/rest_api/controller/search.py @@ -8,12 +8,12 @@ from fastapi import HTTPException from pydantic import BaseModel from haystack import Finder -from haystack.api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, SEARCH_FIELD_NAME, \ +from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, SEARCH_FIELD_NAME, \ EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \ BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \ DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \ EMBEDDING_MODEL_FORMAT -from haystack.api.controller.utils import RequestLimiter +from rest_api.controller.utils import RequestLimiter from haystack.database.elasticsearch import ElasticsearchDocumentStore from haystack.reader.farm import FARMReader from haystack.retriever.base import BaseRetriever diff --git a/haystack/api/controller/utils.py b/rest_api/controller/utils.py similarity index 100% rename from haystack/api/controller/utils.py rename to rest_api/controller/utils.py diff --git a/haystack/api/elasticsearch_client.py b/rest_api/elasticsearch_client.py similarity index 72% rename from haystack/api/elasticsearch_client.py rename to rest_api/elasticsearch_client.py index 94531a963..f7937554f 100644 --- a/haystack/api/elasticsearch_client.py +++ b/rest_api/elasticsearch_client.py @@ -1,6 +1,6 @@ from elasticsearch import Elasticsearch -from haystack.api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME +from rest_api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME elasticsearch_client = Elasticsearch( hosts=[{"host": DB_HOST, "port": DB_PORT}], http_auth=(DB_USER, DB_PW), scheme=ES_CONN_SCHEME, ca_certs=False, verify_certs=False diff --git a/test/test_faq_retriever.py b/test/test_faq_retriever.py index 986317faa..933a619c1 100644 --- a/test/test_faq_retriever.py +++ b/test/test_faq_retriever.py @@ -1,13 +1,12 @@ from haystack import Finder -def test_faq_retriever_in_memory_store(monkeypatch): - monkeypatch.setenv("EMBEDDING_FIELD_NAME", "embedding") +def test_faq_retriever_in_memory_store(): from haystack.database.memory import InMemoryDocumentStore from haystack.retriever.elasticsearch import EmbeddingRetriever - document_store = InMemoryDocumentStore() + document_store = InMemoryDocumentStore(embedding_field="embedding") documents = [ {'name': 'How to test this library?', 'text': 'By running tox in the command line!', 'meta': {'question': 'How to test this library?'}},