mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-22 23:48:09 +00:00
Move out REST API from PyPI package (#160)
This commit is contained in:
parent
03acb1ee32
commit
ec433a5ed6
@ -226,7 +226,7 @@ A simple REST API based on `FastAPI <https://fastapi.tiangolo.com/>`_ is provide
|
|||||||
|
|
||||||
To serve the API, run::
|
To serve the API, run::
|
||||||
|
|
||||||
gunicorn haystack.api.application:app -b 0.0.0.0:80 -k uvicorn.workers.UvicornWorker
|
gunicorn rest_api.application:app -b 0.0.0.0:80 -k uvicorn.workers.UvicornWorker
|
||||||
|
|
||||||
You will find the Swagger API documentation at http://127.0.0.1:80/docs
|
You will find the Swagger API documentation at http://127.0.0.1:80/docs
|
||||||
|
|
||||||
|
@ -8,9 +8,10 @@ class InMemoryDocumentStore(BaseDocumentStore):
|
|||||||
In-memory document store
|
In-memory document store
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, embedding_field: Optional[str] = None):
|
||||||
self.docs = {}
|
self.docs = {} # type: Dict[str, Any]
|
||||||
self.doc_tags = {}
|
self.doc_tags = {} # type: Dict[str, Any]
|
||||||
|
self.embedding_field = embedding_field
|
||||||
|
|
||||||
def write_documents(self, documents: List[dict]):
|
def write_documents(self, documents: List[dict]):
|
||||||
import hashlib
|
import hashlib
|
||||||
@ -64,19 +65,17 @@ class InMemoryDocumentStore(BaseDocumentStore):
|
|||||||
return document
|
return document
|
||||||
|
|
||||||
def query_by_embedding(self, query_emb: List[float], top_k: int = 10, candidate_doc_ids: Optional[List[str]] = None) -> List[Document]:
|
def query_by_embedding(self, query_emb: List[float], top_k: int = 10, candidate_doc_ids: Optional[List[str]] = None) -> List[Document]:
|
||||||
from haystack.api import config
|
|
||||||
from numpy import dot
|
from numpy import dot
|
||||||
from numpy.linalg import norm
|
from numpy.linalg import norm
|
||||||
|
|
||||||
embedding_field_name = config.EMBEDDING_FIELD_NAME
|
if self.embedding_field is None:
|
||||||
if embedding_field_name is None:
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
if query_emb is None:
|
if query_emb is None:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
candidate_docs = [self._convert_memory_hit_to_document(
|
candidate_docs = [self._convert_memory_hit_to_document(
|
||||||
(doc, dot(query_emb, doc[embedding_field_name]) / (norm(query_emb) * norm(doc[embedding_field_name]))), doc_id=idx) for idx, doc in self.docs.items()
|
(doc, dot(query_emb, doc[self.embedding_field]) / (norm(query_emb) * norm(doc[self.embedding_field]))), doc_id=idx) for idx, doc in self.docs.items()
|
||||||
]
|
]
|
||||||
|
|
||||||
return sorted(candidate_docs, key=lambda x: x.query_score, reverse=True)[0:top_k]
|
return sorted(candidate_docs, key=lambda x: x.query_score, reverse=True)[0:top_k]
|
||||||
|
@ -6,9 +6,9 @@ from elasticsearch import Elasticsearch
|
|||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
from starlette.middleware.cors import CORSMiddleware
|
from starlette.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
from haystack.api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME, APM_SERVER, APM_SERVICE_NAME
|
from rest_api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME, APM_SERVER, APM_SERVICE_NAME
|
||||||
from haystack.api.controller.errors.http_error import http_error_handler
|
from rest_api.controller.errors.http_error import http_error_handler
|
||||||
from haystack.api.controller.router import router as api_router
|
from rest_api.controller.router import router as api_router
|
||||||
|
|
||||||
logging.basicConfig(format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p")
|
logging.basicConfig(format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p")
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
@ -6,7 +6,7 @@ from fastapi import APIRouter, status
|
|||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from haystack.api.config import (
|
from rest_api.config import (
|
||||||
DB_HOST,
|
DB_HOST,
|
||||||
DB_PORT,
|
DB_PORT,
|
||||||
DB_USER,
|
DB_USER,
|
||||||
@ -19,8 +19,8 @@ from haystack.api.config import (
|
|||||||
EMBEDDING_FIELD_NAME,
|
EMBEDDING_FIELD_NAME,
|
||||||
EXCLUDE_META_DATA_FIELDS,
|
EXCLUDE_META_DATA_FIELDS,
|
||||||
)
|
)
|
||||||
from haystack.api.config import DB_INDEX_FEEDBACK
|
from rest_api.config import DB_INDEX_FEEDBACK
|
||||||
from haystack.api.elasticsearch_client import elasticsearch_client
|
from rest_api.elasticsearch_client import elasticsearch_client
|
||||||
from haystack.database.elasticsearch import ElasticsearchDocumentStore
|
from haystack.database.elasticsearch import ElasticsearchDocumentStore
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
@ -8,7 +8,7 @@ from fastapi import APIRouter
|
|||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from fastapi import UploadFile, File, Form
|
from fastapi import UploadFile, File, Form
|
||||||
|
|
||||||
from haystack.api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, \
|
from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, \
|
||||||
SEARCH_FIELD_NAME, FILE_UPLOAD_PATH, EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, VALID_LANGUAGES, \
|
SEARCH_FIELD_NAME, FILE_UPLOAD_PATH, EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, VALID_LANGUAGES, \
|
||||||
FAQ_QUESTION_FIELD_NAME, REMOVE_NUMERIC_TABLES, REMOVE_WHITESPACE, REMOVE_EMPTY_LINES, REMOVE_HEADER_FOOTER
|
FAQ_QUESTION_FIELD_NAME, REMOVE_NUMERIC_TABLES, REMOVE_WHITESPACE, REMOVE_EMPTY_LINES, REMOVE_HEADER_FOOTER
|
||||||
from haystack.database.elasticsearch import ElasticsearchDocumentStore
|
from haystack.database.elasticsearch import ElasticsearchDocumentStore
|
@ -1,6 +1,7 @@
|
|||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
|
|
||||||
from haystack.api.controller import search, feedback, file_upload
|
from rest_api.controller import file_upload
|
||||||
|
from rest_api.controller import search, feedback
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
@ -8,12 +8,12 @@ from fastapi import HTTPException
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from haystack import Finder
|
from haystack import Finder
|
||||||
from haystack.api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, SEARCH_FIELD_NAME, \
|
from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, ES_CONN_SCHEME, TEXT_FIELD_NAME, SEARCH_FIELD_NAME, \
|
||||||
EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \
|
EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \
|
||||||
BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
|
BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
|
||||||
DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \
|
DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \
|
||||||
EMBEDDING_MODEL_FORMAT
|
EMBEDDING_MODEL_FORMAT
|
||||||
from haystack.api.controller.utils import RequestLimiter
|
from rest_api.controller.utils import RequestLimiter
|
||||||
from haystack.database.elasticsearch import ElasticsearchDocumentStore
|
from haystack.database.elasticsearch import ElasticsearchDocumentStore
|
||||||
from haystack.reader.farm import FARMReader
|
from haystack.reader.farm import FARMReader
|
||||||
from haystack.retriever.base import BaseRetriever
|
from haystack.retriever.base import BaseRetriever
|
@ -1,6 +1,6 @@
|
|||||||
from elasticsearch import Elasticsearch
|
from elasticsearch import Elasticsearch
|
||||||
|
|
||||||
from haystack.api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME
|
from rest_api.config import DB_HOST, DB_USER, DB_PW, DB_PORT, ES_CONN_SCHEME
|
||||||
|
|
||||||
elasticsearch_client = Elasticsearch(
|
elasticsearch_client = Elasticsearch(
|
||||||
hosts=[{"host": DB_HOST, "port": DB_PORT}], http_auth=(DB_USER, DB_PW), scheme=ES_CONN_SCHEME, ca_certs=False, verify_certs=False
|
hosts=[{"host": DB_HOST, "port": DB_PORT}], http_auth=(DB_USER, DB_PW), scheme=ES_CONN_SCHEME, ca_certs=False, verify_certs=False
|
@ -1,13 +1,12 @@
|
|||||||
from haystack import Finder
|
from haystack import Finder
|
||||||
|
|
||||||
|
|
||||||
def test_faq_retriever_in_memory_store(monkeypatch):
|
def test_faq_retriever_in_memory_store():
|
||||||
monkeypatch.setenv("EMBEDDING_FIELD_NAME", "embedding")
|
|
||||||
|
|
||||||
from haystack.database.memory import InMemoryDocumentStore
|
from haystack.database.memory import InMemoryDocumentStore
|
||||||
from haystack.retriever.elasticsearch import EmbeddingRetriever
|
from haystack.retriever.elasticsearch import EmbeddingRetriever
|
||||||
|
|
||||||
document_store = InMemoryDocumentStore()
|
document_store = InMemoryDocumentStore(embedding_field="embedding")
|
||||||
|
|
||||||
documents = [
|
documents = [
|
||||||
{'name': 'How to test this library?', 'text': 'By running tox in the command line!', 'meta': {'question': 'How to test this library?'}},
|
{'name': 'How to test this library?', 'text': 'By running tox in the command line!', 'meta': {'question': 'How to test this library?'}},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user