Rename label id field for elastic & add UPDATE_EXISTING_DOCUMENTS to API config (#728)

* Rename the label `id` field to `_id` when indexing labels into Elasticsearch

* Add the UPDATE_EXISTING_DOCUMENTS parameter to the REST API config and pass it to each ElasticsearchDocumentStore as `update_existing_documents`
This commit is contained in:
Malte Pietsch 2021-01-12 13:00:56 +01:00 committed by GitHub
parent b6e64ca42d
commit e9b5439b00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 12 additions and 3 deletions

View File

@ -313,6 +313,10 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
**label.to_dict()
} # type: Dict[str, Any]
# rename id for elastic
if label.id is not None:
_label["_id"] = str(_label.pop("id"))
labels_to_index.append(_label)
bulk(self.client, labels_to_index, request_timeout=300, refresh=self.refresh_type)

View File

@ -27,6 +27,7 @@ EMBEDDING_FIELD_NAME = os.getenv("EMBEDDING_FIELD_NAME", "embedding")
# Embedding dimensionality, read from the environment and converted to int (falls back to 768).
EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", 768))
# Similarity metric name, read from the environment (falls back to "dot_product").
VECTOR_SIMILARITY_METRIC = os.getenv("VECTOR_SIMILARITY_METRIC", "dot_product")
# Boolean flags: the env value is lower-cased and compared to "true", so any
# other value (e.g. "1", "yes") evaluates to False.
# CREATE_INDEX falls back to "True", i.e. enabled unless overridden.
CREATE_INDEX = os.getenv("CREATE_INDEX", "True").lower() == "true"
# UPDATE_EXISTING_DOCUMENTS falls back to "False", i.e. disabled unless the
# env var is set to "true" (case-insensitive).
UPDATE_EXISTING_DOCUMENTS = os.getenv("UPDATE_EXISTING_DOCUMENTS", "False").lower() == "true"
# Reader
READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", "deepset/roberta-base-squad2")

View File

@ -19,7 +19,8 @@ from rest_api.config import (
EXCLUDE_META_DATA_FIELDS,
FAQ_QUESTION_FIELD_NAME,
CREATE_INDEX,
VECTOR_SIMILARITY_METRIC
VECTOR_SIMILARITY_METRIC,
UPDATE_EXISTING_DOCUMENTS
)
router = APIRouter()
@ -41,6 +42,7 @@ document_store = ElasticsearchDocumentStore(
embedding_field=EMBEDDING_FIELD_NAME,
excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore
create_index=CREATE_INDEX,
update_existing_documents=UPDATE_EXISTING_DOCUMENTS,
similarity=VECTOR_SIMILARITY_METRIC
)

View File

@ -12,7 +12,7 @@ from fastapi import UploadFile, File, Form
from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, DB_INDEX_FEEDBACK, ES_CONN_SCHEME, TEXT_FIELD_NAME, \
SEARCH_FIELD_NAME, FILE_UPLOAD_PATH, EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, VALID_LANGUAGES, \
FAQ_QUESTION_FIELD_NAME, REMOVE_NUMERIC_TABLES, REMOVE_WHITESPACE, REMOVE_EMPTY_LINES, REMOVE_HEADER_FOOTER, \
CREATE_INDEX, VECTOR_SIMILARITY_METRIC
CREATE_INDEX, UPDATE_EXISTING_DOCUMENTS, VECTOR_SIMILARITY_METRIC
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.file_converter.pdf import PDFToTextConverter
from haystack.file_converter.txt import TextConverter
@ -39,6 +39,7 @@ document_store = ElasticsearchDocumentStore(
excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore
faq_question_field=FAQ_QUESTION_FIELD_NAME,
create_index=CREATE_INDEX,
update_existing_documents=UPDATE_EXISTING_DOCUMENTS,
similarity=VECTOR_SIMILARITY_METRIC
)

View File

@ -14,7 +14,7 @@ from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, DB_INDEX
RETRIEVER_TYPE, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, BATCHSIZE, CONTEXT_WINDOW_SIZE, \
TOP_K_PER_CANDIDATE, NO_ANS_BOOST, READER_CAN_HAVE_NO_ANSWER, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, EMBEDDING_MODEL_FORMAT, READER_TYPE, READER_TOKENIZER, \
GPU_NUMBER, NAME_FIELD_NAME, VECTOR_SIMILARITY_METRIC, CREATE_INDEX, LOG_LEVEL
GPU_NUMBER, NAME_FIELD_NAME, VECTOR_SIMILARITY_METRIC, CREATE_INDEX, LOG_LEVEL, UPDATE_EXISTING_DOCUMENTS
from rest_api.controller.request import Question
from rest_api.controller.response import Answers, AnswersToIndividualQuestion
@ -52,6 +52,7 @@ document_store = ElasticsearchDocumentStore(
excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore
faq_question_field=FAQ_QUESTION_FIELD_NAME,
create_index=CREATE_INDEX,
update_existing_documents=UPDATE_EXISTING_DOCUMENTS,
similarity=VECTOR_SIMILARITY_METRIC
)