Fix confusing elasticsearch exception (#2763)

* Convert the confusing exception into a warning and add a warning for the case where the index contains no documents.

* blacken

* fix test
This commit is contained in:
tstadel 2022-07-06 15:40:51 +02:00 committed by GitHub
parent a2905d05f7
commit e9219f4dc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 7 deletions

View File

@@ -1221,16 +1221,17 @@ class BaseElasticsearchDocumentStore(KeywordDocumentStore):
try:
result = self.client.search(index=index, body=body, request_timeout=300, headers=headers)["hits"]["hits"]
if len(result) == 0:
count_documents = self.get_document_count(index=index, headers=headers)
if count_documents == 0:
logger.warning("Index is empty. First add some documents to search them.")
count_embeddings = self.get_embedding_count(index=index, headers=headers)
if count_embeddings == 0:
raise RequestError(
400, "search_phase_execution_exception", {"error": "No documents with embeddings."}
)
logger.warning("No documents with embeddings. Run the document store's update_embeddings() method.")
except RequestError as e:
if e.error == "search_phase_execution_exception":
error_message: str = (
"search_phase_execution_exception: Likely some of your stored documents don't have embeddings."
" Run the document store's update_embeddings() method."
"search_phase_execution_exception: Likely some of your stored documents don't have embeddings. "
"Run the document store's update_embeddings() method."
)
raise RequestError(e.status_code, error_message, e.info)
raise e

View File

@@ -1319,7 +1319,7 @@ def test_get_document_count_only_documents_without_embedding_arg():
@pytest.mark.elasticsearch
def test_skip_missing_embeddings():
def test_skip_missing_embeddings(caplog):
documents = [
{"content": "text1", "id": "1"}, # a document without embeddings
{"content": "text2", "id": "2", "embedding": np.random.rand(768).astype(np.float64)},
@@ -1349,8 +1349,9 @@ def test_skip_missing_embeddings():
document_store.write_documents(documents)
document_store.skip_missing_embeddings = True
with pytest.raises(RequestError):
with caplog.at_level(logging.WARNING):
document_store.query_by_embedding(np.random.rand(768).astype(np.float32))
assert "No documents with embeddings. Run the document store's update_embeddings() method." in caplog.text
@pytest.mark.elasticsearch