Fix scaling of pseudo probs for ES scores. Fix filtering of embedding retrieval (#46)

This commit is contained in:
Malte Pietsch 2020-03-22 18:28:35 +01:00 committed by GitHub
parent 909ff5d92b
commit 05aa42c687
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 2 deletions

View File

@ -184,7 +184,11 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
}
if candidate_doc_ids:
body["query"]["bool"]["filter"] = [{"terms": {"_id": candidate_doc_ids}}]
body["query"]["script_score"]["query"] = {
"bool": {
"should": [{"match_all": {}}],
"filter": [{"terms": {"_id": candidate_doc_ids}}]
}}
if self.excluded_meta_data:
body["_source"] = {"excludes": self.excluded_meta_data}

View File

@ -1,4 +1,8 @@
import logging
from scipy.special import expit
import numpy as np
logger = logging.getLogger(__name__)
@ -101,8 +105,9 @@ class Finder:
"probability": (meta["score"]+1)/2, "offset_start": 0, "offset_end": len(answer),
"meta": meta}
else:
pseudo_prob = float(expit(np.asarray(meta["score"]) / 8))
cur_answer = {"question": meta["question"], "answer": answer, "context": answer, "score": meta["score"],
"probability": meta["score"]/ 10, "offset_start": 0, "offset_end": len(answer), "meta": meta}
"probability": pseudo_prob, "offset_start": 0, "offset_end": len(answer), "meta": meta}
results["answers"].append(cur_answer)
return results