Fix scaling of pseudo-probabilities for Elasticsearch scores. Fix filtering of embedding retrieval (#46)

2025-10-28 00:08:41 +00:00 · 2020-03-22 18:28:35 +01:00 · 2020-03-22 18:28:35 +01:00 · 05aa42c687
commit 05aa42c687
parent 909ff5d92b
2 changed files with 11 additions and 2 deletions
--- a/haystack/database/elasticsearch.py
+++ b/haystack/database/elasticsearch.py
@@ -184,7 +184,11 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
            }

            if candidate_doc_ids:
-                body["query"]["bool"]["filter"] = [{"terms": {"_id": candidate_doc_ids}}]
+                body["query"]["script_score"]["query"] = {
+                    "bool": {
+                        "should": [{"match_all": {}}],
+                        "filter": [{"terms": {"_id": candidate_doc_ids}}]
+                }}

            if self.excluded_meta_data:
                body["_source"] = {"excludes": self.excluded_meta_data}
--- a/haystack/finder.py
+++ b/haystack/finder.py
@@ -1,4 +1,8 @@
 import logging
+from scipy.special import expit
+import numpy as np
+
+
 logger = logging.getLogger(__name__)


@@ -101,8 +105,9 @@ class Finder:
                              "probability": (meta["score"]+1)/2, "offset_start": 0, "offset_end": len(answer),
                              "meta": meta}
            else:
+                pseudo_prob = float(expit(np.asarray(meta["score"]) / 8))
                cur_answer = {"question": meta["question"], "answer": answer, "context": answer, "score": meta["score"],
-                              "probability": meta["score"]/ 10, "offset_start": 0, "offset_end": len(answer), "meta": meta}
+                              "probability": pseudo_prob, "offset_start": 0, "offset_end": len(answer), "meta": meta}
            results["answers"].append(cur_answer)

        return results