diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a3f2eff6d..4693a4857 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: - name: Run Elasticsearch uses: elastic/elastic-github-actions/elasticsearch@25ad91e35aeee806711d335fc9dec7927ae49bc6 with: - stack-version: 7.6.0 + stack-version: 7.9.2 - name: Run Apache Tika run: docker run -d -p 9998:9998 apache/tika:1.24.1 diff --git a/haystack/document_store/elasticsearch.py b/haystack/document_store/elasticsearch.py index 099cffdcd..506526fbd 100644 --- a/haystack/document_store/elasticsearch.py +++ b/haystack/document_store/elasticsearch.py @@ -460,7 +460,8 @@ class ElasticsearchDocumentStore(BaseDocumentStore): "script_score": { "query": {"match_all": {}}, "script": { - "source": f"{self.similarity_fn_name}(params.query_vector,'{self.embedding_field}') + 1.0", + # offset score to ensure a positive range as required by Elasticsearch + "source": f"{self.similarity_fn_name}(params.query_vector,'{self.embedding_field}') + 1000", "params": { "query_vector": query_emb.tolist() } @@ -497,7 +498,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore): score = hit["_score"] if hit["_score"] else None if score: if adapt_score_for_embedding: - score -= 1 + score -= 1000 probability = (score + 1) / 2 # scaling probability from cosine similarity else: probability = float(expit(np.asarray(score / 8))) # scaling probability from TFIDF/BM25 diff --git a/test/conftest.py b/test/conftest.py index a74ee8fb1..e5903500b 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -33,7 +33,7 @@ def elasticsearch_fixture(): shell=True ) status = subprocess.run( - ['docker run -d --name haystack_test_elastic -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.1'], + ['docker run -d --name haystack_test_elastic -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True ) if status.returncode: @@ -160,12 +160,12 @@ def document_store(request, test_docs_xs, elasticsearch_fixture): return get_document_store(request.param) -@pytest.fixture(params=["es_filter_only", "elsticsearch", "dpr", "embedding", "tfidf"]) +@pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"]) def retriever(request, document_store): return get_retriever(request.param, document_store) -@pytest.fixture(params=["es_filter_only", "elsticsearch", "dpr", "embedding", "tfidf"]) +@pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"]) def retriever_with_docs(request, document_store_with_docs): return get_retriever(request.param, document_store_with_docs) @@ -206,7 +206,7 @@ def get_retriever(retriever_type, document_store): retriever = EmbeddingRetriever(document_store=document_store, embedding_model="deepset/sentence_bert", use_gpu=False) - elif retriever_type == "elsticsearch": + elif retriever_type == "elasticsearch": retriever = ElasticsearchRetriever(document_store=document_store) elif retriever_type == "es_filter_only": retriever = ElasticsearchFilterOnlyRetriever(document_store=document_store) diff --git a/test/test_elastic_retriever.py b/test/test_elastic_retriever.py index 06eb6f436..0e4540c3f 100644 --- a/test/test_elastic_retriever.py +++ b/test/test_elastic_retriever.py @@ -2,7 +2,7 @@ import pytest @pytest.mark.parametrize("document_store_with_docs", [("elasticsearch")], indirect=True) -@pytest.mark.parametrize("retriever_with_docs", ["elsticsearch"], indirect=True) +@pytest.mark.parametrize("retriever_with_docs", ["elasticsearch"], indirect=True) def test_elasticsearch_retrieval(retriever_with_docs, document_store_with_docs): res = retriever_with_docs.retrieve(query="Who lives in Berlin?") assert res[0].text == "My name is Carla and I live in Berlin" @@ -11,7 +11,7 @@ def test_elasticsearch_retrieval(retriever_with_docs, document_store_with_docs): @pytest.mark.parametrize("document_store_with_docs", [("elasticsearch")], indirect=True) -@pytest.mark.parametrize("retriever_with_docs", ["elsticsearch"], indirect=True) +@pytest.mark.parametrize("retriever_with_docs", ["elasticsearch"], indirect=True) def test_elasticsearch_retrieval_filters(retriever_with_docs, document_store_with_docs): res = retriever_with_docs.retrieve(query="Who lives in Berlin?", filters={"name": ["filename1"]}) assert res[0].text == "My name is Carla and I live in Berlin" diff --git a/test/test_eval.py b/test/test_eval.py index 6334a576c..a54fe0009 100644 --- a/test/test_eval.py +++ b/test/test_eval.py @@ -61,7 +61,7 @@ def test_eval_reader(reader, document_store: BaseDocumentStore): @pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True) @pytest.mark.parametrize("open_domain", [True, False]) -@pytest.mark.parametrize("retriever", ["elsticsearch"], indirect=True) +@pytest.mark.parametrize("retriever", ["elasticsearch"], indirect=True) def test_eval_elastic_retriever(document_store: BaseDocumentStore, open_domain, retriever): # add eval data (SQUAD format) document_store.delete_all_documents(index="test_eval_document") @@ -81,7 +81,7 @@ def test_eval_elastic_retriever(document_store: BaseDocumentStore, open_domain, @pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True) @pytest.mark.parametrize("reader", ["farm"], indirect=True) -@pytest.mark.parametrize("retriever", ["elsticsearch"], indirect=True) +@pytest.mark.parametrize("retriever", ["elasticsearch"], indirect=True) def test_eval_finder(document_store: BaseDocumentStore, reader, retriever): finder = Finder(reader=reader, retriever=retriever) diff --git a/tutorials/Tutorial4_FAQ_style_QA.ipynb b/tutorials/Tutorial4_FAQ_style_QA.ipynb index 823999ca0..5afcf24c1 100644 --- a/tutorials/Tutorial4_FAQ_style_QA.ipynb +++ b/tutorials/Tutorial4_FAQ_style_QA.ipynb @@ -71,7 +71,7 @@ "outputs": [], "source": [ "# Recommended: Start Elasticsearch using Docker\n", - "# ! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2" + "# ! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.9.2" ] }, { @@ -81,13 +81,13 @@ "outputs": [], "source": [ "# In Colab / No Docker environments: Start Elasticsearch from source\n", - "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n", - "! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n", - "! chown -R daemon:daemon elasticsearch-7.6.2\n", + "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", + "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", + "! chown -R daemon:daemon elasticsearch-7.9.2\n", "\n", "import os\n", "from subprocess import Popen, PIPE, STDOUT\n", - "es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n", + "es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],\n", " stdout=PIPE, stderr=STDOUT,\n", " preexec_fn=lambda: os.setuid(1) # as daemon\n", " )\n", diff --git a/tutorials/Tutorial4_FAQ_style_QA.py b/tutorials/Tutorial4_FAQ_style_QA.py index cc371db3b..5b6f88f45 100755 --- a/tutorials/Tutorial4_FAQ_style_QA.py +++ b/tutorials/Tutorial4_FAQ_style_QA.py @@ -26,7 +26,7 @@ LAUNCH_ELASTICSEARCH=True if LAUNCH_ELASTICSEARCH: logging.info("Starting Elasticsearch ...") status = subprocess.run( - ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.2'], shell=True + ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True ) if status.returncode: raise Exception("Failed to launch Elasticsearch. If you want to connect to an existing Elasticsearch instance" diff --git a/tutorials/Tutorial5_Evaluation.ipynb b/tutorials/Tutorial5_Evaluation.ipynb index 90e9a5dff..8bc93bd62 100644 --- a/tutorials/Tutorial5_Evaluation.ipynb +++ b/tutorials/Tutorial5_Evaluation.ipynb @@ -65,13 +65,13 @@ "outputs": [], "source": [ "# In Colab / No Docker environments: Start Elasticsearch from source\n", - "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n", - "! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n", - "! chown -R daemon:daemon elasticsearch-7.6.2\n", + "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", + "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", + "! chown -R daemon:daemon elasticsearch-7.9.2\n", "\n", "import os\n", "from subprocess import Popen, PIPE, STDOUT\n", - "es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n", + "es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],\n", " stdout=PIPE, stderr=STDOUT,\n", " preexec_fn=lambda: os.setuid(1) # as daemon\n", " )\n", diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py index ece5e3493..6d47e3215 100644 --- a/tutorials/Tutorial5_Evaluation.py +++ b/tutorials/Tutorial5_Evaluation.py @@ -34,7 +34,7 @@ device, n_gpu = initialize_device_settings(use_cuda=True) if LAUNCH_ELASTICSEARCH: logging.info("Starting Elasticsearch ...") status = subprocess.run( - ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.2'], shell=True + ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True ) if status.returncode: raise Exception("Failed to launch Elasticsearch. If you want to connect to an existing Elasticsearch instance"