# haystack/test/conftest.py

import os
import subprocess
import time
from subprocess import run
from sys import platform

import pytest
import requests
from elasticsearch import Elasticsearch
from haystack.retriever.sparse import ElasticsearchFilterOnlyRetriever, ElasticsearchRetriever, TfidfRetriever

from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever

from haystack import Document
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.document_store.faiss import FAISSDocumentStore
from haystack.document_store.memory import InMemoryDocumentStore
from haystack.document_store.sql import SQLDocumentStore
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader


@pytest.fixture(scope="session")
def elasticsearch_fixture():
    """Make sure an Elasticsearch instance answers on localhost:9200 for the session.

    The fixture first calls ``client.info()`` against the local cluster. If that
    fails, it starts a single-node ``elasticsearch:7.9.2`` docker container named
    ``haystack_test_elastic`` and sleeps 30 seconds to give it time to come up.

    Raises:
        Exception: if the ``docker run`` command exits with a non-zero status.
    """
    # test if a ES cluster is already running. If not, download and start an ES instance locally.
    try:
        client = Elasticsearch(hosts=[{"host": "localhost", "port": "9200"}])
        client.info()
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still abort the test run instead of triggering a docker start.
        print("Starting Elasticsearch ...")
        # Best-effort removal of a left-over container with the same name; the
        # exit status is deliberately ignored (it is non-zero when none exists).
        # With shell=True the command is passed as a single string.
        subprocess.run('docker rm haystack_test_elastic', shell=True)
        status = subprocess.run(
            'docker run -d --name haystack_test_elastic -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2',
            shell=True
        )
        if status.returncode:
            raise Exception(
                "Failed to launch Elasticsearch. Please check docker container logs.")
        # Fixed grace period for the container to finish booting.
        time.sleep(30)


@pytest.fixture(scope="session")
def tika_fixture():
    """Make sure a Tika server answers on localhost:9998 for the session.

    The fixture sends a GET request to the Tika endpoint. If the request fails
    or does not return HTTP 200, it starts an ``apache/tika:1.24.1`` docker
    container named ``tika`` and sleeps 30 seconds to give it time to come up.

    Raises:
        Exception: if the ``docker run`` command exits with a non-zero status.
    """
    tika_url = "http://localhost:9998/tika"
    try:
        # A timeout keeps the probe from blocking forever on a half-open port.
        ping = requests.get(tika_url, timeout=10)
        tika_is_up = ping.status_code == 200
    except Exception:
        # Any failure to reach the endpoint is treated as "not running".
        # ``except Exception`` lets Ctrl-C / SystemExit propagate.
        tika_is_up = False

    if tika_is_up:
        return

    print("Starting Tika ...")
    # With shell=True the command is passed as a single string.
    status = subprocess.run(
        'docker run -d --name tika -p 9998:9998 apache/tika:1.24.1',
        shell=True
    )
    if status.returncode:
        raise Exception(
            "Failed to launch Tika. Please check docker container logs.")
    # Fixed grace period for the container to finish booting.
    time.sleep(30)


@pytest.fixture(scope="session")
def xpdf_fixture(tika_fixture):
    """Make sure the ``pdftotext`` binary is callable, installing xpdf-tools 4.02 if not.

    Requesting ``tika_fixture`` makes pytest set that fixture up first.

    When running ``pdftotext`` through the shell yields exit status 127, the
    xpdf-tools archive for the current platform (linux or mac) is downloaded,
    unpacked and its ``pdftotext`` binary copied to ``/usr/local/bin``. The
    installation is then verified by running ``pdftotext -v``.

    Raises:
        Exception: if the platform is neither linux nor darwin, or if
            ``pdftotext`` is still not found after the installation attempt.
    """
    # Exit status 127 is the shell's "command not found" status.
    # With shell=True the command is passed as a single string.
    verify_installation = run("pdftotext", shell=True)
    if verify_installation.returncode != 127:
        return

    if platform.startswith("linux"):
        platform_id = "linux"
        sudo_prefix = "sudo"
    elif platform.startswith("darwin"):
        platform_id = "mac"
        # For Mac, sudo generally needs a password in an interactive console.
        # But in most cases the current user already has permission to copy to /usr/local/bin.
        # Hence no sudo on Mac.
        sudo_prefix = ""
    else:
        raise Exception(
            """Currently auto installation of pdftotext is not supported on {0} platform """.format(platform)
        )

    commands = """ wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-{0}-4.02.tar.gz &&
                       tar -xvf xpdf-tools-{0}-4.02.tar.gz &&
                       {1} cp xpdf-tools-{0}-4.02/bin64/pdftotext /usr/local/bin""".format(platform_id, sudo_prefix)
    # The exit status of the install chain is not inspected here; the check
    # below decides whether the binary ended up on the PATH.
    run(commands, shell=True)

    verify_installation = run("pdftotext -v", shell=True)
    if verify_installation.returncode == 127:
        raise Exception(
            """pdftotext is not installed. It is part of xpdf or poppler-utils software suite.
                 You can download for your OS from here: https://www.xpdfreader.com/download.html."""
        )


# NOTE(review): a second fixture named ``document_store`` is defined further down
# in this module. Python rebinds the module-level name to that later definition,
# so this version appears to be shadowed and unused -- confirm and consider
# removing it.
@pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"])
def document_store(request, test_docs_xs, elasticsearch_fixture):
    """Return the document store built by ``get_document_store`` for ``request.param``."""
    return get_document_store(request.param)


@pytest.fixture()
def test_docs_xs():
    """Return three tiny sample documents, one per supported input format."""
    # current "dict" format for a document
    nested_meta_doc = {
        "text": "My name is Carla and I live in Berlin",
        "meta": {"meta_field": "test1", "name": "filename1"},
    }
    # meta_field at the top level for backward compatibility
    flat_meta_doc = {
        "text": "My name is Paul and I live in New York",
        "meta_field": "test2",
        "name": "filename2",
    }
    # Document object for a doc
    document_obj = Document(
        text="My name is Christelle and I live in Paris",
        meta={"meta_field": "test3", "name": "filename3"},
    )
    return [nested_meta_doc, flat_meta_doc, document_obj]


@pytest.fixture(params=["farm", "transformers"])
def reader(request):
    """Build the reader selected by ``request.param`` ("farm" or "transformers").

    Both variants load the ``distilbert-base-uncased-distilled-squad`` model
    with GPU usage switched off.
    """
    reader_kind = request.param
    squad_model = "distilbert-base-uncased-distilled-squad"

    if reader_kind == "farm":
        farm_reader = FARMReader(
            model_name_or_path=squad_model,
            use_gpu=False,
            top_k_per_sample=5,
            num_processes=0,
        )
        return farm_reader

    if reader_kind == "transformers":
        transformers_reader = TransformersReader(
            model_name_or_path=squad_model,
            tokenizer="distilbert-base-uncased",
            use_gpu=-1,
        )
        return transformers_reader


# TODO Fix bug in test_no_answer_output when using
# @pytest.fixture(params=["farm", "transformers"])
@pytest.fixture(params=["farm"])
def no_answer_reader(request):
    """Build a reader on ``deepset/roberta-base-squad2`` for the "no answer" tests.

    Only the "farm" variant is parametrized at the moment; the "transformers"
    branch is kept so it can be switched back on in the params list.
    """
    reader_kind = request.param
    squad2_model = "deepset/roberta-base-squad2"

    if reader_kind == "farm":
        farm_reader = FARMReader(
            model_name_or_path=squad2_model,
            use_gpu=False,
            top_k_per_sample=5,
            no_ans_boost=0,
            num_processes=0,
        )
        return farm_reader

    if reader_kind == "transformers":
        transformers_reader = TransformersReader(
            model_name_or_path=squad2_model,
            tokenizer="deepset/roberta-base-squad2",
            use_gpu=-1,
            top_k_per_candidate=5,
        )
        return transformers_reader


@pytest.fixture()
def prediction(reader, test_docs_xs):
    """Run ``reader.predict`` for "Who lives in Berlin?" over the sample documents."""
    documents = []
    for entry in test_docs_xs:
        # Dict-style samples are converted; Document objects are passed through.
        if isinstance(entry, dict):
            entry = Document.from_dict(entry)
        documents.append(entry)
    return reader.predict(question="Who lives in Berlin?", documents=documents, top_k=5)


@pytest.fixture()
def no_answer_prediction(no_answer_reader, test_docs_xs):
    """Run ``no_answer_reader.predict`` for a question the sample documents do not cover."""
    documents = []
    for entry in test_docs_xs:
        # Dict-style samples are converted; Document objects are passed through.
        if isinstance(entry, dict):
            entry = Document.from_dict(entry)
        documents.append(entry)
    return no_answer_reader.predict(
        question="What is the meaning of life?", documents=documents, top_k=5
    )


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"])
def document_store_with_docs(request, test_docs_xs, elasticsearch_fixture):
    """Yield a document store of type ``request.param`` filled with the sample documents.

    After the test, the FAISS index is reset when the store is a
    ``FAISSDocumentStore``; other store types need no teardown here.
    """
    store = get_document_store(request.param)
    store.write_documents(test_docs_xs)
    yield store

    # Teardown: only the FAISS-backed store is reset.
    if not isinstance(store, FAISSDocumentStore):
        return
    store.faiss_index.reset()


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"])
def document_store(request, test_docs_xs, elasticsearch_fixture):
    """Yield an empty document store of type ``request.param``.

    After the test, the FAISS index is reset when the store is a
    ``FAISSDocumentStore``; other store types need no teardown here.
    """
    store = get_document_store(request.param)
    yield store

    # Teardown: only the FAISS-backed store is reset.
    if not isinstance(store, FAISSDocumentStore):
        return
    store.faiss_index.reset()


@pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"])
def retriever(request, document_store):
    """Build the retriever named by ``request.param`` on top of ``document_store``."""
    retriever_kind = request.param
    built_retriever = get_retriever(retriever_kind, document_store)
    return built_retriever


@pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"])
def retriever_with_docs(request, document_store_with_docs):
    """Build the retriever named by ``request.param`` on top of ``document_store_with_docs``."""
    retriever_kind = request.param
    built_retriever = get_retriever(retriever_kind, document_store_with_docs)
    return built_retriever


def get_document_store(document_store_type):
    """Create a clean document store for the given backend name.

    Supported names are "sql", "memory", "elasticsearch" and "faiss". The
    SQLite files used by the "sql" and "faiss" stores are deleted first if they
    exist, and for "elasticsearch" all ``haystack_test*`` indices are deleted
    before the store is created.

    Raises:
        Exception: if ``document_store_type`` is not one of the supported names.
    """
    if document_store_type == "sql":
        sql_db_file = "haystack_test.db"
        if os.path.exists(sql_db_file):
            os.remove(sql_db_file)
        return SQLDocumentStore(url="sqlite:///haystack_test.db")

    if document_store_type == "memory":
        return InMemoryDocumentStore()

    if document_store_type == "elasticsearch":
        # make sure we start from a fresh index
        es_client = Elasticsearch()
        es_client.indices.delete(index='haystack_test*', ignore=[404])
        return ElasticsearchDocumentStore(index="haystack_test")

    if document_store_type == "faiss":
        faiss_db_file = "haystack_test_faiss.db"
        if os.path.exists(faiss_db_file):
            os.remove(faiss_db_file)
        return FAISSDocumentStore(sql_url="sqlite:///haystack_test_faiss.db")

    raise Exception(f"No document store fixture for '{document_store_type}'")


def get_retriever(retriever_type, document_store):
    """Create the retriever identified by ``retriever_type`` for ``document_store``.

    Supported names are "dpr", "tfidf", "embedding", "elasticsearch" and
    "es_filter_only".

    Raises:
        Exception: if ``retriever_type`` is not one of the supported names.
    """
    if retriever_type == "dpr":
        return DensePassageRetriever(
            document_store=document_store,
            query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
            passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
            use_gpu=False,
            embed_title=True,
            remove_sep_tok_from_untitled_passages=True,
        )

    if retriever_type == "tfidf":
        return TfidfRetriever(document_store=document_store)

    if retriever_type == "embedding":
        return EmbeddingRetriever(
            document_store=document_store,
            embedding_model="deepset/sentence_bert",
            use_gpu=False,
        )

    if retriever_type == "elasticsearch":
        return ElasticsearchRetriever(document_store=document_store)

    if retriever_type == "es_filter_only":
        return ElasticsearchFilterOnlyRetriever(document_store=document_store)

    raise Exception(f"No retriever fixture for '{retriever_type}'")
Deprecate Tags for Document Stores (#286) 2020-08-04 14:24:12 +02:00			`import os`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`import subprocess`
Add test for Elasticsearch document store (#88) 2020-05-04 18:00:07 +02:00			`import time`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`from subprocess import run`
			`from sys import platform`
Add test for Elasticsearch document store (#88) 2020-05-04 18:00:07 +02:00
			`import pytest`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`import requests`
Start Elasticsearch with a Github Action (#142) 2020-06-09 12:46:15 +02:00			`from elasticsearch import Elasticsearch`
Fix update_embeddings function in FAISSDocumentStore and add retriever fixture in tests (#481) * 1. Prevent update_embeddings function in FAISSDocumentStore to set faiss_index as None when document store does not have any docs. 2. cleaning up tests by adding fixture for retriever. * TfidfRetriever need document store with documents during initialization as it call fit() function in constructor so fixing it by checking self.paragraphs of None * Fix naming of retriever's fixture (embedded to embedding and tfid to tfidf) 2020-10-14 16:15:04 +02:00			`from haystack.retriever.sparse import ElasticsearchFilterOnlyRetriever, ElasticsearchRetriever, TfidfRetriever`

			`from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever`
Add test for Elasticsearch document store (#88) 2020-05-04 18:00:07 +02:00
Rename and restructure modules (database, indexing, schemas) (#379) * rename database to documentstore * move document, label, multilabel to haystack/schema.py * rename documentstore -> document_store * split indexing modules -> file_converter + preprocessor * fix order of imports * Update tutorial notebooks * fix torch version in tutorial 4 2020-09-16 18:33:23 +02:00			`from haystack import Document`
			`from haystack.document_store.elasticsearch import ElasticsearchDocumentStore`
			`from haystack.document_store.faiss import FAISSDocumentStore`
			`from haystack.document_store.memory import InMemoryDocumentStore`
			`from haystack.document_store.sql import SQLDocumentStore`
Add more tests (#213) 2020-07-10 10:54:56 +02:00			`from haystack.reader.farm import FARMReader`
			`from haystack.reader.transformers import TransformersReader`

Add test for Elasticsearch document store (#88) 2020-05-04 18:00:07 +02:00
			`@pytest.fixture(scope="session")`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`def elasticsearch_fixture():`
Start Elasticsearch with a Github Action (#142) 2020-06-09 12:46:15 +02:00			`# test if a ES cluster is already running. If not, download and start an ES instance locally.`
			`try:`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`client = Elasticsearch(hosts=[{"host": "localhost", "port": "9200"}])`
Start Elasticsearch with a Github Action (#142) 2020-06-09 12:46:15 +02:00			`client.info()`
			`except:`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`print("Starting Elasticsearch ...")`
			`status = subprocess.run(`
Remove phi normalization from FAISS, support more index types, 3x speedup (#467) * remove phi normalization * add special case for hnsw * rename vector_size to vector_dim * fix loading. fix extra dim in tests * switch to new ES syntax for vector similarity * 3x sql speed up. cascade deletes. add train_index() * add docstrings. remove vector_dim from load() * delete docs from faiss and sql * fix delete of docs in test * relax type hint for faiss index * rename metric to metric_type Co-authored-by: lalitpagaria <19303690+lalitpagaria@users.noreply.github.com> 2020-10-06 16:09:56 +02:00			`['docker rm haystack_test_elastic'],`
			`shell=True`
			`)`
			`status = subprocess.run(`
Fix scoring in Elasticsearch for dot product (#517) 2020-10-23 17:50:49 +02:00			`['docker run -d --name haystack_test_elastic -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'],`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`shell=True`
			`)`
			`if status.returncode:`
			`raise Exception(`
			`"Failed to launch Elasticsearch. Please check docker container logs.")`
			`time.sleep(30)`


			`@pytest.fixture(scope="session")`
			`def tika_fixture():`
			`try:`
			`tika_url = "http://localhost:9998/tika"`
			`ping = requests.get(tika_url)`
			`if ping.status_code != 200:`
			`raise Exception(`
			`"Unable to connect Tika. Please check tika endpoint {0}.".format(tika_url))`
			`except:`
			`print("Starting Tika ...")`
			`status = subprocess.run(`
			`['docker run -d --name tika -p 9998:9998 apache/tika:1.24.1'],`
			`shell=True`
			`)`
			`if status.returncode:`
			`raise Exception(`
			`"Failed to launch Tika. Please check docker container logs.")`
			`time.sleep(30)`
Add PDF text extraction (#109) 2020-06-08 11:07:19 +02:00

			`@pytest.fixture(scope="session")`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`def xpdf_fixture(tika_fixture):`
Add PDF text extraction (#109) 2020-06-08 11:07:19 +02:00			`verify_installation = run(["pdftotext"], shell=True)`
			`if verify_installation.returncode == 127:`
Adjust tests for MacOS (#374) 2020-09-15 15:04:46 +02:00			`if platform.startswith("linux"):`
			`platform_id = "linux"`
			`sudo_prefix = "sudo"`
			`elif platform.startswith("darwin"):`
			`platform_id = "mac"`
			`# For Mac, generally sudo need password in interactive console.`
			`# But most of the cases current user already have permission to copy to /user/local/bin.`
			`# Hence removing sudo requirement for Mac.`
			`sudo_prefix = ""`
			`else:`
			`raise Exception(`
			`"""Currently auto installation of pdftotext is not supported on {0} platform """.format(platform)`
			`)`

			`commands = """ wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-{0}-4.02.tar.gz &&`
			`tar -xvf xpdf-tools-{0}-4.02.tar.gz &&`
			`{1} cp xpdf-tools-{0}-4.02/bin64/pdftotext /usr/local/bin""".format(platform_id, sudo_prefix)`
Add PDF text extraction (#109) 2020-06-08 11:07:19 +02:00			`run([commands], shell=True)`

			`verify_installation = run(["pdftotext -v"], shell=True)`
			`if verify_installation.returncode == 127:`
			`raise Exception(`
			`"""pdftotext is not installed. It is part of xpdf or poppler-utils software suite.`
			`You can download for your OS from here: https://www.xpdfreader.com/download.html."""`
			`)`
Add more tests (#213) 2020-07-10 10:54:56 +02:00
Add Tika Converter (#314) 2020-08-17 11:21:09 +02:00
			`@pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"])`
			`def document_store(request, test_docs_xs, elasticsearch_fixture):`
			`return get_document_store(request.param)`

Add more tests (#213) 2020-07-10 10:54:56 +02:00
			`@pytest.fixture()`
			`def test_docs_xs():`
			`return [`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00			`# current "dict" format for a document`
Move document_name attribute to meta (#217) 2020-07-14 09:53:31 +02:00			`{"text": "My name is Carla and I live in Berlin", "meta": {"meta_field": "test1", "name": "filename1"}},`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00			`# meta_field at the top level for backward compatibility`
			`{"text": "My name is Paul and I live in New York", "meta_field": "test2", "name": "filename2"},`
			`# Document object for a doc`
			`Document(text="My name is Christelle and I live in Paris", meta={"meta_field": "test3", "name": "filename3"})`
Add more tests (#213) 2020-07-10 10:54:56 +02:00			`]`


			`@pytest.fixture(params=["farm", "transformers"])`
			`def reader(request):`
			`if request.param == "farm":`
			`return FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad",`
			`use_gpu=False, top_k_per_sample=5, num_processes=0)`
			`if request.param == "transformers":`
Change arg "model" to "model_name_or_path" in TransformersReader (#510) * Consistent parameter naming for TransformersReader along with removing unused imports as well. * Addressing review comments 2020-10-21 17:15:35 +02:00			`return TransformersReader(model_name_or_path="distilbert-base-uncased-distilled-squad",`
Add more tests (#213) 2020-07-10 10:54:56 +02:00			`tokenizer="distilbert-base-uncased",`
			`use_gpu=-1)`


Upgrade to new FARM / Transformers / PyTorch versions (#212) 2020-07-14 18:53:15 +02:00			`# TODO Fix bug in test_no_answer_output when using`
			`# @pytest.fixture(params=["farm", "transformers"])`
			`@pytest.fixture(params=["farm"])`
			`def no_answer_reader(request):`
			`if request.param == "farm":`
			`return FARMReader(model_name_or_path="deepset/roberta-base-squad2",`
			`use_gpu=False, top_k_per_sample=5, no_ans_boost=0, num_processes=0)`
			`if request.param == "transformers":`
Change arg "model" to "model_name_or_path" in TransformersReader (#510) * Consistent parameter naming for TransformersReader along with removing unused imports as well. * Addressing review comments 2020-10-21 17:15:35 +02:00			`return TransformersReader(model_name_or_path="deepset/roberta-base-squad2",`
Upgrade to new FARM / Transformers / PyTorch versions (#212) 2020-07-14 18:53:15 +02:00			`tokenizer="deepset/roberta-base-squad2",`
Add "no answer" aggregation to Transformersreader (#259) * Add no answer aggregation * Change to covariant type annotation * Remove n_best_per_passage from transformersreader 2020-08-06 17:32:55 +02:00			`use_gpu=-1, top_k_per_candidate=5)`
Upgrade to new FARM / Transformers / PyTorch versions (#212) 2020-07-14 18:53:15 +02:00

			`@pytest.fixture()`
			`def prediction(reader, test_docs_xs):`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00			`docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs]`
Upgrade to new FARM / Transformers / PyTorch versions (#212) 2020-07-14 18:53:15 +02:00			`prediction = reader.predict(question="Who lives in Berlin?", documents=docs, top_k=5)`
			`return prediction`


			`@pytest.fixture()`
			`def no_answer_prediction(no_answer_reader, test_docs_xs):`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00			`docs = [Document.from_dict(d) if isinstance(d, dict) else d for d in test_docs_xs]`
Upgrade to new FARM / Transformers / PyTorch versions (#212) 2020-07-14 18:53:15 +02:00			`prediction = no_answer_reader.predict(question="What is the meaning of life?", documents=docs, top_k=5)`
			`return prediction`


Add FAISS Document Store (#253) 2020-08-07 14:25:08 +02:00			`@pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"])`
Add more tests (#213) 2020-07-10 10:54:56 +02:00			`def document_store_with_docs(request, test_docs_xs, elasticsearch_fixture):`
Add FAISS Document Store (#253) 2020-08-07 14:25:08 +02:00			`document_store = get_document_store(request.param)`
			`document_store.write_documents(test_docs_xs)`
Pytest fix memory leak and put pytest marker on slow tests (#520) * Clear faiss_index during teardown * Marking slow test with pytest markers. So In future these test can be optimized. Also command line option can be added to skip them refer https://pytest.org/en/stable/example/simple.html#control-skipping-of-tests-according-to-command-line-option * Fixing test 2020-10-26 19:19:10 +01:00			`yield document_store`
			`if isinstance(document_store, FAISSDocumentStore):`
			`document_store.faiss_index.reset()`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00

Add FAISS Document Store (#253) 2020-08-07 14:25:08 +02:00			`@pytest.fixture(params=["elasticsearch", "faiss", "memory", "sql"])`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00			`def document_store(request, test_docs_xs, elasticsearch_fixture):`
Pytest fix memory leak and put pytest marker on slow tests (#520) * Clear faiss_index during teardown * Marking slow test with pytest markers. So In future these test can be optimized. Also command line option can be added to skip them refer https://pytest.org/en/stable/example/simple.html#control-skipping-of-tests-according-to-command-line-option * Fixing test 2020-10-26 19:19:10 +01:00			`document_store = get_document_store(request.param)`
			`yield document_store`
			`if isinstance(document_store, FAISSDocumentStore):`
			`document_store.faiss_index.reset()`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00

Fix scoring in Elasticsearch for dot product (#517) 2020-10-23 17:50:49 +02:00			`@pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"])`
Fix update_embeddings function in FAISSDocumentStore and add retriever fixture in tests (#481) * 1. Prevent update_embeddings function in FAISSDocumentStore to set faiss_index as None when document store does not have any docs. 2. cleaning up tests by adding fixture for retriever. * TfidfRetriever need document store with documents during initialization as it call fit() function in constructor so fixing it by checking self.paragraphs of None * Fix naming of retriever's fixture (embedded to embedding and tfid to tfidf) 2020-10-14 16:15:04 +02:00			`def retriever(request, document_store):`
			`return get_retriever(request.param, document_store)`


Fix scoring in Elasticsearch for dot product (#517) 2020-10-23 17:50:49 +02:00			`@pytest.fixture(params=["es_filter_only", "elasticsearch", "dpr", "embedding", "tfidf"])`
Fix update_embeddings function in FAISSDocumentStore and add retriever fixture in tests (#481) * 1. Prevent update_embeddings function in FAISSDocumentStore to set faiss_index as None when document store does not have any docs. 2. cleaning up tests by adding fixture for retriever. * TfidfRetriever need document store with documents during initialization as it call fit() function in constructor so fixing it by checking self.paragraphs of None * Fix naming of retriever's fixture (embedded to embedding and tfid to tfidf) 2020-10-14 16:15:04 +02:00			`def retriever_with_docs(request, document_store_with_docs):`
			`return get_retriever(request.param, document_store_with_docs)`


Add FAISS Document Store (#253) 2020-08-07 14:25:08 +02:00			`def get_document_store(document_store_type):`
			`if document_store_type == "sql":`
			`if os.path.exists("haystack_test.db"):`
			`os.remove("haystack_test.db")`
			`document_store = SQLDocumentStore(url="sqlite:///haystack_test.db")`
			`elif document_store_type == "memory":`
			`document_store = InMemoryDocumentStore()`
			`elif document_store_type == "elasticsearch":`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00			`# make sure we start from a fresh index`
			`client = Elasticsearch()`
Add FAISS Document Store (#253) 2020-08-07 14:25:08 +02:00			`client.indices.delete(index='haystack_test*', ignore=[404])`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00			`document_store = ElasticsearchDocumentStore(index="haystack_test")`
Add FAISS Document Store (#253) 2020-08-07 14:25:08 +02:00			`elif document_store_type == "faiss":`
			`if os.path.exists("haystack_test_faiss.db"):`
			`os.remove("haystack_test_faiss.db")`
			`document_store = FAISSDocumentStore(sql_url="sqlite:///haystack_test_faiss.db")`
			`else:`
			`raise Exception(f"No document store fixture for '{document_store_type}'")`
Add eval for Dense Passage Retriever & Refactor handling of labels/feedback (#243) 2020-07-31 11:34:06 +02:00
			`return document_store`
Fix update_embeddings function in FAISSDocumentStore and add retriever fixture in tests (#481) * 1. Prevent update_embeddings function in FAISSDocumentStore to set faiss_index as None when document store does not have any docs. 2. cleaning up tests by adding fixture for retriever. * TfidfRetriever need document store with documents during initialization as it call fit() function in constructor so fixing it by checking self.paragraphs of None * Fix naming of retriever's fixture (embedded to embedding and tfid to tfidf) 2020-10-14 16:15:04 +02:00

			`def get_retriever(retriever_type, document_store):`

			`if retriever_type == "dpr":`
			`retriever = DensePassageRetriever(document_store=document_store,`
			`query_embedding_model="facebook/dpr-question_encoder-single-nq-base",`
			`passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",`
			`use_gpu=False, embed_title=True,`
			`remove_sep_tok_from_untitled_passages=True)`
			`elif retriever_type == "tfidf":`
			`return TfidfRetriever(document_store=document_store)`
			`elif retriever_type == "embedding":`
			`retriever = EmbeddingRetriever(document_store=document_store,`
			`embedding_model="deepset/sentence_bert",`
			`use_gpu=False)`
Fix scoring in Elasticsearch for dot product (#517) 2020-10-23 17:50:49 +02:00			`elif retriever_type == "elasticsearch":`
Fix update_embeddings function in FAISSDocumentStore and add retriever fixture in tests (#481) * 1. Prevent update_embeddings function in FAISSDocumentStore to set faiss_index as None when document store does not have any docs. 2. cleaning up tests by adding fixture for retriever. * TfidfRetriever need document store with documents during initialization as it call fit() function in constructor so fixing it by checking self.paragraphs of None * Fix naming of retriever's fixture (embedded to embedding and tfid to tfidf) 2020-10-14 16:15:04 +02:00			`retriever = ElasticsearchRetriever(document_store=document_store)`
			`elif retriever_type == "es_filter_only":`
			`retriever = ElasticsearchFilterOnlyRetriever(document_store=document_store)`
			`else:`
			`raise Exception(f"No retriever fixture for '{retriever_type}'")`

			`return retriever`