From 54d32d4f1f01fb2cee3413c931fdca85fa68379a Mon Sep 17 00:00:00 2001
From: Stan Kirdey
Date: Tue, 28 Apr 2020 07:10:32 -0700
Subject: [PATCH] Add coverage reports and more tests (#78)

---
Reviewer notes (free-form text in this position is skipped by `git am`):
 * Dropped the haystack/retriever/tfidf.py section. Its only change was a
   leftover debug `print(indices_and_scores)` (plus one blank line) inside
   class TfidfRetriever, which is unrelated to coverage reporting or tests.
   The diffstat below was adjusted accordingly (7 -> 6 files, 52 -> 50
   insertions).
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch. Before applying, verify the context and removed lines
   of the .gitignore, requirements.txt and tox.ini hunks against the tree
   (blank context lines and continuation-line indentation are assumptions).

 .gitignore                   |  1 +
 requirements.txt             |  1 +
 test/test_document.py        |  7 +++++++
 test/test_farm_reader.py     |  9 +++++++++
 test/test_tfidf_retriever.py | 19 +++++++++++++++++++
 tox.ini                      | 16 +++++++++++++---
 6 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 test/test_document.py
 create mode 100644 test/test_farm_reader.py
 create mode 100644 test/test_tfidf_retriever.py

diff --git a/.gitignore b/.gitignore
index 824d052e3..238c92fe5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 qa.db
 **/qa.db
 **/*qa*.db
+**/test-reports
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/requirements.txt b/requirements.txt
index 89bee887b..03c38b045 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@ sklearn
 elasticsearch
 elastic-apm
 tox
+coverage
 # optional: sentence-transformers
diff --git a/test/test_document.py b/test/test_document.py
new file mode 100644
index 000000000..deb540c49
--- /dev/null
+++ b/test/test_document.py
@@ -0,0 +1,7 @@
+from haystack.database.base import Document
+
+
+def test_document_data_access():
+    doc = Document(id=1, text="test")
+    assert doc.text == "test"
+    assert doc['text'] == "test"
diff --git a/test/test_farm_reader.py b/test/test_farm_reader.py
new file mode 100644
index 000000000..bf8be081c
--- /dev/null
+++ b/test/test_farm_reader.py
@@ -0,0 +1,9 @@
+import pytest
+
+from haystack.reader.farm import FARMReader
+
+
+def test_farm_reader():
+    reader = FARMReader(model_name_or_path="deepset/bert-base-cased-squad2", use_gpu=False)
+    assert reader is not None
+    assert isinstance(reader, FARMReader)
diff --git a/test/test_tfidf_retriever.py b/test/test_tfidf_retriever.py
new file mode 100644
index 000000000..577ac10c4
--- /dev/null
+++ b/test/test_tfidf_retriever.py
@@ -0,0 +1,19 @@
+from haystack.database.base import Document
+
+
+def test_tfidf_retriever():
+    from haystack.retriever.tfidf import TfidfRetriever
+
+    test_docs = [
+        {"name": "testing the finder 1", "text": "godzilla says hello"},
+        {"name": "testing the finder 2", "text": "optimus prime says bye"},
+        {"name": "testing the finder 3", "text": "alien says arghh"}
+    ]
+
+    from haystack.database.memory import InMemoryDocumentStore
+    document_store = InMemoryDocumentStore()
+    document_store.write_documents(test_docs)
+
+    retriever = TfidfRetriever(document_store)
+    retriever.fit()
+    assert retriever.retrieve("godzilla", top_k=1) == [Document(id='0', text='godzilla says hello', external_source_id=None, question=None, query_score=None, meta=None)]
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index c0d047ff2..475a64491 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,6 +5,16 @@ envlist = py36,py37
 
 [testenv]
 changedir = test
-deps = pytest
-       pandas
-commands = pytest --basetemp="{envtmpdir}" {posargs}
\ No newline at end of file
+deps =
+    coverage
+    pytest
+    pandas
+setenv =
+    COVERAGE_FILE = test-reports/.coverage
+    PYTEST_ADDOPTS = --junitxml=test-reports/{envname}/junit.xml -vv
+commands =
+    coverage run --source haystack --parallel-mode -m pytest {posargs}
+    coverage combine
+    coverage report -m
+    coverage html -d test-reports/coverage-html
+    coverage xml -o test-reports/coverage.xml