# Patch summary (explanatory preamble; the patch text below is unchanged).
#
# Paths touched:
#   .github/workflows/tests.yml          modified
#   test/nodes/test_join_answers.py      new file
#   test/nodes/test_join_documents.py    new file
#   test/nodes/test_other.py             deleted
#   test/nodes/test_route_documents.py   new file
#
# Workflow changes (.github/workflows/tests.yml):
#   * Adds `nodes` to a matrix `topic` list, next to `document_stores`.
#   * In two "Run tests" steps, appends `and not unit` to the pytest `-m`
#     marker expression, so tests marked `unit` are deselected there. The added
#     YAML comment gives the reason: a `unit` mark means the test has already
#     been migrated to the right job. The second step additionally passes
#     `${{ env.SUITES_EXCLUDED_FROM_WINDOWS }}`; that part is unchanged.
#
# Test changes (test/nodes/):
#   * The five tests of test_other.py are split into one file per node:
#     test_joinanswers -> test_join_answers.py,
#     test_joindocuments and test_joindocuments_score_none -> test_join_documents.py,
#     test_routedocuments_by_content_type and test_routedocuments_by_metafield
#     -> test_route_documents.py.
#   * Each moved test gains `@pytest.mark.unit`; the test bodies and their
#     parametrizations are otherwise the same as in the deleted file.
#   * The section banner comments of test_other.py ("RouteDocuments tests",
#     "JoinAnswers tests", "JoinDocuments tests") are not carried over.
#
# NOTE(review): test_routedocuments_by_metafield takes a `docs` argument that
#   is not defined anywhere in this patch -- presumably a pytest fixture from a
#   conftest; confirm it exists and is appropriate for a `unit`-marked test.
# NOTE(review): import paths differ between the new files
#   (`haystack.nodes` vs `haystack.nodes.other.join_docs`, and `haystack` vs
#   `haystack.schema` for Document) -- consider unifying in a follow-up.
# NOTE(review): the line breaks of this patch appear to have been collapsed;
#   hunk headers (`@@ -a,b +c,d @@`) and `index` hashes describe the original
#   line-per-line layout, so restore the newlines before trying to apply it.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 542afabdb..d6f0ee6c0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -90,6 +90,7 @@ jobs: - macos-latest topic: - document_stores + - nodes runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 @@ -439,8 +440,9 @@ jobs: - name: Run tests env: TOKENIZERS_PARALLELISM: 'false' + # as confusing as it seems, we skip tests marked as unit here as it means they have been migrated to the right job already run: | - pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not weaviate and not pinecone and not integration" test/${{ matrix.folder }} --document_store_type=memory + pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not weaviate and not pinecone and not integration and not unit" test/${{ matrix.folder }} --document_store_type=memory - uses: act10ns/slack@v2 with: @@ -486,8 +488,9 @@ jobs: - name: Run tests env: TOKENIZERS_PARALLELISM: 'false' + # as confusing as it seems, we skip tests marked as unit here as it means they have been migrated to the right job already run: | - pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not weaviate and not pinecone and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory + pytest ${{ env.PYTEST_PARAMS }} -m "not elasticsearch and not faiss and not milvus and not weaviate and not pinecone and not integration and not unit" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/${{ matrix.folder }} --document_store_type=memory - uses: act10ns/slack@v2 with: diff --git a/test/nodes/test_join_answers.py b/test/nodes/test_join_answers.py new file mode 100644 index 000000000..9ba802383 --- /dev/null +++ b/test/nodes/test_join_answers.py @@ -0,0 +1,19 @@ +import pytest + +from haystack.schema import Answer +from haystack.nodes import JoinAnswers + + +@pytest.mark.unit 
+@pytest.mark.parametrize("join_mode", ["concatenate", "merge"]) +def test_joinanswers(join_mode): + inputs = [{"answers": [Answer(answer="answer 1", score=0.7)]}, {"answers": [Answer(answer="answer 2", score=0.8)]}] + + join_answers = JoinAnswers(join_mode=join_mode) + result, _ = join_answers.run(inputs) + assert len(result["answers"]) == 2 + assert result["answers"] == sorted(result["answers"], reverse=True) + + result, _ = join_answers.run(inputs, top_k_join=1) + assert len(result["answers"]) == 1 + assert result["answers"][0].answer == "answer 2" diff --git a/test/nodes/test_join_documents.py b/test/nodes/test_join_documents.py new file mode 100644 index 000000000..0a8b88a7d --- /dev/null +++ b/test/nodes/test_join_documents.py @@ -0,0 +1,44 @@ +import pytest + + +from haystack import Document +from haystack.nodes.other.join_docs import JoinDocuments + + +@pytest.mark.unit +@pytest.mark.parametrize("join_mode", ["concatenate", "merge", "reciprocal_rank_fusion"]) +def test_joindocuments(join_mode): + inputs = [ + {"documents": [Document(content="text document 1", content_type="text", score=0.2)]}, + {"documents": [Document(content="text document 2", content_type="text", score=0.7)]}, + ] + + join_docs = JoinDocuments(join_mode=join_mode) + result, _ = join_docs.run(inputs) + assert len(result["documents"]) == 2 + assert result["documents"] == sorted(result["documents"], reverse=True) + + result, _ = join_docs.run(inputs, top_k_join=1) + assert len(result["documents"]) == 1 + if join_mode == "reciprocal_rank_fusion": + assert result["documents"][0].content == "text document 1" + else: + assert result["documents"][0].content == "text document 2" + + +@pytest.mark.unit +@pytest.mark.parametrize("join_mode", ["concatenate", "merge", "reciprocal_rank_fusion"]) +@pytest.mark.parametrize("sort_by_score", [True, False]) +def test_joindocuments_score_none(join_mode, sort_by_score): + """Testing JoinDocuments() node when some of the documents have `score=None`""" + 
inputs = [ + {"documents": [Document(content="text document 1", content_type="text", score=0.2)]}, + {"documents": [Document(content="text document 2", content_type="text", score=None)]}, + ] + + join_docs = JoinDocuments(join_mode=join_mode, sort_by_score=sort_by_score) + result, _ = join_docs.run(inputs) + assert len(result["documents"]) == 2 + + result, _ = join_docs.run(inputs, top_k_join=1) + assert len(result["documents"]) == 1 diff --git a/test/nodes/test_other.py b/test/nodes/test_other.py deleted file mode 100644 index 6afa29a89..000000000 --- a/test/nodes/test_other.py +++ /dev/null @@ -1,101 +0,0 @@ -import pytest -import pandas as pd - -from haystack import Document, Answer -from haystack.nodes.other.route_documents import RouteDocuments -from haystack.nodes.other.join_answers import JoinAnswers -from haystack.nodes.other.join_docs import JoinDocuments - - -# -# RouteDocuments tests -# - - -def test_routedocuments_by_content_type(): - docs = [ - Document(content="text document", content_type="text"), - Document( - content=pd.DataFrame(columns=["col 1", "col 2"], data=[["row 1", "row 1"], ["row 2", "row 2"]]), - content_type="table", - ), - ] - route_documents = RouteDocuments() - result, _ = route_documents.run(documents=docs) - assert len(result["output_1"]) == 1 - assert len(result["output_2"]) == 1 - assert result["output_1"][0].content_type == "text" - assert result["output_2"][0].content_type == "table" - - -def test_routedocuments_by_metafield(docs): - route_documents = RouteDocuments(split_by="meta_field", metadata_values=["test1", "test3", "test5"]) - assert route_documents.outgoing_edges == 3 - result, _ = route_documents.run(docs) - assert len(result["output_1"]) == 1 - assert len(result["output_2"]) == 1 - assert len(result["output_3"]) == 1 - assert result["output_1"][0].meta["meta_field"] == "test1" - assert result["output_2"][0].meta["meta_field"] == "test3" - assert result["output_3"][0].meta["meta_field"] == "test5" - - -# -# JoinAnswers 
tests -# - - -@pytest.mark.parametrize("join_mode", ["concatenate", "merge"]) -def test_joinanswers(join_mode): - inputs = [{"answers": [Answer(answer="answer 1", score=0.7)]}, {"answers": [Answer(answer="answer 2", score=0.8)]}] - - join_answers = JoinAnswers(join_mode=join_mode) - result, _ = join_answers.run(inputs) - assert len(result["answers"]) == 2 - assert result["answers"] == sorted(result["answers"], reverse=True) - - result, _ = join_answers.run(inputs, top_k_join=1) - assert len(result["answers"]) == 1 - assert result["answers"][0].answer == "answer 2" - - -# -# JoinDocuments tests -# - - -@pytest.mark.parametrize("join_mode", ["concatenate", "merge", "reciprocal_rank_fusion"]) -def test_joindocuments(join_mode): - inputs = [ - {"documents": [Document(content="text document 1", content_type="text", score=0.2)]}, - {"documents": [Document(content="text document 2", content_type="text", score=0.7)]}, - ] - - join_docs = JoinDocuments(join_mode=join_mode) - result, _ = join_docs.run(inputs) - assert len(result["documents"]) == 2 - assert result["documents"] == sorted(result["documents"], reverse=True) - - result, _ = join_docs.run(inputs, top_k_join=1) - assert len(result["documents"]) == 1 - if join_mode == "reciprocal_rank_fusion": - assert result["documents"][0].content == "text document 1" - else: - assert result["documents"][0].content == "text document 2" - - -@pytest.mark.parametrize("join_mode", ["concatenate", "merge", "reciprocal_rank_fusion"]) -@pytest.mark.parametrize("sort_by_score", [True, False]) -def test_joindocuments_score_none(join_mode, sort_by_score): - """Testing JoinDocuments() node when some of the documents have `score=None`""" - inputs = [ - {"documents": [Document(content="text document 1", content_type="text", score=0.2)]}, - {"documents": [Document(content="text document 2", content_type="text", score=None)]}, - ] - - join_docs = JoinDocuments(join_mode=join_mode, sort_by_score=sort_by_score) - result, _ = join_docs.run(inputs) 
- assert len(result["documents"]) == 2 - - result, _ = join_docs.run(inputs, top_k_join=1) - assert len(result["documents"]) == 1 diff --git a/test/nodes/test_route_documents.py b/test/nodes/test_route_documents.py new file mode 100644 index 000000000..25f60265a --- /dev/null +++ b/test/nodes/test_route_documents.py @@ -0,0 +1,35 @@ +import pytest +import pandas as pd + +from haystack.schema import Document +from haystack.nodes import RouteDocuments + + +@pytest.mark.unit +def test_routedocuments_by_content_type(): + docs = [ + Document(content="text document", content_type="text"), + Document( + content=pd.DataFrame(columns=["col 1", "col 2"], data=[["row 1", "row 1"], ["row 2", "row 2"]]), + content_type="table", + ), + ] + route_documents = RouteDocuments() + result, _ = route_documents.run(documents=docs) + assert len(result["output_1"]) == 1 + assert len(result["output_2"]) == 1 + assert result["output_1"][0].content_type == "text" + assert result["output_2"][0].content_type == "table" + + +@pytest.mark.unit +def test_routedocuments_by_metafield(docs): + route_documents = RouteDocuments(split_by="meta_field", metadata_values=["test1", "test3", "test5"]) + assert route_documents.outgoing_edges == 3 + result, _ = route_documents.run(docs) + assert len(result["output_1"]) == 1 + assert len(result["output_2"]) == 1 + assert len(result["output_3"]) == 1 + assert result["output_1"][0].meta["meta_field"] == "test1" + assert result["output_2"][0].meta["meta_field"] == "test3" + assert result["output_3"][0].meta["meta_field"] == "test5"