Mirror of https://github.com/deepset-ai/haystack.git, synced 2025-07-27 10:49:52 +00:00

* Unify CI tests (from #2466)
* Update Documentation & Code Style
* Change folder names
* Fix markers list
* Remove marker 'slow', replaced with 'integration'
* Soften children check
* Start ES first so it has time to boot while Python is setup
* Run the full workflow
* Try to make pip upgrade on Windows
* Set KG tests as integration
* Update Documentation & Code Style
* typo
* faster pylint
* Make Pylint use the cache
* filter diff files for pylint
* debug pylint statement
* revert pylint changes
* Remove path from asserted log (fails on Windows)
* Skip preprocessor test on Windows
* Tackling Windows specific failures
* Fix pytest command for windows suites
* Remove \ from command
* Move poppler test into integration
* Skip opensearch test on windows
* Add tolerance in reader sas score for Windows
* Another pytorch approx
* Raise time limit for unit tests :(
* Skip poppler test on Windows CI
* Specify to pull with FF only in docs check
* temporarily run the docs check immediately
* Allow merge commit for now
* Try without fetch depth
* Accelerating test
* Accelerating test
* Add repository and ref alongside fetch-depth
* Separate out code&docs check from tests
* Use setup-python cache
* Delete custom action
* Remove the pull step in the docs check, will find a way to run on bot commits
* Add requirements.txt in .github for caching
* Actually install dependencies
* Change deps group for pylint
* Unclear why the requirements.txt is still required :/
* Fix the code check python setup
* Install all deps for pylint
* Make the autoformat check depend on tests and doc updates workflows
* Try installing dependencies in another order
* Try again to install the deps
* quoting the paths
* Ad back the requirements
* Try again to install rest_api and ui
* Change deps group
* Duplicate haystack install line
* See if the cache is the problem
* Disable also in mypy, who knows
* split the install step
* Split install step everywhere
* Revert "Separate out code&docs check from tests"

  This reverts commit 1cd59b15ffc5b984e1d642dcbf4c8ccc2bb6c9bd.

* Add back the action
* Proactive support for audio (see text2speech branch)
* Fix label generator tests
* Remove install of libsndfile1 on win temporarily
* exclude audio tests on win
* install ffmpeg for integration tests

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
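A recurring theme in the commit log above is the switch from a 'slow' marker to an 'integration' marker for tests that need external services; the tests in this file carry that marker. For reference, a minimal sketch of how such a marker is typically registered with pytest — the hook below is standard pytest machinery, not necessarily how this repo wires it:

# conftest.py (sketch)
def pytest_configure(config):
    # Register the custom marker so pytest's --strict-markers mode accepts it.
    config.addinivalue_line(
        "markers", "integration: test requires external services (e.g. a running Elasticsearch)"
    )

CI jobs can then split suites by marker, e.g. `pytest -m integration` for the service-backed tests or `pytest -m "not integration"` for the fast unit runs.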
121 lines
4.7 KiB
Python
import pytest

from haystack.schema import Document
from haystack.nodes.document_classifier.base import BaseDocumentClassifier
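# The fixtures used throughout this module (document_classifier,
# zero_shot_document_classifier, batched_document_classifier,
# indexing_document_classifier) are defined elsewhere, typically in conftest.py.
# A minimal sketch of what they might look like, assuming they wrap
# TransformersDocumentClassifier from haystack.nodes; the model names below are
# illustrative assumptions, not necessarily what the repo pins:
#
#     from haystack.nodes import TransformersDocumentClassifier
#
#     @pytest.fixture
#     def document_classifier():
#         # emotion model whose label set includes "joy" and "sadness"
#         return TransformersDocumentClassifier(
#             model_name_or_path="bhadresh-savani/distilbert-base-uncased-emotion", use_gpu=False
#         )
#
#     @pytest.fixture
#     def zero_shot_document_classifier():
#         # zero-shot NLI model scored against caller-supplied candidate labels
#         return TransformersDocumentClassifier(
#             model_name_or_path="cross-encoder/nli-distilroberta-base",
#             use_gpu=False,
#             task="zero-shot-classification",
#             labels=["positive", "negative"],
#         )
#
#     @pytest.fixture
#     def batched_document_classifier():
#         # same model, explicit batch_size to exercise the batching code path
#         return TransformersDocumentClassifier(
#             model_name_or_path="bhadresh-savani/distilbert-base-uncased-emotion",
#             use_gpu=False,
#             batch_size=16,
#         )
#
#     @pytest.fixture
#     def indexing_document_classifier():
#         # classify on a custom metadata field instead of Document.content
#         return TransformersDocumentClassifier(
#             model_name_or_path="bhadresh-savani/distilbert-base-uncased-emotion",
#             use_gpu=False,
#             classification_field="class_field",
#         )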


@pytest.mark.integration
def test_document_classifier(document_classifier):
    assert isinstance(document_classifier, BaseDocumentClassifier)

    docs = [
        Document(
            content="""That's good. I like it.""" * 700,  # extra long text to check truncation
            meta={"name": "0"},
            id="1",
        ),
        Document(content="""That's bad. I don't like it.""", meta={"name": "1"}, id="2"),
    ]
    results = document_classifier.predict(documents=docs)
    expected_labels = ["joy", "sadness"]
    for i, doc in enumerate(results):
        assert doc.to_dict()["meta"]["classification"]["label"] == expected_labels[i]
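# The classifier writes its prediction into each Document's meta under the
# "classification" key; the entry is assumed to look roughly like
# {"label": "joy", "score": 0.99, ...} -- only "label" is asserted in these tests.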


@pytest.mark.integration
def test_document_classifier_batch_single_doc_list(document_classifier):
    docs = [
        Document(content="""That's good. I like it.""", meta={"name": "0"}, id="1"),
        Document(content="""That's bad. I don't like it.""", meta={"name": "1"}, id="2"),
    ]
    results = document_classifier.predict_batch(documents=docs)
    expected_labels = ["joy", "sadness"]
    for i, doc in enumerate(results):
        assert doc.to_dict()["meta"]["classification"]["label"] == expected_labels[i]


@pytest.mark.integration
def test_document_classifier_batch_multiple_doc_lists(document_classifier):
    docs = [
        Document(content="""That's good. I like it.""", meta={"name": "0"}, id="1"),
        Document(content="""That's bad. I don't like it.""", meta={"name": "1"}, id="2"),
    ]
    results = document_classifier.predict_batch(documents=[docs, docs])
    assert len(results) == 2  # 2 Document lists
    expected_labels = ["joy", "sadness"]
    for i, doc in enumerate(results[0]):
        assert doc.to_dict()["meta"]["classification"]["label"] == expected_labels[i]
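# The two batch tests above show that predict_batch mirrors its input nesting:
# a flat list of Documents yields a flat list of results, while a list of lists
# yields one result list per input list.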


@pytest.mark.integration
def test_zero_shot_document_classifier(zero_shot_document_classifier):
    assert isinstance(zero_shot_document_classifier, BaseDocumentClassifier)

    docs = [
        Document(
            content="""That's good. I like it.""" * 700,  # extra long text to check truncation
            meta={"name": "0"},
            id="1",
        ),
        Document(content="""That's bad. I don't like it.""", meta={"name": "1"}, id="2"),
    ]
    results = zero_shot_document_classifier.predict(documents=docs)
    expected_labels = ["positive", "negative"]
    for i, doc in enumerate(results):
        assert doc.to_dict()["meta"]["classification"]["label"] == expected_labels[i]


@pytest.mark.integration
def test_document_classifier_batch_size(batched_document_classifier):
    assert isinstance(batched_document_classifier, BaseDocumentClassifier)

    docs = [
        Document(
            content="""That's good. I like it.""" * 700,  # extra long text to check truncation
            meta={"name": "0"},
            id="1",
        ),
        Document(content="""That's bad. I don't like it.""", meta={"name": "1"}, id="2"),
    ]
    results = batched_document_classifier.predict(documents=docs)
    expected_labels = ["joy", "sadness"]
    for i, doc in enumerate(results):
        assert doc.to_dict()["meta"]["classification"]["label"] == expected_labels[i]


@pytest.mark.integration
def test_document_classifier_as_index_node(indexing_document_classifier):
    assert isinstance(indexing_document_classifier, BaseDocumentClassifier)

    docs = [
        {
            "content": """That's good. I like it.""" * 700,  # extra long text to check truncation
            "meta": {"name": "0"},
            "id": "1",
            "class_field": "That's bad.",
        },
        {"content": """That's bad. I like it.""", "meta": {"name": "1"}, "id": "2", "class_field": "That's good."},
    ]
    output, output_name = indexing_document_classifier.run(documents=docs, root_node="File")
    expected_labels = ["sadness", "joy"]
    for i, doc in enumerate(output["documents"]):
        assert doc["meta"]["classification"]["label"] == expected_labels[i]
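# Note: run(root_node="File") exercises the indexing code path, where documents
# arrive as plain dicts. The fixture is assumed to classify on the custom
# "class_field" key rather than on "content", which is why expected_labels
# follows class_field ("That's bad." -> "sadness", "That's good." -> "joy").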


@pytest.mark.integration
def test_document_classifier_as_query_node(document_classifier):
    assert isinstance(document_classifier, BaseDocumentClassifier)

    docs = [
        Document(
            content="""That's good. I like it.""" * 700,  # extra long text to check truncation
            meta={"name": "0"},
            id="1",
        ),
        Document(content="""That's bad. I don't like it.""", meta={"name": "1"}, id="2"),
    ]
    output, output_name = document_classifier.run(documents=docs, root_node="Query")
    expected_labels = ["joy", "sadness"]
    for i, doc in enumerate(output["documents"]):
        assert doc.to_dict()["meta"]["classification"]["label"] == expected_labels[i]