test: mark more tests as slow (#9296)

* test: mark tests as slow

* alphabetical order; install xet

* revert pyproject

* Trigger Build

* simplify tests as suggested

* add comment to workflow
Stefano Fiorucci 2025-04-24 10:25:13 +02:00 committed by GitHub
parent df662daaef
commit e3d4e21237
15 changed files with 89 additions and 64 deletions
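
For context, pytest only honors a custom marker like `slow` once it is registered. Below is a minimal sketch of the usual setup; the exact haystack configuration may differ (this commit explicitly reverts its pyproject change):

```python
# conftest.py -- hypothetical marker registration; projects can equally declare
# markers in pyproject.toml under [tool.pytest.ini_options].
def pytest_configure(config):
    config.addinivalue_line("markers", "slow: test is slow to run")
    config.addinivalue_line("markers", "integration: test needs external resources")

# Typical invocations:
#   pytest -m "not slow"   # fast local feedback, skips the marked tests
#   pytest -m "slow"       # run only the slow suite, e.g. in a dedicated CI job
```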


@@ -42,6 +42,7 @@ jobs:
runs-on: ubuntu-latest
permissions:
pull-requests: read
# Specifying outputs is not needed to make the job work, but only to comply with actionlint
outputs:
changes: ${{ steps.changes.outputs.changes }}
steps:
@@ -54,9 +55,38 @@ jobs:
filters: |
changes:
- "haystack/components/audio/whisper_local.py"
- "haystack/components/classifiers/zero_shot_document_classifier.py"
- "haystack/components/converters/tika.py"
- "test/components/converters/test_tika_doc_converter.py"
- "haystack/components/embedders/hugging_face_api_document_embedder.py"
- "haystack/components/embedders/hugging_face_api_text_embedder.py"
- "haystack/components/embedders/sentence_transformers_text_embedder.py"
- "haystack/components/evaluators/sas_evaluator.py"
- "haystack/components/generators/chat/hugging_face_api.py"
- "haystack/components/generators/chat/hugging_face_local.py"
- "haystack/components/generators/hugging_face_api.py"
- "haystack/components/generators/hugging_face_local_generator.py"
- "haystack/components/rankers/sentence_transformers_diversity.py"
- "haystack/components/rankers/transformers_similarity.py"
- "haystack/components/readers/extractive.py"
- "haystack/components/routers/transformers_text_router.py"
- "haystack/components/routers/zero_shot_text_router.py"
- "test/components/audio/test_whisper_local.py"
- "test/components/classifiers/test_zero_shot_document_classifier.py"
- "test/components/converters/test_tika_doc_converter.py"
- "test/components/embedders/test_hugging_face_api_document_embedder.py"
- "test/components/embedders/test_hugging_face_api_text_embedder.py"
- "test/components/embedders/test_sentence_transformers_text_embedder.py"
- "test/components/evaluators/test_sas_evaluator.py"
- "test/components/generators/chat/test_hugging_face_api.py"
- "test/components/generators/chat/test_hugging_face_local.py"
- "test/components/generators/test_hugging_face_api.py"
- "test/components/generators/test_hugging_face_local_generator.py"
- "test/components/rankers/test_sentence_transformers_diversity.py"
- "test/components/rankers/test_transformers_similarity.py"
- "test/components/readers/test_extractive.py"
- "test/components/routers/test_transformers_text_router.py"
- "test/components/routers/test_zero_shot_text_router.py"
slow-integration-tests:
name: Slow Tests / ${{ matrix.os }}
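
The filter list above feeds a paths-filter step (presumably dorny/paths-filter, given the `filters:` syntax) whose `changes` output gates the slow job. A rough, hypothetical Python equivalent of the check it performs:

```python
# Illustrative stand-in only: the "changes" output becomes true when any file
# touched by the PR matches one of the watched paths.
from fnmatch import fnmatch

WATCHED_PATTERNS = [
    "haystack/components/audio/whisper_local.py",
    "test/components/audio/test_whisper_local.py",
    # ...the rest of the list from the workflow above
]

def changes_detected(changed_files: list[str]) -> bool:
    return any(
        fnmatch(path, pattern)
        for path in changed_files
        for pattern in WATCHED_PATTERNS
    )
```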

test/components/classifiers/test_zero_shot_document_classifier.py

@@ -137,6 +137,7 @@ class TestTransformersZeroShotDocumentClassifier:
assert result["documents"][1].to_dict()["classification"]["label"] == "negative"
@pytest.mark.integration
@pytest.mark.slow
def test_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
component = TransformersZeroShotDocumentClassifier(

test/components/embedders/test_hugging_face_api_document_embedder.py

@@ -371,6 +371,7 @@ class TestHuggingFaceAPIDocumentEmbedder:
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
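
The live embedder tests stack several markers: retry flaky network calls, tag the test as integration and slow, and skip without a token. A condensed sketch of the pattern; the `requires_hf_token` helper is hypothetical (the tests above inline the same skipif), the markers themselves are real:

```python
import os

import pytest

# Hypothetical shared decorator equivalent to the inlined skipif above.
requires_hf_token = pytest.mark.skipif(
    not os.environ.get("HF_API_TOKEN", None),
    reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)

@pytest.mark.flaky(reruns=5, reruns_delay=5)  # pytest-rerunfailures: retry transient API failures
@pytest.mark.integration
@pytest.mark.slow
@requires_hf_token
def test_live_embedding():
    ...  # calls the real Hugging Face Inference API
```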

test/components/embedders/test_hugging_face_api_text_embedder.py

@@ -215,6 +215,7 @@ class TestHuggingFaceAPITextEmbedder:
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
@@ -238,6 +239,7 @@ class TestHuggingFaceAPITextEmbedderAsync:
@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.slow
@pytest.mark.skipif(os.environ.get("HF_API_TOKEN", "") == "", reason="HF_API_TOKEN is not set")
async def test_run_async_with_real_api(self):
"""
@@ -289,6 +291,7 @@ class TestHuggingFaceAPITextEmbedderAsync:
@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.slow
@pytest.mark.skipif(os.environ.get("HF_API_TOKEN", "") == "", reason="HF_API_TOKEN is not set")
async def test_run_async_concurrent_requests(self):
"""

test/components/embedders/test_sentence_transformers_text_embedder.py

@@ -267,6 +267,7 @@ class TestSentenceTransformersTextEmbedder:
embedder.run(text=list_integers_input)
@pytest.mark.integration
@pytest.mark.slow
def test_run_trunc(self, monkeypatch):
"""
sentence-transformers/paraphrase-albert-small-v2 maps sentences & paragraphs to a 768 dimensional dense vector space
@@ -289,6 +290,7 @@ class TestSentenceTransformersTextEmbedder:
assert len(embedding_trunc) == 128
@pytest.mark.integration
@pytest.mark.slow
def test_run_quantization(self):
"""
sentence-transformers/paraphrase-albert-small-v2 maps sentences & paragraphs to a 768 dimensional dense vector space
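
Both docstrings refer to this model's native 768-dimensional output, and the assertion above checks a truncated 128-dimensional result. A hedged sketch of the underlying mechanism in plain sentence-transformers, assuming its `truncate_dim` option (available in recent releases):

```python
from sentence_transformers import SentenceTransformer

# Assumption: a sentence-transformers version that supports truncate_dim.
model = SentenceTransformer(
    "sentence-transformers/paraphrase-albert-small-v2", truncate_dim=128
)
embedding = model.encode("The quick brown fox jumps over the lazy dog.")
assert len(embedding) == 128  # cropped from the native 768 dimensions
```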

test/components/evaluators/test_sas_evaluator.py

@@ -104,6 +104,7 @@ class TestSASEvaluator:
evaluator.run(ground_truth_answers=ground_truths, predicted_answers=predictions)
@pytest.mark.integration
@pytest.mark.slow
def test_run_with_matching_predictions(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator()
@@ -125,6 +126,7 @@ class TestSASEvaluator:
assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0])
@pytest.mark.integration
@pytest.mark.slow
def test_run_with_single_prediction(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator()
@@ -139,6 +141,7 @@ class TestSASEvaluator:
assert result["individual_scores"] == pytest.approx([0.689089], abs=1e-5)
@pytest.mark.integration
@pytest.mark.slow
def test_run_with_mismatched_predictions(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator()
@@ -159,6 +162,7 @@ class TestSASEvaluator:
assert result["individual_scores"] == pytest.approx([0.689089, 0.870389, 0.908679], abs=1e-5)
@pytest.mark.integration
@pytest.mark.slow
def test_run_with_bi_encoder_model(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator(model="sentence-transformers/all-mpnet-base-v2")
@@ -179,6 +183,7 @@ class TestSASEvaluator:
assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0])
@pytest.mark.integration
@pytest.mark.slow
def test_run_with_cross_encoder_model(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator(model="cross-encoder/ms-marco-MiniLM-L-6-v2")
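
Pieced together from the tests above, a minimal SASEvaluator usage sketch; the model name and output keys are taken from these tests, everything else is an assumption:

```python
from haystack.components.evaluators import SASEvaluator

evaluator = SASEvaluator(model="sentence-transformers/all-mpnet-base-v2")
evaluator.warm_up()
result = evaluator.run(
    ground_truth_answers=["Berlin is the capital of Germany."],
    predicted_answers=["Berlin is the capital of Germany."],
)
# Identical answers score ~1.0, per test_run_with_matching_predictions.
print(result["score"], result["individual_scores"])
```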

test/components/generators/chat/test_hugging_face_api.py

@@ -574,6 +574,7 @@ class TestHuggingFaceAPIChatGenerator:
}
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
@@ -602,6 +603,7 @@ class TestHuggingFaceAPIChatGenerator:
assert "completion_tokens" in response["replies"][0].meta["usage"]
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
@@ -635,6 +637,7 @@ class TestHuggingFaceAPIChatGenerator:
assert "completion_tokens" in response_meta["usage"]
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
@@ -829,6 +832,7 @@ class TestHuggingFaceAPIChatGenerator:
}
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
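
For orientation, a hedged sketch of the live chat call these tests make. The `usage` keys come from the assertions above; the constructor arguments follow haystack's HuggingFaceAPIChatGenerator as an assumption, and the model name is a placeholder:

```python
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.dataclasses import ChatMessage

generator = HuggingFaceAPIChatGenerator(
    api_type="serverless_inference_api",
    api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},  # placeholder model
)
response = generator.run(messages=[ChatMessage.from_user("Briefly, what is NLP?")])
reply = response["replies"][0]
print(reply.text)
print(reply.meta["usage"]["completion_tokens"])  # usage keys asserted in the tests
```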

test/components/generators/chat/test_hugging_face_local.py

@@ -345,6 +345,7 @@ class TestHuggingFaceLocalChatGenerator:
mock_convert.assert_any_call(messages[1])
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.flaky(reruns=3, reruns_delay=10)
def test_live_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811

test/components/generators/test_hugging_face_api.py

@@ -291,6 +291,7 @@ class TestHuggingFaceAPIGenerator:
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
@@ -322,6 +323,7 @@ class TestHuggingFaceAPIGenerator:
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",

test/components/generators/test_hugging_face_local_generator.py

@@ -454,6 +454,7 @@ class TestHuggingFaceLocalGenerator:
assert criteria(generated_text_ids, scores=None) is True
@pytest.mark.integration
@pytest.mark.slow
def test_hf_pipeline_runs_with_our_criteria(self, monkeypatch):
"""Test that creating our own StopWordsCriteria and passing it to a Huggingface pipeline works."""
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
@@ -467,6 +468,7 @@ class TestHuggingFaceLocalGenerator:
@pytest.mark.integration
@pytest.mark.flaky(reruns=3, reruns_delay=10)
@pytest.mark.slow
def test_live_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
llm = HuggingFaceLocalGenerator(model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50})
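
test_hf_pipeline_runs_with_our_criteria verifies that a custom stopping criterion plugs into a Hugging Face pipeline. StopWordsCriteria itself is haystack-internal; below is a hypothetical stand-in showing the general transformers pattern (model name reused from test_live_run above):

```python
from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList, pipeline

class StopOnWords(StoppingCriteria):
    """Stop generation once any stop word appears in the decoded output."""

    def __init__(self, tokenizer, stop_words):
        self.tokenizer = tokenizer
        self.stop_words = stop_words

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
        return any(word in text for word in self.stop_words)

model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = pipeline("text-generation", model=model_name, tokenizer=tokenizer)
result = generator(
    "Count upwards: one, two,",
    max_new_tokens=30,
    stopping_criteria=StoppingCriteriaList([StopOnWords(tokenizer, ["five"])]),
)
print(result[0]["generated_text"])
```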

test/components/rankers/test_sentence_transformers_diversity.py

@@ -577,6 +577,7 @@ class TestSentenceTransformersDiversityRanker:
assert Pipeline.loads(pipe_serialized) == pipe
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
def test_run(self, similarity, monkeypatch):
"""
@@ -605,6 +606,7 @@ class TestSentenceTransformersDiversityRanker:
assert ranked_order == expected_order
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
def test_run_real_world_use_case(self, similarity, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
@@ -678,6 +680,7 @@ class TestSentenceTransformersDiversityRanker:
assert result_content == expected_content
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
def test_run_with_maximum_margin_relevance_strategy(self, similarity, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
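
The ranker tests are parametrized over two similarity functions; for reference, the arithmetic difference in a few lines:

```python
import numpy as np

def dot_product(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b))

def cosine(a: np.ndarray, b: np.ndarray) -> float:
    # Cosine is the dot product of L2-normalized vectors, so magnitude is ignored.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

a, b = np.array([1.0, 2.0]), np.array([2.0, 4.0])
print(dot_product(a, b))  # 10.0
print(cosine(a, b))       # 1.0 -- parallel vectors, regardless of length
```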

test/components/rankers/test_transformers_similarity.py

@@ -336,35 +336,19 @@ class TestSimilarityRanker:
assert ranker.device == ComponentDevice.from_multiple(DeviceMap.from_hf({"layer_1": 1, "classifier": "cpu"}))
@pytest.mark.integration
@pytest.mark.parametrize(
"query,docs_before_texts,expected_first_text,scores",
[
(
"City in Bosnia and Herzegovina",
["Berlin", "Belgrade", "Sarajevo"],
"Sarajevo",
[2.2864143829792738e-05, 0.00012495707778725773, 0.009869757108390331],
),
(
"Machine learning",
["Python", "Bakery in Paris", "Tesla Giga Berlin"],
"Python",
[1.9063229046878405e-05, 1.434577916370472e-05, 1.3049247172602918e-05],
),
(
"Cubist movement",
["Nirvana", "Pablo Picasso", "Coffee"],
"Pablo Picasso",
[1.3313065210240893e-05, 9.90335684036836e-05, 1.3518535524781328e-05],
),
],
)
def test_run(self, query, docs_before_texts, expected_first_text, scores):
@pytest.mark.slow
def test_run(self):
"""
Test if the component ranks documents correctly.
"""
ranker = TransformersSimilarityRanker(model="cross-encoder/ms-marco-MiniLM-L-6-v2")
ranker.warm_up()
query = "City in Bosnia and Herzegovina"
docs_before_texts = ["Berlin", "Belgrade", "Sarajevo"]
expected_first_text = "Sarajevo"
expected_scores = [2.2864143829792738e-05, 0.00012495707778725773, 0.009869757108390331]
docs_before = [Document(content=text) for text in docs_before_texts]
output = ranker.run(query=query, documents=docs_before)
docs_after = output["documents"]
@@ -372,41 +356,25 @@ class TestSimilarityRanker:
assert len(docs_after) == 3
assert docs_after[0].content == expected_first_text
sorted_scores = sorted(scores, reverse=True)
sorted_scores = sorted(expected_scores, reverse=True)
assert docs_after[0].score == pytest.approx(sorted_scores[0], abs=1e-6)
assert docs_after[1].score == pytest.approx(sorted_scores[1], abs=1e-6)
assert docs_after[2].score == pytest.approx(sorted_scores[2], abs=1e-6)
@pytest.mark.integration
@pytest.mark.parametrize(
"query,docs_before_texts,expected_first_text,scores",
[
(
"City in Bosnia and Herzegovina",
["Berlin", "Belgrade", "Sarajevo"],
"Sarajevo",
[2.2864143829792738e-05, 0.00012495707778725773, 0.009869757108390331],
),
(
"Machine learning",
["Python", "Bakery in Paris", "Tesla Giga Berlin"],
"Python",
[1.9063229046878405e-05, 1.434577916370472e-05, 1.3049247172602918e-05],
),
(
"Cubist movement",
["Nirvana", "Pablo Picasso", "Coffee"],
"Pablo Picasso",
[1.3313065210240893e-05, 9.90335684036836e-05, 1.3518535524781328e-05],
),
],
)
def test_run_small_batch_size(self, query, docs_before_texts, expected_first_text, scores):
@pytest.mark.slow
def test_run_small_batch_size(self):
"""
Test if the component ranks documents correctly.
"""
ranker = TransformersSimilarityRanker(model="cross-encoder/ms-marco-MiniLM-L-6-v2", batch_size=2)
ranker.warm_up()
query = "City in Bosnia and Herzegovina"
docs_before_texts = ["Berlin", "Belgrade", "Sarajevo"]
expected_first_text = "Sarajevo"
expected_scores = [2.2864143829792738e-05, 0.00012495707778725773, 0.009869757108390331]
docs_before = [Document(content=text) for text in docs_before_texts]
output = ranker.run(query=query, documents=docs_before)
docs_after = output["documents"]
@@ -414,41 +382,37 @@ class TestSimilarityRanker:
assert len(docs_after) == 3
assert docs_after[0].content == expected_first_text
sorted_scores = sorted(scores, reverse=True)
sorted_scores = sorted(expected_scores, reverse=True)
assert docs_after[0].score == pytest.approx(sorted_scores[0], abs=1e-6)
assert docs_after[1].score == pytest.approx(sorted_scores[1], abs=1e-6)
assert docs_after[2].score == pytest.approx(sorted_scores[2], abs=1e-6)
# Returns an empty list if no documents are provided
@pytest.mark.integration
def test_returns_empty_list_if_no_documents_are_provided(self):
sampler = TransformersSimilarityRanker()
sampler.warm_up()
sampler.model = MagicMock()
output = sampler.run(query="City in Germany", documents=[])
assert not output["documents"]
# Raises ComponentError if model is not warmed up
@pytest.mark.integration
def test_raises_component_error_if_model_not_warmed_up(self):
sampler = TransformersSimilarityRanker()
with pytest.raises(RuntimeError):
sampler.run(query="query", documents=[Document(content="document")])
@pytest.mark.integration
@pytest.mark.parametrize(
"query,docs_before_texts,expected_first_text",
[
("City in Bosnia and Herzegovina", ["Berlin", "Belgrade", "Sarajevo"], "Sarajevo"),
("Machine learning", ["Python", "Bakery in Paris", "Tesla Giga Berlin"], "Python"),
("Cubist movement", ["Nirvana", "Pablo Picasso", "Coffee"], "Pablo Picasso"),
],
)
def test_run_top_k(self, query, docs_before_texts, expected_first_text):
@pytest.mark.slow
def test_run_top_k(self):
"""
Test if the component ranks documents correctly with a custom top_k.
"""
ranker = TransformersSimilarityRanker(model="cross-encoder/ms-marco-MiniLM-L-6-v2", top_k=2)
ranker.warm_up()
query = "City in Bosnia and Herzegovina"
docs_before_texts = ["Berlin", "Belgrade", "Sarajevo"]
expected_first_text = "Sarajevo"
docs_before = [Document(content=text) for text in docs_before_texts]
output = ranker.run(query=query, documents=docs_before)
docs_after = output["documents"]
@@ -460,6 +424,7 @@ class TestSimilarityRanker:
assert [doc.score for doc in docs_after] == sorted_scores
@pytest.mark.integration
@pytest.mark.slow
def test_run_single_document(self):
"""
Test if the component runs with a single document.

test/components/readers/test_extractive.py

@@ -776,6 +776,7 @@ class TestDeduplication:
@pytest.mark.integration
@pytest.mark.slow
def test_t5(monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
reader = ExtractiveReader("sjrhuschlee/flan-t5-base-squad2")
@@ -801,6 +802,7 @@ def test_t5(monkeypatch):
@pytest.mark.integration
@pytest.mark.slow
def test_roberta(monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
reader = ExtractiveReader("deepset/tinyroberta-squad2")
@@ -831,6 +833,7 @@ def test_roberta(monkeypatch):
@pytest.mark.integration
@pytest.mark.slow
def test_matches_hf_pipeline(monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
reader = ExtractiveReader(
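
For orientation, a hedged usage sketch of the reader under test; the model name comes from test_roberta above, while the run() signature and output shape are assumptions about haystack's ExtractiveReader:

```python
from haystack import Document
from haystack.components.readers import ExtractiveReader

reader = ExtractiveReader("deepset/tinyroberta-squad2")
reader.warm_up()
result = reader.run(
    query="Who lives in Berlin?",
    documents=[Document(content="My name is Carla and I live in Berlin.")],
    top_k=1,
)
print(result["answers"][0].data)  # the extracted span, e.g. "Carla"
```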

test/components/routers/test_transformers_text_router.py

@@ -172,6 +172,7 @@ class TestTransformersTextRouter:
assert out == {"en": "What is the color of the sky?"}
@pytest.mark.integration
@pytest.mark.slow
def test_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection")
@@ -203,6 +204,7 @@ class TestTransformersTextRouter:
assert out == {"en": "What is the color of the sky?"}
@pytest.mark.integration
@pytest.mark.slow
def test_wrong_labels(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection", labels=["en", "de"])
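
Condensed from the assertions above: the router emits the input text on the output edge named after the predicted label.

```python
from haystack.components.routers import TransformersTextRouter

router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection")
router.warm_up()
out = router.run(text="What is the color of the sky?")
assert out == {"en": "What is the color of the sky?"}  # routed to the "en" edge
```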

test/components/routers/test_zero_shot_text_router.py

@@ -106,6 +106,7 @@ class TestTransformersZeroShotTextRouter:
assert out == {"query": "What is the color of the sky?"}
@pytest.mark.integration
@pytest.mark.slow
def test_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
router = TransformersZeroShotTextRouter(labels=["query", "passage"])
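
The zero-shot variant needs no fine-tuned label set; the candidate labels themselves define the output edges (condensed from the test above):

```python
from haystack.components.routers import TransformersZeroShotTextRouter

router = TransformersZeroShotTextRouter(labels=["query", "passage"])
router.warm_up()
out = router.run(text="What is the color of the sky?")
assert out == {"query": "What is the color of the sky?"}
```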