haystack/test/components/eval/test_sas_evaluator.py

import pytest

from haystack.components.eval import SASEvaluator
from haystack.utils.device import ComponentDevice


class TestSASEvaluator:
    """Tests for the ``SASEvaluator`` component.

    The first three tests cover construction and (de)serialization and only
    need a fake ``HF_API_TOKEN`` in the environment. The remaining tests are
    marked ``integration`` and call ``run`` on real predictions.
    """

    def test_init_default(self, monkeypatch):
        """Default constructor arguments end up on the private attributes."""
        monkeypatch.setenv("HF_API_TOKEN", "fake-token")
        labels = ["label1", "label2", "label3"]
        evaluator = SASEvaluator(labels=labels)

        assert evaluator._labels == labels
        assert evaluator._regexes_to_ignore is None
        assert evaluator._ignore_case is False
        assert evaluator._ignore_punctuation is False
        assert evaluator._ignore_numbers is False
        assert evaluator._model == "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        assert evaluator._batch_size == 32
        assert evaluator._device is None
        assert evaluator._token.resolve_value() == "fake-token"

    def test_to_dict(self, monkeypatch):
        """``to_dict`` emits the type name and every init parameter, including device and token."""
        monkeypatch.setenv("HF_API_TOKEN", "fake-token")

        labels = ["label1", "label2", "label3"]
        evaluator = SASEvaluator(labels=labels, device=ComponentDevice.from_str("cuda:0"))

        expected_dict = {
            "type": "haystack.components.eval.sas_evaluator.SASEvaluator",
            "init_parameters": {
                "labels": labels,
                "regexes_to_ignore": None,
                "ignore_case": False,
                "ignore_punctuation": False,
                "ignore_numbers": False,
                "model": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
                "batch_size": 32,
                "device": {"type": "single", "device": "cuda:0"},
                "token": {"type": "env_var", "env_vars": ["HF_API_TOKEN"], "strict": False},
            },
        }
        assert evaluator.to_dict() == expected_dict

    def test_from_dict(self, monkeypatch):
        """``from_dict`` rebuilds an evaluator whose attributes match the serialized parameters."""
        monkeypatch.setenv("HF_API_TOKEN", "fake-token")
        evaluator = SASEvaluator.from_dict(
            {
                "type": "haystack.components.eval.sas_evaluator.SASEvaluator",
                "init_parameters": {
                    "labels": ["label1", "label2", "label3"],
                    "regexes_to_ignore": None,
                    "ignore_case": False,
                    "ignore_punctuation": False,
                    "ignore_numbers": False,
                    "model": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
                    "batch_size": 32,
                    "device": {"type": "single", "device": "cuda:0"},
                    "token": {"type": "env_var", "env_vars": ["HF_API_TOKEN"], "strict": False},
                },
            }
        )

        assert evaluator._labels == ["label1", "label2", "label3"]
        assert evaluator._regexes_to_ignore is None
        assert evaluator._ignore_case is False
        assert evaluator._ignore_punctuation is False
        assert evaluator._ignore_numbers is False
        assert evaluator._model == "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        assert evaluator._batch_size == 32
        assert evaluator._device.to_torch_str() == "cuda:0"
        assert evaluator._token.resolve_value() == "fake-token"

    @pytest.mark.integration
    def test_run_with_empty_inputs(self):
        """Empty labels and predictions are expected to yield a zero score, not an error."""
        evaluator = SASEvaluator(labels=[])
        result = evaluator.run(predictions=[])
        assert len(result) == 2
        assert result["sas"] == 0.0
        assert result["scores"] == [0.0]

    @pytest.mark.integration
    def test_run_with_different_lengths(self):
        """A prediction list longer than the label list is expected to raise ``ValueError``."""
        labels = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
        ]
        evaluator = SASEvaluator(labels=labels)

        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        with pytest.raises(ValueError):
            # Keyword form, consistent with every other ``run`` call in this class.
            evaluator.run(predictions=predictions)

    @pytest.mark.integration
    def test_run_with_matching_predictions(self):
        """Predictions identical to the labels are expected to score 1.0 each."""
        labels = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(labels=labels)
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)

        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_single_prediction(self):
        """A single label/prediction pair produces a one-element ``scores`` list."""
        labels = ["US $2.3 billion"]
        evaluator = SASEvaluator(labels=labels)

        result = evaluator.run(predictions=["A construction budget of US $2.3 billion"])
        assert len(result) == 2
        assert result["sas"] == pytest.approx(0.689089, abs=1e-5)
        assert result["scores"] == pytest.approx([0.689089], abs=1e-5)

    @pytest.mark.integration
    def test_run_with_mismatched_predictions(self):
        """Paraphrased predictions are expected to score below 1.0 with known reference values."""
        labels = [
            "US $2.3 billion",
            "Paris's cultural magnificence is symbolized by the Eiffel Tower",
            "Japan was transformed into a modernized world power after the Meiji Restoration.",
        ]
        evaluator = SASEvaluator(labels=labels)
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        # Same absolute tolerance as the per-score check below, so the aggregate
        # is not held to a stricter bound than the values it is derived from.
        assert result["sas"] == pytest.approx(0.8227189, abs=1e-5)
        assert result["scores"] == pytest.approx([0.689089, 0.870389, 0.908679], abs=1e-5)

    @pytest.mark.integration
    def test_run_with_ignore_case(self):
        """With ``ignore_case=True``, labels differing only in letter case are expected to score 1.0."""
        labels = [
            "A construction budget of US $2.3 BILLION",
            "The EIFFEL TOWER, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The MEIJI RESTORATION in 1868 transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(labels=labels, ignore_case=True)
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_ignore_punctuation(self):
        """With ``ignore_punctuation=True``, texts differing only in punctuation are expected to score 1.0."""
        labels = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower completed in 1889 symbolizes Paris's cultural magnificence",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power",
        ]
        evaluator = SASEvaluator(labels=labels, ignore_punctuation=True)
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_ignore_numbers(self):
        """With ``ignore_numbers=True``, texts differing only in digits are expected to score 1.0."""
        labels = [
            "A construction budget of US $10.3 billion",
            "The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(labels=labels, ignore_numbers=True)
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        # Result-shape check, matching the other scoring tests in this class.
        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_regex_to_ignore(self):
        """A single ignore regex matching digit runs is expected to neutralize numeric differences."""
        labels = [
            "A construction budget of US $10.3 billion",
            "The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(labels=labels, regexes_to_ignore=[r"\d+"])
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_multiple_regex_to_ignore(self):
        """Several ignore regexes (digits and non-word/non-space characters) can be combined."""
        labels = [
            "A construction budget of US $10.3 billion",
            "The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(labels=labels, regexes_to_ignore=[r"\d+", r"[^\w\s]"])
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_multiple_ignore_parameters(self):
        """All ignore flags and a custom regex can be enabled together."""
        labels = [
            "A construction budget of US $10.3 billion",
            "The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(
            labels=labels,
            ignore_numbers=True,
            ignore_punctuation=True,
            ignore_case=True,
            regexes_to_ignore=[r"[^\w\s\d]+"],
        )
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_bi_encoder_model(self):
        """Identical texts are expected to score 1.0 with the ``all-mpnet-base-v2`` model."""
        labels = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(labels=labels, model="sentence-transformers/all-mpnet-base-v2")
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        assert result["sas"] == pytest.approx(1.0)
        assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])

    @pytest.mark.integration
    def test_run_with_cross_encoder_model(self):
        """Identical texts are expected to score just below 1.0 with the ``ms-marco-MiniLM-L-6-v2`` model."""
        labels = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        evaluator = SASEvaluator(labels=labels, model="cross-encoder/ms-marco-MiniLM-L-6-v2")
        predictions = [
            "A construction budget of US $2.3 billion",
            "The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",
            "The Meiji Restoration in 1868 transformed Japan into a modernized world power.",
        ]
        result = evaluator.run(predictions=predictions)
        assert len(result) == 2
        assert result["sas"] == pytest.approx(0.999967, abs=1e-5)
        assert result["scores"] == pytest.approx([0.9999765157699585, 0.999968409538269, 0.9999572038650513], abs=1e-5)
feat: Add `SASEvaluator` component (#6980)

* Add SASEvaluator component
* Add release notes
* Delete old tests
* Remove SAS metric in old API
* Avoid importing whole numpy package

2024-02-14 16:16:22 +01:00
			`import pytest`

			`from haystack.components.eval import SASEvaluator`
			`from haystack.utils.device import ComponentDevice`


			`class TestSASEvaluator:`
			`def test_init_default(self, monkeypatch):`
			`monkeypatch.setenv("HF_API_TOKEN", "fake-token")`
			`labels = ["label1", "label2", "label3"]`
			`evaluator = SASEvaluator(labels=labels)`

			`assert evaluator._labels == labels`
			`assert evaluator._regexes_to_ignore is None`
			`assert evaluator._ignore_case is False`
			`assert evaluator._ignore_punctuation is False`
			`assert evaluator._ignore_numbers is False`
			`assert evaluator._model == "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"`
			`assert evaluator._batch_size == 32`
			`assert evaluator._device is None`
			`assert evaluator._token.resolve_value() == "fake-token"`

			`def test_to_dict(self, monkeypatch):`
			`monkeypatch.setenv("HF_API_TOKEN", "fake-token")`

			`labels = ["label1", "label2", "label3"]`
			`evaluator = SASEvaluator(labels=labels, device=ComponentDevice.from_str("cuda:0"))`

			`expected_dict = {`
			`"type": "haystack.components.eval.sas_evaluator.SASEvaluator",`
			`"init_parameters": {`
			`"labels": labels,`
			`"regexes_to_ignore": None,`
			`"ignore_case": False,`
			`"ignore_punctuation": False,`
			`"ignore_numbers": False,`
			`"model": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",`
			`"batch_size": 32,`
			`"device": {"type": "single", "device": "cuda:0"},`
			`"token": {"type": "env_var", "env_vars": ["HF_API_TOKEN"], "strict": False},`
			`},`
			`}`
			`assert evaluator.to_dict() == expected_dict`

			`def test_from_dict(self, monkeypatch):`
			`monkeypatch.setenv("HF_API_TOKEN", "fake-token")`
			`evaluator = SASEvaluator.from_dict(`
			`{`
			`"type": "haystack.components.eval.sas_evaluator.SASEvaluator",`
			`"init_parameters": {`
			`"labels": ["label1", "label2", "label3"],`
			`"regexes_to_ignore": None,`
			`"ignore_case": False,`
			`"ignore_punctuation": False,`
			`"ignore_numbers": False,`
			`"model": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",`
			`"batch_size": 32,`
			`"device": {"type": "single", "device": "cuda:0"},`
			`"token": {"type": "env_var", "env_vars": ["HF_API_TOKEN"], "strict": False},`
			`},`
			`}`
			`)`

			`assert evaluator._labels == ["label1", "label2", "label3"]`
			`assert evaluator._regexes_to_ignore is None`
			`assert evaluator._ignore_case is False`
			`assert evaluator._ignore_punctuation is False`
			`assert evaluator._ignore_numbers is False`
			`assert evaluator._model == "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"`
			`assert evaluator._batch_size == 32`
			`assert evaluator._device.to_torch_str() == "cuda:0"`
			`assert evaluator._token.resolve_value() == "fake-token"`

			`@pytest.mark.integration`
			`def test_run_with_empty_inputs(self):`
			`evaluator = SASEvaluator(labels=[])`
			`result = evaluator.run(predictions=[])`
			`assert len(result) == 2`
			`assert result["sas"] == 0.0`
			`assert result["scores"] == [0.0]`

			`@pytest.mark.integration`
			`def test_run_with_different_lengths(self):`
			`labels = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`]`
			`evaluator = SASEvaluator(labels=labels)`

			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`with pytest.raises(ValueError):`
			`evaluator.run(predictions)`

			`@pytest.mark.integration`
			`def test_run_with_matching_predictions(self):`
			`labels = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(labels=labels)`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`

			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_single_prediction(self):`
			`labels = ["US $2.3 billion"]`
			`evaluator = SASEvaluator(labels=labels)`

			`result = evaluator.run(predictions=["A construction budget of US $2.3 billion"])`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(0.689089, abs=1e-5)`
			`assert result["scores"] == pytest.approx([0.689089], abs=1e-5)`

			`@pytest.mark.integration`
			`def test_run_with_mismatched_predictions(self):`
			`labels = [`
			`"US $2.3 billion",`
			`"Paris's cultural magnificence is symbolized by the Eiffel Tower",`
			`"Japan was transformed into a modernized world power after the Meiji Restoration.",`
			`]`
			`evaluator = SASEvaluator(labels=labels)`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(0.8227189)`
			`assert result["scores"] == pytest.approx([0.689089, 0.870389, 0.908679], abs=1e-5)`

			`@pytest.mark.integration`
			`def test_run_with_ignore_case(self):`
			`labels = [`
			`"A construction budget of US $2.3 BILLION",`
			`"The EIFFEL TOWER, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The MEIJI RESTORATION in 1868 transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(labels=labels, ignore_case=True)`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_ignore_punctuation(self):`
			`labels = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower completed in 1889 symbolizes Paris's cultural magnificence",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power",`
			`]`
			`evaluator = SASEvaluator(labels=labels, ignore_punctuation=True)`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_ignore_numbers(self):`
			`labels = [`
			`"A construction budget of US $10.3 billion",`
			`"The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(labels=labels, ignore_numbers=True)`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_regex_to_ignore(self):`
			`labels = [`
			`"A construction budget of US $10.3 billion",`
			`"The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(labels=labels, regexes_to_ignore=[r"\d+"])`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_multiple_regex_to_ignore(self):`
			`labels = [`
			`"A construction budget of US $10.3 billion",`
			`"The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(labels=labels, regexes_to_ignore=[r"\d+", r"[^\w\s]"])`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_multiple_ignore_parameters(self):`
			`labels = [`
			`"A construction budget of US $10.3 billion",`
			`"The Eiffel Tower, completed in 2005, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1989, transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(`
			`labels=labels,`
			`ignore_numbers=True,`
			`ignore_punctuation=True,`
			`ignore_case=True,`
			`regexes_to_ignore=[r"[^\w\s\d]+"],`
			`)`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration, in 1868, transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_bi_encoder_model(self):`
			`labels = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(labels=labels, model="sentence-transformers/all-mpnet-base-v2")`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(1.0)`
			`assert result["scores"] == pytest.approx([1.0, 1.0, 1.0])`

			`@pytest.mark.integration`
			`def test_run_with_cross_encoder_model(self):`
			`labels = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`evaluator = SASEvaluator(labels=labels, model="cross-encoder/ms-marco-MiniLM-L-6-v2")`
			`predictions = [`
			`"A construction budget of US $2.3 billion",`
			`"The Eiffel Tower, completed in 1889, symbolizes Paris's cultural magnificence.",`
			`"The Meiji Restoration in 1868 transformed Japan into a modernized world power.",`
			`]`
			`result = evaluator.run(predictions=predictions)`
			`assert len(result) == 2`
			`assert result["sas"] == pytest.approx(0.999967, abs=1e-5)`
			`assert result["scores"] == pytest.approx([0.9999765157699585, 0.999968409538269, 0.9999572038650513], abs=1e-5)`