import random

import pytest

from haystack import Document, ComponentError
from haystack.components.samplers.top_p import TopPSampler


class TestTopPSampler:
    def test_run_scores_from_metadata(self):
        """
        Test if the component runs correctly with scores already in the metadata.
        """
        sampler = TopPSampler(top_p=0.95, score_field="similarity_score")
        docs = [
            Document(content="Berlin", meta={"similarity_score": -10.6}),
            Document(content="Belgrade", meta={"similarity_score": -8.9}),
            Document(content="Sarajevo", meta={"similarity_score": -4.6}),
        ]
        output = sampler.run(documents=docs)
        docs = output["documents"]
        assert len(docs) == 1
        assert docs[0].content == "Sarajevo"

    def test_run_scores(self):
        """
        Test if the component runs correctly with scores in the Document score field.
        """
        sampler = TopPSampler(top_p=0.99)
        docs = [
            Document(content="Berlin", score=-10.6),
            Document(content="Belgrade", score=-8.9),
            Document(content="Sarajevo", score=-4.6),
        ]

        random.shuffle(docs)
        sorted_scores = sorted([doc.score for doc in docs], reverse=True)

        # top_p = 0.99 will get the top 1 document
        output = sampler.run(documents=docs)
        docs_filtered = output["documents"]
        assert len(docs_filtered) == 1
        assert docs_filtered[0].content == "Sarajevo"

        assert [doc.score for doc in docs_filtered] == sorted_scores[:1]

    def test_run_scores_top_p_1(self):
        """
        Test if the component runs correctly top_p=1.
        """
        sampler = TopPSampler(top_p=1.0)
        docs = [
            Document(content="Berlin", score=-10.6),
            Document(content="Belgrade", score=-8.9),
            Document(content="Sarajevo", score=-4.6),
        ]

        random.shuffle(docs)
        output = sampler.run(documents=docs)
        docs_filtered = output["documents"]
        assert len(docs_filtered) == len(docs)
        assert docs_filtered[0].content == "Sarajevo"

        assert [doc.score for doc in docs_filtered] == sorted([doc.score for doc in docs], reverse=True)

    #  Returns an empty list if no documents are provided

    def test_returns_empty_list_if_no_documents_are_provided(self):
        sampler = TopPSampler()
        output = sampler.run(documents=[])
        assert output["documents"] == []

    def test_run_scores_no_metadata_present(self):
        """
        Test if the component runs correctly with scores missing from the metadata yet being specified in the
        score_field.
        """
        sampler = TopPSampler(top_p=0.95, score_field="similarity_score")
        docs = [
            Document(content="Berlin", score=-10.6),
            Document(content="Belgrade", score=-8.9),
            Document(content="Sarajevo", score=-4.6),
        ]
        with pytest.raises(ComponentError, match="Score field 'similarity_score' not found"):
            sampler.run(documents=docs)