diff --git a/haystack/components/evaluators/context_relevance.py b/haystack/components/evaluators/context_relevance.py
index 9988bdeb0..9bd299bbc 100644
--- a/haystack/components/evaluators/context_relevance.py
+++ b/haystack/components/evaluators/context_relevance.py
@@ -67,6 +67,7 @@ class ContextRelevanceEvaluator(LLMEvaluator):
     def __init__(
         self,
         examples: Optional[List[Dict[str, Any]]] = None,
+        progress_bar: bool = True,
         api: str = "openai",
         api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
     ):
@@ -89,12 +90,13 @@ class ContextRelevanceEvaluator(LLMEvaluator):
                     "statement_scores": [1],
                 },
             }]
+        :param progress_bar:
+            Whether to show a progress bar during the evaluation.
         :param api:
             The API to use for calling an LLM through a Generator.
             Supported APIs: "openai".
         :param api_key:
             The API key.
-
         """
         self.instructions = (
             "Your task is to judge how relevant the provided context is for answering a question. "
@@ -115,6 +117,7 @@ class ContextRelevanceEvaluator(LLMEvaluator):
             examples=self.examples,
             api=self.api,
             api_key=self.api_key,
+            progress_bar=progress_bar,
         )
 
     @component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]])
diff --git a/haystack/components/evaluators/faithfulness.py b/haystack/components/evaluators/faithfulness.py
index 2bcbb9b08..1e561f669 100644
--- a/haystack/components/evaluators/faithfulness.py
+++ b/haystack/components/evaluators/faithfulness.py
@@ -81,6 +81,7 @@ class FaithfulnessEvaluator(LLMEvaluator):
     def __init__(
         self,
         examples: Optional[List[Dict[str, Any]]] = None,
+        progress_bar: bool = True,
         api: str = "openai",
         api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
     ):
@@ -104,6 +105,8 @@ class FaithfulnessEvaluator(LLMEvaluator):
                     "statement_scores": [1, 0],
                 },
             }]
+        :param progress_bar:
+            Whether to show a progress bar during the evaluation.
         :param api:
             The API to use for calling an LLM through a Generator.
             Supported APIs: "openai".
@@ -131,6 +134,7 @@ class FaithfulnessEvaluator(LLMEvaluator):
             examples=self.examples,
             api=self.api,
             api_key=self.api_key,
+            progress_bar=progress_bar,
         )
 
     @component.output_types(individual_scores=List[int], score=float, results=List[Dict[str, Any]])
diff --git a/haystack/components/evaluators/llm_evaluator.py b/haystack/components/evaluators/llm_evaluator.py
index e4eebbd9a..9766f236a 100644
--- a/haystack/components/evaluators/llm_evaluator.py
+++ b/haystack/components/evaluators/llm_evaluator.py
@@ -5,6 +5,8 @@
 import json
 from typing import Any, Dict, List, Tuple, Type
 
+from tqdm import tqdm
+
 from haystack import component, default_from_dict, default_to_dict
 from haystack.components.builders import PromptBuilder
 from haystack.components.generators import OpenAIGenerator
@@ -50,6 +52,7 @@ class LLMEvaluator:
         inputs: List[Tuple[str, Type[List]]],
         outputs: List[str],
         examples: List[Dict[str, Any]],
+        progress_bar: bool = True,
         *,
         api: str = "openai",
         api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
@@ -70,6 +73,8 @@ class LLMEvaluator:
             `outputs` parameters.
             Each example is a dictionary with keys "inputs" and "outputs"
             They contain the input and output as dictionaries respectively.
+        :param progress_bar:
+            Whether to show a progress bar during the evaluation.
         :param api:
             The API to use for calling an LLM through a Generator.
             Supported APIs: "openai".
@@ -78,13 +83,13 @@ class LLMEvaluator:
 
         """
         self.validate_init_parameters(inputs, outputs, examples)
-
         self.instructions = instructions
         self.inputs = inputs
         self.outputs = outputs
         self.examples = examples
         self.api = api
         self.api_key = api_key
+        self.progress_bar = progress_bar
 
         if api == "openai":
             self.generator = OpenAIGenerator(
@@ -173,7 +178,7 @@ class LLMEvaluator:
         list_of_input_names_to_values = [dict(zip(input_names, v)) for v in values]
 
         results = []
-        for input_names_to_values in list_of_input_names_to_values:
+        for input_names_to_values in tqdm(list_of_input_names_to_values, disable=not self.progress_bar):
             prompt = self.builder.run(**input_names_to_values)
             result = self.generator.run(prompt=prompt["prompt"])
 
@@ -243,6 +248,7 @@ class LLMEvaluator:
             examples=self.examples,
             api=self.api,
             api_key=self.api_key.to_dict(),
+            progress_bar=self.progress_bar,
         )
 
     @classmethod
diff --git a/test/components/evaluators/test_llm_evaluator.py b/test/components/evaluators/test_llm_evaluator.py
index b1d41e000..1b28dab84 100644
--- a/test/components/evaluators/test_llm_evaluator.py
+++ b/test/components/evaluators/test_llm_evaluator.py
@@ -206,6 +206,7 @@ class TestLLMEvaluator:
                 "instructions": "test-instruction",
                 "inputs": [("predicted_answers", List[str])],
                 "outputs": ["score"],
+                "progress_bar": True,
                 "examples": [
                     {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                 ],
@@ -266,6 +267,7 @@ class TestLLMEvaluator:
                 "instructions": "test-instruction",
                 "inputs": [("predicted_answers", List[str])],
                 "outputs": ["custom_score"],
+                "progress_bar": True,
                 "examples": [
                     {
                         "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
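
For reviewers, a minimal usage sketch of the new parameter, not part of the diff itself. The output keys match the @component.output_types declaration above; the run() keyword arguments are assumed to follow the documented FaithfulnessEvaluator API in Haystack 2.x.

    from haystack.components.evaluators import FaithfulnessEvaluator

    # progress_bar=True is the default after this change;
    # pass False to silence the tqdm bar in scripts or CI.
    evaluator = FaithfulnessEvaluator(progress_bar=False)

    # Reads the API key from OPENAI_API_KEY (the default Secret in the diff).
    result = evaluator.run(
        questions=["Who created Python?"],
        contexts=[["Python was created by Guido van Rossum."]],
        predicted_answers=["Guido van Rossum created Python."],
    )
    print(result["score"])              # aggregate score (float)
    print(result["individual_scores"])  # per-input scores (List[int])

Since progress_bar is stored on the instance and serialized in to_dict()/from_dict(), the setting survives pipeline round-trips, which is why the test fixtures above now include "progress_bar": True.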