# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import os
from typing import List

import pytest

from haystack import Pipeline
from haystack.components.evaluators import LLMEvaluator
from haystack.utils.auth import Secret


class TestLLMEvaluator:
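    # These are unit tests: OPENAI_API_KEY is monkeypatched (or a token Secret is
    # passed) and the generator is stubbed where needed, so no request ever
    # reaches a real API.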
    def test_init_default(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("predicted_answers", List[str])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )
        assert component.api == "openai"
        assert component.generator.client.api_key == "test-api-key"
        assert component.api_params == {"generation_kwargs": {"response_format": {"type": "json_object"}, "seed": 42}}
        assert component.instructions == "test-instruction"
        assert component.inputs == [("predicted_answers", List[str])]
        assert component.outputs == ["score"]
        assert component.examples == [
            {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
        ]

    def test_init_fail_wo_openai_api_key(self, monkeypatch):
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        with pytest.raises(ValueError, match="None of the .* environment variables are set"):
            LLMEvaluator(
                api="openai",
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=["score"],
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )

    def test_init_with_parameters(self):
        component = LLMEvaluator(
            instructions="test-instruction",
            api_key=Secret.from_token("test-api-key"),
            api_params={"generation_kwargs": {"seed": 43}},
            inputs=[("predicted_answers", List[str])],
            outputs=["custom_score"],
            api="openai",
            examples=[
                {
                    "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                    "outputs": {"custom_score": 1},
                },
                {
                    "inputs": {"predicted_answers": "Football is the most popular sport."},
                    "outputs": {"custom_score": 0},
                },
            ],
        )
        assert component.generator.client.api_key == "test-api-key"
        assert component.api_params == {"generation_kwargs": {"response_format": {"type": "json_object"}, "seed": 43}}
        assert component.api == "openai"
        assert component.examples == [
            {"inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"}, "outputs": {"custom_score": 1}},
            {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"custom_score": 0}},
        ]
        assert component.instructions == "test-instruction"
        assert component.inputs == [("predicted_answers", List[str])]
        assert component.outputs == ["custom_score"]
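
    # __init__ validates its arguments eagerly: inputs must be a list of
    # (name, type) tuples, outputs a list of strings, and examples a list of
    # dicts keyed by "inputs"/"outputs", so each call below raises ValueError.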
    def test_init_with_invalid_parameters(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        # Invalid inputs
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs={("predicted_answers", List[str])},
                outputs=["score"],
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[(List[str], "predicted_answers")],
                outputs=["score"],
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[List[str]],
                outputs=["score"],
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs={("predicted_answers", str)},
                outputs=["score"],
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )
        # Invalid outputs
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs="score",
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=[["score"]],
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )
        # Invalid examples
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=["score"],
                examples={
                    "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                    "outputs": {"custom_score": 1},
                },
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=["score"],
                examples=[
                    [
                        {
                            "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                            "outputs": {"custom_score": 1},
                        }
                    ]
                ],
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=["score"],
                examples=[
                    {
                        "wrong_key": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                        "outputs": {"custom_score": 1},
                    }
                ],
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=["score"],
                examples=[
                    {
                        "inputs": [{"predicted_answers": "Damn, this is straight outta hell!!!"}],
                        "outputs": [{"custom_score": 1}],
                    }
                ],
            )
        with pytest.raises(ValueError):
            LLMEvaluator(
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=["score"],
                examples=[{"inputs": {1: "Damn, this is straight outta hell!!!"}, "outputs": {2: 1}}],
            )
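
    # to_dict/from_dict must round-trip every init parameter, including the
    # api_params defaults (JSON response_format and seed 42) filled in by __init__.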
    def test_to_dict_default(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("predicted_answers", List[str])],
            api_key=Secret.from_env_var("OPENAI_API_KEY"),
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )
        data = component.to_dict()
        assert data == {
            "type": "haystack.components.evaluators.llm_evaluator.LLMEvaluator",
            "init_parameters": {
                "api_params": {"generation_kwargs": {"response_format": {"type": "json_object"}, "seed": 42}},
                "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"},
                "api": "openai",
                "instructions": "test-instruction",
                "inputs": [["predicted_answers", "typing.List[str]"]],
                "outputs": ["score"],
                "progress_bar": True,
                "examples": [
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            },
        }

    def test_from_dict(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        data = {
            "type": "haystack.components.evaluators.llm_evaluator.LLMEvaluator",
            "init_parameters": {
                "api_params": {"generation_kwargs": {"response_format": {"type": "json_object"}, "seed": 42}},
                "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"},
                "api": "openai",
                "instructions": "test-instruction",
                "inputs": [["predicted_answers", "typing.List[str]"]],
                "outputs": ["score"],
                "examples": [
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            },
        }
        component = LLMEvaluator.from_dict(data)
        assert component.api == "openai"
        assert component.generator.client.api_key == "test-api-key"
        assert component.api_params == {"generation_kwargs": {"response_format": {"type": "json_object"}, "seed": 42}}
        assert component.instructions == "test-instruction"
        assert component.inputs == [("predicted_answers", List[str])]
        assert component.outputs == ["score"]
        assert component.examples == [
            {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
        ]

    def test_to_dict_with_parameters(self, monkeypatch):
        monkeypatch.setenv("ENV_VAR", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            api_key=Secret.from_env_var("ENV_VAR"),
            inputs=[("predicted_answers", List[str])],
            outputs=["custom_score"],
            api="openai",
            examples=[
                {
                    "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                    "outputs": {"custom_score": 1},
                },
                {
                    "inputs": {"predicted_answers": "Football is the most popular sport."},
                    "outputs": {"custom_score": 0},
                },
            ],
        )
        data = component.to_dict()
        assert data == {
            "type": "haystack.components.evaluators.llm_evaluator.LLMEvaluator",
            "init_parameters": {
                "api_params": {"generation_kwargs": {"response_format": {"type": "json_object"}, "seed": 42}},
                "api_key": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"},
                "api": "openai",
                "instructions": "test-instruction",
                "inputs": [["predicted_answers", "typing.List[str]"]],
                "outputs": ["custom_score"],
                "progress_bar": True,
                "examples": [
                    {
                        "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                        "outputs": {"custom_score": 1},
                    },
                    {
                        "inputs": {"predicted_answers": "Football is the most popular sport."},
                        "outputs": {"custom_score": 0},
                    },
                ],
            },
        }
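
    # Serializing a whole Pipeline exercises to_dict/from_dict indirectly via
    # Pipeline.dumps and Pipeline.loads.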
    def test_serde(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        pipeline = Pipeline()
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("questions", List[str]), ("predicted_answers", List[List[str]])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )
        pipeline.add_component("evaluator", component)
        serialized_pipeline = pipeline.dumps()
        deserialized_pipeline = Pipeline.loads(serialized_pipeline)
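
    # run() requires all inputs to contain the same number of entries; a length
    # mismatch between questions and predicted_answers raises ValueError.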
    def test_run_with_different_lengths(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("questions", List[str]), ("predicted_answers", List[List[str]])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )

        # Stub out the generator so no real API call is made; it always returns a fixed JSON reply.
        def generator_run(self, *args, **kwargs):
            return {"replies": ['{"score": 0.5}']}

        monkeypatch.setattr("haystack.components.generators.openai.OpenAIGenerator.run", generator_run)

        with pytest.raises(ValueError):
            component.run(questions=["What is the capital of Germany?"], predicted_answers=[["Berlin"], ["Paris"]])
        with pytest.raises(ValueError):
            component.run(
                questions=["What is the capital of Germany?", "What is the capital of France?"],
                predicted_answers=[["Berlin"]],
            )

    def test_run_returns_parsed_result(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("questions", List[str]), ("predicted_answers", List[List[str]])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )

        def generator_run(self, *args, **kwargs):
            return {"replies": ['{"score": 0.5}']}

        monkeypatch.setattr("haystack.components.generators.openai.OpenAIGenerator.run", generator_run)
        results = component.run(questions=["What is the capital of Germany?"], predicted_answers=["Berlin"])
        assert results == {"results": [{"score": 0.5}], "meta": None}

    def test_prepare_template(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("predicted_answers", List[str])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"}, "outputs": {"score": 1}},
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}},
            ],
        )
        template = component.prepare_template()
        assert (
            template
            == 'Instructions:\ntest-instruction\n\nGenerate the response in JSON format with the following keys:\n["score"]\nConsider the instructions and the examples below to determine those values.\n\nExamples:\nInputs:\n{"predicted_answers": "Damn, this is straight outta hell!!!"}\nOutputs:\n{"score": 1}\nInputs:\n{"predicted_answers": "Football is the most popular sport."}\nOutputs:\n{"score": 0}\n\nInputs:\n{"predicted_answers": {{ predicted_answers }}}\nOutputs:\n'
        )
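
    # validate_input_parameters checks that every expected input name is present
    # in the received inputs and that each received value is a list.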
    def test_invalid_input_parameters(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("predicted_answers", List[str])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )
        # None of the expected parameters are received
        with pytest.raises(ValueError):
            component.validate_input_parameters(
                expected={"predicted_answers": List[str]}, received={"questions": List[str]}
            )
        # Only one but not all the expected parameters are received
        with pytest.raises(ValueError):
            component.validate_input_parameters(
                expected={"predicted_answers": List[str], "questions": List[str]}, received={"questions": List[str]}
            )
        # Received inputs are not lists
        with pytest.raises(ValueError):
            component.validate_input_parameters(expected={"questions": List[str]}, received={"questions": str})

    def test_invalid_outputs(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("predicted_answers", List[str])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )
        with pytest.raises(ValueError):
            component.is_valid_json_and_has_expected_keys(
                expected=["score", "another_expected_output"], received='{"score": 1.0}'
            )
        with pytest.raises(ValueError):
            component.is_valid_json_and_has_expected_keys(expected=["score"], received='{"wrong_name": 1.0}')
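
    # With raise_on_failure=False, invalid JSON from the LLM is reported by
    # returning False instead of raising ValueError.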
    def test_output_invalid_json_raise_on_failure_false(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("predicted_answers", List[str])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
            raise_on_failure=False,
        )
        assert (
            component.is_valid_json_and_has_expected_keys(expected=["score"], received="some_invalid_json_output")
            is False
        )

    def test_output_invalid_json_raise_on_failure_true(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
        component = LLMEvaluator(
            instructions="test-instruction",
            inputs=[("predicted_answers", List[str])],
            outputs=["score"],
            examples=[
                {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
            ],
        )
        with pytest.raises(ValueError):
            component.is_valid_json_and_has_expected_keys(expected=["score"], received="some_invalid_json_output")

    def test_unsupported_api(self):
        with pytest.raises(ValueError):
            LLMEvaluator(
                api="unsupported_api",
                instructions="test-instruction",
                inputs=[("predicted_answers", List[str])],
                outputs=["score"],
                examples=[
                    {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"score": 0}}
                ],
            )
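
    # User-supplied api_params are merged with the defaults, so a custom
    # api_base_url still keeps the default response_format and seed.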
    def test_init_with_base_url(self):
        component = LLMEvaluator(
            instructions="test-instruction",
            api_key=Secret.from_token("test-api-key"),
            api_params={"api_base_url": "http://127.0.0.1:11434/v1"},
            inputs=[("predicted_answers", List[str])],
            outputs=["custom_score"],
            api="openai",
            examples=[
                {
                    "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                    "outputs": {"custom_score": 1},
                },
                {
                    "inputs": {"predicted_answers": "Football is the most popular sport."},
                    "outputs": {"custom_score": 0},
                },
            ],
        )
        assert component.generator.client.api_key == "test-api-key"
        assert component.api_params == {
            "generation_kwargs": {"response_format": {"type": "json_object"}, "seed": 42},
            "api_base_url": "http://127.0.0.1:11434/v1",
        }
        assert component.api == "openai"
        assert component.examples == [
            {"inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"}, "outputs": {"custom_score": 1}},
            {"inputs": {"predicted_answers": "Football is the most popular sport."}, "outputs": {"custom_score": 0}},
        ]
        assert component.instructions == "test-instruction"
        assert component.inputs == [("predicted_answers", List[str])]
        assert component.outputs == ["custom_score"]

    @pytest.mark.skipif(
        not (os.environ.get("API_BASE_URL") and os.environ.get("MODEL_NAME")),
        reason="Export env vars API_BASE_URL and MODEL_NAME containing the OpenAI API compatible server URL "
        "and the model name to run this test.",
    )
    @pytest.mark.integration
    def test_run_with_base_url(self):
        component = LLMEvaluator(
            instructions="test-instruction",
            api_key=Secret.from_token("test-api-key"),
            api_params={"api_base_url": os.environ["API_BASE_URL"], "model": os.environ["MODEL_NAME"]},
            inputs=[("predicted_answers", List[str])],
            outputs=["custom_score"],
            api="openai",
            examples=[
                {
                    "inputs": {"predicted_answers": "Damn, this is straight outta hell!!!"},
                    "outputs": {"custom_score": 1},
                },
                {
                    "inputs": {"predicted_answers": "Football is the most popular sport."},
                    "outputs": {"custom_score": 0},
                },
            ],
        )
        component.run(
            predicted_answers=["Damn, this is straight outta hell!!!", "Football is the most popular sport."]
        )
        assert component.outputs == ["custom_score"]