mirror of
https://github.com/microsoft/autogen.git
synced 2025-08-05 23:32:30 +00:00

* first pass at offline agent eval integration * Integrating AgentEval for offline scenarios * removing old changes * fixing notebook, updating docs * fixing subcriteria bug * updating class comment * cleaning up agent constructors * moving AgentEval agents to separate folder and adding a brief README * fixing build breaks * fixing formatting break * fixing comments * consolidating files in the agenteval folder under contrib and cleaning up imports * fixing import ordering * adding basic agenteval tests and fixing criteria parsing bug * first try at adding openai agenteval tests to build process * adding non-openai agenteval tests to build process * updating test settings * updating openai test * Update test/agentchat/contrib/agent_eval/test_agent_eval.py Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com> * Update .github/workflows/contrib-openai.yml Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com> * test commit * updating typing and converting to pydantic objects * fixing test file --------- Co-authored-by: Beibin Li <BeibinLi@users.noreply.github.com> Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>
23 lines
931 B
Python
23 lines
931 B
Python
#!/usr/bin/env python3 -m pytest
|
|
|
|
from autogen.agentchat.contrib.agent_eval.task import Task
|
|
|
|
|
|
def test_parse_json_str():
    """Check that a ``Task`` exposes the field values it was constructed with.

    The task is built from four string fields and every attribute is then
    compared against the value that was supplied for it.
    """
    expected_name = "Math problem solving"
    expected_description = (
        "Given any question, the system needs to solve the problem as consisely and accurately as possible"
    )
    # The example responses are handed over and compared as plain strings.
    expected_success = '{"message": "The answer is 5", "is_correct": True}'
    expected_failure = '{"message": "I don\'t know the answer", "is_correct": False}'

    task = Task(
        name=expected_name,
        description=expected_description,
        successful_response=expected_success,
        failed_response=expected_failure,
    )

    assert task
    assert task.name == expected_name
    assert task.description == expected_description
    assert task.successful_response == expected_success
    assert task.failed_response == expected_failure
|