James Woffinden-Luey dad9c66104
AgentEval integration (#2672)
* first pass at offline agent eval integration

* Integrating AgentEval for offline scenarios

* removing old changes

* fixing notebook, updating docs

* fixing subcriteria bug

* updating class comment

* cleaning up agent constructors

* moving AgentEval agents to separate folder and adding a brief README

* fixing build breaks

* fixing formatting break

* fixing comments

* consolidating files in the agenteval folder under contrib and cleaning up imports

* fixing import ordering

* adding basic agenteval tests and fixing criteria parsing bug

* first try at adding openai agenteval tests to build process

* adding non-openai agenteval tests to build process

* updating test settings

* updating openai test

* Update test/agentchat/contrib/agent_eval/test_agent_eval.py

Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>

* Update .github/workflows/contrib-openai.yml

Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>

* test commit

* updating typing and converting to pydantic objects

* fixing test file

---------

Co-authored-by: Beibin Li <BeibinLi@users.noreply.github.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>
2024-05-14 07:14:37 +00:00

60 lines
1.9 KiB
Python

#!/usr/bin/env python3 -m pytest
from autogen.agentchat.contrib.agent_eval.criterion import Criterion
def test_parse_json_str():
    """Verify that ``Criterion.parse_json_str`` parses the sample math criteria file.

    Reads the sample criteria JSON from disk, parses it, and checks the number
    of criteria returned along with the name, description and accepted-value
    count of the first criterion.
    """
    # NOTE(review): the path is relative, so this presumably expects pytest to
    # be run from the repository root -- confirm against the CI configuration.
    criteria_file = "test/test_files/agenteval-in-out/samples/sample_math_criteria.json"

    # A context manager guarantees the handle is closed even if the read fails;
    # an explicit encoding keeps decoding independent of the platform locale.
    with open(criteria_file, "r", encoding="utf-8") as criteria_fh:
        criteria_json = criteria_fh.read()

    criteria = Criterion.parse_json_str(criteria_json)

    assert criteria
    assert len(criteria) == 6
    assert criteria[0].name == "Problem Interpretation"
    assert criteria[0].description == "Ability to correctly interpret the problem."
    assert len(criteria[0].accepted_values) == 5
def test_write_json():
    """Verify that ``Criterion.write_json`` serializes a list of criteria to JSON.

    Builds two criteria without sub-criteria, serializes them, and checks that
    the resulting JSON document decodes to the expected list of objects.

    The comparison is made on the decoded structure rather than on the raw
    text, so the test does not depend on the serializer's indentation width
    or line breaks.
    """
    import json  # local import: only this test needs to decode the output

    criteria1 = Criterion(name="test1", description="test1 description", accepted_values=["test1", "test2"])
    criteria2 = Criterion(name="test2", description="test2 description", accepted_values=["test1", "test2"])

    output = Criterion.write_json([criteria1, criteria2])

    expected = [
        {
            "name": "test1",
            "description": "test1 description",
            "accepted_values": ["test1", "test2"],
            "sub_criteria": [],
        },
        {
            "name": "test2",
            "description": "test2 description",
            "accepted_values": ["test1", "test2"],
            "sub_criteria": [],
        },
    ]

    # The output must be a JSON string; decode it and compare field by field.
    assert isinstance(output, str)
    assert json.loads(output) == expected
def test_write_parse_compatibility():
    """Verify that ``write_json`` output can be read back by ``parse_json_str``.

    Serializes two criteria, parses the resulting string, and checks that each
    parsed criterion keeps its name, description and number of accepted values.
    """
    expected_names = ("test1", "test2")
    originals = [
        Criterion(name=label, description=f"{label} description", accepted_values=["test1", "test2"])
        for label in expected_names
    ]

    serialized = Criterion.write_json(originals)
    round_tripped = Criterion.parse_json_str(serialized)

    assert round_tripped
    assert len(round_tripped) == 2

    # Walk the parsed criteria in order and compare each against its source values.
    for position, label in enumerate(expected_names):
        parsed = round_tripped[position]
        assert parsed.name == label
        assert parsed.description == f"{label} description"
        assert len(parsed.accepted_values) == 2