mirror of
https://github.com/microsoft/autogen.git
synced 2025-08-06 07:43:00 +00:00

* first pass at offline agent eval integration * Integrating AgentEval for offline scenarios * removing old changes * fixing notebook, updating docs * fixing subcriteria bug * updating class comment * cleaning up agent constructors * moving AgentEval agents to separate folder and adding a brief README * fixing build breaks * fixing formatting break * fixing comments * consolidating files in the agenteval folder under contrib and cleaning up imports * fixing import ordering * adding basic agenteval tests and fixing criteria parsing bug * first try at adding openai agenteval tests to build process * adding non-openai agenteval tests to build process * updating test settings * updating openai test * Update test/agentchat/contrib/agent_eval/test_agent_eval.py Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com> * Update .github/workflows/contrib-openai.yml Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com> * test commit * updating typing and converting to pydantic objects * fixing test file --------- Co-authored-by: Beibin Li <BeibinLi@users.noreply.github.com> Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Wael Karkoub <wael.karkoub96@gmail.com>
60 lines
1.9 KiB
Python
60 lines
1.9 KiB
Python
#!/usr/bin/env python3 -m pytest
|
|
|
|
from autogen.agentchat.contrib.agent_eval.criterion import Criterion
|
|
|
|
|
|
def test_parse_json_str():
    """Check that ``Criterion.parse_json_str`` parses the sample math criteria file.

    The file at ``criteria_file`` is read as text and handed to
    ``Criterion.parse_json_str``. The test then checks the number of parsed
    criteria and the name, description, and accepted-value count of the
    first one.
    """
    criteria_file = "test/test_files/agenteval-in-out/samples/sample_math_criteria.json"
    # Read through a context manager so the file handle is closed as soon as
    # the text has been loaded, rather than whenever the object is collected.
    with open(criteria_file, "r") as f:
        criteria_json = f.read()

    criteria = Criterion.parse_json_str(criteria_json)
    assert criteria
    assert len(criteria) == 6
    assert criteria[0].name == "Problem Interpretation"
    assert criteria[0].description == "Ability to correctly interpret the problem."
    assert len(criteria[0].accepted_values) == 5
|
|
|
|
|
|
def test_write_json():
# Builds two Criterion objects, passes them to Criterion.write_json, and
# requires the returned value to equal the literal text below exactly.
|
|
criteria1 = Criterion(name="test1", description="test1 description", accepted_values=["test1", "test2"])
|
|
criteria2 = Criterion(name="test2", description="test2 description", accepted_values=["test1", "test2"])
|
|
# Both criteria are passed together as one list.
output = Criterion.write_json([criteria1, criteria2])
|
|
# The comparison is plain string equality, so key order and every whitespace
# character inside the expected literal matter.
# sub_criteria is not passed to either constructor above, yet the expected
# text lists it as an empty array for both entries.
# NOTE(review): the literal's internal indentation is not visible in this
# view; confirm it against the formatting write_json actually emits.
assert (
|
|
output
|
|
== """[
|
|
{
|
|
"name": "test1",
|
|
"description": "test1 description",
|
|
"accepted_values": [
|
|
"test1",
|
|
"test2"
|
|
],
|
|
"sub_criteria": []
|
|
},
|
|
{
|
|
"name": "test2",
|
|
"description": "test2 description",
|
|
"accepted_values": [
|
|
"test1",
|
|
"test2"
|
|
],
|
|
"sub_criteria": []
|
|
}
|
|
]"""
|
|
)
|
|
|
|
|
|
def test_write_parse_compatibility():
    """Round-trip two criteria through ``write_json`` and ``parse_json_str``.

    The output of ``Criterion.write_json`` is fed straight back into
    ``Criterion.parse_json_str``. The parsed result must be truthy, hold
    exactly two entries, and keep each entry's name, description, and
    accepted-value count in the original order.
    """
    first = Criterion(name="test1", description="test1 description", accepted_values=["test1", "test2"])
    second = Criterion(name="test2", description="test2 description", accepted_values=["test1", "test2"])

    serialized = Criterion.write_json([first, second])
    parsed = Criterion.parse_json_str(serialized)

    assert parsed
    assert len(parsed) == 2

    # (name, description) expected at each position of the parsed list.
    expected_fields = (
        ("test1", "test1 description"),
        ("test2", "test2 description"),
    )
    for index, (expected_name, expected_description) in enumerate(expected_fields):
        assert parsed[index].name == expected_name
        assert parsed[index].description == expected_description
        assert len(parsed[index].accepted_values) == 2
|