mirror of
https://github.com/microsoft/autogen.git
synced 2025-09-03 21:37:17 +00:00

1. Add host network support in Docker and remove unused requirements from argument check. 2. Use Pandas to simplify summary statistic calculations. 3. Add running time to summary statistics ``` Using tabulation method defined in '/home/ekzhu/autogen/python/packages/agbench/benchmarks/HumanEval/Scripts/custom_tabulate.py' Task Id Trial 0 Success Trial 0 Time -- ------------ ----------------- -------------- 0 HumanEval_0 True 3 1 HumanEval_1 False 15 2 HumanEval_2 True 2 3 HumanEval_3 True 11 4 HumanEval_4 True 4 5 HumanEval_5 True 2 6 HumanEval_6 False 18 7 HumanEval_7 True 2 8 HumanEval_8 True 2 9 HumanEval_9 True 12 10 HumanEval_10 False 11 11 HumanEval_11 True 2 12 HumanEval_12 True 3 13 HumanEval_13 True 1 14 HumanEval_14 True 4 15 HumanEval_15 True 1 16 HumanEval_16 True 2 17 HumanEval_17 False 76 18 HumanEval_18 True 4 19 HumanEval_19 True 3 20 HumanEval_20 True 5 21 HumanEval_21 True 3 22 HumanEval_22 True 1 23 HumanEval_23 True 2 24 HumanEval_24 nan Summary Statistics Successes Failures Missing Total Average Success Rate Average Time Total Time ------- ----------- ---------- --------- ------- ---------------------- -------------- ------------ Trial 0 20 4 1 25 0.8 7.875 189 CAUTION: 'autogenbench tabulate' is in early preview and is not thoroughly tested. Please do not cite values from these calculations in academic work without first inspecting and verifying the results in the run logs yourself. ``` Now the default tabulate output looks like this --------- Co-authored-by: Ryan Sweet <rysweet@microsoft.com>
66 lines · 2.1 KiB · Python
import asyncio
import os

import yaml
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.ui import Console
from autogen_core.model_context import ChatCompletionContext, UnboundedChatCompletionContext
from autogen_core.models import ChatCompletionClient, ModelFamily
from autogen_ext.agents.magentic_one import MagenticOneCoderAgent
from autogen_ext.code_executors.local import LocalCommandLineCodeExecutor

from custom_code_executor import CustomCodeExecutorAgent
from reasoning_model_context import ReasoningModelContext
|
async def main() -> None:
    """Run a HumanEval-style coding task with a coder/executor team.

    Loads a model client from ``config.yaml``, selects a chat-completion
    context appropriate for the model family, wires a coder agent and a
    code-executor agent into a round-robin team, reads the task prompt from
    ``prompt.txt``, and streams the team run to the console.

    Raises:
        FileNotFoundError: if ``config.yaml`` or ``prompt.txt`` is missing.
        KeyError: if ``config.yaml`` lacks a ``model_config`` entry.
    """
    # Load model configuration and create the model client.
    with open("config.yaml", "r") as f:
        config = yaml.safe_load(f)
    model_client = ChatCompletionClient.load_component(config["model_config"])

    # Model context: R1-family models get a dedicated reasoning context;
    # every other family uses an unbounded context.
    model_context: ChatCompletionContext
    if model_client.model_info["family"] == ModelFamily.R1:
        model_context = ReasoningModelContext()
    else:
        model_context = UnboundedChatCompletionContext()

    # Coder agent that writes the solution.
    coder_agent = MagenticOneCoderAgent(
        name="coder",
        model_client=model_client,
    )
    # Set model context. NOTE(review): this pokes a private attribute —
    # confirm there is no public API for overriding the agent's context.
    coder_agent._model_context = model_context  # type: ignore

    # Executor agent that runs code blocks produced by the coder.
    executor = CustomCodeExecutorAgent(
        name="executor",
        code_executor=LocalCommandLineCodeExecutor(),
        sources=["coder"],
    )

    # Termination condition: stop once the executor emits TERMINATE.
    termination = TextMentionTermination(text="TERMINATE", sources=["executor"])

    # Define a team: coder and executor alternate, capped at 12 turns.
    agent_team = RoundRobinGroupChat([coder_agent, executor], max_turns=12, termination_condition=termination)

    prompt = ""
    with open("prompt.txt", "rt") as fh:
        prompt = fh.read()

    task = f"""Complete the following python function. Format your output as Markdown python code block containing the entire function definition:

```python
{prompt}
```
"""

    # Run the team and stream messages to the console.
    stream = agent_team.run_stream(task=task)
    await Console(stream)
|
|
# Entry-point guard: start the async run only when executed as a script,
# so importing this module does not trigger a team run as a side effect.
if __name__ == "__main__":
    asyncio.run(main())