Add usage summary for agents (#1269)

* update

* update

* Update notebook/oai_client_cost.ipynb

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* update doc and test

---------

Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
Yiran Wu 2024-01-16 20:55:29 -05:00 committed by GitHub
parent 563b1bb00b
commit acf81ac420
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 442 additions and 12 deletions

51
autogen/agent_utils.py Normal file
View File

@ -0,0 +1,51 @@
from typing import Any, Dict, List, Tuple

from autogen import Agent
def gather_usage_summary(agents: List["Agent"]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Gather usage summary from all agents.

    Args:
        agents (list): List of agents.

    Returns:
        tuple: (total_usage_summary, actual_usage_summary)

    Example return:
        total_usage_summary = {
            "total_cost": 0.0006090000000000001,
            "gpt-35-turbo": {
                "cost": 0.0006090000000000001,
                "prompt_tokens": 242,
                "completion_tokens": 123,
                "total_tokens": 365,
            },
        }

    `actual_usage_summary` follows the same format.
    If none of the agents incurred any cost (not having a client), then the
    total_usage_summary and actual_usage_summary will be `{'total_cost': 0}`.
    """

    def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Dict[str, Any]) -> None:
        # A client that never completed a request reports a summary of None.
        if agent_summary is None:
            return
        usage_summary["total_cost"] += agent_summary.get("total_cost", 0)
        for model, data in agent_summary.items():
            if model != "total_cost":
                if model not in usage_summary:
                    # Copy so aggregation never mutates the agent's own summary dict.
                    usage_summary[model] = data.copy()
                else:
                    usage_summary[model]["cost"] += data.get("cost", 0)
                    usage_summary[model]["prompt_tokens"] += data.get("prompt_tokens", 0)
                    usage_summary[model]["completion_tokens"] += data.get("completion_tokens", 0)
                    usage_summary[model]["total_tokens"] += data.get("total_tokens", 0)

    total_usage_summary = {"total_cost": 0}
    actual_usage_summary = {"total_cost": 0}

    for agent in agents:
        if agent.client:
            aggregate_summary(total_usage_summary, agent.client.total_usage_summary)
            aggregate_summary(actual_usage_summary, agent.client.actual_usage_summary)

    return total_usage_summary, actual_usage_summary

View File

@ -699,6 +699,8 @@ class ConversableAgent(Agent):
self.clear_history()
self.reset_consecutive_auto_reply_counter()
self.stop_reply_at_receive()
if self.client is not None:
self.client.clear_usage_summary()
for reply_func_tuple in self._reply_func_list:
if reply_func_tuple["reset_config"] is not None:
reply_func_tuple["reset_config"](reply_func_tuple["config"])
@ -1890,3 +1892,25 @@ class ConversableAgent(Agent):
messages = messages.copy()
messages[-1]["content"] = processed_user_text
return messages
def print_usage_summary(self, mode: Union[str, List[str]] = ["actual", "total"]) -> None:
    """Print this agent's usage summary via its client, or a no-cost notice when it has none."""
    if self.client is None:
        print(f"No cost incurred from agent '{self.name}'.")
        return
    print(f"Agent '{self.name}':")
    self.client.print_usage_summary(mode)
def get_actual_usage(self) -> Union[None, Dict[str, int]]:
    """Return the client's actual (non-cached) usage summary, or None when no client is attached."""
    return None if self.client is None else self.client.actual_usage_summary
def get_total_usage(self) -> Union[None, Dict[str, int]]:
    """Return the client's total usage summary, or None when no client is attached."""
    return None if self.client is None else self.client.total_usage_summary

View File

@ -42,7 +42,7 @@ OAI_PRICE1K = {
"gpt-4-0613": (0.03, 0.06),
"gpt-4-32k-0613": (0.06, 0.12),
# 11-06
"gpt-3.5-turbo": (0.001, 0.002),
"gpt-3.5-turbo": (0.0015, 0.002), # default is still 0613
"gpt-3.5-turbo-1106": (0.001, 0.002),
"gpt-35-turbo-1106": (0.001, 0.002),
"gpt-4-1106-preview": (0.01, 0.03),

View File

@ -15,7 +15,8 @@
"\n",
"Licensed under the MIT License.\n",
"\n",
"# Use AutoGen's OpenAIWrapper for cost estimation\n",
    "# Usage tracking with AutoGen\n",
"## 1. Use AutoGen's OpenAIWrapper for cost estimation\n",
"The `OpenAIWrapper` from `autogen` tracks token counts and costs of your API calls. Use the `create()` method to initiate requests and `print_usage_summary()` to retrieve a detailed usage report, including total cost and token usage for both cached and actual requests.\n",
"\n",
"- `mode=[\"actual\", \"total\"]` (default): print usage summary for non-caching completions and all completions (including cache).\n",
@ -24,6 +25,17 @@
"\n",
"Reset your session's usage data with `clear_usage_summary()` when needed.\n",
"\n",
"## 2. Track cost and token count for agents\n",
"We also support cost estimation for agents. Use `Agent.print_usage_summary()` to print the cost summary for the agent.\n",
"You can retrieve usage summary in a dict using `Agent.get_actual_usage()` and `Agent.get_total_usage()`. Note that `Agent.reset()` will also reset the usage summary.\n",
"\n",
    "To gather usage data for a list of agents, we provide a utility function `autogen.agent_utils.gather_usage_summary(agents)` that takes a list of agents and returns the combined usage summary.\n",
"\n",
"## Caution when using Azure OpenAI!\n",
    "If you are using Azure OpenAI, the model returned from completion doesn't have the version information. The returned model is either 'gpt-35-turbo' or 'gpt-4'. From there, we calculate the cost based on gpt-3.5-turbo-0613 ((0.0015, 0.002) per 1k prompt and completion tokens) and gpt-4-0613 ((0.03, 0.06)). This means the cost is wrong if you are using the 1106 version of the models from Azure OpenAI.\n",
"\n",
"This will be improved in the future. However, the token count summary is accurate. You can use the token count to calculate the cost yourself.\n",
"\n",
"## Requirements\n",
"\n",
"AutoGen requires `Python>=3.8`:\n",
@ -43,12 +55,14 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import autogen\n",
"from autogen import OpenAIWrapper\n",
"from autogen import AssistantAgent, UserProxyAgent\n",
"from autogen.agent_utils import gather_usage_summary\n",
"\n",
"# config_list = autogen.config_list_from_json(\n",
"# \"OAI_CONFIG_LIST\",\n",
@ -79,7 +93,7 @@
" \"api_key\": \"<your OpenAI API key>\",\n",
" }, # OpenAI API endpoint for gpt-4\n",
" {\n",
" \"model\": \"gpt-35-turbo-0631\", # 0631 or newer is needed to use functions\n",
" \"model\": \"gpt-35-turbo-0613\", # 0613 or newer is needed to use functions\n",
" \"base_url\": \"<your Azure OpenAI API base>\", \n",
" \"api_type\": \"azure\", \n",
" \"api_version\": \"2023-08-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n",
@ -100,15 +114,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In update_usage_summary\n",
"0.0001555\n"
"0.0003535\n"
]
}
],
@ -117,7 +130,7 @@
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Can you give me 3 useful tips on learning Python? Keep it simple and short.\"},\n",
"]\n",
"response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=None)\n",
"response = client.create(messages=messages, model=\"gpt-3.5-turbo\", cache_seed=None)\n",
"print(response.cost)"
]
},
@ -125,7 +138,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage Summary\n",
"## Usage Summary for OpenAIWrapper\n",
"\n",
    "When creating an instance of OpenAIWrapper, the cost of all completions from the same instance is recorded. You can call `print_usage_summary()` to check your usage summary. To reset it, use `clear_usage_summary()`.\n"
]
@ -283,6 +296,209 @@
"response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=41)\n",
"client.print_usage_summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage Summary for Agents\n",
"\n",
"- `Agent.print_usage_summary()` will print the cost summary for the agent.\n",
"- `Agent.get_actual_usage()` and `Agent.get_total_usage()` will return the usage summary in a dict. When an agent doesn't use LLM, they will return None.\n",
"- `Agent.reset()` will reset the usage summary.\n",
"- `autogen.agent_utils.gather_usage_summary` will gather the usage summary for a list of agents."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mai_user\u001b[0m (to assistant):\n",
"\n",
"$x^3=125$. What is x?\n",
"\n",
"--------------------------------------------------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33massistant\u001b[0m (to ai_user):\n",
"\n",
"To find the value of x, we need to find the cube root of 125. \n",
"\n",
"The cube root of 125 is 5. \n",
"\n",
"Therefore, x = 5.\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mai_user\u001b[0m (to assistant):\n",
"\n",
"Great job! Your answer is correct.\n",
"\n",
"Indeed, to find the value of x in the equation $x^3 = 125$, we need to find the cube root of 125. The cube root of 125 is indeed 5.\n",
"\n",
"Therefore, x = 5 is the correct solution. Well done!\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33massistant\u001b[0m (to ai_user):\n",
"\n",
"Thank you! I'm glad I could assist you. If you have any more questions, feel free to ask.\n",
"\n",
"--------------------------------------------------------------------------------\n"
]
}
],
"source": [
"\n",
"assistant = AssistantAgent(\n",
" \"assistant\",\n",
" system_message=\"You are a helpful assistant.\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"cache_seed\": None,\n",
" \"config_list\": config_list,\n",
" },\n",
")\n",
"\n",
"ai_user_proxy = UserProxyAgent(\n",
" name=\"ai_user\",\n",
" human_input_mode=\"NEVER\",\n",
" max_consecutive_auto_reply=1,\n",
" code_execution_config=False,\n",
" llm_config={\n",
" \"config_list\": config_list,\n",
" },\n",
" # In the system message the \"user\" always refers to the other agent.\n",
" system_message=\"You ask a user for help. You check the answer from the user and provide feedback.\",\n",
")\n",
"assistant.reset()\n",
"\n",
"math_problem = \"$x^3=125$. What is x?\"\n",
"ai_user_proxy.initiate_chat(\n",
" assistant,\n",
" message=math_problem,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent 'ai_user':\n",
"----------------------------------------------------------------------------------------------------\n",
"Usage summary excluding cached usage: \n",
"Total cost: 0.00025\n",
"* Model 'gpt-35-turbo': cost: 0.00025, prompt_tokens: 80, completion_tokens: 63, total_tokens: 143\n",
"\n",
"All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
"----------------------------------------------------------------------------------------------------\n",
"\n",
"Agent 'assistant':\n",
"----------------------------------------------------------------------------------------------------\n",
"Usage summary excluding cached usage: \n",
"Total cost: 0.00036\n",
"* Model 'gpt-35-turbo': cost: 0.00036, prompt_tokens: 162, completion_tokens: 60, total_tokens: 222\n",
"\n",
"All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
"----------------------------------------------------------------------------------------------------\n"
]
}
],
"source": [
"ai_user_proxy.print_usage_summary()\n",
"print()\n",
"assistant.print_usage_summary()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"No cost incurred from agent 'user'.\n"
]
}
],
"source": [
"user_proxy = UserProxyAgent(\n",
" name=\"user\",\n",
" human_input_mode=\"NEVER\",\n",
" max_consecutive_auto_reply=2,\n",
" code_execution_config=False,\n",
" default_auto_reply=\"That's all. Thank you.\",\n",
")\n",
"user_proxy.print_usage_summary()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Actual usage summary for assistant (excluding completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
"Total usage summary for assistant (including completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
"Actual usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
"Total usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
"Actual usage summary for user_proxy: None\n",
"Total usage summary for user_proxy: None\n"
]
}
],
"source": [
"print(\"Actual usage summary for assistant (excluding completion from cache):\", assistant.get_actual_usage())\n",
"print(\"Total usage summary for assistant (including completion from cache):\", assistant.get_total_usage())\n",
"\n",
"print(\"Actual usage summary for ai_user_proxy:\", ai_user_proxy.get_actual_usage())\n",
"print(\"Total usage summary for ai_user_proxy:\", ai_user_proxy.get_total_usage())\n",
"\n",
"print(\"Actual usage summary for user_proxy:\", user_proxy.get_actual_usage())\n",
"print(\"Total usage summary for user_proxy:\", user_proxy.get_total_usage())"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'total_cost': 0.0006090000000000001,\n",
" 'gpt-35-turbo': {'cost': 0.0006090000000000001,\n",
" 'prompt_tokens': 242,\n",
" 'completion_tokens': 123,\n",
" 'total_tokens': 365}}"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_usage_summary, actual_usage_summary = gather_usage_summary([assistant, ai_user_proxy, user_proxy])\n",
"total_usage_summary"
]
}
],
"metadata": {
@ -301,7 +517,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.9.18"
}
},
"nbformat": 4,

View File

@ -0,0 +1,139 @@
from autogen.agent_utils import gather_usage_summary
from autogen import AssistantAgent, UserProxyAgent
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
import pytest
from conftest import skip_openai
import autogen
import io
from contextlib import redirect_stdout
# Skip the whole module when openai is missing or the suite asked to skip OpenAI tests.
try:
    import openai  # noqa: F401
except ImportError:
    skip = True
else:
    skip = skip_openai
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
def test_gathering():
    """gather_usage_summary sums per-model costs/token counts across agents."""
    config_list = autogen.config_list_from_json(
        OAI_CONFIG_LIST,
        file_location=KEY_LOC,
    )
    # Three identically configured assistants; their usage summaries are injected below.
    assistant1, assistant2, assistant3 = (
        AssistantAgent(
            "assistant",
            system_message="You are a helpful assistant.",
            llm_config={
                "config_list": config_list,
                "model": "gpt-3.5-turbo-0613",
            },
        )
        for _ in range(3)
    )

    assistant1.client.total_usage_summary = {
        "total_cost": 0.1,
        "gpt-35-turbo": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
    }
    assistant2.client.total_usage_summary = {
        "total_cost": 0.2,
        "gpt-35-turbo": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
    }
    assistant3.client.total_usage_summary = {
        "total_cost": 0.3,
        "gpt-4": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
    }

    total_usage, _ = gather_usage_summary([assistant1, assistant2, assistant3])

    # Same-model entries are summed; different models stay as separate entries.
    assert round(total_usage["total_cost"], 8) == 0.6
    assert round(total_usage["gpt-35-turbo"]["cost"], 8) == 0.3
    assert round(total_usage["gpt-4"]["cost"], 8) == 0.3

    # test when agent doesn't have client: it must contribute nothing to either summary.
    user_proxy = UserProxyAgent(
        name="ai_user",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=2,
        code_execution_config=False,
        default_auto_reply="That's all. Thank you.",
    )
    total_usage, actual_usage = gather_usage_summary([user_proxy])
    assert total_usage == {"total_cost": 0}
    assert actual_usage == {"total_cost": 0}
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
def test_agent_usage():
    """End-to-end check that agents with an LLM client print and expose usage summaries."""
    config_list = autogen.config_list_from_json(
        OAI_CONFIG_LIST,
        file_location=KEY_LOC,
    )
    assistant = AssistantAgent(
        "assistant",
        system_message="You are a helpful assistant.",
        llm_config={
            "timeout": 600,
            "cache_seed": None,
            "config_list": config_list,
            "model": "gpt-3.5-turbo-0613",
        },
    )

    ai_user_proxy = UserProxyAgent(
        name="ai_user",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=1,
        code_execution_config=False,
        llm_config={
            "config_list": config_list,
            "model": "gpt-3.5-turbo-0613",
        },
        # In the system message the "user" always refers to the other agent.
        system_message="You ask a user for help. You check the answer from the user and provide feedback.",
    )

    math_problem = "$x^3=125$. What is x?"
    ai_user_proxy.initiate_chat(
        assistant,
        message=math_problem,
    )

    # test print
    captured_output = io.StringIO()
    with redirect_stdout(captured_output):
        ai_user_proxy.print_usage_summary()
    output = captured_output.getvalue()
    assert "Usage summary excluding cached usage:" in output

    captured_output = io.StringIO()
    with redirect_stdout(captured_output):
        assistant.print_usage_summary()
    output = captured_output.getvalue()
    assert "All completions are non-cached:" in output

    # test get: the assistant ran with cache_seed=None, so both summaries must exist.
    assert assistant.get_actual_usage() is not None
    assert assistant.get_total_usage() is not None
    # ai_user_proxy may serve replies from cache on reruns; only total usage is guaranteed.
    assert ai_user_proxy.get_total_usage() is not None
    print("Actual usage summary (excluding completion from cache):", assistant.get_actual_usage())
    print("Total usage summary (including completion from cache):", assistant.get_total_usage())
    print("Actual usage summary (excluding completion from cache):", ai_user_proxy.get_actual_usage())
    print("Total usage summary (including completion from cache):", ai_user_proxy.get_total_usage())
# Allow running this test module directly without pytest.
if __name__ == "__main__":
    test_gathering()
    test_agent_usage()

View File

@ -122,8 +122,8 @@ def test_graph_modelling_language_using_select_speaker(save=False):
skip or not sys.version.startswith("3.10"),
reason="do not run if openai is not installed or py!=3.10",
)
def test_oai_client_cost(save=False):
run_notebook("oai_client_cost.ipynb", save=save)
def test_agentchat_cost_token_tracking(save=False):
    """Execute the cost/token-tracking notebook through the shared runner.

    Args:
        save: forwarded to `run_notebook`; presumably keeps the executed copy — confirm against the helper.
    """
    run_notebook("agentchat_cost_token_tracking.ipynb", save=save)
if __name__ == "__main__":