Add usage summary for agents (#1269)

* update

* update

* Update notebook/oai_client_cost.ipynb

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* update doc and test

---------

Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
Yiran Wu 2024-01-16 20:55:29 -05:00 committed by GitHub
parent 563b1bb00b
commit acf81ac420
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 442 additions and 12 deletions

51
autogen/agent_utils.py Normal file
View File

@ -0,0 +1,51 @@
from typing import Any, Dict, List, Tuple

from autogen import Agent
def gather_usage_summary(agents: List["Agent"]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Gather usage summary from all agents.

    Args:
        agents (list): List of agents.

    Returns:
        tuple: (total_usage_summary, actual_usage_summary)

    Example return:
        total_usage_summary = {
            "total_cost": 0.0006090000000000001,
            "gpt-35-turbo": {
                "cost": 0.0006090000000000001,
                "prompt_tokens": 242,
                "completion_tokens": 123,
                "total_tokens": 365,
            },
        }

    `actual_usage_summary` follows the same format.
    If none of the agents incurred any cost (not having a client), then the
    total_usage_summary and actual_usage_summary will be `{'total_cost': 0}`.
    """

    def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Dict[str, Any]) -> None:
        # A client that never completed a request reports a summary of None.
        if agent_summary is None:
            return
        usage_summary["total_cost"] += agent_summary.get("total_cost", 0)
        for model, data in agent_summary.items():
            if model != "total_cost":
                if model not in usage_summary:
                    # Copy so aggregation never mutates the agent's own summary dict.
                    usage_summary[model] = data.copy()
                else:
                    usage_summary[model]["cost"] += data.get("cost", 0)
                    usage_summary[model]["prompt_tokens"] += data.get("prompt_tokens", 0)
                    usage_summary[model]["completion_tokens"] += data.get("completion_tokens", 0)
                    usage_summary[model]["total_tokens"] += data.get("total_tokens", 0)

    total_usage_summary = {"total_cost": 0}
    actual_usage_summary = {"total_cost": 0}

    for agent in agents:
        if agent.client:
            aggregate_summary(total_usage_summary, agent.client.total_usage_summary)
            aggregate_summary(actual_usage_summary, agent.client.actual_usage_summary)

    return total_usage_summary, actual_usage_summary

View File

@ -699,6 +699,8 @@ class ConversableAgent(Agent):
self.clear_history()
self.reset_consecutive_auto_reply_counter()
self.stop_reply_at_receive()
if self.client is not None:
self.client.clear_usage_summary()
for reply_func_tuple in self._reply_func_list:
if reply_func_tuple["reset_config"] is not None:
reply_func_tuple["reset_config"](reply_func_tuple["config"])
@ -1890,3 +1892,25 @@ class ConversableAgent(Agent):
messages = messages.copy()
messages[-1]["content"] = processed_user_text
return messages
def print_usage_summary(self, mode: Union[str, List[str]] = ["actual", "total"]) -> None:
    """Print this agent's usage summary via its client, or a no-cost notice when it has none."""
    if self.client is None:
        print(f"No cost incurred from agent '{self.name}'.")
        return
    print(f"Agent '{self.name}':")
    self.client.print_usage_summary(mode)
def get_actual_usage(self) -> Union[None, Dict[str, int]]:
    """Return the client's actual (non-cached) usage summary, or None when no client is attached."""
    return None if self.client is None else self.client.actual_usage_summary
def get_total_usage(self) -> Union[None, Dict[str, int]]:
    """Return the client's total usage summary, or None when no client is attached."""
    return None if self.client is None else self.client.total_usage_summary

View File

@ -42,7 +42,7 @@ OAI_PRICE1K = {
"gpt-4-0613": (0.03, 0.06),
"gpt-4-32k-0613": (0.06, 0.12),
# 11-06
"gpt-3.5-turbo": (0.001, 0.002),
"gpt-3.5-turbo": (0.0015, 0.002), # default is still 0613
"gpt-3.5-turbo-1106": (0.001, 0.002),
"gpt-35-turbo-1106": (0.001, 0.002),
"gpt-4-1106-preview": (0.01, 0.03),

View File

@ -15,7 +15,8 @@
"\n",
"Licensed under the MIT License.\n",
"\n",
"# Use AutoGen's OpenAIWrapper for cost estimation\n",
    "# Usage tracking with AutoGen\n",
"## 1. Use AutoGen's OpenAIWrapper for cost estimation\n",
"The `OpenAIWrapper` from `autogen` tracks token counts and costs of your API calls. Use the `create()` method to initiate requests and `print_usage_summary()` to retrieve a detailed usage report, including total cost and token usage for both cached and actual requests.\n",
"\n",
"- `mode=[\"actual\", \"total\"]` (default): print usage summary for non-caching completions and all completions (including cache).\n",
@ -24,6 +25,17 @@
"\n",
"Reset your session's usage data with `clear_usage_summary()` when needed.\n",
"\n",
"## 2. Track cost and token count for agents\n",
"We also support cost estimation for agents. Use `Agent.print_usage_summary()` to print the cost summary for the agent.\n",
"You can retrieve usage summary in a dict using `Agent.get_actual_usage()` and `Agent.get_total_usage()`. Note that `Agent.reset()` will also reset the usage summary.\n",
"\n",
    "To gather usage data for a list of agents, we provide a utility function `autogen.agent_utils.gather_usage_summary(agents)` that takes a list of agents and returns the combined usage summary.\n",
"\n",
"## Caution when using Azure OpenAI!\n",
    "If you are using Azure OpenAI, the model returned from completion doesn't have the version information. The returned model is either 'gpt-35-turbo' or 'gpt-4'. From there, we calculate the cost based on gpt-3.5-turbo-0613 ((0.0015, 0.002) per 1k prompt and completion tokens) and gpt-4-0613 ((0.03, 0.06)). This means the cost is wrong if you are using the 1106 version of the models from Azure OpenAI.\n",
"\n",
"This will be improved in the future. However, the token count summary is accurate. You can use the token count to calculate the cost yourself.\n",
"\n",
"## Requirements\n",
"\n",
"AutoGen requires `Python>=3.8`:\n",
@ -43,12 +55,14 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import autogen\n",
"from autogen import OpenAIWrapper\n",
"from autogen import AssistantAgent, UserProxyAgent\n",
"from autogen.agent_utils import gather_usage_summary\n",
"\n",
"# config_list = autogen.config_list_from_json(\n",
"# \"OAI_CONFIG_LIST\",\n",
@ -79,7 +93,7 @@
" \"api_key\": \"<your OpenAI API key>\",\n",
" }, # OpenAI API endpoint for gpt-4\n",
" {\n",
" \"model\": \"gpt-35-turbo-0631\", # 0631 or newer is needed to use functions\n",
" \"model\": \"gpt-35-turbo-0613\", # 0613 or newer is needed to use functions\n",
" \"base_url\": \"<your Azure OpenAI API base>\", \n",
" \"api_type\": \"azure\", \n",
" \"api_version\": \"2023-08-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n",
@ -100,15 +114,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In update_usage_summary\n",
"0.0001555\n"
"0.0003535\n"
]
}
],
@ -117,7 +130,7 @@
"messages = [\n",
" {\"role\": \"user\", \"content\": \"Can you give me 3 useful tips on learning Python? Keep it simple and short.\"},\n",
"]\n",
"response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=None)\n",
"response = client.create(messages=messages, model=\"gpt-3.5-turbo\", cache_seed=None)\n",
"print(response.cost)"
]
},
@ -125,7 +138,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage Summary\n",
"## Usage Summary for OpenAIWrapper\n",
"\n",
    "When creating an instance of OpenAIWrapper, the cost of all completions from the same instance is recorded. You can call `print_usage_summary()` to check your usage summary. To reset it, use `clear_usage_summary()`.\n"
]
@ -283,6 +296,209 @@
"response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=41)\n",
"client.print_usage_summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage Summary for Agents\n",
"\n",
"- `Agent.print_usage_summary()` will print the cost summary for the agent.\n",
"- `Agent.get_actual_usage()` and `Agent.get_total_usage()` will return the usage summary in a dict. When an agent doesn't use LLM, they will return None.\n",
"- `Agent.reset()` will reset the usage summary.\n",
"- `autogen.agent_utils.gather_usage_summary` will gather the usage summary for a list of agents."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mai_user\u001b[0m (to assistant):\n",
"\n",
"$x^3=125$. What is x?\n",
"\n",
"--------------------------------------------------------------------------------\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33massistant\u001b[0m (to ai_user):\n",
"\n",
"To find the value of x, we need to find the cube root of 125. \n",
"\n",
"The cube root of 125 is 5. \n",
"\n",
"Therefore, x = 5.\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mai_user\u001b[0m (to assistant):\n",
"\n",
"Great job! Your answer is correct.\n",
"\n",
"Indeed, to find the value of x in the equation $x^3 = 125$, we need to find the cube root of 125. The cube root of 125 is indeed 5.\n",
"\n",
"Therefore, x = 5 is the correct solution. Well done!\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33massistant\u001b[0m (to ai_user):\n",
"\n",
"Thank you! I'm glad I could assist you. If you have any more questions, feel free to ask.\n",
"\n",
"--------------------------------------------------------------------------------\n"
]
}
],
"source": [
"\n",
"assistant = AssistantAgent(\n",
" \"assistant\",\n",
" system_message=\"You are a helpful assistant.\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"cache_seed\": None,\n",
" \"config_list\": config_list,\n",
" },\n",
")\n",
"\n",
"ai_user_proxy = UserProxyAgent(\n",
" name=\"ai_user\",\n",
" human_input_mode=\"NEVER\",\n",
" max_consecutive_auto_reply=1,\n",
" code_execution_config=False,\n",
" llm_config={\n",
" \"config_list\": config_list,\n",
" },\n",
" # In the system message the \"user\" always refers to the other agent.\n",
" system_message=\"You ask a user for help. You check the answer from the user and provide feedback.\",\n",
")\n",
"assistant.reset()\n",
"\n",
"math_problem = \"$x^3=125$. What is x?\"\n",
"ai_user_proxy.initiate_chat(\n",
" assistant,\n",
" message=math_problem,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Agent 'ai_user':\n",
"----------------------------------------------------------------------------------------------------\n",
"Usage summary excluding cached usage: \n",
"Total cost: 0.00025\n",
"* Model 'gpt-35-turbo': cost: 0.00025, prompt_tokens: 80, completion_tokens: 63, total_tokens: 143\n",
"\n",
"All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
"----------------------------------------------------------------------------------------------------\n",
"\n",
"Agent 'assistant':\n",
"----------------------------------------------------------------------------------------------------\n",
"Usage summary excluding cached usage: \n",
"Total cost: 0.00036\n",
"* Model 'gpt-35-turbo': cost: 0.00036, prompt_tokens: 162, completion_tokens: 60, total_tokens: 222\n",
"\n",
"All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
"----------------------------------------------------------------------------------------------------\n"
]
}
],
"source": [
"ai_user_proxy.print_usage_summary()\n",
"print()\n",
"assistant.print_usage_summary()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"No cost incurred from agent 'user'.\n"
]
}
],
"source": [
"user_proxy = UserProxyAgent(\n",
" name=\"user\",\n",
" human_input_mode=\"NEVER\",\n",
" max_consecutive_auto_reply=2,\n",
" code_execution_config=False,\n",
" default_auto_reply=\"That's all. Thank you.\",\n",
")\n",
"user_proxy.print_usage_summary()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Actual usage summary for assistant (excluding completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
"Total usage summary for assistant (including completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
"Actual usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
"Total usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
"Actual usage summary for user_proxy: None\n",
"Total usage summary for user_proxy: None\n"
]
}
],
"source": [
"print(\"Actual usage summary for assistant (excluding completion from cache):\", assistant.get_actual_usage())\n",
"print(\"Total usage summary for assistant (including completion from cache):\", assistant.get_total_usage())\n",
"\n",
"print(\"Actual usage summary for ai_user_proxy:\", ai_user_proxy.get_actual_usage())\n",
"print(\"Total usage summary for ai_user_proxy:\", ai_user_proxy.get_total_usage())\n",
"\n",
"print(\"Actual usage summary for user_proxy:\", user_proxy.get_actual_usage())\n",
"print(\"Total usage summary for user_proxy:\", user_proxy.get_total_usage())"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'total_cost': 0.0006090000000000001,\n",
" 'gpt-35-turbo': {'cost': 0.0006090000000000001,\n",
" 'prompt_tokens': 242,\n",
" 'completion_tokens': 123,\n",
" 'total_tokens': 365}}"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_usage_summary, actual_usage_summary = gather_usage_summary([assistant, ai_user_proxy, user_proxy])\n",
"total_usage_summary"
]
}
],
"metadata": {
@ -301,7 +517,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.9.18"
}
},
"nbformat": 4,

View File

@ -0,0 +1,139 @@
from autogen.agent_utils import gather_usage_summary
from autogen import AssistantAgent, UserProxyAgent
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
import pytest
from conftest import skip_openai
import autogen
import io
from contextlib import redirect_stdout
# Skip the whole module when openai is missing or the suite asked to skip OpenAI tests.
try:
    import openai  # noqa: F401
except ImportError:
    skip = True
else:
    skip = skip_openai
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
def test_gathering():
    """gather_usage_summary sums per-model costs/token counts across agents."""
    config_list = autogen.config_list_from_json(
        OAI_CONFIG_LIST,
        file_location=KEY_LOC,
    )
    # Three identically configured assistants; their usage summaries are injected below.
    assistant1, assistant2, assistant3 = (
        AssistantAgent(
            "assistant",
            system_message="You are a helpful assistant.",
            llm_config={
                "config_list": config_list,
                "model": "gpt-3.5-turbo-0613",
            },
        )
        for _ in range(3)
    )

    assistant1.client.total_usage_summary = {
        "total_cost": 0.1,
        "gpt-35-turbo": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
    }
    assistant2.client.total_usage_summary = {
        "total_cost": 0.2,
        "gpt-35-turbo": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
    }
    assistant3.client.total_usage_summary = {
        "total_cost": 0.3,
        "gpt-4": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
    }

    total_usage, _ = gather_usage_summary([assistant1, assistant2, assistant3])

    # Same-model entries are summed; different models stay as separate entries.
    assert round(total_usage["total_cost"], 8) == 0.6
    assert round(total_usage["gpt-35-turbo"]["cost"], 8) == 0.3
    assert round(total_usage["gpt-4"]["cost"], 8) == 0.3

    # test when agent doesn't have client: it must contribute nothing to either summary.
    user_proxy = UserProxyAgent(
        name="ai_user",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=2,
        code_execution_config=False,
        default_auto_reply="That's all. Thank you.",
    )
    total_usage, actual_usage = gather_usage_summary([user_proxy])
    assert total_usage == {"total_cost": 0}
    assert actual_usage == {"total_cost": 0}
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
def test_agent_usage():
    """End-to-end check that agents with an LLM client print and expose usage summaries."""
    config_list = autogen.config_list_from_json(
        OAI_CONFIG_LIST,
        file_location=KEY_LOC,
    )
    assistant = AssistantAgent(
        "assistant",
        system_message="You are a helpful assistant.",
        llm_config={
            "timeout": 600,
            "cache_seed": None,
            "config_list": config_list,
            "model": "gpt-3.5-turbo-0613",
        },
    )

    ai_user_proxy = UserProxyAgent(
        name="ai_user",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=1,
        code_execution_config=False,
        llm_config={
            "config_list": config_list,
            "model": "gpt-3.5-turbo-0613",
        },
        # In the system message the "user" always refers to the other agent.
        system_message="You ask a user for help. You check the answer from the user and provide feedback.",
    )

    math_problem = "$x^3=125$. What is x?"
    ai_user_proxy.initiate_chat(
        assistant,
        message=math_problem,
    )

    # test print
    captured_output = io.StringIO()
    with redirect_stdout(captured_output):
        ai_user_proxy.print_usage_summary()
    output = captured_output.getvalue()
    assert "Usage summary excluding cached usage:" in output

    captured_output = io.StringIO()
    with redirect_stdout(captured_output):
        assistant.print_usage_summary()
    output = captured_output.getvalue()
    assert "All completions are non-cached:" in output

    # test get: the assistant ran with cache_seed=None, so both summaries must exist.
    assert assistant.get_actual_usage() is not None
    assert assistant.get_total_usage() is not None
    # ai_user_proxy may serve replies from cache on reruns; only total usage is guaranteed.
    assert ai_user_proxy.get_total_usage() is not None
    print("Actual usage summary (excluding completion from cache):", assistant.get_actual_usage())
    print("Total usage summary (including completion from cache):", assistant.get_total_usage())
    print("Actual usage summary (excluding completion from cache):", ai_user_proxy.get_actual_usage())
    print("Total usage summary (including completion from cache):", ai_user_proxy.get_total_usage())
# Allow running this test module directly without pytest.
if __name__ == "__main__":
    test_gathering()
    test_agent_usage()

View File

@ -122,8 +122,8 @@ def test_graph_modelling_language_using_select_speaker(save=False):
skip or not sys.version.startswith("3.10"),
reason="do not run if openai is not installed or py!=3.10",
)
def test_oai_client_cost(save=False):
run_notebook("oai_client_cost.ipynb", save=save)
def test_agentchat_cost_token_tracking(save=False):
    """Execute the cost/token-tracking notebook through the shared runner.

    Args:
        save: forwarded to `run_notebook`; presumably keeps the executed copy — confirm against the helper.
    """
    run_notebook("agentchat_cost_token_tracking.ipynb", save=save)
if __name__ == "__main__":