
_(EXPERIMENTAL, RESEARCH IN PROGRESS)_ In 2023 AutoGen introduced [Teachable Agents](https://microsoft.github.io/autogen/0.2/blog/2023/10/26/TeachableAgent/) that users could teach new facts, preferences, and skills. But teachable agents were limited in several ways: they could only be `ConversableAgent` subclasses, they couldn't learn a new skill unless the user stated (in a single turn) both the task and how to solve it, and they couldn't learn on their own. **Task-Centric Memory** overcomes these limitations, allowing users to teach arbitrary agents (or teams) more flexibly and reliably, and enabling agents to learn from their own trial-and-error experiences.

This PR is large and complex. All of the files are new, and most of the added components depend on each other to run at all. But the review process can be accelerated if approached in the following order.

1. Start with the [Task-Centric Memory README](https://github.com/microsoft/autogen/tree/agentic_memory/python/packages/autogen-ext/src/autogen_ext/task_centric_memory).
   1. Install the memory extension locally, since it won't be on PyPI until it's merged. In the `agentic_memory` branch, from the `python/packages` directory:
      - `pip install -e autogen-agentchat`
      - `pip install -e "autogen-ext[openai]"`
      - `pip install -e "autogen-ext[task-centric-memory]"`
   2. Run the Quickstart sample code, then immediately open the `./pagelogs/quick/0 Call Tree.html` file in a browser to view the work in progress.
   3. Click through the web page links to see the details.
2. Continue through the rest of the main README to get a high-level overview of the architecture.
3. Read through the [code samples README](https://github.com/microsoft/autogen/tree/agentic_memory/python/samples/task_centric_memory), running each of the code samples while viewing their page logs.
4. Skim through the code samples, along with their corresponding YAML config files:
   1. `chat_with_teachable_agent.py`
   2. `eval_retrieval.py`
   3. `eval_teachability.py`
   4. `eval_learning_from_demonstration.py`
   5. `eval_self_teaching.py`
5. Read `task_centric_memory_controller.py`, referring back to the previously generated page logs as needed. This is the most important and complex file in the PR.
6. Read the remaining core files:
   1. `_task_centric_memory_bank.py`
   2. `_string_similarity_map.py`
   3. `_prompter.py`
7. Read the supporting files in the `utils` dir:
   1. `teachability.py`
   2. `apprentice.py`
   3. `grader.py`
   4. `page_logger.py`
   5. `_functions.py`
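For quick orientation before reading the samples, the sketch below condenses the train-then-test pattern that the `eval_*` samples share, using only classes and helpers that appear in the full `eval_self_teaching.py` listing further down (`Apprentice`, `Grader`, `PageLogger`, plus the samples' local `create_oai_client` and `load_yaml_file` utilities). It is a minimal illustration, not a replacement for the samples: the config path and trial count are placeholders, and it assumes you run it from the samples directory so that `utils` and the config files resolve.

```python
import asyncio

from autogen_ext.experimental.task_centric_memory.utils import Apprentice, Grader, PageLogger
from utils import create_oai_client, load_yaml_file  # helpers local to the samples directory


async def main() -> None:
    # Build the shared components from a YAML config (path is a placeholder).
    config = load_yaml_file("configs/self_teaching.yaml")
    logger = PageLogger(config["PageLogger"])
    client = create_oai_client(config["client"])
    apprentice = Apprentice(client, config["Apprentice"], logger)
    grader = Grader(client, logger)

    # Start from empty memory, let the apprentice learn from its own trial and error,
    # then grade how often it succeeds with memory enabled.
    apprentice.reset_memory()
    task = load_yaml_file(config["test"]["task_file_1"])
    await apprentice.train_on_task(task=task["task_description"], expected_answer=task["expected_answer"])
    num_successes, num_trials = await grader.test_apprentice(
        apprentice=apprentice,
        task_description=task["task_description"],
        expected_answer=task["expected_answer"],
        num_trials=3,  # placeholder; the samples read this from the config
        use_memory=True,
        client=client,
    )
    print(f"Success rate: {num_successes}/{num_trials}")


if __name__ == "__main__":
    asyncio.run(main())
```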
`eval_self_teaching.py` · 129 lines · 4.8 KiB · Python
import asyncio
import sys
from typing import Any, Dict

from autogen_core.models import (
    ChatCompletionClient,
)
from autogen_ext.experimental.task_centric_memory.utils import Apprentice, Grader, PageLogger

from utils import create_oai_client, load_yaml_file


"""
This code sample connects task-centric memory to a selectable agent with no changes to that agent's code.
See the block diagram in the README for an overview of the components and their interactions.
See the config file configs/self_teaching.yaml for an overall view of the structure and settings in this sample.

Execute the sample with this command:
    python eval_self_teaching.py configs/self_teaching.yaml

We say that an agent is self-teaching if it can learn quickly from its own trial and error with no user input.
This sample asks the agent to perform a reasoning task on which it usually fails.
Then, using automatic success or failure feedback (for a verifiable task with no side effects on the environment),
the agent iterates through a background learning loop to find a solution, which it then stores as an insight in memory.
Finally, the agent is tested again to see if it can retrieve and apply its insight to the original task,
as well as to a similar but different task as a test of generalization.

If adapting this sample code to a new setting, the Apprentice class can be used as-is or completely replaced by other code.
"""


async def eval_self_teaching(
    apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, config: Dict[str, Any]
) -> str:
    """
    Evaluates the ability of an agent to learn quickly from its own trial and error.
    """
    logger.enter_function()

    num_loops = config["num_loops"]
    num_final_test_trials = config["num_final_test_trials"]
    grader = Grader(client, logger)

    # Load the specified data.
    task_dict_1 = load_yaml_file(config["task_file_1"])
    task_description_1 = task_dict_1["task_description"]
    expected_answer_1 = task_dict_1["expected_answer"]

    # Test generalization on this different, similar task.
    task_dict_2 = load_yaml_file(config["task_file_2"])
    task_description_2 = task_dict_2["task_description"]
    expected_answer_2 = task_dict_2["expected_answer"]

    # Start the test with empty memory.
    apprentice.reset_memory()

    total_num_successes_1 = 0
    total_num_successes_2 = 0
    total_num_trials = 0
    for _ in range(num_loops):
        # Train on the first task.
        await apprentice.train_on_task(task=task_description_1, expected_answer=expected_answer_1)

        # Test on the first task.
        num_successes, num_trials = await grader.test_apprentice(
            apprentice=apprentice,
            task_description=task_description_1,
            expected_answer=expected_answer_1,
            num_trials=num_final_test_trials,
            use_memory=True,
            client=client,
        )
        logger.info("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100)))
        total_num_successes_1 += num_successes

        # Test on the second task.
        num_successes, num_trials = await grader.test_apprentice(
            apprentice=apprentice,
            task_description=task_description_2,
            expected_answer=expected_answer_2,
            num_trials=num_final_test_trials,
            use_memory=True,
            client=client,
        )
        logger.info("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100)))
        total_num_successes_2 += num_successes

        total_num_trials += num_final_test_trials
        logger.info("")

    overall_success_rate_1 = round((total_num_successes_1 / total_num_trials) * 100)
    overall_success_rate_2 = round((total_num_successes_2 / total_num_trials) * 100)

    results_str_1 = "Overall task 1 success rate: {}%".format(overall_success_rate_1)
    results_str_2 = "Overall task 2 success rate: {}%".format(overall_success_rate_2)
    logger.info("\n" + results_str_1)
    logger.info(results_str_2)

    logger.leave_function()
    return "\neval_self_teaching\n" + results_str_1 + "\n" + results_str_2


async def run_example(config_filepath: str) -> None:
    """
    Runs the code example with the necessary components.
    """
    config = load_yaml_file(config_filepath)

    # Create the necessary components.
    logger = PageLogger(config["PageLogger"])
    client = create_oai_client(config["client"])
    apprentice = Apprentice(client, config["Apprentice"], logger)

    # Call the example function.
    results = await eval_self_teaching(apprentice, client, logger, config["test"])

    # Finish up.
    print(results)


if __name__ == "__main__":
    args = sys.argv[1:]
    if len(args) != 1:
        # Print usage information.
        print("Usage: python eval_self_teaching.py <path to *.yaml file>")
    else:
        # Run the code example.
        asyncio.run(run_example(config_filepath=args[0]))
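The module docstring above points to `configs/self_teaching.yaml` for this sample's settings. That file is not reproduced here, but the keys the code reads imply roughly the following structure for the dict returned by `load_yaml_file`. The sketch below is an assumption derived from those accesses, not the actual config contents; all values are illustrative placeholders, and the `PageLogger`, `client`, and `Apprentice` sections hold whatever settings those components accept.

```python
# Hypothetical shape of the config that eval_self_teaching.py expects after load_yaml_file().
config = {
    "PageLogger": {},   # settings forwarded to PageLogger(...); contents omitted here
    "client": {},       # settings forwarded to create_oai_client(...); contents omitted here
    "Apprentice": {},   # settings forwarded to Apprentice(...); contents omitted here
    "test": {
        "num_loops": 10,                     # number of outer train/test loops (placeholder)
        "num_final_test_trials": 3,          # trials per grading pass (placeholder)
        "task_file_1": "tasks/task_1.yaml",  # task used for training and testing (placeholder path)
        "task_file_2": "tasks/task_2.yaml",  # similar task used to test generalization (placeholder path)
    },
}

# Each referenced task file must in turn provide at least these two fields,
# since the code reads task_dict["task_description"] and task_dict["expected_answer"].
task = {
    "task_description": "A reasoning task the base agent usually fails.",  # placeholder text
    "expected_answer": "The verifiable correct answer.",                   # placeholder text
}
```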