
_(EXPERIMENTAL, RESEARCH IN PROGRESS)_ In 2023 AutoGen introduced [Teachable Agents](https://microsoft.github.io/autogen/0.2/blog/2023/10/26/TeachableAgent/) that users could teach new facts, preferences, and skills. But teachable agents were limited in several ways: they could only be `ConversableAgent` subclasses, they couldn't learn a new skill unless the user stated (in a single turn) both the task and how to solve it, and they couldn't learn on their own. **Task-Centric Memory** overcomes these limitations, allowing users to teach arbitrary agents (or teams) more flexibly and reliably, and enabling agents to learn from their own trial-and-error experiences.

This PR is large and complex. All of the files are new, and most of the added components depend on the others to run at all. But the review process can be accelerated if approached in the following order.

1. Start with the [Task-Centric Memory README](https://github.com/microsoft/autogen/tree/agentic_memory/python/packages/autogen-ext/src/autogen_ext/task_centric_memory).
   1. Install the memory extension locally, since it won't be on PyPI until it's merged. In the `agentic_memory` branch, from the `python/packages` directory:
      - `pip install -e autogen-agentchat`
      - `pip install -e autogen-ext[openai]`
      - `pip install -e autogen-ext[task-centric-memory]`
   2. Run the Quickstart sample code, then immediately open the `./pagelogs/quick/0 Call Tree.html` file in a browser to view the work in progress.
   3. Click through the web page links to see the details.
2. Continue through the rest of the main README to get a high-level overview of the architecture.
3. Read through the [code samples README](https://github.com/microsoft/autogen/tree/agentic_memory/python/samples/task_centric_memory), running each of the code samples while viewing their page logs.
4. Skim through the five code samples, along with their corresponding YAML config files:
   1. `chat_with_teachable_agent.py`
   2. `eval_retrieval.py`
   3. `eval_teachability.py`
   4. `eval_learning_from_demonstration.py`
   5. `eval_self_teaching.py`
5. Read `task_centric_memory_controller.py`, referring back to the previously generated page logs as needed. This is the most important and complex file in the PR.
6. Read the remaining core files:
   1. `_task_centric_memory_bank.py`
   2. `_string_similarity_map.py`
   3. `_prompter.py`
7. Read the supporting files in the utils dir:
   1. `teachability.py`
   2. `apprentice.py`
   3. `grader.py`
   4. `page_logger.py`
   5. `_functions.py`
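Before diving into the files, here is a minimal sketch of the teachability path described above: wrapping an ordinary agent with task-centric memory. This follows the Quickstart pattern from the README, but the model name is a placeholder and the exact constructor arguments may differ from what's shown, so treat the README as authoritative.

```python
import asyncio

from autogen_agentchat.agents import AssistantAgent
from autogen_ext.experimental.task_centric_memory import MemoryController
from autogen_ext.experimental.task_centric_memory.utils import Teachability
from autogen_ext.models.openai import OpenAIChatCompletionClient


async def main() -> None:
    client = OpenAIChatCompletionClient(model="gpt-4o")  # placeholder model name

    # Wrap a MemoryController in Teachability, then attach it to a stock
    # AssistantAgent through the standard memory parameter.
    memory_controller = MemoryController(reset=False, client=client)
    teachability = Teachability(memory_controller=memory_controller)
    agent = AssistantAgent("teachable_agent", model_client=client, memory=[teachability])

    # Anything taught here is stored as task-insight memos that can be
    # retrieved on later, related tasks.
    await agent.run(task="Remember that I prefer code samples in Python.")


asyncio.run(main())
```

Because `Teachability` plugs in through the agent's `memory` list rather than through subclassing, any agent that accepts memories can be made teachable, which is the key difference from the 2023 `ConversableAgent`-only design.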
Below is the full source of the retrieval-evaluation sample, `eval_retrieval.py` (119 lines, 4.4 KiB):

```python
import asyncio
import sys
from typing import Any, Dict, Set

from autogen_core.models import (
    ChatCompletionClient,
)
from autogen_ext.experimental.task_centric_memory import MemoryController
from autogen_ext.experimental.task_centric_memory.utils import PageLogger
from utils import create_oai_client, load_yaml_file
"""
|
|
This code sample evaluates memory precision and recall, with no agent involved at all.
|
|
See the config file configs/retrieval.yaml for an overall view of the structure and settings in this sample,
|
|
as well as the data files used for the test.
|
|
|
|
Execute the sample with this command:
|
|
python eval_retrieval.py configs/retrieval.yaml
|
|
|
|
This sample shows how an app can access the `MemoryController` directly
|
|
to retrieve previously stored task-insight pairs as potentially useful examplars when solving some new task.
|
|
A task is any text instruction that the app may give to an agent.
|
|
An insight is any text (like a hint, advice, a demonstration or plan) that might help the agent perform such tasks.
|
|
"""
|
|
|
|
|
|

async def eval_retrieval(
    memory_controller: MemoryController, client: ChatCompletionClient, logger: PageLogger, config: Dict[str, Any]
) -> str:
    """
    Evaluates precision and recall of task-centric memory retrieval.
    """
    logger.enter_function()

    # Load the specified data.
    task_files = config["tasks"]
    task_list = [load_yaml_file(task)["task_description"] for task in task_files]

    insight_files = config["insights"]
    insight_list = [load_yaml_file(insight)["insight"] for insight in insight_files]
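    # Ground-truth relevance matrix, indexed as [task][insight]. By this
    # sample's convention (per the checks below), a value of 2 marks a pair
    # that gets stored in memory, and any positive value marks the insight
    # as relevant to the task.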
    task_insight_relevance = config["task_insight_relevance"]

    # Clear memory, then store the specified task-insight pairs.
    memory_controller.reset_memory()
    for ti, task in enumerate(task_list):
        for ii, insight in enumerate(insight_list):
            if task_insight_relevance[ti][ii] == 2:
                await memory_controller.add_memo(task=task, insight=insight)

    # Test memory retrieval.
    num_retrieved = 0
    num_relevant = 0
    num_relevant_and_retrieved = 0
    for ti, task in enumerate(task_list):
        # Retrieve insights for this task.
        memos = await memory_controller.retrieve_relevant_memos(task=task)
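        # Each returned memo pairs a stored task with its insight; only the
        # insight strings are compared against the ground truth below.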
        set_of_retrieved_insights = set(memo.insight for memo in memos)

        # Gather the insights that are relevant to this task according to ground truth.
        set_of_relevant_insights: Set[str] = set()
        for ii, insight in enumerate(insight_list):
            if task_insight_relevance[ti][ii] > 0:
                set_of_relevant_insights.add(insight)

        # Accumulate the counts.
        num_retrieved += len(set_of_retrieved_insights)
        num_relevant += len(set_of_relevant_insights)
        num_relevant_and_retrieved += len(set_of_relevant_insights & set_of_retrieved_insights)

    logger.info("\nNum retrieved: {}".format(num_retrieved))
    logger.info("\nNum relevant: {}".format(num_relevant))
    logger.info("\nNum relevant and retrieved: {}".format(num_relevant_and_retrieved))
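    # The counts were pooled across all tasks above, so the metrics below are
    # micro-averaged: precision = |relevant ∩ retrieved| / |retrieved| and
    # recall = |relevant ∩ retrieved| / |relevant|.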
    # Compute precision and recall as percentages.
    precision = num_relevant_and_retrieved / num_retrieved if num_retrieved > 0 else 0
    recall = num_relevant_and_retrieved / num_relevant if num_relevant > 0 else 0
    precision_str = "Precision: {:.3f}%".format(precision * 100)
    recall_str = "Recall: {:.3f}%".format(recall * 100)
    logger.info("\n" + precision_str)
    logger.info("\n" + recall_str)

    logger.leave_function()
    return "\neval_retrieval\n" + precision_str + "\n" + recall_str

async def run_example(config_filepath: str) -> None:
    """
    Runs the code example with the necessary components.
    """
    config = load_yaml_file(config_filepath)

    # Create the necessary components.
    logger = PageLogger(config["PageLogger"])
    client = create_oai_client(config["client"])
    memory_controller = MemoryController(
        reset=True,
        client=client,
        task_assignment_callback=None,
        config=config["MemoryController"],
        logger=logger,
    )

    # Call the example function.
    results = await eval_retrieval(memory_controller, client, logger, config["test"])

    # Finish up.
    print(results)


if __name__ == "__main__":
    args = sys.argv[1:]
    if len(args) != 1:
        # Print usage information.
        print("Usage: eval_retrieval.py <path to *.yaml file>")
    else:
        # Run the code example.
        asyncio.run(run_example(config_filepath=args[0]))
```
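For orientation, the config file this sample reads has roughly the following shape. This is an illustrative sketch inferred only from the keys the code accesses (`PageLogger`, `client`, `MemoryController`, and the `test` section); the paths and settings shown are placeholders, and the actual `configs/retrieval.yaml` in the repo is authoritative.

```yaml
PageLogger:
  path: ./pagelogs/retrieval    # placeholder; this mapping is passed straight to PageLogger

client:
  model: gpt-4o                 # placeholder; consumed by create_oai_client in utils

MemoryController: {}            # placeholder; this mapping is passed straight to MemoryController

test:
  tasks:                        # each file must contain a task_description field
    - data/task_1.yaml
    - data/task_2.yaml
  insights:                     # each file must contain an insight field
    - data/insight_1.yaml
    - data/insight_2.yaml
  task_insight_relevance:       # rows = tasks, columns = insights
    - [2, 0]                    # 2 = stored in memory, 1 = relevant only, 0 = irrelevant
    - [0, 2]
```

Keeping the task list, insight list, and relevance matrix in the config lets the same script score different retrieval test sets without any code changes.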