diff --git a/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/memory.ipynb b/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/memory.ipynb index 1169889e0..616bd105f 100644 --- a/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/memory.ipynb +++ b/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/memory.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Memory \n", + "## Memory and RAG\n", "\n", "There are several use cases where it is valuable to maintain a _store_ of useful facts that can be intelligently added to the context of the agent just before a specific step. The typically use case here is a RAG pattern where a query is used to retrieve relevant information from a database that is then added to the agent's context.\n", "\n", @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -72,36 +72,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---------- user ----------\n", - "What is the weather in New York?\n", - "---------- assistant_agent ----------\n", - "[MemoryContent(content='The weather should be in metric units', mime_type=, metadata=None), MemoryContent(content='Meal recipe must be vegan', mime_type=, metadata=None)]\n", - "---------- assistant_agent ----------\n", - "[FunctionCall(id='call_GpimUGED5zUbfxORaGo2JD6F', arguments='{\"city\":\"New York\",\"units\":\"metric\"}', name='get_weather')]\n", - "---------- assistant_agent ----------\n", - "[FunctionExecutionResult(content='The weather in New York is 23 °C and Sunny.', call_id='call_GpimUGED5zUbfxORaGo2JD6F', is_error=False)]\n", - "---------- assistant_agent ----------\n", - "The weather in New York is 23 °C and Sunny.\n" - ] - }, - { - "data": { - "text/plain": [ - "TaskResult(messages=[TextMessage(source='user', models_usage=None, metadata={}, content='What is the weather in New York?', type='TextMessage'), MemoryQueryEvent(source='assistant_agent', models_usage=None, metadata={}, content=[MemoryContent(content='The weather should be in metric units', mime_type=, metadata=None), MemoryContent(content='Meal recipe must be vegan', mime_type=, metadata=None)], type='MemoryQueryEvent'), ToolCallRequestEvent(source='assistant_agent', models_usage=RequestUsage(prompt_tokens=123, completion_tokens=20), metadata={}, content=[FunctionCall(id='call_GpimUGED5zUbfxORaGo2JD6F', arguments='{\"city\":\"New York\",\"units\":\"metric\"}', name='get_weather')], type='ToolCallRequestEvent'), ToolCallExecutionEvent(source='assistant_agent', models_usage=None, metadata={}, content=[FunctionExecutionResult(content='The weather in New York is 23 °C and Sunny.', call_id='call_GpimUGED5zUbfxORaGo2JD6F', is_error=False)], type='ToolCallExecutionEvent'), ToolCallSummaryMessage(source='assistant_agent', models_usage=None, metadata={}, content='The weather in New York is 23 °C and Sunny.', type='ToolCallSummaryMessage')], stop_reason=None)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Run the agent with a task.\n", "stream = assistant_agent.run_stream(task=\"What is the weather in New York?\")\n", @@ -117,23 
+90,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[UserMessage(content='What is the weather in New York?', source='user', type='UserMessage'),\n", - " SystemMessage(content='\\nRelevant memory content (in chronological order):\\n1. The weather should be in metric units\\n2. Meal recipe must be vegan\\n', type='SystemMessage'),\n", - " AssistantMessage(content=[FunctionCall(id='call_GpimUGED5zUbfxORaGo2JD6F', arguments='{\"city\":\"New York\",\"units\":\"metric\"}', name='get_weather')], thought=None, source='assistant_agent', type='AssistantMessage'),\n", - " FunctionExecutionResultMessage(content=[FunctionExecutionResult(content='The weather in New York is 23 °C and Sunny.', call_id='call_GpimUGED5zUbfxORaGo2JD6F', is_error=False)], type='FunctionExecutionResultMessage')]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "await assistant_agent._model_context.get_messages()" ] @@ -149,64 +108,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---------- user ----------\n", - "Write brief meal recipe with broth\n", - "---------- assistant_agent ----------\n", - "[MemoryContent(content='The weather should be in metric units', mime_type=, metadata=None), MemoryContent(content='Meal recipe must be vegan', mime_type=, metadata=None)]\n", - "---------- assistant_agent ----------\n", - "Here's a brief vegan broth recipe:\n", - "\n", - "**Vegan Vegetable Broth**\n", - "\n", - "**Ingredients:**\n", - "- 2 tablespoons olive oil\n", - "- 1 large onion, chopped\n", - "- 2 carrots, sliced\n", - "- 2 celery stalks, sliced\n", - "- 4 cloves garlic, minced\n", - "- 1 teaspoon salt\n", - "- 1/2 teaspoon pepper\n", - "- 1 bay leaf\n", - "- 1 teaspoon thyme\n", - "- 1 teaspoon rosemary\n", - "- 8 cups water\n", - "- 1 cup mushrooms, sliced\n", - "- 1 cup chopped leafy greens (e.g., kale, spinach)\n", - "- 1 tablespoon soy sauce (optional)\n", - "\n", - "**Instructions:**\n", - "\n", - "1. **Sauté Vegetables:** In a large pot, heat olive oil over medium heat. Add the onion, carrots, and celery, and sauté until they begin to soften, about 5-7 minutes.\n", - "\n", - "2. **Add Garlic & Seasonings:** Stir in the garlic, salt, pepper, bay leaf, thyme, and rosemary. Cook for another 2 minutes until fragrant.\n", - "\n", - "3. **Simmer Broth:** Pour in the water, add mushrooms and soy sauce (if using). Increase heat and bring to a boil. Reduce heat and let it simmer for 30-45 minutes.\n", - "\n", - "4. **Add Greens:** In the last 5 minutes of cooking, add the chopped leafy greens.\n", - "\n", - "5. **Strain & Serve:** Remove from heat, strain out the vegetables (or leave them in for a chunkier texture), and adjust seasoning if needed. Serve hot as a base for soups or enjoy as is!\n", - "\n", - "Enjoy your flavorful, nourishing vegan broth! 
TERMINATE\n" - ] - }, - { - "data": { - "text/plain": [ - "TaskResult(messages=[TextMessage(source='user', models_usage=None, metadata={}, content='Write brief meal recipe with broth', type='TextMessage'), MemoryQueryEvent(source='assistant_agent', models_usage=None, metadata={}, content=[MemoryContent(content='The weather should be in metric units', mime_type=, metadata=None), MemoryContent(content='Meal recipe must be vegan', mime_type=, metadata=None)], type='MemoryQueryEvent'), TextMessage(source='assistant_agent', models_usage=RequestUsage(prompt_tokens=208, completion_tokens=331), metadata={}, content=\"Here's a brief vegan broth recipe:\\n\\n**Vegan Vegetable Broth**\\n\\n**Ingredients:**\\n- 2 tablespoons olive oil\\n- 1 large onion, chopped\\n- 2 carrots, sliced\\n- 2 celery stalks, sliced\\n- 4 cloves garlic, minced\\n- 1 teaspoon salt\\n- 1/2 teaspoon pepper\\n- 1 bay leaf\\n- 1 teaspoon thyme\\n- 1 teaspoon rosemary\\n- 8 cups water\\n- 1 cup mushrooms, sliced\\n- 1 cup chopped leafy greens (e.g., kale, spinach)\\n- 1 tablespoon soy sauce (optional)\\n\\n**Instructions:**\\n\\n1. **Sauté Vegetables:** In a large pot, heat olive oil over medium heat. Add the onion, carrots, and celery, and sauté until they begin to soften, about 5-7 minutes.\\n\\n2. **Add Garlic & Seasonings:** Stir in the garlic, salt, pepper, bay leaf, thyme, and rosemary. Cook for another 2 minutes until fragrant.\\n\\n3. **Simmer Broth:** Pour in the water, add mushrooms and soy sauce (if using). Increase heat and bring to a boil. Reduce heat and let it simmer for 30-45 minutes.\\n\\n4. **Add Greens:** In the last 5 minutes of cooking, add the chopped leafy greens.\\n\\n5. **Strain & Serve:** Remove from heat, strain out the vegetables (or leave them in for a chunkier texture), and adjust seasoning if needed. Serve hot as a base for soups or enjoy as is!\\n\\nEnjoy your flavorful, nourishing vegan broth! 
TERMINATE\", type='TextMessage')], stop_reason=None)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "stream = assistant_agent.run_stream(task=\"Write brief meal recipe with broth\")\n", "await Console(stream)" @@ -232,24 +136,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---------- user ----------\n", - "What is the weather in New York?\n", - "---------- assistant_agent ----------\n", - "[MemoryContent(content='The weather should be in metric units', mime_type=, metadata=None), MemoryContent(content='Meal recipe must be vegan', mime_type=, metadata=None)]\n", - "---------- assistant_agent ----------\n", - "[FunctionCall(id='call_PKcfeEHXimGG2QOhJwXzCLuZ', arguments='{\"city\":\"New York\",\"units\":\"metric\"}', name='get_weather')]\n", - "---------- assistant_agent ----------\n", - "[FunctionExecutionResult(content='The weather in New York is 23 °C and Sunny.', call_id='call_PKcfeEHXimGG2QOhJwXzCLuZ', is_error=False)]\n", - "---------- assistant_agent ----------\n", - "The weather in New York is 23 °C and Sunny.\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "from pathlib import Path\n", @@ -314,29 +201,218 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'{\"provider\":\"autogen_ext.memory.chromadb.ChromaDBVectorMemory\",\"component_type\":\"memory\",\"version\":1,\"component_version\":1,\"description\":\"ChromaDB-based vector memory implementation with similarity search.\",\"label\":\"ChromaDBVectorMemory\",\"config\":{\"client_type\":\"persistent\",\"collection_name\":\"preferences\",\"distance_metric\":\"cosine\",\"k\":2,\"score_threshold\":0.4,\"allow_reset\":false,\"tenant\":\"default_tenant\",\"database\":\"default_database\",\"persistence_path\":\"/Users/victordibia/.chromadb_autogen\"}}'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "chroma_user_memory.dump_component().model_dump_json()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## RAG Agent: Putting It All Together\n", + "\n", + "The RAG (Retrieval Augmented Generation) pattern which is common in building AI systems encompasses two distinct phases:\n", + "\n", + "1. **Indexing**: Loading documents, chunking them, and storing them in a vector database\n", + "2. **Retrieval**: Finding and using relevant chunks during conversation runtime\n", + "\n", + "In our previous examples, we manually added items to memory and passed them to our agents. In practice, the indexing process is usually automated and based on much larger document sources like product documentation, internal files, or knowledge bases.\n", + "\n", + "> Note: The quality of a RAG system is dependent on the quality of the chunking and retrieval process (models, embeddings, etc.). You may need to experiement with more advanced chunking and retrieval models to get the best results.\n", + "\n", + "### Building a Simple RAG Agent\n", + "\n", + "To begin, let's create a simple document indexer that we will used to load documents, chunk them, and store them in a `ChromaDBVectorMemory` memory store. 
" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], + "source": [ + "import re\n", + "from typing import List\n", + "\n", + "import aiofiles\n", + "import aiohttp\n", + "from autogen_core.memory import Memory, MemoryContent, MemoryMimeType\n", + "\n", + "\n", + "class SimpleDocumentIndexer:\n", + " \"\"\"Basic document indexer for AutoGen Memory.\"\"\"\n", + "\n", + " def __init__(self, memory: Memory, chunk_size: int = 1500) -> None:\n", + " self.memory = memory\n", + " self.chunk_size = chunk_size\n", + "\n", + " async def _fetch_content(self, source: str) -> str:\n", + " \"\"\"Fetch content from URL or file.\"\"\"\n", + " if source.startswith((\"http://\", \"https://\")):\n", + " async with aiohttp.ClientSession() as session:\n", + " async with session.get(source) as response:\n", + " return await response.text()\n", + " else:\n", + " async with aiofiles.open(source, \"r\", encoding=\"utf-8\") as f:\n", + " return await f.read()\n", + "\n", + " def _strip_html(self, text: str) -> str:\n", + " \"\"\"Remove HTML tags and normalize whitespace.\"\"\"\n", + " text = re.sub(r\"<[^>]*>\", \" \", text)\n", + " text = re.sub(r\"\\s+\", \" \", text)\n", + " return text.strip()\n", + "\n", + " def _split_text(self, text: str) -> List[str]:\n", + " \"\"\"Split text into fixed-size chunks.\"\"\"\n", + " chunks: list[str] = []\n", + " # Just split text into fixed-size chunks\n", + " for i in range(0, len(text), self.chunk_size):\n", + " chunk = text[i : i + self.chunk_size]\n", + " chunks.append(chunk.strip())\n", + " return chunks\n", + "\n", + " async def index_documents(self, sources: List[str]) -> int:\n", + " \"\"\"Index documents into memory.\"\"\"\n", + " total_chunks = 0\n", + "\n", + " for source in sources:\n", + " try:\n", + " content = await self._fetch_content(source)\n", + "\n", + " # Strip HTML if content appears to be HTML\n", + " if \"<\" in content and \">\" in content:\n", + " content = self._strip_html(content)\n", + "\n", + " chunks = self._split_text(content)\n", + "\n", + " for i, chunk in enumerate(chunks):\n", + " await self.memory.add(\n", + " MemoryContent(\n", + " content=chunk, mime_type=MemoryMimeType.TEXT, metadata={\"source\": source, \"chunk_index\": i}\n", + " )\n", + " )\n", + "\n", + " total_chunks += len(chunks)\n", + "\n", + " except Exception as e:\n", + " print(f\"Error indexing {source}: {str(e)}\")\n", + "\n", + " return total_chunks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \n", + "Now let's use our indexer with ChromaDBVectorMemory to build a complete RAG agent:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Indexed 72 chunks from 4 AutoGen documents\n" + ] + } + ], + "source": [ + "import os\n", + "from pathlib import Path\n", + "\n", + "from autogen_agentchat.agents import AssistantAgent\n", + "from autogen_agentchat.ui import Console\n", + "from autogen_ext.memory.chromadb import ChromaDBVectorMemory, PersistentChromaDBVectorMemoryConfig\n", + "from autogen_ext.models.openai import OpenAIChatCompletionClient\n", + "\n", + "# Initialize vector memory\n", + "\n", + "rag_memory = ChromaDBVectorMemory(\n", + " config=PersistentChromaDBVectorMemoryConfig(\n", + " collection_name=\"autogen_docs\",\n", + " persistence_path=os.path.join(str(Path.home()), \".chromadb_autogen\"),\n", + " k=3, # Return top 3 results\n", + " score_threshold=0.4, # Minimum similarity 
score\n", + " )\n", + ")\n", + "\n", + "await rag_memory.clear() # Clear existing memory\n", + "\n", + "\n", + "# Index AutoGen documentation\n", + "async def index_autogen_docs() -> None:\n", + " indexer = SimpleDocumentIndexer(memory=rag_memory)\n", + " sources = [\n", + " \"https://raw.githubusercontent.com/microsoft/autogen/main/README.md\",\n", + " \"https://microsoft.github.io/autogen/dev/user-guide/agentchat-user-guide/tutorial/agents.html\",\n", + " \"https://microsoft.github.io/autogen/dev/user-guide/agentchat-user-guide/tutorial/teams.html\",\n", + " \"https://microsoft.github.io/autogen/dev/user-guide/agentchat-user-guide/tutorial/termination.html\",\n", + " ]\n", + " chunks: int = await indexer.index_documents(sources)\n", + " print(f\"Indexed {chunks} chunks from {len(sources)} AutoGen documents\")\n", + "\n", + "\n", + "await index_autogen_docs()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------- user ----------\n", + "What is AgentChat?\n", + "Query results: results=[MemoryContent(content='ng OpenAI\\'s GPT-4o model. See [other supported models](https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/tutorial/models.html). ```python import asyncio from autogen_agentchat.agents import AssistantAgent from autogen_ext.models.openai import OpenAIChatCompletionClient async def main() -> None: model_client = OpenAIChatCompletionClient(model=\"gpt-4o\") agent = AssistantAgent(\"assistant\", model_client=model_client) print(await agent.run(task=\"Say \\'Hello World!\\'\")) await model_client.close() asyncio.run(main()) ``` ### Web Browsing Agent Team Create a group chat team with a web surfer agent and a user proxy agent for web browsing tasks. You need to install [playwright](https://playwright.dev/python/docs/library). ```python # pip install -U autogen-agentchat autogen-ext[openai,web-surfer] # playwright install import asyncio from autogen_agentchat.agents import UserProxyAgent from autogen_agentchat.conditions import TextMentionTermination from autogen_agentchat.teams import RoundRobinGroupChat from autogen_agentchat.ui import Console from autogen_ext.models.openai import OpenAIChatCompletionClient from autogen_ext.agents.web_surfer import MultimodalWebSurfer async def main() -> None: model_client = OpenAIChatCompletionClient(model=\"gpt-4o\") # The web surfer will open a Chromium browser window to perform web browsing tasks. web_surfer = MultimodalWebSurfer(\"web_surfer\", model_client, headless=False, animate_actions=True) # The user proxy agent is used to ge', mime_type='MemoryMimeType.TEXT', metadata={'chunk_index': 1, 'mime_type': 'MemoryMimeType.TEXT', 'source': 'https://raw.githubusercontent.com/microsoft/autogen/main/README.md', 'score': 0.48810458183288574, 'id': '16088e03-0153-4da3-9dec-643b39c549f5'}), MemoryContent(content='els_usage=None content='AutoGen is a programming framework for building multi-agent applications.' type='ToolCallSummaryMessage' The call to the on_messages() method returns a Response that contains the agent’s final response in the chat_message attribute, as well as a list of inner messages in the inner_messages attribute, which stores the agent’s “thought process” that led to the final response. Note It is important to note that on_messages() will update the internal state of the agent – it will add the messages to the agent’s history. So you should call this method with new messages. 
You should not repeatedly call this method with the same messages or the complete history. Note Unlike in v0.2 AgentChat, the tools are executed by the same agent directly within the same call to on_messages() . By default, the agent will return the result of the tool call as the final response. You can also call the run() method, which is a convenience method that calls on_messages() . It follows the same interface as Teams and returns a TaskResult object. Multi-Modal Input # The AssistantAgent can handle multi-modal input by providing the input as a MultiModalMessage . from io import BytesIO import PIL import requests from autogen_agentchat.messages import MultiModalMessage from autogen_core import Image # Create a multi-modal message with random image and text. pil_image = PIL . Image . open ( BytesIO ( requests . get ( "https://picsum.photos/300/200" ) . content )', mime_type='MemoryMimeType.TEXT', metadata={'chunk_index': 3, 'mime_type': 'MemoryMimeType.TEXT', 'source': 'https://microsoft.github.io/autogen/dev/user-guide/agentchat-user-guide/tutorial/agents.html', 'score': 0.4665141701698303, 'id': '3d603b62-7cab-4f74-b671-586fe36306f2'}), MemoryContent(content='AgentChat Termination Termination # In the previous section, we explored how to define agents, and organize them into teams that can solve tasks. However, a run can go on forever, and in many cases, we need to know when to stop them. This is the role of the termination condition. AgentChat supports several termination condition by providing a base TerminationCondition class and several implementations that inherit from it. A termination condition is a callable that takes a sequence of AgentEvent or ChatMessage objects since the last time the condition was called , and returns a StopMessage if the conversation should be terminated, or None otherwise. Once a termination condition has been reached, it must be reset by calling reset() before it can be used again. Some important things to note about termination conditions: They are stateful but reset automatically after each run ( run() or run_stream() ) is finished. They can be combined using the AND and OR operators. Note For group chat teams (i.e., RoundRobinGroupChat , SelectorGroupChat , and Swarm ), the termination condition is called after each agent responds. While a response may contain multiple inner messages, the team calls its termination condition just once for all the messages from a single response. So the condition is called with the “delta sequence” of messages since the last time it was called. Built-In Termination Conditions: MaxMessageTermination : Stops after a specified number of messages have been produced,', mime_type='MemoryMimeType.TEXT', metadata={'chunk_index': 1, 'mime_type': 'MemoryMimeType.TEXT', 'source': 'https://microsoft.github.io/autogen/dev/user-guide/agentchat-user-guide/tutorial/termination.html', 'score': 0.461774212772051, 'id': '699ef490-d108-4cd3-b629-c1198d6b78ba'})]\n", + "---------- rag_assistant ----------\n", + "[MemoryContent(content='ng OpenAI\\'s GPT-4o model. See [other supported models](https://microsoft.github.io/autogen/stable/user-guide/agentchat-user-guide/tutorial/models.html). 
```python import asyncio from autogen_agentchat.agents import AssistantAgent from autogen_ext.models.openai import OpenAIChatCompletionClient async def main() -> None: model_client = OpenAIChatCompletionClient(model=\"gpt-4o\") agent = AssistantAgent(\"assistant\", model_client=model_client) print(await agent.run(task=\"Say \\'Hello World!\\'\")) await model_client.close() asyncio.run(main()) ``` ### Web Browsing Agent Team Create a group chat team with a web surfer agent and a user proxy agent for web browsing tasks. You need to install [playwright](https://playwright.dev/python/docs/library). ```python # pip install -U autogen-agentchat autogen-ext[openai,web-surfer] # playwright install import asyncio from autogen_agentchat.agents import UserProxyAgent from autogen_agentchat.conditions import TextMentionTermination from autogen_agentchat.teams import RoundRobinGroupChat from autogen_agentchat.ui import Console from autogen_ext.models.openai import OpenAIChatCompletionClient from autogen_ext.agents.web_surfer import MultimodalWebSurfer async def main() -> None: model_client = OpenAIChatCompletionClient(model=\"gpt-4o\") # The web surfer will open a Chromium browser window to perform web browsing tasks. web_surfer = MultimodalWebSurfer(\"web_surfer\", model_client, headless=False, animate_actions=True) # The user proxy agent is used to ge', mime_type='MemoryMimeType.TEXT', metadata={'chunk_index': 1, 'mime_type': 'MemoryMimeType.TEXT', 'source': 'https://raw.githubusercontent.com/microsoft/autogen/main/README.md', 'score': 0.48810458183288574, 'id': '16088e03-0153-4da3-9dec-643b39c549f5'}), MemoryContent(content='els_usage=None content='AutoGen is a programming framework for building multi-agent applications.' type='ToolCallSummaryMessage' The call to the on_messages() method returns a Response that contains the agent’s final response in the chat_message attribute, as well as a list of inner messages in the inner_messages attribute, which stores the agent’s “thought process” that led to the final response. Note It is important to note that on_messages() will update the internal state of the agent – it will add the messages to the agent’s history. So you should call this method with new messages. You should not repeatedly call this method with the same messages or the complete history. Note Unlike in v0.2 AgentChat, the tools are executed by the same agent directly within the same call to on_messages() . By default, the agent will return the result of the tool call as the final response. You can also call the run() method, which is a convenience method that calls on_messages() . It follows the same interface as Teams and returns a TaskResult object. Multi-Modal Input # The AssistantAgent can handle multi-modal input by providing the input as a MultiModalMessage . from io import BytesIO import PIL import requests from autogen_agentchat.messages import MultiModalMessage from autogen_core import Image # Create a multi-modal message with random image and text. pil_image = PIL . Image . open ( BytesIO ( requests . get ( "https://picsum.photos/300/200" ) . 
content )', mime_type='MemoryMimeType.TEXT', metadata={'chunk_index': 3, 'mime_type': 'MemoryMimeType.TEXT', 'source': 'https://microsoft.github.io/autogen/dev/user-guide/agentchat-user-guide/tutorial/agents.html', 'score': 0.4665141701698303, 'id': '3d603b62-7cab-4f74-b671-586fe36306f2'}), MemoryContent(content='AgentChat Termination Termination # In the previous section, we explored how to define agents, and organize them into teams that can solve tasks. However, a run can go on forever, and in many cases, we need to know when to stop them. This is the role of the termination condition. AgentChat supports several termination condition by providing a base TerminationCondition class and several implementations that inherit from it. A termination condition is a callable that takes a sequence of AgentEvent or ChatMessage objects since the last time the condition was called , and returns a StopMessage if the conversation should be terminated, or None otherwise. Once a termination condition has been reached, it must be reset by calling reset() before it can be used again. Some important things to note about termination conditions: They are stateful but reset automatically after each run ( run() or run_stream() ) is finished. They can be combined using the AND and OR operators. Note For group chat teams (i.e., RoundRobinGroupChat , SelectorGroupChat , and Swarm ), the termination condition is called after each agent responds. While a response may contain multiple inner messages, the team calls its termination condition just once for all the messages from a single response. So the condition is called with the “delta sequence” of messages since the last time it was called. Built-In Termination Conditions: MaxMessageTermination : Stops after a specified number of messages have been produced,', mime_type='MemoryMimeType.TEXT', metadata={'chunk_index': 1, 'mime_type': 'MemoryMimeType.TEXT', 'source': 'https://microsoft.github.io/autogen/dev/user-guide/agentchat-user-guide/tutorial/termination.html', 'score': 0.461774212772051, 'id': '699ef490-d108-4cd3-b629-c1198d6b78ba'})]\n", + "---------- rag_assistant ----------\n", + "AgentChat is part of the AutoGen framework, a programming environment for building multi-agent applications. In AgentChat, agents can interact with each other and with users to perform various tasks, including web browsing and engaging in dialogue. It utilizes models from OpenAI for chat completions and supports multi-modal input, which means agents can handle inputs that include both text and images. Additionally, AgentChat provides mechanisms to define termination conditions to control when a conversation or task should be concluded, ensuring that the agent interactions are efficient and goal-oriented. TERMINATE\n" + ] + } + ], + "source": [ + "# Create our RAG assistant agent\n", + "rag_assistant = AssistantAgent(\n", + " name=\"rag_assistant\", model_client=OpenAIChatCompletionClient(model=\"gpt-4o\"), memory=[rag_memory]\n", + ")\n", + "\n", + "# Ask questions about AutoGen\n", + "stream = rag_assistant.run_stream(task=\"What is AgentChat?\")\n", + "await Console(stream)\n", + "\n", + "# Remember to close the memory when done\n", + "await rag_memory.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This implementation provides a RAG agent that can answer questions based on AutoGen documentation. 
When a question is asked, the Memory system retrieves relevant chunks and adds them to the context, enabling the assistant to generate informed responses.\n", + "\n", + "For production systems, you might want to:\n", + "1. Implement more sophisticated chunking strategies\n", + "2. Add metadata filtering capabilities\n", + "3. Customize the retrieval scoring\n", + "4. Optimize embedding models for your specific domain\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [] } ], diff --git a/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/migration-guide.md b/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/migration-guide.md index 25b0fef24..982e2151f 100644 --- a/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/migration-guide.md +++ b/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/migration-guide.md @@ -45,6 +45,7 @@ See each feature below for detailed information on how to migrate. - [Assistant Agent](#assistant-agent) - [Multi-Modal Agent](#multi-modal-agent) - [User Proxy](#user-proxy) + - [RAG Agent](#rag-agent) - [Conversable Agent and Register Reply](#conversable-agent-and-register-reply) - [Save and Load Agent State](#save-and-load-agent-state) - [Two-Agent Chat](#two-agent-chat) @@ -361,6 +362,54 @@ user_proxy = UserProxyAgent("user_proxy") See {py:class}`~autogen_agentchat.agents.UserProxyAgent` for more details and how to customize the input function with timeout. +## RAG Agent + +In `v0.2`, there was the concept of teachable agents as well as RAG agents that could take a database config. + +```python +teachable_agent = ConversableAgent( + name="teachable_agent", + llm_config=llm_config +) + +# Instantiate a Teachability object. Its parameters are all optional. +teachability = Teachability( + reset_db=False, + path_to_db_dir="./tmp/interactive/teachability_db" +) + +teachability.add_to_agent(teachable_agent) +``` + +In `v0.4`, you can implement a RAG agent using the {py:class}`~autogen_core.memory.Memory` class. Specifically, you can define a memory store class and pass it as a parameter to the assistant agent. See the [Memory](memory.ipynb) tutorial for more details. + +This clear separation of concerns allows you to implement a memory store that uses any database or storage system you want (you have to inherit from the `Memory` class) and use it with an assistant agent. The example below shows how to use a ChromaDB vector memory store with the assistant agent. In addition, your application logic should determine how and when to add content to the memory store. For example, you may choose to call `memory.add` for every response from the assistant agent or use a separate LLM call to determine if the content should be added to the memory store. + +```python + +# ... 
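+# The imports below are a sketch of what this example relies on (they mirror the
+# Memory tutorial above); `get_weather` is a tool function assumed to be defined elsewhere.
+import os
+from pathlib import Path
+
+from autogen_agentchat.agents import AssistantAgent
+from autogen_ext.memory.chromadb import ChromaDBVectorMemory, PersistentChromaDBVectorMemoryConfig
+from autogen_ext.models.openai import OpenAIChatCompletionClient
+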
+# Example: create a ChromaDBVectorMemory instance +chroma_user_memory = ChromaDBVectorMemory( + config=PersistentChromaDBVectorMemoryConfig( + collection_name="preferences", + persistence_path=os.path.join(str(Path.home()), ".chromadb_autogen"), + k=2, # Return top k results + score_threshold=0.4, # Minimum similarity score + ) +) + +# You can add logic, such as a document indexer, that adds content to the memory store + +assistant_agent = AssistantAgent( + name="assistant_agent", + model_client=OpenAIChatCompletionClient( + model="gpt-4o", + ), + tools=[get_weather], + memory=[chroma_user_memory], +) +``` + ## Conversable Agent and Register Reply In `v0.2`, you can create a conversable agent and register a reply function as follows: