mirror of https://github.com/microsoft/autogen.git, synced 2025-12-05 03:18:57 +00:00
migrate models (#3848)

* migrate models
* Update python/packages/autogen-agentchat/src/autogen_agentchat/agents/_tool_use_assistant_agent.py
  Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
* refactor missing imports
* ignore type check errors
* Update python/packages/autogen-ext/src/autogen_ext/models/_openai/_model_info.py
  Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
* update packages index page

---------

Co-authored-by: Leonardo Pinheiro <lpinheiro@microsoft.com>
Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
This commit is contained in:
parent b7509b3659
commit 38f62e1609
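At a high level, this change moves the OpenAI and Azure OpenAI chat completion clients out of autogen-core and into the new autogen-ext package. A minimal sketch of what the migration means for user code (the model name is illustrative; the client reads OPENAI_API_KEY from the environment unless api_key is passed):

    # Before this commit the client lived in autogen-core:
    # from autogen_core.components.models import OpenAIChatCompletionClient
    # After this commit it is imported from autogen-ext instead:
    from autogen_ext.models import OpenAIChatCompletionClient

    # Construction is unchanged; only the import path moves.
    client = OpenAIChatCompletionClient(model="gpt-4o-mini")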
@@ -114,7 +114,7 @@ from autogen_agentchat.agents import CodeExecutorAgent, CodingAssistantAgent
from autogen_agentchat.logging import ConsoleLogHandler
from autogen_agentchat.teams import RoundRobinGroupChat, StopMessageTermination
from autogen_ext.code_executor.docker_executor import DockerCommandLineCodeExecutor
from autogen_core.components.models import OpenAIChatCompletionClient
from autogen_ext.models import OpenAIChatCompletionClient

logger = logging.getLogger(EVENT_LOGGER_NAME)
logger.addHandler(ConsoleLogHandler())

@@ -12,9 +12,7 @@ from autogen_core.base import AgentId, AgentProxy, TopicId
from autogen_core.application import SingleThreadedAgentRuntime
from autogen_core.application.logging import EVENT_LOGGER_NAME
from autogen_core.components.models import (
    AzureOpenAIChatCompletionClient,
    ChatCompletionClient,
    ModelCapabilities,
    UserMessage,
    LLMMessage,
)

@@ -12,7 +12,6 @@ from autogen_core.base import AgentId, AgentProxy, TopicId
from autogen_core.application import SingleThreadedAgentRuntime
from autogen_core.application.logging import EVENT_LOGGER_NAME
from autogen_core.components.models import (
    AzureOpenAIChatCompletionClient,
    ChatCompletionClient,
    ModelCapabilities,
    UserMessage,

@@ -7,8 +7,6 @@ from autogen_core.application.logging import EVENT_LOGGER_NAME
from autogen_core.components import DefaultSubscription, DefaultTopicId
from autogen_core.components.code_executor import LocalCommandLineCodeExecutor
from autogen_core.components.models import (
    AzureOpenAIChatCompletionClient,
    ModelCapabilities,
    UserMessage,
)

@@ -13,9 +13,7 @@ from autogen_core.application.logging import EVENT_LOGGER_NAME
from autogen_core.components import DefaultSubscription, DefaultTopicId
from autogen_core.components.code_executor import LocalCommandLineCodeExecutor
from autogen_core.components.models import (
    AzureOpenAIChatCompletionClient,
    ChatCompletionClient,
    ModelCapabilities,
    UserMessage,
    SystemMessage,
    LLMMessage,

@@ -24,8 +24,9 @@ from autogen_agentchat.teams import (
from autogen_core.base import CancellationToken
from autogen_core.components import FunctionCall
from autogen_core.components.code_executor import LocalCommandLineCodeExecutor
from autogen_core.components.models import FunctionExecutionResult, OpenAIChatCompletionClient
from autogen_core.components.models import FunctionExecutionResult
from autogen_core.components.tools import FunctionTool
from autogen_ext.models import OpenAIChatCompletionClient
from openai.resources.chat.completions import AsyncCompletions
from openai.types.chat.chat_completion import ChatCompletion, Choice
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
@@ -65,9 +65,10 @@ pip install autogen-ext==0.4.0dev1

Extras:

- `langchain-tools` needed for {py:class}`~autogen_ext.tools.LangChainToolAdapter`
- `azure-code-executor` needed for {py:class}`~autogen_ext.code_executors.ACADynamicSessionsCodeExecutor`
- `docker-code-executor` needed for {py:class}`~autogen_ext.code_executors.DockerCommandLineCodeExecutor`
- `langchain` needed for {py:class}`~autogen_ext.tools.LangChainToolAdapter`
- `azure` needed for {py:class}`~autogen_ext.code_executors.ACADynamicSessionsCodeExecutor`
- `docker` needed for {py:class}`~autogen_ext.code_executors.DockerCommandLineCodeExecutor`
- `openai` needed for {py:class}`~autogen_ext.models.OpenAIChatCompletionClient`

[{fas}`circle-info;pst-color-primary` User Guide](/user-guide/extensions-user-guide/index.md) | [{fas}`file-code;pst-color-primary` API Reference](/reference/python/autogen_ext/autogen_ext.rst) | [{fab}`python;pst-color-primary` PyPI](https://pypi.org/project/autogen-ext/0.4.0.dev1/) | [{fab}`github;pst-color-primary` Source](https://github.com/microsoft/autogen/tree/main/python/packages/autogen-ext)
:::
@@ -24,8 +24,8 @@
"source": [
"from autogen_agentchat.agents import CodingAssistantAgent, ToolUseAssistantAgent\n",
"from autogen_agentchat.teams import RoundRobinGroupChat, StopMessageTermination\n",
"from autogen_core.components.models import OpenAIChatCompletionClient\n",
"from autogen_core.components.tools import FunctionTool"
"from autogen_core.components.tools import FunctionTool\n",
"from autogen_ext.models import OpenAIChatCompletionClient"
]
},
{

@@ -24,8 +24,8 @@
"source": [
"from autogen_agentchat.agents import CodingAssistantAgent, ToolUseAssistantAgent\n",
"from autogen_agentchat.teams import RoundRobinGroupChat, StopMessageTermination\n",
"from autogen_core.components.models import OpenAIChatCompletionClient\n",
"from autogen_core.components.tools import FunctionTool"
"from autogen_core.components.tools import FunctionTool\n",
"from autogen_ext.models import OpenAIChatCompletionClient"
]
},
{

@@ -19,7 +19,7 @@
"source": [
"from autogen_agentchat.agents import CodingAssistantAgent\n",
"from autogen_agentchat.teams import RoundRobinGroupChat, StopMessageTermination\n",
"from autogen_core.components.models import OpenAIChatCompletionClient"
"from autogen_ext.models import OpenAIChatCompletionClient"
]
},
{

@@ -50,8 +50,8 @@
")\n",
"from autogen_agentchat.teams import SelectorGroupChat, StopMessageTermination\n",
"from autogen_core.base import CancellationToken\n",
"from autogen_core.components.models import OpenAIChatCompletionClient\n",
"from autogen_core.components.tools import FunctionTool"
"from autogen_core.components.tools import FunctionTool\n",
"from autogen_ext.models import OpenAIChatCompletionClient"
]
},
{
@@ -15,7 +15,7 @@ pip install azure-identity
## Using the Model Client

```python
from autogen_core.components.models import AzureOpenAIChatCompletionClient
from autogen_ext.models import AzureOpenAIChatCompletionClient
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Create the token provider
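# The lines below are an illustrative sketch, not part of the commit: they show how the token
# provider and the migrated AzureOpenAIChatCompletionClient are typically wired together.
# The endpoint, deployment/model name, and API version are placeholders.
token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")

client = AzureOpenAIChatCompletionClient(
    model="gpt-4o",  # placeholder deployment/model name
    api_version="2024-02-01",  # placeholder API version
    azure_endpoint="https://YOUR-RESOURCE.openai.azure.com/",  # placeholder endpoint
    azure_ad_token_provider=token_provider,
    model_capabilities={"vision": True, "function_calling": True, "json_output": True},
)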
@@ -65,7 +65,8 @@
"import os\n",
"from typing import Optional\n",
"\n",
"from autogen_core.components.models import AzureOpenAIChatCompletionClient, UserMessage\n",
"from autogen_core.components.models import UserMessage\n",
"from autogen_ext.models import AzureOpenAIChatCompletionClient\n",
"\n",
"\n",
"# Function to get environment variable and ensure it is not None\n",

@@ -26,13 +26,13 @@
"from autogen_core.components.models import (\n",
"    ChatCompletionClient,\n",
"    LLMMessage,\n",
"    OpenAIChatCompletionClient,\n",
"    SystemMessage,\n",
"    UserMessage,\n",
")\n",
"from autogen_core.components.tool_agent import ToolAgent, ToolException, tool_agent_caller_loop\n",
"from autogen_core.components.tools import PythonCodeExecutionTool, ToolSchema\n",
"from autogen_ext.code_executors import DockerCommandLineCodeExecutor"
"from autogen_ext.code_executors import DockerCommandLineCodeExecutor\n",
"from autogen_ext.models import OpenAIChatCompletionClient"
]
},
{
@@ -86,11 +86,11 @@
"    AssistantMessage,\n",
"    ChatCompletionClient,\n",
"    LLMMessage,\n",
"    OpenAIChatCompletionClient,\n",
"    SystemMessage,\n",
"    UserMessage,\n",
")\n",
"from autogen_core.components.tools import FunctionTool\n",
"from autogen_ext.models import OpenAIChatCompletionClient\n",
"from IPython.display import display # type: ignore\n",
"from pydantic import BaseModel\n",
"from rich.console import Console\n",

@@ -65,11 +65,11 @@
"    FunctionExecutionResult,\n",
"    FunctionExecutionResultMessage,\n",
"    LLMMessage,\n",
"    OpenAIChatCompletionClient,\n",
"    SystemMessage,\n",
"    UserMessage,\n",
")\n",
"from autogen_core.components.tools import FunctionTool, Tool\n",
"from autogen_ext.models import OpenAIChatCompletionClient\n",
"from pydantic import BaseModel"
]
},

@@ -459,7 +459,7 @@
"We have defined the AI agents, the Human Agent, the User Agent, the tools, and the topic types.\n",
"Now we can create the team of agents.\n",
"\n",
"For the AI agents, we use the {py:class}`~autogen_core.components.models.OpenAIChatCompletionClient`\n",
"For the AI agents, we use the {py:class}~autogen_ext.models.OpenAIChatCompletionClient`\n",
"and `gpt-4o-mini` model.\n",
"\n",
"After creating the agent runtime, we register each of the agent by providing\n",

@@ -444,7 +444,7 @@
"source": [
"from autogen_core.application import SingleThreadedAgentRuntime\n",
"from autogen_core.components import DefaultTopicId\n",
"from autogen_core.components.models import OpenAIChatCompletionClient\n",
"from autogen_ext.models import OpenAIChatCompletionClient\n",
"\n",
"runtime = SingleThreadedAgentRuntime()\n",
"await ReviewerAgent.register(\n",
@@ -18,11 +18,11 @@
"## Built-in Model Clients\n",
"\n",
"Currently there are two built-in model clients:\n",
"{py:class}`~autogen_core.components.models.OpenAIChatCompletionClient` and\n",
"{py:class}`~autogen_core.components.models.AzureOpenAIChatCompletionClient`.\n",
"{py:class}~autogen_ext.models.OpenAIChatCompletionClient` and\n",
"{py:class}`~autogen_ext.models.AzureOpenAIChatCompletionClient`.\n",
"Both clients are asynchronous.\n",
"\n",
"To use the {py:class}`~autogen_core.components.models.OpenAIChatCompletionClient`, you need to provide the API key\n",
"To use the {py:class}~autogen_ext.models.OpenAIChatCompletionClient`, you need to provide the API key\n",
"either through the environment variable `OPENAI_API_KEY` or through the `api_key` argument."
]
},

@@ -32,7 +32,7 @@
"metadata": {},
"outputs": [],
"source": [
"from autogen_core.components.models import OpenAIChatCompletionClient, UserMessage\n",
"from autogen_ext.models import OpenAIChatCompletionClient, UserMessage\n",
"\n",
"# Create an OpenAI model client.\n",
"model_client = OpenAIChatCompletionClient(\n",
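For reference, a minimal sketch of how the migrated client is typically constructed and called (the model name and prompt are illustrative, and the API key is read from the OPENAI_API_KEY environment variable if not passed explicitly):

    from autogen_core.components.models import UserMessage
    from autogen_ext.models import OpenAIChatCompletionClient

    # Create an OpenAI model client; only the import path changed in this migration.
    model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")

    # The client is asynchronous, so create() is awaited (e.g. inside a notebook cell).
    result = await model_client.create([UserMessage(content="What is the capital of France?", source="user")])
    print(result.content)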
@@ -45,7 +45,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"You can call the {py:meth}`~autogen_core.components.models.OpenAIChatCompletionClient.create` method to create a\n",
"You can call the {py:meth}~autogen_ext.models.OpenAIChatCompletionClient.create` method to create a\n",
"chat completion request, and await for an {py:class}`~autogen_core.components.models.CreateResult` object in return."
]
},

@@ -79,7 +79,7 @@
"source": [
"### Streaming Response\n",
"\n",
"You can use the {py:meth}`~autogen_core.components.models.OpenAIChatCompletionClient.create_streaming` method to create a\n",
"You can use the {py:meth}~autogen_ext.models.OpenAIChatCompletionClient.create_streaming` method to create a\n",
"chat completion request with streaming response."
]
},

@@ -151,7 +151,7 @@
"source": [
"### Azure OpenAI\n",
"\n",
"To use the {py:class}`~autogen_core.components.models.AzureOpenAIChatCompletionClient`, you need to provide\n",
"To use the {py:class}`~autogen_ext.models.AzureOpenAIChatCompletionClient`, you need to provide\n",
"the deployment id, Azure Cognitive Services endpoint, api version, and model capabilities.\n",
"For authentication, you can either provide an API key or an Azure Active Directory (AAD) token credential.\n",
"To use AAD authentication, you need to first install the `azure-identity` package."

@@ -184,7 +184,7 @@
"metadata": {},
"outputs": [],
"source": [
"from autogen_core.components.models import AzureOpenAIChatCompletionClient\n",
"from autogen_ext.models import AzureOpenAIChatCompletionClient\n",
"from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
"\n",
"# Create the token provider\n",

@@ -312,8 +312,8 @@
"import tempfile\n",
"\n",
"from autogen_core.application import SingleThreadedAgentRuntime\n",
"from autogen_core.components.models import OpenAIChatCompletionClient\n",
"from autogen_ext.code_executors import DockerCommandLineCodeExecutor\n",
"from autogen_ext.models import OpenAIChatCompletionClient\n",
"\n",
"work_dir = tempfile.mkdtemp()\n",
"\n",
@@ -1,10 +1,11 @@
import importlib
import warnings
from typing import TYPE_CHECKING, Any

from ._model_client import ChatCompletionClient, ModelCapabilities
from ._openai_client import (
    AzureOpenAIChatCompletionClient,
    OpenAIChatCompletionClient,
)
from ._types import (
    AssistantMessage,
    ChatCompletionTokenLogprob,
    CreateResult,
    FinishReasons,
    FunctionExecutionResult,

@@ -12,9 +13,14 @@ from ._types import (
    LLMMessage,
    RequestUsage,
    SystemMessage,
    TopLogprob,
    UserMessage,
)

if TYPE_CHECKING:
    from ._openai_client import AzureOpenAIChatCompletionClient, OpenAIChatCompletionClient


__all__ = [
    "AzureOpenAIChatCompletionClient",
    "OpenAIChatCompletionClient",

@@ -29,4 +35,26 @@ __all__ = [
    "RequestUsage",
    "FinishReasons",
    "CreateResult",
    "TopLogprob",
    "ChatCompletionTokenLogprob",
]


def __getattr__(name: str) -> Any:
    deprecated_classes = {
        "AzureOpenAIChatCompletionClient": "autogen_ext.models.AzureOpenAIChatCompletionClient",
        "OpenAIChatCompletionClient": "autogen_ext.modelsChatCompletionClient",
    }
    if name in deprecated_classes:
        warnings.warn(
            f"{name} moved to autogen_ext. " f"Please import it from {deprecated_classes[name]}.",
            FutureWarning,
            stacklevel=2,
        )
        # Dynamically import the class from the current module
        module = importlib.import_module("._openai_client", __name__)
        attr = getattr(module, name)
        # Cache the attribute in the module's global namespace
        globals()[name] = attr
        return attr
    raise AttributeError(f"module {__name__} has no attribute {name}")
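The module-level __getattr__ above keeps the old import location working while steering users to the new one. A small sketch of the intended behaviour (assuming both packages are installed):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # The old path still resolves, but only through the deprecation shim above.
        from autogen_core.components.models import OpenAIChatCompletionClient  # noqa: F401

    # Accessing the moved class emits a FutureWarning pointing at autogen_ext.models.
    assert any(issubclass(w.category, FutureWarning) for w in caught)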
@@ -23,6 +23,10 @@ dependencies = [
langchain-tools = ["langchain_core~= 0.3.3"]
azure-code-executor = ["azure-core"]
docker-code-executor = ["docker~=7.0"]
langchain = ["langchain_core~= 0.3.3"]
azure = ["azure-core", "azure-identity"]
docker = ["docker~=7.0"]
openai = ["openai>=1.3"]

[tool.hatch.build.targets.wheel]
packages = ["src/autogen_ext"]

@@ -48,7 +48,7 @@ class ACADynamicSessionsCodeExecutor(CodeExecutor):

.. note::

This class requires the :code:`azure-code-executor` extra for the :code:`autogen-ext` package.
This class requires the :code:`azure` extra for the :code:`autogen-ext` package.

**This will execute LLM generated code on an Azure dynamic code container.**

@@ -52,7 +52,7 @@ class DockerCommandLineCodeExecutor(CodeExecutor):

.. note::

This class requires the :code:`docker-code-executor` extra for the :code:`autogen-ext` package.
This class requires the :code:`docker` extra for the :code:`autogen-ext` package.

The executor first saves each code block in a file in the working

@@ -160,7 +160,7 @@ $functions"""
from docker.models.containers import Container
except ImportError as e:
raise RuntimeError(
"Missing dependecies for DockerCommandLineCodeExecutor. Please ensure the autogen-ext package was installed with the 'docker-code-executor' extra."
"Missing dependecies for DockerCommandLineCodeExecutor. Please ensure the autogen-ext package was installed with the 'docker' extra."
) from e

self._container: Container | None = None

@@ -305,7 +305,7 @@ $functions"""
from docker.errors import NotFound
except ImportError as e:
raise RuntimeError(
"Missing dependecies for DockerCommandLineCodeExecutor. Please ensure the autogen-ext package was installed with the 'docker-code-executor' extra."
"Missing dependecies for DockerCommandLineCodeExecutor. Please ensure the autogen-ext package was installed with the 'docker' extra."
) from e

client = docker.from_env()

@@ -324,7 +324,7 @@ $functions"""
from docker.errors import ImageNotFound
except ImportError as e:
raise RuntimeError(
"Missing dependecies for DockerCommandLineCodeExecutor. Please ensure the autogen-ext package was installed with the 'docker-code-executor' extra."
"Missing dependecies for DockerCommandLineCodeExecutor. Please ensure the autogen-ext package was installed with the 'docker' extra."
) from e

# Start a container from the image, read to exec commands later
@@ -0,0 +1,9 @@
from ._openai._openai_client import (
    AzureOpenAIChatCompletionClient,
    OpenAIChatCompletionClient,
)

__all__ = [
    "AzureOpenAIChatCompletionClient",
    "OpenAIChatCompletionClient",
]
@@ -0,0 +1,122 @@
from typing import Dict

from autogen_core.components.models import ModelCapabilities

# Based on: https://platform.openai.com/docs/models/continuous-model-upgrades
# This is a moving target, so correctness is checked by the model value returned by openai against expected values at runtime``
_MODEL_POINTERS = {
    "gpt-4o": "gpt-4o-2024-08-06",
    "gpt-4o-mini": "gpt-4o-mini-2024-07-18",
    "gpt-4-turbo": "gpt-4-turbo-2024-04-09",
    "gpt-4-turbo-preview": "gpt-4-0125-preview",
    "gpt-4": "gpt-4-0613",
    "gpt-4-32k": "gpt-4-32k-0613",
    "gpt-3.5-turbo": "gpt-3.5-turbo-0125",
    "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
}

_MODEL_CAPABILITIES: Dict[str, ModelCapabilities] = {
    "gpt-4o-2024-08-06": {
        "vision": True,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-4o-2024-05-13": {
        "vision": True,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-4o-mini-2024-07-18": {
        "vision": True,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-4-turbo-2024-04-09": {
        "vision": True,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-4-0125-preview": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-4-1106-preview": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-4-1106-vision-preview": {
        "vision": True,
        "function_calling": False,
        "json_output": False,
    },
    "gpt-4-0613": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-4-32k-0613": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-3.5-turbo-0125": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-3.5-turbo-1106": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-3.5-turbo-instruct": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-3.5-turbo-0613": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
    "gpt-3.5-turbo-16k-0613": {
        "vision": False,
        "function_calling": True,
        "json_output": True,
    },
}

_MODEL_TOKEN_LIMITS: Dict[str, int] = {
    "gpt-4o-2024-08-06": 128000,
    "gpt-4o-2024-05-13": 128000,
    "gpt-4o-mini-2024-07-18": 128000,
    "gpt-4-turbo-2024-04-09": 128000,
    "gpt-4-0125-preview": 128000,
    "gpt-4-1106-preview": 128000,
    "gpt-4-1106-vision-preview": 128000,
    "gpt-4-0613": 8192,
    "gpt-4-32k-0613": 32768,
    "gpt-3.5-turbo-0125": 16385,
    "gpt-3.5-turbo-1106": 16385,
    "gpt-3.5-turbo-instruct": 4096,
    "gpt-3.5-turbo-0613": 4096,
    "gpt-3.5-turbo-16k-0613": 16385,
}


def resolve_model(model: str) -> str:
    if model in _MODEL_POINTERS:
        return _MODEL_POINTERS[model]
    return model


def get_capabilties(model: str) -> ModelCapabilities:
    resolved_model = resolve_model(model)
    return _MODEL_CAPABILITIES[resolved_model]


def get_token_limit(model: str) -> int:
    resolved_model = resolve_model(model)
    return _MODEL_TOKEN_LIMITS[resolved_model]
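These tables drive how the client resolves model aliases, capabilities, and context-window limits. An illustrative sketch of the helpers (the test suite in this commit imports them from autogen_ext.models._openai._model_info; the unknown model name below is made up):

    from autogen_ext.models._openai._model_info import get_capabilties, get_token_limit, resolve_model

    # Aliases resolve to pinned snapshots; unknown names pass through unchanged.
    assert resolve_model("gpt-4o") == "gpt-4o-2024-08-06"
    assert resolve_model("my-unknown-model") == "my-unknown-model"

    # Capabilities and token limits are looked up on the resolved name.
    assert get_capabilties("gpt-4o")["vision"] is True
    assert get_token_limit("gpt-4") == 8192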
@ -0,0 +1,856 @@
|
||||
import asyncio
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
import warnings
|
||||
from asyncio import Task
|
||||
from typing import (
|
||||
Any,
|
||||
AsyncGenerator,
|
||||
Dict,
|
||||
List,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
Set,
|
||||
Type,
|
||||
Union,
|
||||
cast,
|
||||
)
|
||||
|
||||
import tiktoken
|
||||
from autogen_core.application.logging import EVENT_LOGGER_NAME, TRACE_LOGGER_NAME
|
||||
from autogen_core.application.logging.events import LLMCallEvent
|
||||
from autogen_core.base import CancellationToken
|
||||
from autogen_core.components import (
|
||||
FunctionCall,
|
||||
Image,
|
||||
)
|
||||
from autogen_core.components.models import (
|
||||
AssistantMessage,
|
||||
ChatCompletionClient,
|
||||
ChatCompletionTokenLogprob,
|
||||
CreateResult,
|
||||
FunctionExecutionResultMessage,
|
||||
LLMMessage,
|
||||
ModelCapabilities,
|
||||
RequestUsage,
|
||||
SystemMessage,
|
||||
TopLogprob,
|
||||
UserMessage,
|
||||
)
|
||||
from autogen_core.components.tools import Tool, ToolSchema
|
||||
from openai import AsyncAzureOpenAI, AsyncOpenAI
|
||||
from openai.types.chat import (
|
||||
ChatCompletion,
|
||||
ChatCompletionAssistantMessageParam,
|
||||
ChatCompletionContentPartParam,
|
||||
ChatCompletionContentPartTextParam,
|
||||
ChatCompletionMessageParam,
|
||||
ChatCompletionMessageToolCallParam,
|
||||
ChatCompletionRole,
|
||||
ChatCompletionSystemMessageParam,
|
||||
ChatCompletionToolMessageParam,
|
||||
ChatCompletionToolParam,
|
||||
ChatCompletionUserMessageParam,
|
||||
ParsedChatCompletion,
|
||||
ParsedChoice,
|
||||
completion_create_params,
|
||||
)
|
||||
from openai.types.chat.chat_completion import Choice
|
||||
from openai.types.shared_params import FunctionDefinition, FunctionParameters
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import Unpack
|
||||
|
||||
from . import _model_info
|
||||
from .config import AzureOpenAIClientConfiguration, OpenAIClientConfiguration
|
||||
|
||||
logger = logging.getLogger(EVENT_LOGGER_NAME)
|
||||
trace_logger = logging.getLogger(TRACE_LOGGER_NAME)
|
||||
|
||||
openai_init_kwargs = set(inspect.getfullargspec(AsyncOpenAI.__init__).kwonlyargs)
|
||||
aopenai_init_kwargs = set(inspect.getfullargspec(AsyncAzureOpenAI.__init__).kwonlyargs)
|
||||
|
||||
create_kwargs = set(completion_create_params.CompletionCreateParamsBase.__annotations__.keys()) | set(
|
||||
("timeout", "stream")
|
||||
)
|
||||
# Only single choice allowed
|
||||
disallowed_create_args = set(["stream", "messages", "function_call", "functions", "n"])
|
||||
required_create_args: Set[str] = set(["model"])
|
||||
|
||||
|
||||
def _azure_openai_client_from_config(config: Mapping[str, Any]) -> AsyncAzureOpenAI:
|
||||
# Take a copy
|
||||
copied_config = dict(config).copy()
|
||||
|
||||
# Do some fixups
|
||||
copied_config["azure_deployment"] = copied_config.get("azure_deployment", config.get("model"))
|
||||
if copied_config["azure_deployment"] is not None:
|
||||
copied_config["azure_deployment"] = copied_config["azure_deployment"].replace(".", "")
|
||||
copied_config["azure_endpoint"] = copied_config.get("azure_endpoint", copied_config.pop("base_url", None))
|
||||
|
||||
# Shave down the config to just the AzureOpenAIChatCompletionClient kwargs
|
||||
azure_config = {k: v for k, v in copied_config.items() if k in aopenai_init_kwargs}
|
||||
return AsyncAzureOpenAI(**azure_config)
|
||||
|
||||
|
||||
def _openai_client_from_config(config: Mapping[str, Any]) -> AsyncOpenAI:
|
||||
# Shave down the config to just the OpenAI kwargs
|
||||
openai_config = {k: v for k, v in config.items() if k in openai_init_kwargs}
|
||||
return AsyncOpenAI(**openai_config)
|
||||
|
||||
|
||||
def _create_args_from_config(config: Mapping[str, Any]) -> Dict[str, Any]:
|
||||
create_args = {k: v for k, v in config.items() if k in create_kwargs}
|
||||
create_args_keys = set(create_args.keys())
|
||||
if not required_create_args.issubset(create_args_keys):
|
||||
raise ValueError(f"Required create args are missing: {required_create_args - create_args_keys}")
|
||||
if disallowed_create_args.intersection(create_args_keys):
|
||||
raise ValueError(f"Disallowed create args are present: {disallowed_create_args.intersection(create_args_keys)}")
|
||||
return create_args
|
||||
|
||||
|
||||
# TODO check types
|
||||
# oai_system_message_schema = type2schema(ChatCompletionSystemMessageParam)
|
||||
# oai_user_message_schema = type2schema(ChatCompletionUserMessageParam)
|
||||
# oai_assistant_message_schema = type2schema(ChatCompletionAssistantMessageParam)
|
||||
# oai_tool_message_schema = type2schema(ChatCompletionToolMessageParam)
|
||||
|
||||
|
||||
def type_to_role(message: LLMMessage) -> ChatCompletionRole:
|
||||
if isinstance(message, SystemMessage):
|
||||
return "system"
|
||||
elif isinstance(message, UserMessage):
|
||||
return "user"
|
||||
elif isinstance(message, AssistantMessage):
|
||||
return "assistant"
|
||||
else:
|
||||
return "tool"
|
||||
|
||||
|
||||
def user_message_to_oai(message: UserMessage) -> ChatCompletionUserMessageParam:
|
||||
assert_valid_name(message.source)
|
||||
if isinstance(message.content, str):
|
||||
return ChatCompletionUserMessageParam(
|
||||
content=message.content,
|
||||
role="user",
|
||||
name=message.source,
|
||||
)
|
||||
else:
|
||||
parts: List[ChatCompletionContentPartParam] = []
|
||||
for part in message.content:
|
||||
if isinstance(part, str):
|
||||
oai_part = ChatCompletionContentPartTextParam(
|
||||
text=part,
|
||||
type="text",
|
||||
)
|
||||
parts.append(oai_part)
|
||||
elif isinstance(part, Image):
|
||||
# TODO: support url based images
|
||||
# TODO: support specifying details
|
||||
parts.append(part.to_openai_format())
|
||||
else:
|
||||
raise ValueError(f"Unknown content type: {part}")
|
||||
return ChatCompletionUserMessageParam(
|
||||
content=parts,
|
||||
role="user",
|
||||
name=message.source,
|
||||
)
|
||||
|
||||
|
||||
def system_message_to_oai(message: SystemMessage) -> ChatCompletionSystemMessageParam:
|
||||
return ChatCompletionSystemMessageParam(
|
||||
content=message.content,
|
||||
role="system",
|
||||
)
|
||||
|
||||
|
||||
def func_call_to_oai(message: FunctionCall) -> ChatCompletionMessageToolCallParam:
|
||||
return ChatCompletionMessageToolCallParam(
|
||||
id=message.id,
|
||||
function={
|
||||
"arguments": message.arguments,
|
||||
"name": message.name,
|
||||
},
|
||||
type="function",
|
||||
)
|
||||
|
||||
|
||||
def tool_message_to_oai(
|
||||
message: FunctionExecutionResultMessage,
|
||||
) -> Sequence[ChatCompletionToolMessageParam]:
|
||||
return [
|
||||
ChatCompletionToolMessageParam(content=x.content, role="tool", tool_call_id=x.call_id) for x in message.content
|
||||
]
|
||||
|
||||
|
||||
def assistant_message_to_oai(
|
||||
message: AssistantMessage,
|
||||
) -> ChatCompletionAssistantMessageParam:
|
||||
assert_valid_name(message.source)
|
||||
if isinstance(message.content, list):
|
||||
return ChatCompletionAssistantMessageParam(
|
||||
tool_calls=[func_call_to_oai(x) for x in message.content],
|
||||
role="assistant",
|
||||
name=message.source,
|
||||
)
|
||||
else:
|
||||
return ChatCompletionAssistantMessageParam(
|
||||
content=message.content,
|
||||
role="assistant",
|
||||
name=message.source,
|
||||
)
|
||||
|
||||
|
||||
def to_oai_type(message: LLMMessage) -> Sequence[ChatCompletionMessageParam]:
|
||||
if isinstance(message, SystemMessage):
|
||||
return [system_message_to_oai(message)]
|
||||
elif isinstance(message, UserMessage):
|
||||
return [user_message_to_oai(message)]
|
||||
elif isinstance(message, AssistantMessage):
|
||||
return [assistant_message_to_oai(message)]
|
||||
else:
|
||||
return tool_message_to_oai(message)
|
||||
|
||||
|
||||
def calculate_vision_tokens(image: Image, detail: str = "auto") -> int:
|
||||
MAX_LONG_EDGE = 2048
|
||||
BASE_TOKEN_COUNT = 85
|
||||
TOKENS_PER_TILE = 170
|
||||
MAX_SHORT_EDGE = 768
|
||||
TILE_SIZE = 512
|
||||
|
||||
if detail == "low":
|
||||
return BASE_TOKEN_COUNT
|
||||
|
||||
width, height = image.image.size
|
||||
|
||||
# Scale down to fit within a MAX_LONG_EDGE x MAX_LONG_EDGE square if necessary
|
||||
|
||||
if width > MAX_LONG_EDGE or height > MAX_LONG_EDGE:
|
||||
aspect_ratio = width / height
|
||||
if aspect_ratio > 1:
|
||||
# Width is greater than height
|
||||
width = MAX_LONG_EDGE
|
||||
height = int(MAX_LONG_EDGE / aspect_ratio)
|
||||
else:
|
||||
# Height is greater than or equal to width
|
||||
height = MAX_LONG_EDGE
|
||||
width = int(MAX_LONG_EDGE * aspect_ratio)
|
||||
|
||||
# Resize such that the shortest side is MAX_SHORT_EDGE if both dimensions exceed MAX_SHORT_EDGE
|
||||
aspect_ratio = width / height
|
||||
if width > MAX_SHORT_EDGE and height > MAX_SHORT_EDGE:
|
||||
if aspect_ratio > 1:
|
||||
# Width is greater than height
|
||||
height = MAX_SHORT_EDGE
|
||||
width = int(MAX_SHORT_EDGE * aspect_ratio)
|
||||
else:
|
||||
# Height is greater than or equal to width
|
||||
width = MAX_SHORT_EDGE
|
||||
height = int(MAX_SHORT_EDGE / aspect_ratio)
|
||||
|
||||
# Calculate the number of tiles based on TILE_SIZE
|
||||
|
||||
tiles_width = math.ceil(width / TILE_SIZE)
|
||||
tiles_height = math.ceil(height / TILE_SIZE)
|
||||
total_tiles = tiles_width * tiles_height
|
||||
# Calculate the total tokens based on the number of tiles and the base token count
|
||||
|
||||
total_tokens = BASE_TOKEN_COUNT + TOKENS_PER_TILE * total_tiles
|
||||
|
||||
return total_tokens
|
||||
|
||||
|
||||
def _add_usage(usage1: RequestUsage, usage2: RequestUsage) -> RequestUsage:
|
||||
return RequestUsage(
|
||||
prompt_tokens=usage1.prompt_tokens + usage2.prompt_tokens,
|
||||
completion_tokens=usage1.completion_tokens + usage2.completion_tokens,
|
||||
)
|
||||
|
||||
|
||||
def convert_tools(
|
||||
tools: Sequence[Tool | ToolSchema],
|
||||
) -> List[ChatCompletionToolParam]:
|
||||
result: List[ChatCompletionToolParam] = []
|
||||
for tool in tools:
|
||||
if isinstance(tool, Tool):
|
||||
tool_schema = tool.schema
|
||||
else:
|
||||
assert isinstance(tool, dict)
|
||||
tool_schema = tool
|
||||
|
||||
result.append(
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function=FunctionDefinition(
|
||||
name=tool_schema["name"],
|
||||
description=(tool_schema["description"] if "description" in tool_schema else ""),
|
||||
parameters=(
|
||||
cast(FunctionParameters, tool_schema["parameters"]) if "parameters" in tool_schema else {}
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
# Check if all tools have valid names.
|
||||
for tool_param in result:
|
||||
assert_valid_name(tool_param["function"]["name"])
|
||||
return result
|
||||
|
||||
|
||||
def normalize_name(name: str) -> str:
|
||||
"""
|
||||
LLMs sometimes ask functions while ignoring their own format requirements, this function should be used to replace invalid characters with "_".
|
||||
|
||||
Prefer _assert_valid_name for validating user configuration or input
|
||||
"""
|
||||
return re.sub(r"[^a-zA-Z0-9_-]", "_", name)[:64]
|
||||
|
||||
|
||||
def assert_valid_name(name: str) -> str:
|
||||
"""
|
||||
Ensure that configured names are valid, raises ValueError if not.
|
||||
|
||||
For munging LLM responses use _normalize_name to ensure LLM specified names don't break the API.
|
||||
"""
|
||||
if not re.match(r"^[a-zA-Z0-9_-]+$", name):
|
||||
raise ValueError(f"Invalid name: {name}. Only letters, numbers, '_' and '-' are allowed.")
|
||||
if len(name) > 64:
|
||||
raise ValueError(f"Invalid name: {name}. Name must be less than 64 characters.")
|
||||
return name
|
||||
|
||||
|
||||
class BaseOpenAIChatCompletionClient(ChatCompletionClient):
|
||||
def __init__(
|
||||
self,
|
||||
client: Union[AsyncOpenAI, AsyncAzureOpenAI],
|
||||
create_args: Dict[str, Any],
|
||||
model_capabilities: Optional[ModelCapabilities] = None,
|
||||
):
|
||||
self._client = client
|
||||
if model_capabilities is None and isinstance(client, AsyncAzureOpenAI):
|
||||
raise ValueError("AzureOpenAIChatCompletionClient requires explicit model capabilities")
|
||||
elif model_capabilities is None:
|
||||
self._model_capabilities = _model_info.get_capabilties(create_args["model"])
|
||||
else:
|
||||
self._model_capabilities = model_capabilities
|
||||
|
||||
self._resolved_model: Optional[str] = None
|
||||
if "model" in create_args:
|
||||
self._resolved_model = _model_info.resolve_model(create_args["model"])
|
||||
|
||||
if (
|
||||
"response_format" in create_args
|
||||
and create_args["response_format"]["type"] == "json_object"
|
||||
and not self._model_capabilities["json_output"]
|
||||
):
|
||||
raise ValueError("Model does not support JSON output")
|
||||
|
||||
self._create_args = create_args
|
||||
self._total_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
|
||||
self._actual_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
|
||||
|
||||
@classmethod
|
||||
def create_from_config(cls, config: Dict[str, Any]) -> ChatCompletionClient:
|
||||
return OpenAIChatCompletionClient(**config)
|
||||
|
||||
async def create(
|
||||
self,
|
||||
messages: Sequence[LLMMessage],
|
||||
tools: Sequence[Tool | ToolSchema] = [],
|
||||
json_output: Optional[bool] = None,
|
||||
extra_create_args: Mapping[str, Any] = {},
|
||||
cancellation_token: Optional[CancellationToken] = None,
|
||||
) -> CreateResult:
|
||||
# Make sure all extra_create_args are valid
|
||||
extra_create_args_keys = set(extra_create_args.keys())
|
||||
if not create_kwargs.issuperset(extra_create_args_keys):
|
||||
raise ValueError(f"Extra create args are invalid: {extra_create_args_keys - create_kwargs}")
|
||||
|
||||
# Copy the create args and overwrite anything in extra_create_args
|
||||
create_args = self._create_args.copy()
|
||||
create_args.update(extra_create_args)
|
||||
|
||||
# Declare use_beta_client
|
||||
use_beta_client: bool = False
|
||||
response_format_value: Optional[Type[BaseModel]] = None
|
||||
|
||||
if "response_format" in create_args:
|
||||
value = create_args["response_format"]
|
||||
# If value is a Pydantic model class, use the beta client
|
||||
if isinstance(value, type) and issubclass(value, BaseModel):
|
||||
response_format_value = value
|
||||
use_beta_client = True
|
||||
else:
|
||||
# response_format_value is not a Pydantic model class
|
||||
use_beta_client = False
|
||||
response_format_value = None
|
||||
|
||||
# Remove 'response_format' from create_args to prevent passing it twice
|
||||
create_args_no_response_format = {k: v for k, v in create_args.items() if k != "response_format"}
|
||||
|
||||
# TODO: allow custom handling.
|
||||
# For now we raise an error if images are present and vision is not supported
|
||||
if self.capabilities["vision"] is False:
|
||||
for message in messages:
|
||||
if isinstance(message, UserMessage):
|
||||
if isinstance(message.content, list) and any(isinstance(x, Image) for x in message.content):
|
||||
raise ValueError("Model does not support vision and image was provided")
|
||||
|
||||
if json_output is not None:
|
||||
if self.capabilities["json_output"] is False and json_output is True:
|
||||
raise ValueError("Model does not support JSON output")
|
||||
|
||||
if json_output is True:
|
||||
create_args["response_format"] = {"type": "json_object"}
|
||||
else:
|
||||
create_args["response_format"] = {"type": "text"}
|
||||
|
||||
if self.capabilities["json_output"] is False and json_output is True:
|
||||
raise ValueError("Model does not support JSON output")
|
||||
|
||||
oai_messages_nested = [to_oai_type(m) for m in messages]
|
||||
oai_messages = [item for sublist in oai_messages_nested for item in sublist]
|
||||
|
||||
if self.capabilities["function_calling"] is False and len(tools) > 0:
|
||||
raise ValueError("Model does not support function calling")
|
||||
future: Union[Task[ParsedChatCompletion[BaseModel]], Task[ChatCompletion]]
|
||||
if len(tools) > 0:
|
||||
converted_tools = convert_tools(tools)
|
||||
if use_beta_client:
|
||||
# Pass response_format_value if it's not None
|
||||
if response_format_value is not None:
|
||||
future = asyncio.ensure_future(
|
||||
self._client.beta.chat.completions.parse(
|
||||
messages=oai_messages,
|
||||
tools=converted_tools,
|
||||
response_format=response_format_value,
|
||||
**create_args_no_response_format,
|
||||
)
|
||||
)
|
||||
else:
|
||||
future = asyncio.ensure_future(
|
||||
self._client.beta.chat.completions.parse(
|
||||
messages=oai_messages,
|
||||
tools=converted_tools,
|
||||
**create_args_no_response_format,
|
||||
)
|
||||
)
|
||||
else:
|
||||
future = asyncio.ensure_future(
|
||||
self._client.chat.completions.create(
|
||||
messages=oai_messages,
|
||||
stream=False,
|
||||
tools=converted_tools,
|
||||
**create_args,
|
||||
)
|
||||
)
|
||||
else:
|
||||
if use_beta_client:
|
||||
if response_format_value is not None:
|
||||
future = asyncio.ensure_future(
|
||||
self._client.beta.chat.completions.parse(
|
||||
messages=oai_messages,
|
||||
response_format=response_format_value,
|
||||
**create_args_no_response_format,
|
||||
)
|
||||
)
|
||||
else:
|
||||
future = asyncio.ensure_future(
|
||||
self._client.beta.chat.completions.parse(
|
||||
messages=oai_messages,
|
||||
**create_args_no_response_format,
|
||||
)
|
||||
)
|
||||
else:
|
||||
future = asyncio.ensure_future(
|
||||
self._client.chat.completions.create(
|
||||
messages=oai_messages,
|
||||
stream=False,
|
||||
**create_args,
|
||||
)
|
||||
)
|
||||
|
||||
if cancellation_token is not None:
|
||||
cancellation_token.link_future(future)
|
||||
result: Union[ParsedChatCompletion[BaseModel], ChatCompletion] = await future
|
||||
if use_beta_client:
|
||||
result = cast(ParsedChatCompletion[Any], result)
|
||||
|
||||
if result.usage is not None:
|
||||
logger.info(
|
||||
LLMCallEvent(
|
||||
prompt_tokens=result.usage.prompt_tokens,
|
||||
completion_tokens=result.usage.completion_tokens,
|
||||
)
|
||||
)
|
||||
|
||||
usage = RequestUsage(
|
||||
# TODO backup token counting
|
||||
prompt_tokens=result.usage.prompt_tokens if result.usage is not None else 0,
|
||||
completion_tokens=(result.usage.completion_tokens if result.usage is not None else 0),
|
||||
)
|
||||
|
||||
if self._resolved_model is not None:
|
||||
if self._resolved_model != result.model:
|
||||
warnings.warn(
|
||||
f"Resolved model mismatch: {self._resolved_model} != {result.model}. Model mapping may be incorrect.",
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
# Limited to a single choice currently.
|
||||
choice: Union[ParsedChoice[Any], ParsedChoice[BaseModel], Choice] = result.choices[0]
|
||||
if choice.finish_reason == "function_call":
|
||||
raise ValueError("Function calls are not supported in this context")
|
||||
|
||||
content: Union[str, List[FunctionCall]]
|
||||
if choice.finish_reason == "tool_calls":
|
||||
assert choice.message.tool_calls is not None
|
||||
assert choice.message.function_call is None
|
||||
|
||||
# NOTE: If OAI response type changes, this will need to be updated
|
||||
content = [
|
||||
FunctionCall(
|
||||
id=x.id,
|
||||
arguments=x.function.arguments,
|
||||
name=normalize_name(x.function.name),
|
||||
)
|
||||
for x in choice.message.tool_calls
|
||||
]
|
||||
finish_reason = "function_calls"
|
||||
else:
|
||||
finish_reason = choice.finish_reason
|
||||
content = choice.message.content or ""
|
||||
logprobs: Optional[List[ChatCompletionTokenLogprob]] = None
|
||||
if choice.logprobs and choice.logprobs.content:
|
||||
logprobs = [
|
||||
ChatCompletionTokenLogprob(
|
||||
token=x.token,
|
||||
logprob=x.logprob,
|
||||
top_logprobs=[TopLogprob(logprob=y.logprob, bytes=y.bytes) for y in x.top_logprobs],
|
||||
bytes=x.bytes,
|
||||
)
|
||||
for x in choice.logprobs.content
|
||||
]
|
||||
response = CreateResult(
|
||||
finish_reason=finish_reason, # type: ignore
|
||||
content=content,
|
||||
usage=usage,
|
||||
cached=False,
|
||||
logprobs=logprobs,
|
||||
)
|
||||
|
||||
_add_usage(self._actual_usage, usage)
|
||||
_add_usage(self._total_usage, usage)
|
||||
|
||||
# TODO - why is this cast needed?
|
||||
return response
|
||||
|
||||
async def create_stream(
|
||||
self,
|
||||
messages: Sequence[LLMMessage],
|
||||
tools: Sequence[Tool | ToolSchema] = [],
|
||||
json_output: Optional[bool] = None,
|
||||
extra_create_args: Mapping[str, Any] = {},
|
||||
cancellation_token: Optional[CancellationToken] = None,
|
||||
) -> AsyncGenerator[Union[str, CreateResult], None]:
|
||||
# Make sure all extra_create_args are valid
|
||||
extra_create_args_keys = set(extra_create_args.keys())
|
||||
if not create_kwargs.issuperset(extra_create_args_keys):
|
||||
raise ValueError(f"Extra create args are invalid: {extra_create_args_keys - create_kwargs}")
|
||||
|
||||
# Copy the create args and overwrite anything in extra_create_args
|
||||
create_args = self._create_args.copy()
|
||||
create_args.update(extra_create_args)
|
||||
|
||||
oai_messages_nested = [to_oai_type(m) for m in messages]
|
||||
oai_messages = [item for sublist in oai_messages_nested for item in sublist]
|
||||
|
||||
# TODO: allow custom handling.
|
||||
# For now we raise an error if images are present and vision is not supported
|
||||
if self.capabilities["vision"] is False:
|
||||
for message in messages:
|
||||
if isinstance(message, UserMessage):
|
||||
if isinstance(message.content, list) and any(isinstance(x, Image) for x in message.content):
|
||||
raise ValueError("Model does not support vision and image was provided")
|
||||
|
||||
if json_output is not None:
|
||||
if self.capabilities["json_output"] is False and json_output is True:
|
||||
raise ValueError("Model does not support JSON output")
|
||||
|
||||
if json_output is True:
|
||||
create_args["response_format"] = {"type": "json_object"}
|
||||
else:
|
||||
create_args["response_format"] = {"type": "text"}
|
||||
|
||||
if len(tools) > 0:
|
||||
converted_tools = convert_tools(tools)
|
||||
stream_future = asyncio.ensure_future(
|
||||
self._client.chat.completions.create(
|
||||
messages=oai_messages,
|
||||
stream=True,
|
||||
tools=converted_tools,
|
||||
**create_args,
|
||||
)
|
||||
)
|
||||
else:
|
||||
stream_future = asyncio.ensure_future(
|
||||
self._client.chat.completions.create(messages=oai_messages, stream=True, **create_args)
|
||||
)
|
||||
if cancellation_token is not None:
|
||||
cancellation_token.link_future(stream_future)
|
||||
stream = await stream_future
|
||||
|
||||
stop_reason = None
|
||||
maybe_model = None
|
||||
content_deltas: List[str] = []
|
||||
full_tool_calls: Dict[int, FunctionCall] = {}
|
||||
completion_tokens = 0
|
||||
logprobs: Optional[List[ChatCompletionTokenLogprob]] = None
|
||||
while True:
|
||||
try:
|
||||
chunk_future = asyncio.ensure_future(anext(stream))
|
||||
if cancellation_token is not None:
|
||||
cancellation_token.link_future(chunk_future)
|
||||
chunk = await chunk_future
|
||||
choice = chunk.choices[0]
|
||||
stop_reason = choice.finish_reason
|
||||
maybe_model = chunk.model
|
||||
# First try get content
|
||||
if choice.delta.content is not None:
|
||||
content_deltas.append(choice.delta.content)
|
||||
if len(choice.delta.content) > 0:
|
||||
yield choice.delta.content
|
||||
continue
|
||||
|
||||
# Otherwise, get tool calls
|
||||
if choice.delta.tool_calls is not None:
|
||||
for tool_call_chunk in choice.delta.tool_calls:
|
||||
idx = tool_call_chunk.index
|
||||
if idx not in full_tool_calls:
|
||||
# We ignore the type hint here because we want to fill in type when the delta provides it
|
||||
full_tool_calls[idx] = FunctionCall(id="", arguments="", name="")
|
||||
|
||||
if tool_call_chunk.id is not None:
|
||||
full_tool_calls[idx].id += tool_call_chunk.id
|
||||
|
||||
if tool_call_chunk.function is not None:
|
||||
if tool_call_chunk.function.name is not None:
|
||||
full_tool_calls[idx].name += tool_call_chunk.function.name
|
||||
if tool_call_chunk.function.arguments is not None:
|
||||
full_tool_calls[idx].arguments += tool_call_chunk.function.arguments
|
||||
if choice.logprobs and choice.logprobs.content:
|
||||
logprobs = [
|
||||
ChatCompletionTokenLogprob(
|
||||
token=x.token,
|
||||
logprob=x.logprob,
|
||||
top_logprobs=[TopLogprob(logprob=y.logprob, bytes=y.bytes) for y in x.top_logprobs],
|
||||
bytes=x.bytes,
|
||||
)
|
||||
for x in choice.logprobs.content
|
||||
]
|
||||
|
||||
except StopAsyncIteration:
|
||||
break
|
||||
|
||||
model = maybe_model or create_args["model"]
|
||||
model = model.replace("gpt-35", "gpt-3.5") # hack for Azure API
|
||||
|
||||
# TODO fix count token
|
||||
prompt_tokens = 0
|
||||
# prompt_tokens = count_token(messages, model=model)
|
||||
if stop_reason is None:
|
||||
raise ValueError("No stop reason found")
|
||||
|
||||
content: Union[str, List[FunctionCall]]
|
||||
if len(content_deltas) > 1:
|
||||
content = "".join(content_deltas)
|
||||
completion_tokens = 0
|
||||
# completion_tokens = count_token(content, model=model)
|
||||
else:
|
||||
completion_tokens = 0
|
||||
# TODO: fix assumption that dict values were added in order and actually order by int index
|
||||
# for tool_call in full_tool_calls.values():
|
||||
# # value = json.dumps(tool_call)
|
||||
# # completion_tokens += count_token(value, model=model)
|
||||
# completion_tokens += 0
|
||||
content = list(full_tool_calls.values())
|
||||
|
||||
usage = RequestUsage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
)
|
||||
if stop_reason == "function_call":
|
||||
raise ValueError("Function calls are not supported in this context")
|
||||
if stop_reason == "tool_calls":
|
||||
stop_reason = "function_calls"
|
||||
|
||||
result = CreateResult(
|
||||
finish_reason=stop_reason, # type: ignore
|
||||
content=content,
|
||||
usage=usage,
|
||||
cached=False,
|
||||
logprobs=logprobs,
|
||||
)
|
||||
|
||||
_add_usage(self._actual_usage, usage)
|
||||
_add_usage(self._total_usage, usage)
|
||||
|
||||
yield result
|
||||
|
||||
def actual_usage(self) -> RequestUsage:
|
||||
return self._actual_usage
|
||||
|
||||
def total_usage(self) -> RequestUsage:
|
||||
return self._total_usage
|
||||
|
||||
def count_tokens(self, messages: Sequence[LLMMessage], tools: Sequence[Tool | ToolSchema] = []) -> int:
|
||||
model = self._create_args["model"]
|
||||
try:
|
||||
encoding = tiktoken.encoding_for_model(model)
|
||||
except KeyError:
|
||||
trace_logger.warning(f"Model {model} not found. Using cl100k_base encoding.")
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
tokens_per_message = 3
|
||||
tokens_per_name = 1
|
||||
num_tokens = 0
|
||||
|
||||
# Message tokens.
|
||||
for message in messages:
|
||||
num_tokens += tokens_per_message
|
||||
oai_message = to_oai_type(message)
|
||||
for oai_message_part in oai_message:
|
||||
for key, value in oai_message_part.items():
|
||||
if value is None:
|
||||
continue
|
||||
|
||||
if isinstance(message, UserMessage) and isinstance(value, list):
|
||||
typed_message_value = cast(List[ChatCompletionContentPartParam], value)
|
||||
|
||||
assert len(typed_message_value) == len(
|
||||
message.content
|
||||
), "Mismatch in message content and typed message value"
|
||||
|
||||
# We need image properties that are only in the original message
|
||||
for part, content_part in zip(typed_message_value, message.content, strict=False):
|
||||
if isinstance(content_part, Image):
|
||||
# TODO: add detail parameter
|
||||
num_tokens += calculate_vision_tokens(content_part)
|
||||
elif isinstance(part, str):
|
||||
num_tokens += len(encoding.encode(part))
|
||||
else:
|
||||
try:
|
||||
serialized_part = json.dumps(part)
|
||||
num_tokens += len(encoding.encode(serialized_part))
|
||||
except TypeError:
|
||||
trace_logger.warning(f"Could not convert {part} to string, skipping.")
|
||||
else:
|
||||
if not isinstance(value, str):
|
||||
try:
|
||||
value = json.dumps(value)
|
||||
except TypeError:
|
||||
trace_logger.warning(f"Could not convert {value} to string, skipping.")
|
||||
continue
|
||||
num_tokens += len(encoding.encode(value))
|
||||
if key == "name":
|
||||
num_tokens += tokens_per_name
|
||||
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
|
||||
|
||||
# Tool tokens.
|
||||
oai_tools = convert_tools(tools)
|
||||
for tool in oai_tools:
|
||||
function = tool["function"]
|
||||
tool_tokens = len(encoding.encode(function["name"]))
|
||||
if "description" in function:
|
||||
tool_tokens += len(encoding.encode(function["description"]))
|
||||
tool_tokens -= 2
|
||||
if "parameters" in function:
|
||||
parameters = function["parameters"]
|
||||
if "properties" in parameters:
|
||||
assert isinstance(parameters["properties"], dict)
|
||||
for propertiesKey in parameters["properties"]: # pyright: ignore
|
||||
assert isinstance(propertiesKey, str)
|
||||
tool_tokens += len(encoding.encode(propertiesKey))
|
||||
v = parameters["properties"][propertiesKey] # pyright: ignore
|
||||
for field in v: # pyright: ignore
|
||||
if field == "type":
|
||||
tool_tokens += 2
|
||||
tool_tokens += len(encoding.encode(v["type"])) # pyright: ignore
|
||||
elif field == "description":
|
||||
tool_tokens += 2
|
||||
tool_tokens += len(encoding.encode(v["description"])) # pyright: ignore
|
||||
elif field == "enum":
|
||||
tool_tokens -= 3
|
||||
for o in v["enum"]: # pyright: ignore
|
||||
tool_tokens += 3
|
||||
tool_tokens += len(encoding.encode(o)) # pyright: ignore
|
||||
else:
|
||||
trace_logger.warning(f"Not supported field {field}")
|
||||
tool_tokens += 11
|
||||
if len(parameters["properties"]) == 0: # pyright: ignore
|
||||
tool_tokens -= 2
|
||||
num_tokens += tool_tokens
|
||||
num_tokens += 12
|
||||
return num_tokens
|
||||
|
||||
def remaining_tokens(self, messages: Sequence[LLMMessage], tools: Sequence[Tool | ToolSchema] = []) -> int:
|
||||
token_limit = _model_info.get_token_limit(self._create_args["model"])
|
||||
return token_limit - self.count_tokens(messages, tools)
|
||||
|
||||
@property
|
||||
def capabilities(self) -> ModelCapabilities:
|
||||
return self._model_capabilities
|
||||
|
||||
|
||||
class OpenAIChatCompletionClient(BaseOpenAIChatCompletionClient):
|
||||
def __init__(self, **kwargs: Unpack[OpenAIClientConfiguration]):
|
||||
if "model" not in kwargs:
|
||||
raise ValueError("model is required for OpenAIChatCompletionClient")
|
||||
|
||||
model_capabilities: Optional[ModelCapabilities] = None
|
||||
copied_args = dict(kwargs).copy()
|
||||
if "model_capabilities" in kwargs:
|
||||
model_capabilities = kwargs["model_capabilities"]
|
||||
del copied_args["model_capabilities"]
|
||||
|
||||
client = _openai_client_from_config(copied_args)
|
||||
create_args = _create_args_from_config(copied_args)
|
||||
self._raw_config = copied_args
|
||||
super().__init__(client, create_args, model_capabilities)
|
||||
|
||||
def __getstate__(self) -> Dict[str, Any]:
|
||||
state = self.__dict__.copy()
|
||||
state["_client"] = None
|
||||
return state
|
||||
|
||||
def __setstate__(self, state: Dict[str, Any]) -> None:
|
||||
self.__dict__.update(state)
|
||||
self._client = _openai_client_from_config(state["_raw_config"])
|
||||
|
||||
|
||||
class AzureOpenAIChatCompletionClient(BaseOpenAIChatCompletionClient):
|
||||
def __init__(self, **kwargs: Unpack[AzureOpenAIClientConfiguration]):
|
||||
if "model" not in kwargs:
|
||||
raise ValueError("model is required for OpenAIChatCompletionClient")
|
||||
|
||||
model_capabilities: Optional[ModelCapabilities] = None
|
||||
copied_args = dict(kwargs).copy()
|
||||
if "model_capabilities" in kwargs:
|
||||
model_capabilities = kwargs["model_capabilities"]
|
||||
del copied_args["model_capabilities"]
|
||||
|
||||
client = _azure_openai_client_from_config(copied_args)
|
||||
create_args = _create_args_from_config(copied_args)
|
||||
self._raw_config = copied_args
|
||||
super().__init__(client, create_args, model_capabilities)
|
||||
|
||||
def __getstate__(self) -> Dict[str, Any]:
|
||||
state = self.__dict__.copy()
|
||||
state["_client"] = None
|
||||
return state
|
||||
|
||||
def __setstate__(self, state: Dict[str, Any]) -> None:
|
||||
self.__dict__.update(state)
|
||||
self._client = _azure_openai_client_from_config(state["_raw_config"])
|
||||
@@ -0,0 +1,51 @@
from typing import Awaitable, Callable, Dict, List, Literal, Optional, Union

from autogen_core.components.models import ModelCapabilities
from typing_extensions import Required, TypedDict


class ResponseFormat(TypedDict):
    type: Literal["text", "json_object"]


class CreateArguments(TypedDict, total=False):
    frequency_penalty: Optional[float]
    logit_bias: Optional[Dict[str, int]]
    max_tokens: Optional[int]
    n: Optional[int]
    presence_penalty: Optional[float]
    response_format: ResponseFormat
    seed: Optional[int]
    stop: Union[Optional[str], List[str]]
    temperature: Optional[float]
    top_p: Optional[float]
    user: str


AsyncAzureADTokenProvider = Callable[[], Union[str, Awaitable[str]]]


class BaseOpenAIClientConfiguration(CreateArguments, total=False):
    model: str
    api_key: str
    timeout: Union[float, None]
    max_retries: int


# See OpenAI docs for explanation of these parameters
class OpenAIClientConfiguration(BaseOpenAIClientConfiguration, total=False):
    organization: str
    base_url: str
    # Not required
    model_capabilities: ModelCapabilities


class AzureOpenAIClientConfiguration(BaseOpenAIClientConfiguration, total=False):
    # Azure specific
    azure_endpoint: Required[str]
    azure_deployment: str
    api_version: Required[str]
    azure_ad_token: str
    azure_ad_token_provider: AsyncAzureADTokenProvider
    # Must be provided
    model_capabilities: Required[ModelCapabilities]
|
||||
|
||||
.. note::
|
||||
|
||||
This class requires the :code:`docker-code-executor` extra for the :code:`autogen-ext` package.
|
||||
This class requires the :code:`langchain` extra for the :code:`autogen-ext` package.
|
||||
|
||||
|
||||
Args:
|
||||
|
||||
@ -7,18 +7,17 @@ from autogen_core.base import CancellationToken
|
||||
from autogen_core.components import Image
|
||||
from autogen_core.components.models import (
|
||||
AssistantMessage,
|
||||
AzureOpenAIChatCompletionClient,
|
||||
CreateResult,
|
||||
FunctionExecutionResult,
|
||||
FunctionExecutionResultMessage,
|
||||
LLMMessage,
|
||||
OpenAIChatCompletionClient,
|
||||
SystemMessage,
|
||||
UserMessage,
|
||||
)
|
||||
from autogen_core.components.models._model_info import resolve_model
|
||||
from autogen_core.components.models._openai_client import calculate_vision_tokens
|
||||
from autogen_core.components.tools import FunctionTool
|
||||
from autogen_ext.models import AzureOpenAIChatCompletionClient, OpenAIChatCompletionClient
|
||||
from autogen_ext.models._openai._model_info import resolve_model
|
||||
from autogen_ext.models._openai._openai_client import calculate_vision_tokens
|
||||
from openai.resources.chat.completions import AsyncCompletions
|
||||
from openai.types.chat.chat_completion import ChatCompletion, Choice
|
||||
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, ChoiceDelta
|
||||
@ -166,7 +165,7 @@ async def test_openai_chat_completion_client_count_tokens(monkeypatch: pytest.Mo
|
||||
|
||||
mockcalculate_vision_tokens = MagicMock()
|
||||
monkeypatch.setattr(
|
||||
"autogen_core.components.models._openai_client.calculate_vision_tokens", mockcalculate_vision_tokens
|
||||
"autogen_ext.models._openai._openai_client.calculate_vision_tokens", mockcalculate_vision_tokens
|
||||
)
|
||||
|
||||
num_tokens = client.count_tokens(messages, tools=tools)
|
||||
@ -18,6 +18,7 @@ classifiers = [
|
||||
|
||||
dependencies = [
|
||||
"autogen-core",
|
||||
"autogen-ext",
|
||||
"beautifulsoup4",
|
||||
"aiofiles",
|
||||
"requests",
|
||||
|
||||
@ -8,11 +8,10 @@ from typing import Any, Dict, List, Literal
|
||||
from autogen_core.application.logging.events import LLMCallEvent
|
||||
from autogen_core.components import Image
|
||||
from autogen_core.components.models import (
|
||||
AzureOpenAIChatCompletionClient,
|
||||
ChatCompletionClient,
|
||||
ModelCapabilities,
|
||||
OpenAIChatCompletionClient,
|
||||
)
|
||||
from autogen_ext.models import AzureOpenAIChatCompletionClient, OpenAIChatCompletionClient
|
||||
|
||||
from .messages import (
|
||||
AgentEvent,
|
||||
@ -66,7 +65,7 @@ def create_completion_client_from_env(env: Dict[str, str] | None = None, **kwarg
|
||||
|
||||
# Instantiate the correct client
|
||||
if _provider == "openai":
|
||||
return OpenAIChatCompletionClient(**_kwargs)
|
||||
return OpenAIChatCompletionClient(**_kwargs) # type: ignore
|
||||
elif _provider == "azure":
|
||||
if _kwargs.get("azure_ad_token_provider", "").lower() == "default":
|
||||
if _default_azure_ad_token_provider is None:
|
||||
@ -76,7 +75,7 @@ def create_completion_client_from_env(env: Dict[str, str] | None = None, **kwarg
|
||||
DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
|
||||
)
|
||||
_kwargs["azure_ad_token_provider"] = _default_azure_ad_token_provider
|
||||
return AzureOpenAIChatCompletionClient(**_kwargs)
|
||||
return AzureOpenAIChatCompletionClient(**_kwargs) # type: ignore
|
||||
else:
|
||||
raise ValueError(f"Unknown OAI provider '{_provider}'")
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@ dev-dependencies = [
|
||||
|
||||
[tool.uv.sources]
|
||||
autogen-core = { workspace = true }
|
||||
autogen-ext = { workspace = true }
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
|
||||
python/uv.lock (generated, 20 changes)
@@ -491,22 +491,40 @@ dependencies = [
]

[package.optional-dependencies]
azure = [
    { name = "azure-core" },
    { name = "azure-identity" },
]
azure-code-executor = [
    { name = "azure-core" },
]
docker = [
    { name = "docker" },
]
docker-code-executor = [
    { name = "docker" },
]
langchain = [
    { name = "langchain-core" },
]
langchain-tools = [
    { name = "langchain-core" },
]
openai = [
    { name = "openai" },
]

[package.metadata]
requires-dist = [
    { name = "autogen-core", editable = "packages/autogen-core" },
    { name = "azure-core", marker = "extra == 'azure'" },
    { name = "azure-core", marker = "extra == 'azure-code-executor'" },
    { name = "azure-identity", marker = "extra == 'azure'" },
    { name = "docker", marker = "extra == 'docker'", specifier = "~=7.0" },
    { name = "docker", marker = "extra == 'docker-code-executor'", specifier = "~=7.0" },
    { name = "langchain-core", marker = "extra == 'langchain'", specifier = "~=0.3.3" },
    { name = "langchain-core", marker = "extra == 'langchain-tools'", specifier = "~=0.3.3" },
    { name = "openai", marker = "extra == 'openai'", specifier = ">=1.3" },
]

[[package]]

@@ -516,6 +534,7 @@ source = { editable = "packages/autogen-magentic-one" }
dependencies = [
    { name = "aiofiles" },
    { name = "autogen-core" },
    { name = "autogen-ext" },
    { name = "beautifulsoup4" },
    { name = "mammoth" },
    { name = "markdownify" },

@@ -548,6 +567,7 @@ dev = [
requires-dist = [
    { name = "aiofiles" },
    { name = "autogen-core", editable = "packages/autogen-core" },
    { name = "autogen-ext", editable = "packages/autogen-ext" },
    { name = "beautifulsoup4" },
    { name = "mammoth" },
    { name = "markdownify" },