Support iterm inline image protocol in console output (#4200)

* Support iterm inline image protocol in console output

* fix lint

---------

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
2025-11-03 11:20:35 +00:00 · 2024-11-15 16:26:18 -05:00 · 2024-11-15 16:26:18 -05:00 · d55e68fc29
commit d55e68fc29
parent 908ed8c60f
1 changed files with 31 additions and 6 deletions
--- a/python/packages/autogen-agentchat/src/autogen_agentchat/task/_console.py
+++ b/python/packages/autogen-agentchat/src/autogen_agentchat/task/_console.py
@ -1,17 +1,33 @@
+import os
 import sys
 import time
 from typing import AsyncGenerator, List

+from autogen_core.components import Image
 from autogen_core.components.models import RequestUsage

 from autogen_agentchat.base import TaskResult
 from autogen_agentchat.messages import AgentMessage, MultiModalMessage


-async def Console(stream: AsyncGenerator[AgentMessage | TaskResult, None]) -> None:
-    """Consume the stream from  :meth:`~autogen_agentchat.teams.Team.run_stream`
-    and print the messages to the console."""
+def _is_running_in_iterm() -> bool:
+    return os.getenv("TERM_PROGRAM") == "iTerm.app"

+
+def _is_output_a_tty() -> bool:
+    return sys.stdout.isatty()
+
+
+async def Console(stream: AsyncGenerator[AgentMessage | TaskResult, None], *, no_inline_images: bool = False) -> None:
+    """Consume the stream from  :meth:`~autogen_agentchat.teams.Team.run_stream`
+    and print the messages to the console.
+
+    Args:
+        stream (AsyncGenerator[AgentMessage | TaskResult, None]): Stream to render.
+        no_inline_images (bool, optional): By default, images are rendered inline when running in iTerm2 with stdout attached to a TTY. Set to True to disable this behavior. Defaults to False.
+    """
+
+    render_image_iterm = _is_running_in_iterm() and _is_output_a_tty() and not no_inline_images
    start_time = time.time()
    total_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
    async for message in stream:
@ -27,7 +43,7 @@ async def Console(stream: AsyncGenerator[AgentMessage | TaskResult, None]) -> No
            )
            sys.stdout.write(output)
        else:
-            output = f"{'-' * 10} {message.source} {'-' * 10}\n{_message_to_str(message)}\n"
+            output = f"{'-' * 10} {message.source} {'-' * 10}\n{_message_to_str(message, render_image_iterm=render_image_iterm)}\n"
            if message.models_usage:
                output += f"[Prompt tokens: {message.models_usage.prompt_tokens}, Completion tokens: {message.models_usage.completion_tokens}]\n"
                total_usage.completion_tokens += message.models_usage.completion_tokens
@ -35,14 +51,23 @@ async def Console(stream: AsyncGenerator[AgentMessage | TaskResult, None]) -> No
            sys.stdout.write(output)


-def _message_to_str(message: AgentMessage) -> str:
+# iTerm2 image rendering protocol: https://iterm2.com/documentation-images.html
+def _image_to_iterm(image: Image) -> str:
+    image_data = image.to_base64()
+    return f"\033]1337;File=inline=1:{image_data}\a\n"
+
+
+def _message_to_str(message: AgentMessage, *, render_image_iterm: bool = False) -> str:
    if isinstance(message, MultiModalMessage):
        result: List[str] = []
        for c in message.content:
            if isinstance(c, str):
                result.append(c)
            else:
-                result.append("<image>")
+                if render_image_iterm:
+                    result.append(_image_to_iterm(c))
+                else:
+                    result.append("<image>")
        return "\n".join(result)
    else:
        return f"{message.content}"