autogen/python/packages/autogen-agentchat/tests/test_society_of_mind_agent.py
EeS 39321266f9
Improve SocietyOfMindAgent message handling (#6142)
Please refer to #6123  for full context.

That issue outlines several design and behavioral problems with
`SocietyOfMindAgent`.
This DRAFT PR focuses on resolving the most critical and broken
behaviors first.

Here is the error list
🔍 SocietyOfMindAgent: Design Issues and Historical Comparison (v0.2 vs
v0.4+)

###  P1–P4 Regression Issue Table (Updated with Fixes in PR #6142)

| ID | Description | Current v0.4+ Issue | Resolution in PR #6142 | Was
it a problem in v0.2? | Notes |

|-----|-------------|----------------------|--------------------------|----------------------------|-------|
| **P1** | `inner_messages` leaks into outer team termination evaluation
| `Response.inner_messages` is appended to the outer team's
`_message_thread`, affecting termination conditions. Violates
encapsulation. |  `inner_messages` is excluded from `_message_thread`,
avoiding contamination of outer termination logic. |  No | Structural
boundary is now enforced |
| **P2** | Inner team does not execute when outer message history is
empty | In chained executions, if no new outer message exists, no task
is created and the inner team is skipped entirely |  Detects absence of
new outer message and reuses the previous task, passing it via a handoff
message. This ensures the inner team always receives a valid task to
execute |  No | The issue was silent task omission, not summary
failure. Summary succeeds as a downstream effect |
| **P3** | Summary LLM prompt is built from external input only | Prompt
is constructed using external message history, ignoring internal
reasoning |  Prompt construction now uses
`final_response.inner_messages`, restoring internal reasoning as the
source of summarization |  No | Matches v0.2 internal monologue
behavior |
| **P4** | External input is included in summary prompt (possibly
incorrectly) | Outer messages are used in the final LLM summarization
prompt |  Resolved via the same fix as P3; outer messages are no longer
used for summary |  No | Redundant with P3, now fully addressed |


<!-- Thank you for your contribution! Please review
https://microsoft.github.io/autogen/docs/Contribute before opening a
pull request. -->

<!-- Please add a reviewer to the assignee section when you create a PR.
If you don't have the access to it, we will shortly find a reviewer and
assign them to your PR. -->

## Why are these changes needed?

<!-- Please give a short summary of the change and the problem this
solves. -->

## Related issue number
resolve #6123 
Blocked #6168 (Sometimes SoMA send last whitespace message)
related #6187
<!-- For example: "Closes #1234" -->

## Checks

- [ ] I've included any doc changes needed for
<https://microsoft.github.io/autogen/>. See
<https://github.com/microsoft/autogen/blob/main/CONTRIBUTING.md> to
build and test documentation locally.
- [ ] I've added tests (if relevant) corresponding to the changes
introduced in this PR.
- [ ] I've made sure all auto checks have passed.

---------

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
2025-04-04 13:50:50 -07:00

116 lines
5.9 KiB
Python

from typing import AsyncGenerator
import pytest
import pytest_asyncio
from autogen_agentchat.agents import AssistantAgent, SocietyOfMindAgent
from autogen_agentchat.conditions import MaxMessageTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_core import AgentRuntime, SingleThreadedAgentRuntime
from autogen_ext.models.replay import ReplayChatCompletionClient
@pytest_asyncio.fixture(params=["single_threaded", "embedded"]) # type: ignore
async def runtime(request: pytest.FixtureRequest) -> AsyncGenerator[AgentRuntime | None, None]:
if request.param == "single_threaded":
runtime = SingleThreadedAgentRuntime()
runtime.start()
yield runtime
await runtime.stop()
elif request.param == "embedded":
yield None
@pytest.mark.asyncio
async def test_society_of_mind_agent(runtime: AgentRuntime | None) -> None:
model_client = ReplayChatCompletionClient(
["1", "2", "3"],
)
agent1 = AssistantAgent("assistant1", model_client=model_client, system_message="You are a helpful assistant.")
agent2 = AssistantAgent("assistant2", model_client=model_client, system_message="You are a helpful assistant.")
inner_termination = MaxMessageTermination(3)
inner_team = RoundRobinGroupChat([agent1, agent2], termination_condition=inner_termination, runtime=runtime)
society_of_mind_agent = SocietyOfMindAgent("society_of_mind", team=inner_team, model_client=model_client)
response = await society_of_mind_agent.run(task="Count to 10.")
assert len(response.messages) == 2
assert response.messages[0].source == "user"
assert response.messages[1].source == "society_of_mind"
# Test save and load state.
state = await society_of_mind_agent.save_state()
assert state is not None
agent1 = AssistantAgent("assistant1", model_client=model_client, system_message="You are a helpful assistant.")
agent2 = AssistantAgent("assistant2", model_client=model_client, system_message="You are a helpful assistant.")
inner_termination = MaxMessageTermination(3)
inner_team = RoundRobinGroupChat([agent1, agent2], termination_condition=inner_termination, runtime=runtime)
society_of_mind_agent2 = SocietyOfMindAgent("society_of_mind", team=inner_team, model_client=model_client)
await society_of_mind_agent2.load_state(state)
state2 = await society_of_mind_agent2.save_state()
assert state == state2
# Test serialization.
soc_agent_config = society_of_mind_agent.dump_component()
assert soc_agent_config.provider == "autogen_agentchat.agents.SocietyOfMindAgent"
# Test deserialization.
loaded_soc_agent = SocietyOfMindAgent.load_component(soc_agent_config)
assert isinstance(loaded_soc_agent, SocietyOfMindAgent)
assert loaded_soc_agent.name == "society_of_mind"
@pytest.mark.asyncio
async def test_society_of_mind_agent_empty_messges(runtime: AgentRuntime | None) -> None:
model_client = ReplayChatCompletionClient(
["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
)
agent1 = AssistantAgent("assistant1", model_client=model_client, system_message="You are a helpful assistant.")
agent2 = AssistantAgent("assistant2", model_client=model_client, system_message="You are a helpful assistant.")
inner_termination = MaxMessageTermination(3)
inner_team = RoundRobinGroupChat([agent1, agent2], termination_condition=inner_termination, runtime=runtime)
society_of_mind_agent = SocietyOfMindAgent("society_of_mind", team=inner_team, model_client=model_client)
response = await society_of_mind_agent.run()
assert len(response.messages) == 1
assert response.messages[0].source == "society_of_mind"
@pytest.mark.asyncio
async def test_society_of_mind_agent_no_response(runtime: AgentRuntime | None) -> None:
model_client = ReplayChatCompletionClient(
["1", "2", "3"],
)
agent1 = AssistantAgent("assistant1", model_client=model_client, system_message="You are a helpful assistant.")
agent2 = AssistantAgent("assistant2", model_client=model_client, system_message="You are a helpful assistant.")
inner_termination = MaxMessageTermination(1) # Set to 1 to force no response.
inner_team = RoundRobinGroupChat([agent1, agent2], termination_condition=inner_termination, runtime=runtime)
society_of_mind_agent = SocietyOfMindAgent("society_of_mind", team=inner_team, model_client=model_client)
response = await society_of_mind_agent.run(task="Count to 10.")
assert len(response.messages) == 2
assert response.messages[0].source == "user"
assert response.messages[1].source == "society_of_mind"
assert response.messages[1].to_text() == "No response."
@pytest.mark.asyncio
async def test_society_of_mind_agent_multiple_rounds(runtime: AgentRuntime | None) -> None:
model_client = ReplayChatCompletionClient(
["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
)
agent1 = AssistantAgent("assistant1", model_client=model_client, system_message="You are a helpful assistant.")
agent2 = AssistantAgent("assistant2", model_client=model_client, system_message="You are a helpful assistant.")
inner_termination = MaxMessageTermination(3)
inner_team = RoundRobinGroupChat([agent1, agent2], termination_condition=inner_termination, runtime=runtime)
society_of_mind_agent = SocietyOfMindAgent("society_of_mind", team=inner_team, model_client=model_client)
response = await society_of_mind_agent.run(task="Count to 10.")
assert len(response.messages) == 2
assert response.messages[0].source == "user"
assert response.messages[1].source == "society_of_mind"
# Continue.
response = await society_of_mind_agent.run()
assert len(response.messages) == 1
assert response.messages[0].source == "society_of_mind"
# Continue.
response = await society_of_mind_agent.run()
assert len(response.messages) == 1
assert response.messages[0].source == "society_of_mind"