2024-12-04 11:28:44 -08:00
import pytest
from autogen_agentchat . agents import CodeExecutorAgent
from autogen_agentchat . base import Response
2025-04-15 11:36:40 +05:30
from autogen_agentchat . messages import (
CodeExecutionEvent ,
CodeGenerationEvent ,
TextMessage ,
)
2024-12-04 11:28:44 -08:00
from autogen_core import CancellationToken
2025-04-22 21:54:05 +05:30
from autogen_core . models import ModelFamily , ModelInfo
2024-12-04 20:21:46 -08:00
from autogen_ext . code_executors . local import LocalCommandLineCodeExecutor
2025-04-15 11:36:40 +05:30
from autogen_ext . models . replay import ReplayChatCompletionClient
2024-12-04 11:28:44 -08:00
@pytest.mark.asyncio
async def test_basic_code_execution ( ) - > None :
""" Test basic code execution """
agent = CodeExecutorAgent ( name = " code_executor " , code_executor = LocalCommandLineCodeExecutor ( ) )
messages = [
TextMessage (
content = """
` ` ` python
import math
number = 42
square_root = math . sqrt ( number )
print ( " %0.3f " % ( square_root , ) )
` ` `
""" .strip(),
source = " assistant " ,
)
]
response = await agent . on_messages ( messages , CancellationToken ( ) )
assert isinstance ( response , Response )
assert isinstance ( response . chat_message , TextMessage )
assert response . chat_message . content . strip ( ) == " 6.481 "
assert response . chat_message . source == " code_executor "
2025-04-15 11:36:40 +05:30
@pytest.mark.asyncio
async def test_code_generation_and_execution_with_model_client ( ) - > None :
"""
Tests the code generation , execution and reflection pipeline using a model client .
"""
language = " python "
code = ' import math \n \n number = 42 \n square_root = math.sqrt(number) \n print( " %0.3f " % (square_root,)) '
model_client = ReplayChatCompletionClient (
[ f " Here is the code to calculate the square root of 42: \n ``` { language } \n { code } ``` " . strip ( ) , " TERMINATE " ]
)
agent = CodeExecutorAgent (
name = " code_executor_agent " , code_executor = LocalCommandLineCodeExecutor ( ) , model_client = model_client
)
messages = [
TextMessage (
content = " Generate python code to calculate the square root of 42 " ,
source = " assistant " ,
)
]
code_generation_event : CodeGenerationEvent | None = None
code_execution_event : CodeExecutionEvent | None = None
response : Response | None = None
async for message in agent . on_messages_stream ( messages , CancellationToken ( ) ) :
if isinstance ( message , CodeGenerationEvent ) :
code_block = message . code_blocks [ 0 ]
assert code_block . code == code , " Code block does not match "
assert code_block . language == language , " Language does not match "
code_generation_event = message
elif isinstance ( message , CodeExecutionEvent ) :
assert message . to_text ( ) . strip ( ) == " 6.481 " , f " Expected ' 6.481 ' , got: { message . to_text ( ) . strip ( ) } "
code_execution_event = message
elif isinstance ( message , Response ) :
assert isinstance (
message . chat_message , TextMessage
) , f " Expected TextMessage, got: { type ( message . chat_message ) } "
assert (
message . chat_message . source == " code_executor_agent "
) , f " Expected source ' code_executor_agent ' , got: { message . chat_message . source } "
response = message
else :
raise AssertionError ( f " Unexpected message type: { type ( message ) } " )
assert code_generation_event is not None , " Code generation event was not received "
assert code_execution_event is not None , " Code execution event was not received "
assert response is not None , " Response was not received "
@pytest.mark.asyncio
async def test_no_code_response_with_model_client ( ) - > None :
"""
Tests agent behavior when the model client responds with non - code content .
"""
model_client = ReplayChatCompletionClient ( [ " The capital of France is Paris. " , " TERMINATE " ] )
agent = CodeExecutorAgent (
name = " code_executor_agent " , code_executor = LocalCommandLineCodeExecutor ( ) , model_client = model_client
)
messages = [
TextMessage (
content = " What is the capital of France? " ,
source = " assistant " ,
)
]
response : Response | None = None
async for message in agent . on_messages_stream ( messages , CancellationToken ( ) ) :
if isinstance ( message , Response ) :
assert isinstance (
message . chat_message , TextMessage
) , f " Expected TextMessage, got: { type ( message . chat_message ) } "
assert (
message . chat_message . source == " code_executor_agent "
) , f " Expected source ' code_executor_agent ' , got: { message . chat_message . source } "
assert (
message . chat_message . content . strip ( ) == " The capital of France is Paris. "
) , f " Expected ' The capital of France is Paris. ' , got: { message . chat_message . content . strip ( ) } "
response = message
else :
raise AssertionError ( f " Unexpected message type: { type ( message ) } " )
assert response is not None , " Response was not received "
2025-04-22 21:54:05 +05:30
@pytest.mark.asyncio
async def test_self_debugging_loop ( ) - > None :
"""
Tests self debugging loop when the model client responds with incorrect code .
"""
language = " python "
incorrect_code_block = """
numbers = [ 10 , 20 , 30 , 40 , 50 ]
mean = sum ( numbers ) / len ( numbers
print ( " The mean is: " , mean )
""" .strip()
incorrect_code_result = """
mean = sum ( numbers ) / len ( numbers
^
SyntaxError : ' ( ' was never closed
""" .strip()
correct_code_block = """
numbers = [ 10 , 20 , 30 , 40 , 50 ]
mean = sum ( numbers ) / len ( numbers )
print ( " The mean is: " , mean )
""" .strip()
correct_code_result = """
The mean is : 30.0
""" .strip()
model_client = ReplayChatCompletionClient (
[
f """
Here is the code to calculate the mean of 10 , 20 , 30 , 40 , 50
` ` ` { language }
{ incorrect_code_block }
` ` `
""" ,
""" { " retry " : " true " , " reason " : " Retry 1: It is a test environment " } """ ,
f """
Here is the updated code to calculate the mean of 10 , 20 , 30 , 40 , 50
` ` ` { language }
{ correct_code_block }
` ` ` """ ,
" Final Response " ,
" TERMINATE " ,
] ,
model_info = ModelInfo (
vision = False ,
function_calling = False ,
json_output = True ,
family = ModelFamily . UNKNOWN ,
structured_output = True ,
) ,
)
agent = CodeExecutorAgent (
name = " code_executor_agent " ,
code_executor = LocalCommandLineCodeExecutor ( ) ,
model_client = model_client ,
max_retries_on_error = 1 ,
)
messages = [
TextMessage (
content = " Calculate the mean of 10, 20, 30, 40, 50. " ,
source = " assistant " ,
)
]
incorrect_code_generation_event : CodeGenerationEvent | None = None
correct_code_generation_event : CodeGenerationEvent | None = None
retry_decision_event : CodeGenerationEvent | None = None
incorrect_code_execution_event : CodeExecutionEvent | None = None
correct_code_execution_event : CodeExecutionEvent | None = None
response : Response | None = None
message_id : int = 0
async for message in agent . on_messages_stream ( messages , CancellationToken ( ) ) :
if isinstance ( message , CodeGenerationEvent ) and message_id == 0 :
# Step 1: First code generation
code_block = message . code_blocks [ 0 ]
assert code_block . code . strip ( ) == incorrect_code_block , " Incorrect code block does not match "
assert code_block . language == language , " Language does not match "
incorrect_code_generation_event = message
elif isinstance ( message , CodeExecutionEvent ) and message_id == 1 :
# Step 2: First code execution
assert (
incorrect_code_result in message . to_text ( ) . strip ( )
) , f " Expected { incorrect_code_result } in execution result, got: { message . to_text ( ) . strip ( ) } "
incorrect_code_execution_event = message
elif isinstance ( message , CodeGenerationEvent ) and message_id == 2 :
# Step 3: Retry generation with proposed correction
retry_response = " Attempt number: 1 \n Proposed correction: Retry 1: It is a test environment "
assert (
message . to_text ( ) . strip ( ) == retry_response
) , f " Expected { retry_response } , got: { message . to_text ( ) . strip ( ) } "
retry_decision_event = message
elif isinstance ( message , CodeGenerationEvent ) and message_id == 3 :
# Step 4: Second retry code generation
code_block = message . code_blocks [ 0 ]
assert code_block . code . strip ( ) == correct_code_block , " Correct code block does not match "
assert code_block . language == language , " Language does not match "
correct_code_generation_event = message
elif isinstance ( message , CodeExecutionEvent ) and message_id == 4 :
# Step 5: Second retry code execution
assert (
message . to_text ( ) . strip ( ) == correct_code_result
) , f " Expected { correct_code_result } in execution result, got: { message . to_text ( ) . strip ( ) } "
correct_code_execution_event = message
elif isinstance ( message , Response ) and message_id == 5 :
# Step 6: Final response
assert isinstance (
message . chat_message , TextMessage
) , f " Expected TextMessage, got: { type ( message . chat_message ) } "
assert (
message . chat_message . source == " code_executor_agent "
) , f " Expected source ' code_executor_agent ' , got: { message . chat_message . source } "
response = message
else :
raise AssertionError ( f " Unexpected message type: { type ( message ) } " )
message_id + = 1
assert incorrect_code_generation_event is not None , " Incorrect code generation event was not received "
assert incorrect_code_execution_event is not None , " Incorrect code execution event was not received "
assert retry_decision_event is not None , " Retry decision event was not received "
assert correct_code_generation_event is not None , " Correct code generation event was not received "
assert correct_code_execution_event is not None , " Correct code execution event was not received "
assert response is not None , " Response was not received "
2024-12-04 11:28:44 -08:00
@pytest.mark.asyncio
async def test_code_execution_error ( ) - > None :
""" Test basic code execution """
agent = CodeExecutorAgent ( name = " code_executor " , code_executor = LocalCommandLineCodeExecutor ( ) )
messages = [
TextMessage (
content = """
` ` ` python
import math
number = - 1.0
square_root = math . sqrt ( number )
print ( " %0.3f " % ( square_root , ) )
` ` `
""" .strip(),
source = " assistant " ,
)
]
response = await agent . on_messages ( messages , CancellationToken ( ) )
assert isinstance ( response , Response )
assert isinstance ( response . chat_message , TextMessage )
assert " The script ran, then exited with an error (POSIX exit code: 1) " in response . chat_message . content
assert " ValueError: math domain error " in response . chat_message . content
@pytest.mark.asyncio
async def test_code_execution_no_output ( ) - > None :
""" Test basic code execution """
agent = CodeExecutorAgent ( name = " code_executor " , code_executor = LocalCommandLineCodeExecutor ( ) )
messages = [
TextMessage (
content = """
` ` ` python
import math
number = 42
square_root = math . sqrt ( number )
` ` `
""" .strip(),
source = " assistant " ,
)
]
response = await agent . on_messages ( messages , CancellationToken ( ) )
assert isinstance ( response , Response )
assert isinstance ( response . chat_message , TextMessage )
assert (
" The script ran but produced no output to console. The POSIX exit code was: 0. If you were expecting output, consider revising the script to ensure content is printed to stdout. "
in response . chat_message . content
)
@pytest.mark.asyncio
async def test_code_execution_no_block ( ) - > None :
""" Test basic code execution """
agent = CodeExecutorAgent ( name = " code_executor " , code_executor = LocalCommandLineCodeExecutor ( ) )
messages = [
TextMessage (
content = """
import math
number = 42
square_root = math . sqrt ( number )
""" .strip(),
source = " assistant " ,
)
]
response = await agent . on_messages ( messages , CancellationToken ( ) )
assert isinstance ( response , Response )
assert isinstance ( response . chat_message , TextMessage )
assert (
" No code blocks found in the thread. Please provide at least one markdown-encoded code block "
in response . chat_message . content
)
@pytest.mark.asyncio
async def test_code_execution_multiple_blocks ( ) - > None :
""" Test basic code execution """
agent = CodeExecutorAgent ( name = " code_executor " , code_executor = LocalCommandLineCodeExecutor ( ) )
messages = [
TextMessage (
content = """
` ` ` python
import math
number = 42
square_root = math . sqrt ( number )
print ( " %0.3f " % ( square_root , ) )
` ` `
And also :
` ` ` python
import time
print ( f " The current time is: { time . time ( ) } " )
` ` `
And this should result in an error :
` ` ` python
import math
number = - 1.0
square_root = math . sqrt ( number )
print ( " %0.3f " % ( square_root , ) )
` ` `
""" .strip(),
source = " assistant " ,
)
]
response = await agent . on_messages ( messages , CancellationToken ( ) )
assert isinstance ( response , Response )
assert isinstance ( response . chat_message , TextMessage )
assert " 6.481 " in response . chat_message . content
assert " The current time is: " in response . chat_message . content
assert " The script ran, then exited with an error (POSIX exit code: 1) " in response . chat_message . content
assert " ValueError: math domain error " in response . chat_message . content
2025-03-01 07:46:30 -08:00
@pytest.mark.asyncio
async def test_code_execution_agent_serialization ( ) - > None :
""" Test agent config serialization """
agent = CodeExecutorAgent ( name = " code_executor " , code_executor = LocalCommandLineCodeExecutor ( ) )
# Serialize and deserialize the agent
serialized_agent = agent . dump_component ( )
deserialized_agent = CodeExecutorAgent . load_component ( serialized_agent )
assert isinstance ( deserialized_agent , CodeExecutorAgent )
assert deserialized_agent . name == " code_executor "
2025-04-15 11:36:40 +05:30
@pytest.mark.asyncio
async def test_code_execution_agent_serialization_with_model_client ( ) - > None :
""" Test agent config serialization """
model_client = ReplayChatCompletionClient ( [ " The capital of France is Paris. " , " TERMINATE " ] )
agent = CodeExecutorAgent (
name = " code_executor_agent " , code_executor = LocalCommandLineCodeExecutor ( ) , model_client = model_client
)
# Serialize and deserialize the agent
serialized_agent = agent . dump_component ( )
deserialized_agent = CodeExecutorAgent . load_component ( serialized_agent )
assert isinstance ( deserialized_agent , CodeExecutorAgent )
assert deserialized_agent . name == " code_executor_agent "
assert deserialized_agent . _model_client is not None # type: ignore