# autogen/test/coding/test_commandline_code_executor.py

import sys
import tempfile
import pytest
from autogen.agentchat.conversable_agent import ConversableAgent
from autogen.coding.base import CodeBlock, CodeExecutor
from autogen.coding.factory import CodeExecutorFactory
from autogen.coding.local_commandline_code_executor import LocalCommandlineCodeExecutor
from autogen.oai.openai_utils import config_list_from_json

from conftest import MOCK_OPEN_AI_API_KEY, skip_openai


def test_create() -> None:
    """Check that the factory accepts both an executor name and an executor instance."""
    # A string identifier must yield a local command line executor.
    name_config = {"executor": "commandline-local"}
    from_name = CodeExecutorFactory.create(name_config)
    assert isinstance(from_name, LocalCommandlineCodeExecutor)

    # An executor instance placed in the config must come back as the very same object.
    instance_config = {"executor": LocalCommandlineCodeExecutor()}
    from_instance = CodeExecutorFactory.create(instance_config)
    assert from_instance is instance_config["executor"]


def test_local_commandline_executor_init() -> None:
    """Check that constructor arguments are stored and a bad working directory is rejected."""
    configured = LocalCommandlineCodeExecutor(timeout=10, work_dir=".")
    assert configured.timeout == 10
    assert configured.work_dir == "."

    # Try invalid working directory.
    expected_error = "Working directory .* does not exist."
    with pytest.raises(ValueError, match=expected_error):
        LocalCommandlineCodeExecutor(timeout=111, work_dir="/invalid/directory")


def test_local_commandline_executor_execute_code() -> None:
    """Run the shared code-execution checks on a local executor working in a temporary directory."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        local_executor = LocalCommandlineCodeExecutor(work_dir=scratch_dir)
        _test_execute_code(executor=local_executor)


def _test_execute_code(executor: CodeExecutor) -> None:
    """Exercise ``execute_code_blocks`` on the given executor and check its results.

    Covers a single Python block, several Python blocks in one call, a bash
    block (skipped on Windows) and a multi-line Python script whose saved
    code file is compared line by line with the submitted source.

    Args:
        executor: The code executor under test.
    """
    # Test single code block.
    code_blocks = [CodeBlock(code="import sys; print('hello world!')", language="python")]
    code_result = executor.execute_code_blocks(code_blocks)
    assert code_result.exit_code == 0
    assert "hello world!" in code_result.output
    assert code_result.code_file is not None

    # Test multiple code blocks.
    code_blocks = [
        CodeBlock(code="import sys; print('hello world!')", language="python"),
        CodeBlock(code="a = 100 + 100; print(a)", language="python"),
    ]
    code_result = executor.execute_code_blocks(code_blocks)
    assert code_result.exit_code == 0
    assert "hello world!" in code_result.output
    assert "200" in code_result.output
    assert code_result.code_file is not None

    # Test bash script.
    if sys.platform not in ["win32"]:
        code_blocks = [CodeBlock(code="echo 'hello world!'", language="bash")]
        code_result = executor.execute_code_blocks(code_blocks)
        assert code_result.exit_code == 0
        assert "hello world!" in code_result.output
        assert code_result.code_file is not None

    # Test running code.
    file_lines = ["import sys", "print('hello world!')", "a = 100 + 100", "print(a)"]
    code_blocks = [CodeBlock(code="\n".join(file_lines), language="python")]
    code_result = executor.execute_code_blocks(code_blocks)
    assert code_result.exit_code == 0
    assert "hello world!" in code_result.output
    assert "200" in code_result.output
    assert code_result.code_file is not None

    # Check saved code file.
    with open(code_result.code_file) as f:
        code_lines = f.readlines()
    # zip() stops at the shorter sequence, so without this guard an empty or
    # truncated saved file would pass the comparison below without any check.
    assert len(code_lines) >= len(file_lines)
    for file_line, code_line in zip(file_lines, code_lines):
        assert file_line.strip() == code_line.strip()


@pytest.mark.skipif(sys.platform in ["win32"], reason="do not run on windows")
def test_local_commandline_code_executor_timeout() -> None:
    """Run the shared timeout check on a local executor limited to a one second timeout."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        short_lived = LocalCommandlineCodeExecutor(timeout=1, work_dir=scratch_dir)
        _test_timeout(short_lived)


def _test_timeout(executor: CodeExecutor) -> None:
    """Check that a block sleeping for ten seconds is reported as a timeout.

    The result must carry a truthy exit code and mention "Timeout" in its output.

    Args:
        executor: The code executor under test.
    """
    sleeper = CodeBlock(code="import time; time.sleep(10); print('hello world!')", language="python")
    outcome = executor.execute_code_blocks([sleeper])
    assert outcome.exit_code
    assert "Timeout" in outcome.output


def test_local_commandline_code_executor_restart() -> None:
    """Run the shared restart check on a default-constructed local executor."""
    _test_restart(LocalCommandlineCodeExecutor())


def _test_restart(executor: CodeExecutor) -> None:
    """Check that ``restart`` emits a ``UserWarning`` whose text matches "No action is taken.".

    Args:
        executor: The code executor under test.
    """
    # Check warning.
    expect_no_op_warning = pytest.warns(UserWarning, match=r".*No action is taken.")
    with expect_no_op_warning:
        executor.restart()


@pytest.mark.skipif(skip_openai, reason="requested to skip openai tests")
def test_local_commandline_executor_conversable_agent_capability() -> None:
    """Run the shared agent-capability check on a local executor in a temporary directory."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        local_executor = LocalCommandlineCodeExecutor(work_dir=scratch_dir)
        _test_conversable_agent_capability(executor=local_executor)


def _test_conversable_agent_capability(executor: CodeExecutor) -> None:
    """Check an LLM-backed agent after the executor's user capability is added to it.

    Verifies that the agent's system message contains the executor's default
    system message update, that the agent's reply to a coding request holds
    exactly one Python code block, and that executing this block succeeds and
    prints "hello world".

    Args:
        executor: The code executor under test.
    """
    # Load the model configurations, filtered to the two listed model names.
    model_filter = {
        "model": {
            "gpt-3.5-turbo",
            "gpt-35-turbo",
        },
    }
    config_list = config_list_from_json(
        "OAI_CONFIG_LIST",
        file_location="notebook",
        filter_dict=model_filter,
    )
    agent = ConversableAgent(
        "coding_agent",
        llm_config={"config_list": config_list},
        code_execution_config=False,
    )
    executor.user_capability.add_to_agent(agent)

    # Test updated system prompt.
    assert executor.DEFAULT_SYSTEM_MESSAGE_UPDATE in agent.system_message

    # Test code generation.
    request = {"role": "user", "content": "write a python script to print 'hello world' to the console"}
    requester = ConversableAgent(name="user", llm_config=False, code_execution_config=False)
    reply = agent.generate_reply([request], sender=requester)

    # Test code extraction.
    extracted = executor.code_extractor.extract_code_blocks(reply)  # type: ignore[arg-type]
    assert len(extracted) == 1
    assert extracted[0].language == "python"

    # Test code execution.
    outcome = executor.execute_code_blocks(extracted)
    assert outcome.exit_code == 0
    assert "hello world" in outcome.output.lower().replace(",", "")


def test_local_commandline_executor_conversable_agent_code_execution() -> None:
    """Run the shared agent code-execution check on a local executor.

    The executor works in a temporary directory, and ``OPENAI_API_KEY`` is set
    to ``MOCK_OPEN_AI_API_KEY`` for the duration of the check.
    """
    with tempfile.TemporaryDirectory() as scratch_dir, pytest.MonkeyPatch.context() as env_patch:
        local_executor = LocalCommandlineCodeExecutor(work_dir=scratch_dir)
        env_patch.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
        _test_conversable_agent_code_execution(local_executor)


def _test_conversable_agent_code_execution(executor: CodeExecutor) -> None:
    """Check that an agent configured with the executor runs code found in a message.

    The agent has no LLM; it receives a message containing a fenced Python
    block and its reply must contain the text printed by that block.

    Args:
        executor: The code executor under test.
    """
    message = """
    Example:
    ```python
    print("hello extract code")
    ```
    """

    agent = ConversableAgent(
        "user_proxy",
        code_execution_config={"executor": executor},
        llm_config=False,
    )
    # The agent must use the supplied executor object itself.
    assert agent.code_executor is executor

    requester = ConversableAgent("user", llm_config=False, code_execution_config=False)
    reply = agent.generate_reply([{"role": "user", "content": message}], sender=requester)
    assert "hello extract code" in reply  # type: ignore[operator]
Code executors (#1405) * code executor * test * revert to main conversable agent * prepare for pr * kernel * run open ai tests only when it's out of draft status * update workflow file * revert workflow changes * ipython executor * check kernel installed; fix tests * fix tests * fix tests * update system prompt * Update notebook, more tests * notebook * raise instead of return None * allow user provided code executor. * fixing types * wip * refactoring * polishing * fixed failing tests * resolved merge conflict * fixing failing test * wip * local command line executor and embedded ipython executor * revert notebook * fix format * fix merged error * fix lmm test * fix lmm test * move warning * name and description should be part of the agent protocol, reset is not as it is only used for ConversableAgent; removing accidentally commited file * version for dependency * Update autogen/agentchat/conversable_agent.py Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com> * ordering of protocol * description * fix tests * make ipython executor dependency optional * update document optional dependencies * Remove exclude from Agent protocol * Make ConversableAgent consistent with Agent * fix tests * add doc string * add doc string * fix notebook * fix interface * merge and update agents * disable config usage in reply function * description field setter * customize system message update * update doc --------- Co-authored-by: Davor Runje <davor@airt.ai> Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com> Co-authored-by: Aaron <aaronlaptop12@hotmail.com> Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2024-02-09 20:52:16 -08:00			`import sys`
			`import tempfile`
			`import pytest`
			`from autogen.agentchat.conversable_agent import ConversableAgent`
			`from autogen.coding.base import CodeBlock, CodeExecutor`
			`from autogen.coding.factory import CodeExecutorFactory`
			`from autogen.coding.local_commandline_code_executor import LocalCommandlineCodeExecutor`
			`from autogen.oai.openai_utils import config_list_from_json`

Validate the OpenAI API key format (#1635) * Validate the OpenAI API key format Increase the amount of internal validation for OpenAI API keys. The intent is to shorten the debugging loop in case of typos. The changes do not add validation for Azure OpenAI API keys. * Add the validation in `__init__` of `OpenAIClient`. * Introduce the `MOCK_OPEN_AI_API_KEY` constant for testing. * Add unit test coverage for the `is_valid_api_key` function. * Validate the OpenAI API key format Increase the amount of internal validation for OpenAI API keys. The intent is to shorten the debugging loop in case of typos. The changes do not add validation for Azure OpenAI API keys. * Add the validation in `__init__` of `OpenAIClient`. * Introduce the `MOCK_OPEN_AI_API_KEY` constant for testing. Add unit test coverage for the `is_valid_api_key` function. Log a warning when register a default client fails. * Validate the OpenAI API key format Increase the amount of internal validation for OpenAI API keys. The intent is to shorten the debugging loop in case of typos. The changes do not add validation for Azure OpenAI API keys. * Add the validation in `__init__` of `OpenAIClient`. We'll log a warning when the OpenAI API key isn't valid. * Introduce the `MOCK_OPEN_AI_API_KEY` constant for testing. * Add unit test coverage for the `is_valid_api_key` function. * Check for OpenAI base_url before API key validation --------- Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2024-02-14 10:51:38 -08:00			`from conftest import MOCK_OPEN_AI_API_KEY, skip_openai`
Code executors (#1405) * code executor * test * revert to main conversable agent * prepare for pr * kernel * run open ai tests only when it's out of draft status * update workflow file * revert workflow changes * ipython executor * check kernel installed; fix tests * fix tests * fix tests * update system prompt * Update notebook, more tests * notebook * raise instead of return None * allow user provided code executor. * fixing types * wip * refactoring * polishing * fixed failing tests * resolved merge conflict * fixing failing test * wip * local command line executor and embedded ipython executor * revert notebook * fix format * fix merged error * fix lmm test * fix lmm test * move warning * name and description should be part of the agent protocol, reset is not as it is only used for ConversableAgent; removing accidentally commited file * version for dependency * Update autogen/agentchat/conversable_agent.py Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com> * ordering of protocol * description * fix tests * make ipython executor dependency optional * update document optional dependencies * Remove exclude from Agent protocol * Make ConversableAgent consistent with Agent * fix tests * add doc string * add doc string * fix notebook * fix interface * merge and update agents * disable config usage in reply function * description field setter * customize system message update * update doc --------- Co-authored-by: Davor Runje <davor@airt.ai> Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com> Co-authored-by: Aaron <aaronlaptop12@hotmail.com> Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2024-02-09 20:52:16 -08:00

			`def test_create() -> None:`
			`config = {"executor": "commandline-local"}`
			`executor = CodeExecutorFactory.create(config)`
			`assert isinstance(executor, LocalCommandlineCodeExecutor)`

			`config = {"executor": LocalCommandlineCodeExecutor()}`
			`executor = CodeExecutorFactory.create(config)`
			`assert executor is config["executor"]`


			`def test_local_commandline_executor_init() -> None:`
			`executor = LocalCommandlineCodeExecutor(timeout=10, work_dir=".")`
			`assert executor.timeout == 10 and executor.work_dir == "."`

			`# Try invalid working directory.`
			`with pytest.raises(ValueError, match="Working directory .* does not exist."):`
			`executor = LocalCommandlineCodeExecutor(timeout=111, work_dir="/invalid/directory")`


			`def test_local_commandline_executor_execute_code() -> None:`
			`with tempfile.TemporaryDirectory() as temp_dir:`
			`executor = LocalCommandlineCodeExecutor(work_dir=temp_dir)`
			`_test_execute_code(executor=executor)`


			`def _test_execute_code(executor: CodeExecutor) -> None:`
			`# Test single code block.`
			`code_blocks = [CodeBlock(code="import sys; print('hello world!')", language="python")]`
			`code_result = executor.execute_code_blocks(code_blocks)`
			`assert code_result.exit_code == 0 and "hello world!" in code_result.output and code_result.code_file is not None`

			`# Test multiple code blocks.`
			`code_blocks = [`
			`CodeBlock(code="import sys; print('hello world!')", language="python"),`
			`CodeBlock(code="a = 100 + 100; print(a)", language="python"),`
			`]`
			`code_result = executor.execute_code_blocks(code_blocks)`
			`assert (`
			`code_result.exit_code == 0`
			`and "hello world!" in code_result.output`
			`and "200" in code_result.output`
			`and code_result.code_file is not None`
			`)`

			`# Test bash script.`
			`if sys.platform not in ["win32"]:`
			`code_blocks = [CodeBlock(code="echo 'hello world!'", language="bash")]`
			`code_result = executor.execute_code_blocks(code_blocks)`
			`assert code_result.exit_code == 0 and "hello world!" in code_result.output and code_result.code_file is not None`

			`# Test running code.`
			`file_lines = ["import sys", "print('hello world!')", "a = 100 + 100", "print(a)"]`
			`code_blocks = [CodeBlock(code="\n".join(file_lines), language="python")]`
			`code_result = executor.execute_code_blocks(code_blocks)`
			`assert (`
			`code_result.exit_code == 0`
			`and "hello world!" in code_result.output`
			`and "200" in code_result.output`
			`and code_result.code_file is not None`
			`)`

			`# Check saved code file.`
			`with open(code_result.code_file) as f:`
			`code_lines = f.readlines()`
			`for file_line, code_line in zip(file_lines, code_lines):`
			`assert file_line.strip() == code_line.strip()`


			`@pytest.mark.skipif(sys.platform in ["win32"], reason="do not run on windows")`
			`def test_local_commandline_code_executor_timeout() -> None:`
			`with tempfile.TemporaryDirectory() as temp_dir:`
			`executor = LocalCommandlineCodeExecutor(timeout=1, work_dir=temp_dir)`
			`_test_timeout(executor)`


			`def _test_timeout(executor: CodeExecutor) -> None:`
			`code_blocks = [CodeBlock(code="import time; time.sleep(10); print('hello world!')", language="python")]`
			`code_result = executor.execute_code_blocks(code_blocks)`
			`assert code_result.exit_code and "Timeout" in code_result.output`


			`def test_local_commandline_code_executor_restart() -> None:`
			`executor = LocalCommandlineCodeExecutor()`
			`_test_restart(executor)`


			`def _test_restart(executor: CodeExecutor) -> None:`
			`# Check warning.`
			`with pytest.warns(UserWarning, match=r".*No action is taken."):`
			`executor.restart()`


			`@pytest.mark.skipif(skip_openai, reason="requested to skip openai tests")`
			`def test_local_commandline_executor_conversable_agent_capability() -> None:`
			`with tempfile.TemporaryDirectory() as temp_dir:`
			`executor = LocalCommandlineCodeExecutor(work_dir=temp_dir)`
			`_test_conversable_agent_capability(executor=executor)`


			`def _test_conversable_agent_capability(executor: CodeExecutor) -> None:`
			`KEY_LOC = "notebook"`
			`OAI_CONFIG_LIST = "OAI_CONFIG_LIST"`
			`config_list = config_list_from_json(`
			`OAI_CONFIG_LIST,`
			`file_location=KEY_LOC,`
			`filter_dict={`
			`"model": {`
			`"gpt-3.5-turbo",`
			`"gpt-35-turbo",`
			`},`
			`},`
			`)`
			`llm_config = {"config_list": config_list}`
			`agent = ConversableAgent(`
			`"coding_agent",`
			`llm_config=llm_config,`
			`code_execution_config=False,`
			`)`
			`executor.user_capability.add_to_agent(agent)`

			`# Test updated system prompt.`
			`assert executor.DEFAULT_SYSTEM_MESSAGE_UPDATE in agent.system_message`

			`# Test code generation.`
			`reply = agent.generate_reply(`
			`[{"role": "user", "content": "write a python script to print 'hello world' to the console"}],`
			`sender=ConversableAgent(name="user", llm_config=False, code_execution_config=False),`
			`)`

			`# Test code extraction.`
			`code_blocks = executor.code_extractor.extract_code_blocks(reply) # type: ignore[arg-type]`
			`assert len(code_blocks) == 1 and code_blocks[0].language == "python"`

			`# Test code execution.`
			`code_result = executor.execute_code_blocks(code_blocks)`
			`assert code_result.exit_code == 0 and "hello world" in code_result.output.lower().replace(",", "")`


			`def test_local_commandline_executor_conversable_agent_code_execution() -> None:`
			`with tempfile.TemporaryDirectory() as temp_dir:`
			`executor = LocalCommandlineCodeExecutor(work_dir=temp_dir)`
			`with pytest.MonkeyPatch.context() as mp:`
Validate the OpenAI API key format (#1635) * Validate the OpenAI API key format Increase the amount of internal validation for OpenAI API keys. The intent is to shorten the debugging loop in case of typos. The changes do not add validation for Azure OpenAI API keys. * Add the validation in `__init__` of `OpenAIClient`. * Introduce the `MOCK_OPEN_AI_API_KEY` constant for testing. * Add unit test coverage for the `is_valid_api_key` function. * Validate the OpenAI API key format Increase the amount of internal validation for OpenAI API keys. The intent is to shorten the debugging loop in case of typos. The changes do not add validation for Azure OpenAI API keys. * Add the validation in `__init__` of `OpenAIClient`. * Introduce the `MOCK_OPEN_AI_API_KEY` constant for testing. Add unit test coverage for the `is_valid_api_key` function. Log a warning when register a default client fails. * Validate the OpenAI API key format Increase the amount of internal validation for OpenAI API keys. The intent is to shorten the debugging loop in case of typos. The changes do not add validation for Azure OpenAI API keys. * Add the validation in `__init__` of `OpenAIClient`. We'll log a warning when the OpenAI API key isn't valid. * Introduce the `MOCK_OPEN_AI_API_KEY` constant for testing. * Add unit test coverage for the `is_valid_api_key` function. * Check for OpenAI base_url before API key validation --------- Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2024-02-14 10:51:38 -08:00			`mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)`
Code executors (#1405) * code executor * test * revert to main conversable agent * prepare for pr * kernel * run open ai tests only when it's out of draft status * update workflow file * revert workflow changes * ipython executor * check kernel installed; fix tests * fix tests * fix tests * update system prompt * Update notebook, more tests * notebook * raise instead of return None * allow user provided code executor. * fixing types * wip * refactoring * polishing * fixed failing tests * resolved merge conflict * fixing failing test * wip * local command line executor and embedded ipython executor * revert notebook * fix format * fix merged error * fix lmm test * fix lmm test * move warning * name and description should be part of the agent protocol, reset is not as it is only used for ConversableAgent; removing accidentally commited file * version for dependency * Update autogen/agentchat/conversable_agent.py Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com> * ordering of protocol * description * fix tests * make ipython executor dependency optional * update document optional dependencies * Remove exclude from Agent protocol * Make ConversableAgent consistent with Agent * fix tests * add doc string * add doc string * fix notebook * fix interface * merge and update agents * disable config usage in reply function * description field setter * customize system message update * update doc --------- Co-authored-by: Davor Runje <davor@airt.ai> Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com> Co-authored-by: Aaron <aaronlaptop12@hotmail.com> Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2024-02-09 20:52:16 -08:00			`_test_conversable_agent_code_execution(executor)`


			`def _test_conversable_agent_code_execution(executor: CodeExecutor) -> None:`
			`agent = ConversableAgent(`
			`"user_proxy",`
			`code_execution_config={"executor": executor},`
			`llm_config=False,`
			`)`

			`assert agent.code_executor is executor`

			`message = """`
			`Example:`
			```python
			`print("hello extract code")`
			```
			`"""`

			`reply = agent.generate_reply(`
			`[{"role": "user", "content": message}],`
			`sender=ConversableAgent("user", llm_config=False, code_execution_config=False),`
			`)`
			`assert "hello extract code" in reply # type: ignore[operator]`