mirror of
https://github.com/microsoft/autogen.git
synced 2025-11-17 18:44:47 +00:00
Add support for HTML, CSS and Javascript in LocalCommandLineCodeExecutor with Mapping executor/saver #2303 (#2464)
* Add support for HTML, CSS and Javascript in LocalCommandLineCodeExecutor * init branch * init branch * feat: test code execution added * fix: test update * fix: test * fix: policy test * feat: default policy --------- Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
This commit is contained in:
parent
ebde196d6b
commit
31fe75ad0e
@ -6,7 +6,7 @@ import warnings
|
|||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from string import Template
|
from string import Template
|
||||||
from typing import Any, Callable, ClassVar, List, TypeVar, Union, cast
|
from typing import Any, Callable, ClassVar, Dict, List, Optional, Union
|
||||||
|
|
||||||
from typing_extensions import ParamSpec
|
from typing_extensions import ParamSpec
|
||||||
|
|
||||||
@ -28,7 +28,31 @@ A = ParamSpec("A")
|
|||||||
|
|
||||||
|
|
||||||
class LocalCommandLineCodeExecutor(CodeExecutor):
|
class LocalCommandLineCodeExecutor(CodeExecutor):
|
||||||
SUPPORTED_LANGUAGES: ClassVar[List[str]] = ["bash", "shell", "sh", "pwsh", "powershell", "ps1", "python"]
|
SUPPORTED_LANGUAGES: ClassVar[List[str]] = [
|
||||||
|
"bash",
|
||||||
|
"shell",
|
||||||
|
"sh",
|
||||||
|
"pwsh",
|
||||||
|
"powershell",
|
||||||
|
"ps1",
|
||||||
|
"python",
|
||||||
|
"javascript",
|
||||||
|
"html",
|
||||||
|
"css",
|
||||||
|
]
|
||||||
|
DEFAULT_EXECUTION_POLICY: ClassVar[Dict[str, bool]] = {
|
||||||
|
"bash": True,
|
||||||
|
"shell": True,
|
||||||
|
"sh": True,
|
||||||
|
"pwsh": True,
|
||||||
|
"powershell": True,
|
||||||
|
"ps1": True,
|
||||||
|
"python": True,
|
||||||
|
"javascript": False,
|
||||||
|
"html": False,
|
||||||
|
"css": False,
|
||||||
|
}
|
||||||
|
|
||||||
FUNCTION_PROMPT_TEMPLATE: ClassVar[
|
FUNCTION_PROMPT_TEMPLATE: ClassVar[
|
||||||
str
|
str
|
||||||
] = """You have access to the following user defined functions. They can be accessed from the module called `$module_name` by their function names.
|
] = """You have access to the following user defined functions. They can be accessed from the module called `$module_name` by their function names.
|
||||||
@ -43,29 +67,27 @@ $functions"""
|
|||||||
work_dir: Union[Path, str] = Path("."),
|
work_dir: Union[Path, str] = Path("."),
|
||||||
functions: List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]] = [],
|
functions: List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]] = [],
|
||||||
functions_module: str = "functions",
|
functions_module: str = "functions",
|
||||||
|
execution_policies: Optional[Dict[str, bool]] = None,
|
||||||
):
|
):
|
||||||
"""(Experimental) A code executor class that executes code through a local command line
|
"""(Experimental) A code executor class that executes or saves LLM generated code through a local command line
|
||||||
environment.
|
environment.
|
||||||
|
|
||||||
**This will execute LLM generated code on the local machine.**
|
**This will execute or save LLM generated code on the local machine.**
|
||||||
|
|
||||||
Each code block is saved as a file and executed in a separate process in
|
Each code block is saved as a file in the working directory. Depending on the execution policy,
|
||||||
the working directory, and a unique file is generated and saved in the
|
the code may be executed in a separate process.
|
||||||
working directory for each code block.
|
The code blocks are executed or saved in the order they are received.
|
||||||
The code blocks are executed in the order they are received.
|
Command line code is sanitized against a list of dangerous commands to prevent self-destructive commands from being executed,
|
||||||
Command line code is sanitized using regular expression match against a list of dangerous commands in order to prevent self-destructive
|
which could potentially affect the user's environment. Supported languages include Python, shell scripts (bash, shell, sh),
|
||||||
commands from being executed which may potentially affect the users environment.
|
PowerShell (pwsh, powershell, ps1), HTML, CSS, and JavaScript.
|
||||||
Currently the only supported languages is Python and shell scripts.
|
Execution policies determine whether each language's code blocks are executed or saved only.
|
||||||
For Python code, use the language "python" for the code block.
|
|
||||||
For shell scripts, use the language "bash", "shell", or "sh" for the code
|
|
||||||
block.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
timeout (int): The timeout for code execution. Default is 60.
|
timeout (int): The timeout for code execution, default is 60 seconds.
|
||||||
work_dir (str): The working directory for the code execution. If None,
|
work_dir (Union[Path, str]): The working directory for code execution, defaults to the current directory.
|
||||||
a default working directory will be used. The default working
|
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]]): A list of callable functions available to the executor.
|
||||||
directory is the current directory ".".
|
functions_module (str): The module name under which functions are accessible.
|
||||||
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any]]]): A list of functions that are available to the code executor. Default is an empty list.
|
execution_policies (Optional[Dict[str, bool]]): A dictionary mapping languages to execution policies (True for execution, False for saving only). Defaults to class-wide DEFAULT_EXECUTION_POLICY.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if timeout < 1:
|
if timeout < 1:
|
||||||
@ -91,6 +113,10 @@ $functions"""
|
|||||||
else:
|
else:
|
||||||
self._setup_functions_complete = True
|
self._setup_functions_complete = True
|
||||||
|
|
||||||
|
self.execution_policies = self.DEFAULT_EXECUTION_POLICY.copy()
|
||||||
|
if execution_policies is not None:
|
||||||
|
self.execution_policies.update(execution_policies)
|
||||||
|
|
||||||
def format_functions_for_prompt(self, prompt_template: str = FUNCTION_PROMPT_TEMPLATE) -> str:
|
def format_functions_for_prompt(self, prompt_template: str = FUNCTION_PROMPT_TEMPLATE) -> str:
|
||||||
"""(Experimental) Format the functions for a prompt.
|
"""(Experimental) Format the functions for a prompt.
|
||||||
|
|
||||||
@ -104,7 +130,6 @@ $functions"""
|
|||||||
Returns:
|
Returns:
|
||||||
str: The formatted prompt.
|
str: The formatted prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
template = Template(prompt_template)
|
template = Template(prompt_template)
|
||||||
return template.substitute(
|
return template.substitute(
|
||||||
module_name=self._functions_module,
|
module_name=self._functions_module,
|
||||||
@ -171,26 +196,19 @@ $functions"""
|
|||||||
required_packages = list(set(flattened_packages))
|
required_packages = list(set(flattened_packages))
|
||||||
if len(required_packages) > 0:
|
if len(required_packages) > 0:
|
||||||
logging.info("Ensuring packages are installed in executor.")
|
logging.info("Ensuring packages are installed in executor.")
|
||||||
|
cmd = [sys.executable, "-m", "pip", "install"] + required_packages
|
||||||
cmd = [sys.executable, "-m", "pip", "install"]
|
|
||||||
cmd.extend(required_packages)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
|
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
|
||||||
)
|
)
|
||||||
except subprocess.TimeoutExpired as e:
|
except subprocess.TimeoutExpired as e:
|
||||||
raise ValueError("Pip install timed out") from e
|
raise ValueError("Pip install timed out") from e
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
raise ValueError(f"Pip install failed. {result.stdout}, {result.stderr}")
|
raise ValueError(f"Pip install failed. {result.stdout}, {result.stderr}")
|
||||||
|
|
||||||
# Attempt to load the function file to check for syntax errors, imports etc.
|
# Attempt to load the function file to check for syntax errors, imports etc.
|
||||||
exec_result = self._execute_code_dont_check_setup([CodeBlock(code=func_file_content, language="python")])
|
exec_result = self._execute_code_dont_check_setup([CodeBlock(code=func_file_content, language="python")])
|
||||||
|
|
||||||
if exec_result.exit_code != 0:
|
if exec_result.exit_code != 0:
|
||||||
raise ValueError(f"Functions failed to load: {exec_result.output}")
|
raise ValueError(f"Functions failed to load: {exec_result.output}")
|
||||||
|
|
||||||
self._setup_functions_complete = True
|
self._setup_functions_complete = True
|
||||||
|
|
||||||
def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
|
def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
|
||||||
@ -201,10 +219,8 @@ $functions"""
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
CommandLineCodeResult: The result of the code execution."""
|
CommandLineCodeResult: The result of the code execution."""
|
||||||
|
|
||||||
if not self._setup_functions_complete:
|
if not self._setup_functions_complete:
|
||||||
self._setup_functions()
|
self._setup_functions()
|
||||||
|
|
||||||
return self._execute_code_dont_check_setup(code_blocks)
|
return self._execute_code_dont_check_setup(code_blocks)
|
||||||
|
|
||||||
def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
|
def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
|
||||||
@ -229,6 +245,7 @@ $functions"""
|
|||||||
logs_all += "\n" + f"unknown language {lang}"
|
logs_all += "\n" + f"unknown language {lang}"
|
||||||
break
|
break
|
||||||
|
|
||||||
|
execute_code = self.execution_policies.get(lang, False)
|
||||||
try:
|
try:
|
||||||
# Check if there is a filename comment
|
# Check if there is a filename comment
|
||||||
filename = _get_file_name_from_content(code, self._work_dir)
|
filename = _get_file_name_from_content(code, self._work_dir)
|
||||||
@ -239,15 +256,19 @@ $functions"""
|
|||||||
# create a file with an automatically generated name
|
# create a file with an automatically generated name
|
||||||
code_hash = md5(code.encode()).hexdigest()
|
code_hash = md5(code.encode()).hexdigest()
|
||||||
filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
|
filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
|
||||||
|
|
||||||
written_file = (self._work_dir / filename).resolve()
|
written_file = (self._work_dir / filename).resolve()
|
||||||
with written_file.open("w", encoding="utf-8") as f:
|
with written_file.open("w", encoding="utf-8") as f:
|
||||||
f.write(code)
|
f.write(code)
|
||||||
file_names.append(written_file)
|
file_names.append(written_file)
|
||||||
|
|
||||||
program = sys.executable if lang.startswith("python") else _cmd(lang)
|
if not execute_code:
|
||||||
cmd = [program, str(written_file.absolute())]
|
# Save-only policy for this language: log where the file was written and skip execution.
|
||||||
|
logs_all += f"Code saved to {str(written_file)}\n"
|
||||||
|
exitcode = 0
|
||||||
|
continue
|
||||||
|
|
||||||
|
program = _cmd(lang)
|
||||||
|
cmd = [program, str(written_file.absolute())]
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
|
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
|
||||||
|
|||||||
@ -3,23 +3,31 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
filename_patterns = [
|
||||||
|
re.compile(r"^<!-- (filename:)?(.+?) -->", re.DOTALL),
|
||||||
|
re.compile(r"^/\* (filename:)?(.+?) \*/", re.DOTALL),
|
||||||
|
re.compile(r"^// (filename:)?(.+?)$", re.DOTALL),
|
||||||
|
re.compile(r"^# (filename:)?(.+?)$", re.DOTALL),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# Raises ValueError if the file is not in the workspace
|
# Raises ValueError if the file is not in the workspace
|
||||||
def _get_file_name_from_content(code: str, workspace_path: Path) -> Optional[str]:
|
def _get_file_name_from_content(code: str, workspace_path: Path) -> Optional[str]:
|
||||||
first_line = code.split("\n")[0]
|
first_line = code.split("\n")[0].strip()
|
||||||
# TODO - support other languages
|
# TODO - support other languages
|
||||||
if first_line.startswith("# filename:"):
|
for pattern in filename_patterns:
|
||||||
filename = first_line.split(":")[1].strip()
|
matches = pattern.match(first_line)
|
||||||
|
if matches is not None:
|
||||||
# Handle relative paths in the filename
|
filename = matches.group(2).strip()
|
||||||
path = Path(filename)
|
|
||||||
if not path.is_absolute():
|
|
||||||
path = workspace_path / path
|
|
||||||
path = path.resolve()
|
|
||||||
# Throws an error if the file is not in the workspace
|
|
||||||
relative = path.relative_to(workspace_path.resolve())
|
|
||||||
return str(relative)
|
|
||||||
|
|
||||||
|
# Handle relative paths in the filename
|
||||||
|
path = Path(filename)
|
||||||
|
if not path.is_absolute():
|
||||||
|
path = workspace_path / path
|
||||||
|
path = path.resolve()
|
||||||
|
# Throws an error if the file is not in the workspace
|
||||||
|
relative = path.relative_to(workspace_path.resolve())
|
||||||
|
return str(relative)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -26,6 +26,34 @@ WINDOWS_SHELLS = ["ps1", "pwsh", "powershell"]
|
|||||||
PYTHON_VARIANTS = ["python", "Python", "py"]
|
PYTHON_VARIANTS = ["python", "Python", "py"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"lang, should_execute",
|
||||||
|
[
|
||||||
|
("python", False), # Python should not execute
|
||||||
|
("bash", False), # Bash should not execute (the policy passed below disables it)
|
||||||
|
("html", False), # HTML should not execute
|
||||||
|
("javascript", False), # JavaScript should not execute
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_execution_policy_enforcement(lang, should_execute):
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
executor = LocalCommandLineCodeExecutor(
|
||||||
|
work_dir=temp_dir,
|
||||||
|
execution_policies={"python": False, "bash": False, "html": False, "javascript": False, "css": False},
|
||||||
|
)
|
||||||
|
code = "print('Hello, world!')" if lang == "python" else "echo 'Hello, world!'"
|
||||||
|
code_block = CodeBlock(code=code, language=lang)
|
||||||
|
result = executor.execute_code_blocks([code_block])
|
||||||
|
|
||||||
|
if should_execute:
|
||||||
|
assert "Hello, world!" in result.output, f"Expected execution for {lang}, but it didn't execute."
|
||||||
|
else:
|
||||||
|
assert "Hello, world!" not in result.output, f"Expected no execution for {lang}, but it executed."
|
||||||
|
|
||||||
|
# Ensure files are saved regardless of execution
|
||||||
|
assert result.code_file is not None, f"Expected code file to be saved for {lang}, but it wasn't."
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("cls", classes_to_test)
|
@pytest.mark.parametrize("cls", classes_to_test)
|
||||||
def test_is_code_executor(cls) -> None:
|
def test_is_code_executor(cls) -> None:
|
||||||
assert isinstance(cls, CodeExecutor)
|
assert isinstance(cls, CodeExecutor)
|
||||||
@ -114,6 +142,87 @@ def _test_execute_code(py_variant, executor: CodeExecutor) -> None:
|
|||||||
assert file_line.strip() == code_line.strip()
|
assert file_line.strip() == code_line.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_commandline_code_executor_save_files() -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
executor = LocalCommandLineCodeExecutor(work_dir=temp_dir)
|
||||||
|
_test_save_files(executor, save_file_only=False)
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_commandline_code_executor_save_files_only() -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
# Using execution_policies to specify that no languages should execute
|
||||||
|
executor = LocalCommandLineCodeExecutor(
|
||||||
|
work_dir=temp_dir,
|
||||||
|
execution_policies={"python": False, "bash": False, "javascript": False, "html": False, "css": False},
|
||||||
|
)
|
||||||
|
_test_save_files(executor, save_file_only=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _test_save_files(executor: CodeExecutor, save_file_only: bool) -> None:
|
||||||
|
|
||||||
|
def _check_output(code_result: CodeBlock, expected_output: str) -> None:
|
||||||
|
if save_file_only:
|
||||||
|
return expected_output not in code_result.output
|
||||||
|
else:
|
||||||
|
return expected_output in code_result.output
|
||||||
|
|
||||||
|
# Test executable code block.
|
||||||
|
|
||||||
|
# Test saving to a given filename, Python.
|
||||||
|
code_blocks = [CodeBlock(code="# filename: test.py\nimport sys; print('hello world!')", language="python")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert (
|
||||||
|
code_result.exit_code == 0 and _check_output(code_result, "hello world!") and code_result.code_file is not None
|
||||||
|
)
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.py"
|
||||||
|
|
||||||
|
# Test saving to a given filename without "filename" prefix, Python.
|
||||||
|
code_blocks = [CodeBlock(code="# test.py\nimport sys; print('hello world!')", language="python")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert (
|
||||||
|
code_result.exit_code == 0 and _check_output(code_result, "hello world!") and code_result.code_file is not None
|
||||||
|
)
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.py"
|
||||||
|
|
||||||
|
# Test non-executable code block.
|
||||||
|
|
||||||
|
# Test saving to a given filename, Javascript.
|
||||||
|
code_blocks = [CodeBlock(code="// filename: test.js\nconsole.log('hello world!')", language="javascript")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.js"
|
||||||
|
|
||||||
|
# Test saving to a given filename without "filename" prefix, Javascript.
|
||||||
|
code_blocks = [CodeBlock(code="// test.js\nconsole.log('hello world!')", language="javascript")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.js"
|
||||||
|
|
||||||
|
# Test saving to a given filename, CSS.
|
||||||
|
code_blocks = [CodeBlock(code="/* filename: test.css */\nh1 { color: red; }", language="css")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.css"
|
||||||
|
|
||||||
|
# Test saving to a given filename without "filename" prefix, CSS.
|
||||||
|
code_blocks = [CodeBlock(code="/* test.css */\nh1 { color: red; }", language="css")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.css"
|
||||||
|
|
||||||
|
# Test saving to a given filename, HTML.
|
||||||
|
code_blocks = [CodeBlock(code="<!-- filename: test.html -->\n<h1>hello world!</h1>", language="html")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.html"
|
||||||
|
|
||||||
|
# Test saving to a given filename without "filename" prefix, HTML.
|
||||||
|
code_blocks = [CodeBlock(code="<!-- test.html -->\n<h1>hello world!</h1>", language="html")]
|
||||||
|
code_result = executor.execute_code_blocks(code_blocks)
|
||||||
|
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||||
|
assert os.path.basename(code_result.code_file) == "test.html"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("cls", classes_to_test)
|
@pytest.mark.parametrize("cls", classes_to_test)
|
||||||
def test_commandline_code_executor_timeout(cls) -> None:
|
def test_commandline_code_executor_timeout(cls) -> None:
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user