autogen/samples/tools/testbed/utils/prepare_autogpt.py

import base64
import glob
import json
import os
import shutil

# Each entry pairs the model substituted into scenario.py with the JSONL file
# (relative to the AutoGPT scenario folder) that receives its records.
MODEL_OUTPUTS = (
    ("gpt-35-turbo-16k", "autogpt_twoagent_gpt35.jsonl"),
    ("gpt-4-1106-preview", "autogpt_twoagent_gpt4.jsonl"),
)


def encode_base64_list(words):
    """Return every string in ``words`` as the base64 text of its UTF-8 bytes.

    The 'should_contain' / 'should_not_contain' fields may contain escape
    characters, which cause problems once the list is passed through ``str()``
    and ``eval()``; base64 text sidesteps that.
    """
    return [base64.b64encode(word.encode("utf-8")).decode("utf-8") for word in words]


def build_record(data, model, has_artifacts):
    """Build the scenario record for one challenge and one model.

    Args:
        data: The parsed ``data.json`` of a challenge. Reads ``name``, ``task``
            and ``ground`` (``files``, ``eval.type`` and the optional
            ``should_contain``, ``should_not_contain``, ``case_sensitive``).
        model: Value substituted for ``__MODEL__`` in scenario.py.
        has_artifacts: Whether the challenge has an 'artifacts_in' directory;
            when false ``__TARGET_FOLDER__`` is the empty string.

    Returns:
        A JSON-serialisable dict with the keys ``id``, ``template`` and
        ``substitutions``.

    Raises:
        KeyError: If a required field is missing from ``data``.
        IndexError: If ``ground.files`` is empty.
    """
    ground = data["ground"]
    should_contain = encode_base64_list(ground.get("should_contain", []))
    should_not_contain = encode_base64_list(ground.get("should_not_contain", []))
    case_sensitive = ground.get("case_sensitive", False)

    return {
        "id": data["name"],
        "template": "Templates/TwoAgents",
        "substitutions": {
            "scenario.py": {
                "__MODEL__": model,
                "__TASK__": data["task"],
                "__TARGET_FOLDER__": f"file/{data['name']}" if has_artifacts else "",
            },
            "check.py": {
                # Only the first entry of 'files' is used as the pattern.
                "__FILE_PATTERN__": ground["files"][0],
                "__EVAL_TYPE__": ground["eval"]["type"],
                "__CASE_SENSITIVE__": str(case_sensitive),
            },
            "should_contain.txt": {
                "__CONTAIN__": str(should_contain),
            },
            "should_not_contain.txt": {
                "__NO_CONTAIN__": str(should_not_contain),
            },
        },
    }


def copy_challenge_files(workspace, save_path, name):
    """Copy a challenge's support files into the TwoAgents template.

    Args:
        workspace: Directory of the challenge (the folder holding data.json).
        save_path: The AutoGPT scenario folder that contains ``Templates``.
        name: Challenge name, used as the per-challenge artifact folder name.

    Returns:
        True when ``workspace`` contains an 'artifacts_in' entry.
    """
    # 1. 'artifacts_in' directory: all the files needed for QA
    artifacts_source = os.path.join(workspace, "artifacts_in")
    has_artifacts = os.path.exists(artifacts_source)
    if has_artifacts:
        target_folder = os.path.join(save_path, "Templates/TwoAgents/coding/file", name)
        # copytree needs a fresh destination, so drop any copy from an earlier run.
        if os.path.exists(target_folder):
            shutil.rmtree(target_folder)
        shutil.copytree(artifacts_source, target_folder)

    # 2. 'custom_python' directory: all the files needed for testing python code
    custom_source = os.path.join(workspace, "custom_python")
    if os.path.exists(custom_source):
        # All challenges share one folder here, so same-named files overwrite each other.
        target_folder = os.path.join(save_path, "Templates/TwoAgents/custom_python")
        os.makedirs(target_folder, exist_ok=True)
        for filename in os.listdir(custom_source):
            shutil.copy(os.path.join(custom_source, filename), os.path.join(target_folder, filename))

    return has_artifacts


def convert_challenge(data_path, save_path):
    """Convert one challenge ``data.json`` into testbed scenario records.

    Copies the challenge's support files into the template folders, then
    appends one JSON line per entry of ``MODEL_OUTPUTS`` to the matching file
    under ``save_path``.

    Args:
        data_path: Path to the challenge's ``data.json``.
        save_path: The AutoGPT scenario folder (holds ``Templates`` and the
            generated ``.jsonl`` files).
    """
    workspace = os.path.dirname(data_path)

    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    has_artifacts = copy_challenge_files(workspace, save_path, data["name"])

    for model, output_name in MODEL_OUTPUTS:
        record = build_record(data, model, has_artifacts)
        # Append mode: records accumulate, so running the script again adds
        # the same lines a second time.
        with open(os.path.join(save_path, output_name), "a", encoding="utf-8") as f:
            f.write(json.dumps(record).strip() + "\n")


current_file_dir = os.path.dirname(os.path.abspath(__file__))
challenge_path = os.path.join(os.path.dirname(current_file_dir), "scenarios/AutoGPT/challenges")
data_paths = glob.glob(str(challenge_path) + "/*/data.json")
# Loop-invariant, so computed once rather than per challenge.
save_path = os.path.join(os.path.dirname(current_file_dir), "scenarios/AutoGPT")

for data_path in data_paths:
    print("Converting data path: ", data_path)
    convert_challenge(data_path, save_path)
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`import base64`
			`import glob`
			`import json`
			`import os`
			`import shutil`

Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`current_file_dir = os.path.dirname(os.path.abspath(__file__))`
			`challenge_path = os.path.join(os.path.dirname(current_file_dir), "scenarios/AutoGPT/challenges")`
			`data_paths = glob.glob(str(challenge_path) + "/*/data.json")`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00
			`for data_path in data_paths:`
			`print("Converting data path: ", data_path)`
			`workspace = os.path.dirname(data_path)`

			`with open(data_path, "r") as f:`
			`data = json.load(f)`

			`should_contain = data["ground"].get("should_contain", [])`
			`should_not_contain = data["ground"].get("should_not_contain", [])`
			`case_sensitive = data["ground"].get("case_sensitive", False)`

			`# since 'should_contain' field may contain escape characters, this can cause problems when using str() method and eval(), I used base64 encode to avoid such problems`
			`should_contain_base64 = []`
			`for word in should_contain:`
			`encoded_word = base64.b64encode(word.encode("utf-8")).decode("utf-8")`
			`should_contain_base64.append(encoded_word)`

			`should_not_contain_base64 = []`
			`for word in should_not_contain:`
			`encoded_word = base64.b64encode(word.encode("utf-8")).decode("utf-8")`
			`should_not_contain_base64.append(encoded_word)`

			`# copy all the files needed to 'coding' directory`
Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`# 1. 'artifacts_in' directory: all the files needed for QA`
			`save_path = os.path.join(os.path.dirname(current_file_dir), "scenarios/AutoGPT")`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`artifacts_in = False`
			`if os.path.exists(os.path.join(workspace, "artifacts_in")):`
			`artifacts_in = True`
Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`target_folder = os.path.join(save_path, "Templates/TwoAgents/coding/file", data["name"])`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`if os.path.exists(target_folder):`
			`shutil.rmtree(target_folder)`
			`shutil.copytree(os.path.join(workspace, "artifacts_in"), target_folder)`
			`# print(f"All the artifacts are copied from {os.path.join(workspace, 'artifacts_in')} to {target_folder}")`

Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`# 2. 'custom_python' directory: all the files needed for testing python code`
			`if os.path.exists(os.path.join(workspace, "custom_python")):`
			`target_folder = os.path.join(save_path, "Templates/TwoAgents/custom_python")`
			`if not os.path.exists(target_folder):`
			`os.makedirs(target_folder)`
			`for filename in os.listdir(os.path.join(workspace, "custom_python")):`
			`shutil.copy(os.path.join(workspace, "custom_python", filename), os.path.join(target_folder, filename))`
			`# print(f"File copied from {os.path.join(workspace, 'custom_python', filename)} to {target_folder}")`

Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`record = {`
Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`"id": data["name"],`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`"template": "Templates/TwoAgents",`
			`"substitutions": {`
			`"scenario.py": {`
Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`"__MODEL__": "gpt-35-turbo-16k",`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`"__TASK__": data["task"],`
			`"__TARGET_FOLDER__": f"file/{data['name']}" if artifacts_in else "",`
			`},`
			`"check.py": {`
			`"__FILE_PATTERN__": data["ground"]["files"][0],`
			`"__EVAL_TYPE__": data["ground"]["eval"]["type"],`
			`"__CASE_SENSITIVE__": str(case_sensitive),`
			`},`
			`"should_contain.txt": {`
			`"__CONTAIN__": str(should_contain_base64),`
			`},`
			`"should_not_contain.txt": {`
			`"__NO_CONTAIN__": str(should_not_contain_base64),`
			`},`
			`},`
			`}`
Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`with open(os.path.join(save_path, "autogpt_twoagent_gpt35.jsonl"), "a") as f:`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`f.write(json.dumps(record).strip() + "\n")`

			`record = {`
Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`"id": data["name"],`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`"template": "Templates/TwoAgents",`
			`"substitutions": {`
			`"scenario.py": {`
			`"__MODEL__": "gpt-4-1106-preview",`
			`"__TASK__": data["task"],`
			`"__TARGET_FOLDER__": f"file/{data['name']}" if artifacts_in else "",`
			`},`
			`"check.py": {`
			`"__FILE_PATTERN__": data["ground"]["files"][0],`
			`"__EVAL_TYPE__": data["ground"]["eval"]["type"],`
			`"__CASE_SENSITIVE__": str(case_sensitive),`
			`},`
			`"should_contain.txt": {`
			`"__CONTAIN__": str(should_contain_base64),`
			`},`
			`"should_not_contain.txt": {`
			`"__NO_CONTAIN__": str(should_not_contain_base64),`
			`},`
			`},`
			`}`
Add collate file and more tests from autogpt into testbed (#915) * Add collate file. * Add requirements.txt, Fix typo, Add tests * More tests. * Update check.py * Update scenario.py * Update prepare_autogpt.py * Update prepare_autogpt.py * More tasks for testset. * Add more tests. * Update docs. * Optimize file organize. 2023-12-15 00:26:30 +08:00			`with open(os.path.join(save_path, "autogpt_twoagent_gpt4.jsonl"), "a") as f:`
Testbed folders (#792) * Re-added completion logging when using older versions of autogen. * Extended scenario definitions and templating to include folders. * Prepare collate_human_eval.py for working with group chat scenarios. * Converted HumanEval to the folder-based approach, and added GroupChat scenarios. * Fixed the default termination message. * Fixed another termination condition. * Updated compatible autogen versions. * Fixed a bug in executing the finalize scripts. * Generalized the template further to support multiple folder copy operations. * Add tests from AutoGPT. * Update README.md * Fix typo * Update samples/tools/testbed/README.md --------- Co-authored-by: LeoLjl <3110503618@qq.com> Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> 2023-11-30 08:43:03 -08:00			`f.write(json.dumps(record).strip() + "\n")`