autogen/samples/tools/autogenbench/scenarios/AutoGPT/Scripts/init_tasks.py

#
# Run this file to convert the AutoGPT challenges (../Challenges/*/data.json) into testbed scenarios:
# (one ../Tasks/autogpt__<template>.jsonl file is written for every directory found in ../Templates)
#

import base64
import glob
import json
import os
import re
import sys

from huggingface_hub import snapshot_download

# Symlink-resolved location of this script, split into its directory and file name.
SCRIPT_PATH = os.path.realpath(__file__)
SCRIPT_DIR, SCRIPT_NAME = os.path.split(SCRIPT_PATH)

# The scenario root is the parent of the directory that holds this script;
# the template, task and challenge folders all live directly beneath it.
SCENARIO_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.pardir))
TEMPLATES_DIR, TASKS_DIR, CHALLENGES_DIR = (
    os.path.join(SCENARIO_DIR, leaf) for leaf in ("Templates", "Tasks", "Challenges")
)


def create_jsonl(name, template, *, tasks_dir=None, challenges_dir=None):
    """Write ``<tasks_dir>/<name>.jsonl`` with one scenario record per challenge.

    Every ``<challenges_dir>/*/data.json`` file is converted into a single
    JSON line holding the task id, the list of paths to copy into the
    scenario, and the placeholder substitutions for the template files.
    Challenges are processed in sorted path order so the generated file is
    reproducible from run to run.

    Args:
        name: Base name (without extension) of the JSONL file to create.
        template: Path of the template directory; it is always the first
            entry of each record's ``"template"`` list.
        tasks_dir: Directory that receives the JSONL file. Defaults to the
            module-level ``TASKS_DIR``. It is created, including missing
            parents, when it does not exist yet.
        challenges_dir: Directory whose immediate sub-directories each hold
            a ``data.json``. Defaults to the module-level ``CHALLENGES_DIR``.

    Raises:
        KeyError: If a ``data.json`` lacks ``name``, ``task``, ``ground``,
            ``ground["files"]`` or ``ground["eval"]["type"]``.
        IndexError: If ``ground["files"]`` is an empty list.
    """
    if tasks_dir is None:
        tasks_dir = TASKS_DIR
    if challenges_dir is None:
        challenges_dir = CHALLENGES_DIR

    os.makedirs(tasks_dir, exist_ok=True)

    # glob yields matches in arbitrary, filesystem-dependent order; sort them
    # so the line order of the generated JSONL file is deterministic.
    data_paths = sorted(glob.glob(os.path.join(challenges_dir, "*", "data.json")))

    # Explicit UTF-8 keeps reading/writing independent of the platform locale.
    with open(os.path.join(tasks_dir, name + ".jsonl"), "wt", encoding="utf-8") as fh:
        for data_path in data_paths:
            print("Converting data path: ", data_path)
            workspace = os.path.dirname(data_path)
            artifacts = os.path.join(workspace, "artifacts_in")
            custom_python = os.path.join(workspace, "custom_python")

            with open(data_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            ground = data["ground"]
            should_contain = ground.get("should_contain", [])
            should_not_contain = ground.get("should_not_contain", [])
            case_sensitive = ground.get("case_sensitive", False)

            # Figure out what files we need to copy: the template itself,
            # plus optional [source, destination] pairs for per-challenge
            # folders that exist on disk.
            template_cp_list = [template]
            if os.path.exists(artifacts):
                template_cp_list.append([artifacts, "coding"])
            if os.path.exists(custom_python):
                template_cp_list.append([custom_python, "custom_python"])

            record = {
                "id": data["name"],
                "template": template_cp_list,
                "substitutions": {
                    "scenario.py": {
                        "__TASK__": data["task"],
                    },
                    "check.py": {
                        # Only the first entry of the file list is used.
                        "__FILE_PATTERN__": ground["files"][0],
                        "__EVAL_TYPE__": ground["eval"]["type"],
                        "__CASE_SENSITIVE__": str(case_sensitive),
                    },
                    "should_contain.json.txt": {
                        "__CONTAIN__": json.dumps(should_contain),  # Double-encoded
                    },
                    "should_not_contain.json.txt": {
                        "__NO_CONTAIN__": json.dumps(should_not_contain),  # Double-encoded
                    },
                },
            }

            fh.write(json.dumps(record).strip() + "\n")


###############################################################################
def main():
    """Generate one ``autogpt__<template>`` JSONL task file per template directory."""
    # Map each template directory name (with all whitespace stripped) to its path.
    templates = {
        re.sub(r"\s", "", entry.name): entry.path
        for entry in os.scandir(TEMPLATES_DIR)
        if entry.is_dir()
    }

    # Each template needs a "coding" sub-directory; these are usually empty
    # and therefore left out of the repo, so create any that are missing.
    for template_path in templates.values():
        coding_dir = os.path.join(template_path, "coding")
        if os.path.isdir(coding_dir):
            continue
        os.mkdir(coding_dir)

    # Emit the scenario file for every discovered template.
    for template_name, template_path in templates.items():
        create_jsonl(f"autogpt__{template_name}", template_path)


# Run the generator only when this module is the program entry point and has
# no package context (``__package__`` is None); importing it does nothing.
if __package__ is None and __name__ == "__main__":
    main()
Introduces AutoGenBench (#1048) * Initial commit of AutoGenBench * wording * typo * pre-commit reformulation * Updated README to point to contributor's guide earlier. * Simplified the description of the JSON format. * Added print statements to indicate when run.sh and scenario.py are starting. * Added SocietyOfMind scenario to GAIA. * Pointing autogenbench clone command to the latest branch. * Temporarily disable subsample option. * Updated the GAIA readme to specify how to define a BING API key. * Fixed and re-enabled the subsample option. * Added a draft of a blog post. * Updated authors. * Incorporating Gagan's feedback. * Fixed code formatting. * Updated the help string in the docs. * Light editing of the AutoGenBench blogpost. * Support filtering on model tags. * Added websurfer dependencies to Dockerfile. * Renamed testbed -> autogenbench * Attempting to fix formatting. * Added more gracefull handling of task timeouts (the script is allowed to terminate before Docker is stopped). * Updated the blogpost based on Saleema's and Julia's feedback. * Fixed formatting... again. * Added a main MANIFEST to list available scenarios. * Limit main manifest to directories. * Manifests now use relative paths. * All manifests are now relative. * Updated the contributing guide, and address windows path issues. * Updated the version. Fixed formatting. * Fixed formatting. * De-listing Examples, since it has no clear tabulate criteria. * Updated email in pyproject * typo in blogpost * wording --------- Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> Co-authored-by: Qingyun Wu <qingyun0327@gmail.com> 2024-01-25 16:46:58 -08:00			`#`
			`# Run this file to download the human_eval dataset, and create a corresponding testbed scenario:`
			`# (default: ../scenarios/human_eval_two_agents_gpt4.jsonl and ./scenarios/human_eval_two_agents_gpt35.jsonl)`
			`#`

Add isort (#2265) * Add isort * Apply isort on py files * Fix circular import * Fix format for notebooks * Fix format --------- Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2024-04-05 10:26:06 +08:00			`import base64`
			`import glob`
Introduces AutoGenBench (#1048) * Initial commit of AutoGenBench * wording * typo * pre-commit reformulation * Updated README to point to contributor's guide earlier. * Simplified the description of the JSON format. * Added print statements to indicate when run.sh and scenario.py are starting. * Added SocietyOfMind scenario to GAIA. * Pointing autogenbench clone command to the latest branch. * Temporarily disable subsample option. * Updated the GAIA readme to specify how to define a BING API key. * Fixed and re-enabled the subsample option. * Added a draft of a blog post. * Updated authors. * Incorporating Gagan's feedback. * Fixed code formatting. * Updated the help string in the docs. * Light editing of the AutoGenBench blogpost. * Support filtering on model tags. * Added websurfer dependencies to Dockerfile. * Renamed testbed -> autogenbench * Attempting to fix formatting. * Added more gracefull handling of task timeouts (the script is allowed to terminate before Docker is stopped). * Updated the blogpost based on Saleema's and Julia's feedback. * Fixed formatting... again. * Added a main MANIFEST to list available scenarios. * Limit main manifest to directories. * Manifests now use relative paths. * All manifests are now relative. * Updated the contributing guide, and address windows path issues. * Updated the version. Fixed formatting. * Fixed formatting. * De-listing Examples, since it has no clear tabulate criteria. * Updated email in pyproject * typo in blogpost * wording --------- Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> Co-authored-by: Qingyun Wu <qingyun0327@gmail.com> 2024-01-25 16:46:58 -08:00			`import json`
			`import os`
Version 0.0.2 of Autogenbench (#1548) * Prints the version of AutoGenBench from the command line, closing i1458 * Added autogenbench version to timestamp.txt * Attempting to fix formatting. * Add a gitignore for autogenbench * Generalize to read all template dirs from Templates * AutoGenBench logs telemetry when available. * Remove spaces if present from template names. * Bump version. * Fixed formatting. * Allow native warning to be skipped. Mount autogen repo in Docker if it can be found (experimental). * Native execution now occurs in a venv. * Bump version. * Fixed a prompt escaping bug evident in GAIA task '6f37996b-2ac7-44b0-8e68-6d28256631b4' * Updated all scenarios to use template discovery. * Update with main version of runtime_logging. --------- Co-authored-by: gagb <gagb@users.noreply.github.com> 2024-02-24 10:12:57 -08:00			`import re`
Add isort (#2265) * Add isort * Apply isort on py files * Fix circular import * Fix format for notebooks * Fix format --------- Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2024-04-05 10:26:06 +08:00			`import sys`

Introduces AutoGenBench (#1048) * Initial commit of AutoGenBench * wording * typo * pre-commit reformulation * Updated README to point to contributor's guide earlier. * Simplified the description of the JSON format. * Added print statements to indicate when run.sh and scenario.py are starting. * Added SocietyOfMind scenario to GAIA. * Pointing autogenbench clone command to the latest branch. * Temporarily disable subsample option. * Updated the GAIA readme to specify how to define a BING API key. * Fixed and re-enabled the subsample option. * Added a draft of a blog post. * Updated authors. * Incorporating Gagan's feedback. * Fixed code formatting. * Updated the help string in the docs. * Light editing of the AutoGenBench blogpost. * Support filtering on model tags. * Added websurfer dependencies to Dockerfile. * Renamed testbed -> autogenbench * Attempting to fix formatting. * Added more gracefull handling of task timeouts (the script is allowed to terminate before Docker is stopped). * Updated the blogpost based on Saleema's and Julia's feedback. * Fixed formatting... again. * Added a main MANIFEST to list available scenarios. * Limit main manifest to directories. * Manifests now use relative paths. * All manifests are now relative. * Updated the contributing guide, and address windows path issues. * Updated the version. Fixed formatting. * Fixed formatting. * De-listing Examples, since it has no clear tabulate criteria. * Updated email in pyproject * typo in blogpost * wording --------- Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> Co-authored-by: Qingyun Wu <qingyun0327@gmail.com> 2024-01-25 16:46:58 -08:00			`from huggingface_hub import snapshot_download`

			`SCRIPT_PATH = os.path.realpath(__file__)`
			`SCRIPT_NAME = os.path.basename(SCRIPT_PATH)`
			`SCRIPT_DIR = os.path.dirname(SCRIPT_PATH)`

			`SCENARIO_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.path.pardir))`
			`TEMPLATES_DIR = os.path.join(SCENARIO_DIR, "Templates")`
			`TASKS_DIR = os.path.join(SCENARIO_DIR, "Tasks")`
			`CHALLENGES_DIR = os.path.join(SCENARIO_DIR, "Challenges")`


			`def create_jsonl(name, template):`
			`"""Creates a JSONL scenario file with a given name, and template path."""`

			`if not os.path.isdir(TASKS_DIR):`
			`os.mkdir(TASKS_DIR)`

			`with open(os.path.join(TASKS_DIR, name + ".jsonl"), "wt") as fh:`
			`data_paths = glob.glob(str(CHALLENGES_DIR + "/*/data.json"))`
			`for data_path in data_paths:`
			`print("Converting data path: ", data_path)`
			`workspace = os.path.dirname(data_path)`
			`artifacts = os.path.join(workspace, "artifacts_in")`
			`custom_python = os.path.join(workspace, "custom_python")`

			`with open(data_path, "r") as f:`
			`data = json.load(f)`

			`should_contain = data["ground"].get("should_contain", [])`
			`should_not_contain = data["ground"].get("should_not_contain", [])`
			`case_sensitive = data["ground"].get("case_sensitive", False)`

			`# Figure out what files we need to copy`
			`template_cp_list = [template]`

			`# Artifacts in`
			`if os.path.exists(artifacts):`
			`template_cp_list.append(`
			`[`
			`artifacts,`
			`"coding",`
			`]`
			`)`

			`# Custom python`
			`if os.path.exists(custom_python):`
			`template_cp_list.append(`
			`[`
			`custom_python,`
			`"custom_python",`
			`]`
			`)`

			`record = {`
			`"id": data["name"],`
			`"template": template_cp_list,`
			`"substitutions": {`
			`"scenario.py": {`
			`"__TASK__": data["task"],`
			`},`
			`"check.py": {`
			`"__FILE_PATTERN__": data["ground"]["files"][0],`
			`"__EVAL_TYPE__": data["ground"]["eval"]["type"],`
			`"__CASE_SENSITIVE__": str(case_sensitive),`
			`},`
			`"should_contain.json.txt": {`
			`"__CONTAIN__": json.dumps(should_contain), # Double-encoded`
			`},`
			`"should_not_contain.json.txt": {`
			`"__NO_CONTAIN__": json.dumps(should_not_contain), # Double-encoded`
			`},`
			`},`
			`}`

			`fh.write(json.dumps(record).strip() + "\n")`


			`###############################################################################`
			`def main():`
Version 0.0.2 of Autogenbench (#1548) * Prints the version of AutoGenBench from the command line, closing i1458 * Added autogenbench version to timestamp.txt * Attempting to fix formatting. * Add a gitignore for autogenbench * Generalize to read all template dirs from Templates * AutoGenBench logs telemetry when available. * Remove spaces if present from template names. * Bump version. * Fixed formatting. * Allow native warning to be skipped. Mount autogen repo in Docker if it can be found (experimental). * Native execution now occurs in a venv. * Bump version. * Fixed a prompt escaping bug evident in GAIA task '6f37996b-2ac7-44b0-8e68-6d28256631b4' * Updated all scenarios to use template discovery. * Update with main version of runtime_logging. --------- Co-authored-by: gagb <gagb@users.noreply.github.com> 2024-02-24 10:12:57 -08:00			`# list all directories in the Templates directory`
			`# and populate a dictionary with the name and path`
			`templates = {}`
			`for entry in os.scandir(TEMPLATES_DIR):`
			`if entry.is_dir():`
			`templates[re.sub(r"\s", "", entry.name)] = entry.path`
Introduces AutoGenBench (#1048) * Initial commit of AutoGenBench * wording * typo * pre-commit reformulation * Updated README to point to contributor's guide earlier. * Simplified the description of the JSON format. * Added print statements to indicate when run.sh and scenario.py are starting. * Added SocietyOfMind scenario to GAIA. * Pointing autogenbench clone command to the latest branch. * Temporarily disable subsample option. * Updated the GAIA readme to specify how to define a BING API key. * Fixed and re-enabled the subsample option. * Added a draft of a blog post. * Updated authors. * Incorporating Gagan's feedback. * Fixed code formatting. * Updated the help string in the docs. * Light editing of the AutoGenBench blogpost. * Support filtering on model tags. * Added websurfer dependencies to Dockerfile. * Renamed testbed -> autogenbench * Attempting to fix formatting. * Added more gracefull handling of task timeouts (the script is allowed to terminate before Docker is stopped). * Updated the blogpost based on Saleema's and Julia's feedback. * Fixed formatting... again. * Added a main MANIFEST to list available scenarios. * Limit main manifest to directories. * Manifests now use relative paths. * All manifests are now relative. * Updated the contributing guide, and address windows path issues. * Updated the version. Fixed formatting. * Fixed formatting. * De-listing Examples, since it has no clear tabulate criteria. * Updated email in pyproject * typo in blogpost * wording --------- Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> Co-authored-by: Qingyun Wu <qingyun0327@gmail.com> 2024-01-25 16:46:58 -08:00
			`# Add coding directories if needed (these are usually empty and left out of the repo)`
			`for template in templates.values():`
			`code_dir_path = os.path.join(template, "coding")`
			`if not os.path.isdir(code_dir_path):`
			`os.mkdir(code_dir_path)`

			`# Create the various combinations of [models] x [templates]`
			`for t in templates.items():`
			`create_jsonl(`
			`f"autogpt__{t[0]}",`
			`t[1],`
			`)`


			`if __name__ == "__main__" and __package__ is None:`
			`main()`