mirror of
https://github.com/microsoft/autogen.git
synced 2025-07-11 11:02:08 +00:00

* Move from tomllib to tomli * added example code for magentic-one + code comments * adding benchmarks temporarily * add license for datasets * revert changes to magentic-one * change license location --------- Co-authored-by: Ryan Sweet <rysweet@microsoft.com>
123 lines
4.2 KiB
Python
123 lines
4.2 KiB
Python
#
# Run this file to download the WebArena test configuration, and create corresponding testbed scenarios:
#
# (output: one .jsonl file per template and page group under ../Tasks)
#
|
|
|
|
import requests
|
|
import tarfile
|
|
import hashlib
|
|
import io
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
# Raw WebArena test configuration, published in the webarena repository.
URL = "https://raw.githubusercontent.com/web-arena-x/webarena/main/config_files/test.raw.json"

# Absolute location of this script, used to resolve sibling directories.
SCRIPT_PATH = os.path.realpath(__file__)
SCRIPT_NAME = os.path.basename(SCRIPT_PATH)
SCRIPT_DIR = os.path.dirname(SCRIPT_PATH)

# The scenario root is the parent of this script's directory; all inputs and
# outputs live in fixed subdirectories under it.
SCENARIO_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.path.pardir))
TEMPLATES_DIR = os.path.join(SCENARIO_DIR, "Templates")  # per-agent scenario templates
TASKS_DIR = os.path.join(SCENARIO_DIR, "Tasks")  # generated .jsonl scenario files
DOWNLOADS_DIR = os.path.join(SCENARIO_DIR, "Downloads")  # cached dataset download
|
def download():
    """Download the WebArena dataset (if not already downloaded).

    Returns:
        The parsed JSON list of WebArena problem instances.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    os.makedirs(DOWNLOADS_DIR, exist_ok=True)

    json_file = os.path.join(DOWNLOADS_DIR, "test.raw.json")

    if not os.path.isfile(json_file):
        # Stream the dataset; fail fast on HTTP errors (4xx/5xx).
        response = requests.get(URL, stream=True)
        response.raise_for_status()

        # Write to a temporary file first and rename atomically at the end,
        # so an interrupted download never leaves a truncated test.raw.json
        # that a later run would mistake for a complete one.
        tmp_file = json_file + ".part"
        with open(tmp_file, "wb") as fh:
            for chunk in response.iter_content(chunk_size=512):
                fh.write(chunk)
        os.replace(tmp_file, json_file)

    # Load and return the cached problems.
    with open(json_file, "rb") as fh:
        return json.load(fh)
|
|
|
|
|
|
def create_jsonl(name, tasks, template):
    """Create a JSONL scenario file for a list of WebArena tasks.

    Args:
        name: Base name of the output file (written as <name>.jsonl in TASKS_DIR).
        tasks: List of WebArena task dicts (as returned by download()).
        template: Path of the template directory to pair with the Common template.
    """
    # Create the tasks directory if it doesn't exist.
    os.makedirs(TASKS_DIR, exist_ok=True)

    # Only these fields are surfaced to the agent in the task prompt; the full
    # task record is written separately below.
    prompt_fields = ["task_id", "intent_template_id", "sites", "require_login", "start_url", "geolocation", "intent"]

    with open(os.path.join(TASKS_DIR, name + ".jsonl"), "wt") as fh:
        for task in tasks:
            print(f"Converting: {name}, {task['task_id']}")

            task_prompt = {field: task[field] for field in prompt_fields}

            record = {
                "id": str(task["task_id"]),
                # Every scenario combines the shared Common template with the
                # caller-selected template directory.
                "template": [os.path.join(TEMPLATES_DIR, "Common"), template],
                "substitutions": {
                    "task_prompt.json.txt": {"__TASK_PROMPT__": json.dumps(task_prompt, indent=4)},
                    "full_task.json.txt": {"__FULL_TASK__": json.dumps(task, indent=4)},
                },
            }

            fh.write(json.dumps(record).strip() + "\n")
|
|
|
|
|
|
###############################################################################
|
|
def main():
    """Download the WebArena tasks and emit one scenario file per
    (template, site group, split) combination."""
    tasks = download()

    # List all directories in the Templates directory and populate a
    # dictionary mapping a whitespace-stripped name to the directory path.
    # "Common" is shared by every scenario and added inside create_jsonl,
    # so it is skipped here.
    templates = {}
    for entry in os.scandir(TEMPLATES_DIR):
        if entry.is_dir():
            if entry.name == "Common":
                continue
            templates[re.sub(r"\s", "", entry.name)] = entry.path

    # Divide the tasks by their websites and into validation vs test splits.
    page_groups = dict()
    for task in tasks:
        # We don't know how the intent ids are distributed, so hash them to
        # get a uniform distribution across the two splits.
        template_hash = hashlib.md5(str(task["intent_template_id"]).encode("utf-8")).hexdigest()

        # The full hash has 32 hexadecimal digits; checking whether the first
        # digit falls in (0-7) vs (8-f) yields a deterministic 50/50 split.
        task_set = "validation" if template_hash[0] in "01234567" else "test"

        # Tasks spanning multiple sites share a single "several_sites" bucket.
        key = task["sites"][0]
        if len(task["sites"]) > 1:
            key = "several_sites"
        key = task_set + "_" + key

        if key not in page_groups:
            page_groups[key] = list()
        page_groups[key].append(task)

    # Create one jsonl file per (template, page group) pair.
    for template_name, template_path in templates.items():
        for group_name, group_tasks in page_groups.items():
            create_jsonl(f"webarena__{group_name}_{template_name}", group_tasks, template_path)
|
|
|
|
|
|
# Entry point: run only when executed directly as a script. The extra
# __package__ check skips execution when the module is loaded as part of a
# package (e.g. via relative import machinery).
if __name__ == "__main__" and __package__ is None:
    main()
|