# # Run this file to download the human_eval dataset, and create a corresponding testbed scenario: # (default: ../scenarios/human_eval_two_agents_gpt4.jsonl and ./scenarios/human_eval_two_agents_gpt35.jsonl) # import requests import tarfile import hashlib import io import json import os import re import sys URL = "https://raw.githubusercontent.com/web-arena-x/webarena/main/config_files/test.raw.json" SCRIPT_PATH = os.path.realpath(__file__) SCRIPT_NAME = os.path.basename(SCRIPT_PATH) SCRIPT_DIR = os.path.dirname(SCRIPT_PATH) SCENARIO_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.path.pardir)) TEMPLATES_DIR = os.path.join(SCENARIO_DIR, "Templates") TASKS_DIR = os.path.join(SCENARIO_DIR, "Tasks") DOWNLOADS_DIR = os.path.join(SCENARIO_DIR, "Downloads") def download(): """Download the WebArena dataset (if not already downloaded). Return a JSON list of problem instances.""" if not os.path.isdir(DOWNLOADS_DIR): os.mkdir(DOWNLOADS_DIR) json_file = os.path.join(DOWNLOADS_DIR, "test.raw.json") if not os.path.isfile(json_file): # Send a HTTP request to the URL response = requests.get(URL, stream=True) response.raise_for_status() # If the HTTP request returns a status code 200, proceed with open(json_file, "wb") as fh: for chunk in response.iter_content(chunk_size=512): fh.write(chunk) # Load the problems problems = None with open(json_file, "rb") as fh: problems = json.load(fh) return problems def create_jsonl(name, tasks, template): """Creates a JSONL scenario file with a given name, dictionary of MATH problems, and template path.""" # Create a task directory if it doesn't exist if not os.path.isdir(TASKS_DIR): os.mkdir(TASKS_DIR) # Create the jsonl file prompt_fields = ["task_id", "intent_template_id", "sites", "require_login", "start_url", "geolocation", "intent"] with open(os.path.join(TASKS_DIR, name + ".jsonl"), "wt") as fh: for task in tasks: print(f"Converting: {name}, {task['task_id']}") task_prompt = {} for field in prompt_fields: task_prompt[field] = task[field] record = { "id": str(task["task_id"]), "template": [os.path.join(TEMPLATES_DIR, "Common"), template], "substitutions": { "task_prompt.json.txt": {"__TASK_PROMPT__": json.dumps(task_prompt, indent=4)}, "full_task.json.txt": {"__FULL_TASK__": json.dumps(task, indent=4)}, }, } fh.write(json.dumps(record).strip() + "\n") ############################################################################### def main(): tasks = download() # list all directories in the Templates directory # and populate a dictionary with the name and path templates = {} for entry in os.scandir(TEMPLATES_DIR): if entry.is_dir(): if entry.name == "Common": # Skip the common template, which will be included in all continue templates[re.sub(r"\s", "", entry.name)] = entry.path # Divide the tasks by their websites and if they are validation or test page_groups = dict() for task in tasks: # We don't know how the intent ids are distributed, so hash them to get a uniform distribution template_hash = hashlib.md5(str(task["intent_template_id"]).encode("utf-8")).hexdigest() # The full hash will consist of 32 hexadecimal digits. We can get a 50/50 split by checking if the first digit is in the range (0-7) vs (8-F) task_set = "validation" if template_hash[0] in "01234567" else "test" key = task["sites"][0] if len(task["sites"]) > 1: key = "several_sites" key = task_set + "_" + key # key = "__".join(sorted([s for s in task["sites"]])) if key not in page_groups: page_groups[key] = list() page_groups[key].append(task) # Create the json files for t in templates.items(): for pg in page_groups: create_jsonl(f"webarena__{pg}_{t[0]}", page_groups[pg], t[1]) if __name__ == "__main__" and __package__ is None: main()