afourney cd199c7ab7
Introduces AutoGenBench (#1048)
* Initial commit of AutoGenBench

* wording

* typo

* pre-commit reformulation

* Updated README to point to contributor's guide earlier.

* Simplified the description of the JSON format.

* Added print statements to indicate when run.sh and scenario.py are starting.

* Added SocietyOfMind scenario to GAIA.

* Pointing autogenbench clone command to the latest branch.

* Temporarily disable subsample option.

* Updated the GAIA readme to specify how to define a BING API key.

* Fixed and re-enabled the subsample option.

* Added a draft of a blog post.

* Updated authors.

* Incorporating Gagan's feedback.

* Fixed code formatting.

* Updated the help string in the docs.

* Light editing of the AutoGenBench blogpost.

* Support filtering on model tags.

* Added websurfer dependencies to Dockerfile.

* Renamed testbed -> autogenbench

* Attempting to fix formatting.

* Added more gracefull handling of task timeouts (the script is allowed to terminate before Docker is stopped).

* Updated the blogpost based on Saleema's and Julia's feedback.

* Fixed formatting... again.

* Added a main MANIFEST to list available scenarios.

* Limit main manifest to directories.

* Manifests now use relative paths.

* All manifests are now relative.

* Updated the contributing guide, and address windows path issues.

* Updated the version. Fixed formatting.

* Fixed formatting.

* De-listing Examples, since it has no clear tabulate criteria.

* Updated email in pyproject

* typo in blogpost

* wording

---------

Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
Co-authored-by: Qingyun Wu <qingyun0327@gmail.com>
2024-01-26 00:46:58 +00:00

118 lines
3.6 KiB
Python

#
# Run this file to download the human_eval dataset, and create a corresponding testbed scenario:
# (default: ../scenarios/human_eval_two_agents_gpt4.jsonl and ./scenarios/human_eval_two_agents_gpt35.jsonl)
#
import requests
import tarfile
import io
import json
import os
import sys
URL = "https://people.eecs.berkeley.edu/~hendrycks/MATH.tar"
SCRIPT_PATH = os.path.realpath(__file__)
SCRIPT_NAME = os.path.basename(SCRIPT_PATH)
SCRIPT_DIR = os.path.dirname(SCRIPT_PATH)
SCENARIO_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.path.pardir))
TEMPLATES_DIR = os.path.join(SCENARIO_DIR, "Templates")
TASKS_DIR = os.path.join(SCENARIO_DIR, "Tasks")
DOWNLOADS_DIR = os.path.join(SCENARIO_DIR, "Downloads")
SELECTED_PROBLEMS = [
"MATH/test/algebra/2144.json",
"MATH/test/algebra/1997.json",
"MATH/test/algebra/2072.json",
"MATH/test/algebra/2137.json",
"MATH/test/algebra/2557.json",
"MATH/test/algebra/2045.json",
"MATH/test/algebra/2499.json",
"MATH/test/counting_and_probability/483.json",
"MATH/test/intermediate_algebra/590.json",
"MATH/test/prealgebra/1511.json",
"MATH/test/intermediate_algebra/935.json",
"MATH/test/prealgebra/808.json",
"MATH/test/number_theory/233.json",
"MATH/test/number_theory/960.json",
"MATH/test/precalculus/551.json",
"MATH/test/counting_and_probability/909.json",
"MATH/test/algebra/2417.json",
]
def download_math():
"""Download the MATH dataset (if not already downloaded).
Return a JSON dictionary of selected problems."""
selected_problems = dict()
if not os.path.isdir(DOWNLOADS_DIR):
os.mkdir(DOWNLOADS_DIR)
tar_file = os.path.join(DOWNLOADS_DIR, "MATH.tar")
if not os.path.isfile(tar_file):
# Send a HTTP request to the URL
response = requests.get(URL, stream=True)
response.raise_for_status()
# If the HTTP request returns a status code 200, proceed
with open(tar_file, "wb") as fh:
for chunk in response.iter_content(chunk_size=512):
fh.write(chunk)
# Extract selected problems
tar = tarfile.open(tar_file)
for member in tar.getmembers():
if member.name in SELECTED_PROBLEMS:
print(f"Extracting: {member.name}")
content = tar.extractfile(member).read()
selected_problems[member.name] = json.loads(content)
return selected_problems
def create_jsonl(name, problems, template):
"""Creates a JSONL scenario file with a given name, dictionary of MATH problems, and template path."""
# Create a task directory if it doesn't exist
if not os.path.isdir(TASKS_DIR):
os.mkdir(TASKS_DIR)
# Create the jsonl file
with open(os.path.join(TASKS_DIR, name + ".jsonl"), "wt") as fh:
for item in problems.items():
data = item[1]
task_id = item[0].replace("MATH/", "").replace(".json", "").replace("/", "_")
print(f"Converting: [{item[0]}] {task_id}")
record = {
"id": task_id,
"template": os.path.join(os.path.pardir, template),
"substitutions": {
"prompt.txt": {"__PROMPT__": data["problem"]},
"expected_answer.txt": {"__ANSWER__": data["solution"]},
},
}
fh.write(json.dumps(record).strip() + "\n")
###############################################################################
def main():
problems = download_math()
templates = {
"two_agents": "Templates/TwoAgents",
}
for t in templates.items():
create_jsonl(f"math_{t[0]}", problems, t[1])
if __name__ == "__main__" and __package__ is None:
main()