#
# Run this file to download the HumanEval dataset and create the corresponding
# testbed scenarios (default: ../scenarios/human_eval_two_agents_gpt4.jsonl
# and ../scenarios/human_eval_two_agents_gpt35.jsonl)
#
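# Each output line is a JSON record of roughly this shape (abbreviated here;
# real task_id values come from the dataset itself):
#
#   {"id": "HumanEval_0", "template": "human_eval_two_agents.py",
#    "values": {"__MODEL__": "gpt-4", "__PROMPT_BASE64__": "...",
#               "__ENTRY_POINT__": "...", "__TEST_BASE64__": "..."}}
#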

import base64
import gzip
import io
import json
import os

import requests

script_path = os.path.realpath(__file__)
script_name = os.path.basename(script_path)
script_dir = os.path.dirname(script_path)

# Directory where scenarios are stored
scenarios_dir = os.path.realpath(os.path.join(script_dir, os.path.pardir, "scenarios"))
print("Saving HumanEval scenarios to: " + scenarios_dir)
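
# Note: the scenarios directory is assumed to exist already; the open() calls
# below will raise FileNotFoundError if it does not.
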
# URL of the file to download
url = "https://github.com/openai/human-eval/raw/master/data/HumanEval.jsonl.gz"

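# At the time of writing, HumanEval contains 164 problems, so each scenario
# file should end up with 164 lines.
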
# Send an HTTP request to the URL of the file
response = requests.get(url)

# Raise an error if the download failed
response.raise_for_status()
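# (raise_for_status() raises requests.HTTPError on any 4xx or 5xx status.)
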
# Create a BytesIO object from the response content, so the archive can be
# decompressed in memory without writing it to disk
buffer = io.BytesIO(response.content)

# Create the scenario files (one per model)
fh_gpt4 = open(os.path.join(scenarios_dir, "human_eval_two_agents_gpt4.jsonl"), "wt")
fh_gpt35 = open(os.path.join(scenarios_dir, "human_eval_two_agents_gpt35.jsonl"), "wt")

# Open the buffer as a .gz file and read it line by line
with gzip.GzipFile(fileobj=buffer) as f_in:
    for line in f_in:
        # Parse each line as JSON
        data = json.loads(line)
        print("Converting: " + data["task_id"])

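        # Each HumanEval record carries (among other fields) task_id, prompt,
        # entry_point, and test; those four are all this script uses.
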
        # Write the GPT-4 scenario.
        # Prompts and tests are saved in base64 to greatly simplify escaping them as they
        # move through the various formats and scripts. I welcome a better, more readable alternative.
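        # (For reference, a consumer can recover the original text with the
        # inverse operation, e.g. base64.b64decode(value).decode("utf-8");
        # that is a sketch of downstream usage, not code this script runs.)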
        record = {
            "id": data["task_id"].replace("/", "_"),
            "template": "human_eval_two_agents.py",
            "values": {
                "__MODEL__": "gpt-4",
                "__PROMPT_BASE64__": base64.b64encode(data["prompt"].encode("utf-8")).decode("utf-8"),
                "__ENTRY_POINT__": data["entry_point"],
                "__TEST_BASE64__": base64.b64encode(data["test"].encode("utf-8")).decode("utf-8"),
            },
        }
        fh_gpt4.write(json.dumps(record).strip() + "\n")

        # Write the GPT-3.5 version of the same record
        record["values"]["__MODEL__"] = "gpt-3.5-turbo-16k"
        fh_gpt35.write(json.dumps(record).strip() + "\n")

fh_gpt4.close()
fh_gpt35.close()