Mirror of https://github.com/microsoft/autogen.git, synced 2025-08-08 00:32:03 +00:00

* Initial commit of the autogen testbed environment.
* Fixed some typos in the Testbed README.md.
* Added some stricter termination logic to the two_agent scenario, and switched the logo task from finding Autogen's logo to finding Microsoft's (it's easier).
* Added documentation to testbed code in preparation for PR.
* Added a variation of HumanEval to the Testbed. It is also a reasonable example of how to integrate other benchmarks.
* Removed ChatCompletion.start_logging and related features. Added an explicit TERMINATE output to HumanEval to save 1 turn in each conversation.
* Added a metrics utils script for HumanEval.
* Updated the requirements in the README.
* Added documentation for the HumanEval csv schemas.
* Standardized on how the OAI_CONFIG_LIST is handled.
* Removed dot-slash from 'includes' path for cross-platform compatibility.
* Missed a file.
* Updated readme to include known-working versions.
68 lines · 2.3 KiB · Python
#
# Run this file to download the human_eval dataset, and create a corresponding testbed scenario:
# (default: ../scenarios/human_eval_two_agents_gpt4.jsonl and ../scenarios/human_eval_two_agents_gpt35.jsonl)
#

import requests
import gzip
import io
import json
import os
import base64


script_path = os.path.realpath(__file__)
script_name = os.path.basename(script_path)
script_dir = os.path.dirname(script_path)

# Directory where scenarios are stored
scenarios_dir = os.path.realpath(os.path.join(script_dir, os.path.pardir, "scenarios"))
print("Saving HumanEval scenarios to: " + scenarios_dir)

# URL of the file to download
url = "https://github.com/openai/human-eval/raw/master/data/HumanEval.jsonl.gz"

# Send an HTTP request to the URL of the file
response = requests.get(url)

# Ensure we raise an error if the download failed
response.raise_for_status()

# Create a BytesIO object from the response content
buffer = io.BytesIO(response.content)

# Create a scenario file for each model
fh_gpt4 = open(os.path.join(scenarios_dir, "human_eval_two_agents_gpt4.jsonl"), "wt")
fh_gpt35 = open(os.path.join(scenarios_dir, "human_eval_two_agents_gpt35.jsonl"), "wt")

# Open the buffer as a .gz file and read it line by line
with gzip.GzipFile(fileobj=buffer) as f_in:
    for line in f_in:
        # Parse each line as JSON
        data = json.loads(line)
        print("Converting: " + data["task_id"])

        # Write the GPT-4 scenario.
        # Prompts and tests are saved in base 64 to greatly simplify escaping them as they
        # move through the various formats and scripts. I welcome a better, more readable,
        # alternative. (A decoding sketch follows the script below.)
        record = {
            "id": data["task_id"].replace("/", "_"),
            "template": "human_eval_two_agents.py",
            "values": {
                "__MODEL__": "gpt-4",
                "__PROMPT_BASE64__": base64.b64encode(data["prompt"].encode("utf-8")).decode("utf-8"),
                "__ENTRY_POINT__": data["entry_point"],
                "__TEST_BASE64__": base64.b64encode(data["test"].encode("utf-8")).decode("utf-8"),
            },
        }
        fh_gpt4.write(json.dumps(record).strip() + "\n")

        # Write the GPT-3.5 version of the same scenario, changing only the model
        record["values"]["__MODEL__"] = "gpt-3.5-turbo-16k"
        fh_gpt35.write(json.dumps(record).strip() + "\n")

fh_gpt4.close()
fh_gpt35.close()
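
Each line the script writes is a self-contained JSON record naming a template file and the placeholder values to fill in. An illustrative record (my reconstruction, with the base64 payloads elided as angle-bracket placeholders) for the first HumanEval task would look like:

{"id": "HumanEval_0", "template": "human_eval_two_agents.py", "values": {"__MODEL__": "gpt-4", "__PROMPT_BASE64__": "<base64-encoded prompt>", "__ENTRY_POINT__": "has_close_elements", "__TEST_BASE64__": "<base64-encoded tests>"}}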
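Because the prompts and tests travel as base64, they survive JSON encoding and shell quoting untouched, and recovering the original text is symmetric to the encoding above. The following is a minimal sketch (not part of the repository) that reads the first generated record back and decodes it; the relative path assumes it runs from the same directory as the script above.

import base64
import json
import os

# Path mirrors the default output location used by the script above
scenario_file = os.path.join("..", "scenarios", "human_eval_two_agents_gpt4.jsonl")
with open(scenario_file, "rt") as fh:
    record = json.loads(fh.readline())

# Decode the base64 payloads back to plain text
prompt = base64.b64decode(record["values"]["__PROMPT_BASE64__"]).decode("utf-8")
test = base64.b64decode(record["values"]["__TEST_BASE64__"]).decode("utf-8")

print("Task: " + record["id"])
print("Entry point: " + record["values"]["__ENTRY_POINT__"])
print(prompt)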
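The record format implies simple textual substitution: each key in "values" is a literal placeholder string appearing in the named template file (human_eval_two_agents.py). A possible expansion step, assuming plain string replacement (the testbed's actual runner may work differently), could look like:

def instantiate_template(template_text, values):
    # Replace each literal placeholder (e.g. "__MODEL__") with its value
    for placeholder, value in values.items():
        template_text = template_text.replace(placeholder, value)
    return template_text

# Usage sketch: read the template source, substitute one record's values,
# and the result is a runnable per-task scenario script.
# instantiate_template(open("human_eval_two_agents.py").read(), record["values"])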