afourney 45c2a78970
Testbed folders (#792)
* Re-added completion logging when using older versions of autogen.

* Extended scenario definitions and templating to include folders.

* Prepare collate_human_eval.py for working with group chat scenarios.

* Converted HumanEval to the folder-based approach, and added GroupChat scenarios.

* Fixed the default termination message.

* Fixed another termination condition.

* Updated compatible autogen versions.

* Fixed a bug in executing the finalize scripts.

* Generalized the template further to support multiple folder copy operations.

* Add tests from AutoGPT.

* Update README.md

* Fix typo

* Update samples/tools/testbed/README.md

---------

Co-authored-by: LeoLjl <3110503618@qq.com>
Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
2023-11-30 16:43:03 +00:00

90 lines
3.4 KiB
Python

import base64
import glob
import json
import os
import shutil
# Template referenced by every generated record; input artifacts are copied
# beneath its "coding/file" folder.
TEMPLATE = "Templates/TwoAgents"

# (output-file suffix, value substituted for __MODEL__) -- one .jsonl file is
# written per entry, in this order.
MODELS = (
    ("gpt35", "gpt-3.5-turbo-16k"),
    ("gpt4", "gpt-4-1106-preview"),
)


def _b64_encode_all(words):
    """Return every string in *words* base64-encoded, as ASCII text.

    The expected words may contain escape characters, which cause problems
    when the list is passed along via ``str()`` and later ``eval()``;
    base64-encoding each word sidesteps that.
    """
    return [base64.b64encode(word.encode("utf-8")).decode("utf-8") for word in words]


def _copy_artifacts(workspace, name):
    """Copy ``<workspace>/artifacts_in`` to ``<TEMPLATE>/coding/file/<name>``.

    Any existing target folder is removed first so stale files never linger.

    Returns:
        True if the challenge has an ``artifacts_in`` folder (and it was
        copied), False otherwise.
    """
    source_folder = os.path.join(workspace, "artifacts_in")
    if not os.path.exists(source_folder):
        return False

    target_folder = os.path.join(TEMPLATE, "coding", "file", name)
    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)
    shutil.copytree(source_folder, target_folder)
    return True


def _build_record(data, model, has_artifacts, contain_b64, no_contain_b64):
    """Build the scenario record for one challenge / model pair.

    Args:
        data: Parsed ``data.json`` of the challenge.
        model: Model name stored under ``__MODEL__``.
        has_artifacts: Whether input artifacts were copied for this challenge;
            controls ``__TARGET_FOLDER__``.
        contain_b64: Base64-encoded ``should_contain`` words.
        no_contain_b64: Base64-encoded ``should_not_contain`` words.

    Raises:
        KeyError / IndexError: if ``data`` lacks ``eval_id``, ``task``,
            ``name``, ``ground.files[0]`` or ``ground.eval.type``.
    """
    ground = data["ground"]
    return {
        "id": data["eval_id"],
        "template": TEMPLATE,
        "substitutions": {
            "scenario.py": {
                "__MODEL__": model,
                "__TASK__": data["task"],
                "__TARGET_FOLDER__": f"file/{data['name']}" if has_artifacts else "",
            },
            "check.py": {
                # Only the first expected file pattern is used.
                "__FILE_PATTERN__": ground["files"][0],
                "__EVAL_TYPE__": ground["eval"]["type"],
                "__CASE_SENSITIVE__": str(ground.get("case_sensitive", False)),
            },
            # The lists are stored as their Python repr (str(list)).
            "should_contain.txt": {
                "__CONTAIN__": str(contain_b64),
            },
            "should_not_contain.txt": {
                "__NO_CONTAIN__": str(no_contain_b64),
            },
        },
    }


def convert_challenge(data_path):
    """Convert one challenge ``data.json`` into per-model scenario files.

    Reads the challenge definition, copies its ``artifacts_in`` folder (if
    any) under the template, and writes ``<name>_<suffix>.jsonl`` into the
    current working directory for every entry in ``MODELS``. Each file holds
    a single JSON record followed by a newline.

    Args:
        data_path: Path to the challenge's ``data.json``; its parent folder
            is treated as the challenge workspace.
    """
    print("Converting data path: ", data_path)
    workspace = os.path.dirname(data_path)

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    ground = data["ground"]
    contain_b64 = _b64_encode_all(ground.get("should_contain", []))
    no_contain_b64 = _b64_encode_all(ground.get("should_not_contain", []))

    has_artifacts = _copy_artifacts(workspace, data["name"])

    for suffix, model in MODELS:
        record = _build_record(data, model, has_artifacts, contain_b64, no_contain_b64)
        with open(f"{data['name']}_{suffix}.jsonl", "wt", encoding="utf-8") as f:
            f.write(json.dumps(record).strip() + "\n")


# The conversion still runs at module level, over every challenge found
# relative to the current working directory.
data_paths = glob.glob("challenges/*/data.json")
for data_path in data_paths:
    convert_challenge(data_path)