# autogen/samples/tools/autogenbench/scenarios/MATH/Scripts/custom_tabulate.py

import os
import sys
import json
from autogenbench.tabulate_cmd import default_tabulate


def scorer(instance_dir):
    """Score one benchmark instance from its checker transcript.

    Reads ``checker_messages.json`` inside ``instance_dir`` and inspects the
    last message stored under the ``"checker_proxy"`` key.

    Args:
        instance_dir: Path of the directory holding the instance's output.

    Returns:
        ``None`` if ``checker_messages.json`` does not exist.
        ``True`` if the content of the last message contains (ignoring case)
        one of the accepted verdict phrases.
        ``False`` otherwise, including when no messages were recorded or the
        last message's content is not a string.

    Raises:
        KeyError: If the file lacks the ``"checker_proxy"`` key or the last
            message lacks a ``"content"`` key.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    accepted_verdicts = (
        "the answer is correct",
        "the answer is approximated but should be correct",
    )

    checker_path = os.path.join(instance_dir, "checker_messages.json")
    if not os.path.isfile(checker_path):
        return None

    # Decode as UTF-8 explicitly so the result does not depend on the
    # locale's default encoding (transcripts may contain non-ASCII symbols).
    with open(checker_path, "rt", encoding="utf-8") as fh:
        messages = json.load(fh)["checker_proxy"]

    # An empty transcript carries no affirmative verdict.
    if not messages:
        return False

    content = messages[-1]["content"]
    # Content that is not text (e.g. None) cannot contain a verdict phrase.
    if not isinstance(content, str):
        return False

    verdict = content.lower()
    return any(phrase in verdict for phrase in accepted_verdicts)


def main(args):
    """Run ``default_tabulate`` on ``args`` using this module's ``scorer``.

    Args:
        args: Argument list forwarded unchanged to ``default_tabulate``
            (the script entry point passes ``sys.argv``).
    """
    tabulate_options = {"scorer": scorer}
    default_tabulate(args, **tabulate_options)


# Only tabulate when executed directly as a script (no enclosing package),
# not when the module is imported.
if __package__ is None and __name__ == "__main__":
    main(sys.argv)