if __name__ == "__main__":
    # Script entry point: attach the extra ("plus") test inputs read from
    # --plus-input to each HumanEval+ problem and write the merged problems
    # to --output as JSON Lines (one JSON object per line).
    import argparse
    import json
    import os
    import shutil

    from tempdir import TempDir

    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="humaneval", type=str)
    parser.add_argument("--plus-input", required=True, type=str)
    parser.add_argument("--output", required=True, type=str)
    args = parser.parse_args()

    # Validate with parser.error instead of assert: asserts are stripped under
    # `python -O`, which would silently disable the "do not clobber" guard.
    if args.dataset != "humaneval":
        parser.error(f"unsupported dataset: {args.dataset}")
    if os.path.exists(args.output):
        parser.error(f"{args.output} already exists!")

    with TempDir() as tempdir:
        # Map task_id -> extra inputs, read from the --plus-input JSONL file.
        plus_input = {}
        with open(args.plus_input, encoding="utf-8") as file:
            for line in file:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue
                problem = json.loads(line)
                plus_input[problem["task_id"]] = problem["inputs"]

        tempf = None
        if args.dataset == "humaneval":
            from evalplus.data import get_human_eval_plus

            # NOTE(review): err_incomplete=False presumably lets evalplus
            # return problems that are still incomplete -- confirm against
            # the evalplus API.
            problems = get_human_eval_plus(err_incomplete=False)
            # Build the file inside the temp dir first so that a failure
            # part-way through never leaves a partial file at --output.
            tempf = os.path.join(tempdir, "HumanEvalPlus.jsonl")
            with open(tempf, "w", encoding="utf-8") as file:
                for problem in problems:
                    # Raises KeyError if a task has no entry in --plus-input.
                    problem["plus_input"] = plus_input[problem["task_id"]]
                    file.write(json.dumps(problem) + "\n")

        # shutil.move rather than os.rename: os.rename fails with OSError when
        # the temp dir and the output path live on different filesystems.
        shutil.move(tempf, args.output)