Spaces:
Running
on
T4
Running
on
T4
File size: 821 Bytes
2afcb7e 926ff6c 2afcb7e 926ff6c 2afcb7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import json
from argparse import ArgumentParser
from generate_txt_dataset import DELIMITER_0, DELIMITER_1, STOP
def main(input_path: str, output_path: str) -> None:
    """Convert a JSON Lines prompt file into prompt/completion records.

    Each non-blank line of ``input_path`` is parsed as JSON and is expected to
    provide the keys ``input``, ``edit`` and ``output``. For every parsed
    record, one JSON line is written to ``output_path`` with two keys:

    * ``prompt``: the ``input`` value followed by ``DELIMITER_0``.
    * ``completion``: the ``edit`` value, ``DELIMITER_1``, the ``output``
      value and ``STOP``, concatenated in that order.

    Args:
        input_path: Path of the JSON Lines file to read (decoded as UTF-8).
        output_path: Path of the JSON Lines file to write; an existing file
            is overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If a parsed object lacks ``input``, ``edit`` or ``output``.
    """
    # Read and parse the whole input before opening the output, so a parse
    # failure never truncates an existing output file (and so the same path
    # may be used for both input and output).
    with open(input_path, encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing empty line), which would
        # otherwise make json.loads raise on otherwise valid JSONL input.
        prompts = [json.loads(line) for line in f if line.strip()]

    with open(output_path, "w", encoding="utf-8") as f:
        for prompt in prompts:
            prompt_for_gpt = {
                "prompt": f"{prompt['input']}{DELIMITER_0}",
                "completion": f"{prompt['edit']}{DELIMITER_1}{prompt['output']}{STOP}",
            }
            f.write(f"{json.dumps(prompt_for_gpt)}\n")
if __name__ == "__main__":
    # Command-line entry point: both paths are mandatory string options and
    # are forwarded unchanged to main().
    cli = ArgumentParser()
    for flag in ("--input-path", "--output-path"):
        cli.add_argument(flag, required=True, type=str)
    options = cli.parse_args()
    main(options.input_path, options.output_path)
|