Spaces:
Paused
Paused
#! /usr/bin/env python
"""Convert a JSONL file of ``content``/``summary`` records to prompt/response JSONL.

Each non-blank input line must be a JSON object with the keys ``content`` and
``summary``.  The converted records are written, one JSON object per line, to
``formatted_data/advertise_gen.jsonl`` relative to the current working
directory.

Usage:
    python <this script> --path <input.jsonl>
"""
import json
from collections import Counter
from argparse import ArgumentParser
import os

# Output location, relative to the current working directory.
OUTPUT_DIR = "formatted_data"
OUTPUT_PATH = OUTPUT_DIR + "/advertise_gen.jsonl"


def _load_examples(path):
    """Read the JSONL file at *path* and return a list of prompt/response dicts.

    Raises:
        OSError: if *path* cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks the ``content`` or ``summary`` key.
    """
    examples = []
    # Explicit UTF-8: the locale default encoding may be unable to decode
    # non-ASCII text.
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing inside json.loads.
            if not line.strip():
                continue
            record = json.loads(line)
            examples.append({
                "prompt": record['content'],
                "response": record['summary'],
            })
    return examples


def _write_examples(examples, out_path):
    """Write *examples* to *out_path* as JSONL, creating the parent directory."""
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file has
    # to be opened as UTF-8 rather than with the locale default encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(e, ensure_ascii=False) + "\n" for e in examples
        )


def main(argv=None):
    """Parse command-line arguments and run the conversion.

    Args:
        argv: Argument list to parse; ``None`` means use ``sys.argv[1:]``.
    """
    parser = ArgumentParser()
    parser.add_argument("--path", type=str, required=True)
    args = parser.parse_args(argv)
    # Read everything before opening the output so a malformed input file
    # does not leave a partially written output behind.
    train_examples = _load_examples(args.path)
    _write_examples(train_examples, OUTPUT_PATH)


if __name__ == "__main__":
    main()