File size: 624 Bytes
29e9c37 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import json
from tqdm import tqdm
# Convert a "prompt→response" text file into two artefacts:
#   * dataset{lang}.json   - maps each (lower-cased) prompt to an integer id
#   * responses{lang}.txt  - the distinct responses, written in id order
# `lang` is a file-name suffix selecting which dataset variant to process.
lang = ""

# The data uses the non-ASCII separator "→", so the encoding is pinned to
# UTF-8 instead of relying on the platform's locale default.
with open(f"dset{lang}.txt", "r", encoding="utf-8") as f:
    raw_lines = f.readlines()

# Each entry of `lines` is a (prompt, response) tuple. A literal backslash-n
# sequence in the file stands for a real newline and is unescaped here.
lines = []
for line_no, raw in enumerate(tqdm(raw_lines), start=1):
    fields = raw.rstrip("\n").lower().split("→")
    if len(fields) < 2:
        if not raw.strip():
            # Blank lines (e.g. a trailing one) carry no sample; skip them.
            continue
        raise ValueError(
            f"dset{lang}.txt line {line_no}: missing '→' separator: {raw!r}"
        )
    # Only the first two fields are used; anything after a second "→" is
    # ignored.
    lines.append(
        (fields[0].replace("\\n", "\n"), fields[1].replace("\\n", "\n"))
    )

# Assign each distinct response an id in order of first appearance. A dict
# gives O(1) membership tests and lookups; insertion order is preserved, so
# the ids are the same as positions in a first-seen list.
response_ids = {}
for _, response in tqdm(lines):
    if response not in response_ids:
        response_ids[response] = len(response_ids)
responses = list(response_ids)

# Map every prompt to the id of its response. If a prompt occurs more than
# once, the last occurrence wins.
dset = {}
for prompt, response in tqdm(lines):
    dset[prompt] = response_ids[response]

with open(f"dataset{lang}.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    json.dump(dset, f, ensure_ascii=False)

with open(f"responses{lang}.txt", "w", encoding="utf-8") as f:
    # NOTE(review): a response containing an unescaped newline spans several
    # physical lines in this file - confirm the reader of this file does not
    # assume one response per line.
    for response in tqdm(responses):
        f.write(response + "\n")
|