File size: 624 Bytes
29e9c37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import json
from tqdm import tqdm

# Suffix interpolated verbatim into the three file names used below
# (dset{lang}.txt, dataset{lang}.json, responses{lang}.txt).
lang = ""

# Parse the raw dataset: one "prompt→response" pair per line. Each line is
# lower-cased and literal "\n" escape sequences are expanded to real newlines.
# The encoding is explicit because the separator itself is non-ASCII and the
# platform default encoding is not guaranteed to be UTF-8.
lines = []
with open(f"dset{lang}.txt", "r", encoding="utf-8") as f:
    for lineno, raw in enumerate(tqdm(f.readlines()), start=1):
        fields = raw.rstrip("\n").lower().split("→")
        if fields == [""]:
            # Blank line: nothing to parse, skip instead of crashing.
            continue
        if len(fields) < 2:
            raise ValueError(
                f"dset{lang}.txt line {lineno}: missing '→' separator: {raw!r}"
            )
        # Only the first two fields are used; anything after a second
        # separator is ignored.
        lines.append(
            (fields[0].replace("\\n", "\n"), fields[1].replace("\\n", "\n"))
        )

# Assign every distinct response an integer id in order of first appearance
# and map each prompt to the id of its response. The dict makes both the
# membership test and the index lookup O(1); scanning the list for each sample
# would be quadratic in the dataset size.
responses = []
response_ids = {}
dset = {}
for prompt, response in tqdm(lines):
    idx = response_ids.get(response)
    if idx is None:
        idx = len(responses)
        response_ids[response] = idx
        responses.append(response)
    # A prompt that occurs more than once keeps the id of its last response.
    dset[prompt] = idx

with open(f"dataset{lang}.json", "w", encoding="utf-8") as f:
    json.dump(dset, f, ensure_ascii=False)

# One response per write, in id order.
# NOTE(review): responses containing expanded newlines span several physical
# lines in this file, so line number != response id for such data - confirm
# how the consumer reads it.
with open(f"responses{lang}.txt", "w", encoding="utf-8") as f:
    f.writelines(response + "\n" for response in tqdm(responses))