ierhon committed on
Commit
c6acb7c
1 Parent(s): 01b4ea4

Create todset.py

Browse files
Files changed (1) hide show
  1. todset.py +14 -0
todset.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def todset(text: str):
    r"""Parse ``prompt→response`` lines into a response list and a prompt index.

    Every non-blank line of *text* is lower-cased and split on ``→``. The
    first field is the prompt and the second field is the response; any
    further ``→``-separated fields on the same line are ignored. The
    two-character sequence ``\n`` (backslash, ``n``) inside either field is
    converted to a real newline character.

    Args:
        text: Newline-separated samples, one ``prompt→response`` pair per
            line. Blank lines (including the one produced by a trailing
            newline) are skipped, and a trailing carriage return left over
            from CRLF line endings is dropped.

    Returns:
        A tuple ``(responses, dset)``. ``responses`` is the list of distinct
        responses in first-seen order. ``dset`` maps each prompt to the index
        of its response inside ``responses``; when a prompt occurs more than
        once, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line contains no ``→`` separator.
    """
    # The progress bar is optional: use tqdm when it is installed, otherwise
    # iterate silently instead of failing on a missing name.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    samples = []
    for lineno, raw in enumerate(text.split("\n"), start=1):
        line = raw.rstrip("\r\n")
        if not line.strip():
            # Blank line, e.g. the empty string after a trailing newline.
            continue
        fields = line.lower().split("→")
        if len(fields) < 2:
            raise ValueError(f"line {lineno}: missing '→' separator: {raw!r}")
        prompt = fields[0].replace("\\n", "\n")
        response = fields[1].replace("\\n", "\n")
        samples.append((prompt, response))

    responses = []
    # response -> position in `responses`; replaces the linear `in` / `.index`
    # scans so the whole pass stays linear in the number of samples.
    response_index = {}
    dset = {}
    for prompt, response in progress(samples):
        index = response_index.get(response)
        if index is None:
            index = len(responses)
            response_index[response] = index
            responses.append(response)
        dset[prompt] = index

    return (responses, dset)