ierhon's picture
Create todset.py
c6acb7c
raw history blame
No virus
420 Bytes
def todset(text: str):
    """Parse ``prompt→response`` lines into a response list and an index map.

    Each line of ``text`` is lowercased and split on the ``→`` character; the
    first field is the prompt and the second is the response (any further
    fields are ignored). Inside both fields, a backslash followed by ``n`` is
    converted into a real newline character.

    Blank lines (for example the one produced by a trailing newline) are
    skipped instead of aborting the whole parse.

    Args:
        text: The raw dataset, one ``prompt→response`` sample per line.

    Returns:
        A ``(responses, dset)`` tuple where ``responses`` is the list of
        distinct responses in first-seen order and ``dset`` maps each prompt
        to the index of its response in ``responses``. If a prompt occurs
        more than once, the last occurrence wins.

    Raises:
        IndexError: If a non-blank line does not contain ``→``.
    """
    try:
        from tqdm import tqdm as progress
    except ImportError:
        # tqdm only draws a progress bar; plain iteration gives the same result.
        def progress(iterable):
            return iterable

    samples = []
    for raw_line in text.split("\n"):
        if not raw_line.strip():
            continue  # nothing to parse on an empty line
        fields = raw_line.lower().split("→")
        samples.append(
            (fields[0].replace("\\n", "\n"), fields[1].replace("\\n", "\n"))
        )

    responses = []
    # response -> position in `responses`; avoids repeated linear list scans.
    response_index = {}
    dset = {}
    for prompt, response in progress(samples):
        if response not in response_index:
            response_index[response] = len(responses)
            responses.append(response)
        dset[prompt] = response_index[response]
    return (responses, dset)