File size: 499 Bytes
b9cfcf9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def prepare_vocab(file_path="vocab.txt"):
    """Build a character-level vocabulary from a UTF-8 text file.

    Every distinct character found in the file becomes one vocabulary
    entry. Ids are assigned by position in the sorted character list, so
    the mapping is deterministic for a given file content.

    Args:
        file_path: Path of the text file to read (decoded as UTF-8).

    Returns:
        A tuple ``(vocab_size, encode, decode)`` -- note the order:

        * ``vocab_size``: number of distinct characters in the file.
        * ``encode(s)``: maps an iterable of characters (e.g. a ``str``)
          to a list of integer ids.
        * ``decode(t)``: maps an iterable of integer ids back to a ``str``.

        ``encode`` and ``decode`` raise ``KeyError`` for a character or
        id that is not part of the vocabulary.
    """
    # Only the read needs the file handle; release it before building tables.
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()

    # sorted() accepts any iterable, so the set needs no intermediate list.
    chars = sorted(set(text))
    vocab_size = len(chars)

    str_to_int = {ch: i for i, ch in enumerate(chars)}
    # Kept as a dict (not list indexing) so unknown or negative ids still
    # raise KeyError instead of silently wrapping around.
    int_to_ch = dict(enumerate(chars))

    def encode(s):
        """Return the list of ids for the characters of ``s``."""
        return [str_to_int[c] for c in s]

    def decode(t):
        """Return the string spelled by the ids in ``t``."""
        return "".join([int_to_ch[n] for n in t])

    return vocab_size, encode, decode