def prepare_vocab(file_path="vocab.txt"):
    """Build a character-level vocabulary from a UTF-8 text file.

    Every distinct character found in the file becomes one vocabulary
    entry. Ids are assigned by enumerating the characters in sorted order,
    so the same file content always yields the same mapping.

    Args:
        file_path: Path of the text file to read. Defaults to "vocab.txt".

    Returns:
        A tuple ``(vocab_size, encode, decode)`` where

        * ``vocab_size`` is the number of distinct characters in the file,
        * ``encode(s)`` maps a string to a list of integer ids,
        * ``decode(t)`` maps an iterable of integer ids back to a string.

        ``encode`` and ``decode`` raise ``KeyError`` when given a character
        or id that is not part of the vocabulary.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()

    # sorted() accepts any iterable, so the set needs no intermediate list.
    chars = sorted(set(text))
    vocab_size = len(chars)

    # Two lookup tables over the same enumeration: char -> id and id -> char.
    str_to_int = {ch: i for i, ch in enumerate(chars)}
    int_to_ch = dict(enumerate(chars))

    # Named inner functions (rather than lambdas bound to names) so that
    # tracebacks and reprs show "encode"/"decode" instead of "<lambda>".
    def encode(s):
        """Return the list of ids for the characters of *s*."""
        return [str_to_int[c] for c in s]

    def decode(t):
        """Return the string spelled by the ids in *t*."""
        return "".join(int_to_ch[n] for n in t)

    return vocab_size, encode, decode