Med_chat_bot / utils.py
Matthev00's picture
utils with vocab preparation
b9cfcf9
raw
history blame contribute delete
No virus
499 Bytes
def prepare_vocab(file_path: str = "vocab.txt"):
    """Build a character-level vocabulary from a text file.

    The file is read as UTF-8 and every distinct character in it becomes
    one vocabulary entry. Ids are assigned by enumerating the characters
    in sorted order, so the mapping is deterministic for a given file.

    Args:
        file_path: Path of the text file to derive the vocabulary from.

    Returns:
        A ``(vocab_size, encode, decode)`` tuple where ``vocab_size`` is
        the number of distinct characters, ``encode`` maps a string to a
        list of integer ids, and ``decode`` maps an iterable of integer
        ids back to a string.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()

    # Sorting the unique characters fixes the id assignment regardless of
    # set iteration order.
    chars = sorted(set(text))
    vocab_size = len(chars)

    int_to_ch = dict(enumerate(chars))
    str_to_int = {ch: i for i, ch in int_to_ch.items()}

    def encode(s):
        """Return the list of ids for the characters of ``s``.

        Raises ``KeyError`` for a character that is not in the vocabulary.
        """
        return [str_to_int[c] for c in s]

    def decode(t):
        """Return the string spelled by the ids in ``t``.

        Raises ``KeyError`` for an id that is not in the vocabulary.
        """
        return "".join(int_to_ch[n] for n in t)

    return vocab_size, encode, decode