from vocab.gpt_35_turbo import tokenizer

# Inspect the full token-to-id vocabulary of the GPT-3.5-turbo tokenizer.
print(tokenizer.get_vocab())

# Round-trip a non-ASCII string: encode to token ids, then decode back to text.
text = "中"
token_ids = tokenizer.encode(text)
decode_str = tokenizer.decode(token_ids)  # decode takes token ids; convert_tokens_to_string expects token strings, not ids
print(decode_str)