File size: 389 Bytes
d27a756 a6c67ec d27a756 751936e a6c67ec 751936e 9495a4f 751936e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
"""
"""
# Exposes a module-level `tokenizer`: the tiktoken encoding selected for the
# 'gpt-3.5-turbo' model, tagged with a few extra descriptive attributes.
import tiktoken
# Nothing from this module is referenced by name below, so it is imported for
# its import-time effects (presumably it patches tiktoken -- TODO confirm).
# NOTE(review): this statement binds the name `tokenizer` to the package; the
# assignment on the next line rebinds it to the encoding object.
import tokenizer.tiktoken_patch
tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')
# Expose the encoding's `n_vocab` under the additional name `vocab_size`
# (presumably the attribute name callers of this module expect -- verify).
tokenizer.vocab_size = tokenizer.n_vocab
# Free-text description attached to the encoding. NOTE(review): the trailing
# "There are 16 tokens KeyError" reads like a known-issue note (presumably 16
# token ids raise KeyError somewhere) -- confirm what it refers to.
tokenizer.comments = "tiktoken is a fast BPE tokeniser for use with OpenAI's models. There are 16 tokens KeyError"
tokenizer.reversible = True # It's reversible and lossless, so you can convert tokens back into the original text
|