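# NOTE(review): the lines below appear to be file-viewer page residue captured
# together with the script; kept as comments so the file remains valid Python.
# eson's picture
# add more tokenizers
# f4973d4
# raw
# history blame
# No virus
# 132 Bytes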
from vocab.chinese_llama2 import tokenizer
# Sample input: Chinese text interleaved with the literal markers <s>, </s>,
# <pad> and <unk>.
# NOTE(review): presumably a smoke test of how the tokenizer handles
# special-token strings embedded in raw text -- confirm against the
# tokenizer's configuration.
SAMPLE_TEXT = "<s>开始</s>站位符<pad>试试<unk>"

# Encode the sample and print whatever tokenizer.encode() returns.
encoding = tokenizer.encode(SAMPLE_TEXT)
print(encoding)