```python
from transformers import LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained(
    'ocisd4/openllama_tokenizer_v2',
    add_bos_token=False,
    add_eos_token=True,   # append the EOS id (2) to every encoded sequence
    force_download=False,
    use_auth_token=True,  # requires a Hugging Face access token
    # additional_special_tokens=['<|spcout|>', '<|sep|>', '<|eot|>', '<|output|>']
)

print('vocab size:', tokenizer.vocab_size)
# vocab size: 51456

text = '今天天氣真好!'  # "The weather is really nice today!"

print(tokenizer.tokenize(text))
# ['▁', '今天', '天氣', '真', '好', '!']

print(tokenizer.encode(text))
# [29500, 32097, 32916, 30615, 30192, 30042, 2]

print(tokenizer.decode(tokenizer.encode(text)))
# 今天天氣真好!
```
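
The same tokenizer can also encode several sentences at once with padding. Below is a minimal sketch, assuming the tokenizer was loaded as above; note that LLaMA-style tokenizers ship without a pad token, so reusing the EOS token for padding is an assumption on our part, not part of this repo's config:

```python
# A minimal sketch, assuming `tokenizer` was loaded as above.
# LLaMA-style tokenizers have no pad token by default, so we reuse the
# EOS token for padding (an assumption, not part of this repo's config).
tokenizer.pad_token = tokenizer.eos_token

batch = tokenizer(
    ['今天天氣真好!', '你好'],  # "The weather is really nice today!", "Hello"
    padding=True,               # pad to the longest sequence in the batch
    return_tensors='pt',        # PyTorch tensors; requires torch installed
)
print(batch['input_ids'].shape)    # e.g. torch.Size([2, 7])
print(batch['attention_mask'][1])  # padded positions are masked with 0
```

Reusing EOS as the pad token is a common workaround for LLaMA-family tokenizers; any model consuming these ids should ignore the padded positions via the attention mask.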