File size: 282 Bytes
9495a4f
 
 
 
 
 
 
0ce6477
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
"""
## Vocabulary expansion
Entries below are presumably `<token id> <token>` pairs:
32000 <pad>
32001 但

"""

from transformers import LlamaTokenizer

# Identifier handed to `from_pretrained` to select the pretrained tokenizer.
_PRETRAINED_NAME = "ziqingyang/chinese-llama-2-7b"

# Module-level tokenizer instance, created once when this module is imported.
tokenizer = LlamaTokenizer.from_pretrained(_PRETRAINED_NAME)

# Free-form description stored as an ad-hoc attribute on the tokenizer object.
# English gloss of the text: "Redesigned a new vocabulary (size: 55296),
# further improving the coverage of Chinese characters and words."
# NOTE(review): presumably read elsewhere for display purposes — confirm.
tokenizer.comments = (
    "重新设计了新词表(大小:55296),进一步提升了中文字词的覆盖程度"
)