File size: 209 Bytes
751936e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
"""

"""

from transformers import BertTokenizer

# Model identifier handed to ``from_pretrained`` to locate the tokenizer files.
MODEL_NAME = "uer/gpt2-chinese-cluecorpussmall"

# Sample input; note the newline embedded in the middle of the sentence.
SAMPLE_TEXT = "这是很久之前的\n事情了"

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

# Encode the sample and show the result so it can be inspected by eye.
encoding = tokenizer.encode(SAMPLE_TEXT)
print(encoding)