File size: 209 Bytes
751936e |
1 2 3 4 5 6 7 8 9 10 11 |
"""
"""
from transformers import BertTokenizer
# Demo: load a pretrained tokenizer, encode one sample sentence, and print
# whatever `encode` returns so the result can be inspected by eye.

# Model identifier handed to `BertTokenizer.from_pretrained`.
MODEL_NAME = "uer/gpt2-chinese-cluecorpussmall"

# Sample input; note the embedded "\n" between the two halves of the sentence.
SAMPLE_TEXT = "这是很久之前的\n事情了"

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
encoding = tokenizer.encode(SAMPLE_TEXT)
print(encoding)
|