xu-song's picture
update
751936e
raw
history blame
209 Bytes
"""
"""
from transformers import BertTokenizer
# Identifier of the pretrained checkpoint whose tokenizer files are loaded.
model_name = "uer/gpt2-chinese-cluecorpussmall"

# Sample input; note the newline escape embedded in the middle of the sentence.
sample_text = "这是很久之前的\n事情了"

tokenizer = BertTokenizer.from_pretrained(model_name)

# Encode the sample and print the result so it can be inspected by eye.
encoding = tokenizer.encode(sample_text)
print(encoding)