xu-song's picture
update
d10ecd7
raw
history blame
196 Bytes
import os
from transformers import AutoTokenizer
# Load the pretrained BERT tokenizer for Chinese (WordPiece vocabulary).
# NOTE(review): downloads from the Hugging Face Hub on first run; requires
# network access or a local cache — confirm the execution environment allows it.
tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")
# Two ways to read the vocabulary size, kept here as reference; they can
# differ: len(get_vocab()) includes tokens added after pretraining, while
# tokenizer.vocab_size reports only the base (original) vocabulary.
# vocab_size = len(tokenizer.get_vocab())
# vocab_size = tokenizer.vocab_size