File size: 233 Bytes
751936e
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
from transformers import AutoTokenizer
from vocab import TokenizerType

# Load the tokenizer published in the "baichuan-inc/Baichuan-7B" Hub repository.
# NOTE(review): trust_remote_code=True lets transformers execute the custom
# tokenizer code shipped in that repository — keep only while the source is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    "baichuan-inc/Baichuan-7B",
    trust_remote_code=True,
)

# Tag the loaded tokenizer with its category for the rest of the project.
# Original note: byte-bpe, sentencepiece (presumably a SentencePiece-backed
# byte-level BPE model — confirm against the model card).
tokenizer.type = TokenizerType.ByteBPE