Atest / tokenizer_config.json
bkbj's picture
Update tokenizer_config.json
7521432 verified
raw
history blame contribute delete
570 Bytes
{
    "do_lower_case": true,
    "model_type": "bert",
    "vocab_size": 30522,
    "special_tokens_map": {
        "unk_token": "[UNK]",
        "sep_token": "[SEP]",
        "pad_token": "[PAD]",
        "cls_token": "[CLS]",
        "mask_token": "[MASK]",
        "eos_token": "<|endoftext|>",
        "bos_token": "<|startoftext|>",
        "user_token": "<|user|>",
        "assistant_token": "<|assistant|>"
    },
    "padding_side": "right",
    "truncation_side": "right",
    "max_length": 1024,
    "use_fast": true,
    "tokenizer_class": "BertTokenizer"
}