{
  "model_max_length": 512,
  "bos_token": {
    "content": "[CLS]",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "eos_token": {
    "content": "[SEP]",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "sep_token": {
    "content": "[SEP]",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "cls_token": {
    "content": "[CLS]",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "unk_token": {
    "content": "[UNK]",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "pad_token": {
    "content": "[PAD]",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "mask_token": {
    "content": "[MASK]",
    "single_word": false,
    "lstrip": true,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  }
}