|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<s>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "</s>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57521": { |
|
"content": "<mask>", |
|
"lstrip": true, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57522": { |
|
"content": "<sep/>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57523": { |
|
"content": "<s_iitcdip>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57524": { |
|
"content": "<s_synthdog>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57525": { |
|
"content": "<s_province>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57526": { |
|
"content": "<s_no>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57527": { |
|
"content": "</s_ignore>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57528": { |
|
"content": "<s_ignore>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57529": { |
|
"content": "<s_First>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57530": { |
|
"content": "</s_no>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57531": { |
|
"content": "</s_First>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57532": { |
|
"content": "<s_Last>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57533": { |
|
"content": "</s_Last>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57534": { |
|
"content": "</s_province>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57535": { |
|
"content": "<s_Middle>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57536": { |
|
"content": "</s_Middle>", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"<s_iitcdip>", |
|
"<s_synthdog>", |
|
"</s>", |
|
"<s_province>", |
|
"<s_no>", |
|
"</s_ignore>", |
|
"<s_ignore>", |
|
"<s_First>", |
|
"</s_no>", |
|
"</s_First>", |
|
"<s_Last>", |
|
"</s_Last>", |
|
"</s_province>", |
|
"<s_Middle>", |
|
"<s>", |
|
"</s_Middle>" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"mask_token": "<mask>", |
|
"model_max_length": 1000000000000000019884624838656, |
|
"pad_token": "<pad>", |
|
"processor_class": "DonutProcessor", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"tokenizer_class": "XLMRobertaTokenizer", |
|
"unk_token": "<unk>" |
|
} |
|
|