|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57521": { |
|
"content": "<mask>", |
|
"lstrip": true, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57522": { |
|
"content": "<sep/>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57523": { |
|
"content": "<s_iitcdip>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57524": { |
|
"content": "<s_synthdog>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"57525": { |
|
"content": "<s_result>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57526": { |
|
"content": "</s_result>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57527": { |
|
"content": "<s_mn>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57528": { |
|
"content": "</s_mn>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57529": { |
|
"content": "<s_ab>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57530": { |
|
"content": "</s_ab>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57531": { |
|
"content": "<s_ht>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57532": { |
|
"content": "</s_ht>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57533": { |
|
"content": "<s_mc>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57534": { |
|
"content": "</s_mc>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57535": { |
|
"content": "<s_fb>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57536": { |
|
"content": "</s_fb>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57537": { |
|
"content": "<s_ts>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57538": { |
|
"content": "</s_ts>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57539": { |
|
"content": "<s_df>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57540": { |
|
"content": "</s_df>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57541": { |
|
"content": "<s_ms>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57542": { |
|
"content": "</s_ms>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57543": { |
|
"content": "<s_cb>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57544": { |
|
"content": "</s_cb>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57545": { |
|
"content": "<s_lv>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57546": { |
|
"content": "</s_lv>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57547": { |
|
"content": "<s_br>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57548": { |
|
"content": "</s_br>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57549": { |
|
"content": "<s_bl>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57550": { |
|
"content": "</s_bl>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57551": { |
|
"content": "<s_dm>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57552": { |
|
"content": "</s_dm>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57553": { |
|
"content": "<s_sc>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
}, |
|
"57554": { |
|
"content": "</s_sc>", |
|
"lstrip": false, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": false |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"<s_iitcdip>", |
|
"<s_synthdog>" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"mask_token": "<mask>", |
|
"max_length": 768, |
|
"model_max_length": 1000000000000000019884624838656, |
|
"pad_to_multiple_of": null, |
|
"pad_token": "<pad>", |
|
"pad_token_type_id": 0, |
|
"padding_side": "right", |
|
"processor_class": "DonutProcessor", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"stride": 0, |
|
"tokenizer_class": "XLMRobertaTokenizer", |
|
"truncation_side": "right", |
|
"truncation_strategy": "longest_first", |
|
"unk_token": "<unk>" |
|
} |
|
|