idiom_bert_kurdish / tokenizer_config.json
abdulhade's picture
Upload tokenizer
ab6e6dd verified
raw
history blame contribute delete
No virus
48.9 kB
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"2": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50000": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"50001": {
"content": "<|¡|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50002": {
"content": "<|¢|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50003": {
"content": "<|£|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50004": {
"content": "<|¤|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50005": {
"content": "<|¥|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50006": {
"content": "<|¦|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50007": {
"content": "<|§|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50008": {
"content": "<|¨|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50009": {
"content": "<|©|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50010": {
"content": "<|ª|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50011": {
"content": "<|«|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50012": {
"content": "<|¬|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50013": {
"content": "<|®|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50014": {
"content": "<|¯|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50015": {
"content": "<|°|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50016": {
"content": "<|±|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50017": {
"content": "<|²|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50018": {
"content": "<|³|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50019": {
"content": "<|´|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50020": {
"content": "<|µ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50021": {
"content": "<|¶|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50022": {
"content": "<|·|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50023": {
"content": "<|¸|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50024": {
"content": "<|¹|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50025": {
"content": "<|º|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50026": {
"content": "<|»|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50027": {
"content": "<|¼|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50028": {
"content": "<|½|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50029": {
"content": "<|¾|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50030": {
"content": "<|¿|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50031": {
"content": "<|À|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50032": {
"content": "<|Á|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50033": {
"content": "<|Â|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50034": {
"content": "<|Ã|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50035": {
"content": "<|Ä|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50036": {
"content": "<|Å|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50037": {
"content": "<|Æ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50038": {
"content": "<|Ç|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50039": {
"content": "<|È|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50040": {
"content": "<|É|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50041": {
"content": "<|Ê|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50042": {
"content": "<|Ë|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50043": {
"content": "<|Ì|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50044": {
"content": "<|Í|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50045": {
"content": "<|Î|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50046": {
"content": "<|Ï|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50047": {
"content": "<|Ð|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50048": {
"content": "<|Ñ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50049": {
"content": "<|Ò|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50050": {
"content": "<|Ó|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50051": {
"content": "<|Ô|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50052": {
"content": "<|Õ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50053": {
"content": "<|Ö|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50054": {
"content": "<|×|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50055": {
"content": "<|Ø|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50056": {
"content": "<|Ù|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50057": {
"content": "<|Ú|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50058": {
"content": "<|Û|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50059": {
"content": "<|Ü|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50060": {
"content": "<|Ý|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50061": {
"content": "<|Þ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50062": {
"content": "<|ß|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50063": {
"content": "<|à|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50064": {
"content": "<|á|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50065": {
"content": "<|â|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50066": {
"content": "<|ã|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50067": {
"content": "<|ä|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50068": {
"content": "<|å|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50069": {
"content": "<|æ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50070": {
"content": "<|ç|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50071": {
"content": "<|è|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50072": {
"content": "<|é|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50073": {
"content": "<|ê|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50074": {
"content": "<|ë|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50075": {
"content": "<|ì|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50076": {
"content": "<|í|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50077": {
"content": "<|î|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50078": {
"content": "<|ï|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50079": {
"content": "<|ð|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50080": {
"content": "<|ñ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50081": {
"content": "<|ò|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50082": {
"content": "<|ó|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50083": {
"content": "<|ô|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50084": {
"content": "<|õ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50085": {
"content": "<|ö|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50086": {
"content": "<|÷|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50087": {
"content": "<|ø|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50088": {
"content": "<|ù|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50089": {
"content": "<|ú|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50090": {
"content": "<|û|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50091": {
"content": "<|ü|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50092": {
"content": "<|ý|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50093": {
"content": "<|þ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50094": {
"content": "<|ÿ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50095": {
"content": "<|Ā|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50096": {
"content": "<|ā|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50097": {
"content": "<|Ă|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50098": {
"content": "<|ă|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50099": {
"content": "<|Ą|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50100": {
"content": "<|ą|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50101": {
"content": "<|Ć|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50102": {
"content": "<|ć|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50103": {
"content": "<|Ĉ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50104": {
"content": "<|ĉ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50105": {
"content": "<|Ċ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50106": {
"content": "<|ċ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50107": {
"content": "<|Č|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50108": {
"content": "<|č|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50109": {
"content": "<|Ď|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50110": {
"content": "<|ď|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50111": {
"content": "<|Đ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50112": {
"content": "<|đ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50113": {
"content": "<|Ē|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50114": {
"content": "<|ē|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50115": {
"content": "<|Ĕ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50116": {
"content": "<|ĕ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50117": {
"content": "<|Ė|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50118": {
"content": "<|ė|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50119": {
"content": "<|Ę|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50120": {
"content": "<|ę|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50121": {
"content": "<|Ě|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50122": {
"content": "<|ě|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50123": {
"content": "<|Ĝ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50124": {
"content": "<|ĝ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50125": {
"content": "<|Ğ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50126": {
"content": "<|ğ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50127": {
"content": "<|Ġ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50128": {
"content": "<|ġ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50129": {
"content": "<|Ģ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50130": {
"content": "<|ģ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50131": {
"content": "<|Ĥ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50132": {
"content": "<|ĥ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50133": {
"content": "<|Ħ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50134": {
"content": "<|ħ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50135": {
"content": "<|Ĩ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50136": {
"content": "<|ĩ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50137": {
"content": "<|Ī|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50138": {
"content": "<|ī|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50139": {
"content": "<|Ĭ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50140": {
"content": "<|ĭ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50141": {
"content": "<|Į|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50142": {
"content": "<|į|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50143": {
"content": "<|İ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50144": {
"content": "<|ı|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50145": {
"content": "<|IJ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50146": {
"content": "<|ij|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50147": {
"content": "<|Ĵ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50148": {
"content": "<|ĵ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50149": {
"content": "<|Ķ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50150": {
"content": "<|ķ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50151": {
"content": "<|ĸ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50152": {
"content": "[new_token1]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50153": {
"content": "[new_token2]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50154": {
"content": "<|Ļ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50155": {
"content": "<|ļ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50156": {
"content": "<|Ľ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50157": {
"content": "<|ľ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50158": {
"content": "<|Ŀ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50159": {
"content": "<|ŀ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50160": {
"content": "<|Ł|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50161": {
"content": "<|ł|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50162": {
"content": "<|Ń|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50163": {
"content": "<|Ûķ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50164": {
"content": "<|ÛĮ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50165": {
"content": "<|ا|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50166": {
"content": "<|ĠØ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50167": {
"content": "<|ÙĪ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50168": {
"content": "<|ÙĨ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50169": {
"content": "<|ر|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50170": {
"content": "<|Ú©|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50171": {
"content": "<|ĠÙ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50172": {
"content": "<|ت|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50173": {
"content": "<|اÙĨ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50174": {
"content": "<|Ûİ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50175": {
"content": "<|د|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50176": {
"content": "<|Ùħ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50177": {
"content": "<|Ġب|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50178": {
"content": "<|ÛĨ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50179": {
"content": "<|س|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50180": {
"content": "<|hu|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50181": {
"content": "<|ta|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50182": {
"content": "<|no|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50183": {
"content": "<|th|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50184": {
"content": "<|ur|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50185": {
"content": "<|hr|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50186": {
"content": "<|bg|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50187": {
"content": "<|lt|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50188": {
"content": "<|la|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50189": {
"content": "<|mi|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50190": {
"content": "<|ml|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50191": {
"content": "<|cy|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50192": {
"content": "<|sk|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50193": {
"content": "<|te|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50194": {
"content": "<|fa|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50195": {
"content": "<|lv|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50196": {
"content": "<|bn|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50197": {
"content": "<|sr|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50198": {
"content": "<|az|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50199": {
"content": "<|sl|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50200": {
"content": "<|kn|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50201": {
"content": "<|et|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50202": {
"content": "<|mk|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50203": {
"content": "<|br|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50204": {
"content": "<|eu|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50205": {
"content": "<|is|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50206": {
"content": "<|hy|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50207": {
"content": "<|ne|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50208": {
"content": "<|mn|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50209": {
"content": "<|bs|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50210": {
"content": "<|kk|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50211": {
"content": "<|sq|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50212": {
"content": "<|sw|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50213": {
"content": "<|gl|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50214": {
"content": "<|mr|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50215": {
"content": "<|pa|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50216": {
"content": "<|si|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50217": {
"content": "<|km|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50218": {
"content": "<|sn|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50219": {
"content": "<|yo|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50220": {
"content": "<|so|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50221": {
"content": "<|af|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50222": {
"content": "<|oc|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50223": {
"content": "<|ka|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50224": {
"content": "<|be|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50225": {
"content": "<|tg|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50226": {
"content": "<|sd|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50227": {
"content": "<|gu|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50228": {
"content": "<|am|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50229": {
"content": "<|yi|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50230": {
"content": "<|lo|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50231": {
"content": "<|uz|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50232": {
"content": "<|fo|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50233": {
"content": "<|ht|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50234": {
"content": "<|ps|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50235": {
"content": "<|tk|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50236": {
"content": "<|nn|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50237": {
"content": "<|mt|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50238": {
"content": "<|sa|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50239": {
"content": "<|lb|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50240": {
"content": "<|my|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50241": {
"content": "<|bo|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50242": {
"content": "<|tl|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50243": {
"content": "<|mg|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50244": {
"content": "<|as|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50245": {
"content": "<|tt|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50246": {
"content": "<|haw|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50247": {
"content": "<|ln|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50248": {
"content": "<|ha|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50249": {
"content": "<|ba|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50250": {
"content": "<|jw|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50251": {
"content": "<|su|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50252": {
"content": "<|translate|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50253": {
"content": "<|transcribe|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50254": {
"content": "<|startoflm|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50255": {
"content": "<|startofprev|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50256": {
"content": "<|nocaptions|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50257": {
"content": "<|notimestamps|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50258": {
"content": "<|startoftranscript|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50259": {
"content": "<|Ĺ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50260": {
"content": "<|ĺ|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<|endoftext|>",
"<|startoftranscript|>",
"<|¡|>",
"<|¢|>",
"<|£|>",
"<|¤|>",
"<|¥|>",
"<|¦|>",
"<|§|>",
"<|¨|>",
"<|©|>",
"<|ª|>",
"<|«|>",
"<|¬|>",
"<|®|>",
"<|¯|>",
"<|°|>",
"<|±|>",
"<|²|>",
"<|³|>",
"<|´|>",
"<|µ|>",
"<|¶|>",
"<|·|>",
"<|¸|>",
"<|¹|>",
"<|º|>",
"<|»|>",
"<|¼|>",
"<|½|>",
"<|¾|>",
"<|¿|>",
"<|À|>",
"<|Á|>",
"<|Â|>",
"<|Ã|>",
"<|Ä|>",
"<|Å|>",
"<|Æ|>",
"<|Ç|>",
"<|È|>",
"<|É|>",
"<|Ê|>",
"<|Ë|>",
"<|Ì|>",
"<|Í|>",
"<|Î|>",
"<|Ï|>",
"<|Ð|>",
"<|Ñ|>",
"<|Ò|>",
"<|Ó|>",
"<|Ô|>",
"<|Õ|>",
"<|Ö|>",
"<|×|>",
"<|Ø|>",
"<|Ù|>",
"<|Ú|>",
"<|Û|>",
"<|Ü|>",
"<|Ý|>",
"<|Þ|>",
"<|ß|>",
"<|à|>",
"<|á|>",
"<|â|>",
"<|ã|>",
"<|ä|>",
"<|å|>",
"<|æ|>",
"<|ç|>",
"<|è|>",
"<|é|>",
"<|ê|>",
"<|ë|>",
"<|ì|>",
"<|í|>",
"<|î|>",
"<|ï|>",
"<|ð|>",
"<|ñ|>",
"<|ò|>",
"<|ó|>",
"<|ô|>",
"<|õ|>",
"<|ö|>",
"<|÷|>",
"<|ø|>",
"<|ù|>",
"<|ú|>",
"<|û|>",
"<|ü|>",
"<|ý|>",
"<|þ|>",
"<|ÿ|>",
"<|Ā|>",
"<|ā|>",
"<|Ă|>",
"<|ă|>",
"<|Ą|>",
"<|ą|>",
"<|Ć|>",
"<|ć|>",
"<|Ĉ|>",
"<|ĉ|>",
"<|Ċ|>",
"<|ċ|>",
"<|Č|>",
"<|č|>",
"<|Ď|>",
"<|ď|>",
"<|Đ|>",
"<|đ|>",
"<|Ē|>",
"<|ē|>",
"<|Ĕ|>",
"<|ĕ|>",
"<|Ė|>",
"<|ė|>",
"<|Ę|>",
"<|ę|>",
"<|Ě|>",
"<|ě|>",
"<|Ĝ|>",
"<|ĝ|>",
"<|Ğ|>",
"<|ğ|>",
"<|Ġ|>",
"<|ġ|>",
"<|Ģ|>",
"<|ģ|>",
"<|Ĥ|>",
"<|ĥ|>",
"<|Ħ|>",
"<|ħ|>",
"<|Ĩ|>",
"<|ĩ|>",
"<|Ī|>",
"<|ī|>",
"<|Ĭ|>",
"<|ĭ|>",
"<|Į|>",
"<|į|>",
"<|İ|>",
"<|ı|>",
"<|IJ|>",
"<|ij|>",
"<|Ĵ|>",
"<|ĵ|>",
"<|Ķ|>",
"<|ķ|>",
"<|ĸ|>",
"<|Ĺ|>",
"<|ĺ|>",
"<|Ļ|>",
"<|ļ|>",
"<|Ľ|>",
"<|ľ|>",
"<|Ŀ|>",
"<|ŀ|>",
"<|Ł|>",
"<|ł|>",
"<|Ń|>",
"<|Ûķ|>",
"<|ÛĮ|>",
"<|ا|>",
"<|ĠØ|>",
"<|ÙĪ|>",
"<|ÙĨ|>",
"<|ر|>",
"<|Ú©|>",
"<|ĠÙ|>",
"<|ت|>",
"<|اÙĨ|>",
"<|Ûİ|>",
"<|د|>",
"<|Ùħ|>",
"<|Ġب|>",
"<|ÛĨ|>",
"<|س|>",
"<|translate|>",
"<|transcribe|>",
"<|startoflm|>",
"<|startofprev|>",
"<|nocaptions|>",
"<|notimestamps|>"
],
"bos_token": "<|endoftext|>",
"clean_up_tokenization_spaces": true,
"cls_token": "[CLS]",
"do_basic_tokenize": true,
"do_lower_case": true,
"eos_token": "<|endoftext|>",
"errors": "replace",
"mask_token": "[MASK]",
"model_max_length": 1024,
"never_split": null,
"pad_token": "<|endoftext|>",
"processor_class": "WhisperProcessor",
"return_attention_mask": false,
"sep_token": "[SEP]",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "<|endoftext|>"
}