{ | |
"added_tokens_decoder": { | |
"0": { | |
"content": "<s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"1": { | |
"content": "<pad>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"2": { | |
"content": "</s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"3": { | |
"content": "<unk>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50000": { | |
"content": "<mask>", | |
"lstrip": true, | |
"normalized": true, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50001": { | |
"content": "ace_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50002": { | |
"content": "ace_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50003": { | |
"content": "acm_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50004": { | |
"content": "acq_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50005": { | |
"content": "aeb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50006": { | |
"content": "afr_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50007": { | |
"content": "ajp_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50008": { | |
"content": "aka_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50009": { | |
"content": "als_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50010": { | |
"content": "amh_Ethi", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50011": { | |
"content": "apc_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50012": { | |
"content": "arb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50013": { | |
"content": "arg_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50014": { | |
"content": "arn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50015": { | |
"content": "ars_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50016": { | |
"content": "ary_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50017": { | |
"content": "arz_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50018": { | |
"content": "asm_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50019": { | |
"content": "ast_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50020": { | |
"content": "awa_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50021": { | |
"content": "ayr_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50022": { | |
"content": "azb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50023": { | |
"content": "azj_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50024": { | |
"content": "bak_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50025": { | |
"content": "bam_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50026": { | |
"content": "ban_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50027": { | |
"content": "bel_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50028": { | |
"content": "bem_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50029": { | |
"content": "ben_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50030": { | |
"content": "bho_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50031": { | |
"content": "bjn_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50032": { | |
"content": "bjn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50033": { | |
"content": "bod_Tibt", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50034": { | |
"content": "bos_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50035": { | |
"content": "bug_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50036": { | |
"content": "bul_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50037": { | |
"content": "cat_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50038": { | |
"content": "ceb_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50039": { | |
"content": "ces_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50040": { | |
"content": "cjk_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50041": { | |
"content": "ckb_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50042": { | |
"content": "crh_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50043": { | |
"content": "cym_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50044": { | |
"content": "dan_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50045": { | |
"content": "deu_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50046": { | |
"content": "dik_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50047": { | |
"content": "dyu_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50048": { | |
"content": "dzo_Tibt", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50049": { | |
"content": "ell_Grek", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50050": { | |
"content": "eng_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50051": { | |
"content": "epo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50052": { | |
"content": "est_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50053": { | |
"content": "eus_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50054": { | |
"content": "ewe_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50055": { | |
"content": "fao_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50056": { | |
"content": "fij_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50057": { | |
"content": "fin_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50058": { | |
"content": "fon_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50059": { | |
"content": "fra_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50060": { | |
"content": "fur_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50061": { | |
"content": "fuv_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50062": { | |
"content": "gaz_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50063": { | |
"content": "gla_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50064": { | |
"content": "gle_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50065": { | |
"content": "glg_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50066": { | |
"content": "grn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50067": { | |
"content": "guj_Gujr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50068": { | |
"content": "hat_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50069": { | |
"content": "hau_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50070": { | |
"content": "heb_Hebr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50071": { | |
"content": "hin_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50072": { | |
"content": "hne_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50073": { | |
"content": "hrv_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50074": { | |
"content": "hun_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50075": { | |
"content": "hye_Armn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50076": { | |
"content": "ibo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50077": { | |
"content": "ilo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50078": { | |
"content": "ind_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50079": { | |
"content": "isl_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50080": { | |
"content": "ita_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50081": { | |
"content": "jav_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50082": { | |
"content": "jpn_Jpan", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50083": { | |
"content": "kab_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50084": { | |
"content": "kac_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50085": { | |
"content": "kam_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50086": { | |
"content": "kan_Knda", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50087": { | |
"content": "kas_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50088": { | |
"content": "kas_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50089": { | |
"content": "kat_Geor", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50090": { | |
"content": "kaz_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50091": { | |
"content": "kbp_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50092": { | |
"content": "kea_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50093": { | |
"content": "khk_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50094": { | |
"content": "khm_Khmr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50095": { | |
"content": "kik_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50096": { | |
"content": "kin_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50097": { | |
"content": "kir_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50098": { | |
"content": "kmb_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50099": { | |
"content": "kmr_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50100": { | |
"content": "knc_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50101": { | |
"content": "knc_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50102": { | |
"content": "kon_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50103": { | |
"content": "kor_Hang", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50104": { | |
"content": "lao_Laoo", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50105": { | |
"content": "lij_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50106": { | |
"content": "lim_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50107": { | |
"content": "lin_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50108": { | |
"content": "lit_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50109": { | |
"content": "lmo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50110": { | |
"content": "ltg_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50111": { | |
"content": "ltz_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50112": { | |
"content": "lua_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50113": { | |
"content": "lug_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50114": { | |
"content": "luo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50115": { | |
"content": "lus_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50116": { | |
"content": "lvs_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50117": { | |
"content": "mag_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50118": { | |
"content": "mai_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50119": { | |
"content": "mal_Mlym", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50120": { | |
"content": "mar_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50121": { | |
"content": "min_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50122": { | |
"content": "mkd_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50123": { | |
"content": "mlt_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50124": { | |
"content": "mni_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50125": { | |
"content": "mos_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50126": { | |
"content": "mri_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50127": { | |
"content": "mya_Mymr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50128": { | |
"content": "nld_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50129": { | |
"content": "nno_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50130": { | |
"content": "nob_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50131": { | |
"content": "npi_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50132": { | |
"content": "nso_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50133": { | |
"content": "nus_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50134": { | |
"content": "nya_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50135": { | |
"content": "oci_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50136": { | |
"content": "ory_Orya", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50137": { | |
"content": "pag_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50138": { | |
"content": "pan_Guru", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50139": { | |
"content": "pap_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50140": { | |
"content": "pbt_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50141": { | |
"content": "pes_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50142": { | |
"content": "plt_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50143": { | |
"content": "pol_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50144": { | |
"content": "por_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50145": { | |
"content": "prs_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50146": { | |
"content": "quy_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50147": { | |
"content": "ron_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50148": { | |
"content": "run_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50149": { | |
"content": "rus_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50150": { | |
"content": "sag_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50151": { | |
"content": "san_Deva", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50152": { | |
"content": "sat_Beng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50153": { | |
"content": "scn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50154": { | |
"content": "shn_Mymr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50155": { | |
"content": "sin_Sinh", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50156": { | |
"content": "slk_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50157": { | |
"content": "slv_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50158": { | |
"content": "smo_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50159": { | |
"content": "sna_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50160": { | |
"content": "snd_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50161": { | |
"content": "som_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50162": { | |
"content": "sot_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50163": { | |
"content": "spa_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50164": { | |
"content": "srd_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50165": { | |
"content": "srp_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50166": { | |
"content": "ssw_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50167": { | |
"content": "sun_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50168": { | |
"content": "swe_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50169": { | |
"content": "swh_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50170": { | |
"content": "szl_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50171": { | |
"content": "tam_Taml", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50172": { | |
"content": "taq_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50173": { | |
"content": "taq_Tfng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50174": { | |
"content": "tat_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50175": { | |
"content": "tel_Telu", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50176": { | |
"content": "tgk_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50177": { | |
"content": "tgl_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50178": { | |
"content": "tha_Thai", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50179": { | |
"content": "tir_Ethi", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50180": { | |
"content": "tpi_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50181": { | |
"content": "tsn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50182": { | |
"content": "tso_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50183": { | |
"content": "tuk_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50184": { | |
"content": "tum_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50185": { | |
"content": "tur_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50186": { | |
"content": "twi_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50187": { | |
"content": "tzm_Tfng", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50188": { | |
"content": "uig_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50189": { | |
"content": "ukr_Cyrl", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50190": { | |
"content": "umb_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50191": { | |
"content": "urd_Arab", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50192": { | |
"content": "uzn_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50193": { | |
"content": "vec_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50194": { | |
"content": "vie_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50195": { | |
"content": "war_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50196": { | |
"content": "wol_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50197": { | |
"content": "xho_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50198": { | |
"content": "ydd_Hebr", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50199": { | |
"content": "yor_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50200": { | |
"content": "yue_Hant", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50201": { | |
"content": "zho_Hans", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50202": { | |
"content": "zho_Hant", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50203": { | |
"content": "zsm_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"50204": { | |
"content": "zul_Latn", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
} | |
}, | |
"additional_special_tokens": [ | |
"ace_Arab", | |
"ace_Latn", | |
"acm_Arab", | |
"acq_Arab", | |
"aeb_Arab", | |
"afr_Latn", | |
"ajp_Arab", | |
"aka_Latn", | |
"als_Latn", | |
"amh_Ethi", | |
"apc_Arab", | |
"arb_Arab", | |
"arg_Latn", | |
"arn_Latn", | |
"ars_Arab", | |
"ary_Arab", | |
"arz_Arab", | |
"asm_Beng", | |
"ast_Latn", | |
"awa_Deva", | |
"ayr_Latn", | |
"azb_Arab", | |
"azj_Latn", | |
"bak_Cyrl", | |
"bam_Latn", | |
"ban_Latn", | |
"bel_Cyrl", | |
"bem_Latn", | |
"ben_Beng", | |
"bho_Deva", | |
"bjn_Arab", | |
"bjn_Latn", | |
"bod_Tibt", | |
"bos_Latn", | |
"bug_Latn", | |
"bul_Cyrl", | |
"cat_Latn", | |
"ceb_Latn", | |
"ces_Latn", | |
"cjk_Latn", | |
"ckb_Arab", | |
"crh_Latn", | |
"cym_Latn", | |
"dan_Latn", | |
"deu_Latn", | |
"dik_Latn", | |
"dyu_Latn", | |
"dzo_Tibt", | |
"ell_Grek", | |
"eng_Latn", | |
"epo_Latn", | |
"est_Latn", | |
"eus_Latn", | |
"ewe_Latn", | |
"fao_Latn", | |
"fij_Latn", | |
"fin_Latn", | |
"fon_Latn", | |
"fra_Latn", | |
"fur_Latn", | |
"fuv_Latn", | |
"gaz_Latn", | |
"gla_Latn", | |
"gle_Latn", | |
"glg_Latn", | |
"grn_Latn", | |
"guj_Gujr", | |
"hat_Latn", | |
"hau_Latn", | |
"heb_Hebr", | |
"hin_Deva", | |
"hne_Deva", | |
"hrv_Latn", | |
"hun_Latn", | |
"hye_Armn", | |
"ibo_Latn", | |
"ilo_Latn", | |
"ind_Latn", | |
"isl_Latn", | |
"ita_Latn", | |
"jav_Latn", | |
"jpn_Jpan", | |
"kab_Latn", | |
"kac_Latn", | |
"kam_Latn", | |
"kan_Knda", | |
"kas_Arab", | |
"kas_Deva", | |
"kat_Geor", | |
"kaz_Cyrl", | |
"kbp_Latn", | |
"kea_Latn", | |
"khk_Cyrl", | |
"khm_Khmr", | |
"kik_Latn", | |
"kin_Latn", | |
"kir_Cyrl", | |
"kmb_Latn", | |
"kmr_Latn", | |
"knc_Arab", | |
"knc_Latn", | |
"kon_Latn", | |
"kor_Hang", | |
"lao_Laoo", | |
"lij_Latn", | |
"lim_Latn", | |
"lin_Latn", | |
"lit_Latn", | |
"lmo_Latn", | |
"ltg_Latn", | |
"ltz_Latn", | |
"lua_Latn", | |
"lug_Latn", | |
"luo_Latn", | |
"lus_Latn", | |
"lvs_Latn", | |
"mag_Deva", | |
"mai_Deva", | |
"mal_Mlym", | |
"mar_Deva", | |
"min_Latn", | |
"mkd_Cyrl", | |
"mlt_Latn", | |
"mni_Beng", | |
"mos_Latn", | |
"mri_Latn", | |
"mya_Mymr", | |
"nld_Latn", | |
"nno_Latn", | |
"nob_Latn", | |
"npi_Deva", | |
"nso_Latn", | |
"nus_Latn", | |
"nya_Latn", | |
"oci_Latn", | |
"ory_Orya", | |
"pag_Latn", | |
"pan_Guru", | |
"pap_Latn", | |
"pbt_Arab", | |
"pes_Arab", | |
"plt_Latn", | |
"pol_Latn", | |
"por_Latn", | |
"prs_Arab", | |
"quy_Latn", | |
"ron_Latn", | |
"run_Latn", | |
"rus_Cyrl", | |
"sag_Latn", | |
"san_Deva", | |
"sat_Beng", | |
"scn_Latn", | |
"shn_Mymr", | |
"sin_Sinh", | |
"slk_Latn", | |
"slv_Latn", | |
"smo_Latn", | |
"sna_Latn", | |
"snd_Arab", | |
"som_Latn", | |
"sot_Latn", | |
"spa_Latn", | |
"srd_Latn", | |
"srp_Cyrl", | |
"ssw_Latn", | |
"sun_Latn", | |
"swe_Latn", | |
"swh_Latn", | |
"szl_Latn", | |
"tam_Taml", | |
"taq_Latn", | |
"taq_Tfng", | |
"tat_Cyrl", | |
"tel_Telu", | |
"tgk_Cyrl", | |
"tgl_Latn", | |
"tha_Thai", | |
"tir_Ethi", | |
"tpi_Latn", | |
"tsn_Latn", | |
"tso_Latn", | |
"tuk_Latn", | |
"tum_Latn", | |
"tur_Latn", | |
"twi_Latn", | |
"tzm_Tfng", | |
"uig_Arab", | |
"ukr_Cyrl", | |
"umb_Latn", | |
"urd_Arab", | |
"uzn_Latn", | |
"vec_Latn", | |
"vie_Latn", | |
"war_Latn", | |
"wol_Latn", | |
"xho_Latn", | |
"ydd_Hebr", | |
"yor_Latn", | |
"yue_Hant", | |
"zho_Hans", | |
"zho_Hant", | |
"zsm_Latn", | |
"zul_Latn" | |
], | |
"bos_token": "<s>", | |
"clean_up_tokenization_spaces": true, | |
"cls_token": "<s>", | |
"eos_token": "</s>", | |
"legacy_behaviour": false, | |
"mask_token": "<mask>", | |
"model_max_length": 1000000000000000019884624838656, | |
"pad_token": "<pad>", | |
"sep_token": "</s>", | |
"sp_model_kwargs": {}, | |
"src_lang": "arn_Latn", | |
"tgt_lang": null, | |
"tokenizer_class": "NllbTokenizer", | |
"unk_token": "<unk>" | |
} | |