nllb-200-distilled-400M-v1 / tokenizer_config.json
igorktech's picture
Upload tokenizer
dfa49b6 verified
raw
history blame contribute delete
No virus
40.1 kB
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50000": {
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"50001": {
"content": "ace_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50002": {
"content": "ace_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50003": {
"content": "acm_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50004": {
"content": "acq_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50005": {
"content": "aeb_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50006": {
"content": "afr_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50007": {
"content": "ajp_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50008": {
"content": "aka_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50009": {
"content": "als_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50010": {
"content": "amh_Ethi",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50011": {
"content": "apc_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50012": {
"content": "arb_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50013": {
"content": "arg_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50014": {
"content": "arn_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50015": {
"content": "ars_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50016": {
"content": "ary_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50017": {
"content": "arz_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50018": {
"content": "asm_Beng",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50019": {
"content": "ast_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50020": {
"content": "awa_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50021": {
"content": "ayr_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50022": {
"content": "azb_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50023": {
"content": "azj_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50024": {
"content": "bak_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50025": {
"content": "bam_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50026": {
"content": "ban_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50027": {
"content": "bel_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50028": {
"content": "bem_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50029": {
"content": "ben_Beng",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50030": {
"content": "bho_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50031": {
"content": "bjn_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50032": {
"content": "bjn_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50033": {
"content": "bod_Tibt",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50034": {
"content": "bos_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50035": {
"content": "bug_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50036": {
"content": "bul_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50037": {
"content": "cat_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50038": {
"content": "ceb_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50039": {
"content": "ces_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50040": {
"content": "cjk_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50041": {
"content": "ckb_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50042": {
"content": "crh_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50043": {
"content": "cym_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50044": {
"content": "dan_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50045": {
"content": "deu_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50046": {
"content": "dik_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50047": {
"content": "dyu_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50048": {
"content": "dzo_Tibt",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50049": {
"content": "ell_Grek",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50050": {
"content": "eng_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50051": {
"content": "epo_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50052": {
"content": "est_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50053": {
"content": "eus_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50054": {
"content": "ewe_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50055": {
"content": "fao_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50056": {
"content": "fij_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50057": {
"content": "fin_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50058": {
"content": "fon_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50059": {
"content": "fra_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50060": {
"content": "fur_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50061": {
"content": "fuv_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50062": {
"content": "gaz_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50063": {
"content": "gla_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50064": {
"content": "gle_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50065": {
"content": "glg_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50066": {
"content": "grn_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50067": {
"content": "guj_Gujr",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50068": {
"content": "hat_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50069": {
"content": "hau_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50070": {
"content": "heb_Hebr",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50071": {
"content": "hin_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50072": {
"content": "hne_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50073": {
"content": "hrv_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50074": {
"content": "hun_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50075": {
"content": "hye_Armn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50076": {
"content": "ibo_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50077": {
"content": "ilo_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50078": {
"content": "ind_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50079": {
"content": "isl_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50080": {
"content": "ita_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50081": {
"content": "jav_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50082": {
"content": "jpn_Jpan",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50083": {
"content": "kab_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50084": {
"content": "kac_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50085": {
"content": "kam_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50086": {
"content": "kan_Knda",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50087": {
"content": "kas_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50088": {
"content": "kas_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50089": {
"content": "kat_Geor",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50090": {
"content": "kaz_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50091": {
"content": "kbp_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50092": {
"content": "kea_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50093": {
"content": "khk_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50094": {
"content": "khm_Khmr",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50095": {
"content": "kik_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50096": {
"content": "kin_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50097": {
"content": "kir_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50098": {
"content": "kmb_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50099": {
"content": "kmr_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50100": {
"content": "knc_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50101": {
"content": "knc_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50102": {
"content": "kon_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50103": {
"content": "kor_Hang",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50104": {
"content": "lao_Laoo",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50105": {
"content": "lij_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50106": {
"content": "lim_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50107": {
"content": "lin_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50108": {
"content": "lit_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50109": {
"content": "lmo_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50110": {
"content": "ltg_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50111": {
"content": "ltz_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50112": {
"content": "lua_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50113": {
"content": "lug_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50114": {
"content": "luo_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50115": {
"content": "lus_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50116": {
"content": "lvs_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50117": {
"content": "mag_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50118": {
"content": "mai_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50119": {
"content": "mal_Mlym",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50120": {
"content": "mar_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50121": {
"content": "min_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50122": {
"content": "mkd_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50123": {
"content": "mlt_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50124": {
"content": "mni_Beng",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50125": {
"content": "mos_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50126": {
"content": "mri_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50127": {
"content": "mya_Mymr",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50128": {
"content": "nld_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50129": {
"content": "nno_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50130": {
"content": "nob_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50131": {
"content": "npi_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50132": {
"content": "nso_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50133": {
"content": "nus_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50134": {
"content": "nya_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50135": {
"content": "oci_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50136": {
"content": "ory_Orya",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50137": {
"content": "pag_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50138": {
"content": "pan_Guru",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50139": {
"content": "pap_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50140": {
"content": "pbt_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50141": {
"content": "pes_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50142": {
"content": "plt_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50143": {
"content": "pol_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50144": {
"content": "por_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50145": {
"content": "prs_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50146": {
"content": "quy_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50147": {
"content": "ron_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50148": {
"content": "run_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50149": {
"content": "rus_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50150": {
"content": "sag_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50151": {
"content": "san_Deva",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50152": {
"content": "sat_Beng",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50153": {
"content": "scn_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50154": {
"content": "shn_Mymr",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50155": {
"content": "sin_Sinh",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50156": {
"content": "slk_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50157": {
"content": "slv_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50158": {
"content": "smo_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50159": {
"content": "sna_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50160": {
"content": "snd_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50161": {
"content": "som_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50162": {
"content": "sot_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50163": {
"content": "spa_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50164": {
"content": "srd_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50165": {
"content": "srp_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50166": {
"content": "ssw_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50167": {
"content": "sun_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50168": {
"content": "swe_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50169": {
"content": "swh_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50170": {
"content": "szl_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50171": {
"content": "tam_Taml",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50172": {
"content": "taq_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50173": {
"content": "taq_Tfng",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50174": {
"content": "tat_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50175": {
"content": "tel_Telu",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50176": {
"content": "tgk_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50177": {
"content": "tgl_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50178": {
"content": "tha_Thai",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50179": {
"content": "tir_Ethi",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50180": {
"content": "tpi_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50181": {
"content": "tsn_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50182": {
"content": "tso_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50183": {
"content": "tuk_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50184": {
"content": "tum_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50185": {
"content": "tur_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50186": {
"content": "twi_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50187": {
"content": "tzm_Tfng",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50188": {
"content": "uig_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50189": {
"content": "ukr_Cyrl",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50190": {
"content": "umb_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50191": {
"content": "urd_Arab",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50192": {
"content": "uzn_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50193": {
"content": "vec_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50194": {
"content": "vie_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50195": {
"content": "war_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50196": {
"content": "wol_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50197": {
"content": "xho_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50198": {
"content": "ydd_Hebr",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50199": {
"content": "yor_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50200": {
"content": "yue_Hant",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50201": {
"content": "zho_Hans",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50202": {
"content": "zho_Hant",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50203": {
"content": "zsm_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50204": {
"content": "zul_Latn",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"ace_Arab",
"ace_Latn",
"acm_Arab",
"acq_Arab",
"aeb_Arab",
"afr_Latn",
"ajp_Arab",
"aka_Latn",
"als_Latn",
"amh_Ethi",
"apc_Arab",
"arb_Arab",
"arg_Latn",
"arn_Latn",
"ars_Arab",
"ary_Arab",
"arz_Arab",
"asm_Beng",
"ast_Latn",
"awa_Deva",
"ayr_Latn",
"azb_Arab",
"azj_Latn",
"bak_Cyrl",
"bam_Latn",
"ban_Latn",
"bel_Cyrl",
"bem_Latn",
"ben_Beng",
"bho_Deva",
"bjn_Arab",
"bjn_Latn",
"bod_Tibt",
"bos_Latn",
"bug_Latn",
"bul_Cyrl",
"cat_Latn",
"ceb_Latn",
"ces_Latn",
"cjk_Latn",
"ckb_Arab",
"crh_Latn",
"cym_Latn",
"dan_Latn",
"deu_Latn",
"dik_Latn",
"dyu_Latn",
"dzo_Tibt",
"ell_Grek",
"eng_Latn",
"epo_Latn",
"est_Latn",
"eus_Latn",
"ewe_Latn",
"fao_Latn",
"fij_Latn",
"fin_Latn",
"fon_Latn",
"fra_Latn",
"fur_Latn",
"fuv_Latn",
"gaz_Latn",
"gla_Latn",
"gle_Latn",
"glg_Latn",
"grn_Latn",
"guj_Gujr",
"hat_Latn",
"hau_Latn",
"heb_Hebr",
"hin_Deva",
"hne_Deva",
"hrv_Latn",
"hun_Latn",
"hye_Armn",
"ibo_Latn",
"ilo_Latn",
"ind_Latn",
"isl_Latn",
"ita_Latn",
"jav_Latn",
"jpn_Jpan",
"kab_Latn",
"kac_Latn",
"kam_Latn",
"kan_Knda",
"kas_Arab",
"kas_Deva",
"kat_Geor",
"kaz_Cyrl",
"kbp_Latn",
"kea_Latn",
"khk_Cyrl",
"khm_Khmr",
"kik_Latn",
"kin_Latn",
"kir_Cyrl",
"kmb_Latn",
"kmr_Latn",
"knc_Arab",
"knc_Latn",
"kon_Latn",
"kor_Hang",
"lao_Laoo",
"lij_Latn",
"lim_Latn",
"lin_Latn",
"lit_Latn",
"lmo_Latn",
"ltg_Latn",
"ltz_Latn",
"lua_Latn",
"lug_Latn",
"luo_Latn",
"lus_Latn",
"lvs_Latn",
"mag_Deva",
"mai_Deva",
"mal_Mlym",
"mar_Deva",
"min_Latn",
"mkd_Cyrl",
"mlt_Latn",
"mni_Beng",
"mos_Latn",
"mri_Latn",
"mya_Mymr",
"nld_Latn",
"nno_Latn",
"nob_Latn",
"npi_Deva",
"nso_Latn",
"nus_Latn",
"nya_Latn",
"oci_Latn",
"ory_Orya",
"pag_Latn",
"pan_Guru",
"pap_Latn",
"pbt_Arab",
"pes_Arab",
"plt_Latn",
"pol_Latn",
"por_Latn",
"prs_Arab",
"quy_Latn",
"ron_Latn",
"run_Latn",
"rus_Cyrl",
"sag_Latn",
"san_Deva",
"sat_Beng",
"scn_Latn",
"shn_Mymr",
"sin_Sinh",
"slk_Latn",
"slv_Latn",
"smo_Latn",
"sna_Latn",
"snd_Arab",
"som_Latn",
"sot_Latn",
"spa_Latn",
"srd_Latn",
"srp_Cyrl",
"ssw_Latn",
"sun_Latn",
"swe_Latn",
"swh_Latn",
"szl_Latn",
"tam_Taml",
"taq_Latn",
"taq_Tfng",
"tat_Cyrl",
"tel_Telu",
"tgk_Cyrl",
"tgl_Latn",
"tha_Thai",
"tir_Ethi",
"tpi_Latn",
"tsn_Latn",
"tso_Latn",
"tuk_Latn",
"tum_Latn",
"tur_Latn",
"twi_Latn",
"tzm_Tfng",
"uig_Arab",
"ukr_Cyrl",
"umb_Latn",
"urd_Arab",
"uzn_Latn",
"vec_Latn",
"vie_Latn",
"war_Latn",
"wol_Latn",
"xho_Latn",
"ydd_Hebr",
"yor_Latn",
"yue_Hant",
"zho_Hans",
"zho_Hant",
"zsm_Latn",
"zul_Latn"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"cls_token": "<s>",
"eos_token": "</s>",
"legacy_behaviour": false,
"mask_token": "<mask>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"sep_token": "</s>",
"sp_model_kwargs": {},
"src_lang": "eng_Latn",
"tgt_lang": "eng_Latn",
"tokenizer_class": "NllbTokenizer",
"unk_token": "<unk>"
}