Upload tokenizer
4b7a1e6
verified
|
{ |
|
"<mask>": 40000, |
|
"ace_Arab": 40001, |
|
"ace_Latn": 40002, |
|
"acm_Arab": 40003, |
|
"acq_Arab": 40004, |
|
"aeb_Arab": 40005, |
|
"afr_Latn": 40006, |
|
"ajp_Arab": 40007, |
|
"aka_Latn": 40008, |
|
"als_Latn": 40009, |
|
"amh_Ethi": 40010, |
|
"apc_Arab": 40011, |
|
"arb_Arab": 40012, |
|
"arg_Latn": 40013, |
|
"arn_Latn": 40014, |
|
"ars_Arab": 40015, |
|
"ary_Arab": 40016, |
|
"arz_Arab": 40017, |
|
"asm_Beng": 40018, |
|
"ast_Latn": 40019, |
|
"awa_Deva": 40020, |
|
"ayr_Latn": 40021, |
|
"azb_Arab": 40022, |
|
"azj_Latn": 40023, |
|
"bak_Cyrl": 40024, |
|
"bam_Latn": 40025, |
|
"ban_Latn": 40026, |
|
"bel_Cyrl": 40027, |
|
"bem_Latn": 40028, |
|
"ben_Beng": 40029, |
|
"bho_Deva": 40030, |
|
"bjn_Arab": 40031, |
|
"bjn_Latn": 40032, |
|
"bod_Tibt": 40033, |
|
"bos_Latn": 40034, |
|
"bug_Latn": 40035, |
|
"bul_Cyrl": 40036, |
|
"cat_Latn": 40037, |
|
"ceb_Latn": 40038, |
|
"ces_Latn": 40039, |
|
"cjk_Latn": 40040, |
|
"ckb_Arab": 40041, |
|
"crh_Latn": 40042, |
|
"cym_Latn": 40043, |
|
"dan_Latn": 40044, |
|
"deu_Latn": 40045, |
|
"dik_Latn": 40046, |
|
"dyu_Latn": 40047, |
|
"dzo_Tibt": 40048, |
|
"ell_Grek": 40049, |
|
"eng_Latn": 40050, |
|
"epo_Latn": 40051, |
|
"est_Latn": 40052, |
|
"eus_Latn": 40053, |
|
"ewe_Latn": 40054, |
|
"fao_Latn": 40055, |
|
"fij_Latn": 40056, |
|
"fin_Latn": 40057, |
|
"fon_Latn": 40058, |
|
"fra_Latn": 40059, |
|
"fur_Latn": 40060, |
|
"fuv_Latn": 40061, |
|
"gaz_Latn": 40062, |
|
"gla_Latn": 40063, |
|
"gle_Latn": 40064, |
|
"glg_Latn": 40065, |
|
"grn_Latn": 40066, |
|
"guj_Gujr": 40067, |
|
"hat_Latn": 40068, |
|
"hau_Latn": 40069, |
|
"heb_Hebr": 40070, |
|
"hin_Deva": 40071, |
|
"hne_Deva": 40072, |
|
"hrv_Latn": 40073, |
|
"hun_Latn": 40074, |
|
"hye_Armn": 40075, |
|
"ibo_Latn": 40076, |
|
"ilo_Latn": 40077, |
|
"ind_Latn": 40078, |
|
"isl_Latn": 40079, |
|
"ita_Latn": 40080, |
|
"jav_Latn": 40081, |
|
"jpn_Jpan": 40082, |
|
"kab_Latn": 40083, |
|
"kac_Latn": 40084, |
|
"kam_Latn": 40085, |
|
"kan_Knda": 40086, |
|
"kas_Arab": 40087, |
|
"kas_Deva": 40088, |
|
"kat_Geor": 40089, |
|
"kaz_Cyrl": 40090, |
|
"kbp_Latn": 40091, |
|
"kea_Latn": 40092, |
|
"khk_Cyrl": 40093, |
|
"khm_Khmr": 40094, |
|
"kik_Latn": 40095, |
|
"kin_Latn": 40096, |
|
"kir_Cyrl": 40097, |
|
"kmb_Latn": 40098, |
|
"kmr_Latn": 40099, |
|
"knc_Arab": 40100, |
|
"knc_Latn": 40101, |
|
"kon_Latn": 40102, |
|
"kor_Hang": 40103, |
|
"lao_Laoo": 40104, |
|
"lij_Latn": 40105, |
|
"lim_Latn": 40106, |
|
"lin_Latn": 40107, |
|
"lit_Latn": 40108, |
|
"lmo_Latn": 40109, |
|
"ltg_Latn": 40110, |
|
"ltz_Latn": 40111, |
|
"lua_Latn": 40112, |
|
"lug_Latn": 40113, |
|
"luo_Latn": 40114, |
|
"lus_Latn": 40115, |
|
"lvs_Latn": 40116, |
|
"mag_Deva": 40117, |
|
"mai_Deva": 40118, |
|
"mal_Mlym": 40119, |
|
"mar_Deva": 40120, |
|
"min_Latn": 40121, |
|
"mkd_Cyrl": 40122, |
|
"mlt_Latn": 40123, |
|
"mni_Beng": 40124, |
|
"mos_Latn": 40125, |
|
"mri_Latn": 40126, |
|
"mya_Mymr": 40127, |
|
"nld_Latn": 40128, |
|
"nno_Latn": 40129, |
|
"nob_Latn": 40130, |
|
"npi_Deva": 40131, |
|
"nso_Latn": 40132, |
|
"nus_Latn": 40133, |
|
"nya_Latn": 40134, |
|
"oci_Latn": 40135, |
|
"ory_Orya": 40136, |
|
"pag_Latn": 40137, |
|
"pan_Guru": 40138, |
|
"pap_Latn": 40139, |
|
"pbt_Arab": 40140, |
|
"pes_Arab": 40141, |
|
"plt_Latn": 40142, |
|
"pol_Latn": 40143, |
|
"por_Latn": 40144, |
|
"prs_Arab": 40145, |
|
"quy_Latn": 40146, |
|
"ron_Latn": 40147, |
|
"run_Latn": 40148, |
|
"rus_Cyrl": 40149, |
|
"sag_Latn": 40150, |
|
"san_Deva": 40151, |
|
"sat_Beng": 40152, |
|
"scn_Latn": 40153, |
|
"shn_Mymr": 40154, |
|
"sin_Sinh": 40155, |
|
"slk_Latn": 40156, |
|
"slv_Latn": 40157, |
|
"smo_Latn": 40158, |
|
"sna_Latn": 40159, |
|
"snd_Arab": 40160, |
|
"som_Latn": 40161, |
|
"sot_Latn": 40162, |
|
"spa_Latn": 40163, |
|
"srd_Latn": 40164, |
|
"srp_Cyrl": 40165, |
|
"ssw_Latn": 40166, |
|
"sun_Latn": 40167, |
|
"swe_Latn": 40168, |
|
"swh_Latn": 40169, |
|
"szl_Latn": 40170, |
|
"tam_Taml": 40171, |
|
"taq_Latn": 40172, |
|
"taq_Tfng": 40173, |
|
"tat_Cyrl": 40174, |
|
"tel_Telu": 40175, |
|
"tgk_Cyrl": 40176, |
|
"tgl_Latn": 40177, |
|
"tha_Thai": 40178, |
|
"tir_Ethi": 40179, |
|
"tpi_Latn": 40180, |
|
"tsn_Latn": 40181, |
|
"tso_Latn": 40182, |
|
"tuk_Latn": 40183, |
|
"tum_Latn": 40184, |
|
"tur_Latn": 40185, |
|
"twi_Latn": 40186, |
|
"tzm_Tfng": 40187, |
|
"uig_Arab": 40188, |
|
"ukr_Cyrl": 40189, |
|
"umb_Latn": 40190, |
|
"urd_Arab": 40191, |
|
"uzn_Latn": 40192, |
|
"vec_Latn": 40193, |
|
"vie_Latn": 40194, |
|
"war_Latn": 40195, |
|
"wol_Latn": 40196, |
|
"xho_Latn": 40197, |
|
"ydd_Hebr": 40198, |
|
"yor_Latn": 40199, |
|
"yue_Hant": 40200, |
|
"zho_Hans": 40201, |
|
"zho_Hant": 40202, |
|
"zsm_Latn": 40203, |
|
"zul_Latn": 40204 |
|
} |
|
|