# Spaces: Running
# (page-header residue from the file viewer this module was copied from; not part of the code)
# Source: https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/flores200_codes.py
# Language table: one entry per line, "<language name> <code>".
# The code is always the final whitespace-delimited token on the line; the
# language name is everything before it and may itself contain spaces and
# parentheses (e.g. "Acehnese (Arabic script)").
codes_as_string = '''Acehnese (Arabic script) ace_Arab
Acehnese (Latin script) ace_Latn
Mesopotamian Arabic acm_Arab
Ta’izzi-Adeni Arabic acq_Arab
Tunisian Arabic aeb_Arab
Afrikaans afr_Latn
South Levantine Arabic ajp_Arab
Akan aka_Latn
Amharic amh_Ethi
North Levantine Arabic apc_Arab
Modern Standard Arabic arb_Arab
Modern Standard Arabic (Romanized) arb_Latn
Najdi Arabic ars_Arab
Moroccan Arabic ary_Arab
Egyptian Arabic arz_Arab
Assamese asm_Beng
Asturian ast_Latn
Awadhi awa_Deva
Central Aymara ayr_Latn
South Azerbaijani azb_Arab
North Azerbaijani azj_Latn
Bashkir bak_Cyrl
Bambara bam_Latn
Balinese ban_Latn
Belarusian bel_Cyrl
Bemba bem_Latn
Bengali ben_Beng
Bhojpuri bho_Deva
Banjar (Arabic script) bjn_Arab
Banjar (Latin script) bjn_Latn
Standard Tibetan bod_Tibt
Bosnian bos_Latn
Buginese bug_Latn
Bulgarian bul_Cyrl
Catalan cat_Latn
Cebuano ceb_Latn
Czech ces_Latn
Chokwe cjk_Latn
Central Kurdish ckb_Arab
Crimean Tatar crh_Latn
Welsh cym_Latn
Danish dan_Latn
German deu_Latn
Southwestern Dinka dik_Latn
Dyula dyu_Latn
Dzongkha dzo_Tibt
Greek ell_Grek
English eng_Latn
Esperanto epo_Latn
Estonian est_Latn
Basque eus_Latn
Ewe ewe_Latn
Faroese fao_Latn
Fijian fij_Latn
Finnish fin_Latn
Fon fon_Latn
French fra_Latn
Friulian fur_Latn
Nigerian Fulfulde fuv_Latn
Scottish Gaelic gla_Latn
Irish gle_Latn
Galician glg_Latn
Guarani grn_Latn
Gujarati guj_Gujr
Haitian Creole hat_Latn
Hausa hau_Latn
Hebrew heb_Hebr
Hindi hin_Deva
Chhattisgarhi hne_Deva
Croatian hrv_Latn
Hungarian hun_Latn
Armenian hye_Armn
Igbo ibo_Latn
Ilocano ilo_Latn
Indonesian ind_Latn
Icelandic isl_Latn
Italian ita_Latn
Javanese jav_Latn
Japanese jpn_Jpan
Kabyle kab_Latn
Jingpho kac_Latn
Kamba kam_Latn
Kannada kan_Knda
Kashmiri (Arabic script) kas_Arab
Kashmiri (Devanagari script) kas_Deva
Georgian kat_Geor
Central Kanuri (Arabic script) knc_Arab
Central Kanuri (Latin script) knc_Latn
Kazakh kaz_Cyrl
Kabiyè kbp_Latn
Kabuverdianu kea_Latn
Khmer khm_Khmr
Kikuyu kik_Latn
Kinyarwanda kin_Latn
Kyrgyz kir_Cyrl
Kimbundu kmb_Latn
Northern Kurdish kmr_Latn
Kikongo kon_Latn
Korean kor_Hang
Lao lao_Laoo
Ligurian lij_Latn
Limburgish lim_Latn
Lingala lin_Latn
Lithuanian lit_Latn
Lombard lmo_Latn
Latgalian ltg_Latn
Luxembourgish ltz_Latn
Luba-Kasai lua_Latn
Ganda lug_Latn
Luo luo_Latn
Mizo lus_Latn
Standard Latvian lvs_Latn
Magahi mag_Deva
Maithili mai_Deva
Malayalam mal_Mlym
Marathi mar_Deva
Minangkabau (Arabic script) min_Arab
Minangkabau (Latin script) min_Latn
Macedonian mkd_Cyrl
Plateau Malagasy plt_Latn
Maltese mlt_Latn
Meitei (Bengali script) mni_Beng
Halh Mongolian khk_Cyrl
Mossi mos_Latn
Maori mri_Latn
Burmese mya_Mymr
Dutch nld_Latn
Norwegian Nynorsk nno_Latn
Norwegian Bokmål nob_Latn
Nepali npi_Deva
Northern Sotho nso_Latn
Nuer nus_Latn
Nyanja nya_Latn
Occitan oci_Latn
West Central Oromo gaz_Latn
Odia ory_Orya
Pangasinan pag_Latn
Eastern Panjabi pan_Guru
Papiamento pap_Latn
Western Persian pes_Arab
Polish pol_Latn
Portuguese por_Latn
Dari prs_Arab
Southern Pashto pbt_Arab
Ayacucho Quechua quy_Latn
Romanian ron_Latn
Rundi run_Latn
Russian rus_Cyrl
Sango sag_Latn
Sanskrit san_Deva
Santali sat_Olck
Sicilian scn_Latn
Shan shn_Mymr
Sinhala sin_Sinh
Slovak slk_Latn
Slovenian slv_Latn
Samoan smo_Latn
Shona sna_Latn
Sindhi snd_Arab
Somali som_Latn
Southern Sotho sot_Latn
Spanish spa_Latn
Tosk Albanian als_Latn
Sardinian srd_Latn
Serbian srp_Cyrl
Swati ssw_Latn
Sundanese sun_Latn
Swedish swe_Latn
Swahili swh_Latn
Silesian szl_Latn
Tamil tam_Taml
Tatar tat_Cyrl
Telugu tel_Telu
Tajik tgk_Cyrl
Tagalog tgl_Latn
Thai tha_Thai
Tigrinya tir_Ethi
Tamasheq (Latin script) taq_Latn
Tamasheq (Tifinagh script) taq_Tfng
Tok Pisin tpi_Latn
Tswana tsn_Latn
Tsonga tso_Latn
Turkmen tuk_Latn
Tumbuka tum_Latn
Turkish tur_Latn
Twi twi_Latn
Central Atlas Tamazight tzm_Tfng
Uyghur uig_Arab
Ukrainian ukr_Cyrl
Umbundu umb_Latn
Urdu urd_Arab
Northern Uzbek uzn_Latn
Venetian vec_Latn
Vietnamese vie_Latn
Waray war_Latn
Wolof wol_Latn
Xhosa xho_Latn
Eastern Yiddish ydd_Hebr
Yoruba yor_Latn
Yue Chinese yue_Hant
Chinese (Simplified) zho_Hans
Chinese (Traditional) zho_Hant
Standard Malay zsm_Latn
Zulu zul_Latn'''

# The name is deliberately rebound from the raw text to the list of its lines,
# as the original module did, so importers of `codes_as_string` still get a list.
codes_as_string = codes_as_string.splitlines()

# Mapping of language display name -> code (e.g. "English" -> "eng_Latn").
flores_codes = {}
for code in codes_as_string:
    # Tolerate blank lines (e.g. a trailing newline added to the table above).
    if not code.strip():
        continue
    # Split on the LAST run of whitespace rather than on a literal tab: the
    # code never contains whitespace, whereas the name may, and this works
    # whether the separator is a tab or a space.
    lang, lang_code = code.rsplit(None, 1)
    flores_codes[lang] = lang_code