edchengg committed on
Commit
98d497c
1 Parent(s): ed7b79a
Files changed (3) hide show
  1. app.py +212 -1
  2. flores200_codes.py +0 -0
  3. requirements.txt +3 -0
app.py CHANGED
@@ -3,7 +3,212 @@ import torch
3
  import gradio as gr
4
  import time
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
- from flores200_codes import flores_codes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def load_models():
@@ -52,6 +257,12 @@ def translation(source, target, text):
52
 
53
  if __name__ == '__main__':
54
  print('\tinit models')
 
 
 
 
 
 
55
 
56
  global model_dict
57
 
 
3
  import gradio as gr
4
  import time
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
+
7
+ codes_as_string = '''Acehnese (Arabic script) ace_Arab
8
+ Acehnese (Latin script) ace_Latn
9
+ Mesopotamian Arabic acm_Arab
10
+ Ta’izzi-Adeni Arabic acq_Arab
11
+ Tunisian Arabic aeb_Arab
12
+ Afrikaans afr_Latn
13
+ South Levantine Arabic ajp_Arab
14
+ Akan aka_Latn
15
+ Amharic amh_Ethi
16
+ North Levantine Arabic apc_Arab
17
+ Modern Standard Arabic arb_Arab
18
+ Modern Standard Arabic (Romanized) arb_Latn
19
+ Najdi Arabic ars_Arab
20
+ Moroccan Arabic ary_Arab
21
+ Egyptian Arabic arz_Arab
22
+ Assamese asm_Beng
23
+ Asturian ast_Latn
24
+ Awadhi awa_Deva
25
+ Central Aymara ayr_Latn
26
+ South Azerbaijani azb_Arab
27
+ North Azerbaijani azj_Latn
28
+ Bashkir bak_Cyrl
29
+ Bambara bam_Latn
30
+ Balinese ban_Latn
31
+ Belarusian bel_Cyrl
32
+ Bemba bem_Latn
33
+ Bengali ben_Beng
34
+ Bhojpuri bho_Deva
35
+ Banjar (Arabic script) bjn_Arab
36
+ Banjar (Latin script) bjn_Latn
37
+ Standard Tibetan bod_Tibt
38
+ Bosnian bos_Latn
39
+ Buginese bug_Latn
40
+ Bulgarian bul_Cyrl
41
+ Catalan cat_Latn
42
+ Cebuano ceb_Latn
43
+ Czech ces_Latn
44
+ Chokwe cjk_Latn
45
+ Central Kurdish ckb_Arab
46
+ Crimean Tatar crh_Latn
47
+ Welsh cym_Latn
48
+ Danish dan_Latn
49
+ German deu_Latn
50
+ Southwestern Dinka dik_Latn
51
+ Dyula dyu_Latn
52
+ Dzongkha dzo_Tibt
53
+ Greek ell_Grek
54
+ English eng_Latn
55
+ Esperanto epo_Latn
56
+ Estonian est_Latn
57
+ Basque eus_Latn
58
+ Ewe ewe_Latn
59
+ Faroese fao_Latn
60
+ Fijian fij_Latn
61
+ Finnish fin_Latn
62
+ Fon fon_Latn
63
+ French fra_Latn
64
+ Friulian fur_Latn
65
+ Nigerian Fulfulde fuv_Latn
66
+ Scottish Gaelic gla_Latn
67
+ Irish gle_Latn
68
+ Galician glg_Latn
69
+ Guarani grn_Latn
70
+ Gujarati guj_Gujr
71
+ Haitian Creole hat_Latn
72
+ Hausa hau_Latn
73
+ Hebrew heb_Hebr
74
+ Hindi hin_Deva
75
+ Chhattisgarhi hne_Deva
76
+ Croatian hrv_Latn
77
+ Hungarian hun_Latn
78
+ Armenian hye_Armn
79
+ Igbo ibo_Latn
80
+ Ilocano ilo_Latn
81
+ Indonesian ind_Latn
82
+ Icelandic isl_Latn
83
+ Italian ita_Latn
84
+ Javanese jav_Latn
85
+ Japanese jpn_Jpan
86
+ Kabyle kab_Latn
87
+ Jingpho kac_Latn
88
+ Kamba kam_Latn
89
+ Kannada kan_Knda
90
+ Kashmiri (Arabic script) kas_Arab
91
+ Kashmiri (Devanagari script) kas_Deva
92
+ Georgian kat_Geor
93
+ Central Kanuri (Arabic script) knc_Arab
94
+ Central Kanuri (Latin script) knc_Latn
95
+ Kazakh kaz_Cyrl
96
+ Kabiyè kbp_Latn
97
+ Kabuverdianu kea_Latn
98
+ Khmer khm_Khmr
99
+ Kikuyu kik_Latn
100
+ Kinyarwanda kin_Latn
101
+ Kyrgyz kir_Cyrl
102
+ Kimbundu kmb_Latn
103
+ Northern Kurdish kmr_Latn
104
+ Kikongo kon_Latn
105
+ Korean kor_Hang
106
+ Lao lao_Laoo
107
+ Ligurian lij_Latn
108
+ Limburgish lim_Latn
109
+ Lingala lin_Latn
110
+ Lithuanian lit_Latn
111
+ Lombard lmo_Latn
112
+ Latgalian ltg_Latn
113
+ Luxembourgish ltz_Latn
114
+ Luba-Kasai lua_Latn
115
+ Ganda lug_Latn
116
+ Luo luo_Latn
117
+ Mizo lus_Latn
118
+ Standard Latvian lvs_Latn
119
+ Magahi mag_Deva
120
+ Maithili mai_Deva
121
+ Malayalam mal_Mlym
122
+ Marathi mar_Deva
123
+ Minangkabau (Arabic script) min_Arab
124
+ Minangkabau (Latin script) min_Latn
125
+ Macedonian mkd_Cyrl
126
+ Plateau Malagasy plt_Latn
127
+ Maltese mlt_Latn
128
+ Meitei (Bengali script) mni_Beng
129
+ Halh Mongolian khk_Cyrl
130
+ Mossi mos_Latn
131
+ Maori mri_Latn
132
+ Burmese mya_Mymr
133
+ Dutch nld_Latn
134
+ Norwegian Nynorsk nno_Latn
135
+ Norwegian Bokmål nob_Latn
136
+ Nepali npi_Deva
137
+ Northern Sotho nso_Latn
138
+ Nuer nus_Latn
139
+ Nyanja nya_Latn
140
+ Occitan oci_Latn
141
+ West Central Oromo gaz_Latn
142
+ Odia ory_Orya
143
+ Pangasinan pag_Latn
144
+ Eastern Panjabi pan_Guru
145
+ Papiamento pap_Latn
146
+ Western Persian pes_Arab
147
+ Polish pol_Latn
148
+ Portuguese por_Latn
149
+ Dari prs_Arab
150
+ Southern Pashto pbt_Arab
151
+ Ayacucho Quechua quy_Latn
152
+ Romanian ron_Latn
153
+ Rundi run_Latn
154
+ Russian rus_Cyrl
155
+ Sango sag_Latn
156
+ Sanskrit san_Deva
157
+ Santali sat_Olck
158
+ Sicilian scn_Latn
159
+ Shan shn_Mymr
160
+ Sinhala sin_Sinh
161
+ Slovak slk_Latn
162
+ Slovenian slv_Latn
163
+ Samoan smo_Latn
164
+ Shona sna_Latn
165
+ Sindhi snd_Arab
166
+ Somali som_Latn
167
+ Southern Sotho sot_Latn
168
+ Spanish spa_Latn
169
+ Tosk Albanian als_Latn
170
+ Sardinian srd_Latn
171
+ Serbian srp_Cyrl
172
+ Swati ssw_Latn
173
+ Sundanese sun_Latn
174
+ Swedish swe_Latn
175
+ Swahili swh_Latn
176
+ Silesian szl_Latn
177
+ Tamil tam_Taml
178
+ Tatar tat_Cyrl
179
+ Telugu tel_Telu
180
+ Tajik tgk_Cyrl
181
+ Tagalog tgl_Latn
182
+ Thai tha_Thai
183
+ Tigrinya tir_Ethi
184
+ Tamasheq (Latin script) taq_Latn
185
+ Tamasheq (Tifinagh script) taq_Tfng
186
+ Tok Pisin tpi_Latn
187
+ Tswana tsn_Latn
188
+ Tsonga tso_Latn
189
+ Turkmen tuk_Latn
190
+ Tumbuka tum_Latn
191
+ Turkish tur_Latn
192
+ Twi twi_Latn
193
+ Central Atlas Tamazight tzm_Tfng
194
+ Uyghur uig_Arab
195
+ Ukrainian ukr_Cyrl
196
+ Umbundu umb_Latn
197
+ Urdu urd_Arab
198
+ Northern Uzbek uzn_Latn
199
+ Venetian vec_Latn
200
+ Vietnamese vie_Latn
201
+ Waray war_Latn
202
+ Wolof wol_Latn
203
+ Xhosa xho_Latn
204
+ Eastern Yiddish ydd_Hebr
205
+ Yoruba yor_Latn
206
+ Yue Chinese yue_Hant
207
+ Chinese (Simplified) zho_Hans
208
+ Chinese (Traditional) zho_Hant
209
+ Standard Malay zsm_Latn
210
+ Zulu zul_Latn'''
211
+
212
 
213
 
214
  def load_models():
 
257
 
258
  if __name__ == '__main__':
259
  print('\tinit models')
260
+ codes_as_string = codes_as_string.split('\n')
261
+
262
+ flores_codes = {}
263
+ for code in codes_as_string:
264
+ lang, lang_code = code.split('\t')
265
+ flores_codes[lang] = lang_code
266
 
267
  global model_dict
268
 
flores200_codes.py DELETED
File without changes
requirements.txt CHANGED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ gradio
3
+ torch