Lianglan committed on
Commit
ce93184
1 Parent(s): ac2434b
Files changed (6) hide show
  1. app.py +54 -0
  2. langs.py +8 -0
  3. langs_all.py +204 -0
  4. requirements.txt +3 -0
  5. ui.cpython-310.pyc +0 -0
  6. ui.py +12 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
+ import torch
4
+ from ui import title, description, examples
5
+ from langs import LANGS
6
+ #from langs_all import LANGS ##for 200+ languages
7
+
8
# Hugging Face checkpoint served by this demo and the pipeline task it is
# used for.
CKPT = "facebook/nllb-200-distilled-600M"
TASK = "translation"

# Load the tokenizer and the seq2seq model a single time at import, so that
# individual translation requests can reuse the same objects.
tokenizer = AutoTokenizer.from_pretrained(CKPT)
model = AutoModelForSeq2SeqLM.from_pretrained(CKPT)

# GPU selection is currently disabled:
# device = 0 if torch.cuda.is_available() else -1
15
+
16
+
17
def translate(text, src_lang, tgt_lang, max_length=512):
    """
    Translate the text from source lang to target lang.

    Args:
        text: The text to translate.
        src_lang: Source language code, forwarded to the pipeline as
            ``src_lang`` (the UI offers the entries of ``LANGS``).
        tgt_lang: Target language code, forwarded as ``tgt_lang``.
        max_length: Maximum length forwarded to the pipeline. Coerced to
            ``int`` because a UI slider may hand over a float
            (NOTE(review): depends on the Gradio version - confirm).

    Returns:
        The ``translation_text`` of the first pipeline result, or an empty
        string when ``text`` is empty or only whitespace.

    Raises:
        ValueError: If the source or target language is missing.
    """
    # Nothing to translate: skip the model entirely instead of generating
    # output from an empty prompt.
    if text is None or not text.strip():
        return ""

    # An unselected dropdown arrives as None/""; fail early with a clear
    # message rather than an opaque error from inside the pipeline.
    if not src_lang or not tgt_lang:
        raise ValueError(
            "Both a source and a target language must be selected."
        )

    # The pipeline is built per request because the language pair changes
    # between calls; the heavy objects (model, tokenizer) are module-level
    # and reused.
    translation_pipeline = pipeline(
        TASK,
        model=model,
        tokenizer=tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=int(max_length),
    )

    result = translation_pipeline(text)
    return result[0]['translation_text']
38
+
39
+
40
# Input widgets, listed in the positional order of translate()'s parameters:
# text, source language, target language, max length.
input_components = [
    gr.components.Textbox(label="Text"),
    gr.components.Dropdown(label="Source Language", choices=LANGS),
    gr.components.Dropdown(label="Target Language", choices=LANGS),
    gr.components.Slider(8, 512, value=512, step=8, label="Max Length"),
]

# Build the web UI around translate() and start serving it immediately.
demo = gr.Interface(
    fn=translate,
    inputs=input_components,
    outputs=["text"],
    examples=examples,
    # article=article,
    cache_examples=False,
    title=title,
    description=description,
)
demo.launch()
langs.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Reduced set of language codes offered in the demo's dropdowns.
# Each code has the form "<language>_<script>".
LANGS = [
    "bod_Tibt", "khk_Cyrl", "uig_Arab",
    "yue_Hant", "zho_Hans", "zho_Hant",
]
langs_all.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Full set of language codes for the demo's dropdowns (202 entries).
# Each code has the form "<language>_<script>"; the original ordering is
# preserved because it determines the order shown in the UI.
LANGS = [
    "ace_Arab", "ace_Latn", "acm_Arab", "acq_Arab", "aeb_Arab", "afr_Latn",
    "ajp_Arab", "aka_Latn", "amh_Ethi", "apc_Arab", "arb_Arab", "ars_Arab",
    "ary_Arab", "arz_Arab", "asm_Beng", "ast_Latn", "awa_Deva", "ayr_Latn",
    "azb_Arab", "azj_Latn", "bak_Cyrl", "bam_Latn", "ban_Latn", "bel_Cyrl",
    "bem_Latn", "ben_Beng", "bho_Deva", "bjn_Arab", "bjn_Latn", "bod_Tibt",
    "bos_Latn", "bug_Latn", "bul_Cyrl", "cat_Latn", "ceb_Latn", "ces_Latn",
    "cjk_Latn", "ckb_Arab", "crh_Latn", "cym_Latn", "dan_Latn", "deu_Latn",
    "dik_Latn", "dyu_Latn", "dzo_Tibt", "ell_Grek", "eng_Latn", "epo_Latn",
    "est_Latn", "eus_Latn", "ewe_Latn", "fao_Latn", "pes_Arab", "fij_Latn",
    "fin_Latn", "fon_Latn", "fra_Latn", "fur_Latn", "fuv_Latn", "gla_Latn",
    "gle_Latn", "glg_Latn", "grn_Latn", "guj_Gujr", "hat_Latn", "hau_Latn",
    "heb_Hebr", "hin_Deva", "hne_Deva", "hrv_Latn", "hun_Latn", "hye_Armn",
    "ibo_Latn", "ilo_Latn", "ind_Latn", "isl_Latn", "ita_Latn", "jav_Latn",
    "jpn_Jpan", "kab_Latn", "kac_Latn", "kam_Latn", "kan_Knda", "kas_Arab",
    "kas_Deva", "kat_Geor", "knc_Arab", "knc_Latn", "kaz_Cyrl", "kbp_Latn",
    "kea_Latn", "khm_Khmr", "kik_Latn", "kin_Latn", "kir_Cyrl", "kmb_Latn",
    "kon_Latn", "kor_Hang", "kmr_Latn", "lao_Laoo", "lvs_Latn", "lij_Latn",
    "lim_Latn", "lin_Latn", "lit_Latn", "lmo_Latn", "ltg_Latn", "ltz_Latn",
    "lua_Latn", "lug_Latn", "luo_Latn", "lus_Latn", "mag_Deva", "mai_Deva",
    "mal_Mlym", "mar_Deva", "min_Latn", "mkd_Cyrl", "plt_Latn", "mlt_Latn",
    "mni_Beng", "khk_Cyrl", "mos_Latn", "mri_Latn", "zsm_Latn", "mya_Mymr",
    "nld_Latn", "nno_Latn", "nob_Latn", "npi_Deva", "nso_Latn", "nus_Latn",
    "nya_Latn", "oci_Latn", "gaz_Latn", "ory_Orya", "pag_Latn", "pan_Guru",
    "pap_Latn", "pol_Latn", "por_Latn", "prs_Arab", "pbt_Arab", "quy_Latn",
    "ron_Latn", "run_Latn", "rus_Cyrl", "sag_Latn", "san_Deva", "sat_Beng",
    "scn_Latn", "shn_Mymr", "sin_Sinh", "slk_Latn", "slv_Latn", "smo_Latn",
    "sna_Latn", "snd_Arab", "som_Latn", "sot_Latn", "spa_Latn", "als_Latn",
    "srd_Latn", "srp_Cyrl", "ssw_Latn", "sun_Latn", "swe_Latn", "swh_Latn",
    "szl_Latn", "tam_Taml", "tat_Cyrl", "tel_Telu", "tgk_Cyrl", "tgl_Latn",
    "tha_Thai", "tir_Ethi", "taq_Latn", "taq_Tfng", "tpi_Latn", "tsn_Latn",
    "tso_Latn", "tuk_Latn", "tum_Latn", "tur_Latn", "twi_Latn", "tzm_Tfng",
    "uig_Arab", "ukr_Cyrl", "umb_Latn", "urd_Arab", "uzn_Latn", "vec_Latn",
    "vie_Latn", "war_Latn", "wol_Latn", "xho_Latn", "ydd_Hebr", "yor_Latn",
    "yue_Hant", "zho_Hans", "zho_Hant", "zul_Latn",
]
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ gradio
3
+ torch
ui.cpython-310.pyc ADDED
Binary file (672 Bytes). View file
 
ui.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Static text and example data displayed by the demo UI.

# Page heading (typo "Traslation" corrected).
title = "NLLB-200 Translation Demo"

# HTML blurb rendered under the heading. The second link previously carried
# the malformed attribute target=blank' (unbalanced quote, missing
# underscore); both links now use target='_blank'.
description = """
<p>
<center>
Translator using <a href='https://ai.facebook.com/research/no-language-left-behind/' target='_blank'>Facebook's NLLB</a> models.
Codes Using <a href='https://github.com/facebookresearch/fairseq/tree/nllb' target='_blank'>Facebook's fairseq NLLB</a>.
Demo is running on CPU.
</center>
</p>
"""

# One example row: [text, source language, target language, max length].
examples = [["我非常喜欢这个地方", "zho_Hans", "yue_Hant", 512]]