Maximofn commited on
Commit
cfd5440
·
1 Parent(s): 0d050fc

Add source and target languages

Browse files
Files changed (2) hide show
  1. lang_list.py +360 -0
  2. translatube.py +13 -2
lang_list.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Maps each three-letter language code to the language's English name
2
+ language_code_to_name = {
3
+ "afr": "Afrikaans",
4
+ "amh": "Amharic",
5
+ "arb": "Modern Standard Arabic",
6
+ "ary": "Moroccan Arabic",
7
+ "arz": "Egyptian Arabic",
8
+ "asm": "Assamese",
9
+ "ast": "Asturian",
10
+ "azj": "North Azerbaijani",
11
+ "bel": "Belarusian",
12
+ "ben": "Bengali",
13
+ "bos": "Bosnian",
14
+ "bul": "Bulgarian",
15
+ "cat": "Catalan",
16
+ "ceb": "Cebuano",
17
+ "ces": "Czech",
18
+ "ckb": "Central Kurdish",
19
+ "cmn": "Mandarin Chinese",
20
+ "cym": "Welsh",
21
+ "dan": "Danish",
22
+ "deu": "German",
23
+ "ell": "Greek",
24
+ "eng": "English",
25
+ "est": "Estonian",
26
+ "eus": "Basque",
27
+ "fin": "Finnish",
28
+ "fra": "French",
29
+ "gaz": "West Central Oromo",
30
+ "gle": "Irish",
31
+ "glg": "Galician",
32
+ "guj": "Gujarati",
33
+ "heb": "Hebrew",
34
+ "hin": "Hindi",
35
+ "hrv": "Croatian",
36
+ "hun": "Hungarian",
37
+ "hye": "Armenian",
38
+ "ibo": "Igbo",
39
+ "ind": "Indonesian",
40
+ "isl": "Icelandic",
41
+ "ita": "Italian",
42
+ "jav": "Javanese",
43
+ "jpn": "Japanese",
44
+ "kam": "Kamba",
45
+ "kan": "Kannada",
46
+ "kat": "Georgian",
47
+ "kaz": "Kazakh",
48
+ "kea": "Kabuverdianu",
49
+ "khk": "Halh Mongolian",
50
+ "khm": "Khmer",
51
+ "kir": "Kyrgyz",
52
+ "kor": "Korean",
53
+ "lao": "Lao",
54
+ "lit": "Lithuanian",
55
+ "ltz": "Luxembourgish",
56
+ "lug": "Ganda",
57
+ "luo": "Luo",
58
+ "lvs": "Standard Latvian",
59
+ "mai": "Maithili",
60
+ "mal": "Malayalam",
61
+ "mar": "Marathi",
62
+ "mkd": "Macedonian",
63
+ "mlt": "Maltese",
64
+ "mni": "Meitei",
65
+ "mya": "Burmese",
66
+ "nld": "Dutch",
67
+ "nno": "Norwegian Nynorsk",
68
+ "nob": "Norwegian Bokm\u00e5l",
69
+ "npi": "Nepali",
70
+ "nya": "Nyanja",
71
+ "oci": "Occitan",
72
+ "ory": "Odia",
73
+ "pan": "Punjabi",
74
+ "pbt": "Southern Pashto",
75
+ "pes": "Western Persian",
76
+ "pol": "Polish",
77
+ "por": "Portuguese",
78
+ "ron": "Romanian",
79
+ "rus": "Russian",
80
+ "slk": "Slovak",
81
+ "slv": "Slovenian",
82
+ "sna": "Shona",
83
+ "snd": "Sindhi",
84
+ "som": "Somali",
85
+ "spa": "Spanish",
86
+ "srp": "Serbian",
87
+ "swe": "Swedish",
88
+ "swh": "Swahili",
89
+ "tam": "Tamil",
90
+ "tel": "Telugu",
91
+ "tgk": "Tajik",
92
+ "tgl": "Tagalog",
93
+ "tha": "Thai",
94
+ "tur": "Turkish",
95
+ "ukr": "Ukrainian",
96
+ "urd": "Urdu",
97
+ "uzn": "Northern Uzbek",
98
+ "vie": "Vietnamese",
99
+ "xho": "Xhosa",
100
+ "yor": "Yoruba",
101
+ "yue": "Cantonese",
102
+ "zlm": "Colloquial Malay",
103
+ "zsm": "Standard Malay",
104
+ "zul": "Zulu",
105
+ }
106
+ original_language_code_to_name = {
107
+ "afr": "Afrikaans",
108
+ "amh": "አማርኛ",
109
+ "arb": "العربية",
110
+ "ary": "الدارجة المغربية",
111
+ "arz": "العامية المصرية",
112
+ "asm": "অসমীয়া",
113
+ "ast": "Asturianu",
114
+ "azj": "Azərbaycanca",
115
+ "bel": "Беларуская",
116
+ "ben": "বাংলা",
117
+ "bos": "Bosanski",
118
+ "bul": "Български",
119
+ "cat": "Català",
120
+ "ceb": "Cebuano",
121
+ "ces": "Čeština",
122
+ "ckb": "کوردی ناوەندی",
123
+ "cmn": "普通话",
124
+ "cym": "Cymraeg",
125
+ "dan": "Dansk",
126
+ "deu": "Deutsch",
127
+ "ell": "Ελληνικά",
128
+ "eng": "English",
129
+ "est": "Eesti",
130
+ "eus": "Euskara",
131
+ "fin": "Suomi",
132
+ "fra": "Français",
133
+ "gaz": "Afaan Oromoo",
134
+ "gle": "Gaeilge",
135
+ "glg": "Galego",
136
+ "guj": "ગુજરાતી",
137
+ "heb": "עברית",
138
+ "hin": "हिंदी",
139
+ "hrv": "Hrvatski",
140
+ "hun": "Magyar",
141
+ "hye": "Հայերեն",
142
+ "ibo": "Igbo",
143
+ "ind": "Bahasa Indonesia",
144
+ "isl": "Íslenska",
145
+ "ita": "Italiano",
146
+ "jav": "Basa Jawa",
147
+ "jpn": "日本語",
148
+ "kam": "Kikamba",
149
+ "kan": "ಕನ್ನಡ",
150
+ "kat": "ქართული",
151
+ "kaz": "Қазақ тілі",
152
+ "kea": "Kriolu di Kabuverdianu",
153
+ "khk": "Халх",
154
+ "khm": "ខ្មែរ",
155
+ "kir": "Кыргызча",
156
+ "kor": "한국어",
157
+ "lao": "ລາວ",
158
+ "lit": "Lietuvių",
159
+ "ltz": "Lëtzebuergesch",
160
+ "lug": "Luganda",
161
+ "luo": "Dholuo",
162
+ "lvs": "Latviešu",
163
+ "mai": "मैथिली",
164
+ "mal": "മലയാളം",
165
+ "mar": "मराठी",
166
+ "mkd": "Македонски",
167
+ "mlt": "Malti",
168
+ "mni": "মৈতৈলোন",
169
+ "mya": "မြန်မာ",
170
+ "nld": "Nederlands",
171
+ "nno": "Nynorsk",
172
+ "nob": "Bokmål",
173
+ "npi": "नेपाली",
174
+ "nya": "Chichewa",
175
+ "oci": "Occitan",
176
+ "ory": "ଓଡ଼ିଆ",
177
+ "pan": "ਪੰਜਾਬੀ",
178
+ "pbt": "پښتو",
179
+ "pes": "فارسی",
180
+ "pol": "Polski",
181
+ "por": "Português",
182
+ "ron": "Română",
183
+ "rus": "Русский",
184
+ "slk": "Slovenčina",
185
+ "slv": "Slovenščina",
186
+ "sna": "ChiShona",
187
+ "snd": "سنڌي",
188
+ "som": "Soomaali",
189
+ "spa": "Español",
190
+ "srp": "Српски",
191
+ "swe": "Svenska",
192
+ "swh": "Kiswahili",
193
+ "tam": "தமிழ்",
194
+ "tel": "తెలుగు",
195
+ "tgk": "Тоҷикӣ",
196
+ "tgl": "Tagalog",
197
+ "tha": "ไทย",
198
+ "tur": "Türkçe",
199
+ "ukr": "Українська",
200
+ "urd": "اردو",
201
+ "uzn": "O‘zbekcha",
202
+ "vie": "Tiếng Việt",
203
+ "xho": "IsiXhosa",
204
+ "yor": "Yorùbá",
205
+ "yue": "粤语",
206
+ "zlm": "Bahasa Melayu",
207
+ "zsm": "Bahasa Melayu",
208
+ "zul": "IsiZulu",
209
+ }
210
+ LANGUAGE_NAME_TO_CODE = {v: k for k, v in language_code_to_name.items()}
211
+ ORIGINAL_LANGUAGE_NAME_TO_CODE = {v: k for k, v in original_language_code_to_name.items()}
212
+
213
+ # Source languages: S2ST / S2TT / ASR don't need a source language;
214
+ # T2TT / T2ST use this list.
215
+ text_source_language_codes = [
216
+ "afr",
217
+ "amh",
218
+ "arb",
219
+ "ary",
220
+ "arz",
221
+ "asm",
222
+ "azj",
223
+ "bel",
224
+ "ben",
225
+ "bos",
226
+ "bul",
227
+ "cat",
228
+ "ceb",
229
+ "ces",
230
+ "ckb",
231
+ "cmn",
232
+ "cym",
233
+ "dan",
234
+ "deu",
235
+ "ell",
236
+ "eng",
237
+ "est",
238
+ "eus",
239
+ "fin",
240
+ "fra",
241
+ "gaz",
242
+ "gle",
243
+ "glg",
244
+ "guj",
245
+ "heb",
246
+ "hin",
247
+ "hrv",
248
+ "hun",
249
+ "hye",
250
+ "ibo",
251
+ "ind",
252
+ "isl",
253
+ "ita",
254
+ "jav",
255
+ "jpn",
256
+ "kan",
257
+ "kat",
258
+ "kaz",
259
+ "khk",
260
+ "khm",
261
+ "kir",
262
+ "kor",
263
+ "lao",
264
+ "lit",
265
+ "lug",
266
+ "luo",
267
+ "lvs",
268
+ "mai",
269
+ "mal",
270
+ "mar",
271
+ "mkd",
272
+ "mlt",
273
+ "mni",
274
+ "mya",
275
+ "nld",
276
+ "nno",
277
+ "nob",
278
+ "npi",
279
+ "nya",
280
+ "ory",
281
+ "pan",
282
+ "pbt",
283
+ "pes",
284
+ "pol",
285
+ "por",
286
+ "ron",
287
+ "rus",
288
+ "slk",
289
+ "slv",
290
+ "sna",
291
+ "snd",
292
+ "som",
293
+ "spa",
294
+ "srp",
295
+ "swe",
296
+ "swh",
297
+ "tam",
298
+ "tel",
299
+ "tgk",
300
+ "tgl",
301
+ "tha",
302
+ "tur",
303
+ "ukr",
304
+ "urd",
305
+ "uzn",
306
+ "vie",
307
+ "yor",
308
+ "yue",
309
+ "zsm",
310
+ "zul",
311
+ ]
312
+ TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
313
+
314
+ # Target languages:
315
+ # S2ST / T2ST
316
+ s2st_target_language_codes = [
317
+ "eng",
318
+ "arb",
319
+ "ben",
320
+ "cat",
321
+ "ces",
322
+ "cmn",
323
+ "cym",
324
+ "dan",
325
+ "deu",
326
+ "est",
327
+ "fin",
328
+ "fra",
329
+ "hin",
330
+ "ind",
331
+ "ita",
332
+ "jpn",
333
+ "kor",
334
+ "mlt",
335
+ "nld",
336
+ "pes",
337
+ "pol",
338
+ "por",
339
+ "ron",
340
+ "rus",
341
+ "slk",
342
+ "spa",
343
+ "swe",
344
+ "swh",
345
+ "tel",
346
+ "tgl",
347
+ "tha",
348
+ "tur",
349
+ "ukr",
350
+ "urd",
351
+ "uzn",
352
+ "vie",
353
+ ]
354
+ S2ST_TARGET_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in s2st_target_language_codes])
355
+ S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES = sorted([original_language_code_to_name[code] for code in s2st_target_language_codes])
356
+
357
+ # S2TT / ASR
358
+ S2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
359
+ # T2TT
360
+ T2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
translatube.py CHANGED
@@ -4,6 +4,7 @@ import urllib.parse as urlparse
4
  from pytube import YouTube
5
  import re
6
  import subprocess
 
7
 
8
  YOUTUBE = "youtube"
9
  TWITCH = "twitch"
@@ -31,11 +32,15 @@ def is_valid_url(url):
31
  button = gr.Button(size="sm", value="translate", min_width="10px", scale=0, visible=True)
32
  original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True)
33
  translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=True)
 
 
34
  if "youtube" in url.lower() or "youtu.be" in url.lower():
35
  thumbnail = get_youtube_video_id(url)
36
  if thumbnail:
37
  return (
38
  gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
 
 
39
  button,
40
  gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
41
  original_audio,
@@ -44,6 +49,8 @@ def is_valid_url(url):
44
  elif "twitch" in url.lower() or "twitch.tv" in url.lower():
45
  return (
46
  gr.Image(value="assets/twitch.webp", visible=True),
 
 
47
  button,
48
  gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
49
  original_audio,
@@ -90,11 +97,15 @@ with gr.Blocks() as demo:
90
  visible = False
91
  with gr.Row(equal_height=False):
92
  image = gr.Image(visible=visible, scale=1)
93
- translate_button = gr.Button(size="sm", value="translate", min_width="10px", scale=0, visible=visible)
 
 
 
 
94
 
95
  original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible)
96
  translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
97
- url_textbox.change(fn=is_valid_url, inputs=url_textbox, outputs=[image, translate_button, stream_page, original_audio, translated_audio])
98
  translate_button.click(fn=get_audio_from_video, inputs=[url_textbox, stream_page], outputs=original_audio)
99
 
100
  demo.launch()
 
4
  from pytube import YouTube
5
  import re
6
  import subprocess
7
+ from lang_list import ORIGINAL_LANGUAGE_NAME_TO_CODE, S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES
8
 
9
  YOUTUBE = "youtube"
10
  TWITCH = "twitch"
 
32
  button = gr.Button(size="sm", value="translate", min_width="10px", scale=0, visible=True)
33
  original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True)
34
  translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=True)
35
+ source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True)
36
+ target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True)
37
  if "youtube" in url.lower() or "youtu.be" in url.lower():
38
  thumbnail = get_youtube_video_id(url)
39
  if thumbnail:
40
  return (
41
  gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
42
+ source_languaje,
43
+ target_languaje,
44
  button,
45
  gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
46
  original_audio,
 
49
  elif "twitch" in url.lower() or "twitch.tv" in url.lower():
50
  return (
51
  gr.Image(value="assets/twitch.webp", visible=True),
52
+ source_languaje,
53
+ target_languaje,
54
  button,
55
  gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
56
  original_audio,
 
97
  visible = False
98
  with gr.Row(equal_height=False):
99
  image = gr.Image(visible=visible, scale=1)
100
+ with gr.Column():
101
+ with gr.Row():
102
+ source_languaje = gr.Dropdown(visible=visible, label="Source languaje", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True)
103
+ target_languaje = gr.Dropdown(visible=visible, label="Target languaje", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True)
104
+ translate_button = gr.Button(size="lg", value="translate", min_width="10px", visible=visible)
105
 
106
  original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible)
107
  translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
108
+ url_textbox.change(fn=is_valid_url, inputs=url_textbox, outputs=[image, source_languaje, target_languaje, translate_button, stream_page, original_audio, translated_audio])
109
  translate_button.click(fn=get_audio_from_video, inputs=[url_textbox, stream_page], outputs=original_audio)
110
 
111
  demo.launch()