Maximofn committed on
Commit
8240f8e
1 Parent(s): b487977

Uncomment all code

Browse files
Files changed (1) hide show
  1. app.py +489 -490
app.py CHANGED
@@ -1,509 +1,508 @@
1
  import gradio as gr
2
  import argparse
3
  import sys
4
- # import os
5
- # import torch
6
- # from time import sleep
7
- # from tqdm import tqdm
8
- # from lang_list import union_language_dict
9
- # # import pyperclip
10
- # from pytube import YouTube
11
- # import re
12
-
13
- # NUMBER = 100
14
- # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
- # # DEVICE = "cpu"
16
- # DOWNLOAD = True
17
- # SLICE_AUDIO = False
18
- # SEPARE_VOCALS = False
19
- # TRANSCRIBE_AUDIO = False
20
- # CONCATENATE_TRANSCRIPTIONS = False
21
- # TRANSLATE_TRANSCRIPTIONS = False
22
- # ADD_SUBTITLES_TO_VIDEO = False
23
- # REMOVE_FILES = False
24
- # REMOVE_ALL = False
25
- # if SEPARE_VOCALS:
26
- # SECONDS = 150
27
- # else:
28
- # SECONDS = 300
29
-
30
- # YOUTUBE = "youtube"
31
- # TWITCH = "twitch"
32
- # ERROR = "error"
33
-
34
- # language_dict = union_language_dict()
35
-
36
- # def subtify_no_ui():
37
- # number_works = 7
38
- # progress_bar = tqdm(total=number_works, desc="Subtify")
39
-
40
- # ################## Download video and audio ##################
41
- # if DOWNLOAD:
42
- # print('*'*NUMBER)
43
- # # url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas
44
- # # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos
45
- # # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos
46
- # url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos
47
- # # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos
48
- # # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short
49
- # print(f"Downloading video and audio from {url}")
50
- # python_file = "download.py"
51
- # command = f"python {python_file} {url}"
52
- # os.system(command)
53
- # sleep(5)
54
- # print('*'*NUMBER)
55
- # print("\n\n")
56
- # progress_bar.update(1)
57
-
58
- # ################## Slice audio ##################
59
- # if SLICE_AUDIO:
60
- # print('*'*NUMBER)
61
- # print("Slicing audio")
62
- # python_file = "slice_audio.py"
63
- # audio = "audios/download_audio.mp3"
64
- # command = f"python {python_file} {audio} {SECONDS}"
65
- # os.system(command)
66
- # print('*'*NUMBER)
67
- # print("\n\n")
68
- # progress_bar.update(1)
69
-
70
- # ################## Get vocals ##################
71
- # chunck_file = "chunks/output_files.txt"
72
- # print('*'*NUMBER)
73
- # if SEPARE_VOCALS:
74
- # print("Get vocals")
75
- # python_file = "separe_vocals.py"
76
- # command = f"python {python_file} {chunck_file} {DEVICE}"
77
- # os.system(command)
78
- # if REMOVE_FILES:
79
- # with open(chunck_file, 'r') as f:
80
- # files = f.read().splitlines()
81
- # for file in files:
82
- # command = f"rm {file}"
83
- # os.system(command)
84
- # else:
85
- # print("Moving chunks")
86
- # folder_vocals = "vocals"
87
- # folder_chunck = "chunks"
88
- # with open(f"{folder_vocals}/speakers.txt", 'w') as f:
89
- # f.write(str(0))
90
- # if REMOVE_FILES:
91
- # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
92
- # os.system(command)
93
- # else:
94
- # command = f"cp {folder_chunck}/*.mp3 {folder_vocals}/"
95
- # os.system(command)
96
- # print('*'*NUMBER)
97
- # print("\n\n")
98
- # progress_bar.update(1)
99
-
100
- # ################# Transcript vocals ##################
101
- # speakers_file = "vocals/speakers.txt"
102
- # if TRANSCRIBE_AUDIO:
103
- # print('*'*NUMBER)
104
- # print("Transcript vocals")
105
- # python_file = "transcribe.py"
106
- # language = "English"
107
- # command = f"python {python_file} {chunck_file} {language} {speakers_file} {DEVICE} {not SEPARE_VOCALS}"
108
- # os.system(command)
109
- # if REMOVE_FILES:
110
- # vocals_folder = "vocals"
111
- # with open(chunck_file, 'r') as f:
112
- # files = f.read().splitlines()
113
- # with open(speakers_file, 'r') as f:
114
- # speakers = f.read().splitlines()
115
- # speakers = int(speakers[0])
116
- # for file in files:
117
- # if speakers > 0:
118
- # vocals_extension = "wav"
119
- # for i in range(speakers):
120
- # file_name, _ = file.split(".")
121
- # _, file_name = file_name.split("/")
122
- # vocal = f'{vocals_folder}/{file_name}_speaker{i:003d}.{vocals_extension}'
123
- # command = f"rm {vocal}"
124
- # os.system(command)
125
- # else:
126
- # vocals_extension = "mp3"
127
- # file_name, _ = file.split(".")
128
- # _, file_name = file_name.split("/")
129
- # vocal = f'{vocals_folder}/{file_name}.{vocals_extension}'
130
- # command = f"rm {vocal}"
131
- # os.system(command)
132
- # print('*'*NUMBER)
133
- # print("\n\n")
134
- # progress_bar.update(1)
135
-
136
- # ################## Concatenate transcriptions ##################
137
- # if CONCATENATE_TRANSCRIPTIONS:
138
- # print('*'*NUMBER)
139
- # print("Concatenate transcriptions")
140
- # python_file = "concat_transcriptions.py"
141
- # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
142
- # os.system(command)
143
- # if REMOVE_FILES:
144
- # with open(chunck_file, 'r') as f:
145
- # files = f.read().splitlines()
146
- # for file in files:
147
- # file_name, _ = file.split(".")
148
- # _, file_name = file_name.split("/")
149
- # transcriptions_folder = "transcriptions"
150
- # transcription_extension = "srt"
151
- # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
152
- # os.system(command)
153
- # print('*'*NUMBER)
154
- # print("\n\n")
155
- # progress_bar.update(1)
156
-
157
- # ################## Translate transcription ##################
158
- # target_languaje = "Español"
159
- # if TRANSLATE_TRANSCRIPTIONS:
160
- # print('*'*NUMBER)
161
- # print("Translate transcription")
162
- # transcription_file = "concatenated_transcriptions/download_audio.srt"
163
- # source_languaje = "English"
164
- # python_file = "translate_transcriptions.py"
165
- # command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
166
- # os.system(command)
167
- # if REMOVE_FILES:
168
- # command = f"rm {transcription_file}"
169
- # os.system(command)
170
- # print('*'*NUMBER)
171
- # print("\n\n")
172
- # progress_bar.update(1)
173
-
174
- # ################## Add subtitles to video ##################
175
- # if ADD_SUBTITLES_TO_VIDEO:
176
- # print('*'*NUMBER)
177
- # print("Add subtitles to video")
178
- # python_file = "add_subtitles_to_video.py"
179
- # transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt"
180
- # input_video_file = "videos/download_video.mp4"
181
- # input_audio_file = "audios/download_audio.mp3"
182
- # command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}"
183
- # os.system(command)
184
- # if REMOVE_FILES:
185
- # command = f"rm {input_video_file}"
186
- # os.system(command)
187
- # command = f"rm {input_audio_file}"
188
- # os.system(command)
189
- # command = f"rm {transcription_file}"
190
- # os.system(command)
191
- # command = f"rm chunks/output_files.txt"
192
- # os.system(command)
193
- # command = f"rm vocals/speakers.txt"
194
- # os.system(command)
195
- # print('*'*NUMBER)
196
- # print("\n\n")
197
- # progress_bar.update(1)
198
-
199
- # ################## Remove all ##################
200
- # if REMOVE_ALL:
201
- # command = f"rm audios/*"
202
- # os.system(command)
203
- # command = f"rm chunks/*"
204
- # os.system(command)
205
- # command = f"rm concatenated_transcriptions/*"
206
- # os.system(command)
207
- # command = f"rm transcriptions/*"
208
- # os.system(command)
209
- # command = f"rm translated_transcriptions/*"
210
- # os.system(command)
211
- # # Check if videos/download_video.mp4 exists
212
- # if os.path.isfile("videos/download_video.mp4"):
213
- # command = f"rm videos/download_video.mp4"
214
- # os.system(command)
215
- # # command = f"rm videos/*"
216
- # # os.system(command)
217
- # command = f"rm vocals/*"
218
- # os.system(command)
219
 
220
  # # def copy_url_from_clipboard():
221
  # # return pyperclip.paste()
222
 
223
- # def clear_video_url():
224
- # visible = False
225
- # image = gr.Image(visible=visible, scale=1)
226
- # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
227
- # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
228
- # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
229
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
230
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
231
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
232
- # return (
233
- # "",
234
- # image,
235
- # source_languaje,
236
- # target_languaje,
237
- # translate_button,
238
- # original_audio,
239
- # original_audio_transcribed,
240
- # original_audio_translated,
241
- # )
242
-
243
- # def get_youtube_thumbnail(url):
244
- # yt = YouTube(url)
245
- # thumbnail_url = yt.thumbnail_url
246
- # return thumbnail_url
247
-
248
- # def is_valid_youtube_url(url):
249
- # patron_youtube = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
250
- # if not re.match(patron_youtube, url):
251
- # return False
252
- # return True
253
-
254
- # def is_valid_url(url):
255
- # source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
256
- # target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
257
- # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=True)
258
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False)
259
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True)
260
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
261
- # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
262
-
263
- # # Youtube
264
- # if "youtube" in url.lower() or "youtu.be" in url.lower():
265
- # if is_valid_youtube_url(url):
266
- # thumbnail = get_youtube_thumbnail(url)
267
- # if thumbnail:
268
- # return (
269
- # gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
270
- # source_languaje,
271
- # target_languaje,
272
- # translate_button,
273
- # gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
274
- # original_audio,
275
- # original_audio_transcribed,
276
- # original_audio_translated,
277
- # subtitled_video
278
- # )
279
- # else:
280
- # return (
281
- # gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
282
- # source_languaje,
283
- # target_languaje,
284
- # translate_button,
285
- # gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
286
- # original_audio,
287
- # original_audio_transcribed,
288
- # original_audio_translated,
289
- # subtitled_video
290
- # )
291
 
292
- # # Twitch
293
- # elif "twitch" in url.lower() or "twitch.tv" in url.lower():
294
- # return (
295
- # gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
296
- # source_languaje,
297
- # target_languaje,
298
- # translate_button,
299
- # gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
300
- # original_audio,
301
- # original_audio_transcribed,
302
- # original_audio_translated,
303
- # subtitled_video
304
- # )
305
 
306
- # # Error
307
- # visible = False
308
- # image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
309
- # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
310
- # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
311
- # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
312
- # stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible)
313
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
314
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
315
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
316
- # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
317
- # return (
318
- # image,
319
- # source_languaje,
320
- # target_languaje,
321
- # translate_button,
322
- # stream_page,
323
- # original_audio,
324
- # original_audio_transcribed,
325
- # original_audio_translated,
326
- # subtitled_video
327
- # )
328
-
329
- # def get_audio_and_video_from_video(url, stream_page):
330
- # python_file = "download.py"
331
- # command = f"python {python_file} {url}"
332
- # os.system(command)
333
- # # sleep(5)
334
-
335
- # audio = "audios/download_audio.mp3"
336
- # video = "videos/download_video.mp4"
337
-
338
- # return (
339
- # gr.Audio(value=audio, label="Original audio", elem_id="original_audio", visible=True, interactive=False),
340
- # gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
341
- # gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
342
- # )
343
-
344
- # def trascribe_audio(audio_path, source_languaje):
345
- # python_file = "slice_audio.py"
346
- # command = f"python {python_file} {audio_path} {SECONDS}"
347
- # os.system(command)
348
-
349
- # folder_vocals = "vocals"
350
- # folder_chunck = "chunks"
351
- # with open(f"{folder_vocals}/speakers.txt", 'w') as f:
352
- # f.write(str(0))
353
- # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
354
- # os.system(command)
355
-
356
- # python_file = "transcribe.py"
357
- # chunck_file = "chunks/output_files.txt"
358
- # speakers_file = "vocals/speakers.txt"
359
- # command = f"python {python_file} {chunck_file} {source_languaje} {speakers_file} {DEVICE} {not SEPARE_VOCALS}"
360
- # os.system(command)
361
- # with open(chunck_file, 'r') as f:
362
- # files = f.read().splitlines()
363
- # with open(speakers_file, 'r') as f:
364
- # speakers = f.read().splitlines()
365
- # speakers = int(speakers[0])
366
- # for file in files:
367
- # if speakers > 0:
368
- # vocals_extension = "wav"
369
- # for i in range(speakers):
370
- # file_name, _ = file.split(".")
371
- # _, file_name = file_name.split("/")
372
- # vocal = f'{folder_vocals}/{file_name}_speaker{i:003d}.{vocals_extension}'
373
- # command = f"rm {vocal}"
374
- # os.system(command)
375
- # else:
376
- # vocals_extension = "mp3"
377
- # file_name, _ = file.split(".")
378
- # _, file_name = file_name.split("/")
379
- # vocal = f'{folder_vocals}/{file_name}.{vocals_extension}'
380
- # command = f"rm {vocal}"
381
- # os.system(command)
382
-
383
- # python_file = "concat_transcriptions.py"
384
- # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
385
- # os.system(command)
386
- # with open(chunck_file, 'r') as f:
387
- # files = f.read().splitlines()
388
- # for file in files:
389
- # file_name, _ = file.split(".")
390
- # _, file_name = file_name.split("/")
391
- # transcriptions_folder = "transcriptions"
392
- # transcription_extension = "srt"
393
- # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
394
- # os.system(command)
395
-
396
- # audio_transcribed = "concatenated_transcriptions/download_audio.srt"
397
- # with open(audio_transcribed, 'r') as f:
398
- # result = f.read()
399
-
400
- # return (
401
- # result,
402
- # gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
403
- # )
404
-
405
- # def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
406
- # python_file = "translate_transcriptions.py"
407
- # command = f"python {python_file} {original_audio_transcribed_path} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
408
- # os.system(command)
409
-
410
- # translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"
411
- # with open(translated_transcription, 'r') as f:
412
- # result = f.read()
413
- # transcription_file = "concatenated_transcriptions/download_audio.srt"
414
- # command = f"rm {transcription_file}"
415
- # os.system(command)
416
-
417
- # return (
418
- # result,
419
- # gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
420
- # )
421
-
422
- # def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
423
- # python_file = "add_subtitles_to_video.py"
424
- # command = f"python {python_file} {original_audio_translated_path} {original_video_path} {original_audio_path}"
425
- # os.system(command)
426
-
427
- # command = f"rm {original_video_path}"
428
- # os.system(command)
429
- # command = f"rm {original_audio_path}"
430
- # os.system(command)
431
- # command = f"rm {original_audio_translated_path}"
432
- # os.system(command)
433
- # command = f"rm chunks/output_files.txt"
434
- # os.system(command)
435
- # command = f"rm vocals/speakers.txt"
436
- # os.system(command)
437
-
438
- # subtitled_video = "videos/download_video_with_subtitles.mp4"
439
 
440
- # return gr.Video(value=subtitled_video, label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
441
 
442
  def subtify():
443
  with gr.Blocks() as demo:
444
  # Layout
445
  gr.Markdown("""# Subtify""")
446
- gr.Markdown("""# Subtify""")
447
  gr.Markdown(f"Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
448
- # with gr.Row(variant="panel"):
449
- # url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
450
- # copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
451
- # delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
452
-
453
- # stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
454
- # visible = False
455
- # with gr.Row(equal_height=False):
456
- # image = gr.Image(visible=visible, scale=1)
457
- # with gr.Column():
458
- # with gr.Row():
459
- # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
460
- # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
461
- # with gr.Row():
462
- # subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
463
-
464
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
465
- # original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
466
- # original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
467
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
468
- # original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
469
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
470
- # original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
471
- # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
472
-
473
- # # Events
474
- # # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
475
- # delete_button.click(
476
- # fn=clear_video_url,
477
- # outputs=[
478
- # url_textbox,
479
- # image,
480
- # source_languaje,
481
- # target_languaje,
482
- # subtify_button,
483
- # original_audio,
484
- # original_audio_transcribed,
485
- # original_audio_translated,
486
- # ]
487
- # )
488
- # url_textbox.change(
489
- # fn=is_valid_url,
490
- # inputs=url_textbox,
491
- # outputs=[
492
- # image,
493
- # source_languaje,
494
- # target_languaje,
495
- # subtify_button,
496
- # stream_page,
497
- # original_audio,
498
- # original_audio_transcribed,
499
- # original_audio_translated,
500
- # subtitled_video
501
- # ]
502
- # )
503
- # subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path, original_video_path])
504
- # original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
505
- # original_audio_transcribed.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
506
- # original_audio_translated.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
507
 
508
 
509
  demo.launch()
 
1
  import gradio as gr
2
  import argparse
3
  import sys
4
+ import os
5
+ import torch
6
+ from time import sleep
7
+ from tqdm import tqdm
8
+ from lang_list import union_language_dict
9
+ # import pyperclip
10
+ from pytube import YouTube
11
+ import re
12
+
13
# Width, in characters, of the '*' separator lines printed between stages.
NUMBER = 100

# Device name forwarded to the helper scripts; CUDA is used when torch
# reports it as available. Hard-code DEVICE = "cpu" to force CPU execution.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Switches enabling each stage of the command-line pipeline.
DOWNLOAD = True
SLICE_AUDIO = False
SEPARE_VOCALS = False
TRANSCRIBE_AUDIO = False
CONCATENATE_TRANSCRIPTIONS = False
TRANSLATE_TRANSCRIPTIONS = False
ADD_SUBTITLES_TO_VIDEO = False

# Clean-up switches: delete per-stage intermediates / wipe the work folders.
REMOVE_FILES = False
REMOVE_ALL = False

# Value passed to the slicing and concatenation scripts; a smaller value is
# used when vocal separation is enabled.
SECONDS = 150 if SEPARE_VOCALS else 300

# Identifiers used as the value of the hidden "Stream page" textbox.
YOUTUBE = "youtube"
TWITCH = "twitch"
ERROR = "error"
33
+
34
+ language_dict = union_language_dict()
35
+
36
def subtify_no_ui():
    """Run the subtitle pipeline from the command line, without the UI.

    Every stage is switched on or off by a module-level flag (DOWNLOAD,
    SLICE_AUDIO, SEPARE_VOCALS, TRANSCRIBE_AUDIO, CONCATENATE_TRANSCRIPTIONS,
    TRANSLATE_TRANSCRIPTIONS, ADD_SUBTITLES_TO_VIDEO, REMOVE_ALL) and is
    carried out by launching a helper script through ``os.system``. When
    REMOVE_FILES is set, the intermediate files of a stage are deleted once
    that stage has run. A tqdm progress bar is advanced once per stage.

    Returns:
        None. The function works only through side effects (child
        processes, files on disk and console output).
    """

    def _stem(path):
        # File name without directory or extension. os.path is used instead
        # of unpacking str.split() so that paths with extra dots or a
        # different number of directory levels do not raise ValueError.
        return os.path.splitext(os.path.basename(path))[0]

    def _read_lines(path):
        # Return the lines of a text file without their line terminators.
        with open(path, 'r') as f:
            return f.read().splitlines()

    number_works = 7
    progress_bar = tqdm(total=number_works, desc="Subtify")
    separator = '*' * NUMBER

    ################## Download video and audio ##################
    if DOWNLOAD:
        print(separator)
        # Alternative sample videos:
        # url = "https://www.twitch.tv/videos/1936119752" # Twitch, Rob Mula, 2 hours
        # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, several speakers, 3 minutes
        # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, single speaker, 4:42 minutes
        # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # Rob Mula, single speaker, 16 minutes
        # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # short conversation
        url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ Rob Mula, many speakers, 4:28 minutes
        print(f"Downloading video and audio from {url}")
        python_file = "download.py"
        os.system(f"python {python_file} {url}")
        # NOTE(review): the reason for this pause is not visible here;
        # presumably it lets the downloaded files settle - confirm.
        sleep(5)
        print(separator)
        print("\n\n")
    progress_bar.update(1)

    ################## Slice audio ##################
    if SLICE_AUDIO:
        print(separator)
        print("Slicing audio")
        python_file = "slice_audio.py"
        audio = "audios/download_audio.mp3"
        os.system(f"python {python_file} {audio} {SECONDS}")
        print(separator)
        print("\n\n")
    progress_bar.update(1)

    ################## Get vocals ##################
    chunck_file = "chunks/output_files.txt"
    print(separator)
    if SEPARE_VOCALS:
        print("Get vocals")
        python_file = "separe_vocals.py"
        os.system(f"python {python_file} {chunck_file} {DEVICE}")
        if REMOVE_FILES:
            # Delete every file listed in the chunk list.
            for file in _read_lines(chunck_file):
                os.system(f"rm {file}")
    else:
        print("Moving chunks")
        folder_vocals = "vocals"
        folder_chunck = "chunks"
        # Without vocal separation the speakers file holds a count of 0.
        with open(f"{folder_vocals}/speakers.txt", 'w') as f:
            f.write(str(0))
        # Move the chunks when intermediates are not kept, copy otherwise.
        transfer = "mv" if REMOVE_FILES else "cp"
        os.system(f"{transfer} {folder_chunck}/*.mp3 {folder_vocals}/")
    print(separator)
    print("\n\n")
    progress_bar.update(1)

    ################# Transcript vocals ##################
    speakers_file = "vocals/speakers.txt"
    if TRANSCRIBE_AUDIO:
        print(separator)
        print("Transcript vocals")
        python_file = "transcribe.py"
        language = "English"
        os.system(f"python {python_file} {chunck_file} {language} {speakers_file} {DEVICE} {not SEPARE_VOCALS}")
        if REMOVE_FILES:
            vocals_folder = "vocals"
            files = _read_lines(chunck_file)
            # The first line of the speakers file is the speaker count.
            speakers = int(_read_lines(speakers_file)[0])
            for file in files:
                file_name = _stem(file)
                if speakers > 0:
                    # One .wav file per speaker and chunk.
                    vocals_extension = "wav"
                    for i in range(speakers):
                        vocal = f'{vocals_folder}/{file_name}_speaker{i:003d}.{vocals_extension}'
                        os.system(f"rm {vocal}")
                else:
                    vocals_extension = "mp3"
                    vocal = f'{vocals_folder}/{file_name}.{vocals_extension}'
                    os.system(f"rm {vocal}")
        print(separator)
        print("\n\n")
    progress_bar.update(1)

    ################## Concatenate transcriptions ##################
    if CONCATENATE_TRANSCRIPTIONS:
        print(separator)
        print("Concatenate transcriptions")
        python_file = "concat_transcriptions.py"
        os.system(f"python {python_file} {chunck_file} {SECONDS} {speakers_file}")
        if REMOVE_FILES:
            transcriptions_folder = "transcriptions"
            transcription_extension = "srt"
            # Delete the per-chunk transcription of every listed chunk.
            for file in _read_lines(chunck_file):
                file_name = _stem(file)
                os.system(f"rm {transcriptions_folder}/{file_name}.{transcription_extension}")
        print(separator)
        print("\n\n")
    progress_bar.update(1)

    ################## Translate transcription ##################
    # Defined outside the flag check because the subtitle stage below also
    # needs it to build the translated file name.
    target_languaje = "Español"
    if TRANSLATE_TRANSCRIPTIONS:
        print(separator)
        print("Translate transcription")
        transcription_file = "concatenated_transcriptions/download_audio.srt"
        source_languaje = "English"
        python_file = "translate_transcriptions.py"
        os.system(f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}")
        if REMOVE_FILES:
            os.system(f"rm {transcription_file}")
        print(separator)
        print("\n\n")
    progress_bar.update(1)

    ################## Add subtitles to video ##################
    if ADD_SUBTITLES_TO_VIDEO:
        print(separator)
        print("Add subtitles to video")
        python_file = "add_subtitles_to_video.py"
        transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt"
        input_video_file = "videos/download_video.mp4"
        input_audio_file = "audios/download_audio.mp3"
        os.system(f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}")
        if REMOVE_FILES:
            # Inputs of this stage plus the two bookkeeping files.
            for leftover in (
                input_video_file,
                input_audio_file,
                transcription_file,
                "chunks/output_files.txt",
                "vocals/speakers.txt",
            ):
                os.system(f"rm {leftover}")
        print(separator)
        print("\n\n")
    progress_bar.update(1)

    ################## Remove all ##################
    if REMOVE_ALL:
        os.system("rm audios/*")
        os.system("rm chunks/*")
        os.system("rm concatenated_transcriptions/*")
        os.system("rm transcriptions/*")
        os.system("rm translated_transcriptions/*")
        # Only the downloaded video is removed from videos/, and only if it
        # exists; other files in that folder are kept.
        if os.path.isfile("videos/download_video.mp4"):
            os.system("rm videos/download_video.mp4")
        os.system("rm vocals/*")
219
 
220
  # # def copy_url_from_clipboard():
221
  # # return pyperclip.paste()
222
 
223
def clear_video_url():
    """Return an empty URL string plus hidden versions of the UI components.

    Returns:
        tuple: ``""`` followed by a ``gr.Image``, the source and target
        language ``gr.Dropdown``s, a ``gr.Button``, a ``gr.Audio`` and two
        ``gr.Textbox``es, all created with ``visible=False``.
    """
    hidden = False

    def _language_dropdown(label, default):
        # Language selector sharing every option except label and value.
        return gr.Dropdown(visible=hidden, label=label, show_label=True, value=default, choices=language_dict, scale=1, interactive=True)

    def _readonly_textbox(label, elem_id):
        # Non-editable text area.
        return gr.Textbox(label=label, elem_id=elem_id, interactive=False, visible=hidden)

    return (
        "",
        gr.Image(visible=hidden, scale=1),
        _language_dropdown("Source languaje", "English"),
        _language_dropdown("Target languaje", "Español"),
        gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=hidden),
        gr.Audio(label="Original audio", elem_id="original_audio", visible=hidden, interactive=False),
        _readonly_textbox("Original audio transcribed", "original_audio_transcribed"),
        _readonly_textbox("Original audio translated", "original_audio_translated"),
    )
242
+
243
def get_youtube_thumbnail(url):
    """Return the ``thumbnail_url`` of the pytube ``YouTube`` object for *url*."""
    return YouTube(url).thumbnail_url
247
+
248
def is_valid_youtube_url(url):
    """Tell whether *url* starts like a YouTube watch or youtu.be link.

    The pattern is matched from the start of the string only, so anything
    following the video id (for example extra query parameters) is accepted.

    Returns:
        bool: True when the pattern matches, False otherwise.
    """
    youtube_pattern = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
    return re.match(youtube_pattern, url) is not None
253
+
254
def is_valid_url(url):
    """Classify *url* and build the component updates for the URL-change event.

    Three outcomes are possible:

    * a YouTube link that passes ``is_valid_youtube_url``: the video thumbnail
      (or a placeholder picture when no thumbnail is returned) is shown and the
      stream page is tagged ``YOUTUBE``;
    * a link containing "twitch": a Twitch picture is shown and the stream
      page is tagged ``TWITCH``;
    * anything else, including a YouTube-looking link that fails validation:
      every widget is hidden and the stream page is tagged ``ERROR``.

    Returns:
        A 9-tuple ordered as (image, source dropdown, target dropdown, action
        button, stream-page textbox, original audio, transcription textbox,
        translation textbox, subtitled video).
    """
    def shared_widgets(shown):
        # Widgets common to every outcome; only their visibility changes.
        return (
            gr.Dropdown(visible=shown, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True),
            gr.Dropdown(visible=shown, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True),
            gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=shown),
            gr.Audio(label="Original audio", elem_id="original_audio", visible=shown, interactive=False),
            gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=shown),
            gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=shown),
            gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=shown, interactive=False),
        )

    def assemble(picture, page, widgets):
        # Interleave the picture and stream-page tag into the output order.
        source, target, button, audio, transcribed, translated, video = widgets
        return (picture, source, target, button, page, audio, transcribed, translated, video)

    def visible_picture(path):
        return gr.Image(value=path, visible=True, show_download_button=False, container=False)

    def hidden_page(tag):
        return gr.Textbox(value=tag, label="Stream page", elem_id="stream_page", visible=False)

    shown_widgets = shared_widgets(True)
    lowered = url.lower()

    # Youtube
    if "youtube" in lowered or "youtu.be" in lowered:
        if is_valid_youtube_url(url):
            thumbnail = get_youtube_thumbnail(url)
            picture_path = thumbnail if thumbnail else "assets/youtube-no-thumbnails.webp"
            return assemble(visible_picture(picture_path), hidden_page(YOUTUBE), shown_widgets)
        # A YouTube-looking but invalid link falls through to the error outcome.

    # Twitch
    elif "twitch" in lowered or "twitch.tv" in lowered:
        return assemble(visible_picture("assets/twitch.webp"), hidden_page(TWITCH), shown_widgets)

    # Error
    return assemble(
        gr.Image(value="assets/youtube_error.webp", visible=False, show_download_button=False, container=False),
        hidden_page(ERROR),
        shared_widgets(False),
    )
328
+
329
def get_audio_and_video_from_video(url, stream_page):
    """Download the media behind *url* and publish the resulting file paths.

    The download is delegated to the ``download.py`` script, which is run as
    a child process. Its exit status is not inspected, matching the previous
    best-effort behaviour.

    Args:
        url: Video URL typed by the user in the URL textbox.
        stream_page: Value of the hidden "Stream page" textbox. It is accepted
            because the click event passes it, but it is not used here.

    Returns:
        A 3-tuple of component updates: the visible original-audio player and
        two hidden textboxes carrying the audio path and the video path.
    """
    import subprocess  # local import: only this function needs it

    # The URL is user input. Passing it as one argv entry (no shell) prevents
    # shell metacharacters in it from being interpreted as extra commands.
    subprocess.run(["python", "download.py", url], check=False)

    # Fixed output locations; presumably written by download.py - verify
    # against that script.
    audio = "audios/download_audio.mp3"
    video = "videos/download_video.mp4"

    return (
        gr.Audio(value=audio, label="Original audio", elem_id="original_audio", visible=True, interactive=False),
        gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
        gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False),
    )
343
+
344
def trascribe_audio(audio_path, source_languaje):
    """Slice the audio, transcribe every chunk and return the joined SRT text.

    Steps, each run through a helper script or shell command:

    1. ``slice_audio.py`` cuts *audio_path* into chunks of ``SECONDS`` seconds.
    2. ``vocals/speakers.txt`` is initialised to ``0`` and the chunk mp3 files
       are moved from ``chunks/`` to ``vocals/``.
    3. ``transcribe.py`` transcribes the chunks listed in
       ``chunks/output_files.txt``.
    4. The per-chunk audio files in ``vocals/`` are removed.
    5. ``concat_transcriptions.py`` merges the per-chunk transcriptions, after
       which the per-chunk ``.srt`` files are removed.

    Args:
        audio_path: Path of the downloaded audio file.
        source_languaje: Language selected in the source dropdown.

    Returns:
        A 2-tuple: the text of the concatenated SRT file, and a hidden textbox
        carrying the path of that file.
    """
    vocals_dir = "vocals"
    chunks_dir = "chunks"
    chunk_list_path = "chunks/output_files.txt"
    speakers_path = "vocals/speakers.txt"

    def read_chunk_list():
        with open(chunk_list_path, 'r') as handle:
            return handle.read().splitlines()

    def stem_of(chunk):
        # "folder/name.ext" -> "name". The two-way unpacking raises ValueError
        # if the entry has a different number of dots or slashes.
        without_extension, _ = chunk.split(".")
        _, stem = without_extension.split("/")
        return stem

    # 1. Slice the audio into chunks.
    os.system(f"python slice_audio.py {audio_path} {SECONDS}")

    # 2. Record zero speakers and move the chunks next to that file.
    with open(f"{vocals_dir}/speakers.txt", 'w') as handle:
        handle.write(str(0))
    os.system(f"mv {chunks_dir}/*.mp3 {vocals_dir}/")

    # 3. Transcribe every chunk.
    os.system(f"python transcribe.py {chunk_list_path} {source_languaje} {speakers_path} {DEVICE} {not SEPARE_VOCALS}")

    # 4. Remove the audio that was just transcribed.
    chunk_paths = read_chunk_list()
    with open(speakers_path, 'r') as handle:
        speaker_count = int(handle.read().splitlines()[0])
    for chunk in chunk_paths:
        stem = stem_of(chunk)
        if speaker_count > 0:
            # One wav file per separated speaker.
            for index in range(speaker_count):
                os.system(f"rm {vocals_dir}/{stem}_speaker{index:003d}.wav")
        else:
            os.system(f"rm {vocals_dir}/{stem}.mp3")

    # 5. Merge the per-chunk transcriptions, then drop the individual files.
    os.system(f"python concat_transcriptions.py {chunk_list_path} {SECONDS} {speakers_path}")
    for chunk in read_chunk_list():
        os.system(f"rm transcriptions/{stem_of(chunk)}.srt")

    audio_transcribed = "concatenated_transcriptions/download_audio.srt"
    with open(audio_transcribed, 'r') as handle:
        result = handle.read()

    return (
        result,
        gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False),
    )
404
+
405
def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
    """Translate the concatenated transcription and return the translated SRT.

    ``translate_transcriptions.py`` is run on the transcription file; the
    translated file is then read from ``translated_transcriptions/`` and the
    concatenated source transcription is removed.

    Args:
        original_audio_transcribed_path: Path of the SRT file to translate.
        source_languaje: Language selected in the source dropdown.
        target_languaje: Language selected in the target dropdown; it is also
            part of the translated file's name.

    Returns:
        A 2-tuple: the translated SRT text, and a hidden textbox carrying the
        path of the translated file.
    """
    os.system(
        f"python translate_transcriptions.py {original_audio_transcribed_path} "
        f"--source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
    )

    translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"
    with open(translated_transcription, 'r') as handle:
        result = handle.read()

    # The untranslated transcription is no longer needed once translated.
    os.system("rm concatenated_transcriptions/download_audio.srt")

    return (
        result,
        gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False),
    )
421
+
422
def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
    """Add the translated subtitles to the video, then remove the inputs.

    ``add_subtitles_to_video.py`` is run with the translated SRT, the video
    and the audio. Afterwards the three input files and the two bookkeeping
    files of the pipeline are removed.

    Args:
        original_video_path: Path of the downloaded video.
        original_audio_path: Path of the downloaded audio.
        original_audio_translated_path: Path of the translated SRT file.

    Returns:
        A visible ``gr.Video`` pointing at
        ``videos/download_video_with_subtitles.mp4``.
    """
    os.system(f"python add_subtitles_to_video.py {original_audio_translated_path} {original_video_path} {original_audio_path}")

    # Remove the intermediate files in the same order as before.
    leftovers = (
        original_video_path,
        original_audio_path,
        original_audio_translated_path,
        "chunks/output_files.txt",
        "vocals/speakers.txt",
    )
    for leftover in leftovers:
        os.system(f"rm {leftover}")

    return gr.Video(value="videos/download_video_with_subtitles.mp4", label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
441
 
442
def subtify():
    """Build the Subtify Gradio interface, wire its events and launch it.

    The pipeline is a chain of events: clicking the action button downloads
    the media; the change of the original-audio player triggers the
    transcription; the change of the transcription textbox triggers the
    translation; the change of the translation textbox triggers the
    subtitling of the video.
    """
    with gr.Blocks() as demo:
        # Layout
        gr.Markdown("""# Subtify""")
        gr.Markdown(f"Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")

        with gr.Row(variant="panel"):
            url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
            # NOTE: the copy button is displayed but has no click handler.
            copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
            delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)

        stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)

        # Everything below starts hidden and is revealed by the URL check.
        shown = False
        with gr.Row(equal_height=False):
            image = gr.Image(visible=shown, scale=1)
            with gr.Column():
                with gr.Row():
                    source_languaje = gr.Dropdown(visible=shown, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
                    target_languaje = gr.Dropdown(visible=shown, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
                with gr.Row():
                    subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=shown)

        original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=shown, interactive=False)
        original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
        original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
        original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=shown)
        original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
        original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=shown)
        original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
        subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=shown, interactive=False)

        # Events
        reset_targets = [
            url_textbox,
            image,
            source_languaje,
            target_languaje,
            subtify_button,
            original_audio,
            original_audio_transcribed,
            original_audio_translated,
        ]
        url_check_targets = [
            image,
            source_languaje,
            target_languaje,
            subtify_button,
            stream_page,
            original_audio,
            original_audio_transcribed,
            original_audio_translated,
            subtitled_video,
        ]

        delete_button.click(fn=clear_video_url, outputs=reset_targets)
        url_textbox.change(fn=is_valid_url, inputs=url_textbox, outputs=url_check_targets)

        # Pipeline: each stage is started by the output of the previous one.
        subtify_button.click(
            fn=get_audio_and_video_from_video,
            inputs=[url_textbox, stream_page],
            outputs=[original_audio, original_audio_path, original_video_path],
        )
        original_audio.change(
            fn=trascribe_audio,
            inputs=[original_audio_path, source_languaje],
            outputs=[original_audio_transcribed, original_audio_transcribed_path],
        )
        original_audio_transcribed.change(
            fn=translate_transcription,
            inputs=[original_audio_transcribed_path, source_languaje, target_languaje],
            outputs=[original_audio_translated, original_audio_translated_path],
        )
        original_audio_translated.change(
            fn=add_translated_subtitles_to_video,
            inputs=[original_video_path, original_audio_path, original_audio_translated_path],
            outputs=subtitled_video,
        )

    demo.launch()