Maximofn committed on
Commit
b78bd38
1 Parent(s): 35596fd

Remove app.py and rename subtify.py to app.py

Browse files
Files changed (2) hide show
  1. app.py +502 -44
  2. subtify.py +0 -518
app.py CHANGED
@@ -1,60 +1,518 @@
1
  import gradio as gr
2
- from modelscope.pipelines import pipeline
3
- from modelscope.utils.constant import Tasks
4
- import soundfile as sf
5
- import numpy as np
6
- import os
7
  # import torch
 
 
 
 
 
 
8
 
9
- SAMPLE_RATE = 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def get_sample_rate(audio_file_path):
12
- _, sample_rate = sf.read(audio_file_path, always_2d=True)
13
- return sample_rate
14
 
15
- def change_sample_rate(input_audio_file_path, output_audio_file_path, sample_rate):
16
- # do ffmpeg -i $input_audio_file_path -ar $sample_rate $output_audio_file_path
17
- os.system(f'ffmpeg -i {input_audio_file_path} -ar {sample_rate} {output_audio_file_path}')
18
 
19
- def audio_is_stereo(audio_file_path):
20
- audio, _ = sf.read(audio_file_path, always_2d=True)
21
- return audio.shape[1] == 2
22
 
23
- def set_mono(input_audio_file_path, output_audio_file_path):
24
- os.system(f'ffmpeg -i {input_audio_file_path} -ac 1 {output_audio_file_path}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- os.system('wget https://maximofn.com/wp-content/uploads/2023/10/vocals.wav')
27
- input = "vocals.wav"
28
- input_8k = "vocals_8k.wav"
29
- input_8k_mono = "vocals_8k_mono.wav"
 
 
 
 
 
 
 
30
 
31
- sr = get_sample_rate(input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- if sr != SAMPLE_RATE:
34
- change_sample_rate(input, input_8k, SAMPLE_RATE)
35
- else:
36
- input_8k = input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- if audio_is_stereo(input_8k):
39
- set_mono(input_8k, input_8k_mono)
40
- else:
41
- input_8k_mono = input_8k
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- # device = 'cuda' if torch.cuda.is_available() else 'cpu'
44
- device = 'cpu'
45
- separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
46
- print("Separating...")
47
- result = separation(input_8k_mono)
48
- print("Separated!")
 
 
 
 
 
 
 
 
 
 
49
 
50
- print("Saving...")
51
- for i, signal in enumerate(result['output_pcm_list']):
52
- save_file = f'output_spk{i}.wav'
53
- sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
54
- print("Saved!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- with gr.Blocks() as demo:
58
- gr.Textbox("Subtify")
59
 
60
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import argparse
3
+ # import os
 
 
 
4
  # import torch
5
+ # from time import sleep
6
+ # from tqdm import tqdm
7
+ # from lang_list import union_language_dict
8
+ # # import pyperclip
9
+ # from pytube import YouTube
10
+ # import re
11
 
12
+ # NUMBER = 100
13
+ # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
+ # # DEVICE = "cpu"
15
+ # DOWNLOAD = True
16
+ # SLICE_AUDIO = False
17
+ # SEPARE_VOCALS = False
18
+ # TRANSCRIBE_AUDIO = False
19
+ # CONCATENATE_TRANSCRIPTIONS = False
20
+ # TRANSLATE_TRANSCRIPTIONS = False
21
+ # ADD_SUBTITLES_TO_VIDEO = False
22
+ # REMOVE_FILES = False
23
+ # REMOVE_ALL = False
24
+ # if SEPARE_VOCALS:
25
+ # SECONDS = 150
26
+ # else:
27
+ # SECONDS = 300
28
 
29
+ # YOUTUBE = "youtube"
30
+ # TWITCH = "twitch"
31
+ # ERROR = "error"
32
 
33
+ # language_dict = union_language_dict()
 
 
34
 
35
+ # def subtify_no_ui():
36
+ # number_works = 7
37
+ # progress_bar = tqdm(total=number_works, desc="Subtify")
38
 
39
+ # ################## Download video and audio ##################
40
+ # if DOWNLOAD:
41
+ # print('*'*NUMBER)
42
+ # # url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas
43
+ # # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos
44
+ # # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos
45
+ # url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos
46
+ # # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos
47
+ # # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short
48
+ # print(f"Downloading video and audio from {url}")
49
+ # python_file = "download.py"
50
+ # command = f"python {python_file} {url}"
51
+ # os.system(command)
52
+ # sleep(5)
53
+ # print('*'*NUMBER)
54
+ # print("\n\n")
55
+ # progress_bar.update(1)
56
 
57
+ # ################## Slice audio ##################
58
+ # if SLICE_AUDIO:
59
+ # print('*'*NUMBER)
60
+ # print("Slicing audio")
61
+ # python_file = "slice_audio.py"
62
+ # audio = "audios/download_audio.mp3"
63
+ # command = f"python {python_file} {audio} {SECONDS}"
64
+ # os.system(command)
65
+ # print('*'*NUMBER)
66
+ # print("\n\n")
67
+ # progress_bar.update(1)
68
 
69
+ # ################## Get vocals ##################
70
+ # chunck_file = "chunks/output_files.txt"
71
+ # print('*'*NUMBER)
72
+ # if SEPARE_VOCALS:
73
+ # print("Get vocals")
74
+ # python_file = "separe_vocals.py"
75
+ # command = f"python {python_file} {chunck_file} {DEVICE}"
76
+ # os.system(command)
77
+ # if REMOVE_FILES:
78
+ # with open(chunck_file, 'r') as f:
79
+ # files = f.read().splitlines()
80
+ # for file in files:
81
+ # command = f"rm {file}"
82
+ # os.system(command)
83
+ # else:
84
+ # print("Moving chunks")
85
+ # folder_vocals = "vocals"
86
+ # folder_chunck = "chunks"
87
+ # with open(f"{folder_vocals}/speakers.txt", 'w') as f:
88
+ # f.write(str(0))
89
+ # if REMOVE_FILES:
90
+ # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
91
+ # os.system(command)
92
+ # else:
93
+ # command = f"cp {folder_chunck}/*.mp3 {folder_vocals}/"
94
+ # os.system(command)
95
+ # print('*'*NUMBER)
96
+ # print("\n\n")
97
+ # progress_bar.update(1)
98
 
99
+ # ################# Transcript vocals ##################
100
+ # speakers_file = "vocals/speakers.txt"
101
+ # if TRANSCRIBE_AUDIO:
102
+ # print('*'*NUMBER)
103
+ # print("Transcript vocals")
104
+ # python_file = "transcribe.py"
105
+ # language = "English"
106
+ # command = f"python {python_file} {chunck_file} {language} {speakers_file} {DEVICE} {not SEPARE_VOCALS}"
107
+ # os.system(command)
108
+ # if REMOVE_FILES:
109
+ # vocals_folder = "vocals"
110
+ # with open(chunck_file, 'r') as f:
111
+ # files = f.read().splitlines()
112
+ # with open(speakers_file, 'r') as f:
113
+ # speakers = f.read().splitlines()
114
+ # speakers = int(speakers[0])
115
+ # for file in files:
116
+ # if speakers > 0:
117
+ # vocals_extension = "wav"
118
+ # for i in range(speakers):
119
+ # file_name, _ = file.split(".")
120
+ # _, file_name = file_name.split("/")
121
+ # vocal = f'{vocals_folder}/{file_name}_speaker{i:003d}.{vocals_extension}'
122
+ # command = f"rm {vocal}"
123
+ # os.system(command)
124
+ # else:
125
+ # vocals_extension = "mp3"
126
+ # file_name, _ = file.split(".")
127
+ # _, file_name = file_name.split("/")
128
+ # vocal = f'{vocals_folder}/{file_name}.{vocals_extension}'
129
+ # command = f"rm {vocal}"
130
+ # os.system(command)
131
+ # print('*'*NUMBER)
132
+ # print("\n\n")
133
+ # progress_bar.update(1)
134
 
135
+ # ################## Concatenate transcriptions ##################
136
+ # if CONCATENATE_TRANSCRIPTIONS:
137
+ # print('*'*NUMBER)
138
+ # print("Concatenate transcriptions")
139
+ # python_file = "concat_transcriptions.py"
140
+ # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
141
+ # os.system(command)
142
+ # if REMOVE_FILES:
143
+ # with open(chunck_file, 'r') as f:
144
+ # files = f.read().splitlines()
145
+ # for file in files:
146
+ # file_name, _ = file.split(".")
147
+ # _, file_name = file_name.split("/")
148
+ # transcriptions_folder = "transcriptions"
149
+ # transcription_extension = "srt"
150
+ # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
151
+ # os.system(command)
152
+ # print('*'*NUMBER)
153
+ # print("\n\n")
154
+ # progress_bar.update(1)
155
 
156
+ # ################## Translate transcription ##################
157
+ # target_languaje = "Español"
158
+ # if TRANSLATE_TRANSCRIPTIONS:
159
+ # print('*'*NUMBER)
160
+ # print("Translate transcription")
161
+ # transcription_file = "concatenated_transcriptions/download_audio.srt"
162
+ # source_languaje = "English"
163
+ # python_file = "translate_transcriptions.py"
164
+ # command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
165
+ # os.system(command)
166
+ # if REMOVE_FILES:
167
+ # command = f"rm {transcription_file}"
168
+ # os.system(command)
169
+ # print('*'*NUMBER)
170
+ # print("\n\n")
171
+ # progress_bar.update(1)
172
 
173
+ # ################## Add subtitles to video ##################
174
+ # if ADD_SUBTITLES_TO_VIDEO:
175
+ # print('*'*NUMBER)
176
+ # print("Add subtitles to video")
177
+ # python_file = "add_subtitles_to_video.py"
178
+ # transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt"
179
+ # input_video_file = "videos/download_video.mp4"
180
+ # input_audio_file = "audios/download_audio.mp3"
181
+ # command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}"
182
+ # os.system(command)
183
+ # if REMOVE_FILES:
184
+ # command = f"rm {input_video_file}"
185
+ # os.system(command)
186
+ # command = f"rm {input_audio_file}"
187
+ # os.system(command)
188
+ # command = f"rm {transcription_file}"
189
+ # os.system(command)
190
+ # command = f"rm chunks/output_files.txt"
191
+ # os.system(command)
192
+ # command = f"rm vocals/speakers.txt"
193
+ # os.system(command)
194
+ # print('*'*NUMBER)
195
+ # print("\n\n")
196
+ # progress_bar.update(1)
197
 
198
+ # ################## Remove all ##################
199
+ # if REMOVE_ALL:
200
+ # command = f"rm audios/*"
201
+ # os.system(command)
202
+ # command = f"rm chunks/*"
203
+ # os.system(command)
204
+ # command = f"rm concatenated_transcriptions/*"
205
+ # os.system(command)
206
+ # command = f"rm transcriptions/*"
207
+ # os.system(command)
208
+ # command = f"rm translated_transcriptions/*"
209
+ # os.system(command)
210
+ # # Check if videos/download_video.mp4 exists
211
+ # if os.path.isfile("videos/download_video.mp4"):
212
+ # command = f"rm videos/download_video.mp4"
213
+ # os.system(command)
214
+ # # command = f"rm videos/*"
215
+ # # os.system(command)
216
+ # command = f"rm vocals/*"
217
+ # os.system(command)
218
 
219
+ # # def copy_url_from_clipboard():
220
+ # # return pyperclip.paste()
221
 
222
+ # def clear_video_url():
223
+ # visible = False
224
+ # image = gr.Image(visible=visible, scale=1)
225
+ # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
226
+ # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
227
+ # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
228
+ # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
229
+ # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
230
+ # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
231
+ # return (
232
+ # "",
233
+ # image,
234
+ # source_languaje,
235
+ # target_languaje,
236
+ # translate_button,
237
+ # original_audio,
238
+ # original_audio_transcribed,
239
+ # original_audio_translated,
240
+ # )
241
+
242
+ # def get_youtube_thumbnail(url):
243
+ # yt = YouTube(url)
244
+ # thumbnail_url = yt.thumbnail_url
245
+ # return thumbnail_url
246
+
247
+ # def is_valid_youtube_url(url):
248
+ # patron_youtube = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
249
+ # if not re.match(patron_youtube, url):
250
+ # return False
251
+ # return True
252
+
253
+ # def is_valid_url(url):
254
+ # source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
255
+ # target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
256
+ # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=True)
257
+ # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False)
258
+ # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True)
259
+ # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
260
+ # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
261
+
262
+ # # Youtube
263
+ # if "youtube" in url.lower() or "youtu.be" in url.lower():
264
+ # if is_valid_youtube_url(url):
265
+ # thumbnail = get_youtube_thumbnail(url)
266
+ # if thumbnail:
267
+ # return (
268
+ # gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
269
+ # source_languaje,
270
+ # target_languaje,
271
+ # translate_button,
272
+ # gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
273
+ # original_audio,
274
+ # original_audio_transcribed,
275
+ # original_audio_translated,
276
+ # subtitled_video
277
+ # )
278
+ # else:
279
+ # return (
280
+ # gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
281
+ # source_languaje,
282
+ # target_languaje,
283
+ # translate_button,
284
+ # gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
285
+ # original_audio,
286
+ # original_audio_transcribed,
287
+ # original_audio_translated,
288
+ # subtitled_video
289
+ # )
290
+
291
+ # # Twitch
292
+ # elif "twitch" in url.lower() or "twitch.tv" in url.lower():
293
+ # return (
294
+ # gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
295
+ # source_languaje,
296
+ # target_languaje,
297
+ # translate_button,
298
+ # gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
299
+ # original_audio,
300
+ # original_audio_transcribed,
301
+ # original_audio_translated,
302
+ # subtitled_video
303
+ # )
304
+
305
+ # # Error
306
+ # visible = False
307
+ # image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
308
+ # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
309
+ # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
310
+ # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
311
+ # stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible)
312
+ # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
313
+ # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
314
+ # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
315
+ # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
316
+ # return (
317
+ # image,
318
+ # source_languaje,
319
+ # target_languaje,
320
+ # translate_button,
321
+ # stream_page,
322
+ # original_audio,
323
+ # original_audio_transcribed,
324
+ # original_audio_translated,
325
+ # subtitled_video
326
+ # )
327
+
328
+ # def get_audio_and_video_from_video(url, stream_page):
329
+ # python_file = "download.py"
330
+ # command = f"python {python_file} {url}"
331
+ # os.system(command)
332
+ # # sleep(5)
333
+
334
+ # audio = "audios/download_audio.mp3"
335
+ # video = "videos/download_video.mp4"
336
+
337
+ # return (
338
+ # gr.Audio(value=audio, label="Original audio", elem_id="original_audio", visible=True, interactive=False),
339
+ # gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
340
+ # gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
341
+ # )
342
+
343
+ # def trascribe_audio(audio_path, source_languaje):
344
+ # python_file = "slice_audio.py"
345
+ # command = f"python {python_file} {audio_path} {SECONDS}"
346
+ # os.system(command)
347
+
348
+ # folder_vocals = "vocals"
349
+ # folder_chunck = "chunks"
350
+ # with open(f"{folder_vocals}/speakers.txt", 'w') as f:
351
+ # f.write(str(0))
352
+ # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
353
+ # os.system(command)
354
+
355
+ # python_file = "transcribe.py"
356
+ # chunck_file = "chunks/output_files.txt"
357
+ # speakers_file = "vocals/speakers.txt"
358
+ # command = f"python {python_file} {chunck_file} {source_languaje} {speakers_file} {DEVICE} {not SEPARE_VOCALS}"
359
+ # os.system(command)
360
+ # with open(chunck_file, 'r') as f:
361
+ # files = f.read().splitlines()
362
+ # with open(speakers_file, 'r') as f:
363
+ # speakers = f.read().splitlines()
364
+ # speakers = int(speakers[0])
365
+ # for file in files:
366
+ # if speakers > 0:
367
+ # vocals_extension = "wav"
368
+ # for i in range(speakers):
369
+ # file_name, _ = file.split(".")
370
+ # _, file_name = file_name.split("/")
371
+ # vocal = f'{folder_vocals}/{file_name}_speaker{i:003d}.{vocals_extension}'
372
+ # command = f"rm {vocal}"
373
+ # os.system(command)
374
+ # else:
375
+ # vocals_extension = "mp3"
376
+ # file_name, _ = file.split(".")
377
+ # _, file_name = file_name.split("/")
378
+ # vocal = f'{folder_vocals}/{file_name}.{vocals_extension}'
379
+ # command = f"rm {vocal}"
380
+ # os.system(command)
381
+
382
+ # python_file = "concat_transcriptions.py"
383
+ # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
384
+ # os.system(command)
385
+ # with open(chunck_file, 'r') as f:
386
+ # files = f.read().splitlines()
387
+ # for file in files:
388
+ # file_name, _ = file.split(".")
389
+ # _, file_name = file_name.split("/")
390
+ # transcriptions_folder = "transcriptions"
391
+ # transcription_extension = "srt"
392
+ # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
393
+ # os.system(command)
394
+
395
+ # audio_transcribed = "concatenated_transcriptions/download_audio.srt"
396
+ # with open(audio_transcribed, 'r') as f:
397
+ # result = f.read()
398
+
399
+ # return (
400
+ # result,
401
+ # gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
402
+ # )
403
+
404
+ # def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
405
+ # python_file = "translate_transcriptions.py"
406
+ # command = f"python {python_file} {original_audio_transcribed_path} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
407
+ # os.system(command)
408
+
409
+ # translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"
410
+ # with open(translated_transcription, 'r') as f:
411
+ # result = f.read()
412
+ # transcription_file = "concatenated_transcriptions/download_audio.srt"
413
+ # command = f"rm {transcription_file}"
414
+ # os.system(command)
415
+
416
+ # return (
417
+ # result,
418
+ # gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
419
+ # )
420
+
421
+ # def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
422
+ # python_file = "add_subtitles_to_video.py"
423
+ # command = f"python {python_file} {original_audio_translated_path} {original_video_path} {original_audio_path}"
424
+ # os.system(command)
425
+
426
+ # command = f"rm {original_video_path}"
427
+ # os.system(command)
428
+ # command = f"rm {original_audio_path}"
429
+ # os.system(command)
430
+ # command = f"rm {original_audio_translated_path}"
431
+ # os.system(command)
432
+ # command = f"rm chunks/output_files.txt"
433
+ # os.system(command)
434
+ # command = f"rm vocals/speakers.txt"
435
+ # os.system(command)
436
+
437
+ # subtitled_video = "videos/download_video_with_subtitles.mp4"
438
+
439
+ # return gr.Video(value=subtitled_video, label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
440
+
441
+ def subtify():
442
+ with gr.Blocks() as demo:
443
+ # Layout
444
+ gr.Markdown("""# Subtify""")
445
+ # with gr.Row(variant="panel"):
446
+ # url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
447
+ # copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
448
+ # delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
449
+
450
+ # stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
451
+ # visible = False
452
+ # with gr.Row(equal_height=False):
453
+ # image = gr.Image(visible=visible, scale=1)
454
+ # with gr.Column():
455
+ # with gr.Row():
456
+ # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
457
+ # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
458
+ # with gr.Row():
459
+ # subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
460
+
461
+ # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
462
+ # original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
463
+ # original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
464
+ # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
465
+ # original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
466
+ # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
467
+ # original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
468
+ # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
469
+
470
+ # # Events
471
+ # # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
472
+ # delete_button.click(
473
+ # fn=clear_video_url,
474
+ # outputs=[
475
+ # url_textbox,
476
+ # image,
477
+ # source_languaje,
478
+ # target_languaje,
479
+ # subtify_button,
480
+ # original_audio,
481
+ # original_audio_transcribed,
482
+ # original_audio_translated,
483
+ # ]
484
+ # )
485
+ # url_textbox.change(
486
+ # fn=is_valid_url,
487
+ # inputs=url_textbox,
488
+ # outputs=[
489
+ # image,
490
+ # source_languaje,
491
+ # target_languaje,
492
+ # subtify_button,
493
+ # stream_page,
494
+ # original_audio,
495
+ # original_audio_transcribed,
496
+ # original_audio_translated,
497
+ # subtitled_video
498
+ # ]
499
+ # )
500
+ # subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path, original_video_path])
501
+ # original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
502
+ # original_audio_transcribed.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
503
+ # original_audio_translated.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
504
+
505
+
506
+ demo.launch()
507
+
508
+
509
+ if __name__ == "__main__":
510
+ parser = argparse.ArgumentParser()
511
+ parser.add_argument("--no_ui", action="store_true")
512
+ args = parser.parse_args()
513
+
514
+ if args.no_ui:
515
+ pass
516
+ # subtify_no_ui()
517
+ else:
518
+ subtify()
subtify.py DELETED
@@ -1,518 +0,0 @@
1
- import gradio as gr
2
- import argparse
3
- # import os
4
- # import torch
5
- # from time import sleep
6
- # from tqdm import tqdm
7
- # from lang_list import union_language_dict
8
- # # import pyperclip
9
- # from pytube import YouTube
10
- # import re
11
-
12
- # NUMBER = 100
13
- # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
14
- # # DEVICE = "cpu"
15
- # DOWNLOAD = True
16
- # SLICE_AUDIO = False
17
- # SEPARE_VOCALS = False
18
- # TRANSCRIBE_AUDIO = False
19
- # CONCATENATE_TRANSCRIPTIONS = False
20
- # TRANSLATE_TRANSCRIPTIONS = False
21
- # ADD_SUBTITLES_TO_VIDEO = False
22
- # REMOVE_FILES = False
23
- # REMOVE_ALL = False
24
- # if SEPARE_VOCALS:
25
- # SECONDS = 150
26
- # else:
27
- # SECONDS = 300
28
-
29
- # YOUTUBE = "youtube"
30
- # TWITCH = "twitch"
31
- # ERROR = "error"
32
-
33
- # language_dict = union_language_dict()
34
-
35
- # def subtify_no_ui():
36
- # number_works = 7
37
- # progress_bar = tqdm(total=number_works, desc="Subtify")
38
-
39
- # ################## Download video and audio ##################
40
- # if DOWNLOAD:
41
- # print('*'*NUMBER)
42
- # # url = "https://www.twitch.tv/videos/1936119752" # twitch Rob Mula 2 horas
43
- # # url = "https://www.youtube.com/watch?v=yX5EJf4R77s" # ✅ debate, varios hablantes, 3 minutos
44
- # # url = "https://www.youtube.com/watch?v=cgx0QnXo1OU" # ✅ smart home, un solo hablante, 4:42 minutos
45
- # url = "https://www.youtube.com/watch?v=dgOBxhi19T8" # ✅ rob mula, muchos hablantes, 4:28 minutos
46
- # # url = "https://www.youtube.com/watch?v=Coj72EzmX20" # rob mula, un solo hablante, 16 minutos
47
- # # url = "https://www.youtube.com/watch?v=Tqth0fKo0_g" # Conversación short
48
- # print(f"Downloading video and audio from {url}")
49
- # python_file = "download.py"
50
- # command = f"python {python_file} {url}"
51
- # os.system(command)
52
- # sleep(5)
53
- # print('*'*NUMBER)
54
- # print("\n\n")
55
- # progress_bar.update(1)
56
-
57
- # ################## Slice audio ##################
58
- # if SLICE_AUDIO:
59
- # print('*'*NUMBER)
60
- # print("Slicing audio")
61
- # python_file = "slice_audio.py"
62
- # audio = "audios/download_audio.mp3"
63
- # command = f"python {python_file} {audio} {SECONDS}"
64
- # os.system(command)
65
- # print('*'*NUMBER)
66
- # print("\n\n")
67
- # progress_bar.update(1)
68
-
69
- # ################## Get vocals ##################
70
- # chunck_file = "chunks/output_files.txt"
71
- # print('*'*NUMBER)
72
- # if SEPARE_VOCALS:
73
- # print("Get vocals")
74
- # python_file = "separe_vocals.py"
75
- # command = f"python {python_file} {chunck_file} {DEVICE}"
76
- # os.system(command)
77
- # if REMOVE_FILES:
78
- # with open(chunck_file, 'r') as f:
79
- # files = f.read().splitlines()
80
- # for file in files:
81
- # command = f"rm {file}"
82
- # os.system(command)
83
- # else:
84
- # print("Moving chunks")
85
- # folder_vocals = "vocals"
86
- # folder_chunck = "chunks"
87
- # with open(f"{folder_vocals}/speakers.txt", 'w') as f:
88
- # f.write(str(0))
89
- # if REMOVE_FILES:
90
- # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
91
- # os.system(command)
92
- # else:
93
- # command = f"cp {folder_chunck}/*.mp3 {folder_vocals}/"
94
- # os.system(command)
95
- # print('*'*NUMBER)
96
- # print("\n\n")
97
- # progress_bar.update(1)
98
-
99
- # ################# Transcript vocals ##################
100
- # speakers_file = "vocals/speakers.txt"
101
- # if TRANSCRIBE_AUDIO:
102
- # print('*'*NUMBER)
103
- # print("Transcript vocals")
104
- # python_file = "transcribe.py"
105
- # language = "English"
106
- # command = f"python {python_file} {chunck_file} {language} {speakers_file} {DEVICE} {not SEPARE_VOCALS}"
107
- # os.system(command)
108
- # if REMOVE_FILES:
109
- # vocals_folder = "vocals"
110
- # with open(chunck_file, 'r') as f:
111
- # files = f.read().splitlines()
112
- # with open(speakers_file, 'r') as f:
113
- # speakers = f.read().splitlines()
114
- # speakers = int(speakers[0])
115
- # for file in files:
116
- # if speakers > 0:
117
- # vocals_extension = "wav"
118
- # for i in range(speakers):
119
- # file_name, _ = file.split(".")
120
- # _, file_name = file_name.split("/")
121
- # vocal = f'{vocals_folder}/{file_name}_speaker{i:003d}.{vocals_extension}'
122
- # command = f"rm {vocal}"
123
- # os.system(command)
124
- # else:
125
- # vocals_extension = "mp3"
126
- # file_name, _ = file.split(".")
127
- # _, file_name = file_name.split("/")
128
- # vocal = f'{vocals_folder}/{file_name}.{vocals_extension}'
129
- # command = f"rm {vocal}"
130
- # os.system(command)
131
- # print('*'*NUMBER)
132
- # print("\n\n")
133
- # progress_bar.update(1)
134
-
135
- # ################## Concatenate transcriptions ##################
136
- # if CONCATENATE_TRANSCRIPTIONS:
137
- # print('*'*NUMBER)
138
- # print("Concatenate transcriptions")
139
- # python_file = "concat_transcriptions.py"
140
- # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
141
- # os.system(command)
142
- # if REMOVE_FILES:
143
- # with open(chunck_file, 'r') as f:
144
- # files = f.read().splitlines()
145
- # for file in files:
146
- # file_name, _ = file.split(".")
147
- # _, file_name = file_name.split("/")
148
- # transcriptions_folder = "transcriptions"
149
- # transcription_extension = "srt"
150
- # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
151
- # os.system(command)
152
- # print('*'*NUMBER)
153
- # print("\n\n")
154
- # progress_bar.update(1)
155
-
156
- # ################## Translate transcription ##################
157
- # target_languaje = "Español"
158
- # if TRANSLATE_TRANSCRIPTIONS:
159
- # print('*'*NUMBER)
160
- # print("Translate transcription")
161
- # transcription_file = "concatenated_transcriptions/download_audio.srt"
162
- # source_languaje = "English"
163
- # python_file = "translate_transcriptions.py"
164
- # command = f"python {python_file} {transcription_file} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
165
- # os.system(command)
166
- # if REMOVE_FILES:
167
- # command = f"rm {transcription_file}"
168
- # os.system(command)
169
- # print('*'*NUMBER)
170
- # print("\n\n")
171
- # progress_bar.update(1)
172
-
173
- # ################## Add subtitles to video ##################
174
- # if ADD_SUBTITLES_TO_VIDEO:
175
- # print('*'*NUMBER)
176
- # print("Add subtitles to video")
177
- # python_file = "add_subtitles_to_video.py"
178
- # transcription_file = f"translated_transcriptions/download_audio_{target_languaje}.srt"
179
- # input_video_file = "videos/download_video.mp4"
180
- # input_audio_file = "audios/download_audio.mp3"
181
- # command = f"python {python_file} {transcription_file} {input_video_file} {input_audio_file}"
182
- # os.system(command)
183
- # if REMOVE_FILES:
184
- # command = f"rm {input_video_file}"
185
- # os.system(command)
186
- # command = f"rm {input_audio_file}"
187
- # os.system(command)
188
- # command = f"rm {transcription_file}"
189
- # os.system(command)
190
- # command = f"rm chunks/output_files.txt"
191
- # os.system(command)
192
- # command = f"rm vocals/speakers.txt"
193
- # os.system(command)
194
- # print('*'*NUMBER)
195
- # print("\n\n")
196
- # progress_bar.update(1)
197
-
198
- # ################## Remove all ##################
199
- # if REMOVE_ALL:
200
- # command = f"rm audios/*"
201
- # os.system(command)
202
- # command = f"rm chunks/*"
203
- # os.system(command)
204
- # command = f"rm concatenated_transcriptions/*"
205
- # os.system(command)
206
- # command = f"rm transcriptions/*"
207
- # os.system(command)
208
- # command = f"rm translated_transcriptions/*"
209
- # os.system(command)
210
- # # Check if videos/download_video.mp4 exists
211
- # if os.path.isfile("videos/download_video.mp4"):
212
- # command = f"rm videos/download_video.mp4"
213
- # os.system(command)
214
- # # command = f"rm videos/*"
215
- # # os.system(command)
216
- # command = f"rm vocals/*"
217
- # os.system(command)
218
-
219
- # # def copy_url_from_clipboard():
220
- # # return pyperclip.paste()
221
-
222
- # def clear_video_url():
223
- # visible = False
224
- # image = gr.Image(visible=visible, scale=1)
225
- # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
226
- # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
227
- # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
228
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
229
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
230
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
231
- # return (
232
- # "",
233
- # image,
234
- # source_languaje,
235
- # target_languaje,
236
- # translate_button,
237
- # original_audio,
238
- # original_audio_transcribed,
239
- # original_audio_translated,
240
- # )
241
-
242
- # def get_youtube_thumbnail(url):
243
- # yt = YouTube(url)
244
- # thumbnail_url = yt.thumbnail_url
245
- # return thumbnail_url
246
-
247
- # def is_valid_youtube_url(url):
248
- # patron_youtube = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/)[\w-]+'
249
- # if not re.match(patron_youtube, url):
250
- # return False
251
- # return True
252
-
253
- # def is_valid_url(url):
254
- # source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
255
- # target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
256
- # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=True)
257
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False)
258
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True)
259
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
260
- # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
261
-
262
- # # Youtube
263
- # if "youtube" in url.lower() or "youtu.be" in url.lower():
264
- # if is_valid_youtube_url(url):
265
- # thumbnail = get_youtube_thumbnail(url)
266
- # if thumbnail:
267
- # return (
268
- # gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
269
- # source_languaje,
270
- # target_languaje,
271
- # translate_button,
272
- # gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
273
- # original_audio,
274
- # original_audio_transcribed,
275
- # original_audio_translated,
276
- # subtitled_video
277
- # )
278
- # else:
279
- # return (
280
- # gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
281
- # source_languaje,
282
- # target_languaje,
283
- # translate_button,
284
- # gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
285
- # original_audio,
286
- # original_audio_transcribed,
287
- # original_audio_translated,
288
- # subtitled_video
289
- # )
290
-
291
- # # Twitch
292
- # elif "twitch" in url.lower() or "twitch.tv" in url.lower():
293
- # return (
294
- # gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
295
- # source_languaje,
296
- # target_languaje,
297
- # translate_button,
298
- # gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
299
- # original_audio,
300
- # original_audio_transcribed,
301
- # original_audio_translated,
302
- # subtitled_video
303
- # )
304
-
305
- # # Error
306
- # visible = False
307
- # image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
308
- # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
309
- # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
310
- # translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
311
- # stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible)
312
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
313
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
314
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
315
- # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
316
- # return (
317
- # image,
318
- # source_languaje,
319
- # target_languaje,
320
- # translate_button,
321
- # stream_page,
322
- # original_audio,
323
- # original_audio_transcribed,
324
- # original_audio_translated,
325
- # subtitled_video
326
- # )
327
-
328
- # def get_audio_and_video_from_video(url, stream_page):
329
- # python_file = "download.py"
330
- # command = f"python {python_file} {url}"
331
- # os.system(command)
332
- # # sleep(5)
333
-
334
- # audio = "audios/download_audio.mp3"
335
- # video = "videos/download_video.mp4"
336
-
337
- # return (
338
- # gr.Audio(value=audio, label="Original audio", elem_id="original_audio", visible=True, interactive=False),
339
- # gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
340
- # gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
341
- # )
342
-
343
- # def trascribe_audio(audio_path, source_languaje):
344
- # python_file = "slice_audio.py"
345
- # command = f"python {python_file} {audio_path} {SECONDS}"
346
- # os.system(command)
347
-
348
- # folder_vocals = "vocals"
349
- # folder_chunck = "chunks"
350
- # with open(f"{folder_vocals}/speakers.txt", 'w') as f:
351
- # f.write(str(0))
352
- # command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
353
- # os.system(command)
354
-
355
- # python_file = "transcribe.py"
356
- # chunck_file = "chunks/output_files.txt"
357
- # speakers_file = "vocals/speakers.txt"
358
- # command = f"python {python_file} {chunck_file} {source_languaje} {speakers_file} {DEVICE} {not SEPARE_VOCALS}"
359
- # os.system(command)
360
- # with open(chunck_file, 'r') as f:
361
- # files = f.read().splitlines()
362
- # with open(speakers_file, 'r') as f:
363
- # speakers = f.read().splitlines()
364
- # speakers = int(speakers[0])
365
- # for file in files:
366
- # if speakers > 0:
367
- # vocals_extension = "wav"
368
- # for i in range(speakers):
369
- # file_name, _ = file.split(".")
370
- # _, file_name = file_name.split("/")
371
- # vocal = f'{folder_vocals}/{file_name}_speaker{i:003d}.{vocals_extension}'
372
- # command = f"rm {vocal}"
373
- # os.system(command)
374
- # else:
375
- # vocals_extension = "mp3"
376
- # file_name, _ = file.split(".")
377
- # _, file_name = file_name.split("/")
378
- # vocal = f'{folder_vocals}/{file_name}.{vocals_extension}'
379
- # command = f"rm {vocal}"
380
- # os.system(command)
381
-
382
- # python_file = "concat_transcriptions.py"
383
- # command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
384
- # os.system(command)
385
- # with open(chunck_file, 'r') as f:
386
- # files = f.read().splitlines()
387
- # for file in files:
388
- # file_name, _ = file.split(".")
389
- # _, file_name = file_name.split("/")
390
- # transcriptions_folder = "transcriptions"
391
- # transcription_extension = "srt"
392
- # command = f"rm {transcriptions_folder}/{file_name}.{transcription_extension}"
393
- # os.system(command)
394
-
395
- # audio_transcribed = "concatenated_transcriptions/download_audio.srt"
396
- # with open(audio_transcribed, 'r') as f:
397
- # result = f.read()
398
-
399
- # return (
400
- # result,
401
- # gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
402
- # )
403
-
404
- # def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
405
- # python_file = "translate_transcriptions.py"
406
- # command = f"python {python_file} {original_audio_transcribed_path} --source_languaje {source_languaje} --target_languaje {target_languaje} --device {DEVICE}"
407
- # os.system(command)
408
-
409
- # translated_transcription = f"translated_transcriptions/download_audio_{target_languaje}.srt"
410
- # with open(translated_transcription, 'r') as f:
411
- # result = f.read()
412
- # transcription_file = "concatenated_transcriptions/download_audio.srt"
413
- # command = f"rm {transcription_file}"
414
- # os.system(command)
415
-
416
- # return (
417
- # result,
418
- # gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
419
- # )
420
-
421
- # def add_translated_subtitles_to_video(original_video_path, original_audio_path, original_audio_translated_path):
422
- # python_file = "add_subtitles_to_video.py"
423
- # command = f"python {python_file} {original_audio_translated_path} {original_video_path} {original_audio_path}"
424
- # os.system(command)
425
-
426
- # command = f"rm {original_video_path}"
427
- # os.system(command)
428
- # command = f"rm {original_audio_path}"
429
- # os.system(command)
430
- # command = f"rm {original_audio_translated_path}"
431
- # os.system(command)
432
- # command = f"rm chunks/output_files.txt"
433
- # os.system(command)
434
- # command = f"rm vocals/speakers.txt"
435
- # os.system(command)
436
-
437
- # subtitled_video = "videos/download_video_with_subtitles.mp4"
438
-
439
- # return gr.Video(value=subtitled_video, label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
440
-
441
- def subtify():
442
- with gr.Blocks() as demo:
443
- # Layout
444
- gr.Markdown("""# Subtify""")
445
- # with gr.Row(variant="panel"):
446
- # url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
447
- # copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
448
- # delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
449
-
450
- # stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
451
- # visible = False
452
- # with gr.Row(equal_height=False):
453
- # image = gr.Image(visible=visible, scale=1)
454
- # with gr.Column():
455
- # with gr.Row():
456
- # source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
457
- # target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
458
- # with gr.Row():
459
- # subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
460
-
461
- # original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
462
- # original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
463
- # original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
464
- # original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
465
- # original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
466
- # original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
467
- # original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
468
- # subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
469
-
470
- # # Events
471
- # # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
472
- # delete_button.click(
473
- # fn=clear_video_url,
474
- # outputs=[
475
- # url_textbox,
476
- # image,
477
- # source_languaje,
478
- # target_languaje,
479
- # subtify_button,
480
- # original_audio,
481
- # original_audio_transcribed,
482
- # original_audio_translated,
483
- # ]
484
- # )
485
- # url_textbox.change(
486
- # fn=is_valid_url,
487
- # inputs=url_textbox,
488
- # outputs=[
489
- # image,
490
- # source_languaje,
491
- # target_languaje,
492
- # subtify_button,
493
- # stream_page,
494
- # original_audio,
495
- # original_audio_transcribed,
496
- # original_audio_translated,
497
- # subtitled_video
498
- # ]
499
- # )
500
- # subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path, original_video_path])
501
- # original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
502
- # original_audio_transcribed.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
503
- # original_audio_translated.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
504
-
505
-
506
- demo.launch()
507
-
508
-
509
- if __name__ == "__main__":
510
- parser = argparse.ArgumentParser()
511
- parser.add_argument("--no_ui", action="store_true")
512
- args = parser.parse_args()
513
-
514
- if args.no_ui:
515
- pass
516
- # subtify_no_ui()
517
- else:
518
- subtify()