RASMUS committed on
Commit
7e2e27e
1 Parent(s): 668f0a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +244 -174
app.py CHANGED
@@ -1,14 +1,10 @@
1
  import os
2
-
3
- # Download and build ggergavos/whisper.cpp Kudos to this man for wonderful whisper implementation!
4
- # This means speed!
5
 
6
  os.system('git clone https://github.com/ggerganov/whisper.cpp.git')
7
  os.system('make -C ./whisper.cpp')
8
-
9
- # Download models, add finetuned languages later once whisper finetuning event is ready
10
- # Models are downloaded on the fly so we can get quite many models :)
11
-
12
  os.system('bash ./whisper.cpp/models/download-ggml-model.sh small')
13
  os.system('bash ./whisper.cpp/models/download-ggml-model.sh base')
14
  os.system('bash ./whisper.cpp/models/download-ggml-model.sh medium')
@@ -21,144 +17,149 @@ os.system('bash ./whisper.cpp/models/download-ggml-model.sh base.en')
21
  #print("MOI")
22
 
23
 
24
-
25
  import gradio as gr
26
  from pathlib import Path
27
  import pysrt
28
  import pandas as pd
29
  import re
30
  import time
31
- import os
32
- import json
33
- import requests
34
 
35
  from pytube import YouTube
36
- from transformers import MarianMTModel, MarianTokenizer
37
 
38
  import psutil
39
  num_cores = psutil.cpu_count()
40
  os.environ["OMP_NUM_THREADS"] = f"{num_cores}"
41
  headers = {'Authorization': os.environ['DeepL_API_KEY']}
42
 
 
 
 
43
  whisper_models = ["base", "small", "medium", "large", "base.en"]
44
 
 
 
 
 
 
 
45
 
46
  LANGUAGES = {
47
- "en": "english",
48
- "zh": "chinese",
49
- "de": "german",
50
- "es": "spanish",
51
- "ru": "russian",
52
- "ko": "korean",
53
- "fr": "french",
54
- "ja": "japanese",
55
- "pt": "portuguese",
56
- "tr": "turkish",
57
- "pl": "polish",
58
- "ca": "catalan",
59
- "nl": "dutch",
60
- "ar": "arabic",
61
- "sv": "swedish",
62
- "it": "italian",
63
- "id": "indonesian",
64
- "hi": "hindi",
65
- "fi": "finnish",
66
- "vi": "vietnamese",
67
- "he": "hebrew",
68
- "uk": "ukrainian",
69
- "el": "greek",
70
- "ms": "malay",
71
- "cs": "czech",
72
- "ro": "romanian",
73
- "da": "danish",
74
- "hu": "hungarian",
75
- "ta": "tamil",
76
- "no": "norwegian",
77
- "th": "thai",
78
- "ur": "urdu",
79
- "hr": "croatian",
80
- "bg": "bulgarian",
81
- "lt": "lithuanian",
82
- "la": "latin",
83
- "mi": "maori",
84
- "ml": "malayalam",
85
- "cy": "welsh",
86
- "sk": "slovak",
87
- "te": "telugu",
88
- "fa": "persian",
89
- "lv": "latvian",
90
- "bn": "bengali",
91
- "sr": "serbian",
92
- "az": "azerbaijani",
93
- "sl": "slovenian",
94
- "kn": "kannada",
95
- "et": "estonian",
96
- "mk": "macedonian",
97
- "br": "breton",
98
- "eu": "basque",
99
- "is": "icelandic",
100
- "hy": "armenian",
101
- "ne": "nepali",
102
- "mn": "mongolian",
103
- "bs": "bosnian",
104
- "kk": "kazakh",
105
- "sq": "albanian",
106
- "sw": "swahili",
107
- "gl": "galician",
108
- "mr": "marathi",
109
- "pa": "punjabi",
110
- "si": "sinhala",
111
- "km": "khmer",
112
- "sn": "shona",
113
- "yo": "yoruba",
114
- "so": "somali",
115
- "af": "afrikaans",
116
- "oc": "occitan",
117
- "ka": "georgian",
118
- "be": "belarusian",
119
- "tg": "tajik",
120
- "sd": "sindhi",
121
- "gu": "gujarati",
122
- "am": "amharic",
123
- "yi": "yiddish",
124
- "lo": "lao",
125
- "uz": "uzbek",
126
- "fo": "faroese",
127
- "ht": "haitian creole",
128
- "ps": "pashto",
129
- "tk": "turkmen",
130
- "nn": "nynorsk",
131
- "mt": "maltese",
132
- "sa": "sanskrit",
133
- "lb": "luxembourgish",
134
- "my": "myanmar",
135
- "bo": "tibetan",
136
- "tl": "tagalog",
137
- "mg": "malagasy",
138
- "as": "assamese",
139
- "tt": "tatar",
140
- "haw": "hawaiian",
141
- "ln": "lingala",
142
- "ha": "hausa",
143
- "ba": "bashkir",
144
- "jw": "javanese",
145
- "su": "sundanese",
146
  }
147
 
148
  # language code lookup by name, with a few language aliases
149
  source_languages = {
150
  **{language: code for code, language in LANGUAGES.items()},
151
- "burmese": "my",
152
- "valencian": "ca",
153
- "flemish": "nl",
154
- "haitian": "ht",
155
- "letzeburgesch": "lb",
156
- "pushto": "ps",
157
- "panjabi": "pa",
158
- "moldavian": "ro",
159
- "moldovan": "ro",
160
- "sinhalese": "si",
161
- "castilian": "es",
162
  "Let the model analyze": "Let the model analyze"
163
  }
164
 
@@ -193,12 +194,16 @@ DeepL_language_codes_for_translation = {
193
  }
194
 
195
 
196
-
197
  transcribe_options = dict(beam_size=3, best_of=3, without_timestamps=False)
198
 
199
 
200
  source_language_list = [key[0] for key in source_languages.items()]
201
  translation_models_list = [key[0] for key in DeepL_language_codes_for_translation.items()]
 
 
 
 
 
202
 
203
  videos_out_path = Path("./videos_out")
204
  videos_out_path.mkdir(parents=True, exist_ok=True)
@@ -228,7 +233,7 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
228
  This space is using c++ implementation by https://github.com/ggerganov/whisper.cpp
229
  """
230
 
231
- if(video_file_path == None):
232
  raise ValueError("Error no video input")
233
  print(video_file_path)
234
  try:
@@ -244,9 +249,12 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
244
  srt_path = str(video_file_path.replace(file_ending, ".wav")) + ".srt"
245
  os.system(f'rm -f {srt_path}')
246
  if selected_source_lang == "Let the model analyze":
247
- os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
248
  else:
249
- os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l {source_languages.get(selected_source_lang)} -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
 
 
 
250
  print("starting whisper done with whisper")
251
  except Exception as e:
252
  raise RuntimeError("Error converting video to audio")
@@ -294,7 +302,7 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
294
 
295
  def translate_transcriptions(df, selected_translation_lang_2):
296
  if selected_translation_lang_2 is None:
297
- selected_translation_lang_2 = 'english'
298
  df.reset_index(inplace=True)
299
 
300
  print("start_translation")
@@ -313,35 +321,61 @@ def translate_transcriptions(df, selected_translation_lang_2):
313
  'tag_spitting': 'xml',
314
  'target_lang': DeepL_language_codes_for_translation.get(selected_translation_lang_2)
315
  }
316
- response = requests.post('https://api-free.deepl.com/v2/translate', headers=headers, data=data)
317
-
318
- # Print the response from the server
319
- translated_sentences = json.loads(response.text)
320
- translated_sentences = translated_sentences['translations'][0]['text'].split('\n')
321
- df['translation'] = translated_sentences
322
-
323
 
 
 
 
 
 
 
 
 
 
324
  print("translations done")
325
 
326
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
 
328
 
329
- def create_srt_and_burn(df, video_in):
 
330
 
331
- print("Starting creation of video wit srt")
332
- print("video in path is:")
333
- print(video_in)
334
-
335
-
336
- with open('testi.srt','w', encoding="utf-8") as file:
337
  for i in range(len(df)):
338
  file.write(str(i+1))
339
  file.write('\n')
340
  start = df.iloc[i]['start']
341
 
342
-
343
-
344
- file.write(f"{start}")
345
 
346
  stop = df.iloc[i]['end']
347
 
@@ -353,30 +387,50 @@ def create_srt_and_burn(df, video_in):
353
  if int(i) != len(df)-1:
354
  file.write('\n\n')
355
 
356
- print("SRT DONE")
357
- try:
358
- file1 = open('./testi.srt', 'r', encoding="utf-8")
359
- Lines = file1.readlines()
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
- count = 0
362
- # Strips the newline character
363
- for line in Lines:
364
- count += 1
365
- print("{}".format(line))
366
-
367
- print(type(video_in))
368
- print(video_in)
369
 
370
- video_out = video_in.replace('.mp4', '_out.mp4')
371
- print("video_out_path")
372
- print(video_out)
373
- command = 'ffmpeg -i "{}" -y -vf subtitles=./testi.srt "{}"'.format(video_in, video_out)
374
- print(command)
375
- os.system(command)
376
- return video_out
377
- except Exception as e:
378
- print(e)
379
- return video_out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
 
382
  # ---- Gradio Layout -----
@@ -386,7 +440,7 @@ video_out = gr.Video(label="Video Out", mirror_webcam=False)
386
 
387
 
388
 
389
- df_init = pd.DataFrame(columns=['start','end','text'])
390
 
391
  selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="Let the model analyze", label="Spoken language in video", interactive=True)
392
  selected_translation_lang_2 = gr.Dropdown(choices=translation_models_list, type="value", value="English", label="In which language you want the transcriptions?", interactive=True)
@@ -395,6 +449,15 @@ selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value
395
  transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
396
  transcription_and_translation_df = gr.DataFrame(value=df_init,label="Transcription and translation dataframe", max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
397
 
 
 
 
 
 
 
 
 
 
398
 
399
  demo = gr.Blocks(css='''
400
  #cut_btn, #reset_btn { align-self:stretch; }
@@ -464,21 +527,28 @@ with demo:
464
  ##### Here you will can translate transcriptions to 26 languages.
465
  ##### If spoken language is not in the list, translation might not work. In this case original transcriptions are used
466
  ##### ''')
467
- selected_translation_lang_2.render()
468
  translate_transcriptions_button = gr.Button("Step 3. Translate transcription")
469
- translate_transcriptions_button.click(translate_transcriptions, [transcription_df, selected_translation_lang_2], transcription_and_translation_df)
470
  transcription_and_translation_df.render()
 
 
 
 
 
471
 
472
  with gr.Row():
473
  with gr.Column():
474
  gr.Markdown('''
475
  ##### Now press the Step 4. Button to create output video with translated transcriptions
476
  ##### ''')
477
- translate_and_make_srt_btn = gr.Button("Step 4. Create and burn srt to video")
478
  print(video_in)
479
- translate_and_make_srt_btn.click(create_srt_and_burn, [transcription_and_translation_df,video_in], [
480
- video_out])
481
- video_out.render()
 
 
482
 
483
 
484
  demo.launch()
 
1
  import os
2
+ import requests
3
+ import json
4
+ import base64
5
 
6
  os.system('git clone https://github.com/ggerganov/whisper.cpp.git')
7
  os.system('make -C ./whisper.cpp')
 
 
 
 
8
  os.system('bash ./whisper.cpp/models/download-ggml-model.sh small')
9
  os.system('bash ./whisper.cpp/models/download-ggml-model.sh base')
10
  os.system('bash ./whisper.cpp/models/download-ggml-model.sh medium')
 
17
  #print("MOI")
18
 
19
 
 
20
  import gradio as gr
21
  from pathlib import Path
22
  import pysrt
23
  import pandas as pd
24
  import re
25
  import time
 
 
 
26
 
27
  from pytube import YouTube
28
+ #from transformers import MarianMTModel, MarianTokenizer
29
 
30
  import psutil
31
  num_cores = psutil.cpu_count()
32
  os.environ["OMP_NUM_THREADS"] = f"{num_cores}"
33
  headers = {'Authorization': os.environ['DeepL_API_KEY']}
34
 
35
+
36
+ import torch
37
+
38
  whisper_models = ["base", "small", "medium", "large", "base.en"]
39
 
40
+ custom_models = ["belarus-small"]
41
+
42
+ combined_models = []
43
+ combined_models.extend(whisper_models)
44
+ combined_models.extend(custom_models)
45
+
46
 
47
  LANGUAGES = {
48
+ "en": "English",
49
+ "zh": "Chinese",
50
+ "de": "German",
51
+ "es": "Spanish",
52
+ "ru": "Russian",
53
+ "ko": "Korean",
54
+ "fr": "French",
55
+ "ja": "Japanese",
56
+ "pt": "Portuguese",
57
+ "tr": "Turkish",
58
+ "pl": "Polish",
59
+ "ca": "Catalan",
60
+ "nl": "Dutch",
61
+ "ar": "Arabic",
62
+ "sv": "Swedish",
63
+ "it": "Italian",
64
+ "id": "Indonesian",
65
+ "hi": "Hindi",
66
+ "fi": "Finnish",
67
+ "vi": "Vietnamese",
68
+ "he": "Hebrew",
69
+ "uk": "Ukrainian",
70
+ "el": "Greek",
71
+ "ms": "Malay",
72
+ "cs": "Czech",
73
+ "ro": "Romanian",
74
+ "da": "Danish",
75
+ "hu": "Hungarian",
76
+ "ta": "Tamil",
77
+ "no": "Norwegian",
78
+ "th": "Thai",
79
+ "ur": "Urdu",
80
+ "hr": "Croatian",
81
+ "bg": "Bulgarian",
82
+ "lt": "Lithuanian",
83
+ "la": "Latin",
84
+ "mi": "Maori",
85
+ "ml": "Malayalam",
86
+ "cy": "Welsh",
87
+ "sk": "Slovak",
88
+ "te": "Telugu",
89
+ "fa": "Persian",
90
+ "lv": "Latvian",
91
+ "bn": "Bengali",
92
+ "sr": "Serbian",
93
+ "az": "Azerbaijani",
94
+ "sl": "Slovenian",
95
+ "kn": "Kannada",
96
+ "et": "Estonian",
97
+ "mk": "Macedonian",
98
+ "br": "Breton",
99
+ "eu": "Basque",
100
+ "is": "Icelandic",
101
+ "hy": "Armenian",
102
+ "ne": "Nepali",
103
+ "mn": "Mongolian",
104
+ "bs": "Bosnian",
105
+ "kk": "Kazakh",
106
+ "sq": "Albanian",
107
+ "sw": "Swahili",
108
+ "gl": "Galician",
109
+ "mr": "Marathi",
110
+ "pa": "Punjabi",
111
+ "si": "Sinhala",
112
+ "km": "Khmer",
113
+ "sn": "Shona",
114
+ "yo": "Yoruba",
115
+ "so": "Somali",
116
+ "af": "Afrikaans",
117
+ "oc": "Occitan",
118
+ "ka": "Georgian",
119
+ "be": "Belarusian",
120
+ "tg": "Tajik",
121
+ "sd": "Sindhi",
122
+ "gu": "Gujarati",
123
+ "am": "Amharic",
124
+ "yi": "Yiddish",
125
+ "lo": "Lao",
126
+ "uz": "Uzbek",
127
+ "fo": "Faroese",
128
+ "ht": "Haitian creole",
129
+ "ps": "Pashto",
130
+ "tk": "Turkmen",
131
+ "nn": "Nynorsk",
132
+ "mt": "Maltese",
133
+ "sa": "Sanskrit",
134
+ "lb": "Luxembourgish",
135
+ "my": "Myanmar",
136
+ "bo": "Tibetan",
137
+ "tl": "Tagalog",
138
+ "mg": "Malagasy",
139
+ "as": "Assamese",
140
+ "tt": "Tatar",
141
+ "haw": "Hawaiian",
142
+ "ln": "Lingala",
143
+ "ha": "Hausa",
144
+ "ba": "Bashkir",
145
+ "jw": "Javanese",
146
+ "su": "Sundanese",
147
  }
148
 
149
  # language code lookup by name, with a few language aliases
150
  source_languages = {
151
  **{language: code for code, language in LANGUAGES.items()},
152
+ "Burmese": "my",
153
+ "Valencian": "ca",
154
+ "Flemish": "nl",
155
+ "Haitian": "ht",
156
+ "Letzeburgesch": "lb",
157
+ "Pushto": "ps",
158
+ "Panjabi": "pa",
159
+ "Moldavian": "ro",
160
+ "Moldovan": "ro",
161
+ "Sinhalese": "si",
162
+ "Castilian": "es",
163
  "Let the model analyze": "Let the model analyze"
164
  }
165
 
 
194
  }
195
 
196
 
 
197
  transcribe_options = dict(beam_size=3, best_of=3, without_timestamps=False)
198
 
199
 
200
  source_language_list = [key[0] for key in source_languages.items()]
201
  translation_models_list = [key[0] for key in DeepL_language_codes_for_translation.items()]
202
+
203
+
204
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
205
+ print("DEVICE IS: ")
206
+ print(device)
207
 
208
  videos_out_path = Path("./videos_out")
209
  videos_out_path.mkdir(parents=True, exist_ok=True)
 
233
  This space is using c++ implementation by https://github.com/ggerganov/whisper.cpp
234
  """
235
 
236
+ if(video_file_path == None):
237
  raise ValueError("Error no video input")
238
  print(video_file_path)
239
  try:
 
249
  srt_path = str(video_file_path.replace(file_ending, ".wav")) + ".srt"
250
  os.system(f'rm -f {srt_path}')
251
  if selected_source_lang == "Let the model analyze":
252
+ os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l "auto" -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
253
  else:
254
+ if whisper_model in custom_models:
255
+ os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l {source_languages.get(selected_source_lang)} -m ./converted_models/ggml-{whisper_model}.bin -osrt')
256
+ else:
257
+ os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l {source_languages.get(selected_source_lang)} -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
258
  print("starting whisper done with whisper")
259
  except Exception as e:
260
  raise RuntimeError("Error converting video to audio")
 
302
 
303
  def translate_transcriptions(df, selected_translation_lang_2):
304
  if selected_translation_lang_2 is None:
305
+ selected_translation_lang_2 = 'English'
306
  df.reset_index(inplace=True)
307
 
308
  print("start_translation")
 
321
  'tag_spitting': 'xml',
322
  'target_lang': DeepL_language_codes_for_translation.get(selected_translation_lang_2)
323
  }
324
+ try:
325
+ response = requests.post('https://api-free.deepl.com/v2/translate', headers=headers, data=data)
 
 
 
 
 
326
 
327
+ # Parse the JSON response returned by the DeepL API
328
+ translated_sentences = json.loads(response.text)
329
+ translated_sentences = translated_sentences['translations'][0]['text'].split('\n')
330
+ df['translation'] = translated_sentences
331
+ except Exception as e:
332
+ print("EXCEPTION WITH DEEPL API")
333
+ print(e)
334
+ df['translation'] = df['text']
335
+
336
  print("translations done")
337
 
338
+ print("Starting SRT-file creation")
339
+ print(df.head())
340
+ df.reset_index(inplace=True)
341
+ with open('subtitles.vtt','w', encoding="utf-8") as file:
342
+ print("Starting WEBVTT-file creation")
343
+
344
+ for i in range(len(df)):
345
+ if i == 0:
346
+ file.write('WEBVTT')
347
+ file.write('\n')
348
+
349
+ else:
350
+ file.write(str(i+1))
351
+ file.write('\n')
352
+ start = df.iloc[i]['start']
353
+
354
+
355
+ file.write(f"{start.strip()}")
356
+
357
+ stop = df.iloc[i]['end']
358
+
359
+
360
+ file.write(' --> ')
361
+ file.write(f"{stop}")
362
+ file.write('\n')
363
+ file.writelines(df.iloc[i]['translation'])
364
+ if int(i) != len(df)-1:
365
+ file.write('\n\n')
366
 
367
+ print("WEBVTT DONE")
368
 
369
+ with open('subtitles.srt','w', encoding="utf-8") as file:
370
+ print("Starting SRT-file creation")
371
 
 
 
 
 
 
 
372
  for i in range(len(df)):
373
  file.write(str(i+1))
374
  file.write('\n')
375
  start = df.iloc[i]['start']
376
 
377
+
378
+ file.write(f"{start.strip()}")
 
379
 
380
  stop = df.iloc[i]['end']
381
 
 
387
  if int(i) != len(df)-1:
388
  file.write('\n\n')
389
 
390
+ print("SRT DONE")
391
+ subtitle_files = ['subtitles.vtt','subtitles.srt']
392
+
393
+ return df, subtitle_files
394
+
395
+ # def burn_srt_to_video(srt_file, video_in):
396
+
397
+ # print("Starting creation of video wit srt")
398
+
399
+ # try:
400
+ # video_out = video_in.replace('.mp4', '_out.mp4')
401
+ # print(os.system('ls -lrth'))
402
+ # print(video_in)
403
+ # print(video_out)
404
+ # command = 'ffmpeg -i "{}" -y -vf subtitles=./subtitles.srt "{}"'.format(video_in, video_out)
405
+ # os.system(command)
406
 
407
+ # return video_out
 
 
 
 
 
 
 
408
 
409
+ # except Exception as e:
410
+ # print(e)
411
+ # return video_out
412
+
413
+ def create_video_player(subtitle_files, video_in):
414
+
415
+ with open(video_in, "rb") as file:
416
+ video_base64 = base64.b64encode(file.read())
417
+ with open('./subtitles.vtt', "rb") as file:
418
+ subtitle_base64 = base64.b64encode(file.read())
419
+
420
+ video_player = f'''<video id="video" controls preload="metadata">
421
+ <source src="data:video/mp4;base64,{str(video_base64)[2:-1]}" type="video/mp4" />
422
+ <track
423
+ label="English"
424
+ kind="subtitles"
425
+ srclang="en"
426
+ src="data:text/vtt;base64,{str(subtitle_base64)[2:-1]}"
427
+ default />
428
+ </video>
429
+ '''
430
+ #video_player = gr.HTML(video_player)
431
+ return video_player
432
+
433
+
434
 
435
 
436
  # ---- Gradio Layout -----
 
440
 
441
 
442
 
443
+ df_init = pd.DataFrame(columns=['start','end','text', 'translation'])
444
 
445
  selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="Let the model analyze", label="Spoken language in video", interactive=True)
446
  selected_translation_lang_2 = gr.Dropdown(choices=translation_models_list, type="value", value="English", label="In which language you want the transcriptions?", interactive=True)
 
449
  transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
450
  transcription_and_translation_df = gr.DataFrame(value=df_init,label="Transcription and translation dataframe", max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
451
 
452
+ subtitle_files = gr.File(
453
+ label="Download srt-file",
454
+ file_count="multiple",
455
+ type="file",
456
+ interactive=False,
457
+ )
458
+
459
+ video_player = gr.HTML('<p>video will be played here after you press the button at step 4')
460
+
461
 
462
  demo = gr.Blocks(css='''
463
  #cut_btn, #reset_btn { align-self:stretch; }
 
527
  ##### Here you will can translate transcriptions to 26 languages.
528
  ##### If spoken language is not in the list, translation might not work. In this case original transcriptions are used
529
  ##### ''')
530
+ selected_translation_lang_2.render()
531
  translate_transcriptions_button = gr.Button("Step 3. Translate transcription")
532
+ translate_transcriptions_button.click(translate_transcriptions, [transcription_df, selected_translation_lang_2], [transcription_and_translation_df, subtitle_files])
533
  transcription_and_translation_df.render()
534
+
535
+ with gr.Row():
536
+ with gr.Column():
537
+ gr.Markdown('''##### From here you can download the srt-file ''')
538
+ subtitle_files.render()
539
 
540
  with gr.Row():
541
  with gr.Column():
542
  gr.Markdown('''
543
  ##### Now press the Step 4. Button to create output video with translated transcriptions
544
  ##### ''')
545
+ create_video_button = gr.Button("Step 4. Create and add subtitles to video")
546
  print(video_in)
547
+ create_video_button.click(create_video_player, [subtitle_files,video_in], [
548
+ video_player])
549
+ video_player.render()
550
+
551
+
552
 
553
 
554
  demo.launch()