RASMUS committed on
Commit
ed211c4
1 Parent(s): c41b9dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -23
app.py CHANGED
@@ -34,6 +34,12 @@ combined_models = []
34
  combined_models.extend(whisper_models)
35
  combined_models.extend(custom_models)
36
 
 
 
 
 
 
 
37
 
38
  LANGUAGES = {
39
  "en": "English",
@@ -217,7 +223,7 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
217
  1. Download youtube video with a given url
218
  2. Watch it in the first video component
219
  3. Run automatic speech recognition on the video using fast Whisper models
220
- 4. Translate the recognized transcriptions to 26 languages supported by deepL
221
  5. Download generated subtitles in .vtt and .srt formats
222
  6. Watch the original video with generated subtitles
223
 
@@ -229,13 +235,19 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
229
  raise ValueError("Error no video input")
230
  print(video_file_path)
231
  try:
 
 
 
232
  _,file_ending = os.path.splitext(f'{video_file_path}')
233
  print(f'file enging is {file_ending}')
234
  print("starting conversion to wav")
235
  os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{video_file_path.replace(file_ending, ".wav")}"')
236
  print("conversion to wav ready")
 
 
 
237
 
238
-
239
 
240
  print("starting whisper c++")
241
  srt_path = str(video_file_path.replace(file_ending, ".wav")) + ".srt"
@@ -249,7 +261,7 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
249
  os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l {source_languages.get(selected_source_lang)} -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
250
  print("starting whisper done with whisper")
251
  except Exception as e:
252
- raise RuntimeError("Error converting video to audio")
253
 
254
  try:
255
 
@@ -283,12 +295,27 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
283
  }
284
 
285
  df = pd.concat([df, pd.DataFrame(srt_to_df)])
 
 
 
286
 
287
-
288
- return df
 
 
 
 
 
 
 
289
 
290
  except Exception as e:
291
- raise RuntimeError("Error Running inference with local model", e)
 
 
 
 
 
292
 
293
 
294
  def translate_transcriptions(df, selected_translation_lang_2):
@@ -316,20 +343,24 @@ def translate_transcriptions(df, selected_translation_lang_2):
316
 
317
  usage = requests.get('https://api-free.deepl.com/v2/usage', headers=headers)
318
  usage = json.loads(usage.text)
 
319
  try:
320
- print('Usage is at: ' + str(usage['character_count']) + 'characters')
321
  except Exception as e:
322
  print(e)
323
 
324
- if usage['character_count'] >= 490000:
325
- print("USAGE CLOSE TO LIMIT")
326
-
327
- response = requests.post('https://api-free.deepl.com/v2/translate', headers=headers, data=data)
328
 
329
- # Print the response from the server
330
- translated_sentences = json.loads(response.text)
331
- translated_sentences = translated_sentences['translations'][0]['text'].split('\n')
332
- df['translation'] = translated_sentences
 
 
 
 
333
  except Exception as e:
334
  print("EXCEPTION WITH DEEPL API")
335
  print(e)
@@ -391,7 +422,7 @@ def translate_transcriptions(df, selected_translation_lang_2):
391
 
392
  print("SRT DONE")
393
  subtitle_files = ['subtitles.vtt','subtitles.srt']
394
-
395
  return df, subtitle_files
396
 
397
  # def burn_srt_to_video(srt_file, video_in):
@@ -467,6 +498,10 @@ demo = gr.Blocks(css='''
467
  .output-markdown {max-width: 65ch !important;}
468
  ''')
469
  demo.encrypt = False
 
 
 
 
470
  with demo:
471
  transcription_var = gr.Variable()
472
 
@@ -484,8 +519,9 @@ with demo:
484
 
485
  with gr.Column():
486
  gr.Markdown('''
487
- ### 1. Copy any Youtube video URL to box below
488
- (But please **consider using short videos** so others won't get queued) or click one of the examples and then press button "1. Download Youtube video"-button:
 
489
  ''')
490
  examples = gr.Examples(examples=
491
  [ "https://www.youtube.com/watch?v=nlMuHtV82q8&ab_channel=NothingforSale24",
@@ -509,13 +545,13 @@ with demo:
509
  with gr.Column():
510
  gr.Markdown('''
511
  ##### Here you can start the transcription and translation process.
512
- ##### Be aware that processing will last some time. With base model it is around 3x speed
513
- ##### **Please select source language** for better transcriptions. Using 'Let the model analyze' makes mistakes sometimes and may lead to bad transcriptions
514
  ''')
515
  selected_source_lang.render()
516
  selected_whisper_model.render()
517
  transcribe_btn = gr.Button("Step 2. Transcribe audio")
518
- transcribe_btn.click(speech_to_text, [video_in, selected_source_lang, selected_whisper_model], transcription_df)
519
 
520
 
521
  with gr.Row():
@@ -530,9 +566,15 @@ with demo:
530
  with gr.Row():
531
  with gr.Column():
532
  gr.Markdown('''
533
- ##### PLEASE READ BELOW
534
  Here you can translate transcriptions to 26 languages.
535
- If spoken language is not in the list, translation might not work. In this case original transcriptions are used
 
 
 
 
 
 
536
  ''')
537
  selected_translation_lang_2.render()
538
  translate_transcriptions_button = gr.Button("Step 3. Translate transcription")
 
34
  combined_models.extend(whisper_models)
35
  combined_models.extend(custom_models)
36
 
37
+ usage = requests.get('https://api-free.deepl.com/v2/usage', headers=headers)
38
+ usage = json.loads(usage.text)
39
+ deepL_character_usage = str(usage['character_count'])
40
+ print("deepL_character_usage")
41
+
42
+
43
 
44
  LANGUAGES = {
45
  "en": "English",
 
223
  1. Download youtube video with a given url
224
  2. Watch it in the first video component
225
  3. Run automatic speech recognition on the video using fast Whisper models
226
+ 4. Translate the recognized transcriptions to 26 languages supported by deepL (If free API usage for the month is not yet fully consumed)
227
  5. Download generated subtitles in .vtt and .srt formats
228
  6. Watch the original video with generated subtitles
229
 
 
235
  raise ValueError("Error no video input")
236
  print(video_file_path)
237
  try:
238
+
239
+
240
+
241
  _,file_ending = os.path.splitext(f'{video_file_path}')
242
  print(f'file enging is {file_ending}')
243
  print("starting conversion to wav")
244
  os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{video_file_path.replace(file_ending, ".wav")}"')
245
  print("conversion to wav ready")
246
+
247
+ except Exception as e:
248
+ raise RuntimeError("Error Running inference with local model", e)
249
 
250
+ try:
251
 
252
  print("starting whisper c++")
253
  srt_path = str(video_file_path.replace(file_ending, ".wav")) + ".srt"
 
261
  os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l {source_languages.get(selected_source_lang)} -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
262
  print("starting whisper done with whisper")
263
  except Exception as e:
264
+ raise RuntimeError("Error running Whisper cpp model")
265
 
266
  try:
267
 
 
295
  }
296
 
297
  df = pd.concat([df, pd.DataFrame(srt_to_df)])
298
+ except Exception as e:
299
+ print("Error creating srt df")
300
+
301
 
302
+ try:
303
+ usage = requests.get('https://api-free.deepl.com/v2/usage', headers=headers)
304
+ usage = json.loads(usage.text)
305
+ char_count = str(usage['character_count'])
306
+
307
+ print('Usage is at: ' + str(usage['character_count']) + ' characters')
308
+
309
+ if usage['character_count'] >= 490000:
310
+ print("USAGE CLOSE TO LIMIT")
311
 
312
  except Exception as e:
313
+ print('Error with DeepL API requesting usage count')
314
+
315
+
316
+ return df
317
+
318
+
319
 
320
 
321
  def translate_transcriptions(df, selected_translation_lang_2):
 
343
 
344
  usage = requests.get('https://api-free.deepl.com/v2/usage', headers=headers)
345
  usage = json.loads(usage.text)
346
+ deepL_character_usage = str(usage['character_count'])
347
  try:
348
+ print('Usage is at: ' + deepL_character_usage + 'characters')
349
  except Exception as e:
350
  print(e)
351
 
352
+ if int(deepL_character_usage) <= 490000:
353
+ print("STILL CHARACTERS LEFT")
354
+ response = requests.post('https://api-free.deepl.com/v2/translate', headers=headers, data=data)
 
355
 
356
+ # Print the response from the server
357
+ translated_sentences = json.loads(response.text)
358
+ translated_sentences = translated_sentences['translations'][0]['text'].split('\n')
359
+ df['translation'] = translated_sentences
360
+
361
+ else:
362
+ df['translation'] = df['text']
363
+
364
  except Exception as e:
365
  print("EXCEPTION WITH DEEPL API")
366
  print(e)
 
422
 
423
  print("SRT DONE")
424
  subtitle_files = ['subtitles.vtt','subtitles.srt']
425
+
426
  return df, subtitle_files
427
 
428
  # def burn_srt_to_video(srt_file, video_in):
 
498
  .output-markdown {max-width: 65ch !important;}
499
  ''')
500
  demo.encrypt = False
501
+
502
+
503
+
504
+
505
  with demo:
506
  transcription_var = gr.Variable()
507
 
 
519
 
520
  with gr.Column():
521
  gr.Markdown('''
522
+ ### 1. Copy any non-private Youtube video URL to box below or click one of the examples.
523
+ (But please **consider using short videos** so others won't get queued) <br>
524
+ Then press button "1. Download Youtube video"-button:
525
  ''')
526
  examples = gr.Examples(examples=
527
  [ "https://www.youtube.com/watch?v=nlMuHtV82q8&ab_channel=NothingforSale24",
 
545
  with gr.Column():
546
  gr.Markdown('''
547
  ##### Here you can start the transcription and translation process.
548
+ Be aware that processing will last some time. With base model it is around 3x speed
549
+ **Please select source language** for better transcriptions. Using 'Let the model analyze' makes mistakes sometimes and may lead to bad transcriptions
550
  ''')
551
  selected_source_lang.render()
552
  selected_whisper_model.render()
553
  transcribe_btn = gr.Button("Step 2. Transcribe audio")
554
+ transcribe_btn.click(speech_to_text, [video_in, selected_source_lang, selected_whisper_model], [transcription_df])
555
 
556
 
557
  with gr.Row():
 
566
  with gr.Row():
567
  with gr.Column():
568
  gr.Markdown('''
569
+ ### PLEASE READ BELOW
570
  Here you can translate transcriptions to 26 languages.
571
+ If spoken language is not in the list, translation might not work. In this case original transcriptions are used.
572
+ ''')
573
+ gr.Markdown(f'''
574
+ DeepL API character usage:
575
+ {deepL_character_usage if deepL_character_usage is not None else ''}/500 000 characters
576
+ If usage is over 490 000 characters original transcriptions will be used for subtitles.
577
+ API usage resets on 5th of every month.
578
  ''')
579
  selected_translation_lang_2.render()
580
  translate_transcriptions_button = gr.Button("Step 3. Translate transcription")