Maximofn committed on
Commit
1ab31c9
1 Parent(s): ebb4685
Files changed (3) hide show
  1. app.py +80 -86
  2. requirements.txt +0 -11
  3. transcribe.py +5 -1
app.py CHANGED
@@ -26,7 +26,7 @@ if DEVICE == "cpu":
26
  # I supose that I am on huggingface server
27
  SECONDS = 300
28
  else:
29
- SECONDS = 50
30
 
31
  YOUTUBE = "youtube"
32
  TWITCH = "twitch"
@@ -231,8 +231,9 @@ def clear_video_url():
231
  image = gr.Image(visible=visible, scale=1)
232
  source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
233
  target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
 
 
234
  translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
235
- original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
236
  original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
237
  original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
238
  return (
@@ -240,8 +241,8 @@ def clear_video_url():
240
  image,
241
  source_languaje,
242
  target_languaje,
 
243
  translate_button,
244
- original_audio,
245
  original_audio_transcribed,
246
  original_audio_translated,
247
  )
@@ -260,10 +261,9 @@ def is_valid_youtube_url(url):
260
  def is_valid_url(url):
261
  source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
262
  target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
263
- translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=True)
264
- original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False)
265
- original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=True)
266
- original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=True)
267
  subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
268
 
269
  # Youtube
@@ -275,24 +275,18 @@ def is_valid_url(url):
275
  gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
276
  source_languaje,
277
  target_languaje,
278
- translate_button,
279
- gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
280
- original_audio,
281
- original_audio_transcribed,
282
- original_audio_translated,
283
- subtitled_video
284
  )
285
  else:
286
  return (
287
  gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
288
  source_languaje,
289
  target_languaje,
290
- translate_button,
291
- gr.Textbox(value=YOUTUBE, label="Stream page", elem_id="stream_page", visible=False),
292
- original_audio,
293
- original_audio_transcribed,
294
- original_audio_translated,
295
- subtitled_video
296
  )
297
 
298
  # Twitch
@@ -301,12 +295,9 @@ def is_valid_url(url):
301
  gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
302
  source_languaje,
303
  target_languaje,
304
- translate_button,
305
- gr.Textbox(value=TWITCH, label="Stream page", elem_id="stream_page", visible=False),
306
- original_audio,
307
- original_audio_transcribed,
308
- original_audio_translated,
309
- subtitled_video
310
  )
311
 
312
  # Error
@@ -314,49 +305,40 @@ def is_valid_url(url):
314
  image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
315
  source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
316
  target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
317
- translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
318
- stream_page = gr.Textbox(value=ERROR, label="Stream page", elem_id="stream_page", visible=visible)
319
- original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
320
- original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
321
- original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
322
- subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
323
  return (
324
  image,
325
  source_languaje,
326
  target_languaje,
327
- translate_button,
328
- stream_page,
329
- original_audio,
330
- original_audio_transcribed,
331
- original_audio_translated,
332
- subtitled_video
333
  )
334
 
335
- def get_audio_and_video_from_video(url, stream_page):
336
  python_file = "download.py"
337
  command = f"python {python_file} {url}"
338
  os.system(command)
339
- # sleep(5)
340
 
341
  audio = "audios/download_audio.mp3"
342
  video = "videos/download_video.mp4"
343
 
344
  return (
345
- gr.Audio(value=audio, label="Original audio", elem_id="original_audio", visible=True, interactive=False),
346
  gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
347
  gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
348
  )
349
 
350
- def trascribe_audio(audio_path, source_languaje):
351
  folder_vocals = "vocals"
352
  folder_chunck = "chunks"
353
- folder_concatenated = "concatenated_transcriptions"
354
  if not os.path.exists(folder_vocals):
355
  os.makedirs(folder_vocals)
356
  if not os.path.exists(folder_chunck):
357
  os.makedirs(folder_chunck)
358
- if not os.path.exists(folder_concatenated):
359
- os.makedirs(folder_concatenated)
360
  python_file = "slice_audio.py"
361
  command = f"python {python_file} {audio_path} {SECONDS}"
362
  os.system(command)
@@ -365,7 +347,14 @@ def trascribe_audio(audio_path, source_languaje):
365
  f.write(str(0))
366
  command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
367
  os.system(command)
 
 
 
 
 
368
 
 
 
369
  python_file = "transcribe.py"
370
  chunck_file = "chunks/output_files.txt"
371
  speakers_file = "vocals/speakers.txt"
@@ -393,6 +382,16 @@ def trascribe_audio(audio_path, source_languaje):
393
  command = f"rm {vocal}"
394
  os.system(command)
395
 
 
 
 
 
 
 
 
 
 
 
396
  python_file = "concat_transcriptions.py"
397
  command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
398
  os.system(command)
@@ -411,8 +410,8 @@ def trascribe_audio(audio_path, source_languaje):
411
  result = f.read()
412
 
413
  return (
414
- result,
415
- gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
416
  )
417
 
418
  def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
@@ -431,7 +430,7 @@ def translate_transcription(original_audio_transcribed_path, source_languaje, ta
431
  os.system(command)
432
 
433
  return (
434
- result,
435
  gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
436
  )
437
 
@@ -458,56 +457,52 @@ def add_translated_subtitles_to_video(original_video_path, original_audio_path,
458
  def subtify():
459
  with gr.Blocks() as demo:
460
  # Layout
 
461
  gr.Markdown("""# Subtify""")
462
- gr.Markdown(f"translate, Python: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
463
- # model = transformers.AutoModel.from_pretrained("huggingface/my_model")
464
- # gr.Markdown(f"model.config.url: {model.config.url}")
465
- token = os.getenv("HF_TOKEN")
466
- if token is not None:
467
- print(token)
468
- gr.Markdown(f"Huggingface token: {token}")
469
- else:
470
- gr.Markdown(f"Huggingface token: None")
471
  with gr.Row(variant="panel"):
472
  url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
473
- copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
474
- delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
475
 
476
- stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
477
  visible = False
478
  with gr.Row(equal_height=False):
479
  image = gr.Image(visible=visible, scale=1)
480
  with gr.Column():
481
  with gr.Row():
482
- source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
483
- target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
 
484
  with gr.Row():
485
  subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
486
 
487
- original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible, interactive=False)
 
 
 
 
 
 
488
  original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
489
  original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
490
- original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
491
  original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
492
- original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
493
  original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
494
  subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
495
 
496
  # Events
497
  # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
498
- delete_button.click(
499
- fn=clear_video_url,
500
- outputs=[
501
- url_textbox,
502
- image,
503
- source_languaje,
504
- target_languaje,
505
- subtify_button,
506
- original_audio,
507
- original_audio_transcribed,
508
- original_audio_translated,
509
- ]
510
- )
511
  url_textbox.change(
512
  fn=is_valid_url,
513
  inputs=url_textbox,
@@ -515,18 +510,17 @@ def subtify():
515
  image,
516
  source_languaje,
517
  target_languaje,
 
518
  subtify_button,
519
- stream_page,
520
- original_audio,
521
- original_audio_transcribed,
522
- original_audio_translated,
523
- subtitled_video
524
  ]
525
  )
526
- subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox, stream_page], outputs=[original_audio, original_audio_path, original_video_path])
527
- original_audio.change(fn=trascribe_audio, inputs=[original_audio_path, source_languaje], outputs=[original_audio_transcribed, original_audio_transcribed_path])
528
- original_audio_transcribed.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[original_audio_translated, original_audio_translated_path])
529
- original_audio_translated.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
 
 
530
 
531
  demo.launch()
532
 
 
26
  # I supose that I am on huggingface server
27
  SECONDS = 300
28
  else:
29
+ SECONDS = 300
30
 
31
  YOUTUBE = "youtube"
32
  TWITCH = "twitch"
 
231
  image = gr.Image(visible=visible, scale=1)
232
  source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
233
  target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
234
+ num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
235
+ number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
236
  translate_button = gr.Button(size="lg", value="translate", min_width="10px", scale=0, visible=visible)
 
237
  original_audio_transcribed = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", interactive=False, visible=visible)
238
  original_audio_translated = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", interactive=False, visible=visible)
239
  return (
 
241
  image,
242
  source_languaje,
243
  target_languaje,
244
+ number_of_speakers,
245
  translate_button,
 
246
  original_audio_transcribed,
247
  original_audio_translated,
248
  )
 
261
  def is_valid_url(url):
262
  source_languaje = gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
263
  target_languaje = gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
264
+ num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
265
+ number_of_speakers = gr.Dropdown(visible=True, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
266
+ subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=True)
 
267
  subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=True, interactive=False)
268
 
269
  # Youtube
 
275
  gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
276
  source_languaje,
277
  target_languaje,
278
+ number_of_speakers,
279
+ subtify_button,
280
+ subtitled_video,
 
 
 
281
  )
282
  else:
283
  return (
284
  gr.Image(value="assets/youtube-no-thumbnails.webp", visible=True, show_download_button=False, container=False),
285
  source_languaje,
286
  target_languaje,
287
+ number_of_speakers,
288
+ subtify_button,
289
+ subtitled_video,
 
 
 
290
  )
291
 
292
  # Twitch
 
295
  gr.Image(value="assets/twitch.webp", visible=True, show_download_button=False, container=False),
296
  source_languaje,
297
  target_languaje,
298
+ number_of_speakers,
299
+ subtify_button,
300
+ subtitled_video,
 
 
 
301
  )
302
 
303
  # Error
 
305
  image = gr.Image(value="assets/youtube_error.webp", visible=visible, show_download_button=False, container=False)
306
  source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True)
307
  target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True)
308
+ number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True)
309
+ subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
310
+ subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False),
 
 
 
311
  return (
312
  image,
313
  source_languaje,
314
  target_languaje,
315
+ number_of_speakers,
316
+ subtify_button,
317
+ subtitled_video,
 
 
 
318
  )
319
 
320
+ def get_audio_and_video_from_video(url):
321
  python_file = "download.py"
322
  command = f"python {python_file} {url}"
323
  os.system(command)
324
+ sleep(5)
325
 
326
  audio = "audios/download_audio.mp3"
327
  video = "videos/download_video.mp4"
328
 
329
  return (
330
+ gr.Textbox(value="Ok", label="Video downloaded", elem_id="video_downloaded", interactive=False, visible=True),
331
  gr.Textbox(value=audio, label="Original audio path", elem_id="original_audio_path", visible=False),
332
  gr.Textbox(value=video, label="Original video path", elem_id="original_video_path", visible=False)
333
  )
334
 
335
+ def slice_audio(audio_path):
336
  folder_vocals = "vocals"
337
  folder_chunck = "chunks"
 
338
  if not os.path.exists(folder_vocals):
339
  os.makedirs(folder_vocals)
340
  if not os.path.exists(folder_chunck):
341
  os.makedirs(folder_chunck)
 
 
342
  python_file = "slice_audio.py"
343
  command = f"python {python_file} {audio_path} {SECONDS}"
344
  os.system(command)
 
347
  f.write(str(0))
348
  command = f"mv {folder_chunck}/*.mp3 {folder_vocals}/"
349
  os.system(command)
350
+ # sleep(5)
351
+
352
+ return (
353
+ gr.Textbox(value="Ok", label="Video sliced", elem_id="video_sliced", interactive=False, visible=True)
354
+ )
355
 
356
+ def trascribe_audio(source_languaje):
357
+ folder_vocals = "vocals"
358
  python_file = "transcribe.py"
359
  chunck_file = "chunks/output_files.txt"
360
  speakers_file = "vocals/speakers.txt"
 
382
  command = f"rm {vocal}"
383
  os.system(command)
384
 
385
+ return (
386
+ gr.Textbox(value="Ok", label="Video transcribed", elem_id="video_transcribed", interactive=False, visible=True)
387
+ )
388
+
389
+ def concatenate_transcriptions():
390
+ folder_concatenated = "concatenated_transcriptions"
391
+ if not os.path.exists(folder_concatenated):
392
+ os.makedirs(folder_concatenated)
393
+ chunck_file = "chunks/output_files.txt"
394
+ speakers_file = "vocals/speakers.txt"
395
  python_file = "concat_transcriptions.py"
396
  command = f"python {python_file} {chunck_file} {SECONDS} {speakers_file}"
397
  os.system(command)
 
410
  result = f.read()
411
 
412
  return (
413
+ gr.Textbox(value="Ok", label="Transcription translated", elem_id="transcription_translated", interactive=False, visible=True),
414
+ gr.Textbox(value=audio_transcribed, label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False),
415
  )
416
 
417
  def translate_transcription(original_audio_transcribed_path, source_languaje, target_languaje):
 
430
  os.system(command)
431
 
432
  return (
433
+ gr.Textbox(value="Ok", label="Video subtitled", elem_id="video_subtitled", interactive=False, visible=True),
434
  gr.Textbox(value=translated_transcription, label="Original audio translated", elem_id="original_audio_translated", visible=False)
435
  )
436
 
 
457
  def subtify():
458
  with gr.Blocks() as demo:
459
  # Layout
460
+ num_speaker = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
461
  gr.Markdown("""# Subtify""")
 
 
 
 
 
 
 
 
 
462
  with gr.Row(variant="panel"):
463
  url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
464
+ # copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
465
+ # delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
466
 
 
467
  visible = False
468
  with gr.Row(equal_height=False):
469
  image = gr.Image(visible=visible, scale=1)
470
  with gr.Column():
471
  with gr.Row():
472
+ source_languaje = gr.Dropdown(visible=visible, label="Source languaje", show_label=True, value="English", choices=language_dict, scale=1, interactive=True, info="Language of the video")
473
+ target_languaje = gr.Dropdown(visible=visible, label="Target languaje", show_label=True, value="Español", choices=language_dict, scale=1, interactive=True, info="Language to translate the subtitles")
474
+ number_of_speakers = gr.Dropdown(visible=visible, label="Number of speakers", show_label=True, value=10, choices=num_speaker, scale=1, interactive=True, info="Number of speakers in the video, if you don't know, select 10")
475
  with gr.Row():
476
  subtify_button = gr.Button(size="lg", value="subtify", min_width="10px", scale=0, visible=visible)
477
 
478
+ with gr.Row():
479
+ video_donwloaded = gr.Textbox(placeholder="Waiting", label="Video downloaded", elem_id="video_downloaded", interactive=False, visible=True)
480
+ video_sliced = gr.Textbox(placeholder="Waiting", label="Video sliced", elem_id="video_sliced", interactive=False, visible=True)
481
+ video_transcribed = gr.Textbox(placeholder="Waiting", label="Video transcribed", elem_id="video_transcribed", interactive=False, visible=True)
482
+ video_translated = gr.Textbox(placeholder="Waiting", label="Transcription translated", elem_id="transcription_translated", interactive=False, visible=True)
483
+ video_subtitled = gr.Textbox(placeholder="Waiting", label="Video subtitled", elem_id="video_subtitled", interactive=False, visible=True)
484
+
485
  original_audio_path = gr.Textbox(label="Original audio path", elem_id="original_audio_path", visible=False)
486
  original_video_path = gr.Textbox(label="Original video path", elem_id="original_video_path", visible=False)
 
487
  original_audio_transcribed_path = gr.Textbox(label="Original audio transcribed", elem_id="original_audio_transcribed", visible=False)
 
488
  original_audio_translated_path = gr.Textbox(label="Original audio translated", elem_id="original_audio_translated", visible=False)
489
  subtitled_video = gr.Video(label="Subtitled video", elem_id="subtitled_video", visible=visible, interactive=False)
490
 
491
  # Events
492
  # copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
493
+ # delete_button.click(
494
+ # fn=clear_video_url,
495
+ # outputs=[
496
+ # url_textbox,
497
+ # image,
498
+ # source_languaje,
499
+ # target_languaje,
500
+ # number_of_speakers,
501
+ # subtify_button,
502
+ # original_audio_transcribed,
503
+ # original_audio_translated,
504
+ # ]
505
+ # )
506
  url_textbox.change(
507
  fn=is_valid_url,
508
  inputs=url_textbox,
 
510
  image,
511
  source_languaje,
512
  target_languaje,
513
+ number_of_speakers,
514
  subtify_button,
515
+ subtitled_video,
 
 
 
 
516
  ]
517
  )
518
+ subtify_button.click(fn=get_audio_and_video_from_video, inputs=[url_textbox], outputs=[video_donwloaded, original_audio_path, original_video_path])
519
+ video_donwloaded.change(fn=slice_audio, inputs=[original_audio_path], outputs=[video_sliced])
520
+ video_sliced.change(fn=trascribe_audio, inputs=[source_languaje], outputs=[video_transcribed])
521
+ video_transcribed.change(fn=concatenate_transcriptions, inputs=[], outputs=[video_translated, original_audio_transcribed_path])
522
+ video_translated.change(fn=translate_transcription, inputs=[original_audio_transcribed_path, source_languaje, target_languaje], outputs=[video_subtitled, original_audio_translated_path])
523
+ video_subtitled.change(fn=add_translated_subtitles_to_video, inputs=[original_video_path, original_audio_path, original_audio_translated_path], outputs=subtitled_video)
524
 
525
  demo.launch()
526
 
requirements.txt CHANGED
@@ -9,18 +9,7 @@ pytube
9
  yt-dlp
10
  twitch-dl
11
 
12
- # # mossformer --> separate speech audios
13
- # torch
14
- # torchvision
15
- # torchaudio
16
- # speechbrain
17
- # soundfile
18
- # modelscope
19
- # rotary-embedding-torch
20
- # transformers
21
-
22
  # Trascribe audios
23
- # git+https://github.com/openai/whisper.git
24
  git+https://github.com/m-bain/whisperx.git
25
  pyannote.audio
26
 
 
9
  yt-dlp
10
  twitch-dl
11
 
 
 
 
 
 
 
 
 
 
 
12
  # Trascribe audios
 
13
  git+https://github.com/m-bain/whisperx.git
14
  pyannote.audio
15
 
transcribe.py CHANGED
@@ -28,7 +28,11 @@ def transcribe(audio_file, language, device, vocals):
28
  model = "large-v2"
29
  # word_timestamps = True
30
  print_progress = False
31
- compute_type = "float32"
 
 
 
 
32
  fp16 = True
33
  batch_size = 8
34
  verbose = False
 
28
  model = "large-v2"
29
  # word_timestamps = True
30
  print_progress = False
31
+ if device == "cpu":
32
+ # I supose that I am on huggingface server
33
+ compute_type = "float32"
34
+ else:
35
+ compute_type = "float16"
36
  fp16 = True
37
  batch_size = 8
38
  verbose = False