Update app.py
app.py CHANGED
@@ -32,11 +32,13 @@ print(device)
 
 asr_model_base = whisper.load_model("base", device=device)
 asr_model_small = whisper.load_model("small", device=device)
-
+whisper_models_dict = {
     'base': asr_model_base,
     'small': asr_model_small
 }
 
+whisper_models = ["base", "small"]
+
 transcribe_options = dict(beam_size=3, best_of=3, without_timestamps=False)
 
 source_languages = {
@@ -189,7 +191,7 @@ def get_youtube(video_url):
 
     return abs_video_path
 
-async def speech_to_text(video_file_path, selected_translation_lang, whisper_model):
+def speech_to_text(video_file_path, selected_translation_lang, whisper_model):
     """
     # Youtube with translated subtitles using OpenAI Whisper and Opus-MT models.
     # Currently supports only English audio
@@ -216,14 +218,10 @@ async def speech_to_text(video_file_path, selected_translation_lang, whisper_mod
 
     try:
         print(f'Transcribing via local model')
-        transcribe_options = dict(beam_size=5, best_of=5, without_timestamps=False)
 
-
+        transcribe_options = dict(beam_size=5, best_of=5, without_timestamps=False)
+        transcription = whisper_models_dict.get(whisper_model).transcribe(audio, **transcribe_options)
 
-
-        #translation_options = dict(language=selected_translation_lang, beam_size=5, best_of=5, without_timestamps=False)
-        #translations = asr_model.transcribe(audio, **translation_options)
-
         df = pd.DataFrame(columns=['start','end','text'])
 
 
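For review context, here is a minimal sketch of the pattern the commit settles on: preload the Whisper models once into `whisper_models_dict`, expose their names via `whisper_models`, and pick a model by name at transcription time. It is a sketch under assumptions, not the Space's full code: the `device` line stands in for whatever `app.py` computes before `print(device)`, audio extraction from `video_file_path` is elided (OpenAI Whisper's `transcribe` also accepts a media file path directly), and the `transcribe_file` wrapper plus its unknown-key guard are illustrative additions.

import torch
import whisper

device = "cuda" if torch.cuda.is_available() else "cpu"  # stand-in for app.py's device setup

# Preload once at startup, keyed by name, as in the diff.
whisper_models_dict = {
    'base': whisper.load_model("base", device=device),
    'small': whisper.load_model("small", device=device),
}
whisper_models = ["base", "small"]  # e.g. dropdown choices in the UI

def transcribe_file(media_path, whisper_model):
    # Illustrative wrapper, not in the diff: guard the lookup, since
    # dict.get() returns None for a model name that was never loaded.
    model = whisper_models_dict.get(whisper_model)
    if model is None:
        raise ValueError(f"unknown whisper model: {whisper_model!r}")
    transcribe_options = dict(beam_size=5, best_of=5, without_timestamps=False)
    return model.transcribe(media_path, **transcribe_options)

One caveat the diff leaves open: `whisper_models_dict.get(whisper_model)` returns `None` for an unexpected model name, so the chained `.transcribe(...)` call would fail with `AttributeError`; indexing with `whisper_models_dict[whisper_model]`, or guarding as above, fails more legibly.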