Update app.py
Browse files
app.py
CHANGED
@@ -21,6 +21,9 @@ import pandas as pd
|
|
21 |
import re
|
22 |
import time
|
23 |
|
|
|
|
|
|
|
24 |
from pytube import YouTube
|
25 |
import torch
|
26 |
|
@@ -28,11 +31,13 @@ INTRO_MSG = '''
|
|
28 |
#### <p>There are many not very widely spoken languages for which it is quite hard to find learning materials,
|
29 |
especially well dubbed videos (target language video with target language subs).
|
30 |
This tool will hopefully transcribe and add subs to your videos.
|
31 |
-
At least for me this is a nice tool to practice both listening and reading skills.
|
|
|
32 |
<p>Speech Recognition is based on models from OpenAI Whisper - https://github.com/openai/whisper
|
33 |
<p> This space is using the c++ implementation by https://github.com/ggerganov/whisper.cpp
|
34 |
'''
|
35 |
|
|
|
36 |
whisper_models = MODELS_TO_DOWNLOAD #["medium"]#["base", "small", "medium", "large", "base.en"]
|
37 |
|
38 |
custom_models = []
|
@@ -42,6 +47,104 @@ combined_models.extend(custom_models)
|
|
42 |
|
43 |
LANGUAGES = {
|
44 |
"bg": "Bulgarian",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
}
|
46 |
|
47 |
# language code lookup by name, with a few language aliases
|
@@ -60,7 +163,27 @@ def get_youtube(video_url):
|
|
60 |
print(f"Download complete - {abs_video_path}")
|
61 |
return abs_video_path
|
62 |
|
63 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
"""
|
65 |
Speech Recognition is based on models from OpenAI Whisper https://github.com/openai/whisper
|
66 |
This space is using c++ implementation by https://github.com/ggerganov/whisper.cpp
|
@@ -90,8 +213,9 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
|
|
90 |
try:
|
91 |
print("starting whisper c++")
|
92 |
os.system(f'rm -f {srt_path}')
|
93 |
-
|
94 |
-
|
|
|
95 |
print("whisper c++ finished")
|
96 |
except Exception as e:
|
97 |
raise RuntimeError("Error running Whisper cpp model")
|
@@ -149,6 +273,7 @@ subtitle_files = gr.File(
|
|
149 |
video_player = gr.HTML('<p>video will be played here')
|
150 |
eventslider = gr.Slider(visible=False)
|
151 |
status_msg = gr.Markdown('Status')
|
|
|
152 |
|
153 |
demo = gr.Blocks()
|
154 |
demo.encrypt = False
|
@@ -157,24 +282,26 @@ def set_app_msg(app_state, msg):
|
|
157 |
app_state['status_msg'] = msg
|
158 |
|
159 |
def transcribe(app_state, youtube_url_in, selected_source_lang, selected_whisper_model):
|
|
|
160 |
set_app_msg(app_state, 'Downloading the movie ...')
|
161 |
video_file_path = get_youtube(youtube_url_in)
|
162 |
set_app_msg(app_state, f'Running the speech to text model {selected_source_lang}/{selected_whisper_model}. This can take some time.')
|
163 |
-
subtitle_files = speech_to_text(video_file_path, selected_source_lang, selected_whisper_model)
|
164 |
set_app_msg(app_state, f'Creating the video player ...')
|
165 |
video_player = create_video_player(subtitle_files, video_file_path)
|
166 |
-
set_app_msg(app_state, f'Transcribing done, generating video player
|
167 |
return subtitle_files, video_player
|
168 |
|
169 |
|
170 |
def on_change_event(app_state):
|
171 |
-
print('Running!')
|
172 |
-
return app_state['status_msg']
|
173 |
|
174 |
with demo:
|
175 |
app_state = gr.State({
|
176 |
-
'running':False,
|
177 |
-
'status_msg': ''
|
|
|
178 |
})
|
179 |
|
180 |
with gr.Row():
|
@@ -196,12 +323,13 @@ with demo:
|
|
196 |
|
197 |
eventslider.render()
|
198 |
status_msg.render()
|
|
|
199 |
subtitle_files.render()
|
200 |
video_player.render()
|
201 |
with gr.Row():
|
202 |
gr.Markdown('This app is based on [this code](https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles/tree/main) by RASMUS.')
|
203 |
|
204 |
-
dep = demo.load(on_change_event, inputs=[app_state], outputs=[status_msg], every=10)
|
205 |
|
206 |
|
207 |
#### RUN ###
|
|
|
21 |
import re
|
22 |
import time
|
23 |
|
24 |
+
import subprocess
|
25 |
+
import shlex
|
26 |
+
|
27 |
from pytube import YouTube
|
28 |
import torch
|
29 |
|
|
|
31 |
#### <p>There are many not very widely spoken languages for which it is quite hard to find learning materials,
|
32 |
especially well dubbed videos (target language video with target language subs).
|
33 |
This tool will hopefully transcribe and add subs to your videos.
|
34 |
+
At least for me this is a nice tool to practice both listening and reading skills.
|
35 |
+
This is a 'one-click' variant of similar spaces found here on the HF hub.
|
36 |
<p>Speech Recognition is based on models from OpenAI Whisper - https://github.com/openai/whisper
|
37 |
<p> This space is using the c++ implementation by https://github.com/ggerganov/whisper.cpp
|
38 |
'''
|
39 |
|
40 |
+
|
41 |
whisper_models = MODELS_TO_DOWNLOAD #["medium"]#["base", "small", "medium", "large", "base.en"]
|
42 |
|
43 |
custom_models = []
|
|
|
47 |
|
48 |
LANGUAGES = {
|
49 |
"bg": "Bulgarian",
|
50 |
+
"en": "English",
|
51 |
+
"zh": "Chinese",
|
52 |
+
"de": "German",
|
53 |
+
"es": "Spanish",
|
54 |
+
"ru": "Russian",
|
55 |
+
"ko": "Korean",
|
56 |
+
"fr": "French",
|
57 |
+
"ja": "Japanese",
|
58 |
+
"pt": "Portuguese",
|
59 |
+
"tr": "Turkish",
|
60 |
+
"pl": "Polish",
|
61 |
+
"ca": "Catalan",
|
62 |
+
"nl": "Dutch",
|
63 |
+
"ar": "Arabic",
|
64 |
+
"sv": "Swedish",
|
65 |
+
"it": "Italian",
|
66 |
+
"id": "Indonesian",
|
67 |
+
"hi": "Hindi",
|
68 |
+
"fi": "Finnish",
|
69 |
+
"vi": "Vietnamese",
|
70 |
+
"he": "Hebrew",
|
71 |
+
"uk": "Ukrainian",
|
72 |
+
"el": "Greek",
|
73 |
+
"ms": "Malay",
|
74 |
+
"cs": "Czech",
|
75 |
+
"ro": "Romanian",
|
76 |
+
"da": "Danish",
|
77 |
+
"hu": "Hungarian",
|
78 |
+
"ta": "Tamil",
|
79 |
+
"no": "Norwegian",
|
80 |
+
"th": "Thai",
|
81 |
+
"ur": "Urdu",
|
82 |
+
"hr": "Croatian",
|
83 |
+
"lt": "Lithuanian",
|
84 |
+
"la": "Latin",
|
85 |
+
"mi": "Maori",
|
86 |
+
"ml": "Malayalam",
|
87 |
+
"cy": "Welsh",
|
88 |
+
"sk": "Slovak",
|
89 |
+
"te": "Telugu",
|
90 |
+
"fa": "Persian",
|
91 |
+
"lv": "Latvian",
|
92 |
+
"bn": "Bengali",
|
93 |
+
"sr": "Serbian",
|
94 |
+
"az": "Azerbaijani",
|
95 |
+
"sl": "Slovenian",
|
96 |
+
"kn": "Kannada",
|
97 |
+
"et": "Estonian",
|
98 |
+
"mk": "Macedonian",
|
99 |
+
"br": "Breton",
|
100 |
+
"eu": "Basque",
|
101 |
+
"is": "Icelandic",
|
102 |
+
"hy": "Armenian",
|
103 |
+
"ne": "Nepali",
|
104 |
+
"mn": "Mongolian",
|
105 |
+
"bs": "Bosnian",
|
106 |
+
"kk": "Kazakh",
|
107 |
+
"sq": "Albanian",
|
108 |
+
"sw": "Swahili",
|
109 |
+
"gl": "Galician",
|
110 |
+
"mr": "Marathi",
|
111 |
+
"pa": "Punjabi",
|
112 |
+
"si": "Sinhala",
|
113 |
+
"km": "Khmer",
|
114 |
+
"sn": "Shona",
|
115 |
+
"yo": "Yoruba",
|
116 |
+
"so": "Somali",
|
117 |
+
"af": "Afrikaans",
|
118 |
+
"oc": "Occitan",
|
119 |
+
"ka": "Georgian",
|
120 |
+
"be": "Belarusian",
|
121 |
+
"tg": "Tajik",
|
122 |
+
"sd": "Sindhi",
|
123 |
+
"gu": "Gujarati",
|
124 |
+
"am": "Amharic",
|
125 |
+
"yi": "Yiddish",
|
126 |
+
"lo": "Lao",
|
127 |
+
"uz": "Uzbek",
|
128 |
+
"fo": "Faroese",
|
129 |
+
"ht": "Haitian creole",
|
130 |
+
"ps": "Pashto",
|
131 |
+
"tk": "Turkmen",
|
132 |
+
"nn": "Nynorsk",
|
133 |
+
"mt": "Maltese",
|
134 |
+
"sa": "Sanskrit",
|
135 |
+
"lb": "Luxembourgish",
|
136 |
+
"my": "Myanmar",
|
137 |
+
"bo": "Tibetan",
|
138 |
+
"tl": "Tagalog",
|
139 |
+
"mg": "Malagasy",
|
140 |
+
"as": "Assamese",
|
141 |
+
"tt": "Tatar",
|
142 |
+
"haw": "Hawaiian",
|
143 |
+
"ln": "Lingala",
|
144 |
+
"ha": "Hausa",
|
145 |
+
"ba": "Bashkir",
|
146 |
+
"jw": "Javanese",
|
147 |
+
"su": "Sundanese",
|
148 |
}
|
149 |
|
150 |
# language code lookup by name, with a few language aliases
|
|
|
163 |
print(f"Download complete - {abs_video_path}")
|
164 |
return abs_video_path
|
165 |
|
166 |
+
def run_command(command, app_state):
    """Run *command* through the shell, streaming its stdout into the UI state.

    Each decoded stdout line is printed and appended to ``app_state['output']``
    so the Gradio poller (``on_change_event``) can surface whisper.cpp progress.

    Args:
        command: shell command line to execute.
        app_state: mutable dict; decoded stdout accumulates under 'output'.

    Returns:
        The process's integer exit code.
    """
    print(command)
    # NOTE(review): shell=True executes the interpolated string via the shell;
    # only internally-built commands should ever be passed here.
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)

    # Iterating the pipe blocks per line and ends at EOF, so no sleep-based
    # polling is needed and no trailing output is lost. (The original slept
    # 5s per line, compared the bytes line against the str '' — an always-
    # false check — and dropped anything buffered after the process exited.)
    for raw_line in process.stdout:
        decoded = raw_line.decode()
        print(decoded)
        app_state['output'] += decoded

    rc = process.wait()
    # Bug fix: the original printed the undefined name `cmd` (NameError);
    # the parameter is `command`.
    print(f'{command} ret code is {rc}')
    return rc
|
182 |
+
|
183 |
+
def speech_to_text(video_file_path,
|
184 |
+
selected_source_lang,
|
185 |
+
whisper_model,
|
186 |
+
app_state):
|
187 |
"""
|
188 |
Speech Recognition is based on models from OpenAI Whisper https://github.com/openai/whisper
|
189 |
This space is using c++ implementation by https://github.com/ggerganov/whisper.cpp
|
|
|
213 |
try:
|
214 |
print("starting whisper c++")
|
215 |
os.system(f'rm -f {srt_path}')
|
216 |
+
run_command(f'./whisper.cpp/main "{input_wav_file}" -t {os.cpu_count()} -l {source_languages.get(selected_source_lang)} -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt -ovtt',
|
217 |
+
app_state)
|
218 |
+
# os.system(f'./whisper.cpp/main "{input_wav_file}" -t {os.cpu_count()} -l {source_languages.get(selected_source_lang)} -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt -ovtt')
|
219 |
print("whisper c++ finished")
|
220 |
except Exception as e:
|
221 |
raise RuntimeError("Error running Whisper cpp model")
|
|
|
273 |
video_player = gr.HTML('<p>video will be played here')
|
274 |
eventslider = gr.Slider(visible=False)
|
275 |
status_msg = gr.Markdown('Status')
|
276 |
+
output_label = gr.Textbox('', interactive=False, show_label=False)
|
277 |
|
278 |
demo = gr.Blocks()
|
279 |
demo.encrypt = False
|
|
|
282 |
app_state['status_msg'] = msg
|
283 |
|
284 |
def transcribe(app_state, youtube_url_in, selected_source_lang, selected_whisper_model):
    """Full pipeline: download the YouTube video, run speech-to-text, build the player.

    Progress is reported via set_app_msg (polled by the UI). Returns the tuple
    (subtitle files, video player HTML) wired to the Gradio outputs.
    """
    # Clear whisper.cpp output captured from any previous run.
    app_state['output'] = ''

    set_app_msg(app_state, 'Downloading the movie ...')
    downloaded_path = get_youtube(youtube_url_in)

    set_app_msg(app_state, f'Running the speech to text model {selected_source_lang}/{selected_whisper_model}. This can take some time.')
    generated_subs = speech_to_text(downloaded_path, selected_source_lang, selected_whisper_model, app_state)

    set_app_msg(app_state, f'Creating the video player ...')
    player_html = create_video_player(generated_subs, downloaded_path)

    set_app_msg(app_state, f'Transcribing done, generating video player')
    return generated_subs, player_html
|
294 |
|
295 |
|
296 |
def on_change_event(app_state):
    """Periodic poll callback: expose status message and captured output to the UI."""
    print(f'Running! {app_state}')
    status = app_state['status_msg']
    captured = app_state['output']
    return status, captured
|
299 |
|
300 |
with demo:
|
301 |
app_state = gr.State({
|
302 |
+
'running': False,
|
303 |
+
'status_msg': '',
|
304 |
+
'output': ''
|
305 |
})
|
306 |
|
307 |
with gr.Row():
|
|
|
323 |
|
324 |
eventslider.render()
|
325 |
status_msg.render()
|
326 |
+
output_label.render()
|
327 |
subtitle_files.render()
|
328 |
video_player.render()
|
329 |
with gr.Row():
|
330 |
gr.Markdown('This app is based on [this code](https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles/tree/main) by RASMUS.')
|
331 |
|
332 |
+
dep = demo.load(on_change_event, inputs=[app_state], outputs=[status_msg, output_label], every=10)
|
333 |
|
334 |
|
335 |
#### RUN ###
|