Update app.py
Browse files
app.py
CHANGED
@@ -1,54 +1,41 @@
|
|
1 |
import gradio as gr
|
2 |
import librosa
|
3 |
-
from asr import transcribe
|
4 |
-
from tts import synthesize, TTS_EXAMPLES
|
5 |
-
|
6 |
-
# Per-task language tables: ALL_LANGUAGES[task][iso_code] -> human-readable name.
# Each data/<task>/all_langs.tsv line looks like "<iso> <name...>".
ALL_LANGUAGES = {}

for task in ("asr", "tts", "lid"):
    ALL_LANGUAGES.setdefault(task, {})
    # NOTE(review): the file extension is .tsv but the code splits on a space,
    # not a tab — confirm the files really are space-delimited.
    with open(f"data/{task}/all_langs.tsv") as f:
        for line in f:
            iso, name = line.split(" ", 1)
            # FIX: strip the trailing newline that split() leaves on the name,
            # otherwise UI labels built from these values embed "\n".
            ALL_LANGUAGES[task][iso] = name.strip()
|
14 |
-
|
15 |
-
|
16 |
-
def identify(microphone, file_upload):
    """Identify the spoken language of a recorded or uploaded audio clip.

    Exactly one of *microphone* / *file_upload* is expected to be a file path;
    when both are given the microphone recording wins.  Returns a mapping of
    "iso: Language name" labels to scores (suitable for ``gr.Label``), or an
    error string when no audio was provided at all.
    """
    LID_SAMPLING_RATE = 16_000

    # Neither input was provided — nothing to identify.
    if microphone is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    warn_output = ""
    if microphone is not None and file_upload is not None:
        # NOTE(review): this warning is built but never returned or displayed —
        # looks like dead code inherited from the ASR path; confirm intent.
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    source_path = microphone if microphone is not None else file_upload
    # Decode the audio (also validates the file); the samples themselves are
    # not used below — the scores are a hard-coded placeholder.
    inputs = librosa.load(source_path, sr=LID_SAMPLING_RATE, mono=True)[0]

    raw_output = {"eng": 0.9, "hin": 0.04, "heb": 0.03, "ara": 0.02, "fra": 0.01}
    return {
        f"{iso}: {ALL_LANGUAGES['lid'][iso]}": score
        for iso, score in raw_output.items()
    }
|
34 |
|
35 |
|
36 |
demo = gr.Blocks()
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
mms_transcribe = gr.Interface(
|
39 |
fn=transcribe,
|
40 |
inputs=[
|
41 |
-
|
42 |
-
|
|
|
43 |
gr.Dropdown(
|
44 |
-
[f"{k}
|
45 |
label="Language",
|
46 |
-
value="
|
47 |
),
|
|
|
48 |
],
|
49 |
outputs="text",
|
|
|
50 |
title="Speech-to-text",
|
51 |
-
description=(
|
|
|
|
|
|
|
52 |
allow_flagging="never",
|
53 |
)
|
54 |
|
@@ -57,9 +44,9 @@ mms_synthesize = gr.Interface(
|
|
57 |
inputs=[
|
58 |
gr.Text(label="Input text"),
|
59 |
gr.Dropdown(
|
60 |
-
[f"{k}
|
61 |
label="Language",
|
62 |
-
value="
|
63 |
),
|
64 |
gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
|
65 |
],
|
@@ -69,26 +56,77 @@ mms_synthesize = gr.Interface(
|
|
69 |
],
|
70 |
examples=TTS_EXAMPLES,
|
71 |
title="Text-to-speech",
|
72 |
-
description=("Generate audio
|
73 |
allow_flagging="never",
|
74 |
)
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
mms_identify = gr.Interface(
|
77 |
fn=identify,
|
78 |
inputs=[
|
79 |
-
|
80 |
-
|
|
|
81 |
],
|
82 |
outputs=gr.Label(num_top_classes=10),
|
|
|
83 |
title="Language Identification",
|
84 |
-
description=("Identity the language of audio
|
85 |
allow_flagging="never",
|
86 |
)
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
-
demo.
|
|
|
|
1 |
import gradio as gr
|
2 |
import librosa
|
3 |
+
from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
|
4 |
+
from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
|
5 |
+
from lid import identify, LID_EXAMPLES
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
# NOTE: the UI root is built later via `with gr.Blocks() as demo:`; the old
# eager `demo = gr.Blocks()` assignment here was immediately shadowed (dead)
# and has been removed.

# Input-source widgets for the transcription tab: a radio toggles between the
# microphone recorder and the file uploader (only one is visible at a time;
# the upload widget starts hidden and is revealed by a .change handler below).
mms_select_source_trans = gr.Radio(
    ["Record from Mic", "Upload audio"],
    label="Audio input",
    value="Record from Mic",
)
mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
mms_upload_source_trans = gr.Audio(
    source="upload", type="filepath", label="Upload file", visible=False
)
19 |
# Speech-to-text tab: source toggle + audio inputs + language dropdown.
mms_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        mms_select_source_trans,
        mms_mic_source_trans,
        mms_upload_source_trans,
        gr.Dropdown(
            [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
            label="Language",
            # FIX: the default must use the same "<iso> (<name>)" format as the
            # choices above; the bare "eng English" default was not a valid
            # choice. TODO(review): confirm ASR_LANGUAGES["eng"] == "English".
            value="eng (English)",
        ),
        # gr.Checkbox(label="Use Language Model (if available)", default=True),
    ],
    outputs="text",
    examples=ASR_EXAMPLES,
    title="Speech-to-text",
    description=(
        "Transcribe audio from a microphone or input file in your desired language."
    ),
    article=ASR_NOTE,
    allow_flagging="never",
)
|
41 |
|
|
|
44 |
inputs=[
|
45 |
gr.Text(label="Input text"),
|
46 |
gr.Dropdown(
|
47 |
+
[f"{k} ({v})" for k, v in TTS_LANGUAGES.items()],
|
48 |
label="Language",
|
49 |
+
value="eng English",
|
50 |
),
|
51 |
gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
|
52 |
],
|
|
|
56 |
],
|
57 |
examples=TTS_EXAMPLES,
|
58 |
title="Text-to-speech",
|
59 |
+
description=("Generate audio in your desired language from input text."),
|
60 |
allow_flagging="never",
|
61 |
)
|
62 |
|
63 |
+
# Input-source widgets for the language-identification tab — mirrors the
# mic/upload toggle used by the transcription tab (upload starts hidden).
mms_select_source_iden = gr.Radio(
    choices=["Record from Mic", "Upload audio"],
    label="Audio input",
    value="Record from Mic",
)
mms_mic_source_iden = gr.Audio(
    source="microphone", type="filepath", label="Use mic"
)
mms_upload_source_iden = gr.Audio(
    source="upload",
    type="filepath",
    label="Upload file",
    visible=False,
)
|
72 |
# Language-identification tab: audio in, top-10 language probabilities out.
mms_identify = gr.Interface(
    fn=identify,
    inputs=[
        mms_select_source_iden,
        mms_mic_source_iden,
        mms_upload_source_iden,
    ],
    outputs=gr.Label(num_top_classes=10),
    examples=LID_EXAMPLES,
    title="Language Identification",
    # FIX: typo in user-facing text — "Identity" -> "Identify".
    description=("Identify the language of input audio."),
    allow_flagging="never",
)
|
85 |
|
86 |
+
# Assemble the three task demos into one tabbed UI and wire up the
# mic/upload visibility toggles.
tabbed_interface = gr.TabbedInterface(
    [mms_transcribe, mms_synthesize, mms_identify],
    ["Speech-to-text", "Text-to-speech", "Language Identification"],
)


def _toggle_source(choice):
    # Show the mic recorder when "Record from Mic" is selected, otherwise the
    # file uploader. (Replaces two duplicated inline lambdas that used the
    # `True if ... else False` anti-idiom.)
    return [
        gr.update(visible=choice == "Record from Mic"),
        gr.update(visible=choice == "Upload audio"),
    ]


with gr.Blocks() as demo:
    gr.Markdown(
        "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
    )
    gr.HTML(
        """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos. </center>"""
    )
    gr.HTML(
        """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
    )

    tabbed_interface.render()
    # Visibility toggles run client-side-cheap updates; queue=False keeps them
    # off the inference queue so the UI reacts immediately.
    mms_select_source_trans.change(
        _toggle_source,
        inputs=[mms_select_source_trans],
        outputs=[mms_mic_source_trans, mms_upload_source_trans],
        queue=False,
    )
    mms_select_source_iden.change(
        _toggle_source,
        inputs=[mms_select_source_iden],
        outputs=[mms_mic_source_iden, mms_upload_source_iden],
        queue=False,
    )
    gr.HTML(
        """
        <div class="footer" style="text-align:center">
            <p>
                Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
            </p>
        </div>
        """
    )

demo.queue(concurrency_count=3)
demo.launch()
|