"""Gradio demo app for MMS with three tabs: ASR, TTS and language identification.

Each tab is a ``gr.Interface`` wrapping a function imported from the sibling
``asr``, ``tts`` and ``lid`` modules. The tabs are combined into a
``gr.TabbedInterface`` and rendered inside a ``gr.Blocks`` page together with
some introductory text. Running this module as a script enables queuing and
launches the app.
"""

import gradio as gr
import librosa  # NOTE(review): not referenced in this file; kept in case another part of the app relies on it — confirm before removing

from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
from lid import identify, LID_EXAMPLES

# Language code pre-selected in the ASR and TTS dropdowns.
_DEFAULT_LANGUAGE_CODE = "eng"
# Historical default value; only used if the default code is missing from the
# language mapping, so that case behaves exactly as before.
_FALLBACK_LANGUAGE_VALUE = "eng English"

# Intro text shown above the tabs (kept at column 0 so the string content,
# including its leading/trailing blank lines, is unchanged).
_HEADER_MARKDOWN = """

MMS: Scaling Speech Technology to 1000+ languages demo. See our blog post and paper.

"""

_TABS_HINT_HTML = """
Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos.
"""

_FINETUNE_HTML = """
You can also finetune MMS models on your data using the recipes provided here - ASR TTS
"""

_DUPLICATE_SPACE_HTML = """
Duplicate Space for more control and no queue.
"""


def _language_choices(languages):
    """Return the dropdown entries, formatted as ``"<code> (<name>)"``."""
    return [f"{code} ({name})" for code, name in languages.items()]


def _default_language(languages, code=_DEFAULT_LANGUAGE_CODE):
    """Return the dropdown entry for *code*, formatted like the choices.

    The default must be spelled exactly like one of the entries produced by
    ``_language_choices``; otherwise the dropdown starts on a value that is
    not among its choices. If *code* is not in *languages*, the previous
    hard-coded default is returned unchanged.
    """
    if code in languages:
        return f"{code} ({languages[code]})"
    return _FALLBACK_LANGUAGE_VALUE


mms_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(),
        gr.Dropdown(
            _language_choices(ASR_LANGUAGES),
            label="Language",
            value=_default_language(ASR_LANGUAGES),
        ),
    ],
    outputs="text",
    examples=ASR_EXAMPLES,
    title="Speech-to-text",
    description=(
        "Transcribe audio from a microphone or input file in your desired language."
    ),
    article=ASR_NOTE,
    allow_flagging="never",
)


def tts_wrapper(text, lang, speed):
    """Call ``synthesize`` and return its three results as a tuple.

    The returned values feed, in order, the three outputs of
    ``mms_synthesize``: the generated audio, the filtered text and the
    S3 URL / status text. Unpacking into exactly three names means a
    ``synthesize`` result of any other length raises ``ValueError`` here.
    """
    audio, filtered_text, s3_url = synthesize(text, lang, speed)
    return audio, filtered_text, s3_url


mms_synthesize = gr.Interface(
    fn=tts_wrapper,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            _language_choices(TTS_LANGUAGES),
            label="Language",
            value=_default_language(TTS_LANGUAGES),
        ),
        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
    ],
    outputs=[
        gr.Audio(label="Generated Audio"),
        gr.Text(label="Filtered text after removing OOVs"),
        gr.Text(label="S3 URL or Status"),
    ],
    examples=TTS_EXAMPLES,
    title="Text-to-speech",
    description=("Generate audio in your desired language from input text."),
    allow_flagging="never",
)

mms_identify = gr.Interface(
    fn=identify,
    inputs=[
        gr.Audio(),
    ],
    outputs=gr.Label(num_top_classes=10),
    examples=LID_EXAMPLES,
    title="Language Identification",
    description=("Identify the language of input audio."),
    allow_flagging="never",
)

# Tab order here must stay in step with the tab titles below.
tabbed_interface = gr.TabbedInterface(
    [mms_transcribe, mms_synthesize, mms_identify],
    ["Speech-to-text", "Text-to-speech", "Language Identification"],
)

with gr.Blocks() as demo:
    gr.Markdown(_HEADER_MARKDOWN)
    gr.HTML(_TABS_HINT_HTML)
    gr.HTML(_FINETUNE_HTML)
    gr.HTML(_DUPLICATE_SPACE_HTML)
    # The tabbed interface was built outside this Blocks context, so it is
    # rendered explicitly to place it on the page.
    tabbed_interface.render()
    # Trailing HTML component whose content is a single space.
    gr.HTML(""" """)

if __name__ == "__main__":
    demo.queue()
    demo.launch(show_error=True)