"""Gradio demo app exposing a speech-to-text tab.

The transcription callable comes from the local ``asr`` module. A
text-to-speech tab is sketched in the commented-out sections below but is
currently disabled.
"""

import gradio as gr

from asr import transcribe
# from tts import synthesize

# Speech-to-text tab: a single audio input goes to `transcribe`, and the
# result is shown as plain text. Flagging is turned off.
mms_transcribe = gr.Interface(
    title="Speech-to-text",
    description="Transcribe audio from a microphone or input file.",
    fn=transcribe,
    inputs=[gr.Audio()],
    outputs="text",
    allow_flagging="never",
    # examples=ASR_EXAMPLES,
    # article=ASR_NOTE,
)

# Disabled text-to-speech tab, kept for reference. Re-enabling it also needs
# the `synthesize` import above and the two-tab TabbedInterface below.
# mms_synthesize = gr.Interface(
#     fn=synthesize,
#     inputs=[
#         gr.Text(label="Input text"),
#     ],
#     outputs=[
#         gr.Audio(label="Generated Audio", type="numpy"),
#         gr.Text(label="Filtered text after removing OOVs"),
#     ],
#     # examples=TTS_EXAMPLES,
#     title="Text-to-speech",
#     description=("Generate audio from input text."),
#     allow_flagging="never",
# )

# Only the speech-to-text interface is exposed as a tab for now.
tabbed_interface = gr.TabbedInterface(
    interface_list=[mms_transcribe],
    tab_names=["Speech-to-text"],
)

# Two-tab variant, to be used once `mms_synthesize` is enabled:
# tabbed_interface = gr.TabbedInterface(
#     [mms_transcribe, mms_synthesize],
#     ["Speech-to-text", "Text-to-speech"],
# )

# Top-level app object: the tabbed interface is rendered inside a Blocks
# container named `demo`.
with gr.Blocks() as demo:
    tabbed_interface.render()


if __name__ == "__main__":
    # Turn on the request queue, then start the server.
    demo.queue().launch()