import gradio as gr

# Build and immediately launch a Gradio interface around `transcribe`.
#
# Inputs:  microphone audio (delivered as NumPy data) plus a "state" slot.
# Outputs: a "text" component plus the "state" slot.
#
# NOTE(review): `transcribe` is not defined in this part of the file; it is
# presumably a callable of the form `transcribe(audio, state) -> (text, state)`
# to match the two inputs and two outputs wired up here -- confirm against its
# definition.
# NOTE(review): `gr.inputs.Audio` and the `source=` keyword belong to the older
# Gradio component API; confirm they are supported by the installed version
# before upgrading Gradio.
gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="numpy"),
        "state",
    ],
    outputs=[
        "text",
        "state",
    ],
    # live=True asks Gradio to re-run `fn` as the inputs change instead of
    # waiting for an explicit submit.
    live=True,
).launch()