"""Gradio demo: accumulate transcribed text across live microphone recordings."""

import time

import gradio as gr
from transformers import pipeline

# Speech-recognition pipeline, created once at import time so every request
# can share it instead of constructing a new one per call.
p = pipeline("automatic-speech-recognition")


def transcribe(audio, state=""):
    """Append the transcription of one audio clip to the running transcript.

    Args:
        audio: Path to the recorded audio file (the Audio input below is
            configured with ``type="filepath"``). Unused while the stub
            transcription is active.
        state: Transcript accumulated so far in this session. ``None`` is
            treated as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` tuple: the first element feeds the
        textbox output, the second is stored as the next call's ``state``.
    """
    # NOTE(review): presumably throttles how often live audio is processed;
    # confirm the delay is still wanted before shortening or removing it.
    time.sleep(3)

    # Stub transcription used while the model call is disabled. It must be a
    # str: the previous list value made the concatenation below raise
    # TypeError. To use the real model: text = p(audio)["text"]
    text = "Test"

    # Guard against a None initial state so the first call cannot fail.
    state = (state or "") + text + " "
    return state, state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()