"""Real-time speech transcription demo using Transformers and Gradio."""

import time
from typing import Optional, Tuple

import gradio as gr
from transformers import pipeline

# Speech-recognition pipeline, built once at import time and reused by every
# call to ``transcribe``. No model is named, so the library chooses one.
p = pipeline("automatic-speech-recognition")


def transcribe(audio: Optional[str], state: Optional[str] = "") -> Tuple[str, str]:
    """Transcribe one audio clip and append the text to the running transcript.

    Args:
        audio: The recorded audio to transcribe; the interface below is
            configured with ``type="filepath"``, so this is a path. ``None``
            is tolerated and leaves the transcript unchanged.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.

    Returns:
        The updated transcript twice: once for the textbox output and once
        for the state output that is passed back in on the next call.
    """
    transcript = state or ""

    # NOTE(review): a live interface can presumably call back with no audio
    # (e.g. after the recording is cleared); skip the model instead of
    # crashing inside the pipeline.
    if audio is None:
        return transcript, transcript

    # NOTE(review): presumably a throttle so audio can accumulate between
    # live updates - confirm before changing the delay.
    time.sleep(3)
    text = p(audio)["text"]
    transcript += text + " "
    return transcript, transcript


# The embedded 'Record' widget may not work inline; if so, open
# 'https://39357.gradio.app' in an external browser, speak, and the text
# output will appear.
ST_ASR_demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        'state',
    ],
    outputs=[
        "textbox",
        "state",
    ],
    title='Real-Time Speech Transcription',
    description='Speak something, you will get the text as an output',
    live=True,
)
# The interface is not launched here; call ST_ASR_demo.launch(inline=False)
# to start it.