"""Live microphone transcription demo built on Gradio and NeMo ASR.

Loads a pretrained NeMo CTC-BPE checkpoint once at import time and serves a
Gradio interface that appends each newly transcribed audio chunk to a running
transcript kept in the interface's session state.
"""

import gradio as gr
import nemo.collections.asr as nemo_asr

# Load the checkpoint once at module import so every request reuses the same
# model instance; `map_location="cpu"` is forwarded to `from_pretrained`.
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(
    "theodotus/stt_uk_squeezeformer_ctc_xs",
    map_location="cpu",
)


def transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Path to the audio file for the current chunk (the Audio input
            is configured with ``type="filepath"``). May be ``None`` when no
            audio is available, in which case the transcript is returned
            unchanged.
        state: Transcript accumulated so far. ``None`` is treated as an empty
            transcript.

    Returns:
        A ``(transcript, transcript)`` pair: the first element feeds the
        textbox output, the second is stored back as the new state.
    """
    # Normalize a missing state so the string concatenation below cannot fail.
    if state is None:
        state = ""

    # Nothing to transcribe: keep the transcript as-is instead of handing
    # ``None`` to the model.
    if audio is None:
        return state, state

    # The model takes a list of files; exactly one is passed, so take the
    # single result.
    text = asr_model.transcribe([audio], batch_size=1)[0]
    state += text + " "
    return state, state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()