"""Gradio demo that transcribes microphone recordings with an ASR pipeline.

Each submitted recording is run through the ``YSU/aspram`` speech-recognition
pipeline and its text is appended to a running transcript that is kept in the
interface's ``state`` slot between calls.
"""
from transformers import pipeline
import gradio as gr
import time

# Build the pipeline once at import time so every request reuses the same
# loaded model instead of reloading it per call.
asr = pipeline("automatic-speech-recognition", "YSU/aspram")


def transcribe(audio, state=None):
    """Append the transcription of ``audio`` to the running transcript.

    Args:
        audio: Value produced by the audio input (configured below with
            ``type="filepath"``), or ``None`` when nothing was recorded.
        state: Transcript accumulated by previous calls, or ``None`` on the
            first call.

    Returns:
        A ``(transcript, transcript)`` pair: the first element feeds the
        textbox output, the second is stored back into the state slot.
    """
    if state is None:
        state = ""

    # Submitting without a recording yields no audio; handing ``None`` to the
    # pipeline would raise, so leave the transcript untouched instead.
    if audio is None:
        return state, state

    text = asr(audio)["text"]
    # Trailing space keeps consecutive chunks from running together.
    state += text + " "
    print('chunk!')
    return state, state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=False,
    css='body {background-color: rgba(240, 200, 192)}',
).launch()