"""Streaming speech-to-text demo: Whisper (tiny) behind a Gradio interface."""

import time
import unicodedata
from typing import Optional, Tuple

import gradio as gr
from transformers import pipeline

# Seconds to wait before transcribing each incoming audio chunk.
# NOTE(review): presumably throttles how often streamed chunks are
# transcribed -- confirm that 10 s is intended for a "real-time" demo.
CHUNK_DELAY_SECONDS = 10

# Speech-recognition pipeline, built once at import time and reused for
# every call to `transcribe`.
p = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")


def transcribe(audio: Optional[str], state: Optional[str] = "") -> Tuple[str, str]:
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: The audio input forwarded to the ASR pipeline (the interface
            below is configured with ``type="filepath"``). ``None`` means
            there is nothing to transcribe.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` pair: the first element feeds the
        textbox output, the second is stored as the new session state.
    """
    if state is None:
        state = ""
    if audio is None:
        # Nothing was recorded: leave the transcript untouched instead of
        # handing None to the pipeline.
        return state, state

    time.sleep(CHUNK_DELAY_SECONDS)
    text = p(audio)["text"]
    # NFC-normalise so composed/decomposed character variants are stored
    # consistently in the transcript.
    state += unicodedata.normalize("NFC", text) + " "
    return state, state


################### Gradio Web APP ################################

title = "Real-Time ASR"

# The launch is intentionally kept at module level (no __main__ guard) so the
# script's existing behaviour is preserved.
gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    title=title,
    theme='EveryPizza/Cartoony-Gradio-Theme',
    live=True,
).launch()