import gradio as gr  
import time 
from transformers import pipeline 
  
  
# Speech-recognition pipeline, created once at import time and shared
# module-wide under the name ``p``.
p = pipeline(task="automatic-speech-recognition")
  
def transcribe(audio, state="", *, delay=3):
    """Append the text for one audio input to the running transcript.

    Args:
        audio: The audio input handed to the function. It is currently
            unused because the recognition call is stubbed out with a
            fixed placeholder string.
        state: Text accumulated by earlier calls. ``None`` is treated as
            an empty transcript.
        delay: Seconds to sleep before producing the text. Defaults to 3,
            the pause this function has always applied.

    Returns:
        A 2-tuple containing the updated transcript twice (the same string
        in both positions).
    """
    time.sleep(delay)
    # Placeholder text while the real recognition call, p(audio)["text"],
    # is disabled. It must be a str: a list here makes the concatenation
    # below raise TypeError.
    text = "Test"
    previous = "" if state is None else state
    updated = previous + text + " "
    return updated, updated
     
# Build the interface around ``transcribe``: microphone audio (delivered as
# a file path) plus a state value go in; a textbox value plus the state
# come out. The interface is then launched immediately.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=["textbox", "state"],
    live=True,
)
demo.launch()