"""Live microphone transcription demo built on Whisper and Gradio."""

import os

import numpy as np
import gradio as gr
import whisper

# Load the model once at start-up so every audio chunk reuses the same weights.
model = whisper.load_model("base.en")


def fun(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Path of the recorded audio file (the Audio input below is
            configured with ``type="filepath"``). ``None`` is tolerated and
            leaves the transcript unchanged.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` tuple: the first element feeds the
        textbox output, the second is stored back into the session state.
    """
    # Normalise a missing state so the concatenation below cannot fail.
    state = state or ""
    if audio is None:
        # Nothing to transcribe for this callback; keep what we already have.
        return state, state
    text = model.transcribe(audio)["text"]
    state += text + " "
    return state, state


def transcribe(audio, state=""):
    """Alias of :func:`fun`, kept for callers that use the older name.

    The previous body called an undefined name ``p`` and raised ``NameError``
    on every call; it now goes through the same Whisper model as ``fun``.

    Args:
        audio: Path of the recorded audio file, or ``None``.
        state: Transcript accumulated by previous calls.

    Returns:
        The same ``(transcript, transcript)`` tuple as :func:`fun`.
    """
    return fun(audio, state)


# The "state" input/output pair carries the transcript from one streamed chunk
# to the next; the ``state=""`` default above is the intended starting value.
gr.Interface(
    title="Testing Whisper",
    fn=fun,
    inputs=[
        # gr.Audio replaces the deprecated gr.inputs.Audio wrapper, which does
        # not appear to accept ``streaming`` -- TODO confirm against the
        # installed Gradio 3.x release.
        gr.Audio(source="microphone", type="filepath", streaming=True),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()