from transformers import pipeline 
import gradio as gr 

# Build the speech-recognition pipeline once at import time so that every
# request reuses the same loaded model instead of reloading it per call.
recognize_model = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-spanish",
)

def trasncript(sound):
    """Transcribe an audio recording to text.

    Args:
        sound: Path to the audio file to transcribe, or ``None`` when no
            recording was provided.

    Returns:
        The transcription produced by ``recognize_model``, or an empty
        string when ``sound`` is ``None``.
    """
    # NOTE(review): Gradio is expected to pass None when the user submits
    # without recording anything; return an empty transcript instead of
    # letting the pipeline fail on a missing input.
    if sound is None:
        return ""

    # The speech-recognition pipeline returns a dict whose transcription is
    # stored under the "text" key.
    return recognize_model(sound)["text"]

# Web UI: record from the microphone, hand the saved file path to the
# transcription function, and show the result in a text box.
demo = gr.Interface(
    fn=trasncript,
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs=["textbox"],
)
demo.launch()