"""Gradio demo that transcribes microphone recordings with a wav2vec2 CTC model."""

import gradio as gr
from transformers import pipeline, AutoModelForCTC, AutoTokenizer

# Load the model from PyTorch weights
model_name = "facebook/wav2vec2-large-xlsr-53-spanish"
model = AutoModelForCTC.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create the ASR pipeline.
# Because `model` is passed as an object rather than a checkpoint name, the
# pipeline factory has no name to look up the audio preprocessor from, so the
# feature extractor is requested explicitly from the same checkpoint.
trans = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=model_name,
)


def audio2text(audio):
    """Transcribe a recording with the module-level ASR pipeline.

    Args:
        audio: Value delivered by the ``gr.Audio`` input. The component is
            configured with ``type="filepath"``, so this is expected to be a
            path to the recorded audio file, or ``None`` when the user submits
            without recording anything.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        no audio was provided.
    """
    # Guard against an empty submission instead of letting the pipeline raise.
    if audio is None:
        return ""
    return trans(audio)["text"]


# NOTE(review): `source=` is the Gradio 3.x spelling; newer Gradio releases
# renamed it to `sources=[...]` — confirm against the pinned Gradio version.
gr.Interface(
    fn=audio2text,
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs=["textbox"],
).launch()