"""Gradio demo app: automatic speech recognition with a Transformers pipeline.

Builds an ASR pipeline for ``facebook/s2t-medium-librispeech-asr`` at import
time and launches a Gradio interface that records audio from the microphone
and displays the transcription in a text box.
"""
import gradio as gr
from transformers import pipeline

# Built once at module import so every request reuses the same pipeline
# object instead of constructing it per call.
model = pipeline(
    task="automatic-speech-recognition",
    model="facebook/s2t-medium-librispeech-asr",
)


def predict_speech_to_text(audio):
    """Transcribe an audio file and return the recognised text.

    Args:
        audio: Path to the audio file to transcribe (the Audio input below is
            configured with ``type="filepath"``). May be ``None`` or empty
            when nothing was recorded.

    Returns:
        The ``'text'`` entry of the pipeline's prediction, or an empty string
        when no audio was supplied.
    """
    # Guard against a submit with no recording: passing None/"" on to the
    # pipeline would raise instead of yielding a transcription.
    if not audio:
        return ""

    prediction = model(audio)
    return prediction["text"]


# NOTE(review): `gr.inputs.Audio` / `gr.outputs.Textbox` are the legacy Gradio
# component namespaces; newer Gradio releases expose `gr.Audio` / `gr.Textbox`
# instead. Left unchanged because the installed Gradio version is not visible
# from this file -- confirm before upgrading.
gr.Interface(
    fn=predict_speech_to_text,
    title="Automatic Speech Recognition (ASR)",
    inputs=gr.inputs.Audio(
        source="microphone",
        type="filepath",
        label="Input",
    ),
    outputs=gr.outputs.Textbox(label="Output"),
    description="Using pipeline with Facebook S2T for ASR.",
    examples=['ljspeech.wav'],
    allow_flagging='never',
).launch()