import gradio as gr
from transformers import pipeline


# Build the speech-recognition pipeline once at module load so that every
# transcription request reuses the same model instance.
model = pipeline(
    task="automatic-speech-recognition",
    model="facebook/s2t-medium-librispeech-asr",
)


def predict_speech_to_text(audio):
    """Transcribe an audio recording to text.

    Args:
        audio: The audio input handed to the ASR pipeline (the UI in this
            file supplies a file path). May be ``None`` when the form is
            submitted without any recording.

    Returns:
        The value of the ``'text'`` field of the pipeline's prediction, or
        an empty string when no audio was provided.
    """
    # With no recording there is nothing to transcribe; return an empty
    # transcript instead of passing None into the pipeline.
    if audio is None:
        return ""
    return model(audio)["text"]


# Assemble the web UI: microphone audio (delivered as a file path) goes in,
# the transcript comes out in a text box. Flagging is turned off.
demo = gr.Interface(
    fn=predict_speech_to_text,
    inputs=gr.inputs.Audio(source="microphone", type="filepath", label="Input"),
    outputs=gr.outputs.Textbox(label="Output"),
    title="Automatic Speech Recognition (ASR)",
    description="Using pipeline with Facebook S2T for ASR.",
    examples=["ljspeech.wav"],
    allow_flagging="never",
)
demo.launch()