import gradio as gr
from transformers import pipeline

# Build the audio-classification pipeline once at import time so every
# request handled by the demo reuses the same loaded model.
pipe = pipeline(
    task="audio-classification",
    model="0xb1/wav2vec2-base-finetuned-speech_commands-v0.02",
)

def predict(audio_path) -> str:
    """Classify an audio file and return the label of the first prediction.

    Args:
        audio_path: Path of the audio file to classify. May be ``None`` or
            an empty string when the form is submitted without any audio.

    Returns:
        The ``"label"`` field of the first entry returned by ``pipe``
        (presumably the highest-scoring class -- confirm against the
        pipeline's result ordering), or a short explanatory message when
        no audio was supplied.
    """
    # Guard against an empty submission: handing None/"" to the pipeline
    # would only surface as an opaque error in the UI.
    if not audio_path:
        return "No audio provided. Please upload an audio file."

    predictions = pipe(audio_path)
    return predictions[0]["label"]

# Web UI: one uploaded audio file in (handed to `predict` as a file path),
# the predicted label out as plain text.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
    title="Fast audio commands recognition",
)

# Start serving the interface.
demo.launch()