"""Gradio demo: classify a short spoken command with a fine-tuned Wav2Vec2-Conformer."""

import gradio as gr
from transformers import pipeline

# Loaded once at import time so every request reuses the same model instance.
pipe = pipeline(
    "audio-classification",
    model="juliensimon/wav2vec2-conformer-rel-pos-large-finetuned-speech-commands",
)


def predict(audio):
    """Return the label of the top-ranked prediction for an audio clip.

    Args:
        audio: Path to the audio file supplied by the Gradio audio component,
            or ``None`` when the user submitted the form without any audio.

    Returns:
        The ``"label"`` field of the first prediction returned by the
        pipeline, or an empty string when no audio was provided.
    """
    if audio is None:
        # Nothing was recorded/uploaded; avoid crashing inside the pipeline.
        return ""
    # The pipeline returns a list of {"label", "score"} dicts; the first entry
    # is taken as the best guess (presumably sorted by descending score).
    return pipe(audio)[0]["label"]


demo = gr.Interface(
    title="Audio commands detection",
    fn=predict,
    # type="filepath" hands `predict` a path string, which the pipeline can
    # decode itself; the default "numpy" type would pass a
    # (sample_rate, array) tuple that the pipeline does not accept.
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()