Spaces:

juliensimon
/

keyword-spotting

Runtime error

File size: 1,788 Bytes

e130e80
 
 
bd27e22
 
 
 
e130e80
 
bd27e22
 
92f8ba8
 
 
e130e80
 
bd27e22
 
e130e80
 
bd27e22
e130e80
bd27e22
e130e80
 
92f8ba8
 
 
bd27e22
92f8ba8
 
bd27e22
 
 
 
 
 
92f8ba8
 
e130e80
515a989
e130e80

from functools import lru_cache

import gradio as gr
from transformers import pipeline

# Model identifiers offered as the choices of the model dropdown; the selected
# one is handed to `process`, which passes it to `pipeline(model=...)`.
model_names = [
    "juliensimon/wav2vec2-conformer-rel-pos-large-finetuned-speech-commands",
    "MIT/ast-finetuned-speech-commands-v2",
]


# Upper bound on pipelines kept alive at once; the demo offers two models, so
# both can stay loaded while memory use remains bounded.
_MAX_CACHED_PIPELINES = 2


@lru_cache(maxsize=_MAX_CACHED_PIPELINES)
def _load_pipeline(model_name):
    """Return the audio-classification pipeline for *model_name*.

    Building a pipeline is the expensive part of a request, so the result is
    memoized per model name and reused by later calls instead of being rebuilt
    every time. A failed construction raises and is not cached, so the next
    call retries.
    """
    return pipeline("audio-classification", model=model_name)


def process(file, model_name):
    """Classify an audio input with the selected model.

    Args:
        file: Audio input forwarded unchanged to the pipeline.
        model_name: Model identifier passed to ``pipeline(model=...)``.

    Returns:
        dict: Each predicted label mapped to its score.
    """
    classifier = _load_pipeline(model_name)
    return {item["label"]: item["score"] for item in classifier(file)}


# Input widgets: a microphone recorder (configured with type="filepath") and
# a dropdown listing the available models.
mic = gr.Audio(
    source="microphone",
    type="filepath",
    label="Speech input",
)
model_selection = gr.Dropdown(
    model_names,
    label="Model selection",
)

# Output widget: a label component limited to the three top classes.
labels = gr.Label(
    num_top_classes=3,
)

# Text shown on the demo page. Adjacent literals are concatenated by the
# parser, so this is a single string split only for readability.
description = (
    "This Space showcases two audio classification models fine-tuned on the speech_commands dataset:\n\n"
    " - wav2vec2-conformer: 97.2% accuracy, added in transformers 4.20.0.\n"
    " - audio-spectrogram-transformer: 98.12% accuracy, added in transformers 4.25.1.\n"
    " \n"
    " They can spot one of the following keywords: "
    "'Yes', 'No', 'Up', 'Down', 'Left', 'Right', 'On', 'Off', 'Stop', 'Go', "
    "'Zero', 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight', 'Nine', "
    "'Bed', 'Bird', 'Cat', 'Dog', 'Happy', 'House', 'Marvin', 'Sheila', 'Tree', 'Wow', "
    "'Backward', 'Forward', 'Follow', 'Learn', 'Visual'."
)

# Every bundled example clip is paired with the same model identifier.
_example_model = "MIT/ast-finetuned-speech-commands-v2"
_example_clips = (
    "backward16k.wav",
    "happy16k.wav",
    "marvin16k.wav",
    "seven16k.wav",
    "stop16k.wav",
    "up16k.wav",
)

# Wire the callback to its widgets: audio and model choice in, labels out.
iface = gr.Interface(
    fn=process,
    inputs=[mic, model_selection],
    outputs=[labels],
    examples=[[clip, _example_model] for clip in _example_clips],
    description=description,
    theme="huggingface",
    allow_flagging="never",
)

# Start the app.
iface.launch()