Julien Simon committed on
Commit
92f8ba8
1 Parent(s): 515a989
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -5,12 +5,14 @@ model_name = "juliensimon/wav2vec2-conformer-rel-pos-large-finetuned-speech-comm
5
 
6
  p = pipeline("audio-classification", model=model_name)
7
 
 
8
  def process(file):
9
- pred = p(file)
10
- return { x['label']:x['score'] for x in pred}
 
11
 
12
  # Gradio inputs
13
- mic = gr.inputs.Audio(source='microphone', type='filepath', label='Speech input')
14
 
15
  # Gradio outputs
16
  labels = gr.outputs.Label(num_top_classes=3)
@@ -18,20 +20,20 @@ labels = gr.outputs.Label(num_top_classes=3)
18
  description = "This Space showcases a wav2vec2-conformer-rel-pos-large model fine-tuned for audio classification on the speech_commands dataset. \n \n It can spot one of the following keywords: 'Yes', 'No', 'Up', 'Down', 'Left', 'Right', 'On', 'Off', 'Stop', 'Go', 'Zero', 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight', 'Nine', 'Bed', 'Bird', 'Cat', 'Dog', 'Happy', 'House', 'Marvin', 'Sheila', 'Tree', 'Wow', 'Backward', 'Forward', 'Follow', 'Learn', 'Visual'."
19
 
20
  iface = gr.Interface(
21
- theme='huggingface',
22
- description=description,
23
- fn=process,
24
- inputs=[mic],
25
- outputs=[labels],
26
- examples=[
27
- ['backward16k.wav'],
28
- ['happy16k.wav'],
29
- ['marvin16k.wav'],
30
- ['seven16k.wav'],
31
- ['stop16k.wav'],
32
- ['up16k.wav'],
33
- ],
34
- allow_flagging='never',
35
  )
36
 
37
  iface.launch()
 
5
 
6
  p = pipeline("audio-classification", model=model_name)
7
 
8
+
9
  def process(file):
10
+ pred = p(file)
11
+ return {x["label"]: x["score"] for x in pred}
12
+
13
 
14
  # Gradio inputs
15
+ mic = gr.inputs.Audio(source="microphone", type="filepath", label="Speech input")
16
 
17
  # Gradio outputs
18
  labels = gr.outputs.Label(num_top_classes=3)
 
20
  description = "This Space showcases a wav2vec2-conformer-rel-pos-large model fine-tuned for audio classification on the speech_commands dataset. \n \n It can spot one of the following keywords: 'Yes', 'No', 'Up', 'Down', 'Left', 'Right', 'On', 'Off', 'Stop', 'Go', 'Zero', 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight', 'Nine', 'Bed', 'Bird', 'Cat', 'Dog', 'Happy', 'House', 'Marvin', 'Sheila', 'Tree', 'Wow', 'Backward', 'Forward', 'Follow', 'Learn', 'Visual'."
21
 
22
  iface = gr.Interface(
23
+ theme="huggingface",
24
+ description=description,
25
+ fn=process,
26
+ inputs=[mic],
27
+ outputs=[labels],
28
+ examples=[
29
+ ["backward16k.wav"],
30
+ ["happy16k.wav"],
31
+ ["marvin16k.wav"],
32
+ ["seven16k.wav"],
33
+ ["stop16k.wav"],
34
+ ["up16k.wav"],
35
+ ],
36
+ allow_flagging="never",
37
  )
38
 
39
  iface.launch()