openWakeWord / app.py
davidscripka's picture
fixed bugs in app code
987011f
import gradio as gr
import json
import pandas as pd
import collections
import scipy.signal
import numpy as np
from functools import partial
from openwakeword.model import Model
# Load the openWakeWord models once at import time, using the ONNX inference
# framework. process_audio() below calls model.predict() on this shared instance.
model = Model(inference_framework="onnx")
# Define function to process audio
def process_audio(audio, state=collections.defaultdict(partial(collections.deque, maxlen=60))):
    """Score one streamed audio buffer and return an updated score plot.

    Args:
        audio: ``(sample_rate, samples)`` pair. ``samples`` is a NumPy array
            indexed by sample along its first axis, optionally with a second
            (channel) axis.
        state: Mapping of model name to a deque of that model's most recent
            scores. It is updated in place and returned.
            NOTE(review): the mutable default is kept on purpose so the
            signature is unchanged; presumably Gradio reads it as the initial
            per-session state -- confirm before replacing it with ``None``.

    Returns:
        ``(plot, state)``: the LinePlot update dictionary (with an adjusted
        legend) and the same ``state`` mapping that was passed in.
    """
    frame_len = 1280  # number of samples handed to model.predict() per call

    sample_rate = audio[0]
    data = audio[1]

    # Keep only the first channel of multi-channel audio. Doing this once,
    # before resampling, avoids resampling channels that are never used.
    if data.ndim == 2:
        data = data[:, 0]

    # Resample audio to 16khz if needed; audio that is already at 16 kHz is
    # used unchanged (previously `data` was left undefined in that case).
    if sample_rate != 16000:
        data = scipy.signal.resample(data, int(float(data.shape[0]) / sample_rate * 16000))

    # Get predictions, one full frame at a time
    for start in range(0, data.shape[0], frame_len):
        chunk = data[start:start + frame_len]
        if chunk.shape[0] != frame_len:
            # A trailing partial frame is skipped, as before.
            continue

        prediction = model.predict(chunk)
        for key, score in prediction.items():
            # Fill deque with zeros if it's empty so every line in the plot
            # starts with a full-width history
            if not state[key]:
                state[key].extend(np.zeros(60))

            # Add prediction
            state[key].append(score)

    # Make line plot: one long-format frame per model, stacked together
    dfs = [
        pd.DataFrame({"x": np.arange(len(scores)), "y": scores, "Model": key})
        for key, scores in state.items()
    ]
    df = pd.concat(dfs)

    plot = gr.LinePlot().update(
        value=df, x='x', y='y', color="Model", y_lim=(0, 1), tooltip="Model",
        width=600, height=300, x_title="Time (frames)", y_title="Model Score",
        color_legend_position="bottom",
    )

    # Manually adjust how the legend is displayed by editing the serialized
    # plot specification and writing it back
    spec = json.loads(plot["value"]["plot"])
    legend = spec["layer"][0]['encoding']['color']['legend']
    legend["direction"] = "vertical"
    legend["columns"] = 4
    legend["labelFontSize"] = 12
    legend["titleFontSize"] = 14
    plot["value"]['plot'] = json.dumps(spec)

    return plot, state
# Markdown description passed to the Gradio interface below as `description`;
# it explains how to use the demo and lists suggested phrases for each model.
desc = """
This is a demo of the pre-trained models included in the latest release
of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.
Click on the "record from microphone" button below to start capturing.
The real-time scores from each model will be shown in the line plot. Hover over
each line to see the name of the corresponding model.
Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details).
If everything is working properly,
you should see a spike in the score for a given model after speaking a related word/phrase. Below are some suggested phrases to try!
| Model Name | Word/Phrase |
| --- | --- |
| alexa | "alexa" |
| hey_mycroft | "hey mycroft"|
| hey_jarvis | "hey jarvis"|
| hey_rhasspy | "hey rhasspy"|
| weather | "what's the weather", "tell me today's weather" |
| x_minute_timer | "set a timer for 1 minute", "create 1 hour alarm" |
"""
# Create Gradio interface and launch.
# Input: a streaming microphone feed delivered as NumPy data, plus session state.
# Output: the live score line plot, plus the updated session state.
_mic_input = gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False)
_score_plot = gr.LinePlot(show_label=False)

gr_int = gr.Interface(
    fn=process_audio,
    inputs=[_mic_input, "state"],
    outputs=[_score_plot, "state"],
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    live=True,
)
gr_int.launch()