"""Gradio live demo that plots openWakeWord model scores for microphone audio."""

import collections
import json
from functools import partial

import gradio as gr
import numpy as np
import pandas as pd
import scipy.signal
from openwakeword.model import Model

# Sample rate (Hz) the audio is resampled to before prediction.
TARGET_SAMPLE_RATE = 16000
# Number of samples handed to ``model.predict`` per call.
CHUNK_SAMPLES = 1280
# Number of most recent scores kept (and plotted) per model.
HISTORY_FRAMES = 60

# Load openWakeWord models
model = Model(inference_framework="onnx")


def _new_history():
    """Return an empty per-model score history (bounded deques keyed by model)."""
    return collections.defaultdict(
        partial(collections.deque, maxlen=HISTORY_FRAMES)
    )


def process_audio(audio, state=None):
    """Score one buffer of audio and return an updated score plot.

    Args:
        audio: Pair whose first item is the sample rate and whose second item
            is a numpy array of samples (1-D, or 2-D with channels on the
            second axis, in which case only the first channel is used).
        state: Mapping from model name to a deque of recent scores, as
            returned by a previous call. ``None`` (the default) starts a new,
            empty history so separate callers never share one by accident.

    Returns:
        Tuple ``(plot, state)``: the LinePlot update dictionary and the
        (mutated) score history to pass into the next call.
    """
    if state is None:
        state = _new_history()

    sample_rate, samples = audio[0], audio[1]

    # Resample audio to 16khz if needed; otherwise use the samples as-is
    # (previously ``data`` was left undefined for 16 kHz input).
    if sample_rate != TARGET_SAMPLE_RATE:
        n_out = int(float(samples.shape[0]) / sample_rate * TARGET_SAMPLE_RATE)
        data = scipy.signal.resample(samples, n_out)
    else:
        data = samples

    # Just get one channel of audio when the buffer is multi-channel.
    if data.ndim == 2:
        data = data[:, 0]

    # Get predictions, one full chunk at a time; a trailing partial chunk is
    # skipped, exactly as before.
    last_start = data.shape[0] - CHUNK_SAMPLES
    for start in range(0, last_start + 1, CHUNK_SAMPLES):
        chunk = data[start:start + CHUNK_SAMPLES]
        prediction = model.predict(chunk)
        for key in prediction:
            history = state[key]
            # Fill deque with zeros if it's empty so the plot always spans
            # the full window.
            if not history:
                history.extend(np.zeros(HISTORY_FRAMES))
            # Add prediction
            history.append(prediction[key])

    # Make line plot: one long-format frame per model, stacked together.
    frames = [
        pd.DataFrame({"x": np.arange(len(scores)), "y": scores, "Model": name})
        for name, scores in state.items()
    ]
    if frames:
        df = pd.concat(frames)
    else:
        # No scores yet (e.g. the first buffer was shorter than one chunk):
        # pd.concat([]) would raise ValueError, so plot an empty frame.
        # NOTE(review): confirm LinePlot renders an empty frame cleanly.
        df = pd.DataFrame(
            {
                "x": pd.Series(dtype=int),
                "y": pd.Series(dtype=float),
                "Model": pd.Series(dtype=str),
            }
        )

    plot = gr.LinePlot().update(
        value=df,
        x="x",
        y="y",
        color="Model",
        y_lim=(0, 1),
        tooltip="Model",
        width=600,
        height=300,
        x_title="Time (frames)",
        y_title="Model Score",
        color_legend_position="bottom",
    )

    # Manually adjust how the legend is displayed by editing the serialized
    # chart spec stored in the update dictionary.
    spec = json.loads(plot["value"]["plot"])
    legend = spec["layer"][0]["encoding"]["color"]["legend"]
    legend["direction"] = "vertical"
    legend["columns"] = 4
    legend["labelFontSize"] = 12
    legend["titleFontSize"] = 14
    plot["value"]["plot"] = json.dumps(spec)

    return plot, state


# Create Gradio interface and launch
# The table rows below must each stay on their own line for the markdown
# table to render.
desc = """
This is a demo of the pre-trained models included in the latest release of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.

Click on the "record from microphone" button below to start capturing. The real-time scores from each model will be shown in the line plot. Hover over each line to see the name of the corresponding model.

Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details). If everything is working properly, you should see a spike in the score for a given model after speaking a related word/phrase. Below are some suggested phrases to try!

| Model Name | Word/Phrase |
| --- | --- |
| alexa | "alexa" |
| hey_mycroft | "hey mycroft"|
| hey_jarvis | "hey jarvis"|
| hey_rhasspy | "hey rhasspy"|
| weather | "what's the weather", "tell me today's weather" |
| x_minute_timer | "set a timer for 1 minute", "create 1 hour alarm" |
"""

gr_int = gr.Interface(
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    fn=process_audio,
    inputs=[
        gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False),
        "state",
    ],
    outputs=[
        gr.LinePlot(show_label=False),
        "state",
    ],
    live=True,
)

gr_int.launch()