Spaces:
Running
Running
benjamin-paine
committed on
Upload index.js
Browse files
index.js
ADDED
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/** Configuration */

/**
 * RGB triplets keyed by series name: one entry per wake word, plus the
 * "speech" and "frame budget" series.
 */
const colors = {
  "buddy": [0, 119, 187],
  "hey buddy": [51, 187, 238],
  "hi buddy": [0, 153, 136],
  "sup buddy": [238, 119, 51],
  "yo buddy": [204, 51, 17],
  "okay buddy": [238, 51, 119],
  "speech": [22, 200, 206],
  "frame budget": [25, 255, 25]
};

/** Wake-word phrases; each one yields a model path below and has a color above. */
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy"];

/** Width and height assigned to every graph canvas. */
const canvasSize = { width: 640, height: 100 };

/** Line width used when stroking graph outlines. */
const graphLineWidth = 1;

/** Options passed to the HeyBuddy constructor. */
const options = {
  debug: true,
  // One model file per wake word. Every space in the phrase becomes a hyphen
  // (a global regex is used so phrases with more than two words map correctly).
  modelPath: wakeWords.map((word) => `/models/${word.replace(/ /g, "-")}.onnx`)
};
|
19 |
+
|
20 |
+
/**
 * Helper method for conversion: encodes floating-point audio samples as a
 * 16-bit PCM WAV file.
 *
 * The samples are written to the file in the order given, one 16-bit value
 * per element. For multi-channel audio the caller is therefore expected to
 * pass samples that are already interleaved.
 *
 * @param {Float32Array|number[]} audioData - Samples; values outside [-1, 1] are clamped.
 * @param {number} sampleRate - Sample rate in Hz, written to the header.
 * @param {number} [numChannels=1] - Channel count, written to the header.
 * @returns {Blob} A Blob of type "audio/wav" holding a 44-byte header followed by the samples.
 */
const float32ToWavBlob = (audioData, sampleRate, numChannels = 1) => {
  const bytesPerSample = 2; // 16-bit PCM
  const wavHeaderSize = 44;

  // Helper to write an ASCII string to the DataView, one byte per character
  const writeString = (view, offset, string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  };

  // Helper to convert float samples to little-endian 16-bit PCM
  const floatTo16BitPCM = (output, offset, input) => {
    for (let i = 0; i < input.length; i++, offset += bytesPerSample) {
      const s = Math.max(-1, Math.min(1, input[i])); // Clamping to [-1, 1]
      // Negative values scale to -32768, positive values to 32767
      output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }
  };

  // Calculate sizes
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  // Each element of audioData becomes exactly one 16-bit sample, so the
  // payload size does not depend on the channel count. Multiplying by
  // numChannels here would pad the file with silence and misreport its length.
  const dataLength = audioData.length * bytesPerSample;
  const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
  const view = new DataView(buffer);

  // Write WAV file headers
  writeString(view, 0, 'RIFF');                  // ChunkID
  view.setUint32(4, 36 + dataLength, true);      // ChunkSize
  writeString(view, 8, 'WAVE');                  // Format
  writeString(view, 12, 'fmt ');                 // Subchunk1ID
  view.setUint32(16, 16, true);                  // Subchunk1Size (PCM = 16)
  view.setUint16(20, 1, true);                   // AudioFormat (PCM = 1)
  view.setUint16(22, numChannels, true);         // NumChannels
  view.setUint32(24, sampleRate, true);          // SampleRate
  view.setUint32(28, byteRate, true);            // ByteRate
  view.setUint16(32, blockAlign, true);          // BlockAlign
  view.setUint16(34, 8 * bytesPerSample, true);  // BitsPerSample (16-bit PCM)
  writeString(view, 36, 'data');                 // Subchunk2ID
  view.setUint32(40, dataLength, true);          // Subchunk2Size

  // Convert the float samples to 16-bit PCM and write them after the header
  floatTo16BitPCM(view, wavHeaderSize, audioData);

  // Create and return the Blob
  return new Blob([view], { type: 'audio/wav' });
};
|
67 |
+
|
68 |
+
/** Object URL most recently rendered into each audio container, kept so it can be released. */
const recordingUrls = new WeakMap();

/**
 * Helper method for turning the audio samples into an audio element.
 *
 * Encodes the samples as a WAV blob, creates an object URL for it and replaces
 * the container's content with an <audio> element that plays it. The object
 * URL created by the previous call for the same container is revoked so that
 * superseded recordings do not stay pinned in memory.
 *
 * @param {Element} audioContainer - Element whose content is replaced.
 * @param {Float32Array} audioSamples - Samples passed on to float32ToWavBlob.
 * @param {number} [sampleRate=16000] - Sample rate passed on to float32ToWavBlob.
 */
const saveRecording = (audioContainer, audioSamples, sampleRate = 16000) => {
  const blob = float32ToWavBlob(audioSamples, sampleRate);
  const url = URL.createObjectURL(blob);
  const previousUrl = recordingUrls.get(audioContainer);

  audioContainer.innerHTML = `<audio controls src="${url}"></audio>`;
  recordingUrls.set(audioContainer, url);

  // Release the superseded recording only after the old element is gone
  if (previousUrl !== undefined) {
    URL.revokeObjectURL(previousUrl);
  }
};
|
74 |
+
|
75 |
+
/**
 * Main
 *
 * Once the DOM is ready: creates the HeyBuddy instance, registers its
 * callbacks, builds one canvas per graph, and starts a requestAnimationFrame
 * loop that redraws every graph from the most recent results.
 */
document.addEventListener("DOMContentLoaded", () => {
  /** Divisor applied to the frame-time value so the budget maps to full graph height (120ms budget) */
  const frameBudgetMs = 120.0;

  /** DOM elements */
  const graphsContainer = document.getElementById("graphs");
  const audioContainer = document.getElementById("audio");

  /** Memory for drawing */
  const graphs = {};  // graph name -> canvas element
  const history = {}; // series name -> plotted values, oldest first
  const current = {}; // series name -> latest value reported by onProcessed
  const active = {};  // series name -> latest `active` flag reported by onProcessed

  /** Instantiate */
  const heyBuddy = new HeyBuddy(options);

  /** Add callbacks */

  // When processed, update state for next draw
  heyBuddy.onProcessed((result) => {
    current["frame budget"] = heyBuddy.frameTimeEma;
    current["speech"] = result.speech.probability || 0.0;
    active["speech"] = result.speech.active;
    for (const wakeWord in result.wakeWords) {
      // Map the result key back to the spaced name used by `colors` and
      // `wakeWords`; every hyphen is converted, not just the first one.
      const name = wakeWord.replace(/-/g, " ");
      const detection = result.wakeWords[wakeWord];
      current[name] = detection.probability || 0.0;
      active[name] = detection.active;
    }
    if (result.recording) {
      audioContainer.innerHTML = "Recording…";
    }
  });

  // When recording is complete, replace the audio element
  heyBuddy.onRecording((audioSamples) => {
    saveRecording(audioContainer, audioSamples);
  });

  /** Add graphs */
  for (const graphName of ["wake words", "speech", "frame budget"]) {
    // Create containers for the graph and its label
    const graphContainer = document.createElement("div");
    const graphLabel = document.createElement("label");
    graphLabel.textContent = graphName;

    // Create a canvas for the graph
    const graphCanvas = document.createElement("canvas");
    graphCanvas.className = "graph";
    graphCanvas.width = canvasSize.width;
    graphCanvas.height = canvasSize.height;
    graphs[graphName] = graphCanvas;

    // Add the canvas to the container and the container to the document
    graphContainer.appendChild(graphCanvas);
    graphContainer.appendChild(graphLabel);
    graphsContainer.appendChild(graphContainer);

    // If this is the wake-word graph, also add legend
    if (graphName === "wake words") {
      const graphLegend = document.createElement("div");
      graphLegend.className = "legend";
      for (const wakeWord of wakeWords) {
        const legendItem = document.createElement("div");
        const [r, g, b] = colors[wakeWord];
        legendItem.style.color = `rgb(${r},${g},${b})`;
        legendItem.textContent = wakeWord;
        graphLegend.appendChild(legendItem);
      }
      graphLabel.appendChild(graphLegend);
    }
  }

  /** Define draw loop */
  const draw = () => {
    // Draw speech and model graphs
    for (const graphName in graphs) {
      const isWakeWords = graphName === "wake words";
      const isFrameBudget = graphName === "frame budget";
      // The wake-word graph overlays one series per wake word; the other
      // graphs hold a single series named after the graph itself.
      const subGraphs = isWakeWords ? wakeWords : [graphName];

      const canvas = graphs[graphName];
      const ctx = canvas.getContext("2d");

      // Clear once per graph, before any of its series is drawn
      ctx.clearRect(0, 0, canvas.width, canvas.height);

      for (const name of subGraphs) {
        // Update history
        const series = history[name] || (history[name] = []);
        const value = current[name] || 0.0;
        series.push(isFrameBudget ? value / frameBudgetMs : value);

        // Trim history in place to at most one value per canvas column
        if (series.length > canvasSize.width) {
          series.splice(0, series.length - canvasSize.width);
        }

        // Draw graph; series flagged active (and the frame budget) use full opacity
        const [r, g, b] = colors[name];
        const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;

        ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
        ctx.fillStyle = `rgba(${r},${g},${b},${opacity/2})`;
        ctx.lineWidth = graphLineWidth;

        // Draw from left to right (the frame shifts right to left)
        ctx.beginPath();
        let lastX;
        for (let i = 0; i < series.length; i++) {
          const x = i;
          const y = canvas.height - series[i] * canvas.height;
          if (i === 0) {
            // The first vertex is placed at x = 1 rather than at x = 0
            ctx.moveTo(1, y);
          } else {
            ctx.lineTo(x, y);
          }
          lastX = x;
        }
        // extend downwards to make a polygon
        ctx.lineTo(lastX, canvas.height);
        ctx.lineTo(0, canvas.height);
        ctx.closePath();
        ctx.fill();
        ctx.stroke();
      }
    }

    // Request next frame
    requestAnimationFrame(draw);
  };

  /** Start the loop */
  requestAnimationFrame(draw);
});
|