Delete index.js
index.js
DELETED
@@ -1,262 +0,0 @@
/**
 * Play audio samples using the Web Audio API.
 * @param {Float32Array} audioSamples - The audio samples to play.
 * @param {number} sampleRate - The sample rate of the audio samples.
 */
function playAudioSamples(audioSamples, sampleRate = 16000) {
    // Create an AudioContext
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();

    // Create an AudioBuffer
    const audioBuffer = audioContext.createBuffer(
        1, // number of channels
        audioSamples.length, // length of the buffer in samples
        sampleRate // sample rate (samples per second)
    );

    // Fill the AudioBuffer with the Float32Array of audio samples
    audioBuffer.getChannelData(0).set(audioSamples);

    // Create a BufferSource node
    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;

    // Connect the source to the AudioContext's destination (the speakers)
    source.connect(audioContext.destination);

    // Start playback
    source.start();
}
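// Example (hypothetical, for illustration only): playAudioSamples expects
// Float32Array samples in [-1, 1]. The 440 Hz tone and one-second duration
// below are assumptions chosen for demonstration; any sample source with a
// matching sampleRate works the same way.
function demoPlayTone(sampleRate = 16000) {
    const tone = new Float32Array(sampleRate); // one second of audio
    for (let i = 0; i < tone.length; i++) {
        tone[i] = 0.5 * Math.sin(2 * Math.PI * 440 * i / sampleRate);
    }
    playAudioSamples(tone, sampleRate); // most browsers require a user gesture first
}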
/**
 * Turns floating-point audio samples into a WAV blob.
 * @param {Float32Array} audioSamples - The audio samples to convert.
 * @param {number} sampleRate - The sample rate of the audio samples.
 * @param {number} numChannels - The number of channels in the audio. Defaults to 1 (mono).
 * @return {Blob} A blob of type `audio/wav`.
 */
function samplesToBlob(audioSamples, sampleRate = 16000, numChannels = 1) {
    // Helper to write a string to the DataView
    const writeString = (view, offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };

    // Helper to convert a Float32Array to 16-bit PCM, written into the DataView
    const floatTo16BitPCM = (output, offset, input) => {
        for (let i = 0; i < input.length; i++, offset += 2) {
            const s = Math.max(-1, Math.min(1, input[i])); // Clamp to [-1, 1]
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true); // Convert to 16-bit PCM
        }
    };

    const byteRate = sampleRate * numChannels * 2; // 16-bit PCM = 2 bytes per sample

    // Calculate sizes
    const blockAlign = numChannels * 2; // 2 bytes per sample for 16-bit audio
    const wavHeaderSize = 44;
    const dataLength = audioSamples.length * numChannels * 2; // 16-bit PCM data length
    const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
    const view = new DataView(buffer);

    // Write WAV file headers
    writeString(view, 0, 'RIFF'); // ChunkID
    view.setUint32(4, 36 + dataLength, true); // ChunkSize
    writeString(view, 8, 'WAVE'); // Format
    writeString(view, 12, 'fmt '); // Subchunk1ID
    view.setUint32(16, 16, true); // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true); // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true); // NumChannels
    view.setUint32(24, sampleRate, true); // SampleRate
    view.setUint32(28, byteRate, true); // ByteRate
    view.setUint16(32, blockAlign, true); // BlockAlign
    view.setUint16(34, 16, true); // BitsPerSample (16-bit PCM)
    writeString(view, 36, 'data'); // Subchunk2ID
    view.setUint32(40, dataLength, true); // Subchunk2Size

    // Convert the Float32Array audio samples to 16-bit PCM and write them to the DataView
    floatTo16BitPCM(view, wavHeaderSize, audioSamples);

    // Create and return the Blob
    return new Blob([view], { type: 'audio/wav' });
}
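// Example (hypothetical check, not from the original file): the blob layout is a
// 44-byte RIFF/WAVE header followed by 2 bytes per sample per channel, so a mono
// Float32Array of N samples yields a blob of exactly 44 + 2 * N bytes. (Only mono
// is exercised here; for numChannels > 1 the dataLength computation above would
// double-count interleaved input.)
function checkWavBlobSize(numSamples, sampleRate = 16000) {
    const blob = samplesToBlob(new Float32Array(numSamples), sampleRate, 1);
    console.assert(blob.size === 44 + 2 * numSamples, "unexpected WAV blob size");
    return blob.size;
}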
/**
 * Renders a blob to an audio element with controls.
 * Use `appendChild(result)` to add it to the document or another node.
 * @param {Blob} audioBlob - A blob with a valid audio type.
 * @return {HTMLAudioElement} An audio element ready to insert.
 * @see samplesToBlob
 */
function blobToAudio(audioBlob) {
    const url = URL.createObjectURL(audioBlob);
    const audio = document.createElement("audio");
    audio.controls = true;
    audio.src = url;
    return audio;
}
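// Example (hypothetical): render one second of silence as a playable element by
// chaining samplesToBlob and blobToAudio. The target node is an assumption for
// demonstration; the real page appends to the #audio container instead.
function demoRenderSilence(targetNode = document.body) {
    const silence = new Float32Array(16000); // one second at 16 kHz
    const audioElement = blobToAudio(samplesToBlob(silence, 16000));
    targetNode.appendChild(audioElement);
}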
/** Configuration */
const colors = {
    "buddy": [0, 119, 187],
    "hey buddy": [51, 187, 238],
    "hi buddy": [0, 153, 136],
    "sup buddy": [238, 119, 51],
    "yo buddy": [204, 51, 17],
    "okay buddy": [238, 51, 119],
    "speech": [22, 200, 206],
    "frame budget": [25, 255, 25]
};
const rootUrl = "https://huggingface.co/benjamin-paine/hey-buddy/resolve/main";
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy"];
const canvasSize = { width: 640, height: 100 };
const graphLineWidth = 1;
const options = {
    debug: true,
    modelPath: wakeWords.map((word) => `${rootUrl}/models/${word.replace(' ', '-')}.onnx`),
    vadModelPath: `${rootUrl}/pretrained/silero-vad.onnx`,
    spectrogramModelPath: `${rootUrl}/pretrained/mel-spectrogram.onnx`,
    embeddingModelPath: `${rootUrl}/pretrained/speech-embedding.onnx`,
};
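// For reference: each wake word contains at most one space, so the single-match
// replace(' ', '-') above is sufficient; e.g. options.modelPath[1] resolves to
// "https://huggingface.co/benjamin-paine/hey-buddy/resolve/main/models/hey-buddy.onnx".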
/** Main */
document.addEventListener("DOMContentLoaded", () => {
    /** DOM elements */
    const graphsContainer = document.getElementById("graphs");
    const audioContainer = document.getElementById("audio");

    /** Memory for drawing */
    const graphs = {};
    const history = {};
    const current = {};
    const active = {};

    /** Instantiate */
    const heyBuddy = new HeyBuddy(options);

    /** Add callbacks */

    // When processed, update state for the next draw
    heyBuddy.onProcessed((result) => {
        current["frame budget"] = heyBuddy.frameTimeEma;
        current["speech"] = result.speech.probability || 0.0;
        active["speech"] = result.speech.active;
        for (let wakeWord in result.wakeWords) {
            current[wakeWord.replace('-', ' ')] = result.wakeWords[wakeWord].probability || 0.0;
            active[wakeWord.replace('-', ' ')] = result.wakeWords[wakeWord].active;
        }
        if (result.recording) {
            audioContainer.innerHTML = "Recording…";
        }
    });

    // When recording is complete, replace the audio element
    heyBuddy.onRecording((audioSamples) => {
        const audioBlob = samplesToBlob(audioSamples);
        const audioElement = blobToAudio(audioBlob);
        audioContainer.innerHTML = "";
        audioContainer.appendChild(audioElement);
    });
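    // Note: the model result keys use hyphens (e.g. "hey-buddy") while the colors
    // table and wakeWords list use spaces, hence the replace('-', ' ') above.
    // Inferred from these callbacks, `result` has the shape:
    // { speech: { probability, active }, wakeWords: { [name]: { probability, active } }, recording }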
    /** Add graphs */
    for (let graphName of ["wake words", "speech", "frame budget"]) {
        // Create containers for the graph and its label
        const graphContainer = document.createElement("div");
        const graphLabel = document.createElement("label");
        graphLabel.textContent = graphName;

        // Create a canvas for the graph
        const graphCanvas = document.createElement("canvas");
        graphCanvas.className = "graph";
        graphCanvas.width = canvasSize.width;
        graphCanvas.height = canvasSize.height;
        graphs[graphName] = graphCanvas;

        // Add the canvas to the container and the container to the document
        graphContainer.appendChild(graphCanvas);
        graphContainer.appendChild(graphLabel);
        graphsContainer.appendChild(graphContainer);

        // If this is the wake-word graph, also add a legend
        if (graphName === "wake words") {
            const graphLegend = document.createElement("div");
            graphLegend.className = "legend";
            for (let wakeWord of wakeWords) {
                const legendItem = document.createElement("div");
                const [r, g, b] = colors[wakeWord];
                legendItem.style.color = `rgb(${r},${g},${b})`;
                legendItem.textContent = wakeWord;
                graphLegend.appendChild(legendItem);
            }
            graphLabel.appendChild(graphLegend);
        }
    }
    /** Define the draw loop */
    const draw = () => {
        // Draw speech and model graphs
        for (let graphName in graphs) {
            const isWakeWords = graphName === "wake words";
            const isFrameBudget = graphName === "frame budget";
            const subGraphs = isWakeWords ? wakeWords : [graphName];

            let isFirst = true;
            for (let name of subGraphs) {
                // Update history
                history[name] = history[name] || [];
                if (isFrameBudget) {
                    history[name].push((current[name] || 0.0) / 120.0); // 120 ms budget
                } else {
                    history[name].push(current[name] || 0.0);
                }

                // Trim history
                if (history[name].length > canvasSize.width) {
                    history[name] = history[name].slice(history[name].length - canvasSize.width);
                }

                // Draw graph
                const canvas = graphs[graphName];
                const ctx = canvas.getContext("2d");
                const [r, g, b] = colors[name];
                const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;

                if (isFirst) {
                    // Clear the canvas on the first draw
                    ctx.clearRect(0, 0, canvas.width, canvas.height);
                    isFirst = false;
                }

                ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
                ctx.fillStyle = `rgba(${r},${g},${b},${opacity / 2})`;
                ctx.lineWidth = graphLineWidth;

                // Draw from left to right (the frame shifts right to left)
                ctx.beginPath();
                let lastX = 0;
                for (let i = 0; i < history[name].length; i++) {
                    const x = i;
                    const y = canvas.height - history[name][i] * canvas.height;
                    if (i === 0) {
                        ctx.moveTo(x, y);
                    } else {
                        ctx.lineTo(x, y);
                    }
                    lastX = x;
                }
                // Extend downwards to close the polygon
                ctx.lineTo(lastX, canvas.height);
                ctx.lineTo(0, canvas.height);
                ctx.closePath();
                ctx.fill();
                ctx.stroke();
            }
        }

        // Request the next frame
        requestAnimationFrame(draw);
    };

    /** Start the loop */
    requestAnimationFrame(draw);
});
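// Assumed host page (inferred from the getElementById calls above; the markup is
// not part of this file): a <div id="graphs"> and a <div id="audio"> must exist,
// and the script defining the global HeyBuddy class must load before this one.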