hey-buddy / index.js
benjamin-paine's picture
Update index.js
254ae0e verified
raw
history blame
8.25 kB
/** Configuration */
// RGB triplet for every plotted series, keyed by series name.
const colors = {
  "buddy": [0, 119, 187],
  "hey buddy": [51, 187, 238],
  "hi buddy": [0, 153, 136],
  "sup buddy": [238, 119, 51],
  "yo buddy": [204, 51, 17],
  "okay buddy": [238, 51, 119],
  "speech": [22, 200, 206],
  "frame budget": [25, 255, 25],
};
// Wake-word phrases; each one has a color above and a model file below.
const wakeWords = [
  "buddy",
  "hey buddy",
  "hi buddy",
  "sup buddy",
  "yo buddy",
  "okay buddy",
];
// Pixel dimensions given to every graph canvas.
const canvasSize = { width: 640, height: 100 };
// Stroke width used for the graph outlines.
const graphLineWidth = 1;
// Options handed to the detector: one ONNX model URL per wake word, where the
// file name is the phrase with its space swapped for a hyphen.
const options = {
  debug: true,
  modelPath: wakeWords.map((phrase) => {
    const fileStem = phrase.replace(' ', '-');
    return `https://huggingface.co/benjamin-paine/hey-buddy/resolve/main/models/${fileStem}.onnx`;
  }),
};
/**
 * Helper method for conversion: encodes float samples as a 16-bit PCM WAV blob.
 *
 * The samples are written one after another exactly as given, so for
 * multi-channel audio `audioData` must already be interleaved and its length
 * counts the samples of every channel.
 *
 * @param {Float32Array|number[]} audioData - Samples; values outside [-1, 1] are clamped.
 * @param {number} sampleRate - Sample rate in Hz, written to the header.
 * @param {number} [numChannels=1] - Channel count, written to the header.
 * @returns {Blob} A blob of type `audio/wav` (44-byte header followed by the PCM data).
 */
const float32ToWavBlob = (audioData, sampleRate, numChannels = 1) => {
  const bytesPerSample = 2; // 16-bit PCM
  const wavHeaderSize = 44;

  // Helper to write an ASCII string to the DataView, one byte per character
  const writeString = (view, offset, string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  };

  // Helper to convert float samples to little-endian 16-bit PCM
  const floatTo16BitPCM = (output, offset, input) => {
    for (let i = 0; i < input.length; i++, offset += bytesPerSample) {
      const s = Math.max(-1, Math.min(1, input[i])); // Clamping to [-1, 1]
      // Negative and positive halves scale differently because the int16
      // range is asymmetric (-32768..32767)
      output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }
  };

  // Calculate sizes
  const blockAlign = numChannels * bytesPerSample; // bytes per frame (one sample per channel)
  const byteRate = sampleRate * blockAlign;
  // Exactly one 16-bit value is written per entry of audioData, so the data
  // chunk size must not be multiplied by the channel count again; doing so
  // would declare (and allocate) zero-filled bytes that were never written.
  const dataLength = audioData.length * bytesPerSample;
  const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
  const view = new DataView(buffer);

  // Write WAV file headers
  writeString(view, 0, 'RIFF'); // ChunkID
  view.setUint32(4, 36 + dataLength, true); // ChunkSize
  writeString(view, 8, 'WAVE'); // Format
  writeString(view, 12, 'fmt '); // Subchunk1ID
  view.setUint32(16, 16, true); // Subchunk1Size (PCM = 16)
  view.setUint16(20, 1, true); // AudioFormat (PCM = 1)
  view.setUint16(22, numChannels, true); // NumChannels
  view.setUint32(24, sampleRate, true); // SampleRate
  view.setUint32(28, byteRate, true); // ByteRate
  view.setUint16(32, blockAlign, true); // BlockAlign
  view.setUint16(34, 16, true); // BitsPerSample (16-bit PCM)
  writeString(view, 36, 'data'); // Subchunk2ID
  view.setUint32(40, dataLength, true); // Subchunk2Size

  // Convert the audio samples to 16-bit PCM and write them after the header
  floatTo16BitPCM(view, wavHeaderSize, audioData);

  // Create and return the Blob
  return new Blob([view], { type: 'audio/wav' });
};
/** Object URL currently backing each container's audio element, kept so it can be released. */
const recordingUrls = new WeakMap();
/**
 * Helper method for turning the audio samples into an audio element.
 *
 * Encodes the samples as a WAV blob, replaces the container's content with an
 * `<audio controls>` element pointing at a fresh object URL for that blob, and
 * revokes the object URL created by the previous call for the same container
 * so earlier recordings do not stay in memory for the lifetime of the page.
 *
 * @param {HTMLElement} audioContainer - Element whose content is replaced.
 * @param {Float32Array} audioSamples - Samples passed on to `float32ToWavBlob`.
 * @param {number} [sampleRate=16000] - Sample rate in Hz of `audioSamples`.
 */
const saveRecording = (audioContainer, audioSamples, sampleRate = 16000) => {
  const blob = float32ToWavBlob(audioSamples, sampleRate);
  const url = URL.createObjectURL(blob);
  const previousUrl = recordingUrls.get(audioContainer);
  audioContainer.innerHTML = `<audio controls src="${url}"></audio>`;
  recordingUrls.set(audioContainer, url);
  // Revoke only after the old element has been replaced, so nothing on the
  // page still references the URL being released
  if (previousUrl !== undefined) {
    URL.revokeObjectURL(previousUrl);
  }
};
/** Main */
document.addEventListener("DOMContentLoaded", () => {
  /** DOM elements: host for the graphs and host for the recording playback */
  const graphsContainer = document.getElementById("graphs");
  const audioContainer = document.getElementById("audio");

  /** Drawing state */
  const graphs = {}; // graph name -> canvas element
  const history = {}; // series name -> values plotted so far (one per drawn frame)
  const current = {}; // series name -> most recent value reported by the detector
  const active = {}; // series name -> most recent "active" flag reported by the detector

  /** Detector instance */
  const heyBuddy = new HeyBuddy(options);

  /** Detector callbacks */
  // After each processed result, remember the newest values for the next draw
  heyBuddy.onProcessed((result) => {
    current["frame budget"] = heyBuddy.frameTimeEma;
    current["speech"] = result.speech.probability || 0.0;
    active["speech"] = result.speech.active;
    for (const modelName in result.wakeWords) {
      // Result keys are hyphenated; series names use a space instead
      const seriesName = modelName.replace('-', ' ');
      const detection = result.wakeWords[modelName];
      current[seriesName] = detection.probability || 0.0;
      active[seriesName] = detection.active;
    }
    if (result.recording) {
      audioContainer.innerHTML = "Recording&hellip;";
    }
  });

  // Once a recording is finished, show it as a playable audio element
  heyBuddy.onRecording((audioSamples) => {
    saveRecording(audioContainer, audioSamples);
  });

  /** Graph construction */
  // Builds the color-coded legend listing every wake word
  const buildLegend = () => {
    const legend = document.createElement("div");
    legend.className = "legend";
    wakeWords.forEach((phrase) => {
      const entry = document.createElement("div");
      const [red, green, blue] = colors[phrase];
      entry.style.color = `rgb(${red},${green},${blue})`;
      entry.textContent = phrase;
      legend.appendChild(entry);
    });
    return legend;
  };

  ["wake words", "speech", "frame budget"].forEach((title) => {
    // Wrapper and caption for this graph
    const wrapper = document.createElement("div");
    const caption = document.createElement("label");
    caption.textContent = title;

    // Canvas the graph is painted on
    const surface = document.createElement("canvas");
    surface.className = "graph";
    surface.width = canvasSize.width;
    surface.height = canvasSize.height;
    graphs[title] = surface;

    // Canvas first, caption second, then attach the pair to the page
    wrapper.appendChild(surface);
    wrapper.appendChild(caption);
    graphsContainer.appendChild(wrapper);

    // Only the wake-word graph carries a legend, nested inside its caption
    if (title === "wake words") {
      caption.appendChild(buildLegend());
    }
  });

  /** Draw loop */
  // Appends the newest value of a series to its history, keeping at most one
  // value per canvas pixel column, and returns the resulting history
  const recordSample = (name, isFrameBudget) => {
    if (!history[name]) {
      history[name] = [];
    }
    const latest = current[name] || 0.0;
    history[name].push(isFrameBudget ? latest / 120.0 : latest); // 120ms budget
    const overflow = history[name].length - canvasSize.width;
    if (overflow > 0) {
      history[name] = history[name].slice(overflow);
    }
    return history[name];
  };

  // Paints one series as a filled, outlined polygon anchored to the canvas bottom
  const paintSeries = (canvas, ctx, values) => {
    // Walk left to right (the data itself scrolls right to left over time)
    ctx.beginPath();
    values.forEach((value, x) => {
      const y = canvas.height - value * canvas.height;
      if (x === 0) {
        ctx.moveTo(1, y);
      } else {
        ctx.lineTo(x, y);
      }
    });
    // Drop to the bottom edge and back to the left to close the shape
    ctx.lineTo(values.length - 1, canvas.height);
    ctx.lineTo(0, canvas.height);
    ctx.closePath();
    ctx.fill();
    ctx.stroke();
  };

  const draw = () => {
    Object.keys(graphs).forEach((graphName) => {
      const canvas = graphs[graphName];
      const ctx = canvas.getContext("2d");
      const isFrameBudget = graphName === "frame budget";
      // The wake-word graph overlays one series per phrase; the others plot one series
      const seriesNames = graphName === "wake words" ? wakeWords : [graphName];

      seriesNames.forEach((name, position) => {
        const values = recordSample(name, isFrameBudget);
        const [red, green, blue] = colors[name];
        // Inactive series are dimmed; the frame budget is always fully opaque
        const opacity = (isFrameBudget || active[name]) ? 1.0 : 0.5;

        // Wipe the canvas once per frame, before its first series is painted
        if (position === 0) {
          ctx.clearRect(0, 0, canvas.width, canvas.height);
        }

        ctx.strokeStyle = `rgba(${red},${green},${blue},${opacity})`;
        ctx.fillStyle = `rgba(${red},${green},${blue},${opacity/2})`;
        ctx.lineWidth = graphLineWidth;
        paintSeries(canvas, ctx, values);
      });
    });

    // Schedule the next frame
    requestAnimationFrame(draw);
  };

  /** Kick off the loop */
  requestAnimationFrame(draw);
});