// Spaces:
// Running
// Running
/** Configuration */

/** RGB triplets used when drawing each series, keyed by series name. */
const colors = {
  "buddy": [0, 119, 187],
  "hey buddy": [51, 187, 238],
  "hi buddy": [0, 153, 136],
  "sup buddy": [238, 119, 51],
  "yo buddy": [204, 51, 17],
  "okay buddy": [238, 51, 119],
  "speech": [22, 200, 206],
  "frame budget": [25, 255, 25],
};

/** Wake words to load a model for; each one needs a matching entry in `colors`. */
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy"];

/** Pixel dimensions given to every graph canvas. */
const canvasSize = { width: 640, height: 100 };

/** Line width used when stroking graph outlines. */
const graphLineWidth = 1;

/**
 * Build the URL of the ONNX model file for a wake word.
 *
 * Model files are named after the wake word with hyphens in place of spaces.
 * Every space is replaced (not just the first), so wake words made of three or
 * more words still produce a URL without spaces.
 *
 * @param {string} word - Wake word as listed in `wakeWords`, e.g. "hey buddy".
 * @returns {string} Absolute URL of the matching `.onnx` file.
 */
const wakeWordModelUrl = (word) =>
  `https://huggingface.co/benjamin-paine/hey-buddy/resolve/main/models/${word.split(" ").join("-")}.onnx`;

/** Options handed to the HeyBuddy constructor: one model URL per wake word. */
const options = {
  debug: true,
  modelPath: wakeWords.map((word) => wakeWordModelUrl(word)),
};
/**
 * Helper method for conversion: encode float samples as a 16-bit PCM WAV Blob.
 *
 * The output is a 44-byte RIFF/WAVE header followed by the samples converted to
 * little-endian signed 16-bit integers, in the order they appear in `audioData`.
 * For more than one channel the samples are therefore expected to be
 * interleaved already, as the WAV format requires.
 *
 * @param {Float32Array|number[]} audioData - Samples, nominally in [-1, 1];
 *   values outside that range are clamped.
 * @param {number} sampleRate - Sample rate in Hz written to the header.
 * @param {number} [numChannels=1] - Channel count written to the header.
 * @returns {Blob} Blob of type `audio/wav` holding header plus sample data.
 */
const float32ToWavBlob = (audioData, sampleRate, numChannels = 1) => {
  const bytesPerSample = 2; // 16-bit PCM
  const wavHeaderSize = 44;

  // Write an ASCII tag into the view one byte at a time.
  const writeString = (view, offset, string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  };

  // Convert float samples to little-endian signed 16-bit integers.
  const floatTo16BitPCM = (output, offset, input) => {
    for (let i = 0; i < input.length; i++, offset += bytesPerSample) {
      const s = Math.max(-1, Math.min(1, input[i])); // Clamp to [-1, 1]
      // Negative and positive halves scale differently because the int16
      // range is asymmetric (-32768..32767).
      output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }
  };

  const byteRate = sampleRate * numChannels * bytesPerSample;
  const blockAlign = numChannels * bytesPerSample;
  // Exactly one 16-bit value is written per element of audioData, so the
  // payload size does not depend on the channel count. Multiplying by
  // numChannels here would over-allocate the buffer and make the header claim
  // data that is never written.
  const dataLength = audioData.length * bytesPerSample;

  const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
  const view = new DataView(buffer);

  // Write WAV file headers
  writeString(view, 0, 'RIFF');             // ChunkID
  view.setUint32(4, 36 + dataLength, true); // ChunkSize
  writeString(view, 8, 'WAVE');             // Format
  writeString(view, 12, 'fmt ');            // Subchunk1ID
  view.setUint32(16, 16, true);             // Subchunk1Size (PCM = 16)
  view.setUint16(20, 1, true);              // AudioFormat (PCM = 1)
  view.setUint16(22, numChannels, true);    // NumChannels
  view.setUint32(24, sampleRate, true);     // SampleRate
  view.setUint32(28, byteRate, true);       // ByteRate
  view.setUint16(32, blockAlign, true);     // BlockAlign
  view.setUint16(34, 16, true);             // BitsPerSample (16-bit PCM)
  writeString(view, 36, 'data');            // Subchunk2ID
  view.setUint32(40, dataLength, true);     // Subchunk2Size

  // Sample data starts immediately after the header
  floatTo16BitPCM(view, wavHeaderSize, audioData);

  return new Blob([view], { type: 'audio/wav' });
};
/**
 * Object URL most recently created for each audio container, kept so it can be
 * revoked when that container receives a newer recording.
 */
const recordingUrls = new WeakMap();

/**
 * Helper method for turning the audio samples into an audio element.
 *
 * Encodes the samples as a WAV Blob, creates an object URL for it and replaces
 * the container's content with an `<audio controls>` element playing that URL.
 * The URL created by the previous call for the same container is revoked
 * first; otherwise every recording's Blob would stay alive until the page is
 * unloaded.
 *
 * @param {Element} audioContainer - Element whose content is replaced.
 * @param {Float32Array} audioSamples - Samples passed to `float32ToWavBlob`.
 * @param {number} [sampleRate=16000] - Sample rate in Hz of `audioSamples`.
 * @returns {void}
 */
const saveRecording = (audioContainer, audioSamples, sampleRate = 16000) => {
  const previousUrl = recordingUrls.get(audioContainer);
  if (previousUrl !== undefined) {
    // The element using this URL is about to be replaced, so release its Blob
    URL.revokeObjectURL(previousUrl);
  }
  const blob = float32ToWavBlob(audioSamples, sampleRate);
  const url = URL.createObjectURL(blob);
  recordingUrls.set(audioContainer, url);
  audioContainer.innerHTML = `<audio controls src="${url}"></audio>`;
};
/**
 * Main: once the DOM is ready, start the HeyBuddy detector, build one canvas
 * per graph and run a requestAnimationFrame loop that plots the most recent
 * values reported by the detector.
 */
document.addEventListener("DOMContentLoaded", () => {
  /** DOM elements */
  const graphsContainer = document.getElementById("graphs");
  const audioContainer = document.getElementById("audio");

  /** Memory for drawing */
  const graphs = {};  // graph name -> canvas element
  const history = {}; // series name -> plotted values, oldest first
  const current = {}; // series name -> latest value from the detector
  const active = {};  // series name -> latest "active" flag from the detector

  // Frame time that maps to the full graph height (120ms budget)
  const frameBudgetMs = 120.0;

  // Result keys use hyphens where the configured wake words use spaces
  // (presumably derived from the model file names - verify against HeyBuddy).
  // Every hyphen is replaced so multi-word names still match `colors`/`wakeWords`.
  const toSeriesName = (modelName) => modelName.split("-").join(" ");

  /** Instantiate */
  const heyBuddy = new HeyBuddy(options);

  /** Add callbacks */
  // When processed, update state for next draw
  heyBuddy.onProcessed((result) => {
    current["frame budget"] = heyBuddy.frameTimeEma;
    current["speech"] = result.speech.probability || 0.0;
    active["speech"] = result.speech.active;
    for (const modelName in result.wakeWords) {
      const seriesName = toSeriesName(modelName);
      const detection = result.wakeWords[modelName];
      current[seriesName] = detection.probability || 0.0;
      active[seriesName] = detection.active;
    }
    if (result.recording) {
      // Plain text, so textContent is enough; this also drops any audio element
      audioContainer.textContent = "Recording…";
    }
  });

  // When recording is complete, replace the audio element
  heyBuddy.onRecording((audioSamples) => {
    saveRecording(audioContainer, audioSamples);
  });

  /** Add graphs */
  for (const graphName of ["wake words", "speech", "frame budget"]) {
    // Create containers for the graph and its label
    const graphContainer = document.createElement("div");
    const graphLabel = document.createElement("label");
    graphLabel.textContent = graphName;

    // Create a canvas for the graph
    const graphCanvas = document.createElement("canvas");
    graphCanvas.className = "graph";
    graphCanvas.width = canvasSize.width;
    graphCanvas.height = canvasSize.height;
    graphs[graphName] = graphCanvas;

    // Add the canvas to the container and the container to the document
    graphContainer.appendChild(graphCanvas);
    graphContainer.appendChild(graphLabel);
    graphsContainer.appendChild(graphContainer);

    // If this is the wake-word graph, also add a legend with one colored
    // entry per wake word
    if (graphName === "wake words") {
      const graphLegend = document.createElement("div");
      graphLegend.className = "legend";
      for (const wakeWord of wakeWords) {
        const legendItem = document.createElement("div");
        const [r, g, b] = colors[wakeWord];
        legendItem.style.color = `rgb(${r},${g},${b})`;
        legendItem.textContent = wakeWord;
        graphLegend.appendChild(legendItem);
      }
      graphLabel.appendChild(graphLegend);
    }
  }

  /** Define draw loop */
  const draw = () => {
    // Draw speech and model graphs
    for (const graphName in graphs) {
      const canvas = graphs[graphName];
      const ctx = canvas.getContext("2d");
      const isFrameBudget = graphName === "frame budget";
      // The wake-word graph overlays one series per wake word; the other
      // graphs hold a single series named after the graph itself
      const seriesNames = graphName === "wake words" ? wakeWords : [graphName];

      // Clear once per graph, before any of its series is drawn
      ctx.clearRect(0, 0, canvas.width, canvas.height);
      ctx.lineWidth = graphLineWidth;

      for (const name of seriesNames) {
        // Update history with one value per animation frame
        const samples = history[name] || (history[name] = []);
        const value = current[name] || 0.0;
        samples.push(isFrameBudget ? value / frameBudgetMs : value);

        // Trim history to one value per horizontal pixel
        const excess = samples.length - canvasSize.width;
        if (excess > 0) {
          samples.splice(0, excess);
        }

        // The frame-budget series is always opaque; others dim while inactive
        const [r, g, b] = colors[name];
        const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;
        ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
        ctx.fillStyle = `rgba(${r},${g},${b},${opacity / 2})`;

        // Draw from left to right (the frame shifts right to left)
        ctx.beginPath();
        for (let x = 0; x < samples.length; x++) {
          const y = canvas.height - samples[x] * canvas.height;
          if (x === 0) {
            // NOTE(review): the path starts at x = 1 rather than 0, as in the
            // original; confirm whether that offset is intentional
            ctx.moveTo(1, y);
          } else {
            ctx.lineTo(x, y);
          }
        }

        // Extend downwards to make a polygon
        ctx.lineTo(samples.length - 1, canvas.height);
        ctx.lineTo(0, canvas.height);
        ctx.closePath();
        ctx.fill();
        ctx.stroke();
      }
    }

    // Request next frame
    requestAnimationFrame(draw);
  };

  /** Start the loop */
  requestAnimationFrame(draw);
});