Update index.html

index.html (CHANGED): +78 -164
@@ -182,157 +182,33 @@
 let myvad;
 let sttPipeline;
 let ttsPipeline;
+let audioContext;
+let analyser;
+let dataArray;
+let bars;
+let animationId;
 let isListening = false;
+let microphoneStream;
 let isSpeaking = false;
+let currentAudioSource = null;
 
-class Streamer {
-    constructor(onAudioProcess) {
-
-
-
-
-        this.userIsSpeaking = false;
-    }
-
-    async start() {
-        const constraints = { audio: true };
-        this.stream = await navigator.mediaDevices.getUserMedia(constraints);
-        this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
-            sampleRate: 24000,
-        });
-        const source = this.audioContext.createMediaStreamSource(this.stream);
-        this.processor = this.audioContext.createScriptProcessor(1024, 1, 1);
-        this.processor.onaudioprocess = (event) => {
-            const inputData = event.inputBuffer.getChannelData(0);
-            const gain = this.userIsSpeaking ? 1 : 0.2;
-            const processedData = inputData.map(sample => sample * gain);
-            this.onAudioProcess(processedData);
-        };
-        source.connect(this.processor);
-        this.processor.connect(this.audioContext.destination);
-    }
-
-    stop() {
-        this.stream.getTracks().forEach(track => track.stop());
-        this.processor.disconnect();
-        this.audioContext.close();
-    }
-
-    setUserIsSpeaking(isSpeaking) {
-        this.userIsSpeaking = isSpeaking;
-    }
-}
-
-class Playback {
-    constructor(onPlaybackStart, onPlaybackEnd) {
-        this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
-            sampleRate: 24000,
-        });
-        this.samples = [];
-        this.scriptNode = this.audioContext.createScriptProcessor(1024, 1, 1);
-        this.gainNode = this.audioContext.createGain();
-        this.gainNode.gain.value = 0.5;
-        this.isPlaying = false;
-        this.onPlaybackStart = onPlaybackStart;
-        this.onPlaybackEnd = onPlaybackEnd;
-        this.currentBuffer = new Float32Array(1024);
-        this.currentBufferIndex = 0;
-
-        this.scriptNode.onaudioprocess = (event) => {
-            const outputBuffer = event.outputBuffer.getChannelData(0);
-
-            if (this.samples.length > 0 || this.currentBufferIndex < this.currentBuffer.length) {
-                if (!this.isPlaying) {
-                    this.isPlaying = true;
-                    this.onPlaybackStart();
-                }
-
-                for (let i = 0; i < outputBuffer.length; i++) {
-                    if (this.currentBufferIndex >= this.currentBuffer.length) {
-                        if (this.samples.length > 0) {
-                            this.currentBuffer = this.samples.shift();
-                            this.currentBufferIndex = 0;
-                        } else {
-                            outputBuffer[i] = 0;
-                            continue;
-                        }
-                    }
-                    outputBuffer[i] = this.currentBuffer[this.currentBufferIndex++];
-                }
-            } else {
-                if (this.isPlaying) {
-                    this.isPlaying = false;
-                    this.onPlaybackEnd();
-                }
-                outputBuffer.fill(0);
-            }
-        };
-
-        this.scriptNode.connect(this.gainNode);
-        this.gainNode.connect(this.audioContext.destination);
-    }
-
-    start() {
-        this.audioContext.resume();
-    }
-
-    stop() {
-        this.audioContext.suspend();
-        this.samples = [];
-        this.currentBuffer = new Float32Array(1024);
-        this.currentBufferIndex = 0;
-        this.isPlaying = false;
-    }
-
-    addSamples(samples) {
-        for (let i = 0; i < samples.length; i += 1024) {
-            const chunk = samples.slice(i, i + 1024);
-            if (chunk.length < 1024) {
-                const paddedChunk = new Float32Array(1024);
-                paddedChunk.set(chunk);
-                this.samples.push(paddedChunk);
-            } else {
-                this.samples.push(chunk);
-            }
-        }
-    }
-
-    clear() {
-        this.samples = [];
-        this.currentBuffer = new Float32Array(1024);
-        this.currentBufferIndex = 0;
-        this.isPlaying = false;
-    }
-}
-
-const streamer = new Streamer(processMicrophoneInput);
-const playback = new Playback(
-    () => {
-        addLog("Bot started speaking");
-        isSpeaking = true;
-    },
-    () => {
-        addLog("Bot finished speaking");
-        isSpeaking = false;
-    }
-);
-
-function processMicrophoneInput(audioData) {
-    updateVisualizer(audioData);
-}
+function createVisualizer() {
+    const barCount = 64;
+    for (let i = 0; i < barCount; i++) {
+        const bar = document.createElement('div');
+        bar.className = 'bar';
+        visualizer.appendChild(bar);
+    }
+    bars = visualizer.getElementsByClassName('bar');
+}
 
-function updateVisualizer(audioData) {
-
-
-
-
-    for (let i = 0; i < bufferLength; i++) {
-        const barHeight = Math.abs(audioData[i]) * 100;
-        if (bars[i]) {
-            bars[i].style.height = barHeight + '%';
-            bars[i].style.width = barWidth + 'px';
-        }
-    }
-}
+function updateVisualizer() {
+    analyser.getByteFrequencyData(dataArray);
+    for (let i = 0; i < bars.length; i++) {
+        const barHeight = dataArray[i] / 2;
+        bars[i].style.height = barHeight + 'px';
+    }
+    animationId = requestAnimationFrame(updateVisualizer);
+}
 
 async function initializePipelines() {
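The replacement visualizer polls an AnalyserNode instead of tapping raw samples in an onaudioprocess callback, keeping the animation work off the deprecated ScriptProcessorNode path. A minimal standalone sketch of that pattern, assuming nothing from this page except a list of bar elements; attachVisualizer is an illustrative name, not a function in this file:

    // Drive bar elements from an AnalyserNode (fftSize 128 -> 64 bins,
    // matching the page's 64 bars). sourceNode is any Web Audio node.
    function attachVisualizer(audioCtx, sourceNode, barElements) {
        const an = audioCtx.createAnalyser();
        an.fftSize = 128;
        const bins = new Uint8Array(an.frequencyBinCount);   // 64 bins
        sourceNode.connect(an);            // the analyser only observes; no audible output
        (function frame() {
            an.getByteFrequencyData(bins); // 0-255 magnitude per frequency bin
            for (let i = 0; i < barElements.length; i++) {
                barElements[i].style.height = (bins[i] / 2) + 'px';
            }
            requestAnimationFrame(frame);
        })();
    }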
@@ -363,8 +239,10 @@
             const botResponse = `I heard you say: "${transcription.text}".`;
             addLog(`Bot: ${botResponse}`);
 
+            isSpeaking = true;
             const speechOutput = await ttsPipeline(botResponse);
-
+            await playAudio(speechOutput.audio);
+            isSpeaking = false;
         } catch (error) {
             console.error('Error processing speech:', error);
             addLog('System: Error processing speech. Please try again.');
@@ -381,6 +259,32 @@
     logsDiv.scrollTop = logsDiv.scrollHeight;
 }
 
+function playAudio(audioArray) {
+    return new Promise((resolve) => {
+        const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
+        const channelData = audioBuffer.getChannelData(0);
+        channelData.set(audioArray);
+
+        const source = audioContext.createBufferSource();
+        currentAudioSource = source; // Store the current audio source
+        source.buffer = audioBuffer;
+        source.connect(analyser);
+        analyser.connect(audioContext.destination);
+        source.start();
+        source.onended = () => {
+            currentAudioSource = null;
+            resolve();
+        };
+    });
+}
+
+function stopCurrentAudio() {
+    if (currentAudioSource) {
+        currentAudioSource.stop();
+        currentAudioSource = null;
+    }
+}
+
 async function toggleListening() {
     if (isListening) {
         await stopListening();
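playAudio() wraps a one-shot AudioBufferSourceNode in a Promise resolved from onended, which is what lets the caller await an utterance; since stop() also fires ended, the promise resolves even when stopCurrentAudio() cuts playback short. The 16000 passed to createBuffer is the buffer's sample rate, so the code assumes the TTS pipeline emits 16 kHz samples. A quick self-test sketch, assuming audioContext and analyser have already been set up by startListening() below:

    // Play one second of a 440 Hz sine through the same path the bot speech uses.
    const tone = new Float32Array(16000);   // 1 s at the assumed 16 kHz rate
    for (let i = 0; i < tone.length; i++) {
        tone[i] = 0.2 * Math.sin(2 * Math.PI * 440 * i / 16000);
    }
    playAudio(tone).then(() => addLog('System: test tone finished'));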
@@ -391,28 +295,35 @@
 
 async function startListening() {
     try {
-
-
-
+        audioContext = new (window.AudioContext || window.webkitAudioContext)();
+        analyser = audioContext.createAnalyser();
+        analyser.fftSize = 128;
+        dataArray = new Uint8Array(analyser.frequencyBinCount);
 
         myvad = await vad.MicVAD.new({
             onSpeechStart: () => {
                 addLog('--- vad: speech start');
-
+                updateVisualizer();
                 if (isSpeaking) {
                     addLog('User interrupted. Stopping bot speech.');
-
+                    stopCurrentAudio();
+                    isSpeaking = false;
                 }
             },
             onSpeechEnd: (audio) => {
                 addLog('--- vad: speech end');
-
+                cancelAnimationFrame(animationId);
                 processSpeech(audio);
             }
         });
 
+        microphoneStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        const source = audioContext.createMediaStreamSource(microphoneStream);
+        source.connect(analyser);
+
         await myvad.start();
         startButton.textContent = 'End Call';
+        isListening = true;
         addLog('System: Listening...');
     } catch (error) {
         console.error('Error starting VAD:', error);
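MicVAD appears to manage its own capture stream internally, so the explicit getUserMedia() call here opens a second stream purely to feed the analyser for the visualizer. Distilled, with an illustrative function name:

    // Visualizer tap (sketch; tapMicForVisualizer is not a function in this file).
    // Keep the returned stream so stopListening() can stop its tracks later.
    async function tapMicForVisualizer() {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        audioContext.createMediaStreamSource(stream).connect(analyser);  // read-only tap
        return stream;
    }

The tap itself is silent because nothing connects it to audioContext.destination; note, though, that playAudio() later connects the shared analyser to the destination, at which point this tap is also routed to the output. A second analyser reserved for playback would avoid that.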
@@ -422,14 +333,26 @@
 
 async function stopListening() {
     if (myvad) {
-
+        try {
+            await myvad.destroy();
+        } catch (error) {
+            console.error('Error stopping VAD:', error);
+        }
         myvad = null;
-
-
-
+    }
+    if (microphoneStream) {
+        microphoneStream.getTracks().forEach(track => track.stop());
+        microphoneStream = null;
+    }
+    if (audioContext) {
+        await audioContext.close();
+        audioContext = null;
+    }
+    stopCurrentAudio();
     startButton.textContent = 'Begin Call';
     isListening = false;
     addLog('System: Stopped listening.');
+    cancelAnimationFrame(animationId);
     addLog('System: Microphone closed');
 }
 
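The teardown is idempotent by construction: each resource is guarded by a null check and cleared after release, so stopListening() is safe even if startListening() failed partway (stopCurrentAudio() no-ops when idle, and cancelAnimationFrame() ignores an already-expired id, which suffices because updateVisualizer() stores a fresh id on every frame). A condensed sketch of that guard pattern; release is an illustrative helper, not part of this file:

    // Guarded, idempotent release of a nullable resource slot.
    async function release(get, clear, dispose) {
        const resource = get();
        if (!resource) return;        // already released
        try {
            await dispose(resource);  // e.g. v => v.destroy(), or ctx => ctx.close()
        } catch (error) {
            console.error('Error during teardown:', error);
        }
        clear();                      // null the slot so a repeat call is a no-op
    }
    // e.g.: await release(() => myvad, () => { myvad = null; }, v => v.destroy());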
@@ -438,15 +361,6 @@
     logsDiv.innerHTML = '';
 });
 
-function createVisualizer() {
-    const barCount = 64;
-    for (let i = 0; i < barCount; i++) {
-        const bar = document.createElement('div');
-        bar.className = 'bar';
-        visualizer.appendChild(bar);
-    }
-}
-
 createVisualizer();
 initializePipelines();
 </script>