atlury committed
Commit 02408e1 · verified · 1 Parent(s): c9c1733

Update index.html

Files changed (1):
  index.html +78 -164
index.html CHANGED
@@ -182,157 +182,33 @@
     let myvad;
     let sttPipeline;
     let ttsPipeline;
+    let audioContext;
+    let analyser;
+    let dataArray;
+    let bars;
+    let animationId;
     let isListening = false;
+    let microphoneStream;
     let isSpeaking = false;
+    let currentAudioSource = null;

-    class Streamer {
-        constructor(onAudioProcess) {
-            this.stream = null;
-            this.processor = null;
-            this.audioContext = null;
-            this.onAudioProcess = onAudioProcess;
-            this.userIsSpeaking = false;
-        }
-
-        async start() {
-            const constraints = { audio: true };
-            this.stream = await navigator.mediaDevices.getUserMedia(constraints);
-            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
-                sampleRate: 24000,
-            });
-            const source = this.audioContext.createMediaStreamSource(this.stream);
-            this.processor = this.audioContext.createScriptProcessor(1024, 1, 1);
-            this.processor.onaudioprocess = (event) => {
-                const inputData = event.inputBuffer.getChannelData(0);
-                const gain = this.userIsSpeaking ? 1 : 0.2;
-                const processedData = inputData.map(sample => sample * gain);
-                this.onAudioProcess(processedData);
-            };
-            source.connect(this.processor);
-            this.processor.connect(this.audioContext.destination);
-        }
-
-        stop() {
-            this.stream.getTracks().forEach(track => track.stop());
-            this.processor.disconnect();
-            this.audioContext.close();
-        }
-
-        setUserIsSpeaking(isSpeaking) {
-            this.userIsSpeaking = isSpeaking;
-        }
-    }
-
-    class Playback {
-        constructor(onPlaybackStart, onPlaybackEnd) {
-            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
-                sampleRate: 24000,
-            });
-            this.samples = [];
-            this.scriptNode = this.audioContext.createScriptProcessor(1024, 1, 1);
-            this.gainNode = this.audioContext.createGain();
-            this.gainNode.gain.value = 0.5;
-            this.isPlaying = false;
-            this.onPlaybackStart = onPlaybackStart;
-            this.onPlaybackEnd = onPlaybackEnd;
-            this.currentBuffer = new Float32Array(1024);
-            this.currentBufferIndex = 0;
-
-            this.scriptNode.onaudioprocess = (event) => {
-                const outputBuffer = event.outputBuffer.getChannelData(0);
-
-                if (this.samples.length > 0 || this.currentBufferIndex < this.currentBuffer.length) {
-                    if (!this.isPlaying) {
-                        this.isPlaying = true;
-                        this.onPlaybackStart();
-                    }
-
-                    for (let i = 0; i < outputBuffer.length; i++) {
-                        if (this.currentBufferIndex >= this.currentBuffer.length) {
-                            if (this.samples.length > 0) {
-                                this.currentBuffer = this.samples.shift();
-                                this.currentBufferIndex = 0;
-                            } else {
-                                outputBuffer[i] = 0;
-                                continue;
-                            }
-                        }
-                        outputBuffer[i] = this.currentBuffer[this.currentBufferIndex++];
-                    }
-                } else {
-                    if (this.isPlaying) {
-                        this.isPlaying = false;
-                        this.onPlaybackEnd();
-                    }
-                    outputBuffer.fill(0);
-                }
-            };
-
-            this.scriptNode.connect(this.gainNode);
-            this.gainNode.connect(this.audioContext.destination);
-        }
-
-        start() {
-            this.audioContext.resume();
-        }
-
-        stop() {
-            this.audioContext.suspend();
-            this.samples = [];
-            this.currentBuffer = new Float32Array(1024);
-            this.currentBufferIndex = 0;
-            this.isPlaying = false;
-        }
-
-        addSamples(samples) {
-            for (let i = 0; i < samples.length; i += 1024) {
-                const chunk = samples.slice(i, i + 1024);
-                if (chunk.length < 1024) {
-                    const paddedChunk = new Float32Array(1024);
-                    paddedChunk.set(chunk);
-                    this.samples.push(paddedChunk);
-                } else {
-                    this.samples.push(chunk);
-                }
-            }
-        }
-
-        clear() {
-            this.samples = [];
-            this.currentBuffer = new Float32Array(1024);
-            this.currentBufferIndex = 0;
-            this.isPlaying = false;
-        }
-    }
-
-    const streamer = new Streamer(processMicrophoneInput);
-    const playback = new Playback(
-        () => {
-            addLog("Bot started speaking");
-            isSpeaking = true;
-        },
-        () => {
-            addLog("Bot finished speaking");
-            isSpeaking = false;
+    function createVisualizer() {
+        const barCount = 64;
+        for (let i = 0; i < barCount; i++) {
+            const bar = document.createElement('div');
+            bar.className = 'bar';
+            visualizer.appendChild(bar);
         }
-    );
-
-    function processMicrophoneInput(audioData) {
-        updateVisualizer(audioData);
+        bars = visualizer.getElementsByClassName('bar');
     }

-    function updateVisualizer(audioData) {
-        const bars = visualizer.getElementsByClassName('bar');
-        const bufferLength = audioData.length;
-        const barWidth = visualizer.clientWidth / bufferLength;
-
-        for (let i = 0; i < bufferLength; i++) {
-            const barHeight = Math.abs(audioData[i]) * 100;
-            if (bars[i]) {
-                bars[i].style.height = barHeight + '%';
-                bars[i].style.width = barWidth + 'px';
-            }
+    function updateVisualizer() {
+        analyser.getByteFrequencyData(dataArray);
+        for (let i = 0; i < bars.length; i++) {
+            const barHeight = dataArray[i] / 2;
+            bars[i].style.height = barHeight + 'px';
         }
+        animationId = requestAnimationFrame(updateVisualizer);
     }

     async function initializePipelines() {
@@ -363,8 +239,10 @@
             const botResponse = `I heard you say: "${transcription.text}".`;
             addLog(`Bot: ${botResponse}`);

+            isSpeaking = true;
             const speechOutput = await ttsPipeline(botResponse);
-            playback.addSamples(new Float32Array(speechOutput.audio));
+            await playAudio(speechOutput.audio);
+            isSpeaking = false;
         } catch (error) {
             console.error('Error processing speech:', error);
             addLog('System: Error processing speech. Please try again.');
@@ -381,6 +259,32 @@
         logsDiv.scrollTop = logsDiv.scrollHeight;
     }

+    function playAudio(audioArray) {
+        return new Promise((resolve) => {
+            const audioBuffer = audioContext.createBuffer(1, audioArray.length, 16000);
+            const channelData = audioBuffer.getChannelData(0);
+            channelData.set(audioArray);
+
+            const source = audioContext.createBufferSource();
+            currentAudioSource = source; // Store the current audio source
+            source.buffer = audioBuffer;
+            source.connect(analyser);
+            analyser.connect(audioContext.destination);
+            source.start();
+            source.onended = () => {
+                currentAudioSource = null;
+                resolve();
+            };
+        });
+    }
+
+    function stopCurrentAudio() {
+        if (currentAudioSource) {
+            currentAudioSource.stop();
+            currentAudioSource = null;
+        }
+    }
+
     async function toggleListening() {
         if (isListening) {
             await stopListening();
@@ -391,28 +295,35 @@

     async function startListening() {
         try {
-            isListening = true;
-            await streamer.start();
-            playback.start();
+            audioContext = new (window.AudioContext || window.webkitAudioContext)();
+            analyser = audioContext.createAnalyser();
+            analyser.fftSize = 128;
+            dataArray = new Uint8Array(analyser.frequencyBinCount);

             myvad = await vad.MicVAD.new({
                 onSpeechStart: () => {
                     addLog('--- vad: speech start');
-                    streamer.setUserIsSpeaking(true);
+                    updateVisualizer();
                     if (isSpeaking) {
                         addLog('User interrupted. Stopping bot speech.');
-                        playback.clear();
+                        stopCurrentAudio();
+                        isSpeaking = false;
                     }
                 },
                 onSpeechEnd: (audio) => {
                     addLog('--- vad: speech end');
-                    streamer.setUserIsSpeaking(false);
+                    cancelAnimationFrame(animationId);
                     processSpeech(audio);
                 }
             });

+            microphoneStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+            const source = audioContext.createMediaStreamSource(microphoneStream);
+            source.connect(analyser);
+
             await myvad.start();
             startButton.textContent = 'End Call';
+            isListening = true;
             addLog('System: Listening...');
         } catch (error) {
             console.error('Error starting VAD:', error);
@@ -422,14 +333,26 @@

     async function stopListening() {
         if (myvad) {
-            await myvad.destroy();
+            try {
+                await myvad.destroy();
+            } catch (error) {
+                console.error('Error stopping VAD:', error);
+            }
             myvad = null;
-        }
-        streamer.stop();
-        playback.stop();
+        }
+        if (microphoneStream) {
+            microphoneStream.getTracks().forEach(track => track.stop());
+            microphoneStream = null;
+        }
+        if (audioContext) {
+            await audioContext.close();
+            audioContext = null;
+        }
+        stopCurrentAudio();
         startButton.textContent = 'Begin Call';
         isListening = false;
         addLog('System: Stopped listening.');
+        cancelAnimationFrame(animationId);
         addLog('System: Microphone closed');
     }

@@ -438,15 +361,6 @@
         logsDiv.innerHTML = '';
     });

-    function createVisualizer() {
-        const barCount = 64;
-        for (let i = 0; i < barCount; i++) {
-            const bar = document.createElement('div');
-            bar.className = 'bar';
-            visualizer.appendChild(bar);
-        }
-    }
-
     createVisualizer();
     initializePipelines();
 </script>
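
Aside: the commit replaces ScriptProcessorNode streaming with one-shot AudioBufferSourceNode playback. A minimal, self-contained sketch of that pattern, assuming mono Float32Array samples at the 16 kHz rate hard-coded in playAudio above; the names playOnce, interrupt, and activeSource are illustrative and not part of the commit:

    let activeSource = null; // illustrative stand-in for the commit's currentAudioSource

    function playOnce(ctx, samples) {
        // Copy the PCM samples into a 1-channel AudioBuffer at 16 kHz.
        const buffer = ctx.createBuffer(1, samples.length, 16000);
        buffer.getChannelData(0).set(samples);
        const source = ctx.createBufferSource();
        source.buffer = buffer;
        source.connect(ctx.destination);
        activeSource = source;
        return new Promise((resolve) => {
            // onended fires on natural completion and after stop(),
            // so an awaiting caller resumes either way.
            source.onended = () => { activeSource = null; resolve(); };
            source.start();
        });
    }

    function interrupt() {
        if (activeSource) activeSource.stop(); // triggers onended above
    }

Awaiting playOnce(audioContext, speechOutput.audio) mirrors the await playAudio(...) call in processSpeech, and calling interrupt() from a VAD onSpeechStart callback gives the same barge-in behavior as stopCurrentAudio().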