atlury commited on
Commit
2de9418
·
verified ·
1 Parent(s): ea5620d

Create index.backup4.html

Browse files
Files changed (1) hide show
  1. index.backup4.html +436 -0
index.backup4.html ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Voice Chat Bot with Advanced Echo Cancellation</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.18/dist/bundle.min.js"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2"></script>
10
+ <style>
11
+ body {
12
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
13
+ margin: 0;
14
+ padding: 20px;
15
+ background-color: #1a1a1a;
16
+ color: #f0f0f0;
17
+ }
18
+ .container {
19
+ max-width: 800px;
20
+ margin: 0 auto;
21
+ }
22
+ h1 {
23
+ color: #ffd700;
24
+ text-align: center;
25
+ margin-bottom: 10px;
26
+ }
27
+ .subtitle {
28
+ text-align: center;
29
+ color: #ffd700;
30
+ margin-bottom: 20px;
31
+ }
32
+ #chat-container {
33
+ display: flex;
34
+ flex-direction: column;
35
+ height: 70vh;
36
+ }
37
+ #conversation {
38
+ flex-grow: 1;
39
+ border: 1px solid #444;
40
+ padding: 10px;
41
+ overflow-y: scroll;
42
+ background-color: #2a2a2a;
43
+ border-radius: 5px;
44
+ margin-bottom: 20px;
45
+ }
46
+ #controls {
47
+ display: flex;
48
+ justify-content: center;
49
+ margin-bottom: 20px;
50
+ }
51
+ button {
52
+ font-size: 18px;
53
+ padding: 10px 20px;
54
+ background-color: #ffd700;
55
+ color: #1a1a1a;
56
+ border: none;
57
+ border-radius: 5px;
58
+ cursor: pointer;
59
+ transition: background-color 0.3s;
60
+ }
61
+ button:hover {
62
+ background-color: #ffec8b;
63
+ }
64
+ button:disabled {
65
+ background-color: #666;
66
+ cursor: not-allowed;
67
+ }
68
+ #visualizer {
69
+ width: 100%;
70
+ height: 100px;
71
+ background-color: #2a2a2a;
72
+ border-radius: 5px;
73
+ overflow: hidden;
74
+ margin-bottom: 20px;
75
+ }
76
+ .bar {
77
+ width: 5px;
78
+ height: 100%;
79
+ background-color: #ffd700;
80
+ display: inline-block;
81
+ margin-right: 1px;
82
+ }
83
+ #loading {
84
+ position: fixed;
85
+ top: 0;
86
+ left: 0;
87
+ width: 100%;
88
+ height: 100%;
89
+ background-color: rgba(0, 0, 0, 0.8);
90
+ display: flex;
91
+ justify-content: center;
92
+ align-items: center;
93
+ z-index: 1000;
94
+ }
95
+ .spinner {
96
+ width: 50px;
97
+ height: 50px;
98
+ border: 5px solid #f3f3f3;
99
+ border-top: 5px solid #ffd700;
100
+ border-radius: 50%;
101
+ animation: spin 1s linear infinite;
102
+ }
103
+ @keyframes spin {
104
+ 0% { transform: rotate(0deg); }
105
+ 100% { transform: rotate(360deg); }
106
+ }
107
+ #configuration {
108
+ margin-bottom: 20px;
109
+ }
110
+ select {
111
+ width: 100%;
112
+ padding: 10px;
113
+ font-size: 16px;
114
+ background-color: #2a2a2a;
115
+ color: #f0f0f0;
116
+ border: 1px solid #444;
117
+ border-radius: 5px;
118
+ }
119
+ #model-info {
120
+ margin-top: 10px;
121
+ font-size: 14px;
122
+ color: #aaa;
123
+ }
124
+ #logs {
125
+ background-color: #2a2a2a;
126
+ border: 1px solid #444;
127
+ border-radius: 5px;
128
+ padding: 10px;
129
+ height: 200px;
130
+ overflow-y: scroll;
131
+ font-family: monospace;
132
+ font-size: 14px;
133
+ }
134
+ #clear-logs {
135
+ margin-top: 10px;
136
+ font-size: 14px;
137
+ padding: 5px 10px;
138
+ }
139
+ #localVideo, #remoteVideo {
140
+ display: none;
141
+ }
142
+ </style>
143
+ </head>
144
+ <body>
145
+ <div id="loading">
146
+ <div class="spinner"></div>
147
+ </div>
148
+ <div class="container">
149
+ <h1>Voice Chat Bot Demo</h1>
150
+ <p class="subtitle">For best results, use headphones.</p>
151
+ <div id="chat-container">
152
+ <div id="controls">
153
+ <button id="startButton" disabled>Begin Call</button>
154
+ </div>
155
+ <div id="configuration">
156
+ <select id="configSelect">
157
+ <option value="fastest">Fastest</option>
158
+ <option value="balanced">Balanced</option>
159
+ <option value="quality">Highest Quality</option>
160
+ </select>
161
+ <div id="model-info">
162
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
163
+ </div>
164
+ </div>
165
+ <div id="visualizer"></div>
166
+ <div id="conversation"></div>
167
+ </div>
168
+ <h2>Logs</h2>
169
+ <div id="logs"></div>
170
+ <button id="clear-logs">Clear</button>
171
+ </div>
172
+ <video id="localVideo" autoplay></video>
173
+ <video id="remoteVideo" autoplay></video>
174
+
175
+ <script type="module">
176
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
177
+
178
+ env.localModelPath = './models';
179
+
180
+ const conversationDiv = document.getElementById('conversation');
181
+ const startButton = document.getElementById('startButton');
182
+ const visualizer = document.getElementById('visualizer');
183
+ const loadingDiv = document.getElementById('loading');
184
+ const logsDiv = document.getElementById('logs');
185
+ const clearLogsButton = document.getElementById('clear-logs');
186
+ const localVideo = document.getElementById('localVideo');
187
+ const remoteVideo = document.getElementById('remoteVideo');
188
+
189
+ let myvad;
190
+ let sttPipeline;
191
+ let ttsPipeline;
192
+ let audioContext;
193
+ let analyser;
194
+ let dataArray;
195
+ let bars;
196
+ let animationId;
197
+ let isListening = false;
198
+ let microphoneStream;
199
+ let isSpeaking = false;
200
+ let currentAudioSource = null;
201
+ let rtcConnection = null;
202
+ let rtcLoopbackConnection = null;
203
+ let loopbackStream = new MediaStream();
204
+
205
// Fill the #visualizer container with fixed-width bar <div>s and cache the
// live HTMLCollection so updateVisualizer() can resize them each frame.
function createVisualizer() {
    const BAR_COUNT = 64;
    for (let i = 0; i < BAR_COUNT; i++) {
        const bar = document.createElement('div');
        bar.className = 'bar';
        visualizer.appendChild(bar);
    }
    // Live collection: stays in sync with the DOM.
    bars = visualizer.getElementsByClassName('bar');
}
214
+
215
// Animation-frame loop: sample the analyser's frequency bins and map each
// bin (halved, in px) onto the height of the matching visualizer bar.
function updateVisualizer() {
    analyser.getByteFrequencyData(dataArray);
    Array.prototype.forEach.call(bars, (bar, i) => {
        bar.style.height = `${dataArray[i] / 2}px`;
    });
    animationId = requestAnimationFrame(updateVisualizer);
}
223
+
224
// Load the speech-to-text and text-to-speech pipelines, then enable the UI.
// On failure the error is logged and surfaced to the user; the loading
// overlay is dismissed either way.
async function initializePipelines() {
    try {
        sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
        ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: false });
        addLog('System: Voice Chat Bot initialized. Click "Begin Call" to start.');
        startButton.disabled = false;
    } catch (error) {
        console.error('Error initializing pipelines:', error);
        addLog('System: Error initializing Voice Chat Bot. Please check the console for details.');
    } finally {
        // Original code hid the spinner on both paths; a finally keeps that.
        loadingDiv.style.display = 'none';
    }
}
239
+
240
// Transcribe one captured utterance, log a canned bot reply, and speak it.
// `audio` is the PCM sample buffer handed over by the VAD's onSpeechEnd.
// Errors are logged and reported to the user; they never propagate.
async function processSpeech(audio) {
    try {
        if (!sttPipeline || !ttsPipeline) {
            throw new Error('Pipelines not initialized');
        }

        const transcription = await sttPipeline(audio);
        // FIX: these three interpolated strings were missing their
        // template-literal backticks, which is a syntax error as written.
        addLog(`User: ${transcription.text}`);

        const botResponse = `I heard you say: "${transcription.text}".`;
        addLog(`Bot: ${botResponse}`);

        isSpeaking = true;
        try {
            const speechOutput = await ttsPipeline(botResponse);
            await playAudio(speechOutput.audio);
        } finally {
            // FIX: previously a TTS/playback failure left isSpeaking stuck
            // at true, so later VAD speech-starts would "interrupt" nothing.
            isSpeaking = false;
        }
    } catch (error) {
        console.error('Error processing speech:', error);
        addLog('System: Error processing speech. Please try again.');
    }
}
261
+
262
// Append a timestamped line to the on-page log panel and keep it scrolled
// to the newest entry. Uses textContent, so messages are not parsed as HTML.
function addLog(message) {
    const timestamp = new Date().toLocaleTimeString();
    // FIX: this template literal was missing its backticks
    // (`const logMessage = [${timestamp}] ${message};`), a ReferenceError
    // at runtime instead of the intended "[HH:MM:SS] message" string.
    const logMessage = `[${timestamp}] ${message}`;
    const messageElement = document.createElement('div');
    messageElement.textContent = logMessage;
    logsDiv.appendChild(messageElement);
    logsDiv.scrollTop = logsDiv.scrollHeight;
}
271
+
272
// Play a mono Float32 sample buffer at 16 kHz through the analyser (so the
// visualizer reacts) and out to the speakers. Resolves when playback ends;
// the active source is tracked in currentAudioSource so it can be cut short.
function playAudio(audioArray) {
    return new Promise((resolve) => {
        const buffer = audioContext.createBuffer(1, audioArray.length, 16000);
        buffer.getChannelData(0).set(audioArray);

        const source = audioContext.createBufferSource();
        currentAudioSource = source;
        source.buffer = buffer;
        source.connect(analyser);
        analyser.connect(audioContext.destination);
        source.onended = () => {
            currentAudioSource = null;
            resolve();
        };
        source.start();
    });
}
290
+
291
// Halt bot playback immediately (used when the user barges in).
// No-op when nothing is playing.
function stopCurrentAudio() {
    if (!currentAudioSource) return;
    currentAudioSource.stop();
    currentAudioSource = null;
}
297
+
298
// Flip the call state: tear the session down if live, otherwise start one.
async function toggleListening() {
    await (isListening ? stopListening() : startListening());
}
305
+
306
// Begin a "call": capture the microphone, route it through a local WebRTC
// loopback (so the browser's echo-cancellation pipeline processes it), feed
// the result to the analyser/visualizer, and arm the voice-activity detector.
async function startListening() {
    try {
        audioContext = new (window.AudioContext || window.webkitAudioContext)();
        analyser = audioContext.createAnalyser();
        analyser.fftSize = 128;
        dataArray = new Uint8Array(analyser.frequencyBinCount);

        // The hidden <video> elements must never emit sound themselves.
        for (const video of [localVideo, remoteVideo]) {
            video.volume = 0;
            video.muted = true;
        }

        // Capture audio plus a 1x1 video track (minimal video alongside
        // audio for the echo-cancellation path, per the original comment).
        microphoneStream = await navigator.mediaDevices.getUserMedia({
            audio: true,
            video: { width: 1, height: 1 }
        });

        localVideo.srcObject = microphoneStream;
        await localVideo.play();

        console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
        console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());

        // Loopback hack: a pair of in-page peer connections whose only job
        // is to run the mic stream through the WebRTC AEC machinery.
        rtcConnection = new RTCPeerConnection();
        rtcLoopbackConnection = new RTCPeerConnection();

        rtcConnection.onicecandidate = (e) => {
            if (e.candidate) rtcLoopbackConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
        };
        rtcLoopbackConnection.onicecandidate = (e) => {
            if (e.candidate) rtcConnection.addIceCandidate(new RTCIceCandidate(e.candidate));
        };
        rtcLoopbackConnection.ontrack = (e) => {
            for (const track of e.streams[0].getTracks()) loopbackStream.addTrack(track);
        };

        for (const track of microphoneStream.getTracks()) {
            rtcConnection.addTrack(track, microphoneStream);
        }

        // Standard offer/answer dance between the two local peers.
        const offer = await rtcConnection.createOffer({
            offerToReceiveAudio: true,
            offerToReceiveVideo: false,
        });
        await rtcConnection.setLocalDescription(offer);
        await rtcLoopbackConnection.setRemoteDescription(offer);
        const answer = await rtcLoopbackConnection.createAnswer();
        await rtcLoopbackConnection.setLocalDescription(answer);
        await rtcConnection.setRemoteDescription(answer);

        // Drive the visualizer from the echo-cancelled loopback stream.
        audioContext.createMediaStreamSource(loopbackStream).connect(analyser);

        myvad = await vad.MicVAD.new({
            onSpeechStart: () => {
                addLog('--- vad: speech start');
                updateVisualizer();
                if (isSpeaking) {
                    // Barge-in: the user talked over the bot.
                    addLog('User interrupted. Stopping bot speech.');
                    stopCurrentAudio();
                    isSpeaking = false;
                }
            },
            onSpeechEnd: (audio) => {
                addLog('--- vad: speech end');
                cancelAnimationFrame(animationId);
                processSpeech(audio);
            }
        });

        await myvad.start();
        startButton.textContent = 'End Call';
        isListening = true;
        addLog('System: Listening...');
    } catch (error) {
        console.error('Error starting VAD:', error);
        addLog('System: Error starting voice detection. Please check your microphone and try again.');
    }
}
386
+
387
// Tear down the call: destroy the VAD, stop mic tracks, close the audio
// context and both loopback peer connections, detach the video elements,
// and reset UI/state so a fresh startListening() can follow.
async function stopListening() {
    if (myvad) {
        try {
            await myvad.destroy();
        } catch (error) {
            console.error('Error stopping VAD:', error);
        }
        myvad = null;
    }

    if (microphoneStream) {
        for (const track of microphoneStream.getTracks()) track.stop();
        microphoneStream = null;
    }

    if (audioContext) {
        await audioContext.close();
        audioContext = null;
    }

    if (localVideo) localVideo.srcObject = null;
    if (remoteVideo) remoteVideo.srcObject = null;

    if (rtcConnection) {
        rtcConnection.close();
        rtcConnection = null;
    }
    if (rtcLoopbackConnection) {
        rtcLoopbackConnection.close();
        rtcLoopbackConnection = null;
    }

    // Fresh MediaStream so stale loopback tracks don't linger on restart.
    loopbackStream = new MediaStream();
    stopCurrentAudio();

    startButton.textContent = 'Begin Call';
    isListening = false;
    addLog('System: Stopped listening.');
    cancelAnimationFrame(animationId);
    addLog('System: Microphone closed');
}
426
+
427
// Wire up UI controls, build the visualizer bars, and kick off model loading.
startButton.addEventListener('click', toggleListening);
clearLogsButton.addEventListener('click', () => {
    logsDiv.innerHTML = '';
});

createVisualizer();
initializePipelines();
434
+ </script>
435
+ </body>
436
+ </html>