KingNish committed
Commit 7fd8389
Parent: 3725ddf

Update script1.js

Files changed (1): script1.js (+34, -22)
script1.js CHANGED
@@ -1,12 +1,12 @@
 // Constants and Configuration
 const USER_SPEECH_INTERRUPT_DELAY = 500;
-const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
+const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
 const CHUNK_SIZE = 300;
-const MAX_PREFETCH_REQUESTS = 10;
+const MAX_PREFETCH_REQUESTS = 10;
 const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
 const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
-const WEBCAM_INTERVAL = 5000;
-const MAX_HISTORY_LENGTH = 6;
+const WEBCAM_INTERVAL = 5000;
+const MAX_HISTORY_LENGTH = 6;

 // DOM Elements
 const startStopButton = document.getElementById('startStopButton');
@@ -50,7 +50,7 @@ const audioCache = new Map();

 // Image Captioning State
 let isCaptioningEnabled = false;
-let lastCaption = "";
+let lastCaption = "";

 // Webcam Integration
 import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
@@ -155,8 +155,12 @@ const interruptAudioPlayback = (reason = 'unknown') => {
     requestAbortController = null;
   }

-  prefetchCache.clear();
-  prefetchQueue.length = 0;
+  // Clear prefetch cache and queue only if the interruption is due to user speech
+  if (reason === 'user is speaking' || reason === 'interim') {
+    prefetchCache.clear();
+    prefetchQueue.length = 0;
+  }
+
   updateActivityIndicators();
 };

@@ -165,12 +169,16 @@ const interruptAudioPlayback = (reason = 'unknown') => {

 // Prefetch and cache the first TTS audio chunk
 const prefetchFirstAudioChunk = (query, voice) => {
-  const normalizedQuery = normalizeQueryText(query);
+  let combinedQuery = `{USER: "${query}"}`;
+  if (lastCaption !== "") {
+    combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
+  }
+  const normalizedQuery = normalizeQueryText(combinedQuery); // Normalize combined query
   const cacheKey = generateCacheKey(normalizedQuery, voice, conversationHistory, modelSelectionDropdown.value);

   if (pendingPrefetchRequests.has(cacheKey) || prefetchCache.has(cacheKey)) return;

-  prefetchQueue.push({ query: query.trim(), voice, cacheKey });
+  prefetchQueue.push({ query: combinedQuery.trim(), voice, cacheKey }); // Use combined query
   processPrefetchQueue();
 };

@@ -197,7 +205,11 @@ const processPrefetchQueue = async () => {

 // Cancel pending prefetch requests
 const cancelPrefetchRequests = (query) => {
-  const normalizedQuery = normalizeQueryText(query);
+  let combinedQuery = `{USER: "${query}"}`;
+  if (lastCaption !== "") {
+    combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
+  }
+  const normalizedQuery = normalizeQueryText(combinedQuery); // Normalize combined query

   for (const [cacheKey, abortController] of pendingPrefetchRequests) {
     if (cacheKey.startsWith(normalizedQuery)) {
@@ -224,7 +236,7 @@ async function sendQueryToAI(query) {
   try {
     let combinedQuery = `{USER: "${query}"}`;
     if (lastCaption !== "") {
-      combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
+      combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
     }

     await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
@@ -243,8 +255,8 @@ const processSpeechTranscript = (transcript) => {
   const trimmedTranscript = transcript.trimStart();
   if (trimmedTranscript !== '' && !isRequestInProgress) {
     activeQuery = trimmedTranscript;
-    addToConversationHistory('user', activeQuery);
-    sendQueryToAI(activeQuery);
+    addToConversationHistory('user', activeQuery);
+    sendQueryToAI(activeQuery);
   }
 };

@@ -368,7 +380,7 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
       if (textContent) {
         if (!firstResponseTextTimestamp) firstResponseTextTimestamp = Date.now();

-        fullResponseText += textContent + " ";
+        fullResponseText += textContent + " ";
         fullResponseText2 += textContent + " ";
         textChunk += textContent + " ";
         transcriptDiv.textContent = fullResponseText2;
@@ -380,13 +392,13 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
             audioPlaybackQueue.push({ url: audioUrl });
             if (!currentAudio) playNextAudio();
           }
-          textChunk = "";
+          textChunk = "";
         }
       }
     }
   }

-    buffer = lines[lines.length - 1];
+    buffer = lines[lines.length - 1];
   }
 } catch (error) {
   console.error("Error in handleStreamingResponse:", error);
@@ -402,8 +414,8 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
   }

   addToConversationHistory('assistant', fullResponseText2);
-  fullResponseText = "";
-  fullResponseText2 = "";
+  fullResponseText = "";
+  fullResponseText2 = "";
   }
 };

@@ -502,13 +514,13 @@ if ('webkitSpeechRecognition' in window) {
     isSpeechRecognitionActive = false;
     startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
     clearInterval(webcamInterval);
-    video.srcObject = null;
+    video.srcObject = null;
     lastCaption = "";
   } else {
     speechRecognizer.start();
     isSpeechRecognitionActive = true;
     startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
-    isCaptioningEnabled = true;
+    isCaptioningEnabled = true;
     startWebcam();
   }
 });
@@ -525,7 +537,7 @@ async function startWebcam() {
   try {
     const stream = await navigator.mediaDevices.getUserMedia({ video: true });
     video.srcObject = stream;
-    webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL);
+    webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL);
   } catch (error) {
     console.error("Error accessing webcam: ", error);
   }
@@ -551,7 +563,7 @@ async function processWithGradio(imageBlob) {
     const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);

     const dataString = result.data[0];
-    lastCaption = dataString || lastCaption;
+    lastCaption = dataString || lastCaption;
   } catch (error) {
     console.error("Error processing with Gradio:", error);
   }
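Note: the commit repeats the same caption-injection snippet in prefetchFirstAudioChunk, cancelPrefetchRequests, and sendQueryToAI, which is what keeps the prefetch cache keys aligned with the query that is actually sent. A follow-up could hoist the snippet into one helper; a minimal sketch, assuming the file's existing globals (the buildCombinedQuery name is hypothetical, not part of this commit):

// Hypothetical helper (not in this commit): builds the query exactly as the
// three call sites now do, appending the last webcam caption between two
// copies of the user turn.
const buildCombinedQuery = (query) => {
  let combinedQuery = `{USER: "${query}"}`;
  if (lastCaption !== "") {
    combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
  }
  return combinedQuery;
};

// Each call site would then reduce to, e.g.:
// const normalizedQuery = normalizeQueryText(buildCombinedQuery(query));

A helper like this would also guarantee that the cacheKey.startsWith(normalizedQuery) check in cancelPrefetchRequests stays consistent with the keys prefetchFirstAudioChunk generates.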
 
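For context on the webcam path: the diff restores setInterval(captureAndProcessImage, WEBCAM_INTERVAL) and shows processWithGradio updating lastCaption from app.predict("/process_image", [handledFile, "More Detailed Caption"]). The capture side is not shown in this commit, so the following is an illustration only, assuming a standard canvas frame grab rather than the file's actual code:

// Sketch only: grab the current <video> frame and hand it to the captioner.
async function captureAndProcessImage() {
  if (!isCaptioningEnabled || !video.videoWidth) return; // captioning off or webcam not ready
  const canvas = document.createElement('canvas');
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  canvas.getContext('2d').drawImage(video, 0, 0);
  const imageBlob = await new Promise((resolve) => canvas.toBlob(resolve, 'image/jpeg'));
  await processWithGradio(imageBlob); // updates lastCaption on success
}

Under a loop like this, lastCaption refreshes every WEBCAM_INTERVAL (5 s) while captioning is enabled, which is what the combinedQuery construction above relies on.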