Spaces:
Running
Running
Update script1.js
Browse files- script1.js +34 -22
script1.js
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
// Constants and Configuration
|
2 |
const USER_SPEECH_INTERRUPT_DELAY = 500;
|
3 |
-
const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
|
4 |
const CHUNK_SIZE = 300;
|
5 |
-
const MAX_PREFETCH_REQUESTS = 10;
|
6 |
const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
|
7 |
const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
|
8 |
-
const WEBCAM_INTERVAL = 5000;
|
9 |
-
const MAX_HISTORY_LENGTH = 6;
|
10 |
|
11 |
// DOM Elements
|
12 |
const startStopButton = document.getElementById('startStopButton');
|
@@ -50,7 +50,7 @@ const audioCache = new Map();
|
|
50 |
|
51 |
// Image Captioning State
|
52 |
let isCaptioningEnabled = false;
|
53 |
-
let lastCaption = "";
|
54 |
|
55 |
// Webcam Integration
|
56 |
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
|
@@ -155,8 +155,12 @@ const interruptAudioPlayback = (reason = 'unknown') => {
|
|
155 |
requestAbortController = null;
|
156 |
}
|
157 |
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
160 |
updateActivityIndicators();
|
161 |
};
|
162 |
|
@@ -165,12 +169,16 @@ const interruptAudioPlayback = (reason = 'unknown') => {
|
|
165 |
|
166 |
// Prefetch and cache the first TTS audio chunk
|
167 |
const prefetchFirstAudioChunk = (query, voice) => {
|
168 |
-
|
|
|
|
|
|
|
|
|
169 |
const cacheKey = generateCacheKey(normalizedQuery, voice, conversationHistory, modelSelectionDropdown.value);
|
170 |
|
171 |
if (pendingPrefetchRequests.has(cacheKey) || prefetchCache.has(cacheKey)) return;
|
172 |
|
173 |
-
prefetchQueue.push({ query:
|
174 |
processPrefetchQueue();
|
175 |
};
|
176 |
|
@@ -197,7 +205,11 @@ const processPrefetchQueue = async () => {
|
|
197 |
|
198 |
// Cancel pending prefetch requests
|
199 |
const cancelPrefetchRequests = (query) => {
|
200 |
-
|
|
|
|
|
|
|
|
|
201 |
|
202 |
for (const [cacheKey, abortController] of pendingPrefetchRequests) {
|
203 |
if (cacheKey.startsWith(normalizedQuery)) {
|
@@ -224,7 +236,7 @@ async function sendQueryToAI(query) {
|
|
224 |
try {
|
225 |
let combinedQuery = `{USER: "${query}"}`;
|
226 |
if (lastCaption !== "") {
|
227 |
-
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
228 |
}
|
229 |
|
230 |
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
@@ -243,8 +255,8 @@ const processSpeechTranscript = (transcript) => {
|
|
243 |
const trimmedTranscript = transcript.trimStart();
|
244 |
if (trimmedTranscript !== '' && !isRequestInProgress) {
|
245 |
activeQuery = trimmedTranscript;
|
246 |
-
addToConversationHistory('user', activeQuery);
|
247 |
-
sendQueryToAI(activeQuery);
|
248 |
}
|
249 |
};
|
250 |
|
@@ -368,7 +380,7 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
|
|
368 |
if (textContent) {
|
369 |
if (!firstResponseTextTimestamp) firstResponseTextTimestamp = Date.now();
|
370 |
|
371 |
-
fullResponseText += textContent + " ";
|
372 |
fullResponseText2 += textContent + " ";
|
373 |
textChunk += textContent + " ";
|
374 |
transcriptDiv.textContent = fullResponseText2;
|
@@ -380,13 +392,13 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
|
|
380 |
audioPlaybackQueue.push({ url: audioUrl });
|
381 |
if (!currentAudio) playNextAudio();
|
382 |
}
|
383 |
-
textChunk = "";
|
384 |
}
|
385 |
}
|
386 |
}
|
387 |
}
|
388 |
|
389 |
-
buffer = lines[lines.length - 1];
|
390 |
}
|
391 |
} catch (error) {
|
392 |
console.error("Error in handleStreamingResponse:", error);
|
@@ -402,8 +414,8 @@ const handleStreamingResponse = async (responseStream, voice, abortSignal) => {
|
|
402 |
}
|
403 |
|
404 |
addToConversationHistory('assistant', fullResponseText2);
|
405 |
-
fullResponseText = "";
|
406 |
-
fullResponseText2 = "";
|
407 |
}
|
408 |
};
|
409 |
|
@@ -502,13 +514,13 @@ if ('webkitSpeechRecognition' in window) {
|
|
502 |
isSpeechRecognitionActive = false;
|
503 |
startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
|
504 |
clearInterval(webcamInterval);
|
505 |
-
video.srcObject = null;
|
506 |
lastCaption = "";
|
507 |
} else {
|
508 |
speechRecognizer.start();
|
509 |
isSpeechRecognitionActive = true;
|
510 |
startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
|
511 |
-
isCaptioningEnabled = true;
|
512 |
startWebcam();
|
513 |
}
|
514 |
});
|
@@ -525,7 +537,7 @@ async function startWebcam() {
|
|
525 |
try {
|
526 |
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
527 |
video.srcObject = stream;
|
528 |
-
webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL);
|
529 |
} catch (error) {
|
530 |
console.error("Error accessing webcam: ", error);
|
531 |
}
|
@@ -551,7 +563,7 @@ async function processWithGradio(imageBlob) {
|
|
551 |
const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);
|
552 |
|
553 |
const dataString = result.data[0];
|
554 |
-
lastCaption = dataString || lastCaption;
|
555 |
} catch (error) {
|
556 |
console.error("Error processing with Gradio:", error);
|
557 |
}
|
|
|
1 |
// Constants and Configuration
|
2 |
const USER_SPEECH_INTERRUPT_DELAY = 500;
|
3 |
+
const TEXT_TO_SPEECH_API_ENDPOINT = "https://api.streamelements.com/kappa/v2/speech";
|
4 |
const CHUNK_SIZE = 300;
|
5 |
+
const MAX_PREFETCH_REQUESTS = 10;
|
6 |
const PREFETCH_CACHE_EXPIRATION = 60000; // 1 minute
|
7 |
const AUDIO_CACHE_EXPIRATION = 3600000; // 1 hour
|
8 |
+
const WEBCAM_INTERVAL = 5000;
|
9 |
+
const MAX_HISTORY_LENGTH = 6;
|
10 |
|
11 |
// DOM Elements
|
12 |
const startStopButton = document.getElementById('startStopButton');
|
|
|
50 |
|
51 |
// Image Captioning State
|
52 |
let isCaptioningEnabled = false;
|
53 |
+
let lastCaption = "";
|
54 |
|
55 |
// Webcam Integration
|
56 |
import { client, handle_file } from 'https://cdn.jsdelivr.net/npm/@gradio/client/+esm';
|
|
|
155 |
requestAbortController = null;
|
156 |
}
|
157 |
|
158 |
+
// Clear prefetch cache and queue only if the interruption is due to user speech
|
159 |
+
if (reason === 'user is speaking' || reason === 'interim') {
|
160 |
+
prefetchCache.clear();
|
161 |
+
prefetchQueue.length = 0;
|
162 |
+
}
|
163 |
+
|
164 |
updateActivityIndicators();
|
165 |
};
|
166 |
|
|
|
169 |
|
170 |
// Prefetch and cache the first TTS audio chunk
|
171 |
const prefetchFirstAudioChunk = (query, voice) => {
|
172 |
+
let combinedQuery = `{USER: "${query}"}`;
|
173 |
+
if (lastCaption !== "") {
|
174 |
+
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
175 |
+
}
|
176 |
+
const normalizedQuery = normalizeQueryText(combinedQuery); // Normalize combined query
|
177 |
const cacheKey = generateCacheKey(normalizedQuery, voice, conversationHistory, modelSelectionDropdown.value);
|
178 |
|
179 |
if (pendingPrefetchRequests.has(cacheKey) || prefetchCache.has(cacheKey)) return;
|
180 |
|
181 |
+
prefetchQueue.push({ query: combinedQuery.trim(), voice, cacheKey }); // Use combined query
|
182 |
processPrefetchQueue();
|
183 |
};
|
184 |
|
|
|
205 |
|
206 |
// Cancel pending prefetch requests
|
207 |
const cancelPrefetchRequests = (query) => {
|
208 |
+
let combinedQuery = `{USER: "${query}"}`;
|
209 |
+
if (lastCaption !== "") {
|
210 |
+
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
211 |
+
}
|
212 |
+
const normalizedQuery = normalizeQueryText(combinedQuery); // Normalize combined query
|
213 |
|
214 |
for (const [cacheKey, abortController] of pendingPrefetchRequests) {
|
215 |
if (cacheKey.startsWith(normalizedQuery)) {
|
|
|
236 |
try {
|
237 |
let combinedQuery = `{USER: "${query}"}`;
|
238 |
if (lastCaption !== "") {
|
239 |
+
combinedQuery += `, ${lastCaption} , {USER: "${query}"}`;
|
240 |
}
|
241 |
|
242 |
await streamAndHandleAudioResponse(combinedQuery, voiceSelectionDropdown.value, requestAbortController.signal);
|
|
|
255 |
const trimmedTranscript = transcript.trimStart();
|
256 |
if (trimmedTranscript !== '' && !isRequestInProgress) {
|
257 |
activeQuery = trimmedTranscript;
|
258 |
+
addToConversationHistory('user', activeQuery);
|
259 |
+
sendQueryToAI(activeQuery);
|
260 |
}
|
261 |
};
|
262 |
|
|
|
380 |
if (textContent) {
|
381 |
if (!firstResponseTextTimestamp) firstResponseTextTimestamp = Date.now();
|
382 |
|
383 |
+
fullResponseText += textContent + " ";
|
384 |
fullResponseText2 += textContent + " ";
|
385 |
textChunk += textContent + " ";
|
386 |
transcriptDiv.textContent = fullResponseText2;
|
|
|
392 |
audioPlaybackQueue.push({ url: audioUrl });
|
393 |
if (!currentAudio) playNextAudio();
|
394 |
}
|
395 |
+
textChunk = "";
|
396 |
}
|
397 |
}
|
398 |
}
|
399 |
}
|
400 |
|
401 |
+
buffer = lines[lines.length - 1];
|
402 |
}
|
403 |
} catch (error) {
|
404 |
console.error("Error in handleStreamingResponse:", error);
|
|
|
414 |
}
|
415 |
|
416 |
addToConversationHistory('assistant', fullResponseText2);
|
417 |
+
fullResponseText = "";
|
418 |
+
fullResponseText2 = "";
|
419 |
}
|
420 |
};
|
421 |
|
|
|
514 |
isSpeechRecognitionActive = false;
|
515 |
startStopButton.innerHTML = '<svg id="microphoneIcon" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Start Listening';
|
516 |
clearInterval(webcamInterval);
|
517 |
+
video.srcObject = null;
|
518 |
lastCaption = "";
|
519 |
} else {
|
520 |
speechRecognizer.start();
|
521 |
isSpeechRecognitionActive = true;
|
522 |
startStopButton.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M9 9h6v6h-6z"></path><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path><path d="M19 10v2a7 7 0 0 1-14 0v-2"></path><line x1="12" y1="19" x2="12" y2="23"></line><line x1="8" y1="23" x2="16" y2="23"></line></svg> Stop Listening';
|
523 |
+
isCaptioningEnabled = true;
|
524 |
startWebcam();
|
525 |
}
|
526 |
});
|
|
|
537 |
try {
|
538 |
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
539 |
video.srcObject = stream;
|
540 |
+
webcamInterval = setInterval(captureAndProcessImage, WEBCAM_INTERVAL);
|
541 |
} catch (error) {
|
542 |
console.error("Error accessing webcam: ", error);
|
543 |
}
|
|
|
563 |
const result = await app.predict("/process_image", [handledFile, "More Detailed Caption"]);
|
564 |
|
565 |
const dataString = result.data[0];
|
566 |
+
lastCaption = dataString || lastCaption;
|
567 |
} catch (error) {
|
568 |
console.error("Error processing with Gradio:", error);
|
569 |
}
|