Spaces:
No application file
No application file
/**
 * Playback Engine - Unified state machine for lecture playback and live discussion
 *
 * Consumes Scene.actions[] directly via ActionEngine.
 * No intermediate compile step - actions are executed as-is.
 *
 * State machine:
 *
 *               start()                pause()
 *   idle -------------------> playing -------------> paused
 *     ^                          ^                      |
 *     |                          |       resume()       |
 *     |                          +----------------------+
 *     |
 *     | handleEndDiscussion()
 *     |                       confirmDiscussion()
 *     |                       / handleUserInterrupt()
 *     |                               |
 *     |                               v        pause()
 *     +----------------------------- live -----------> paused
 *                                      ^                  |
 *                                      | resume / user msg|
 *                                      +------------------+
 */
| import type { Scene } from '@/lib/types/stage'; | |
| import type { Action, SpeechAction, DiscussionAction } from '@/lib/types/action'; | |
| import type { | |
| EngineMode, | |
| TopicState, | |
| PlaybackEngineCallbacks, | |
| PlaybackSnapshot, | |
| TriggerEvent, | |
| Effect, | |
| } from './types'; | |
| import type { AudioPlayer } from '@/lib/utils/audio-player'; | |
| import { ActionEngine } from '@/lib/action/engine'; | |
| import { useCanvasStore } from '@/lib/store/canvas'; | |
| import { useSettingsStore } from '@/lib/store/settings'; | |
| import { createLogger } from '@/lib/logger'; | |
| const log = createLogger('PlaybackEngine'); | |
| /** | |
| * If more than 30% of characters are CJK, treat the text as Chinese. | |
| * Intentionally low: mixed Chinese text often contains punctuation, | |
| * numbers, and short Latin fragments (e.g. "AIθ―Ύε "). | |
| */ | |
| const CJK_LANG_THRESHOLD = 0.3; | |
/**
 * Unified state machine driving scripted lecture playback and live discussion.
 *
 * The engine walks `scenes[sceneIndex].actions[actionIndex]` one action at a
 * time. Speech actions block until audio / browser TTS / the reading timer
 * finishes; visual effects are fire-and-forget; whiteboard and video actions
 * are awaited; discussion actions show a ProactiveCard after a short delay and
 * wait for `confirmDiscussion()` or `skipDiscussion()`.
 *
 * Modes: `idle`, `playing`, `paused`, `live` (see the public methods for the
 * transitions each one performs).
 */
export class PlaybackEngine {
  /** Delay before a discussion's ProactiveCard is shown. */
  private static readonly PROACTIVE_CARD_DELAY_MS = 3000;
  /** Upper bound on how long we wait for the browser to report its voices. */
  private static readonly VOICES_TIMEOUT_MS = 2000;

  private scenes: Scene[] = [];
  private sceneIndex: number = 0;
  private actionIndex: number = 0;
  private mode: EngineMode = 'idle';
  private consumedDiscussions: Set<string> = new Set();

  // Lecture cursor saved while a discussion is in progress
  private savedSceneIndex: number | null = null;
  private savedActionIndex: number | null = null;

  // Discussion topic state
  private currentTopicState: TopicState | null = null;

  // Dependencies
  private audioPlayer: AudioPlayer;
  private actionEngine: ActionEngine;
  private callbacks: PlaybackEngineCallbacks;

  // Scene identity (used to reject snapshots taken for a different scene)
  private sceneId: string | undefined;

  // Trigger currently displayed on the ProactiveCard (null when none is shown)
  private currentTrigger: TriggerEvent | null = null;
  // Trigger whose display delay is running, or was cut short by pause()
  private pendingTrigger: TriggerEvent | null = null;
  private triggerDelayTimer: ReturnType<typeof setTimeout> | null = null;

  // Reading-time timer for speech actions without pre-generated audio (TTS disabled)
  private speechTimer: ReturnType<typeof setTimeout> | null = null;
  private speechTimerStart: number = 0; // Date.now() when the timer was (re)scheduled
  private speechTimerRemaining: number = 0; // ms left as of speechTimerStart
  // Bumped whenever the in-flight speech action is superseded (new speech,
  // stop, user interrupt) so late audio-load results can be ignored.
  private speechToken: number = 0;

  // Browser-native TTS state (Web Speech API)
  private browserTTSActive: boolean = false;
  private browserTTSChunks: string[] = []; // sentence-level chunks for sequential playback
  private browserTTSChunkIndex: number = 0; // current chunk being spoken
  private browserTTSPausedChunks: string[] = []; // remaining chunks saved on pause (cancel + re-speak)
  // Bumped on every pause/cancel so callbacks of abandoned utterances are ignored.
  private browserTTSGeneration: number = 0;
  private cachedVoices: SpeechSynthesisVoice[] | null = null;

  /**
   * @param scenes Scenes to play, in order. The first scene's id is recorded as the engine's scene identity.
   * @param actionEngine Executes visual/whiteboard actions and clears effects.
   * @param audioPlayer Plays pre-generated speech audio.
   * @param callbacks Optional hooks invoked on state changes and playback events.
   */
  constructor(
    scenes: Scene[],
    actionEngine: ActionEngine,
    audioPlayer: AudioPlayer,
    callbacks: PlaybackEngineCallbacks = {},
  ) {
    this.scenes = scenes;
    this.sceneId = scenes[0]?.id;
    this.actionEngine = actionEngine;
    this.audioPlayer = audioPlayer;
    this.callbacks = callbacks;
  }

  // ==================== Public API ====================

  /** Get the current engine mode */
  getMode(): EngineMode {
    return this.mode;
  }

  /** Export a serializable playback snapshot (cursor, consumed discussions, scene id). */
  getSnapshot(): PlaybackSnapshot {
    return {
      sceneIndex: this.sceneIndex,
      actionIndex: this.actionIndex,
      consumedDiscussions: [...this.consumedDiscussions],
      sceneId: this.sceneId,
    };
  }

  /**
   * Restore playback position from a snapshot.
   *
   * A snapshot carrying a scene id that differs from this engine's is ignored.
   * Negative or non-integer indices are reset to 0 so the cursor can never
   * point at a non-existent array slot.
   */
  restoreFromSnapshot(snapshot: PlaybackSnapshot): void {
    if (
      snapshot.sceneId != null &&
      this.sceneId != null &&
      snapshot.sceneId !== this.sceneId
    ) {
      log.warn('Ignoring snapshot for a different scene:', snapshot.sceneId);
      return;
    }
    this.sceneIndex = PlaybackEngine.toCursorIndex(snapshot.sceneIndex);
    this.actionIndex = PlaybackEngine.toCursorIndex(snapshot.actionIndex);
    this.consumedDiscussions = new Set(snapshot.consumedDiscussions ?? []);
  }

  /** idle -> playing (from the beginning) */
  start(): void {
    if (this.mode !== 'idle') {
      log.warn('Cannot start: not idle, current mode:', this.mode);
      return;
    }
    this.sceneIndex = 0;
    this.actionIndex = 0;
    this.setMode('playing');
    void this.processNext();
  }

  /** idle -> playing (continue from the current position, e.g. after a discussion ended) */
  continuePlayback(): void {
    if (this.mode !== 'idle') {
      log.warn('Cannot continue: not idle, current mode:', this.mode);
      return;
    }
    this.setMode('playing');
    void this.processNext();
  }

  /** playing -> paused | live -> paused (topic becomes pending; caller aborts SSE) */
  pause(): void {
    if (this.mode === 'playing') {
      // Stop the ProactiveCard delay. pendingTrigger is kept so resume() can
      // restart the delay instead of silently dropping the discussion.
      this.clearTriggerDelayTimer();
      if (this.speechTimer) {
        // Save remaining time so resume() can reschedule
        this.speechTimerRemaining = Math.max(
          0,
          this.speechTimerRemaining - (Date.now() - this.speechTimerStart),
        );
        clearTimeout(this.speechTimer);
        this.speechTimer = null;
      }
      this.setMode('paused');
      // Freeze TTS - but skip if waiting on ProactiveCard (no active speech)
      if (!this.currentTrigger) {
        if (this.browserTTSActive) {
          // Cancel + re-speak pattern: save remaining chunks for resume.
          // speechSynthesis.pause()/resume() is broken on Firefox, so we
          // cancel now and re-speak from the current chunk onward on resume.
          this.browserTTSPausedChunks = this.browserTTSChunks.slice(this.browserTTSChunkIndex);
          // Invalidate callbacks of the utterance we are about to cancel.
          this.browserTTSGeneration++;
          window.speechSynthesis?.cancel();
        } else if (this.audioPlayer.isPlaying()) {
          this.audioPlayer.pause();
        }
      }
    } else if (this.mode === 'live') {
      this.setMode('paused');
      this.currentTopicState = 'pending';
      // Caller is responsible for aborting SSE
    } else {
      log.warn('Cannot pause: mode is', this.mode);
    }
  }

  /** paused -> playing (lecture resumes) | paused (in discussion) -> live */
  resume(): void {
    if (this.mode !== 'paused') {
      log.warn('Cannot resume: not paused, mode is', this.mode);
      return;
    }
    if (this.currentTopicState === 'pending') {
      // Resume discussion -> live
      this.currentTopicState = 'active';
      this.setMode('live');
      return;
    }
    if (this.currentTrigger) {
      // Waiting on ProactiveCard - just resume mode, don't touch audio
      this.setMode('playing');
      return;
    }

    // Resume lecture
    this.setMode('playing');
    if (this.pendingTrigger) {
      // Paused during the ProactiveCard delay - restart the delay
      this.scheduleTrigger(this.pendingTrigger);
    } else if (this.browserTTSPausedChunks.length > 0) {
      // Browser TTS was paused via cancel - re-speak remaining chunks
      this.browserTTSActive = true;
      this.browserTTSChunks = this.browserTTSPausedChunks;
      this.browserTTSChunkIndex = 0;
      this.browserTTSPausedChunks = [];
      void this.playBrowserTTSChunk();
    } else if (this.audioPlayer.hasActiveAudio()) {
      // Audio is paused - resume it; its onEnded handler continues playback
      this.audioPlayer.resume();
    } else if (this.speechTimerRemaining > 0) {
      // Reading timer was paused - reschedule with remaining time
      this.startSpeechTimer(this.speechTimerRemaining);
    } else {
      // Speech finished while paused, continue to next action
      void this.processNext();
    }
  }

  /** any mode -> idle; resets the cursor and all transient state */
  stop(): void {
    // Set mode BEFORE stopping audio to prevent a spurious processNext from
    // synchronous end-of-speech callbacks.
    this.setMode('idle');
    this.speechToken++; // ignore any audio-load result still in flight
    this.audioPlayer.stop();
    this.cancelBrowserTTS();
    this.actionEngine.clearEffects();
    this.clearTriggerDelayTimer();
    this.clearSpeechTimer();
    this.speechTimerRemaining = 0;
    this.sceneIndex = 0;
    this.actionIndex = 0;
    this.savedSceneIndex = null;
    this.savedActionIndex = null;
    this.currentTopicState = null;
    this.currentTrigger = null;
    this.pendingTrigger = null;
  }

  /** User clicks "Join" on ProactiveCard -> save cursor -> live */
  confirmDiscussion(): void {
    if (!this.currentTrigger) {
      log.warn('confirmDiscussion called but no trigger');
      return;
    }
    // Mark consumed so it won't re-trigger on replay
    this.consumedDiscussions.add(this.currentTrigger.id);
    // Save lecture state - keep actionIndex as-is (past the discussion).
    // Discussions are placed after all speech actions, so the preceding
    // speech was already fully played; no need to replay it.
    this.savedSceneIndex = this.sceneIndex;
    this.savedActionIndex = this.actionIndex;
    // Enter live mode
    this.currentTopicState = 'active';
    this.setMode('live');
    // Notify callbacks
    this.callbacks.onProactiveHide?.();
    this.callbacks.onDiscussionConfirmed?.(
      this.currentTrigger.question,
      this.currentTrigger.prompt,
      this.currentTrigger.agentId,
    );
    this.currentTrigger = null;
  }

  /** User clicks "Skip" on ProactiveCard -> mark consumed -> continue playback */
  skipDiscussion(): void {
    if (this.currentTrigger) {
      this.consumedDiscussions.add(this.currentTrigger.id);
      this.currentTrigger = null;
    }
    this.callbacks.onProactiveHide?.();
    if (this.mode === 'playing') {
      void this.processNext();
    }
  }

  /** End discussion -> restore lecture cursor -> idle (caller continues via continuePlayback()) */
  handleEndDiscussion(): void {
    this.actionEngine.clearEffects();
    this.currentTopicState = 'closed';
    // Close whiteboard if it was open during the discussion
    useCanvasStore.getState().setWhiteboardOpen(false);
    this.callbacks.onDiscussionEnd?.();
    // Restore lecture state
    if (this.savedSceneIndex !== null && this.savedActionIndex !== null) {
      this.sceneIndex = this.savedSceneIndex;
      this.actionIndex = this.savedActionIndex;
      this.savedSceneIndex = null;
      this.savedActionIndex = null;
    }
    this.setMode('idle');
  }

  /**
   * User sends a message -> interrupt whatever is playing -> live mode.
   *
   * @param text The user's message, forwarded to `onUserInterrupt`.
   */
  handleUserInterrupt(text: string): void {
    // A paused discussion (live -> paused) is not lecture playback: the lecture
    // cursor must not be saved (and rewound) again from that state.
    const inPausedDiscussion = this.mode === 'paused' && this.currentTopicState === 'pending';
    if ((this.mode === 'playing' || this.mode === 'paused') && !inPausedDiscussion) {
      // Save lecture state BEFORE stopping audio - actionIndex was already
      // incremented by processNext, so subtract 1 to replay the interrupted
      // action when resuming. Never overwrite a previously saved position.
      if (this.savedSceneIndex === null) {
        this.savedSceneIndex = this.sceneIndex;
        this.savedActionIndex = Math.max(0, this.actionIndex - 1);
      }
    }
    // Cancel everything that could later fire into the lecture flow.
    this.clearTriggerDelayTimer();
    this.pendingTrigger = null;
    this.clearSpeechTimer();
    this.speechTimerRemaining = 0;
    this.speechToken++;
    // A card left on screen would otherwise leave a stale trigger behind,
    // which makes later pause() calls skip freezing audio.
    const hadTrigger = this.currentTrigger !== null;
    this.currentTrigger = null;

    // Set mode BEFORE stopping audio - speechSynthesis.cancel() may fire the
    // end callback synchronously, and the processNext guard checks
    // `this.mode === 'playing'`. Setting mode first prevents a spurious
    // processNext that would advance actionIndex past the interrupted speech.
    this.currentTopicState = 'active';
    this.setMode('live');
    this.audioPlayer.stop();
    this.cancelBrowserTTS();
    if (hadTrigger) this.callbacks.onProactiveHide?.();
    this.callbacks.onUserInterrupt?.(text);
  }

  /**
   * Whether nothing is left to play from the current cursor onward.
   * Already-consumed discussion actions do not count as remaining work.
   */
  isExhausted(): boolean {
    let si = this.sceneIndex;
    let ai = this.actionIndex;
    while (si < this.scenes.length) {
      const actions = this.scenes[si].actions || [];
      while (ai < actions.length) {
        const action = actions[ai];
        // Consumed discussions don't count as remaining work
        if (action.type === 'discussion' && this.consumedDiscussions.has(action.id)) {
          ai++;
          continue;
        }
        return false;
      }
      si++;
      ai = 0;
    }
    return true;
  }

  // ==================== Private ====================

  /** Coerce a snapshot index to a usable cursor value (non-negative integer, else 0). */
  private static toCursorIndex(value: number): number {
    return Number.isInteger(value) && value >= 0 ? value : 0;
  }

  /** Switch mode and notify `onModeChange`; no-op when the mode is unchanged. */
  private setMode(mode: EngineMode): void {
    if (this.mode === mode) return;
    this.mode = mode;
    this.callbacks.onModeChange?.(mode);
  }

  private clearTriggerDelayTimer(): void {
    if (this.triggerDelayTimer) {
      clearTimeout(this.triggerDelayTimer);
      this.triggerDelayTimer = null;
    }
  }

  private clearSpeechTimer(): void {
    if (this.speechTimer) {
      clearTimeout(this.speechTimer);
      this.speechTimer = null;
    }
  }

  /**
   * Get the current action, or null if playback is complete.
   * Advances sceneIndex automatically when a scene's actions are exhausted.
   */
  private getCurrentAction(): { action: Action; sceneId: string } | null {
    while (this.sceneIndex < this.scenes.length) {
      const scene = this.scenes[this.sceneIndex];
      const actions = scene.actions || [];
      if (this.actionIndex < actions.length) {
        return { action: actions[this.actionIndex], sceneId: scene.id };
      }
      // Move to next scene
      this.sceneIndex++;
      this.actionIndex = 0;
    }
    return null;
  }

  /** Reset effects and notify listeners that `scene` is now the active scene. */
  private announceScene(scene: Scene): void {
    this.actionEngine.clearEffects();
    this.callbacks.onSceneChange?.(scene.id);
    this.callbacks.onSpeakerChange?.('teacher');
  }

  /**
   * Core processing loop: consume the next action. Non-blocking actions chain
   * straight into the next call; blocking ones (speech, discussion) return and
   * are continued by their completion callbacks.
   */
  private async processNext(): Promise<void> {
    if (this.mode !== 'playing') return;
    // Processing a new action means no trigger delay is outstanding any more.
    this.pendingTrigger = null;

    // Scene boundary at the start of a scene (fresh start or restored cursor)
    const sceneIndexBefore = this.sceneIndex;
    if (this.actionIndex === 0 && this.sceneIndex < this.scenes.length) {
      this.announceScene(this.scenes[this.sceneIndex]);
    }
    const current = this.getCurrentAction();
    if (!current) {
      // All scenes complete
      this.actionEngine.clearEffects();
      this.setMode('idle');
      this.callbacks.onComplete?.();
      return;
    }
    // getCurrentAction() rolled over into a later scene: announce that scene
    // too, otherwise only the very first scene would ever be announced.
    if (this.sceneIndex !== sceneIndexBefore) {
      this.announceScene(this.scenes[this.sceneIndex]);
    }

    const { action } = current;
    // Notify progress BEFORE advancing the cursor so the snapshot points at
    // the current action. On restore the same action will be replayed - this
    // is the desired behaviour for speech (user may have only heard half).
    this.callbacks.onProgress?.(this.getSnapshot());
    this.actionIndex++;

    switch (action.type) {
      case 'speech': {
        const speechAction = action as SpeechAction;
        const token = ++this.speechToken;
        this.callbacks.onSpeechStart?.(speechAction.text);
        // onEnded -> processNext; if paused, resume() will call processNext
        this.audioPlayer.onEnded(() => {
          this.callbacks.onSpeechEnd?.();
          if (this.mode === 'playing') {
            void this.processNext();
          }
        });
        this.audioPlayer
          .play(speechAction.audioId || '', speechAction.audioUrl)
          .then((audioStarted) => {
            // No pre-generated audio - fall back to browser TTS or a reading timer
            if (!audioStarted) this.startFallbackSpeech(speechAction, token, true);
          })
          .catch((err) => {
            log.error('TTS error:', err);
            this.startFallbackSpeech(speechAction, token, false);
          });
        break;
      }
      case 'spotlight':
      case 'laser': {
        // Fire-and-forget visual effects via ActionEngine; a failed effect is
        // logged rather than surfacing as an unhandled rejection.
        void Promise.resolve(this.actionEngine.execute(action)).catch((err) => {
          log.error('Effect action failed:', err);
        });
        this.callbacks.onEffectFire?.({
          kind: action.type,
          targetId: action.elementId,
          ...(action.type === 'spotlight'
            ? { dimOpacity: action.dimOpacity }
            : { color: action.color }),
        } as Effect);
        // Don't block - continue immediately
        void this.processNext();
        break;
      }
      case 'discussion': {
        const discussionAction = action as DiscussionAction;
        // Check if already consumed
        if (this.consumedDiscussions.has(discussionAction.id)) {
          void this.processNext();
          return;
        }
        // Skip if the discussion's agent isn't in the user's selected list
        if (
          discussionAction.agentId &&
          this.callbacks.isAgentSelected &&
          !this.callbacks.isAgentSelected(discussionAction.agentId)
        ) {
          this.consumedDiscussions.add(discussionAction.id);
          void this.processNext();
          return;
        }
        // Show the ProactiveCard after a short delay. The engine then waits
        // for confirmDiscussion() or skipDiscussion().
        this.scheduleTrigger({
          id: discussionAction.id,
          question: discussionAction.topic,
          prompt: discussionAction.prompt,
          agentId: discussionAction.agentId,
        });
        break;
      }
      case 'play_video':
      case 'wb_open':
      case 'wb_draw_text':
      case 'wb_draw_shape':
      case 'wb_draw_chart':
      case 'wb_draw_latex':
      case 'wb_draw_table':
      case 'wb_clear':
      case 'wb_delete':
      case 'wb_close': {
        // Blocking actions - await completion, then continue. A failing action
        // is logged and skipped so one bad action cannot stall the lecture.
        try {
          await this.actionEngine.execute(action);
        } catch (err) {
          log.error('Action execution failed:', action.type, err);
        }
        if (this.mode === 'playing') {
          void this.processNext();
        }
        break;
      }
      default:
        // Unknown action, skip
        void this.processNext();
        break;
    }
  }

  /**
   * (Re)start the ProactiveCard delay for `trigger`. The trigger stays in
   * `pendingTrigger` until the card is actually shown, so a pause during the
   * delay can be resumed.
   */
  private scheduleTrigger(trigger: TriggerEvent): void {
    this.clearTriggerDelayTimer();
    this.pendingTrigger = trigger;
    this.triggerDelayTimer = setTimeout(() => {
      this.triggerDelayTimer = null;
      if (this.mode !== 'playing') return; // Cancelled if user paused/stopped
      this.pendingTrigger = null;
      this.currentTrigger = trigger;
      this.callbacks.onProactiveShow?.(trigger);
    }, PlaybackEngine.PROACTIVE_CARD_DELAY_MS);
  }

  // ==================== Speech fallbacks ====================

  /** Whether the user selected the browser-native TTS provider and the API is available. */
  private isBrowserTTSSelected(): boolean {
    const settings = useSettingsStore.getState();
    return Boolean(
      settings.ttsEnabled &&
        settings.ttsProviderId === 'browser-native-tts' &&
        typeof window !== 'undefined' &&
        window.speechSynthesis,
    );
  }

  /**
   * Start speech for an action that has no playable pre-generated audio.
   *
   * @param speechAction The speech action being played.
   * @param token Value of `speechToken` when the action started; a mismatch
   *   means the action was superseded while audio was loading.
   * @param allowBrowserTTS False after an audio error, where only the reading
   *   timer is used.
   */
  private startFallbackSpeech(
    speechAction: SpeechAction,
    token: number,
    allowBrowserTTS: boolean,
  ): void {
    if (token !== this.speechToken) return;
    if (this.mode !== 'playing' && this.mode !== 'paused') return;
    const useBrowserTTS = allowBrowserTTS && this.isBrowserTTSSelected();

    if (this.mode === 'paused') {
      // Paused while audio was loading: stage the speech so resume() starts
      // it, instead of speaking (or counting down) during the pause.
      if (useBrowserTTS) {
        this.browserTTSChunks = this.splitIntoChunks(speechAction.text);
        this.browserTTSChunkIndex = 0;
        this.browserTTSPausedChunks = this.browserTTSChunks.slice();
        this.browserTTSActive = true;
      } else {
        this.speechTimerRemaining = this.estimateReadingMs(speechAction.text);
      }
      return;
    }

    if (useBrowserTTS) {
      this.playBrowserTTS(speechAction);
    } else {
      this.startSpeechTimer(this.estimateReadingMs(speechAction.text));
    }
  }

  /**
   * Estimated reading time when no audio is available.
   * CJK text: 150ms per character. Other text: 240ms per word.
   * Minimum 2s before the playback-speed divisor is applied.
   */
  private estimateReadingMs(text: string): number {
    const cjkCount = (
      text.match(/[\u4e00-\u9fff\u3400-\u4dbf\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g) || []
    ).length;
    const isCJK = cjkCount > text.length * CJK_LANG_THRESHOLD;
    const reportedSpeed = this.callbacks.getPlaybackSpeed?.() ?? 1;
    // Guard against 0 / negative / NaN speeds, which would yield a bogus delay.
    const speed = reportedSpeed > 0 ? reportedSpeed : 1;
    const rawMs = isCJK
      ? Math.max(2000, text.length * 150)
      : Math.max(2000, text.split(/\s+/).filter(Boolean).length * 240);
    return rawMs / speed;
  }

  /** Run the reading timer for `ms`; on expiry signal speech end and continue playback. */
  private startSpeechTimer(ms: number): void {
    this.clearSpeechTimer();
    this.speechTimerStart = Date.now();
    this.speechTimerRemaining = ms;
    this.speechTimer = setTimeout(() => {
      this.speechTimer = null;
      this.speechTimerRemaining = 0;
      this.callbacks.onSpeechEnd?.();
      if (this.mode === 'playing') void this.processNext();
    }, ms);
  }

  // ==================== Browser Native TTS ====================

  /**
   * Split text into sentence-level chunks for sequential playback.
   * Chrome has a bug where utterances >~15s are silently cut off and onend
   * never fires, causing the engine to hang. Chunking avoids this.
   */
  private splitIntoChunks(text: string): string[] {
    // Split after sentence-ending punctuation - Latin (. ! ?) and CJK
    // (U+3002 ideographic full stop, U+FF01, U+FF1F) - and after newlines.
    const chunks = text
      .split(/(?<=[.!?\u3002\uff01\uff1f\n])\s*/)
      .map((s) => s.trim())
      .filter((s) => s.length > 0);
    // Blank input yields no chunks; fall back to the original text
    return chunks.length > 0 ? chunks : [text];
  }

  /**
   * Play text using the Web Speech API (browser-native TTS).
   * Splits text into sentence-level chunks to avoid Chrome's ~15s cutoff.
   * Uses cancel + re-speak for pause/resume (Firefox compatibility).
   */
  private playBrowserTTS(speechAction: SpeechAction): void {
    this.browserTTSChunks = this.splitIntoChunks(speechAction.text);
    this.browserTTSChunkIndex = 0;
    this.browserTTSPausedChunks = [];
    this.browserTTSActive = true;
    void this.playBrowserTTSChunk();
  }

  /** Speak the current chunk; on completion, advance to the next chunk or finish. */
  private async playBrowserTTSChunk(): Promise<void> {
    if (this.browserTTSChunkIndex >= this.browserTTSChunks.length) {
      // All chunks done
      this.browserTTSActive = false;
      this.browserTTSChunks = [];
      this.callbacks.onSpeechEnd?.();
      if (this.mode === 'playing') void this.processNext();
      return;
    }

    const generation = this.browserTTSGeneration;
    const settings = useSettingsStore.getState();
    const chunkText = this.browserTTSChunks[this.browserTTSChunkIndex];
    const utterance = new SpeechSynthesisUtterance(chunkText);

    // Apply settings
    const speed = this.callbacks.getPlaybackSpeed?.() ?? 1;
    utterance.rate = (settings.ttsSpeed ?? 1) * speed;
    utterance.volume = settings.ttsMuted ? 0 : (settings.ttsVolume ?? 1);

    // Ensure voices are loaded (Chrome loads them asynchronously)
    const voices = await this.ensureVoicesLoaded();
    // pause()/stop()/interrupt may have happened while waiting for voices;
    // speaking now would play audio the engine no longer tracks.
    if (generation !== this.browserTTSGeneration) return;

    // Set voice: try user's configured voice, fall back to auto-detect language
    let voiceFound = false;
    if (settings.ttsVoice && settings.ttsVoice !== 'default') {
      const voice = voices.find((v) => v.voiceURI === settings.ttsVoice);
      if (voice) {
        utterance.voice = voice;
        utterance.lang = voice.lang;
        voiceFound = true;
      }
    }
    if (!voiceFound) {
      // No usable voice configured - detect text language so the browser
      // auto-selects an appropriate voice.
      const cjkRatio =
        (chunkText.match(/[\u4e00-\u9fff\u3400-\u4dbf]/g) || []).length / chunkText.length;
      utterance.lang = cjkRatio > CJK_LANG_THRESHOLD ? 'zh-CN' : 'en-US';
    }

    let settled = false;
    const advance = (): void => {
      // Ignore callbacks from abandoned utterances and duplicate end/error events.
      if (settled || generation !== this.browserTTSGeneration) return;
      settled = true;
      this.browserTTSChunkIndex++;
      if (this.mode === 'playing') {
        void this.playBrowserTTSChunk(); // next chunk
      }
    };
    utterance.onend = advance;
    utterance.onerror = (event) => {
      // 'canceled' / 'interrupted' are produced by speechSynthesis.cancel()
      // (pause/stop) - not real errors, and state was already saved.
      if (event.error === 'canceled' || event.error === 'interrupted') return;
      log.warn('Browser TTS chunk error:', event.error);
      // Skip failed chunk, try next
      advance();
    };
    window.speechSynthesis.speak(utterance);
  }

  /**
   * Wait for speechSynthesis voices to load (Chrome loads them asynchronously).
   * A non-empty result is cached so subsequent calls return immediately.
   */
  private async ensureVoicesLoaded(): Promise<SpeechSynthesisVoice[]> {
    if (this.cachedVoices && this.cachedVoices.length > 0) {
      return this.cachedVoices;
    }
    const synth = window.speechSynthesis;
    let voices = synth.getVoices();
    if (voices.length === 0) {
      // Wait for the voiceschanged event, but never longer than the timeout
      await new Promise<void>((resolve) => {
        let timeoutId: ReturnType<typeof setTimeout> | null = null;
        const finish = (): void => {
          synth.removeEventListener('voiceschanged', finish);
          if (timeoutId !== null) clearTimeout(timeoutId);
          resolve();
        };
        synth.addEventListener('voiceschanged', finish);
        timeoutId = setTimeout(finish, PlaybackEngine.VOICES_TIMEOUT_MS);
      });
      voices = synth.getVoices();
    }
    this.cachedVoices = voices;
    return voices;
  }

  /** Cancel any active browser-native TTS and drop its queued chunks. */
  private cancelBrowserTTS(): void {
    if (this.browserTTSActive) {
      this.browserTTSActive = false;
      this.browserTTSChunks = [];
      this.browserTTSChunkIndex = 0;
      this.browserTTSPausedChunks = [];
      this.browserTTSGeneration++;
      window.speechSynthesis?.cancel();
    }
  }
}