// PaperStack/components/TextToSpeech.tsx
// Akhil-Theerthala's picture
// Upload 32 files
// 46a757e verified
import React, { useState, useRef, useEffect, useCallback } from 'react';
import { Play, Pause, SkipBack, SkipForward, Volume2, VolumeX, Settings, X } from 'lucide-react';
/** Props accepted by the TextToSpeech component. */
interface Props {
/** Source text to read aloud; markdown punctuation is stripped from it before it is spoken. */
text: string;
/** Identifier of the section this text belongs to. NOTE(review): not read by the component in the code visible here. */
sectionId: string;
/** Optional callback invoked with the component's current word index whenever that index changes. */
onWordChange?: (wordIndex: number) => void;
/** Optional callback invoked with the component's `isPlaying` state whenever that state changes. */
onPlayingChange?: (isPlaying: boolean) => void;
}
/** User-adjustable speech settings that are copied onto each new utterance. */
interface TTSSettings {
/** Assigned to `SpeechSynthesisUtterance.rate`; the settings slider offers 0.5 to 2 in steps of 0.1. */
rate: number;
/** Assigned to `SpeechSynthesisUtterance.pitch`; the settings slider offers 0.5 to 1.5 in steps of 0.1. */
pitch: number;
/** Voice assigned to the utterance; when `null`, no voice is set on the utterance. */
voice: SpeechSynthesisVoice | null;
}
/** Markdown syntax characters removed before text is spoken or split into words. */
const MARKDOWN_SYNTAX_PATTERN = /[#*_`\[\]()]/g;

/** Substrings of voice names that are preferred when picking the default voice. */
const PREFERRED_VOICE_NAME_HINTS = ['Samantha', 'Alex', 'Google', 'Natural'] as const;

/**
 * Delay between cancelling speech and restarting it from the beginning.
 * NOTE(review): presumably gives the speech engine time to flush the cancelled queue — confirm.
 */
const RESTART_DELAY_MS = 100;

/** Returns the browser's speech synthesis controller, or `null` when the Web Speech API is unavailable (SSR, unsupported browser). */
const getSpeechSynthesis = (): SpeechSynthesis | null =>
  typeof window !== 'undefined' &&
  'speechSynthesis' in window &&
  typeof SpeechSynthesisUtterance !== 'undefined'
    ? window.speechSynthesis
    : null;

/** Removes markdown syntax characters from `raw`. */
const stripMarkdown = (raw: string): string => raw.replace(MARKDOWN_SYNTAX_PATTERN, '');

/** Splits `raw` into the whitespace-separated words that word indices refer to. */
const toWords = (raw: string): string[] =>
  stripMarkdown(raw)
    .replace(/\n+/g, ' ')
    .trim()
    .split(/\s+/)
    .filter((word) => word.length > 0);

/**
 * Builds the string handed to the speech engine: line breaks become sentence breaks.
 *
 * The text is trimmed BEFORE line breaks are converted, and whitespace around a
 * line break is swallowed, so no stand-alone "." token is produced. This keeps
 * the word count identical to `toWords`, which the reported word index relies on.
 */
const toSpeechText = (raw: string): string =>
  stripMarkdown(raw)
    .trim()
    .replace(/\s*\n\s*/g, '. ');

/** Index of the word that contains (or starts at) `charIndex` within `speechText`. */
const wordIndexAt = (speechText: string, charIndex: number): number =>
  Math.max(0, speechText.slice(0, charIndex).split(/\s+/).length - 1);

/**
 * Playback control bar that reads `text` aloud through the browser's Web Speech API.
 *
 * Renders play/pause, restart and stop buttons, a progress bar, and a settings
 * panel for speed, pitch and voice. The parent is notified of the current word
 * index via `onWordChange` and of the playing state via `onPlayingChange`.
 * When the Web Speech API is unavailable the controls render but do nothing.
 */
const TextToSpeech: React.FC<Props> = ({
  text,
  sectionId,
  onWordChange,
  onPlayingChange
}) => {
  const [isPlaying, setIsPlaying] = useState(false);
  const [isPaused, setIsPaused] = useState(false);
  const [currentWordIndex, setCurrentWordIndex] = useState(0);
  const [showSettings, setShowSettings] = useState(false);
  const [settings, setSettings] = useState<TTSSettings>({
    rate: 1.0,
    pitch: 1.0,
    voice: null
  });
  const [availableVoices, setAvailableVoices] = useState<SpeechSynthesisVoice[]>([]);
  const [progress, setProgress] = useState(0);

  // The utterance this instance currently owns. Event handlers compare against it
  // so that events from a cancelled or replaced utterance are ignored.
  const utteranceRef = useRef<SpeechSynthesisUtterance | null>(null);
  const wordsRef = useRef<string[]>([]);
  const restartTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Parse text into words
  useEffect(() => {
    wordsRef.current = toWords(text);
  }, [text]);

  /** Cancels a pending delayed restart, if any. */
  const clearRestartTimer = useCallback(() => {
    if (restartTimerRef.current !== null) {
      clearTimeout(restartTimerRef.current);
      restartTimerRef.current = null;
    }
  }, []);

  /** Resets all playback-related state to its idle values. */
  const resetPlayback = useCallback(() => {
    setIsPlaying(false);
    setIsPaused(false);
    setCurrentWordIndex(0);
    setProgress(0);
  }, []);

  // Load available voices and clean up on unmount
  useEffect(() => {
    const synth = getSpeechSynthesis();
    if (!synth) return undefined;

    const loadVoices = () => {
      const englishVoices = synth.getVoices().filter((v) => v.lang.startsWith('en'));
      setAvailableVoices(englishVoices);

      // Set default voice (prefer a natural-sounding one)
      const preferredVoice =
        englishVoices.find((v) => PREFERRED_VOICE_NAME_HINTS.some((hint) => v.name.includes(hint))) ??
        englishVoices[0] ??
        null;
      // Functional update: this effect runs once, so reading `settings.voice`
      // here would always see the initial `null` and overwrite the user's choice
      // every time the voice list changes.
      setSettings((prev) => (prev.voice || !preferredVoice ? prev : { ...prev, voice: preferredVoice }));
    };

    loadVoices();

    // NOTE(review): some older engines reportedly expose speechSynthesis without
    // EventTarget methods; the synchronous loadVoices() above covers them.
    const canListen = typeof synth.addEventListener === 'function';
    if (canListen) {
      synth.addEventListener('voiceschanged', loadVoices);
    }

    return () => {
      if (canListen) {
        synth.removeEventListener('voiceschanged', loadVoices);
      }
      if (restartTimerRef.current !== null) {
        clearTimeout(restartTimerRef.current);
        restartTimerRef.current = null;
      }
      // Only cancel speech this instance started; detach first so the resulting
      // end/error events do not set state on an unmounted component.
      if (utteranceRef.current) {
        utteranceRef.current = null;
        synth.cancel();
      }
    };
  }, []);

  // Notify parent of word changes
  useEffect(() => {
    onWordChange?.(currentWordIndex);
  }, [currentWordIndex, onWordChange]);

  // Notify parent of playing state changes
  useEffect(() => {
    onPlayingChange?.(isPlaying);
  }, [isPlaying, onPlayingChange]);

  /** Cancels whatever is being spoken and starts reading `text` from the beginning. */
  const speakFromStart = useCallback(() => {
    const synth = getSpeechSynthesis();
    if (!synth) return;

    // Detach before cancelling so the old utterance's end/error events are ignored.
    utteranceRef.current = null;
    synth.cancel();

    const speechText = toSpeechText(text);
    if (speechText.length === 0) {
      // Nothing to read; also avoids dividing by zero when computing progress.
      resetPlayback();
      return;
    }

    const utterance = new SpeechSynthesisUtterance(speechText);
    utterance.rate = settings.rate;
    utterance.pitch = settings.pitch;
    if (settings.voice) {
      utterance.voice = settings.voice;
    }

    // Track word boundaries
    utterance.onboundary = (event) => {
      if (utteranceRef.current !== utterance || event.name !== 'word') return;
      const lastWordIndex = Math.max(0, wordsRef.current.length - 1);
      setCurrentWordIndex(Math.min(wordIndexAt(speechText, event.charIndex), lastWordIndex));
      setProgress((event.charIndex / speechText.length) * 100);
    };

    utterance.onend = () => {
      if (utteranceRef.current !== utterance) return;
      utteranceRef.current = null;
      resetPlayback();
    };

    utterance.onerror = () => {
      if (utteranceRef.current !== utterance) return;
      utteranceRef.current = null;
      setIsPlaying(false);
      setIsPaused(false);
    };

    utteranceRef.current = utterance;
    synth.speak(utterance);
    setIsPlaying(true);
    setIsPaused(false);
  }, [text, settings, resetPlayback]);

  /** Resumes paused speech, or starts reading from the beginning when nothing is paused. */
  const handlePlay = useCallback(() => {
    const synth = getSpeechSynthesis();
    if (!synth) return;

    if (isPaused && utteranceRef.current) {
      synth.resume();
      setIsPaused(false);
      setIsPlaying(true);
      return;
    }

    speakFromStart();
  }, [isPaused, speakFromStart]);

  /** Pauses speech if it is currently playing. */
  const handlePause = useCallback(() => {
    if (!isPlaying) return;
    getSpeechSynthesis()?.pause();
    setIsPaused(true);
    setIsPlaying(false);
  }, [isPlaying]);

  /** Stops speech entirely and resets progress; also aborts a pending restart. */
  const handleStop = useCallback(() => {
    clearRestartTimer();
    utteranceRef.current = null;
    getSpeechSynthesis()?.cancel();
    resetPlayback();
  }, [clearRestartTimer, resetPlayback]);

  /** Restarts reading from the beginning after a short delay. */
  const handleSkipBack = useCallback(() => {
    handleStop();
    // Schedule speakFromStart (not handlePlay): a handlePlay captured while paused
    // would try to resume the queue that was just cancelled and speak nothing.
    restartTimerRef.current = setTimeout(() => {
      restartTimerRef.current = null;
      speakFromStart();
    }, RESTART_DELAY_MS);
  }, [handleStop, speakFromStart]);

  const handleSkipForward = useCallback(() => {
    // Skip to next section (stop current)
    handleStop();
  }, [handleStop]);

  return (
    <div className="relative" data-section-id={sectionId}>
      {/* Main TTS Control Bar */}
      <div className="flex items-center gap-3 p-3 rounded-xl bg-gray-100 dark:bg-gray-800 border border-gray-200 dark:border-gray-700">
        {/* Play/Pause Button */}
        <button
          type="button"
          onClick={isPlaying ? handlePause : handlePlay}
          className={`
            p-3 rounded-full transition-all duration-200
            ${isPlaying
              ? 'bg-brand-500 text-white shadow-lg shadow-brand-500/30 hover:bg-brand-600'
              : 'bg-white dark:bg-gray-700 text-gray-700 dark:text-gray-200 hover:bg-gray-50 dark:hover:bg-gray-600 shadow-sm'
            }
          `}
          title={isPlaying ? 'Pause' : 'Play'}
          aria-label={isPlaying ? 'Pause' : 'Play'}
        >
          {isPlaying ? <Pause size={20} /> : <Play size={20} className="ml-0.5" />}
        </button>

        {/* Skip Controls */}
        <div className="flex items-center gap-1">
          <button
            type="button"
            onClick={handleSkipBack}
            className="p-2 rounded-lg text-gray-500 hover:text-gray-700 dark:hover:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-700 transition-colors"
            title="Restart"
            aria-label="Restart"
          >
            <SkipBack size={16} />
          </button>
          <button
            type="button"
            onClick={handleSkipForward}
            className="p-2 rounded-lg text-gray-500 hover:text-gray-700 dark:hover:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-700 transition-colors"
            title="Stop"
            aria-label="Stop"
          >
            <SkipForward size={16} />
          </button>
        </div>

        {/* Progress Bar */}
        <div
          className="flex-1 h-2 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden"
          role="progressbar"
          aria-valuemin={0}
          aria-valuemax={100}
          aria-valuenow={Math.round(progress)}
        >
          <div
            className="h-full bg-gradient-to-r from-brand-500 to-purple-500 transition-all duration-150"
            style={{ width: `${progress}%` }}
          />
        </div>

        {/* Speed Indicator */}
        <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[3rem] text-center">
          {settings.rate.toFixed(1)}x
        </span>

        {/* Settings Button */}
        <button
          type="button"
          onClick={() => setShowSettings((open) => !open)}
          className={`
            p-2 rounded-lg transition-colors
            ${showSettings
              ? 'bg-brand-100 dark:bg-brand-900/30 text-brand-600 dark:text-brand-400'
              : 'text-gray-500 hover:text-gray-700 dark:hover:text-gray-300 hover:bg-gray-200 dark:hover:bg-gray-700'
            }
          `}
          title="Settings"
          aria-label="Settings"
          aria-expanded={showSettings}
        >
          <Settings size={16} />
        </button>
      </div>

      {/* Settings Panel */}
      {showSettings && (
        <div className="absolute top-full left-0 right-0 mt-2 p-4 rounded-xl bg-white dark:bg-gray-800 border border-gray-200 dark:border-gray-700 shadow-xl z-50 animate-in fade-in slide-in-from-top-2 duration-200">
          <div className="flex items-center justify-between mb-4">
            <h4 className="text-sm font-bold text-gray-900 dark:text-white">Audio Settings</h4>
            <button
              type="button"
              onClick={() => setShowSettings(false)}
              className="p-1 rounded hover:bg-gray-100 dark:hover:bg-gray-700"
              aria-label="Close settings"
            >
              <X size={14} />
            </button>
          </div>

          <div className="space-y-4">
            {/* Speed Control */}
            <div>
              <label className="flex items-center justify-between text-xs font-medium text-gray-600 dark:text-gray-400 mb-2">
                <span>Speed</span>
                <span className="font-mono">{settings.rate.toFixed(1)}x</span>
              </label>
              <input
                type="range"
                min="0.5"
                max="2"
                step="0.1"
                value={settings.rate}
                onChange={(e) => setSettings((prev) => ({ ...prev, rate: parseFloat(e.target.value) }))}
                className="w-full h-2 bg-gray-200 dark:bg-gray-700 rounded-lg appearance-none cursor-pointer accent-brand-500"
                aria-label="Speed"
              />
              <div className="flex justify-between text-[10px] text-gray-400 mt-1">
                <span>0.5x</span>
                <span>1x</span>
                <span>1.5x</span>
                <span>2x</span>
              </div>
            </div>

            {/* Pitch Control */}
            <div>
              <label className="flex items-center justify-between text-xs font-medium text-gray-600 dark:text-gray-400 mb-2">
                <span>Pitch</span>
                <span className="font-mono">{settings.pitch.toFixed(1)}</span>
              </label>
              <input
                type="range"
                min="0.5"
                max="1.5"
                step="0.1"
                value={settings.pitch}
                onChange={(e) => setSettings((prev) => ({ ...prev, pitch: parseFloat(e.target.value) }))}
                className="w-full h-2 bg-gray-200 dark:bg-gray-700 rounded-lg appearance-none cursor-pointer accent-brand-500"
                aria-label="Pitch"
              />
            </div>

            {/* Voice Selection */}
            {availableVoices.length > 0 && (
              <div>
                <label className="text-xs font-medium text-gray-600 dark:text-gray-400 mb-2 block">
                  Voice
                </label>
                <select
                  value={settings.voice?.name || ''}
                  onChange={(e) => {
                    const voice = availableVoices.find((v) => v.name === e.target.value) || null;
                    setSettings((prev) => ({ ...prev, voice }));
                  }}
                  className="w-full px-3 py-2 text-sm bg-gray-50 dark:bg-gray-700 border border-gray-200 dark:border-gray-600 rounded-lg focus:ring-2 focus:ring-brand-500 outline-none"
                  aria-label="Voice"
                >
                  {availableVoices.map((voice) => (
                    <option key={voice.name} value={voice.name}>
                      {voice.name} ({voice.lang})
                    </option>
                  ))}
                </select>
              </div>
            )}
          </div>
        </div>
      )}

      {/* Current word indicator (for debugging, usually hidden) */}
      {/* <div className="mt-2 text-xs text-gray-400">
        Word {currentWordIndex + 1} of {wordsRef.current.length}
      </div> */}
    </div>
  );
};
export default TextToSpeech;