| import { useEffect, useRef, useMemo } from 'react'; | |
| import { useRecoilState } from 'recoil'; | |
| import { useToastContext } from '@librechat/client'; | |
| import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'; | |
| import useGetAudioSettings from './useGetAudioSettings'; | |
| import store from '~/store'; | |
/**
 * Speech-to-text hook backed by `react-speech-recognition`.
 *
 * While the recognizer produces text, interim and final transcripts are pushed
 * to `setText`. When the `autoSendText` setting is greater than -1, the final
 * transcript is additionally handed to `onTranscriptionComplete` after
 * `autoSendText` seconds and the recognizer's transcript is reset.
 *
 * @param setText - Receives every new, non-empty interim or final transcript.
 * @param onTranscriptionComplete - Receives the final transcript once the
 *   auto-send delay has elapsed.
 * @returns `isListening` (recognizer state), `isLoading` (always `false`), and
 *   `startRecording` / `stopRecording`, which are both the same toggle.
 */
const useSpeechToTextBrowser = (
  setText: (text: string) => void,
  onTranscriptionComplete: (text: string) => void,
) => {
  const { showToast } = useToastContext();
  const { speechToTextEndpoint } = useGetAudioSettings();
  const isBrowserSTTEnabled = speechToTextEndpoint === 'browser';

  // Last values forwarded to `setText`, used to skip duplicate updates.
  const lastTranscript = useRef<string | null>(null);
  const lastInterim = useRef<string | null>(null);

  const [autoSendText] = useRecoilState(store.autoSendText);
  const [languageSTT] = useRecoilState<string>(store.languageSTT);
  const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);

  const {
    listening,
    finalTranscript,
    resetTranscript,
    interimTranscript,
    isMicrophoneAvailable,
    browserSupportsSpeechRecognition,
  } = useSpeechRecognition();

  const isListening = useMemo(() => listening, [listening]);

  // Latest callbacks, read through a ref so the transcript effects below do not
  // list them as dependencies. Otherwise a caller passing a non-memoized
  // callback would re-run the effects on every render and cancel a pending
  // auto-send timer. Declared before the effects that read it so it is
  // refreshed first within each commit.
  const callbacks = useRef({ setText, onTranscriptionComplete, resetTranscript });
  useEffect(() => {
    callbacks.current = { setText, onTranscriptionComplete, resetTranscript };
  });

  // Mirror interim (in-progress) recognition results into the text field.
  useEffect(() => {
    if (interimTranscript == null || interimTranscript === '') {
      // An empty transcript clears the marker so the same phrase spoken again
      // later is not mistaken for a duplicate.
      lastInterim.current = null;
      return;
    }
    if (lastInterim.current === interimTranscript) {
      return;
    }
    callbacks.current.setText(interimTranscript);
    lastInterim.current = interimTranscript;
  }, [interimTranscript]);

  // Mirror the final recognition result into the text field.
  useEffect(() => {
    if (finalTranscript == null || finalTranscript === '') {
      lastTranscript.current = null;
      return;
    }
    if (lastTranscript.current === finalTranscript) {
      return;
    }
    callbacks.current.setText(finalTranscript);
    lastTranscript.current = finalTranscript;
  }, [finalTranscript]);

  // Auto-send: once a final transcript exists, submit it after the configured
  // delay. The timer restarts whenever the transcript or the delay changes and
  // is cancelled on unmount.
  useEffect(() => {
    if (finalTranscript == null || finalTranscript === '') {
      return;
    }
    if (!(autoSendText > -1)) {
      return;
    }
    const timer = setTimeout(() => {
      callbacks.current.onTranscriptionComplete(finalTranscript);
      callbacks.current.resetTranscript();
    }, autoSendText * 1000);
    return () => clearTimeout(timer);
  }, [finalTranscript, autoSendText]);

  /**
   * Starts listening when idle and stops when already listening. Shows an error
   * toast instead when speech recognition is unsupported or the microphone is
   * unavailable.
   */
  const toggleListening = () => {
    if (!browserSupportsSpeechRecognition) {
      showToast({
        message: 'Browser does not support SpeechRecognition',
        status: 'error',
      });
      return;
    }

    if (!isMicrophoneAvailable) {
      showToast({
        message: 'Microphone is not available',
        status: 'error',
      });
      return;
    }

    if (isListening === true) {
      SpeechRecognition.stopListening();
    } else {
      SpeechRecognition.startListening({
        language: languageSTT,
        continuous: autoTranscribeAudio,
      });
    }
  };

  // The keydown listener is registered once, so it must not close over
  // render-scoped values directly: it would keep seeing the first render's
  // `isListening` and could never stop a running session. It reads the current
  // values from this ref instead.
  const shortcut = useRef({ toggleListening, isBrowserSTTEnabled });
  useEffect(() => {
    shortcut.current = { toggleListening, isBrowserSTTEnabled };
  });

  // Global Shift+Alt+L keyboard shortcut.
  useEffect(() => {
    const handleKeyDown = (e: KeyboardEvent) => {
      if (!(e.shiftKey && e.altKey && e.code === 'KeyL')) {
        return;
      }
      const { toggleListening: toggle, isBrowserSTTEnabled: browserSelected } = shortcut.current;
      // NOTE(review): the shortcut fires only when the endpoint is NOT
      // 'browser'. This polarity is kept from the original code but looks
      // inverted; confirm the intended behavior.
      if (!browserSelected) {
        toggle();
      }
    };

    window.addEventListener('keydown', handleKeyDown);
    return () => window.removeEventListener('keydown', handleKeyDown);
  }, []);

  return {
    isListening,
    isLoading: false,
    startRecording: toggleListening,
    stopRecording: toggleListening,
  };
};

export default useSpeechToTextBrowser;