| | import { useState, useMemo, useRef, useCallback, useEffect } from 'react'; |
| | import { useRecoilValue } from 'recoil'; |
| | import { useToastContext } from '@librechat/client'; |
| | import { useTextToSpeechMutation, useVoicesQuery } from '~/data-provider'; |
| | import { useLocalize } from '~/hooks'; |
| | import store from '~/store'; |
| |
|
/**
 * Builds the multipart payload sent to the text-to-speech endpoint.
 *
 * @param text - The text to synthesize; stored under the `input` field.
 * @param voice - The voice identifier; stored under the `voice` field.
 * @returns A `FormData` holding exactly the `input` and `voice` entries, in that order.
 */
const createFormData = (text: string, voice: string) => {
  const payload = new FormData();
  const fields: ReadonlyArray<readonly [string, string]> = [
    ['input', text],
    ['voice', voice],
  ];
  for (const [fieldName, fieldValue] of fields) {
    payload.append(fieldName, fieldValue);
  }
  return payload;
};
| |
|
/** Arguments accepted by `useTextToSpeechExternal`. */
type TUseTTSExternal = {
  /** Ref that receives the audio element created after a fresh TTS response; cleared on cancel. */
  audioRef: React.MutableRefObject<HTMLAudioElement | null>;
  /** Setter flipped to `true` when cached playback starts and `false` when it ends or is cancelled. */
  setIsSpeaking: React.Dispatch<React.SetStateAction<boolean>>;
  /** Combined with the global fetching/playing flags to derive the hook's loading state. */
  isLast: boolean;
  /** Used to look up the DOM element with id `audio-<messageId>` when cancelling speech. */
  messageId?: string;
  /** Key passed to the global audio fetching/playing atom families; the hook defaults it to 0. */
  index?: number;
};
| |
|
/** Extracts a readable message from an unknown thrown value without an unchecked cast. */
const getErrorMessage = (error: unknown): string =>
  error instanceof Error ? error.message : String(error);

/** Reads the `input` text field from a TTS payload; non-string or missing entries yield `''`. */
const getInputText = (variables: FormData): string => {
  const input = variables.get('input');
  return typeof input === 'string' ? input : '';
};

/**
 * Hook that generates speech for a message through the external text-to-speech mutation.
 *
 * When the `cacheTTS` setting is on, audio is looked up in the Cache API first (keyed by the
 * message text) and played or downloaded directly from there; on a miss, or if the cache cannot
 * be read, a fresh TTS request is made. Fresh responses are optionally written to the
 * `tts-responses` cache, optionally downloaded, and exposed through `audioRef`.
 *
 * @param setIsSpeaking - Setter toggled as cached playback starts, ends, or is cancelled.
 * @param audioRef - Ref that receives the audio element built from a fresh TTS response.
 * @param messageId - Used to locate the `audio-<messageId>` DOM element when cancelling.
 * @param isLast - Gates the global "fetching" indicator for this message.
 * @param index - Key for the global audio fetching/playing atom families (defaults to 0).
 * @returns `generateSpeechExternal`, `cancelSpeech`, the combined `isLoading` flag, the
 *   `audioRef` passed in, and the list of available `voices`.
 */
function useTextToSpeechExternal({
  setIsSpeaking,
  audioRef,
  messageId,
  isLast,
  index = 0,
}: TUseTTSExternal) {
  const localize = useLocalize();
  const { showToast } = useToastContext();
  const voice = useRecoilValue(store.voice);
  const cacheTTS = useRecoilValue(store.cacheTTS);
  const playbackRate = useRecoilValue(store.playbackRate);

  const [downloadFile, setDownloadFile] = useState(false);

  // Audio element created for playback of a cached response (see playAudioPromise).
  const promiseAudioRef = useRef<HTMLAudioElement | null>(null);

  const globalIsFetching = useRecoilValue(store.globalAudioFetchingFamily(index));
  const globalIsPlaying = useRecoilValue(store.globalAudioPlayingFamily(index));

  /** Wraps a fresh TTS response in an audio element and hands it to the caller via `audioRef`. */
  const autoPlayAudio = (blobUrl: string) => {
    const newAudio = new Audio(blobUrl);
    audioRef.current = newAudio;
  };

  /** Plays a cached response immediately and tracks the element in `promiseAudioRef`. */
  const playAudioPromise = (blobUrl: string) => {
    const newAudio = new Audio(blobUrl);
    const initializeAudio = () => {
      if (playbackRate != null && playbackRate !== 1 && playbackRate > 0) {
        newAudio.playbackRate = playbackRate;
      }
    };

    initializeAudio();
    const playPromise = () => newAudio.play().then(() => setIsSpeaking(true));

    playPromise().catch((error: unknown) => {
      const message = getErrorMessage(error);
      if (message.includes('The play() request was interrupted by a call to pause()')) {
        console.log('Play request was interrupted by a call to pause()');
        // Retry once: re-apply the playback rate and start playback again.
        initializeAudio();
        return playPromise().catch(console.error);
      }
      console.error(error);
      showToast({
        message: localize('com_nav_audio_play_error', { 0: message }),
        status: 'error',
      });
    });

    newAudio.onended = () => {
      console.log('Cached message audio ended');
      URL.revokeObjectURL(blobUrl);
      // Drop the finished element so later cleanup does not pause/revoke it a second time.
      if (promiseAudioRef.current === newAudio) {
        promiseAudioRef.current = null;
      }
      setIsSpeaking(false);
    };

    promiseAudioRef.current = newAudio;
  };

  /** Triggers a browser download of the audio behind `blobUrl` and resets the download flag. */
  const downloadAudio = (blobUrl: string) => {
    const a = document.createElement('a');
    a.href = blobUrl;
    a.download = 'audio.mp3';
    a.click();
    setDownloadFile(false);
  };

  const { mutate: processAudio, isLoading } = useTextToSpeechMutation({
    onMutate: (variables) => {
      const inputText = getInputText(variables);
      if (inputText.length >= 4096) {
        showToast({
          message: localize('com_nav_long_audio_warning'),
          status: 'warning',
        });
      }
    },
    onSuccess: async (data: ArrayBuffer, variables) => {
      try {
        const inputText = getInputText(variables);
        const audioBlob = new Blob([data], { type: 'audio/mpeg' });

        if (cacheTTS && inputText) {
          // Caching is best-effort: a cache failure must not block playback of audio
          // that was fetched successfully, nor surface as an unhandled rejection.
          try {
            const cache = await caches.open('tts-responses');
            const request = new Request(inputText);
            const response = new Response(audioBlob);
            void cache.put(request, response).catch((cacheError: unknown) => {
              console.error('Failed to cache TTS response:', cacheError);
            });
          } catch (cacheError) {
            console.error('Failed to cache TTS response:', cacheError);
          }
        }

        const blobUrl = URL.createObjectURL(audioBlob);
        if (downloadFile) {
          downloadAudio(blobUrl);
        }
        autoPlayAudio(blobUrl);
      } catch (error) {
        showToast({
          message: `Error processing audio: ${getErrorMessage(error)}`,
          status: 'error',
        });
      }
    },
    onError: (error: unknown) => {
      showToast({
        message: localize('com_nav_audio_process_error', { 0: getErrorMessage(error) }),
        status: 'error',
      });
    },
  });

  /** Records whether the result should be downloaded and sends the TTS request. */
  const startMutation = (text: string, download: boolean) => {
    const formData = createFormData(text, voice ?? '');
    setDownloadFile(download);
    processAudio(formData);
  };

  /**
   * Serves the request from the Cache API when possible. A cache miss, or any failure
   * while reading the cache, falls back to a fresh TTS request instead of rejecting.
   */
  const handleCachedResponse = async (text: string, download: boolean) => {
    let audioBlob: Blob | null = null;
    try {
      const cachedResponse = await caches.match(text);
      audioBlob = cachedResponse ? await cachedResponse.blob() : null;
    } catch (error) {
      console.error('Failed to read cached TTS response:', error);
    }

    if (!audioBlob) {
      startMutation(text, download);
      return;
    }

    const blobUrl = URL.createObjectURL(audioBlob);
    if (download) {
      downloadAudio(blobUrl);
    } else {
      playAudioPromise(blobUrl);
    }
  };

  /** Entry point: generates (or replays) speech for `text`, optionally downloading it. */
  const generateSpeechExternal = (text: string, download: boolean) => {
    if (cacheTTS) {
      void handleCachedResponse(text, download);
    } else {
      startMutation(text, download);
    }
  };

  /** Stops both the message's DOM audio element and any cached-playback element. */
  const cancelSpeech = () => {
    const messageAudio = document.getElementById(`audio-${messageId}`) as HTMLAudioElement | null;
    const pauseAudio = (currentElement: HTMLAudioElement | null) => {
      if (currentElement) {
        currentElement.pause();
        if (currentElement.src) {
          URL.revokeObjectURL(currentElement.src);
        }
        audioRef.current = null;
      }
    };
    pauseAudio(messageAudio);
    pauseAudio(promiseAudioRef.current);
    // The cached-playback element's URL is revoked above; do not keep a stale reference.
    promiseAudioRef.current = null;
    setIsSpeaking(false);
  };

  const cancelPromiseSpeech = useCallback(() => {
    if (promiseAudioRef.current) {
      promiseAudioRef.current.pause();
      if (promiseAudioRef.current.src) {
        URL.revokeObjectURL(promiseAudioRef.current.src);
      }
      promiseAudioRef.current = null;
      setIsSpeaking(false);
    }
  }, [setIsSpeaking]);

  // Returning the callback registers it as the effect cleanup, so cached playback
  // is stopped when the component unmounts (or the callback identity changes).
  useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);

  const isFetching = useMemo(
    () => isLast && globalIsFetching && !globalIsPlaying,
    [globalIsFetching, globalIsPlaying, isLast],
  );

  const { data: voicesData = [] } = useVoicesQuery();

  return {
    generateSpeechExternal,
    cancelSpeech,
    isLoading: isFetching || isLoading,
    audioRef,
    voices: voicesData,
  };
}
| |
|
| | export default useTextToSpeechExternal; |
| |
|