|
|
import { getRequestHeaders, substituteParams } from '../../../script.js'; |
|
|
import { saveTtsProviderSettings, sanitizeId } from './index.js'; |
|
|
|
|
|
export { OpenAITtsProvider }; |
|
|
|
|
|
class OpenAITtsProvider { |
|
|
static voices = [ |
|
|
{ name: 'Alloy', voice_id: 'alloy', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/alloy.wav' }, |
|
|
{ name: 'Ash', voice_id: 'ash', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/ash.wav' }, |
|
|
{ name: 'Coral', voice_id: 'coral', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/coral.wav' }, |
|
|
{ name: 'Echo', voice_id: 'echo', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/echo.wav' }, |
|
|
{ name: 'Fable', voice_id: 'fable', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/fable.wav' }, |
|
|
{ name: 'Onyx', voice_id: 'onyx', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/onyx.wav' }, |
|
|
{ name: 'Nova', voice_id: 'nova', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/nova.wav' }, |
|
|
{ name: 'Sage', voice_id: 'sage', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/sage.wav' }, |
|
|
{ name: 'Shimmer', voice_id: 'shimmer', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/shimmer.wav' }, |
|
|
]; |
|
|
|
|
|
settings; |
|
|
voices = []; |
|
|
separator = ' . '; |
|
|
audioElement = document.createElement('audio'); |
|
|
|
|
|
defaultSettings = { |
|
|
voiceMap: {}, |
|
|
customVoices: [], |
|
|
model: 'tts-1', |
|
|
speed: 1, |
|
|
characterInstructions: {}, |
|
|
}; |
|
|
|
|
|
get settingsHtml() { |
|
|
let html = ` |
|
|
<div>Use OpenAI's TTS engine.</div> |
|
|
<small>Hint: Save an API key in the OpenAI API settings to use it here.</small> |
|
|
<div> |
|
|
<label for="openai-tts-model">Model:</label> |
|
|
<select id="openai-tts-model"> |
|
|
<optgroup label="Latest"> |
|
|
<option value="tts-1">tts-1</option> |
|
|
<option value="tts-1-hd">tts-1-hd</option> |
|
|
<option value="gpt-4o-mini-tts">gpt-4o-mini-tts</option> |
|
|
</optgroup> |
|
|
<optgroup label="Snapshots"> |
|
|
<option value="tts-1-1106">tts-1-1106</option> |
|
|
<option value="tts-1-hd-1106">tts-1-hd-1106</option> |
|
|
</optgroup> |
|
|
<select> |
|
|
</div> |
|
|
<div> |
|
|
<label for="openai-tts-speed">Speed: <span id="openai-tts-speed-output"></span></label> |
|
|
<input type="range" id="openai-tts-speed" value="1" min="0.25" max="4" step="0.05"> |
|
|
</div>`; |
|
|
return html; |
|
|
} |
|
|
|
|
|
async loadSettings(settings) { |
|
|
|
|
|
if (Object.keys(settings).length == 0) { |
|
|
console.info('Using default TTS Provider settings'); |
|
|
} |
|
|
|
|
|
|
|
|
this.settings = this.defaultSettings; |
|
|
|
|
|
for (const key in settings) { |
|
|
if (key in this.settings) { |
|
|
this.settings[key] = settings[key]; |
|
|
} else { |
|
|
throw `Invalid setting passed to TTS Provider: ${key}`; |
|
|
} |
|
|
} |
|
|
|
|
|
$('#openai-tts-model').val(this.settings.model); |
|
|
$('#openai-tts-model').on('change', () => { |
|
|
this.onSettingsChange(); |
|
|
}); |
|
|
|
|
|
$('#openai-tts-speed').val(this.settings.speed); |
|
|
$('#openai-tts-speed').on('input', () => { |
|
|
this.onSettingsChange(); |
|
|
}); |
|
|
|
|
|
$('#openai-tts-speed-output').text(this.settings.speed); |
|
|
|
|
|
await this.checkReady(); |
|
|
|
|
|
this.updateInstructionsUI(); |
|
|
|
|
|
this.setupVoiceMapObserver(); |
|
|
|
|
|
console.debug('OpenAI TTS: Settings loaded'); |
|
|
} |
|
|
|
|
|
setupVoiceMapObserver() { |
|
|
if (this.voiceMapObserver) { |
|
|
this.voiceMapObserver.disconnect(); |
|
|
this.voiceMapObserver = null; |
|
|
} |
|
|
|
|
|
const targetNode = document.getElementById('tts_voicemap_block'); |
|
|
if (!targetNode) return; |
|
|
|
|
|
const observer = new MutationObserver(() => { |
|
|
if (this.settings.model === 'gpt-4o-mini-tts') { |
|
|
this.populateCharacterInstructions(); |
|
|
} |
|
|
}); |
|
|
|
|
|
observer.observe(targetNode, { childList: true, subtree: true }); |
|
|
this.voiceMapObserver = observer; |
|
|
} |
|
|
|
|
|
onSettingsChange() { |
|
|
|
|
|
this.settings.model = String($('#openai-tts-model').find(':selected').val()); |
|
|
this.settings.speed = Number($('#openai-tts-speed').val()); |
|
|
$('#openai-tts-speed-output').text(this.settings.speed); |
|
|
this.updateInstructionsUI(); |
|
|
saveTtsProviderSettings(); |
|
|
} |
|
|
|
|
|
updateInstructionsUI() { |
|
|
if (this.settings.model === 'gpt-4o-mini-tts') { |
|
|
this.createInstructionsContainer(); |
|
|
$('#openai-instructions-container').show(); |
|
|
this.populateCharacterInstructions(); |
|
|
} else { |
|
|
$('#openai-instructions-container').hide(); |
|
|
this.voiceMapObserver?.disconnect(); |
|
|
this.voiceMapObserver = null; |
|
|
} |
|
|
} |
|
|
|
|
|
createInstructionsContainer() { |
|
|
if ($('#openai-instructions-container').length === 0) { |
|
|
const containerHtml = ` |
|
|
<div id="openai-instructions-container" style="display: none;"> |
|
|
<span>Voice Instructions (GPT-4o Mini TTS)</span><br> |
|
|
<small>Customize how each character speaks</small> |
|
|
<div id="openai-character-instructions"></div> |
|
|
</div> |
|
|
`; |
|
|
$('#openai-tts-speed').parent().after(containerHtml); |
|
|
} |
|
|
} |
|
|
|
|
|
populateCharacterInstructions() { |
|
|
|
|
|
const currentCharacters = $('.tts_voicemap_block_char span').map((i, el) => $(el).text()).get(); |
|
|
|
|
|
$('#openai-character-instructions').empty(); |
|
|
|
|
|
for (const char of currentCharacters) { |
|
|
if (char === 'SillyTavern System' || char === '[Default Voice]') continue; |
|
|
|
|
|
const sanitizedName = sanitizeId(char); |
|
|
const savedInstructions = this.settings.characterInstructions?.[char] || ''; |
|
|
|
|
|
const instructionBlock = document.createElement('div'); |
|
|
const label = document.createElement('label'); |
|
|
const textArea = document.createElement('textarea'); |
|
|
instructionBlock.appendChild(label); |
|
|
instructionBlock.appendChild(textArea); |
|
|
instructionBlock.className = 'character-instructions'; |
|
|
label.setAttribute('for', `openai_char_${sanitizedName}`); |
|
|
label.innerText = `${char}:`; |
|
|
textArea.id = `openai_char_${sanitizedName}`; |
|
|
textArea.placeholder = 'e.g., "Speak cheerfully and energetically"'; |
|
|
textArea.className = 'textarea_compact autoSetHeight'; |
|
|
textArea.value = savedInstructions; |
|
|
textArea.addEventListener('input', () => { |
|
|
this.saveCharacterInstructions(char, textArea.value); |
|
|
}); |
|
|
|
|
|
$('#openai-character-instructions').append(instructionBlock); |
|
|
} |
|
|
} |
|
|
|
|
|
saveCharacterInstructions(characterName, instructions) { |
|
|
if (!this.settings.characterInstructions) { |
|
|
this.settings.characterInstructions = {}; |
|
|
} |
|
|
this.settings.characterInstructions[characterName] = instructions; |
|
|
saveTtsProviderSettings(); |
|
|
} |
|
|
|
|
|
async checkReady() { |
|
|
await this.fetchTtsVoiceObjects(); |
|
|
} |
|
|
|
|
|
async onRefreshClick() { |
|
|
return; |
|
|
} |
|
|
|
|
|
async getVoice(voiceName) { |
|
|
if (!voiceName) { |
|
|
throw 'TTS Voice name not provided'; |
|
|
} |
|
|
|
|
|
const voice = OpenAITtsProvider.voices.find(voice => voice.voice_id === voiceName || voice.name === voiceName); |
|
|
|
|
|
if (!voice) { |
|
|
throw `TTS Voice not found: ${voiceName}`; |
|
|
} |
|
|
|
|
|
return voice; |
|
|
} |
|
|
|
|
|
async generateTts(text, voiceId, characterName = null) { |
|
|
const response = await this.fetchTtsGeneration(text, voiceId, characterName); |
|
|
return response; |
|
|
} |
|
|
|
|
|
async fetchTtsVoiceObjects() { |
|
|
return OpenAITtsProvider.voices; |
|
|
} |
|
|
|
|
|
async previewTtsVoice(_) { |
|
|
return; |
|
|
} |
|
|
|
|
|
async fetchTtsGeneration(inputText, voiceId, characterName = null) { |
|
|
console.info(`Generating new TTS for voice_id ${voiceId}`); |
|
|
|
|
|
const requestBody = { |
|
|
'text': inputText, |
|
|
'voice': voiceId, |
|
|
'model': this.settings.model, |
|
|
'speed': this.settings.speed, |
|
|
}; |
|
|
|
|
|
if (this.settings.model === 'gpt-4o-mini-tts' && characterName) { |
|
|
const instructions = this.settings.characterInstructions?.[characterName]; |
|
|
if (instructions && instructions.trim()) { |
|
|
requestBody.instructions = substituteParams(instructions); |
|
|
} |
|
|
} |
|
|
|
|
|
const response = await fetch('/api/openai/generate-voice', { |
|
|
method: 'POST', |
|
|
headers: getRequestHeaders(), |
|
|
body: JSON.stringify(requestBody), |
|
|
}); |
|
|
|
|
|
if (!response.ok) { |
|
|
toastr.error(response.statusText, 'TTS Generation Failed'); |
|
|
throw new Error(`HTTP ${response.status}: ${await response.text()}`); |
|
|
} |
|
|
|
|
|
return response; |
|
|
} |
|
|
} |
|
|
|