|
import {serve} from "https://deno.land/std/http/server.ts"; |
|
import {EdgeSpeechTTS} from "https://esm.sh/@lobehub/tts@1"; |
|
|
|
/** Voice used when the requested model is not a known alias. */
const DEFAULT_VOICE_NAME = "en-US-BrianMultilingualNeural";

/** Short model aliases accepted by the API, mapped to the voice names handed to EdgeSpeechTTS. */
const VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
  ["ava", "en-US-AvaMultilingualNeural"],
  ["andrew", "en-US-AndrewMultilingualNeural"],
  ["emma", "en-US-EmmaMultilingualNeural"],
  ["brian", "en-US-BrianMultilingualNeural"],
  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
  ["remy", "fr-FR-RemyMultilingualNeural"],
  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
  ["florian", "de-DE-FlorianMultilingualNeural"],
  ["dmitry", "ru-RU-DmitryNeural"],
  ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/** Splits a `key:value|key:value` string into a map; later duplicates win. */
function parseVoiceSettings(voice: string): Map<string, string> {
  const settings = new Map<string, string>();
  for (const part of voice.split("|")) {
    const [key, value] = part.split(":");
    if (key !== undefined && value !== undefined) {
      settings.set(key.trim(), value);
    }
  }
  return settings;
}

/** Converts a raw setting to a number; missing, empty or non-numeric input yields 0. */
function parseNumericSetting(raw: string | undefined): number {
  const parsed = Number(raw || 0);
  return Number.isFinite(parsed) ? parsed : 0;
}

/**
 * Synthesizes `text` with EdgeSpeechTTS and returns the audio as an
 * `audio/mpeg` response.
 *
 * @param model Either a short alias (e.g. `"brian"`) or a full voice name
 *   containing `"Neural"`. Unknown aliases fall back to the default voice.
 * @param voice Settings string of the form `rate:<n>|pitch:<n>`. It is only
 *   read when `model` is a full voice name; aliases always use rate 0 and
 *   pitch 0.
 * @param text The text to speak.
 * @returns A response whose body is the synthesized audio.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  // Callers forward unvalidated JSON fields, so guard against non-string values
  // instead of throwing a TypeError on `.includes` / `.split`.
  const requestedModel = typeof model === "string" ? model : "";
  const voiceSettings = typeof voice === "string" ? voice : "";

  let voiceName: string;
  let rate = 0;
  let pitch = 0;

  if (requestedModel.includes("Neural")) {
    voiceName = requestedModel;
    const settings = parseVoiceSettings(voiceSettings);
    rate = parseNumericSetting(settings.get("rate"));
    pitch = parseNumericSetting(settings.get("pitch"));
  } else {
    voiceName = VOICE_ALIASES.get(requestedModel) ?? DEFAULT_VOICE_NAME;
  }

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: {rate, pitch, voice: voiceName},
  };
  const response = await tts.create(payload);
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());

  return new Response(mp3Buffer, {
    headers: {"Content-Type": "audio/mpeg"},
  });
}
|
|
|
/**
 * Checks that the request's `Content-Type` media type matches `expected`.
 *
 * Only the media type is compared: parameters such as `; charset=utf-8` are
 * ignored and the comparison is case-insensitive, so
 * `Application/JSON; charset=utf-8` satisfies `application/json`.
 *
 * @param req Incoming request whose header is inspected.
 * @param expected Media type the caller requires, e.g. `application/json`.
 * @returns A 400 response when the header is missing or does not match,
 *   otherwise `undefined`.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const [mediaType = ""] = (contentType ?? "").split(";", 1);

  if (mediaType.trim().toLowerCase() !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", {status: 400});
  }
  return undefined;
}
|
|
|
/**
 * Debug endpoint: synthesizes a fixed Russian sample phrase with a fixed
 * voice and neutral rate/pitch settings, logging the parameters first.
 *
 * @returns Whatever `synthesizeSpeech` produces for the sample request.
 */
async function handleDebugRequest() {
  const sample = {
    model: "en-US-BrianMultilingualNeural",
    voice: "rate:0.0|pitch:0.0",
    text: "Приветик! Надеюсь ты меня хорошо слышишь? Алё?!",
  };

  console.log(`model=${sample.model}, voice=${sample.voice}, text=${sample.text}`);

  return synthesizeSpeech(sample.model, sample.voice, sample.text);
}
|
|
|
/**
 * Handles the speech synthesis endpoint.
 *
 * Expects a `POST` with a JSON object body carrying `input` (the text to
 * speak, required) and optionally `model` and `voice`, which are forwarded to
 * `synthesizeSpeech`. Missing or non-string `model` / `voice` are forwarded as
 * empty strings.
 *
 * @param req Incoming HTTP request.
 * @returns 405 for non-POST methods, 400 for a wrong content type, malformed
 *   JSON or a missing/empty `input`, otherwise the synthesized audio response.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    return new Response("Method Not Allowed", {
      status: 405,
      headers: {"Allow": "POST"},
    });
  }

  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  // The body is untrusted: a parse failure is the client's fault, not a server error.
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    console.log("Request body is not valid JSON");
    return new Response("Bad Request", {status: 400});
  }

  if (typeof body !== "object" || body === null) {
    console.log("Request body is not a JSON object");
    return new Response("Bad Request", {status: 400});
  }

  const {model, input, voice} = body as Record<string, unknown>;
  if (typeof input !== "string" || input.length === 0) {
    console.log("Request body has no text in 'input'");
    return new Response("Bad Request", {status: 400});
  }

  return synthesizeSpeech(
    typeof model === "string" ? model : "",
    typeof voice === "string" ? voice : "",
    input,
  );
}
|
|
|
|
|
/**
 * Serves the single-page HTML demo.
 *
 * The page loads the voice list from `/v1/audio/models`, posts the entered
 * text to `/v1/audio/speech`, plays the returned audio and offers it as a
 * download. It also renders `curl` examples for both endpoints.
 *
 * @param req Incoming request; not inspected, the page is static.
 * @returns The demo page as UTF-8 HTML.
 */
async function handleDemoRequest(req: Request): Promise<Response> {
  // NOTE: the client script lives inside this template literal, so its own
  // backticks and `${...}` placeholders are escaped (\` and \${), and every
  // backslash meant for the browser is doubled.
  const html = `<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta content="width=device-width, initial-scale=1.0" name="viewport" />
    <title>tts</title>
    <style>
      body {
        background-color: #121212;
        color: #e0e0e0;
        font-family: Arial, sans-serif;
        margin: 0;
        padding: 20px;
      }

      .container {
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        background-color: #1e1e1e;
        border-radius: 8px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
      }

      .input-area, .output-area {
        margin-bottom: 20px;
      }

      .slider-container, .textarea-container, .dropdown-container {
        margin-bottom: 20px;
      }

      label {
        display: block;
        margin-bottom: 8px;
        font-weight: bold;
      }

      input[type="range"] {
        width: 100%;
      }

      .slider-value {
        text-align: center;
        margin-top: 8px;
      }

      textarea {
        max-width: 780px;
        width: calc(100% - 20px);
        height: 100px;
        padding: 10px;
        border: 1px solid #333;
        border-radius: 4px;
        background-color: #2e2e2e;
        color: #e0e0e0;
        resize: none;
      }

      select {
        width: 100%;
        padding: 10px;
        border: 1px solid #333;
        border-radius: 4px;
        background-color: #2e2e2e;
        color: #e0e0e0;
      }

      button {
        width: 100%;
        padding: 10px;
        border: none;
        border-radius: 4px;
        background-color: #6200ea;
        color: #fff;
        font-size: 16px;
        cursor: pointer;
        transition: background-color 0.3s;
      }

      button:hover {
        background-color: #3700b3;
      }

      h1 {
        font-size: 24px;
        margin-bottom: 20px;
      }

      a {
        color: #bb86fc;
        text-decoration: none;
      }

      a:hover {
        text-decoration: underline;
      }

      #audioPlayerContainer {
        text-align: center;
      }

      audio {
        width: 100%;
        max-width: 600px;
        margin: 10px 0;
      }

      a {
        display: block;
        margin: 10px 0;
      }

      pre {
        color: #94c890;
        background: #000000;
        padding: 5px 10px;
        margin: 0;
        font-size: 1.12em;
      }
    </style>
  </head>
  <body>
    <div class="container">
      <div class="input-area">
        <div class="textarea-container">
          <label for="inputText">текст:</label><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
        </div>
        <div class="dropdown-container">
          <label for="voiceSelect">голос:</label>
          <select id="voiceSelect"></select>
        </div>
        <button id="synthesizeButton">синтезировать</button>
      </div>
      <div class="output-area">
        <div id="audioPlayerContainer"></div>
      </div>
      <details>
        <summary>api</summary>
        <p>получить список голосов:</p>
        <pre id="apiVoices"></pre>
        <p>post-запрос для синтеза голоса из текста:</p>
        <pre id="apiExamples"></pre>
      </details>
    </div>
    <script>
      let audio = null;
      let audioUrl = null;

      document.getElementById('synthesizeButton').addEventListener('click', async () => {
        const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
        const rate = '0.0';
        const pitch = '0.0';
        const voice = \`rate:\${rate}|pitch:\${pitch}\`;
        const model = document.getElementById('voiceSelect').value;
        const audioPlayerContainer = document.getElementById('audioPlayerContainer');

        if (audio) {
          audio.pause();
          audio.currentTime = 0;
        }

        try {
          const response = await fetch('/v1/audio/speech', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model, input: text, voice })
          });
          // An error body is not audio; surface the failure instead of playing it.
          if (!response.ok) {
            throw new Error('HTTP ' + response.status);
          }
          const blob = await response.blob();

          // Drop the previous player and release the blob URL it was using.
          audioPlayerContainer.innerHTML = '';
          if (audioUrl) {
            URL.revokeObjectURL(audioUrl);
          }
          audioUrl = URL.createObjectURL(blob);

          audio = new Audio(audioUrl);
          audio.controls = true;
          audioPlayerContainer.appendChild(audio);

          const downloadLink = document.createElement('a');
          downloadLink.href = audioUrl;
          downloadLink.download = 'synthesized_voice.mp3';
          downloadLink.textContent = 'скачать аудио';
          downloadLink.style.display = 'block';
          downloadLink.style.marginTop = '10px';

          audioPlayerContainer.appendChild(downloadLink);

          // Playback may be refused by the browser; the controls stay usable.
          audio.play().catch((error) => console.warn('не удалось начать воспроизведение:', error));
        } catch (error) {
          console.error('ошибка при синтезе речи:', error);
          audioPlayerContainer.textContent = 'не удалось синтезировать речь';
        }
      });

      async function fetchModels() {
        try {
          const response = await fetch('/v1/audio/models');
          if (!response.ok) {
            throw new Error('HTTP ' + response.status);
          }
          const models = await response.json();
          const voiceSelect = document.getElementById('voiceSelect');

          models.forEach((model, index) => {
            const option = document.createElement('option');
            option.value = model.model;
            option.textContent = model.model;
            if (index === 1) {option.selected = true;}
            voiceSelect.appendChild(option);
          });
        } catch (error) {
          console.error('ошибка при получении списка моделей:', error);
        }
      }
      fetchModels();

      function createApiExamples() {
        const apiExamples = document.getElementById('apiExamples');
        const apiVoices = document.getElementById('apiVoices');
        const currentUrl = window.location.origin;
        const voices_pre = \`curl \${currentUrl}/v1/audio/models\`;
        const examples_pre = \`curl \${currentUrl}/v1/audio/speech \\\\\\\\
-H 'content-type: application/json' \\\\\\\\
--data-raw '{"model":"brian","input":"привет! хрю-хрю!","voice":"rate:0|pitch:0"}' \\\\\\\\
-o tts_voice.mp3
\`;
        apiVoices.textContent = voices_pre.replace(/\\\\\\\\/g, '\\\\');
        apiExamples.textContent = examples_pre.replace(/\\\\\\\\/g, '\\\\');
      }
      createApiExamples();
    </script>
  </body>
</html>`;

  return new Response(html, {
    headers: {"Content-Type": "text/html; charset=utf-8"},
  });
}
|
|
|
/**
 * Returns the catalogue of voice aliases as JSON.
 *
 * Male voices are listed first, then female voices; within each group the
 * catalogue order is kept.
 *
 * @returns A JSON response holding an array of `{model, gender}` entries.
 */
async function handleVoiceList() {
  const catalogue = [
    {model: "ava", gender: "female"},
    {model: "andrew", gender: "male"},
    {model: "emma", gender: "female"},
    {model: "brian", gender: "male"},
    {model: "vivienne", gender: "female"},
    {model: "remy", gender: "male"},
    {model: "seraphina", gender: "female"},
    {model: "florian", gender: "male"},
    {model: "dmitry", gender: "male"},
    {model: "svetlana", gender: "female"},
  ];

  // Partitioning by gender gives the same order as a stable male-first sort.
  const maleVoices = catalogue.filter((entry) => entry.gender === "male");
  const otherVoices = catalogue.filter((entry) => entry.gender !== "male");
  const ordered = [...maleVoices, ...otherVoices];

  const responseInit = {headers: {"Content-Type": "application/json"}};
  return new Response(JSON.stringify(ordered), responseInit);
}
|
|
|
|
|
// HTTP entry point: routes each request by path on port 7860.
//   /                  demo page
//   /v1/audio/models   voice list
//   /tts               fixed debug synthesis
//   /v1/audio/speech   synthesis API
// Any other path gets a 404.
serve(async (req: Request) => {
  try {
    const {pathname} = new URL(req.url);

    // Each handler is awaited so that a rejected promise lands in the catch
    // below; a bare `return handler()` would let the rejection escape it.
    switch (pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/v1/audio/models":
        return await handleVoiceList();
      case "/tts":
        return await handleDebugRequest();
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${pathname}`);
        return new Response("Not Found", {status: 404});
    }
  } catch (err) {
    // Anything can be thrown, so narrow before reading `.message`.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
}, {port: 7860});