// mstts / app.ts
// getapi's picture
// Update app.ts
// 77b7e19 verified
import {serve} from "https://deno.land/std/http/server.ts";
import {EdgeSpeechTTS} from "https://esm.sh/@lobehub/tts@1";
/**
 * Synthesizes `text` with the `EdgeSpeechTTS` client and returns the audio
 * bytes as an `audio/mpeg` response.
 *
 * Voice selection:
 * - If `model` contains "Neural" it is used verbatim as the voice name, and
 *   `voice` is parsed as `key:value` pairs separated by `|`
 *   (e.g. `"rate:0.5|pitch:-0.25"`) to obtain `rate` and `pitch`.
 * - Otherwise `model` is treated as a short alias ("ava", "brian", ...);
 *   unknown aliases fall back to "en-US-BrianMultilingualNeural" and `voice`
 *   is ignored (rate and pitch stay 0).
 *
 * A missing or non-string `voice`, and missing or non-numeric `rate`/`pitch`
 * values, all resolve to 0 instead of throwing or forwarding `NaN`.
 *
 * @param model Short alias or full "...Neural" voice name.
 * @param voice Optional `rate:<n>|pitch:<n>` parameter string.
 * @param text Text to synthesize.
 * @returns Response whose body is the synthesized audio.
 */
async function synthesizeSpeech(model: string, voice: string, text: string) {
  const fallbackVoice = "en-US-BrianMultilingualNeural";
  // A Map (not a plain object) so keys like "constructor" cannot hit the prototype.
  const aliasToVoice = new Map<string, string>([
    ["ava", "en-US-AvaMultilingualNeural"],
    ["andrew", "en-US-AndrewMultilingualNeural"],
    ["emma", "en-US-EmmaMultilingualNeural"],
    ["brian", "en-US-BrianMultilingualNeural"],
    ["vivienne", "fr-FR-VivienneMultilingualNeural"],
    ["remy", "fr-FR-RemyMultilingualNeural"],
    ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
    ["florian", "de-DE-FlorianMultilingualNeural"],
    ["dmitry", "ru-RU-DmitryNeural"],
    ["svetlana", "ru-RU-SvetlanaNeural"],
  ]);

  // Missing, empty, or non-numeric values become 0 so NaN never reaches the TTS client.
  const toFiniteNumber = (raw: string | undefined): number => {
    const parsed = Number(raw || 0);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  let voiceName: string;
  let rate = 0;
  let pitch = 0;
  if (typeof model === "string" && model.includes("Neural")) {
    voiceName = model;
    // `voice` comes from an untyped JSON body upstream, so guard its runtime type.
    const rawParams = typeof voice === "string" ? voice : "";
    const params = new Map<string, string | undefined>();
    for (const part of rawParams.split("|")) {
      // Only the first two ":"-separated fields are used; later duplicates win.
      const [key = "", value] = part.split(":");
      params.set(key, value);
    }
    rate = toFiniteNumber(params.get("rate"));
    pitch = toFiniteNumber(params.get("pitch"));
  } else {
    voiceName = aliasToVoice.get(model) ?? fallbackVoice;
  }

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: {rate, pitch, voice: voiceName},
  };
  const response = await tts.create(payload);
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
  return new Response(mp3Buffer, {
    headers: {"Content-Type": "audio/mpeg"},
  });
}
/**
 * Checks that the request's Content-Type media type equals `expected`.
 *
 * Only the media type is compared: parameters after `;` (for example
 * `; charset=utf-8`) are ignored and the comparison is case-insensitive, so
 * `Application/JSON; charset=UTF-8` satisfies an expected `application/json`.
 *
 * @param req Incoming request whose `Content-Type` header is inspected.
 * @param expected Required media type, e.g. `"application/json"`.
 * @returns A 400 "Bad Request" response when the type does not match
 *          (the mismatch is logged), otherwise `undefined`.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  // Drop any parameters and normalize case before comparing.
  const [mediaType = ""] = (contentType ?? "").split(";");
  if (mediaType.trim().toLowerCase() !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", {status: 400});
  }
  return undefined;
}
/**
 * Debug endpoint: logs and synthesizes a fixed sample phrase with a fixed
 * voice and neutral rate/pitch parameters.
 *
 * @returns Whatever `synthesizeSpeech` resolves to for the sample.
 */
async function handleDebugRequest() {
  const sample = {
    model: "en-US-BrianMultilingualNeural",
    voice: "rate:0.0|pitch:0.0",
    text: "Приветик! Надеюсь ты меня хорошо слышишь? Алё?!",
  };
  const summary = [
    "model=" + sample.model,
    "voice=" + sample.voice,
    "text=" + sample.text,
  ].join(", ");
  console.log(summary);
  return synthesizeSpeech(sample.model, sample.voice, sample.text);
}
/**
 * Handles the speech synthesis endpoint.
 *
 * Accepts only POST requests with a JSON Content-Type whose body is an object
 * with string fields `model` and `input`, plus an optional string `voice`.
 *
 * @param req Incoming HTTP request.
 * @returns 405 for non-POST methods; 400 for a wrong Content-Type, a body that
 *          is not valid JSON, or a missing/non-string `model` or `input`;
 *          otherwise the result of `synthesizeSpeech`.
 */
async function handleSynthesisRequest(req: Request) {
  if (req.method !== "POST") {
    return new Response("Method Not Allowed", {status: 405});
  }
  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  // The body is untrusted: a parse failure is a client error, not a server error.
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return new Response("Bad Request", {status: 400});
  }
  if (typeof body !== "object" || body === null) {
    return new Response("Bad Request", {status: 400});
  }

  const {model, input, voice} = body as Record<string, unknown>;
  if (typeof model !== "string" || typeof input !== "string") {
    return new Response("Bad Request", {status: 400});
  }
  // `voice` is optional; an empty string yields no rate/pitch parameters.
  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
}
/**
 * Serves the single-page HTML demo.
 *
 * The page loads the voice list from `/v1/audio/models`, posts the entered
 * text to `/v1/audio/speech`, plays the returned audio, offers it as a
 * download, and shows curl examples for both endpoints.
 *
 * The embedded script only plays a response when `response.ok` is true and
 * logs failures to the browser console; otherwise an error body would be
 * handed to the audio element as if it were audio.
 *
 * Everything inside the template literal is emitted verbatim, so its lines
 * are intentionally not indented and inner backticks / `${` are escaped.
 *
 * @param req Incoming request (unused; kept for a uniform handler signature).
 * @returns HTML response declared as UTF-8 (the page contains Cyrillic text).
 */
async function handleDemoRequest(req: Request) {
  const html = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta content="width=device-width, initial-scale=1.0" name="viewport" />
<title>tts</title>
<style>
body {
background-color: #121212;
color: #e0e0e0;
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
}
.container {
max-width: 800px;
margin: 0 auto;
padding: 20px;
background-color: #1e1e1e;
border-radius: 8px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}
.input-area, .output-area {
margin-bottom: 20px;
}
.slider-container, .textarea-container, .dropdown-container {
margin-bottom: 20px;
}
label {
display: block;
margin-bottom: 8px;
font-weight: bold;
}
input[type="range"] {
width: 100%;
}
.slider-value {
text-align: center;
margin-top: 8px;
}
textarea {
max-width: 780px;
width: calc(100% - 20px);
height: 100px;
padding: 10px;
border: 1px solid #333;
border-radius: 4px;
background-color: #2e2e2e;
color: #e0e0e0;
resize: none;
}
select {
width: 100%;
padding: 10px;
border: 1px solid #333;
border-radius: 4px;
background-color: #2e2e2e;
color: #e0e0e0;
}
button {
width: 100%;
padding: 10px;
border: none;
border-radius: 4px;
background-color: #6200ea;
color: #fff;
font-size: 16px;
cursor: pointer;
transition: background-color 0.3s;
}
button:hover {
background-color: #3700b3;
}
h1 {
font-size: 24px;
margin-bottom: 20px;
}
a {
color: #bb86fc;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
#audioPlayerContainer {
text-align: center;
}
audio {
width: 100%;
max-width: 600px;
margin: 10px 0;
}
a {
display: block;
margin: 10px 0;
}
pre {
color: #94c890;
background: #000000;
padding: 5px 10px;
margin: 0;
font-size: 1.12em;
}
</style>
</head>
<body>
<div class="container">
<div class="input-area">
<div class="textarea-container">
<label for="inputText">текст:</label
><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
</div>
<div class="dropdown-container">
<label for="voiceSelect">голос:</label>
<select id="voiceSelect"></select>
</div>
<button id="synthesizeButton">синтезировать</button>
</div>
<div class="output-area">
<div id="audioPlayerContainer"></div>
</div>
<details>
<summary>api</summary>
<p>получить список голосов:</p>
<pre id="apiVoices"></pre>
<p>post-запрос для синтеза голоса из текста:</p>
<pre id="apiExamples"></pre>
</details>
</div>
<script>
let audio = null;
document.getElementById('synthesizeButton').addEventListener('click', () => {
const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
const rate = '0.0';
const pitch = '0.0';
const voice = \`rate:\${rate}|pitch:\${pitch}\`;
const model = document.getElementById('voiceSelect').value;
if (audio) {
audio.pause();
audio.currentTime = 0;
}
fetch('/v1/audio/speech', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model, input: text, voice })
})
.then(response => {
if (!response.ok) {
throw new Error('HTTP ' + response.status);
}
return response.blob();
})
.then(blob => {
const audioUrl = URL.createObjectURL(blob);
const audioPlayerContainer = document.getElementById('audioPlayerContainer');
if (audio) {
audio.pause();
audioPlayerContainer.innerHTML = '';
}
audio = new Audio(audioUrl);
audio.controls = true;
audioPlayerContainer.appendChild(audio);
const downloadLink = document.createElement('a');
downloadLink.href = audioUrl;
downloadLink.download = 'synthesized_voice.mp3';
downloadLink.textContent = 'скачать аудио';
downloadLink.style.display = 'block';
downloadLink.style.marginTop = '10px';
audioPlayerContainer.appendChild(downloadLink);
audio.play();
})
.catch(error => {
console.error('ошибка при синтезе речи:', error);
});
});
async function fetchModels() {
try {
const response = await fetch('/v1/audio/models');
const models = await response.json();
const voiceSelect = document.getElementById('voiceSelect');
models.forEach((model, index) => {
const option = document.createElement('option');
option.value = model.model;
option.textContent = model.model;
if (index === 1) {option.selected = true;}
voiceSelect.appendChild(option);
});
} catch (error) {
console.error('ошибка при получении списка моделей:', error);
}
}
fetchModels();
function createApiExamples() {
const apiExamples = document.getElementById('apiExamples');
const apiVoices = document.getElementById('apiVoices');
const currentUrl = window.location.origin;
const voices_pre = \`curl \${currentUrl}/v1/audio/models\`;
const examples_pre = \`curl \${currentUrl}/v1/audio/speech \\\\\\\\
-H 'content-type: application/json' \\\\\\\\
--data-raw '{"model":"brian","input":"привет! хрю-хрю!","voice":"rate:0|pitch:0"}' \\\\\\\\
-o tts_voice.mp3
\`;
apiVoices.textContent = voices_pre.replace(/\\\\\\\\/g, '\\\\');
apiExamples.textContent = examples_pre.replace(/\\\\\\\\/g, '\\\\');
}
createApiExamples();
</script>
</body></html>`;
  return new Response(html, {
    headers: {"Content-Type": "text/html; charset=utf-8"},
  });
}
/**
 * Returns the catalog of short voice aliases as JSON.
 *
 * Each entry has a `model` alias and a `gender`; male voices are listed
 * first, then female voices, each group in its declaration order.
 *
 * @returns JSON response containing the ordered voice list.
 */
async function handleVoiceList() {
  const catalog = [
    {model: 'ava', gender: 'female'},
    {model: 'andrew', gender: 'male'},
    {model: 'emma', gender: 'female'},
    {model: 'brian', gender: 'male'},
    {model: 'vivienne', gender: 'female'},
    {model: 'remy', gender: 'male'},
    {model: 'seraphina', gender: 'female'},
    {model: 'florian', gender: 'male'},
    {model: 'dmitry', gender: 'male'},
    {model: 'svetlana', gender: 'female'},
  ];
  // Every entry is either male or female, so a stable partition gives the
  // same order as a stable "male before female" sort.
  const maleVoices = catalog.filter((entry) => entry.gender === 'male');
  const femaleVoices = catalog.filter((entry) => entry.gender === 'female');
  const body = JSON.stringify(maleVoices.concat(femaleVoices));
  return new Response(body, {
    headers: {"Content-Type": "application/json"},
  });
}
// HTTP entry point: routes requests by path and listens on port 7860.
//   /                  -> HTML demo page
//   /v1/audio/models   -> JSON voice list
//   /tts               -> fixed debug synthesis
//   /v1/audio/speech   -> synthesis request
// Any other path is logged and answered with 404.
serve(async (req) => {
  try {
    const {pathname} = new URL(req.url);
    // Each handler is awaited inside the try block: returning the bare
    // promise would let a rejection escape the catch below.
    switch (pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/v1/audio/models":
        return await handleVoiceList();
      case "/tts":
        return await handleDebugRequest();
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${pathname}`);
        return new Response("Not Found", {status: 404});
    }
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
}, { port: 7860 });