<!-- vits / templates /index.html
streamerbtw1002's picture
Duplicate from Artrajz/vits-simple-api
ff664dd -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>vits-simple-api</title>
<link rel="stylesheet" href="/static/css/style.css">
<link rel="stylesheet" href="/static/css/bootstrap.min.css"/>
</head>
<body>
<main class="main-container">
<div class="container flex flex-wrap mx-auto">
<div class="text-center d-flex align-items-center w-100" style="height: 100px;" id="component-1">
<h1 class="w-100">
<a href="https://github.com/Artrajz/vits-simple-api" target="_blank"
style="text-decoration: none; color: black"> vits-simple-api </a>
</h1>
</div>
<div class="tabs w-100 border-b-2" id="component-2">
<button class="tab-button px-4 pb-2 pt-2 active " onclick="showContent(0)">VITS</button>
<button class="tab-button px-4 pb-2 pt-2" onclick="showContent(1)">W2V2-VITS</button>
<button class="tab-button px-4 pb-2 pt-2" onclick="showContent(2)">Bert-VITS2</button>
</div>
<div class="content w-100 border-lr-2 border-b-2" id="component-3">
<div class="content-pane active w-100 flex-wrap">
<form class="w-100">
<div class="form-group">
<label>text</label>
<textarea class="form-control" id="input_text1" rows="3"
oninput="updateLink()">你好,こんにちは</textarea>
</div>
<div class="form-group">
<label>id</label>
<select class="form-control" id="input_id1" oninput="updateLink()">
{% for speaker in speakers["VITS"] %}
<option value="{{ speaker["id"] }}">{{ speaker["id"] }} | {{ speaker["name"] }}
| {{ speaker["lang"] }}</option>
{% endfor %}
{% if vits_speakers_count <=0 %}
<option value="" disabled selected hidden>未加载模型</option>
{% endif %}
</select>
</div>
</form>
<form class="w-100">
<div class="row">
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="默认为wav">format</label>
<select class="form-control" id="input_format1" oninput="updateLink()">
<option></option>
<option>wav</option>
<option>mp3</option>
<option>ogg</option>
<option>silk</option>
<option>flac</option>
</select>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="自动识别语言auto:可识别的语言根据不同speaker而不同,方言无法自动识别。方言模型需要手动指定语言,比如粤语Cantonese要指定参数lang=gd">lang</label>
<input type="text" class="form-control" id="input_lang1" oninput="updateLink()" value=""
placeholder="auto"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="调节语音长度,相当于调节语速,该数值越大语速越慢。">length</label>
<input type="number" class="form-control" id="input_length1" oninput="updateLink()" value=""
placeholder="1" min="0" step="0.001"/>
</div>
</div>
<div class="row">
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="样本噪声,控制合成的随机性。">noise</label>
<input type="number" class="form-control" id="input_noise1" oninput="updateLink()" value=""
placeholder="0.33" min="0" step="0.001"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="随机时长预测器噪声,控制音素发音长度。">noisew</label>
<input type="number" class="form-control" id="input_noisew1" oninput="updateLink()" value=""
placeholder="0.4" min="0" step="0.001"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="按标点符号分段,加起来大于max时为一段文本。max<=0表示不分段。">max</label>
<input type="number" class="form-control" id="input_max1" oninput="updateLink()" value=""
placeholder="50" step="1"/>
</div>
</div>
</form>
<div class="flex flex-wrap w-100"
style="justify-content: center; align-items: center; height: 80px; margin-top: 20px; margin-bottom: 20px; border: 1px solid rgba(0,0,0,.125); border-radius: 0.25rem;">
<button type="button" class="btn btn-outline-secondary" onclick="setAudioSource()"
style="margin-right: 10px">
播放器生成
</button>
<audio id="audioPlayer1" controls>
<source src="" type="audio/mp3"/>
Your browser does not support the audio element.
</audio>
<div class="form-group form-check">
<input type="checkbox" id="streaming1" onchange="updateLink()">
<label class="form-check-label" data-toggle="tooltip" data-placement="top"
title="按照max分段推理文本,推理好一段即输出,无需等待所有文本都推理完毕">流式响应</label>
</div>
</div>
</div>
<div class="content-pane w-100 flex-wrap">
<form class="w-100">
<div class="form-group">
<label>text</label>
<textarea class="form-control" id="input_text2" rows="3"
oninput="updateLink()">你好,こんにちは</textarea>
</div>
<div class="form-group">
<label>id</label>
<select class="form-control" id="input_id2" oninput="updateLink()">
{% for speaker in speakers["W2V2-VITS"] %}
<option value="{{ speaker["id"] }}">{{ speaker["id"] }} | {{ speaker["name"] }}
| {{ speaker["lang"] }}</option>
{% endfor %}
{% if w2v2_speakers_count <=0 %}
<option value="" disabled selected hidden>未加载模型</option>
{% endif %}
</select>
</div>
<div class="form-group mb-3">
<label data-toggle="tooltip" data-placement="top"
title="情感嵌入,{% if w2v2_emotion_count > 0 %}
可输入范围是0-{{ w2v2_emotion_count-1 }}
{% else %}
未加载emotion
{% endif %}">emotion</label>
<input type="number" class="form-control" min="0" max="{{ w2v2_emotion_count-1 }}" step="1"
id="emotion" value="0" oninput="updateLink()">
</div>
</form>
<form class="w-100">
<div class="row">
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="默认为wav">format</label>
<select class="form-control" id="input_format2" oninput="updateLink()">
<option></option>
<option>wav</option>
<option>mp3</option>
<option>ogg</option>
<option>silk</option>
<option>flac</option>
</select>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="自动识别语言auto:可识别的语言根据不同speaker而不同,方言无法自动识别。方言模型需要手动指定语言,比如粤语Cantonese要指定参数lang=gd">lang</label>
<input type="text" class="form-control" id="input_lang2" oninput="updateLink()" value=""
placeholder="auto"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="调节语音长度,相当于调节语速,该数值越大语速越慢。">length</label>
<input type="number" class="form-control" id="input_length2" oninput="updateLink()" value=""
placeholder="1" min="0" step="0.001"/>
</div>
</div>
<div class="row">
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="样本噪声,控制合成的随机性。">noise</label>
<input type="number" class="form-control" id="input_noise2" oninput="updateLink()" value=""
placeholder="0.33" min="0" step="0.001"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="随机时长预测器噪声,控制音素发音长度。">noisew</label>
<input type="number" class="form-control" id="input_noisew2" oninput="updateLink()" value=""
placeholder="0.4" min="0" step="0.001"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="按标点符号分段,加起来大于max时为一段文本。max<=0表示不分段。">max</label>
<input type="number" class="form-control" id="input_max2" oninput="updateLink()" value=""
placeholder="50" step="1"/>
</div>
</div>
</form>
<div class="flex flex-wrap w-100"
style="justify-content: center; align-items: center; height: 80px; margin-top: 20px; margin-bottom: 20px; border: 1px solid rgba(0,0,0,.125); border-radius: 0.25rem;">
<button type="button" class="btn btn-outline-secondary" onclick="setAudioSource()"
style="margin-right: 10px">
播放器生成
</button>
<audio id="audioPlayer2" controls>
<source src="" type="audio/mp3"/>
Your browser does not support the audio element.
</audio>
</div>
</div>
<div class="content-pane w-100 flex-wrap">
<form class="w-100">
<div class="form-group">
<label>text</label>
<textarea class="form-control" id="input_text3" rows="3"
oninput="updateLink()">你好</textarea>
</div>
<div class="form-group">
<label>id</label>
<select class="form-control" id="input_id3" oninput="updateLink()">
{% for speaker in speakers["BERT-VITS2"] %}
<option value="{{ speaker["id"] }}">{{ speaker["id"] }} | {{ speaker["name"] }}
| {{ speaker["lang"] }}</option>
{% endfor %}
{% if bert_vits2_speakers_count <=0 %}
<option value="" disabled selected hidden>未加载模型</option>
{% endif %}
</select>
</div>
</form>
<form class="w-100">
<div class="row">
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="默认为wav">format</label>
<select class="form-control" id="input_format3" oninput="updateLink()">
<option></option>
<option>wav</option>
<option>mp3</option>
<option>ogg</option>
<option>silk</option>
<option>flac</option>
</select>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="自动识别语言auto:可识别的语言根据不同speaker而不同,方言无法自动识别。方言模型需要手动指定语言,比如粤语Cantonese要指定参数lang=gd">lang</label>
<input type="text" class="form-control" id="input_lang3" oninput="updateLink()" value=""
placeholder="auto"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="调节语音长度,相当于调节语速,该数值越大语速越慢。">length</label>
<input type="number" class="form-control" id="input_length3" oninput="updateLink()" value=""
placeholder="1" min="0" step="0.001"/>
</div>
</div>
<div class="row">
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="样本噪声,控制合成的随机性。">noise</label>
<input type="number" class="form-control" id="input_noise3" oninput="updateLink()" value=""
placeholder="0.5" min="0" step="0.001"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="随机时长预测器噪声,控制音素发音长度。">noisew</label>
<input type="number" class="form-control" id="input_noisew3" oninput="updateLink()" value=""
placeholder="0.6" min="0" step="0.001"/>
</div>
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="按标点符号分段,加起来大于max时为一段文本。max<=0表示不分段。">max</label>
<input type="number" class="form-control" id="input_max3" oninput="updateLink()" value=""
placeholder="50" step="1"/>
</div>
</div>
<div class="row">
<div class="col-md-4 form-group">
<label data-toggle="tooltip" data-placement="top"
title="SDP/DP混合比:SDP在合成时的占比,理论上此比率越高,合成的语音语调方差越大。">sdp_radio</label>
<input type="number" class="form-control" id="input_sdp_ratio" oninput="updateLink()"
value=""
placeholder="0.2" step="0.01" min="0" max="1"/>
</div>
</div>
</form>
<div class="flex flex-wrap w-100"
style="justify-content: center; align-items: center; height: 80px; margin-top: 20px; margin-bottom: 20px; border: 1px solid rgba(0,0,0,.125); border-radius: 0.25rem;">
<button type="button" class="btn btn-outline-secondary" onclick="setAudioSource()"
style="margin-right: 10px">
播放器生成
</button>
<audio id="audioPlayer3" controls>
<source src="" type="audio/mp3"/>
Your browser does not support the audio element.
</audio>
</div>
</div>
</div>
<div class="mt-2">
{% if speakers_count == 0 %}
<div style="color: red;">未加载任何模型</div>
{% endif %}
<div>
<label>返回speakers(json):</label>
<a id="speakers_link" href="https://artrajz-vits-simple-api.hf.space/voice/speakers" target="_blank"
style="text-decoration: none; color: black">
https://artrajz-vits-simple-api.hf.space/voice/speakers
</a>
</div>
<div>
<label>API调用:</label>
<a id="vits_link" href="https://artrajz-vits-simple-api.hf.space/voice/vits?text=你好,こんにちは&id=164"
style="text-decoration: none; color: black">
https://artrajz-vits-simple-api.hf.space/voice/vits?text=你好,こんにちは&id=164
</a>
</div>
</div>
<div>
<h2>所有模型均为网络搜集,感谢模型原作者的付出!</h2>
<h2>请严格遵循模型原作者使用协议!模型一般都是禁止商用的!</h2>
<p>
Nene_Nanami_Rong_Tang:
<a href="https://github.com/CjangCjengh/TTSModels" rel="noreferrer" target="_blank">CjangCjengh/TTSModels</a>
</p>
<p>
louise:
<a href="https://github.com/CjangCjengh/TTSModels" rel="noreferrer" target="_blank">CjangCjengh/TTSModels</a>
</p>
<p>
Cantonese:
<a href="https://github.com/CjangCjengh/TTSModels" rel="noreferrer" target="_blank">CjangCjengh/TTSModels</a>
</p>
<p>
shanghainese:
<a href="https://github.com/CjangCjengh/TTSModels" rel="noreferrer" target="_blank">CjangCjengh/TTSModels</a>
</p>
<p>
w2v2-vits:
<a href="https://github.com/CjangCjengh/TTSModels" rel="noreferrer" target="_blank">CjangCjengh/TTSModels</a>
</p>
<p>
vctk:
<a href="https://github.com/jaywalnut310/vits" rel="noreferrer" target="_blank">jaywalnut310/vits</a>
</p>
<p>
Bishojo Mangekyo:
<a href="https://github.com/Francis-Komizu/VITS" rel="noreferrer" target="_blank">Francis-Komizu/VITS</a>
</p>
<p>
genshin:
<a href="https://huggingface.co/spaces/zomehwh/vits-uma-genshin-honkai" rel="noreferrer" target="_blank">zomehwh/vits-uma-genshin-honkai</a>
</p>
<p>
paimon:
<a href="https://github.com/zixiiu/Digital_Life_Server" rel="noreferrer" target="_blank">zixiiu/Digital_Life_Server</a>
</p>
<p>
vits_chinese:
<a href="https://github.com/PlayVoice/vits_chinese" rel="noreferrer" target="_blank">PlayVoice/vits_chinese</a>
</p>
</div>
</div>
<br/>
</main>
<script src="/static/js/jquery.slim.min.js"></script>
<script src="/static/js/bootstrap.bundle.min.js"></script>
<script>
$(function () {
$('[data-toggle="tooltip"]').tooltip()
})
function getProtocol() {
return 'https:' == location.protocol ? "https://" : "http://";
}
function getUrl() {
var url = window.location.host;
return url;
}
var baseUrl = getProtocol() + getUrl();
var model_type = 1;
var vits_status = false;
var w2v2_status = false;
var bert_vits2_status = false;
{% if vits_speakers_count > 0 %}
vits_status = true;
{% endif %}
{% if w2v2_speakers_count > 0 %}
w2v2_status = true;
{% endif %}
{% if bert_vits2_speakers_count > 0 %}
bert_vits2_status = true;
{% endif %}
setBaseUrl();
function setBaseUrl() {
var text = document.getElementById("input_text" + model_type).value;
var id = document.getElementById("input_id" + model_type).value;
var vits_link = document.getElementById("vits_link");
var speakers_link = document.getElementById("speakers_link");
var vits_url = baseUrl + "/voice/vits?text=" + text + "&id=" + id;
var speakers_url = baseUrl + "/voice/speakers";
vits_link.href = vits_url;
vits_link.textContent = vits_url;
speakers_link.href = speakers_url;
speakers_link.textContent = speakers_url;
}
function getLink() {
var text = document.getElementById("input_text" + model_type).value;
var id = document.getElementById("input_id" + model_type).value;
var format = document.getElementById("input_format" + model_type).value;
var lang = document.getElementById("input_lang" + model_type).value;
var length = document.getElementById("input_length" + model_type).value;
var noise = document.getElementById("input_noise" + model_type).value;
var noisew = document.getElementById("input_noisew" + model_type).value;
var max = document.getElementById("input_max" + model_type).value;
if (model_type == 1) {
var url = baseUrl + "/voice/vits?text=" + text + "&id=" + id;
var streaming = document.getElementById('streaming' + model_type);
} else if (model_type == 2) {
var emotion = document.getElementById('emotion').value;
var url = baseUrl + "/voice/w2v2-vits?text=" + text + "&id=" + id + "&emotion=" + emotion;
} else if (model_type == 3) {
var sdp_ratio = document.getElementById("input_sdp_ratio").value;
var url = baseUrl + "/voice/bert-vits2?text=" + text + "&id=" + id;
}
if (format != "") {
url += "&format=" + format;
}
if (lang != "") {
url += "&lang=" + lang;
}
if (length != "") {
url += "&length=" + length;
}
if (noise != "") {
url += "&noise=" + noise;
}
if (noisew != "") {
url += "&noisew=" + noisew;
}
if (max != "") {
url += "&max=" + max;
}
if (model_type == 1 && streaming.checked) {
url += '&streaming=true';
}
if (model_type == 3 && sdp_ratio != "") {
url += "&sdp_ratio=" + sdp_ratio;
}
return url;
}
function updateLink() {
var url = getLink();
var link = document.getElementById("vits_link");
link.href = url;
link.textContent = url;
}
function setAudioSource() {
if (model_type == 1 && !vits_status) {
alert("未加载VITS模型");
return;
}
if (model_type == 2 && !w2v2_status) {
alert("未加载W2V2-VITS模型");
return;
}
if (model_type == 3 && !bert_vits2_status) {
alert("未加载Bert-VITS2模型");
return;
}
var url = getLink();
// Add a timestamp parameter to prevent browser caching
var timestamp = new Date().getTime();
url += '&t=' + timestamp;
var audioPlayer = document.getElementById("audioPlayer" + model_type);
audioPlayer.src = url;
audioPlayer.play();
}
function showContent(index) {
const panes = document.querySelectorAll(".content-pane");
const buttons = document.querySelectorAll(".tab-button");
model_type = index + 1;
for (let i = 0; i < panes.length; i++) {
if (i === index) {
panes[i].classList.add("active");
buttons[i].classList.add("active");
} else {
panes[i].classList.remove("active");
buttons[i].classList.remove("active");
}
}
updateLink();
}
document.querySelectorAll('.slider-group').forEach(function (group) {
group.addEventListener("input", function (event) {
if (event.target.matches('.slider')) {
let value = event.target.value;
group.querySelector('.slider-input').value = value;
group.querySelector('.slider-value').textContent = value;
} else if (event.target.matches('.slider-input')) {
let value = event.target.value;
group.querySelector('.slider').value = value;
group.querySelector('.slider-value').textContent = value;
}
});
});
</script>
</body>
</html>