Akjava's picture
Update index.html
cbb1027 verified
<!doctype html>
<html lang="en">
<head>
<!-- The charset declaration comes first so the encoding is known before any other head content is parsed. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="style.css" />
<title>Match-TTS Onnx Benchmarks</title>
</head>
<body>
<h1>Match-TTS Onnx Benchmarks</h1>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
<script type="module">
import { MatchaTTSRaw } from "./js-esm/matcha_tts_raw.js";
import { webWavPlay } from "./js-esm/web_wav_play.js";
import { arpa_to_ipa } from "./js-esm/arpa_to_ipa.js";
import { loadCmudict } from "./js-esm/cmudict_loader.js";
import { env,textToArpa} from "./js-esm/text_to_arpa.js";
// Configuration of the `env` object imported from text_to_arpa.js:
// allow local models, look for them under ./models/, and log ONNX errors only.
env.allowLocalModels = true;
env.localModelPath = "./models/";
env.backends.onnx.logLevel = "error";

// Shared page state, read and written by the functions below.
// matcha_tts_raw    : current MatchaTTSRaw instance; falsy means "no model loaded yet"
// loaded_model_name : name of the model that was last loaded (shown in #result1)
// load_time         : duration of the last model load, in seconds
let matcha_tts_raw, loaded_model_name, load_time;
// cmudict  : dictionary object handed to loadCmudict() and textToArpa()
// speaking : true while main() is running
let cmudict = {}, speaking = false;
// Running totals for the average shown in #result2 (time is in milliseconds).
let total_infer_time = 0, count_infer = 0;
/**
 * Synthesize the text in #textInput with Matcha-TTS and play the result.
 *
 * When no model is loaded yet, the ONNX model and the CMU dictionary are
 * loaded first and the load time is shown via update_infer_bench1().
 * A call that arrives while a previous call is still running is ignored.
 *
 * @param {string|Event} model_name File stem under ./models/matcha-tts/.
 *   Any non-string value (for example the click Event when this function is
 *   used directly as a handler) selects the default model.
 * @returns {Promise<void>}
 */
async function main(model_name) {
  if (typeof model_name !== 'string') {//via button click
    model_name ="en001_ep6399_univ_simplify"
  }
  console.log(model_name)
  if (speaking){
    // Fix: the original logged this message but fell through, so overlapping
    // runs were never actually prevented.
    console.log("speaking return")
    return
  }
  speaking = true
  try {
    console.log("main called")
    let tts = matcha_tts_raw
    if(!tts){
      const load_startTime = performance.now();
      tts = new MatchaTTSRaw()
      console.time("load model");
      const model_path = `./models/matcha-tts/${model_name}.onnx`
      console.log(model_path)
      try {
        await tts.load_model(model_path,{ executionProviders: ['webgpu','wasm'] });
      } finally {
        // Always close the timer so a failed load does not leave it dangling.
        console.timeEnd("load model");
      }
      // Publish the instance only after load_model() resolved; a failed load
      // therefore leaves matcha_tts_raw empty and the next call retries.
      matcha_tts_raw = tts
      load_time = (performance.now() - load_startTime)/1000 //sec
      loaded_model_name = model_name
      await loadCmudict(cmudict,'./dictionaries/cmudict-0.7b')
      update_infer_bench1()
    }else{
      console.log("session exist skip load model")
    }

    // The measured time covers text -> ARPAbet -> IPA conversion plus inference.
    const startTime = performance.now();
    const text = document.getElementById('textInput').value
    console.log("### textToArpa call")
    const arpa_text = await textToArpa(cmudict,text)
    console.log("### arpa returned")
    // All whitespace is stripped from the IPA string before inference.
    const ipa_text = arpa_to_ipa(arpa_text).replace(/\s/g, "");
    const spks = 0
    const speed = document.getElementById('speed').value
    const temperature = document.getElementById('temperature').value
    console.time("infer");
    let result
    try {
      result = await tts.infer(ipa_text, temperature, speed,spks);
    } finally {
      // Closed on every path (null result or exception) so the next run can
      // start a fresh "infer" timer.
      console.timeEnd("infer");
    }
    if (result!=null){
      const endTime = performance.now();
      const infer_time = endTime-startTime
      total_infer_time+=infer_time
      count_infer += 1
      update_infer_bench2()
      webWavPlay(result)
    }
  } finally {
    // Release the busy flag even when loading or inference throws; otherwise
    // every later call would be rejected by the guard above.
    speaking = false
  }
}
// Write "<model name> load time <seconds> sec" into #result1,
// using the module-level loaded_model_name and load_time.
function update_infer_bench1(){
  const seconds = load_time.toFixed(1);
  const message = loaded_model_name + " load time " + seconds + " sec";
  document.getElementById('result1').innerText = message;
}
// Write the inference count and the mean inference time (in seconds,
// one decimal) into #result2. total_infer_time is accumulated in milliseconds.
function update_infer_bench2(){
  const mean_seconds = total_infer_time / count_infer / 1000;
  const message = "Infer Count " + count_infer + " avg infer-time " + mean_seconds.toFixed(1) + " sec";
  document.getElementById('result2').innerText = message;
}
// Copy the value of #spks into #spks_label, left-padded with zeros to 3 characters.
// NOTE(review): no 'spks' / 'spks_label' element and no caller of this function
// appear in the visible part of the file - confirm whether it is still needed.
function update_range(){
  const { value } = document.getElementById('spks');
  const padded = value.toString().padStart(3, '0');
  document.getElementById('spks_label').textContent = padded;
}
// Mirror the current value of the #temperature slider into its value label.
// The label id really is spelled 'tempature_label' in the markup.
function update_range2(){
  const { value } = document.getElementById('temperature');
  document.getElementById('tempature_label').textContent = value;
}
// Mirror the current value of the #speed slider into #speed_label.
function update_range3(){
  const { value } = document.getElementById('speed');
  document.getElementById('speed_label').textContent = value;
}
// Once the page has loaded, attach the UI handlers:
// the speak button runs main(), and each slider refreshes its value label on change.
window.onload = async function(){
  const bindings = [
    ['myButton', 'onclick', main],
    ['temperature', 'onchange', update_range2],
    ['speed', 'onchange', update_range3],
  ];
  for (const [element_id, handler_name, handler] of bindings) {
    document.getElementById(element_id)[handler_name] = handler;
  }
}
/**
 * Switch to another model: clear the benchmark counters, drop the current
 * model instance and run main() so the new model is loaded and used once.
 *
 * @param {string} model_name File stem of the model, passed on to main().
 */
function loadModel(model_name){
  // Fix: do nothing while main() is still running. Resetting here would null
  // matcha_tts_raw underneath the in-flight run (which dereferences it for
  // inference after its awaits) and would mix the counters of two models.
  if (speaking){
    console.log("speaking return")
    return
  }
  total_infer_time=0
  count_infer=0
  matcha_tts_raw=null
  main(model_name)
}
// Build (but do not attach) a <button> captioned `label` that calls
// loadModel(model_name) when clicked.
function create_button(label, model_name) {
  // Create the button element.
  const model_button = document.createElement('button');
  model_button.textContent = label;
  model_button.style = "margin:4px;";
  // Install the click handler.
  model_button.onclick = () => {
    loadModel(model_name);
  };
  return model_button;
}
// Model picker: one inner array per visual row, each entry is [button label, model name].
// Rows are separated by a <br> inside #buttons.
// The step20 variants (en001_6399_T2_step20, en001_6399_univ_step20) were
// commented out in the original list and are still not offered.
const model_button_rows = [
  [
    ["ljspeech", "ljspeech_sim"],
    ["ljspeech-quantized", "ljspeech_sim_q8"],
    ["vctk", "vctk_univ_simplify"],
    ["vctk-quantized", "vctk_univ_simplify_q8"],
    ["en001", "en001_ep6399_univ_simplify"],
    ["en001-quantized", "en001_ep6399_univ_simplify_q8"],
  ],
  [
    ["en001-t2-step01", "en001_6399_T2_step01"],
    ["en001-t2-step05", "en001_6399_T2_step05"],
    ["en001-t2-step10", "en001_6399_T2_step10"],
  ],
  [
    ["en001-univ-step01", "en001_6399_univ_step01"],
    ["en001-univ-step05", "en001_6399_univ_step05"],
    ["en001-univ-step10", "en001_6399_univ_step10"],
  ],
];
model_button_rows.forEach((row, row_index) => {
  if (row_index > 0) {
    document.getElementById('buttons').appendChild(document.createElement('br'));
  }
  for (const [label, model_name] of row) {
    document.getElementById('buttons').appendChild(create_button(label, model_name));
  }
});
</script>
<div id="result1">Click button to load a model</div>
 <div id="buttons"></div>
<br>
<div id="result2">en001-T2 and en001-univ are experimental</div>
<br><br>
<!-- The placeholder is not an accessible name, so aria-label names the field without changing the layout. -->
<input type="text" id="textInput" value ="Hello Huggingface." placeholder="Enter some text here..." aria-label="Text to speak">
<!-- An explicit type keeps this an action button even if it is ever placed inside a form. -->
<button type="button" id="myButton">Text To Speak</button><br>
<label for ="temperature" style="width: 110px;display: inline-block;">Temperature</label>
<input type="range" id="temperature" min="0" max="1.0" value="0.5" step="0.1"/>
<label for ="temperature" id="tempature_label">0.5</label><br>
<label for ="speed" style="width: 110px;display: inline-block;">Speed</label>
<input type="range" id="speed" min="0.1" max="2.0" value="1.0" step="0.1"/>
<label for ="speed" id="speed_label">1.0</label>
<br>
<br>
<div>Loading a model takes about 15 sec, and TTS for a short text takes about 2 sec (on my 2070 Super GPU).</div><br>
<div>The quantized versions are very slow; so far they exist only because of the 100 MB limit on GitHub Pages.</div><br>
<div>The multi-speaker model (vctk) is a little slower than the single-speaker models. The default number of timesteps is 5 (the smallest value, 1, is about 300 msec faster, but the audio quality becomes lower).</div>
<br>
<div id="footer">
<b>Spaces</b><br>
<a href="https://huggingface.co/spaces/Akjava/matcha-tts_vctk-onnx" style="font-size: 9px" target="link">Match-TTS VCTK-ONNX</a> |
<a href="https://huggingface.co/spaces/Akjava/matcha-tts-onnx-benchmarks" style="font-size: 9px" target="link">Match-TTS ONNX-Benchmark</a> |
<br><br>
<b>Credits</b><br>
<a href="https://github.com/akjava/Matcha-TTS-Japanese" style="font-size: 9px" target="link">Matcha-TTS-Japanese</a> |
<a href = "http://www.udialogue.org/download/cstr-vctk-corpus.html" style="font-size: 9px" target="link">CSTR VCTK Corpus</a> |
<a href = "https://github.com/cmusphinx/cmudict" style="font-size: 9px" target="link">CMUDict</a> |
<a href = "https://huggingface.co/docs/transformers.js/index" style="font-size: 9px" target="link">Transformer.js</a> |
<a href = "https://huggingface.co/cisco-ai/mini-bart-g2p" style="font-size: 9px" target="link">mini-bart-g2p</a> |
<a href = "https://onnxruntime.ai/docs/get-started/with-javascript/web.html" style="font-size: 9px" target="link">ONNXRuntime-Web</a> |
<a href = "https://github.com/akjava/English-To-IPA-Collections" style="font-size: 9px" target="link">English-To-IPA-Collections</a> |
<a href ="https://huggingface.co/papers/2309.03199" style="font-size: 9px" target="link">Matcha-TTS Paper</a>
</div>
</body>
</html>