File size: 9,858 Bytes
0fcbf28 3eeee98 ccff356 3eeee98 ccff356 ee3b864 3eeee98 76849d7 3eeee98 1d88bdc 3eeee98 5d2461b 3ef7bcd 248976b d06dbc0 8e6e415 d06dbc0 cd81b30 3eeee98 aefaec7 3eeee98 3ef7bcd 3eeee98 b3f6e0e c418a9e 3eeee98 3ef7bcd 0537f85 3ef7bcd 3eeee98 3ef7bcd 747ffc5 3eeee98 aefaec7 3eeee98 9dc01a4 3eeee98 9dc01a4 3eeee98 9dc01a4 3eeee98 6be5772 3eeee98 6be5772 5d2461b 3ef7bcd 3eeee98 5d2461b 3eeee98 5d2461b 3ef7bcd 0537f85 c5889e1 3ef7bcd 0537f85 3ef7bcd 5d2461b 3eeee98 cb8e9a3 3eeee98 cd81b30 8cd4a84 cd81b30 bbebd26 d1ceb3d bbebd26 706ce8f 4f21fc5 bbebd26 758ff20 d1ceb3d 758ff20 d1ceb3d 1112b3e e2f7493 2c00960 1112b3e e2f7493 2c00960 e2f7493 5a70475 1a25f98 bbebd26 c33769f 71f261f bbebd26 3ef7bcd 2c00960 5d2461b 1d88bdc 3eeee98 05c2457 47b61b6 ee3b864 2ea5795 05c2457 3eeee98 1ff7910 cbb1027 f32cb08 3eeee98 0fcbf28 |
|
<!doctype html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width" />
<link rel="stylesheet" href="style.css" />
<meta charset="UTF-8">
<title>Matcha-TTS ONNX Benchmarks</title>
</head>
<body>
<h1>Matcha-TTS ONNX Benchmarks</h1>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
<script type="module">
import { MatchaTTSRaw } from "./js-esm/matcha_tts_raw.js";
import { webWavPlay } from "./js-esm/web_wav_play.js";
import { arpa_to_ipa } from "./js-esm/arpa_to_ipa.js";
import { loadCmudict } from "./js-esm/cmudict_loader.js";
import { env,textToArpa} from "./js-esm/text_to_arpa.js";
env.allowLocalModels = true;
env.localModelPath = "./models/";
env.backends.onnx.logLevel = "error";
let matcha_tts_raw
let cmudict ={}
let speaking = false
let total_infer_time=0
let count_infer=0
let loaded_model_name
let load_time
async function main(model_name) {
if (typeof model_name !== 'string') {//via button click
model_name ="en001_ep6399_univ_simplify"
}
console.log(model_name)
if (speaking){
console.log("speaking return")
}
speaking = true
console.log("main called")
if(!matcha_tts_raw){
const load_startTime = performance.now();
matcha_tts_raw = new MatchaTTSRaw()
console.time("load model");
const model_path = `./models/matcha-tts/${model_name}.onnx`
console.log(model_path)
await matcha_tts_raw.load_model(model_path,{ executionProviders: ['webgpu','wasm'] });
console.timeEnd("load model");
load_time = (performance.now() - load_startTime)/1000 //sec
loaded_model_name = model_name
let cmudictReady = loadCmudict(cmudict,'./dictionaries/cmudict-0.7b')
await cmudictReady
update_infer_bench1()
}else{
console.log("session exist skip load model")
}
const startTime = performance.now();
const text = document.getElementById('textInput').value
console.log("### textToArpa call")
const arpa_text = await textToArpa(cmudict,text)
console.log("### arpa returned")
const ipa_text = arpa_to_ipa(arpa_text).replace(/\s/g, "");
//console.log(ipa_text)
const spks = 0
const speed = document.getElementById('speed').value
const tempature = document.getElementById('temperature').value
console.time("infer");
const result = await matcha_tts_raw.infer(ipa_text, tempature, speed,spks);
if (result!=null){
console.timeEnd("infer");
const endTime = performance.now();
const infer_time = endTime-startTime
total_infer_time+=infer_time
count_infer += 1
update_infer_bench2()
webWavPlay(result)
}
speaking = false
}
function update_infer_bench1(){
const text = `${loaded_model_name} load time ${load_time.toFixed(1)} sec`;
document.getElementById('result1').innerText=text
}
function update_infer_bench2(){
const avg = (total_infer_time/count_infer)/1000
const text = `Infer Count ${count_infer} avg infer-time ${avg.toFixed(1)} sec`;
document.getElementById('result2').innerText=text
}
function update_range(){
const value = document.getElementById('spks').value
let formattedNumber = value.toString().padStart(3, '0');
document.getElementById('spks_label').textContent = formattedNumber
}
function update_range2(){
const value = document.getElementById('temperature').value
//let formattedNumber = value.toString().padStart(3, '0');
document.getElementById('tempature_label').textContent = value//formattedNumber
}
function update_range3(){
const value = document.getElementById('speed').value
//let formattedNumber = value.toString().padStart(3, '0');
document.getElementById('speed_label').textContent = value//sformattedNumber
}
window.onload = async function(){
//document.getElementById('textInput').onchange = main;
document.getElementById('myButton').onclick = main;
document.getElementById('temperature').onchange = update_range2
document.getElementById('speed').onchange = update_range3
}
function loadModel(model_name){
total_infer_time=0
count_infer=0
matcha_tts_raw=null
main(model_name)
}
function create_button(label, model_name) {
// ボタンの作成
const button = document.createElement('button');
button.style ="margin:4px;"
button.textContent = label;
// クリックイベントハンドラの設定
button.onclick = function() {
loadModel(model_name);
};
return button
}
document.getElementById('buttons').appendChild(create_button("ljspeech","ljspeech_sim"))
document.getElementById('buttons').appendChild(create_button("ljspeech-quantized","ljspeech_sim_q8"))
document.getElementById('buttons').appendChild(create_button("vctk","vctk_univ_simplify"))
document.getElementById('buttons').appendChild(create_button("vctk-quantized","vctk_univ_simplify_q8"))
document.getElementById('buttons').appendChild(create_button("en001","en001_ep6399_univ_simplify"))
document.getElementById('buttons').appendChild(create_button("en001-quantized","en001_ep6399_univ_simplify_q8"))
document.getElementById('buttons').appendChild(document.createElement('br'))
document.getElementById('buttons').appendChild(create_button("en001-t2-step01","en001_6399_T2_step01"))
document.getElementById('buttons').appendChild(create_button("en001-t2-step05","en001_6399_T2_step05"))
document.getElementById('buttons').appendChild(create_button("en001-t2-step10","en001_6399_T2_step10"))
//document.getElementById('buttons').appendChild(create_button("en001-t2-step20","en001_6399_T2_step20"))
document.getElementById('buttons').appendChild(document.createElement('br'))
document.getElementById('buttons').appendChild(create_button("en001-univ-step01","en001_6399_univ_step01"))
document.getElementById('buttons').appendChild(create_button("en001-univ-step05","en001_6399_univ_step05"))
document.getElementById('buttons').appendChild(create_button("en001-univ-step10","en001_6399_univ_step10"))
//document.getElementById('buttons').appendChild(create_button("en001-univ-step20","en001_6399_univ_step20"))
</script>
<div id="result1">Click button to load a model</div>
<div id="buttons"></div>
<br>
<div id="result2">en001-T2 and en001-univ are experimental</div>
<br><br>
<input type="text" id="textInput" value ="Hello Huggingface." placeholder="Enter some text here...">
<button id="myButton">Text To Speak</button><br>
<label for ="temperature" style="width: 110px;display: inline-block;">Temperature</label>
<input type="range" id="temperature" min="0" max="1.0" value="0.5" step="0.1"/>
<label for ="temperature" id="tempature_label">0.5</label><br>
<label for ="speed" style="width: 110px;display: inline-block;">Speed</label>
<input type="range" id="speed" min="0.1" max="2.0" value="1.0" step="0.1"/>
<label for ="speed" id="speed_label">1.0</label>
<br>
<br>
<div>Load time is about 15 sec; synthesizing a short text takes about 2 sec (on my RTX 2070 Super GPU).</div><br>
<div>The quantized versions are too slow; so far they exist only because of the GitHub Pages 100 MB limit.</div><br>
<div>The multi-speaker model (vctk) is a little slower than the single-speaker models. The default number of timesteps is 5 (the smallest value, 1, is about 300 msec faster, but the audio quality becomes lower).</div>
<br>
<div id="footer">
<b>Spaces</b><br>
<a href="https://huggingface.co/spaces/Akjava/matcha-tts_vctk-onnx" style="font-size: 9px" target="link">Matcha-TTS VCTK-ONNX</a> |
<a href="https://huggingface.co/spaces/Akjava/matcha-tts-onnx-benchmarks" style="font-size: 9px" target="link">Matcha-TTS ONNX-Benchmark</a> |
<br><br>
<b>Credits</b><br>
<a href="https://github.com/akjava/Matcha-TTS-Japanese" style="font-size: 9px" target="link">Matcha-TTS-Japanese</a> |
<a href = "http://www.udialogue.org/download/cstr-vctk-corpus.html" style="font-size: 9px" target="link">CSTR VCTK Corpus</a> |
<a href = "https://github.com/cmusphinx/cmudict" style="font-size: 9px" target="link">CMUDict</a> |
<a href = "https://huggingface.co/docs/transformers.js/index" style="font-size: 9px" target="link">Transformers.js</a> |
<a href = "https://huggingface.co/cisco-ai/mini-bart-g2p" style="font-size: 9px" target="link">mini-bart-g2p</a> |
<a href = "https://onnxruntime.ai/docs/get-started/with-javascript/web.html" style="font-size: 9px" target="link">ONNXRuntime-Web</a> |
<a href = "https://github.com/akjava/English-To-IPA-Collections" style="font-size: 9px" target="link">English-To-IPA-Collections</a> |
<a href ="https://huggingface.co/papers/2309.03199" style="font-size: 9px" target="link">Matcha-TTS Paper</a>
</div>
</body>
</html>
|