Spaces:

Akjava
/

matcha-tts-onnx-benchmarks

Running

App Files Files Community

matcha-tts-onnx-benchmarks / index.html

Akjava

Update index.html

2ea5795 verified 4 months ago

raw

history blame

9.54 kB

	<!doctype html>
	<html lang="en">
	<head>
	<meta name="viewport" content="width=device-width" />
	<link rel="stylesheet" href="style.css" />
	<meta charset="UTF-8">
	<title>Matcha-TTS ONNX Benchmarks</title>
	</head>
	<body>
	<h1>Matcha-TTS ONNX Benchmarks</h1>


	<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
	<script type="module">
	import { MatchaTTSRaw } from "./js-esm/matcha_tts_raw.js";
	import { webWavPlay } from "./js-esm/web_wav_play.js";
	import { arpa_to_ipa } from "./js-esm/arpa_to_ipa.js";
	import { loadCmudict } from "./js-esm/cmudict_loader.js";
	import { env,textToArpa} from "./js-esm/text_to_arpa.js";

	// Settings on the `env` object imported from text_to_arpa.js.
	// NOTE(review): presumably the Transformers.js environment object - confirm.
	Object.assign(env, {
		allowLocalModels: true,
		localModelPath: "./models/",
	});
	env.backends.onnx.logLevel = "error";

	// --- Module state shared by the functions below ---
	let matcha_tts_raw;        // current MatchaTTSRaw instance; falsy until a model is loaded
	const cmudict = {};        // dictionary object handed to loadCmudict() and textToArpa()
	let speaking = false;      // true while main() is running
	let total_infer_time = 0;  // sum of measured run times, in milliseconds
	let count_infer = 0;       // number of measured runs included in the total
	let loaded_model_name;     // name of the model shown in #result1
	let load_time;             // model load time in seconds
	/**
	 * Load the model if none is loaded, then synthesize the text in #textInput
	 * and play the result.
	 *
	 * Only one run is allowed at a time: a call made while another run is in
	 * progress logs a message and returns immediately.
	 *
	 * @param {string|Event} model_name - Model file name (without ".onnx").
	 *   When called as the click handler of #myButton this is the click Event,
	 *   in which case the default model is used.
	 * @returns {Promise<void>} Rejects if loading or inference throws; the
	 *   `speaking` flag is always released first.
	 */
	async function main(model_name) {
		// Anything that is not a string (e.g. a click Event) selects the default model.
		const resolved_model_name = typeof model_name === 'string'
			? model_name
			: "en001_ep6399_univ_simplify"

		console.log(resolved_model_name)
		if (speaking){
			console.log("speaking return")
			return
		}

		speaking = true
		console.log("main called")
		try {
			if(!matcha_tts_raw){
				const load_startTime = performance.now();
				const new_tts = new MatchaTTSRaw()
				const model_path = `./models/matcha-tts/${resolved_model_name}.onnx`
				console.log(model_path)
				console.time("load model");
				try {
					await new_tts.load_model(model_path,{ executionProviders: ['webgpu','wasm'] });
				} finally {
					// Always close the timer so a failed load does not leave the label open.
					console.timeEnd("load model");
				}
				const elapsed_load_time = (performance.now() - load_startTime)/1000 //sec

				await loadCmudict(cmudict,'./dictionaries/cmudict-0.7b')

				// Publish the instance only after everything loaded, so a failed load
				// is retried on the next call instead of reusing a broken instance.
				matcha_tts_raw = new_tts
				load_time = elapsed_load_time
				loaded_model_name = resolved_model_name
				update_infer_bench1()
			}else{
				console.log("session exist skip load model")
			}

			// Keep a local reference: loadModel() may null the shared variable
			// while this run is still awaiting.
			const tts = matcha_tts_raw

			// The measured time includes text -> ARPA -> IPA conversion.
			const startTime = performance.now();
			const text = document.getElementById('textInput').value
			console.log("### textToArpa call")
			const arpa_text = await textToArpa(cmudict,text)
			console.log("### arpa returned")
			const ipa_text = arpa_to_ipa(arpa_text).replace(/\s/g, "");

			const spks = 0
			const speed = document.getElementById('speed').value
			const tempature = document.getElementById('temperature').value

			console.time("infer");
			let result
			try {
				result = await tts.infer(ipa_text, tempature, speed,spks);
			} finally {
				console.timeEnd("infer");
			}

			// Only non-null results are counted in the benchmark and played.
			if (result!=null){
				const infer_time = performance.now()-startTime
				total_infer_time+=infer_time
				count_infer += 1
				update_infer_bench2()
				webWavPlay(result)
			}
		} finally {
			// Release the guard even on failure, otherwise every later call would be rejected.
			speaking = false
		}
	}
	/** Show the loaded model's name and its load time (one decimal, seconds) in #result1. */
	function update_infer_bench1(){
		const seconds = load_time.toFixed(1)
		const summary = `${loaded_model_name} load time ${seconds} sec`
		const target = document.getElementById('result1')
		target.innerText = summary
	}

	/** Show the run count and the average run time (one decimal, seconds) in #result2. */
	function update_infer_bench2(){
		// total_infer_time is accumulated in milliseconds; convert the mean to seconds.
		const avg_seconds = total_infer_time / count_infer / 1000
		const summary = `Infer Count ${count_infer} avg infer-time ${avg_seconds.toFixed(1)} sec`
		const target = document.getElementById('result2')
		target.innerText = summary
	}
	/**
	 * Mirror the #spks slider value into #spks_label, zero-padded to 3 digits.
	 *
	 * This page currently has no #spks / #spks_label elements, so the function
	 * does nothing when either is missing instead of throwing a TypeError.
	 */
	function update_range(){
		const slider = document.getElementById('spks')
		const label = document.getElementById('spks_label')
		if (!slider || !label) {
			return
		}
		label.textContent = String(slider.value).padStart(3, '0')
	}
	/** Copy the current #temperature slider value into its label (#tempature_label). */
	function update_range2(){
		const { value: temperature } = document.getElementById('temperature')
		const label = document.getElementById('tempature_label')
		label.textContent = temperature
	}
	/** Copy the current #speed slider value into its label (#speed_label). */
	function update_range3(){
		const { value: speed } = document.getElementById('speed')
		const label = document.getElementById('speed_label')
		label.textContent = speed
	}

	// Wire the page controls to their handlers once the window has loaded.
	window.onload = async () => {
		const by_id = (id) => document.getElementById(id)

		// main() receives the click Event here and falls back to its default model.
		by_id('myButton').onclick = main;

		// Keep the slider labels in sync with the slider values.
		by_id('temperature').onchange = update_range2
		by_id('speed').onchange = update_range3
	}
	/**
	 * Reset the benchmark counters, discard the current model and run main()
	 * with the given model.
	 *
	 * @param {string} model_name - Model file name (without ".onnx") passed to main().
	 * @returns {Promise<void>} Settles when main() finishes. Failures are logged
	 *   here so a fire-and-forget caller (a button click handler) does not
	 *   produce an unhandled promise rejection.
	 */
	function loadModel(model_name){
		total_infer_time=0
		count_infer=0
		matcha_tts_raw=null
		return main(model_name).catch((error) => {
			console.error(`failed to load or run model "${model_name}"`, error)
		})
	}

	/**
	 * Build a button that loads the given model when clicked.
	 *
	 * @param {string} label - Text shown on the button.
	 * @param {string} model_name - Model name passed to loadModel() on click.
	 * @returns {HTMLButtonElement} The new button (not yet attached to the page).
	 */
	function create_button(label, model_name) {
		// Create the button element.
		const element = document.createElement('button');

		// Set up the click event handler.
		element.onclick = () => {
			loadModel(model_name);
		};

		element.textContent = label;
		element.style ="margin:4px;"
		return element
	}


	// Model-selection buttons, one inner array per row on the page.
	// Each entry is [button label, model name passed to loadModel()].
	const model_button_rows = [
		[
			["ljspeech","ljspeech_sim"],
			["ljspeech-quantized","ljspeech_sim_q8"],
			["vctk","vctk_univ_simplify"],
			["vctk-quantized","vctk_univ_simplify_q8"],
			["en001","en001_ep6399_univ_simplify"],
			["en001-quantized","en001_ep6399_univ_simplify_q8"],
		],
		[
			["en001-t2-step01","en001_6399_T2_step01"],
			["en001-t2-step05","en001_6399_T2_step05"],
			["en001-t2-step10","en001_6399_T2_step10"],
			// disabled: ["en001-t2-step20","en001_6399_T2_step20"],
		],
		[
			["en001-univ-step01","en001_6399_univ_step01"],
			["en001-univ-step05","en001_6399_univ_step05"],
			["en001-univ-step10","en001_6399_univ_step10"],
			// disabled: ["en001-univ-step20","en001_6399_univ_step20"],
		],
	]

	model_button_rows.forEach((row, row_index) => {
		// Rows after the first start on a new line.
		if (row_index > 0) {
			document.getElementById('buttons').appendChild(document.createElement('br'))
		}
		for (const [label, model_name] of row) {
			document.getElementById('buttons').appendChild(create_button(label, model_name))
		}
	})




	</script>
	<div id="result1">Click button to load a model</div>

	<div id="buttons"></div>
	<br>
	<div id="result2">en001-T2 and en001-univ are experimental</div>
	<br><br>
	<input type="text" id="textInput" value ="Hello Huggingface." placeholder="Enter some text here...">

	<button id="myButton">Text To Speak</button><br>


	<label for ="temperature" style="width: 110px;display: inline-block;">Temperature</label>
	<input type="range" id="temperature" min="0" max="1.0" value="0.5" step="0.1"/>
	<label for ="temperature" id="tempature_label">0.5</label><br>

	<label for ="speed" style="width: 110px;display: inline-block;">Speed</label>
	<input type="range" id="speed" min="0.1" max="2.0" value="1.0" step="0.1"/>
	<label for ="speed" id="speed_label">1.0</label>
	<br>
	<br>

	<div>Load time is roughly 15 sec; TTS for a short text takes about 2 sec (on my RTX 2070 Super GPU).</div><br>
	<div>The quantized versions are very slow; so far they exist only to fit within the GitHub Pages 100 MB limit.</div><br>
	<div>Multi-speaker (vctk) is a little slower than single-speaker. The default number of timesteps is 5 (the smallest, 1, is about 300 msec faster, but the audio quality becomes low).</div>
	<br>
	<div id="footer">
	<b>Credits</b><br>
	<a href="https://github.com/akjava/Matcha-TTS-Japanese" style="font-size: 9px" target="link">Matcha-TTS-Japanese</a> |
	<a href = "http://www.udialogue.org/download/cstr-vctk-corpus.html" style="font-size: 9px" target="link">CSTR VCTK Corpus</a> |
	<a href = "https://github.com/cmusphinx/cmudict" style="font-size: 9px" target="link">CMUDict</a> |
	<a href = "https://huggingface.co/docs/transformers.js/index" style="font-size: 9px" target="link">Transformers.js</a> |
	<a href = "https://huggingface.co/cisco-ai/mini-bart-g2p" style="font-size: 9px" target="link">mini-bart-g2p</a> |
	<a href = "https://onnxruntime.ai/docs/get-started/with-javascript/web.html" style="font-size: 9px" target="link">ONNXRuntime-Web</a> |
	<a href = "https://github.com/akjava/English-To-IPA-Collections" style="font-size: 9px" target="link">English-To-IPA-Collections</a> |
	<a href ="https://huggingface.co/papers/2309.03199" style="font-size: 9px" target="link">Matcha-TTS Paper</a>
	</div>



	</body>
	</html>