File size: 8,418 Bytes
0fcbf28
3eeee98
 
 
 
 
ccff356
3eeee98
 
ccff356
ee3b864
3eeee98
76849d7
3eeee98
 
 
 
 
 
 
 
 
1d88bdc
3eeee98
 
 
 
5d2461b
 
3ef7bcd
 
248976b
d06dbc0
8e6e415
d06dbc0
 
 
cd81b30
3eeee98
 
 
aefaec7
3eeee98
 
 
3ef7bcd
3eeee98
 
b3f6e0e
c418a9e
 
3eeee98
 
3ef7bcd
0537f85
3ef7bcd
 
3eeee98
 
 
3ef7bcd
747ffc5
3eeee98
 
 
aefaec7
3eeee98
9dc01a4
3eeee98
9dc01a4
3eeee98
9dc01a4
3eeee98
 
 
 
 
 
 
6be5772
3eeee98
6be5772
5d2461b
 
 
 
3ef7bcd
3eeee98
5d2461b
3eeee98
 
 
 
 
5d2461b
3ef7bcd
0537f85
c5889e1
3ef7bcd
 
 
0537f85
 
3ef7bcd
5d2461b
3eeee98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb8e9a3
 
3eeee98
 
 
 
cd81b30
8cd4a84
 
 
cd81b30
 
 
bbebd26
 
 
d1ceb3d
bbebd26
 
 
 
 
 
706ce8f
4f21fc5
bbebd26
758ff20
 
d1ceb3d
758ff20
d1ceb3d
 
 
5a70475
1a25f98
 
bbebd26
c33769f
71f261f
bbebd26
3ef7bcd
 
5d2461b
1d88bdc
3eeee98
 
 
 
 
 
 
 
 
 
 
 
 
05c2457
47b61b6
ee3b864
ff0769e
05c2457
3eeee98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fcbf28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
<!doctype html>
<html lang="en">
<head>
    <meta name="viewport" content="width=device-width" />
    <link rel="stylesheet" href="style.css" />
    <meta charset="UTF-8">
    <title>Matcha-TTS ONNX Benchmarks</title>
</head>
<body>
    <h1>Matcha-TTS ONNX Benchmarks</h1>
    

    <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
    <script type="module">
        import { MatchaTTSRaw } from "./js-esm/matcha_tts_raw.js";
        import { webWavPlay } from "./js-esm/web_wav_play.js";
        import { arpa_to_ipa } from "./js-esm/arpa_to_ipa.js";
        import { loadCmudict } from "./js-esm/cmudict_loader.js";
        import { env,textToArpa} from "./js-esm/text_to_arpa.js";

        // Settings applied to the `env` object imported from text_to_arpa.js:
        // allowLocalModels is switched on, localModelPath points at ./models/,
        // and the ONNX backend's logLevel is set to "error".
        Object.assign(env, {
            allowLocalModels: true,
            localModelPath: "./models/",
        });
        env.backends.onnx.logLevel = "error";
        
        // ---- Shared page state, read and written by the functions in this script ----
        let matcha_tts_raw;        // MatchaTTSRaw instance created by main(); reset to null by loadModel()
        let cmudict = {};          // dictionary object handed to loadCmudict() and textToArpa()
        let speaking = false;      // set to true while main() is running
        let total_infer_time = 0;  // sum of measured synthesis times, in milliseconds
        let count_infer = 0;       // number of syntheses included in total_infer_time
        let loaded_model_name;     // model name recorded by main() after a model load
        let load_time;             // duration of that model load, in seconds
        /**
         * Speaks the text in #textInput, loading a model first when none is loaded.
         *
         * Pipeline: (model + CMUdict load when needed) -> textToArpa -> arpa_to_ipa
         * (whitespace stripped) -> engine.infer -> webWavPlay.
         * #result1 is refreshed after a load, #result2 after every synthesis that
         * returned a non-null result.
         *
         * Calls made while another run is still in progress are ignored.
         *
         * @param {string|*} model_name - Model file stem under ./models/matcha-tts/.
         *   Any non-string value (e.g. the click event received when this function is
         *   used directly as an onclick handler) selects the default model.
         * @returns {Promise<void>} Resolves when the run finished or was ignored;
         *   rejects with the underlying error if loading or synthesis throws.
         */
        async function main(model_name) {
            const modelName = typeof model_name === 'string'
                ? model_name
                : "en001_ep6399_univ_simplify"; // via button click
            console.log(modelName);

            // Only one run at a time. The original logged this message but kept going,
            // which let overlapping runs share and corrupt the state above.
            if (speaking) {
                console.log("speaking return");
                return;
            }

            speaking = true;
            try {
                console.log("main called");

                // Work on a local reference so a concurrent reset of the shared
                // variable cannot turn the infer() call below into a null dereference.
                let engine = matcha_tts_raw;
                if (!engine) {
                    const loadStart = performance.now();
                    engine = new MatchaTTSRaw();
                    const model_path = `./models/matcha-tts/${modelName}.onnx`;
                    console.log(model_path);

                    console.time("load model");
                    try {
                        await engine.load_model(model_path, { executionProviders: ['webgpu', 'wasm'] });
                    } finally {
                        console.timeEnd("load model");
                    }

                    load_time = (performance.now() - loadStart) / 1000; // sec
                    loaded_model_name = modelName;

                    await loadCmudict(cmudict, './dictionaries/cmudict-0.7b');

                    // Publish the engine only after everything loaded, so a failed load
                    // is retried on the next call instead of reusing an unloaded engine.
                    matcha_tts_raw = engine;
                    update_infer_bench1();
                } else {
                    console.log("session exist skip load model");
                }

                // The measured time starts here, so it includes the text -> ARPAbet -> IPA step.
                const startTime = performance.now();
                const text = document.getElementById('textInput').value;
                console.log("### textToArpa call");
                const arpa_text = await textToArpa(cmudict, text);
                console.log("### arpa returned");
                const ipa_text = arpa_to_ipa(arpa_text).replace(/\s/g, "");

                const spks = 0;
                // Slider values are passed through exactly as read from the inputs (strings).
                const speed = document.getElementById('speed').value;
                const temperature = document.getElementById('temperature').value;

                console.time("infer");
                let result;
                try {
                    result = await engine.infer(ipa_text, temperature, speed, spks);
                } finally {
                    // Always close the timer so the next run does not hit a stale label.
                    console.timeEnd("infer");
                }

                if (result != null) {
                    total_infer_time += performance.now() - startTime;
                    count_infer += 1;
                    update_infer_bench2();
                    webWavPlay(result);
                }
            } finally {
                // Release the busy flag even when loading or synthesis failed.
                speaking = false;
            }
        }
      /**
       * Writes the loaded model's name and its load time (seconds, one decimal)
       * into the #result1 element.
       */
      function update_infer_bench1() {
        const seconds = load_time.toFixed(1);
        const message = `${loaded_model_name} load time ${seconds} sec`;
        const target = document.getElementById('result1');
        target.innerText = message;
      }
      
      /**
       * Writes the synthesis count and the average synthesis time (seconds, one
       * decimal) into the #result2 element. The accumulated time is kept in
       * milliseconds, hence the division by 1000.
       */
      function update_infer_bench2() {
        const averageMs = total_infer_time / count_infer;
        const averageSec = averageMs / 1000;
        const message = `Infer Count ${count_infer} avg infer-time ${averageSec.toFixed(1)} sec`;
        const target = document.getElementById('result2');
        target.innerText = message;
      }
        /**
         * Copies the #spks value into #spks_label, left-padded with zeros to at
         * least three characters.
         * NOTE(review): no #spks / #spks_label element and no caller of this
         * function are visible in this page - confirm whether it is still needed.
         */
        function update_range() {
            const rawValue = document.getElementById('spks').value;
            const padded = rawValue.toString().padStart(3, '0');
            const label = document.getElementById('spks_label');
            label.textContent = padded;
        }
        /**
         * Shows the current #temperature slider value in its label
         * (the label's id is spelled "tempature_label" in the markup).
         */
        function update_range2() {
            const { value: current } = document.getElementById('temperature');
            const label = document.getElementById('tempature_label');
            label.textContent = current;
        }
        /**
         * Shows the current #speed slider value in #speed_label.
         */
        function update_range3() {
            const { value: current } = document.getElementById('speed');
            const label = document.getElementById('speed_label');
            label.textContent = current;
        }

        // When the window's load event fires, attach the handlers:
        // the speak button runs main(), each slider refreshes its value label on change.
        window.onload = async () => {
            const bindings = [
                ['myButton', 'onclick', main],
                ['temperature', 'onchange', update_range2],
                ['speed', 'onchange', update_range3],
            ];
            for (const [elementId, eventProperty, handler] of bindings) {
                document.getElementById(elementId)[eventProperty] = handler;
            }
        };
        /**
         * Switches to another model: clears the benchmark counters, drops the
         * current engine so that main() loads `model_name`, then starts main().
         *
         * The request is ignored while a load or synthesis is in progress
         * (`speaking` is true); resetting the shared state in the middle of a run
         * would pull the engine out from under it.
         *
         * @param {string} model_name - Model name forwarded to main().
         */
        function loadModel(model_name) {
            if (speaking) {
                console.log("busy, model switch ignored");
                return;
            }

            total_infer_time = 0;
            count_infer = 0;
            matcha_tts_raw = null;

            // main() is async; report failures instead of leaving the promise floating.
            main(model_name).catch((err) => {
                console.error(`failed to load or run model ${model_name}`, err);
            });
        }

      /**
       * Builds a <button> that switches to the given model when clicked.
       *
       * @param {string} label - Text shown on the button.
       * @param {string} model_name - Name handed to loadModel() on click.
       * @returns {HTMLButtonElement} The new button; the caller attaches it to the page.
       */
      function create_button(label, model_name) {
        // Click event handler: switch to this button's model.
        const handleClick = function () {
          loadModel(model_name);
        };

        // Create the button and set its style, caption and handler.
        return Object.assign(document.createElement('button'), {
          style: "margin:4px;",
          textContent: label,
          onclick: handleClick,
        });
      }

      
      // Add one model-switch button per available model to the #buttons container,
      // in display order: [button caption, model name].
      for (const [caption, modelName] of [
        ["ljspeech", "ljspeech_sim"],
        ["ljspeech-quantized", "ljspeech_sim_q8"],
        ["vctk", "vctk_univ_simplify"],
        ["vctk-quantized", "vctk_univ_simplify_q8"],
        ["en001", "en001_ep6399_univ_simplify"],
        ["en001-quantized", "en001_ep6399_univ_simplify_q8"],
      ]) {
        document.getElementById('buttons').appendChild(create_button(caption, modelName));
      }
      

      
    </script>
<div id="result1">Click button to load a model</div>
  
 <div id="buttons"></div>
<br>
    <div id="result2"></div>
  <br><br>
    <input type="text" id="textInput"  value ="Hello Huggingface." placeholder="Enter some text here...">
    
    <button id="myButton">Text To Speak</button><br>
    

    <label for ="temperature" style="width: 110px;display: inline-block;">Temperature</label>
    <input type="range" id="temperature"  min="0" max="1.0" value="0.5" step="0.1"/>
    <label for ="temperature" id="tempature_label">0.5</label><br>

    <label for ="speed" style="width: 110px;display: inline-block;">Speed</label>
    <input type="range" id="speed"  min="0.1" max="2.0" value="1.0" step="0.1"/>
    <label for ="speed" id="speed_label">1.0</label>
    <br>
    <br>

  <div>Load time is about 15 sec; TTS for a short text takes about 2 sec (on my 2070 Super GPU).</div><br>
  <div>The quantized versions are very slow; so far they exist only because of the GitHub Pages 100MB limitation.</div><br>
  <div>Multi-speaker (vctk) is a little bit slower than single-speaker. en001 is a little bit faster, maybe because its timesteps value is 5 (bigger is higher quality, but slower).</div>
  <br>
    <div id="footer">
    <b>Credits</b><br>
    <a href="https://github.com/akjava/Matcha-TTS-Japanese" style="font-size: 9px" target="link">Matcha-TTS-Japanese</a> | 
    <a href = "http://www.udialogue.org/download/cstr-vctk-corpus.html" style="font-size: 9px"  target="link">CSTR VCTK Corpus</a> |
    <a href = "https://github.com/cmusphinx/cmudict" style="font-size: 9px"  target="link">CMUDict</a> |
    <a href = "https://huggingface.co/docs/transformers.js/index" style="font-size: 9px"  target="link">Transformer.js</a> |
    <a href = "https://huggingface.co/cisco-ai/mini-bart-g2p" style="font-size: 9px"  target="link">mini-bart-g2p</a> |
    <a href = "https://onnxruntime.ai/docs/get-started/with-javascript/web.html" style="font-size: 9px"  target="link">ONNXRuntime-Web</a> |
    <a href = "https://github.com/akjava/English-To-IPA-Collections" style="font-size: 9px"  target="link">English-To-IPA-Collections</a> |
    <a href ="https://huggingface.co/papers/2309.03199" style="font-size: 9px"  target="link">Matcha-TTS Paper</a>
    </div>
    
    
    
</body>
</html>