radames committed on
Commit
c4217fa
1 Parent(s): 0ee09dc
Files changed (1) hide show
  1. index.html +507 -496
index.html CHANGED
@@ -1,555 +1,566 @@
1
  <!doctype html>
2
  <html lang="en-us">
3
- <head>
4
- <title>whisper.cpp : WASM example</title>
5
-
6
- <style>
7
- #output {
8
- width: 100%;
9
- height: 100%;
10
- margin: 0 auto;
11
- margin-top: 10px;
12
- border-left: 0px;
13
- border-right: 0px;
14
- padding-left: 0px;
15
- padding-right: 0px;
16
- display: block;
17
- background-color: black;
18
- color: white;
19
- font-size: 10px;
20
- font-family: 'Lucida Console', Monaco, monospace;
21
- outline: none;
22
- white-space: pre;
23
- overflow-wrap: normal;
24
- overflow-x: scroll;
25
- }
26
- </style>
27
- </head>
28
- <body>
29
- <div id="main-container">
30
- <b>Minimal <a href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a> example running fully in the browser</b>
31
-
32
- <br><br>
33
 
34
- Usage instructions:<br>
35
- <ul>
36
- <li>Load a ggml model file (you can obtain one from <a href="https://ggml.ggerganov.com/">here</a>, recommended: <b>tiny</b> or <b>base</b>)</li>
37
- <li>Select audio file to transcribe or record audio from the microphone (sample: <a href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)</li>
38
- <li>Click on the "Transcribe" button to start the transcription</li>
39
- </ul>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- Note that the computation is quite heavy and may take a few seconds to complete.<br>
42
- The transcription results will be displayed in the text area below.<br><br>
43
- <b>Important: your browser must support WASM SIMD instructions for this to work.</b>
44
 
45
- <br><br><hr>
 
 
 
 
 
 
46
 
47
- <div id="model">
48
- Whisper model: <span id="model-whisper-status"></span>
49
- <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
50
- <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
51
- <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
52
- <button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
53
- <span id="fetch-whisper-progress"></span>
54
 
55
- <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
56
- </div>
 
 
57
 
58
- <br>
 
 
 
59
 
60
- <!-- radio button to select between file upload or microphone -->
61
- <div id="input">
62
- Input:
63
- <input type="radio" id="file" name="input" value="file" checked="checked" onchange="changeInput('file')" /> File
64
- <input type="radio" id="mic" name="input" value="mic" onchange="changeInput('mic')" /> Microphone
65
  </div>
 
66
 
67
- <br>
68
-
69
- <div id="input_file">
70
- Audio file:
71
- <input type="file" id="file" name="file" onchange="loadAudio(event)" />
72
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- <div id="input_mic" style="display: none;">
75
- Microphone:
76
- <button id="start" onclick="startRecording()">Start</button>
77
- <button id="stop" onclick="stopRecording()" disabled>Stop</button>
78
-
79
- <!-- progress bar to show recording progress -->
80
- <br><br>
81
- <div id="progress" style="display: none;">
82
- <div id="progress-bar" style="width: 0%; height: 10px; background-color: #4CAF50;"></div>
83
- <div id="progress-text">0%</div>
84
- </div>
85
- </div>
86
 
87
- <audio controls="controls" id="audio" loop hidden>
88
- Your browser does not support the &lt;audio&gt; tag.
89
- <source id="source" src="" type="audio/wav" />
90
- </audio>
91
-
92
- <hr><br>
93
-
94
- <table>
95
- <tr>
96
- <td>
97
- Language:
98
- <select id="language" name="language">
99
- <option value="en">English</option>
100
- <option value="ar">Arabic</option>
101
- <option value="hy">Armenian</option>
102
- <option value="az">Azerbaijani</option>
103
- <option value="eu">Basque</option>
104
- <option value="be">Belarusian</option>
105
- <option value="bn">Bengali</option>
106
- <option value="bg">Bulgarian</option>
107
- <option value="ca">Catalan</option>
108
- <option value="zh">Chinese</option>
109
- <option value="hr">Croatian</option>
110
- <option value="cs">Czech</option>
111
- <option value="da">Danish</option>
112
- <option value="nl">Dutch</option>
113
- <option value="en">English</option>
114
- <option value="et">Estonian</option>
115
- <option value="tl">Filipino</option>
116
- <option value="fi">Finnish</option>
117
- <option value="fr">French</option>
118
- <option value="gl">Galician</option>
119
- <option value="ka">Georgian</option>
120
- <option value="de">German</option>
121
- <option value="el">Greek</option>
122
- <option value="gu">Gujarati</option>
123
- <option value="iw">Hebrew</option>
124
- <option value="hi">Hindi</option>
125
- <option value="hu">Hungarian</option>
126
- <option value="is">Icelandic</option>
127
- <option value="id">Indonesian</option>
128
- <option value="ga">Irish</option>
129
- <option value="it">Italian</option>
130
- <option value="ja">Japanese</option>
131
- <option value="kn">Kannada</option>
132
- <option value="ko">Korean</option>
133
- <option value="la">Latin</option>
134
- <option value="lv">Latvian</option>
135
- <option value="lt">Lithuanian</option>
136
- <option value="mk">Macedonian</option>
137
- <option value="ms">Malay</option>
138
- <option value="mt">Maltese</option>
139
- <option value="no">Norwegian</option>
140
- <option value="fa">Persian</option>
141
- <option value="pl">Polish</option>
142
- <option value="pt">Portuguese</option>
143
- <option value="ro">Romanian</option>
144
- <option value="ru">Russian</option>
145
- <option value="sr">Serbian</option>
146
- <option value="sk">Slovak</option>
147
- <option value="sl">Slovenian</option>
148
- <option value="es">Spanish</option>
149
- <option value="sw">Swahili</option>
150
- <option value="sv">Swedish</option>
151
- <option value="ta">Tamil</option>
152
- <option value="te">Telugu</option>
153
- <option value="th">Thai</option>
154
- <option value="tr">Turkish</option>
155
- <option value="uk">Ukrainian</option>
156
- <option value="ur">Urdu</option>
157
- <option value="vi">Vietnamese</option>
158
- <option value="cy">Welsh</option>
159
- <option value="yi">Yiddish</option>
160
- </select>
161
- </td>
162
- <td>
163
- <button onclick="onProcess(false);">Transcribe</button>
164
- </td>
165
- <td>
166
- <button onclick="onProcess(true);">Translate</button>
167
- </td>
168
- </tr>
169
- </table>
170
-
171
- <br>
172
-
173
- <!-- textarea with height filling the rest of the page -->
174
- <textarea id="output" rows="20"></textarea>
175
 
176
- <br><br>
177
 
178
- <div class="cell-version">
179
- <span>
180
- |
181
- Build time: <span class="nav-link">Mon Jan 16 11:57:35 2023</span> |
182
- Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/49b529ba">49b529ba</a> |
183
- Commit subject: <span class="nav-link">whisper.android : add support for loading directly from asset in C (#415)</span> |
184
- <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm">Source Code</a> |
185
- </span>
186
- </div>
187
- </div>
188
 
189
- <script type="text/javascript" src="helpers.js"></script>
190
- <script type='text/javascript'>
191
- // TODO: convert audio buffer to WAV
192
- function setAudio(audio) {
193
- //if (audio) {
194
- // // convert to 16-bit PCM
195
- // var blob = new Blob([audio], { type: 'audio/wav' });
196
- // var url = URL.createObjectURL(blob);
197
- // document.getElementById('source').src = url;
198
- // document.getElementById('audio').hidden = false;
199
- // document.getElementById('audio').loop = false;
200
- // document.getElementById('audio').load();
201
- //} else {
202
- // document.getElementById('audio').hidden = true;
203
- //}
204
  }
205
 
206
- function changeInput(input) {
207
- if (input == 'file') {
208
- document.getElementById('input_file').style.display = 'block';
209
- document.getElementById('input_mic' ).style.display = 'none';
210
- document.getElementById('progress' ).style.display = 'none';
211
- } else {
212
- document.getElementById('input_file').style.display = 'none';
213
- document.getElementById('input_mic' ).style.display = 'block';
214
- document.getElementById('progress' ).style.display = 'block';
215
- }
216
- }
217
 
218
- var Module = {
219
- print: printTextarea,
220
- printErr: printTextarea,
221
- setStatus: function(text) {
222
- printTextarea('js: ' + text);
223
- },
224
- monitorRunDependencies: function(left) {
225
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  };
227
 
228
- // web audio context
229
- var context = null;
230
-
231
- // audio data
232
- var audio = null;
 
233
 
234
- // the whisper instance
235
- var instance = null;
236
- var model_whisper = '';
237
 
238
- // helper function
239
- function convertTypedArray(src, type) {
240
- var buffer = new ArrayBuffer(src.byteLength);
241
- var baseView = new src.constructor(buffer).set(src);
242
- return new type(buffer);
243
- }
244
 
245
- //
246
- // load model
247
- //
248
-
249
- let dbVersion = 1
250
- let dbName = 'whisper.ggerganov.com';
251
- let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
252
-
253
- function storeFS(fname, buf) {
254
- // write to WASM file using FS_createDataFile
255
- // if the file exists, delete it
256
- try {
257
- Module.FS_unlink(fname);
258
- } catch (e) {
259
- // ignore
260
- }
261
 
262
- Module.FS_createDataFile("/", fname, buf, true, true);
 
 
 
263
 
264
- model_whisper = fname;
 
 
 
 
 
 
 
 
265
 
266
- document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
 
267
 
268
- printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
269
- }
 
270
 
271
- function loadFile(event, fname) {
272
- var file = event.target.files[0] || null;
273
- if (file == null) {
274
- return;
275
- }
276
 
277
- printTextarea("loadFile: loading model: " + file.name + ", size: " + file.size + " bytes");
278
- printTextarea('loadFile: please wait ...');
279
 
280
- var reader = new FileReader();
281
- reader.onload = function(event) {
282
- var buf = new Uint8Array(reader.result);
283
- storeFS(fname, buf);
284
- }
285
- reader.readAsArrayBuffer(file);
286
-
287
- document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
288
- document.getElementById('fetch-whisper-base-en').style.display = 'none';
289
- document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
290
- document.getElementById('fetch-whisper-base' ).style.display = 'none';
291
- document.getElementById('whisper-file' ).style.display = 'none';
292
- document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
293
  }
294
 
295
- function loadWhisper(model) {
296
- let urls = {
297
- 'tiny.en': 'models/ggml-tiny.en.bin',
298
- 'tiny': 'models/ggml-tiny.bin',
299
- 'base.en': 'models/ggml-base.en.bin',
300
- 'base': 'models/ggml-base.bin',
301
- };
302
-
303
- let sizes = {
304
- 'tiny.en': 75,
305
- 'tiny': 75,
306
- 'base.en': 142,
307
- 'base': 142,
308
- };
309
-
310
- let url = urls[model];
311
- let dst = 'whisper.bin';
312
- let size_mb = sizes[model];
313
-
314
- model_whisper = model;
315
-
316
- document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
317
- document.getElementById('fetch-whisper-base-en').style.display = 'none';
318
- document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
319
- document.getElementById('fetch-whisper-base' ).style.display = 'none';
320
- document.getElementById('whisper-file' ).style.display = 'none';
321
- document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model;
322
-
323
- cbProgress = function(p) {
324
- let el = document.getElementById('fetch-whisper-progress');
325
- el.innerHTML = Math.round(100*p) + '%';
326
- };
327
-
328
- cbCancel = function() {
329
- var el;
330
- el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
331
- el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
332
- el = document.getElementById('fetch-whisper-tiny' ); if (el) el.style.display = 'inline-block';
333
- el = document.getElementById('fetch-whisper-base' ); if (el) el.style.display = 'inline-block';
334
- el = document.getElementById('whisper-file' ); if (el) el.style.display = 'inline-block';
335
- el = document.getElementById('model-whisper-status' ); if (el) el.innerHTML = '';
336
- };
337
-
338
- loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
339
  }
340
 
341
- //
342
- // audio file
343
- //
344
 
345
- const kMaxAudio_s = 120;
346
- const kSampleRate = 16000;
 
347
 
348
- window.AudioContext = window.AudioContext || window.webkitAudioContext;
349
- window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
 
 
 
 
350
 
351
- function loadAudio(event) {
352
- if (!context) {
353
- context = new AudioContext({
354
- sampleRate: kSampleRate,
355
- channelCount: 1,
356
- echoCancellation: false,
357
- autoGainControl: true,
358
- noiseSuppression: true,
359
- });
360
- }
361
 
362
- var file = event.target.files[0] || null;
363
- if (file == null) {
364
- return;
365
- }
 
366
 
367
- printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
368
- printTextarea('js: please wait ...');
369
-
370
- var reader = new FileReader();
371
- reader.onload = function(event) {
372
- var buf = new Uint8Array(reader.result);
373
-
374
- context.decodeAudioData(buf.buffer, function(audioBuffer) {
375
- var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
376
- var source = offlineContext.createBufferSource();
377
- source.buffer = audioBuffer;
378
- source.connect(offlineContext.destination);
379
- source.start(0);
380
-
381
- offlineContext.startRendering().then(function(renderedBuffer) {
382
- audio = renderedBuffer.getChannelData(0);
383
- printTextarea('js: audio loaded, size: ' + audio.length);
384
-
385
- // truncate to first 30 seconds
386
- if (audio.length > kMaxAudio_s*kSampleRate) {
387
- audio = audio.slice(0, kMaxAudio_s*kSampleRate);
388
- printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
389
- }
390
-
391
- setAudio(audio);
392
- });
393
- }, function(e) {
394
- printTextarea('js: error decoding audio: ' + e);
395
- audio = null;
396
  setAudio(audio);
397
  });
398
- }
399
- reader.readAsArrayBuffer(file);
 
 
 
400
  }
401
-
402
- //
403
- // microphone
404
- //
405
-
406
- var mediaRecorder = null;
407
- var doRecording = false;
408
- var startTime = 0;
409
-
410
- function stopRecording() {
411
- doRecording = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  }
413
 
414
- // record up to kMaxAudio_s seconds of audio from the microphone
415
- // check if doRecording is false every 1000 ms and stop recording if so
416
- // update progress information
417
- function startRecording() {
418
- if (!context) {
419
- context = new AudioContext({
420
- sampleRate: kSampleRate,
421
- channelCount: 1,
422
- echoCancellation: false,
423
- autoGainControl: true,
424
- noiseSuppression: true,
425
- });
426
- }
427
-
428
- document.getElementById('start').disabled = true;
429
- document.getElementById('stop').disabled = false;
430
-
431
- document.getElementById('progress-bar').style.width = '0%';
432
- document.getElementById('progress-text').innerHTML = '0%';
433
-
434
- doRecording = true;
435
- startTime = Date.now();
436
-
437
- var chunks = [];
438
- var stream = null;
439
-
440
- navigator.mediaDevices.getUserMedia({audio: true, video: false})
441
- .then(function(s) {
442
- stream = s;
443
- mediaRecorder = new MediaRecorder(stream);
444
- mediaRecorder.ondataavailable = function(e) {
445
- chunks.push(e.data);
446
- };
447
- mediaRecorder.onstop = function(e) {
448
- var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
449
- chunks = [];
450
-
451
- document.getElementById('start').disabled = false;
452
- document.getElementById('stop').disabled = true;
453
-
454
- var reader = new FileReader();
455
- reader.onload = function(event) {
456
- var buf = new Uint8Array(reader.result);
457
-
458
- context.decodeAudioData(buf.buffer, function(audioBuffer) {
459
- var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
460
- var source = offlineContext.createBufferSource();
461
- source.buffer = audioBuffer;
462
- source.connect(offlineContext.destination);
463
- source.start(0);
464
-
465
- offlineContext.startRendering().then(function(renderedBuffer) {
466
- audio = renderedBuffer.getChannelData(0);
467
- printTextarea('js: audio recorded, size: ' + audio.length);
468
-
469
- // truncate to first 30 seconds
470
- if (audio.length > kMaxAudio_s*kSampleRate) {
471
- audio = audio.slice(0, kMaxAudio_s*kSampleRate);
472
- printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
473
- }
474
- setAudio(audio);
475
- });
476
- }, function(e) {
477
- printTextarea('js: error decoding audio: ' + e);
478
- audio = null;
479
  setAudio(audio);
480
  });
481
- }
482
-
483
- reader.readAsArrayBuffer(blob);
484
- };
485
- mediaRecorder.start();
486
- })
487
- .catch(function(err) {
488
- printTextarea('js: error getting audio stream: ' + err);
489
- });
490
 
491
- var interval = setInterval(function() {
492
- if (!doRecording) {
493
- clearInterval(interval);
494
- mediaRecorder.stop();
495
- stream.getTracks().forEach(function(track) {
496
- track.stop();
497
- });
498
- }
 
 
 
 
 
 
 
 
499
 
500
- document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxAudio_s) + '%';
501
- document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxAudio_s).toFixed(0) + '%';
502
- }, 1000);
503
 
504
- printTextarea('js: recording ...');
505
 
506
- setTimeout(function() {
507
- if (doRecording) {
508
- printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
509
- stopRecording();
510
- }
511
- }, kMaxAudio_s*1000);
512
- }
513
 
514
- //
515
- // transcribe
516
- //
517
 
518
- function onProcess(translate) {
519
- if (!instance) {
520
- instance = Module.init('whisper.bin');
521
 
522
- if (instance) {
523
- printTextarea("js: whisper initialized, instance: " + instance);
524
- document.getElementById('model').innerHTML = 'Model loaded: ' + model_whisper;
525
- }
526
  }
 
527
 
528
- if (!instance) {
529
- printTextarea("js: failed to initialize whisper");
530
- return;
531
- }
532
 
533
- if (!audio) {
534
- printTextarea("js: no audio data");
535
- return;
536
- }
537
 
538
- if (instance) {
539
- printTextarea('');
540
- printTextarea('js: processing - this might take a while ...');
541
- printTextarea('');
542
-
543
- setTimeout(function() {
544
- var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
545
- console.log('js: full_default returned: ' + ret);
546
- if (ret) {
547
- printTextarea("js: whisper returned: " + ret);
548
- }
549
- }, 100);
550
- }
551
  }
552
- </script>
553
- <script type="text/javascript" src="main.js"></script>
554
- </body>
555
- </html>
 
 
 
1
  <!doctype html>
2
  <html lang="en-us">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ <head>
5
+ <title>whisper.cpp : WASM example</title>
6
+
7
+ <style>
8
+ #output {
9
+ width: 100%;
10
+ height: 100%;
11
+ margin: 0 auto;
12
+ margin-top: 10px;
13
+ border-left: 0px;
14
+ border-right: 0px;
15
+ padding-left: 0px;
16
+ padding-right: 0px;
17
+ display: block;
18
+ background-color: black;
19
+ color: white;
20
+ font-size: 10px;
21
+ font-family: 'Lucida Console', Monaco, monospace;
22
+ outline: none;
23
+ white-space: pre;
24
+ overflow-wrap: normal;
25
+ overflow-x: scroll;
26
+ }
27
+ </style>
28
+ </head>
29
+
30
+ <body>
31
+ <div id="main-container">
32
+ <b>Minimal <a target="_blank" href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a> example running
33
+ fully in the browser</b>
34
+
35
+ <br><br>
36
+
37
+ Usage instructions:<br>
38
+ <ul>
39
+ <li>Load a ggml model file (you can obtain one from <a target="_blank"
40
+ href="https://ggml.ggerganov.com/">here</a>, recommended: <b>tiny</b> or <b>base</b>)</li>
41
+ <li>Select audio file to transcribe or record audio from the microphone (sample: <a target="_blank"
42
+ href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)</li>
43
+ <li>Click on the "Transcribe" button to start the transcription</li>
44
+ </ul>
45
+
46
+ Note that the computation is quite heavy and may take a few seconds to complete.<br>
47
+ The transcription results will be displayed in the text area below.<br><br>
48
+ <b>Important: your browser must support WASM SIMD instructions for this to work.</b>
49
+
50
+ <br><br>
51
+ <hr>
52
+
53
+ <div id="model">
54
+ Whisper model: <span id="model-whisper-status"></span>
55
+ <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
56
+ <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
57
+ <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
58
+ <button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
59
+ <span id="fetch-whisper-progress"></span>
60
+
61
+ <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
62
+ </div>
63
 
64
+ <br>
 
 
65
 
66
+ <!-- radio button to select between file upload or microphone -->
67
+ <div id="input">
68
+ Input:
69
+ <input type="radio" id="file" name="input" value="file" checked="checked" onchange="changeInput('file')" />
70
+ File
71
+ <input type="radio" id="mic" name="input" value="mic" onchange="changeInput('mic')" /> Microphone
72
+ </div>
73
 
74
+ <br>
 
 
 
 
 
 
75
 
76
+ <div id="input_file">
77
+ Audio file:
78
+ <input type="file" id="file" name="file" onchange="loadAudio(event)" />
79
+ </div>
80
 
81
+ <div id="input_mic" style="display: none;">
82
+ Microphone:
83
+ <button id="start" onclick="startRecording()">Start</button>
84
+ <button id="stop" onclick="stopRecording()" disabled>Stop</button>
85
 
86
+ <!-- progress bar to show recording progress -->
87
+ <br><br>
88
+ <div id="progress" style="display: none;">
89
+ <div id="progress-bar" style="width: 0%; height: 10px; background-color: #4CAF50;"></div>
90
+ <div id="progress-text">0%</div>
91
  </div>
92
+ </div>
93
 
94
+ <audio controls="controls" id="audio" loop hidden>
95
+ Your browser does not support the &lt;audio&gt; tag.
96
+ <source id="source" src="" type="audio/wav" />
97
+ </audio>
98
+
99
+ <hr><br>
100
+
101
+ <table>
102
+ <tr>
103
+ <td>
104
+ Language:
105
+ <select id="language" name="language">
106
+ <option value="en">English</option>
107
+ <option value="ar">Arabic</option>
108
+ <option value="hy">Armenian</option>
109
+ <option value="az">Azerbaijani</option>
110
+ <option value="eu">Basque</option>
111
+ <option value="be">Belarusian</option>
112
+ <option value="bn">Bengali</option>
113
+ <option value="bg">Bulgarian</option>
114
+ <option value="ca">Catalan</option>
115
+ <option value="zh">Chinese</option>
116
+ <option value="hr">Croatian</option>
117
+ <option value="cs">Czech</option>
118
+ <option value="da">Danish</option>
119
+ <option value="nl">Dutch</option>
120
+ <option value="en">English</option>
121
+ <option value="et">Estonian</option>
122
+ <option value="tl">Filipino</option>
123
+ <option value="fi">Finnish</option>
124
+ <option value="fr">French</option>
125
+ <option value="gl">Galician</option>
126
+ <option value="ka">Georgian</option>
127
+ <option value="de">German</option>
128
+ <option value="el">Greek</option>
129
+ <option value="gu">Gujarati</option>
130
+ <option value="iw">Hebrew</option>
131
+ <option value="hi">Hindi</option>
132
+ <option value="hu">Hungarian</option>
133
+ <option value="is">Icelandic</option>
134
+ <option value="id">Indonesian</option>
135
+ <option value="ga">Irish</option>
136
+ <option value="it">Italian</option>
137
+ <option value="ja">Japanese</option>
138
+ <option value="kn">Kannada</option>
139
+ <option value="ko">Korean</option>
140
+ <option value="la">Latin</option>
141
+ <option value="lv">Latvian</option>
142
+ <option value="lt">Lithuanian</option>
143
+ <option value="mk">Macedonian</option>
144
+ <option value="ms">Malay</option>
145
+ <option value="mt">Maltese</option>
146
+ <option value="no">Norwegian</option>
147
+ <option value="fa">Persian</option>
148
+ <option value="pl">Polish</option>
149
+ <option value="pt">Portuguese</option>
150
+ <option value="ro">Romanian</option>
151
+ <option value="ru">Russian</option>
152
+ <option value="sr">Serbian</option>
153
+ <option value="sk">Slovak</option>
154
+ <option value="sl">Slovenian</option>
155
+ <option value="es">Spanish</option>
156
+ <option value="sw">Swahili</option>
157
+ <option value="sv">Swedish</option>
158
+ <option value="ta">Tamil</option>
159
+ <option value="te">Telugu</option>
160
+ <option value="th">Thai</option>
161
+ <option value="tr">Turkish</option>
162
+ <option value="uk">Ukrainian</option>
163
+ <option value="ur">Urdu</option>
164
+ <option value="vi">Vietnamese</option>
165
+ <option value="cy">Welsh</option>
166
+ <option value="yi">Yiddish</option>
167
+ </select>
168
+ </td>
169
+ <td>
170
+ <button onclick="onProcess(false);">Transcribe</button>
171
+ </td>
172
+ <td>
173
+ <button onclick="onProcess(true);">Translate</button>
174
+ </td>
175
+ </tr>
176
+ </table>
177
+
178
+ <br>
179
+
180
+ <!-- textarea with height filling the rest of the page -->
181
+ <textarea id="output" rows="20"></textarea>
182
+
183
+ <br><br>
184
+
185
+ <div class="cell-version">
186
+ <span>
187
+ |
188
+ Build time: <span class="nav-link">Mon Jan 16 11:57:35 2023</span> |
189
+ Commit hash: <a class="nav-link" target="_blank"
190
+ href="https://github.com/ggerganov/whisper.cpp/commit/49b529ba">49b529ba</a> |
191
+ Commit subject: <span class="nav-link">whisper.android : add support for loading directly from asset in
192
+ C (#415)</span> |
193
+ <a class="nav-link" target="_blank"
194
+ href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm">Source Code</a> |
195
+ </span>
196
+ </div>
197
+ </div>
198
+
199
+ <script type="text/javascript" src="helpers.js"></script>
200
+ <script type='text/javascript'>
201
+ // TODO: convert audio buffer to WAV
202
+ function setAudio(audio) {
203
+ //if (audio) {
204
+ // // convert to 16-bit PCM
205
+ // var blob = new Blob([audio], { type: 'audio/wav' });
206
+ // var url = URL.createObjectURL(blob);
207
+ // document.getElementById('source').src = url;
208
+ // document.getElementById('audio').hidden = false;
209
+ // document.getElementById('audio').loop = false;
210
+ // document.getElementById('audio').load();
211
+ //} else {
212
+ // document.getElementById('audio').hidden = true;
213
+ //}
214
+ }
215
+
216
+ function changeInput(input) {
217
+ if (input == 'file') {
218
+ document.getElementById('input_file').style.display = 'block';
219
+ document.getElementById('input_mic').style.display = 'none';
220
+ document.getElementById('progress').style.display = 'none';
221
+ } else {
222
+ document.getElementById('input_file').style.display = 'none';
223
+ document.getElementById('input_mic').style.display = 'block';
224
+ document.getElementById('progress').style.display = 'block';
225
+ }
226
+ }
227
+
228
+ var Module = {
229
+ print: printTextarea,
230
+ printErr: printTextarea,
231
+ setStatus: function (text) {
232
+ printTextarea('js: ' + text);
233
+ },
234
+ monitorRunDependencies: function (left) {
235
+ }
236
+ };
237
+
238
+ // web audio context
239
+ var context = null;
240
+
241
+ // audio data
242
+ var audio = null;
243
+
244
+ // the whisper instance
245
+ var instance = null;
246
+ var model_whisper = '';
247
+
248
+ // helper function
249
+ function convertTypedArray(src, type) {
250
+ var buffer = new ArrayBuffer(src.byteLength);
251
+ var baseView = new src.constructor(buffer).set(src);
252
+ return new type(buffer);
253
+ }
254
+
255
+ //
256
+ // load model
257
+ //
258
+
259
+ let dbVersion = 1
260
+ let dbName = 'whisper.ggerganov.com';
261
+ let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
262
+
263
+ function storeFS(fname, buf) {
264
+ // write to WASM file using FS_createDataFile
265
+ // if the file exists, delete it
266
+ try {
267
+ Module.FS_unlink(fname);
268
+ } catch (e) {
269
+ // ignore
270
+ }
271
 
272
+ Module.FS_createDataFile("/", fname, buf, true, true);
 
 
 
 
 
 
 
 
 
 
 
273
 
274
+ model_whisper = fname;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
+ document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
277
 
278
+ printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
279
+ }
 
 
 
 
 
 
 
 
280
 
281
+ function loadFile(event, fname) {
282
+ var file = event.target.files[0] || null;
283
+ if (file == null) {
284
+ return;
 
 
 
 
 
 
 
 
 
 
 
285
  }
286
 
287
+ printTextarea("loadFile: loading model: " + file.name + ", size: " + file.size + " bytes");
288
+ printTextarea('loadFile: please wait ...');
 
 
 
 
 
 
 
 
 
289
 
290
+ var reader = new FileReader();
291
+ reader.onload = function (event) {
292
+ var buf = new Uint8Array(reader.result);
293
+ storeFS(fname, buf);
294
+ }
295
+ reader.readAsArrayBuffer(file);
296
+
297
+ document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
298
+ document.getElementById('fetch-whisper-base-en').style.display = 'none';
299
+ document.getElementById('fetch-whisper-tiny').style.display = 'none';
300
+ document.getElementById('fetch-whisper-base').style.display = 'none';
301
+ document.getElementById('whisper-file').style.display = 'none';
302
+ document.getElementById('model-whisper-status').innerHTML = 'loaded model: ' + file.name;
303
+ }
304
+
305
/**
 * Start downloading one of the predefined Whisper ggml models.
 *
 * Records the chosen model name in the global `model_whisper`, hides the
 * model-selection controls, shows a "loading model" status and hands the
 * download over to loadRemote() together with a progress callback, storeFS,
 * a cancel callback and printTextarea.
 *
 * @param {string} model  Key of the model to fetch: 'tiny.en', 'tiny',
 *                        'base.en' or 'base'. Any other value is reported
 *                        through printTextarea() and ignored.
 */
function loadWhisper(model) {
  const urls = {
    'tiny.en': 'models/ggml-tiny.en.bin',
    'tiny': 'models/ggml-tiny.bin',
    'base.en': 'models/ggml-base.en.bin',
    'base': 'models/ggml-base.bin',
  };

  // Sizes in MB, passed to loadRemote() as its third argument.
  const sizes = {
    'tiny.en': 75,
    'tiny': 75,
    'base.en': 142,
    'base': 142,
  };

  // Without this check an unknown key would pass `undefined` as both the URL
  // and the size to loadRemote() after the controls were already hidden.
  if (!Object.prototype.hasOwnProperty.call(urls, model)) {
    printTextarea('js: unknown whisper model: ' + model);
    return;
  }

  const url = urls[model];
  const dst = 'whisper.bin';
  const size_mb = sizes[model];

  model_whisper = model;

  // Controls that are hidden while a model is loading and restored on cancel.
  const controlIds = [
    'fetch-whisper-tiny-en',
    'fetch-whisper-base-en',
    'fetch-whisper-tiny',
    'fetch-whisper-base',
    'whisper-file',
  ];

  // Apply one CSS display value to every control that exists in the page.
  function setControlsDisplay(value) {
    controlIds.forEach(function (id) {
      const el = document.getElementById(id);
      if (el) el.style.display = value;
    });
  }

  // Write the status line if the status element exists.
  function setStatus(html) {
    const el = document.getElementById('model-whisper-status');
    if (el) el.innerHTML = html;
  }

  setControlsDisplay('none');
  setStatus('loading model: ' + model);

  // Declared locally: the bare assignments used previously created implicit
  // globals (and would throw a ReferenceError in strict mode).
  const cbProgress = function (p) {
    const el = document.getElementById('fetch-whisper-progress');
    if (el) el.innerHTML = Math.round(100 * p) + '%';
  };

  const cbCancel = function () {
    setControlsDisplay('inline-block');
    setStatus('');
  };

  loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
}
350
 
351
+ //
352
+ // audio file
353
+ //
354
 
355
// Upper bound, in seconds, on the audio that is kept for transcription.
const kMaxAudio_s = 120;
// Sample rate, in Hz, requested for the AudioContext and used to convert
// kMaxAudio_s into a sample count.
const kSampleRate = 16000;

// Fall back to the webkit-prefixed constructors where the unprefixed ones
// are missing.
if (!window.AudioContext) {
  window.AudioContext = window.webkitAudioContext;
}
if (!window.OfflineAudioContext) {
  window.OfflineAudioContext = window.webkitOfflineAudioContext;
}
360
 
361
/**
 * Change handler for the audio file input.
 *
 * Reads the first selected file, decodes it with the shared AudioContext,
 * renders it through an OfflineAudioContext, keeps channel 0, truncates the
 * samples to kMaxAudio_s seconds and passes the result to setAudio(). On any
 * read, decode or render failure the global `audio` is reset to null and
 * setAudio(null) is called.
 *
 * @param {Event} event  Change event whose target is a file input.
 */
function loadAudio(event) {
  if (!context) {
    // NOTE(review): apart from sampleRate these keys look like media-track
    // constraints rather than AudioContext options and are probably ignored
    // by the constructor -- confirm before relying on them.
    context = new AudioContext({
      sampleRate: kSampleRate,
      channelCount: 1,
      echoCancellation: false,
      autoGainControl: true,
      noiseSuppression: true,
    });
  }

  var file = event.target.files[0] || null;
  if (file == null) {
    return;
  }

  printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
  printTextarea('js: please wait ...');

  // Common failure path: report the problem and clear any previous audio.
  function fail(message) {
    printTextarea(message);
    audio = null;
    setAudio(audio);
  }

  var reader = new FileReader();

  reader.onload = function () {
    // reader.result already is the ArrayBuffer that decodeAudioData() needs.
    context.decodeAudioData(reader.result, function (audioBuffer) {
      var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
      var source = offlineContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(offlineContext.destination);
      source.start(0);

      offlineContext.startRendering().then(function (renderedBuffer) {
        // Only the first channel is used.
        audio = renderedBuffer.getChannelData(0);
        printTextarea('js: audio loaded, size: ' + audio.length);

        // truncate to the first kMaxAudio_s seconds
        if (audio.length > kMaxAudio_s * kSampleRate) {
          audio = audio.slice(0, kMaxAudio_s * kSampleRate);
          printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
        }

        setAudio(audio);
      }).catch(function (e) {
        // Previously a rendering failure was an unhandled promise rejection.
        fail('js: error rendering audio: ' + e);
      });
    }, function (e) {
      fail('js: error decoding audio: ' + e);
    });
  };

  // Previously a failed read was silently ignored.
  reader.onerror = function () {
    fail('js: error reading audio file: ' + reader.error);
  };

  reader.readAsArrayBuffer(file);
}
411
+
412
+ //
413
+ // microphone
414
+ //
415
+
416
// Microphone state shared by startRecording() and stopRecording():
//   mediaRecorder - recorder of the current take, null until one is created
//   doRecording   - true while a recording is requested to continue
//   startTime     - Date.now() value taken when the recording was started
var mediaRecorder = null,
  doRecording = false,
  startTime = 0;

// Ask the running recording to stop; startRecording() polls this flag.
function stopRecording() {
  doRecording = false;
}
423
+
424
// record up to kMaxAudio_s seconds of audio from the microphone
// check if doRecording is false every 1000 ms and stop recording if so
// update progress information
//
// When the recording ends, the captured data is decoded with the shared
// AudioContext, rendered through an OfflineAudioContext, truncated to
// kMaxAudio_s seconds and handed to setAudio(). If the microphone cannot be
// opened, or decoding fails, the error is printed and the buttons are
// restored so another attempt can be made.
function startRecording() {
  if (!context) {
    // NOTE(review): apart from sampleRate these keys look like media-track
    // constraints rather than AudioContext options and are probably ignored
    // by the constructor -- confirm before relying on them.
    context = new AudioContext({
      sampleRate: kSampleRate,
      channelCount: 1,
      echoCancellation: false,
      autoGainControl: true,
      noiseSuppression: true,
    });
  }

  document.getElementById('start').disabled = true;
  document.getElementById('stop').disabled = false;

  document.getElementById('progress-bar').style.width = '0%';
  document.getElementById('progress-text').innerHTML = '0%';

  doRecording = true;
  startTime = Date.now();

  var chunks = [];
  var stream = null;
  var recorder = null;   // recorder owned by THIS call (also published as mediaRecorder)
  var interval = null;
  var timeout = null;
  var finished = false;  // set once this take has been shut down

  // Put the start/stop buttons back into their idle state.
  function resetButtons() {
    document.getElementById('start').disabled = false;
    document.getElementById('stop').disabled = true;
  }

  // Release every track of a media stream, if there is one.
  function stopTracks(s) {
    if (s) {
      s.getTracks().forEach(function (track) {
        track.stop();
      });
    }
  }

  // Report a failure and clear any previously loaded audio.
  function failAudio(message) {
    printTextarea(message);
    audio = null;
    setAudio(audio);
  }

  // Shut this take down exactly once: cancel both timers, stop the recorder
  // if it is running (its onstop handler then restores the buttons) and
  // release the microphone. Safe to call before the stream was granted.
  function finish() {
    if (finished) {
      return;
    }
    finished = true;
    doRecording = false;
    clearInterval(interval);
    // Without this, a leftover timer from an earlier take could stop a
    // later recording prematurely.
    clearTimeout(timeout);

    if (recorder && recorder.state !== 'inactive') {
      recorder.stop();
    } else {
      resetButtons();
    }
    stopTracks(stream);
  }

  // Decode the recorded blob and pass the samples to setAudio().
  function decodeRecording(blob) {
    var reader = new FileReader();

    reader.onload = function () {
      context.decodeAudioData(reader.result, function (audioBuffer) {
        var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
        var source = offlineContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(offlineContext.destination);
        source.start(0);

        offlineContext.startRendering().then(function (renderedBuffer) {
          // Only the first channel is used.
          audio = renderedBuffer.getChannelData(0);
          printTextarea('js: audio recorded, size: ' + audio.length);

          // truncate to the first kMaxAudio_s seconds
          if (audio.length > kMaxAudio_s * kSampleRate) {
            audio = audio.slice(0, kMaxAudio_s * kSampleRate);
            printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
          }

          setAudio(audio);
        }).catch(function (e) {
          failAudio('js: error rendering audio: ' + e);
        });
      }, function (e) {
        failAudio('js: error decoding audio: ' + e);
      });
    };

    reader.onerror = function () {
      failAudio('js: error reading recorded audio: ' + reader.error);
    };

    reader.readAsArrayBuffer(blob);
  }

  // navigator.mediaDevices is absent in some contexts; bail out cleanly
  // instead of throwing with the buttons left in the "recording" state.
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    printTextarea('js: error getting audio stream: getUserMedia is not available');
    finish();
    return;
  }

  navigator.mediaDevices.getUserMedia({ audio: true, video: false })
    .then(function (s) {
      // The take was already stopped while the stream request was still
      // pending: release the microphone again and do not start recording.
      if (finished) {
        stopTracks(s);
        return;
      }

      stream = s;
      recorder = new MediaRecorder(stream);
      mediaRecorder = recorder;

      recorder.ondataavailable = function (e) {
        chunks.push(e.data);
      };

      recorder.onstop = function () {
        var blob = new Blob(chunks, { 'type': 'audio/ogg; codecs=opus' });
        chunks = [];

        resetButtons();
        decodeRecording(blob);
      };

      recorder.start();
    })
    .catch(function (err) {
      printTextarea('js: error getting audio stream: ' + err);
      // Restore the UI and cancel the timers; previously the start button
      // stayed disabled after a failed or denied microphone request.
      finish();
    });

  interval = setInterval(function () {
    // Clamp so the bar never grows past its container.
    var percent = Math.min(100, 100 * (Date.now() - startTime) / 1000 / kMaxAudio_s);
    document.getElementById('progress-bar').style.width = percent + '%';
    document.getElementById('progress-text').innerHTML = percent.toFixed(0) + '%';

    if (!doRecording) {
      finish();
    }
  }, 1000);

  printTextarea('js: recording ...');

  timeout = setTimeout(function () {
    if (!finished) {
      printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
      finish();
    }
  }, kMaxAudio_s * 1000);
}
523
 
524
+ //
525
+ // transcribe
526
+ //
527
 
528
/**
 * Run Whisper on the currently loaded audio.
 *
 * Lazily creates the Whisper instance from 'whisper.bin' via Module.init()
 * on first use, then schedules Module.full_default() with the global
 * `audio`, the value of the 'language' element and the `translate` flag.
 * Prints a message and returns early when the instance cannot be created or
 * no audio has been loaded.
 *
 * @param translate  Forwarded unchanged as the last argument of
 *                   Module.full_default().
 */
function onProcess(translate) {
  if (!instance) {
    instance = Module.init('whisper.bin');

    if (instance) {
      printTextarea("js: whisper initialized, instance: " + instance);
      // textContent rather than innerHTML: the model name is plain text and
      // may originate outside this function, so it must not be parsed as
      // markup.
      document.getElementById('model').textContent = 'Model loaded: ' + model_whisper;
    }
  }

  if (!instance) {
    printTextarea("js: failed to initialize whisper");
    return;
  }

  if (!audio) {
    printTextarea("js: no audio data");
    return;
  }

  // `instance` is guaranteed to be set here, so no further check is needed.
  printTextarea('');
  printTextarea('js: processing - this might take a while ...');
  printTextarea('');

  // Deferred by 100 ms -- presumably so the messages above are displayed
  // before the call below starts (TODO confirm).
  setTimeout(function () {
    var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
    console.log('js: full_default returned: ' + ret);
    if (ret) {
      printTextarea("js: whisper returned: " + ret);
    }
  }, 100);
}
562
+ </script>
563
+ <script type="text/javascript" src="main.js"></script>
564
+ </body>
565
+
566
+ </html>