Initial commit
Browse files- build/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/.DS_Store +0 -0
- build/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.no-bundler.js +77 -0
- build/wasm_speech_streaming.d.ts +66 -0
- build/wasm_speech_streaming.js +398 -0
- build/wasm_speech_streaming_bg.wasm +3 -0
- build/wasm_speech_streaming_bg.wasm.d.ts +21 -0
- css/tailwind-3.4.17.js +0 -0
- index.html +298 -18
- moshiWorker.js +125 -0
build/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
build/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.no-bundler.js
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright 2022 Google Inc. All Rights Reserved.
|
3 |
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
* you may not use this file except in compliance with the License.
|
5 |
+
* You may obtain a copy of the License at
|
6 |
+
* http://www.apache.org/licenses/LICENSE-2.0
|
7 |
+
* Unless required by applicable law or agreed to in writing, software
|
8 |
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
9 |
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10 |
+
* See the License for the specific language governing permissions and
|
11 |
+
* limitations under the License.
|
12 |
+
*/
|
13 |
+
|
14 |
+
// This file is kept similar to workerHelpers.js, but intended to be used in
|
15 |
+
// a bundlerless ES module environment (which has a few differences).
|
16 |
+
|
17 |
+
/**
 * Waits for the first `message` event on `target` whose payload has the
 * requested `type`, then stops listening and resolves with that payload.
 * Messages with a null/undefined payload or a different `type` are ignored.
 *
 * @param target - object exposing addEventListener / removeEventListener
 * @param type - value the payload's `type` field must strictly equal
 * @returns a promise resolving to the matching event's `data`
 */
function waitForMsgType(target, type) {
  return new Promise(resolve => {
    const listener = event => {
      const payload = event.data;
      const matches = payload != null && payload.type === type;
      if (matches) {
        // One-shot: detach before resolving so later messages are not seen.
        target.removeEventListener('message', listener);
        resolve(payload);
      }
    };
    target.addEventListener('message', listener);
  });
}
|
26 |
+
|
27 |
+
// We need to wait for a specific message because this file is used both
|
28 |
+
// as a Worker and as a regular script, so it might receive unrelated
|
29 |
+
// messages on the page.
|
30 |
+
waitForMsgType(self, 'wasm_bindgen_worker_init').then(async init => {
  const { mainJS, module, memory, receiver } = init;
  // Load the entry module named in the init message and initialise it with
  // the module and memory that came with that message.
  const pkg = await import(mainJS);
  await pkg.default(module, memory);
  // Tell the spawner we are ready before handing this thread to the pool.
  postMessage({ type: 'wasm_bindgen_worker_ready' });
  pkg.wbg_rayon_start_worker(receiver);
});
|
36 |
+
|
37 |
+
// Note: this is never used, but necessary to prevent a bug in Firefox
|
38 |
+
// (https://bugzilla.mozilla.org/show_bug.cgi?id=1702191) where it collects
|
39 |
+
// Web Workers that have a shared WebAssembly memory with the main thread,
|
40 |
+
// but are not explicitly rooted via a `Worker` instance.
|
41 |
+
//
|
42 |
+
// By storing them in a variable, we can keep `Worker` objects around and
|
43 |
+
// prevent them from getting GC-d.
|
44 |
+
let _workers;
|
45 |
+
|
46 |
+
/**
 * Spawns `builder.numThreads()` module Workers from this very script, sends
 * each one the init message (module, memory, receiver, main JS URL), waits
 * until every worker has answered `wasm_bindgen_worker_ready`, and finally
 * calls `builder.build()`.
 *
 * @param module - forwarded unchanged in the init message
 * @param memory - forwarded unchanged in the init message
 * @param builder - object exposing numThreads(), receiver(), mainJS(), build()
 * @throws {Error} if `builder.numThreads()` is 0
 */
export async function startWorkers(module, memory, builder) {
  const threadCount = builder.numThreads();
  if (threadCount === 0) {
    throw new Error(`num_threads must be > 0.`);
  }

  const workerInit = {
    type: 'wasm_bindgen_worker_init',
    module,
    memory,
    receiver: builder.receiver(),
    mainJS: builder.mainJS()
  };

  // Self-spawn into new Workers.
  // The script is fetched as a blob so it works even if this script is
  // hosted remotely (e.g. on a CDN). This avoids a cross-origin
  // security error. The blob URL is created once and shared by every
  // worker instead of re-fetching the same script per thread.
  const response = await fetch(import.meta.url);
  const scriptBlob = await response.blob();
  const url = URL.createObjectURL(scriptBlob);

  try {
    _workers = await Promise.all(
      Array.from({ length: threadCount }, async () => {
        const worker = new Worker(url, {
          type: 'module'
        });
        worker.postMessage(workerInit);
        await waitForMsgType(worker, 'wasm_bindgen_worker_ready');
        return worker;
      })
    );
  } finally {
    // Release the blob URL whether or not every worker came up.
    URL.revokeObjectURL(url);
  }
  builder.build();
}
|
build/wasm_speech_streaming.d.ts
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* tslint:disable */
|
2 |
+
/* eslint-disable */
|
3 |
+
export function wbg_rayon_start_worker(receiver: number): void;
|
4 |
+
export function initThreadPool(num_threads: number): Promise<any>;
|
5 |
+
export class MoshiASRDecoder {
|
6 |
+
free(): void;
|
7 |
+
stop_streaming(): void;
|
8 |
+
start_streaming(): void;
|
9 |
+
process_audio_chunk(audio_data: Float32Array, callback: Function): void;
|
10 |
+
constructor(weights: Uint8Array, tokenizer: Uint8Array, mimi: Uint8Array, config: Uint8Array);
|
11 |
+
}
|
12 |
+
export class wbg_rayon_PoolBuilder {
|
13 |
+
private constructor();
|
14 |
+
free(): void;
|
15 |
+
numThreads(): number;
|
16 |
+
build(): void;
|
17 |
+
mainJS(): string;
|
18 |
+
receiver(): number;
|
19 |
+
}
|
20 |
+
|
21 |
+
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
|
22 |
+
|
23 |
+
export interface InitOutput {
|
24 |
+
readonly __wbg_moshiasrdecoder_free: (a: number, b: number) => void;
|
25 |
+
readonly moshiasrdecoder_new: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => number;
|
26 |
+
readonly moshiasrdecoder_process_audio_chunk: (a: number, b: number, c: number, d: any) => void;
|
27 |
+
readonly moshiasrdecoder_start_streaming: (a: number) => void;
|
28 |
+
readonly moshiasrdecoder_stop_streaming: (a: number) => void;
|
29 |
+
readonly __wbg_wbg_rayon_poolbuilder_free: (a: number, b: number) => void;
|
30 |
+
readonly wbg_rayon_poolbuilder_build: (a: number) => void;
|
31 |
+
readonly wbg_rayon_poolbuilder_mainJS: (a: number) => any;
|
32 |
+
readonly wbg_rayon_poolbuilder_numThreads: (a: number) => number;
|
33 |
+
readonly wbg_rayon_poolbuilder_receiver: (a: number) => number;
|
34 |
+
readonly wbg_rayon_start_worker: (a: number) => void;
|
35 |
+
readonly initThreadPool: (a: number) => any;
|
36 |
+
readonly __wbindgen_exn_store: (a: number) => void;
|
37 |
+
readonly __externref_table_alloc: () => number;
|
38 |
+
readonly __wbindgen_export_2: WebAssembly.Table;
|
39 |
+
readonly memory: WebAssembly.Memory;
|
40 |
+
readonly __wbindgen_malloc: (a: number, b: number) => number;
|
41 |
+
readonly __wbindgen_thread_destroy: (a?: number, b?: number, c?: number) => void;
|
42 |
+
readonly __wbindgen_start: (a: number) => void;
|
43 |
+
}
|
44 |
+
|
45 |
+
export type SyncInitInput = BufferSource | WebAssembly.Module;
|
46 |
+
/**
|
47 |
+
* Instantiates the given `module`, which can either be bytes or
|
48 |
+
* a precompiled `WebAssembly.Module`.
|
49 |
+
*
|
50 |
+
* @param {{ module: SyncInitInput, memory?: WebAssembly.Memory, thread_stack_size?: number }} module - Passing `SyncInitInput` directly is deprecated.
|
51 |
+
* @param {WebAssembly.Memory} memory - Deprecated.
|
52 |
+
*
|
53 |
+
* @returns {InitOutput}
|
54 |
+
*/
|
55 |
+
export function initSync(module: { module: SyncInitInput, memory?: WebAssembly.Memory, thread_stack_size?: number } | SyncInitInput, memory?: WebAssembly.Memory): InitOutput;
|
56 |
+
|
57 |
+
/**
|
58 |
+
* If `module_or_path` is {RequestInfo} or {URL}, makes a request and
|
59 |
+
* for everything else, calls `WebAssembly.instantiate` directly.
|
60 |
+
*
|
61 |
+
* @param {{ module_or_path: InitInput | Promise<InitInput>, memory?: WebAssembly.Memory, thread_stack_size?: number }} module_or_path - Passing `InitInput` directly is deprecated.
|
62 |
+
* @param {WebAssembly.Memory} memory - Deprecated.
|
63 |
+
*
|
64 |
+
* @returns {Promise<InitOutput>}
|
65 |
+
*/
|
66 |
+
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput>, memory?: WebAssembly.Memory, thread_stack_size?: number } | InitInput | Promise<InitInput>, memory?: WebAssembly.Memory): Promise<InitOutput>;
|
build/wasm_speech_streaming.js
ADDED
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { startWorkers } from './snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.no-bundler.js';
|
2 |
+
|
3 |
+
let wasm;
|
4 |
+
|
5 |
+
/**
 * Stores `obj` in the wasm externref table (`wasm.__wbindgen_export_2`) at a
 * slot obtained from `wasm.__externref_table_alloc()`.
 *
 * @returns the table index at which `obj` was stored
 */
function addToExternrefTable0(obj) {
    const slot = wasm.__externref_table_alloc();
    const table = wasm.__wbindgen_export_2;
    table.set(slot, obj);
    return slot;
}
|
10 |
+
|
11 |
+
/**
 * Calls `f` with the argument list `args` and returns its result. If the call
 * throws, the thrown value is placed in the externref table and its index is
 * passed to `wasm.__wbindgen_exn_store`; the function then returns undefined.
 */
function handleError(f, args) {
    try {
        return Reflect.apply(f, this, args);
    } catch (thrown) {
        wasm.__wbindgen_exn_store(addToExternrefTable0(thrown));
    }
}
|
19 |
+
|
20 |
+
const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
|
21 |
+
|
22 |
+
if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
|
23 |
+
|
24 |
+
let cachedUint8ArrayMemory0 = null;
|
25 |
+
|
26 |
+
/**
 * Returns a Uint8Array view over `wasm.memory.buffer`, reusing the cached view
 * while it still points at the current buffer and rebuilding it otherwise.
 */
function getUint8ArrayMemory0() {
    const cacheIsCurrent =
        cachedUint8ArrayMemory0 !== null &&
        cachedUint8ArrayMemory0.buffer === wasm.memory.buffer;
    if (!cacheIsCurrent) {
        cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
    }
    return cachedUint8ArrayMemory0;
}
|
32 |
+
|
33 |
+
/**
 * Decodes `len` bytes of wasm memory starting at `ptr` with the module's
 * cached text decoder and returns the resulting string.
 *
 * @param ptr - byte offset into wasm memory; coerced to an unsigned 32-bit value
 * @param len - number of bytes to decode
 */
function getStringFromWasm0(ptr, len) {
    const start = ptr >>> 0;
    // `slice` copies the bytes out of wasm memory before decoding.
    // NOTE(review): presumably needed because the memory may be shared — confirm.
    const bytes = getUint8ArrayMemory0().slice(start, start + len);
    return cachedTextDecoder.decode(bytes);
}
|
37 |
+
|
38 |
+
/** Returns true exactly when `x` is `undefined` or `null`. */
function isLikeNone(x) {
    return x == null;
}
|
41 |
+
|
42 |
+
let cachedFloat32ArrayMemory0 = null;
|
43 |
+
|
44 |
+
/**
 * Returns a Float32Array view over `wasm.memory.buffer`, reusing the cached
 * view while it still points at the current buffer and rebuilding it otherwise.
 */
function getFloat32ArrayMemory0() {
    const cacheIsCurrent =
        cachedFloat32ArrayMemory0 !== null &&
        cachedFloat32ArrayMemory0.buffer === wasm.memory.buffer;
    if (!cacheIsCurrent) {
        cachedFloat32ArrayMemory0 = new Float32Array(wasm.memory.buffer);
    }
    return cachedFloat32ArrayMemory0;
}
|
50 |
+
|
51 |
+
let WASM_VECTOR_LEN = 0;
|
52 |
+
|
53 |
+
/**
 * Copies the float array `arg` into wasm memory allocated via `malloc` and
 * records its element count in `WASM_VECTOR_LEN`.
 *
 * @param arg - array-like of 32-bit floats to copy
 * @param malloc - allocator called as malloc(byteLength, alignment)
 * @returns the byte offset of the copy, as an unsigned 32-bit value
 */
function passArrayF32ToWasm0(arg, malloc) {
    const elemSize = 4;
    const byteOffset = malloc(arg.length * elemSize, elemSize) >>> 0;
    // The Float32Array view is indexed in elements, not bytes.
    const floats = getFloat32ArrayMemory0();
    floats.set(arg, byteOffset / elemSize);
    WASM_VECTOR_LEN = arg.length;
    return byteOffset;
}
|
59 |
+
|
60 |
+
/**
 * Copies the byte array `arg` into wasm memory allocated via `malloc` and
 * records its length in `WASM_VECTOR_LEN`.
 *
 * @param arg - array-like of bytes to copy
 * @param malloc - allocator called as malloc(byteLength, alignment)
 * @returns the byte offset of the copy, as an unsigned 32-bit value
 */
function passArray8ToWasm0(arg, malloc) {
    const elemSize = 1;
    const byteOffset = malloc(arg.length * elemSize, elemSize) >>> 0;
    const bytes = getUint8ArrayMemory0();
    bytes.set(arg, byteOffset / elemSize);
    WASM_VECTOR_LEN = arg.length;
    return byteOffset;
}
|
66 |
+
/**
 * Forwards `receiver` to the wasm export of the same name. The worker
 * bootstrap script calls this after reporting that it is ready.
 *
 * @param {number} receiver
 */
export function wbg_rayon_start_worker(receiver) {
    const { wbg_rayon_start_worker: startWorker } = wasm;
    startWorker(receiver);
}
|
72 |
+
|
73 |
+
/**
 * Forwards `num_threads` to the wasm export `initThreadPool` and returns
 * whatever that export returns (declared as a promise).
 *
 * @param {number} num_threads
 * @returns {Promise<any>}
 */
export function initThreadPool(num_threads) {
    return wasm.initThreadPool(num_threads);
}
|
81 |
+
|
82 |
+
const MoshiASRDecoderFinalization = (typeof FinalizationRegistry === 'undefined')
|
83 |
+
? { register: () => {}, unregister: () => {} }
|
84 |
+
: new FinalizationRegistry(ptr => wasm.__wbg_moshiasrdecoder_free(ptr >>> 0, 1));
|
85 |
+
|
86 |
+
/**
 * JavaScript handle for a decoder object living inside the wasm module.
 * The wasm-side pointer is kept in `__wbg_ptr`; every method passes it to the
 * matching `moshiasrdecoder_*` export.
 */
export class MoshiASRDecoder {

    /**
     * Copies the four byte buffers into wasm memory and constructs the
     * wasm-side decoder from them.
     *
     * @param {Uint8Array} weights
     * @param {Uint8Array} tokenizer
     * @param {Uint8Array} mimi
     * @param {Uint8Array} config
     */
    constructor(weights, tokenizer, mimi, config) {
        // Each buffer becomes a (pointer, length) pair; the length must be read
        // right after the copy because WASM_VECTOR_LEN is overwritten by the next one.
        const stage = bytes => [passArray8ToWasm0(bytes, wasm.__wbindgen_malloc), WASM_VECTOR_LEN];
        const ptrLenPairs = [weights, tokenizer, mimi, config].flatMap(stage);
        const rawPtr = wasm.moshiasrdecoder_new(...ptrLenPairs);
        this.__wbg_ptr = rawPtr >>> 0;
        MoshiASRDecoderFinalization.register(this, this.__wbg_ptr, this);
        return this;
    }

    /**
     * Detaches this handle from the wasm object: zeroes the stored pointer,
     * unregisters the finalizer and returns the previous pointer.
     */
    __destroy_into_raw() {
        const detached = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        MoshiASRDecoderFinalization.unregister(this);
        return detached;
    }

    /** Detaches this handle and passes the pointer to the wasm free export. */
    free() {
        wasm.__wbg_moshiasrdecoder_free(this.__destroy_into_raw(), 0);
    }

    /** Calls the wasm `moshiasrdecoder_start_streaming` export. */
    start_streaming() {
        wasm.moshiasrdecoder_start_streaming(this.__wbg_ptr);
    }

    /** Calls the wasm `moshiasrdecoder_stop_streaming` export. */
    stop_streaming() {
        wasm.moshiasrdecoder_stop_streaming(this.__wbg_ptr);
    }

    /**
     * Copies `audio_data` into wasm memory and hands it, together with
     * `callback`, to the wasm `moshiasrdecoder_process_audio_chunk` export.
     *
     * @param {Float32Array} audio_data
     * @param {Function} callback
     */
    process_audio_chunk(audio_data, callback) {
        const samplesPtr = passArrayF32ToWasm0(audio_data, wasm.__wbindgen_malloc);
        const sampleCount = WASM_VECTOR_LEN;
        wasm.moshiasrdecoder_process_audio_chunk(this.__wbg_ptr, samplesPtr, sampleCount, callback);
    }
}
|
135 |
+
|
136 |
+
const wbg_rayon_PoolBuilderFinalization = (typeof FinalizationRegistry === 'undefined')
|
137 |
+
? { register: () => {}, unregister: () => {} }
|
138 |
+
: new FinalizationRegistry(ptr => wasm.__wbg_wbg_rayon_poolbuilder_free(ptr >>> 0, 1));
|
139 |
+
|
140 |
+
/**
 * JavaScript handle for a thread-pool builder living inside the wasm module.
 * Instances are created from a raw pointer via `__wrap`; `startWorkers`
 * consumes them through numThreads(), receiver(), mainJS() and build().
 */
export class wbg_rayon_PoolBuilder {

    /**
     * Wraps a raw wasm pointer in a handle without running a constructor and
     * registers it for finalization.
     */
    static __wrap(ptr) {
        const handle = Object.create(wbg_rayon_PoolBuilder.prototype);
        handle.__wbg_ptr = ptr >>> 0;
        wbg_rayon_PoolBuilderFinalization.register(handle, handle.__wbg_ptr, handle);
        return handle;
    }

    /**
     * Detaches this handle from the wasm object: zeroes the stored pointer,
     * unregisters the finalizer and returns the previous pointer.
     */
    __destroy_into_raw() {
        const detached = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        wbg_rayon_PoolBuilderFinalization.unregister(this);
        return detached;
    }

    /** Detaches this handle and passes the pointer to the wasm free export. */
    free() {
        wasm.__wbg_wbg_rayon_poolbuilder_free(this.__destroy_into_raw(), 0);
    }

    /**
     * @returns {number} the thread count reported by wasm, as an unsigned 32-bit value
     */
    numThreads() {
        return wasm.wbg_rayon_poolbuilder_numThreads(this.__wbg_ptr) >>> 0;
    }

    /** Calls the wasm `wbg_rayon_poolbuilder_build` export. */
    build() {
        wasm.wbg_rayon_poolbuilder_build(this.__wbg_ptr);
    }

    /**
     * @returns {string} the value returned by the wasm `mainJS` export
     */
    mainJS() {
        return wasm.wbg_rayon_poolbuilder_mainJS(this.__wbg_ptr);
    }

    /**
     * @returns {number} the receiver value reported by wasm, as an unsigned 32-bit value
     */
    receiver() {
        return wasm.wbg_rayon_poolbuilder_receiver(this.__wbg_ptr) >>> 0;
    }
}
|
186 |
+
|
187 |
+
/**
 * Instantiates a WebAssembly module from either a `Response` or anything
 * `WebAssembly.instantiate` accepts (bytes or a compiled module).
 *
 * For a `Response`, streaming instantiation is tried first; if it fails and
 * the response was not served as `application/wasm`, a warning is logged and
 * the body is instantiated from an ArrayBuffer instead. Any other streaming
 * failure is rethrown.
 *
 * @returns an object with `instance` and `module` properties
 */
async function __wbg_load(module, imports) {
    const isResponse = typeof Response === 'function' && module instanceof Response;

    if (!isResponse) {
        const result = await WebAssembly.instantiate(module, imports);
        // Given a compiled module, instantiate() yields a bare Instance; wrap it
        // so both input kinds produce the same { instance, module } shape.
        return result instanceof WebAssembly.Instance
            ? { instance: result, module }
            : result;
    }

    if (typeof WebAssembly.instantiateStreaming === 'function') {
        try {
            return await WebAssembly.instantiateStreaming(module, imports);
        } catch (e) {
            if (module.headers.get('Content-Type') === 'application/wasm') {
                throw e;
            }
            console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
        }
    }

    const bytes = await module.arrayBuffer();
    return await WebAssembly.instantiate(bytes, imports);
}
|
217 |
+
|
218 |
+
/**
 * Builds the import object passed to WebAssembly instantiation. All host
 * functions live under the `wbg` namespace; `__wbg_init_memory` later adds
 * `wbg.memory` to the same object.
 *
 * Entries wrapped in `handleError` report a thrown JS exception back to wasm
 * instead of letting it propagate.
 */
function __wbg_get_imports() {
    const wbg = {
        __wbg_buffer_609cc3eee51ed158: (arg0) => arg0.buffer,
        __wbg_call_672a4d21634d4a24: (...args) =>
            handleError((arg0, arg1) => arg0.call(arg1), args),
        __wbg_call_7cccdd69e0791ae2: (...args) =>
            handleError((arg0, arg1, arg2) => arg0.call(arg1, arg2), args),
        __wbg_getRandomValues_80578b2ff2a093ba: (...args) =>
            handleError((arg0) => {
                globalThis.crypto.getRandomValues(arg0);
            }, args),
        __wbg_instanceof_Window_def73ea0955fc569: (arg0) => {
            // `Window` may not exist in this context (e.g. inside a Worker).
            try {
                return arg0 instanceof Window;
            } catch (_) {
                return false;
            }
        },
        __wbg_length_a446193dc22c12f8: (arg0) => arg0.length,
        __wbg_log_8b4e426889933567: (arg0, arg1) => {
            console.log(getStringFromWasm0(arg0, arg1));
        },
        __wbg_new_a12002a7f91c75be: (arg0) => new Uint8Array(arg0),
        __wbg_newnoargs_105ed471475aaf50: (arg0, arg1) =>
            new Function(getStringFromWasm0(arg0, arg1)),
        __wbg_newwithlength_a381634e90c276d4: (arg0) => new Uint8Array(arg0 >>> 0),
        __wbg_set_65595bdd868b3009: (arg0, arg1, arg2) => {
            arg0.set(arg1, arg2 >>> 0);
        },
        __wbg_startWorkers_2329d931beb7bef4: (arg0, arg1, arg2) =>
            startWorkers(arg0, arg1, wbg_rayon_PoolBuilder.__wrap(arg2)),
        __wbg_static_accessor_GLOBAL_88a902d13a557d07: () => {
            const ret = typeof global === 'undefined' ? null : global;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_static_accessor_GLOBAL_THIS_56578be7e9f832b0: () => {
            const ret = typeof globalThis === 'undefined' ? null : globalThis;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_static_accessor_SELF_37c5d418e4bf5819: () => {
            const ret = typeof self === 'undefined' ? null : self;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_static_accessor_URL_151cb8815849ce83: () => import.meta.url,
        __wbg_static_accessor_WINDOW_5de37043a91a9c40: () => {
            const ret = typeof window === 'undefined' ? null : window;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_subarray_aa9065fa9dc5df96: (arg0, arg1, arg2) =>
            arg0.subarray(arg1 >>> 0, arg2 >>> 0),
        __wbindgen_init_externref_table: () => {
            // Grow the externref table by four slots and seed slot 0 plus the
            // new slots with undefined, null, true and false.
            const table = wasm.__wbindgen_export_2;
            const offset = table.grow(4);
            table.set(0, undefined);
            table.set(offset + 0, undefined);
            table.set(offset + 1, null);
            table.set(offset + 2, true);
            table.set(offset + 3, false);
        },
        __wbindgen_is_undefined: (arg0) => arg0 === undefined,
        __wbindgen_memory: () => wasm.memory,
        __wbindgen_module: () => __wbg_init.__wbindgen_wasm_module,
        __wbindgen_string_new: (arg0, arg1) => getStringFromWasm0(arg0, arg1),
        __wbindgen_throw: (arg0, arg1) => {
            throw new Error(getStringFromWasm0(arg0, arg1));
        },
    };

    return { wbg };
}
|
328 |
+
|
329 |
+
/**
 * Sets `imports.wbg.memory` to the supplied `memory`, or — when none is
 * supplied — to a new shared WebAssembly.Memory (29 initial pages, 65536 max).
 */
function __wbg_init_memory(imports, memory) {
    let chosen = memory;
    if (!chosen) {
        chosen = new WebAssembly.Memory({initial:29,maximum:65536,shared:true});
    }
    imports.wbg.memory = chosen;
}
|
332 |
+
|
333 |
+
/**
 * Completes initialisation once the module has been instantiated: publishes
 * the instance's exports as the module-level `wasm`, remembers the compiled
 * module on `__wbg_init`, drops the cached memory views (they belong to any
 * previous memory) and runs the wasm start routine.
 *
 * @param instance - the WebAssembly.Instance that was just created
 * @param module - the compiled module it was created from
 * @param thread_stack_size - optional; when given it must be a non-zero
 *   number that is a multiple of 65536
 * @returns the instance's exports
 * @throws {Error} 'invalid stack size' when `thread_stack_size` is given but invalid
 */
function __wbg_finalize_init(instance, module, thread_stack_size) {
    wasm = instance.exports;
    __wbg_init.__wbindgen_wasm_module = module;
    cachedFloat32ArrayMemory0 = null;
    cachedUint8ArrayMemory0 = null;

    if (typeof thread_stack_size !== 'undefined') {
        const isValid =
            typeof thread_stack_size === 'number' &&
            thread_stack_size !== 0 &&
            thread_stack_size % 65536 === 0;
        if (!isValid) {
            // Throw a real Error (not a bare string) so callers get a stack trace.
            throw new Error('invalid stack size');
        }
    }
    wasm.__wbindgen_start(thread_stack_size);
    return wasm;
}
|
343 |
+
|
344 |
+
/**
 * Synchronously compiles (if necessary) and instantiates the wasm module.
 * Returns the existing exports immediately if the module is already
 * initialised.
 *
 * @param module - either an options object `{ module, memory, thread_stack_size }`
 *   or, in the deprecated form, the bytes / compiled module itself
 * @param memory - deprecated positional memory; ignored when an options object is given
 * @returns the wasm exports
 */
function initSync(module, memory) {
    if (wasm !== undefined) {
        return wasm;
    }

    let thread_stack_size;
    const wasGivenInput = typeof module !== 'undefined';
    if (wasGivenInput && Object.getPrototypeOf(module) === Object.prototype) {
        // A plain object is the options form; unpack it.
        const options = module;
        module = options.module;
        memory = options.memory;
        thread_stack_size = options.thread_stack_size;
    } else if (wasGivenInput) {
        console.warn('using deprecated parameters for `initSync()`; pass a single object instead');
    }

    const imports = __wbg_get_imports();
    __wbg_init_memory(imports, memory);

    const compiled = module instanceof WebAssembly.Module
        ? module
        : new WebAssembly.Module(module);
    const instance = new WebAssembly.Instance(compiled, imports);

    return __wbg_finalize_init(instance, compiled, thread_stack_size);
}
|
368 |
+
|
369 |
+
/**
 * Asynchronously loads and instantiates the wasm module. Returns the existing
 * exports immediately if the module is already initialised.
 *
 * @param module_or_path - either an options object
 *   `{ module_or_path, memory, thread_stack_size }` or, in the deprecated form,
 *   the input itself; when absent, `wasm_speech_streaming_bg.wasm` next to this
 *   script is used
 * @param memory - deprecated positional memory; ignored when an options object is given
 * @returns a promise resolving to the wasm exports
 */
async function __wbg_init(module_or_path, memory) {
    if (wasm !== undefined) {
        return wasm;
    }

    let thread_stack_size;
    let source = module_or_path;
    const wasGivenInput = typeof source !== 'undefined';
    if (wasGivenInput && Object.getPrototypeOf(source) === Object.prototype) {
        // A plain object is the options form; unpack it.
        const options = source;
        source = options.module_or_path;
        memory = options.memory;
        thread_stack_size = options.thread_stack_size;
    } else if (wasGivenInput) {
        console.warn('using deprecated parameters for the initialization function; pass a single object instead');
    }

    if (typeof source === 'undefined') {
        source = new URL('wasm_speech_streaming_bg.wasm', import.meta.url);
    }
    const imports = __wbg_get_imports();

    // Strings, Requests and URLs name a resource to download.
    const needsFetch =
        typeof source === 'string' ||
        (typeof Request === 'function' && source instanceof Request) ||
        (typeof URL === 'function' && source instanceof URL);
    if (needsFetch) {
        source = fetch(source);
    }

    __wbg_init_memory(imports, memory);

    const loaded = await __wbg_load(await source, imports);

    return __wbg_finalize_init(loaded.instance, loaded.module, thread_stack_size);
}
|
396 |
+
|
397 |
+
export { initSync };
|
398 |
+
export default __wbg_init;
|
build/wasm_speech_streaming_bg.wasm
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e8b263e170f1e2308ed6c0e6f7e175f7aedf7d3d98d90dfa853a54a3019a320
|
3 |
+
size 4353046
|
build/wasm_speech_streaming_bg.wasm.d.ts
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* tslint:disable */
|
2 |
+
/* eslint-disable */
|
3 |
+
export const __wbg_moshiasrdecoder_free: (a: number, b: number) => void;
|
4 |
+
export const moshiasrdecoder_new: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => number;
|
5 |
+
export const moshiasrdecoder_process_audio_chunk: (a: number, b: number, c: number, d: any) => void;
|
6 |
+
export const moshiasrdecoder_start_streaming: (a: number) => void;
|
7 |
+
export const moshiasrdecoder_stop_streaming: (a: number) => void;
|
8 |
+
export const __wbg_wbg_rayon_poolbuilder_free: (a: number, b: number) => void;
|
9 |
+
export const wbg_rayon_poolbuilder_build: (a: number) => void;
|
10 |
+
export const wbg_rayon_poolbuilder_mainJS: (a: number) => any;
|
11 |
+
export const wbg_rayon_poolbuilder_numThreads: (a: number) => number;
|
12 |
+
export const wbg_rayon_poolbuilder_receiver: (a: number) => number;
|
13 |
+
export const wbg_rayon_start_worker: (a: number) => void;
|
14 |
+
export const initThreadPool: (a: number) => any;
|
15 |
+
export const __wbindgen_exn_store: (a: number) => void;
|
16 |
+
export const __externref_table_alloc: () => number;
|
17 |
+
export const __wbindgen_export_2: WebAssembly.Table;
|
18 |
+
export const memory: WebAssembly.Memory;
|
19 |
+
export const __wbindgen_malloc: (a: number, b: number) => number;
|
20 |
+
export const __wbindgen_thread_destroy: (a?: number, b?: number, c?: number) => void;
|
21 |
+
export const __wbindgen_start: (a: number) => void;
|
css/tailwind-3.4.17.js
ADDED
The diff for this file is too large to render.
See raw diff
|
|
index.html
CHANGED
@@ -1,19 +1,299 @@
|
|
1 |
-
<!
|
2 |
<html>
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
<html>
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8" />
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
6 |
+
<title>WASM Streaming Speech Recognition</title>
|
7 |
+
<style>
|
8 |
+
@import url("https://fonts.googleapis.com/css2?family=Source+Sans+3:wght@300;400;600;700&display=swap");
|
9 |
+
html, body { font-family: "Source Sans 3", system-ui, -apple-system, Segoe UI, Roboto, sans-serif; }
|
10 |
+
</style>
|
11 |
+
<script src="css/tailwind-3.4.17.js"></script>
|
12 |
+
<script type="module">
|
13 |
+
const MODEL_ID = "moshi_1b_en_fr_q4k";
|
14 |
+
const WEIGHTS_URL = "https://huggingface.co/efficient-nlp/stt-1b-en_fr-quantized/resolve/main/model-q4k.gguf";
|
15 |
+
const MIMI_URL = "https://huggingface.co/efficient-nlp/stt-1b-en_fr-quantized/resolve/main/mimi-pytorch-e351c8d8@125.safetensors";
|
16 |
+
const TOKENIZER_URL = "https://huggingface.co/efficient-nlp/stt-1b-en_fr-quantized/resolve/main/tokenizer_en_fr_audio_8000.json";
|
17 |
+
const CONFIG_URL = "https://huggingface.co/efficient-nlp/stt-1b-en_fr-quantized/resolve/main/config.json";
|
18 |
+
|
19 |
+
const moshiWorker = new Worker("./moshiWorker.js", { type: "module" });
|
20 |
+
let mediaRecorder = null;
|
21 |
+
let audioChunks = [];
|
22 |
+
let isRecording = false;
|
23 |
+
let audioStream = null;
|
24 |
+
let audioContext = null;
|
25 |
+
let processor = null;
|
26 |
+
let source = null;
|
27 |
+
let modelInitialized = false;
|
28 |
+
let pendingStart = false;
|
29 |
+
|
30 |
+
// Performance tracking
|
31 |
+
let audioChunksProcessed = 0;
|
32 |
+
let sessionStartTime = 0;
|
33 |
+
|
34 |
+
/**
 * Shows `message` as the text of the `#status-div` element.
 * Does nothing if that element is not in the document, matching how
 * `updateDiagnostics` treats a missing `#diagnostics` element.
 */
function updateStatusDiv(message) {
  const statusDiv = document.querySelector("#status-div");
  if (!statusDiv) return;
  statusDiv.textContent = message;
}
|
37 |
+
|
38 |
+
/**
 * Refreshes the `#diagnostics` element with the CPU count, the real-time
 * factor and the session duration.
 *
 * - Before any session has started, placeholder zero values are shown.
 * - While recording, live values are shown.
 * - After recording stops, the element is left untouched so the last values
 *   stay visible.
 */
function updateDiagnostics() {
  const panel = document.querySelector("#diagnostics");
  if (!panel) return;

  const cpuCount = navigator.hardwareConcurrency || 'unknown';

  if (!sessionStartTime) {
    panel.innerHTML = `CPUs: ${cpuCount}, Real-time factor: <span class="text-gray-600">0.00x</span>, Duration: 0.0s`;
    return;
  }
  if (!isRecording) return;

  // Real-time factor = seconds of audio processed / seconds of wall clock.
  // >1 = faster than real-time, <1 = slower than real-time.
  // NOTE(review): presumably each chunk is 1024 samples at 24 kHz — confirm.
  const audioSeconds = audioChunksProcessed * (1024 / 24000);
  const elapsedSeconds = (Date.now() - sessionStartTime) / 1000;
  const realTimeFactor = elapsedSeconds > 0 ? (audioSeconds / elapsedSeconds) : 0;

  // Green when keeping up, yellow when slightly behind, red otherwise.
  const factorColor =
    realTimeFactor >= 0.95 ? 'text-green-600'
    : realTimeFactor >= 0.8 ? 'text-yellow-600'
    : 'text-red-600';

  panel.innerHTML = `CPUs: ${cpuCount}, Real-time factor: <span class="${factorColor}">${realTimeFactor.toFixed(2)}x</span>, Duration: ${elapsedSeconds.toFixed(1)}s`;
}
|
68 |
+
|
69 |
+
window.addEventListener('load', updateDiagnostics);
|
70 |
+
setInterval(updateDiagnostics, 200);
|
71 |
+
|
72 |
+
/**
 * Ask the worker to load the model, unless it is already loaded.
 *
 * The speech button is disabled and greyed out while loading; the worker
 * message listener re-enables it when "model_ready" arrives.
 */
function initializeModel() {
  if (!modelInitialized) {
    const speechButton = document.querySelector("#speech-button");
    speechButton.disabled = true;
    speechButton.className = "bg-gray-400 text-gray-700 font-normal py-2 px-4 rounded cursor-not-allowed";

    const initRequest = {
      command: "initialize",
      weightsURL: WEIGHTS_URL,
      modelID: MODEL_ID,
      mimiURL: MIMI_URL,
      tokenizerURL: TOKENIZER_URL,
      configURL: CONFIG_URL,
    };
    moshiWorker.postMessage(initRequest);
  }
}
|
88 |
+
|
89 |
+
// Handle messages from worker.
//
// Message shapes handled here:
//   { status: "model_ready" }       - model loaded; enable the button and start
//                                     recording if a start was requested earlier.
//   { status: "streaming", word }   - append a newly transcribed word.
//   { status: "chunk_processed" }   - one audio chunk was consumed (for diagnostics).
//   { status: "loading", message }  - progress text for the status line.
//   { error }                       - failure text for the status line.
moshiWorker.addEventListener("message", async (event) => {
  const data = event.data;
  if (data.status === "model_ready") {
    modelInitialized = true;
    updateStatusDiv("Model loaded - Ready to start");

    const button = document.querySelector("#speech-button");
    button.disabled = false;
    button.className = "bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded";

    if (pendingStart) {
      pendingStart = false;
      await startRecording();
    }
  } else if (data.status === "streaming") {
    // Add new word to transcription in real-time
    const outputDiv = document.querySelector("#output-generation");
    const placeholder = document.querySelector("#output-placeholder");

    if (placeholder) placeholder.hidden = true;

    if (outputDiv.textContent) {
      outputDiv.textContent += " " + data.word;
    } else {
      outputDiv.textContent = data.word;
    }
    outputDiv.hidden = false;
  } else if (data.status === "chunk_processed") {
    audioChunksProcessed++;
  } else if (data.status === "loading") {
    updateStatusDiv(data.message);
  } else if (data.error) {
    updateStatusDiv("Error: " + data.error);
    pendingStart = false;

    // initializeModel() disables the button while the model loads. If loading
    // failed, restore it; otherwise the page is stuck with a dead button and
    // the user cannot retry. Errors after the model is ready leave the button
    // alone so an active "Stop Speech" state is not overwritten.
    if (!modelInitialized) {
      const button = document.querySelector("#speech-button");
      button.disabled = false;
      button.className = "bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded";
    }
  }
});
|
126 |
+
|
127 |
+
/**
 * Apply a worker status message to the page.
 *
 * @param data Object with `status` and, depending on the status, `message`
 *   (for "loading"/"decoding") or `word` (for "streaming").
 */
function updateStatus(data) {
  const { status, message, word } = data;
  const transcript = document.querySelector("#output-generation");

  switch (status) {
    case "loading":
    case "decoding":
      // Fall back to a generic label when the worker sent no message text.
      updateStatusDiv(message || (status === "loading" ? "Loading..." : "Decoding..."));
      break;

    case "streaming":
      // Append the word, space-separated unless the transcript is still empty.
      transcript.textContent = transcript.textContent
        ? transcript.textContent + " " + word
        : word;
      transcript.hidden = false;
      break;

    case "complete":
      updateStatusDiv("Ready");
      break;
  }
}
|
145 |
+
|
146 |
+
/**
 * Acquire the microphone and start forwarding audio chunks to the worker.
 *
 * Builds the graph: microphone stream -> MediaStreamSource -> ScriptProcessor
 * (1024-sample mono buffers) -> context destination, on a 24000 Hz
 * AudioContext. Each buffer is copied and transferred to the worker as a
 * "process_audio" command while recording is active and the model is ready.
 *
 * On any failure the partially built pipeline is torn down, the status line
 * is updated, and the error is rethrown to the caller.
 */
async function startMicrophone() {
  try {
    audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    updateStatusDiv("Microphone access granted");

    audioContext = new AudioContext({ sampleRate: 24000 });
    source = audioContext.createMediaStreamSource(audioStream);

    processor = audioContext.createScriptProcessor(1024, 1, 1);

    processor.onaudioprocess = function(event) {
      if (!isRecording || !modelInitialized) return;

      const inputBuffer = event.inputBuffer;
      const inputData = inputBuffer.getChannelData(0);

      // Send audio chunk to worker. The samples are copied first so the
      // copy's buffer can be transferred without touching the node's buffer.
      const audioChunk = new Float32Array(inputData);
      moshiWorker.postMessage({
        command: "process_audio",
        audioData: audioChunk
      }, [audioChunk.buffer]);
    };

    source.connect(processor);
    processor.connect(audioContext.destination);

  } catch (error) {
    // Setup can fail after getUserMedia() succeeded (e.g. while creating the
    // context or the source node). Release whatever was acquired so the
    // microphone is not left open and the AudioContext is not leaked.
    stopMicrophone();
    updateStatusDiv("Microphone access denied: " + error.message);
    throw error;
  }
}
|
178 |
+
|
179 |
+
/**
 * Tear down the audio pipeline and release the microphone.
 *
 * Each resource is released only if it currently exists, then its variable
 * is reset to null. Finally the status line is set to "Microphone stopped".
 */
function stopMicrophone() {
  // Audio graph: processor node, then source node, then the context itself.
  if (processor) {
    processor.disconnect();
    processor = null;
  }

  if (source) {
    source.disconnect();
    source = null;
  }

  if (audioContext) {
    audioContext.close();
    audioContext = null;
  }

  // Stop every track of the media stream.
  if (audioStream) {
    for (const track of audioStream.getTracks()) {
      track.stop();
    }
    audioStream = null;
  }

  updateStatusDiv("Microphone stopped");
}
|
202 |
+
|
203 |
+
/**
 * Start a recording session: open the microphone, reset the performance
 * counters, tell the worker to start streaming, and switch the UI into the
 * "Stop Speech" state with an empty transcript.
 *
 * Failures are logged, shown in the status line, and clear `pendingStart`;
 * they are not rethrown.
 */
async function startRecording() {
  const button = document.querySelector("#speech-button");

  // Block further clicks while the start is in flight. Without this, a second
  // click during the microphone permission prompt would run a second
  // startMicrophone() that overwrites the shared stream/context variables and
  // leaks the first capture.
  button.disabled = true;

  try {
    updateStatusDiv("Requesting microphone access...");
    await startMicrophone();

    // Reset performance counters
    audioChunksProcessed = 0;
    sessionStartTime = Date.now();

    // Start streaming session
    moshiWorker.postMessage({ command: "start_stream" });

    isRecording = true;
    button.textContent = "Stop Speech";
    button.className = "bg-red-600 hover:bg-red-700 text-white font-normal py-2 px-4 rounded";
    updateStatusDiv("Listening...");

    // Clear previous transcription
    const outputDiv = document.querySelector("#output-generation");
    outputDiv.textContent = "";
    outputDiv.hidden = true;
    document.querySelector("#output-placeholder").hidden = true;

  } catch (error) {
    console.error('Error starting microphone:', error);
    updateStatusDiv("Error: " + error.message);
    pendingStart = false;
  } finally {
    // Re-enable the button whether the start succeeded (so it can stop the
    // session) or failed (so the user can try again).
    button.disabled = false;
  }
}
|
233 |
+
|
234 |
+
// Toggle recording when the speech button is clicked.
//  - While recording: stop the microphone, end the worker stream, reset the button.
//  - Model not loaded yet: remember the request and start loading the model.
//  - Otherwise: start recording immediately.
document.querySelector("#speech-button").addEventListener("click", async () => {
  const speechButton = document.querySelector("#speech-button");

  if (isRecording) {
    stopMicrophone();

    // End streaming session
    moshiWorker.postMessage({ command: "stop_stream" });

    isRecording = false;
    speechButton.textContent = "Start Speech";
    speechButton.className = "bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded";
    updateStatusDiv("Ready to start");
    return;
  }

  if (modelInitialized) {
    await startRecording();
    return;
  }

  // Model is not ready: recording starts when the worker reports "model_ready".
  pendingStart = true;
  initializeModel();
});
|
258 |
+
</script>
|
259 |
+
</head>
|
260 |
+
<body class="container max-w-4xl mx-auto p-4">
|
261 |
+
<main class="grid grid-cols-1 gap-8 relative">
|
262 |
+
<div>
|
263 |
+
<h1 class="text-4xl font-bold">WASM Streaming Speech Recognition</h1>
|
264 |
+
<p class="text-gray-700">
|
265 |
+
Transcribe audio from your microphone in real time in the browser using Rust/WASM.
|
266 |
+
This demo runs entirely offline on your CPU after downloading a ~950 MB model.
|
267 |
+
It understands English and French, and uses the
|
268 |
+
<a href="https://huggingface.co/kyutai/stt-1b-en_fr" target="_blank" class="underline hover:text-blue-600">Kyutai STT model</a>
|
269 |
+
together with a WASM runtime built in
|
270 |
+
<a href="https://github.com/huggingface/candle/" target="_blank" class="underline hover:text-blue-600">Candle</a>.
|
271 |
+
</p>
|
272 |
+
</div>
|
273 |
+
|
274 |
+
<div>
|
275 |
+
<button id="speech-button" class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded">
|
276 |
+
Start Speech
|
277 |
+
</button>
|
278 |
+
<div class="mt-2 text-gray-600 text-sm space-y-1">
|
279 |
+
<div>Status: <span id="status-div">Click "Start Speech" to begin</span></div>
|
280 |
+
<div id="diagnostics">CPUs: -, Real-time factor: 0.00x, Duration: 0.0s</div>
|
281 |
+
</div>
|
282 |
+
</div>
|
283 |
+
|
284 |
+
<div>
|
285 |
+
<h3 class="font-medium">Transcription:</h3>
|
286 |
+
<div class="min-h-[200px] bg-slate-100 text-gray-700 p-4 rounded-md">
|
287 |
+
<p id="output-generation" hidden></p>
|
288 |
+
<span id="output-placeholder" class="font-light text-gray-500">Click "Start Speech" to begin transcription</span>
|
289 |
+
</div>
|
290 |
+
</div>
|
291 |
+
|
292 |
+
<div class="mt-4 p-3 bg-gray-50 text-gray-700 rounded-md">
|
293 |
+
💡 This demo shows offline transcription in your browser.
|
294 |
+
For more accurate cloud transcription and real-time LLM grammar correction, check out
|
295 |
+
<a href="https://voicewriter.io" target="_blank" class="underline hover:text-blue-600">Voice Writer</a>.
|
296 |
+
</div>
|
297 |
+
</main>
|
298 |
+
</body>
|
299 |
+
</html>
|
moshiWorker.js
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import init, {
|
2 |
+
MoshiASRDecoder,
|
3 |
+
initThreadPool,
|
4 |
+
} from "./build/wasm_speech_streaming.js";
|
5 |
+
|
6 |
+
async function fetchArrayBuffer(url) {
|
7 |
+
const cacheName = "whisper-candle-cache";
|
8 |
+
const cache = await caches.open(cacheName);
|
9 |
+
const cachedResponse = await cache.match(url);
|
10 |
+
if (cachedResponse) {
|
11 |
+
const data = await cachedResponse.arrayBuffer();
|
12 |
+
return new Uint8Array(data);
|
13 |
+
}
|
14 |
+
const res = await fetch(url, { cache: "force-cache" });
|
15 |
+
cache.put(url, res.clone());
|
16 |
+
return new Uint8Array(await res.arrayBuffer());
|
17 |
+
}
|
18 |
+
|
19 |
+
/**
 * Static wrapper around a single `MoshiASRDecoder` instance living in this
 * worker. Results are reported to the main thread through `self.postMessage`.
 */
class MoshiASR {
  // The loaded decoder, or null until initialize() has succeeded.
  static decoder = null;

  /**
   * Load the WASM module, start its thread pool, download the model files
   * and construct the decoder.
   *
   * Posts `{ status: "loading", message }` before downloading,
   * `{ status: "model_ready" }` on success (immediately if the decoder
   * already exists), and `{ error }` on failure.
   *
   * @param params Object carrying `weightsURL`, `tokenizerURL`, `mimiURL`
   *   and `configURL`.
   */
  static async initialize(params) {
    const { weightsURL, tokenizerURL, mimiURL, configURL } = params;

    if (this.decoder) {
      self.postMessage({ status: "model_ready" });
      return;
    }

    try {
      await init();
      const numThreads = navigator.hardwareConcurrency || 4;
      await initThreadPool(numThreads);

      self.postMessage({
        status: "loading",
        message: `Loading Model (~950 MB)`,
      });

      const [weightsArrayU8, tokenizerArrayU8, mimiArrayU8, configArrayU8] =
        await Promise.all([
          fetchArrayBuffer(weightsURL),
          fetchArrayBuffer(tokenizerURL),
          fetchArrayBuffer(mimiURL),
          fetchArrayBuffer(configURL),
        ]);

      this.decoder = new MoshiASRDecoder(
        weightsArrayU8,
        tokenizerArrayU8,
        mimiArrayU8,
        configArrayU8
      );

      self.postMessage({ status: "model_ready" });
    } catch (error) {
      // A thrown value is not guaranteed to be an Error (it may be a plain
      // string). Reading `.message` on such a value yields undefined, and a
      // message whose `error` field is undefined is ignored by the receiver,
      // so always send a non-empty string.
      const text = error instanceof Error ? error.message : String(error);
      self.postMessage({ error: text || "Unknown error" });
    }
  }

  /** Begin a streaming session; does nothing if the decoder is not loaded. */
  static startStream() {
    if (this.decoder) {
      this.decoder.start_streaming();
    }
  }

  /** End the streaming session; does nothing if the decoder is not loaded. */
  static stopStream() {
    if (this.decoder) {
      this.decoder.stop_streaming();
    }
  }

  /**
   * Feed one audio chunk to the decoder.
   *
   * Every word handed to the callback is posted as
   * `{ status: "streaming", word }`; after the call returns,
   * `{ status: "chunk_processed" }` is posted. Does nothing if the decoder
   * is not loaded.
   *
   * @param audioData Audio samples received from the main thread.
   */
  static processAudio(audioData) {
    if (this.decoder) {
      this.decoder.process_audio_chunk(audioData, (word) => {
        self.postMessage({
          status: "streaming",
          word: word,
        });
      });
      self.postMessage({
        status: "chunk_processed",
      });
    }
  }
}
|
88 |
+
|
89 |
+
// Dispatch commands sent by the main thread.
//   "initialize"    - load the model (URLs taken from the message).
//   "start_stream"  - begin a streaming session.
//   "stop_stream"   - end the streaming session.
//   "process_audio" - decode one chunk carried in `audioData`.
// Unknown commands and thrown errors are reported back as `{ error }`.
self.addEventListener("message", async (event) => {
  const { command } = event.data;

  try {
    switch (command) {
      // Each case with declarations gets its own block so the `const`
      // bindings are not shared across the whole switch.
      case "initialize": {
        const { weightsURL, modelID, tokenizerURL, mimiURL, configURL } =
          event.data;
        await MoshiASR.initialize({
          weightsURL,
          modelID,
          tokenizerURL,
          mimiURL,
          configURL,
        });
        break;
      }

      case "start_stream":
        MoshiASR.startStream();
        break;

      case "stop_stream":
        MoshiASR.stopStream();
        break;

      case "process_audio": {
        const { audioData } = event.data;
        MoshiASR.processAudio(audioData);
        break;
      }

      default:
        self.postMessage({ error: "Unknown command: " + command });
    }
  } catch (e) {
    // Thrown values are not always Error objects; `.message` would then be
    // undefined and the main thread would ignore the report. Always send a
    // non-empty string.
    const text = e instanceof Error ? e.message : String(e);
    self.postMessage({ error: text || "Unknown error" });
  }
});
|