radames HF staff committed on
Commit
01f754d
1 Parent(s): 8f38df6

Upload 7 files

Browse files
Files changed (7) hide show
  1. bertWorker.js +77 -0
  2. build/m.d.ts +55 -0
  3. build/m.js +633 -0
  4. build/m_bg.wasm +3 -0
  5. build/m_bg.wasm.d.ts +13 -0
  6. index.html +361 -16
  7. utils.js +99 -0
bertWorker.js ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //load Candle Bert Module wasm module
2
+ import init, { Model } from "./build/m.js";
3
+
4
+ async function fetchArrayBuffer(url) {
5
+ const cacheName = "bert-candle-cache";
6
+ const cache = await caches.open(cacheName);
7
+ const cachedResponse = await cache.match(url);
8
+ if (cachedResponse) {
9
+ const data = await cachedResponse.arrayBuffer();
10
+ return new Uint8Array(data);
11
+ }
12
+ const res = await fetch(url, { cache: "force-cache" });
13
+ cache.put(url, res.clone());
14
+ return new Uint8Array(await res.arrayBuffer());
15
+ }
16
/**
 * Keeps one loaded `Model` per model id so repeated requests reuse it.
 */
class Bert {
  // Loaded models, keyed by modelID.
  static instance = {};

  /**
   * Return the model registered under `modelID`, loading it first if needed.
   * Progress is reported to the worker's owner through `self.postMessage`.
   *
   * @param {string} weightsURL - URL of the model weights.
   * @param {string} tokenizerURL - URL of the tokenizer file.
   * @param {string} configURL - URL of the model config.
   * @param {string} modelID - Key under which the loaded model is stored.
   * @returns {Promise<Model>} The loaded (or previously loaded) model.
   */
  static async getInstance(weightsURL, tokenizerURL, configURL, modelID) {
    if (!this.instance[modelID]) {
      // Initialise the wasm module before constructing a Model.
      await init();

      self.postMessage({ status: "loading", message: "Loading Model" });
      // The three downloads are independent, so run them in parallel.
      const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
        await Promise.all([
          fetchArrayBuffer(weightsURL),
          fetchArrayBuffer(tokenizerURL),
          fetchArrayBuffer(configURL),
        ]);

      this.instance[modelID] = new Model(
        weightsArrayU8,
        tokenizerArrayU8,
        configArrayU8
      );
    } else {
      self.postMessage({ status: "loading", message: "Model Already Loaded" });
    }
    return this.instance[modelID];
  }
}
42
+
43
// Worker entry point: every incoming message is one embedding request.
// Replies are posted back as { status, message[, output] } objects, or as
// { error } when loading or inference throws.
self.addEventListener("message", async ({ data }) => {
  const { weightsURL, tokenizerURL, configURL, modelID, sentences } = data;
  // Normalisation is on unless the caller explicitly supplies a value.
  const normalize = data.normalize === undefined ? true : data.normalize;
  const report = (payload) => self.postMessage(payload);

  try {
    report({ status: "loading", message: "Starting Bert Model" });
    const model = await Bert.getInstance(
      weightsURL,
      tokenizerURL,
      configURL,
      modelID
    );

    report({ status: "embedding", message: "Calculating Embeddings" });
    const request = { sentences: sentences, normalize_embeddings: normalize };
    const output = model.get_embeddings(request);

    report({ status: "complete", message: "complete", output: output.data });
  } catch (e) {
    report({ error: e });
  }
});
build/m.d.ts ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ /**
4
+ */
5
+ export class Model {
6
+ free(): void;
7
+ /**
8
+ * @param {Uint8Array} weights
9
+ * @param {Uint8Array} tokenizer
10
+ * @param {Uint8Array} config
11
+ */
12
+ constructor(weights: Uint8Array, tokenizer: Uint8Array, config: Uint8Array);
13
+ /**
14
+ * @param {any} input
15
+ * @returns {any}
16
+ */
17
+ get_embeddings(input: any): any;
18
+ }
19
+
20
+ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
21
+
22
+ export interface InitOutput {
23
+ readonly memory: WebAssembly.Memory;
24
+ readonly __wbg_model_free: (a: number) => void;
25
+ readonly model_load: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
26
+ readonly model_get_embeddings: (a: number, b: number, c: number) => void;
27
+ readonly main: (a: number, b: number) => number;
28
+ readonly __wbindgen_malloc: (a: number, b: number) => number;
29
+ readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
30
+ readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
31
+ readonly __wbindgen_free: (a: number, b: number, c: number) => void;
32
+ readonly __wbindgen_exn_store: (a: number) => void;
33
+ readonly __wbindgen_start: () => void;
34
+ }
35
+
36
+ export type SyncInitInput = BufferSource | WebAssembly.Module;
37
+ /**
38
+ * Instantiates the given `module`, which can either be bytes or
39
+ * a precompiled `WebAssembly.Module`.
40
+ *
41
+ * @param {SyncInitInput} module
42
+ *
43
+ * @returns {InitOutput}
44
+ */
45
+ export function initSync(module: SyncInitInput): InitOutput;
46
+
47
+ /**
48
+ * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
49
+ * for everything else, calls `WebAssembly.instantiate` directly.
50
+ *
51
+ * @param {InitInput | Promise<InitInput>} module_or_path
52
+ *
53
+ * @returns {Promise<InitOutput>}
54
+ */
55
+ export default function __wbg_init (module_or_path?: InitInput | Promise<InitInput>): Promise<InitOutput>;
build/m.js ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ let wasm;
2
+
3
+ const heap = new Array(128).fill(undefined);
4
+
5
+ heap.push(undefined, null, true, false);
6
+
7
/** Return the JS value stored in heap slot `idx`. */
function getObject(idx) {
  const value = heap[idx];
  return value;
}
8
+
9
+ let WASM_VECTOR_LEN = 0;
10
+
11
+ let cachedUint8Memory0 = null;
12
+
13
/**
 * Return a Uint8Array view over the wasm memory, reusing the cached view.
 * The view is rebuilt when it is missing or reports zero bytes
 * (presumably because the underlying buffer was replaced; confirm).
 */
function getUint8Memory0() {
  const needsRefresh =
    cachedUint8Memory0 === null || cachedUint8Memory0.byteLength === 0;
  if (needsRefresh) {
    cachedUint8Memory0 = new Uint8Array(wasm.memory.buffer);
  }
  return cachedUint8Memory0;
}
19
+
20
+ const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
21
+
22
+ const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
23
+ ? function (arg, view) {
24
+ return cachedTextEncoder.encodeInto(arg, view);
25
+ }
26
+ : function (arg, view) {
27
+ const buf = cachedTextEncoder.encode(arg);
28
+ view.set(buf);
29
+ return {
30
+ read: arg.length,
31
+ written: buf.length
32
+ };
33
+ });
34
+
35
/**
 * Copy the string `arg` into wasm memory as UTF-8 and return its pointer.
 * The number of bytes written is left in `WASM_VECTOR_LEN`.
 *
 * @param {string} arg - String to copy.
 * @param {Function} malloc - Allocator, called as `malloc(size, 1)`.
 * @param {Function} [realloc] - Optional reallocator; when absent the string
 *   is encoded up front and copied in one step.
 * @returns {number} Unsigned pointer to the first byte.
 */
function passStringToWasm0(arg, malloc, realloc) {
  if (realloc === undefined) {
    const encoded = cachedTextEncoder.encode(arg);
    const start = malloc(encoded.length, 1) >>> 0;
    getUint8Memory0().subarray(start, start + encoded.length).set(encoded);
    WASM_VECTOR_LEN = encoded.length;
    return start;
  }

  // Start with one byte per UTF-16 code unit, which is exact for pure ASCII.
  let capacity = arg.length;
  let ptr = malloc(capacity, 1) >>> 0;

  const mem = getUint8Memory0();

  // Fast path: copy leading ASCII characters directly.
  let written = 0;
  while (written < capacity) {
    const code = arg.charCodeAt(written);
    if (code > 0x7F) break;
    mem[ptr + written] = code;
    written++;
  }

  if (written !== capacity) {
    // A non-ASCII character was hit: encode the remainder with the encoder,
    // after growing the allocation to three bytes per remaining code unit.
    if (written !== 0) {
      arg = arg.slice(written);
    }
    const previousCapacity = capacity;
    capacity = written + arg.length * 3;
    ptr = realloc(ptr, previousCapacity, capacity, 1) >>> 0;
    // Fetch the memory view again after realloc rather than reusing `mem`.
    const view = getUint8Memory0().subarray(ptr + written, ptr + capacity);
    const ret = encodeString(arg, view);

    written += ret.written;
  }

  WASM_VECTOR_LEN = written;
  return ptr;
}
72
+
73
+ function isLikeNone(x) {
74
+ return x === undefined || x === null;
75
+ }
76
+
77
+ let cachedInt32Memory0 = null;
78
+
79
/**
 * Return an Int32Array view over the wasm memory, reusing the cached view.
 * The view is rebuilt when it is missing or reports zero bytes.
 */
function getInt32Memory0() {
  const needsRefresh =
    cachedInt32Memory0 === null || cachedInt32Memory0.byteLength === 0;
  if (needsRefresh) {
    cachedInt32Memory0 = new Int32Array(wasm.memory.buffer);
  }
  return cachedInt32Memory0;
}
85
+
86
+ let heap_next = heap.length;
87
+
88
/**
 * Release heap slot `idx` by pushing it onto the free list.
 * Slots below 132 (the 128 pre-filled entries plus undefined, null, true
 * and false) are never released.
 */
function dropObject(idx) {
  if (!(idx < 132)) {
    // The freed slot remembers the previous head of the free list.
    heap[idx] = heap_next;
    heap_next = idx;
  }
}
93
+
94
/** Read the value in heap slot `idx`, release the slot, and return the value. */
function takeObject(idx) {
  const value = heap[idx];
  dropObject(idx);
  return value;
}
99
+
100
+ const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
101
+
102
+ if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
103
+
104
/**
 * Decode `len` bytes of wasm memory starting at `ptr` into a JS string
 * using the shared text decoder.
 */
function getStringFromWasm0(ptr, len) {
  const start = ptr >>> 0; // treat the pointer as unsigned
  const bytes = getUint8Memory0().subarray(start, start + len);
  return cachedTextDecoder.decode(bytes);
}
108
+
109
/**
 * Store `obj` in the next free heap slot and return that slot's index.
 * The heap grows by one entry when the free list is exhausted.
 */
function addHeapObject(obj) {
  const slot = heap_next;
  if (slot === heap.length) {
    // New tail entry points one past itself, i.e. at the next slot to grow.
    heap.push(slot + 1);
  }
  // Advance the free list before overwriting the slot's link.
  heap_next = heap[slot];
  heap[slot] = obj;
  return slot;
}
117
+
118
+ let cachedFloat64Memory0 = null;
119
+
120
/**
 * Return a Float64Array view over the wasm memory, reusing the cached view.
 * The view is rebuilt when it is missing or reports zero bytes.
 */
function getFloat64Memory0() {
  const needsRefresh =
    cachedFloat64Memory0 === null || cachedFloat64Memory0.byteLength === 0;
  if (needsRefresh) {
    cachedFloat64Memory0 = new Float64Array(wasm.memory.buffer);
  }
  return cachedFloat64Memory0;
}
126
+
127
+ function debugString(val) {
128
+ // primitive types
129
+ const type = typeof val;
130
+ if (type == 'number' || type == 'boolean' || val == null) {
131
+ return `${val}`;
132
+ }
133
+ if (type == 'string') {
134
+ return `"${val}"`;
135
+ }
136
+ if (type == 'symbol') {
137
+ const description = val.description;
138
+ if (description == null) {
139
+ return 'Symbol';
140
+ } else {
141
+ return `Symbol(${description})`;
142
+ }
143
+ }
144
+ if (type == 'function') {
145
+ const name = val.name;
146
+ if (typeof name == 'string' && name.length > 0) {
147
+ return `Function(${name})`;
148
+ } else {
149
+ return 'Function';
150
+ }
151
+ }
152
+ // objects
153
+ if (Array.isArray(val)) {
154
+ const length = val.length;
155
+ let debug = '[';
156
+ if (length > 0) {
157
+ debug += debugString(val[0]);
158
+ }
159
+ for(let i = 1; i < length; i++) {
160
+ debug += ', ' + debugString(val[i]);
161
+ }
162
+ debug += ']';
163
+ return debug;
164
+ }
165
+ // Test for built-in
166
+ const builtInMatches = /\[object ([^\]]+)\]/.exec(toString.call(val));
167
+ let className;
168
+ if (builtInMatches.length > 1) {
169
+ className = builtInMatches[1];
170
+ } else {
171
+ // Failed to match the standard '[object ClassName]'
172
+ return toString.call(val);
173
+ }
174
+ if (className == 'Object') {
175
+ // we're a user defined class or Object
176
+ // JSON.stringify avoids problems with cycles, and is generally much
177
+ // easier than looping through ownProperties of `val`.
178
+ try {
179
+ return 'Object(' + JSON.stringify(val) + ')';
180
+ } catch (_) {
181
+ return 'Object';
182
+ }
183
+ }
184
+ // errors
185
+ if (val instanceof Error) {
186
+ return `${val.name}: ${val.message}\n${val.stack}`;
187
+ }
188
+ // TODO we could test for more things here, like `Set`s and `Map`s.
189
+ return className;
190
+ }
191
+
192
/**
 * Copy the byte array `arg` into freshly allocated wasm memory.
 * The array length is left in `WASM_VECTOR_LEN`.
 *
 * @param {Uint8Array} arg - Bytes to copy.
 * @param {Function} malloc - Allocator, called as `malloc(size, 1)`.
 * @returns {number} Unsigned pointer to the copied bytes.
 */
function passArray8ToWasm0(arg, malloc) {
  const byteCount = arg.length;
  const ptr = malloc(byteCount, 1) >>> 0;
  getUint8Memory0().set(arg, ptr);
  WASM_VECTOR_LEN = byteCount;
  return ptr;
}
198
+
199
/**
 * Call `f` with `args`, forwarding the current `this`. If `f` throws, the
 * thrown value is put on the heap and handed to `__wbindgen_exn_store`
 * instead of propagating; the function then returns `undefined`.
 */
function handleError(f, args) {
  let result;
  try {
    result = f.apply(this, args);
  } catch (thrown) {
    wasm.__wbindgen_exn_store(addHeapObject(thrown));
  }
  return result;
}
206
/**
 * JS-side handle for a model object owned by the wasm module. The instance
 * only stores the wasm pointer in `__wbg_ptr`.
 */
export class Model {
  /** Create a handle around an existing wasm pointer without loading anything. */
  static __wrap(ptr) {
    const handle = Object.create(Model.prototype);
    return Object.assign(handle, { __wbg_ptr: ptr >>> 0 });
  }

  /** Detach and return the wasm pointer, leaving this handle pointing at 0. */
  __destroy_into_raw() {
    const raw = this.__wbg_ptr;
    this.__wbg_ptr = 0;
    return raw;
  }

  /** Release the wasm-side object behind this handle. */
  free() {
    wasm.__wbg_model_free(this.__destroy_into_raw());
  }

  /**
   * Load a model from raw bytes.
   *
   * @param {Uint8Array} weights
   * @param {Uint8Array} tokenizer
   * @param {Uint8Array} config
   */
  constructor(weights, tokenizer, config) {
    try {
      // Reserve 16 bytes on the wasm stack for the result slots.
      const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);

      const weightsPtr = passArray8ToWasm0(weights, wasm.__wbindgen_malloc);
      const weightsLen = WASM_VECTOR_LEN;
      const tokenizerPtr = passArray8ToWasm0(tokenizer, wasm.__wbindgen_malloc);
      const tokenizerLen = WASM_VECTOR_LEN;
      const configPtr = passArray8ToWasm0(config, wasm.__wbindgen_malloc);
      const configLen = WASM_VECTOR_LEN;

      wasm.model_load(
        retptr,
        weightsPtr, weightsLen,
        tokenizerPtr, tokenizerLen,
        configPtr, configLen
      );

      // Result slots: [0] model pointer, [1] error heap index, [2] error flag.
      const slots = getInt32Memory0();
      const base = retptr / 4;
      const modelPtr = slots[base];
      const errorIdx = slots[base + 1];
      const failed = slots[base + 2];
      if (failed) {
        throw takeObject(errorIdx);
      }
      return Model.__wrap(modelPtr);
    } finally {
      wasm.__wbindgen_add_to_stack_pointer(16);
    }
  }

  /**
   * Run the wasm `model_get_embeddings` export on `input`.
   *
   * @param {any} input
   * @returns {any}
   */
  get_embeddings(input) {
    try {
      const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
      wasm.model_get_embeddings(retptr, this.__wbg_ptr, addHeapObject(input));

      // Result slots: [0] result heap index, [1] error heap index, [2] error flag.
      const slots = getInt32Memory0();
      const base = retptr / 4;
      const resultIdx = slots[base];
      const errorIdx = slots[base + 1];
      const failed = slots[base + 2];
      if (failed) {
        throw takeObject(errorIdx);
      }
      return takeObject(resultIdx);
    } finally {
      wasm.__wbindgen_add_to_stack_pointer(16);
    }
  }
}
275
+
276
+ async function __wbg_load(module, imports) {
277
+ if (typeof Response === 'function' && module instanceof Response) {
278
+ if (typeof WebAssembly.instantiateStreaming === 'function') {
279
+ try {
280
+ return await WebAssembly.instantiateStreaming(module, imports);
281
+
282
+ } catch (e) {
283
+ if (module.headers.get('Content-Type') != 'application/wasm') {
284
+ console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
285
+
286
+ } else {
287
+ throw e;
288
+ }
289
+ }
290
+ }
291
+
292
+ const bytes = await module.arrayBuffer();
293
+ return await WebAssembly.instantiate(bytes, imports);
294
+
295
+ } else {
296
+ const instance = await WebAssembly.instantiate(module, imports);
297
+
298
+ if (instance instanceof WebAssembly.Instance) {
299
+ return { instance, module };
300
+
301
+ } else {
302
+ return instance;
303
+ }
304
+ }
305
+ }
306
+
307
+ function __wbg_get_imports() {
308
+ const imports = {};
309
+ imports.wbg = {};
310
+ imports.wbg.__wbindgen_string_get = function(arg0, arg1) {
311
+ const obj = getObject(arg1);
312
+ const ret = typeof(obj) === 'string' ? obj : undefined;
313
+ var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
314
+ var len1 = WASM_VECTOR_LEN;
315
+ getInt32Memory0()[arg0 / 4 + 1] = len1;
316
+ getInt32Memory0()[arg0 / 4 + 0] = ptr1;
317
+ };
318
+ imports.wbg.__wbindgen_object_drop_ref = function(arg0) {
319
+ takeObject(arg0);
320
+ };
321
+ imports.wbg.__wbindgen_boolean_get = function(arg0) {
322
+ const v = getObject(arg0);
323
+ const ret = typeof(v) === 'boolean' ? (v ? 1 : 0) : 2;
324
+ return ret;
325
+ };
326
+ imports.wbg.__wbindgen_is_object = function(arg0) {
327
+ const val = getObject(arg0);
328
+ const ret = typeof(val) === 'object' && val !== null;
329
+ return ret;
330
+ };
331
+ imports.wbg.__wbindgen_is_undefined = function(arg0) {
332
+ const ret = getObject(arg0) === undefined;
333
+ return ret;
334
+ };
335
+ imports.wbg.__wbindgen_in = function(arg0, arg1) {
336
+ const ret = getObject(arg0) in getObject(arg1);
337
+ return ret;
338
+ };
339
+ imports.wbg.__wbindgen_error_new = function(arg0, arg1) {
340
+ const ret = new Error(getStringFromWasm0(arg0, arg1));
341
+ return addHeapObject(ret);
342
+ };
343
+ imports.wbg.__wbindgen_object_clone_ref = function(arg0) {
344
+ const ret = getObject(arg0);
345
+ return addHeapObject(ret);
346
+ };
347
+ imports.wbg.__wbindgen_jsval_loose_eq = function(arg0, arg1) {
348
+ const ret = getObject(arg0) == getObject(arg1);
349
+ return ret;
350
+ };
351
+ imports.wbg.__wbindgen_number_get = function(arg0, arg1) {
352
+ const obj = getObject(arg1);
353
+ const ret = typeof(obj) === 'number' ? obj : undefined;
354
+ getFloat64Memory0()[arg0 / 8 + 1] = isLikeNone(ret) ? 0 : ret;
355
+ getInt32Memory0()[arg0 / 4 + 0] = !isLikeNone(ret);
356
+ };
357
+ imports.wbg.__wbg_String_4370c5505c674d30 = function(arg0, arg1) {
358
+ const ret = String(getObject(arg1));
359
+ const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
360
+ const len1 = WASM_VECTOR_LEN;
361
+ getInt32Memory0()[arg0 / 4 + 1] = len1;
362
+ getInt32Memory0()[arg0 / 4 + 0] = ptr1;
363
+ };
364
+ imports.wbg.__wbindgen_number_new = function(arg0) {
365
+ const ret = arg0;
366
+ return addHeapObject(ret);
367
+ };
368
+ imports.wbg.__wbindgen_string_new = function(arg0, arg1) {
369
+ const ret = getStringFromWasm0(arg0, arg1);
370
+ return addHeapObject(ret);
371
+ };
372
+ imports.wbg.__wbg_getwithrefkey_d1f0d12f1f1b63ea = function(arg0, arg1) {
373
+ const ret = getObject(arg0)[getObject(arg1)];
374
+ return addHeapObject(ret);
375
+ };
376
+ imports.wbg.__wbg_set_bd72c078edfa51ad = function(arg0, arg1, arg2) {
377
+ getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
378
+ };
379
+ imports.wbg.__wbg_new_abda76e883ba8a5f = function() {
380
+ const ret = new Error();
381
+ return addHeapObject(ret);
382
+ };
383
+ imports.wbg.__wbg_stack_658279fe44541cf6 = function(arg0, arg1) {
384
+ const ret = getObject(arg1).stack;
385
+ const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
386
+ const len1 = WASM_VECTOR_LEN;
387
+ getInt32Memory0()[arg0 / 4 + 1] = len1;
388
+ getInt32Memory0()[arg0 / 4 + 0] = ptr1;
389
+ };
390
+ imports.wbg.__wbg_error_f851667af71bcfc6 = function(arg0, arg1) {
391
+ let deferred0_0;
392
+ let deferred0_1;
393
+ try {
394
+ deferred0_0 = arg0;
395
+ deferred0_1 = arg1;
396
+ console.error(getStringFromWasm0(arg0, arg1));
397
+ } finally {
398
+ wasm.__wbindgen_free(deferred0_0, deferred0_1, 1);
399
+ }
400
+ };
401
+ imports.wbg.__wbg_log_87e6a1c1b12181c3 = function(arg0, arg1) {
402
+ console.log(getStringFromWasm0(arg0, arg1));
403
+ };
404
+ imports.wbg.__wbg_crypto_c48a774b022d20ac = function(arg0) {
405
+ const ret = getObject(arg0).crypto;
406
+ return addHeapObject(ret);
407
+ };
408
+ imports.wbg.__wbg_process_298734cf255a885d = function(arg0) {
409
+ const ret = getObject(arg0).process;
410
+ return addHeapObject(ret);
411
+ };
412
+ imports.wbg.__wbg_versions_e2e78e134e3e5d01 = function(arg0) {
413
+ const ret = getObject(arg0).versions;
414
+ return addHeapObject(ret);
415
+ };
416
+ imports.wbg.__wbg_node_1cd7a5d853dbea79 = function(arg0) {
417
+ const ret = getObject(arg0).node;
418
+ return addHeapObject(ret);
419
+ };
420
+ imports.wbg.__wbindgen_is_string = function(arg0) {
421
+ const ret = typeof(getObject(arg0)) === 'string';
422
+ return ret;
423
+ };
424
+ imports.wbg.__wbg_msCrypto_bcb970640f50a1e8 = function(arg0) {
425
+ const ret = getObject(arg0).msCrypto;
426
+ return addHeapObject(ret);
427
+ };
428
+ imports.wbg.__wbg_require_8f08ceecec0f4fee = function() { return handleError(function () {
429
+ const ret = module.require;
430
+ return addHeapObject(ret);
431
+ }, arguments) };
432
+ imports.wbg.__wbindgen_is_function = function(arg0) {
433
+ const ret = typeof(getObject(arg0)) === 'function';
434
+ return ret;
435
+ };
436
+ imports.wbg.__wbg_getRandomValues_37fa2ca9e4e07fab = function() { return handleError(function (arg0, arg1) {
437
+ getObject(arg0).getRandomValues(getObject(arg1));
438
+ }, arguments) };
439
+ imports.wbg.__wbg_randomFillSync_dc1e9a60c158336d = function() { return handleError(function (arg0, arg1) {
440
+ getObject(arg0).randomFillSync(takeObject(arg1));
441
+ }, arguments) };
442
+ imports.wbg.__wbg_get_44be0491f933a435 = function(arg0, arg1) {
443
+ const ret = getObject(arg0)[arg1 >>> 0];
444
+ return addHeapObject(ret);
445
+ };
446
+ imports.wbg.__wbg_length_fff51ee6522a1a18 = function(arg0) {
447
+ const ret = getObject(arg0).length;
448
+ return ret;
449
+ };
450
+ imports.wbg.__wbg_new_898a68150f225f2e = function() {
451
+ const ret = new Array();
452
+ return addHeapObject(ret);
453
+ };
454
+ imports.wbg.__wbg_newnoargs_581967eacc0e2604 = function(arg0, arg1) {
455
+ const ret = new Function(getStringFromWasm0(arg0, arg1));
456
+ return addHeapObject(ret);
457
+ };
458
+ imports.wbg.__wbg_next_526fc47e980da008 = function(arg0) {
459
+ const ret = getObject(arg0).next;
460
+ return addHeapObject(ret);
461
+ };
462
+ imports.wbg.__wbg_next_ddb3312ca1c4e32a = function() { return handleError(function (arg0) {
463
+ const ret = getObject(arg0).next();
464
+ return addHeapObject(ret);
465
+ }, arguments) };
466
+ imports.wbg.__wbg_done_5c1f01fb660d73b5 = function(arg0) {
467
+ const ret = getObject(arg0).done;
468
+ return ret;
469
+ };
470
+ imports.wbg.__wbg_value_1695675138684bd5 = function(arg0) {
471
+ const ret = getObject(arg0).value;
472
+ return addHeapObject(ret);
473
+ };
474
+ imports.wbg.__wbg_iterator_97f0c81209c6c35a = function() {
475
+ const ret = Symbol.iterator;
476
+ return addHeapObject(ret);
477
+ };
478
+ imports.wbg.__wbg_get_97b561fb56f034b5 = function() { return handleError(function (arg0, arg1) {
479
+ const ret = Reflect.get(getObject(arg0), getObject(arg1));
480
+ return addHeapObject(ret);
481
+ }, arguments) };
482
+ imports.wbg.__wbg_call_cb65541d95d71282 = function() { return handleError(function (arg0, arg1) {
483
+ const ret = getObject(arg0).call(getObject(arg1));
484
+ return addHeapObject(ret);
485
+ }, arguments) };
486
+ imports.wbg.__wbg_new_b51585de1b234aff = function() {
487
+ const ret = new Object();
488
+ return addHeapObject(ret);
489
+ };
490
+ imports.wbg.__wbg_self_1ff1d729e9aae938 = function() { return handleError(function () {
491
+ const ret = self.self;
492
+ return addHeapObject(ret);
493
+ }, arguments) };
494
+ imports.wbg.__wbg_window_5f4faef6c12b79ec = function() { return handleError(function () {
495
+ const ret = window.window;
496
+ return addHeapObject(ret);
497
+ }, arguments) };
498
+ imports.wbg.__wbg_globalThis_1d39714405582d3c = function() { return handleError(function () {
499
+ const ret = globalThis.globalThis;
500
+ return addHeapObject(ret);
501
+ }, arguments) };
502
+ imports.wbg.__wbg_global_651f05c6a0944d1c = function() { return handleError(function () {
503
+ const ret = global.global;
504
+ return addHeapObject(ret);
505
+ }, arguments) };
506
+ imports.wbg.__wbg_set_502d29070ea18557 = function(arg0, arg1, arg2) {
507
+ getObject(arg0)[arg1 >>> 0] = takeObject(arg2);
508
+ };
509
+ imports.wbg.__wbg_isArray_4c24b343cb13cfb1 = function(arg0) {
510
+ const ret = Array.isArray(getObject(arg0));
511
+ return ret;
512
+ };
513
+ imports.wbg.__wbg_instanceof_ArrayBuffer_39ac22089b74fddb = function(arg0) {
514
+ let result;
515
+ try {
516
+ result = getObject(arg0) instanceof ArrayBuffer;
517
+ } catch {
518
+ result = false;
519
+ }
520
+ const ret = result;
521
+ return ret;
522
+ };
523
+ imports.wbg.__wbg_call_01734de55d61e11d = function() { return handleError(function (arg0, arg1, arg2) {
524
+ const ret = getObject(arg0).call(getObject(arg1), getObject(arg2));
525
+ return addHeapObject(ret);
526
+ }, arguments) };
527
+ imports.wbg.__wbg_buffer_085ec1f694018c4f = function(arg0) {
528
+ const ret = getObject(arg0).buffer;
529
+ return addHeapObject(ret);
530
+ };
531
+ imports.wbg.__wbg_newwithbyteoffsetandlength_6da8e527659b86aa = function(arg0, arg1, arg2) {
532
+ const ret = new Uint8Array(getObject(arg0), arg1 >>> 0, arg2 >>> 0);
533
+ return addHeapObject(ret);
534
+ };
535
+ imports.wbg.__wbg_new_8125e318e6245eed = function(arg0) {
536
+ const ret = new Uint8Array(getObject(arg0));
537
+ return addHeapObject(ret);
538
+ };
539
+ imports.wbg.__wbg_set_5cf90238115182c3 = function(arg0, arg1, arg2) {
540
+ getObject(arg0).set(getObject(arg1), arg2 >>> 0);
541
+ };
542
+ imports.wbg.__wbg_length_72e2208bbc0efc61 = function(arg0) {
543
+ const ret = getObject(arg0).length;
544
+ return ret;
545
+ };
546
+ imports.wbg.__wbg_instanceof_Uint8Array_d8d9cb2b8e8ac1d4 = function(arg0) {
547
+ let result;
548
+ try {
549
+ result = getObject(arg0) instanceof Uint8Array;
550
+ } catch {
551
+ result = false;
552
+ }
553
+ const ret = result;
554
+ return ret;
555
+ };
556
+ imports.wbg.__wbg_newwithlength_e5d69174d6984cd7 = function(arg0) {
557
+ const ret = new Uint8Array(arg0 >>> 0);
558
+ return addHeapObject(ret);
559
+ };
560
+ imports.wbg.__wbg_subarray_13db269f57aa838d = function(arg0, arg1, arg2) {
561
+ const ret = getObject(arg0).subarray(arg1 >>> 0, arg2 >>> 0);
562
+ return addHeapObject(ret);
563
+ };
564
+ imports.wbg.__wbindgen_debug_string = function(arg0, arg1) {
565
+ const ret = debugString(getObject(arg1));
566
+ const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
567
+ const len1 = WASM_VECTOR_LEN;
568
+ getInt32Memory0()[arg0 / 4 + 1] = len1;
569
+ getInt32Memory0()[arg0 / 4 + 0] = ptr1;
570
+ };
571
+ imports.wbg.__wbindgen_throw = function(arg0, arg1) {
572
+ throw new Error(getStringFromWasm0(arg0, arg1));
573
+ };
574
+ imports.wbg.__wbindgen_memory = function() {
575
+ const ret = wasm.memory;
576
+ return addHeapObject(ret);
577
+ };
578
+
579
+ return imports;
580
+ }
581
+
582
+ function __wbg_init_memory(imports, maybe_memory) {
583
+
584
+ }
585
+
586
/**
 * Publish the instantiated module: store its exports in `wasm`, remember the
 * compiled module on `__wbg_init`, drop the cached memory views, then run
 * the module's start function.
 *
 * @returns {object} The wasm exports.
 */
function __wbg_finalize_init(instance, module) {
  wasm = instance.exports;
  __wbg_init.__wbindgen_wasm_module = module;

  // Cached views belong to any previous memory; force them to be rebuilt.
  cachedFloat64Memory0 = cachedInt32Memory0 = cachedUint8Memory0 = null;

  wasm.__wbindgen_start();
  return wasm;
}
596
+
597
/**
 * Synchronously instantiate the wasm module from bytes or a compiled
 * `WebAssembly.Module`. Returns the existing exports if already initialised.
 *
 * @param {BufferSource | WebAssembly.Module} module
 * @returns {object} The wasm exports.
 */
function initSync(module) {
  if (wasm !== undefined) {
    return wasm;
  }

  const imports = __wbg_get_imports();
  __wbg_init_memory(imports);

  // Compile first when raw bytes were supplied.
  const compiled = module instanceof WebAssembly.Module
    ? module
    : new WebAssembly.Module(module);

  return __wbg_finalize_init(new WebAssembly.Instance(compiled, imports), compiled);
}
612
+
613
/**
 * Asynchronously load and instantiate the wasm module. Returns the existing
 * exports if already initialised.
 *
 * @param {any} [input] - URL, string, Request, Response, bytes, module, or a
 *   promise of one of these. Defaults to `m_bg.wasm` next to this file.
 * @returns {Promise<object>} The wasm exports.
 */
async function __wbg_init(input) {
  if (wasm !== undefined) {
    return wasm;
  }

  if (typeof input === 'undefined') {
    input = new URL('m_bg.wasm', import.meta.url);
  }
  const imports = __wbg_get_imports();

  // Anything that names a resource (rather than being one) is fetched.
  const isRequest = typeof Request === 'function' && input instanceof Request;
  const isUrl = typeof URL === 'function' && input instanceof URL;
  if (typeof input === 'string' || isRequest || isUrl) {
    input = fetch(input);
  }

  __wbg_init_memory(imports);

  const loaded = await __wbg_load(await input, imports);
  return __wbg_finalize_init(loaded.instance, loaded.module);
}
631
+
632
+ export { initSync }
633
+ export default __wbg_init;
build/m_bg.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5717d073fa2a2774df57e257617a7fd5ed71f6793bcda874ee5f2a6c8fc52154
3
+ size 3754562
build/m_bg.wasm.d.ts ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ export const memory: WebAssembly.Memory;
4
+ export function __wbg_model_free(a: number): void;
5
+ export function model_load(a: number, b: number, c: number, d: number, e: number, f: number, g: number): void;
6
+ export function model_get_embeddings(a: number, b: number, c: number): void;
7
+ export function main(a: number, b: number): number;
8
+ export function __wbindgen_malloc(a: number, b: number): number;
9
+ export function __wbindgen_realloc(a: number, b: number, c: number, d: number): number;
10
+ export function __wbindgen_add_to_stack_pointer(a: number): number;
11
+ export function __wbindgen_free(a: number, b: number, c: number): void;
12
+ export function __wbindgen_exn_store(a: number): void;
13
+ export function __wbindgen_start(): void;
index.html CHANGED
@@ -1,19 +1,364 @@
 
 
 
 
 
 
 
 
1
  <!DOCTYPE html>
2
  <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <html>
2
+ <head>
3
+ <meta content="text/html;charset=utf-8" http-equiv="Content-Type" />
4
+ <title>Candle Bert</title>
5
+ </head>
6
+ <body></body>
7
+ </html>
8
+
9
  <!DOCTYPE html>
10
  <html>
11
+ <head>
12
+ <meta charset="UTF-8" />
13
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
14
+ <style>
15
+ @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
16
+ html,
17
+ body {
18
+ font-family: "Source Sans 3", sans-serif;
19
+ }
20
+ </style>
21
+ <script src="https://cdn.tailwindcss.com"></script>
22
+ <script type="module" src="./code.js"></script>
23
+ <script type="module">
24
+ import { hcl } from "https://cdn.skypack.dev/d3-color@3";
25
+ import { interpolateReds } from "https://cdn.skypack.dev/d3-scale-chromatic@3";
26
+ import { scaleLinear } from "https://cdn.skypack.dev/d3-scale@4";
27
+ import {
28
+ getModelInfo,
29
+ getEmbeddings,
30
+ getWikiText,
31
+ cosineSimilarity,
32
+ } from "./utils.js";
33
+
34
+ const bertWorker = new Worker("./bertWorker.js", {
35
+ type: "module",
36
+ });
37
+
38
+ const inputContainerEL = document.querySelector("#input-container");
39
+ const textAreaEl = document.querySelector("#input-area");
40
+ const outputAreaEl = document.querySelector("#output-area");
41
+ const formEl = document.querySelector("#form");
42
+ const searchInputEl = document.querySelector("#search-input");
43
+ const formWikiEl = document.querySelector("#form-wiki");
44
+ const searchWikiEl = document.querySelector("#search-wiki");
45
+ const outputStatusEl = document.querySelector("#output-status");
46
+ const modelSelectEl = document.querySelector("#model");
47
+
48
+ const sentencesRegex =
49
+ /(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<![A-Z]\.)(?<=\.|\?)\s/gm;
50
+
51
+ let sentenceEmbeddings = [];
52
+ let currInputText = "";
53
+ let isCalculating = false;
54
+
55
// Shows (and focuses) the editable text area when `state` is truthy,
// hides it otherwise.
function toggleTextArea(state) {
  textAreaEl.hidden = !state;
  if (state) {
    textAreaEl.focus();
  }
}
63
// Focusing the read-only container swaps in the editable text area.
inputContainerEL.addEventListener("focus", () => {
  toggleTextArea(true);
});
// Leaving the text area hides it again.
textAreaEl.addEventListener("blur", () => {
  toggleTextArea(false);
});
// On focus loss, re-render and re-embed the text, but only when it actually
// changed and no calculation is currently running.
textAreaEl.addEventListener("focusout", () => {
  toggleTextArea(false);
  const unchanged = currInputText === textAreaEl.value;
  if (!unchanged && !isCalculating) {
    populateOutputArea(textAreaEl.value);
    calculateEmbeddings(textAreaEl.value);
  }
});

// Switching models invalidates the sentence embeddings, so recompute them
// when there is text loaded and nothing is in flight.
modelSelectEl.addEventListener("change", () => {
  const hasText = currInputText !== "";
  if (hasText && !isCalculating) {
    populateOutputArea(textAreaEl.value);
    calculateEmbeddings(textAreaEl.value);
  }
});
81
+
82
// Remembers `text` as the current input and renders it into the output area
// as one <span id="sentence-N"> per sentence, so search results can later be
// highlighted sentence by sentence.
function populateOutputArea(text) {
  currInputText = text;
  const parts = text.split(sentencesRegex);

  outputAreaEl.innerHTML = "";
  parts.forEach((part, index) => {
    const span = document.createElement("span");
    span.id = `sentence-${index}`;
    span.innerText = part + " ";
    outputAreaEl.appendChild(span);
  });
}
94
// Semantic search: embeds the query with the selected model, ranks every
// sentence embedding by cosine similarity, colours the ten best matches and
// scrolls the best one into view.
formEl.addEventListener("submit", async (e) => {
  e.preventDefault();
  // Bail out when busy, when no text is loaded, when the query is blank, or
  // when there are no sentence embeddings to compare against (indexing an
  // empty ranking below would throw).
  if (
    isCalculating ||
    currInputText === "" ||
    searchInputEl.value.trim() === "" ||
    sentenceEmbeddings.length === 0
  ) {
    return;
  }

  toggleInputs(true);
  isCalculating = true;
  try {
    const modelID = modelSelectEl.value;
    const { modelURL, tokenizerURL, configURL, search_prefix } =
      getModelInfo(modelID);

    const query = search_prefix + searchInputEl.value;
    outputStatusEl.classList.remove("invisible");
    outputStatusEl.innerText = "Calculating embeddings for query...";
    const out = await getEmbeddings(
      bertWorker,
      modelURL,
      tokenizerURL,
      configURL,
      modelID,
      [query]
    );
    outputStatusEl.classList.add("invisible");
    const queryEmbeddings = out.output[0];

    // Cosine similarity of the query against every sentence, best first,
    // keeping the top 10.
    const distances = sentenceEmbeddings
      .map((embedding, id) => ({
        id,
        similarity: cosineSimilarity(queryEmbeddings, embedding),
      }))
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, 10);

    // Map the similarity range of the kept results onto the Reds palette.
    const colorScale = scaleLinear()
      .domain([
        distances[distances.length - 1].similarity,
        distances[0].similarity,
      ])
      .range([0, 1])
      .interpolate(() => interpolateReds);

    // Clear highlights from any previous search.
    outputAreaEl.querySelectorAll("span").forEach((el) => {
      el.style.color = "unset";
      el.style.backgroundColor = "unset";
    });
    distances.forEach((d) => {
      const el = outputAreaEl.querySelector(`#sentence-${d.id}`);
      if (!el) return;
      const color = colorScale(d.similarity);
      // Pick a readable font colour for the background luminance.
      const fontColor = hcl(color).l < 70 ? "white" : "black";
      el.style.color = fontColor;
      el.style.backgroundColor = color;
    });

    outputAreaEl
      .querySelector(`#sentence-${distances[0].id}`)
      ?.scrollIntoView({
        behavior: "smooth",
        block: "center",
        inline: "nearest",
      });
  } catch (err) {
    console.error(err);
    outputStatusEl.innerText =
      "Search failed: " + (err instanceof Error ? err.message : String(err));
    outputStatusEl.classList.remove("invisible");
  } finally {
    // Always release the UI, even when the worker or the ranking failed.
    isCalculating = false;
    toggleInputs(false);
  }
});
157
// Splits `text` into sentences and computes one embedding per sentence with
// the currently selected model, storing the result in `sentenceEmbeddings`.
// Inputs are disabled and `isCalculating` is set for the duration. Callers
// invoke this without awaiting, so failures are handled here instead of
// surfacing as unhandled rejections.
async function calculateEmbeddings(text) {
  isCalculating = true;
  toggleInputs(true);
  try {
    const modelID = modelSelectEl.value;
    const { modelURL, tokenizerURL, configURL, document_prefix } =
      getModelInfo(modelID);

    const sentences = text.split(sentencesRegex);
    const allEmbeddings = [];
    outputStatusEl.classList.remove("invisible");
    // One request at a time, so the status line can report progress.
    for (const [id, sentence] of sentences.entries()) {
      const query = document_prefix + sentence;
      outputStatusEl.innerText = `Calculating embeddings: sentence ${
        id + 1
      } of ${sentences.length}`;
      const embeddings = await getEmbeddings(
        bertWorker,
        modelURL,
        tokenizerURL,
        configURL,
        modelID,
        [query]
      );
      allEmbeddings.push(embeddings);
    }
    outputStatusEl.classList.add("invisible");
    sentenceEmbeddings = allEmbeddings.map((e) => e.output[0]);
  } catch (err) {
    console.error(err);
    // Drop stale state: old embeddings no longer match the rendered sentences.
    // Resetting currInputText makes the next blur retry and stops searches
    // from running against nothing.
    sentenceEmbeddings = [];
    currInputText = "";
    outputStatusEl.innerText =
      "Failed to calculate embeddings: " +
      (err instanceof Error ? err.message : String(err));
    outputStatusEl.classList.remove("invisible");
  } finally {
    // Always release the UI, even when the worker reported an error.
    isCalculating = false;
    toggleInputs(false);
  }
}
187
+
188
// Displays a worker status message in the status line. Payloads without a
// `status` key are ignored.
function updateStatus(data) {
  if (!("status" in data)) {
    return;
  }
  outputStatusEl.innerText = data.message;
  outputStatusEl.classList.remove("invisible");
}
194
// Disables every element marked `.interactive` when `state` is truthy and
// re-enables them when it is falsy.
function toggleInputs(state) {
  const disabled = Boolean(state);
  for (const el of document.querySelectorAll(".interactive")) {
    el.disabled = disabled;
  }
}
204
+
205
// Clear any previous "invalid article" message as soon as the user edits the
// Wikipedia search box.
searchWikiEl.addEventListener("input", () => {
  searchWikiEl.setCustomValidity("");
});

// Loads a Wikipedia article (typed, or chosen via an example button) into the
// text area, renders it and starts computing its sentence embeddings.
formWikiEl.addEventListener("submit", async (e) => {
  e.preventDefault();
  // `submitter` can be null when the form is submitted programmatically.
  const submitter = e.submitter;
  if (submitter && "example" in submitter.dataset) {
    searchWikiEl.value = submitter.innerText;
  }
  const text = searchWikiEl.value;

  if (isCalculating || text === "") return;

  let wikiText;
  try {
    wikiText = await getWikiText(text);
    // Treat a missing/empty extract as a lookup failure instead of relying on
    // a later TypeError to reach the catch block.
    if (typeof wikiText !== "string" || wikiText === "") {
      throw new Error("No article found");
    }
  } catch {
    searchWikiEl.setCustomValidity("Invalid Wikipedia article name");
    searchWikiEl.reportValidity();
    return;
  }

  // A calculation may have started while the article was being fetched.
  if (isCalculating) return;

  searchWikiEl.setCustomValidity("");
  // Use `value`, not `innerHTML`: innerHTML only changes the textarea's
  // default value (ignored once the user has typed) and would parse the
  // external text as markup.
  textAreaEl.value = wikiText;
  populateOutputArea(wikiText);
  calculateEmbeddings(wikiText);
  searchWikiEl.value = "";
});
229
+ </script>
230
+ </head>
231
+ <body class="container max-w-4xl mx-auto p-4">
232
+ <main class="grid grid-cols-1 gap-8 relative">
233
+ <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
234
+ <div>
235
+ <h1 class="text-5xl font-bold">Candle BERT</h1>
236
+ <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
237
+ <p class="max-w-lg">
238
+ Running sentence embeddings and similarity search in the browser using
239
+ the Bert Model written with
240
+ <a
241
+ href="https://github.com/huggingface/candle/"
242
+ target="_blank"
243
+ class="underline hover:text-blue-500 hover:no-underline"
244
+ >Candle
245
+ </a>
246
+ and compiled to Wasm. Embedding models are from
247
+ <a
248
+ href="https://huggingface.co/sentence-transformers/"
249
+ target="_blank"
250
+ class="underline hover:text-blue-500 hover:no-underline"
251
+ >
252
+ Sentence Transformers
253
+ </a>
254
+ and
255
+ <a
256
+ href="https://huggingface.co/intfloat/"
257
+ target="_blank"
258
+ class="underline hover:text-blue-500 hover:no-underline"
259
+ >
260
+ Liang Wang - e5 Models
261
+ </a>
262
+ </p>
263
+ </div>
264
+
265
+ <div>
266
+ <label for="model" class="font-medium">Model Options: </label>
267
+ <select
268
+ id="model"
269
+ class="border-2 border-gray-500 rounded-md font-light interactive disabled:cursor-not-allowed w-full max-w-max"
270
+ >
271
+ <option value="intfloat_e5_small_v2" selected>
272
+ intfloat/e5-small-v2 (133 MB)
273
+ </option>
274
+ <option value="intfloat_e5_base_v2">
275
+ intfloat/e5-base-v2 (438 MB)
276
+ </option>
277
+ <option value="intfloat_multilingual_e5_small">
278
+ intfloat/multilingual-e5-small (471 MB)
279
+ </option>
280
+ <option value="sentence_transformers_all_MiniLM_L6_v2">
281
+ sentence-transformers/all-MiniLM-L6-v2 (90.9 MB)
282
+ </option>
283
+ <option value="sentence_transformers_all_MiniLM_L12_v2">
284
+ sentence-transformers/all-MiniLM-L12-v2 (133 MB)
285
+ </option>
286
+ </select>
287
+ </div>
288
+ <form
289
+ id="form"
290
+ class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center"
291
+ >
292
+ <input type="submit" hidden />
293
+ <input
294
+ type="text"
295
+ id="search-input"
296
+ class="font-light w-full px-3 py-2 mx-1 resize-none outline-none interactive disabled:cursor-not-allowed"
297
+ placeholder="Search query here..."
298
+ />
299
+ <button
300
+ class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed interactive"
301
+ >
302
+ Search
303
+ </button>
304
+ </form>
305
+ <div>
306
+ <h3 class="font-medium">Input text:</h3>
307
+ <div class="flex justify-between items-center">
308
+ <div class="rounded-md inline text-xs">
309
+ <span id="output-status" class="m-auto font-light invisible"
310
+ >C</span
311
+ >
312
+ </div>
313
+ </div>
314
+ <div
315
+ id="input-container"
316
+ tabindex="0"
317
+ class="min-h-[250px] bg-slate-100 text-gray-500 rounded-md p-4 flex flex-col gap-2 relative"
318
+ >
319
+ <textarea
320
+ id="input-area"
321
+ hidden
322
+ value=""
323
+ placeholder="Input text to perform semantic similarity search..."
324
+ class="flex-1 resize-none outline-none left-0 right-0 top-0 bottom-0 m-4 absolute interactive disabled:invisible"
325
+ ></textarea>
326
+ <p id="output-area" class="grid-rows-2">
327
+ Input text to perform semantic similarity search...
328
+ </p>
329
+ </div>
330
+ </div>
331
+ <div>
332
+ <h3 class="font-medium">Examples</h3>
333
+
334
+ <form
335
+ id="form-wiki"
336
+ class="flex text-xs rounded-md items-center w-min gap-3"
337
+ >
338
+ <input type="submit" hidden />
339
+
340
+ <button data-example class="disabled:cursor-not-allowed interactive">
341
+ Pizza
342
+ </button>
343
+ <button data-example class="disabled:cursor-not-allowed interactive">
344
+ Paris
345
+ </button>
346
+ <button data-example class="disabled:cursor-not-allowed interactive">
347
+ Physics
348
+ </button>
349
+ <input
350
+ type="text"
351
+ id="search-wiki"
352
+ class="font-light py-0 mx-1 resize-none outline-none w-32 disabled:cursor-not-allowed interactive"
353
+ placeholder="search Wikipedia article..."
354
+ />
355
+ <button
356
+ class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed interactive"
357
+ >
358
+ Enter
359
+ </button>
360
+ </form>
361
+ </div>
362
+ </main>
363
+ </body>
364
  </html>
utils.js ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export async function getEmbeddings(
2
+ worker,
3
+ weightsURL,
4
+ tokenizerURL,
5
+ configURL,
6
+ modelID,
7
+ sentences,
8
+ updateStatus = null
9
+ ) {
10
+ return new Promise((resolve, reject) => {
11
+ worker.postMessage({
12
+ weightsURL,
13
+ tokenizerURL,
14
+ configURL,
15
+ modelID,
16
+ sentences,
17
+ });
18
+ function messageHandler(event) {
19
+ if ("error" in event.data) {
20
+ worker.removeEventListener("message", messageHandler);
21
+ reject(new Error(event.data.error));
22
+ }
23
+ if (event.data.status === "complete") {
24
+ worker.removeEventListener("message", messageHandler);
25
+ resolve(event.data);
26
+ }
27
+ if (updateStatus) updateStatus(event.data);
28
+ }
29
+ worker.addEventListener("message", messageHandler);
30
+ });
31
+ }
32
+
33
+ const MODELS = {
34
+ intfloat_e5_small_v2: {
35
+ base_url: "https://huggingface.co/intfloat/e5-small-v2/resolve/main/",
36
+ search_prefix: "query: ",
37
+ document_prefix: "passage: ",
38
+ },
39
+ intfloat_e5_base_v2: {
40
+ base_url: "https://huggingface.co/intfloat/e5-base-v2/resolve/main/",
41
+ search_prefix: "query: ",
42
+ document_prefix: "passage:",
43
+ },
44
+ intfloat_multilingual_e5_small: {
45
+ base_url:
46
+ "https://huggingface.co/intfloat/multilingual-e5-small/resolve/main/",
47
+ search_prefix: "query: ",
48
+ document_prefix: "passage: ",
49
+ },
50
+ sentence_transformers_all_MiniLM_L6_v2: {
51
+ base_url:
52
+ "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/refs%2Fpr%2F21/",
53
+ search_prefix: "",
54
+ document_prefix: "",
55
+ },
56
+ sentence_transformers_all_MiniLM_L12_v2: {
57
+ base_url:
58
+ "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/refs%2Fpr%2F4/",
59
+ search_prefix: "",
60
+ document_prefix: "",
61
+ },
62
+ };
63
/**
 * Builds the download URLs and text prefixes for a model.
 *
 * @param id Key into `MODELS`.
 * @returns Object with `modelURL`, `configURL`, `tokenizerURL`,
 *     `search_prefix` and `document_prefix`.
 */
export function getModelInfo(id) {
  const entry = MODELS[id];
  const base = entry.base_url;
  return {
    modelURL: base + "model.safetensors",
    configURL: base + "config.json",
    tokenizerURL: base + "tokenizer.json",
    search_prefix: entry.search_prefix,
    document_prefix: entry.document_prefix,
  };
}
72
+
73
/**
 * Cosine similarity between two numeric vectors (arrays or typed arrays).
 *
 * @param vec1 First vector; its length drives the dot product.
 * @param vec2 Second vector, expected to have the same length as `vec1`.
 * @returns dot(vec1, vec2) / (|vec1| * |vec2|), or 0 when either vector has
 *     zero magnitude (instead of NaN, which would corrupt sorting by
 *     similarity).
 */
export function cosineSimilarity(vec1, vec2) {
  let dot = 0;
  let sumSq1 = 0;
  for (let i = 0; i < vec1.length; i++) {
    dot += vec1[i] * vec2[i];
    sumSq1 += vec1[i] * vec1[i];
  }
  let sumSq2 = 0;
  for (let i = 0; i < vec2.length; i++) {
    sumSq2 += vec2[i] * vec2[i];
  }
  const denominator = Math.sqrt(sumSq1) * Math.sqrt(sumSq2);
  // A zero vector has no direction; report "no similarity" rather than 0/0.
  return denominator === 0 ? 0 : dot / denominator;
}
79
+ export async function getWikiText(article) {
80
+ // thanks to wikipedia for the API
81
+ const URL = `https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exlimit=1&titles=${article}&explaintext=1&exsectionformat=plain&format=json&origin=*`;
82
+ return fetch(URL, {
83
+ method: "GET",
84
+ headers: {
85
+ Accept: "application/json",
86
+ },
87
+ })
88
+ .then((r) => r.json())
89
+ .then((data) => {
90
+ const pages = data.query.pages;
91
+ const pageId = Object.keys(pages)[0];
92
+ const extract = pages[pageId].extract;
93
+ if (extract === undefined || extract === "") {
94
+ throw new Error("No article found");
95
+ }
96
+ return extract;
97
+ })
98
+ .catch((error) => console.error("Error:", error));
99
+ }