Yang Gu committed
Commit e4aee93
1 parent: 1322fe0

Add ort-phi2

demo/ort-phi2/index.html ADDED
@@ -0,0 +1,378 @@
+ <!DOCTYPE html>
+ <html>
+
+ <head>
+ <title>Example</title>
+ </head>
+
+ <body>
+ <!-- <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.webgpu.min.js"> </script> -->
+ <script src="https://wp-27.sh.intel.com/workspace/project/onnxruntime/js/web/dist/ort.webgpu.min.js"> </script>
+
+ <script type="module">
+ import { AutoTokenizer, env } from '../../transformers/transformers.js';
+
+ function log(i) { console.log(i); document.getElementById('status').innerText += `\n${i}`; }
+
+ const MODELS = {
+   "tinyllama": { name: "tinyllama", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-int4" },
+   "tinyllama_fp16": { name: "tinyllama-fp16", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-fp16", externaldata: true },
+   "phi2": { name: "phi2", path: "phi2-int4" },
+   "phi2-mb": { name: "phi2-mb", path: "schmuell/phi2-mb", externaldata: true },
+   "stablelm": { name: "stablelm", path: "schmuell/stablelm-2-zephyr-1_6b-int4" },
+ }
+
+ // defaults below can be overridden via URL query parameters, e.g. ?model=phi2&max_tokens=128
+ function getConfig() {
+   const query = window.location.search.substring(1);
+   const config = {
+     model: "phi2",
+     provider: "webgpu",
+     profiler: 0,
+     verbose: 0,
+     threads: 1,
+     trace: 0,
+     csv: 0,
+     max_tokens: 256,
+     local: 1,
+   }
+   let vars = query.split("&");
+   for (let i = 0; i < vars.length; i++) {
+     let pair = vars[i].split("=");
+     if (pair[0] in config) {
+       const key = pair[0];
+       const value = decodeURIComponent(pair[1]);
+       if (typeof config[key] == "number") {
+         config[key] = parseInt(value);
+       } else {
+         config[key] = value;
+       }
+     } else if (pair[0].length > 0) {
+       throw new Error("unknown argument: " + pair[0]);
+     }
+   }
+   if (MODELS[config.model] !== undefined) {
+     config.model = MODELS[config.model];
+   }
+   return config;
+ }
+
+ class LLM {
+   sess = undefined;
+   profiler = false;
+   trace = false;
+   feed = {};
+   output_tokens = [];
+   eos = 2;
+   need_position_ids = true;
+   stop = false;
+   kv_dims = [];
+   dtype = "float16";
+
+   constructor() {
+   }
+
+   async load(model, options) {
+     const provider = options.provider || "webgpu";
+     const verbose = options.verbose;
+     const local = options.local;
+     this.profiler = options.profiler;
+     this.trace = options.trace;
+
+     const model_path = (local) ? "models/" + model.path : "https://huggingface.co/" + model.path + "/resolve/main";
+
+     log(`loading... ${model.name}, ${provider}`);
+     const json_bytes = await fetchAndCache(model_path + "/config.json");
+     let textDecoder = new TextDecoder();
+     const model_config = JSON.parse(textDecoder.decode(json_bytes));
+
+     // NOTE: this demo always loads the phi2-int4 weights file, whatever model is selected
+     const model_bytes = await fetchAndCache(model_path + "/phi2-int4.onnx");
+     const externaldata = (model.externaldata) ? await fetchAndCache(model_path + '/onnx/decoder_model_merged.onnx.data') : false;
+     let modelSize = model_bytes.byteLength;
+     if (externaldata) {
+       modelSize += externaldata.byteLength;
+     }
+
+     log(`model size ${Math.round(modelSize / 1024 / 1024)} MB`);
+
+     const opt = {
+       executionProviders: [provider],
+       preferredOutputLocation: {},
+     };
+
+     switch (provider) {
+       case "webgpu":
+         if (!("gpu" in navigator)) {
+           throw new Error("webgpu is NOT supported");
+         }
+         // keep the KV cache on the GPU so each step avoids a round trip through CPU memory
+         for (let i = 0; i < model_config.num_hidden_layers; ++i) {
+           opt.preferredOutputLocation[`present.${i}.key`] = 'gpu-buffer';
+           opt.preferredOutputLocation[`present.${i}.value`] = 'gpu-buffer';
+         }
+         break;
+       case "webnn":
+         if (!("ml" in navigator)) {
+           throw new Error("webnn is NOT supported");
+         }
+         break;
+     }
+
+     if (externaldata) {
+       opt.externalData = [
+         {
+           data: externaldata,
+           path: 'decoder_model_merged.onnx.data'
+         },
+       ]
+     }
+     if (verbose) {
+       opt.logSeverityLevel = 0;
+       opt.logVerbosityLevel = 0;
+       ort.env.logLevel = "verbose";
+       ort.env.debug = true;
+     }
+
+     ort.env.webgpu.profiling = {};
+     if (this.profiler) {
+       opt.enableProfiling = true;
+       ort.env.webgpu.profilingMode = 'default';
+       ort.env.webgpu.profiling.mode = 'default';
+     }
+
+     this.sess = await ort.InferenceSession.create(model_bytes, opt);
+
+     if (this.trace) {
+       ort.env.trace = true;
+       ort.env.webgpu.profiling.ondata = (version, inputsMetadata, outputsMetadata, kernelId, kernelType,
+         kernelName, programName, startTime, endTime) => { };
+     }
+
+     this.eos = model_config.eos_token_id;
+     // per-layer KV-cache shape: [batch, heads, sequence (grows from 0), head_dim]
+     this.kv_dims = [1, model_config.num_key_value_heads, 0, model_config.hidden_size / model_config.num_attention_heads];
+     this.dtype = model.dtype || "float16";
+     this.num_layers = model_config.num_hidden_layers;
+     this.initialize_feed();
+   }
+
+   initialize_feed() {
+     this.feed = {};
+     const empty = (this.dtype === "float16") ? new Uint16Array() : [];
+     for (let i = 0; i < this.num_layers; ++i) {
+       this.feed[`past_key_values.${i}.key`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
+       this.feed[`past_key_values.${i}.value`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
+     }
+     this.output_tokens = [];
+   }
+
+   // greedy sampling: index of the largest logit at the last position
+   argmax(t) {
+     const arr = t.data;
+     const start = t.dims[2] * (t.dims[1] - 1);
+     let max = arr[start];
+     let maxidx = 0;
+
+     for (let i = 0; i < t.dims[2]; i++) {
+       const val = arr[i + start];
+       if (!isFinite(val)) {
+         throw new Error("found non-finite value in logits");
+       }
+       if (val > max) {
+         max = val;
+         maxidx = i;
+       }
+     }
+     return maxidx;
+   }
+
+   update_kv_cache(feed, outputs) {
+     for (const name in outputs) {
+       if (name.startsWith('present')) {
+         let newName = name.replace('present', 'past_key_values');
+         // free the old gpu buffer before replacing it
+         const t = feed[newName];
+         if (t.location === 'gpu-buffer') {
+           t.dispose();
+         }
+         feed[newName] = outputs[name];
+       }
+     }
+   }
+
+   abort() {
+     this.stop = true;
+   }
+
+   async generate(tokens, callback, options) {
+     const keep_cache = options.keep_cache;
+     const max_tokens = options.max_tokens || 256;
+     const feed = this.feed;
+     const input_ids = new ort.Tensor('int64', BigInt64Array.from(tokens.map(BigInt)), [1, tokens.length]);
+     feed['input_ids'] = input_ids;
+     this.stop = false;
+
+     if (keep_cache) {
+       this.output_tokens.push(...input_ids.data)
+     } else {
+       this.initialize_feed();
+       this.output_tokens = Array.from(feed['input_ids'].data);
+     }
+
+     let last_token = 0n;
+     let seqlen = this.output_tokens.length;
+     if (this.need_position_ids) {
+       if (keep_cache) {
+         // only the new tokens need positions; they continue after the cached sequence
+         feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: tokens.length }, (_, i) => BigInt(seqlen - tokens.length + i)), [1, tokens.length]);
+       } else {
+         feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, (_, i) => BigInt(i)), [1, seqlen]);
+       }
+     }
+
+     while (last_token != this.eos && seqlen < max_tokens && !this.stop) {
+       seqlen = this.output_tokens.length;
+       feed['attention_mask'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, () => 1n), [1, seqlen]);
+       let outputs;
+       if (this.trace) {
+         console.timeStamp("RUN-BEGIN");
+         outputs = await this.sess.run(feed);
+         console.timeStamp("RUN-END");
+       } else {
+         outputs = await this.sess.run(feed);
+       }
+       last_token = BigInt(this.argmax(outputs.logits));
+       this.output_tokens.push(last_token);
+       if (callback && !this.profiler) {
+         callback(this.output_tokens);
+       }
+       this.update_kv_cache(feed, outputs);
+       // after the first step only the new token is fed; the KV cache carries the history
+       feed['input_ids'] = new ort.Tensor('int64', BigInt64Array.from([last_token]), [1, 1]);
+       if (this.need_position_ids) {
+         feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from([BigInt(seqlen)]), [1, 1]);
+       }
+     }
+     if (this.profiler) {
+       this.sess.endProfiling();
+     }
+     return this.output_tokens;
+   }
+ }
+
+ const config = getConfig();
+ env.localModelPath = 'models';
+ env.allowRemoteModels = config.local == 0;
+ env.allowLocalModels = config.local == 1;
+ ort.env.wasm.numThreads = config.threads;
+ ort.env.wasm.simd = true;
+
+ const cons_log = [];
+
+ // profiler=2 captures console output so it can be downloaded as a log file
+ if (config.profiler === 2) {
+   console.log = function (message) {
+     if (!message.includes('_fence_')) {
+       cons_log.push(message);
+     }
+   };
+ }
+
+ const tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
+
+ function create_download_link(cons_log) {
+   if (cons_log.length > 0) {
+     let link = document.getElementById('download').childNodes[0];
+     if (link === undefined) {
+       link = document.createElement("a");
+       link.download = "profiler.log";
+       link.innerText = "Download";
+       document.getElementById('download').appendChild(link);
+     }
+     const base64 = btoa(cons_log.join('\n'));
+     link.href = `data:application/json;base64,${base64}`;
+   }
+ }
+
+ // cache model artifacts with the Cache API so a reload does not re-download gigabytes
+ async function fetchAndCache(url) {
+   try {
+     const cache = await caches.open("onnx");
+     let cachedResponse = await cache.match(url);
+     if (cachedResponse == undefined) {
+       await cache.add(url);
+       cachedResponse = await cache.match(url);
+       log(`${url} (network)`);
+     } else {
+       log(`${url} (cached)`);
+     }
+     const data = await cachedResponse.arrayBuffer();
+     return data;
+   } catch (error) {
+     log(`${url} (network)`);
+     return await fetch(url).then(response => response.arrayBuffer());
+   }
+ }
+
+ function token_to_text(tokenizer, tokens, startidx) {
+   const txt = tokenizer.decode(tokens.slice(startidx), { skip_special_tokens: true, });
+   return txt;
+ }
+
+ const llm = new LLM();
+
+ async function main() {
+   const model = config.model;
+
+   await llm.load(model, {
+     provider: config.provider,
+     verbose: config.verbose,
+     profiler: config.profiler,
+     trace: config.trace,
+     local: config.local,
+   });
+
+   document.getElementById('status').innerText = "";
+   const query = "Tell me about Constantinople.";
+   let prompt;
+
+   if (model.name.includes('phi2')) {
+     prompt = `User:${query}\nAssistant:`;
+   } else {
+     prompt = `<|system|>\nYou are a friendly assistant.</s>\n<|user|>\n${query}</s>\n<|assistant|>\n`;
+   }
+   const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
+
+   const start_timer = performance.now();
+   const output_tokens = await llm.generate(input_ids, (output_tokens) => {
+     document.getElementById('result').innerText = token_to_text(tokenizer, output_tokens, input_ids.length);
+   }, {});
+   const took = (performance.now() - start_timer) / 1000;
+   const txt = token_to_text(tokenizer, output_tokens, input_ids.length);
+   const seqlen = output_tokens.length;
+   document.getElementById('result').innerText = txt;
+   const perf = `${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`;
+   console.log(perf + " @@1");
+   document.getElementById('perf').innerText = perf;
+   if (config.csv) {
+     log(`${model.name},${took.toFixed(2)},${(seqlen / took).toFixed(3)},${seqlen},@@2`);
+   }
+ }
+
+ try {
+   await main();
+ } catch (error) {
+   console.error(error);
+   document.getElementById('result').innerText = error.message;
+ } finally {
+   create_download_link(cons_log);
+ }
+ </script>
+
+ <div id="status"></div>
+ <br />
+ <div id="result"></div>
+ <br />
+ <div id="perf"></div>
+ <br />
+ <div id="download"></div>
+ <br />
+
+ </body>
+
+ </html>
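Note: the demo above takes all of its settings from URL query parameters, parsed by getConfig(); unknown keys throw an error. A hypothetical invocation, assuming the repository is served from a local web server (host and port are placeholders):

    http://localhost:8000/demo/ort-phi2/index.html?model=phi2&provider=webgpu&max_tokens=128

Numeric options (profiler, verbose, threads, trace, csv, max_tokens, local) are parsed with parseInt; string options (model, provider) are taken as-is.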
demo/ort-phi2/models/phi2-int4/config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "_name_or_path": "microsoft/phi-2",
+   "architectures": [
+     "PhiForCausalLM"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_phi.PhiConfig",
+     "AutoModelForCausalLM": "modeling_phi.PhiForCausalLM"
+   },
+   "attention_dropout": 0.0,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.0,
+   "eos_token_id": 50256,
+   "hidden_act": "gelu_new",
+   "hidden_size": 2560,
+   "initializer_range": 0.02,
+   "intermediate_size": 10240,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 2048,
+   "model_type": "phi",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "partial_rotary_factor": 0.4,
+   "qk_layernorm": false,
+   "resid_pdrop": 0.1,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.37.0",
+   "use_cache": true,
+   "vocab_size": 51200
+ }
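For reference, LLM.load() in the demo above derives its per-layer KV-cache shape from this file: kv_dims = [1, num_key_value_heads, 0, hidden_size / num_attention_heads] = [1, 32, 0, 2560 / 32] = [1, 32, 0, 80], and eos_token_id (50256) becomes the stop token for generation.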
demo/ort-phi2/models/phi2-int4/phi2-int4.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d4321d1b34279940c9ba43aa984f6090ea5656380f415b7c87e71b6e3cbf977
+ size 1770018731
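This is a Git LFS pointer rather than the weights themselves; the file it references is roughly 1.8 GB. A hypothetical way to materialize it after cloning, assuming the git-lfs extension is installed:

    git lfs install
    git lfs pull --include="demo/ort-phi2/models/phi2-int4/phi2-int4.onnx"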
demo/ort-phi2/models/phi2-int4/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
demo/ort-phi2/models/phi2-int4/tokenizer_config.json ADDED
@@ -0,0 +1,323 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50257": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50258": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50259": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50260": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50261": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50262": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50263": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50264": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50265": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50266": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50267": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50268": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50269": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50270": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50271": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50272": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50273": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50274": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50275": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50276": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50277": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50278": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50279": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50280": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50281": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50282": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50283": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50284": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50285": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50286": {
+       "content": " ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50287": {
+       "content": "\t\t\t\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50288": {
+       "content": "\t\t\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50289": {
+       "content": "\t\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50290": {
+       "content": "\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50291": {
+       "content": "\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50292": {
+       "content": "\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50293": {
+       "content": "\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50294": {
+       "content": "\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 2048,
+   "tokenizer_class": "CodeGenTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
main.js CHANGED
@@ -53,51 +53,45 @@ function createElem(tag, attrs = {}, children = []) {
 
 const pageCategories = [
   {
-    title: `Computer Vision`,
-    description: `Computer Vision`,
+    title: `ONNX Runtime`,
+    description: `ONNX Runtime`,
     demos: {
       sam: {
         name: 'Segment Anything',
         description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
-        filename: "sam",
+        filename: "ort-sam",
       },
       sdturbo: {
         name: 'Stable Diffusion Turbo',
         description: `Stable Diffusion Turbo from https://github.com/guschmue/ort-webgpu/tree/master/sd-turbo`,
-        filename: "sdturbo",
+        filename: "ort-sd-turbo",
+      },
+      tinyllama: {
+        name: 'Tiny Llama',
+        description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
+        filename: "ort-tiny-llama",
       },
       yolo: {
         name: 'Yolo',
        description: `Yolo V9 from https://github.com/guschmue/ort-webgpu/tree/master/yolov9`,
-        filename: "yolo",
+        filename: "ort-yolo",
      },
     },
   },
   {
-    title: `MediaPipe and TFLite`,
-    description: `MediaPipe and TFLite`,
+    title: `TFLite`,
+    description: `TFLite`,
     demos: {
       gemma: {
         name: 'Gemma',
-        description: `Gemma with MediaPipe and TFLite from https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js, <a href=https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html>more info.</a>`,
-        filename: "llm-inference",
-      },
-    },
-  },
-  {
-    title: `Natural Language Processing`,
-    description: `Natural Language Processing`,
-    demos: {
-      tinyllama: {
-        name: 'Tiny Llama',
-        description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
-        filename: "tinyllama",
+        description: `Gemma with TFLite and MediaPipe from https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js, <a href=https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html>more info.</a>`,
+        filename: "tflite-gemma",
       },
     },
   },
   {
-    title: 'Transformers.js WebGPU',
-    description: 'Transformers.js WebGPU',
+    title: 'Transformers.js',
+    description: 'Transformers.js',
     demos: {
       benchmark: {
         name: 'Benchmark',
@@ -126,8 +120,8 @@ const pageCategories = [
       },
     },
     {
-      title: 'TVM WebGPU',
-      description: 'TVM WebGPU',
+      title: 'TVM',
+      description: 'TVM',
       demos: {
         sd: {
           name: 'Web Stable Diffusion',
transformers/transformers.js ADDED
The diff for this file is too large to render. See raw diff