Upload /Mistral-7B-v0.1/fp4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json with huggingface_hub
Browse files
Mistral-7B-v0.1/fp4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json
CHANGED
@@ -2,194 +2,184 @@
|
|
2 |
"prefill": {
|
3 |
"memory": {
|
4 |
"unit": "MB",
|
5 |
-
"max_ram":
|
6 |
"max_vram": 5490.343936,
|
7 |
"max_reserved": 5005.901824,
|
8 |
"max_allocated": 4844.946944
|
9 |
},
|
10 |
"latency": {
|
11 |
"unit": "s",
|
12 |
-
"mean": 0.
|
13 |
-
"stdev": 0.
|
14 |
"values": [
|
15 |
-
0.
|
16 |
-
0.
|
17 |
-
0.
|
18 |
-
0.
|
19 |
-
0.
|
20 |
-
0.
|
21 |
-
0.
|
22 |
-
0.
|
23 |
-
0.
|
24 |
-
0.
|
25 |
-
0.
|
26 |
-
0.
|
27 |
-
0.
|
28 |
-
0.
|
29 |
-
0.
|
30 |
-
0.
|
31 |
-
0.
|
32 |
-
0.
|
33 |
-
0.
|
34 |
-
0.
|
35 |
-
0.
|
36 |
-
0.
|
37 |
-
0.
|
38 |
-
0.
|
39 |
-
0.
|
40 |
-
0.
|
41 |
-
0.
|
42 |
-
0.
|
43 |
-
0.
|
44 |
-
0.
|
45 |
-
0.
|
46 |
-
0.
|
47 |
-
0.
|
48 |
-
0.
|
49 |
-
0.
|
50 |
-
0.
|
51 |
-
0.
|
52 |
-
0.
|
53 |
-
0.
|
54 |
-
0.
|
55 |
-
0.
|
56 |
-
0.
|
57 |
-
0.
|
58 |
-
0.
|
59 |
-
0.
|
60 |
-
0.
|
61 |
-
0.
|
62 |
-
0.
|
63 |
-
0.
|
64 |
-
0.
|
65 |
-
0.
|
66 |
-
0.
|
67 |
-
0.
|
68 |
-
0.
|
69 |
-
0.
|
70 |
-
0.
|
71 |
-
0.
|
72 |
-
0.
|
73 |
-
0.
|
74 |
-
0.
|
75 |
-
0.
|
76 |
-
0.
|
77 |
-
0.
|
78 |
-
0.
|
79 |
-
0.
|
80 |
-
0.
|
81 |
-
0.
|
82 |
-
0.
|
83 |
-
0.
|
84 |
-
0.
|
85 |
-
0.
|
86 |
-
0.
|
87 |
-
0.
|
88 |
-
0.
|
89 |
-
0.
|
90 |
-
0.
|
91 |
-
0.
|
92 |
-
0.
|
93 |
-
0.
|
94 |
-
0.
|
95 |
-
0.
|
96 |
-
0.
|
97 |
-
0.
|
98 |
-
0.
|
99 |
-
0.
|
100 |
-
0.
|
101 |
-
0.
|
102 |
-
0.
|
103 |
-
0.
|
104 |
-
0.
|
105 |
-
0.
|
106 |
-
0.
|
107 |
-
0.
|
108 |
-
0.
|
109 |
-
0.
|
110 |
-
0.
|
111 |
-
0.
|
112 |
-
0.
|
113 |
-
0.
|
114 |
-
0.
|
115 |
-
0.
|
116 |
-
0.
|
117 |
-
0.
|
118 |
-
0.
|
119 |
-
0.
|
120 |
-
0.
|
121 |
-
0.
|
122 |
-
0.
|
123 |
-
0.
|
124 |
-
0.
|
125 |
-
0.
|
126 |
-
0.
|
127 |
-
0.
|
128 |
-
0.
|
129 |
-
0.
|
130 |
-
0.
|
131 |
-
0.
|
132 |
-
0.
|
133 |
-
0.
|
134 |
-
0.
|
135 |
-
0.
|
136 |
-
0.
|
137 |
-
0.
|
138 |
-
0.
|
139 |
-
0.
|
140 |
-
0.
|
141 |
-
0.
|
142 |
-
0.
|
143 |
-
0.
|
144 |
-
0.
|
145 |
-
0.
|
146 |
-
0.
|
147 |
-
0.
|
148 |
-
0.
|
149 |
-
0.
|
150 |
-
0.
|
151 |
-
0.
|
152 |
-
0.
|
153 |
-
0.
|
154 |
-
0.
|
155 |
-
0.
|
156 |
-
0.
|
157 |
-
0.
|
158 |
-
0.
|
159 |
-
0.
|
160 |
-
0.
|
161 |
-
0.
|
162 |
-
0.
|
163 |
-
0.
|
164 |
-
0.
|
165 |
-
0.
|
166 |
-
0.
|
167 |
-
0.
|
168 |
-
0.
|
169 |
-
0.
|
170 |
-
0.
|
171 |
-
0.
|
172 |
-
0.
|
173 |
-
0.
|
174 |
-
0.
|
175 |
-
0.
|
176 |
-
0.
|
177 |
-
0.
|
178 |
-
0.057491455078125,
|
179 |
-
0.058218494415283206,
|
180 |
-
0.057534465789794924,
|
181 |
-
0.05807308959960938,
|
182 |
-
0.05757952117919922,
|
183 |
-
0.058028030395507815,
|
184 |
-
0.05753548812866211,
|
185 |
-
0.05803212738037109,
|
186 |
-
0.05754982376098633,
|
187 |
-
0.05800447845458984
|
188 |
]
|
189 |
},
|
190 |
"throughput": {
|
191 |
"unit": "tokens/s",
|
192 |
-
"value":
|
193 |
},
|
194 |
"energy": null,
|
195 |
"efficiency": null
|
@@ -197,23 +187,22 @@
|
|
197 |
"decode": {
|
198 |
"memory": {
|
199 |
"unit": "MB",
|
200 |
-
"max_ram":
|
201 |
"max_vram": 5710.544896,
|
202 |
"max_reserved": 5226.102784,
|
203 |
"max_allocated": 4911.274496
|
204 |
},
|
205 |
"latency": {
|
206 |
"unit": "s",
|
207 |
-
"mean":
|
208 |
-
"stdev": 0
|
209 |
"values": [
|
210 |
-
|
211 |
-
8.429527046203614
|
212 |
]
|
213 |
},
|
214 |
"throughput": {
|
215 |
"unit": "tokens/s",
|
216 |
-
"value":
|
217 |
},
|
218 |
"energy": null,
|
219 |
"efficiency": null
|
@@ -222,524 +211,269 @@
|
|
222 |
"memory": null,
|
223 |
"latency": {
|
224 |
"unit": "s",
|
225 |
-
"mean": 0.
|
226 |
-
"stdev": 0.
|
227 |
"values": [
|
228 |
-
0.
|
229 |
-
0.
|
230 |
-
0.
|
231 |
-
0.
|
232 |
-
0.
|
233 |
-
0.
|
234 |
-
0.
|
235 |
-
0.
|
236 |
-
0.
|
237 |
-
0.
|
238 |
-
0.
|
239 |
-
0.
|
240 |
-
0.
|
241 |
-
0.
|
242 |
-
0.
|
243 |
-
0.
|
244 |
-
0.
|
245 |
-
0.
|
246 |
-
0.
|
247 |
-
0.
|
248 |
-
0.
|
249 |
-
0.
|
250 |
-
0.
|
251 |
-
0.
|
252 |
-
0.
|
253 |
-
0.
|
254 |
-
0.
|
255 |
-
0.
|
256 |
-
0.
|
257 |
-
0.
|
258 |
-
0.
|
259 |
-
0.
|
260 |
-
0.
|
261 |
-
0.
|
262 |
-
0.
|
263 |
-
0.
|
264 |
-
0.
|
265 |
-
0.
|
266 |
-
0.
|
267 |
-
0.
|
268 |
-
0.
|
269 |
-
0.
|
270 |
-
0.
|
271 |
-
0.
|
272 |
-
0.
|
273 |
-
0.
|
274 |
-
0.
|
275 |
-
0.
|
276 |
-
0.
|
277 |
-
0.
|
278 |
-
0.
|
279 |
-
0.
|
280 |
-
0.
|
281 |
-
0.
|
282 |
-
0.
|
283 |
-
0.
|
284 |
-
0.
|
285 |
-
0.
|
286 |
-
0.
|
287 |
-
0.
|
288 |
-
0.
|
289 |
-
0.
|
290 |
-
0.
|
291 |
-
0.
|
292 |
-
0.
|
293 |
-
0.
|
294 |
-
0.
|
295 |
-
0.
|
296 |
-
0.
|
297 |
-
0.
|
298 |
-
0.
|
299 |
-
0.
|
300 |
-
0.
|
301 |
-
0.
|
302 |
-
0.
|
303 |
-
0.
|
304 |
-
0.
|
305 |
-
0.
|
306 |
-
0.
|
307 |
-
0.
|
308 |
-
0.
|
309 |
-
0.
|
310 |
-
0.
|
311 |
-
0.
|
312 |
-
0.
|
313 |
-
0.
|
314 |
-
0.
|
315 |
-
0.
|
316 |
-
0.
|
317 |
-
0.
|
318 |
-
0.
|
319 |
-
0.
|
320 |
-
0.
|
321 |
-
0.
|
322 |
-
0.
|
323 |
-
0.
|
324 |
-
0.
|
325 |
-
0.
|
326 |
-
0.
|
327 |
-
0.
|
328 |
-
0.
|
329 |
-
0.
|
330 |
-
0.
|
331 |
-
0.
|
332 |
-
0.
|
333 |
-
0.
|
334 |
-
0.
|
335 |
-
0.
|
336 |
-
0.
|
337 |
-
0.
|
338 |
-
0.
|
339 |
-
0.
|
340 |
-
0.
|
341 |
-
0.
|
342 |
-
0.
|
343 |
-
0.
|
344 |
-
0.
|
345 |
-
0.
|
346 |
-
0.
|
347 |
-
0.
|
348 |
-
0.
|
349 |
-
0.
|
350 |
-
0.
|
351 |
-
0.
|
352 |
-
0.
|
353 |
-
0.
|
354 |
-
0.
|
355 |
-
0.
|
356 |
-
0.
|
357 |
-
0.
|
358 |
-
0.
|
359 |
-
0.
|
360 |
-
0.
|
361 |
-
0.
|
362 |
-
0.
|
363 |
-
0.
|
364 |
-
0.
|
365 |
-
0.
|
366 |
-
0.
|
367 |
-
0.
|
368 |
-
0.
|
369 |
-
0.
|
370 |
-
0.
|
371 |
-
0.
|
372 |
-
0.
|
373 |
-
0.
|
374 |
-
0.
|
375 |
-
0.
|
376 |
-
0.
|
377 |
-
0.
|
378 |
-
0.
|
379 |
-
0.
|
380 |
-
0.
|
381 |
-
0.
|
382 |
-
0.
|
383 |
-
0.
|
384 |
-
0.
|
385 |
-
0.
|
386 |
-
0.
|
387 |
-
0.
|
388 |
-
0.
|
389 |
-
0.
|
390 |
-
0.
|
391 |
-
0.
|
392 |
-
0.
|
393 |
-
0.
|
394 |
-
0.
|
395 |
-
0.
|
396 |
-
0.
|
397 |
-
0.
|
398 |
-
0.
|
399 |
-
0.
|
400 |
-
0.
|
401 |
-
0.
|
402 |
-
0.
|
403 |
-
0.
|
404 |
-
0.
|
405 |
-
0.
|
406 |
-
0.
|
407 |
-
0.
|
408 |
-
0.
|
409 |
-
0.
|
410 |
-
0.
|
411 |
-
0.
|
412 |
-
0.
|
413 |
-
0.
|
414 |
-
0.
|
415 |
-
0.
|
416 |
-
0.
|
417 |
-
0.
|
418 |
-
0.
|
419 |
-
0.
|
420 |
-
0.
|
421 |
-
0.
|
422 |
-
0.
|
423 |
-
0.
|
424 |
-
0.
|
425 |
-
0.
|
426 |
-
0.
|
427 |
-
0.
|
428 |
-
0.
|
429 |
-
0.
|
430 |
-
0.
|
431 |
-
0.
|
432 |
-
0.
|
433 |
-
0.
|
434 |
-
0.
|
435 |
-
0.
|
436 |
-
0.
|
437 |
-
0.
|
438 |
-
0.
|
439 |
-
0.
|
440 |
-
0.
|
441 |
-
0.
|
442 |
-
0.
|
443 |
-
0.
|
444 |
-
0.
|
445 |
-
0.
|
446 |
-
0.
|
447 |
-
0.
|
448 |
-
0.
|
449 |
-
0.
|
450 |
-
0.
|
451 |
-
0.
|
452 |
-
0.
|
453 |
-
0.
|
454 |
-
0.
|
455 |
-
0.
|
456 |
-
0.
|
457 |
-
0.
|
458 |
-
0.
|
459 |
-
0.
|
460 |
-
0.
|
461 |
-
0.
|
462 |
-
0.
|
463 |
-
0.
|
464 |
-
0.
|
465 |
-
0.
|
466 |
-
0.
|
467 |
-
0.
|
468 |
-
0.
|
469 |
-
0.
|
470 |
-
0.
|
471 |
-
0.
|
472 |
-
0.
|
473 |
-
0.
|
474 |
-
0.
|
475 |
-
0.
|
476 |
-
0.
|
477 |
-
0.
|
478 |
-
0.
|
479 |
-
0.
|
480 |
-
0.
|
481 |
-
0.
|
482 |
-
0.
|
483 |
-
0.0331673583984375,
|
484 |
-
0.03307724761962891,
|
485 |
-
0.033185791015625,
|
486 |
-
0.033075199127197266,
|
487 |
-
0.032939006805419925,
|
488 |
-
0.03308031845092774,
|
489 |
-
0.0333240966796875,
|
490 |
-
0.03308230209350586,
|
491 |
-
0.03307929611206055,
|
492 |
-
0.03302604675292969,
|
493 |
-
0.03306086349487305,
|
494 |
-
0.03299225616455078,
|
495 |
-
0.033046817779541014,
|
496 |
-
0.0331363525390625,
|
497 |
-
0.033320960998535154,
|
498 |
-
0.03310079956054687,
|
499 |
-
0.033565696716308595,
|
500 |
-
0.033091808319091795,
|
501 |
-
0.033137439727783206,
|
502 |
-
0.03304959869384766,
|
503 |
-
0.033058815002441407,
|
504 |
-
0.033068031311035154,
|
505 |
-
0.03316633605957031,
|
506 |
-
0.0330618896484375,
|
507 |
-
0.03309081649780273,
|
508 |
-
0.03296128082275391,
|
509 |
-
0.03320832061767578,
|
510 |
-
0.03308441543579101,
|
511 |
-
0.0332861442565918,
|
512 |
-
0.03323904037475586,
|
513 |
-
0.03363942337036133,
|
514 |
-
0.033037311553955076,
|
515 |
-
0.03306598281860352,
|
516 |
-
0.03304579162597656,
|
517 |
-
0.03302883148193359,
|
518 |
-
0.03300966262817383,
|
519 |
-
0.0331776008605957,
|
520 |
-
0.032996353149414064,
|
521 |
-
0.033170433044433595,
|
522 |
-
0.033067230224609376,
|
523 |
-
0.03310160064697266,
|
524 |
-
0.03306528091430664,
|
525 |
-
0.033051326751708986,
|
526 |
-
0.03304345703125,
|
527 |
-
0.033067008972167966,
|
528 |
-
0.033113086700439456,
|
529 |
-
0.03313663864135742,
|
530 |
-
0.03306496047973633,
|
531 |
-
0.03307628631591797,
|
532 |
-
0.03310688018798828,
|
533 |
-
0.0332042236328125,
|
534 |
-
0.033149185180664065,
|
535 |
-
0.03320499038696289,
|
536 |
-
0.033081375122070315,
|
537 |
-
0.03317679977416992,
|
538 |
-
0.03298892974853516,
|
539 |
-
0.03305472183227539,
|
540 |
-
0.03310182571411133,
|
541 |
-
0.03306496047973633,
|
542 |
-
0.03302707290649414,
|
543 |
-
0.03310182571411133,
|
544 |
-
0.03304652786254883,
|
545 |
-
0.03307622528076172,
|
546 |
-
0.03299737548828125,
|
547 |
-
0.033175552368164066,
|
548 |
-
0.0331776008605957,
|
549 |
-
0.0331141128540039,
|
550 |
-
0.03305267333984375,
|
551 |
-
0.03300966262817383,
|
552 |
-
0.033143806457519534,
|
553 |
-
0.03303734588623047,
|
554 |
-
0.03301801681518555,
|
555 |
-
0.03318764877319336,
|
556 |
-
0.03318067169189453,
|
557 |
-
0.03320217514038086,
|
558 |
-
0.033108192443847655,
|
559 |
-
0.03315894317626953,
|
560 |
-
0.03373673629760742,
|
561 |
-
0.03318268966674805,
|
562 |
-
0.033173503875732424,
|
563 |
-
0.033205249786376956,
|
564 |
-
0.03317891311645508,
|
565 |
-
0.03320291137695312,
|
566 |
-
0.03308236694335937,
|
567 |
-
0.0331069450378418,
|
568 |
-
0.03297814559936523,
|
569 |
-
0.033092384338378904,
|
570 |
-
0.032952320098876955,
|
571 |
-
0.03308441543579101,
|
572 |
-
0.033097793579101566,
|
573 |
-
0.033175487518310544,
|
574 |
-
0.03356159973144531,
|
575 |
-
0.03320012664794922,
|
576 |
-
0.03310182571411133,
|
577 |
-
0.03315814590454102,
|
578 |
-
0.03322060775756836,
|
579 |
-
0.0331673583984375,
|
580 |
-
0.03310079956054687,
|
581 |
-
0.033051647186279294,
|
582 |
-
0.03307334518432617,
|
583 |
-
0.033067840576171875,
|
584 |
-
0.0331409912109375,
|
585 |
-
0.0331220474243164,
|
586 |
-
0.033089534759521484,
|
587 |
-
0.033050624847412106,
|
588 |
-
0.033097728729248044,
|
589 |
-
0.033023998260498046,
|
590 |
-
0.03313356781005859,
|
591 |
-
0.032981056213378906,
|
592 |
-
0.03310995101928711,
|
593 |
-
0.03301171112060547,
|
594 |
-
0.03317046356201172,
|
595 |
-
0.03301577758789063,
|
596 |
-
0.03308348846435547,
|
597 |
-
0.033062816619873044,
|
598 |
-
0.03309283065795898,
|
599 |
-
0.03302889633178711,
|
600 |
-
0.03303936004638672,
|
601 |
-
0.03303424072265625,
|
602 |
-
0.03306905746459961,
|
603 |
-
0.03305779266357422,
|
604 |
-
0.03301171112060547,
|
605 |
-
0.03303219223022461,
|
606 |
-
0.03308748626708984,
|
607 |
-
0.03295129776000977,
|
608 |
-
0.03302912139892578,
|
609 |
-
0.03304652786254883,
|
610 |
-
0.0329238395690918,
|
611 |
-
0.033054527282714845,
|
612 |
-
0.03309363174438477,
|
613 |
-
0.032879615783691404,
|
614 |
-
0.032865409851074216,
|
615 |
-
0.0330463981628418,
|
616 |
-
0.032925918579101564,
|
617 |
-
0.03297257614135742,
|
618 |
-
0.03290726470947265,
|
619 |
-
0.03301593780517578,
|
620 |
-
0.032984031677246096,
|
621 |
-
0.03342959976196289,
|
622 |
-
0.032863040924072266,
|
623 |
-
0.03292364883422851,
|
624 |
-
0.03295948791503906,
|
625 |
-
0.03286220932006836,
|
626 |
-
0.03288576126098633,
|
627 |
-
0.03292876815795898,
|
628 |
-
0.03298611068725586,
|
629 |
-
0.033123519897460936,
|
630 |
-
0.032947200775146485,
|
631 |
-
0.03296553421020508,
|
632 |
-
0.03312015914916992,
|
633 |
-
0.033159168243408206,
|
634 |
-
0.03298713684082031,
|
635 |
-
0.03338854217529297,
|
636 |
-
0.032838878631591795,
|
637 |
-
0.03298502349853515,
|
638 |
-
0.03306480026245117,
|
639 |
-
0.03301200103759765,
|
640 |
-
0.03290083312988281,
|
641 |
-
0.032909313201904294,
|
642 |
-
0.032974048614501955,
|
643 |
-
0.03303504180908203,
|
644 |
-
0.032996353149414064,
|
645 |
-
0.032892929077148435,
|
646 |
-
0.0328172492980957,
|
647 |
-
0.03301279830932617,
|
648 |
-
0.03296352005004883,
|
649 |
-
0.03294198226928711,
|
650 |
-
0.03305574417114258,
|
651 |
-
0.03306905746459961,
|
652 |
-
0.033056766510009765,
|
653 |
-
0.03306291198730469,
|
654 |
-
0.03307724761962891,
|
655 |
-
0.033140960693359374,
|
656 |
-
0.033064735412597655,
|
657 |
-
0.03323104095458984,
|
658 |
-
0.032940097808837894,
|
659 |
-
0.03315276718139649,
|
660 |
-
0.03301273727416992,
|
661 |
-
0.03298406219482422,
|
662 |
-
0.03290009689331055,
|
663 |
-
0.03299532699584961,
|
664 |
-
0.03298303985595703,
|
665 |
-
0.03297894287109375,
|
666 |
-
0.03300873565673828,
|
667 |
-
0.032878559112548826,
|
668 |
-
0.03284883117675781,
|
669 |
-
0.03292364883422851,
|
670 |
-
0.032948513031005856,
|
671 |
-
0.032984798431396484,
|
672 |
-
0.033023998260498046,
|
673 |
-
0.03298924636840821,
|
674 |
-
0.03287340927124023,
|
675 |
-
0.032911361694335936,
|
676 |
-
0.03284400177001953,
|
677 |
-
0.03291027069091797,
|
678 |
-
0.033060703277587894,
|
679 |
-
0.033124351501464845,
|
680 |
-
0.03297382354736328,
|
681 |
-
0.03303014373779297,
|
682 |
-
0.03348992156982422,
|
683 |
-
0.03299235153198242,
|
684 |
-
0.033053791046142575,
|
685 |
-
0.03306399917602539,
|
686 |
-
0.032955135345458984,
|
687 |
-
0.032991359710693356,
|
688 |
-
0.032892799377441405,
|
689 |
-
0.0329881591796875,
|
690 |
-
0.03290828704833984,
|
691 |
-
0.03292160034179688,
|
692 |
-
0.03300454330444336,
|
693 |
-
0.033035263061523434,
|
694 |
-
0.033078369140625,
|
695 |
-
0.03300649642944336,
|
696 |
-
0.03344086456298828,
|
697 |
-
0.033134559631347656,
|
698 |
-
0.03300755310058594,
|
699 |
-
0.03307929611206055,
|
700 |
-
0.033031169891357424,
|
701 |
-
0.032952320098876955,
|
702 |
-
0.033089534759521484,
|
703 |
-
0.03298918533325195,
|
704 |
-
0.032939006805419925,
|
705 |
-
0.032966655731201173,
|
706 |
-
0.03299430465698242,
|
707 |
-
0.03299356842041016,
|
708 |
-
0.033051361083984376,
|
709 |
-
0.033113086700439456,
|
710 |
-
0.03297689437866211,
|
711 |
-
0.03301171112060547,
|
712 |
-
0.03291862487792969,
|
713 |
-
0.03299523162841797,
|
714 |
-
0.03288371276855469,
|
715 |
-
0.03292876815795898,
|
716 |
-
0.033075199127197266,
|
717 |
-
0.032998401641845705,
|
718 |
-
0.03321651077270508,
|
719 |
-
0.03300352096557617,
|
720 |
-
0.0330250244140625,
|
721 |
-
0.033081344604492184,
|
722 |
-
0.03307724761962891,
|
723 |
-
0.033104896545410156,
|
724 |
-
0.032911361694335936,
|
725 |
-
0.03300864028930664,
|
726 |
-
0.03287142562866211,
|
727 |
-
0.0329483528137207,
|
728 |
-
0.03305571365356445,
|
729 |
-
0.03292559814453125,
|
730 |
-
0.032968704223632815,
|
731 |
-
0.03297689437866211,
|
732 |
-
0.03288576126098633,
|
733 |
-
0.033040382385253905,
|
734 |
-
0.032901119232177735,
|
735 |
-
0.03300044631958008,
|
736 |
-
0.0329543685913086,
|
737 |
-
0.032833534240722655
|
738 |
]
|
739 |
},
|
740 |
"throughput": {
|
741 |
"unit": "tokens/s",
|
742 |
-
"value":
|
743 |
},
|
744 |
"energy": null,
|
745 |
"efficiency": null
|
|
|
2 |
"prefill": {
|
3 |
"memory": {
|
4 |
"unit": "MB",
|
5 |
+
"max_ram": 2365.382656,
|
6 |
"max_vram": 5490.343936,
|
7 |
"max_reserved": 5005.901824,
|
8 |
"max_allocated": 4844.946944
|
9 |
},
|
10 |
"latency": {
|
11 |
"unit": "s",
|
12 |
+
"mean": 0.06139228851224749,
|
13 |
+
"stdev": 0.0006576332047282342,
|
14 |
"values": [
|
15 |
+
0.06698953247070312,
|
16 |
+
0.06482637023925782,
|
17 |
+
0.061813758850097655,
|
18 |
+
0.061192192077636716,
|
19 |
+
0.06180044937133789,
|
20 |
+
0.06121683120727539,
|
21 |
+
0.061823040008544924,
|
22 |
+
0.061274112701416014,
|
23 |
+
0.061795326232910154,
|
24 |
+
0.06128128051757813,
|
25 |
+
0.06168473434448242,
|
26 |
+
0.0611778564453125,
|
27 |
+
0.061590526580810545,
|
28 |
+
0.06113894271850586,
|
29 |
+
0.06164889526367188,
|
30 |
+
0.06110028839111328,
|
31 |
+
0.06164275360107422,
|
32 |
+
0.061055999755859375,
|
33 |
+
0.06158028793334961,
|
34 |
+
0.06107340621948242,
|
35 |
+
0.06156492614746094,
|
36 |
+
0.06107136154174805,
|
37 |
+
0.06162739181518555,
|
38 |
+
0.061058048248291016,
|
39 |
+
0.06156108856201172,
|
40 |
+
0.06108160018920898,
|
41 |
+
0.06147379302978516,
|
42 |
+
0.060990463256835936,
|
43 |
+
0.061548545837402345,
|
44 |
+
0.061069313049316405,
|
45 |
+
0.06165200042724609,
|
46 |
+
0.061061119079589846,
|
47 |
+
0.061461505889892576,
|
48 |
+
0.060990592956542966,
|
49 |
+
0.06144409561157226,
|
50 |
+
0.06096188735961914,
|
51 |
+
0.06155263900756836,
|
52 |
+
0.06093824005126953,
|
53 |
+
0.06133760070800781,
|
54 |
+
0.060835838317871094,
|
55 |
+
0.06161407852172852,
|
56 |
+
0.060889087677001956,
|
57 |
+
0.061290496826171874,
|
58 |
+
0.060902400970458986,
|
59 |
+
0.06138163375854492,
|
60 |
+
0.06090752029418945,
|
61 |
+
0.06138777542114258,
|
62 |
+
0.06104985427856445,
|
63 |
+
0.06156185531616211,
|
64 |
+
0.06108671951293945,
|
65 |
+
0.06154342269897461,
|
66 |
+
0.06106316757202149,
|
67 |
+
0.061505535125732425,
|
68 |
+
0.061072383880615234,
|
69 |
+
0.0615280647277832,
|
70 |
+
0.061104129791259766,
|
71 |
+
0.06152908706665039,
|
72 |
+
0.06113587188720703,
|
73 |
+
0.061486080169677736,
|
74 |
+
0.060913665771484375,
|
75 |
+
0.06134579086303711,
|
76 |
+
0.06088294219970703,
|
77 |
+
0.06137343978881836,
|
78 |
+
0.060903423309326174,
|
79 |
+
0.06134067153930664,
|
80 |
+
0.06086041641235351,
|
81 |
+
0.06148710250854492,
|
82 |
+
0.061001728057861325,
|
83 |
+
0.06145228958129883,
|
84 |
+
0.06098124694824219,
|
85 |
+
0.06144307327270508,
|
86 |
+
0.0609699821472168,
|
87 |
+
0.061475841522216794,
|
88 |
+
0.06097100830078125,
|
89 |
+
0.061513729095458984,
|
90 |
+
0.060949504852294924,
|
91 |
+
0.06171750259399414,
|
92 |
+
0.06105497741699219,
|
93 |
+
0.06150348663330078,
|
94 |
+
0.06107257461547851,
|
95 |
+
0.06151065444946289,
|
96 |
+
0.061009918212890625,
|
97 |
+
0.061475872039794925,
|
98 |
+
0.06099353790283203,
|
99 |
+
0.06146047973632812,
|
100 |
+
0.06104166412353516,
|
101 |
+
0.06147993469238281,
|
102 |
+
0.06105395126342773,
|
103 |
+
0.06146563339233398,
|
104 |
+
0.061034496307373044,
|
105 |
+
0.06149529647827148,
|
106 |
+
0.060955646514892575,
|
107 |
+
0.0614389762878418,
|
108 |
+
0.061023231506347655,
|
109 |
+
0.06149324798583984,
|
110 |
+
0.061037727355957035,
|
111 |
+
0.06154342269897461,
|
112 |
+
0.06099967956542969,
|
113 |
+
0.06145843124389649,
|
114 |
+
0.06101913452148437,
|
115 |
+
0.061454334259033204,
|
116 |
+
0.06099660873413086,
|
117 |
+
0.0614574089050293,
|
118 |
+
0.060909599304199216,
|
119 |
+
0.06139494323730469,
|
120 |
+
0.060918785095214846,
|
121 |
+
0.06140927886962891,
|
122 |
+
0.061006847381591796,
|
123 |
+
0.06155980682373047,
|
124 |
+
0.06105820846557617,
|
125 |
+
0.06141030502319336,
|
126 |
+
0.0610898551940918,
|
127 |
+
0.0615546875,
|
128 |
+
0.06101606369018555,
|
129 |
+
0.06154751968383789,
|
130 |
+
0.06100409698486328,
|
131 |
+
0.06146480178833008,
|
132 |
+
0.06098944091796875,
|
133 |
+
0.06156390380859375,
|
134 |
+
0.06103551864624023,
|
135 |
+
0.06154240036010742,
|
136 |
+
0.061050880432128904,
|
137 |
+
0.06163046264648438,
|
138 |
+
0.06101212692260742,
|
139 |
+
0.061358081817626954,
|
140 |
+
0.06091263961791992,
|
141 |
+
0.061431808471679686,
|
142 |
+
0.06091263961791992,
|
143 |
+
0.061400062561035154,
|
144 |
+
0.06095974349975586,
|
145 |
+
0.061456382751464846,
|
146 |
+
0.060928256988525394,
|
147 |
+
0.06137036895751953,
|
148 |
+
0.06294323348999023,
|
149 |
+
0.06259609603881836,
|
150 |
+
0.06398259353637695,
|
151 |
+
0.06279183959960938,
|
152 |
+
0.061058048248291016,
|
153 |
+
0.06157619094848633,
|
154 |
+
0.061058048248291016,
|
155 |
+
0.06156185531616211,
|
156 |
+
0.061217025756835936,
|
157 |
+
0.0615464973449707,
|
158 |
+
0.06250393676757812,
|
159 |
+
0.061587455749511716,
|
160 |
+
0.06101401519775391,
|
161 |
+
0.06148940658569336,
|
162 |
+
0.061044734954833986,
|
163 |
+
0.06150758361816406,
|
164 |
+
0.062304256439208984,
|
165 |
+
0.0615464973449707,
|
166 |
+
0.061090816497802736,
|
167 |
+
0.0615333137512207,
|
168 |
+
0.06110723114013672,
|
169 |
+
0.06143916702270508,
|
170 |
+
0.06108671951293945,
|
171 |
+
0.06155596923828125,
|
172 |
+
0.061036544799804686,
|
173 |
+
0.061618175506591794,
|
174 |
+
0.061026302337646485,
|
175 |
+
0.061445121765136716,
|
176 |
+
0.06115238571166992,
|
177 |
+
0.061480960845947265
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
]
|
179 |
},
|
180 |
"throughput": {
|
181 |
"unit": "tokens/s",
|
182 |
+
"value": 4169.904823615251
|
183 |
},
|
184 |
"energy": null,
|
185 |
"efficiency": null
|
|
|
187 |
"decode": {
|
188 |
"memory": {
|
189 |
"unit": "MB",
|
190 |
+
"max_ram": 2365.382656,
|
191 |
"max_vram": 5710.544896,
|
192 |
"max_reserved": 5226.102784,
|
193 |
"max_allocated": 4911.274496
|
194 |
},
|
195 |
"latency": {
|
196 |
"unit": "s",
|
197 |
+
"mean": 10.269249523162834,
|
198 |
+
"stdev": 0,
|
199 |
"values": [
|
200 |
+
10.269249523162834
|
|
|
201 |
]
|
202 |
},
|
203 |
"throughput": {
|
204 |
"unit": "tokens/s",
|
205 |
+
"value": 24.83141532639109
|
206 |
},
|
207 |
"energy": null,
|
208 |
"efficiency": null
|
|
|
211 |
"memory": null,
|
212 |
"latency": {
|
213 |
"unit": "s",
|
214 |
+
"mean": 0.04027156675750131,
|
215 |
+
"stdev": 0.0005416632850634589,
|
216 |
"values": [
|
217 |
+
0.04257177734375,
|
218 |
+
0.04227686309814453,
|
219 |
+
0.040438785552978515,
|
220 |
+
0.04025241470336914,
|
221 |
+
0.04035203170776367,
|
222 |
+
0.040083168029785156,
|
223 |
+
0.040139774322509765,
|
224 |
+
0.04010598373413086,
|
225 |
+
0.040363006591796875,
|
226 |
+
0.040323070526123043,
|
227 |
+
0.04030361557006836,
|
228 |
+
0.04026675033569336,
|
229 |
+
0.040346622467041016,
|
230 |
+
0.0404326400756836,
|
231 |
+
0.04035276794433594,
|
232 |
+
0.040242176055908206,
|
233 |
+
0.04028726577758789,
|
234 |
+
0.04032201766967773,
|
235 |
+
0.04032620620727539,
|
236 |
+
0.04035372924804687,
|
237 |
+
0.040392704010009765,
|
238 |
+
0.04022988891601562,
|
239 |
+
0.04035686492919922,
|
240 |
+
0.04028416061401367,
|
241 |
+
0.04023817443847656,
|
242 |
+
0.04030966567993164,
|
243 |
+
0.040373344421386716,
|
244 |
+
0.0402891845703125,
|
245 |
+
0.040275966644287106,
|
246 |
+
0.040400894165039065,
|
247 |
+
0.04032204818725586,
|
248 |
+
0.040286209106445314,
|
249 |
+
0.0403507194519043,
|
250 |
+
0.040235008239746094,
|
251 |
+
0.04014796829223633,
|
252 |
+
0.04045414352416992,
|
253 |
+
0.0402872314453125,
|
254 |
+
0.04036198425292969,
|
255 |
+
0.040403968811035154,
|
256 |
+
0.04017049789428711,
|
257 |
+
0.04031795120239258,
|
258 |
+
0.040204288482666016,
|
259 |
+
0.04021657562255859,
|
260 |
+
0.040343551635742186,
|
261 |
+
0.040357887268066404,
|
262 |
+
0.040240127563476565,
|
263 |
+
0.04044083023071289,
|
264 |
+
0.040414207458496096,
|
265 |
+
0.04048486328125,
|
266 |
+
0.04031078338623047,
|
267 |
+
0.04032204818725586,
|
268 |
+
0.04019200134277344,
|
269 |
+
0.047661056518554686,
|
270 |
+
0.041534465789794923,
|
271 |
+
0.040325119018554685,
|
272 |
+
0.04011929702758789,
|
273 |
+
0.04045619201660156,
|
274 |
+
0.04031180953979492,
|
275 |
+
0.04044611358642578,
|
276 |
+
0.040282974243164064,
|
277 |
+
0.0403240966796875,
|
278 |
+
0.040237056732177735,
|
279 |
+
0.04026367950439453,
|
280 |
+
0.040395774841308595,
|
281 |
+
0.04018483352661133,
|
282 |
+
0.04016332626342774,
|
283 |
+
0.04030054473876953,
|
284 |
+
0.04032825469970703,
|
285 |
+
0.04040288162231445,
|
286 |
+
0.0405401611328125,
|
287 |
+
0.04029644775390625,
|
288 |
+
0.04018175888061523,
|
289 |
+
0.04032204818725586,
|
290 |
+
0.040323070526123043,
|
291 |
+
0.04020940780639649,
|
292 |
+
0.04011520004272461,
|
293 |
+
0.040156158447265625,
|
294 |
+
0.040427520751953126,
|
295 |
+
0.04040806579589844,
|
296 |
+
0.04041523361206055,
|
297 |
+
0.04009983825683594,
|
298 |
+
0.04023091125488281,
|
299 |
+
0.04048896026611328,
|
300 |
+
0.040199169158935545,
|
301 |
+
0.040272895812988284,
|
302 |
+
0.041350143432617184,
|
303 |
+
0.04156313705444336,
|
304 |
+
0.04027391815185547,
|
305 |
+
0.04037747192382812,
|
306 |
+
0.04008639907836914,
|
307 |
+
0.04024115371704102,
|
308 |
+
0.04010086441040039,
|
309 |
+
0.04025753784179688,
|
310 |
+
0.04028518295288086,
|
311 |
+
0.04046950531005859,
|
312 |
+
0.04053811264038086,
|
313 |
+
0.04035686492919922,
|
314 |
+
0.040392704010009765,
|
315 |
+
0.04051968002319336,
|
316 |
+
0.0401715202331543,
|
317 |
+
0.04017363357543945,
|
318 |
+
0.040140735626220704,
|
319 |
+
0.040374271392822264,
|
320 |
+
0.040325119018554685,
|
321 |
+
0.04017049789428711,
|
322 |
+
0.04024934387207031,
|
323 |
+
0.04025139236450195,
|
324 |
+
0.04024115371704102,
|
325 |
+
0.04040499114990234,
|
326 |
+
0.040269824981689455,
|
327 |
+
0.03996672058105469,
|
328 |
+
0.040025344848632814,
|
329 |
+
0.039972606658935546,
|
330 |
+
0.040158206939697266,
|
331 |
+
0.040308734893798825,
|
332 |
+
0.04019200134277344,
|
333 |
+
0.040226814270019534,
|
334 |
+
0.04015718460083008,
|
335 |
+
0.04025753784179688,
|
336 |
+
0.0402872314453125,
|
337 |
+
0.04021452713012695,
|
338 |
+
0.040153087615966795,
|
339 |
+
0.04032204818725586,
|
340 |
+
0.040360958099365234,
|
341 |
+
0.040258560180664066,
|
342 |
+
0.040275966644287106,
|
343 |
+
0.04021657562255859,
|
344 |
+
0.040275966644287106,
|
345 |
+
0.0401899528503418,
|
346 |
+
0.04012236785888672,
|
347 |
+
0.04008038330078125,
|
348 |
+
0.0401162223815918,
|
349 |
+
0.04002918243408203,
|
350 |
+
0.04017663955688477,
|
351 |
+
0.0400076789855957,
|
352 |
+
0.03996057510375976,
|
353 |
+
0.04003839874267578,
|
354 |
+
0.040018943786621096,
|
355 |
+
0.03996876907348633,
|
356 |
+
0.039907329559326174,
|
357 |
+
0.03992486572265625,
|
358 |
+
0.039940990447998044,
|
359 |
+
0.040256511688232424,
|
360 |
+
0.04002304077148437,
|
361 |
+
0.040150016784667966,
|
362 |
+
0.039989246368408206,
|
363 |
+
0.03999846267700195,
|
364 |
+
0.04000460815429688,
|
365 |
+
0.04017356872558594,
|
366 |
+
0.04035174560546875,
|
367 |
+
0.04016742324829101,
|
368 |
+
0.04012748718261719,
|
369 |
+
0.0403322868347168,
|
370 |
+
0.04016742324829101,
|
371 |
+
0.040048641204833986,
|
372 |
+
0.04006092834472656,
|
373 |
+
0.040240127563476565,
|
374 |
+
0.04009881591796875,
|
375 |
+
0.04012134552001953,
|
376 |
+
0.04022784042358398,
|
377 |
+
0.04029132843017578,
|
378 |
+
0.04015923309326172,
|
379 |
+
0.039992321014404295,
|
380 |
+
0.04015718460083008,
|
381 |
+
0.04031590270996094,
|
382 |
+
0.040174591064453126,
|
383 |
+
0.04025753784179688,
|
384 |
+
0.04016742324829101,
|
385 |
+
0.04012441635131836,
|
386 |
+
0.04002816009521484,
|
387 |
+
0.04011724853515625,
|
388 |
+
0.04014080047607422,
|
389 |
+
0.04044595336914063,
|
390 |
+
0.04051456069946289,
|
391 |
+
0.04025139236450195,
|
392 |
+
0.04036710357666016,
|
393 |
+
0.040182785034179686,
|
394 |
+
0.04026675033569336,
|
395 |
+
0.04014694213867188,
|
396 |
+
0.04022272109985352,
|
397 |
+
0.040197120666503904,
|
398 |
+
0.04013568115234375,
|
399 |
+
0.0399554557800293,
|
400 |
+
0.040089599609375,
|
401 |
+
0.039992321014404295,
|
402 |
+
0.040225791931152347,
|
403 |
+
0.040019966125488284,
|
404 |
+
0.040134654998779294,
|
405 |
+
0.04005068969726563,
|
406 |
+
0.04024627304077148,
|
407 |
+
0.04008755111694336,
|
408 |
+
0.04041625595092774,
|
409 |
+
0.04025753784179688,
|
410 |
+
0.040409088134765625,
|
411 |
+
0.04029747009277344,
|
412 |
+
0.040264705657958984,
|
413 |
+
0.04017663955688477,
|
414 |
+
0.04020940780639649,
|
415 |
+
0.039995391845703124,
|
416 |
+
0.040052734375,
|
417 |
+
0.04014899063110351,
|
418 |
+
0.040202239990234374,
|
419 |
+
0.04018175888061523,
|
420 |
+
0.040313854217529296,
|
421 |
+
0.04025139236450195,
|
422 |
+
0.04017663955688477,
|
423 |
+
0.04022169494628906,
|
424 |
+
0.04015513610839844,
|
425 |
+
0.04014591979980469,
|
426 |
+
0.04013772964477539,
|
427 |
+
0.04026265716552734,
|
428 |
+
0.04014387130737305,
|
429 |
+
0.040389633178710936,
|
430 |
+
0.040240127563476565,
|
431 |
+
0.04022886276245117,
|
432 |
+
0.04030278396606445,
|
433 |
+
0.03998905563354492,
|
434 |
+
0.04021247863769531,
|
435 |
+
0.04043468856811523,
|
436 |
+
0.040321025848388675,
|
437 |
+
0.040188926696777344,
|
438 |
+
0.04009369659423828,
|
439 |
+
0.0401868782043457,
|
440 |
+
0.04030156707763672,
|
441 |
+
0.04008755111694336,
|
442 |
+
0.04026777648925781,
|
443 |
+
0.04004556655883789,
|
444 |
+
0.04028518295288086,
|
445 |
+
0.04009881591796875,
|
446 |
+
0.04015923309326172,
|
447 |
+
0.04014899063110351,
|
448 |
+
0.04010496139526367,
|
449 |
+
0.04002816009521484,
|
450 |
+
0.039965694427490234,
|
451 |
+
0.04000460815429688,
|
452 |
+
0.039959552764892575,
|
453 |
+
0.04009369659423828,
|
454 |
+
0.04009369659423828,
|
455 |
+
0.04008243179321289,
|
456 |
+
0.03999129486083984,
|
457 |
+
0.04012748718261719,
|
458 |
+
0.03990425491333008,
|
459 |
+
0.039967742919921875,
|
460 |
+
0.040048641204833986,
|
461 |
+
0.03986943817138672,
|
462 |
+
0.039995391845703124,
|
463 |
+
0.040022014617919925,
|
464 |
+
0.039766014099121096,
|
465 |
+
0.03983257675170898,
|
466 |
+
0.04000665664672851,
|
467 |
+
0.04008550262451172,
|
468 |
+
0.039667713165283204,
|
469 |
+
0.040136703491210936,
|
470 |
+
0.04025548934936524,
|
471 |
+
0.04055859375
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
]
|
473 |
},
|
474 |
"throughput": {
|
475 |
"unit": "tokens/s",
|
476 |
+
"value": 24.831415326391088
|
477 |
},
|
478 |
"energy": null,
|
479 |
"efficiency": null
|