Upload /Llama-2-13b-hf/fp4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json with huggingface_hub
Browse files
Llama-2-13b-hf/fp4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json
ADDED
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"prefill": {
|
3 |
+
"memory": {
|
4 |
+
"unit": "MB",
|
5 |
+
"max_ram": 1040.412672,
|
6 |
+
"max_vram": 8359.247872,
|
7 |
+
"max_reserved": 7874.80576,
|
8 |
+
"max_allocated": 7776.413184
|
9 |
+
},
|
10 |
+
"latency": {
|
11 |
+
"unit": "s",
|
12 |
+
"mean": 0.08359146785736085,
|
13 |
+
"stdev": 0.0003168160271875822,
|
14 |
+
"values": [
|
15 |
+
0.08683526611328125,
|
16 |
+
0.08383897399902343,
|
17 |
+
0.08395785522460937,
|
18 |
+
0.08395468902587891,
|
19 |
+
0.08392806243896485,
|
20 |
+
0.08374681854248046,
|
21 |
+
0.08348365020751954,
|
22 |
+
0.08350617980957031,
|
23 |
+
0.08352767944335937,
|
24 |
+
0.0835389404296875,
|
25 |
+
0.08349388885498046,
|
26 |
+
0.08330547332763671,
|
27 |
+
0.08352476501464844,
|
28 |
+
0.08337407684326172,
|
29 |
+
0.08358809661865234,
|
30 |
+
0.08357683563232422,
|
31 |
+
0.08349696350097656,
|
32 |
+
0.08338636779785157,
|
33 |
+
0.08351757049560547,
|
34 |
+
0.08328102111816406,
|
35 |
+
0.083557373046875,
|
36 |
+
0.0834150390625,
|
37 |
+
0.08355328369140624,
|
38 |
+
0.0834897918701172,
|
39 |
+
0.08355840301513671,
|
40 |
+
0.08347241973876954,
|
41 |
+
0.08351948547363282,
|
42 |
+
0.08340275573730468,
|
43 |
+
0.08354918670654297,
|
44 |
+
0.08337612915039062,
|
45 |
+
0.08370687866210938,
|
46 |
+
0.08353177642822265,
|
47 |
+
0.08353075408935547,
|
48 |
+
0.08354415893554687,
|
49 |
+
0.08349286651611328,
|
50 |
+
0.08354303741455078,
|
51 |
+
0.08346112060546874,
|
52 |
+
0.08360140991210938,
|
53 |
+
0.083525634765625,
|
54 |
+
0.0836157455444336,
|
55 |
+
0.08351948547363282,
|
56 |
+
0.08360652923583985,
|
57 |
+
0.08336486053466798,
|
58 |
+
0.08361369323730469,
|
59 |
+
0.0834150390625,
|
60 |
+
0.08355430603027343,
|
61 |
+
0.08352767944335937,
|
62 |
+
0.08360243225097656,
|
63 |
+
0.08346214294433593,
|
64 |
+
0.08346623992919922,
|
65 |
+
0.08349900817871093,
|
66 |
+
0.08363212585449219,
|
67 |
+
0.08352051544189452,
|
68 |
+
0.08367513275146485,
|
69 |
+
0.0834672622680664,
|
70 |
+
0.08364543914794922,
|
71 |
+
0.08342937469482421,
|
72 |
+
0.08361984252929687,
|
73 |
+
0.08352460479736327,
|
74 |
+
0.08361062622070313,
|
75 |
+
0.08352665710449218,
|
76 |
+
0.08359219360351562,
|
77 |
+
0.08350208282470703,
|
78 |
+
0.08358624267578126,
|
79 |
+
0.08351129913330078,
|
80 |
+
0.08362290954589843,
|
81 |
+
0.08356147003173828,
|
82 |
+
0.08361369323730469,
|
83 |
+
0.08354611206054688,
|
84 |
+
0.08366899108886719,
|
85 |
+
0.08349491119384765,
|
86 |
+
0.08363827514648438,
|
87 |
+
0.08353997039794922,
|
88 |
+
0.08369356536865234,
|
89 |
+
0.083346435546875,
|
90 |
+
0.0835962905883789,
|
91 |
+
0.08361369323730469,
|
92 |
+
0.08360652923583985,
|
93 |
+
0.08347955322265625,
|
94 |
+
0.08356249237060547,
|
95 |
+
0.0835758056640625,
|
96 |
+
0.08359219360351562,
|
97 |
+
0.0835962905883789,
|
98 |
+
0.08353587341308594,
|
99 |
+
0.08360870361328125,
|
100 |
+
0.083525634765625,
|
101 |
+
0.08362188720703125,
|
102 |
+
0.08359219360351562,
|
103 |
+
0.08364031982421875,
|
104 |
+
0.08353791809082031,
|
105 |
+
0.08360140991210938,
|
106 |
+
0.08339353942871094,
|
107 |
+
0.08354713439941407,
|
108 |
+
0.08360857391357422,
|
109 |
+
0.08356454467773437,
|
110 |
+
0.083557373046875,
|
111 |
+
0.08356864166259766,
|
112 |
+
0.08348377227783203,
|
113 |
+
0.08361676788330077,
|
114 |
+
0.08360038757324219,
|
115 |
+
0.08362188720703125,
|
116 |
+
0.08355123138427735,
|
117 |
+
0.08360550689697266,
|
118 |
+
0.08351948547363282,
|
119 |
+
0.08361369323730469,
|
120 |
+
0.08356966400146484,
|
121 |
+
0.08360959625244141,
|
122 |
+
0.08357478332519531,
|
123 |
+
0.08351948547363282,
|
124 |
+
0.08359526062011718,
|
125 |
+
0.08355328369140624,
|
126 |
+
0.08358399963378907,
|
127 |
+
0.08374169921875,
|
128 |
+
0.08360959625244141,
|
129 |
+
0.08370585632324219,
|
130 |
+
0.08384614562988281,
|
131 |
+
0.08362700653076172,
|
132 |
+
0.08367820739746094,
|
133 |
+
0.08360550689697266,
|
134 |
+
0.08350514984130859
|
135 |
+
]
|
136 |
+
},
|
137 |
+
"throughput": {
|
138 |
+
"unit": "tokens/s",
|
139 |
+
"value": 3062.5135143796533
|
140 |
+
},
|
141 |
+
"energy": null,
|
142 |
+
"efficiency": null
|
143 |
+
},
|
144 |
+
"decode": {
|
145 |
+
"memory": {
|
146 |
+
"unit": "MB",
|
147 |
+
"max_ram": 1040.412672,
|
148 |
+
"max_vram": 9105.833984,
|
149 |
+
"max_reserved": 8621.391872,
|
150 |
+
"max_allocated": 8484.93824
|
151 |
+
},
|
152 |
+
"latency": {
|
153 |
+
"unit": "s",
|
154 |
+
"mean": 10.627742668151864,
|
155 |
+
"stdev": 0,
|
156 |
+
"values": [
|
157 |
+
10.627742668151864
|
158 |
+
]
|
159 |
+
},
|
160 |
+
"throughput": {
|
161 |
+
"unit": "tokens/s",
|
162 |
+
"value": 23.993806395421863
|
163 |
+
},
|
164 |
+
"energy": null,
|
165 |
+
"efficiency": null
|
166 |
+
},
|
167 |
+
"per_token": {
|
168 |
+
"memory": null,
|
169 |
+
"latency": {
|
170 |
+
"unit": "s",
|
171 |
+
"mean": 0.041677422228046525,
|
172 |
+
"stdev": 0.0012397732588279448,
|
173 |
+
"values": [
|
174 |
+
0.04167164611816406,
|
175 |
+
0.042102783203125,
|
176 |
+
0.0420055046081543,
|
177 |
+
0.04145151901245117,
|
178 |
+
0.041191425323486325,
|
179 |
+
0.04131737518310547,
|
180 |
+
0.04123136138916016,
|
181 |
+
0.04142710494995117,
|
182 |
+
0.04152918243408203,
|
183 |
+
0.041420799255371094,
|
184 |
+
0.041488384246826174,
|
185 |
+
0.041543678283691404,
|
186 |
+
0.04138905715942383,
|
187 |
+
0.041818111419677735,
|
188 |
+
0.04131020736694336,
|
189 |
+
0.04139116668701172,
|
190 |
+
0.04164499282836914,
|
191 |
+
0.04136140823364258,
|
192 |
+
0.04150476837158203,
|
193 |
+
0.04135935974121094,
|
194 |
+
0.04147814559936523,
|
195 |
+
0.04135935974121094,
|
196 |
+
0.04153964614868164,
|
197 |
+
0.0413337287902832,
|
198 |
+
0.04142179107666016,
|
199 |
+
0.04132761764526367,
|
200 |
+
0.04142489624023438,
|
201 |
+
0.04138905715942383,
|
202 |
+
0.041338878631591795,
|
203 |
+
0.04162563323974609,
|
204 |
+
0.0413337287902832,
|
205 |
+
0.04134092712402344,
|
206 |
+
0.04127948760986328,
|
207 |
+
0.04148019027709961,
|
208 |
+
0.041409534454345705,
|
209 |
+
0.04124684906005859,
|
210 |
+
0.04147494506835937,
|
211 |
+
0.04141260910034179,
|
212 |
+
0.04144025421142578,
|
213 |
+
0.04137881469726563,
|
214 |
+
0.04145151901245117,
|
215 |
+
0.04148019027709961,
|
216 |
+
0.04151398468017578,
|
217 |
+
0.04123545455932617,
|
218 |
+
0.04150374221801758,
|
219 |
+
0.04131024169921875,
|
220 |
+
0.04148937606811524,
|
221 |
+
0.04153958511352539,
|
222 |
+
0.041419776916503906,
|
223 |
+
0.04144844818115234,
|
224 |
+
0.04202905654907227,
|
225 |
+
0.04156620788574219,
|
226 |
+
0.041452545166015625,
|
227 |
+
0.04154880142211914,
|
228 |
+
0.0415098876953125,
|
229 |
+
0.04137881469726563,
|
230 |
+
0.041576446533203124,
|
231 |
+
0.0413941764831543,
|
232 |
+
0.041322494506835936,
|
233 |
+
0.04136140823364258,
|
234 |
+
0.041411582946777346,
|
235 |
+
0.041336830139160154,
|
236 |
+
0.04154265594482422,
|
237 |
+
0.04140236663818359,
|
238 |
+
0.04135628890991211,
|
239 |
+
0.0413757438659668,
|
240 |
+
0.04136038589477539,
|
241 |
+
0.04141363143920898,
|
242 |
+
0.04146278381347656,
|
243 |
+
0.041279518127441406,
|
244 |
+
0.04157027053833008,
|
245 |
+
0.041524223327636715,
|
246 |
+
0.041562110900878906,
|
247 |
+
0.04135935974121094,
|
248 |
+
0.041441280364990236,
|
249 |
+
0.04146995162963867,
|
250 |
+
0.04131430435180664,
|
251 |
+
0.04137472152709961,
|
252 |
+
0.04149964904785156,
|
253 |
+
0.04145459365844727,
|
254 |
+
0.04140236663818359,
|
255 |
+
0.041495582580566404,
|
256 |
+
0.04150780868530273,
|
257 |
+
0.04149673461914063,
|
258 |
+
0.04136636734008789,
|
259 |
+
0.04160720062255859,
|
260 |
+
0.041773025512695315,
|
261 |
+
0.041404415130615234,
|
262 |
+
0.041474048614501956,
|
263 |
+
0.041565185546875,
|
264 |
+
0.04143820953369141,
|
265 |
+
0.041468929290771485,
|
266 |
+
0.041299968719482424,
|
267 |
+
0.041422847747802735,
|
268 |
+
0.04123955154418945,
|
269 |
+
0.04147711944580078,
|
270 |
+
0.04130508804321289,
|
271 |
+
0.04152041625976562,
|
272 |
+
0.04150140762329101,
|
273 |
+
0.04227993774414063,
|
274 |
+
0.04153958511352539,
|
275 |
+
0.04153855895996094,
|
276 |
+
0.041431041717529295,
|
277 |
+
0.04143308639526367,
|
278 |
+
0.04128255844116211,
|
279 |
+
0.04138086318969727,
|
280 |
+
0.041166847229003906,
|
281 |
+
0.041262081146240234,
|
282 |
+
0.041567230224609376,
|
283 |
+
0.041502975463867185,
|
284 |
+
0.04141337585449219,
|
285 |
+
0.041924606323242186,
|
286 |
+
0.041352191925048826,
|
287 |
+
0.041439231872558595,
|
288 |
+
0.04144025421142578,
|
289 |
+
0.041335807800292966,
|
290 |
+
0.04135424041748047,
|
291 |
+
0.041588737487792966,
|
292 |
+
0.04134707260131836,
|
293 |
+
0.04145459365844727,
|
294 |
+
0.04140851211547852,
|
295 |
+
0.041332736968994144,
|
296 |
+
0.041470977783203126,
|
297 |
+
0.04137779235839844,
|
298 |
+
0.04135833740234375,
|
299 |
+
0.0412149772644043,
|
300 |
+
0.04135321426391601,
|
301 |
+
0.04133478546142578,
|
302 |
+
0.04139212799072266,
|
303 |
+
0.04152524948120117,
|
304 |
+
0.04153855895996094,
|
305 |
+
0.04152323150634766,
|
306 |
+
0.041465824127197265,
|
307 |
+
0.04138598251342773,
|
308 |
+
0.041596927642822266,
|
309 |
+
0.04160921478271484,
|
310 |
+
0.04134195327758789,
|
311 |
+
0.04149760055541992,
|
312 |
+
0.041506816864013675,
|
313 |
+
0.041171966552734376,
|
314 |
+
0.041376766204833985,
|
315 |
+
0.04130303955078125,
|
316 |
+
0.041485313415527345,
|
317 |
+
0.04133171081542969,
|
318 |
+
0.04147609710693359,
|
319 |
+
0.04129587173461914,
|
320 |
+
0.04152115249633789,
|
321 |
+
0.04149769592285156,
|
322 |
+
0.041712543487548825,
|
323 |
+
0.04134627151489258,
|
324 |
+
0.04153833770751953,
|
325 |
+
0.041565185546875,
|
326 |
+
0.041336830139160154,
|
327 |
+
0.04177305603027344,
|
328 |
+
0.04197273635864258,
|
329 |
+
0.04155295944213867,
|
330 |
+
0.041364414215087894,
|
331 |
+
0.041474048614501956,
|
332 |
+
0.04139212799072266,
|
333 |
+
0.042019840240478515,
|
334 |
+
0.041366527557373044,
|
335 |
+
0.04149760055541992,
|
336 |
+
0.04135116958618164,
|
337 |
+
0.04129075241088867,
|
338 |
+
0.04142489624023438,
|
339 |
+
0.04135833740234375,
|
340 |
+
0.04156927871704102,
|
341 |
+
0.04133785629272461,
|
342 |
+
0.04155084609985352,
|
343 |
+
0.046461952209472655,
|
344 |
+
0.04880588912963867,
|
345 |
+
0.04842803192138672,
|
346 |
+
0.04739276885986328,
|
347 |
+
0.04712140655517578,
|
348 |
+
0.04693503952026367,
|
349 |
+
0.04720025634765625,
|
350 |
+
0.04616191864013672,
|
351 |
+
0.043786239624023435,
|
352 |
+
0.04301311874389648,
|
353 |
+
0.045399040222167966,
|
354 |
+
0.050776065826416014,
|
355 |
+
0.043940864562988284,
|
356 |
+
0.041614334106445314,
|
357 |
+
0.041376766204833985,
|
358 |
+
0.041444351196289066,
|
359 |
+
0.04132352066040039,
|
360 |
+
0.04131737518310547,
|
361 |
+
0.041277599334716794,
|
362 |
+
0.04122812652587891,
|
363 |
+
0.041248767852783204,
|
364 |
+
0.0413829116821289,
|
365 |
+
0.04146585464477539,
|
366 |
+
0.04129075241088867,
|
367 |
+
0.04157440185546875,
|
368 |
+
0.04157132720947266,
|
369 |
+
0.04169625473022461,
|
370 |
+
0.04134297561645508,
|
371 |
+
0.04142899322509765,
|
372 |
+
0.04124671936035156,
|
373 |
+
0.04134707260131836,
|
374 |
+
0.041422847747802735,
|
375 |
+
0.0411412467956543,
|
376 |
+
0.0415365104675293,
|
377 |
+
0.04128684616088867,
|
378 |
+
0.04138374328613281,
|
379 |
+
0.04112691116333008,
|
380 |
+
0.04149248123168945,
|
381 |
+
0.04140851211547852,
|
382 |
+
0.04108595275878906,
|
383 |
+
0.041388031005859374,
|
384 |
+
0.04128255844116211,
|
385 |
+
0.04140057754516602,
|
386 |
+
0.041223934173583984,
|
387 |
+
0.04111974334716797,
|
388 |
+
0.04141363143920898,
|
389 |
+
0.04128460693359375,
|
390 |
+
0.04168806457519531,
|
391 |
+
0.041196544647216796,
|
392 |
+
0.041545726776123046,
|
393 |
+
0.04132147216796875,
|
394 |
+
0.04140236663818359,
|
395 |
+
0.0413040657043457,
|
396 |
+
0.041218048095703126,
|
397 |
+
0.04111667251586914,
|
398 |
+
0.04108902359008789,
|
399 |
+
0.04108595275878906,
|
400 |
+
0.04117708969116211,
|
401 |
+
0.04120064163208008,
|
402 |
+
0.04139622497558594,
|
403 |
+
0.04127449417114258,
|
404 |
+
0.04130086517333984,
|
405 |
+
0.04131532669067383,
|
406 |
+
0.04116070556640625,
|
407 |
+
0.04123852920532227,
|
408 |
+
0.04122623825073242,
|
409 |
+
0.041095169067382815,
|
410 |
+
0.04112384033203125,
|
411 |
+
0.04123555374145508,
|
412 |
+
0.041143199920654294,
|
413 |
+
0.04131532669067383,
|
414 |
+
0.04128768157958984,
|
415 |
+
0.041073665618896485,
|
416 |
+
0.041371646881103515,
|
417 |
+
0.04150271987915039,
|
418 |
+
0.041594879150390625,
|
419 |
+
0.04117913436889648,
|
420 |
+
0.04128255844116211,
|
421 |
+
0.041371646881103515,
|
422 |
+
0.04132761764526367,
|
423 |
+
0.04136345672607422,
|
424 |
+
0.041285633087158206,
|
425 |
+
0.04130815887451172,
|
426 |
+
0.04104806518554688,
|
427 |
+
0.041336830139160154,
|
428 |
+
0.04110745620727539
|
429 |
+
]
|
430 |
+
},
|
431 |
+
"throughput": {
|
432 |
+
"unit": "tokens/s",
|
433 |
+
"value": 23.993806395421863
|
434 |
+
},
|
435 |
+
"energy": null,
|
436 |
+
"efficiency": null
|
437 |
+
}
|
438 |
+
}
|