flatala-research committed on
Commit 1584927
1 Parent(s): 1390c3d

End of training

Files changed (4)
  1. README.md +1 -1
  2. all_results.json +8 -0
  3. test_results.json +8 -0
  4. trainer_state.json +820 -0
README.md CHANGED
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [MCG-NJU/videomae-large](https://huggingface.co/MCG-NJU/videomae-large) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 1.5204
+ - Loss: 1.5240
  - Accuracy: 0.6146
 
  ## Model description
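
For reference, a minimal sketch of running inference with the checkpoint this commit finalizes, using the transformers VideoMAE classes. The repo id below is an assumption pieced together from the commit author and the checkpoint directory name in trainer_state.json; point it at wherever the weights actually live.

```python
# Hedged sketch: load the fine-tuned VideoMAE classifier and classify one
# 16-frame clip. The repo id is assumed, not confirmed by this commit.
import numpy as np
import torch
from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor

repo_id = "flatala-research/videomae-large-finetuned-right-hand-conflab-v1"  # assumed
processor = VideoMAEImageProcessor.from_pretrained(repo_id)
model = VideoMAEForVideoClassification.from_pretrained(repo_id)
model.eval()

# VideoMAE consumes 16-frame clips; random frames stand in for a real video here.
video = [np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8) for _ in range(16)]
inputs = processor(video, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[int(logits.argmax(-1))])
```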
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 14.042022792022792,
+ "eval_accuracy": 0.6146341463414634,
+ "eval_loss": 1.5239903926849365,
+ "eval_runtime": 17.6083,
+ "eval_samples_per_second": 11.642,
+ "eval_steps_per_second": 0.738
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 14.042022792022792,
+ "eval_accuracy": 0.6146341463414634,
+ "eval_loss": 1.5239903926849365,
+ "eval_runtime": 17.6083,
+ "eval_samples_per_second": 11.642,
+ "eval_steps_per_second": 0.738
+ }
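
Both JSON files added here report the same final evaluation (accuracy ≈ 0.6146, loss ≈ 1.5240). A minimal sketch of fetching them from the Hub, assuming the same repo id as above:

```python
# Sketch: download the metrics files and print the headline numbers.
# The repo id is an assumption; point it at the actual repository.
import json
from huggingface_hub import hf_hub_download

repo_id = "flatala-research/videomae-large-finetuned-right-hand-conflab-v1"  # assumed
for filename in ("all_results.json", "test_results.json"):
    path = hf_hub_download(repo_id=repo_id, filename=filename)
    with open(path) as f:
        metrics = json.load(f)
    print(f"{filename}: accuracy={metrics['eval_accuracy']:.4f}, loss={metrics['eval_loss']:.4f}")
```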
trainer_state.json ADDED
@@ -0,0 +1,820 @@
+ {
+ "best_metric": 0.6601941747572816,
+ "best_model_checkpoint": "videomae-large-finetuned-right-hand-conflab-v1/checkpoint-708",
+ "epoch": 14.042022792022792,
+ "eval_steps": 500,
+ "global_step": 885,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.007122507122507123,
+ "grad_norm": 12.899408340454102,
+ "learning_rate": 3.5460992907801423e-06,
+ "loss": 2.0965,
+ "step": 10
+ },
+ {
+ "epoch": 0.014245014245014245,
+ "grad_norm": 9.095556259155273,
+ "learning_rate": 7.092198581560285e-06,
+ "loss": 2.1002,
+ "step": 20
+ },
+ {
+ "epoch": 0.021367521367521368,
+ "grad_norm": 11.837288856506348,
+ "learning_rate": 1.0638297872340426e-05,
+ "loss": 1.9683,
+ "step": 30
+ },
+ {
+ "epoch": 0.02849002849002849,
+ "grad_norm": 8.063945770263672,
+ "learning_rate": 1.418439716312057e-05,
+ "loss": 1.9838,
+ "step": 40
+ },
+ {
+ "epoch": 0.03561253561253561,
+ "grad_norm": 7.636847972869873,
+ "learning_rate": 1.773049645390071e-05,
+ "loss": 2.0232,
+ "step": 50
+ },
+ {
+ "epoch": 0.04202279202279202,
+ "eval_accuracy": 0.1941747572815534,
+ "eval_loss": 1.9421881437301636,
+ "eval_runtime": 24.149,
+ "eval_samples_per_second": 8.53,
+ "eval_steps_per_second": 0.538,
+ "step": 59
+ },
+ {
+ "epoch": 1.0007122507122508,
+ "grad_norm": 7.21124267578125,
+ "learning_rate": 2.1276595744680852e-05,
+ "loss": 1.981,
+ "step": 60
+ },
+ {
+ "epoch": 1.0078347578347577,
+ "grad_norm": 8.352581977844238,
+ "learning_rate": 2.4822695035460995e-05,
+ "loss": 1.9465,
+ "step": 70
+ },
+ {
+ "epoch": 1.014957264957265,
+ "grad_norm": 8.361379623413086,
+ "learning_rate": 2.836879432624114e-05,
+ "loss": 1.962,
+ "step": 80
+ },
+ {
+ "epoch": 1.022079772079772,
+ "grad_norm": 5.388889312744141,
+ "learning_rate": 3.191489361702128e-05,
+ "loss": 1.976,
+ "step": 90
+ },
+ {
+ "epoch": 1.0292022792022792,
+ "grad_norm": 5.553725719451904,
+ "learning_rate": 3.546099290780142e-05,
+ "loss": 2.0058,
+ "step": 100
+ },
+ {
+ "epoch": 1.0363247863247864,
+ "grad_norm": 6.278877258300781,
+ "learning_rate": 3.900709219858156e-05,
+ "loss": 1.8426,
+ "step": 110
+ },
+ {
+ "epoch": 1.042022792022792,
+ "eval_accuracy": 0.33980582524271846,
+ "eval_loss": 1.7418017387390137,
+ "eval_runtime": 19.8724,
+ "eval_samples_per_second": 10.366,
+ "eval_steps_per_second": 0.654,
+ "step": 118
+ },
+ {
+ "epoch": 2.0014245014245016,
+ "grad_norm": 5.7616729736328125,
+ "learning_rate": 4.2553191489361704e-05,
+ "loss": 1.9192,
+ "step": 120
+ },
+ {
+ "epoch": 2.0085470085470085,
+ "grad_norm": 6.886305332183838,
+ "learning_rate": 4.609929078014185e-05,
+ "loss": 1.8077,
+ "step": 130
+ },
+ {
+ "epoch": 2.0156695156695155,
+ "grad_norm": 12.934743881225586,
+ "learning_rate": 4.964539007092199e-05,
+ "loss": 1.6519,
+ "step": 140
+ },
+ {
+ "epoch": 2.022792022792023,
+ "grad_norm": 9.393815994262695,
+ "learning_rate": 4.96437054631829e-05,
+ "loss": 1.6934,
+ "step": 150
+ },
+ {
+ "epoch": 2.02991452991453,
+ "grad_norm": 10.663277626037598,
+ "learning_rate": 4.924782264449723e-05,
+ "loss": 1.7591,
+ "step": 160
+ },
+ {
+ "epoch": 2.037037037037037,
+ "grad_norm": 6.719297885894775,
+ "learning_rate": 4.885193982581156e-05,
+ "loss": 1.7424,
+ "step": 170
+ },
+ {
+ "epoch": 2.042022792022792,
+ "eval_accuracy": 0.4174757281553398,
+ "eval_loss": 1.6896483898162842,
+ "eval_runtime": 17.8016,
+ "eval_samples_per_second": 11.572,
+ "eval_steps_per_second": 0.73,
+ "step": 177
+ },
+ {
+ "epoch": 3.002136752136752,
+ "grad_norm": 5.332315921783447,
+ "learning_rate": 4.845605700712589e-05,
+ "loss": 1.7511,
+ "step": 180
+ },
+ {
+ "epoch": 3.009259259259259,
+ "grad_norm": 9.056990623474121,
+ "learning_rate": 4.806017418844022e-05,
+ "loss": 1.6771,
+ "step": 190
+ },
+ {
+ "epoch": 3.0163817663817665,
+ "grad_norm": 10.588340759277344,
+ "learning_rate": 4.766429136975455e-05,
+ "loss": 1.4029,
+ "step": 200
+ },
+ {
+ "epoch": 3.0235042735042734,
+ "grad_norm": 17.486988067626953,
+ "learning_rate": 4.7268408551068886e-05,
+ "loss": 1.4862,
+ "step": 210
+ },
+ {
+ "epoch": 3.030626780626781,
+ "grad_norm": 7.175242900848389,
+ "learning_rate": 4.687252573238321e-05,
+ "loss": 1.4282,
+ "step": 220
+ },
+ {
+ "epoch": 3.0377492877492878,
+ "grad_norm": 8.013775825500488,
+ "learning_rate": 4.647664291369755e-05,
+ "loss": 1.2206,
+ "step": 230
+ },
+ {
+ "epoch": 3.042022792022792,
+ "eval_accuracy": 0.44660194174757284,
+ "eval_loss": 1.628009557723999,
+ "eval_runtime": 21.5993,
+ "eval_samples_per_second": 9.537,
+ "eval_steps_per_second": 0.602,
+ "step": 236
+ },
+ {
+ "epoch": 4.002849002849003,
+ "grad_norm": 8.972482681274414,
+ "learning_rate": 4.6080760095011874e-05,
+ "loss": 1.6023,
+ "step": 240
+ },
+ {
+ "epoch": 4.00997150997151,
+ "grad_norm": 7.669183731079102,
+ "learning_rate": 4.568487727632621e-05,
+ "loss": 1.1725,
+ "step": 250
+ },
+ {
+ "epoch": 4.017094017094017,
+ "grad_norm": 7.465898513793945,
+ "learning_rate": 4.528899445764054e-05,
+ "loss": 1.2957,
+ "step": 260
+ },
+ {
+ "epoch": 4.024216524216524,
+ "grad_norm": 5.810666561126709,
+ "learning_rate": 4.4893111638954874e-05,
+ "loss": 1.3387,
+ "step": 270
+ },
+ {
+ "epoch": 4.031339031339031,
+ "grad_norm": 8.172585487365723,
+ "learning_rate": 4.44972288202692e-05,
+ "loss": 1.2668,
+ "step": 280
+ },
+ {
+ "epoch": 4.038461538461538,
+ "grad_norm": 7.007075309753418,
+ "learning_rate": 4.4101346001583535e-05,
+ "loss": 1.0738,
+ "step": 290
+ },
+ {
+ "epoch": 4.042022792022792,
+ "eval_accuracy": 0.5825242718446602,
+ "eval_loss": 1.2310322523117065,
+ "eval_runtime": 18.9007,
+ "eval_samples_per_second": 10.899,
+ "eval_steps_per_second": 0.688,
+ "step": 295
+ },
+ {
+ "epoch": 5.003561253561253,
+ "grad_norm": 4.775450229644775,
+ "learning_rate": 4.370546318289787e-05,
+ "loss": 1.0455,
+ "step": 300
+ },
+ {
+ "epoch": 5.010683760683761,
+ "grad_norm": 9.440917015075684,
+ "learning_rate": 4.3309580364212195e-05,
+ "loss": 0.99,
+ "step": 310
+ },
+ {
+ "epoch": 5.017806267806268,
+ "grad_norm": 5.092905521392822,
+ "learning_rate": 4.291369754552653e-05,
+ "loss": 0.9873,
+ "step": 320
+ },
+ {
+ "epoch": 5.0249287749287745,
+ "grad_norm": 8.457822799682617,
+ "learning_rate": 4.2517814726840856e-05,
+ "loss": 0.8012,
+ "step": 330
+ },
+ {
+ "epoch": 5.032051282051282,
+ "grad_norm": 6.717366695404053,
+ "learning_rate": 4.212193190815519e-05,
+ "loss": 0.9262,
+ "step": 340
+ },
+ {
+ "epoch": 5.039173789173789,
+ "grad_norm": 6.962019920349121,
+ "learning_rate": 4.172604908946952e-05,
+ "loss": 1.0054,
+ "step": 350
+ },
+ {
+ "epoch": 5.042022792022792,
+ "eval_accuracy": 0.558252427184466,
+ "eval_loss": 1.3242673873901367,
+ "eval_runtime": 20.0158,
+ "eval_samples_per_second": 10.292,
+ "eval_steps_per_second": 0.649,
+ "step": 354
+ },
+ {
+ "epoch": 6.004273504273504,
+ "grad_norm": 17.063520431518555,
+ "learning_rate": 4.133016627078385e-05,
+ "loss": 0.8909,
+ "step": 360
+ },
+ {
+ "epoch": 6.011396011396012,
+ "grad_norm": 8.150900840759277,
+ "learning_rate": 4.093428345209818e-05,
+ "loss": 0.89,
+ "step": 370
+ },
+ {
+ "epoch": 6.018518518518518,
+ "grad_norm": 8.059175491333008,
+ "learning_rate": 4.053840063341251e-05,
+ "loss": 0.7925,
+ "step": 380
+ },
+ {
+ "epoch": 6.0256410256410255,
+ "grad_norm": 7.513158798217773,
+ "learning_rate": 4.0142517814726843e-05,
+ "loss": 0.853,
+ "step": 390
+ },
+ {
+ "epoch": 6.032763532763533,
+ "grad_norm": 6.1937971115112305,
+ "learning_rate": 3.974663499604117e-05,
+ "loss": 0.7338,
+ "step": 400
+ },
+ {
+ "epoch": 6.0398860398860394,
+ "grad_norm": 5.745666980743408,
+ "learning_rate": 3.9350752177355504e-05,
+ "loss": 0.782,
+ "step": 410
+ },
+ {
+ "epoch": 6.042022792022792,
+ "eval_accuracy": 0.6359223300970874,
+ "eval_loss": 1.1890981197357178,
+ "eval_runtime": 19.1994,
+ "eval_samples_per_second": 10.73,
+ "eval_steps_per_second": 0.677,
+ "step": 413
+ },
+ {
+ "epoch": 7.004985754985755,
+ "grad_norm": 7.788235187530518,
+ "learning_rate": 3.895486935866984e-05,
+ "loss": 0.745,
+ "step": 420
+ },
+ {
+ "epoch": 7.012108262108262,
+ "grad_norm": 8.95632553100586,
+ "learning_rate": 3.8558986539984164e-05,
+ "loss": 0.6315,
+ "step": 430
+ },
+ {
+ "epoch": 7.019230769230769,
+ "grad_norm": 9.59426498413086,
+ "learning_rate": 3.81631037212985e-05,
+ "loss": 0.6792,
+ "step": 440
+ },
+ {
+ "epoch": 7.0263532763532766,
+ "grad_norm": 7.637509822845459,
+ "learning_rate": 3.7767220902612825e-05,
+ "loss": 0.5733,
+ "step": 450
+ },
+ {
+ "epoch": 7.033475783475783,
+ "grad_norm": 10.775083541870117,
+ "learning_rate": 3.737133808392716e-05,
+ "loss": 0.7303,
+ "step": 460
+ },
+ {
+ "epoch": 7.0405982905982905,
+ "grad_norm": 6.580932140350342,
+ "learning_rate": 3.6975455265241485e-05,
+ "loss": 0.599,
+ "step": 470
+ },
+ {
+ "epoch": 7.042022792022792,
+ "eval_accuracy": 0.6504854368932039,
+ "eval_loss": 1.193009376525879,
+ "eval_runtime": 20.2931,
+ "eval_samples_per_second": 10.151,
+ "eval_steps_per_second": 0.641,
+ "step": 472
+ },
+ {
+ "epoch": 8.005698005698006,
+ "grad_norm": 8.682097434997559,
+ "learning_rate": 3.657957244655582e-05,
+ "loss": 0.4704,
+ "step": 480
+ },
+ {
+ "epoch": 8.012820512820513,
+ "grad_norm": 8.527168273925781,
+ "learning_rate": 3.618368962787015e-05,
+ "loss": 0.6392,
+ "step": 490
+ },
+ {
+ "epoch": 8.01994301994302,
+ "grad_norm": 6.198835849761963,
+ "learning_rate": 3.578780680918448e-05,
+ "loss": 0.3972,
+ "step": 500
+ },
+ {
+ "epoch": 8.027065527065528,
+ "grad_norm": 6.177005290985107,
+ "learning_rate": 3.539192399049881e-05,
+ "loss": 0.4873,
+ "step": 510
+ },
+ {
+ "epoch": 8.034188034188034,
+ "grad_norm": 9.330336570739746,
+ "learning_rate": 3.4996041171813146e-05,
+ "loss": 0.4534,
+ "step": 520
+ },
+ {
+ "epoch": 8.04131054131054,
+ "grad_norm": 5.25494384765625,
+ "learning_rate": 3.460015835312748e-05,
+ "loss": 0.6782,
+ "step": 530
+ },
+ {
+ "epoch": 8.042022792022792,
+ "eval_accuracy": 0.6359223300970874,
+ "eval_loss": 1.2866381406784058,
+ "eval_runtime": 19.101,
+ "eval_samples_per_second": 10.785,
+ "eval_steps_per_second": 0.681,
+ "step": 531
+ },
+ {
+ "epoch": 9.006410256410257,
+ "grad_norm": 5.766805648803711,
+ "learning_rate": 3.4204275534441806e-05,
+ "loss": 0.392,
+ "step": 540
+ },
+ {
+ "epoch": 9.013532763532764,
+ "grad_norm": 11.340324401855469,
+ "learning_rate": 3.380839271575614e-05,
+ "loss": 0.4548,
+ "step": 550
+ },
+ {
+ "epoch": 9.02065527065527,
+ "grad_norm": 10.191842079162598,
+ "learning_rate": 3.3412509897070474e-05,
+ "loss": 0.3105,
+ "step": 560
+ },
+ {
+ "epoch": 9.027777777777779,
+ "grad_norm": 5.949177265167236,
+ "learning_rate": 3.30166270783848e-05,
+ "loss": 0.4979,
+ "step": 570
+ },
+ {
+ "epoch": 9.034900284900285,
+ "grad_norm": 11.043278694152832,
+ "learning_rate": 3.2620744259699134e-05,
+ "loss": 0.3972,
+ "step": 580
+ },
+ {
+ "epoch": 9.042022792022792,
+ "grad_norm": 16.354900360107422,
+ "learning_rate": 3.222486144101346e-05,
+ "loss": 0.3033,
+ "step": 590
+ },
+ {
+ "epoch": 9.042022792022792,
+ "eval_accuracy": 0.5776699029126213,
+ "eval_loss": 1.423584222793579,
+ "eval_runtime": 20.9024,
+ "eval_samples_per_second": 9.855,
+ "eval_steps_per_second": 0.622,
+ "step": 590
+ },
+ {
+ "epoch": 10.007122507122507,
+ "grad_norm": 1.9087872505187988,
+ "learning_rate": 3.1828978622327794e-05,
+ "loss": 0.1958,
+ "step": 600
+ },
+ {
+ "epoch": 10.014245014245015,
+ "grad_norm": 6.443134307861328,
+ "learning_rate": 3.143309580364212e-05,
+ "loss": 0.2865,
+ "step": 610
+ },
+ {
+ "epoch": 10.021367521367521,
+ "grad_norm": 5.271445274353027,
+ "learning_rate": 3.1037212984956455e-05,
+ "loss": 0.2709,
+ "step": 620
+ },
+ {
+ "epoch": 10.028490028490028,
+ "grad_norm": 5.430334568023682,
+ "learning_rate": 3.064133016627079e-05,
+ "loss": 0.3552,
+ "step": 630
+ },
+ {
+ "epoch": 10.035612535612536,
+ "grad_norm": 8.00438117980957,
+ "learning_rate": 3.0245447347585115e-05,
+ "loss": 0.2236,
+ "step": 640
+ },
+ {
+ "epoch": 10.042022792022792,
+ "eval_accuracy": 0.6553398058252428,
+ "eval_loss": 1.3206462860107422,
+ "eval_runtime": 18.1657,
+ "eval_samples_per_second": 11.34,
+ "eval_steps_per_second": 0.716,
+ "step": 649
+ },
+ {
+ "epoch": 11.000712250712251,
+ "grad_norm": 8.367637634277344,
+ "learning_rate": 2.984956452889945e-05,
+ "loss": 0.2997,
+ "step": 650
+ },
+ {
+ "epoch": 11.007834757834758,
+ "grad_norm": 4.96151065826416,
+ "learning_rate": 2.9453681710213776e-05,
+ "loss": 0.2366,
+ "step": 660
+ },
+ {
+ "epoch": 11.014957264957266,
+ "grad_norm": 4.967086315155029,
+ "learning_rate": 2.905779889152811e-05,
+ "loss": 0.2191,
+ "step": 670
+ },
+ {
+ "epoch": 11.022079772079772,
+ "grad_norm": 5.6478095054626465,
+ "learning_rate": 2.8661916072842436e-05,
+ "loss": 0.2042,
+ "step": 680
+ },
+ {
+ "epoch": 11.029202279202279,
+ "grad_norm": 15.652437210083008,
+ "learning_rate": 2.826603325415677e-05,
+ "loss": 0.2747,
+ "step": 690
+ },
+ {
+ "epoch": 11.036324786324787,
+ "grad_norm": 4.030658721923828,
+ "learning_rate": 2.7870150435471103e-05,
+ "loss": 0.1756,
+ "step": 700
+ },
+ {
+ "epoch": 11.042022792022792,
+ "eval_accuracy": 0.6601941747572816,
+ "eval_loss": 1.5112863779067993,
+ "eval_runtime": 18.776,
+ "eval_samples_per_second": 10.971,
+ "eval_steps_per_second": 0.692,
+ "step": 708
+ },
+ {
+ "epoch": 12.001424501424502,
+ "grad_norm": 3.84013295173645,
+ "learning_rate": 2.7474267616785433e-05,
+ "loss": 0.1714,
+ "step": 710
+ },
+ {
+ "epoch": 12.008547008547009,
+ "grad_norm": 5.428359508514404,
+ "learning_rate": 2.7078384798099763e-05,
+ "loss": 0.2056,
+ "step": 720
+ },
+ {
+ "epoch": 12.015669515669515,
+ "grad_norm": 5.529027938842773,
+ "learning_rate": 2.6682501979414094e-05,
+ "loss": 0.1018,
+ "step": 730
+ },
+ {
+ "epoch": 12.022792022792023,
+ "grad_norm": 5.976778984069824,
+ "learning_rate": 2.6286619160728427e-05,
+ "loss": 0.0961,
+ "step": 740
+ },
+ {
+ "epoch": 12.02991452991453,
+ "grad_norm": 6.043054580688477,
+ "learning_rate": 2.5890736342042754e-05,
+ "loss": 0.2393,
+ "step": 750
+ },
+ {
+ "epoch": 12.037037037037036,
+ "grad_norm": 4.8577656745910645,
+ "learning_rate": 2.5494853523357088e-05,
+ "loss": 0.1341,
+ "step": 760
+ },
+ {
+ "epoch": 12.042022792022792,
+ "eval_accuracy": 0.6407766990291263,
+ "eval_loss": 1.6544133424758911,
+ "eval_runtime": 18.7553,
+ "eval_samples_per_second": 10.984,
+ "eval_steps_per_second": 0.693,
+ "step": 767
+ },
+ {
+ "epoch": 13.002136752136753,
+ "grad_norm": 3.0963165760040283,
+ "learning_rate": 2.509897070467142e-05,
+ "loss": 0.1632,
+ "step": 770
+ },
+ {
+ "epoch": 13.00925925925926,
+ "grad_norm": 1.0908960103988647,
+ "learning_rate": 2.4703087885985748e-05,
+ "loss": 0.1377,
+ "step": 780
+ },
+ {
+ "epoch": 13.016381766381766,
+ "grad_norm": 13.51460075378418,
+ "learning_rate": 2.4307205067300078e-05,
+ "loss": 0.1605,
+ "step": 790
+ },
+ {
+ "epoch": 13.023504273504274,
+ "grad_norm": 3.4943864345550537,
+ "learning_rate": 2.3911322248614412e-05,
+ "loss": 0.1576,
+ "step": 800
+ },
+ {
+ "epoch": 13.03062678062678,
+ "grad_norm": 3.6334426403045654,
+ "learning_rate": 2.3515439429928742e-05,
+ "loss": 0.1366,
+ "step": 810
+ },
+ {
+ "epoch": 13.037749287749287,
+ "grad_norm": 1.571234107017517,
+ "learning_rate": 2.3119556611243072e-05,
+ "loss": 0.0823,
+ "step": 820
+ },
+ {
+ "epoch": 13.042022792022792,
+ "eval_accuracy": 0.6553398058252428,
+ "eval_loss": 1.61236572265625,
+ "eval_runtime": 18.782,
+ "eval_samples_per_second": 10.968,
+ "eval_steps_per_second": 0.692,
+ "step": 826
+ },
+ {
+ "epoch": 14.002849002849002,
+ "grad_norm": 6.992094039916992,
+ "learning_rate": 2.2723673792557402e-05,
+ "loss": 0.0825,
+ "step": 830
+ },
+ {
+ "epoch": 14.00997150997151,
+ "grad_norm": 0.7775176763534546,
+ "learning_rate": 2.2327790973871736e-05,
+ "loss": 0.0993,
+ "step": 840
+ },
+ {
+ "epoch": 14.017094017094017,
+ "grad_norm": 5.0337700843811035,
+ "learning_rate": 2.1931908155186066e-05,
+ "loss": 0.0713,
+ "step": 850
+ },
+ {
+ "epoch": 14.024216524216524,
+ "grad_norm": 0.6653324365615845,
+ "learning_rate": 2.1536025336500396e-05,
+ "loss": 0.1065,
+ "step": 860
+ },
+ {
+ "epoch": 14.031339031339032,
+ "grad_norm": 11.971256256103516,
+ "learning_rate": 2.114014251781473e-05,
+ "loss": 0.144,
+ "step": 870
+ },
+ {
+ "epoch": 14.038461538461538,
+ "grad_norm": 10.947636604309082,
+ "learning_rate": 2.074425969912906e-05,
+ "loss": 0.0691,
+ "step": 880
+ },
+ {
+ "epoch": 14.042022792022792,
+ "eval_accuracy": 0.6456310679611651,
+ "eval_loss": 1.8230090141296387,
+ "eval_runtime": 17.8025,
+ "eval_samples_per_second": 11.571,
+ "eval_steps_per_second": 0.73,
+ "step": 885
+ },
+ {
+ "epoch": 14.042022792022792,
+ "step": 885,
+ "total_flos": 6.204645759270519e+19,
+ "train_loss": 0.8409665932847281,
+ "train_runtime": 3290.4789,
+ "train_samples_per_second": 6.827,
+ "train_steps_per_second": 0.427
+ },
+ {
+ "epoch": 14.042022792022792,
+ "eval_accuracy": 0.6146341463414634,
+ "eval_loss": 1.5203598737716675,
+ "eval_runtime": 24.6199,
+ "eval_samples_per_second": 8.327,
+ "eval_steps_per_second": 0.528,
+ "step": 885
+ },
+ {
+ "epoch": 14.042022792022792,
+ "eval_accuracy": 0.6146341463414634,
+ "eval_loss": 1.5239903926849365,
+ "eval_runtime": 17.6083,
+ "eval_samples_per_second": 11.642,
+ "eval_steps_per_second": 0.738,
+ "step": 885
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 1404,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 9223372036854775807,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 3,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 0
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 6.204645759270519e+19,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": null
+ }
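
trainer_state.json records the best metric (0.6602 at checkpoint-708) plus the full log_history: a training-loss entry every 10 steps and an evaluation entry at the end of each epoch. Training stopped at step 885 of a possible 1404, consistent with the EarlyStoppingCallback (patience 3) once accuracy stopped improving after step 708. A minimal sketch of reading that structure, assuming a local copy of the file:

```python
# Sketch: list the per-epoch evaluation entries from log_history and recover
# the best checkpoint, assuming trainer_state.json has been downloaded locally.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

evals = [entry for entry in state["log_history"] if "eval_accuracy" in entry]
for entry in evals:
    print(f"step {entry['step']:>4}: accuracy={entry['eval_accuracy']:.4f}, loss={entry['eval_loss']:.4f}")

best = max(evals, key=lambda entry: entry["eval_accuracy"])
print(f"best: step {best['step']} with accuracy {best['eval_accuracy']:.4f}")  # step 708 for this run
```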