ninhnguyendx779 committed on
Commit
8ebab4f
1 Parent(s): 40cf913

End of training

Browse files
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [MCG-NJU/videomae-base-finetuned-kinetics](https://huggingface.co/MCG-NJU/videomae-base-finetuned-kinetics) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0012
21
- - Accuracy: 1.0
22
 
23
  ## Model description
24
 
 
17
 
18
  This model is a fine-tuned version of [MCG-NJU/videomae-base-finetuned-kinetics](https://huggingface.co/MCG-NJU/videomae-base-finetuned-kinetics) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.0953
21
+ - Accuracy: 0.9762
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.9887429643527205,
4
- "eval_loss": 0.020199885591864586,
5
- "eval_runtime": 328.4712,
6
- "eval_samples_per_second": 1.623,
7
- "eval_steps_per_second": 0.813
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.9761904761904762,
4
+ "eval_loss": 0.09526590257883072,
5
+ "eval_runtime": 122.5253,
6
+ "eval_samples_per_second": 1.714,
7
+ "eval_steps_per_second": 0.433
8
  }
runs/Apr07_14-34-51_2f21d1345b5e/events.out.tfevents.1712501216.2f21d1345b5e.11416.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cfa6f367726c37195a1938e893b4a7267c3028bc5881c55498da2df2d4792b9
3
+ size 411
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.9887429643527205,
4
- "eval_loss": 0.020199885591864586,
5
- "eval_runtime": 328.4712,
6
- "eval_samples_per_second": 1.623,
7
- "eval_steps_per_second": 0.813
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.9761904761904762,
4
+ "eval_loss": 0.09526590257883072,
5
+ "eval_runtime": 122.5253,
6
+ "eval_samples_per_second": 1.714,
7
+ "eval_steps_per_second": 0.433
8
  }
trainer_state.json CHANGED
@@ -1,663 +1,285 @@
1
  {
2
- "best_metric": 0.9964912280701754,
3
- "best_model_checkpoint": "videomae-base-finetuned-bekhoaxe/checkpoint-636",
4
  "epoch": 3.25,
5
  "eval_steps": 500,
6
- "global_step": 848,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
- "grad_norm": 10.807732582092285,
14
- "learning_rate": 5.882352941176471e-06,
15
- "loss": 0.6707,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.02,
20
- "grad_norm": 6.269227504730225,
21
- "learning_rate": 1.1764705882352942e-05,
22
- "loss": 0.6435,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.04,
27
- "grad_norm": 10.753657341003418,
28
- "learning_rate": 1.7647058823529414e-05,
29
- "loss": 0.571,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.05,
34
- "grad_norm": 9.196399688720703,
35
- "learning_rate": 2.3529411764705884e-05,
36
- "loss": 0.4578,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.06,
41
- "grad_norm": 3.8521430492401123,
42
- "learning_rate": 2.9411764705882354e-05,
43
- "loss": 0.2468,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.07,
48
- "grad_norm": 4.0225911140441895,
49
- "learning_rate": 3.529411764705883e-05,
50
- "loss": 0.1737,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.08,
55
- "grad_norm": 0.2649253010749817,
56
- "learning_rate": 4.11764705882353e-05,
57
- "loss": 0.0356,
58
  "step": 70
59
  },
60
- {
61
- "epoch": 0.09,
62
- "grad_norm": 106.07608795166016,
63
- "learning_rate": 4.705882352941177e-05,
64
- "loss": 0.3572,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.11,
69
- "grad_norm": 0.02139919251203537,
70
- "learning_rate": 4.9672346002621236e-05,
71
- "loss": 0.2095,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.12,
76
- "grad_norm": 87.85816955566406,
77
- "learning_rate": 4.90170380078637e-05,
78
- "loss": 0.389,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.13,
83
- "grad_norm": 0.4829784035682678,
84
- "learning_rate": 4.836173001310616e-05,
85
- "loss": 0.1773,
86
- "step": 110
87
- },
88
- {
89
- "epoch": 0.14,
90
- "grad_norm": 0.020957980304956436,
91
- "learning_rate": 4.7706422018348626e-05,
92
- "loss": 0.0059,
93
- "step": 120
94
- },
95
- {
96
- "epoch": 0.15,
97
- "grad_norm": 0.018635602667927742,
98
- "learning_rate": 4.705111402359109e-05,
99
- "loss": 1.1257,
100
- "step": 130
101
- },
102
- {
103
- "epoch": 0.17,
104
- "grad_norm": 0.2680661976337433,
105
- "learning_rate": 4.6395806028833554e-05,
106
- "loss": 0.4074,
107
- "step": 140
108
- },
109
- {
110
- "epoch": 0.18,
111
- "grad_norm": 0.034663423895835876,
112
- "learning_rate": 4.5740498034076015e-05,
113
- "loss": 0.0125,
114
- "step": 150
115
- },
116
- {
117
- "epoch": 0.19,
118
- "grad_norm": 0.027404017746448517,
119
- "learning_rate": 4.508519003931848e-05,
120
- "loss": 0.1217,
121
- "step": 160
122
- },
123
- {
124
- "epoch": 0.2,
125
- "grad_norm": 25.80691146850586,
126
- "learning_rate": 4.4429882044560943e-05,
127
- "loss": 0.3167,
128
- "step": 170
129
- },
130
- {
131
- "epoch": 0.21,
132
- "grad_norm": 0.13836145401000977,
133
- "learning_rate": 4.3774574049803404e-05,
134
- "loss": 0.5605,
135
- "step": 180
136
- },
137
- {
138
- "epoch": 0.22,
139
- "grad_norm": 0.010564002208411694,
140
- "learning_rate": 4.311926605504588e-05,
141
- "loss": 0.002,
142
- "step": 190
143
- },
144
- {
145
- "epoch": 0.24,
146
- "grad_norm": 0.02207314595580101,
147
- "learning_rate": 4.246395806028834e-05,
148
- "loss": 0.1598,
149
- "step": 200
150
- },
151
- {
152
- "epoch": 0.25,
153
- "grad_norm": 0.039745017886161804,
154
- "learning_rate": 4.18086500655308e-05,
155
- "loss": 0.337,
156
- "step": 210
157
- },
158
  {
159
  "epoch": 0.25,
160
- "eval_accuracy": 0.9508771929824561,
161
- "eval_loss": 0.13453243672847748,
162
- "eval_runtime": 161.5975,
163
- "eval_samples_per_second": 1.764,
164
- "eval_steps_per_second": 0.885,
165
- "step": 212
166
  },
167
  {
168
  "epoch": 1.01,
169
- "grad_norm": 1.155490756034851,
170
- "learning_rate": 4.115334207077327e-05,
171
- "loss": 0.3642,
172
- "step": 220
173
- },
174
- {
175
- "epoch": 1.02,
176
- "grad_norm": 0.03908234462141991,
177
- "learning_rate": 4.049803407601573e-05,
178
- "loss": 0.0017,
179
- "step": 230
180
- },
181
- {
182
- "epoch": 1.03,
183
- "grad_norm": 0.07498504221439362,
184
- "learning_rate": 3.984272608125819e-05,
185
- "loss": 0.0012,
186
- "step": 240
187
  },
188
  {
189
  "epoch": 1.04,
190
- "grad_norm": 0.011099644936621189,
191
- "learning_rate": 3.918741808650066e-05,
192
- "loss": 0.6515,
193
- "step": 250
194
- },
195
- {
196
- "epoch": 1.06,
197
- "grad_norm": 0.13020159304141998,
198
- "learning_rate": 3.8532110091743125e-05,
199
- "loss": 0.0009,
200
- "step": 260
201
  },
202
  {
203
  "epoch": 1.07,
204
- "grad_norm": 0.03461850434541702,
205
- "learning_rate": 3.7876802096985586e-05,
206
- "loss": 0.3921,
207
- "step": 270
208
- },
209
- {
210
- "epoch": 1.08,
211
- "grad_norm": 0.013056197203695774,
212
- "learning_rate": 3.722149410222805e-05,
213
- "loss": 0.2843,
214
- "step": 280
215
- },
216
- {
217
- "epoch": 1.09,
218
- "grad_norm": 0.00816601887345314,
219
- "learning_rate": 3.6566186107470514e-05,
220
- "loss": 0.2704,
221
- "step": 290
222
- },
223
- {
224
- "epoch": 1.1,
225
- "grad_norm": 0.4981946647167206,
226
- "learning_rate": 3.5910878112712975e-05,
227
- "loss": 0.0053,
228
- "step": 300
229
- },
230
- {
231
- "epoch": 1.12,
232
- "grad_norm": 0.014170478098094463,
233
- "learning_rate": 3.5255570117955436e-05,
234
- "loss": 0.2984,
235
- "step": 310
236
  },
237
  {
238
- "epoch": 1.13,
239
- "grad_norm": 0.03322271630167961,
240
- "learning_rate": 3.460026212319791e-05,
241
- "loss": 0.0012,
242
- "step": 320
243
  },
244
  {
245
  "epoch": 1.14,
246
- "grad_norm": 0.019952110946178436,
247
- "learning_rate": 3.394495412844037e-05,
248
- "loss": 0.0167,
249
- "step": 330
250
- },
251
- {
252
- "epoch": 1.15,
253
- "grad_norm": 2.8213207721710205,
254
- "learning_rate": 3.328964613368283e-05,
255
- "loss": 0.4437,
256
- "step": 340
257
- },
258
- {
259
- "epoch": 1.16,
260
- "grad_norm": 0.026097455993294716,
261
- "learning_rate": 3.26343381389253e-05,
262
- "loss": 0.0015,
263
- "step": 350
264
  },
265
  {
266
  "epoch": 1.17,
267
- "grad_norm": 0.32980480790138245,
268
- "learning_rate": 3.197903014416776e-05,
269
- "loss": 0.2044,
270
- "step": 360
271
- },
272
- {
273
- "epoch": 1.19,
274
- "grad_norm": 0.3815198540687561,
275
- "learning_rate": 3.132372214941022e-05,
276
- "loss": 0.1395,
277
- "step": 370
278
  },
279
  {
280
  "epoch": 1.2,
281
- "grad_norm": 0.014452760107815266,
282
- "learning_rate": 3.066841415465269e-05,
283
- "loss": 0.0014,
284
- "step": 380
285
- },
286
- {
287
- "epoch": 1.21,
288
- "grad_norm": 7.192620754241943,
289
- "learning_rate": 3.0013106159895154e-05,
290
- "loss": 0.0059,
291
- "step": 390
292
- },
293
- {
294
- "epoch": 1.22,
295
- "grad_norm": 0.009683230891823769,
296
- "learning_rate": 2.9357798165137618e-05,
297
- "loss": 0.0054,
298
- "step": 400
299
- },
300
- {
301
- "epoch": 1.23,
302
- "grad_norm": 0.0035556950606405735,
303
- "learning_rate": 2.8702490170380082e-05,
304
- "loss": 0.0122,
305
- "step": 410
306
  },
307
  {
308
- "epoch": 1.25,
309
- "grad_norm": 0.010539459995925426,
310
- "learning_rate": 2.8047182175622543e-05,
311
- "loss": 0.5753,
312
- "step": 420
313
  },
314
  {
315
  "epoch": 1.25,
316
- "eval_accuracy": 0.9789473684210527,
317
- "eval_loss": 0.05574525147676468,
318
- "eval_runtime": 89.1683,
319
- "eval_samples_per_second": 3.196,
320
- "eval_steps_per_second": 1.604,
321
- "step": 424
322
- },
323
- {
324
- "epoch": 2.01,
325
- "grad_norm": 0.017384065315127373,
326
- "learning_rate": 2.7391874180865007e-05,
327
- "loss": 0.0032,
328
- "step": 430
329
  },
330
  {
331
  "epoch": 2.02,
332
- "grad_norm": 0.041157085448503494,
333
- "learning_rate": 2.673656618610747e-05,
334
- "loss": 0.0388,
335
- "step": 440
336
- },
337
- {
338
- "epoch": 2.03,
339
- "grad_norm": 0.014596754685044289,
340
- "learning_rate": 2.6081258191349932e-05,
341
- "loss": 0.002,
342
- "step": 450
343
- },
344
- {
345
- "epoch": 2.04,
346
- "grad_norm": 0.004826271440833807,
347
- "learning_rate": 2.5425950196592403e-05,
348
- "loss": 0.1837,
349
- "step": 460
350
  },
351
  {
352
  "epoch": 2.05,
353
- "grad_norm": 0.01182369515299797,
354
- "learning_rate": 2.4770642201834864e-05,
355
- "loss": 0.2806,
356
- "step": 470
357
- },
358
- {
359
- "epoch": 2.07,
360
- "grad_norm": 0.012596765533089638,
361
- "learning_rate": 2.411533420707733e-05,
362
- "loss": 0.001,
363
- "step": 480
364
  },
365
  {
366
  "epoch": 2.08,
367
- "grad_norm": 0.014254101552069187,
368
- "learning_rate": 2.3460026212319793e-05,
369
- "loss": 0.0149,
370
- "step": 490
371
- },
372
- {
373
- "epoch": 2.09,
374
- "grad_norm": 0.0014000836526975036,
375
- "learning_rate": 2.2804718217562254e-05,
376
- "loss": 0.3089,
377
- "step": 500
378
- },
379
- {
380
- "epoch": 2.1,
381
- "grad_norm": 0.0013152866158634424,
382
- "learning_rate": 2.214941022280472e-05,
383
- "loss": 0.0019,
384
- "step": 510
385
- },
386
- {
387
- "epoch": 2.11,
388
- "grad_norm": 0.0013162486720830202,
389
- "learning_rate": 2.1494102228047182e-05,
390
- "loss": 0.0012,
391
- "step": 520
392
  },
393
  {
394
  "epoch": 2.12,
395
- "grad_norm": 0.01350224670022726,
396
- "learning_rate": 2.0838794233289646e-05,
397
- "loss": 0.0004,
398
- "step": 530
399
- },
400
- {
401
- "epoch": 2.14,
402
- "grad_norm": 0.0011886212741956115,
403
- "learning_rate": 2.018348623853211e-05,
404
- "loss": 0.0002,
405
- "step": 540
406
  },
407
  {
408
  "epoch": 2.15,
409
- "grad_norm": 0.013104724697768688,
410
- "learning_rate": 1.9528178243774575e-05,
411
- "loss": 0.0005,
412
- "step": 550
413
- },
414
- {
415
- "epoch": 2.16,
416
- "grad_norm": 141.24769592285156,
417
- "learning_rate": 1.887287024901704e-05,
418
- "loss": 0.0858,
419
- "step": 560
420
- },
421
- {
422
- "epoch": 2.17,
423
- "grad_norm": 89.11790466308594,
424
- "learning_rate": 1.82175622542595e-05,
425
- "loss": 0.6766,
426
- "step": 570
427
  },
428
  {
429
  "epoch": 2.18,
430
- "grad_norm": 0.03498009964823723,
431
- "learning_rate": 1.7562254259501968e-05,
432
- "loss": 0.0006,
433
- "step": 580
434
- },
435
- {
436
- "epoch": 2.2,
437
- "grad_norm": 0.03515917435288429,
438
- "learning_rate": 1.6906946264744432e-05,
439
- "loss": 0.0003,
440
- "step": 590
441
  },
442
  {
443
  "epoch": 2.21,
444
- "grad_norm": 0.003264626022428274,
445
- "learning_rate": 1.6251638269986893e-05,
446
- "loss": 0.0004,
447
- "step": 600
448
- },
449
- {
450
- "epoch": 2.22,
451
- "grad_norm": 0.011713879182934761,
452
- "learning_rate": 1.559633027522936e-05,
453
- "loss": 0.5946,
454
- "step": 610
455
- },
456
- {
457
- "epoch": 2.23,
458
- "grad_norm": 0.06233768165111542,
459
- "learning_rate": 1.4941022280471823e-05,
460
- "loss": 0.1258,
461
- "step": 620
462
- },
463
- {
464
- "epoch": 2.24,
465
- "grad_norm": 0.0129940714687109,
466
- "learning_rate": 1.4285714285714285e-05,
467
- "loss": 0.1747,
468
- "step": 630
469
  },
470
  {
471
  "epoch": 2.25,
472
- "eval_accuracy": 0.9964912280701754,
473
- "eval_loss": 0.009211267344653606,
474
- "eval_runtime": 75.987,
475
- "eval_samples_per_second": 3.751,
476
- "eval_steps_per_second": 1.882,
477
- "step": 636
478
- },
479
- {
480
- "epoch": 3.0,
481
- "grad_norm": 0.0608808733522892,
482
- "learning_rate": 1.3630406290956751e-05,
483
- "loss": 0.0226,
484
- "step": 640
485
  },
486
  {
487
- "epoch": 3.02,
488
- "grad_norm": 0.08016235381364822,
489
- "learning_rate": 1.2975098296199214e-05,
490
- "loss": 0.0644,
491
- "step": 650
 
 
492
  },
493
  {
494
  "epoch": 3.03,
495
- "grad_norm": 0.6168311834335327,
496
- "learning_rate": 1.2319790301441678e-05,
497
- "loss": 0.0011,
498
- "step": 660
499
- },
500
- {
501
- "epoch": 3.04,
502
- "grad_norm": 0.0013453299179673195,
503
- "learning_rate": 1.1664482306684142e-05,
504
- "loss": 0.1834,
505
- "step": 670
506
- },
507
- {
508
- "epoch": 3.05,
509
- "grad_norm": 0.0041399141773581505,
510
- "learning_rate": 1.1009174311926607e-05,
511
- "loss": 0.2115,
512
- "step": 680
513
  },
514
  {
515
  "epoch": 3.06,
516
- "grad_norm": 0.10097178816795349,
517
- "learning_rate": 1.035386631716907e-05,
518
- "loss": 0.2718,
519
- "step": 690
520
- },
521
- {
522
- "epoch": 3.08,
523
- "grad_norm": 0.03554755076766014,
524
- "learning_rate": 9.698558322411533e-06,
525
- "loss": 0.0316,
526
- "step": 700
527
  },
528
  {
529
  "epoch": 3.09,
530
- "grad_norm": 0.3528023958206177,
531
- "learning_rate": 9.043250327653998e-06,
532
- "loss": 0.001,
533
- "step": 710
534
- },
535
- {
536
- "epoch": 3.1,
537
- "grad_norm": 0.016770780086517334,
538
- "learning_rate": 8.387942332896462e-06,
539
- "loss": 0.0004,
540
- "step": 720
541
- },
542
- {
543
- "epoch": 3.11,
544
- "grad_norm": 0.6642473936080933,
545
- "learning_rate": 7.732634338138926e-06,
546
- "loss": 0.0012,
547
- "step": 730
548
- },
549
- {
550
- "epoch": 3.12,
551
- "grad_norm": 0.0030567694921046495,
552
- "learning_rate": 7.07732634338139e-06,
553
- "loss": 0.2103,
554
- "step": 740
555
  },
556
  {
557
  "epoch": 3.13,
558
- "grad_norm": 0.003925285767763853,
559
- "learning_rate": 6.422018348623854e-06,
560
- "loss": 0.0006,
561
- "step": 750
562
- },
563
- {
564
- "epoch": 3.15,
565
- "grad_norm": 0.02660321444272995,
566
- "learning_rate": 5.766710353866317e-06,
567
- "loss": 0.0006,
568
- "step": 760
569
  },
570
  {
571
  "epoch": 3.16,
572
- "grad_norm": 0.1314881145954132,
573
- "learning_rate": 5.1114023591087816e-06,
574
- "loss": 0.0008,
575
- "step": 770
576
- },
577
- {
578
- "epoch": 3.17,
579
- "grad_norm": 0.008924220688641071,
580
- "learning_rate": 4.456094364351245e-06,
581
- "loss": 0.0021,
582
- "step": 780
583
- },
584
- {
585
- "epoch": 3.18,
586
- "grad_norm": 0.02253740094602108,
587
- "learning_rate": 3.800786369593709e-06,
588
- "loss": 0.0005,
589
- "step": 790
590
  },
591
  {
592
  "epoch": 3.19,
593
- "grad_norm": 0.006063047330826521,
594
- "learning_rate": 3.145478374836173e-06,
595
- "loss": 0.0005,
596
- "step": 800
597
- },
598
- {
599
- "epoch": 3.21,
600
- "grad_norm": 0.01928607001900673,
601
- "learning_rate": 2.490170380078637e-06,
602
- "loss": 0.0002,
603
- "step": 810
604
  },
605
  {
606
  "epoch": 3.22,
607
- "grad_norm": 0.2391887903213501,
608
- "learning_rate": 1.8348623853211011e-06,
609
- "loss": 0.0005,
610
- "step": 820
611
- },
612
- {
613
- "epoch": 3.23,
614
- "grad_norm": 0.012002573348581791,
615
- "learning_rate": 1.179554390563565e-06,
616
- "loss": 0.0003,
617
- "step": 830
618
- },
619
- {
620
- "epoch": 3.24,
621
- "grad_norm": 0.009785789065063,
622
- "learning_rate": 5.242463958060289e-07,
623
- "loss": 0.001,
624
- "step": 840
625
  },
626
  {
627
  "epoch": 3.25,
628
- "eval_accuracy": 0.9964912280701754,
629
- "eval_loss": 0.005051769781857729,
630
- "eval_runtime": 84.2043,
631
- "eval_samples_per_second": 3.385,
632
- "eval_steps_per_second": 1.698,
633
- "step": 848
634
  },
635
  {
636
  "epoch": 3.25,
637
- "step": 848,
638
- "total_flos": 2.1133255437440778e+18,
639
- "train_loss": 0.16699378463049214,
640
- "train_runtime": 1543.4976,
641
- "train_samples_per_second": 1.099,
642
- "train_steps_per_second": 0.549
643
  },
644
  {
645
  "epoch": 3.25,
646
- "eval_accuracy": 0.9887429643527205,
647
- "eval_loss": 0.020199885591864586,
648
- "eval_runtime": 328.4712,
649
- "eval_samples_per_second": 1.623,
650
- "eval_steps_per_second": 0.813,
651
- "step": 848
652
  }
653
  ],
654
  "logging_steps": 10,
655
- "max_steps": 848,
656
  "num_input_tokens_seen": 0,
657
  "num_train_epochs": 9223372036854775807,
658
  "save_steps": 500,
659
- "total_flos": 2.1133255437440778e+18,
660
- "train_batch_size": 2,
661
  "trial_name": null,
662
  "trial_params": null
663
  }
 
1
  {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "videomae-base-finetuned-bekhoaxe/checkpoint-231",
4
  "epoch": 3.25,
5
  "eval_steps": 500,
6
+ "global_step": 308,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03,
13
+ "grad_norm": 4.9310173988342285,
14
+ "learning_rate": 1.6129032258064517e-05,
15
+ "loss": 0.7032,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.06,
20
+ "grad_norm": 6.229051113128662,
21
+ "learning_rate": 3.2258064516129034e-05,
22
+ "loss": 0.5738,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.1,
27
+ "grad_norm": 9.958233833312988,
28
+ "learning_rate": 4.8387096774193554e-05,
29
+ "loss": 0.3415,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.13,
34
+ "grad_norm": 7.560524940490723,
35
+ "learning_rate": 4.837545126353791e-05,
36
+ "loss": 0.4689,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.16,
41
+ "grad_norm": 5.026527404785156,
42
+ "learning_rate": 4.657039711191336e-05,
43
+ "loss": 0.2069,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.19,
48
+ "grad_norm": 0.358181893825531,
49
+ "learning_rate": 4.4765342960288806e-05,
50
+ "loss": 0.0961,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 0.23,
55
+ "grad_norm": 0.05433480069041252,
56
+ "learning_rate": 4.296028880866426e-05,
57
+ "loss": 0.2172,
58
  "step": 70
59
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  {
61
  "epoch": 0.25,
62
+ "eval_accuracy": 0.9915254237288136,
63
+ "eval_loss": 0.013951542787253857,
64
+ "eval_runtime": 38.5113,
65
+ "eval_samples_per_second": 3.064,
66
+ "eval_steps_per_second": 0.779,
67
+ "step": 77
68
  },
69
  {
70
  "epoch": 1.01,
71
+ "grad_norm": 1.3378506898880005,
72
+ "learning_rate": 4.115523465703972e-05,
73
+ "loss": 0.3363,
74
+ "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  },
76
  {
77
  "epoch": 1.04,
78
+ "grad_norm": 0.04731794446706772,
79
+ "learning_rate": 3.935018050541516e-05,
80
+ "loss": 0.195,
81
+ "step": 90
 
 
 
 
 
 
 
82
  },
83
  {
84
  "epoch": 1.07,
85
+ "grad_norm": 0.04140738397836685,
86
+ "learning_rate": 3.754512635379062e-05,
87
+ "loss": 0.0049,
88
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  },
90
  {
91
+ "epoch": 1.11,
92
+ "grad_norm": 0.08140890300273895,
93
+ "learning_rate": 3.574007220216607e-05,
94
+ "loss": 0.2396,
95
+ "step": 110
96
  },
97
  {
98
  "epoch": 1.14,
99
+ "grad_norm": 0.09178142994642258,
100
+ "learning_rate": 3.3935018050541516e-05,
101
+ "loss": 0.0746,
102
+ "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  },
104
  {
105
  "epoch": 1.17,
106
+ "grad_norm": 0.031925346702337265,
107
+ "learning_rate": 3.212996389891697e-05,
108
+ "loss": 0.3279,
109
+ "step": 130
 
 
 
 
 
 
 
110
  },
111
  {
112
  "epoch": 1.2,
113
+ "grad_norm": 0.11948370933532715,
114
+ "learning_rate": 3.032490974729242e-05,
115
+ "loss": 0.0021,
116
+ "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  },
118
  {
119
+ "epoch": 1.24,
120
+ "grad_norm": 0.05676674470305443,
121
+ "learning_rate": 2.851985559566787e-05,
122
+ "loss": 0.1826,
123
+ "step": 150
124
  },
125
  {
126
  "epoch": 1.25,
127
+ "eval_accuracy": 0.9830508474576272,
128
+ "eval_loss": 0.042561955749988556,
129
+ "eval_runtime": 38.1931,
130
+ "eval_samples_per_second": 3.09,
131
+ "eval_steps_per_second": 0.785,
132
+ "step": 154
 
 
 
 
 
 
 
133
  },
134
  {
135
  "epoch": 2.02,
136
+ "grad_norm": 0.11792542040348053,
137
+ "learning_rate": 2.6714801444043324e-05,
138
+ "loss": 0.1442,
139
+ "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  },
141
  {
142
  "epoch": 2.05,
143
+ "grad_norm": 0.10978111624717712,
144
+ "learning_rate": 2.4909747292418774e-05,
145
+ "loss": 0.0912,
146
+ "step": 170
 
 
 
 
 
 
 
147
  },
148
  {
149
  "epoch": 2.08,
150
+ "grad_norm": 0.016077380627393723,
151
+ "learning_rate": 2.3104693140794227e-05,
152
+ "loss": 0.1236,
153
+ "step": 180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  },
155
  {
156
  "epoch": 2.12,
157
+ "grad_norm": 0.01301741786301136,
158
+ "learning_rate": 2.1299638989169676e-05,
159
+ "loss": 0.1501,
160
+ "step": 190
 
 
 
 
 
 
 
161
  },
162
  {
163
  "epoch": 2.15,
164
+ "grad_norm": 0.026157772168517113,
165
+ "learning_rate": 1.9494584837545125e-05,
166
+ "loss": 0.18,
167
+ "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  },
169
  {
170
  "epoch": 2.18,
171
+ "grad_norm": 0.04890529438853264,
172
+ "learning_rate": 1.768953068592058e-05,
173
+ "loss": 0.0038,
174
+ "step": 210
 
 
 
 
 
 
 
175
  },
176
  {
177
  "epoch": 2.21,
178
+ "grad_norm": 0.038881637156009674,
179
+ "learning_rate": 1.588447653429603e-05,
180
+ "loss": 0.0019,
181
+ "step": 220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  },
183
  {
184
  "epoch": 2.25,
185
+ "grad_norm": 0.01524051371961832,
186
+ "learning_rate": 1.407942238267148e-05,
187
+ "loss": 0.1427,
188
+ "step": 230
 
 
 
 
 
 
 
 
 
189
  },
190
  {
191
+ "epoch": 2.25,
192
+ "eval_accuracy": 1.0,
193
+ "eval_loss": 0.0019937974866479635,
194
+ "eval_runtime": 38.3352,
195
+ "eval_samples_per_second": 3.078,
196
+ "eval_steps_per_second": 0.783,
197
+ "step": 231
198
  },
199
  {
200
  "epoch": 3.03,
201
+ "grad_norm": 0.01709669642150402,
202
+ "learning_rate": 1.2274368231046932e-05,
203
+ "loss": 0.0012,
204
+ "step": 240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  },
206
  {
207
  "epoch": 3.06,
208
+ "grad_norm": 0.1900138109922409,
209
+ "learning_rate": 1.0469314079422383e-05,
210
+ "loss": 0.1073,
211
+ "step": 250
 
 
 
 
 
 
 
212
  },
213
  {
214
  "epoch": 3.09,
215
+ "grad_norm": 0.06225905194878578,
216
+ "learning_rate": 8.664259927797834e-06,
217
+ "loss": 0.0029,
218
+ "step": 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  },
220
  {
221
  "epoch": 3.13,
222
+ "grad_norm": 0.015592777170240879,
223
+ "learning_rate": 6.859205776173286e-06,
224
+ "loss": 0.0009,
225
+ "step": 270
 
 
 
 
 
 
 
226
  },
227
  {
228
  "epoch": 3.16,
229
+ "grad_norm": 0.016208168119192123,
230
+ "learning_rate": 5.054151624548736e-06,
231
+ "loss": 0.0036,
232
+ "step": 280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  },
234
  {
235
  "epoch": 3.19,
236
+ "grad_norm": 0.007616001646965742,
237
+ "learning_rate": 3.2490974729241876e-06,
238
+ "loss": 0.0008,
239
+ "step": 290
 
 
 
 
 
 
 
240
  },
241
  {
242
  "epoch": 3.22,
243
+ "grad_norm": 0.008425813168287277,
244
+ "learning_rate": 1.4440433212996392e-06,
245
+ "loss": 0.2853,
246
+ "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  },
248
  {
249
  "epoch": 3.25,
250
+ "eval_accuracy": 1.0,
251
+ "eval_loss": 0.0012017178814858198,
252
+ "eval_runtime": 38.094,
253
+ "eval_samples_per_second": 3.098,
254
+ "eval_steps_per_second": 0.788,
255
+ "step": 308
256
  },
257
  {
258
  "epoch": 3.25,
259
+ "step": 308,
260
+ "total_flos": 1.5351515742291886e+18,
261
+ "train_loss": 0.16999648642641577,
262
+ "train_runtime": 584.5872,
263
+ "train_samples_per_second": 2.107,
264
+ "train_steps_per_second": 0.527
265
  },
266
  {
267
  "epoch": 3.25,
268
+ "eval_accuracy": 0.9761904761904762,
269
+ "eval_loss": 0.09526590257883072,
270
+ "eval_runtime": 122.5253,
271
+ "eval_samples_per_second": 1.714,
272
+ "eval_steps_per_second": 0.433,
273
+ "step": 308
274
  }
275
  ],
276
  "logging_steps": 10,
277
+ "max_steps": 308,
278
  "num_input_tokens_seen": 0,
279
  "num_train_epochs": 9223372036854775807,
280
  "save_steps": 500,
281
+ "total_flos": 1.5351515742291886e+18,
282
+ "train_batch_size": 4,
283
  "trial_name": null,
284
  "trial_params": null
285
  }