sbaner24 commited on
Commit
f8ec61d
1 Parent(s): dc1fe5a

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +8 -8
  2. eval_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +309 -309
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.03582725301384926,
5
- "eval_runtime": 0.3142,
6
- "eval_samples_per_second": 171.879,
7
- "eval_steps_per_second": 3.183,
8
  "total_flos": 1.6586385457107272e+18,
9
- "train_loss": 0.2113973332196474,
10
- "train_runtime": 676.842,
11
- "train_samples_per_second": 35.533,
12
- "train_steps_per_second": 0.148
13
  }
 
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.03731463849544525,
5
+ "eval_runtime": 0.3143,
6
+ "eval_samples_per_second": 171.8,
7
+ "eval_steps_per_second": 3.181,
8
  "total_flos": 1.6586385457107272e+18,
9
+ "train_loss": 0.20484884686768054,
10
+ "train_runtime": 700.3363,
11
+ "train_samples_per_second": 34.341,
12
+ "train_steps_per_second": 0.143
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.03582725301384926,
5
- "eval_runtime": 0.3142,
6
- "eval_samples_per_second": 171.879,
7
- "eval_steps_per_second": 3.183
8
  }
 
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.03731463849544525,
5
+ "eval_runtime": 0.3143,
6
+ "eval_samples_per_second": 171.8,
7
+ "eval_steps_per_second": 3.181
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 44.44,
3
  "total_flos": 1.6586385457107272e+18,
4
- "train_loss": 0.2113973332196474,
5
- "train_runtime": 676.842,
6
- "train_samples_per_second": 35.533,
7
- "train_steps_per_second": 0.148
8
  }
 
1
  {
2
  "epoch": 44.44,
3
  "total_flos": 1.6586385457107272e+18,
4
+ "train_loss": 0.20484884686768054,
5
+ "train_runtime": 700.3363,
6
+ "train_samples_per_second": 34.341,
7
+ "train_steps_per_second": 0.143
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_metric": 1.0,
3
- "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM/checkpoint-36",
4
  "epoch": 44.44444444444444,
5
  "global_step": 100,
6
  "is_hyper_param_search": false,
@@ -10,1016 +10,1016 @@
10
  {
11
  "epoch": 0.44,
12
  "learning_rate": 5e-06,
13
- "loss": 0.71,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.89,
18
  "learning_rate": 1e-05,
19
- "loss": 0.7523,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.89,
24
- "eval_accuracy": 0.6111111111111112,
25
- "eval_loss": 0.6941519379615784,
26
- "eval_runtime": 0.4141,
27
- "eval_samples_per_second": 130.403,
28
- "eval_steps_per_second": 2.415,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 1.33,
33
  "learning_rate": 1.5e-05,
34
- "loss": 0.6296,
35
  "step": 3
36
  },
37
  {
38
  "epoch": 1.78,
39
  "learning_rate": 2e-05,
40
- "loss": 0.6785,
41
  "step": 4
42
  },
43
  {
44
  "epoch": 1.78,
45
- "eval_accuracy": 0.5925925925925926,
46
- "eval_loss": 0.6312435269355774,
47
- "eval_runtime": 0.2601,
48
- "eval_samples_per_second": 207.61,
49
- "eval_steps_per_second": 3.845,
50
  "step": 4
51
  },
52
  {
53
  "epoch": 2.22,
54
  "learning_rate": 2.5e-05,
55
- "loss": 0.6053,
56
  "step": 5
57
  },
58
  {
59
  "epoch": 2.67,
60
  "learning_rate": 3e-05,
61
- "loss": 0.5733,
62
  "step": 6
63
  },
64
  {
65
  "epoch": 2.67,
66
- "eval_accuracy": 0.8518518518518519,
67
- "eval_loss": 0.4705863296985626,
68
- "eval_runtime": 0.2543,
69
- "eval_samples_per_second": 212.329,
70
- "eval_steps_per_second": 3.932,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 3.11,
75
  "learning_rate": 3.5e-05,
76
- "loss": 0.4955,
77
  "step": 7
78
  },
79
  {
80
  "epoch": 3.56,
81
  "learning_rate": 4e-05,
82
- "loss": 0.6037,
83
  "step": 8
84
  },
85
  {
86
  "epoch": 4.0,
87
  "learning_rate": 4.5e-05,
88
- "loss": 0.4473,
89
  "step": 9
90
  },
91
  {
92
  "epoch": 4.0,
93
- "eval_accuracy": 0.9074074074074074,
94
- "eval_loss": 0.3479275405406952,
95
- "eval_runtime": 0.2771,
96
- "eval_samples_per_second": 194.879,
97
- "eval_steps_per_second": 3.609,
98
  "step": 9
99
  },
100
  {
101
  "epoch": 4.44,
102
  "learning_rate": 5e-05,
103
- "loss": 0.463,
104
  "step": 10
105
  },
106
  {
107
  "epoch": 4.89,
108
  "learning_rate": 4.9444444444444446e-05,
109
- "loss": 0.4312,
110
  "step": 11
111
  },
112
  {
113
  "epoch": 4.89,
114
- "eval_accuracy": 0.9444444444444444,
115
- "eval_loss": 0.27067238092422485,
116
- "eval_runtime": 0.2603,
117
- "eval_samples_per_second": 207.479,
118
- "eval_steps_per_second": 3.842,
119
  "step": 11
120
  },
121
  {
122
  "epoch": 5.33,
123
  "learning_rate": 4.888888888888889e-05,
124
- "loss": 0.3057,
125
  "step": 12
126
  },
127
  {
128
  "epoch": 5.78,
129
  "learning_rate": 4.8333333333333334e-05,
130
- "loss": 0.3499,
131
  "step": 13
132
  },
133
  {
134
  "epoch": 5.78,
135
  "eval_accuracy": 0.9074074074074074,
136
- "eval_loss": 0.24450431764125824,
137
- "eval_runtime": 0.3251,
138
- "eval_samples_per_second": 166.098,
139
- "eval_steps_per_second": 3.076,
140
  "step": 13
141
  },
142
  {
143
  "epoch": 6.22,
144
  "learning_rate": 4.7777777777777784e-05,
145
- "loss": 0.2501,
146
  "step": 14
147
  },
148
  {
149
  "epoch": 6.67,
150
  "learning_rate": 4.722222222222222e-05,
151
- "loss": 0.3096,
152
  "step": 15
153
  },
154
  {
155
  "epoch": 6.67,
156
- "eval_accuracy": 0.9629629629629629,
157
- "eval_loss": 0.17727157473564148,
158
- "eval_runtime": 0.2547,
159
- "eval_samples_per_second": 212.003,
160
- "eval_steps_per_second": 3.926,
161
  "step": 15
162
  },
163
  {
164
  "epoch": 7.11,
165
  "learning_rate": 4.666666666666667e-05,
166
- "loss": 0.2454,
167
  "step": 16
168
  },
169
  {
170
  "epoch": 7.56,
171
  "learning_rate": 4.6111111111111115e-05,
172
- "loss": 0.272,
173
  "step": 17
174
  },
175
  {
176
  "epoch": 8.0,
177
  "learning_rate": 4.555555555555556e-05,
178
- "loss": 0.1992,
179
  "step": 18
180
  },
181
  {
182
  "epoch": 8.0,
183
  "eval_accuracy": 0.9444444444444444,
184
- "eval_loss": 0.14089126884937286,
185
- "eval_runtime": 0.2563,
186
- "eval_samples_per_second": 210.661,
187
- "eval_steps_per_second": 3.901,
188
  "step": 18
189
  },
190
  {
191
  "epoch": 8.44,
192
  "learning_rate": 4.5e-05,
193
- "loss": 0.2011,
194
  "step": 19
195
  },
196
  {
197
  "epoch": 8.89,
198
  "learning_rate": 4.4444444444444447e-05,
199
- "loss": 0.2195,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 8.89,
204
- "eval_accuracy": 0.9814814814814815,
205
- "eval_loss": 0.11288923025131226,
206
- "eval_runtime": 0.2576,
207
- "eval_samples_per_second": 209.62,
208
- "eval_steps_per_second": 3.882,
209
  "step": 20
210
  },
211
  {
212
  "epoch": 9.33,
213
  "learning_rate": 4.388888888888889e-05,
214
- "loss": 0.5572,
215
  "step": 21
216
  },
217
  {
218
  "epoch": 9.78,
219
  "learning_rate": 4.3333333333333334e-05,
220
- "loss": 0.1992,
221
  "step": 22
222
  },
223
  {
224
  "epoch": 9.78,
225
  "eval_accuracy": 0.9629629629629629,
226
- "eval_loss": 0.10700082033872604,
227
- "eval_runtime": 0.4132,
228
- "eval_samples_per_second": 130.679,
229
- "eval_steps_per_second": 2.42,
230
  "step": 22
231
  },
232
  {
233
  "epoch": 10.22,
234
  "learning_rate": 4.277777777777778e-05,
235
- "loss": 0.167,
236
  "step": 23
237
  },
238
  {
239
  "epoch": 10.67,
240
  "learning_rate": 4.222222222222222e-05,
241
- "loss": 0.2173,
242
  "step": 24
243
  },
244
  {
245
  "epoch": 10.67,
246
- "eval_accuracy": 0.9259259259259259,
247
- "eval_loss": 0.2469346821308136,
248
- "eval_runtime": 0.2584,
249
- "eval_samples_per_second": 208.943,
250
- "eval_steps_per_second": 3.869,
251
  "step": 24
252
  },
253
  {
254
  "epoch": 11.11,
255
  "learning_rate": 4.166666666666667e-05,
256
- "loss": 0.1728,
257
  "step": 25
258
  },
259
  {
260
  "epoch": 11.56,
261
  "learning_rate": 4.111111111111111e-05,
262
- "loss": 0.171,
263
  "step": 26
264
  },
265
  {
266
  "epoch": 12.0,
267
  "learning_rate": 4.055555555555556e-05,
268
- "loss": 0.1489,
269
  "step": 27
270
  },
271
  {
272
  "epoch": 12.0,
273
  "eval_accuracy": 0.9814814814814815,
274
- "eval_loss": 0.08100217580795288,
275
- "eval_runtime": 0.2569,
276
- "eval_samples_per_second": 210.192,
277
- "eval_steps_per_second": 3.892,
278
  "step": 27
279
  },
280
  {
281
  "epoch": 12.44,
282
  "learning_rate": 4e-05,
283
- "loss": 0.2299,
284
  "step": 28
285
  },
286
  {
287
  "epoch": 12.89,
288
  "learning_rate": 3.944444444444445e-05,
289
- "loss": 0.1975,
290
  "step": 29
291
  },
292
  {
293
  "epoch": 12.89,
294
- "eval_accuracy": 0.9814814814814815,
295
- "eval_loss": 0.08110610395669937,
296
  "eval_runtime": 0.2604,
297
- "eval_samples_per_second": 207.381,
298
  "eval_steps_per_second": 3.84,
299
  "step": 29
300
  },
301
  {
302
  "epoch": 13.33,
303
  "learning_rate": 3.888888888888889e-05,
304
- "loss": 0.1396,
305
  "step": 30
306
  },
307
  {
308
  "epoch": 13.78,
309
  "learning_rate": 3.8333333333333334e-05,
310
- "loss": 0.1944,
311
  "step": 31
312
  },
313
  {
314
  "epoch": 13.78,
315
- "eval_accuracy": 0.9814814814814815,
316
- "eval_loss": 0.0979403406381607,
317
- "eval_runtime": 0.2574,
318
- "eval_samples_per_second": 209.778,
319
- "eval_steps_per_second": 3.885,
320
  "step": 31
321
  },
322
  {
323
  "epoch": 14.22,
324
  "learning_rate": 3.777777777777778e-05,
325
- "loss": 0.1272,
326
  "step": 32
327
  },
328
  {
329
  "epoch": 14.67,
330
  "learning_rate": 3.722222222222222e-05,
331
- "loss": 0.1556,
332
  "step": 33
333
  },
334
  {
335
  "epoch": 14.67,
336
  "eval_accuracy": 0.9814814814814815,
337
- "eval_loss": 0.059882353991270065,
338
- "eval_runtime": 0.2583,
339
- "eval_samples_per_second": 209.053,
340
- "eval_steps_per_second": 3.871,
341
  "step": 33
342
  },
343
  {
344
  "epoch": 15.11,
345
  "learning_rate": 3.6666666666666666e-05,
346
- "loss": 0.1173,
347
  "step": 34
348
  },
349
  {
350
  "epoch": 15.56,
351
  "learning_rate": 3.611111111111111e-05,
352
- "loss": 0.1832,
353
  "step": 35
354
  },
355
  {
356
  "epoch": 16.0,
357
  "learning_rate": 3.555555555555556e-05,
358
- "loss": 0.1631,
359
  "step": 36
360
  },
361
  {
362
  "epoch": 16.0,
363
  "eval_accuracy": 1.0,
364
- "eval_loss": 0.03582725301384926,
365
- "eval_runtime": 0.2565,
366
- "eval_samples_per_second": 210.555,
367
- "eval_steps_per_second": 3.899,
368
  "step": 36
369
  },
370
  {
371
  "epoch": 16.44,
372
  "learning_rate": 3.5e-05,
373
- "loss": 0.1637,
374
  "step": 37
375
  },
376
  {
377
  "epoch": 16.89,
378
  "learning_rate": 3.444444444444445e-05,
379
- "loss": 0.1853,
380
  "step": 38
381
  },
382
  {
383
  "epoch": 16.89,
384
- "eval_accuracy": 0.9814814814814815,
385
- "eval_loss": 0.08257294446229935,
386
- "eval_runtime": 0.2586,
387
- "eval_samples_per_second": 208.83,
388
- "eval_steps_per_second": 3.867,
389
  "step": 38
390
  },
391
  {
392
  "epoch": 17.33,
393
  "learning_rate": 3.388888888888889e-05,
394
- "loss": 0.1153,
395
  "step": 39
396
  },
397
  {
398
  "epoch": 17.78,
399
  "learning_rate": 3.3333333333333335e-05,
400
- "loss": 0.1452,
401
  "step": 40
402
  },
403
  {
404
  "epoch": 17.78,
405
- "eval_accuracy": 0.9814814814814815,
406
- "eval_loss": 0.08874496817588806,
407
- "eval_runtime": 0.2563,
408
- "eval_samples_per_second": 210.693,
409
- "eval_steps_per_second": 3.902,
410
  "step": 40
411
  },
412
  {
413
  "epoch": 18.22,
414
  "learning_rate": 3.277777777777778e-05,
415
- "loss": 0.1271,
416
  "step": 41
417
  },
418
  {
419
  "epoch": 18.67,
420
  "learning_rate": 3.222222222222223e-05,
421
- "loss": 0.1682,
422
  "step": 42
423
  },
424
  {
425
  "epoch": 18.67,
426
- "eval_accuracy": 0.9814814814814815,
427
- "eval_loss": 0.06881634145975113,
428
- "eval_runtime": 0.2602,
429
- "eval_samples_per_second": 207.551,
430
- "eval_steps_per_second": 3.844,
431
  "step": 42
432
  },
433
  {
434
  "epoch": 19.11,
435
  "learning_rate": 3.1666666666666666e-05,
436
- "loss": 0.521,
437
  "step": 43
438
  },
439
  {
440
  "epoch": 19.56,
441
  "learning_rate": 3.111111111111111e-05,
442
- "loss": 0.1546,
443
  "step": 44
444
  },
445
  {
446
  "epoch": 20.0,
447
  "learning_rate": 3.055555555555556e-05,
448
- "loss": 0.1674,
449
  "step": 45
450
  },
451
  {
452
  "epoch": 20.0,
453
- "eval_accuracy": 0.9814814814814815,
454
- "eval_loss": 0.10646378248929977,
455
- "eval_runtime": 0.2574,
456
- "eval_samples_per_second": 209.82,
457
- "eval_steps_per_second": 3.886,
458
  "step": 45
459
  },
460
  {
461
  "epoch": 20.44,
462
  "learning_rate": 3e-05,
463
- "loss": 0.1386,
464
  "step": 46
465
  },
466
  {
467
  "epoch": 20.89,
468
  "learning_rate": 2.9444444444444448e-05,
469
- "loss": 0.1729,
470
  "step": 47
471
  },
472
  {
473
  "epoch": 20.89,
474
- "eval_accuracy": 0.9814814814814815,
475
- "eval_loss": 0.11758946627378464,
476
- "eval_runtime": 0.2574,
477
- "eval_samples_per_second": 209.801,
478
- "eval_steps_per_second": 3.885,
479
  "step": 47
480
  },
481
  {
482
  "epoch": 21.33,
483
  "learning_rate": 2.8888888888888888e-05,
484
- "loss": 0.249,
485
  "step": 48
486
  },
487
  {
488
  "epoch": 21.78,
489
  "learning_rate": 2.8333333333333335e-05,
490
- "loss": 0.1234,
491
  "step": 49
492
  },
493
  {
494
  "epoch": 21.78,
495
  "eval_accuracy": 0.9814814814814815,
496
- "eval_loss": 0.05895670875906944,
497
- "eval_runtime": 0.2583,
498
- "eval_samples_per_second": 209.081,
499
- "eval_steps_per_second": 3.872,
500
  "step": 49
501
  },
502
  {
503
  "epoch": 22.22,
504
  "learning_rate": 2.777777777777778e-05,
505
- "loss": 0.0955,
506
  "step": 50
507
  },
508
  {
509
  "epoch": 22.67,
510
  "learning_rate": 2.7222222222222223e-05,
511
- "loss": 0.1696,
512
  "step": 51
513
  },
514
  {
515
  "epoch": 22.67,
516
- "eval_accuracy": 1.0,
517
- "eval_loss": 0.04187057167291641,
518
- "eval_runtime": 0.4982,
519
- "eval_samples_per_second": 108.388,
520
- "eval_steps_per_second": 2.007,
521
  "step": 51
522
  },
523
  {
524
  "epoch": 23.11,
525
  "learning_rate": 2.6666666666666667e-05,
526
- "loss": 0.1788,
527
  "step": 52
528
  },
529
  {
530
  "epoch": 23.56,
531
  "learning_rate": 2.6111111111111114e-05,
532
- "loss": 0.2168,
533
  "step": 53
534
  },
535
  {
536
  "epoch": 24.0,
537
  "learning_rate": 2.5555555555555554e-05,
538
- "loss": 0.12,
539
  "step": 54
540
  },
541
  {
542
  "epoch": 24.0,
543
  "eval_accuracy": 0.9814814814814815,
544
- "eval_loss": 0.05020841583609581,
545
- "eval_runtime": 0.2609,
546
- "eval_samples_per_second": 206.982,
547
- "eval_steps_per_second": 3.833,
548
  "step": 54
549
  },
550
  {
551
  "epoch": 24.44,
552
  "learning_rate": 2.5e-05,
553
- "loss": 0.1713,
554
  "step": 55
555
  },
556
  {
557
  "epoch": 24.89,
558
  "learning_rate": 2.4444444444444445e-05,
559
- "loss": 0.1301,
560
  "step": 56
561
  },
562
  {
563
  "epoch": 24.89,
564
  "eval_accuracy": 0.9814814814814815,
565
- "eval_loss": 0.12487592548131943,
566
- "eval_runtime": 0.2789,
567
- "eval_samples_per_second": 193.603,
568
- "eval_steps_per_second": 3.585,
569
  "step": 56
570
  },
571
  {
572
  "epoch": 25.33,
573
  "learning_rate": 2.3888888888888892e-05,
574
- "loss": 0.1284,
575
  "step": 57
576
  },
577
  {
578
  "epoch": 25.78,
579
  "learning_rate": 2.3333333333333336e-05,
580
- "loss": 0.1324,
581
  "step": 58
582
  },
583
  {
584
  "epoch": 25.78,
585
  "eval_accuracy": 0.9814814814814815,
586
- "eval_loss": 0.1310151070356369,
587
- "eval_runtime": 0.2578,
588
- "eval_samples_per_second": 209.456,
589
- "eval_steps_per_second": 3.879,
590
  "step": 58
591
  },
592
  {
593
  "epoch": 26.22,
594
  "learning_rate": 2.277777777777778e-05,
595
- "loss": 0.0992,
596
  "step": 59
597
  },
598
  {
599
  "epoch": 26.67,
600
  "learning_rate": 2.2222222222222223e-05,
601
- "loss": 0.1128,
602
  "step": 60
603
  },
604
  {
605
  "epoch": 26.67,
606
  "eval_accuracy": 0.9814814814814815,
607
- "eval_loss": 0.07883566617965698,
608
- "eval_runtime": 0.2581,
609
- "eval_samples_per_second": 209.202,
610
- "eval_steps_per_second": 3.874,
611
  "step": 60
612
  },
613
  {
614
  "epoch": 27.11,
615
  "learning_rate": 2.1666666666666667e-05,
616
- "loss": 0.1235,
617
  "step": 61
618
  },
619
  {
620
  "epoch": 27.56,
621
  "learning_rate": 2.111111111111111e-05,
622
- "loss": 0.1672,
623
  "step": 62
624
  },
625
  {
626
  "epoch": 28.0,
627
  "learning_rate": 2.0555555555555555e-05,
628
- "loss": 0.1083,
629
  "step": 63
630
  },
631
  {
632
  "epoch": 28.0,
633
- "eval_accuracy": 1.0,
634
- "eval_loss": 0.024069679901003838,
635
- "eval_runtime": 0.2592,
636
- "eval_samples_per_second": 208.37,
637
- "eval_steps_per_second": 3.859,
638
  "step": 63
639
  },
640
  {
641
  "epoch": 28.44,
642
  "learning_rate": 2e-05,
643
- "loss": 0.1343,
644
  "step": 64
645
  },
646
  {
647
  "epoch": 28.89,
648
  "learning_rate": 1.9444444444444445e-05,
649
- "loss": 0.1551,
650
  "step": 65
651
  },
652
  {
653
  "epoch": 28.89,
654
- "eval_accuracy": 0.9814814814814815,
655
- "eval_loss": 0.032383933663368225,
656
- "eval_runtime": 0.2601,
657
- "eval_samples_per_second": 207.588,
658
- "eval_steps_per_second": 3.844,
659
  "step": 65
660
  },
661
  {
662
  "epoch": 29.33,
663
  "learning_rate": 1.888888888888889e-05,
664
- "loss": 0.0953,
665
  "step": 66
666
  },
667
  {
668
  "epoch": 29.78,
669
  "learning_rate": 1.8333333333333333e-05,
670
- "loss": 0.1311,
671
  "step": 67
672
  },
673
  {
674
  "epoch": 29.78,
675
- "eval_accuracy": 0.9814814814814815,
676
- "eval_loss": 0.06356043368577957,
677
- "eval_runtime": 0.2566,
678
- "eval_samples_per_second": 210.412,
679
- "eval_steps_per_second": 3.897,
680
  "step": 67
681
  },
682
  {
683
  "epoch": 30.22,
684
  "learning_rate": 1.777777777777778e-05,
685
- "loss": 0.0912,
686
  "step": 68
687
  },
688
  {
689
  "epoch": 30.67,
690
  "learning_rate": 1.7222222222222224e-05,
691
- "loss": 0.1367,
692
  "step": 69
693
  },
694
  {
695
  "epoch": 30.67,
696
- "eval_accuracy": 0.9814814814814815,
697
- "eval_loss": 0.06610292941331863,
698
- "eval_runtime": 0.2596,
699
- "eval_samples_per_second": 207.984,
700
- "eval_steps_per_second": 3.852,
701
  "step": 69
702
  },
703
  {
704
  "epoch": 31.11,
705
  "learning_rate": 1.6666666666666667e-05,
706
- "loss": 0.1008,
707
  "step": 70
708
  },
709
  {
710
  "epoch": 31.56,
711
  "learning_rate": 1.6111111111111115e-05,
712
- "loss": 0.1489,
713
  "step": 71
714
  },
715
  {
716
  "epoch": 32.0,
717
  "learning_rate": 1.5555555555555555e-05,
718
- "loss": 0.1315,
719
  "step": 72
720
  },
721
  {
722
  "epoch": 32.0,
723
  "eval_accuracy": 1.0,
724
- "eval_loss": 0.015112031251192093,
725
- "eval_runtime": 0.265,
726
- "eval_samples_per_second": 203.796,
727
- "eval_steps_per_second": 3.774,
728
  "step": 72
729
  },
730
  {
731
  "epoch": 32.44,
732
  "learning_rate": 1.5e-05,
733
- "loss": 0.1153,
734
  "step": 73
735
  },
736
  {
737
  "epoch": 32.89,
738
  "learning_rate": 1.4444444444444444e-05,
739
- "loss": 0.1717,
740
  "step": 74
741
  },
742
  {
743
  "epoch": 32.89,
744
  "eval_accuracy": 1.0,
745
- "eval_loss": 0.018243148922920227,
746
- "eval_runtime": 0.258,
747
- "eval_samples_per_second": 209.273,
748
- "eval_steps_per_second": 3.875,
749
  "step": 74
750
  },
751
  {
752
  "epoch": 33.33,
753
  "learning_rate": 1.388888888888889e-05,
754
- "loss": 0.1959,
755
  "step": 75
756
  },
757
  {
758
  "epoch": 33.78,
759
  "learning_rate": 1.3333333333333333e-05,
760
- "loss": 0.1795,
761
  "step": 76
762
  },
763
  {
764
  "epoch": 33.78,
765
  "eval_accuracy": 1.0,
766
- "eval_loss": 0.019817128777503967,
767
- "eval_runtime": 0.2624,
768
- "eval_samples_per_second": 205.8,
769
- "eval_steps_per_second": 3.811,
770
  "step": 76
771
  },
772
  {
773
  "epoch": 34.22,
774
  "learning_rate": 1.2777777777777777e-05,
775
- "loss": 0.1076,
776
  "step": 77
777
  },
778
  {
779
  "epoch": 34.67,
780
  "learning_rate": 1.2222222222222222e-05,
781
- "loss": 0.1348,
782
  "step": 78
783
  },
784
  {
785
  "epoch": 34.67,
786
  "eval_accuracy": 1.0,
787
- "eval_loss": 0.017830517143011093,
788
- "eval_runtime": 0.2576,
789
- "eval_samples_per_second": 209.607,
790
- "eval_steps_per_second": 3.882,
791
  "step": 78
792
  },
793
  {
794
  "epoch": 35.11,
795
  "learning_rate": 1.1666666666666668e-05,
796
- "loss": 0.3572,
797
  "step": 79
798
  },
799
  {
800
  "epoch": 35.56,
801
  "learning_rate": 1.1111111111111112e-05,
802
- "loss": 0.138,
803
  "step": 80
804
  },
805
  {
806
  "epoch": 36.0,
807
  "learning_rate": 1.0555555555555555e-05,
808
- "loss": 0.136,
809
  "step": 81
810
  },
811
  {
812
  "epoch": 36.0,
813
  "eval_accuracy": 1.0,
814
- "eval_loss": 0.01816863939166069,
815
- "eval_runtime": 0.2585,
816
- "eval_samples_per_second": 208.9,
817
- "eval_steps_per_second": 3.869,
818
  "step": 81
819
  },
820
  {
821
  "epoch": 36.44,
822
  "learning_rate": 1e-05,
823
- "loss": 0.1155,
824
  "step": 82
825
  },
826
  {
827
  "epoch": 36.89,
828
  "learning_rate": 9.444444444444445e-06,
829
- "loss": 0.133,
830
  "step": 83
831
  },
832
  {
833
  "epoch": 36.89,
834
  "eval_accuracy": 1.0,
835
- "eval_loss": 0.019667092710733414,
836
- "eval_runtime": 0.2563,
837
- "eval_samples_per_second": 210.712,
838
- "eval_steps_per_second": 3.902,
839
  "step": 83
840
  },
841
  {
842
  "epoch": 37.33,
843
  "learning_rate": 8.88888888888889e-06,
844
- "loss": 0.0956,
845
  "step": 84
846
  },
847
  {
848
  "epoch": 37.78,
849
  "learning_rate": 8.333333333333334e-06,
850
- "loss": 0.1006,
851
  "step": 85
852
  },
853
  {
854
  "epoch": 37.78,
855
  "eval_accuracy": 1.0,
856
- "eval_loss": 0.02239961363375187,
857
- "eval_runtime": 0.2608,
858
- "eval_samples_per_second": 207.09,
859
- "eval_steps_per_second": 3.835,
860
  "step": 85
861
  },
862
  {
863
  "epoch": 38.22,
864
  "learning_rate": 7.777777777777777e-06,
865
- "loss": 0.1384,
866
  "step": 86
867
  },
868
  {
869
  "epoch": 38.67,
870
  "learning_rate": 7.222222222222222e-06,
871
- "loss": 0.159,
872
  "step": 87
873
  },
874
  {
875
  "epoch": 38.67,
876
  "eval_accuracy": 1.0,
877
- "eval_loss": 0.025877976790070534,
878
- "eval_runtime": 0.2569,
879
- "eval_samples_per_second": 210.173,
880
- "eval_steps_per_second": 3.892,
881
  "step": 87
882
  },
883
  {
884
  "epoch": 39.11,
885
  "learning_rate": 6.666666666666667e-06,
886
- "loss": 0.0773,
887
  "step": 88
888
  },
889
  {
890
  "epoch": 39.56,
891
  "learning_rate": 6.111111111111111e-06,
892
- "loss": 0.1196,
893
  "step": 89
894
  },
895
  {
896
  "epoch": 40.0,
897
  "learning_rate": 5.555555555555556e-06,
898
- "loss": 0.0899,
899
  "step": 90
900
  },
901
  {
902
  "epoch": 40.0,
903
- "eval_accuracy": 0.9814814814814815,
904
- "eval_loss": 0.02953430823981762,
905
- "eval_runtime": 0.2609,
906
- "eval_samples_per_second": 207.015,
907
- "eval_steps_per_second": 3.834,
908
  "step": 90
909
  },
910
  {
911
  "epoch": 40.44,
912
  "learning_rate": 5e-06,
913
- "loss": 0.1458,
914
  "step": 91
915
  },
916
  {
917
  "epoch": 40.89,
918
  "learning_rate": 4.444444444444445e-06,
919
- "loss": 0.1464,
920
  "step": 92
921
  },
922
  {
923
  "epoch": 40.89,
924
- "eval_accuracy": 0.9814814814814815,
925
- "eval_loss": 0.032179489731788635,
926
- "eval_runtime": 0.2579,
927
- "eval_samples_per_second": 209.385,
928
- "eval_steps_per_second": 3.878,
929
  "step": 92
930
  },
931
  {
932
  "epoch": 41.33,
933
  "learning_rate": 3.888888888888889e-06,
934
- "loss": 0.107,
935
  "step": 93
936
  },
937
  {
938
  "epoch": 41.78,
939
  "learning_rate": 3.3333333333333333e-06,
940
- "loss": 0.1098,
941
  "step": 94
942
  },
943
  {
944
  "epoch": 41.78,
945
- "eval_accuracy": 0.9814814814814815,
946
- "eval_loss": 0.0338004007935524,
947
- "eval_runtime": 0.2591,
948
- "eval_samples_per_second": 208.453,
949
- "eval_steps_per_second": 3.86,
950
  "step": 94
951
  },
952
  {
953
  "epoch": 42.22,
954
  "learning_rate": 2.777777777777778e-06,
955
- "loss": 0.1228,
956
  "step": 95
957
  },
958
  {
959
  "epoch": 42.67,
960
  "learning_rate": 2.2222222222222225e-06,
961
- "loss": 0.128,
962
  "step": 96
963
  },
964
  {
965
  "epoch": 42.67,
966
- "eval_accuracy": 0.9814814814814815,
967
- "eval_loss": 0.033080052584409714,
968
- "eval_runtime": 0.2582,
969
- "eval_samples_per_second": 209.121,
970
- "eval_steps_per_second": 3.873,
971
  "step": 96
972
  },
973
  {
974
  "epoch": 43.11,
975
  "learning_rate": 1.6666666666666667e-06,
976
- "loss": 0.1231,
977
  "step": 97
978
  },
979
  {
980
  "epoch": 43.56,
981
  "learning_rate": 1.1111111111111112e-06,
982
- "loss": 0.1276,
983
  "step": 98
984
  },
985
  {
986
  "epoch": 44.0,
987
  "learning_rate": 5.555555555555556e-07,
988
- "loss": 0.0841,
989
  "step": 99
990
  },
991
  {
992
  "epoch": 44.0,
993
- "eval_accuracy": 0.9814814814814815,
994
- "eval_loss": 0.030649887397885323,
995
- "eval_runtime": 0.2594,
996
- "eval_samples_per_second": 208.196,
997
- "eval_steps_per_second": 3.855,
998
  "step": 99
999
  },
1000
  {
1001
  "epoch": 44.44,
1002
  "learning_rate": 0.0,
1003
- "loss": 0.0864,
1004
  "step": 100
1005
  },
1006
  {
1007
  "epoch": 44.44,
1008
- "eval_accuracy": 0.9814814814814815,
1009
- "eval_loss": 0.030413048341870308,
1010
- "eval_runtime": 0.2709,
1011
- "eval_samples_per_second": 199.324,
1012
- "eval_steps_per_second": 3.691,
1013
  "step": 100
1014
  },
1015
  {
1016
  "epoch": 44.44,
1017
  "step": 100,
1018
  "total_flos": 1.6586385457107272e+18,
1019
- "train_loss": 0.2113973332196474,
1020
- "train_runtime": 676.842,
1021
- "train_samples_per_second": 35.533,
1022
- "train_steps_per_second": 0.148
1023
  }
1024
  ],
1025
  "max_steps": 100,
 
1
  {
2
  "best_metric": 1.0,
3
+ "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM/checkpoint-29",
4
  "epoch": 44.44444444444444,
5
  "global_step": 100,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.44,
12
  "learning_rate": 5e-06,
13
+ "loss": 0.7059,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.89,
18
  "learning_rate": 1e-05,
19
+ "loss": 0.7081,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.89,
24
+ "eval_accuracy": 0.5555555555555556,
25
+ "eval_loss": 0.6817994713783264,
26
+ "eval_runtime": 0.2599,
27
+ "eval_samples_per_second": 207.804,
28
+ "eval_steps_per_second": 3.848,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 1.33,
33
  "learning_rate": 1.5e-05,
34
+ "loss": 0.6401,
35
  "step": 3
36
  },
37
  {
38
  "epoch": 1.78,
39
  "learning_rate": 2e-05,
40
+ "loss": 0.6584,
41
  "step": 4
42
  },
43
  {
44
  "epoch": 1.78,
45
+ "eval_accuracy": 0.7037037037037037,
46
+ "eval_loss": 0.5915446877479553,
47
+ "eval_runtime": 0.2575,
48
+ "eval_samples_per_second": 209.714,
49
+ "eval_steps_per_second": 3.884,
50
  "step": 4
51
  },
52
  {
53
  "epoch": 2.22,
54
  "learning_rate": 2.5e-05,
55
+ "loss": 0.5794,
56
  "step": 5
57
  },
58
  {
59
  "epoch": 2.67,
60
  "learning_rate": 3e-05,
61
+ "loss": 0.5552,
62
  "step": 6
63
  },
64
  {
65
  "epoch": 2.67,
66
+ "eval_accuracy": 0.7407407407407407,
67
+ "eval_loss": 0.5365740656852722,
68
+ "eval_runtime": 0.2565,
69
+ "eval_samples_per_second": 210.539,
70
+ "eval_steps_per_second": 3.899,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 3.11,
75
  "learning_rate": 3.5e-05,
76
+ "loss": 0.4553,
77
  "step": 7
78
  },
79
  {
80
  "epoch": 3.56,
81
  "learning_rate": 4e-05,
82
+ "loss": 0.5446,
83
  "step": 8
84
  },
85
  {
86
  "epoch": 4.0,
87
  "learning_rate": 4.5e-05,
88
+ "loss": 0.3763,
89
  "step": 9
90
  },
91
  {
92
  "epoch": 4.0,
93
+ "eval_accuracy": 0.8518518518518519,
94
+ "eval_loss": 0.35601484775543213,
95
+ "eval_runtime": 0.2638,
96
+ "eval_samples_per_second": 204.731,
97
+ "eval_steps_per_second": 3.791,
98
  "step": 9
99
  },
100
  {
101
  "epoch": 4.44,
102
  "learning_rate": 5e-05,
103
+ "loss": 0.4622,
104
  "step": 10
105
  },
106
  {
107
  "epoch": 4.89,
108
  "learning_rate": 4.9444444444444446e-05,
109
+ "loss": 0.397,
110
  "step": 11
111
  },
112
  {
113
  "epoch": 4.89,
114
+ "eval_accuracy": 0.8518518518518519,
115
+ "eval_loss": 0.2999265789985657,
116
+ "eval_runtime": 0.2681,
117
+ "eval_samples_per_second": 201.389,
118
+ "eval_steps_per_second": 3.729,
119
  "step": 11
120
  },
121
  {
122
  "epoch": 5.33,
123
  "learning_rate": 4.888888888888889e-05,
124
+ "loss": 0.2849,
125
  "step": 12
126
  },
127
  {
128
  "epoch": 5.78,
129
  "learning_rate": 4.8333333333333334e-05,
130
+ "loss": 0.3313,
131
  "step": 13
132
  },
133
  {
134
  "epoch": 5.78,
135
  "eval_accuracy": 0.9074074074074074,
136
+ "eval_loss": 0.23073385655879974,
137
+ "eval_runtime": 0.2595,
138
+ "eval_samples_per_second": 208.115,
139
+ "eval_steps_per_second": 3.854,
140
  "step": 13
141
  },
142
  {
143
  "epoch": 6.22,
144
  "learning_rate": 4.7777777777777784e-05,
145
+ "loss": 0.2448,
146
  "step": 14
147
  },
148
  {
149
  "epoch": 6.67,
150
  "learning_rate": 4.722222222222222e-05,
151
+ "loss": 0.2957,
152
  "step": 15
153
  },
154
  {
155
  "epoch": 6.67,
156
+ "eval_accuracy": 0.9259259259259259,
157
+ "eval_loss": 0.1745777279138565,
158
+ "eval_runtime": 0.2586,
159
+ "eval_samples_per_second": 208.833,
160
+ "eval_steps_per_second": 3.867,
161
  "step": 15
162
  },
163
  {
164
  "epoch": 7.11,
165
  "learning_rate": 4.666666666666667e-05,
166
+ "loss": 0.2555,
167
  "step": 16
168
  },
169
  {
170
  "epoch": 7.56,
171
  "learning_rate": 4.6111111111111115e-05,
172
+ "loss": 0.3153,
173
  "step": 17
174
  },
175
  {
176
  "epoch": 8.0,
177
  "learning_rate": 4.555555555555556e-05,
178
+ "loss": 0.2383,
179
  "step": 18
180
  },
181
  {
182
  "epoch": 8.0,
183
  "eval_accuracy": 0.9444444444444444,
184
+ "eval_loss": 0.14317429065704346,
185
+ "eval_runtime": 0.2592,
186
+ "eval_samples_per_second": 208.343,
187
+ "eval_steps_per_second": 3.858,
188
  "step": 18
189
  },
190
  {
191
  "epoch": 8.44,
192
  "learning_rate": 4.5e-05,
193
+ "loss": 0.1994,
194
  "step": 19
195
  },
196
  {
197
  "epoch": 8.89,
198
  "learning_rate": 4.4444444444444447e-05,
199
+ "loss": 0.2664,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 8.89,
204
+ "eval_accuracy": 0.9074074074074074,
205
+ "eval_loss": 0.33199751377105713,
206
+ "eval_runtime": 0.2627,
207
+ "eval_samples_per_second": 205.531,
208
+ "eval_steps_per_second": 3.806,
209
  "step": 20
210
  },
211
  {
212
  "epoch": 9.33,
213
  "learning_rate": 4.388888888888889e-05,
214
+ "loss": 0.506,
215
  "step": 21
216
  },
217
  {
218
  "epoch": 9.78,
219
  "learning_rate": 4.3333333333333334e-05,
220
+ "loss": 0.2242,
221
  "step": 22
222
  },
223
  {
224
  "epoch": 9.78,
225
  "eval_accuracy": 0.9629629629629629,
226
+ "eval_loss": 0.11195674538612366,
227
+ "eval_runtime": 0.2715,
228
+ "eval_samples_per_second": 198.931,
229
+ "eval_steps_per_second": 3.684,
230
  "step": 22
231
  },
232
  {
233
  "epoch": 10.22,
234
  "learning_rate": 4.277777777777778e-05,
235
+ "loss": 0.1685,
236
  "step": 23
237
  },
238
  {
239
  "epoch": 10.67,
240
  "learning_rate": 4.222222222222222e-05,
241
+ "loss": 0.2072,
242
  "step": 24
243
  },
244
  {
245
  "epoch": 10.67,
246
+ "eval_accuracy": 0.9629629629629629,
247
+ "eval_loss": 0.07178916037082672,
248
+ "eval_runtime": 0.2631,
249
+ "eval_samples_per_second": 205.262,
250
+ "eval_steps_per_second": 3.801,
251
  "step": 24
252
  },
253
  {
254
  "epoch": 11.11,
255
  "learning_rate": 4.166666666666667e-05,
256
+ "loss": 0.1653,
257
  "step": 25
258
  },
259
  {
260
  "epoch": 11.56,
261
  "learning_rate": 4.111111111111111e-05,
262
+ "loss": 0.1573,
263
  "step": 26
264
  },
265
  {
266
  "epoch": 12.0,
267
  "learning_rate": 4.055555555555556e-05,
268
+ "loss": 0.1399,
269
  "step": 27
270
  },
271
  {
272
  "epoch": 12.0,
273
  "eval_accuracy": 0.9814814814814815,
274
+ "eval_loss": 0.049447279423475266,
275
+ "eval_runtime": 0.2574,
276
+ "eval_samples_per_second": 209.772,
277
+ "eval_steps_per_second": 3.885,
278
  "step": 27
279
  },
280
  {
281
  "epoch": 12.44,
282
  "learning_rate": 4e-05,
283
+ "loss": 0.1696,
284
  "step": 28
285
  },
286
  {
287
  "epoch": 12.89,
288
  "learning_rate": 3.944444444444445e-05,
289
+ "loss": 0.1846,
290
  "step": 29
291
  },
292
  {
293
  "epoch": 12.89,
294
+ "eval_accuracy": 1.0,
295
+ "eval_loss": 0.03731463849544525,
296
  "eval_runtime": 0.2604,
297
+ "eval_samples_per_second": 207.336,
298
  "eval_steps_per_second": 3.84,
299
  "step": 29
300
  },
301
  {
302
  "epoch": 13.33,
303
  "learning_rate": 3.888888888888889e-05,
304
+ "loss": 0.1438,
305
  "step": 30
306
  },
307
  {
308
  "epoch": 13.78,
309
  "learning_rate": 3.8333333333333334e-05,
310
+ "loss": 0.1816,
311
  "step": 31
312
  },
313
  {
314
  "epoch": 13.78,
315
+ "eval_accuracy": 1.0,
316
+ "eval_loss": 0.03541439026594162,
317
+ "eval_runtime": 0.2612,
318
+ "eval_samples_per_second": 206.767,
319
+ "eval_steps_per_second": 3.829,
320
  "step": 31
321
  },
322
  {
323
  "epoch": 14.22,
324
  "learning_rate": 3.777777777777778e-05,
325
+ "loss": 0.2004,
326
  "step": 32
327
  },
328
  {
329
  "epoch": 14.67,
330
  "learning_rate": 3.722222222222222e-05,
331
+ "loss": 0.1453,
332
  "step": 33
333
  },
334
  {
335
  "epoch": 14.67,
336
  "eval_accuracy": 0.9814814814814815,
337
+ "eval_loss": 0.04606747254729271,
338
+ "eval_runtime": 0.2706,
339
+ "eval_samples_per_second": 199.556,
340
+ "eval_steps_per_second": 3.695,
341
  "step": 33
342
  },
343
  {
344
  "epoch": 15.11,
345
  "learning_rate": 3.6666666666666666e-05,
346
+ "loss": 0.1129,
347
  "step": 34
348
  },
349
  {
350
  "epoch": 15.56,
351
  "learning_rate": 3.611111111111111e-05,
352
+ "loss": 0.1819,
353
  "step": 35
354
  },
355
  {
356
  "epoch": 16.0,
357
  "learning_rate": 3.555555555555556e-05,
358
+ "loss": 0.1406,
359
  "step": 36
360
  },
361
  {
362
  "epoch": 16.0,
363
  "eval_accuracy": 1.0,
364
+ "eval_loss": 0.03328908979892731,
365
+ "eval_runtime": 0.2597,
366
+ "eval_samples_per_second": 207.893,
367
+ "eval_steps_per_second": 3.85,
368
  "step": 36
369
  },
370
  {
371
  "epoch": 16.44,
372
  "learning_rate": 3.5e-05,
373
+ "loss": 0.1389,
374
  "step": 37
375
  },
376
  {
377
  "epoch": 16.89,
378
  "learning_rate": 3.444444444444445e-05,
379
+ "loss": 0.1749,
380
  "step": 38
381
  },
382
  {
383
  "epoch": 16.89,
384
+ "eval_accuracy": 1.0,
385
+ "eval_loss": 0.02746324986219406,
386
+ "eval_runtime": 0.2621,
387
+ "eval_samples_per_second": 206.026,
388
+ "eval_steps_per_second": 3.815,
389
  "step": 38
390
  },
391
  {
392
  "epoch": 17.33,
393
  "learning_rate": 3.388888888888889e-05,
394
+ "loss": 0.1079,
395
  "step": 39
396
  },
397
  {
398
  "epoch": 17.78,
399
  "learning_rate": 3.3333333333333335e-05,
400
+ "loss": 0.1383,
401
  "step": 40
402
  },
403
  {
404
  "epoch": 17.78,
405
+ "eval_accuracy": 1.0,
406
+ "eval_loss": 0.02032529003918171,
407
+ "eval_runtime": 0.258,
408
+ "eval_samples_per_second": 209.286,
409
+ "eval_steps_per_second": 3.876,
410
  "step": 40
411
  },
412
  {
413
  "epoch": 18.22,
414
  "learning_rate": 3.277777777777778e-05,
415
+ "loss": 0.1454,
416
  "step": 41
417
  },
418
  {
419
  "epoch": 18.67,
420
  "learning_rate": 3.222222222222223e-05,
421
+ "loss": 0.1659,
422
  "step": 42
423
  },
424
  {
425
  "epoch": 18.67,
426
+ "eval_accuracy": 1.0,
427
+ "eval_loss": 0.018641581758856773,
428
+ "eval_runtime": 0.2631,
429
+ "eval_samples_per_second": 205.27,
430
+ "eval_steps_per_second": 3.801,
431
  "step": 42
432
  },
433
  {
434
  "epoch": 19.11,
435
  "learning_rate": 3.1666666666666666e-05,
436
+ "loss": 0.5264,
437
  "step": 43
438
  },
439
  {
440
  "epoch": 19.56,
441
  "learning_rate": 3.111111111111111e-05,
442
+ "loss": 0.1586,
443
  "step": 44
444
  },
445
  {
446
  "epoch": 20.0,
447
  "learning_rate": 3.055555555555556e-05,
448
+ "loss": 0.153,
449
  "step": 45
450
  },
451
  {
452
  "epoch": 20.0,
453
+ "eval_accuracy": 1.0,
454
+ "eval_loss": 0.018401814624667168,
455
+ "eval_runtime": 0.2579,
456
+ "eval_samples_per_second": 209.351,
457
+ "eval_steps_per_second": 3.877,
458
  "step": 45
459
  },
460
  {
461
  "epoch": 20.44,
462
  "learning_rate": 3e-05,
463
+ "loss": 0.1323,
464
  "step": 46
465
  },
466
  {
467
  "epoch": 20.89,
468
  "learning_rate": 2.9444444444444448e-05,
469
+ "loss": 0.1497,
470
  "step": 47
471
  },
472
  {
473
  "epoch": 20.89,
474
+ "eval_accuracy": 1.0,
475
+ "eval_loss": 0.021507540717720985,
476
+ "eval_runtime": 0.2732,
477
+ "eval_samples_per_second": 197.68,
478
+ "eval_steps_per_second": 3.661,
479
  "step": 47
480
  },
481
  {
482
  "epoch": 21.33,
483
  "learning_rate": 2.8888888888888888e-05,
484
+ "loss": 0.2375,
485
  "step": 48
486
  },
487
  {
488
  "epoch": 21.78,
489
  "learning_rate": 2.8333333333333335e-05,
490
+ "loss": 0.1088,
491
  "step": 49
492
  },
493
  {
494
  "epoch": 21.78,
495
  "eval_accuracy": 0.9814814814814815,
496
+ "eval_loss": 0.040791917592287064,
497
+ "eval_runtime": 0.326,
498
+ "eval_samples_per_second": 165.619,
499
+ "eval_steps_per_second": 3.067,
500
  "step": 49
501
  },
502
  {
503
  "epoch": 22.22,
504
  "learning_rate": 2.777777777777778e-05,
505
+ "loss": 0.0887,
506
  "step": 50
507
  },
508
  {
509
  "epoch": 22.67,
510
  "learning_rate": 2.7222222222222223e-05,
511
+ "loss": 0.1796,
512
  "step": 51
513
  },
514
  {
515
  "epoch": 22.67,
516
+ "eval_accuracy": 0.9814814814814815,
517
+ "eval_loss": 0.03766679763793945,
518
+ "eval_runtime": 0.2593,
519
+ "eval_samples_per_second": 208.223,
520
+ "eval_steps_per_second": 3.856,
521
  "step": 51
522
  },
523
  {
524
  "epoch": 23.11,
525
  "learning_rate": 2.6666666666666667e-05,
526
+ "loss": 0.1459,
527
  "step": 52
528
  },
529
  {
530
  "epoch": 23.56,
531
  "learning_rate": 2.6111111111111114e-05,
532
+ "loss": 0.156,
533
  "step": 53
534
  },
535
  {
536
  "epoch": 24.0,
537
  "learning_rate": 2.5555555555555554e-05,
538
+ "loss": 0.1041,
539
  "step": 54
540
  },
541
  {
542
  "epoch": 24.0,
543
  "eval_accuracy": 0.9814814814814815,
544
+ "eval_loss": 0.06314324587583542,
545
+ "eval_runtime": 0.2592,
546
+ "eval_samples_per_second": 208.351,
547
+ "eval_steps_per_second": 3.858,
548
  "step": 54
549
  },
550
  {
551
  "epoch": 24.44,
552
  "learning_rate": 2.5e-05,
553
+ "loss": 0.1796,
554
  "step": 55
555
  },
556
  {
557
  "epoch": 24.89,
558
  "learning_rate": 2.4444444444444445e-05,
559
+ "loss": 0.1193,
560
  "step": 56
561
  },
562
  {
563
  "epoch": 24.89,
564
  "eval_accuracy": 0.9814814814814815,
565
+ "eval_loss": 0.06367243826389313,
566
+ "eval_runtime": 0.2815,
567
+ "eval_samples_per_second": 191.858,
568
+ "eval_steps_per_second": 3.553,
569
  "step": 56
570
  },
571
  {
572
  "epoch": 25.33,
573
  "learning_rate": 2.3888888888888892e-05,
574
+ "loss": 0.0848,
575
  "step": 57
576
  },
577
  {
578
  "epoch": 25.78,
579
  "learning_rate": 2.3333333333333336e-05,
580
+ "loss": 0.1653,
581
  "step": 58
582
  },
583
  {
584
  "epoch": 25.78,
585
  "eval_accuracy": 0.9814814814814815,
586
+ "eval_loss": 0.07295241206884384,
587
+ "eval_runtime": 0.2602,
588
+ "eval_samples_per_second": 207.497,
589
+ "eval_steps_per_second": 3.843,
590
  "step": 58
591
  },
592
  {
593
  "epoch": 26.22,
594
  "learning_rate": 2.277777777777778e-05,
595
+ "loss": 0.0973,
596
  "step": 59
597
  },
598
  {
599
  "epoch": 26.67,
600
  "learning_rate": 2.2222222222222223e-05,
601
+ "loss": 0.1296,
602
  "step": 60
603
  },
604
  {
605
  "epoch": 26.67,
606
  "eval_accuracy": 0.9814814814814815,
607
+ "eval_loss": 0.07793273031711578,
608
+ "eval_runtime": 0.2603,
609
+ "eval_samples_per_second": 207.468,
610
+ "eval_steps_per_second": 3.842,
611
  "step": 60
612
  },
613
  {
614
  "epoch": 27.11,
615
  "learning_rate": 2.1666666666666667e-05,
616
+ "loss": 0.1243,
617
  "step": 61
618
  },
619
  {
620
  "epoch": 27.56,
621
  "learning_rate": 2.111111111111111e-05,
622
+ "loss": 0.1566,
623
  "step": 62
624
  },
625
  {
626
  "epoch": 28.0,
627
  "learning_rate": 2.0555555555555555e-05,
628
+ "loss": 0.1036,
629
  "step": 63
630
  },
631
  {
632
  "epoch": 28.0,
633
+ "eval_accuracy": 0.9814814814814815,
634
+ "eval_loss": 0.031223006546497345,
635
+ "eval_runtime": 0.2604,
636
+ "eval_samples_per_second": 207.369,
637
+ "eval_steps_per_second": 3.84,
638
  "step": 63
639
  },
640
  {
641
  "epoch": 28.44,
642
  "learning_rate": 2e-05,
643
+ "loss": 0.1376,
644
  "step": 64
645
  },
646
  {
647
  "epoch": 28.89,
648
  "learning_rate": 1.9444444444444445e-05,
649
+ "loss": 0.1287,
650
  "step": 65
651
  },
652
  {
653
  "epoch": 28.89,
654
+ "eval_accuracy": 1.0,
655
+ "eval_loss": 0.011618535034358501,
656
+ "eval_runtime": 0.2729,
657
+ "eval_samples_per_second": 197.869,
658
+ "eval_steps_per_second": 3.664,
659
  "step": 65
660
  },
661
  {
662
  "epoch": 29.33,
663
  "learning_rate": 1.888888888888889e-05,
664
+ "loss": 0.0961,
665
  "step": 66
666
  },
667
  {
668
  "epoch": 29.78,
669
  "learning_rate": 1.8333333333333333e-05,
670
+ "loss": 0.1307,
671
  "step": 67
672
  },
673
  {
674
  "epoch": 29.78,
675
+ "eval_accuracy": 1.0,
676
+ "eval_loss": 0.012949406169354916,
677
+ "eval_runtime": 0.2593,
678
+ "eval_samples_per_second": 208.256,
679
+ "eval_steps_per_second": 3.857,
680
  "step": 67
681
  },
682
  {
683
  "epoch": 30.22,
684
  "learning_rate": 1.777777777777778e-05,
685
+ "loss": 0.0873,
686
  "step": 68
687
  },
688
  {
689
  "epoch": 30.67,
690
  "learning_rate": 1.7222222222222224e-05,
691
+ "loss": 0.1337,
692
  "step": 69
693
  },
694
  {
695
  "epoch": 30.67,
696
+ "eval_accuracy": 1.0,
697
+ "eval_loss": 0.01411823183298111,
698
+ "eval_runtime": 0.2669,
699
+ "eval_samples_per_second": 202.286,
700
+ "eval_steps_per_second": 3.746,
701
  "step": 69
702
  },
703
  {
704
  "epoch": 31.11,
705
  "learning_rate": 1.6666666666666667e-05,
706
+ "loss": 0.0961,
707
  "step": 70
708
  },
709
  {
710
  "epoch": 31.56,
711
  "learning_rate": 1.6111111111111115e-05,
712
+ "loss": 0.1316,
713
  "step": 71
714
  },
715
  {
716
  "epoch": 32.0,
717
  "learning_rate": 1.5555555555555555e-05,
718
+ "loss": 0.1274,
719
  "step": 72
720
  },
721
  {
722
  "epoch": 32.0,
723
  "eval_accuracy": 1.0,
724
+ "eval_loss": 0.016076432541012764,
725
+ "eval_runtime": 0.2582,
726
+ "eval_samples_per_second": 209.143,
727
+ "eval_steps_per_second": 3.873,
728
  "step": 72
729
  },
730
  {
731
  "epoch": 32.44,
732
  "learning_rate": 1.5e-05,
733
+ "loss": 0.1192,
734
  "step": 73
735
  },
736
  {
737
  "epoch": 32.89,
738
  "learning_rate": 1.4444444444444444e-05,
739
+ "loss": 0.1612,
740
  "step": 74
741
  },
742
  {
743
  "epoch": 32.89,
744
  "eval_accuracy": 1.0,
745
+ "eval_loss": 0.017708102241158485,
746
+ "eval_runtime": 0.2623,
747
+ "eval_samples_per_second": 205.838,
748
+ "eval_steps_per_second": 3.812,
749
  "step": 74
750
  },
751
  {
752
  "epoch": 33.33,
753
  "learning_rate": 1.388888888888889e-05,
754
+ "loss": 0.2107,
755
  "step": 75
756
  },
757
  {
758
  "epoch": 33.78,
759
  "learning_rate": 1.3333333333333333e-05,
760
+ "loss": 0.1504,
761
  "step": 76
762
  },
763
  {
764
  "epoch": 33.78,
765
  "eval_accuracy": 1.0,
766
+ "eval_loss": 0.018124129623174667,
767
+ "eval_runtime": 0.2587,
768
+ "eval_samples_per_second": 208.707,
769
+ "eval_steps_per_second": 3.865,
770
  "step": 76
771
  },
772
  {
773
  "epoch": 34.22,
774
  "learning_rate": 1.2777777777777777e-05,
775
+ "loss": 0.1108,
776
  "step": 77
777
  },
778
  {
779
  "epoch": 34.67,
780
  "learning_rate": 1.2222222222222222e-05,
781
+ "loss": 0.1307,
782
  "step": 78
783
  },
784
  {
785
  "epoch": 34.67,
786
  "eval_accuracy": 1.0,
787
+ "eval_loss": 0.017455147579312325,
788
+ "eval_runtime": 0.2609,
789
+ "eval_samples_per_second": 206.946,
790
+ "eval_steps_per_second": 3.832,
791
  "step": 78
792
  },
793
  {
794
  "epoch": 35.11,
795
  "learning_rate": 1.1666666666666668e-05,
796
+ "loss": 0.348,
797
  "step": 79
798
  },
799
  {
800
  "epoch": 35.56,
801
  "learning_rate": 1.1111111111111112e-05,
802
+ "loss": 0.1275,
803
  "step": 80
804
  },
805
  {
806
  "epoch": 36.0,
807
  "learning_rate": 1.0555555555555555e-05,
808
+ "loss": 0.125,
809
  "step": 81
810
  },
811
  {
812
  "epoch": 36.0,
813
  "eval_accuracy": 1.0,
814
+ "eval_loss": 0.01702389493584633,
815
+ "eval_runtime": 0.2603,
816
+ "eval_samples_per_second": 207.432,
817
+ "eval_steps_per_second": 3.841,
818
  "step": 81
819
  },
820
  {
821
  "epoch": 36.44,
822
  "learning_rate": 1e-05,
823
+ "loss": 0.1085,
824
  "step": 82
825
  },
826
  {
827
  "epoch": 36.89,
828
  "learning_rate": 9.444444444444445e-06,
829
+ "loss": 0.1357,
830
  "step": 83
831
  },
832
  {
833
  "epoch": 36.89,
834
  "eval_accuracy": 1.0,
835
+ "eval_loss": 0.016507649794220924,
836
+ "eval_runtime": 0.2662,
837
+ "eval_samples_per_second": 202.839,
838
+ "eval_steps_per_second": 3.756,
839
  "step": 83
840
  },
841
  {
842
  "epoch": 37.33,
843
  "learning_rate": 8.88888888888889e-06,
844
+ "loss": 0.1008,
845
  "step": 84
846
  },
847
  {
848
  "epoch": 37.78,
849
  "learning_rate": 8.333333333333334e-06,
850
+ "loss": 0.1033,
851
  "step": 85
852
  },
853
  {
854
  "epoch": 37.78,
855
  "eval_accuracy": 1.0,
856
+ "eval_loss": 0.016153164207935333,
857
+ "eval_runtime": 0.4053,
858
+ "eval_samples_per_second": 133.244,
859
+ "eval_steps_per_second": 2.467,
860
  "step": 85
861
  },
862
  {
863
  "epoch": 38.22,
864
  "learning_rate": 7.777777777777777e-06,
865
+ "loss": 0.1346,
866
  "step": 86
867
  },
868
  {
869
  "epoch": 38.67,
870
  "learning_rate": 7.222222222222222e-06,
871
+ "loss": 0.1749,
872
  "step": 87
873
  },
874
  {
875
  "epoch": 38.67,
876
  "eval_accuracy": 1.0,
877
+ "eval_loss": 0.016394561156630516,
878
+ "eval_runtime": 0.2605,
879
+ "eval_samples_per_second": 207.331,
880
+ "eval_steps_per_second": 3.839,
881
  "step": 87
882
  },
883
  {
884
  "epoch": 39.11,
885
  "learning_rate": 6.666666666666667e-06,
886
+ "loss": 0.0824,
887
  "step": 88
888
  },
889
  {
890
  "epoch": 39.56,
891
  "learning_rate": 6.111111111111111e-06,
892
+ "loss": 0.1232,
893
  "step": 89
894
  },
895
  {
896
  "epoch": 40.0,
897
  "learning_rate": 5.555555555555556e-06,
898
+ "loss": 0.0906,
899
  "step": 90
900
  },
901
  {
902
  "epoch": 40.0,
903
+ "eval_accuracy": 1.0,
904
+ "eval_loss": 0.015265186317265034,
905
+ "eval_runtime": 0.2605,
906
+ "eval_samples_per_second": 207.293,
907
+ "eval_steps_per_second": 3.839,
908
  "step": 90
909
  },
910
  {
911
  "epoch": 40.44,
912
  "learning_rate": 5e-06,
913
+ "loss": 0.1454,
914
  "step": 91
915
  },
916
  {
917
  "epoch": 40.89,
918
  "learning_rate": 4.444444444444445e-06,
919
+ "loss": 0.1349,
920
  "step": 92
921
  },
922
  {
923
  "epoch": 40.89,
924
+ "eval_accuracy": 1.0,
925
+ "eval_loss": 0.01515868864953518,
926
+ "eval_runtime": 0.26,
927
+ "eval_samples_per_second": 207.728,
928
+ "eval_steps_per_second": 3.847,
929
  "step": 92
930
  },
931
  {
932
  "epoch": 41.33,
933
  "learning_rate": 3.888888888888889e-06,
934
+ "loss": 0.1047,
935
  "step": 93
936
  },
937
  {
938
  "epoch": 41.78,
939
  "learning_rate": 3.3333333333333333e-06,
940
+ "loss": 0.1056,
941
  "step": 94
942
  },
943
  {
944
  "epoch": 41.78,
945
+ "eval_accuracy": 1.0,
946
+ "eval_loss": 0.015033537521958351,
947
+ "eval_runtime": 0.2588,
948
+ "eval_samples_per_second": 208.667,
949
+ "eval_steps_per_second": 3.864,
950
  "step": 94
951
  },
952
  {
953
  "epoch": 42.22,
954
  "learning_rate": 2.777777777777778e-06,
955
+ "loss": 0.1196,
956
  "step": 95
957
  },
958
  {
959
  "epoch": 42.67,
960
  "learning_rate": 2.2222222222222225e-06,
961
+ "loss": 0.1328,
962
  "step": 96
963
  },
964
  {
965
  "epoch": 42.67,
966
+ "eval_accuracy": 1.0,
967
+ "eval_loss": 0.014817849732935429,
968
+ "eval_runtime": 0.2611,
969
+ "eval_samples_per_second": 206.804,
970
+ "eval_steps_per_second": 3.83,
971
  "step": 96
972
  },
973
  {
974
  "epoch": 43.11,
975
  "learning_rate": 1.6666666666666667e-06,
976
+ "loss": 0.1358,
977
  "step": 97
978
  },
979
  {
980
  "epoch": 43.56,
981
  "learning_rate": 1.1111111111111112e-06,
982
+ "loss": 0.1221,
983
  "step": 98
984
  },
985
  {
986
  "epoch": 44.0,
987
  "learning_rate": 5.555555555555556e-07,
988
+ "loss": 0.0742,
989
  "step": 99
990
  },
991
  {
992
  "epoch": 44.0,
993
+ "eval_accuracy": 1.0,
994
+ "eval_loss": 0.014790916815400124,
995
+ "eval_runtime": 0.2592,
996
+ "eval_samples_per_second": 208.324,
997
+ "eval_steps_per_second": 3.858,
998
  "step": 99
999
  },
1000
  {
1001
  "epoch": 44.44,
1002
  "learning_rate": 0.0,
1003
+ "loss": 0.0875,
1004
  "step": 100
1005
  },
1006
  {
1007
  "epoch": 44.44,
1008
+ "eval_accuracy": 1.0,
1009
+ "eval_loss": 0.014792277477681637,
1010
+ "eval_runtime": 0.2608,
1011
+ "eval_samples_per_second": 207.072,
1012
+ "eval_steps_per_second": 3.835,
1013
  "step": 100
1014
  },
1015
  {
1016
  "epoch": 44.44,
1017
  "step": 100,
1018
  "total_flos": 1.6586385457107272e+18,
1019
+ "train_loss": 0.20484884686768054,
1020
+ "train_runtime": 700.3363,
1021
+ "train_samples_per_second": 34.341,
1022
+ "train_steps_per_second": 0.143
1023
  }
1024
  ],
1025
  "max_steps": 100,