sbaner24 commited on
Commit
e2eada4
1 Parent(s): e9f32f6

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +8 -8
  2. eval_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +305 -305
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.017473837360739708,
5
- "eval_runtime": 0.3229,
6
- "eval_samples_per_second": 167.215,
7
- "eval_steps_per_second": 3.097,
8
  "total_flos": 1.6586385457107272e+18,
9
- "train_loss": 0.15135242883116007,
10
- "train_runtime": 1078.2452,
11
- "train_samples_per_second": 22.305,
12
- "train_steps_per_second": 0.093
13
  }
 
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.026890547946095467,
5
+ "eval_runtime": 0.3131,
6
+ "eval_samples_per_second": 172.459,
7
+ "eval_steps_per_second": 3.194,
8
  "total_flos": 1.6586385457107272e+18,
9
+ "train_loss": 0.1744049086794257,
10
+ "train_runtime": 686.3928,
11
+ "train_samples_per_second": 35.038,
12
+ "train_steps_per_second": 0.146
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.017473837360739708,
5
- "eval_runtime": 0.3229,
6
- "eval_samples_per_second": 167.215,
7
- "eval_steps_per_second": 3.097
8
  }
 
1
  {
2
  "epoch": 44.44,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.026890547946095467,
5
+ "eval_runtime": 0.3131,
6
+ "eval_samples_per_second": 172.459,
7
+ "eval_steps_per_second": 3.194
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 44.44,
3
  "total_flos": 1.6586385457107272e+18,
4
- "train_loss": 0.15135242883116007,
5
- "train_runtime": 1078.2452,
6
- "train_samples_per_second": 22.305,
7
- "train_steps_per_second": 0.093
8
  }
 
1
  {
2
  "epoch": 44.44,
3
  "total_flos": 1.6586385457107272e+18,
4
+ "train_loss": 0.1744049086794257,
5
+ "train_runtime": 686.3928,
6
+ "train_samples_per_second": 35.038,
7
+ "train_steps_per_second": 0.146
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_metric": 1.0,
3
- "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM1/checkpoint-45",
4
  "epoch": 44.44444444444444,
5
  "global_step": 100,
6
  "is_hyper_param_search": false,
@@ -10,1016 +10,1016 @@
10
  {
11
  "epoch": 0.44,
12
  "learning_rate": 5e-06,
13
- "loss": 0.6921,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.89,
18
  "learning_rate": 1e-05,
19
- "loss": 0.6861,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.89,
24
- "eval_accuracy": 0.5370370370370371,
25
- "eval_loss": 0.6742474436759949,
26
- "eval_runtime": 0.2725,
27
- "eval_samples_per_second": 198.192,
28
- "eval_steps_per_second": 3.67,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 1.33,
33
  "learning_rate": 1.5e-05,
34
- "loss": 0.659,
35
  "step": 3
36
  },
37
  {
38
  "epoch": 1.78,
39
  "learning_rate": 2e-05,
40
- "loss": 0.6446,
41
  "step": 4
42
  },
43
  {
44
  "epoch": 1.78,
45
- "eval_accuracy": 0.7037037037037037,
46
- "eval_loss": 0.5790768265724182,
47
- "eval_runtime": 0.2769,
48
- "eval_samples_per_second": 195.034,
49
- "eval_steps_per_second": 3.612,
50
  "step": 4
51
  },
52
  {
53
  "epoch": 2.22,
54
  "learning_rate": 2.5e-05,
55
- "loss": 0.5569,
56
  "step": 5
57
  },
58
  {
59
  "epoch": 2.67,
60
  "learning_rate": 3e-05,
61
- "loss": 0.5406,
62
  "step": 6
63
  },
64
  {
65
  "epoch": 2.67,
66
- "eval_accuracy": 0.8148148148148148,
67
- "eval_loss": 0.43681636452674866,
68
- "eval_runtime": 0.2645,
69
- "eval_samples_per_second": 204.182,
70
- "eval_steps_per_second": 3.781,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 3.11,
75
  "learning_rate": 3.5e-05,
76
- "loss": 0.3935,
77
  "step": 7
78
  },
79
  {
80
  "epoch": 3.56,
81
  "learning_rate": 4e-05,
82
- "loss": 0.497,
83
  "step": 8
84
  },
85
  {
86
  "epoch": 4.0,
87
  "learning_rate": 4.5e-05,
88
- "loss": 0.328,
89
  "step": 9
90
  },
91
  {
92
  "epoch": 4.0,
93
- "eval_accuracy": 0.9259259259259259,
94
- "eval_loss": 0.29564177989959717,
95
- "eval_runtime": 0.2626,
96
- "eval_samples_per_second": 205.67,
97
- "eval_steps_per_second": 3.809,
98
  "step": 9
99
  },
100
  {
101
  "epoch": 4.44,
102
  "learning_rate": 5e-05,
103
- "loss": 0.3644,
104
  "step": 10
105
  },
106
  {
107
  "epoch": 4.89,
108
  "learning_rate": 4.9444444444444446e-05,
109
- "loss": 0.3238,
110
  "step": 11
111
  },
112
  {
113
  "epoch": 4.89,
114
- "eval_accuracy": 0.9259259259259259,
115
- "eval_loss": 0.26564526557922363,
116
- "eval_runtime": 0.2618,
117
- "eval_samples_per_second": 206.277,
118
- "eval_steps_per_second": 3.82,
119
  "step": 11
120
  },
121
  {
122
  "epoch": 5.33,
123
  "learning_rate": 4.888888888888889e-05,
124
- "loss": 0.2646,
125
  "step": 12
126
  },
127
  {
128
  "epoch": 5.78,
129
  "learning_rate": 4.8333333333333334e-05,
130
- "loss": 0.2851,
131
  "step": 13
132
  },
133
  {
134
  "epoch": 5.78,
135
- "eval_accuracy": 0.9259259259259259,
136
- "eval_loss": 0.20708920061588287,
137
- "eval_runtime": 0.3296,
138
- "eval_samples_per_second": 163.85,
139
- "eval_steps_per_second": 3.034,
140
  "step": 13
141
  },
142
  {
143
  "epoch": 6.22,
144
  "learning_rate": 4.7777777777777784e-05,
145
- "loss": 0.1854,
146
  "step": 14
147
  },
148
  {
149
  "epoch": 6.67,
150
  "learning_rate": 4.722222222222222e-05,
151
- "loss": 0.2767,
152
  "step": 15
153
  },
154
  {
155
  "epoch": 6.67,
156
- "eval_accuracy": 0.9814814814814815,
157
- "eval_loss": 0.13367433845996857,
158
- "eval_runtime": 0.2809,
159
- "eval_samples_per_second": 192.273,
160
- "eval_steps_per_second": 3.561,
161
  "step": 15
162
  },
163
  {
164
  "epoch": 7.11,
165
  "learning_rate": 4.666666666666667e-05,
166
- "loss": 0.2161,
167
  "step": 16
168
  },
169
  {
170
  "epoch": 7.56,
171
  "learning_rate": 4.6111111111111115e-05,
172
- "loss": 0.1968,
173
  "step": 17
174
  },
175
  {
176
  "epoch": 8.0,
177
  "learning_rate": 4.555555555555556e-05,
178
- "loss": 0.1417,
179
  "step": 18
180
  },
181
  {
182
  "epoch": 8.0,
183
- "eval_accuracy": 0.8888888888888888,
184
- "eval_loss": 0.24701863527297974,
185
- "eval_runtime": 0.2963,
186
- "eval_samples_per_second": 182.256,
187
- "eval_steps_per_second": 3.375,
188
  "step": 18
189
  },
190
  {
191
  "epoch": 8.44,
192
  "learning_rate": 4.5e-05,
193
- "loss": 0.2184,
194
  "step": 19
195
  },
196
  {
197
  "epoch": 8.89,
198
  "learning_rate": 4.4444444444444447e-05,
199
- "loss": 0.1644,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 8.89,
204
- "eval_accuracy": 0.9814814814814815,
205
- "eval_loss": 0.09887748211622238,
206
- "eval_runtime": 0.2603,
207
- "eval_samples_per_second": 207.421,
208
- "eval_steps_per_second": 3.841,
209
  "step": 20
210
  },
211
  {
212
  "epoch": 9.33,
213
  "learning_rate": 4.388888888888889e-05,
214
- "loss": 0.237,
215
  "step": 21
216
  },
217
  {
218
  "epoch": 9.78,
219
  "learning_rate": 4.3333333333333334e-05,
220
- "loss": 0.145,
221
  "step": 22
222
  },
223
  {
224
  "epoch": 9.78,
225
- "eval_accuracy": 0.9814814814814815,
226
- "eval_loss": 0.12777921557426453,
227
- "eval_runtime": 0.2613,
228
- "eval_samples_per_second": 206.669,
229
- "eval_steps_per_second": 3.827,
230
  "step": 22
231
  },
232
  {
233
  "epoch": 10.22,
234
  "learning_rate": 4.277777777777778e-05,
235
- "loss": 0.1986,
236
  "step": 23
237
  },
238
  {
239
  "epoch": 10.67,
240
  "learning_rate": 4.222222222222222e-05,
241
- "loss": 0.1948,
242
  "step": 24
243
  },
244
  {
245
  "epoch": 10.67,
246
- "eval_accuracy": 0.9814814814814815,
247
- "eval_loss": 0.09025944769382477,
248
- "eval_runtime": 0.2617,
249
- "eval_samples_per_second": 206.322,
250
- "eval_steps_per_second": 3.821,
251
  "step": 24
252
  },
253
  {
254
  "epoch": 11.11,
255
  "learning_rate": 4.166666666666667e-05,
256
- "loss": 0.101,
257
  "step": 25
258
  },
259
  {
260
  "epoch": 11.56,
261
  "learning_rate": 4.111111111111111e-05,
262
- "loss": 0.1034,
263
  "step": 26
264
  },
265
  {
266
  "epoch": 12.0,
267
  "learning_rate": 4.055555555555556e-05,
268
- "loss": 0.1407,
269
  "step": 27
270
  },
271
  {
272
  "epoch": 12.0,
273
  "eval_accuracy": 0.9444444444444444,
274
- "eval_loss": 0.10264571756124496,
275
- "eval_runtime": 0.2631,
276
- "eval_samples_per_second": 205.247,
277
- "eval_steps_per_second": 3.801,
278
  "step": 27
279
  },
280
  {
281
  "epoch": 12.44,
282
  "learning_rate": 4e-05,
283
- "loss": 0.0916,
284
  "step": 28
285
  },
286
  {
287
  "epoch": 12.89,
288
  "learning_rate": 3.944444444444445e-05,
289
- "loss": 0.1213,
290
  "step": 29
291
  },
292
  {
293
  "epoch": 12.89,
294
  "eval_accuracy": 0.9814814814814815,
295
- "eval_loss": 0.0812113955616951,
296
- "eval_runtime": 0.2626,
297
- "eval_samples_per_second": 205.617,
298
- "eval_steps_per_second": 3.808,
299
  "step": 29
300
  },
301
  {
302
  "epoch": 13.33,
303
  "learning_rate": 3.888888888888889e-05,
304
- "loss": 0.0859,
305
  "step": 30
306
  },
307
  {
308
  "epoch": 13.78,
309
  "learning_rate": 3.8333333333333334e-05,
310
- "loss": 0.1258,
311
  "step": 31
312
  },
313
  {
314
  "epoch": 13.78,
315
  "eval_accuracy": 0.9814814814814815,
316
- "eval_loss": 0.09858040511608124,
317
- "eval_runtime": 0.2841,
318
- "eval_samples_per_second": 190.055,
319
- "eval_steps_per_second": 3.52,
320
  "step": 31
321
  },
322
  {
323
  "epoch": 14.22,
324
  "learning_rate": 3.777777777777778e-05,
325
- "loss": 0.0752,
326
  "step": 32
327
  },
328
  {
329
  "epoch": 14.67,
330
  "learning_rate": 3.722222222222222e-05,
331
- "loss": 0.1204,
332
  "step": 33
333
  },
334
  {
335
  "epoch": 14.67,
336
  "eval_accuracy": 0.9814814814814815,
337
- "eval_loss": 0.08133846521377563,
338
- "eval_runtime": 0.2617,
339
- "eval_samples_per_second": 206.327,
340
- "eval_steps_per_second": 3.821,
341
  "step": 33
342
  },
343
  {
344
  "epoch": 15.11,
345
  "learning_rate": 3.6666666666666666e-05,
346
- "loss": 0.1135,
347
  "step": 34
348
  },
349
  {
350
  "epoch": 15.56,
351
  "learning_rate": 3.611111111111111e-05,
352
- "loss": 0.1176,
353
  "step": 35
354
  },
355
  {
356
  "epoch": 16.0,
357
  "learning_rate": 3.555555555555556e-05,
358
- "loss": 0.0857,
359
  "step": 36
360
  },
361
  {
362
  "epoch": 16.0,
363
  "eval_accuracy": 0.9814814814814815,
364
- "eval_loss": 0.04036113619804382,
365
- "eval_runtime": 0.262,
366
- "eval_samples_per_second": 206.086,
367
- "eval_steps_per_second": 3.816,
368
  "step": 36
369
  },
370
  {
371
  "epoch": 16.44,
372
  "learning_rate": 3.5e-05,
373
- "loss": 0.0889,
374
  "step": 37
375
  },
376
  {
377
  "epoch": 16.89,
378
  "learning_rate": 3.444444444444445e-05,
379
- "loss": 0.12,
380
  "step": 38
381
  },
382
  {
383
  "epoch": 16.89,
384
  "eval_accuracy": 0.9814814814814815,
385
- "eval_loss": 0.05832526460289955,
386
- "eval_runtime": 0.2597,
387
- "eval_samples_per_second": 207.917,
388
- "eval_steps_per_second": 3.85,
389
  "step": 38
390
  },
391
  {
392
  "epoch": 17.33,
393
  "learning_rate": 3.388888888888889e-05,
394
- "loss": 0.0841,
395
  "step": 39
396
  },
397
  {
398
  "epoch": 17.78,
399
  "learning_rate": 3.3333333333333335e-05,
400
- "loss": 0.0929,
401
  "step": 40
402
  },
403
  {
404
  "epoch": 17.78,
405
- "eval_accuracy": 0.9814814814814815,
406
- "eval_loss": 0.07853430509567261,
407
- "eval_runtime": 0.262,
408
- "eval_samples_per_second": 206.126,
409
- "eval_steps_per_second": 3.817,
410
  "step": 40
411
  },
412
  {
413
  "epoch": 18.22,
414
  "learning_rate": 3.277777777777778e-05,
415
- "loss": 0.0988,
416
  "step": 41
417
  },
418
  {
419
  "epoch": 18.67,
420
  "learning_rate": 3.222222222222223e-05,
421
- "loss": 0.0997,
422
  "step": 42
423
  },
424
  {
425
  "epoch": 18.67,
426
- "eval_accuracy": 0.9814814814814815,
427
- "eval_loss": 0.04468917474150658,
428
- "eval_runtime": 0.2628,
429
- "eval_samples_per_second": 205.474,
430
- "eval_steps_per_second": 3.805,
431
  "step": 42
432
  },
433
  {
434
  "epoch": 19.11,
435
  "learning_rate": 3.1666666666666666e-05,
436
- "loss": 0.0674,
437
  "step": 43
438
  },
439
  {
440
  "epoch": 19.56,
441
  "learning_rate": 3.111111111111111e-05,
442
- "loss": 0.0985,
443
  "step": 44
444
  },
445
  {
446
  "epoch": 20.0,
447
  "learning_rate": 3.055555555555556e-05,
448
- "loss": 0.0649,
449
  "step": 45
450
  },
451
  {
452
  "epoch": 20.0,
453
  "eval_accuracy": 1.0,
454
- "eval_loss": 0.017473837360739708,
455
- "eval_runtime": 0.2637,
456
- "eval_samples_per_second": 204.747,
457
- "eval_steps_per_second": 3.792,
458
  "step": 45
459
  },
460
  {
461
  "epoch": 20.44,
462
  "learning_rate": 3e-05,
463
- "loss": 0.0953,
464
  "step": 46
465
  },
466
  {
467
  "epoch": 20.89,
468
  "learning_rate": 2.9444444444444448e-05,
469
- "loss": 0.0709,
470
  "step": 47
471
  },
472
  {
473
  "epoch": 20.89,
474
  "eval_accuracy": 1.0,
475
- "eval_loss": 0.021981148049235344,
476
- "eval_runtime": 0.2758,
477
- "eval_samples_per_second": 195.775,
478
- "eval_steps_per_second": 3.625,
479
  "step": 47
480
  },
481
  {
482
  "epoch": 21.33,
483
  "learning_rate": 2.8888888888888888e-05,
484
- "loss": 0.0808,
485
  "step": 48
486
  },
487
  {
488
  "epoch": 21.78,
489
  "learning_rate": 2.8333333333333335e-05,
490
- "loss": 0.1041,
491
  "step": 49
492
  },
493
  {
494
  "epoch": 21.78,
495
- "eval_accuracy": 0.9814814814814815,
496
- "eval_loss": 0.03725693002343178,
497
- "eval_runtime": 0.2628,
498
- "eval_samples_per_second": 205.518,
499
- "eval_steps_per_second": 3.806,
500
  "step": 49
501
  },
502
  {
503
  "epoch": 22.22,
504
  "learning_rate": 2.777777777777778e-05,
505
- "loss": 0.0706,
506
  "step": 50
507
  },
508
  {
509
  "epoch": 22.67,
510
  "learning_rate": 2.7222222222222223e-05,
511
- "loss": 0.1194,
512
  "step": 51
513
  },
514
  {
515
  "epoch": 22.67,
516
- "eval_accuracy": 0.9814814814814815,
517
- "eval_loss": 0.03398296236991882,
518
- "eval_runtime": 0.3775,
519
- "eval_samples_per_second": 143.045,
520
- "eval_steps_per_second": 2.649,
521
  "step": 51
522
  },
523
  {
524
  "epoch": 23.11,
525
  "learning_rate": 2.6666666666666667e-05,
526
- "loss": 0.082,
527
  "step": 52
528
  },
529
  {
530
  "epoch": 23.56,
531
  "learning_rate": 2.6111111111111114e-05,
532
- "loss": 0.087,
533
  "step": 53
534
  },
535
  {
536
  "epoch": 24.0,
537
  "learning_rate": 2.5555555555555554e-05,
538
- "loss": 0.07,
539
  "step": 54
540
  },
541
  {
542
  "epoch": 24.0,
543
  "eval_accuracy": 1.0,
544
- "eval_loss": 0.023272547870874405,
545
- "eval_runtime": 0.2622,
546
- "eval_samples_per_second": 205.956,
547
- "eval_steps_per_second": 3.814,
548
  "step": 54
549
  },
550
  {
551
  "epoch": 24.44,
552
  "learning_rate": 2.5e-05,
553
- "loss": 0.0837,
554
  "step": 55
555
  },
556
  {
557
  "epoch": 24.89,
558
  "learning_rate": 2.4444444444444445e-05,
559
- "loss": 0.0778,
560
  "step": 56
561
  },
562
  {
563
  "epoch": 24.89,
564
  "eval_accuracy": 1.0,
565
- "eval_loss": 0.017261186614632607,
566
- "eval_runtime": 0.2621,
567
- "eval_samples_per_second": 206.01,
568
- "eval_steps_per_second": 3.815,
569
  "step": 56
570
  },
571
  {
572
  "epoch": 25.33,
573
  "learning_rate": 2.3888888888888892e-05,
574
- "loss": 0.0507,
575
  "step": 57
576
  },
577
  {
578
  "epoch": 25.78,
579
  "learning_rate": 2.3333333333333336e-05,
580
- "loss": 0.07,
581
  "step": 58
582
  },
583
  {
584
  "epoch": 25.78,
585
  "eval_accuracy": 1.0,
586
- "eval_loss": 0.015340018086135387,
587
- "eval_runtime": 0.2636,
588
- "eval_samples_per_second": 204.892,
589
- "eval_steps_per_second": 3.794,
590
  "step": 58
591
  },
592
  {
593
  "epoch": 26.22,
594
  "learning_rate": 2.277777777777778e-05,
595
- "loss": 0.0984,
596
  "step": 59
597
  },
598
  {
599
  "epoch": 26.67,
600
  "learning_rate": 2.2222222222222223e-05,
601
- "loss": 0.0616,
602
  "step": 60
603
  },
604
  {
605
  "epoch": 26.67,
606
  "eval_accuracy": 1.0,
607
- "eval_loss": 0.01479976624250412,
608
- "eval_runtime": 0.271,
609
- "eval_samples_per_second": 199.266,
610
- "eval_steps_per_second": 3.69,
611
  "step": 60
612
  },
613
  {
614
  "epoch": 27.11,
615
  "learning_rate": 2.1666666666666667e-05,
616
- "loss": 0.103,
617
  "step": 61
618
  },
619
  {
620
  "epoch": 27.56,
621
  "learning_rate": 2.111111111111111e-05,
622
- "loss": 0.0802,
623
  "step": 62
624
  },
625
  {
626
  "epoch": 28.0,
627
  "learning_rate": 2.0555555555555555e-05,
628
- "loss": 0.0675,
629
  "step": 63
630
  },
631
  {
632
  "epoch": 28.0,
633
  "eval_accuracy": 1.0,
634
- "eval_loss": 0.0183400996029377,
635
- "eval_runtime": 0.3105,
636
- "eval_samples_per_second": 173.918,
637
- "eval_steps_per_second": 3.221,
638
  "step": 63
639
  },
640
  {
641
  "epoch": 28.44,
642
  "learning_rate": 2e-05,
643
- "loss": 0.1179,
644
  "step": 64
645
  },
646
  {
647
  "epoch": 28.89,
648
  "learning_rate": 1.9444444444444445e-05,
649
- "loss": 0.0763,
650
  "step": 65
651
  },
652
  {
653
  "epoch": 28.89,
654
  "eval_accuracy": 1.0,
655
- "eval_loss": 0.015255843289196491,
656
- "eval_runtime": 0.2619,
657
- "eval_samples_per_second": 206.15,
658
- "eval_steps_per_second": 3.818,
659
  "step": 65
660
  },
661
  {
662
  "epoch": 29.33,
663
  "learning_rate": 1.888888888888889e-05,
664
- "loss": 0.0636,
665
  "step": 66
666
  },
667
  {
668
  "epoch": 29.78,
669
  "learning_rate": 1.8333333333333333e-05,
670
- "loss": 0.083,
671
  "step": 67
672
  },
673
  {
674
  "epoch": 29.78,
675
  "eval_accuracy": 1.0,
676
- "eval_loss": 0.010834791697561741,
677
- "eval_runtime": 0.2626,
678
- "eval_samples_per_second": 205.618,
679
- "eval_steps_per_second": 3.808,
680
  "step": 67
681
  },
682
  {
683
  "epoch": 30.22,
684
  "learning_rate": 1.777777777777778e-05,
685
- "loss": 0.1013,
686
  "step": 68
687
  },
688
  {
689
  "epoch": 30.67,
690
  "learning_rate": 1.7222222222222224e-05,
691
- "loss": 0.1185,
692
  "step": 69
693
  },
694
  {
695
  "epoch": 30.67,
696
  "eval_accuracy": 1.0,
697
- "eval_loss": 0.014811315573751926,
698
- "eval_runtime": 0.2712,
699
- "eval_samples_per_second": 199.085,
700
- "eval_steps_per_second": 3.687,
701
  "step": 69
702
  },
703
  {
704
  "epoch": 31.11,
705
  "learning_rate": 1.6666666666666667e-05,
706
- "loss": 0.0456,
707
  "step": 70
708
  },
709
  {
710
  "epoch": 31.56,
711
  "learning_rate": 1.6111111111111115e-05,
712
- "loss": 0.1482,
713
  "step": 71
714
  },
715
  {
716
  "epoch": 32.0,
717
  "learning_rate": 1.5555555555555555e-05,
718
- "loss": 0.0911,
719
  "step": 72
720
  },
721
  {
722
  "epoch": 32.0,
723
  "eval_accuracy": 1.0,
724
- "eval_loss": 0.014465795829892159,
725
- "eval_runtime": 0.2594,
726
- "eval_samples_per_second": 208.185,
727
- "eval_steps_per_second": 3.855,
728
  "step": 72
729
  },
730
  {
731
  "epoch": 32.44,
732
  "learning_rate": 1.5e-05,
733
- "loss": 0.0615,
734
  "step": 73
735
  },
736
  {
737
  "epoch": 32.89,
738
  "learning_rate": 1.4444444444444444e-05,
739
- "loss": 0.0868,
740
  "step": 74
741
  },
742
  {
743
  "epoch": 32.89,
744
  "eval_accuracy": 1.0,
745
- "eval_loss": 0.015387580730021,
746
- "eval_runtime": 0.2739,
747
- "eval_samples_per_second": 197.149,
748
- "eval_steps_per_second": 3.651,
749
  "step": 74
750
  },
751
  {
752
  "epoch": 33.33,
753
  "learning_rate": 1.388888888888889e-05,
754
- "loss": 0.1134,
755
  "step": 75
756
  },
757
  {
758
  "epoch": 33.78,
759
  "learning_rate": 1.3333333333333333e-05,
760
- "loss": 0.0941,
761
  "step": 76
762
  },
763
  {
764
  "epoch": 33.78,
765
  "eval_accuracy": 1.0,
766
- "eval_loss": 0.014932823367416859,
767
- "eval_runtime": 0.2614,
768
- "eval_samples_per_second": 206.594,
769
- "eval_steps_per_second": 3.826,
770
  "step": 76
771
  },
772
  {
773
  "epoch": 34.22,
774
  "learning_rate": 1.2777777777777777e-05,
775
- "loss": 0.0976,
776
  "step": 77
777
  },
778
  {
779
  "epoch": 34.67,
780
  "learning_rate": 1.2222222222222222e-05,
781
- "loss": 0.1182,
782
  "step": 78
783
  },
784
  {
785
  "epoch": 34.67,
786
  "eval_accuracy": 1.0,
787
- "eval_loss": 0.014534077607095242,
788
- "eval_runtime": 0.2631,
789
- "eval_samples_per_second": 205.268,
790
- "eval_steps_per_second": 3.801,
791
  "step": 78
792
  },
793
  {
794
  "epoch": 35.11,
795
  "learning_rate": 1.1666666666666668e-05,
796
- "loss": 0.1337,
797
  "step": 79
798
  },
799
  {
800
  "epoch": 35.56,
801
  "learning_rate": 1.1111111111111112e-05,
802
- "loss": 0.0878,
803
  "step": 80
804
  },
805
  {
806
  "epoch": 36.0,
807
  "learning_rate": 1.0555555555555555e-05,
808
- "loss": 0.0859,
809
  "step": 81
810
  },
811
  {
812
  "epoch": 36.0,
813
  "eval_accuracy": 1.0,
814
- "eval_loss": 0.01714230887591839,
815
- "eval_runtime": 0.2613,
816
- "eval_samples_per_second": 206.651,
817
- "eval_steps_per_second": 3.827,
818
  "step": 81
819
  },
820
  {
821
  "epoch": 36.44,
822
  "learning_rate": 1e-05,
823
- "loss": 0.0814,
824
  "step": 82
825
  },
826
  {
827
  "epoch": 36.89,
828
  "learning_rate": 9.444444444444445e-06,
829
- "loss": 0.071,
830
  "step": 83
831
  },
832
  {
833
  "epoch": 36.89,
834
  "eval_accuracy": 1.0,
835
- "eval_loss": 0.018014799803495407,
836
- "eval_runtime": 0.2725,
837
- "eval_samples_per_second": 198.155,
838
- "eval_steps_per_second": 3.67,
839
  "step": 83
840
  },
841
  {
842
  "epoch": 37.33,
843
  "learning_rate": 8.88888888888889e-06,
844
- "loss": 0.0635,
845
  "step": 84
846
  },
847
  {
848
  "epoch": 37.78,
849
  "learning_rate": 8.333333333333334e-06,
850
- "loss": 0.1003,
851
  "step": 85
852
  },
853
  {
854
  "epoch": 37.78,
855
  "eval_accuracy": 1.0,
856
- "eval_loss": 0.015952540561556816,
857
- "eval_runtime": 0.2716,
858
- "eval_samples_per_second": 198.786,
859
- "eval_steps_per_second": 3.681,
860
  "step": 85
861
  },
862
  {
863
  "epoch": 38.22,
864
  "learning_rate": 7.777777777777777e-06,
865
- "loss": 0.0761,
866
  "step": 86
867
  },
868
  {
869
  "epoch": 38.67,
870
  "learning_rate": 7.222222222222222e-06,
871
- "loss": 0.1173,
872
  "step": 87
873
  },
874
  {
875
  "epoch": 38.67,
876
  "eval_accuracy": 1.0,
877
- "eval_loss": 0.016635410487651825,
878
- "eval_runtime": 0.2617,
879
- "eval_samples_per_second": 206.357,
880
- "eval_steps_per_second": 3.821,
881
  "step": 87
882
  },
883
  {
884
  "epoch": 39.11,
885
  "learning_rate": 6.666666666666667e-06,
886
- "loss": 0.0596,
887
  "step": 88
888
  },
889
  {
890
  "epoch": 39.56,
891
  "learning_rate": 6.111111111111111e-06,
892
- "loss": 0.0549,
893
  "step": 89
894
  },
895
  {
896
  "epoch": 40.0,
897
  "learning_rate": 5.555555555555556e-06,
898
- "loss": 0.0478,
899
  "step": 90
900
  },
901
  {
902
  "epoch": 40.0,
903
  "eval_accuracy": 1.0,
904
- "eval_loss": 0.020745275542140007,
905
- "eval_runtime": 0.2774,
906
- "eval_samples_per_second": 194.642,
907
- "eval_steps_per_second": 3.604,
908
  "step": 90
909
  },
910
  {
911
  "epoch": 40.44,
912
  "learning_rate": 5e-06,
913
- "loss": 0.1045,
914
  "step": 91
915
  },
916
  {
917
  "epoch": 40.89,
918
  "learning_rate": 4.444444444444445e-06,
919
- "loss": 0.0674,
920
  "step": 92
921
  },
922
  {
923
  "epoch": 40.89,
924
- "eval_accuracy": 0.9814814814814815,
925
- "eval_loss": 0.022440657019615173,
926
- "eval_runtime": 0.2613,
927
- "eval_samples_per_second": 206.672,
928
- "eval_steps_per_second": 3.827,
929
  "step": 92
930
  },
931
  {
932
  "epoch": 41.33,
933
  "learning_rate": 3.888888888888889e-06,
934
- "loss": 0.077,
935
  "step": 93
936
  },
937
  {
938
  "epoch": 41.78,
939
  "learning_rate": 3.3333333333333333e-06,
940
- "loss": 0.0623,
941
  "step": 94
942
  },
943
  {
944
  "epoch": 41.78,
945
- "eval_accuracy": 0.9814814814814815,
946
- "eval_loss": 0.02340044267475605,
947
- "eval_runtime": 0.2622,
948
- "eval_samples_per_second": 205.961,
949
- "eval_steps_per_second": 3.814,
950
  "step": 94
951
  },
952
  {
953
  "epoch": 42.22,
954
  "learning_rate": 2.777777777777778e-06,
955
- "loss": 0.0503,
956
  "step": 95
957
  },
958
  {
959
  "epoch": 42.67,
960
  "learning_rate": 2.2222222222222225e-06,
961
- "loss": 0.0711,
962
  "step": 96
963
  },
964
  {
965
  "epoch": 42.67,
966
- "eval_accuracy": 0.9814814814814815,
967
- "eval_loss": 0.023668842390179634,
968
- "eval_runtime": 0.2602,
969
- "eval_samples_per_second": 207.57,
970
- "eval_steps_per_second": 3.844,
971
  "step": 96
972
  },
973
  {
974
  "epoch": 43.11,
975
  "learning_rate": 1.6666666666666667e-06,
976
- "loss": 0.0887,
977
  "step": 97
978
  },
979
  {
980
  "epoch": 43.56,
981
  "learning_rate": 1.1111111111111112e-06,
982
- "loss": 0.0767,
983
  "step": 98
984
  },
985
  {
986
  "epoch": 44.0,
987
  "learning_rate": 5.555555555555556e-07,
988
- "loss": 0.1057,
989
  "step": 99
990
  },
991
  {
992
  "epoch": 44.0,
993
- "eval_accuracy": 0.9814814814814815,
994
- "eval_loss": 0.023580273613333702,
995
- "eval_runtime": 0.2623,
996
- "eval_samples_per_second": 205.883,
997
- "eval_steps_per_second": 3.813,
998
  "step": 99
999
  },
1000
  {
1001
  "epoch": 44.44,
1002
  "learning_rate": 0.0,
1003
- "loss": 0.0541,
1004
  "step": 100
1005
  },
1006
  {
1007
  "epoch": 44.44,
1008
- "eval_accuracy": 0.9814814814814815,
1009
- "eval_loss": 0.02352498471736908,
1010
- "eval_runtime": 0.2719,
1011
- "eval_samples_per_second": 198.613,
1012
- "eval_steps_per_second": 3.678,
1013
  "step": 100
1014
  },
1015
  {
1016
  "epoch": 44.44,
1017
  "step": 100,
1018
  "total_flos": 1.6586385457107272e+18,
1019
- "train_loss": 0.15135242883116007,
1020
- "train_runtime": 1078.2452,
1021
- "train_samples_per_second": 22.305,
1022
- "train_steps_per_second": 0.093
1023
  }
1024
  ],
1025
  "max_steps": 100,
 
1
  {
2
  "best_metric": 1.0,
3
+ "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM1/checkpoint-40",
4
  "epoch": 44.44444444444444,
5
  "global_step": 100,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.44,
12
  "learning_rate": 5e-06,
13
+ "loss": 0.8294,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.89,
18
  "learning_rate": 1e-05,
19
+ "loss": 0.8443,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.89,
24
+ "eval_accuracy": 0.3148148148148148,
25
+ "eval_loss": 0.7812769412994385,
26
+ "eval_runtime": 0.2626,
27
+ "eval_samples_per_second": 205.601,
28
+ "eval_steps_per_second": 3.807,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 1.33,
33
  "learning_rate": 1.5e-05,
34
+ "loss": 0.8169,
35
  "step": 3
36
  },
37
  {
38
  "epoch": 1.78,
39
  "learning_rate": 2e-05,
40
+ "loss": 0.7501,
41
  "step": 4
42
  },
43
  {
44
  "epoch": 1.78,
45
+ "eval_accuracy": 0.5555555555555556,
46
+ "eval_loss": 0.7086877226829529,
47
+ "eval_runtime": 0.2654,
48
+ "eval_samples_per_second": 203.478,
49
+ "eval_steps_per_second": 3.768,
50
  "step": 4
51
  },
52
  {
53
  "epoch": 2.22,
54
  "learning_rate": 2.5e-05,
55
+ "loss": 0.663,
56
  "step": 5
57
  },
58
  {
59
  "epoch": 2.67,
60
  "learning_rate": 3e-05,
61
+ "loss": 0.6312,
62
  "step": 6
63
  },
64
  {
65
  "epoch": 2.67,
66
+ "eval_accuracy": 0.9074074074074074,
67
+ "eval_loss": 0.5305858254432678,
68
+ "eval_runtime": 0.2682,
69
+ "eval_samples_per_second": 201.351,
70
+ "eval_steps_per_second": 3.729,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 3.11,
75
  "learning_rate": 3.5e-05,
76
+ "loss": 0.4814,
77
  "step": 7
78
  },
79
  {
80
  "epoch": 3.56,
81
  "learning_rate": 4e-05,
82
+ "loss": 0.6005,
83
  "step": 8
84
  },
85
  {
86
  "epoch": 4.0,
87
  "learning_rate": 4.5e-05,
88
+ "loss": 0.4329,
89
  "step": 9
90
  },
91
  {
92
  "epoch": 4.0,
93
+ "eval_accuracy": 0.9074074074074074,
94
+ "eval_loss": 0.3618362247943878,
95
+ "eval_runtime": 0.2584,
96
+ "eval_samples_per_second": 208.948,
97
+ "eval_steps_per_second": 3.869,
98
  "step": 9
99
  },
100
  {
101
  "epoch": 4.44,
102
  "learning_rate": 5e-05,
103
+ "loss": 0.4233,
104
  "step": 10
105
  },
106
  {
107
  "epoch": 4.89,
108
  "learning_rate": 4.9444444444444446e-05,
109
+ "loss": 0.4438,
110
  "step": 11
111
  },
112
  {
113
  "epoch": 4.89,
114
+ "eval_accuracy": 0.9444444444444444,
115
+ "eval_loss": 0.26987290382385254,
116
+ "eval_runtime": 0.2579,
117
+ "eval_samples_per_second": 209.394,
118
+ "eval_steps_per_second": 3.878,
119
  "step": 11
120
  },
121
  {
122
  "epoch": 5.33,
123
  "learning_rate": 4.888888888888889e-05,
124
+ "loss": 0.3444,
125
  "step": 12
126
  },
127
  {
128
  "epoch": 5.78,
129
  "learning_rate": 4.8333333333333334e-05,
130
+ "loss": 0.3858,
131
  "step": 13
132
  },
133
  {
134
  "epoch": 5.78,
135
+ "eval_accuracy": 0.7962962962962963,
136
+ "eval_loss": 0.3650400936603546,
137
+ "eval_runtime": 0.2868,
138
+ "eval_samples_per_second": 188.272,
139
+ "eval_steps_per_second": 3.487,
140
  "step": 13
141
  },
142
  {
143
  "epoch": 6.22,
144
  "learning_rate": 4.7777777777777784e-05,
145
+ "loss": 0.3324,
146
  "step": 14
147
  },
148
  {
149
  "epoch": 6.67,
150
  "learning_rate": 4.722222222222222e-05,
151
+ "loss": 0.339,
152
  "step": 15
153
  },
154
  {
155
  "epoch": 6.67,
156
+ "eval_accuracy": 0.9629629629629629,
157
+ "eval_loss": 0.19111506640911102,
158
+ "eval_runtime": 0.2682,
159
+ "eval_samples_per_second": 201.363,
160
+ "eval_steps_per_second": 3.729,
161
  "step": 15
162
  },
163
  {
164
  "epoch": 7.11,
165
  "learning_rate": 4.666666666666667e-05,
166
+ "loss": 0.2704,
167
  "step": 16
168
  },
169
  {
170
  "epoch": 7.56,
171
  "learning_rate": 4.6111111111111115e-05,
172
+ "loss": 0.3059,
173
  "step": 17
174
  },
175
  {
176
  "epoch": 8.0,
177
  "learning_rate": 4.555555555555556e-05,
178
+ "loss": 0.2852,
179
  "step": 18
180
  },
181
  {
182
  "epoch": 8.0,
183
+ "eval_accuracy": 0.9629629629629629,
184
+ "eval_loss": 0.1610794961452484,
185
+ "eval_runtime": 0.2652,
186
+ "eval_samples_per_second": 203.582,
187
+ "eval_steps_per_second": 3.77,
188
  "step": 18
189
  },
190
  {
191
  "epoch": 8.44,
192
  "learning_rate": 4.5e-05,
193
+ "loss": 0.1981,
194
  "step": 19
195
  },
196
  {
197
  "epoch": 8.89,
198
  "learning_rate": 4.4444444444444447e-05,
199
+ "loss": 0.1866,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 8.89,
204
+ "eval_accuracy": 0.9444444444444444,
205
+ "eval_loss": 0.15157748758792877,
206
+ "eval_runtime": 0.2627,
207
+ "eval_samples_per_second": 205.573,
208
+ "eval_steps_per_second": 3.807,
209
  "step": 20
210
  },
211
  {
212
  "epoch": 9.33,
213
  "learning_rate": 4.388888888888889e-05,
214
+ "loss": 0.2223,
215
  "step": 21
216
  },
217
  {
218
  "epoch": 9.78,
219
  "learning_rate": 4.3333333333333334e-05,
220
+ "loss": 0.1748,
221
  "step": 22
222
  },
223
  {
224
  "epoch": 9.78,
225
+ "eval_accuracy": 0.9629629629629629,
226
+ "eval_loss": 0.13326749205589294,
227
+ "eval_runtime": 0.2617,
228
+ "eval_samples_per_second": 206.317,
229
+ "eval_steps_per_second": 3.821,
230
  "step": 22
231
  },
232
  {
233
  "epoch": 10.22,
234
  "learning_rate": 4.277777777777778e-05,
235
+ "loss": 0.1833,
236
  "step": 23
237
  },
238
  {
239
  "epoch": 10.67,
240
  "learning_rate": 4.222222222222222e-05,
241
+ "loss": 0.1996,
242
  "step": 24
243
  },
244
  {
245
  "epoch": 10.67,
246
+ "eval_accuracy": 0.9629629629629629,
247
+ "eval_loss": 0.11881023645401001,
248
+ "eval_runtime": 0.2602,
249
+ "eval_samples_per_second": 207.5,
250
+ "eval_steps_per_second": 3.843,
251
  "step": 24
252
  },
253
  {
254
  "epoch": 11.11,
255
  "learning_rate": 4.166666666666667e-05,
256
+ "loss": 0.1168,
257
  "step": 25
258
  },
259
  {
260
  "epoch": 11.56,
261
  "learning_rate": 4.111111111111111e-05,
262
+ "loss": 0.1335,
263
  "step": 26
264
  },
265
  {
266
  "epoch": 12.0,
267
  "learning_rate": 4.055555555555556e-05,
268
+ "loss": 0.1604,
269
  "step": 27
270
  },
271
  {
272
  "epoch": 12.0,
273
  "eval_accuracy": 0.9444444444444444,
274
+ "eval_loss": 0.11688121408224106,
275
+ "eval_runtime": 0.2613,
276
+ "eval_samples_per_second": 206.693,
277
+ "eval_steps_per_second": 3.828,
278
  "step": 27
279
  },
280
  {
281
  "epoch": 12.44,
282
  "learning_rate": 4e-05,
283
+ "loss": 0.1171,
284
  "step": 28
285
  },
286
  {
287
  "epoch": 12.89,
288
  "learning_rate": 3.944444444444445e-05,
289
+ "loss": 0.1319,
290
  "step": 29
291
  },
292
  {
293
  "epoch": 12.89,
294
  "eval_accuracy": 0.9814814814814815,
295
+ "eval_loss": 0.08352555334568024,
296
+ "eval_runtime": 0.2583,
297
+ "eval_samples_per_second": 209.02,
298
+ "eval_steps_per_second": 3.871,
299
  "step": 29
300
  },
301
  {
302
  "epoch": 13.33,
303
  "learning_rate": 3.888888888888889e-05,
304
+ "loss": 0.0769,
305
  "step": 30
306
  },
307
  {
308
  "epoch": 13.78,
309
  "learning_rate": 3.8333333333333334e-05,
310
+ "loss": 0.141,
311
  "step": 31
312
  },
313
  {
314
  "epoch": 13.78,
315
  "eval_accuracy": 0.9814814814814815,
316
+ "eval_loss": 0.07039038091897964,
317
+ "eval_runtime": 0.2614,
318
+ "eval_samples_per_second": 206.613,
319
+ "eval_steps_per_second": 3.826,
320
  "step": 31
321
  },
322
  {
323
  "epoch": 14.22,
324
  "learning_rate": 3.777777777777778e-05,
325
+ "loss": 0.0732,
326
  "step": 32
327
  },
328
  {
329
  "epoch": 14.67,
330
  "learning_rate": 3.722222222222222e-05,
331
+ "loss": 0.123,
332
  "step": 33
333
  },
334
  {
335
  "epoch": 14.67,
336
  "eval_accuracy": 0.9814814814814815,
337
+ "eval_loss": 0.05736014246940613,
338
+ "eval_runtime": 0.2587,
339
+ "eval_samples_per_second": 208.767,
340
+ "eval_steps_per_second": 3.866,
341
  "step": 33
342
  },
343
  {
344
  "epoch": 15.11,
345
  "learning_rate": 3.6666666666666666e-05,
346
+ "loss": 0.1153,
347
  "step": 34
348
  },
349
  {
350
  "epoch": 15.56,
351
  "learning_rate": 3.611111111111111e-05,
352
+ "loss": 0.1164,
353
  "step": 35
354
  },
355
  {
356
  "epoch": 16.0,
357
  "learning_rate": 3.555555555555556e-05,
358
+ "loss": 0.0678,
359
  "step": 36
360
  },
361
  {
362
  "epoch": 16.0,
363
  "eval_accuracy": 0.9814814814814815,
364
+ "eval_loss": 0.0604197233915329,
365
+ "eval_runtime": 0.263,
366
+ "eval_samples_per_second": 205.352,
367
+ "eval_steps_per_second": 3.803,
368
  "step": 36
369
  },
370
  {
371
  "epoch": 16.44,
372
  "learning_rate": 3.5e-05,
373
+ "loss": 0.0914,
374
  "step": 37
375
  },
376
  {
377
  "epoch": 16.89,
378
  "learning_rate": 3.444444444444445e-05,
379
+ "loss": 0.1208,
380
  "step": 38
381
  },
382
  {
383
  "epoch": 16.89,
384
  "eval_accuracy": 0.9814814814814815,
385
+ "eval_loss": 0.03850555792450905,
386
+ "eval_runtime": 0.2602,
387
+ "eval_samples_per_second": 207.493,
388
+ "eval_steps_per_second": 3.842,
389
  "step": 38
390
  },
391
  {
392
  "epoch": 17.33,
393
  "learning_rate": 3.388888888888889e-05,
394
+ "loss": 0.0833,
395
  "step": 39
396
  },
397
  {
398
  "epoch": 17.78,
399
  "learning_rate": 3.3333333333333335e-05,
400
+ "loss": 0.0942,
401
  "step": 40
402
  },
403
  {
404
  "epoch": 17.78,
405
+ "eval_accuracy": 1.0,
406
+ "eval_loss": 0.026890547946095467,
407
+ "eval_runtime": 0.2611,
408
+ "eval_samples_per_second": 206.793,
409
+ "eval_steps_per_second": 3.829,
410
  "step": 40
411
  },
412
  {
413
  "epoch": 18.22,
414
  "learning_rate": 3.277777777777778e-05,
415
+ "loss": 0.0736,
416
  "step": 41
417
  },
418
  {
419
  "epoch": 18.67,
420
  "learning_rate": 3.222222222222223e-05,
421
+ "loss": 0.0822,
422
  "step": 42
423
  },
424
  {
425
  "epoch": 18.67,
426
+ "eval_accuracy": 1.0,
427
+ "eval_loss": 0.01689229905605316,
428
+ "eval_runtime": 0.2602,
429
+ "eval_samples_per_second": 207.555,
430
+ "eval_steps_per_second": 3.844,
431
  "step": 42
432
  },
433
  {
434
  "epoch": 19.11,
435
  "learning_rate": 3.1666666666666666e-05,
436
+ "loss": 0.086,
437
  "step": 43
438
  },
439
  {
440
  "epoch": 19.56,
441
  "learning_rate": 3.111111111111111e-05,
442
+ "loss": 0.1013,
443
  "step": 44
444
  },
445
  {
446
  "epoch": 20.0,
447
  "learning_rate": 3.055555555555556e-05,
448
+ "loss": 0.0578,
449
  "step": 45
450
  },
451
  {
452
  "epoch": 20.0,
453
  "eval_accuracy": 1.0,
454
+ "eval_loss": 0.017471354454755783,
455
+ "eval_runtime": 0.2785,
456
+ "eval_samples_per_second": 193.881,
457
+ "eval_steps_per_second": 3.59,
458
  "step": 45
459
  },
460
  {
461
  "epoch": 20.44,
462
  "learning_rate": 3e-05,
463
+ "loss": 0.0943,
464
  "step": 46
465
  },
466
  {
467
  "epoch": 20.89,
468
  "learning_rate": 2.9444444444444448e-05,
469
+ "loss": 0.0611,
470
  "step": 47
471
  },
472
  {
473
  "epoch": 20.89,
474
  "eval_accuracy": 1.0,
475
+ "eval_loss": 0.022001970559358597,
476
+ "eval_runtime": 0.2601,
477
+ "eval_samples_per_second": 207.599,
478
+ "eval_steps_per_second": 3.844,
479
  "step": 47
480
  },
481
  {
482
  "epoch": 21.33,
483
  "learning_rate": 2.8888888888888888e-05,
484
+ "loss": 0.083,
485
  "step": 48
486
  },
487
  {
488
  "epoch": 21.78,
489
  "learning_rate": 2.8333333333333335e-05,
490
+ "loss": 0.1053,
491
  "step": 49
492
  },
493
  {
494
  "epoch": 21.78,
495
+ "eval_accuracy": 1.0,
496
+ "eval_loss": 0.0097891166806221,
497
+ "eval_runtime": 0.2612,
498
+ "eval_samples_per_second": 206.762,
499
+ "eval_steps_per_second": 3.829,
500
  "step": 49
501
  },
502
  {
503
  "epoch": 22.22,
504
  "learning_rate": 2.777777777777778e-05,
505
+ "loss": 0.0638,
506
  "step": 50
507
  },
508
  {
509
  "epoch": 22.67,
510
  "learning_rate": 2.7222222222222223e-05,
511
+ "loss": 0.1713,
512
  "step": 51
513
  },
514
  {
515
  "epoch": 22.67,
516
+ "eval_accuracy": 1.0,
517
+ "eval_loss": 0.015564966946840286,
518
+ "eval_runtime": 0.2597,
519
+ "eval_samples_per_second": 207.902,
520
+ "eval_steps_per_second": 3.85,
521
  "step": 51
522
  },
523
  {
524
  "epoch": 23.11,
525
  "learning_rate": 2.6666666666666667e-05,
526
+ "loss": 0.1014,
527
  "step": 52
528
  },
529
  {
530
  "epoch": 23.56,
531
  "learning_rate": 2.6111111111111114e-05,
532
+ "loss": 0.0871,
533
  "step": 53
534
  },
535
  {
536
  "epoch": 24.0,
537
  "learning_rate": 2.5555555555555554e-05,
538
+ "loss": 0.0515,
539
  "step": 54
540
  },
541
  {
542
  "epoch": 24.0,
543
  "eval_accuracy": 1.0,
544
+ "eval_loss": 0.011080882512032986,
545
+ "eval_runtime": 0.2623,
546
+ "eval_samples_per_second": 205.857,
547
+ "eval_steps_per_second": 3.812,
548
  "step": 54
549
  },
550
  {
551
  "epoch": 24.44,
552
  "learning_rate": 2.5e-05,
553
+ "loss": 0.1139,
554
  "step": 55
555
  },
556
  {
557
  "epoch": 24.89,
558
  "learning_rate": 2.4444444444444445e-05,
559
+ "loss": 0.1227,
560
  "step": 56
561
  },
562
  {
563
  "epoch": 24.89,
564
  "eval_accuracy": 1.0,
565
+ "eval_loss": 0.01661752350628376,
566
+ "eval_runtime": 0.2611,
567
+ "eval_samples_per_second": 206.808,
568
+ "eval_steps_per_second": 3.83,
569
  "step": 56
570
  },
571
  {
572
  "epoch": 25.33,
573
  "learning_rate": 2.3888888888888892e-05,
574
+ "loss": 0.0627,
575
  "step": 57
576
  },
577
  {
578
  "epoch": 25.78,
579
  "learning_rate": 2.3333333333333336e-05,
580
+ "loss": 0.0891,
581
  "step": 58
582
  },
583
  {
584
  "epoch": 25.78,
585
  "eval_accuracy": 1.0,
586
+ "eval_loss": 0.0093127666041255,
587
+ "eval_runtime": 0.2618,
588
+ "eval_samples_per_second": 206.246,
589
+ "eval_steps_per_second": 3.819,
590
  "step": 58
591
  },
592
  {
593
  "epoch": 26.22,
594
  "learning_rate": 2.277777777777778e-05,
595
+ "loss": 0.1174,
596
  "step": 59
597
  },
598
  {
599
  "epoch": 26.67,
600
  "learning_rate": 2.2222222222222223e-05,
601
+ "loss": 0.0768,
602
  "step": 60
603
  },
604
  {
605
  "epoch": 26.67,
606
  "eval_accuracy": 1.0,
607
+ "eval_loss": 0.008994304575026035,
608
+ "eval_runtime": 0.2612,
609
+ "eval_samples_per_second": 206.756,
610
+ "eval_steps_per_second": 3.829,
611
  "step": 60
612
  },
613
  {
614
  "epoch": 27.11,
615
  "learning_rate": 2.1666666666666667e-05,
616
+ "loss": 0.1328,
617
  "step": 61
618
  },
619
  {
620
  "epoch": 27.56,
621
  "learning_rate": 2.111111111111111e-05,
622
+ "loss": 0.0935,
623
  "step": 62
624
  },
625
  {
626
  "epoch": 28.0,
627
  "learning_rate": 2.0555555555555555e-05,
628
+ "loss": 0.0755,
629
  "step": 63
630
  },
631
  {
632
  "epoch": 28.0,
633
  "eval_accuracy": 1.0,
634
+ "eval_loss": 0.010785534046590328,
635
+ "eval_runtime": 0.2627,
636
+ "eval_samples_per_second": 205.56,
637
+ "eval_steps_per_second": 3.807,
638
  "step": 63
639
  },
640
  {
641
  "epoch": 28.44,
642
  "learning_rate": 2e-05,
643
+ "loss": 0.117,
644
  "step": 64
645
  },
646
  {
647
  "epoch": 28.89,
648
  "learning_rate": 1.9444444444444445e-05,
649
+ "loss": 0.0798,
650
  "step": 65
651
  },
652
  {
653
  "epoch": 28.89,
654
  "eval_accuracy": 1.0,
655
+ "eval_loss": 0.020129548385739326,
656
+ "eval_runtime": 0.264,
657
+ "eval_samples_per_second": 204.578,
658
+ "eval_steps_per_second": 3.788,
659
  "step": 65
660
  },
661
  {
662
  "epoch": 29.33,
663
  "learning_rate": 1.888888888888889e-05,
664
+ "loss": 0.0671,
665
  "step": 66
666
  },
667
  {
668
  "epoch": 29.78,
669
  "learning_rate": 1.8333333333333333e-05,
670
+ "loss": 0.1005,
671
  "step": 67
672
  },
673
  {
674
  "epoch": 29.78,
675
  "eval_accuracy": 1.0,
676
+ "eval_loss": 0.011831928044557571,
677
+ "eval_runtime": 0.2639,
678
+ "eval_samples_per_second": 204.649,
679
+ "eval_steps_per_second": 3.79,
680
  "step": 67
681
  },
682
  {
683
  "epoch": 30.22,
684
  "learning_rate": 1.777777777777778e-05,
685
+ "loss": 0.0572,
686
  "step": 68
687
  },
688
  {
689
  "epoch": 30.67,
690
  "learning_rate": 1.7222222222222224e-05,
691
+ "loss": 0.1113,
692
  "step": 69
693
  },
694
  {
695
  "epoch": 30.67,
696
  "eval_accuracy": 1.0,
697
+ "eval_loss": 0.013057045638561249,
698
+ "eval_runtime": 0.2625,
699
+ "eval_samples_per_second": 205.749,
700
+ "eval_steps_per_second": 3.81,
701
  "step": 69
702
  },
703
  {
704
  "epoch": 31.11,
705
  "learning_rate": 1.6666666666666667e-05,
706
+ "loss": 0.0474,
707
  "step": 70
708
  },
709
  {
710
  "epoch": 31.56,
711
  "learning_rate": 1.6111111111111115e-05,
712
+ "loss": 0.1374,
713
  "step": 71
714
  },
715
  {
716
  "epoch": 32.0,
717
  "learning_rate": 1.5555555555555555e-05,
718
+ "loss": 0.1034,
719
  "step": 72
720
  },
721
  {
722
  "epoch": 32.0,
723
  "eval_accuracy": 1.0,
724
+ "eval_loss": 0.017138680443167686,
725
+ "eval_runtime": 0.2623,
726
+ "eval_samples_per_second": 205.904,
727
+ "eval_steps_per_second": 3.813,
728
  "step": 72
729
  },
730
  {
731
  "epoch": 32.44,
732
  "learning_rate": 1.5e-05,
733
+ "loss": 0.0604,
734
  "step": 73
735
  },
736
  {
737
  "epoch": 32.89,
738
  "learning_rate": 1.4444444444444444e-05,
739
+ "loss": 0.0857,
740
  "step": 74
741
  },
742
  {
743
  "epoch": 32.89,
744
  "eval_accuracy": 1.0,
745
+ "eval_loss": 0.015838121995329857,
746
+ "eval_runtime": 0.2627,
747
+ "eval_samples_per_second": 205.551,
748
+ "eval_steps_per_second": 3.807,
749
  "step": 74
750
  },
751
  {
752
  "epoch": 33.33,
753
  "learning_rate": 1.388888888888889e-05,
754
+ "loss": 0.1107,
755
  "step": 75
756
  },
757
  {
758
  "epoch": 33.78,
759
  "learning_rate": 1.3333333333333333e-05,
760
+ "loss": 0.0864,
761
  "step": 76
762
  },
763
  {
764
  "epoch": 33.78,
765
  "eval_accuracy": 1.0,
766
+ "eval_loss": 0.014146910980343819,
767
+ "eval_runtime": 0.2643,
768
+ "eval_samples_per_second": 204.346,
769
+ "eval_steps_per_second": 3.784,
770
  "step": 76
771
  },
772
  {
773
  "epoch": 34.22,
774
  "learning_rate": 1.2777777777777777e-05,
775
+ "loss": 0.096,
776
  "step": 77
777
  },
778
  {
779
  "epoch": 34.67,
780
  "learning_rate": 1.2222222222222222e-05,
781
+ "loss": 0.1241,
782
  "step": 78
783
  },
784
  {
785
  "epoch": 34.67,
786
  "eval_accuracy": 1.0,
787
+ "eval_loss": 0.012656173668801785,
788
+ "eval_runtime": 0.2613,
789
+ "eval_samples_per_second": 206.683,
790
+ "eval_steps_per_second": 3.827,
791
  "step": 78
792
  },
793
  {
794
  "epoch": 35.11,
795
  "learning_rate": 1.1666666666666668e-05,
796
+ "loss": 0.2137,
797
  "step": 79
798
  },
799
  {
800
  "epoch": 35.56,
801
  "learning_rate": 1.1111111111111112e-05,
802
+ "loss": 0.1045,
803
  "step": 80
804
  },
805
  {
806
  "epoch": 36.0,
807
  "learning_rate": 1.0555555555555555e-05,
808
+ "loss": 0.0868,
809
  "step": 81
810
  },
811
  {
812
  "epoch": 36.0,
813
  "eval_accuracy": 1.0,
814
+ "eval_loss": 0.011819291859865189,
815
+ "eval_runtime": 0.2714,
816
+ "eval_samples_per_second": 198.995,
817
+ "eval_steps_per_second": 3.685,
818
  "step": 81
819
  },
820
  {
821
  "epoch": 36.44,
822
  "learning_rate": 1e-05,
823
+ "loss": 0.0863,
824
  "step": 82
825
  },
826
  {
827
  "epoch": 36.89,
828
  "learning_rate": 9.444444444444445e-06,
829
+ "loss": 0.0704,
830
  "step": 83
831
  },
832
  {
833
  "epoch": 36.89,
834
  "eval_accuracy": 1.0,
835
+ "eval_loss": 0.011303897947072983,
836
+ "eval_runtime": 0.2629,
837
+ "eval_samples_per_second": 205.397,
838
+ "eval_steps_per_second": 3.804,
839
  "step": 83
840
  },
841
  {
842
  "epoch": 37.33,
843
  "learning_rate": 8.88888888888889e-06,
844
+ "loss": 0.0523,
845
  "step": 84
846
  },
847
  {
848
  "epoch": 37.78,
849
  "learning_rate": 8.333333333333334e-06,
850
+ "loss": 0.0938,
851
  "step": 85
852
  },
853
  {
854
  "epoch": 37.78,
855
  "eval_accuracy": 1.0,
856
+ "eval_loss": 0.010850733146071434,
857
+ "eval_runtime": 0.2773,
858
+ "eval_samples_per_second": 194.757,
859
+ "eval_steps_per_second": 3.607,
860
  "step": 85
861
  },
862
  {
863
  "epoch": 38.22,
864
  "learning_rate": 7.777777777777777e-06,
865
+ "loss": 0.0838,
866
  "step": 86
867
  },
868
  {
869
  "epoch": 38.67,
870
  "learning_rate": 7.222222222222222e-06,
871
+ "loss": 0.1181,
872
  "step": 87
873
  },
874
  {
875
  "epoch": 38.67,
876
  "eval_accuracy": 1.0,
877
+ "eval_loss": 0.011989281512796879,
878
+ "eval_runtime": 0.2593,
879
+ "eval_samples_per_second": 208.25,
880
+ "eval_steps_per_second": 3.856,
881
  "step": 87
882
  },
883
  {
884
  "epoch": 39.11,
885
  "learning_rate": 6.666666666666667e-06,
886
+ "loss": 0.0638,
887
  "step": 88
888
  },
889
  {
890
  "epoch": 39.56,
891
  "learning_rate": 6.111111111111111e-06,
892
+ "loss": 0.0471,
893
  "step": 89
894
  },
895
  {
896
  "epoch": 40.0,
897
  "learning_rate": 5.555555555555556e-06,
898
+ "loss": 0.0509,
899
  "step": 90
900
  },
901
  {
902
  "epoch": 40.0,
903
  "eval_accuracy": 1.0,
904
+ "eval_loss": 0.014911294914782047,
905
+ "eval_runtime": 0.2616,
906
+ "eval_samples_per_second": 206.386,
907
+ "eval_steps_per_second": 3.822,
908
  "step": 90
909
  },
910
  {
911
  "epoch": 40.44,
912
  "learning_rate": 5e-06,
913
+ "loss": 0.1063,
914
  "step": 91
915
  },
916
  {
917
  "epoch": 40.89,
918
  "learning_rate": 4.444444444444445e-06,
919
+ "loss": 0.0684,
920
  "step": 92
921
  },
922
  {
923
  "epoch": 40.89,
924
+ "eval_accuracy": 1.0,
925
+ "eval_loss": 0.015459185466170311,
926
+ "eval_runtime": 0.2635,
927
+ "eval_samples_per_second": 204.956,
928
+ "eval_steps_per_second": 3.795,
929
  "step": 92
930
  },
931
  {
932
  "epoch": 41.33,
933
  "learning_rate": 3.888888888888889e-06,
934
+ "loss": 0.0775,
935
  "step": 93
936
  },
937
  {
938
  "epoch": 41.78,
939
  "learning_rate": 3.3333333333333333e-06,
940
+ "loss": 0.0625,
941
  "step": 94
942
  },
943
  {
944
  "epoch": 41.78,
945
+ "eval_accuracy": 1.0,
946
+ "eval_loss": 0.015089095570147038,
947
+ "eval_runtime": 0.2645,
948
+ "eval_samples_per_second": 204.187,
949
+ "eval_steps_per_second": 3.781,
950
  "step": 94
951
  },
952
  {
953
  "epoch": 42.22,
954
  "learning_rate": 2.777777777777778e-06,
955
+ "loss": 0.0474,
956
  "step": 95
957
  },
958
  {
959
  "epoch": 42.67,
960
  "learning_rate": 2.2222222222222225e-06,
961
+ "loss": 0.0746,
962
  "step": 96
963
  },
964
  {
965
  "epoch": 42.67,
966
+ "eval_accuracy": 1.0,
967
+ "eval_loss": 0.014267035759985447,
968
+ "eval_runtime": 0.2776,
969
+ "eval_samples_per_second": 194.496,
970
+ "eval_steps_per_second": 3.602,
971
  "step": 96
972
  },
973
  {
974
  "epoch": 43.11,
975
  "learning_rate": 1.6666666666666667e-06,
976
+ "loss": 0.0868,
977
  "step": 97
978
  },
979
  {
980
  "epoch": 43.56,
981
  "learning_rate": 1.1111111111111112e-06,
982
+ "loss": 0.0849,
983
  "step": 98
984
  },
985
  {
986
  "epoch": 44.0,
987
  "learning_rate": 5.555555555555556e-07,
988
+ "loss": 0.1062,
989
  "step": 99
990
  },
991
  {
992
  "epoch": 44.0,
993
+ "eval_accuracy": 1.0,
994
+ "eval_loss": 0.013263082131743431,
995
+ "eval_runtime": 0.2615,
996
+ "eval_samples_per_second": 206.506,
997
+ "eval_steps_per_second": 3.824,
998
  "step": 99
999
  },
1000
  {
1001
  "epoch": 44.44,
1002
  "learning_rate": 0.0,
1003
+ "loss": 0.0579,
1004
  "step": 100
1005
  },
1006
  {
1007
  "epoch": 44.44,
1008
+ "eval_accuracy": 1.0,
1009
+ "eval_loss": 0.013171697966754436,
1010
+ "eval_runtime": 0.263,
1011
+ "eval_samples_per_second": 205.31,
1012
+ "eval_steps_per_second": 3.802,
1013
  "step": 100
1014
  },
1015
  {
1016
  "epoch": 44.44,
1017
  "step": 100,
1018
  "total_flos": 1.6586385457107272e+18,
1019
+ "train_loss": 0.1744049086794257,
1020
+ "train_runtime": 686.3928,
1021
+ "train_samples_per_second": 35.038,
1022
+ "train_steps_per_second": 0.146
1023
  }
1024
  ],
1025
  "max_steps": 100,