Soulaimen commited on
Commit
2b988c6
1 Parent(s): eceae0c

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +9 -9
  2. eval_results.json +5 -5
  3. train_results.json +4 -4
  4. trainer_state.json +196 -196
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 9.96,
3
- "eval_accuracy": 0.9988649262202043,
4
- "eval_loss": 0.00955167319625616,
5
- "eval_runtime": 8.0129,
6
- "eval_samples_per_second": 109.948,
7
- "eval_steps_per_second": 13.853,
8
- "train_loss": 0.0672107882744877,
9
- "train_runtime": 1935.8008,
10
- "train_samples_per_second": 40.929,
11
- "train_steps_per_second": 0.728
12
  }
 
1
  {
2
  "epoch": 9.96,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.005081297364085913,
5
+ "eval_runtime": 7.7113,
6
+ "eval_samples_per_second": 114.248,
7
+ "eval_steps_per_second": 14.395,
8
+ "train_loss": 0.0649210772510116,
9
+ "train_runtime": 2044.7769,
10
+ "train_samples_per_second": 38.748,
11
+ "train_steps_per_second": 0.69
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.96,
3
- "eval_accuracy": 0.9988649262202043,
4
- "eval_loss": 0.00955167319625616,
5
- "eval_runtime": 8.0129,
6
- "eval_samples_per_second": 109.948,
7
- "eval_steps_per_second": 13.853
8
  }
 
1
  {
2
  "epoch": 9.96,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.005081297364085913,
5
+ "eval_runtime": 7.7113,
6
+ "eval_samples_per_second": 114.248,
7
+ "eval_steps_per_second": 14.395
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 9.96,
3
- "train_loss": 0.0672107882744877,
4
- "train_runtime": 1935.8008,
5
- "train_samples_per_second": 40.929,
6
- "train_steps_per_second": 0.728
7
  }
 
1
  {
2
  "epoch": 9.96,
3
+ "train_loss": 0.0649210772510116,
4
+ "train_runtime": 2044.7769,
5
+ "train_samples_per_second": 38.748,
6
+ "train_steps_per_second": 0.69
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9988649262202043,
3
- "best_model_checkpoint": "convnext-large-224-22k-1k-BottomSportsCasual/checkpoint-849",
4
  "epoch": 9.959636730575177,
5
  "global_step": 1410,
6
  "is_hyper_param_search": false,
@@ -10,947 +10,947 @@
10
  {
11
  "epoch": 0.07,
12
  "learning_rate": 3.3333333333333335e-05,
13
- "loss": 0.6501,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.14,
18
  "learning_rate": 4.982078853046595e-05,
19
- "loss": 0.2875,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.21,
24
  "learning_rate": 4.9462365591397855e-05,
25
- "loss": 0.1896,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.28,
30
  "learning_rate": 4.910394265232976e-05,
31
- "loss": 0.1512,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.35,
36
  "learning_rate": 4.874551971326165e-05,
37
- "loss": 0.1433,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.42,
42
  "learning_rate": 4.8387096774193554e-05,
43
- "loss": 0.1148,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.49,
48
  "learning_rate": 4.802867383512545e-05,
49
- "loss": 0.1106,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.57,
54
  "learning_rate": 4.767025089605735e-05,
55
- "loss": 0.1079,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.64,
60
  "learning_rate": 4.731182795698925e-05,
61
- "loss": 0.1282,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.71,
66
  "learning_rate": 4.695340501792115e-05,
67
- "loss": 0.1306,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.78,
72
  "learning_rate": 4.659498207885305e-05,
73
- "loss": 0.0917,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.85,
78
  "learning_rate": 4.6236559139784944e-05,
79
- "loss": 0.0934,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.92,
84
  "learning_rate": 4.5878136200716846e-05,
85
- "loss": 0.068,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.99,
90
  "learning_rate": 4.551971326164875e-05,
91
- "loss": 0.0752,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 1.0,
96
- "eval_accuracy": 0.9920544835414302,
97
- "eval_loss": 0.028408855199813843,
98
- "eval_runtime": 13.4966,
99
- "eval_samples_per_second": 65.276,
100
- "eval_steps_per_second": 8.224,
101
  "step": 141
102
  },
103
  {
104
  "epoch": 1.06,
105
  "learning_rate": 4.516129032258064e-05,
106
- "loss": 0.0647,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.13,
111
  "learning_rate": 4.4802867383512545e-05,
112
- "loss": 0.1433,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.2,
117
  "learning_rate": 4.4444444444444447e-05,
118
- "loss": 0.0918,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.27,
123
  "learning_rate": 4.408602150537635e-05,
124
- "loss": 0.0898,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.34,
129
  "learning_rate": 4.372759856630825e-05,
130
- "loss": 0.0849,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.41,
135
  "learning_rate": 4.3369175627240145e-05,
136
- "loss": 0.0816,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.48,
141
  "learning_rate": 4.301075268817205e-05,
142
- "loss": 0.0793,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.55,
147
  "learning_rate": 4.265232974910394e-05,
148
- "loss": 0.1098,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.62,
153
  "learning_rate": 4.2293906810035844e-05,
154
- "loss": 0.0801,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.7,
159
  "learning_rate": 4.1935483870967746e-05,
160
- "loss": 0.096,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.77,
165
  "learning_rate": 4.157706093189964e-05,
166
- "loss": 0.0685,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.84,
171
  "learning_rate": 4.121863799283154e-05,
172
- "loss": 0.069,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.91,
177
  "learning_rate": 4.0860215053763444e-05,
178
- "loss": 0.0788,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.98,
183
  "learning_rate": 4.050179211469534e-05,
184
- "loss": 0.0711,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 2.0,
189
- "eval_accuracy": 0.996594778660613,
190
- "eval_loss": 0.007982436567544937,
191
- "eval_runtime": 10.1876,
192
- "eval_samples_per_second": 86.477,
193
- "eval_steps_per_second": 10.896,
194
  "step": 283
195
  },
196
  {
197
  "epoch": 2.05,
198
  "learning_rate": 4.014336917562724e-05,
199
- "loss": 0.0824,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 2.12,
204
  "learning_rate": 3.978494623655914e-05,
205
- "loss": 0.0357,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 2.19,
210
  "learning_rate": 3.9426523297491045e-05,
211
- "loss": 0.0834,
212
  "step": 310
213
  },
214
  {
215
  "epoch": 2.26,
216
  "learning_rate": 3.906810035842295e-05,
217
- "loss": 0.0925,
218
  "step": 320
219
  },
220
  {
221
  "epoch": 2.33,
222
  "learning_rate": 3.870967741935484e-05,
223
- "loss": 0.1001,
224
  "step": 330
225
  },
226
  {
227
  "epoch": 2.4,
228
  "learning_rate": 3.8351254480286743e-05,
229
- "loss": 0.0533,
230
  "step": 340
231
  },
232
  {
233
  "epoch": 2.47,
234
  "learning_rate": 3.799283154121864e-05,
235
- "loss": 0.0652,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.54,
240
  "learning_rate": 3.763440860215054e-05,
241
- "loss": 0.0738,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.61,
246
  "learning_rate": 3.727598566308244e-05,
247
- "loss": 0.1049,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.68,
252
  "learning_rate": 3.691756272401434e-05,
253
- "loss": 0.0918,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.75,
258
  "learning_rate": 3.655913978494624e-05,
259
- "loss": 0.0882,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.83,
264
  "learning_rate": 3.6200716845878134e-05,
265
- "loss": 0.0636,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.9,
270
  "learning_rate": 3.5842293906810036e-05,
271
- "loss": 0.094,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.97,
276
  "learning_rate": 3.548387096774194e-05,
277
- "loss": 0.074,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.99,
282
- "eval_accuracy": 0.996594778660613,
283
- "eval_loss": 0.007469804957509041,
284
- "eval_runtime": 10.2147,
285
- "eval_samples_per_second": 86.248,
286
- "eval_steps_per_second": 10.867,
287
  "step": 424
288
  },
289
  {
290
  "epoch": 3.04,
291
  "learning_rate": 3.512544802867383e-05,
292
- "loss": 0.0748,
293
  "step": 430
294
  },
295
  {
296
  "epoch": 3.11,
297
  "learning_rate": 3.4767025089605734e-05,
298
- "loss": 0.0573,
299
  "step": 440
300
  },
301
  {
302
  "epoch": 3.18,
303
  "learning_rate": 3.4408602150537636e-05,
304
- "loss": 0.101,
305
  "step": 450
306
  },
307
  {
308
  "epoch": 3.25,
309
  "learning_rate": 3.405017921146954e-05,
310
- "loss": 0.0578,
311
  "step": 460
312
  },
313
  {
314
  "epoch": 3.32,
315
  "learning_rate": 3.369175627240144e-05,
316
- "loss": 0.0723,
317
  "step": 470
318
  },
319
  {
320
  "epoch": 3.39,
321
  "learning_rate": 3.3333333333333335e-05,
322
- "loss": 0.0327,
323
  "step": 480
324
  },
325
  {
326
  "epoch": 3.46,
327
  "learning_rate": 3.297491039426524e-05,
328
- "loss": 0.0735,
329
  "step": 490
330
  },
331
  {
332
  "epoch": 3.53,
333
  "learning_rate": 3.261648745519714e-05,
334
- "loss": 0.1024,
335
  "step": 500
336
  },
337
  {
338
  "epoch": 3.6,
339
  "learning_rate": 3.2258064516129034e-05,
340
- "loss": 0.0746,
341
  "step": 510
342
  },
343
  {
344
  "epoch": 3.67,
345
  "learning_rate": 3.1899641577060935e-05,
346
- "loss": 0.0762,
347
  "step": 520
348
  },
349
  {
350
  "epoch": 3.74,
351
  "learning_rate": 3.154121863799283e-05,
352
- "loss": 0.0392,
353
  "step": 530
354
  },
355
  {
356
  "epoch": 3.81,
357
  "learning_rate": 3.118279569892473e-05,
358
- "loss": 0.0389,
359
  "step": 540
360
  },
361
  {
362
  "epoch": 3.88,
363
  "learning_rate": 3.0824372759856634e-05,
364
- "loss": 0.091,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.96,
369
  "learning_rate": 3.046594982078853e-05,
370
- "loss": 0.0696,
371
  "step": 560
372
  },
373
  {
374
  "epoch": 4.0,
375
- "eval_accuracy": 0.996594778660613,
376
- "eval_loss": 0.0062737599946558475,
377
- "eval_runtime": 9.8804,
378
- "eval_samples_per_second": 89.166,
379
- "eval_steps_per_second": 11.234,
380
  "step": 566
381
  },
382
  {
383
  "epoch": 4.03,
384
  "learning_rate": 3.010752688172043e-05,
385
- "loss": 0.0496,
386
  "step": 570
387
  },
388
  {
389
  "epoch": 4.1,
390
  "learning_rate": 2.974910394265233e-05,
391
- "loss": 0.0279,
392
  "step": 580
393
  },
394
  {
395
  "epoch": 4.17,
396
  "learning_rate": 2.939068100358423e-05,
397
- "loss": 0.0647,
398
  "step": 590
399
  },
400
  {
401
  "epoch": 4.24,
402
  "learning_rate": 2.9032258064516133e-05,
403
- "loss": 0.052,
404
  "step": 600
405
  },
406
  {
407
  "epoch": 4.31,
408
  "learning_rate": 2.8673835125448028e-05,
409
- "loss": 0.0606,
410
  "step": 610
411
  },
412
  {
413
  "epoch": 4.38,
414
  "learning_rate": 2.831541218637993e-05,
415
- "loss": 0.0843,
416
  "step": 620
417
  },
418
  {
419
  "epoch": 4.45,
420
  "learning_rate": 2.7956989247311828e-05,
421
- "loss": 0.0687,
422
  "step": 630
423
  },
424
  {
425
  "epoch": 4.52,
426
  "learning_rate": 2.759856630824373e-05,
427
- "loss": 0.0473,
428
  "step": 640
429
  },
430
  {
431
  "epoch": 4.59,
432
  "learning_rate": 2.7240143369175632e-05,
433
- "loss": 0.0448,
434
  "step": 650
435
  },
436
  {
437
  "epoch": 4.66,
438
  "learning_rate": 2.6881720430107527e-05,
439
- "loss": 0.0504,
440
  "step": 660
441
  },
442
  {
443
  "epoch": 4.73,
444
  "learning_rate": 2.652329749103943e-05,
445
- "loss": 0.0595,
446
  "step": 670
447
  },
448
  {
449
  "epoch": 4.8,
450
  "learning_rate": 2.616487455197133e-05,
451
- "loss": 0.0721,
452
  "step": 680
453
  },
454
  {
455
  "epoch": 4.87,
456
  "learning_rate": 2.5806451612903226e-05,
457
- "loss": 0.0709,
458
  "step": 690
459
  },
460
  {
461
  "epoch": 4.94,
462
  "learning_rate": 2.5448028673835127e-05,
463
- "loss": 0.0628,
464
  "step": 700
465
  },
466
  {
467
  "epoch": 4.99,
468
- "eval_accuracy": 0.9954597048808173,
469
- "eval_loss": 0.013796533457934856,
470
- "eval_runtime": 10.1728,
471
- "eval_samples_per_second": 86.603,
472
- "eval_steps_per_second": 10.911,
473
  "step": 707
474
  },
475
  {
476
  "epoch": 5.02,
477
  "learning_rate": 2.5089605734767026e-05,
478
- "loss": 0.0601,
479
  "step": 710
480
  },
481
  {
482
  "epoch": 5.09,
483
  "learning_rate": 2.4731182795698928e-05,
484
- "loss": 0.0512,
485
  "step": 720
486
  },
487
  {
488
  "epoch": 5.16,
489
  "learning_rate": 2.4372759856630826e-05,
490
- "loss": 0.0469,
491
  "step": 730
492
  },
493
  {
494
  "epoch": 5.23,
495
  "learning_rate": 2.4014336917562724e-05,
496
- "loss": 0.0343,
497
  "step": 740
498
  },
499
  {
500
  "epoch": 5.3,
501
  "learning_rate": 2.3655913978494626e-05,
502
- "loss": 0.0334,
503
  "step": 750
504
  },
505
  {
506
  "epoch": 5.37,
507
  "learning_rate": 2.3297491039426525e-05,
508
- "loss": 0.0525,
509
  "step": 760
510
  },
511
  {
512
  "epoch": 5.44,
513
  "learning_rate": 2.2939068100358423e-05,
514
- "loss": 0.0639,
515
  "step": 770
516
  },
517
  {
518
  "epoch": 5.51,
519
  "learning_rate": 2.258064516129032e-05,
520
- "loss": 0.0408,
521
  "step": 780
522
  },
523
  {
524
  "epoch": 5.58,
525
  "learning_rate": 2.2222222222222223e-05,
526
- "loss": 0.0327,
527
  "step": 790
528
  },
529
  {
530
  "epoch": 5.65,
531
  "learning_rate": 2.1863799283154125e-05,
532
- "loss": 0.0571,
533
  "step": 800
534
  },
535
  {
536
  "epoch": 5.72,
537
  "learning_rate": 2.1505376344086024e-05,
538
- "loss": 0.0681,
539
  "step": 810
540
  },
541
  {
542
  "epoch": 5.79,
543
  "learning_rate": 2.1146953405017922e-05,
544
- "loss": 0.0589,
545
  "step": 820
546
  },
547
  {
548
  "epoch": 5.86,
549
  "learning_rate": 2.078853046594982e-05,
550
- "loss": 0.0515,
551
  "step": 830
552
  },
553
  {
554
  "epoch": 5.93,
555
  "learning_rate": 2.0430107526881722e-05,
556
- "loss": 0.0402,
557
  "step": 840
558
  },
559
  {
560
  "epoch": 6.0,
561
  "eval_accuracy": 0.9988649262202043,
562
- "eval_loss": 0.00955167319625616,
563
- "eval_runtime": 9.9549,
564
- "eval_samples_per_second": 88.499,
565
- "eval_steps_per_second": 11.15,
566
  "step": 849
567
  },
568
  {
569
  "epoch": 6.0,
570
  "learning_rate": 2.007168458781362e-05,
571
- "loss": 0.0401,
572
  "step": 850
573
  },
574
  {
575
  "epoch": 6.07,
576
  "learning_rate": 1.9713261648745522e-05,
577
- "loss": 0.0595,
578
  "step": 860
579
  },
580
  {
581
  "epoch": 6.15,
582
  "learning_rate": 1.935483870967742e-05,
583
- "loss": 0.0518,
584
  "step": 870
585
  },
586
  {
587
  "epoch": 6.22,
588
  "learning_rate": 1.899641577060932e-05,
589
- "loss": 0.0354,
590
  "step": 880
591
  },
592
  {
593
  "epoch": 6.29,
594
  "learning_rate": 1.863799283154122e-05,
595
- "loss": 0.0325,
596
  "step": 890
597
  },
598
  {
599
  "epoch": 6.36,
600
  "learning_rate": 1.827956989247312e-05,
601
- "loss": 0.0453,
602
  "step": 900
603
  },
604
  {
605
  "epoch": 6.43,
606
  "learning_rate": 1.7921146953405018e-05,
607
- "loss": 0.0283,
608
  "step": 910
609
  },
610
  {
611
  "epoch": 6.5,
612
  "learning_rate": 1.7562724014336916e-05,
613
- "loss": 0.0439,
614
  "step": 920
615
  },
616
  {
617
  "epoch": 6.57,
618
  "learning_rate": 1.7204301075268818e-05,
619
- "loss": 0.0368,
620
  "step": 930
621
  },
622
  {
623
  "epoch": 6.64,
624
  "learning_rate": 1.684587813620072e-05,
625
- "loss": 0.0561,
626
  "step": 940
627
  },
628
  {
629
  "epoch": 6.71,
630
  "learning_rate": 1.648745519713262e-05,
631
- "loss": 0.054,
632
  "step": 950
633
  },
634
  {
635
  "epoch": 6.78,
636
  "learning_rate": 1.6129032258064517e-05,
637
- "loss": 0.0624,
638
  "step": 960
639
  },
640
  {
641
  "epoch": 6.85,
642
  "learning_rate": 1.5770609318996415e-05,
643
- "loss": 0.0315,
644
  "step": 970
645
  },
646
  {
647
  "epoch": 6.92,
648
  "learning_rate": 1.5412186379928317e-05,
649
- "loss": 0.0625,
650
  "step": 980
651
  },
652
  {
653
  "epoch": 6.99,
654
  "learning_rate": 1.5053763440860215e-05,
655
- "loss": 0.062,
656
  "step": 990
657
  },
658
  {
659
  "epoch": 7.0,
660
- "eval_accuracy": 0.9977298524404086,
661
- "eval_loss": 0.00731318723410368,
662
- "eval_runtime": 9.9651,
663
- "eval_samples_per_second": 88.409,
664
- "eval_steps_per_second": 11.139,
665
  "step": 991
666
  },
667
  {
668
  "epoch": 7.06,
669
  "learning_rate": 1.4695340501792116e-05,
670
- "loss": 0.0506,
671
  "step": 1000
672
  },
673
  {
674
  "epoch": 7.13,
675
  "learning_rate": 1.4336917562724014e-05,
676
- "loss": 0.0394,
677
  "step": 1010
678
  },
679
  {
680
  "epoch": 7.2,
681
  "learning_rate": 1.3978494623655914e-05,
682
- "loss": 0.0458,
683
  "step": 1020
684
  },
685
  {
686
  "epoch": 7.28,
687
  "learning_rate": 1.3620071684587816e-05,
688
- "loss": 0.0257,
689
  "step": 1030
690
  },
691
  {
692
  "epoch": 7.35,
693
  "learning_rate": 1.3261648745519714e-05,
694
- "loss": 0.0312,
695
  "step": 1040
696
  },
697
  {
698
  "epoch": 7.42,
699
  "learning_rate": 1.2903225806451613e-05,
700
- "loss": 0.0324,
701
  "step": 1050
702
  },
703
  {
704
  "epoch": 7.49,
705
  "learning_rate": 1.2544802867383513e-05,
706
- "loss": 0.0416,
707
  "step": 1060
708
  },
709
  {
710
  "epoch": 7.56,
711
  "learning_rate": 1.2186379928315413e-05,
712
- "loss": 0.0309,
713
  "step": 1070
714
  },
715
  {
716
  "epoch": 7.63,
717
  "learning_rate": 1.1827956989247313e-05,
718
- "loss": 0.0201,
719
  "step": 1080
720
  },
721
  {
722
  "epoch": 7.7,
723
  "learning_rate": 1.1469534050179212e-05,
724
- "loss": 0.0531,
725
  "step": 1090
726
  },
727
  {
728
  "epoch": 7.77,
729
  "learning_rate": 1.1111111111111112e-05,
730
- "loss": 0.019,
731
  "step": 1100
732
  },
733
  {
734
  "epoch": 7.84,
735
  "learning_rate": 1.0752688172043012e-05,
736
- "loss": 0.0585,
737
  "step": 1110
738
  },
739
  {
740
  "epoch": 7.91,
741
  "learning_rate": 1.039426523297491e-05,
742
- "loss": 0.0464,
743
  "step": 1120
744
  },
745
  {
746
  "epoch": 7.98,
747
  "learning_rate": 1.003584229390681e-05,
748
- "loss": 0.021,
749
  "step": 1130
750
  },
751
  {
752
  "epoch": 8.0,
753
- "eval_accuracy": 0.9977298524404086,
754
- "eval_loss": 0.007844115607440472,
755
- "eval_runtime": 9.6256,
756
- "eval_samples_per_second": 91.527,
757
- "eval_steps_per_second": 11.532,
758
  "step": 1132
759
  },
760
  {
761
  "epoch": 8.05,
762
  "learning_rate": 9.67741935483871e-06,
763
- "loss": 0.0351,
764
  "step": 1140
765
  },
766
  {
767
  "epoch": 8.12,
768
  "learning_rate": 9.31899641577061e-06,
769
- "loss": 0.0646,
770
  "step": 1150
771
  },
772
  {
773
  "epoch": 8.19,
774
  "learning_rate": 8.960573476702509e-06,
775
- "loss": 0.0377,
776
  "step": 1160
777
  },
778
  {
779
  "epoch": 8.26,
780
  "learning_rate": 8.602150537634409e-06,
781
- "loss": 0.0537,
782
  "step": 1170
783
  },
784
  {
785
  "epoch": 8.34,
786
  "learning_rate": 8.24372759856631e-06,
787
- "loss": 0.0313,
788
  "step": 1180
789
  },
790
  {
791
  "epoch": 8.41,
792
  "learning_rate": 7.885304659498208e-06,
793
- "loss": 0.0477,
794
  "step": 1190
795
  },
796
  {
797
  "epoch": 8.48,
798
  "learning_rate": 7.526881720430108e-06,
799
- "loss": 0.0371,
800
  "step": 1200
801
  },
802
  {
803
  "epoch": 8.55,
804
  "learning_rate": 7.168458781362007e-06,
805
- "loss": 0.0358,
806
  "step": 1210
807
  },
808
  {
809
  "epoch": 8.62,
810
  "learning_rate": 6.810035842293908e-06,
811
- "loss": 0.025,
812
  "step": 1220
813
  },
814
  {
815
  "epoch": 8.69,
816
  "learning_rate": 6.451612903225806e-06,
817
- "loss": 0.0591,
818
  "step": 1230
819
  },
820
  {
821
  "epoch": 8.76,
822
  "learning_rate": 6.0931899641577065e-06,
823
- "loss": 0.0385,
824
  "step": 1240
825
  },
826
  {
827
  "epoch": 8.83,
828
  "learning_rate": 5.734767025089606e-06,
829
- "loss": 0.0456,
830
  "step": 1250
831
  },
832
  {
833
  "epoch": 8.9,
834
  "learning_rate": 5.376344086021506e-06,
835
- "loss": 0.027,
836
  "step": 1260
837
  },
838
  {
839
  "epoch": 8.97,
840
  "learning_rate": 5.017921146953405e-06,
841
- "loss": 0.0317,
842
  "step": 1270
843
  },
844
  {
845
  "epoch": 9.0,
846
- "eval_accuracy": 0.996594778660613,
847
- "eval_loss": 0.008439515717327595,
848
- "eval_runtime": 9.5892,
849
- "eval_samples_per_second": 91.875,
850
- "eval_steps_per_second": 11.576,
851
  "step": 1274
852
  },
853
  {
854
  "epoch": 9.04,
855
  "learning_rate": 4.659498207885305e-06,
856
- "loss": 0.0448,
857
  "step": 1280
858
  },
859
  {
860
  "epoch": 9.11,
861
  "learning_rate": 4.3010752688172045e-06,
862
- "loss": 0.0307,
863
  "step": 1290
864
  },
865
  {
866
  "epoch": 9.18,
867
  "learning_rate": 3.942652329749104e-06,
868
- "loss": 0.0501,
869
  "step": 1300
870
  },
871
  {
872
  "epoch": 9.25,
873
  "learning_rate": 3.5842293906810035e-06,
874
- "loss": 0.0339,
875
  "step": 1310
876
  },
877
  {
878
  "epoch": 9.32,
879
  "learning_rate": 3.225806451612903e-06,
880
- "loss": 0.0325,
881
  "step": 1320
882
  },
883
  {
884
  "epoch": 9.39,
885
  "learning_rate": 2.867383512544803e-06,
886
- "loss": 0.0325,
887
  "step": 1330
888
  },
889
  {
890
  "epoch": 9.47,
891
  "learning_rate": 2.5089605734767026e-06,
892
- "loss": 0.0321,
893
  "step": 1340
894
  },
895
  {
896
  "epoch": 9.54,
897
  "learning_rate": 2.1505376344086023e-06,
898
- "loss": 0.0533,
899
  "step": 1350
900
  },
901
  {
902
  "epoch": 9.61,
903
  "learning_rate": 1.7921146953405017e-06,
904
- "loss": 0.0308,
905
  "step": 1360
906
  },
907
  {
908
  "epoch": 9.68,
909
  "learning_rate": 1.4336917562724014e-06,
910
- "loss": 0.0626,
911
  "step": 1370
912
  },
913
  {
914
  "epoch": 9.75,
915
  "learning_rate": 1.0752688172043011e-06,
916
- "loss": 0.0711,
917
  "step": 1380
918
  },
919
  {
920
  "epoch": 9.82,
921
  "learning_rate": 7.168458781362007e-07,
922
- "loss": 0.035,
923
  "step": 1390
924
  },
925
  {
926
  "epoch": 9.89,
927
  "learning_rate": 3.5842293906810036e-07,
928
- "loss": 0.0239,
929
  "step": 1400
930
  },
931
  {
932
  "epoch": 9.96,
933
  "learning_rate": 0.0,
934
- "loss": 0.0339,
935
  "step": 1410
936
  },
937
  {
938
  "epoch": 9.96,
939
- "eval_accuracy": 0.996594778660613,
940
- "eval_loss": 0.008212185464799404,
941
- "eval_runtime": 10.1675,
942
- "eval_samples_per_second": 86.649,
943
- "eval_steps_per_second": 10.917,
944
  "step": 1410
945
  },
946
  {
947
  "epoch": 9.96,
948
  "step": 1410,
949
  "total_flos": 1.3986242481389875e+19,
950
- "train_loss": 0.0672107882744877,
951
- "train_runtime": 1935.8008,
952
- "train_samples_per_second": 40.929,
953
- "train_steps_per_second": 0.728
954
  }
955
  ],
956
  "max_steps": 1410,
 
1
  {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "convnext-large-224-22k-1k-BottomSportsCasual/checkpoint-283",
4
  "epoch": 9.959636730575177,
5
  "global_step": 1410,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.07,
12
  "learning_rate": 3.3333333333333335e-05,
13
+ "loss": 0.6592,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.14,
18
  "learning_rate": 4.982078853046595e-05,
19
+ "loss": 0.2847,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.21,
24
  "learning_rate": 4.9462365591397855e-05,
25
+ "loss": 0.1765,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.28,
30
  "learning_rate": 4.910394265232976e-05,
31
+ "loss": 0.1438,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.35,
36
  "learning_rate": 4.874551971326165e-05,
37
+ "loss": 0.0968,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.42,
42
  "learning_rate": 4.8387096774193554e-05,
43
+ "loss": 0.1287,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.49,
48
  "learning_rate": 4.802867383512545e-05,
49
+ "loss": 0.1062,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.57,
54
  "learning_rate": 4.767025089605735e-05,
55
+ "loss": 0.1034,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.64,
60
  "learning_rate": 4.731182795698925e-05,
61
+ "loss": 0.1068,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.71,
66
  "learning_rate": 4.695340501792115e-05,
67
+ "loss": 0.0912,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.78,
72
  "learning_rate": 4.659498207885305e-05,
73
+ "loss": 0.0704,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.85,
78
  "learning_rate": 4.6236559139784944e-05,
79
+ "loss": 0.1192,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.92,
84
  "learning_rate": 4.5878136200716846e-05,
85
+ "loss": 0.0991,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.99,
90
  "learning_rate": 4.551971326164875e-05,
91
+ "loss": 0.0732,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 1.0,
96
+ "eval_accuracy": 0.9943246311010215,
97
+ "eval_loss": 0.011665264144539833,
98
+ "eval_runtime": 9.6566,
99
+ "eval_samples_per_second": 91.233,
100
+ "eval_steps_per_second": 11.495,
101
  "step": 141
102
  },
103
  {
104
  "epoch": 1.06,
105
  "learning_rate": 4.516129032258064e-05,
106
+ "loss": 0.0578,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.13,
111
  "learning_rate": 4.4802867383512545e-05,
112
+ "loss": 0.1351,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.2,
117
  "learning_rate": 4.4444444444444447e-05,
118
+ "loss": 0.1143,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.27,
123
  "learning_rate": 4.408602150537635e-05,
124
+ "loss": 0.1396,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.34,
129
  "learning_rate": 4.372759856630825e-05,
130
+ "loss": 0.0934,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.41,
135
  "learning_rate": 4.3369175627240145e-05,
136
+ "loss": 0.1086,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.48,
141
  "learning_rate": 4.301075268817205e-05,
142
+ "loss": 0.0862,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.55,
147
  "learning_rate": 4.265232974910394e-05,
148
+ "loss": 0.0786,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.62,
153
  "learning_rate": 4.2293906810035844e-05,
154
+ "loss": 0.1058,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.7,
159
  "learning_rate": 4.1935483870967746e-05,
160
+ "loss": 0.0606,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.77,
165
  "learning_rate": 4.157706093189964e-05,
166
+ "loss": 0.0776,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.84,
171
  "learning_rate": 4.121863799283154e-05,
172
+ "loss": 0.0909,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.91,
177
  "learning_rate": 4.0860215053763444e-05,
178
+ "loss": 0.0704,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.98,
183
  "learning_rate": 4.050179211469534e-05,
184
+ "loss": 0.0664,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 2.0,
189
+ "eval_accuracy": 1.0,
190
+ "eval_loss": 0.005081297364085913,
191
+ "eval_runtime": 9.5071,
192
+ "eval_samples_per_second": 92.667,
193
+ "eval_steps_per_second": 11.675,
194
  "step": 283
195
  },
196
  {
197
  "epoch": 2.05,
198
  "learning_rate": 4.014336917562724e-05,
199
+ "loss": 0.0667,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 2.12,
204
  "learning_rate": 3.978494623655914e-05,
205
+ "loss": 0.072,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 2.19,
210
  "learning_rate": 3.9426523297491045e-05,
211
+ "loss": 0.078,
212
  "step": 310
213
  },
214
  {
215
  "epoch": 2.26,
216
  "learning_rate": 3.906810035842295e-05,
217
+ "loss": 0.0978,
218
  "step": 320
219
  },
220
  {
221
  "epoch": 2.33,
222
  "learning_rate": 3.870967741935484e-05,
223
+ "loss": 0.0667,
224
  "step": 330
225
  },
226
  {
227
  "epoch": 2.4,
228
  "learning_rate": 3.8351254480286743e-05,
229
+ "loss": 0.0646,
230
  "step": 340
231
  },
232
  {
233
  "epoch": 2.47,
234
  "learning_rate": 3.799283154121864e-05,
235
+ "loss": 0.0509,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.54,
240
  "learning_rate": 3.763440860215054e-05,
241
+ "loss": 0.0676,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.61,
246
  "learning_rate": 3.727598566308244e-05,
247
+ "loss": 0.0474,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.68,
252
  "learning_rate": 3.691756272401434e-05,
253
+ "loss": 0.084,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.75,
258
  "learning_rate": 3.655913978494624e-05,
259
+ "loss": 0.0636,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.83,
264
  "learning_rate": 3.6200716845878134e-05,
265
+ "loss": 0.102,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.9,
270
  "learning_rate": 3.5842293906810036e-05,
271
+ "loss": 0.0863,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.97,
276
  "learning_rate": 3.548387096774194e-05,
277
+ "loss": 0.065,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.99,
282
+ "eval_accuracy": 0.9988649262202043,
283
+ "eval_loss": 0.004680068697780371,
284
+ "eval_runtime": 9.346,
285
+ "eval_samples_per_second": 94.265,
286
+ "eval_steps_per_second": 11.877,
287
  "step": 424
288
  },
289
  {
290
  "epoch": 3.04,
291
  "learning_rate": 3.512544802867383e-05,
292
+ "loss": 0.0655,
293
  "step": 430
294
  },
295
  {
296
  "epoch": 3.11,
297
  "learning_rate": 3.4767025089605734e-05,
298
+ "loss": 0.0511,
299
  "step": 440
300
  },
301
  {
302
  "epoch": 3.18,
303
  "learning_rate": 3.4408602150537636e-05,
304
+ "loss": 0.0829,
305
  "step": 450
306
  },
307
  {
308
  "epoch": 3.25,
309
  "learning_rate": 3.405017921146954e-05,
310
+ "loss": 0.0413,
311
  "step": 460
312
  },
313
  {
314
  "epoch": 3.32,
315
  "learning_rate": 3.369175627240144e-05,
316
+ "loss": 0.0587,
317
  "step": 470
318
  },
319
  {
320
  "epoch": 3.39,
321
  "learning_rate": 3.3333333333333335e-05,
322
+ "loss": 0.077,
323
  "step": 480
324
  },
325
  {
326
  "epoch": 3.46,
327
  "learning_rate": 3.297491039426524e-05,
328
+ "loss": 0.0637,
329
  "step": 490
330
  },
331
  {
332
  "epoch": 3.53,
333
  "learning_rate": 3.261648745519714e-05,
334
+ "loss": 0.0822,
335
  "step": 500
336
  },
337
  {
338
  "epoch": 3.6,
339
  "learning_rate": 3.2258064516129034e-05,
340
+ "loss": 0.0491,
341
  "step": 510
342
  },
343
  {
344
  "epoch": 3.67,
345
  "learning_rate": 3.1899641577060935e-05,
346
+ "loss": 0.0638,
347
  "step": 520
348
  },
349
  {
350
  "epoch": 3.74,
351
  "learning_rate": 3.154121863799283e-05,
352
+ "loss": 0.0866,
353
  "step": 530
354
  },
355
  {
356
  "epoch": 3.81,
357
  "learning_rate": 3.118279569892473e-05,
358
+ "loss": 0.0516,
359
  "step": 540
360
  },
361
  {
362
  "epoch": 3.88,
363
  "learning_rate": 3.0824372759856634e-05,
364
+ "loss": 0.0443,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.96,
369
  "learning_rate": 3.046594982078853e-05,
370
+ "loss": 0.0635,
371
  "step": 560
372
  },
373
  {
374
  "epoch": 4.0,
375
+ "eval_accuracy": 1.0,
376
+ "eval_loss": 0.0028926546219736338,
377
+ "eval_runtime": 9.7296,
378
+ "eval_samples_per_second": 90.549,
379
+ "eval_steps_per_second": 11.409,
380
  "step": 566
381
  },
382
  {
383
  "epoch": 4.03,
384
  "learning_rate": 3.010752688172043e-05,
385
+ "loss": 0.0762,
386
  "step": 570
387
  },
388
  {
389
  "epoch": 4.1,
390
  "learning_rate": 2.974910394265233e-05,
391
+ "loss": 0.0427,
392
  "step": 580
393
  },
394
  {
395
  "epoch": 4.17,
396
  "learning_rate": 2.939068100358423e-05,
397
+ "loss": 0.0484,
398
  "step": 590
399
  },
400
  {
401
  "epoch": 4.24,
402
  "learning_rate": 2.9032258064516133e-05,
403
+ "loss": 0.0608,
404
  "step": 600
405
  },
406
  {
407
  "epoch": 4.31,
408
  "learning_rate": 2.8673835125448028e-05,
409
+ "loss": 0.04,
410
  "step": 610
411
  },
412
  {
413
  "epoch": 4.38,
414
  "learning_rate": 2.831541218637993e-05,
415
+ "loss": 0.0436,
416
  "step": 620
417
  },
418
  {
419
  "epoch": 4.45,
420
  "learning_rate": 2.7956989247311828e-05,
421
+ "loss": 0.0418,
422
  "step": 630
423
  },
424
  {
425
  "epoch": 4.52,
426
  "learning_rate": 2.759856630824373e-05,
427
+ "loss": 0.0385,
428
  "step": 640
429
  },
430
  {
431
  "epoch": 4.59,
432
  "learning_rate": 2.7240143369175632e-05,
433
+ "loss": 0.0511,
434
  "step": 650
435
  },
436
  {
437
  "epoch": 4.66,
438
  "learning_rate": 2.6881720430107527e-05,
439
+ "loss": 0.061,
440
  "step": 660
441
  },
442
  {
443
  "epoch": 4.73,
444
  "learning_rate": 2.652329749103943e-05,
445
+ "loss": 0.0256,
446
  "step": 670
447
  },
448
  {
449
  "epoch": 4.8,
450
  "learning_rate": 2.616487455197133e-05,
451
+ "loss": 0.0679,
452
  "step": 680
453
  },
454
  {
455
  "epoch": 4.87,
456
  "learning_rate": 2.5806451612903226e-05,
457
+ "loss": 0.0461,
458
  "step": 690
459
  },
460
  {
461
  "epoch": 4.94,
462
  "learning_rate": 2.5448028673835127e-05,
463
+ "loss": 0.0357,
464
  "step": 700
465
  },
466
  {
467
  "epoch": 4.99,
468
+ "eval_accuracy": 0.996594778660613,
469
+ "eval_loss": 0.0047698114067316055,
470
+ "eval_runtime": 9.8714,
471
+ "eval_samples_per_second": 89.248,
472
+ "eval_steps_per_second": 11.245,
473
  "step": 707
474
  },
475
  {
476
  "epoch": 5.02,
477
  "learning_rate": 2.5089605734767026e-05,
478
+ "loss": 0.0668,
479
  "step": 710
480
  },
481
  {
482
  "epoch": 5.09,
483
  "learning_rate": 2.4731182795698928e-05,
484
+ "loss": 0.0446,
485
  "step": 720
486
  },
487
  {
488
  "epoch": 5.16,
489
  "learning_rate": 2.4372759856630826e-05,
490
+ "loss": 0.0561,
491
  "step": 730
492
  },
493
  {
494
  "epoch": 5.23,
495
  "learning_rate": 2.4014336917562724e-05,
496
+ "loss": 0.0538,
497
  "step": 740
498
  },
499
  {
500
  "epoch": 5.3,
501
  "learning_rate": 2.3655913978494626e-05,
502
+ "loss": 0.0408,
503
  "step": 750
504
  },
505
  {
506
  "epoch": 5.37,
507
  "learning_rate": 2.3297491039426525e-05,
508
+ "loss": 0.0373,
509
  "step": 760
510
  },
511
  {
512
  "epoch": 5.44,
513
  "learning_rate": 2.2939068100358423e-05,
514
+ "loss": 0.0336,
515
  "step": 770
516
  },
517
  {
518
  "epoch": 5.51,
519
  "learning_rate": 2.258064516129032e-05,
520
+ "loss": 0.0571,
521
  "step": 780
522
  },
523
  {
524
  "epoch": 5.58,
525
  "learning_rate": 2.2222222222222223e-05,
526
+ "loss": 0.0609,
527
  "step": 790
528
  },
529
  {
530
  "epoch": 5.65,
531
  "learning_rate": 2.1863799283154125e-05,
532
+ "loss": 0.0689,
533
  "step": 800
534
  },
535
  {
536
  "epoch": 5.72,
537
  "learning_rate": 2.1505376344086024e-05,
538
+ "loss": 0.0654,
539
  "step": 810
540
  },
541
  {
542
  "epoch": 5.79,
543
  "learning_rate": 2.1146953405017922e-05,
544
+ "loss": 0.0341,
545
  "step": 820
546
  },
547
  {
548
  "epoch": 5.86,
549
  "learning_rate": 2.078853046594982e-05,
550
+ "loss": 0.0383,
551
  "step": 830
552
  },
553
  {
554
  "epoch": 5.93,
555
  "learning_rate": 2.0430107526881722e-05,
556
+ "loss": 0.0826,
557
  "step": 840
558
  },
559
  {
560
  "epoch": 6.0,
561
  "eval_accuracy": 0.9988649262202043,
562
+ "eval_loss": 0.00289411679841578,
563
+ "eval_runtime": 10.3189,
564
+ "eval_samples_per_second": 85.378,
565
+ "eval_steps_per_second": 10.757,
566
  "step": 849
567
  },
568
  {
569
  "epoch": 6.0,
570
  "learning_rate": 2.007168458781362e-05,
571
+ "loss": 0.072,
572
  "step": 850
573
  },
574
  {
575
  "epoch": 6.07,
576
  "learning_rate": 1.9713261648745522e-05,
577
+ "loss": 0.043,
578
  "step": 860
579
  },
580
  {
581
  "epoch": 6.15,
582
  "learning_rate": 1.935483870967742e-05,
583
+ "loss": 0.038,
584
  "step": 870
585
  },
586
  {
587
  "epoch": 6.22,
588
  "learning_rate": 1.899641577060932e-05,
589
+ "loss": 0.061,
590
  "step": 880
591
  },
592
  {
593
  "epoch": 6.29,
594
  "learning_rate": 1.863799283154122e-05,
595
+ "loss": 0.0468,
596
  "step": 890
597
  },
598
  {
599
  "epoch": 6.36,
600
  "learning_rate": 1.827956989247312e-05,
601
+ "loss": 0.0689,
602
  "step": 900
603
  },
604
  {
605
  "epoch": 6.43,
606
  "learning_rate": 1.7921146953405018e-05,
607
+ "loss": 0.0671,
608
  "step": 910
609
  },
610
  {
611
  "epoch": 6.5,
612
  "learning_rate": 1.7562724014336916e-05,
613
+ "loss": 0.0648,
614
  "step": 920
615
  },
616
  {
617
  "epoch": 6.57,
618
  "learning_rate": 1.7204301075268818e-05,
619
+ "loss": 0.0502,
620
  "step": 930
621
  },
622
  {
623
  "epoch": 6.64,
624
  "learning_rate": 1.684587813620072e-05,
625
+ "loss": 0.052,
626
  "step": 940
627
  },
628
  {
629
  "epoch": 6.71,
630
  "learning_rate": 1.648745519713262e-05,
631
+ "loss": 0.0363,
632
  "step": 950
633
  },
634
  {
635
  "epoch": 6.78,
636
  "learning_rate": 1.6129032258064517e-05,
637
+ "loss": 0.0261,
638
  "step": 960
639
  },
640
  {
641
  "epoch": 6.85,
642
  "learning_rate": 1.5770609318996415e-05,
643
+ "loss": 0.0413,
644
  "step": 970
645
  },
646
  {
647
  "epoch": 6.92,
648
  "learning_rate": 1.5412186379928317e-05,
649
+ "loss": 0.0295,
650
  "step": 980
651
  },
652
  {
653
  "epoch": 6.99,
654
  "learning_rate": 1.5053763440860215e-05,
655
+ "loss": 0.0177,
656
  "step": 990
657
  },
658
  {
659
  "epoch": 7.0,
660
+ "eval_accuracy": 1.0,
661
+ "eval_loss": 0.0018128241645172238,
662
+ "eval_runtime": 9.7183,
663
+ "eval_samples_per_second": 90.654,
664
+ "eval_steps_per_second": 11.422,
665
  "step": 991
666
  },
667
  {
668
  "epoch": 7.06,
669
  "learning_rate": 1.4695340501792116e-05,
670
+ "loss": 0.0518,
671
  "step": 1000
672
  },
673
  {
674
  "epoch": 7.13,
675
  "learning_rate": 1.4336917562724014e-05,
676
+ "loss": 0.0421,
677
  "step": 1010
678
  },
679
  {
680
  "epoch": 7.2,
681
  "learning_rate": 1.3978494623655914e-05,
682
+ "loss": 0.026,
683
  "step": 1020
684
  },
685
  {
686
  "epoch": 7.28,
687
  "learning_rate": 1.3620071684587816e-05,
688
+ "loss": 0.0234,
689
  "step": 1030
690
  },
691
  {
692
  "epoch": 7.35,
693
  "learning_rate": 1.3261648745519714e-05,
694
+ "loss": 0.0433,
695
  "step": 1040
696
  },
697
  {
698
  "epoch": 7.42,
699
  "learning_rate": 1.2903225806451613e-05,
700
+ "loss": 0.0599,
701
  "step": 1050
702
  },
703
  {
704
  "epoch": 7.49,
705
  "learning_rate": 1.2544802867383513e-05,
706
+ "loss": 0.0447,
707
  "step": 1060
708
  },
709
  {
710
  "epoch": 7.56,
711
  "learning_rate": 1.2186379928315413e-05,
712
+ "loss": 0.0178,
713
  "step": 1070
714
  },
715
  {
716
  "epoch": 7.63,
717
  "learning_rate": 1.1827956989247313e-05,
718
+ "loss": 0.0471,
719
  "step": 1080
720
  },
721
  {
722
  "epoch": 7.7,
723
  "learning_rate": 1.1469534050179212e-05,
724
+ "loss": 0.0539,
725
  "step": 1090
726
  },
727
  {
728
  "epoch": 7.77,
729
  "learning_rate": 1.1111111111111112e-05,
730
+ "loss": 0.031,
731
  "step": 1100
732
  },
733
  {
734
  "epoch": 7.84,
735
  "learning_rate": 1.0752688172043012e-05,
736
+ "loss": 0.0202,
737
  "step": 1110
738
  },
739
  {
740
  "epoch": 7.91,
741
  "learning_rate": 1.039426523297491e-05,
742
+ "loss": 0.0374,
743
  "step": 1120
744
  },
745
  {
746
  "epoch": 7.98,
747
  "learning_rate": 1.003584229390681e-05,
748
+ "loss": 0.0353,
749
  "step": 1130
750
  },
751
  {
752
  "epoch": 8.0,
753
+ "eval_accuracy": 1.0,
754
+ "eval_loss": 0.0006858365959487855,
755
+ "eval_runtime": 9.4026,
756
+ "eval_samples_per_second": 93.698,
757
+ "eval_steps_per_second": 11.805,
758
  "step": 1132
759
  },
760
  {
761
  "epoch": 8.05,
762
  "learning_rate": 9.67741935483871e-06,
763
+ "loss": 0.0393,
764
  "step": 1140
765
  },
766
  {
767
  "epoch": 8.12,
768
  "learning_rate": 9.31899641577061e-06,
769
+ "loss": 0.0318,
770
  "step": 1150
771
  },
772
  {
773
  "epoch": 8.19,
774
  "learning_rate": 8.960573476702509e-06,
775
+ "loss": 0.0499,
776
  "step": 1160
777
  },
778
  {
779
  "epoch": 8.26,
780
  "learning_rate": 8.602150537634409e-06,
781
+ "loss": 0.0246,
782
  "step": 1170
783
  },
784
  {
785
  "epoch": 8.34,
786
  "learning_rate": 8.24372759856631e-06,
787
+ "loss": 0.0689,
788
  "step": 1180
789
  },
790
  {
791
  "epoch": 8.41,
792
  "learning_rate": 7.885304659498208e-06,
793
+ "loss": 0.0599,
794
  "step": 1190
795
  },
796
  {
797
  "epoch": 8.48,
798
  "learning_rate": 7.526881720430108e-06,
799
+ "loss": 0.042,
800
  "step": 1200
801
  },
802
  {
803
  "epoch": 8.55,
804
  "learning_rate": 7.168458781362007e-06,
805
+ "loss": 0.0459,
806
  "step": 1210
807
  },
808
  {
809
  "epoch": 8.62,
810
  "learning_rate": 6.810035842293908e-06,
811
+ "loss": 0.0299,
812
  "step": 1220
813
  },
814
  {
815
  "epoch": 8.69,
816
  "learning_rate": 6.451612903225806e-06,
817
+ "loss": 0.028,
818
  "step": 1230
819
  },
820
  {
821
  "epoch": 8.76,
822
  "learning_rate": 6.0931899641577065e-06,
823
+ "loss": 0.0393,
824
  "step": 1240
825
  },
826
  {
827
  "epoch": 8.83,
828
  "learning_rate": 5.734767025089606e-06,
829
+ "loss": 0.0544,
830
  "step": 1250
831
  },
832
  {
833
  "epoch": 8.9,
834
  "learning_rate": 5.376344086021506e-06,
835
+ "loss": 0.0184,
836
  "step": 1260
837
  },
838
  {
839
  "epoch": 8.97,
840
  "learning_rate": 5.017921146953405e-06,
841
+ "loss": 0.0243,
842
  "step": 1270
843
  },
844
  {
845
  "epoch": 9.0,
846
+ "eval_accuracy": 1.0,
847
+ "eval_loss": 0.0003116827574558556,
848
+ "eval_runtime": 9.8106,
849
+ "eval_samples_per_second": 89.8,
850
+ "eval_steps_per_second": 11.314,
851
  "step": 1274
852
  },
853
  {
854
  "epoch": 9.04,
855
  "learning_rate": 4.659498207885305e-06,
856
+ "loss": 0.0483,
857
  "step": 1280
858
  },
859
  {
860
  "epoch": 9.11,
861
  "learning_rate": 4.3010752688172045e-06,
862
+ "loss": 0.0291,
863
  "step": 1290
864
  },
865
  {
866
  "epoch": 9.18,
867
  "learning_rate": 3.942652329749104e-06,
868
+ "loss": 0.0394,
869
  "step": 1300
870
  },
871
  {
872
  "epoch": 9.25,
873
  "learning_rate": 3.5842293906810035e-06,
874
+ "loss": 0.0242,
875
  "step": 1310
876
  },
877
  {
878
  "epoch": 9.32,
879
  "learning_rate": 3.225806451612903e-06,
880
+ "loss": 0.044,
881
  "step": 1320
882
  },
883
  {
884
  "epoch": 9.39,
885
  "learning_rate": 2.867383512544803e-06,
886
+ "loss": 0.0346,
887
  "step": 1330
888
  },
889
  {
890
  "epoch": 9.47,
891
  "learning_rate": 2.5089605734767026e-06,
892
+ "loss": 0.0521,
893
  "step": 1340
894
  },
895
  {
896
  "epoch": 9.54,
897
  "learning_rate": 2.1505376344086023e-06,
898
+ "loss": 0.041,
899
  "step": 1350
900
  },
901
  {
902
  "epoch": 9.61,
903
  "learning_rate": 1.7921146953405017e-06,
904
+ "loss": 0.0497,
905
  "step": 1360
906
  },
907
  {
908
  "epoch": 9.68,
909
  "learning_rate": 1.4336917562724014e-06,
910
+ "loss": 0.0324,
911
  "step": 1370
912
  },
913
  {
914
  "epoch": 9.75,
915
  "learning_rate": 1.0752688172043011e-06,
916
+ "loss": 0.0312,
917
  "step": 1380
918
  },
919
  {
920
  "epoch": 9.82,
921
  "learning_rate": 7.168458781362007e-07,
922
+ "loss": 0.0169,
923
  "step": 1390
924
  },
925
  {
926
  "epoch": 9.89,
927
  "learning_rate": 3.5842293906810036e-07,
928
+ "loss": 0.0221,
929
  "step": 1400
930
  },
931
  {
932
  "epoch": 9.96,
933
  "learning_rate": 0.0,
934
+ "loss": 0.0259,
935
  "step": 1410
936
  },
937
  {
938
  "epoch": 9.96,
939
+ "eval_accuracy": 1.0,
940
+ "eval_loss": 0.0003799856931436807,
941
+ "eval_runtime": 9.7493,
942
+ "eval_samples_per_second": 90.366,
943
+ "eval_steps_per_second": 11.385,
944
  "step": 1410
945
  },
946
  {
947
  "epoch": 9.96,
948
  "step": 1410,
949
  "total_flos": 1.3986242481389875e+19,
950
+ "train_loss": 0.0649210772510116,
951
+ "train_runtime": 2044.7769,
952
+ "train_samples_per_second": 38.748,
953
+ "train_steps_per_second": 0.69
954
  }
955
  ],
956
  "max_steps": 1410,