mansee committed on
Commit
9efd897
1 Parent(s): e7db2c6

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +9 -9
  2. eval_results.json +5 -5
  3. train_results.json +4 -4
  4. trainer_state.json +323 -323
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.7774538386783285,
4
- "eval_loss": 0.39165472984313965,
5
- "eval_runtime": 52.9368,
6
- "eval_samples_per_second": 136.068,
7
- "eval_steps_per_second": 4.269,
8
  "total_flos": 4.829589697691566e+18,
9
- "train_loss": 0.5911478077470078,
10
- "train_runtime": 3253.5777,
11
- "train_samples_per_second": 59.774,
12
- "train_steps_per_second": 0.467
13
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9372483687352492,
4
+ "eval_loss": 0.17838290333747864,
5
+ "eval_runtime": 51.4343,
6
+ "eval_samples_per_second": 140.043,
7
+ "eval_steps_per_second": 4.394,
8
  "total_flos": 4.829589697691566e+18,
9
+ "train_loss": 0.4650797137551314,
10
+ "train_runtime": 3147.7418,
11
+ "train_samples_per_second": 61.783,
12
+ "train_steps_per_second": 0.482
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.7774538386783285,
4
- "eval_loss": 0.39165472984313965,
5
- "eval_runtime": 52.9368,
6
- "eval_samples_per_second": 136.068,
7
- "eval_steps_per_second": 4.269
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9372483687352492,
4
+ "eval_loss": 0.17838290333747864,
5
+ "eval_runtime": 51.4343,
6
+ "eval_samples_per_second": 140.043,
7
+ "eval_steps_per_second": 4.394
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 4.829589697691566e+18,
4
- "train_loss": 0.5911478077470078,
5
- "train_runtime": 3253.5777,
6
- "train_samples_per_second": 59.774,
7
- "train_steps_per_second": 0.467
8
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 4.829589697691566e+18,
4
+ "train_loss": 0.4650797137551314,
5
+ "train_runtime": 3147.7418,
6
+ "train_samples_per_second": 61.783,
7
+ "train_steps_per_second": 0.482
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7774538386783285,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-img_orientation/checkpoint-1518",
4
  "epoch": 2.9970384995064165,
5
  "eval_steps": 500,
@@ -11,1856 +11,1856 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 1.6447368421052632e-06,
14
- "loss": 1.4583,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.02,
19
  "learning_rate": 3.2894736842105265e-06,
20
- "loss": 1.4742,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.03,
25
  "learning_rate": 4.9342105263157895e-06,
26
- "loss": 1.4362,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 6.578947368421053e-06,
32
- "loss": 1.458,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.05,
37
  "learning_rate": 8.223684210526317e-06,
38
- "loss": 1.3734,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.06,
43
  "learning_rate": 9.868421052631579e-06,
44
- "loss": 1.3655,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.07,
49
  "learning_rate": 1.1513157894736843e-05,
50
- "loss": 1.3442,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.08,
55
  "learning_rate": 1.3157894736842106e-05,
56
- "loss": 1.3333,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.09,
61
  "learning_rate": 1.4802631578947368e-05,
62
- "loss": 1.2669,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.1,
67
  "learning_rate": 1.6447368421052635e-05,
68
- "loss": 1.2279,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.11,
73
  "learning_rate": 1.8092105263157896e-05,
74
- "loss": 1.176,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.12,
79
  "learning_rate": 1.9736842105263158e-05,
80
- "loss": 1.1198,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.13,
85
  "learning_rate": 2.1381578947368423e-05,
86
- "loss": 1.0456,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.14,
91
  "learning_rate": 2.3026315789473685e-05,
92
- "loss": 0.9941,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 2.4671052631578947e-05,
98
- "loss": 0.991,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.16,
103
  "learning_rate": 2.6315789473684212e-05,
104
- "loss": 0.9243,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.17,
109
  "learning_rate": 2.7960526315789477e-05,
110
- "loss": 0.9057,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.18,
115
  "learning_rate": 2.9605263157894735e-05,
116
- "loss": 0.8941,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.19,
121
  "learning_rate": 3.125e-05,
122
- "loss": 0.8725,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.2,
127
  "learning_rate": 3.289473684210527e-05,
128
- "loss": 0.8545,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.21,
133
  "learning_rate": 3.4539473684210524e-05,
134
- "loss": 0.8274,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.22,
139
  "learning_rate": 3.618421052631579e-05,
140
- "loss": 0.818,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.23,
145
  "learning_rate": 3.7828947368421054e-05,
146
- "loss": 0.8329,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.24,
151
  "learning_rate": 3.9473684210526316e-05,
152
- "loss": 0.8005,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.25,
157
  "learning_rate": 4.111842105263158e-05,
158
- "loss": 0.7636,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.26,
163
  "learning_rate": 4.2763157894736847e-05,
164
- "loss": 0.8036,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.27,
169
  "learning_rate": 4.440789473684211e-05,
170
- "loss": 0.7663,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.28,
175
  "learning_rate": 4.605263157894737e-05,
176
- "loss": 0.7535,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.29,
181
  "learning_rate": 4.769736842105263e-05,
182
- "loss": 0.7418,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.3,
187
  "learning_rate": 4.9342105263157894e-05,
188
- "loss": 0.7471,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.31,
193
  "learning_rate": 4.9890190336749635e-05,
194
- "loss": 0.7603,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.32,
199
  "learning_rate": 4.970717423133236e-05,
200
- "loss": 0.7101,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.33,
205
  "learning_rate": 4.952415812591508e-05,
206
- "loss": 0.6832,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.34,
211
  "learning_rate": 4.934114202049781e-05,
212
- "loss": 0.6959,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.35,
217
  "learning_rate": 4.9158125915080526e-05,
218
- "loss": 0.7345,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.36,
223
  "learning_rate": 4.897510980966325e-05,
224
- "loss": 0.7131,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.37,
229
  "learning_rate": 4.879209370424598e-05,
230
- "loss": 0.7713,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.38,
235
  "learning_rate": 4.86090775988287e-05,
236
- "loss": 0.6945,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.38,
241
  "learning_rate": 4.8426061493411424e-05,
242
- "loss": 0.6644,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.39,
247
  "learning_rate": 4.824304538799414e-05,
248
- "loss": 0.679,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.4,
253
  "learning_rate": 4.806002928257687e-05,
254
- "loss": 0.6657,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.41,
259
  "learning_rate": 4.787701317715959e-05,
260
- "loss": 0.685,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.42,
265
  "learning_rate": 4.7693997071742315e-05,
266
- "loss": 0.6931,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.43,
271
  "learning_rate": 4.751098096632504e-05,
272
- "loss": 0.6803,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.44,
277
  "learning_rate": 4.732796486090777e-05,
278
- "loss": 0.6415,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.45,
283
  "learning_rate": 4.7144948755490486e-05,
284
- "loss": 0.6753,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.46,
289
  "learning_rate": 4.696193265007321e-05,
290
- "loss": 0.6676,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.47,
295
  "learning_rate": 4.677891654465593e-05,
296
- "loss": 0.6445,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.48,
301
  "learning_rate": 4.659590043923866e-05,
302
- "loss": 0.7077,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.49,
307
  "learning_rate": 4.641288433382138e-05,
308
- "loss": 0.6417,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.5,
313
  "learning_rate": 4.6229868228404096e-05,
314
- "loss": 0.6383,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.51,
319
  "learning_rate": 4.604685212298683e-05,
320
- "loss": 0.6662,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.52,
325
  "learning_rate": 4.586383601756955e-05,
326
- "loss": 0.6741,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.53,
331
  "learning_rate": 4.5680819912152275e-05,
332
- "loss": 0.6429,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.54,
337
  "learning_rate": 4.5497803806734994e-05,
338
- "loss": 0.6361,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.55,
343
  "learning_rate": 4.531478770131772e-05,
344
- "loss": 0.6221,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.56,
349
  "learning_rate": 4.513177159590044e-05,
350
- "loss": 0.64,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.57,
355
  "learning_rate": 4.4948755490483165e-05,
356
- "loss": 0.6432,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.58,
361
  "learning_rate": 4.4765739385065885e-05,
362
- "loss": 0.6621,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.59,
367
  "learning_rate": 4.458272327964861e-05,
368
- "loss": 0.5987,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.6,
373
  "learning_rate": 4.439970717423134e-05,
374
- "loss": 0.6368,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.61,
379
  "learning_rate": 4.4216691068814056e-05,
380
- "loss": 0.6285,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.62,
385
  "learning_rate": 4.403367496339678e-05,
386
- "loss": 0.645,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.63,
391
  "learning_rate": 4.38506588579795e-05,
392
- "loss": 0.6416,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.64,
397
  "learning_rate": 4.366764275256223e-05,
398
- "loss": 0.6513,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.65,
403
  "learning_rate": 4.348462664714495e-05,
404
- "loss": 0.6353,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.66,
409
  "learning_rate": 4.330161054172767e-05,
410
- "loss": 0.6323,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.67,
415
  "learning_rate": 4.31185944363104e-05,
416
- "loss": 0.6454,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.68,
421
  "learning_rate": 4.2935578330893125e-05,
422
- "loss": 0.6282,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.69,
427
  "learning_rate": 4.2752562225475845e-05,
428
- "loss": 0.6029,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.7,
433
  "learning_rate": 4.256954612005857e-05,
434
- "loss": 0.6069,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.71,
439
  "learning_rate": 4.238653001464129e-05,
440
- "loss": 0.5707,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.72,
445
  "learning_rate": 4.2203513909224016e-05,
446
- "loss": 0.6161,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.73,
451
  "learning_rate": 4.2020497803806735e-05,
452
- "loss": 0.6118,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.74,
457
  "learning_rate": 4.1837481698389455e-05,
458
- "loss": 0.613,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.75,
463
  "learning_rate": 4.165446559297219e-05,
464
- "loss": 0.6044,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.76,
469
  "learning_rate": 4.147144948755491e-05,
470
- "loss": 0.5668,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.77,
475
  "learning_rate": 4.128843338213763e-05,
476
- "loss": 0.6372,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.78,
481
  "learning_rate": 4.110541727672035e-05,
482
- "loss": 0.5783,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.79,
487
  "learning_rate": 4.092240117130308e-05,
488
- "loss": 0.6007,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.8,
493
  "learning_rate": 4.07393850658858e-05,
494
- "loss": 0.5667,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.81,
499
  "learning_rate": 4.0556368960468524e-05,
500
- "loss": 0.5819,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.82,
505
  "learning_rate": 4.037335285505124e-05,
506
- "loss": 0.5884,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.83,
511
  "learning_rate": 4.019033674963397e-05,
512
- "loss": 0.6358,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.84,
517
  "learning_rate": 4.0007320644216695e-05,
518
- "loss": 0.6365,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.85,
523
  "learning_rate": 3.9824304538799415e-05,
524
- "loss": 0.569,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.86,
529
  "learning_rate": 3.964128843338214e-05,
530
- "loss": 0.6281,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.87,
535
  "learning_rate": 3.945827232796486e-05,
536
- "loss": 0.5812,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.88,
541
  "learning_rate": 3.9275256222547586e-05,
542
- "loss": 0.604,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.89,
547
  "learning_rate": 3.9092240117130305e-05,
548
- "loss": 0.5833,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.9,
553
  "learning_rate": 3.890922401171303e-05,
554
- "loss": 0.5247,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.91,
559
  "learning_rate": 3.872620790629576e-05,
560
- "loss": 0.5513,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.92,
565
  "learning_rate": 3.8543191800878484e-05,
566
- "loss": 0.5885,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.93,
571
  "learning_rate": 3.83601756954612e-05,
572
- "loss": 0.5995,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.94,
577
  "learning_rate": 3.817715959004393e-05,
578
- "loss": 0.591,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.95,
583
  "learning_rate": 3.799414348462665e-05,
584
- "loss": 0.6269,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.96,
589
  "learning_rate": 3.7811127379209375e-05,
590
- "loss": 0.5496,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.97,
595
  "learning_rate": 3.7628111273792094e-05,
596
- "loss": 0.6007,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.98,
601
  "learning_rate": 3.744509516837481e-05,
602
- "loss": 0.5894,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.99,
607
  "learning_rate": 3.7262079062957546e-05,
608
- "loss": 0.6417,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 1.0,
613
  "learning_rate": 3.7079062957540265e-05,
614
- "loss": 0.5654,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 1.0,
619
- "eval_accuracy": 0.7356656948493683,
620
- "eval_loss": 0.46419134736061096,
621
- "eval_runtime": 54.4631,
622
- "eval_samples_per_second": 132.255,
623
- "eval_steps_per_second": 4.15,
624
  "step": 506
625
  },
626
  {
627
  "epoch": 1.01,
628
  "learning_rate": 3.689604685212299e-05,
629
- "loss": 0.5932,
630
  "step": 510
631
  },
632
  {
633
  "epoch": 1.02,
634
  "learning_rate": 3.671303074670571e-05,
635
- "loss": 0.5732,
636
  "step": 515
637
  },
638
  {
639
  "epoch": 1.03,
640
  "learning_rate": 3.653001464128844e-05,
641
- "loss": 0.5225,
642
  "step": 520
643
  },
644
  {
645
  "epoch": 1.04,
646
  "learning_rate": 3.6346998535871156e-05,
647
- "loss": 0.57,
648
  "step": 525
649
  },
650
  {
651
  "epoch": 1.05,
652
  "learning_rate": 3.616398243045388e-05,
653
- "loss": 0.5341,
654
  "step": 530
655
  },
656
  {
657
  "epoch": 1.06,
658
  "learning_rate": 3.59809663250366e-05,
659
- "loss": 0.5654,
660
  "step": 535
661
  },
662
  {
663
  "epoch": 1.07,
664
  "learning_rate": 3.579795021961933e-05,
665
- "loss": 0.5749,
666
  "step": 540
667
  },
668
  {
669
  "epoch": 1.08,
670
  "learning_rate": 3.5614934114202054e-05,
671
- "loss": 0.5546,
672
  "step": 545
673
  },
674
  {
675
  "epoch": 1.09,
676
  "learning_rate": 3.543191800878477e-05,
677
- "loss": 0.5608,
678
  "step": 550
679
  },
680
  {
681
  "epoch": 1.1,
682
  "learning_rate": 3.52489019033675e-05,
683
- "loss": 0.5155,
684
  "step": 555
685
  },
686
  {
687
  "epoch": 1.11,
688
  "learning_rate": 3.506588579795022e-05,
689
- "loss": 0.541,
690
  "step": 560
691
  },
692
  {
693
  "epoch": 1.12,
694
  "learning_rate": 3.4882869692532945e-05,
695
- "loss": 0.5515,
696
  "step": 565
697
  },
698
  {
699
  "epoch": 1.13,
700
  "learning_rate": 3.4699853587115664e-05,
701
- "loss": 0.5686,
702
  "step": 570
703
  },
704
  {
705
  "epoch": 1.14,
706
  "learning_rate": 3.451683748169839e-05,
707
- "loss": 0.566,
708
  "step": 575
709
  },
710
  {
711
  "epoch": 1.15,
712
  "learning_rate": 3.4333821376281116e-05,
713
- "loss": 0.5548,
714
  "step": 580
715
  },
716
  {
717
  "epoch": 1.15,
718
  "learning_rate": 3.415080527086384e-05,
719
- "loss": 0.5808,
720
  "step": 585
721
  },
722
  {
723
  "epoch": 1.16,
724
  "learning_rate": 3.396778916544656e-05,
725
- "loss": 0.5683,
726
  "step": 590
727
  },
728
  {
729
  "epoch": 1.17,
730
  "learning_rate": 3.378477306002929e-05,
731
- "loss": 0.5428,
732
  "step": 595
733
  },
734
  {
735
  "epoch": 1.18,
736
  "learning_rate": 3.360175695461201e-05,
737
- "loss": 0.541,
738
  "step": 600
739
  },
740
  {
741
  "epoch": 1.19,
742
  "learning_rate": 3.3418740849194726e-05,
743
- "loss": 0.5082,
744
  "step": 605
745
  },
746
  {
747
  "epoch": 1.2,
748
  "learning_rate": 3.323572474377745e-05,
749
- "loss": 0.5672,
750
  "step": 610
751
  },
752
  {
753
  "epoch": 1.21,
754
  "learning_rate": 3.305270863836017e-05,
755
- "loss": 0.5593,
756
  "step": 615
757
  },
758
  {
759
  "epoch": 1.22,
760
  "learning_rate": 3.2869692532942905e-05,
761
- "loss": 0.5492,
762
  "step": 620
763
  },
764
  {
765
  "epoch": 1.23,
766
  "learning_rate": 3.2686676427525624e-05,
767
- "loss": 0.553,
768
  "step": 625
769
  },
770
  {
771
  "epoch": 1.24,
772
  "learning_rate": 3.250366032210835e-05,
773
- "loss": 0.5812,
774
  "step": 630
775
  },
776
  {
777
  "epoch": 1.25,
778
  "learning_rate": 3.232064421669107e-05,
779
- "loss": 0.5467,
780
  "step": 635
781
  },
782
  {
783
  "epoch": 1.26,
784
  "learning_rate": 3.2137628111273795e-05,
785
- "loss": 0.5604,
786
  "step": 640
787
  },
788
  {
789
  "epoch": 1.27,
790
  "learning_rate": 3.1954612005856515e-05,
791
- "loss": 0.5613,
792
  "step": 645
793
  },
794
  {
795
  "epoch": 1.28,
796
  "learning_rate": 3.177159590043924e-05,
797
- "loss": 0.5247,
798
  "step": 650
799
  },
800
  {
801
  "epoch": 1.29,
802
  "learning_rate": 3.158857979502196e-05,
803
- "loss": 0.5596,
804
  "step": 655
805
  },
806
  {
807
  "epoch": 1.3,
808
  "learning_rate": 3.1405563689604686e-05,
809
- "loss": 0.5211,
810
  "step": 660
811
  },
812
  {
813
  "epoch": 1.31,
814
  "learning_rate": 3.122254758418741e-05,
815
- "loss": 0.5338,
816
  "step": 665
817
  },
818
  {
819
  "epoch": 1.32,
820
  "learning_rate": 3.103953147877013e-05,
821
- "loss": 0.5422,
822
  "step": 670
823
  },
824
  {
825
  "epoch": 1.33,
826
  "learning_rate": 3.085651537335286e-05,
827
- "loss": 0.5502,
828
  "step": 675
829
  },
830
  {
831
  "epoch": 1.34,
832
  "learning_rate": 3.067349926793558e-05,
833
- "loss": 0.5305,
834
  "step": 680
835
  },
836
  {
837
  "epoch": 1.35,
838
  "learning_rate": 3.0490483162518303e-05,
839
- "loss": 0.5415,
840
  "step": 685
841
  },
842
  {
843
  "epoch": 1.36,
844
  "learning_rate": 3.0307467057101026e-05,
845
- "loss": 0.5065,
846
  "step": 690
847
  },
848
  {
849
  "epoch": 1.37,
850
  "learning_rate": 3.012445095168375e-05,
851
- "loss": 0.551,
852
  "step": 695
853
  },
854
  {
855
  "epoch": 1.38,
856
  "learning_rate": 2.9941434846266475e-05,
857
- "loss": 0.5345,
858
  "step": 700
859
  },
860
  {
861
  "epoch": 1.39,
862
  "learning_rate": 2.9758418740849197e-05,
863
- "loss": 0.514,
864
  "step": 705
865
  },
866
  {
867
  "epoch": 1.4,
868
  "learning_rate": 2.957540263543192e-05,
869
- "loss": 0.5535,
870
  "step": 710
871
  },
872
  {
873
  "epoch": 1.41,
874
  "learning_rate": 2.9392386530014643e-05,
875
- "loss": 0.5537,
876
  "step": 715
877
  },
878
  {
879
  "epoch": 1.42,
880
  "learning_rate": 2.9209370424597365e-05,
881
- "loss": 0.5305,
882
  "step": 720
883
  },
884
  {
885
  "epoch": 1.43,
886
  "learning_rate": 2.9026354319180088e-05,
887
- "loss": 0.5551,
888
  "step": 725
889
  },
890
  {
891
  "epoch": 1.44,
892
  "learning_rate": 2.884333821376281e-05,
893
- "loss": 0.5723,
894
  "step": 730
895
  },
896
  {
897
  "epoch": 1.45,
898
  "learning_rate": 2.8660322108345534e-05,
899
- "loss": 0.5722,
900
  "step": 735
901
  },
902
  {
903
  "epoch": 1.46,
904
  "learning_rate": 2.847730600292826e-05,
905
- "loss": 0.5376,
906
  "step": 740
907
  },
908
  {
909
  "epoch": 1.47,
910
  "learning_rate": 2.8294289897510982e-05,
911
- "loss": 0.577,
912
  "step": 745
913
  },
914
  {
915
  "epoch": 1.48,
916
  "learning_rate": 2.8111273792093705e-05,
917
- "loss": 0.5322,
918
  "step": 750
919
  },
920
  {
921
  "epoch": 1.49,
922
  "learning_rate": 2.7928257686676428e-05,
923
- "loss": 0.5505,
924
  "step": 755
925
  },
926
  {
927
  "epoch": 1.5,
928
  "learning_rate": 2.774524158125915e-05,
929
- "loss": 0.4995,
930
  "step": 760
931
  },
932
  {
933
  "epoch": 1.51,
934
  "learning_rate": 2.7562225475841873e-05,
935
- "loss": 0.5371,
936
  "step": 765
937
  },
938
  {
939
  "epoch": 1.52,
940
  "learning_rate": 2.7379209370424596e-05,
941
- "loss": 0.5526,
942
  "step": 770
943
  },
944
  {
945
  "epoch": 1.53,
946
  "learning_rate": 2.719619326500732e-05,
947
- "loss": 0.5275,
948
  "step": 775
949
  },
950
  {
951
  "epoch": 1.54,
952
  "learning_rate": 2.7013177159590048e-05,
953
- "loss": 0.5073,
954
  "step": 780
955
  },
956
  {
957
  "epoch": 1.55,
958
  "learning_rate": 2.683016105417277e-05,
959
- "loss": 0.5645,
960
  "step": 785
961
  },
962
  {
963
  "epoch": 1.56,
964
  "learning_rate": 2.6647144948755493e-05,
965
- "loss": 0.5253,
966
  "step": 790
967
  },
968
  {
969
  "epoch": 1.57,
970
  "learning_rate": 2.6464128843338216e-05,
971
- "loss": 0.5327,
972
  "step": 795
973
  },
974
  {
975
  "epoch": 1.58,
976
  "learning_rate": 2.628111273792094e-05,
977
- "loss": 0.5219,
978
  "step": 800
979
  },
980
  {
981
  "epoch": 1.59,
982
  "learning_rate": 2.609809663250366e-05,
983
- "loss": 0.5068,
984
  "step": 805
985
  },
986
  {
987
  "epoch": 1.6,
988
  "learning_rate": 2.5915080527086384e-05,
989
- "loss": 0.5378,
990
  "step": 810
991
  },
992
  {
993
  "epoch": 1.61,
994
  "learning_rate": 2.5732064421669107e-05,
995
- "loss": 0.5464,
996
  "step": 815
997
  },
998
  {
999
  "epoch": 1.62,
1000
  "learning_rate": 2.5549048316251833e-05,
1001
- "loss": 0.5195,
1002
  "step": 820
1003
  },
1004
  {
1005
  "epoch": 1.63,
1006
  "learning_rate": 2.5366032210834556e-05,
1007
- "loss": 0.5481,
1008
  "step": 825
1009
  },
1010
  {
1011
  "epoch": 1.64,
1012
  "learning_rate": 2.518301610541728e-05,
1013
- "loss": 0.512,
1014
  "step": 830
1015
  },
1016
  {
1017
  "epoch": 1.65,
1018
  "learning_rate": 2.5e-05,
1019
- "loss": 0.5342,
1020
  "step": 835
1021
  },
1022
  {
1023
  "epoch": 1.66,
1024
  "learning_rate": 2.4816983894582724e-05,
1025
- "loss": 0.5444,
1026
  "step": 840
1027
  },
1028
  {
1029
  "epoch": 1.67,
1030
  "learning_rate": 2.463396778916545e-05,
1031
- "loss": 0.533,
1032
  "step": 845
1033
  },
1034
  {
1035
  "epoch": 1.68,
1036
  "learning_rate": 2.4450951683748173e-05,
1037
- "loss": 0.5249,
1038
  "step": 850
1039
  },
1040
  {
1041
  "epoch": 1.69,
1042
  "learning_rate": 2.4267935578330895e-05,
1043
- "loss": 0.5867,
1044
  "step": 855
1045
  },
1046
  {
1047
  "epoch": 1.7,
1048
  "learning_rate": 2.4084919472913618e-05,
1049
- "loss": 0.4845,
1050
  "step": 860
1051
  },
1052
  {
1053
  "epoch": 1.71,
1054
  "learning_rate": 2.390190336749634e-05,
1055
- "loss": 0.5419,
1056
  "step": 865
1057
  },
1058
  {
1059
  "epoch": 1.72,
1060
  "learning_rate": 2.3718887262079064e-05,
1061
- "loss": 0.5173,
1062
  "step": 870
1063
  },
1064
  {
1065
  "epoch": 1.73,
1066
  "learning_rate": 2.3535871156661786e-05,
1067
- "loss": 0.5138,
1068
  "step": 875
1069
  },
1070
  {
1071
  "epoch": 1.74,
1072
  "learning_rate": 2.335285505124451e-05,
1073
- "loss": 0.5291,
1074
  "step": 880
1075
  },
1076
  {
1077
  "epoch": 1.75,
1078
  "learning_rate": 2.3169838945827235e-05,
1079
- "loss": 0.5299,
1080
  "step": 885
1081
  },
1082
  {
1083
  "epoch": 1.76,
1084
  "learning_rate": 2.2986822840409958e-05,
1085
- "loss": 0.5249,
1086
  "step": 890
1087
  },
1088
  {
1089
  "epoch": 1.77,
1090
  "learning_rate": 2.280380673499268e-05,
1091
- "loss": 0.4676,
1092
  "step": 895
1093
  },
1094
  {
1095
  "epoch": 1.78,
1096
  "learning_rate": 2.2620790629575403e-05,
1097
- "loss": 0.5323,
1098
  "step": 900
1099
  },
1100
  {
1101
  "epoch": 1.79,
1102
  "learning_rate": 2.243777452415813e-05,
1103
- "loss": 0.4845,
1104
  "step": 905
1105
  },
1106
  {
1107
  "epoch": 1.8,
1108
  "learning_rate": 2.2254758418740852e-05,
1109
- "loss": 0.5559,
1110
  "step": 910
1111
  },
1112
  {
1113
  "epoch": 1.81,
1114
  "learning_rate": 2.2071742313323575e-05,
1115
- "loss": 0.5064,
1116
  "step": 915
1117
  },
1118
  {
1119
  "epoch": 1.82,
1120
  "learning_rate": 2.1888726207906297e-05,
1121
- "loss": 0.5281,
1122
  "step": 920
1123
  },
1124
  {
1125
  "epoch": 1.83,
1126
  "learning_rate": 2.170571010248902e-05,
1127
- "loss": 0.5471,
1128
  "step": 925
1129
  },
1130
  {
1131
  "epoch": 1.84,
1132
  "learning_rate": 2.1522693997071743e-05,
1133
- "loss": 0.4968,
1134
  "step": 930
1135
  },
1136
  {
1137
  "epoch": 1.85,
1138
  "learning_rate": 2.1339677891654465e-05,
1139
- "loss": 0.481,
1140
  "step": 935
1141
  },
1142
  {
1143
  "epoch": 1.86,
1144
  "learning_rate": 2.1156661786237188e-05,
1145
- "loss": 0.5199,
1146
  "step": 940
1147
  },
1148
  {
1149
  "epoch": 1.87,
1150
  "learning_rate": 2.0973645680819914e-05,
1151
- "loss": 0.4974,
1152
  "step": 945
1153
  },
1154
  {
1155
  "epoch": 1.88,
1156
  "learning_rate": 2.0790629575402637e-05,
1157
- "loss": 0.5176,
1158
  "step": 950
1159
  },
1160
  {
1161
  "epoch": 1.89,
1162
  "learning_rate": 2.060761346998536e-05,
1163
- "loss": 0.5054,
1164
  "step": 955
1165
  },
1166
  {
1167
  "epoch": 1.9,
1168
  "learning_rate": 2.0424597364568082e-05,
1169
- "loss": 0.5211,
1170
  "step": 960
1171
  },
1172
  {
1173
  "epoch": 1.91,
1174
  "learning_rate": 2.024158125915081e-05,
1175
- "loss": 0.4668,
1176
  "step": 965
1177
  },
1178
  {
1179
  "epoch": 1.92,
1180
  "learning_rate": 2.005856515373353e-05,
1181
- "loss": 0.4834,
1182
  "step": 970
1183
  },
1184
  {
1185
  "epoch": 1.92,
1186
  "learning_rate": 1.9875549048316254e-05,
1187
- "loss": 0.5393,
1188
  "step": 975
1189
  },
1190
  {
1191
  "epoch": 1.93,
1192
  "learning_rate": 1.9692532942898977e-05,
1193
- "loss": 0.5269,
1194
  "step": 980
1195
  },
1196
  {
1197
  "epoch": 1.94,
1198
  "learning_rate": 1.95095168374817e-05,
1199
- "loss": 0.5445,
1200
  "step": 985
1201
  },
1202
  {
1203
  "epoch": 1.95,
1204
  "learning_rate": 1.9326500732064422e-05,
1205
- "loss": 0.5473,
1206
  "step": 990
1207
  },
1208
  {
1209
  "epoch": 1.96,
1210
  "learning_rate": 1.9143484626647145e-05,
1211
- "loss": 0.5002,
1212
  "step": 995
1213
  },
1214
  {
1215
  "epoch": 1.97,
1216
  "learning_rate": 1.8960468521229867e-05,
1217
- "loss": 0.4839,
1218
  "step": 1000
1219
  },
1220
  {
1221
  "epoch": 1.98,
1222
  "learning_rate": 1.8777452415812594e-05,
1223
- "loss": 0.5268,
1224
  "step": 1005
1225
  },
1226
  {
1227
  "epoch": 1.99,
1228
  "learning_rate": 1.8594436310395316e-05,
1229
- "loss": 0.5194,
1230
  "step": 1010
1231
  },
1232
  {
1233
  "epoch": 2.0,
1234
- "eval_accuracy": 0.7675968346522283,
1235
- "eval_loss": 0.4097585380077362,
1236
- "eval_runtime": 51.2327,
1237
- "eval_samples_per_second": 140.594,
1238
- "eval_steps_per_second": 4.411,
1239
  "step": 1013
1240
  },
1241
  {
1242
  "epoch": 2.0,
1243
  "learning_rate": 1.841142020497804e-05,
1244
- "loss": 0.4812,
1245
  "step": 1015
1246
  },
1247
  {
1248
  "epoch": 2.01,
1249
  "learning_rate": 1.822840409956076e-05,
1250
- "loss": 0.5085,
1251
  "step": 1020
1252
  },
1253
  {
1254
  "epoch": 2.02,
1255
  "learning_rate": 1.8045387994143488e-05,
1256
- "loss": 0.461,
1257
  "step": 1025
1258
  },
1259
  {
1260
  "epoch": 2.03,
1261
  "learning_rate": 1.786237188872621e-05,
1262
- "loss": 0.5006,
1263
  "step": 1030
1264
  },
1265
  {
1266
  "epoch": 2.04,
1267
  "learning_rate": 1.7679355783308933e-05,
1268
- "loss": 0.4821,
1269
  "step": 1035
1270
  },
1271
  {
1272
  "epoch": 2.05,
1273
  "learning_rate": 1.7496339677891656e-05,
1274
- "loss": 0.4414,
1275
  "step": 1040
1276
  },
1277
  {
1278
  "epoch": 2.06,
1279
  "learning_rate": 1.731332357247438e-05,
1280
- "loss": 0.4986,
1281
  "step": 1045
1282
  },
1283
  {
1284
  "epoch": 2.07,
1285
  "learning_rate": 1.71303074670571e-05,
1286
- "loss": 0.4747,
1287
  "step": 1050
1288
  },
1289
  {
1290
  "epoch": 2.08,
1291
  "learning_rate": 1.6947291361639824e-05,
1292
- "loss": 0.5198,
1293
  "step": 1055
1294
  },
1295
  {
1296
  "epoch": 2.09,
1297
  "learning_rate": 1.6764275256222547e-05,
1298
- "loss": 0.4425,
1299
  "step": 1060
1300
  },
1301
  {
1302
  "epoch": 2.1,
1303
  "learning_rate": 1.6581259150805273e-05,
1304
- "loss": 0.4593,
1305
  "step": 1065
1306
  },
1307
  {
1308
  "epoch": 2.11,
1309
  "learning_rate": 1.6398243045387995e-05,
1310
- "loss": 0.5233,
1311
  "step": 1070
1312
  },
1313
  {
1314
  "epoch": 2.12,
1315
  "learning_rate": 1.6215226939970718e-05,
1316
- "loss": 0.4698,
1317
  "step": 1075
1318
  },
1319
  {
1320
  "epoch": 2.13,
1321
  "learning_rate": 1.603221083455344e-05,
1322
- "loss": 0.5108,
1323
  "step": 1080
1324
  },
1325
  {
1326
  "epoch": 2.14,
1327
  "learning_rate": 1.5849194729136167e-05,
1328
- "loss": 0.5274,
1329
  "step": 1085
1330
  },
1331
  {
1332
  "epoch": 2.15,
1333
  "learning_rate": 1.566617862371889e-05,
1334
- "loss": 0.4443,
1335
  "step": 1090
1336
  },
1337
  {
1338
  "epoch": 2.16,
1339
  "learning_rate": 1.5483162518301612e-05,
1340
- "loss": 0.4591,
1341
  "step": 1095
1342
  },
1343
  {
1344
  "epoch": 2.17,
1345
  "learning_rate": 1.5300146412884335e-05,
1346
- "loss": 0.5101,
1347
  "step": 1100
1348
  },
1349
  {
1350
  "epoch": 2.18,
1351
  "learning_rate": 1.511713030746706e-05,
1352
- "loss": 0.4938,
1353
  "step": 1105
1354
  },
1355
  {
1356
  "epoch": 2.19,
1357
  "learning_rate": 1.4934114202049782e-05,
1358
- "loss": 0.5465,
1359
  "step": 1110
1360
  },
1361
  {
1362
  "epoch": 2.2,
1363
  "learning_rate": 1.4751098096632505e-05,
1364
- "loss": 0.4086,
1365
  "step": 1115
1366
  },
1367
  {
1368
  "epoch": 2.21,
1369
  "learning_rate": 1.4568081991215226e-05,
1370
- "loss": 0.4771,
1371
  "step": 1120
1372
  },
1373
  {
1374
  "epoch": 2.22,
1375
  "learning_rate": 1.4385065885797952e-05,
1376
- "loss": 0.5017,
1377
  "step": 1125
1378
  },
1379
  {
1380
  "epoch": 2.23,
1381
  "learning_rate": 1.4202049780380675e-05,
1382
- "loss": 0.4975,
1383
  "step": 1130
1384
  },
1385
  {
1386
  "epoch": 2.24,
1387
  "learning_rate": 1.4019033674963397e-05,
1388
- "loss": 0.573,
1389
  "step": 1135
1390
  },
1391
  {
1392
  "epoch": 2.25,
1393
  "learning_rate": 1.383601756954612e-05,
1394
- "loss": 0.4946,
1395
  "step": 1140
1396
  },
1397
  {
1398
  "epoch": 2.26,
1399
  "learning_rate": 1.3653001464128845e-05,
1400
- "loss": 0.4861,
1401
  "step": 1145
1402
  },
1403
  {
1404
  "epoch": 2.27,
1405
  "learning_rate": 1.3469985358711567e-05,
1406
- "loss": 0.4579,
1407
  "step": 1150
1408
  },
1409
  {
1410
  "epoch": 2.28,
1411
  "learning_rate": 1.328696925329429e-05,
1412
- "loss": 0.4574,
1413
  "step": 1155
1414
  },
1415
  {
1416
  "epoch": 2.29,
1417
  "learning_rate": 1.3103953147877013e-05,
1418
- "loss": 0.5262,
1419
  "step": 1160
1420
  },
1421
  {
1422
  "epoch": 2.3,
1423
  "learning_rate": 1.2920937042459739e-05,
1424
- "loss": 0.4991,
1425
  "step": 1165
1426
  },
1427
  {
1428
  "epoch": 2.31,
1429
  "learning_rate": 1.2737920937042461e-05,
1430
- "loss": 0.5082,
1431
  "step": 1170
1432
  },
1433
  {
1434
  "epoch": 2.32,
1435
  "learning_rate": 1.2554904831625182e-05,
1436
- "loss": 0.4557,
1437
  "step": 1175
1438
  },
1439
  {
1440
  "epoch": 2.33,
1441
  "learning_rate": 1.2371888726207907e-05,
1442
- "loss": 0.5222,
1443
  "step": 1180
1444
  },
1445
  {
1446
  "epoch": 2.34,
1447
  "learning_rate": 1.218887262079063e-05,
1448
- "loss": 0.4846,
1449
  "step": 1185
1450
  },
1451
  {
1452
  "epoch": 2.35,
1453
  "learning_rate": 1.2005856515373354e-05,
1454
- "loss": 0.4616,
1455
  "step": 1190
1456
  },
1457
  {
1458
  "epoch": 2.36,
1459
  "learning_rate": 1.1822840409956077e-05,
1460
- "loss": 0.4609,
1461
  "step": 1195
1462
  },
1463
  {
1464
  "epoch": 2.37,
1465
  "learning_rate": 1.1639824304538801e-05,
1466
- "loss": 0.501,
1467
  "step": 1200
1468
  },
1469
  {
1470
  "epoch": 2.38,
1471
  "learning_rate": 1.1456808199121522e-05,
1472
- "loss": 0.4591,
1473
  "step": 1205
1474
  },
1475
  {
1476
  "epoch": 2.39,
1477
  "learning_rate": 1.1273792093704246e-05,
1478
- "loss": 0.4996,
1479
  "step": 1210
1480
  },
1481
  {
1482
  "epoch": 2.4,
1483
  "learning_rate": 1.109077598828697e-05,
1484
- "loss": 0.4629,
1485
  "step": 1215
1486
  },
1487
  {
1488
  "epoch": 2.41,
1489
  "learning_rate": 1.0907759882869694e-05,
1490
- "loss": 0.4559,
1491
  "step": 1220
1492
  },
1493
  {
1494
  "epoch": 2.42,
1495
  "learning_rate": 1.0724743777452416e-05,
1496
- "loss": 0.4881,
1497
  "step": 1225
1498
  },
1499
  {
1500
  "epoch": 2.43,
1501
  "learning_rate": 1.054172767203514e-05,
1502
- "loss": 0.4593,
1503
  "step": 1230
1504
  },
1505
  {
1506
  "epoch": 2.44,
1507
  "learning_rate": 1.0358711566617862e-05,
1508
- "loss": 0.4847,
1509
  "step": 1235
1510
  },
1511
  {
1512
  "epoch": 2.45,
1513
  "learning_rate": 1.0175695461200586e-05,
1514
- "loss": 0.4676,
1515
  "step": 1240
1516
  },
1517
  {
1518
  "epoch": 2.46,
1519
  "learning_rate": 9.992679355783309e-06,
1520
- "loss": 0.4893,
1521
  "step": 1245
1522
  },
1523
  {
1524
  "epoch": 2.47,
1525
  "learning_rate": 9.809663250366033e-06,
1526
- "loss": 0.4845,
1527
  "step": 1250
1528
  },
1529
  {
1530
  "epoch": 2.48,
1531
  "learning_rate": 9.626647144948756e-06,
1532
- "loss": 0.4829,
1533
  "step": 1255
1534
  },
1535
  {
1536
  "epoch": 2.49,
1537
  "learning_rate": 9.44363103953148e-06,
1538
- "loss": 0.5043,
1539
  "step": 1260
1540
  },
1541
  {
1542
  "epoch": 2.5,
1543
  "learning_rate": 9.260614934114201e-06,
1544
- "loss": 0.497,
1545
  "step": 1265
1546
  },
1547
  {
1548
  "epoch": 2.51,
1549
  "learning_rate": 9.077598828696926e-06,
1550
- "loss": 0.5003,
1551
  "step": 1270
1552
  },
1553
  {
1554
  "epoch": 2.52,
1555
  "learning_rate": 8.894582723279648e-06,
1556
- "loss": 0.4462,
1557
  "step": 1275
1558
  },
1559
  {
1560
  "epoch": 2.53,
1561
  "learning_rate": 8.711566617862373e-06,
1562
- "loss": 0.5062,
1563
  "step": 1280
1564
  },
1565
  {
1566
  "epoch": 2.54,
1567
  "learning_rate": 8.528550512445096e-06,
1568
- "loss": 0.4795,
1569
  "step": 1285
1570
  },
1571
  {
1572
  "epoch": 2.55,
1573
  "learning_rate": 8.34553440702782e-06,
1574
- "loss": 0.4773,
1575
  "step": 1290
1576
  },
1577
  {
1578
  "epoch": 2.56,
1579
  "learning_rate": 8.162518301610541e-06,
1580
- "loss": 0.4667,
1581
  "step": 1295
1582
  },
1583
  {
1584
  "epoch": 2.57,
1585
  "learning_rate": 7.979502196193265e-06,
1586
- "loss": 0.5078,
1587
  "step": 1300
1588
  },
1589
  {
1590
  "epoch": 2.58,
1591
  "learning_rate": 7.796486090775988e-06,
1592
- "loss": 0.4586,
1593
  "step": 1305
1594
  },
1595
  {
1596
  "epoch": 2.59,
1597
  "learning_rate": 7.613469985358712e-06,
1598
- "loss": 0.4328,
1599
  "step": 1310
1600
  },
1601
  {
1602
  "epoch": 2.6,
1603
  "learning_rate": 7.430453879941435e-06,
1604
- "loss": 0.443,
1605
  "step": 1315
1606
  },
1607
  {
1608
  "epoch": 2.61,
1609
  "learning_rate": 7.247437774524159e-06,
1610
- "loss": 0.4597,
1611
  "step": 1320
1612
  },
1613
  {
1614
  "epoch": 2.62,
1615
  "learning_rate": 7.064421669106881e-06,
1616
- "loss": 0.5333,
1617
  "step": 1325
1618
  },
1619
  {
1620
  "epoch": 2.63,
1621
  "learning_rate": 6.881405563689605e-06,
1622
- "loss": 0.4654,
1623
  "step": 1330
1624
  },
1625
  {
1626
  "epoch": 2.64,
1627
  "learning_rate": 6.698389458272328e-06,
1628
- "loss": 0.4383,
1629
  "step": 1335
1630
  },
1631
  {
1632
  "epoch": 2.65,
1633
  "learning_rate": 6.515373352855052e-06,
1634
- "loss": 0.4828,
1635
  "step": 1340
1636
  },
1637
  {
1638
  "epoch": 2.66,
1639
  "learning_rate": 6.332357247437774e-06,
1640
- "loss": 0.4849,
1641
  "step": 1345
1642
  },
1643
  {
1644
  "epoch": 2.67,
1645
  "learning_rate": 6.149341142020498e-06,
1646
- "loss": 0.465,
1647
  "step": 1350
1648
  },
1649
  {
1650
  "epoch": 2.68,
1651
  "learning_rate": 5.966325036603222e-06,
1652
- "loss": 0.5227,
1653
  "step": 1355
1654
  },
1655
  {
1656
  "epoch": 2.69,
1657
  "learning_rate": 5.7833089311859446e-06,
1658
- "loss": 0.5229,
1659
  "step": 1360
1660
  },
1661
  {
1662
  "epoch": 2.69,
1663
  "learning_rate": 5.600292825768668e-06,
1664
- "loss": 0.4592,
1665
  "step": 1365
1666
  },
1667
  {
1668
  "epoch": 2.7,
1669
  "learning_rate": 5.417276720351392e-06,
1670
- "loss": 0.4769,
1671
  "step": 1370
1672
  },
1673
  {
1674
  "epoch": 2.71,
1675
  "learning_rate": 5.234260614934114e-06,
1676
- "loss": 0.4959,
1677
  "step": 1375
1678
  },
1679
  {
1680
  "epoch": 2.72,
1681
  "learning_rate": 5.051244509516838e-06,
1682
- "loss": 0.4731,
1683
  "step": 1380
1684
  },
1685
  {
1686
  "epoch": 2.73,
1687
  "learning_rate": 4.8682284040995615e-06,
1688
- "loss": 0.4718,
1689
  "step": 1385
1690
  },
1691
  {
1692
  "epoch": 2.74,
1693
  "learning_rate": 4.685212298682284e-06,
1694
- "loss": 0.5007,
1695
  "step": 1390
1696
  },
1697
  {
1698
  "epoch": 2.75,
1699
  "learning_rate": 4.502196193265008e-06,
1700
- "loss": 0.437,
1701
  "step": 1395
1702
  },
1703
  {
1704
  "epoch": 2.76,
1705
  "learning_rate": 4.319180087847731e-06,
1706
- "loss": 0.4732,
1707
  "step": 1400
1708
  },
1709
  {
1710
  "epoch": 2.77,
1711
  "learning_rate": 4.136163982430454e-06,
1712
- "loss": 0.4582,
1713
  "step": 1405
1714
  },
1715
  {
1716
  "epoch": 2.78,
1717
  "learning_rate": 3.9531478770131775e-06,
1718
- "loss": 0.4903,
1719
  "step": 1410
1720
  },
1721
  {
1722
  "epoch": 2.79,
1723
  "learning_rate": 3.7701317715959007e-06,
1724
- "loss": 0.5049,
1725
  "step": 1415
1726
  },
1727
  {
1728
  "epoch": 2.8,
1729
  "learning_rate": 3.587115666178624e-06,
1730
- "loss": 0.457,
1731
  "step": 1420
1732
  },
1733
  {
1734
  "epoch": 2.81,
1735
  "learning_rate": 3.4040995607613473e-06,
1736
- "loss": 0.4516,
1737
  "step": 1425
1738
  },
1739
  {
1740
  "epoch": 2.82,
1741
  "learning_rate": 3.2210834553440705e-06,
1742
- "loss": 0.5026,
1743
  "step": 1430
1744
  },
1745
  {
1746
  "epoch": 2.83,
1747
  "learning_rate": 3.0380673499267936e-06,
1748
- "loss": 0.5247,
1749
  "step": 1435
1750
  },
1751
  {
1752
  "epoch": 2.84,
1753
  "learning_rate": 2.855051244509517e-06,
1754
- "loss": 0.4605,
1755
  "step": 1440
1756
  },
1757
  {
1758
  "epoch": 2.85,
1759
  "learning_rate": 2.6720351390922403e-06,
1760
- "loss": 0.4889,
1761
  "step": 1445
1762
  },
1763
  {
1764
  "epoch": 2.86,
1765
  "learning_rate": 2.4890190336749634e-06,
1766
- "loss": 0.4645,
1767
  "step": 1450
1768
  },
1769
  {
1770
  "epoch": 2.87,
1771
  "learning_rate": 2.306002928257687e-06,
1772
- "loss": 0.4493,
1773
  "step": 1455
1774
  },
1775
  {
1776
  "epoch": 2.88,
1777
  "learning_rate": 2.12298682284041e-06,
1778
- "loss": 0.492,
1779
  "step": 1460
1780
  },
1781
  {
1782
  "epoch": 2.89,
1783
  "learning_rate": 1.9399707174231332e-06,
1784
- "loss": 0.5066,
1785
  "step": 1465
1786
  },
1787
  {
1788
  "epoch": 2.9,
1789
  "learning_rate": 1.7569546120058566e-06,
1790
- "loss": 0.4364,
1791
  "step": 1470
1792
  },
1793
  {
1794
  "epoch": 2.91,
1795
  "learning_rate": 1.57393850658858e-06,
1796
- "loss": 0.4823,
1797
  "step": 1475
1798
  },
1799
  {
1800
  "epoch": 2.92,
1801
  "learning_rate": 1.3909224011713032e-06,
1802
- "loss": 0.5101,
1803
  "step": 1480
1804
  },
1805
  {
1806
  "epoch": 2.93,
1807
  "learning_rate": 1.2079062957540264e-06,
1808
- "loss": 0.5322,
1809
  "step": 1485
1810
  },
1811
  {
1812
  "epoch": 2.94,
1813
  "learning_rate": 1.0248901903367497e-06,
1814
- "loss": 0.4745,
1815
  "step": 1490
1816
  },
1817
  {
1818
  "epoch": 2.95,
1819
  "learning_rate": 8.41874084919473e-07,
1820
- "loss": 0.4728,
1821
  "step": 1495
1822
  },
1823
  {
1824
  "epoch": 2.96,
1825
  "learning_rate": 6.588579795021963e-07,
1826
- "loss": 0.5055,
1827
  "step": 1500
1828
  },
1829
  {
1830
  "epoch": 2.97,
1831
  "learning_rate": 4.758418740849195e-07,
1832
- "loss": 0.5092,
1833
  "step": 1505
1834
  },
1835
  {
1836
  "epoch": 2.98,
1837
  "learning_rate": 2.9282576866764276e-07,
1838
- "loss": 0.4562,
1839
  "step": 1510
1840
  },
1841
  {
1842
  "epoch": 2.99,
1843
  "learning_rate": 1.0980966325036604e-07,
1844
- "loss": 0.4478,
1845
  "step": 1515
1846
  },
1847
  {
1848
  "epoch": 3.0,
1849
- "eval_accuracy": 0.7774538386783285,
1850
- "eval_loss": 0.39165472984313965,
1851
- "eval_runtime": 56.6277,
1852
- "eval_samples_per_second": 127.199,
1853
- "eval_steps_per_second": 3.991,
1854
  "step": 1518
1855
  },
1856
  {
1857
  "epoch": 3.0,
1858
  "step": 1518,
1859
  "total_flos": 4.829589697691566e+18,
1860
- "train_loss": 0.5911478077470078,
1861
- "train_runtime": 3253.5777,
1862
- "train_samples_per_second": 59.774,
1863
- "train_steps_per_second": 0.467
1864
  }
1865
  ],
1866
  "logging_steps": 5,
 
1
  {
2
+ "best_metric": 0.9372483687352492,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-img_orientation/checkpoint-1518",
4
  "epoch": 2.9970384995064165,
5
  "eval_steps": 500,
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 1.6447368421052632e-06,
14
+ "loss": 1.4231,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.02,
19
  "learning_rate": 3.2894736842105265e-06,
20
+ "loss": 1.4652,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.03,
25
  "learning_rate": 4.9342105263157895e-06,
26
+ "loss": 1.4556,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 6.578947368421053e-06,
32
+ "loss": 1.4316,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.05,
37
  "learning_rate": 8.223684210526317e-06,
38
+ "loss": 1.3826,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.06,
43
  "learning_rate": 9.868421052631579e-06,
44
+ "loss": 1.3646,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.07,
49
  "learning_rate": 1.1513157894736843e-05,
50
+ "loss": 1.3281,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.08,
55
  "learning_rate": 1.3157894736842106e-05,
56
+ "loss": 1.2863,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.09,
61
  "learning_rate": 1.4802631578947368e-05,
62
+ "loss": 1.2726,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.1,
67
  "learning_rate": 1.6447368421052635e-05,
68
+ "loss": 1.21,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.11,
73
  "learning_rate": 1.8092105263157896e-05,
74
+ "loss": 1.1925,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.12,
79
  "learning_rate": 1.9736842105263158e-05,
80
+ "loss": 1.1344,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.13,
85
  "learning_rate": 2.1381578947368423e-05,
86
+ "loss": 1.0479,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.14,
91
  "learning_rate": 2.3026315789473685e-05,
92
+ "loss": 1.0175,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.15,
97
  "learning_rate": 2.4671052631578947e-05,
98
+ "loss": 0.9697,
99
  "step": 75
100
  },
101
  {
102
  "epoch": 0.16,
103
  "learning_rate": 2.6315789473684212e-05,
104
+ "loss": 0.9277,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.17,
109
  "learning_rate": 2.7960526315789477e-05,
110
+ "loss": 0.9186,
111
  "step": 85
112
  },
113
  {
114
  "epoch": 0.18,
115
  "learning_rate": 2.9605263157894735e-05,
116
+ "loss": 0.938,
117
  "step": 90
118
  },
119
  {
120
  "epoch": 0.19,
121
  "learning_rate": 3.125e-05,
122
+ "loss": 0.8542,
123
  "step": 95
124
  },
125
  {
126
  "epoch": 0.2,
127
  "learning_rate": 3.289473684210527e-05,
128
+ "loss": 0.815,
129
  "step": 100
130
  },
131
  {
132
  "epoch": 0.21,
133
  "learning_rate": 3.4539473684210524e-05,
134
+ "loss": 0.8339,
135
  "step": 105
136
  },
137
  {
138
  "epoch": 0.22,
139
  "learning_rate": 3.618421052631579e-05,
140
+ "loss": 0.8058,
141
  "step": 110
142
  },
143
  {
144
  "epoch": 0.23,
145
  "learning_rate": 3.7828947368421054e-05,
146
+ "loss": 0.7445,
147
  "step": 115
148
  },
149
  {
150
  "epoch": 0.24,
151
  "learning_rate": 3.9473684210526316e-05,
152
+ "loss": 0.8001,
153
  "step": 120
154
  },
155
  {
156
  "epoch": 0.25,
157
  "learning_rate": 4.111842105263158e-05,
158
+ "loss": 0.785,
159
  "step": 125
160
  },
161
  {
162
  "epoch": 0.26,
163
  "learning_rate": 4.2763157894736847e-05,
164
+ "loss": 0.764,
165
  "step": 130
166
  },
167
  {
168
  "epoch": 0.27,
169
  "learning_rate": 4.440789473684211e-05,
170
+ "loss": 0.7118,
171
  "step": 135
172
  },
173
  {
174
  "epoch": 0.28,
175
  "learning_rate": 4.605263157894737e-05,
176
+ "loss": 0.7243,
177
  "step": 140
178
  },
179
  {
180
  "epoch": 0.29,
181
  "learning_rate": 4.769736842105263e-05,
182
+ "loss": 0.6734,
183
  "step": 145
184
  },
185
  {
186
  "epoch": 0.3,
187
  "learning_rate": 4.9342105263157894e-05,
188
+ "loss": 0.7305,
189
  "step": 150
190
  },
191
  {
192
  "epoch": 0.31,
193
  "learning_rate": 4.9890190336749635e-05,
194
+ "loss": 0.7014,
195
  "step": 155
196
  },
197
  {
198
  "epoch": 0.32,
199
  "learning_rate": 4.970717423133236e-05,
200
+ "loss": 0.7475,
201
  "step": 160
202
  },
203
  {
204
  "epoch": 0.33,
205
  "learning_rate": 4.952415812591508e-05,
206
+ "loss": 0.6954,
207
  "step": 165
208
  },
209
  {
210
  "epoch": 0.34,
211
  "learning_rate": 4.934114202049781e-05,
212
+ "loss": 0.6944,
213
  "step": 170
214
  },
215
  {
216
  "epoch": 0.35,
217
  "learning_rate": 4.9158125915080526e-05,
218
+ "loss": 0.707,
219
  "step": 175
220
  },
221
  {
222
  "epoch": 0.36,
223
  "learning_rate": 4.897510980966325e-05,
224
+ "loss": 0.7096,
225
  "step": 180
226
  },
227
  {
228
  "epoch": 0.37,
229
  "learning_rate": 4.879209370424598e-05,
230
+ "loss": 0.6947,
231
  "step": 185
232
  },
233
  {
234
  "epoch": 0.38,
235
  "learning_rate": 4.86090775988287e-05,
236
+ "loss": 0.674,
237
  "step": 190
238
  },
239
  {
240
  "epoch": 0.38,
241
  "learning_rate": 4.8426061493411424e-05,
242
+ "loss": 0.6553,
243
  "step": 195
244
  },
245
  {
246
  "epoch": 0.39,
247
  "learning_rate": 4.824304538799414e-05,
248
+ "loss": 0.6839,
249
  "step": 200
250
  },
251
  {
252
  "epoch": 0.4,
253
  "learning_rate": 4.806002928257687e-05,
254
+ "loss": 0.622,
255
  "step": 205
256
  },
257
  {
258
  "epoch": 0.41,
259
  "learning_rate": 4.787701317715959e-05,
260
+ "loss": 0.6905,
261
  "step": 210
262
  },
263
  {
264
  "epoch": 0.42,
265
  "learning_rate": 4.7693997071742315e-05,
266
+ "loss": 0.6331,
267
  "step": 215
268
  },
269
  {
270
  "epoch": 0.43,
271
  "learning_rate": 4.751098096632504e-05,
272
+ "loss": 0.6139,
273
  "step": 220
274
  },
275
  {
276
  "epoch": 0.44,
277
  "learning_rate": 4.732796486090777e-05,
278
+ "loss": 0.6399,
279
  "step": 225
280
  },
281
  {
282
  "epoch": 0.45,
283
  "learning_rate": 4.7144948755490486e-05,
284
+ "loss": 0.691,
285
  "step": 230
286
  },
287
  {
288
  "epoch": 0.46,
289
  "learning_rate": 4.696193265007321e-05,
290
+ "loss": 0.602,
291
  "step": 235
292
  },
293
  {
294
  "epoch": 0.47,
295
  "learning_rate": 4.677891654465593e-05,
296
+ "loss": 0.6097,
297
  "step": 240
298
  },
299
  {
300
  "epoch": 0.48,
301
  "learning_rate": 4.659590043923866e-05,
302
+ "loss": 0.6069,
303
  "step": 245
304
  },
305
  {
306
  "epoch": 0.49,
307
  "learning_rate": 4.641288433382138e-05,
308
+ "loss": 0.6243,
309
  "step": 250
310
  },
311
  {
312
  "epoch": 0.5,
313
  "learning_rate": 4.6229868228404096e-05,
314
+ "loss": 0.6222,
315
  "step": 255
316
  },
317
  {
318
  "epoch": 0.51,
319
  "learning_rate": 4.604685212298683e-05,
320
+ "loss": 0.603,
321
  "step": 260
322
  },
323
  {
324
  "epoch": 0.52,
325
  "learning_rate": 4.586383601756955e-05,
326
+ "loss": 0.559,
327
  "step": 265
328
  },
329
  {
330
  "epoch": 0.53,
331
  "learning_rate": 4.5680819912152275e-05,
332
+ "loss": 0.5601,
333
  "step": 270
334
  },
335
  {
336
  "epoch": 0.54,
337
  "learning_rate": 4.5497803806734994e-05,
338
+ "loss": 0.5935,
339
  "step": 275
340
  },
341
  {
342
  "epoch": 0.55,
343
  "learning_rate": 4.531478770131772e-05,
344
+ "loss": 0.5495,
345
  "step": 280
346
  },
347
  {
348
  "epoch": 0.56,
349
  "learning_rate": 4.513177159590044e-05,
350
+ "loss": 0.5694,
351
  "step": 285
352
  },
353
  {
354
  "epoch": 0.57,
355
  "learning_rate": 4.4948755490483165e-05,
356
+ "loss": 0.5767,
357
  "step": 290
358
  },
359
  {
360
  "epoch": 0.58,
361
  "learning_rate": 4.4765739385065885e-05,
362
+ "loss": 0.635,
363
  "step": 295
364
  },
365
  {
366
  "epoch": 0.59,
367
  "learning_rate": 4.458272327964861e-05,
368
+ "loss": 0.593,
369
  "step": 300
370
  },
371
  {
372
  "epoch": 0.6,
373
  "learning_rate": 4.439970717423134e-05,
374
+ "loss": 0.5241,
375
  "step": 305
376
  },
377
  {
378
  "epoch": 0.61,
379
  "learning_rate": 4.4216691068814056e-05,
380
+ "loss": 0.5306,
381
  "step": 310
382
  },
383
  {
384
  "epoch": 0.62,
385
  "learning_rate": 4.403367496339678e-05,
386
+ "loss": 0.5309,
387
  "step": 315
388
  },
389
  {
390
  "epoch": 0.63,
391
  "learning_rate": 4.38506588579795e-05,
392
+ "loss": 0.5205,
393
  "step": 320
394
  },
395
  {
396
  "epoch": 0.64,
397
  "learning_rate": 4.366764275256223e-05,
398
+ "loss": 0.4878,
399
  "step": 325
400
  },
401
  {
402
  "epoch": 0.65,
403
  "learning_rate": 4.348462664714495e-05,
404
+ "loss": 0.5199,
405
  "step": 330
406
  },
407
  {
408
  "epoch": 0.66,
409
  "learning_rate": 4.330161054172767e-05,
410
+ "loss": 0.5524,
411
  "step": 335
412
  },
413
  {
414
  "epoch": 0.67,
415
  "learning_rate": 4.31185944363104e-05,
416
+ "loss": 0.5278,
417
  "step": 340
418
  },
419
  {
420
  "epoch": 0.68,
421
  "learning_rate": 4.2935578330893125e-05,
422
+ "loss": 0.4959,
423
  "step": 345
424
  },
425
  {
426
  "epoch": 0.69,
427
  "learning_rate": 4.2752562225475845e-05,
428
+ "loss": 0.5055,
429
  "step": 350
430
  },
431
  {
432
  "epoch": 0.7,
433
  "learning_rate": 4.256954612005857e-05,
434
+ "loss": 0.5002,
435
  "step": 355
436
  },
437
  {
438
  "epoch": 0.71,
439
  "learning_rate": 4.238653001464129e-05,
440
+ "loss": 0.5296,
441
  "step": 360
442
  },
443
  {
444
  "epoch": 0.72,
445
  "learning_rate": 4.2203513909224016e-05,
446
+ "loss": 0.5661,
447
  "step": 365
448
  },
449
  {
450
  "epoch": 0.73,
451
  "learning_rate": 4.2020497803806735e-05,
452
+ "loss": 0.5451,
453
  "step": 370
454
  },
455
  {
456
  "epoch": 0.74,
457
  "learning_rate": 4.1837481698389455e-05,
458
+ "loss": 0.523,
459
  "step": 375
460
  },
461
  {
462
  "epoch": 0.75,
463
  "learning_rate": 4.165446559297219e-05,
464
+ "loss": 0.5313,
465
  "step": 380
466
  },
467
  {
468
  "epoch": 0.76,
469
  "learning_rate": 4.147144948755491e-05,
470
+ "loss": 0.5729,
471
  "step": 385
472
  },
473
  {
474
  "epoch": 0.77,
475
  "learning_rate": 4.128843338213763e-05,
476
+ "loss": 0.5396,
477
  "step": 390
478
  },
479
  {
480
  "epoch": 0.78,
481
  "learning_rate": 4.110541727672035e-05,
482
+ "loss": 0.5299,
483
  "step": 395
484
  },
485
  {
486
  "epoch": 0.79,
487
  "learning_rate": 4.092240117130308e-05,
488
+ "loss": 0.5403,
489
  "step": 400
490
  },
491
  {
492
  "epoch": 0.8,
493
  "learning_rate": 4.07393850658858e-05,
494
+ "loss": 0.5476,
495
  "step": 405
496
  },
497
  {
498
  "epoch": 0.81,
499
  "learning_rate": 4.0556368960468524e-05,
500
+ "loss": 0.5198,
501
  "step": 410
502
  },
503
  {
504
  "epoch": 0.82,
505
  "learning_rate": 4.037335285505124e-05,
506
+ "loss": 0.4541,
507
  "step": 415
508
  },
509
  {
510
  "epoch": 0.83,
511
  "learning_rate": 4.019033674963397e-05,
512
+ "loss": 0.4666,
513
  "step": 420
514
  },
515
  {
516
  "epoch": 0.84,
517
  "learning_rate": 4.0007320644216695e-05,
518
+ "loss": 0.4636,
519
  "step": 425
520
  },
521
  {
522
  "epoch": 0.85,
523
  "learning_rate": 3.9824304538799415e-05,
524
+ "loss": 0.5061,
525
  "step": 430
526
  },
527
  {
528
  "epoch": 0.86,
529
  "learning_rate": 3.964128843338214e-05,
530
+ "loss": 0.54,
531
  "step": 435
532
  },
533
  {
534
  "epoch": 0.87,
535
  "learning_rate": 3.945827232796486e-05,
536
+ "loss": 0.4544,
537
  "step": 440
538
  },
539
  {
540
  "epoch": 0.88,
541
  "learning_rate": 3.9275256222547586e-05,
542
+ "loss": 0.4486,
543
  "step": 445
544
  },
545
  {
546
  "epoch": 0.89,
547
  "learning_rate": 3.9092240117130305e-05,
548
+ "loss": 0.4375,
549
  "step": 450
550
  },
551
  {
552
  "epoch": 0.9,
553
  "learning_rate": 3.890922401171303e-05,
554
+ "loss": 0.5157,
555
  "step": 455
556
  },
557
  {
558
  "epoch": 0.91,
559
  "learning_rate": 3.872620790629576e-05,
560
+ "loss": 0.5203,
561
  "step": 460
562
  },
563
  {
564
  "epoch": 0.92,
565
  "learning_rate": 3.8543191800878484e-05,
566
+ "loss": 0.4583,
567
  "step": 465
568
  },
569
  {
570
  "epoch": 0.93,
571
  "learning_rate": 3.83601756954612e-05,
572
+ "loss": 0.4744,
573
  "step": 470
574
  },
575
  {
576
  "epoch": 0.94,
577
  "learning_rate": 3.817715959004393e-05,
578
+ "loss": 0.4537,
579
  "step": 475
580
  },
581
  {
582
  "epoch": 0.95,
583
  "learning_rate": 3.799414348462665e-05,
584
+ "loss": 0.4854,
585
  "step": 480
586
  },
587
  {
588
  "epoch": 0.96,
589
  "learning_rate": 3.7811127379209375e-05,
590
+ "loss": 0.4668,
591
  "step": 485
592
  },
593
  {
594
  "epoch": 0.97,
595
  "learning_rate": 3.7628111273792094e-05,
596
+ "loss": 0.4958,
597
  "step": 490
598
  },
599
  {
600
  "epoch": 0.98,
601
  "learning_rate": 3.744509516837481e-05,
602
+ "loss": 0.4887,
603
  "step": 495
604
  },
605
  {
606
  "epoch": 0.99,
607
  "learning_rate": 3.7262079062957546e-05,
608
+ "loss": 0.4975,
609
  "step": 500
610
  },
611
  {
612
  "epoch": 1.0,
613
  "learning_rate": 3.7079062957540265e-05,
614
+ "loss": 0.4079,
615
  "step": 505
616
  },
617
  {
618
  "epoch": 1.0,
619
+ "eval_accuracy": 0.8781063445786478,
620
+ "eval_loss": 0.30538803339004517,
621
+ "eval_runtime": 53.2404,
622
+ "eval_samples_per_second": 135.292,
623
+ "eval_steps_per_second": 4.245,
624
  "step": 506
625
  },
626
  {
627
  "epoch": 1.01,
628
  "learning_rate": 3.689604685212299e-05,
629
+ "loss": 0.3657,
630
  "step": 510
631
  },
632
  {
633
  "epoch": 1.02,
634
  "learning_rate": 3.671303074670571e-05,
635
+ "loss": 0.4015,
636
  "step": 515
637
  },
638
  {
639
  "epoch": 1.03,
640
  "learning_rate": 3.653001464128844e-05,
641
+ "loss": 0.431,
642
  "step": 520
643
  },
644
  {
645
  "epoch": 1.04,
646
  "learning_rate": 3.6346998535871156e-05,
647
+ "loss": 0.4145,
648
  "step": 525
649
  },
650
  {
651
  "epoch": 1.05,
652
  "learning_rate": 3.616398243045388e-05,
653
+ "loss": 0.4017,
654
  "step": 530
655
  },
656
  {
657
  "epoch": 1.06,
658
  "learning_rate": 3.59809663250366e-05,
659
+ "loss": 0.411,
660
  "step": 535
661
  },
662
  {
663
  "epoch": 1.07,
664
  "learning_rate": 3.579795021961933e-05,
665
+ "loss": 0.4549,
666
  "step": 540
667
  },
668
  {
669
  "epoch": 1.08,
670
  "learning_rate": 3.5614934114202054e-05,
671
+ "loss": 0.3704,
672
  "step": 545
673
  },
674
  {
675
  "epoch": 1.09,
676
  "learning_rate": 3.543191800878477e-05,
677
+ "loss": 0.439,
678
  "step": 550
679
  },
680
  {
681
  "epoch": 1.1,
682
  "learning_rate": 3.52489019033675e-05,
683
+ "loss": 0.3795,
684
  "step": 555
685
  },
686
  {
687
  "epoch": 1.11,
688
  "learning_rate": 3.506588579795022e-05,
689
+ "loss": 0.4194,
690
  "step": 560
691
  },
692
  {
693
  "epoch": 1.12,
694
  "learning_rate": 3.4882869692532945e-05,
695
+ "loss": 0.434,
696
  "step": 565
697
  },
698
  {
699
  "epoch": 1.13,
700
  "learning_rate": 3.4699853587115664e-05,
701
+ "loss": 0.3572,
702
  "step": 570
703
  },
704
  {
705
  "epoch": 1.14,
706
  "learning_rate": 3.451683748169839e-05,
707
+ "loss": 0.4794,
708
  "step": 575
709
  },
710
  {
711
  "epoch": 1.15,
712
  "learning_rate": 3.4333821376281116e-05,
713
+ "loss": 0.4555,
714
  "step": 580
715
  },
716
  {
717
  "epoch": 1.15,
718
  "learning_rate": 3.415080527086384e-05,
719
+ "loss": 0.3989,
720
  "step": 585
721
  },
722
  {
723
  "epoch": 1.16,
724
  "learning_rate": 3.396778916544656e-05,
725
+ "loss": 0.4357,
726
  "step": 590
727
  },
728
  {
729
  "epoch": 1.17,
730
  "learning_rate": 3.378477306002929e-05,
731
+ "loss": 0.4427,
732
  "step": 595
733
  },
734
  {
735
  "epoch": 1.18,
736
  "learning_rate": 3.360175695461201e-05,
737
+ "loss": 0.3575,
738
  "step": 600
739
  },
740
  {
741
  "epoch": 1.19,
742
  "learning_rate": 3.3418740849194726e-05,
743
+ "loss": 0.4116,
744
  "step": 605
745
  },
746
  {
747
  "epoch": 1.2,
748
  "learning_rate": 3.323572474377745e-05,
749
+ "loss": 0.3938,
750
  "step": 610
751
  },
752
  {
753
  "epoch": 1.21,
754
  "learning_rate": 3.305270863836017e-05,
755
+ "loss": 0.4135,
756
  "step": 615
757
  },
758
  {
759
  "epoch": 1.22,
760
  "learning_rate": 3.2869692532942905e-05,
761
+ "loss": 0.3695,
762
  "step": 620
763
  },
764
  {
765
  "epoch": 1.23,
766
  "learning_rate": 3.2686676427525624e-05,
767
+ "loss": 0.4559,
768
  "step": 625
769
  },
770
  {
771
  "epoch": 1.24,
772
  "learning_rate": 3.250366032210835e-05,
773
+ "loss": 0.4487,
774
  "step": 630
775
  },
776
  {
777
  "epoch": 1.25,
778
  "learning_rate": 3.232064421669107e-05,
779
+ "loss": 0.3635,
780
  "step": 635
781
  },
782
  {
783
  "epoch": 1.26,
784
  "learning_rate": 3.2137628111273795e-05,
785
+ "loss": 0.4071,
786
  "step": 640
787
  },
788
  {
789
  "epoch": 1.27,
790
  "learning_rate": 3.1954612005856515e-05,
791
+ "loss": 0.4295,
792
  "step": 645
793
  },
794
  {
795
  "epoch": 1.28,
796
  "learning_rate": 3.177159590043924e-05,
797
+ "loss": 0.4958,
798
  "step": 650
799
  },
800
  {
801
  "epoch": 1.29,
802
  "learning_rate": 3.158857979502196e-05,
803
+ "loss": 0.4281,
804
  "step": 655
805
  },
806
  {
807
  "epoch": 1.3,
808
  "learning_rate": 3.1405563689604686e-05,
809
+ "loss": 0.4023,
810
  "step": 660
811
  },
812
  {
813
  "epoch": 1.31,
814
  "learning_rate": 3.122254758418741e-05,
815
+ "loss": 0.4148,
816
  "step": 665
817
  },
818
  {
819
  "epoch": 1.32,
820
  "learning_rate": 3.103953147877013e-05,
821
+ "loss": 0.407,
822
  "step": 670
823
  },
824
  {
825
  "epoch": 1.33,
826
  "learning_rate": 3.085651537335286e-05,
827
+ "loss": 0.3638,
828
  "step": 675
829
  },
830
  {
831
  "epoch": 1.34,
832
  "learning_rate": 3.067349926793558e-05,
833
+ "loss": 0.366,
834
  "step": 680
835
  },
836
  {
837
  "epoch": 1.35,
838
  "learning_rate": 3.0490483162518303e-05,
839
+ "loss": 0.3173,
840
  "step": 685
841
  },
842
  {
843
  "epoch": 1.36,
844
  "learning_rate": 3.0307467057101026e-05,
845
+ "loss": 0.4269,
846
  "step": 690
847
  },
848
  {
849
  "epoch": 1.37,
850
  "learning_rate": 3.012445095168375e-05,
851
+ "loss": 0.3789,
852
  "step": 695
853
  },
854
  {
855
  "epoch": 1.38,
856
  "learning_rate": 2.9941434846266475e-05,
857
+ "loss": 0.3952,
858
  "step": 700
859
  },
860
  {
861
  "epoch": 1.39,
862
  "learning_rate": 2.9758418740849197e-05,
863
+ "loss": 0.3611,
864
  "step": 705
865
  },
866
  {
867
  "epoch": 1.4,
868
  "learning_rate": 2.957540263543192e-05,
869
+ "loss": 0.3679,
870
  "step": 710
871
  },
872
  {
873
  "epoch": 1.41,
874
  "learning_rate": 2.9392386530014643e-05,
875
+ "loss": 0.3712,
876
  "step": 715
877
  },
878
  {
879
  "epoch": 1.42,
880
  "learning_rate": 2.9209370424597365e-05,
881
+ "loss": 0.3672,
882
  "step": 720
883
  },
884
  {
885
  "epoch": 1.43,
886
  "learning_rate": 2.9026354319180088e-05,
887
+ "loss": 0.4433,
888
  "step": 725
889
  },
890
  {
891
  "epoch": 1.44,
892
  "learning_rate": 2.884333821376281e-05,
893
+ "loss": 0.3833,
894
  "step": 730
895
  },
896
  {
897
  "epoch": 1.45,
898
  "learning_rate": 2.8660322108345534e-05,
899
+ "loss": 0.4158,
900
  "step": 735
901
  },
902
  {
903
  "epoch": 1.46,
904
  "learning_rate": 2.847730600292826e-05,
905
+ "loss": 0.3979,
906
  "step": 740
907
  },
908
  {
909
  "epoch": 1.47,
910
  "learning_rate": 2.8294289897510982e-05,
911
+ "loss": 0.4005,
912
  "step": 745
913
  },
914
  {
915
  "epoch": 1.48,
916
  "learning_rate": 2.8111273792093705e-05,
917
+ "loss": 0.3757,
918
  "step": 750
919
  },
920
  {
921
  "epoch": 1.49,
922
  "learning_rate": 2.7928257686676428e-05,
923
+ "loss": 0.374,
924
  "step": 755
925
  },
926
  {
927
  "epoch": 1.5,
928
  "learning_rate": 2.774524158125915e-05,
929
+ "loss": 0.4122,
930
  "step": 760
931
  },
932
  {
933
  "epoch": 1.51,
934
  "learning_rate": 2.7562225475841873e-05,
935
+ "loss": 0.3892,
936
  "step": 765
937
  },
938
  {
939
  "epoch": 1.52,
940
  "learning_rate": 2.7379209370424596e-05,
941
+ "loss": 0.4024,
942
  "step": 770
943
  },
944
  {
945
  "epoch": 1.53,
946
  "learning_rate": 2.719619326500732e-05,
947
+ "loss": 0.3734,
948
  "step": 775
949
  },
950
  {
951
  "epoch": 1.54,
952
  "learning_rate": 2.7013177159590048e-05,
953
+ "loss": 0.4271,
954
  "step": 780
955
  },
956
  {
957
  "epoch": 1.55,
958
  "learning_rate": 2.683016105417277e-05,
959
+ "loss": 0.3551,
960
  "step": 785
961
  },
962
  {
963
  "epoch": 1.56,
964
  "learning_rate": 2.6647144948755493e-05,
965
+ "loss": 0.4362,
966
  "step": 790
967
  },
968
  {
969
  "epoch": 1.57,
970
  "learning_rate": 2.6464128843338216e-05,
971
+ "loss": 0.3928,
972
  "step": 795
973
  },
974
  {
975
  "epoch": 1.58,
976
  "learning_rate": 2.628111273792094e-05,
977
+ "loss": 0.3794,
978
  "step": 800
979
  },
980
  {
981
  "epoch": 1.59,
982
  "learning_rate": 2.609809663250366e-05,
983
+ "loss": 0.3925,
984
  "step": 805
985
  },
986
  {
987
  "epoch": 1.6,
988
  "learning_rate": 2.5915080527086384e-05,
989
+ "loss": 0.3458,
990
  "step": 810
991
  },
992
  {
993
  "epoch": 1.61,
994
  "learning_rate": 2.5732064421669107e-05,
995
+ "loss": 0.3678,
996
  "step": 815
997
  },
998
  {
999
  "epoch": 1.62,
1000
  "learning_rate": 2.5549048316251833e-05,
1001
+ "loss": 0.3469,
1002
  "step": 820
1003
  },
1004
  {
1005
  "epoch": 1.63,
1006
  "learning_rate": 2.5366032210834556e-05,
1007
+ "loss": 0.3549,
1008
  "step": 825
1009
  },
1010
  {
1011
  "epoch": 1.64,
1012
  "learning_rate": 2.518301610541728e-05,
1013
+ "loss": 0.3672,
1014
  "step": 830
1015
  },
1016
  {
1017
  "epoch": 1.65,
1018
  "learning_rate": 2.5e-05,
1019
+ "loss": 0.3562,
1020
  "step": 835
1021
  },
1022
  {
1023
  "epoch": 1.66,
1024
  "learning_rate": 2.4816983894582724e-05,
1025
+ "loss": 0.3552,
1026
  "step": 840
1027
  },
1028
  {
1029
  "epoch": 1.67,
1030
  "learning_rate": 2.463396778916545e-05,
1031
+ "loss": 0.4225,
1032
  "step": 845
1033
  },
1034
  {
1035
  "epoch": 1.68,
1036
  "learning_rate": 2.4450951683748173e-05,
1037
+ "loss": 0.3528,
1038
  "step": 850
1039
  },
1040
  {
1041
  "epoch": 1.69,
1042
  "learning_rate": 2.4267935578330895e-05,
1043
+ "loss": 0.3317,
1044
  "step": 855
1045
  },
1046
  {
1047
  "epoch": 1.7,
1048
  "learning_rate": 2.4084919472913618e-05,
1049
+ "loss": 0.344,
1050
  "step": 860
1051
  },
1052
  {
1053
  "epoch": 1.71,
1054
  "learning_rate": 2.390190336749634e-05,
1055
+ "loss": 0.3163,
1056
  "step": 865
1057
  },
1058
  {
1059
  "epoch": 1.72,
1060
  "learning_rate": 2.3718887262079064e-05,
1061
+ "loss": 0.3705,
1062
  "step": 870
1063
  },
1064
  {
1065
  "epoch": 1.73,
1066
  "learning_rate": 2.3535871156661786e-05,
1067
+ "loss": 0.3456,
1068
  "step": 875
1069
  },
1070
  {
1071
  "epoch": 1.74,
1072
  "learning_rate": 2.335285505124451e-05,
1073
+ "loss": 0.326,
1074
  "step": 880
1075
  },
1076
  {
1077
  "epoch": 1.75,
1078
  "learning_rate": 2.3169838945827235e-05,
1079
+ "loss": 0.3429,
1080
  "step": 885
1081
  },
1082
  {
1083
  "epoch": 1.76,
1084
  "learning_rate": 2.2986822840409958e-05,
1085
+ "loss": 0.3485,
1086
  "step": 890
1087
  },
1088
  {
1089
  "epoch": 1.77,
1090
  "learning_rate": 2.280380673499268e-05,
1091
+ "loss": 0.3599,
1092
  "step": 895
1093
  },
1094
  {
1095
  "epoch": 1.78,
1096
  "learning_rate": 2.2620790629575403e-05,
1097
+ "loss": 0.3657,
1098
  "step": 900
1099
  },
1100
  {
1101
  "epoch": 1.79,
1102
  "learning_rate": 2.243777452415813e-05,
1103
+ "loss": 0.4039,
1104
  "step": 905
1105
  },
1106
  {
1107
  "epoch": 1.8,
1108
  "learning_rate": 2.2254758418740852e-05,
1109
+ "loss": 0.328,
1110
  "step": 910
1111
  },
1112
  {
1113
  "epoch": 1.81,
1114
  "learning_rate": 2.2071742313323575e-05,
1115
+ "loss": 0.3446,
1116
  "step": 915
1117
  },
1118
  {
1119
  "epoch": 1.82,
1120
  "learning_rate": 2.1888726207906297e-05,
1121
+ "loss": 0.3491,
1122
  "step": 920
1123
  },
1124
  {
1125
  "epoch": 1.83,
1126
  "learning_rate": 2.170571010248902e-05,
1127
+ "loss": 0.3665,
1128
  "step": 925
1129
  },
1130
  {
1131
  "epoch": 1.84,
1132
  "learning_rate": 2.1522693997071743e-05,
1133
+ "loss": 0.3392,
1134
  "step": 930
1135
  },
1136
  {
1137
  "epoch": 1.85,
1138
  "learning_rate": 2.1339677891654465e-05,
1139
+ "loss": 0.3244,
1140
  "step": 935
1141
  },
1142
  {
1143
  "epoch": 1.86,
1144
  "learning_rate": 2.1156661786237188e-05,
1145
+ "loss": 0.377,
1146
  "step": 940
1147
  },
1148
  {
1149
  "epoch": 1.87,
1150
  "learning_rate": 2.0973645680819914e-05,
1151
+ "loss": 0.3165,
1152
  "step": 945
1153
  },
1154
  {
1155
  "epoch": 1.88,
1156
  "learning_rate": 2.0790629575402637e-05,
1157
+ "loss": 0.3318,
1158
  "step": 950
1159
  },
1160
  {
1161
  "epoch": 1.89,
1162
  "learning_rate": 2.060761346998536e-05,
1163
+ "loss": 0.3049,
1164
  "step": 955
1165
  },
1166
  {
1167
  "epoch": 1.9,
1168
  "learning_rate": 2.0424597364568082e-05,
1169
+ "loss": 0.3265,
1170
  "step": 960
1171
  },
1172
  {
1173
  "epoch": 1.91,
1174
  "learning_rate": 2.024158125915081e-05,
1175
+ "loss": 0.3265,
1176
  "step": 965
1177
  },
1178
  {
1179
  "epoch": 1.92,
1180
  "learning_rate": 2.005856515373353e-05,
1181
+ "loss": 0.3719,
1182
  "step": 970
1183
  },
1184
  {
1185
  "epoch": 1.92,
1186
  "learning_rate": 1.9875549048316254e-05,
1187
+ "loss": 0.3507,
1188
  "step": 975
1189
  },
1190
  {
1191
  "epoch": 1.93,
1192
  "learning_rate": 1.9692532942898977e-05,
1193
+ "loss": 0.3319,
1194
  "step": 980
1195
  },
1196
  {
1197
  "epoch": 1.94,
1198
  "learning_rate": 1.95095168374817e-05,
1199
+ "loss": 0.382,
1200
  "step": 985
1201
  },
1202
  {
1203
  "epoch": 1.95,
1204
  "learning_rate": 1.9326500732064422e-05,
1205
+ "loss": 0.3204,
1206
  "step": 990
1207
  },
1208
  {
1209
  "epoch": 1.96,
1210
  "learning_rate": 1.9143484626647145e-05,
1211
+ "loss": 0.3793,
1212
  "step": 995
1213
  },
1214
  {
1215
  "epoch": 1.97,
1216
  "learning_rate": 1.8960468521229867e-05,
1217
+ "loss": 0.3731,
1218
  "step": 1000
1219
  },
1220
  {
1221
  "epoch": 1.98,
1222
  "learning_rate": 1.8777452415812594e-05,
1223
+ "loss": 0.3628,
1224
  "step": 1005
1225
  },
1226
  {
1227
  "epoch": 1.99,
1228
  "learning_rate": 1.8594436310395316e-05,
1229
+ "loss": 0.3327,
1230
  "step": 1010
1231
  },
1232
  {
1233
  "epoch": 2.0,
1234
+ "eval_accuracy": 0.927391364709149,
1235
+ "eval_loss": 0.19831140339374542,
1236
+ "eval_runtime": 51.0671,
1237
+ "eval_samples_per_second": 141.05,
1238
+ "eval_steps_per_second": 4.426,
1239
  "step": 1013
1240
  },
1241
  {
1242
  "epoch": 2.0,
1243
  "learning_rate": 1.841142020497804e-05,
1244
+ "loss": 0.2912,
1245
  "step": 1015
1246
  },
1247
  {
1248
  "epoch": 2.01,
1249
  "learning_rate": 1.822840409956076e-05,
1250
+ "loss": 0.313,
1251
  "step": 1020
1252
  },
1253
  {
1254
  "epoch": 2.02,
1255
  "learning_rate": 1.8045387994143488e-05,
1256
+ "loss": 0.3181,
1257
  "step": 1025
1258
  },
1259
  {
1260
  "epoch": 2.03,
1261
  "learning_rate": 1.786237188872621e-05,
1262
+ "loss": 0.3328,
1263
  "step": 1030
1264
  },
1265
  {
1266
  "epoch": 2.04,
1267
  "learning_rate": 1.7679355783308933e-05,
1268
+ "loss": 0.3494,
1269
  "step": 1035
1270
  },
1271
  {
1272
  "epoch": 2.05,
1273
  "learning_rate": 1.7496339677891656e-05,
1274
+ "loss": 0.3584,
1275
  "step": 1040
1276
  },
1277
  {
1278
  "epoch": 2.06,
1279
  "learning_rate": 1.731332357247438e-05,
1280
+ "loss": 0.3275,
1281
  "step": 1045
1282
  },
1283
  {
1284
  "epoch": 2.07,
1285
  "learning_rate": 1.71303074670571e-05,
1286
+ "loss": 0.3517,
1287
  "step": 1050
1288
  },
1289
  {
1290
  "epoch": 2.08,
1291
  "learning_rate": 1.6947291361639824e-05,
1292
+ "loss": 0.3464,
1293
  "step": 1055
1294
  },
1295
  {
1296
  "epoch": 2.09,
1297
  "learning_rate": 1.6764275256222547e-05,
1298
+ "loss": 0.2692,
1299
  "step": 1060
1300
  },
1301
  {
1302
  "epoch": 2.1,
1303
  "learning_rate": 1.6581259150805273e-05,
1304
+ "loss": 0.3252,
1305
  "step": 1065
1306
  },
1307
  {
1308
  "epoch": 2.11,
1309
  "learning_rate": 1.6398243045387995e-05,
1310
+ "loss": 0.3393,
1311
  "step": 1070
1312
  },
1313
  {
1314
  "epoch": 2.12,
1315
  "learning_rate": 1.6215226939970718e-05,
1316
+ "loss": 0.2896,
1317
  "step": 1075
1318
  },
1319
  {
1320
  "epoch": 2.13,
1321
  "learning_rate": 1.603221083455344e-05,
1322
+ "loss": 0.2643,
1323
  "step": 1080
1324
  },
1325
  {
1326
  "epoch": 2.14,
1327
  "learning_rate": 1.5849194729136167e-05,
1328
+ "loss": 0.3854,
1329
  "step": 1085
1330
  },
1331
  {
1332
  "epoch": 2.15,
1333
  "learning_rate": 1.566617862371889e-05,
1334
+ "loss": 0.3183,
1335
  "step": 1090
1336
  },
1337
  {
1338
  "epoch": 2.16,
1339
  "learning_rate": 1.5483162518301612e-05,
1340
+ "loss": 0.3572,
1341
  "step": 1095
1342
  },
1343
  {
1344
  "epoch": 2.17,
1345
  "learning_rate": 1.5300146412884335e-05,
1346
+ "loss": 0.2625,
1347
  "step": 1100
1348
  },
1349
  {
1350
  "epoch": 2.18,
1351
  "learning_rate": 1.511713030746706e-05,
1352
+ "loss": 0.3089,
1353
  "step": 1105
1354
  },
1355
  {
1356
  "epoch": 2.19,
1357
  "learning_rate": 1.4934114202049782e-05,
1358
+ "loss": 0.2938,
1359
  "step": 1110
1360
  },
1361
  {
1362
  "epoch": 2.2,
1363
  "learning_rate": 1.4751098096632505e-05,
1364
+ "loss": 0.3711,
1365
  "step": 1115
1366
  },
1367
  {
1368
  "epoch": 2.21,
1369
  "learning_rate": 1.4568081991215226e-05,
1370
+ "loss": 0.3456,
1371
  "step": 1120
1372
  },
1373
  {
1374
  "epoch": 2.22,
1375
  "learning_rate": 1.4385065885797952e-05,
1376
+ "loss": 0.37,
1377
  "step": 1125
1378
  },
1379
  {
1380
  "epoch": 2.23,
1381
  "learning_rate": 1.4202049780380675e-05,
1382
+ "loss": 0.3584,
1383
  "step": 1130
1384
  },
1385
  {
1386
  "epoch": 2.24,
1387
  "learning_rate": 1.4019033674963397e-05,
1388
+ "loss": 0.2956,
1389
  "step": 1135
1390
  },
1391
  {
1392
  "epoch": 2.25,
1393
  "learning_rate": 1.383601756954612e-05,
1394
+ "loss": 0.3251,
1395
  "step": 1140
1396
  },
1397
  {
1398
  "epoch": 2.26,
1399
  "learning_rate": 1.3653001464128845e-05,
1400
+ "loss": 0.2945,
1401
  "step": 1145
1402
  },
1403
  {
1404
  "epoch": 2.27,
1405
  "learning_rate": 1.3469985358711567e-05,
1406
+ "loss": 0.3545,
1407
  "step": 1150
1408
  },
1409
  {
1410
  "epoch": 2.28,
1411
  "learning_rate": 1.328696925329429e-05,
1412
+ "loss": 0.2872,
1413
  "step": 1155
1414
  },
1415
  {
1416
  "epoch": 2.29,
1417
  "learning_rate": 1.3103953147877013e-05,
1418
+ "loss": 0.3433,
1419
  "step": 1160
1420
  },
1421
  {
1422
  "epoch": 2.3,
1423
  "learning_rate": 1.2920937042459739e-05,
1424
+ "loss": 0.3048,
1425
  "step": 1165
1426
  },
1427
  {
1428
  "epoch": 2.31,
1429
  "learning_rate": 1.2737920937042461e-05,
1430
+ "loss": 0.2649,
1431
  "step": 1170
1432
  },
1433
  {
1434
  "epoch": 2.32,
1435
  "learning_rate": 1.2554904831625182e-05,
1436
+ "loss": 0.3022,
1437
  "step": 1175
1438
  },
1439
  {
1440
  "epoch": 2.33,
1441
  "learning_rate": 1.2371888726207907e-05,
1442
+ "loss": 0.3398,
1443
  "step": 1180
1444
  },
1445
  {
1446
  "epoch": 2.34,
1447
  "learning_rate": 1.218887262079063e-05,
1448
+ "loss": 0.272,
1449
  "step": 1185
1450
  },
1451
  {
1452
  "epoch": 2.35,
1453
  "learning_rate": 1.2005856515373354e-05,
1454
+ "loss": 0.3443,
1455
  "step": 1190
1456
  },
1457
  {
1458
  "epoch": 2.36,
1459
  "learning_rate": 1.1822840409956077e-05,
1460
+ "loss": 0.2978,
1461
  "step": 1195
1462
  },
1463
  {
1464
  "epoch": 2.37,
1465
  "learning_rate": 1.1639824304538801e-05,
1466
+ "loss": 0.3128,
1467
  "step": 1200
1468
  },
1469
  {
1470
  "epoch": 2.38,
1471
  "learning_rate": 1.1456808199121522e-05,
1472
+ "loss": 0.3301,
1473
  "step": 1205
1474
  },
1475
  {
1476
  "epoch": 2.39,
1477
  "learning_rate": 1.1273792093704246e-05,
1478
+ "loss": 0.2922,
1479
  "step": 1210
1480
  },
1481
  {
1482
  "epoch": 2.4,
1483
  "learning_rate": 1.109077598828697e-05,
1484
+ "loss": 0.2871,
1485
  "step": 1215
1486
  },
1487
  {
1488
  "epoch": 2.41,
1489
  "learning_rate": 1.0907759882869694e-05,
1490
+ "loss": 0.2858,
1491
  "step": 1220
1492
  },
1493
  {
1494
  "epoch": 2.42,
1495
  "learning_rate": 1.0724743777452416e-05,
1496
+ "loss": 0.3008,
1497
  "step": 1225
1498
  },
1499
  {
1500
  "epoch": 2.43,
1501
  "learning_rate": 1.054172767203514e-05,
1502
+ "loss": 0.345,
1503
  "step": 1230
1504
  },
1505
  {
1506
  "epoch": 2.44,
1507
  "learning_rate": 1.0358711566617862e-05,
1508
+ "loss": 0.3253,
1509
  "step": 1235
1510
  },
1511
  {
1512
  "epoch": 2.45,
1513
  "learning_rate": 1.0175695461200586e-05,
1514
+ "loss": 0.319,
1515
  "step": 1240
1516
  },
1517
  {
1518
  "epoch": 2.46,
1519
  "learning_rate": 9.992679355783309e-06,
1520
+ "loss": 0.3208,
1521
  "step": 1245
1522
  },
1523
  {
1524
  "epoch": 2.47,
1525
  "learning_rate": 9.809663250366033e-06,
1526
+ "loss": 0.3445,
1527
  "step": 1250
1528
  },
1529
  {
1530
  "epoch": 2.48,
1531
  "learning_rate": 9.626647144948756e-06,
1532
+ "loss": 0.3328,
1533
  "step": 1255
1534
  },
1535
  {
1536
  "epoch": 2.49,
1537
  "learning_rate": 9.44363103953148e-06,
1538
+ "loss": 0.3148,
1539
  "step": 1260
1540
  },
1541
  {
1542
  "epoch": 2.5,
1543
  "learning_rate": 9.260614934114201e-06,
1544
+ "loss": 0.2937,
1545
  "step": 1265
1546
  },
1547
  {
1548
  "epoch": 2.51,
1549
  "learning_rate": 9.077598828696926e-06,
1550
+ "loss": 0.2848,
1551
  "step": 1270
1552
  },
1553
  {
1554
  "epoch": 2.52,
1555
  "learning_rate": 8.894582723279648e-06,
1556
+ "loss": 0.2818,
1557
  "step": 1275
1558
  },
1559
  {
1560
  "epoch": 2.53,
1561
  "learning_rate": 8.711566617862373e-06,
1562
+ "loss": 0.3272,
1563
  "step": 1280
1564
  },
1565
  {
1566
  "epoch": 2.54,
1567
  "learning_rate": 8.528550512445096e-06,
1568
+ "loss": 0.3098,
1569
  "step": 1285
1570
  },
1571
  {
1572
  "epoch": 2.55,
1573
  "learning_rate": 8.34553440702782e-06,
1574
+ "loss": 0.2721,
1575
  "step": 1290
1576
  },
1577
  {
1578
  "epoch": 2.56,
1579
  "learning_rate": 8.162518301610541e-06,
1580
+ "loss": 0.3247,
1581
  "step": 1295
1582
  },
1583
  {
1584
  "epoch": 2.57,
1585
  "learning_rate": 7.979502196193265e-06,
1586
+ "loss": 0.3047,
1587
  "step": 1300
1588
  },
1589
  {
1590
  "epoch": 2.58,
1591
  "learning_rate": 7.796486090775988e-06,
1592
+ "loss": 0.3353,
1593
  "step": 1305
1594
  },
1595
  {
1596
  "epoch": 2.59,
1597
  "learning_rate": 7.613469985358712e-06,
1598
+ "loss": 0.3642,
1599
  "step": 1310
1600
  },
1601
  {
1602
  "epoch": 2.6,
1603
  "learning_rate": 7.430453879941435e-06,
1604
+ "loss": 0.3076,
1605
  "step": 1315
1606
  },
1607
  {
1608
  "epoch": 2.61,
1609
  "learning_rate": 7.247437774524159e-06,
1610
+ "loss": 0.3023,
1611
  "step": 1320
1612
  },
1613
  {
1614
  "epoch": 2.62,
1615
  "learning_rate": 7.064421669106881e-06,
1616
+ "loss": 0.3034,
1617
  "step": 1325
1618
  },
1619
  {
1620
  "epoch": 2.63,
1621
  "learning_rate": 6.881405563689605e-06,
1622
+ "loss": 0.3302,
1623
  "step": 1330
1624
  },
1625
  {
1626
  "epoch": 2.64,
1627
  "learning_rate": 6.698389458272328e-06,
1628
+ "loss": 0.2926,
1629
  "step": 1335
1630
  },
1631
  {
1632
  "epoch": 2.65,
1633
  "learning_rate": 6.515373352855052e-06,
1634
+ "loss": 0.3434,
1635
  "step": 1340
1636
  },
1637
  {
1638
  "epoch": 2.66,
1639
  "learning_rate": 6.332357247437774e-06,
1640
+ "loss": 0.2747,
1641
  "step": 1345
1642
  },
1643
  {
1644
  "epoch": 2.67,
1645
  "learning_rate": 6.149341142020498e-06,
1646
+ "loss": 0.3549,
1647
  "step": 1350
1648
  },
1649
  {
1650
  "epoch": 2.68,
1651
  "learning_rate": 5.966325036603222e-06,
1652
+ "loss": 0.2671,
1653
  "step": 1355
1654
  },
1655
  {
1656
  "epoch": 2.69,
1657
  "learning_rate": 5.7833089311859446e-06,
1658
+ "loss": 0.3066,
1659
  "step": 1360
1660
  },
1661
  {
1662
  "epoch": 2.69,
1663
  "learning_rate": 5.600292825768668e-06,
1664
+ "loss": 0.3247,
1665
  "step": 1365
1666
  },
1667
  {
1668
  "epoch": 2.7,
1669
  "learning_rate": 5.417276720351392e-06,
1670
+ "loss": 0.3116,
1671
  "step": 1370
1672
  },
1673
  {
1674
  "epoch": 2.71,
1675
  "learning_rate": 5.234260614934114e-06,
1676
+ "loss": 0.2785,
1677
  "step": 1375
1678
  },
1679
  {
1680
  "epoch": 2.72,
1681
  "learning_rate": 5.051244509516838e-06,
1682
+ "loss": 0.2975,
1683
  "step": 1380
1684
  },
1685
  {
1686
  "epoch": 2.73,
1687
  "learning_rate": 4.8682284040995615e-06,
1688
+ "loss": 0.3296,
1689
  "step": 1385
1690
  },
1691
  {
1692
  "epoch": 2.74,
1693
  "learning_rate": 4.685212298682284e-06,
1694
+ "loss": 0.3022,
1695
  "step": 1390
1696
  },
1697
  {
1698
  "epoch": 2.75,
1699
  "learning_rate": 4.502196193265008e-06,
1700
+ "loss": 0.3067,
1701
  "step": 1395
1702
  },
1703
  {
1704
  "epoch": 2.76,
1705
  "learning_rate": 4.319180087847731e-06,
1706
+ "loss": 0.3437,
1707
  "step": 1400
1708
  },
1709
  {
1710
  "epoch": 2.77,
1711
  "learning_rate": 4.136163982430454e-06,
1712
+ "loss": 0.3191,
1713
  "step": 1405
1714
  },
1715
  {
1716
  "epoch": 2.78,
1717
  "learning_rate": 3.9531478770131775e-06,
1718
+ "loss": 0.3234,
1719
  "step": 1410
1720
  },
1721
  {
1722
  "epoch": 2.79,
1723
  "learning_rate": 3.7701317715959007e-06,
1724
+ "loss": 0.268,
1725
  "step": 1415
1726
  },
1727
  {
1728
  "epoch": 2.8,
1729
  "learning_rate": 3.587115666178624e-06,
1730
+ "loss": 0.3349,
1731
  "step": 1420
1732
  },
1733
  {
1734
  "epoch": 2.81,
1735
  "learning_rate": 3.4040995607613473e-06,
1736
+ "loss": 0.3179,
1737
  "step": 1425
1738
  },
1739
  {
1740
  "epoch": 2.82,
1741
  "learning_rate": 3.2210834553440705e-06,
1742
+ "loss": 0.3158,
1743
  "step": 1430
1744
  },
1745
  {
1746
  "epoch": 2.83,
1747
  "learning_rate": 3.0380673499267936e-06,
1748
+ "loss": 0.3094,
1749
  "step": 1435
1750
  },
1751
  {
1752
  "epoch": 2.84,
1753
  "learning_rate": 2.855051244509517e-06,
1754
+ "loss": 0.2834,
1755
  "step": 1440
1756
  },
1757
  {
1758
  "epoch": 2.85,
1759
  "learning_rate": 2.6720351390922403e-06,
1760
+ "loss": 0.2633,
1761
  "step": 1445
1762
  },
1763
  {
1764
  "epoch": 2.86,
1765
  "learning_rate": 2.4890190336749634e-06,
1766
+ "loss": 0.2923,
1767
  "step": 1450
1768
  },
1769
  {
1770
  "epoch": 2.87,
1771
  "learning_rate": 2.306002928257687e-06,
1772
+ "loss": 0.3066,
1773
  "step": 1455
1774
  },
1775
  {
1776
  "epoch": 2.88,
1777
  "learning_rate": 2.12298682284041e-06,
1778
+ "loss": 0.3047,
1779
  "step": 1460
1780
  },
1781
  {
1782
  "epoch": 2.89,
1783
  "learning_rate": 1.9399707174231332e-06,
1784
+ "loss": 0.3098,
1785
  "step": 1465
1786
  },
1787
  {
1788
  "epoch": 2.9,
1789
  "learning_rate": 1.7569546120058566e-06,
1790
+ "loss": 0.3357,
1791
  "step": 1470
1792
  },
1793
  {
1794
  "epoch": 2.91,
1795
  "learning_rate": 1.57393850658858e-06,
1796
+ "loss": 0.2852,
1797
  "step": 1475
1798
  },
1799
  {
1800
  "epoch": 2.92,
1801
  "learning_rate": 1.3909224011713032e-06,
1802
+ "loss": 0.3455,
1803
  "step": 1480
1804
  },
1805
  {
1806
  "epoch": 2.93,
1807
  "learning_rate": 1.2079062957540264e-06,
1808
+ "loss": 0.3222,
1809
  "step": 1485
1810
  },
1811
  {
1812
  "epoch": 2.94,
1813
  "learning_rate": 1.0248901903367497e-06,
1814
+ "loss": 0.2713,
1815
  "step": 1490
1816
  },
1817
  {
1818
  "epoch": 2.95,
1819
  "learning_rate": 8.41874084919473e-07,
1820
+ "loss": 0.2757,
1821
  "step": 1495
1822
  },
1823
  {
1824
  "epoch": 2.96,
1825
  "learning_rate": 6.588579795021963e-07,
1826
+ "loss": 0.329,
1827
  "step": 1500
1828
  },
1829
  {
1830
  "epoch": 2.97,
1831
  "learning_rate": 4.758418740849195e-07,
1832
+ "loss": 0.3608,
1833
  "step": 1505
1834
  },
1835
  {
1836
  "epoch": 2.98,
1837
  "learning_rate": 2.9282576866764276e-07,
1838
+ "loss": 0.2705,
1839
  "step": 1510
1840
  },
1841
  {
1842
  "epoch": 2.99,
1843
  "learning_rate": 1.0980966325036604e-07,
1844
+ "loss": 0.3041,
1845
  "step": 1515
1846
  },
1847
  {
1848
  "epoch": 3.0,
1849
+ "eval_accuracy": 0.9372483687352492,
1850
+ "eval_loss": 0.17838290333747864,
1851
+ "eval_runtime": 56.76,
1852
+ "eval_samples_per_second": 126.903,
1853
+ "eval_steps_per_second": 3.982,
1854
  "step": 1518
1855
  },
1856
  {
1857
  "epoch": 3.0,
1858
  "step": 1518,
1859
  "total_flos": 4.829589697691566e+18,
1860
+ "train_loss": 0.4650797137551314,
1861
+ "train_runtime": 3147.7418,
1862
+ "train_samples_per_second": 61.783,
1863
+ "train_steps_per_second": 0.482
1864
  }
1865
  ],
1866
  "logging_steps": 5,