2022happy commited on
Commit
f523e74
1 Parent(s): b8cc52e

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 2.99,
3
- "eval_accuracy": 0.9684,
4
- "eval_loss": 0.09927555173635483,
5
- "eval_runtime": 29.2618,
6
- "eval_samples_per_second": 170.872,
7
- "eval_steps_per_second": 5.365,
8
  "total_flos": 3.3497451642252165e+18,
9
- "train_loss": 0.5528646907915077,
10
- "train_runtime": 2092.8132,
11
- "train_samples_per_second": 64.506,
12
- "train_steps_per_second": 0.503
13
  }
 
1
  {
2
  "epoch": 2.99,
3
+ "eval_accuracy": 0.97,
4
+ "eval_loss": 0.08928382396697998,
5
+ "eval_runtime": 30.4532,
6
+ "eval_samples_per_second": 164.186,
7
+ "eval_steps_per_second": 5.155,
8
  "total_flos": 3.3497451642252165e+18,
9
+ "train_loss": 0.5605603609895661,
10
+ "train_runtime": 2054.8417,
11
+ "train_samples_per_second": 65.698,
12
+ "train_steps_per_second": 0.512
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.99,
3
- "eval_accuracy": 0.9684,
4
- "eval_loss": 0.09927555173635483,
5
- "eval_runtime": 29.2618,
6
- "eval_samples_per_second": 170.872,
7
- "eval_steps_per_second": 5.365
8
  }
 
1
  {
2
  "epoch": 2.99,
3
+ "eval_accuracy": 0.97,
4
+ "eval_loss": 0.08928382396697998,
5
+ "eval_runtime": 30.4532,
6
+ "eval_samples_per_second": 164.186,
7
+ "eval_steps_per_second": 5.155
8
  }
runs/Jun17_03-15-58_ae2ad5746172/events.out.tfevents.1686973906.ae2ad5746172.6160.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc5d65027f371b3b7063fafa353a3f8e40ece7fa34214926f5de02ba9c349da7
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.99,
3
  "total_flos": 3.3497451642252165e+18,
4
- "train_loss": 0.5528646907915077,
5
- "train_runtime": 2092.8132,
6
- "train_samples_per_second": 64.506,
7
- "train_steps_per_second": 0.503
8
  }
 
1
  {
2
  "epoch": 2.99,
3
  "total_flos": 3.3497451642252165e+18,
4
+ "train_loss": 0.5605603609895661,
5
+ "train_runtime": 2054.8417,
6
+ "train_samples_per_second": 65.698,
7
+ "train_steps_per_second": 0.512
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9684,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1053",
4
  "epoch": 2.9936034115138592,
5
  "global_step": 1053,
@@ -10,668 +10,668 @@
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 4.716981132075472e-06,
13
- "loss": 2.4256,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.06,
18
  "learning_rate": 9.433962264150944e-06,
19
- "loss": 2.33,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.09,
24
  "learning_rate": 1.4150943396226415e-05,
25
- "loss": 2.2084,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.11,
30
  "learning_rate": 1.8867924528301888e-05,
31
- "loss": 2.0657,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.14,
36
  "learning_rate": 2.358490566037736e-05,
37
- "loss": 1.7861,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.17,
42
  "learning_rate": 2.830188679245283e-05,
43
- "loss": 1.4363,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.2,
48
  "learning_rate": 3.30188679245283e-05,
49
- "loss": 1.2018,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.23,
54
  "learning_rate": 3.7735849056603776e-05,
55
- "loss": 0.9936,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.26,
60
  "learning_rate": 4.245283018867925e-05,
61
- "loss": 0.9007,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.28,
66
  "learning_rate": 4.716981132075472e-05,
67
- "loss": 0.7938,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.31,
72
  "learning_rate": 4.978880675818374e-05,
73
- "loss": 0.7828,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.34,
78
  "learning_rate": 4.9260823653643085e-05,
79
- "loss": 0.7341,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.37,
84
  "learning_rate": 4.8732840549102435e-05,
85
- "loss": 0.7327,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.4,
90
  "learning_rate": 4.820485744456177e-05,
91
- "loss": 0.7136,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.43,
96
  "learning_rate": 4.767687434002112e-05,
97
- "loss": 0.6352,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.45,
102
  "learning_rate": 4.7148891235480466e-05,
103
- "loss": 0.6567,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.48,
108
  "learning_rate": 4.662090813093981e-05,
109
- "loss": 0.6657,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.51,
114
  "learning_rate": 4.609292502639916e-05,
115
- "loss": 0.6137,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.54,
120
  "learning_rate": 4.55649419218585e-05,
121
- "loss": 0.5997,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.57,
126
  "learning_rate": 4.503695881731785e-05,
127
- "loss": 0.5445,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.6,
132
  "learning_rate": 4.45089757127772e-05,
133
- "loss": 0.5711,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.63,
138
  "learning_rate": 4.398099260823654e-05,
139
- "loss": 0.5849,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.65,
144
  "learning_rate": 4.3453009503695884e-05,
145
- "loss": 0.5643,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.68,
150
  "learning_rate": 4.292502639915523e-05,
151
- "loss": 0.5207,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.71,
156
  "learning_rate": 4.239704329461457e-05,
157
- "loss": 0.5492,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.74,
162
  "learning_rate": 4.186906019007392e-05,
163
- "loss": 0.5168,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.77,
168
  "learning_rate": 4.1341077085533265e-05,
169
- "loss": 0.5418,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.8,
174
  "learning_rate": 4.081309398099261e-05,
175
- "loss": 0.5074,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.82,
180
  "learning_rate": 4.028511087645195e-05,
181
- "loss": 0.4611,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.85,
186
  "learning_rate": 3.97571277719113e-05,
187
- "loss": 0.4886,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.88,
192
  "learning_rate": 3.9229144667370646e-05,
193
- "loss": 0.4935,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.91,
198
  "learning_rate": 3.870116156282999e-05,
199
- "loss": 0.5317,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.94,
204
  "learning_rate": 3.817317845828934e-05,
205
- "loss": 0.494,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.97,
210
  "learning_rate": 3.764519535374868e-05,
211
- "loss": 0.5144,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 1.0,
216
  "learning_rate": 3.711721224920803e-05,
217
- "loss": 0.4694,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 1.0,
222
- "eval_accuracy": 0.9514,
223
- "eval_loss": 0.14383168518543243,
224
- "eval_runtime": 30.8843,
225
- "eval_samples_per_second": 161.895,
226
- "eval_steps_per_second": 5.083,
227
  "step": 351
228
  },
229
  {
230
  "epoch": 1.02,
231
  "learning_rate": 3.658922914466738e-05,
232
- "loss": 0.4501,
233
  "step": 360
234
  },
235
  {
236
  "epoch": 1.05,
237
  "learning_rate": 3.6061246040126714e-05,
238
- "loss": 0.4318,
239
  "step": 370
240
  },
241
  {
242
  "epoch": 1.08,
243
  "learning_rate": 3.5533262935586064e-05,
244
- "loss": 0.432,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 1.11,
249
  "learning_rate": 3.500527983104541e-05,
250
- "loss": 0.4569,
251
  "step": 390
252
  },
253
  {
254
  "epoch": 1.14,
255
  "learning_rate": 3.447729672650475e-05,
256
- "loss": 0.4498,
257
  "step": 400
258
  },
259
  {
260
  "epoch": 1.17,
261
  "learning_rate": 3.3949313621964095e-05,
262
- "loss": 0.4444,
263
  "step": 410
264
  },
265
  {
266
  "epoch": 1.19,
267
  "learning_rate": 3.3421330517423445e-05,
268
- "loss": 0.4291,
269
  "step": 420
270
  },
271
  {
272
  "epoch": 1.22,
273
  "learning_rate": 3.289334741288279e-05,
274
- "loss": 0.4295,
275
  "step": 430
276
  },
277
  {
278
  "epoch": 1.25,
279
  "learning_rate": 3.236536430834213e-05,
280
- "loss": 0.4617,
281
  "step": 440
282
  },
283
  {
284
  "epoch": 1.28,
285
  "learning_rate": 3.183738120380148e-05,
286
- "loss": 0.4628,
287
  "step": 450
288
  },
289
  {
290
  "epoch": 1.31,
291
  "learning_rate": 3.130939809926082e-05,
292
- "loss": 0.4402,
293
  "step": 460
294
  },
295
  {
296
  "epoch": 1.34,
297
  "learning_rate": 3.078141499472017e-05,
298
- "loss": 0.4416,
299
  "step": 470
300
  },
301
  {
302
  "epoch": 1.36,
303
  "learning_rate": 3.0253431890179517e-05,
304
- "loss": 0.4297,
305
  "step": 480
306
  },
307
  {
308
  "epoch": 1.39,
309
  "learning_rate": 2.972544878563886e-05,
310
- "loss": 0.4621,
311
  "step": 490
312
  },
313
  {
314
  "epoch": 1.42,
315
  "learning_rate": 2.9197465681098207e-05,
316
- "loss": 0.4076,
317
  "step": 500
318
  },
319
  {
320
  "epoch": 1.45,
321
  "learning_rate": 2.8669482576557548e-05,
322
- "loss": 0.4724,
323
  "step": 510
324
  },
325
  {
326
  "epoch": 1.48,
327
  "learning_rate": 2.8141499472016898e-05,
328
- "loss": 0.4374,
329
  "step": 520
330
  },
331
  {
332
  "epoch": 1.51,
333
  "learning_rate": 2.7613516367476245e-05,
334
- "loss": 0.4335,
335
  "step": 530
336
  },
337
  {
338
  "epoch": 1.54,
339
  "learning_rate": 2.7085533262935585e-05,
340
- "loss": 0.3932,
341
  "step": 540
342
  },
343
  {
344
  "epoch": 1.56,
345
  "learning_rate": 2.6557550158394935e-05,
346
- "loss": 0.435,
347
  "step": 550
348
  },
349
  {
350
  "epoch": 1.59,
351
  "learning_rate": 2.6029567053854276e-05,
352
- "loss": 0.4322,
353
  "step": 560
354
  },
355
  {
356
  "epoch": 1.62,
357
  "learning_rate": 2.5501583949313622e-05,
358
- "loss": 0.419,
359
  "step": 570
360
  },
361
  {
362
  "epoch": 1.65,
363
  "learning_rate": 2.497360084477297e-05,
364
- "loss": 0.4331,
365
  "step": 580
366
  },
367
  {
368
  "epoch": 1.68,
369
  "learning_rate": 2.4445617740232313e-05,
370
- "loss": 0.438,
371
  "step": 590
372
  },
373
  {
374
  "epoch": 1.71,
375
  "learning_rate": 2.391763463569166e-05,
376
- "loss": 0.3929,
377
  "step": 600
378
  },
379
  {
380
  "epoch": 1.73,
381
  "learning_rate": 2.3389651531151003e-05,
382
- "loss": 0.4012,
383
  "step": 610
384
  },
385
  {
386
  "epoch": 1.76,
387
  "learning_rate": 2.286166842661035e-05,
388
- "loss": 0.3926,
389
  "step": 620
390
  },
391
  {
392
  "epoch": 1.79,
393
  "learning_rate": 2.2333685322069694e-05,
394
- "loss": 0.4396,
395
  "step": 630
396
  },
397
  {
398
  "epoch": 1.82,
399
  "learning_rate": 2.180570221752904e-05,
400
- "loss": 0.3653,
401
  "step": 640
402
  },
403
  {
404
  "epoch": 1.85,
405
  "learning_rate": 2.1277719112988384e-05,
406
- "loss": 0.4544,
407
  "step": 650
408
  },
409
  {
410
  "epoch": 1.88,
411
  "learning_rate": 2.074973600844773e-05,
412
- "loss": 0.3983,
413
  "step": 660
414
  },
415
  {
416
  "epoch": 1.9,
417
  "learning_rate": 2.0221752903907075e-05,
418
- "loss": 0.406,
419
  "step": 670
420
  },
421
  {
422
  "epoch": 1.93,
423
  "learning_rate": 1.9693769799366422e-05,
424
- "loss": 0.3858,
425
  "step": 680
426
  },
427
  {
428
  "epoch": 1.96,
429
  "learning_rate": 1.9165786694825765e-05,
430
- "loss": 0.3995,
431
  "step": 690
432
  },
433
  {
434
  "epoch": 1.99,
435
  "learning_rate": 1.863780359028511e-05,
436
- "loss": 0.406,
437
  "step": 700
438
  },
439
  {
440
  "epoch": 2.0,
441
  "eval_accuracy": 0.9652,
442
- "eval_loss": 0.1063823476433754,
443
- "eval_runtime": 30.989,
444
- "eval_samples_per_second": 161.347,
445
- "eval_steps_per_second": 5.066,
446
  "step": 703
447
  },
448
  {
449
  "epoch": 2.02,
450
  "learning_rate": 1.810982048574446e-05,
451
- "loss": 0.3672,
452
  "step": 710
453
  },
454
  {
455
  "epoch": 2.05,
456
  "learning_rate": 1.7581837381203803e-05,
457
- "loss": 0.3513,
458
  "step": 720
459
  },
460
  {
461
  "epoch": 2.08,
462
  "learning_rate": 1.7053854276663146e-05,
463
- "loss": 0.3643,
464
  "step": 730
465
  },
466
  {
467
  "epoch": 2.1,
468
  "learning_rate": 1.6525871172122493e-05,
469
- "loss": 0.3528,
470
  "step": 740
471
  },
472
  {
473
  "epoch": 2.13,
474
  "learning_rate": 1.5997888067581837e-05,
475
- "loss": 0.3633,
476
  "step": 750
477
  },
478
  {
479
  "epoch": 2.16,
480
  "learning_rate": 1.5469904963041184e-05,
481
- "loss": 0.3427,
482
  "step": 760
483
  },
484
  {
485
  "epoch": 2.19,
486
  "learning_rate": 1.4941921858500529e-05,
487
- "loss": 0.3498,
488
  "step": 770
489
  },
490
  {
491
  "epoch": 2.22,
492
  "learning_rate": 1.4413938753959874e-05,
493
- "loss": 0.3615,
494
  "step": 780
495
  },
496
  {
497
  "epoch": 2.25,
498
  "learning_rate": 1.388595564941922e-05,
499
- "loss": 0.3253,
500
  "step": 790
501
  },
502
  {
503
  "epoch": 2.27,
504
  "learning_rate": 1.3357972544878563e-05,
505
- "loss": 0.3979,
506
  "step": 800
507
  },
508
  {
509
  "epoch": 2.3,
510
  "learning_rate": 1.2829989440337912e-05,
511
- "loss": 0.4142,
512
  "step": 810
513
  },
514
  {
515
  "epoch": 2.33,
516
  "learning_rate": 1.2302006335797255e-05,
517
- "loss": 0.3514,
518
  "step": 820
519
  },
520
  {
521
  "epoch": 2.36,
522
  "learning_rate": 1.17740232312566e-05,
523
- "loss": 0.4019,
524
  "step": 830
525
  },
526
  {
527
  "epoch": 2.39,
528
  "learning_rate": 1.1246040126715946e-05,
529
- "loss": 0.3533,
530
  "step": 840
531
  },
532
  {
533
  "epoch": 2.42,
534
  "learning_rate": 1.0718057022175291e-05,
535
- "loss": 0.3902,
536
  "step": 850
537
  },
538
  {
539
  "epoch": 2.44,
540
  "learning_rate": 1.0190073917634636e-05,
541
- "loss": 0.4102,
542
  "step": 860
543
  },
544
  {
545
  "epoch": 2.47,
546
  "learning_rate": 9.662090813093982e-06,
547
- "loss": 0.3479,
548
  "step": 870
549
  },
550
  {
551
  "epoch": 2.5,
552
  "learning_rate": 9.134107708553327e-06,
553
- "loss": 0.3473,
554
  "step": 880
555
  },
556
  {
557
  "epoch": 2.53,
558
  "learning_rate": 8.606124604012672e-06,
559
- "loss": 0.3691,
560
  "step": 890
561
  },
562
  {
563
  "epoch": 2.56,
564
  "learning_rate": 8.078141499472017e-06,
565
- "loss": 0.3778,
566
  "step": 900
567
  },
568
  {
569
  "epoch": 2.59,
570
  "learning_rate": 7.5501583949313625e-06,
571
- "loss": 0.3945,
572
  "step": 910
573
  },
574
  {
575
  "epoch": 2.62,
576
  "learning_rate": 7.022175290390708e-06,
577
- "loss": 0.3241,
578
  "step": 920
579
  },
580
  {
581
  "epoch": 2.64,
582
  "learning_rate": 6.494192185850054e-06,
583
- "loss": 0.3521,
584
  "step": 930
585
  },
586
  {
587
  "epoch": 2.67,
588
  "learning_rate": 5.966209081309398e-06,
589
- "loss": 0.3472,
590
  "step": 940
591
  },
592
  {
593
  "epoch": 2.7,
594
  "learning_rate": 5.438225976768744e-06,
595
- "loss": 0.3409,
596
  "step": 950
597
  },
598
  {
599
  "epoch": 2.73,
600
  "learning_rate": 4.910242872228089e-06,
601
- "loss": 0.3255,
602
  "step": 960
603
  },
604
  {
605
  "epoch": 2.76,
606
  "learning_rate": 4.382259767687434e-06,
607
- "loss": 0.3295,
608
  "step": 970
609
  },
610
  {
611
  "epoch": 2.79,
612
  "learning_rate": 3.854276663146779e-06,
613
- "loss": 0.342,
614
  "step": 980
615
  },
616
  {
617
  "epoch": 2.81,
618
  "learning_rate": 3.326293558606125e-06,
619
- "loss": 0.3773,
620
  "step": 990
621
  },
622
  {
623
  "epoch": 2.84,
624
  "learning_rate": 2.79831045406547e-06,
625
- "loss": 0.3065,
626
  "step": 1000
627
  },
628
  {
629
  "epoch": 2.87,
630
  "learning_rate": 2.2703273495248154e-06,
631
- "loss": 0.3176,
632
  "step": 1010
633
  },
634
  {
635
  "epoch": 2.9,
636
  "learning_rate": 1.7423442449841606e-06,
637
- "loss": 0.3812,
638
  "step": 1020
639
  },
640
  {
641
  "epoch": 2.93,
642
  "learning_rate": 1.2143611404435059e-06,
643
- "loss": 0.3451,
644
  "step": 1030
645
  },
646
  {
647
  "epoch": 2.96,
648
  "learning_rate": 6.863780359028511e-07,
649
- "loss": 0.3498,
650
  "step": 1040
651
  },
652
  {
653
  "epoch": 2.99,
654
  "learning_rate": 1.5839493136219642e-07,
655
- "loss": 0.3475,
656
  "step": 1050
657
  },
658
  {
659
  "epoch": 2.99,
660
- "eval_accuracy": 0.9684,
661
- "eval_loss": 0.09927555173635483,
662
- "eval_runtime": 31.3702,
663
- "eval_samples_per_second": 159.387,
664
- "eval_steps_per_second": 5.005,
665
  "step": 1053
666
  },
667
  {
668
  "epoch": 2.99,
669
  "step": 1053,
670
  "total_flos": 3.3497451642252165e+18,
671
- "train_loss": 0.5528646907915077,
672
- "train_runtime": 2092.8132,
673
- "train_samples_per_second": 64.506,
674
- "train_steps_per_second": 0.503
675
  }
676
  ],
677
  "max_steps": 1053,
 
1
  {
2
+ "best_metric": 0.97,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1053",
4
  "epoch": 2.9936034115138592,
5
  "global_step": 1053,
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 4.716981132075472e-06,
13
+ "loss": 2.3819,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.06,
18
  "learning_rate": 9.433962264150944e-06,
19
+ "loss": 2.3023,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.09,
24
  "learning_rate": 1.4150943396226415e-05,
25
+ "loss": 2.1803,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.11,
30
  "learning_rate": 1.8867924528301888e-05,
31
+ "loss": 1.9846,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.14,
36
  "learning_rate": 2.358490566037736e-05,
37
+ "loss": 1.743,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.17,
42
  "learning_rate": 2.830188679245283e-05,
43
+ "loss": 1.3872,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.2,
48
  "learning_rate": 3.30188679245283e-05,
49
+ "loss": 1.2176,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.23,
54
  "learning_rate": 3.7735849056603776e-05,
55
+ "loss": 1.0599,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.26,
60
  "learning_rate": 4.245283018867925e-05,
61
+ "loss": 0.9988,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.28,
66
  "learning_rate": 4.716981132075472e-05,
67
+ "loss": 0.9403,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.31,
72
  "learning_rate": 4.978880675818374e-05,
73
+ "loss": 0.8154,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.34,
78
  "learning_rate": 4.9260823653643085e-05,
79
+ "loss": 0.778,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.37,
84
  "learning_rate": 4.8732840549102435e-05,
85
+ "loss": 0.7254,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.4,
90
  "learning_rate": 4.820485744456177e-05,
91
+ "loss": 0.6687,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.43,
96
  "learning_rate": 4.767687434002112e-05,
97
+ "loss": 0.6841,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.45,
102
  "learning_rate": 4.7148891235480466e-05,
103
+ "loss": 0.6386,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.48,
108
  "learning_rate": 4.662090813093981e-05,
109
+ "loss": 0.6381,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.51,
114
  "learning_rate": 4.609292502639916e-05,
115
+ "loss": 0.6225,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.54,
120
  "learning_rate": 4.55649419218585e-05,
121
+ "loss": 0.58,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.57,
126
  "learning_rate": 4.503695881731785e-05,
127
+ "loss": 0.6368,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.6,
132
  "learning_rate": 4.45089757127772e-05,
133
+ "loss": 0.5942,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.63,
138
  "learning_rate": 4.398099260823654e-05,
139
+ "loss": 0.6018,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.65,
144
  "learning_rate": 4.3453009503695884e-05,
145
+ "loss": 0.5649,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.68,
150
  "learning_rate": 4.292502639915523e-05,
151
+ "loss": 0.5518,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.71,
156
  "learning_rate": 4.239704329461457e-05,
157
+ "loss": 0.5486,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.74,
162
  "learning_rate": 4.186906019007392e-05,
163
+ "loss": 0.5552,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.77,
168
  "learning_rate": 4.1341077085533265e-05,
169
+ "loss": 0.5624,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.8,
174
  "learning_rate": 4.081309398099261e-05,
175
+ "loss": 0.5557,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.82,
180
  "learning_rate": 4.028511087645195e-05,
181
+ "loss": 0.5145,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.85,
186
  "learning_rate": 3.97571277719113e-05,
187
+ "loss": 0.5246,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.88,
192
  "learning_rate": 3.9229144667370646e-05,
193
+ "loss": 0.4836,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.91,
198
  "learning_rate": 3.870116156282999e-05,
199
+ "loss": 0.5173,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.94,
204
  "learning_rate": 3.817317845828934e-05,
205
+ "loss": 0.4717,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.97,
210
  "learning_rate": 3.764519535374868e-05,
211
+ "loss": 0.4785,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 1.0,
216
  "learning_rate": 3.711721224920803e-05,
217
+ "loss": 0.5255,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 1.0,
222
+ "eval_accuracy": 0.9596,
223
+ "eval_loss": 0.12615099549293518,
224
+ "eval_runtime": 30.269,
225
+ "eval_samples_per_second": 165.185,
226
+ "eval_steps_per_second": 5.187,
227
  "step": 351
228
  },
229
  {
230
  "epoch": 1.02,
231
  "learning_rate": 3.658922914466738e-05,
232
+ "loss": 0.4642,
233
  "step": 360
234
  },
235
  {
236
  "epoch": 1.05,
237
  "learning_rate": 3.6061246040126714e-05,
238
+ "loss": 0.5111,
239
  "step": 370
240
  },
241
  {
242
  "epoch": 1.08,
243
  "learning_rate": 3.5533262935586064e-05,
244
+ "loss": 0.4744,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 1.11,
249
  "learning_rate": 3.500527983104541e-05,
250
+ "loss": 0.4522,
251
  "step": 390
252
  },
253
  {
254
  "epoch": 1.14,
255
  "learning_rate": 3.447729672650475e-05,
256
+ "loss": 0.4676,
257
  "step": 400
258
  },
259
  {
260
  "epoch": 1.17,
261
  "learning_rate": 3.3949313621964095e-05,
262
+ "loss": 0.4522,
263
  "step": 410
264
  },
265
  {
266
  "epoch": 1.19,
267
  "learning_rate": 3.3421330517423445e-05,
268
+ "loss": 0.4124,
269
  "step": 420
270
  },
271
  {
272
  "epoch": 1.22,
273
  "learning_rate": 3.289334741288279e-05,
274
+ "loss": 0.4454,
275
  "step": 430
276
  },
277
  {
278
  "epoch": 1.25,
279
  "learning_rate": 3.236536430834213e-05,
280
+ "loss": 0.4763,
281
  "step": 440
282
  },
283
  {
284
  "epoch": 1.28,
285
  "learning_rate": 3.183738120380148e-05,
286
+ "loss": 0.4478,
287
  "step": 450
288
  },
289
  {
290
  "epoch": 1.31,
291
  "learning_rate": 3.130939809926082e-05,
292
+ "loss": 0.4486,
293
  "step": 460
294
  },
295
  {
296
  "epoch": 1.34,
297
  "learning_rate": 3.078141499472017e-05,
298
+ "loss": 0.4103,
299
  "step": 470
300
  },
301
  {
302
  "epoch": 1.36,
303
  "learning_rate": 3.0253431890179517e-05,
304
+ "loss": 0.4655,
305
  "step": 480
306
  },
307
  {
308
  "epoch": 1.39,
309
  "learning_rate": 2.972544878563886e-05,
310
+ "loss": 0.4207,
311
  "step": 490
312
  },
313
  {
314
  "epoch": 1.42,
315
  "learning_rate": 2.9197465681098207e-05,
316
+ "loss": 0.5019,
317
  "step": 500
318
  },
319
  {
320
  "epoch": 1.45,
321
  "learning_rate": 2.8669482576557548e-05,
322
+ "loss": 0.4677,
323
  "step": 510
324
  },
325
  {
326
  "epoch": 1.48,
327
  "learning_rate": 2.8141499472016898e-05,
328
+ "loss": 0.4332,
329
  "step": 520
330
  },
331
  {
332
  "epoch": 1.51,
333
  "learning_rate": 2.7613516367476245e-05,
334
+ "loss": 0.4122,
335
  "step": 530
336
  },
337
  {
338
  "epoch": 1.54,
339
  "learning_rate": 2.7085533262935585e-05,
340
+ "loss": 0.4274,
341
  "step": 540
342
  },
343
  {
344
  "epoch": 1.56,
345
  "learning_rate": 2.6557550158394935e-05,
346
+ "loss": 0.4283,
347
  "step": 550
348
  },
349
  {
350
  "epoch": 1.59,
351
  "learning_rate": 2.6029567053854276e-05,
352
+ "loss": 0.3967,
353
  "step": 560
354
  },
355
  {
356
  "epoch": 1.62,
357
  "learning_rate": 2.5501583949313622e-05,
358
+ "loss": 0.4635,
359
  "step": 570
360
  },
361
  {
362
  "epoch": 1.65,
363
  "learning_rate": 2.497360084477297e-05,
364
+ "loss": 0.3793,
365
  "step": 580
366
  },
367
  {
368
  "epoch": 1.68,
369
  "learning_rate": 2.4445617740232313e-05,
370
+ "loss": 0.3969,
371
  "step": 590
372
  },
373
  {
374
  "epoch": 1.71,
375
  "learning_rate": 2.391763463569166e-05,
376
+ "loss": 0.3831,
377
  "step": 600
378
  },
379
  {
380
  "epoch": 1.73,
381
  "learning_rate": 2.3389651531151003e-05,
382
+ "loss": 0.4043,
383
  "step": 610
384
  },
385
  {
386
  "epoch": 1.76,
387
  "learning_rate": 2.286166842661035e-05,
388
+ "loss": 0.4385,
389
  "step": 620
390
  },
391
  {
392
  "epoch": 1.79,
393
  "learning_rate": 2.2333685322069694e-05,
394
+ "loss": 0.3998,
395
  "step": 630
396
  },
397
  {
398
  "epoch": 1.82,
399
  "learning_rate": 2.180570221752904e-05,
400
+ "loss": 0.4186,
401
  "step": 640
402
  },
403
  {
404
  "epoch": 1.85,
405
  "learning_rate": 2.1277719112988384e-05,
406
+ "loss": 0.404,
407
  "step": 650
408
  },
409
  {
410
  "epoch": 1.88,
411
  "learning_rate": 2.074973600844773e-05,
412
+ "loss": 0.4175,
413
  "step": 660
414
  },
415
  {
416
  "epoch": 1.9,
417
  "learning_rate": 2.0221752903907075e-05,
418
+ "loss": 0.3901,
419
  "step": 670
420
  },
421
  {
422
  "epoch": 1.93,
423
  "learning_rate": 1.9693769799366422e-05,
424
+ "loss": 0.4074,
425
  "step": 680
426
  },
427
  {
428
  "epoch": 1.96,
429
  "learning_rate": 1.9165786694825765e-05,
430
+ "loss": 0.3917,
431
  "step": 690
432
  },
433
  {
434
  "epoch": 1.99,
435
  "learning_rate": 1.863780359028511e-05,
436
+ "loss": 0.3808,
437
  "step": 700
438
  },
439
  {
440
  "epoch": 2.0,
441
  "eval_accuracy": 0.9652,
442
+ "eval_loss": 0.10313060134649277,
443
+ "eval_runtime": 30.4698,
444
+ "eval_samples_per_second": 164.097,
445
+ "eval_steps_per_second": 5.153,
446
  "step": 703
447
  },
448
  {
449
  "epoch": 2.02,
450
  "learning_rate": 1.810982048574446e-05,
451
+ "loss": 0.3707,
452
  "step": 710
453
  },
454
  {
455
  "epoch": 2.05,
456
  "learning_rate": 1.7581837381203803e-05,
457
+ "loss": 0.3597,
458
  "step": 720
459
  },
460
  {
461
  "epoch": 2.08,
462
  "learning_rate": 1.7053854276663146e-05,
463
+ "loss": 0.3906,
464
  "step": 730
465
  },
466
  {
467
  "epoch": 2.1,
468
  "learning_rate": 1.6525871172122493e-05,
469
+ "loss": 0.381,
470
  "step": 740
471
  },
472
  {
473
  "epoch": 2.13,
474
  "learning_rate": 1.5997888067581837e-05,
475
+ "loss": 0.3813,
476
  "step": 750
477
  },
478
  {
479
  "epoch": 2.16,
480
  "learning_rate": 1.5469904963041184e-05,
481
+ "loss": 0.3512,
482
  "step": 760
483
  },
484
  {
485
  "epoch": 2.19,
486
  "learning_rate": 1.4941921858500529e-05,
487
+ "loss": 0.3623,
488
  "step": 770
489
  },
490
  {
491
  "epoch": 2.22,
492
  "learning_rate": 1.4413938753959874e-05,
493
+ "loss": 0.3779,
494
  "step": 780
495
  },
496
  {
497
  "epoch": 2.25,
498
  "learning_rate": 1.388595564941922e-05,
499
+ "loss": 0.3455,
500
  "step": 790
501
  },
502
  {
503
  "epoch": 2.27,
504
  "learning_rate": 1.3357972544878563e-05,
505
+ "loss": 0.3627,
506
  "step": 800
507
  },
508
  {
509
  "epoch": 2.3,
510
  "learning_rate": 1.2829989440337912e-05,
511
+ "loss": 0.3563,
512
  "step": 810
513
  },
514
  {
515
  "epoch": 2.33,
516
  "learning_rate": 1.2302006335797255e-05,
517
+ "loss": 0.3656,
518
  "step": 820
519
  },
520
  {
521
  "epoch": 2.36,
522
  "learning_rate": 1.17740232312566e-05,
523
+ "loss": 0.4006,
524
  "step": 830
525
  },
526
  {
527
  "epoch": 2.39,
528
  "learning_rate": 1.1246040126715946e-05,
529
+ "loss": 0.3738,
530
  "step": 840
531
  },
532
  {
533
  "epoch": 2.42,
534
  "learning_rate": 1.0718057022175291e-05,
535
+ "loss": 0.3639,
536
  "step": 850
537
  },
538
  {
539
  "epoch": 2.44,
540
  "learning_rate": 1.0190073917634636e-05,
541
+ "loss": 0.3911,
542
  "step": 860
543
  },
544
  {
545
  "epoch": 2.47,
546
  "learning_rate": 9.662090813093982e-06,
547
+ "loss": 0.368,
548
  "step": 870
549
  },
550
  {
551
  "epoch": 2.5,
552
  "learning_rate": 9.134107708553327e-06,
553
+ "loss": 0.352,
554
  "step": 880
555
  },
556
  {
557
  "epoch": 2.53,
558
  "learning_rate": 8.606124604012672e-06,
559
+ "loss": 0.3726,
560
  "step": 890
561
  },
562
  {
563
  "epoch": 2.56,
564
  "learning_rate": 8.078141499472017e-06,
565
+ "loss": 0.3765,
566
  "step": 900
567
  },
568
  {
569
  "epoch": 2.59,
570
  "learning_rate": 7.5501583949313625e-06,
571
+ "loss": 0.3967,
572
  "step": 910
573
  },
574
  {
575
  "epoch": 2.62,
576
  "learning_rate": 7.022175290390708e-06,
577
+ "loss": 0.3786,
578
  "step": 920
579
  },
580
  {
581
  "epoch": 2.64,
582
  "learning_rate": 6.494192185850054e-06,
583
+ "loss": 0.3749,
584
  "step": 930
585
  },
586
  {
587
  "epoch": 2.67,
588
  "learning_rate": 5.966209081309398e-06,
589
+ "loss": 0.4022,
590
  "step": 940
591
  },
592
  {
593
  "epoch": 2.7,
594
  "learning_rate": 5.438225976768744e-06,
595
+ "loss": 0.3819,
596
  "step": 950
597
  },
598
  {
599
  "epoch": 2.73,
600
  "learning_rate": 4.910242872228089e-06,
601
+ "loss": 0.3228,
602
  "step": 960
603
  },
604
  {
605
  "epoch": 2.76,
606
  "learning_rate": 4.382259767687434e-06,
607
+ "loss": 0.3876,
608
  "step": 970
609
  },
610
  {
611
  "epoch": 2.79,
612
  "learning_rate": 3.854276663146779e-06,
613
+ "loss": 0.4033,
614
  "step": 980
615
  },
616
  {
617
  "epoch": 2.81,
618
  "learning_rate": 3.326293558606125e-06,
619
+ "loss": 0.3512,
620
  "step": 990
621
  },
622
  {
623
  "epoch": 2.84,
624
  "learning_rate": 2.79831045406547e-06,
625
+ "loss": 0.3261,
626
  "step": 1000
627
  },
628
  {
629
  "epoch": 2.87,
630
  "learning_rate": 2.2703273495248154e-06,
631
+ "loss": 0.3584,
632
  "step": 1010
633
  },
634
  {
635
  "epoch": 2.9,
636
  "learning_rate": 1.7423442449841606e-06,
637
+ "loss": 0.3093,
638
  "step": 1020
639
  },
640
  {
641
  "epoch": 2.93,
642
  "learning_rate": 1.2143611404435059e-06,
643
+ "loss": 0.3498,
644
  "step": 1030
645
  },
646
  {
647
  "epoch": 2.96,
648
  "learning_rate": 6.863780359028511e-07,
649
+ "loss": 0.3284,
650
  "step": 1040
651
  },
652
  {
653
  "epoch": 2.99,
654
  "learning_rate": 1.5839493136219642e-07,
655
+ "loss": 0.3268,
656
  "step": 1050
657
  },
658
  {
659
  "epoch": 2.99,
660
+ "eval_accuracy": 0.97,
661
+ "eval_loss": 0.08928382396697998,
662
+ "eval_runtime": 30.2429,
663
+ "eval_samples_per_second": 165.328,
664
+ "eval_steps_per_second": 5.191,
665
  "step": 1053
666
  },
667
  {
668
  "epoch": 2.99,
669
  "step": 1053,
670
  "total_flos": 3.3497451642252165e+18,
671
+ "train_loss": 0.5605603609895661,
672
+ "train_runtime": 2054.8417,
673
+ "train_samples_per_second": 65.698,
674
+ "train_steps_per_second": 0.512
675
  }
676
  ],
677
  "max_steps": 1053,