mansee commited on
Commit
78a7852
1 Parent(s): 15f8bbe

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +792 -276
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.7400272758267985,
4
- "eval_loss": 0.4305528998374939,
5
- "eval_runtime": 43.766,
6
- "eval_samples_per_second": 134.031,
7
- "eval_steps_per_second": 4.204,
8
- "total_flos": 3.932039490764636e+18,
9
- "train_loss": 0.6422953501488399,
10
- "train_runtime": 2631.7494,
11
- "train_samples_per_second": 60.174,
12
- "train_steps_per_second": 0.47
13
  }
 
1
  {
2
+ "epoch": 4.99,
3
+ "eval_accuracy": 0.7797476986021139,
4
+ "eval_loss": 0.3965856432914734,
5
+ "eval_runtime": 44.3045,
6
+ "eval_samples_per_second": 132.402,
7
+ "eval_steps_per_second": 4.153,
8
+ "total_flos": 6.553200292249928e+18,
9
+ "train_loss": 0.4588474294514332,
10
+ "train_runtime": 4338.0094,
11
+ "train_samples_per_second": 60.844,
12
+ "train_steps_per_second": 0.475
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.7400272758267985,
4
- "eval_loss": 0.4305528998374939,
5
- "eval_runtime": 43.766,
6
- "eval_samples_per_second": 134.031,
7
- "eval_steps_per_second": 4.204
8
  }
 
1
  {
2
+ "epoch": 4.99,
3
+ "eval_accuracy": 0.7797476986021139,
4
+ "eval_loss": 0.3965856432914734,
5
+ "eval_runtime": 44.3045,
6
+ "eval_samples_per_second": 132.402,
7
+ "eval_steps_per_second": 4.153
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 3.932039490764636e+18,
4
- "train_loss": 0.6422953501488399,
5
- "train_runtime": 2631.7494,
6
- "train_samples_per_second": 60.174,
7
- "train_steps_per_second": 0.47
8
  }
 
1
  {
2
+ "epoch": 4.99,
3
+ "total_flos": 6.553200292249928e+18,
4
+ "train_loss": 0.4588474294514332,
5
+ "train_runtime": 4338.0094,
6
+ "train_samples_per_second": 60.844,
7
+ "train_steps_per_second": 0.475
8
  }
trainer_state.json CHANGED
@@ -1,793 +1,1309 @@
1
  {
2
- "best_metric": 0.7400272758267985,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-img_orientation/checkpoint-1236",
4
- "epoch": 2.996363636363636,
5
  "eval_steps": 500,
6
- "global_step": 1236,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
- "learning_rate": 4.032258064516129e-06,
14
- "loss": 1.4924,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.05,
19
- "learning_rate": 8.064516129032258e-06,
20
- "loss": 1.4872,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.07,
25
- "learning_rate": 1.2096774193548388e-05,
26
- "loss": 1.4135,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.1,
31
- "learning_rate": 1.6129032258064517e-05,
32
- "loss": 1.3284,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.12,
37
- "learning_rate": 2.0161290322580645e-05,
38
- "loss": 1.2488,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.15,
43
- "learning_rate": 2.4193548387096777e-05,
44
- "loss": 1.1429,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.17,
49
- "learning_rate": 2.822580645161291e-05,
50
- "loss": 1.0344,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.19,
55
- "learning_rate": 3.2258064516129034e-05,
56
- "loss": 0.9642,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.22,
61
- "learning_rate": 3.6290322580645165e-05,
62
- "loss": 0.9026,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.24,
67
- "learning_rate": 4.032258064516129e-05,
68
- "loss": 0.8296,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.27,
73
- "learning_rate": 4.435483870967742e-05,
74
- "loss": 0.8004,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.29,
79
- "learning_rate": 4.8387096774193554e-05,
80
- "loss": 0.7866,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.32,
85
- "learning_rate": 4.973021582733813e-05,
86
- "loss": 0.8019,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.34,
91
- "learning_rate": 4.9280575539568345e-05,
92
- "loss": 0.7517,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.36,
97
- "learning_rate": 4.8830935251798564e-05,
98
- "loss": 0.7859,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.39,
103
- "learning_rate": 4.838129496402878e-05,
104
- "loss": 0.7395,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.41,
109
- "learning_rate": 4.7931654676259e-05,
110
- "loss": 0.7045,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.44,
115
- "learning_rate": 4.748201438848921e-05,
116
- "loss": 0.7376,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.46,
121
- "learning_rate": 4.703237410071943e-05,
122
- "loss": 0.7126,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.48,
127
- "learning_rate": 4.658273381294964e-05,
128
- "loss": 0.7137,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.51,
133
- "learning_rate": 4.613309352517986e-05,
134
- "loss": 0.6817,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.53,
139
- "learning_rate": 4.5683453237410076e-05,
140
- "loss": 0.6957,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.56,
145
- "learning_rate": 4.523381294964029e-05,
146
- "loss": 0.7066,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.58,
151
- "learning_rate": 4.47841726618705e-05,
152
- "loss": 0.6956,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.61,
157
- "learning_rate": 4.433453237410072e-05,
158
- "loss": 0.6906,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.63,
163
- "learning_rate": 4.3884892086330935e-05,
164
- "loss": 0.6428,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.65,
169
- "learning_rate": 4.3435251798561155e-05,
170
- "loss": 0.6702,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.68,
175
- "learning_rate": 4.298561151079137e-05,
176
- "loss": 0.6601,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.7,
181
- "learning_rate": 4.253597122302159e-05,
182
- "loss": 0.6324,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.73,
187
- "learning_rate": 4.20863309352518e-05,
188
- "loss": 0.6677,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.75,
193
- "learning_rate": 4.163669064748202e-05,
194
- "loss": 0.691,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 0.78,
199
- "learning_rate": 4.1187050359712234e-05,
200
- "loss": 0.6523,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 0.8,
205
- "learning_rate": 4.073741007194245e-05,
206
- "loss": 0.615,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 0.82,
211
- "learning_rate": 4.0287769784172666e-05,
212
- "loss": 0.6322,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 0.85,
217
- "learning_rate": 3.983812949640288e-05,
218
- "loss": 0.6361,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 0.87,
223
- "learning_rate": 3.938848920863309e-05,
224
- "loss": 0.6311,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 0.9,
229
- "learning_rate": 3.893884892086331e-05,
230
- "loss": 0.667,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 0.92,
235
- "learning_rate": 3.8489208633093525e-05,
236
- "loss": 0.6221,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 0.95,
241
- "learning_rate": 3.8039568345323745e-05,
242
- "loss": 0.6288,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 0.97,
247
- "learning_rate": 3.758992805755396e-05,
248
- "loss": 0.6334,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 0.99,
253
- "learning_rate": 3.714028776978418e-05,
254
- "loss": 0.632,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 1.0,
259
- "eval_accuracy": 0.706273440163655,
260
- "eval_loss": 0.502089262008667,
261
- "eval_runtime": 45.3111,
262
- "eval_samples_per_second": 129.461,
263
- "eval_steps_per_second": 4.061,
264
  "step": 412
265
  },
266
  {
267
  "epoch": 1.02,
268
- "learning_rate": 3.669064748201439e-05,
269
- "loss": 0.6121,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.04,
274
- "learning_rate": 3.624100719424461e-05,
275
- "loss": 0.5973,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.07,
280
- "learning_rate": 3.5791366906474824e-05,
281
- "loss": 0.6158,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 1.09,
286
- "learning_rate": 3.534172661870504e-05,
287
- "loss": 0.5954,
288
  "step": 450
289
  },
290
  {
291
  "epoch": 1.12,
292
- "learning_rate": 3.489208633093525e-05,
293
- "loss": 0.6073,
294
  "step": 460
295
  },
296
  {
297
  "epoch": 1.14,
298
- "learning_rate": 3.444244604316547e-05,
299
- "loss": 0.6001,
300
  "step": 470
301
  },
302
  {
303
  "epoch": 1.16,
304
- "learning_rate": 3.399280575539568e-05,
305
- "loss": 0.6057,
306
  "step": 480
307
  },
308
  {
309
  "epoch": 1.19,
310
- "learning_rate": 3.35431654676259e-05,
311
- "loss": 0.6111,
312
  "step": 490
313
  },
314
  {
315
  "epoch": 1.21,
316
- "learning_rate": 3.3093525179856116e-05,
317
- "loss": 0.6464,
318
  "step": 500
319
  },
320
  {
321
  "epoch": 1.24,
322
- "learning_rate": 3.2643884892086335e-05,
323
- "loss": 0.6279,
324
  "step": 510
325
  },
326
  {
327
  "epoch": 1.26,
328
- "learning_rate": 3.219424460431655e-05,
329
- "loss": 0.5821,
330
  "step": 520
331
  },
332
  {
333
  "epoch": 1.28,
334
- "learning_rate": 3.174460431654677e-05,
335
- "loss": 0.5661,
336
  "step": 530
337
  },
338
  {
339
  "epoch": 1.31,
340
- "learning_rate": 3.129496402877698e-05,
341
- "loss": 0.6214,
342
  "step": 540
343
  },
344
  {
345
  "epoch": 1.33,
346
- "learning_rate": 3.08453237410072e-05,
347
- "loss": 0.5995,
348
  "step": 550
349
  },
350
  {
351
  "epoch": 1.36,
352
- "learning_rate": 3.039568345323741e-05,
353
- "loss": 0.5977,
354
  "step": 560
355
  },
356
  {
357
  "epoch": 1.38,
358
- "learning_rate": 2.994604316546763e-05,
359
- "loss": 0.5727,
360
  "step": 570
361
  },
362
  {
363
  "epoch": 1.41,
364
- "learning_rate": 2.9496402877697844e-05,
365
- "loss": 0.5763,
366
  "step": 580
367
  },
368
  {
369
  "epoch": 1.43,
370
- "learning_rate": 2.904676258992806e-05,
371
- "loss": 0.5922,
372
  "step": 590
373
  },
374
  {
375
  "epoch": 1.45,
376
- "learning_rate": 2.8597122302158273e-05,
377
- "loss": 0.5991,
378
  "step": 600
379
  },
380
  {
381
  "epoch": 1.48,
382
- "learning_rate": 2.8147482014388493e-05,
383
- "loss": 0.5652,
384
  "step": 610
385
  },
386
  {
387
  "epoch": 1.5,
388
- "learning_rate": 2.7697841726618706e-05,
389
- "loss": 0.5861,
390
  "step": 620
391
  },
392
  {
393
  "epoch": 1.53,
394
- "learning_rate": 2.7248201438848926e-05,
395
- "loss": 0.5911,
396
  "step": 630
397
  },
398
  {
399
  "epoch": 1.55,
400
- "learning_rate": 2.679856115107914e-05,
401
- "loss": 0.5723,
402
  "step": 640
403
  },
404
  {
405
  "epoch": 1.58,
406
- "learning_rate": 2.6348920863309355e-05,
407
- "loss": 0.5803,
408
  "step": 650
409
  },
410
  {
411
  "epoch": 1.6,
412
- "learning_rate": 2.589928057553957e-05,
413
- "loss": 0.6001,
414
  "step": 660
415
  },
416
  {
417
  "epoch": 1.62,
418
- "learning_rate": 2.5449640287769788e-05,
419
- "loss": 0.5783,
420
  "step": 670
421
  },
422
  {
423
  "epoch": 1.65,
424
- "learning_rate": 2.5e-05,
425
- "loss": 0.5663,
426
  "step": 680
427
  },
428
  {
429
  "epoch": 1.67,
430
- "learning_rate": 2.4550359712230218e-05,
431
- "loss": 0.5916,
432
  "step": 690
433
  },
434
  {
435
  "epoch": 1.7,
436
- "learning_rate": 2.4100719424460434e-05,
437
- "loss": 0.5655,
438
  "step": 700
439
  },
440
  {
441
  "epoch": 1.72,
442
- "learning_rate": 2.3651079136690647e-05,
443
- "loss": 0.5553,
444
  "step": 710
445
  },
446
  {
447
  "epoch": 1.75,
448
- "learning_rate": 2.3201438848920864e-05,
449
- "loss": 0.5736,
450
  "step": 720
451
  },
452
  {
453
  "epoch": 1.77,
454
- "learning_rate": 2.275179856115108e-05,
455
- "loss": 0.5705,
456
  "step": 730
457
  },
458
  {
459
  "epoch": 1.79,
460
- "learning_rate": 2.2302158273381296e-05,
461
- "loss": 0.5744,
462
  "step": 740
463
  },
464
  {
465
  "epoch": 1.82,
466
- "learning_rate": 2.1852517985611513e-05,
467
- "loss": 0.6035,
468
  "step": 750
469
  },
470
  {
471
  "epoch": 1.84,
472
- "learning_rate": 2.140287769784173e-05,
473
- "loss": 0.5597,
474
  "step": 760
475
  },
476
  {
477
  "epoch": 1.87,
478
- "learning_rate": 2.0953237410071942e-05,
479
- "loss": 0.5818,
480
  "step": 770
481
  },
482
  {
483
  "epoch": 1.89,
484
- "learning_rate": 2.050359712230216e-05,
485
- "loss": 0.5626,
486
  "step": 780
487
  },
488
  {
489
  "epoch": 1.92,
490
- "learning_rate": 2.0053956834532375e-05,
491
- "loss": 0.5592,
492
  "step": 790
493
  },
494
  {
495
  "epoch": 1.94,
496
- "learning_rate": 1.960431654676259e-05,
497
- "loss": 0.5701,
498
  "step": 800
499
  },
500
  {
501
  "epoch": 1.96,
502
- "learning_rate": 1.9154676258992808e-05,
503
- "loss": 0.6025,
504
  "step": 810
505
  },
506
  {
507
  "epoch": 1.99,
508
- "learning_rate": 1.8705035971223024e-05,
509
- "loss": 0.576,
510
  "step": 820
511
  },
512
  {
513
  "epoch": 2.0,
514
- "eval_accuracy": 0.7269007841800205,
515
- "eval_loss": 0.4507100582122803,
516
- "eval_runtime": 44.6192,
517
- "eval_samples_per_second": 131.468,
518
- "eval_steps_per_second": 4.124,
519
  "step": 825
520
  },
521
  {
522
  "epoch": 2.01,
523
- "learning_rate": 1.8255395683453237e-05,
524
- "loss": 0.5507,
525
  "step": 830
526
  },
527
  {
528
  "epoch": 2.04,
529
- "learning_rate": 1.7805755395683454e-05,
530
- "loss": 0.5339,
531
  "step": 840
532
  },
533
  {
534
  "epoch": 2.06,
535
- "learning_rate": 1.735611510791367e-05,
536
- "loss": 0.5527,
537
  "step": 850
538
  },
539
  {
540
  "epoch": 2.08,
541
- "learning_rate": 1.6906474820143887e-05,
542
- "loss": 0.5221,
543
  "step": 860
544
  },
545
  {
546
  "epoch": 2.11,
547
- "learning_rate": 1.6456834532374103e-05,
548
- "loss": 0.555,
549
  "step": 870
550
  },
551
  {
552
  "epoch": 2.13,
553
- "learning_rate": 1.6007194244604316e-05,
554
- "loss": 0.5327,
555
  "step": 880
556
  },
557
  {
558
  "epoch": 2.16,
559
- "learning_rate": 1.5557553956834533e-05,
560
- "loss": 0.548,
561
  "step": 890
562
  },
563
  {
564
  "epoch": 2.18,
565
- "learning_rate": 1.5107913669064749e-05,
566
- "loss": 0.5315,
567
  "step": 900
568
  },
569
  {
570
  "epoch": 2.21,
571
- "learning_rate": 1.4658273381294965e-05,
572
- "loss": 0.5132,
573
  "step": 910
574
  },
575
  {
576
  "epoch": 2.23,
577
- "learning_rate": 1.420863309352518e-05,
578
- "loss": 0.5468,
579
  "step": 920
580
  },
581
  {
582
  "epoch": 2.25,
583
- "learning_rate": 1.3758992805755397e-05,
584
- "loss": 0.5452,
585
  "step": 930
586
  },
587
  {
588
  "epoch": 2.28,
589
- "learning_rate": 1.3309352517985613e-05,
590
- "loss": 0.5466,
591
  "step": 940
592
  },
593
  {
594
  "epoch": 2.3,
595
- "learning_rate": 1.2859712230215828e-05,
596
- "loss": 0.5377,
597
  "step": 950
598
  },
599
  {
600
  "epoch": 2.33,
601
- "learning_rate": 1.2410071942446044e-05,
602
- "loss": 0.5369,
603
  "step": 960
604
  },
605
  {
606
  "epoch": 2.35,
607
- "learning_rate": 1.196043165467626e-05,
608
- "loss": 0.5192,
609
  "step": 970
610
  },
611
  {
612
  "epoch": 2.38,
613
- "learning_rate": 1.1510791366906475e-05,
614
- "loss": 0.5519,
615
  "step": 980
616
  },
617
  {
618
  "epoch": 2.4,
619
- "learning_rate": 1.1061151079136692e-05,
620
- "loss": 0.5333,
621
  "step": 990
622
  },
623
  {
624
  "epoch": 2.42,
625
- "learning_rate": 1.0611510791366908e-05,
626
- "loss": 0.5251,
627
  "step": 1000
628
  },
629
  {
630
  "epoch": 2.45,
631
- "learning_rate": 1.0161870503597123e-05,
632
- "loss": 0.5498,
633
  "step": 1010
634
  },
635
  {
636
  "epoch": 2.47,
637
- "learning_rate": 9.71223021582734e-06,
638
- "loss": 0.5381,
639
  "step": 1020
640
  },
641
  {
642
  "epoch": 2.5,
643
- "learning_rate": 9.262589928057554e-06,
644
- "loss": 0.5141,
645
  "step": 1030
646
  },
647
  {
648
  "epoch": 2.52,
649
- "learning_rate": 8.812949640287769e-06,
650
- "loss": 0.5204,
651
  "step": 1040
652
  },
653
  {
654
  "epoch": 2.55,
655
- "learning_rate": 8.363309352517985e-06,
656
- "loss": 0.5341,
657
  "step": 1050
658
  },
659
  {
660
  "epoch": 2.57,
661
- "learning_rate": 7.913669064748202e-06,
662
- "loss": 0.5161,
663
  "step": 1060
664
  },
665
  {
666
  "epoch": 2.59,
667
- "learning_rate": 7.464028776978417e-06,
668
- "loss": 0.5201,
669
  "step": 1070
670
  },
671
  {
672
  "epoch": 2.62,
673
- "learning_rate": 7.014388489208633e-06,
674
- "loss": 0.5348,
675
  "step": 1080
676
  },
677
  {
678
  "epoch": 2.64,
679
- "learning_rate": 6.5647482014388485e-06,
680
- "loss": 0.5353,
681
  "step": 1090
682
  },
683
  {
684
  "epoch": 2.67,
685
- "learning_rate": 6.115107913669065e-06,
686
- "loss": 0.5133,
687
  "step": 1100
688
  },
689
  {
690
  "epoch": 2.69,
691
- "learning_rate": 5.665467625899281e-06,
692
- "loss": 0.5529,
693
  "step": 1110
694
  },
695
  {
696
  "epoch": 2.72,
697
- "learning_rate": 5.215827338129497e-06,
698
- "loss": 0.5265,
699
  "step": 1120
700
  },
701
  {
702
  "epoch": 2.74,
703
- "learning_rate": 4.7661870503597125e-06,
704
- "loss": 0.4983,
705
  "step": 1130
706
  },
707
  {
708
  "epoch": 2.76,
709
- "learning_rate": 4.316546762589929e-06,
710
- "loss": 0.5152,
711
  "step": 1140
712
  },
713
  {
714
  "epoch": 2.79,
715
- "learning_rate": 3.8669064748201445e-06,
716
- "loss": 0.5268,
717
  "step": 1150
718
  },
719
  {
720
  "epoch": 2.81,
721
- "learning_rate": 3.41726618705036e-06,
722
- "loss": 0.5258,
723
  "step": 1160
724
  },
725
  {
726
  "epoch": 2.84,
727
- "learning_rate": 2.9676258992805756e-06,
728
- "loss": 0.5326,
729
  "step": 1170
730
  },
731
  {
732
  "epoch": 2.86,
733
- "learning_rate": 2.5179856115107916e-06,
734
- "loss": 0.5031,
735
  "step": 1180
736
  },
737
  {
738
  "epoch": 2.88,
739
- "learning_rate": 2.0683453237410072e-06,
740
- "loss": 0.5527,
741
  "step": 1190
742
  },
743
  {
744
  "epoch": 2.91,
745
- "learning_rate": 1.618705035971223e-06,
746
- "loss": 0.5632,
747
  "step": 1200
748
  },
749
  {
750
  "epoch": 2.93,
751
- "learning_rate": 1.169064748201439e-06,
752
- "loss": 0.5227,
753
  "step": 1210
754
  },
755
  {
756
  "epoch": 2.96,
757
- "learning_rate": 7.194244604316547e-07,
758
- "loss": 0.5281,
759
  "step": 1220
760
  },
761
  {
762
  "epoch": 2.98,
763
- "learning_rate": 2.6978417266187056e-07,
764
- "loss": 0.5102,
765
  "step": 1230
766
  },
767
  {
768
  "epoch": 3.0,
769
- "eval_accuracy": 0.7400272758267985,
770
- "eval_loss": 0.4305528998374939,
771
- "eval_runtime": 43.2946,
772
- "eval_samples_per_second": 135.49,
773
- "eval_steps_per_second": 4.25,
774
- "step": 1236
775
  },
776
  {
777
- "epoch": 3.0,
778
- "step": 1236,
779
- "total_flos": 3.932039490764636e+18,
780
- "train_loss": 0.6422953501488399,
781
- "train_runtime": 2631.7494,
782
- "train_samples_per_second": 60.174,
783
- "train_steps_per_second": 0.47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
  }
785
  ],
786
  "logging_steps": 10,
787
- "max_steps": 1236,
788
- "num_train_epochs": 3,
789
  "save_steps": 500,
790
- "total_flos": 3.932039490764636e+18,
791
  "trial_name": null,
792
  "trial_params": null
793
  }
 
1
  {
2
+ "best_metric": 0.7797476986021139,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-img_orientation/checkpoint-2060",
4
+ "epoch": 4.993939393939394,
5
  "eval_steps": 500,
6
+ "global_step": 2060,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
+ "learning_rate": 2.4271844660194174e-06,
14
+ "loss": 0.5485,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.05,
19
+ "learning_rate": 4.854368932038835e-06,
20
+ "loss": 0.524,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.07,
25
+ "learning_rate": 7.281553398058253e-06,
26
+ "loss": 0.5436,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.1,
31
+ "learning_rate": 9.70873786407767e-06,
32
+ "loss": 0.5309,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.12,
37
+ "learning_rate": 1.2135922330097088e-05,
38
+ "loss": 0.5218,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.15,
43
+ "learning_rate": 1.4563106796116505e-05,
44
+ "loss": 0.4976,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.17,
49
+ "learning_rate": 1.6990291262135926e-05,
50
+ "loss": 0.5168,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.19,
55
+ "learning_rate": 1.941747572815534e-05,
56
+ "loss": 0.5168,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.22,
61
+ "learning_rate": 2.1844660194174756e-05,
62
+ "loss": 0.4882,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.24,
67
+ "learning_rate": 2.4271844660194176e-05,
68
+ "loss": 0.5174,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.27,
73
+ "learning_rate": 2.6699029126213593e-05,
74
+ "loss": 0.5012,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.29,
79
+ "learning_rate": 2.912621359223301e-05,
80
+ "loss": 0.462,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.32,
85
+ "learning_rate": 3.155339805825243e-05,
86
+ "loss": 0.4785,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.34,
91
+ "learning_rate": 3.398058252427185e-05,
92
+ "loss": 0.47,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.36,
97
+ "learning_rate": 3.6407766990291265e-05,
98
+ "loss": 0.4878,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.39,
103
+ "learning_rate": 3.883495145631068e-05,
104
+ "loss": 0.4648,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.41,
109
+ "learning_rate": 4.12621359223301e-05,
110
+ "loss": 0.4636,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.44,
115
+ "learning_rate": 4.368932038834951e-05,
116
+ "loss": 0.4862,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.46,
121
+ "learning_rate": 4.611650485436894e-05,
122
+ "loss": 0.4594,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.48,
127
+ "learning_rate": 4.854368932038835e-05,
128
+ "loss": 0.4882,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.51,
133
+ "learning_rate": 4.9892125134843584e-05,
134
+ "loss": 0.4429,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.53,
139
+ "learning_rate": 4.962243797195254e-05,
140
+ "loss": 0.4683,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.56,
145
+ "learning_rate": 4.935275080906149e-05,
146
+ "loss": 0.4689,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.58,
151
+ "learning_rate": 4.9083063646170444e-05,
152
+ "loss": 0.4786,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.61,
157
+ "learning_rate": 4.88133764832794e-05,
158
+ "loss": 0.5182,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.63,
163
+ "learning_rate": 4.854368932038835e-05,
164
+ "loss": 0.4701,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.65,
169
+ "learning_rate": 4.8274002157497304e-05,
170
+ "loss": 0.4795,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.68,
175
+ "learning_rate": 4.800431499460626e-05,
176
+ "loss": 0.4394,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.7,
181
+ "learning_rate": 4.773462783171521e-05,
182
+ "loss": 0.4252,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.73,
187
+ "learning_rate": 4.7464940668824164e-05,
188
+ "loss": 0.4688,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.75,
193
+ "learning_rate": 4.7195253505933115e-05,
194
+ "loss": 0.4879,
195
  "step": 310
196
  },
197
  {
198
  "epoch": 0.78,
199
+ "learning_rate": 4.692556634304207e-05,
200
+ "loss": 0.4517,
201
  "step": 320
202
  },
203
  {
204
  "epoch": 0.8,
205
+ "learning_rate": 4.665587918015103e-05,
206
+ "loss": 0.4492,
207
  "step": 330
208
  },
209
  {
210
  "epoch": 0.82,
211
+ "learning_rate": 4.638619201725998e-05,
212
+ "loss": 0.4738,
213
  "step": 340
214
  },
215
  {
216
  "epoch": 0.85,
217
+ "learning_rate": 4.611650485436894e-05,
218
+ "loss": 0.4579,
219
  "step": 350
220
  },
221
  {
222
  "epoch": 0.87,
223
+ "learning_rate": 4.584681769147789e-05,
224
+ "loss": 0.4413,
225
  "step": 360
226
  },
227
  {
228
  "epoch": 0.9,
229
+ "learning_rate": 4.557713052858684e-05,
230
+ "loss": 0.508,
231
  "step": 370
232
  },
233
  {
234
  "epoch": 0.92,
235
+ "learning_rate": 4.530744336569579e-05,
236
+ "loss": 0.4605,
237
  "step": 380
238
  },
239
  {
240
  "epoch": 0.95,
241
+ "learning_rate": 4.503775620280475e-05,
242
+ "loss": 0.4459,
243
  "step": 390
244
  },
245
  {
246
  "epoch": 0.97,
247
+ "learning_rate": 4.47680690399137e-05,
248
+ "loss": 0.4568,
249
  "step": 400
250
  },
251
  {
252
  "epoch": 0.99,
253
+ "learning_rate": 4.449838187702265e-05,
254
+ "loss": 0.4741,
255
  "step": 410
256
  },
257
  {
258
  "epoch": 1.0,
259
+ "eval_accuracy": 0.7345721104670986,
260
+ "eval_loss": 0.47738537192344666,
261
+ "eval_runtime": 44.109,
262
+ "eval_samples_per_second": 132.989,
263
+ "eval_steps_per_second": 4.171,
264
  "step": 412
265
  },
266
  {
267
  "epoch": 1.02,
268
+ "learning_rate": 4.422869471413161e-05,
269
+ "loss": 0.4517,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.04,
274
+ "learning_rate": 4.395900755124056e-05,
275
+ "loss": 0.4245,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.07,
280
+ "learning_rate": 4.368932038834951e-05,
281
+ "loss": 0.4318,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 1.09,
286
+ "learning_rate": 4.341963322545847e-05,
287
+ "loss": 0.4309,
288
  "step": 450
289
  },
290
  {
291
  "epoch": 1.12,
292
+ "learning_rate": 4.314994606256743e-05,
293
+ "loss": 0.4387,
294
  "step": 460
295
  },
296
  {
297
  "epoch": 1.14,
298
+ "learning_rate": 4.288025889967638e-05,
299
+ "loss": 0.4605,
300
  "step": 470
301
  },
302
  {
303
  "epoch": 1.16,
304
+ "learning_rate": 4.261057173678533e-05,
305
+ "loss": 0.4623,
306
  "step": 480
307
  },
308
  {
309
  "epoch": 1.19,
310
+ "learning_rate": 4.234088457389429e-05,
311
+ "loss": 0.4471,
312
  "step": 490
313
  },
314
  {
315
  "epoch": 1.21,
316
+ "learning_rate": 4.207119741100324e-05,
317
+ "loss": 0.4685,
318
  "step": 500
319
  },
320
  {
321
  "epoch": 1.24,
322
+ "learning_rate": 4.180151024811219e-05,
323
+ "loss": 0.4549,
324
  "step": 510
325
  },
326
  {
327
  "epoch": 1.26,
328
+ "learning_rate": 4.153182308522115e-05,
329
+ "loss": 0.4197,
330
  "step": 520
331
  },
332
  {
333
  "epoch": 1.28,
334
+ "learning_rate": 4.12621359223301e-05,
335
+ "loss": 0.4243,
336
  "step": 530
337
  },
338
  {
339
  "epoch": 1.31,
340
+ "learning_rate": 4.099244875943905e-05,
341
+ "loss": 0.4701,
342
  "step": 540
343
  },
344
  {
345
  "epoch": 1.33,
346
+ "learning_rate": 4.0722761596548e-05,
347
+ "loss": 0.4475,
348
  "step": 550
349
  },
350
  {
351
  "epoch": 1.36,
352
+ "learning_rate": 4.0453074433656965e-05,
353
+ "loss": 0.444,
354
  "step": 560
355
  },
356
  {
357
  "epoch": 1.38,
358
+ "learning_rate": 4.0183387270765916e-05,
359
+ "loss": 0.4231,
360
  "step": 570
361
  },
362
  {
363
  "epoch": 1.41,
364
+ "learning_rate": 3.991370010787487e-05,
365
+ "loss": 0.431,
366
  "step": 580
367
  },
368
  {
369
  "epoch": 1.43,
370
+ "learning_rate": 3.964401294498382e-05,
371
+ "loss": 0.4441,
372
  "step": 590
373
  },
374
  {
375
  "epoch": 1.45,
376
+ "learning_rate": 3.9374325782092776e-05,
377
+ "loss": 0.466,
378
  "step": 600
379
  },
380
  {
381
  "epoch": 1.48,
382
+ "learning_rate": 3.910463861920173e-05,
383
+ "loss": 0.4114,
384
  "step": 610
385
  },
386
  {
387
  "epoch": 1.5,
388
+ "learning_rate": 3.883495145631068e-05,
389
+ "loss": 0.4523,
390
  "step": 620
391
  },
392
  {
393
  "epoch": 1.53,
394
+ "learning_rate": 3.8565264293419636e-05,
395
+ "loss": 0.4632,
396
  "step": 630
397
  },
398
  {
399
  "epoch": 1.55,
400
+ "learning_rate": 3.829557713052859e-05,
401
+ "loss": 0.4373,
402
  "step": 640
403
  },
404
  {
405
  "epoch": 1.58,
406
+ "learning_rate": 3.802588996763754e-05,
407
+ "loss": 0.4487,
408
  "step": 650
409
  },
410
  {
411
  "epoch": 1.6,
412
+ "learning_rate": 3.7756202804746496e-05,
413
+ "loss": 0.4818,
414
  "step": 660
415
  },
416
  {
417
  "epoch": 1.62,
418
+ "learning_rate": 3.7486515641855454e-05,
419
+ "loss": 0.4461,
420
  "step": 670
421
  },
422
  {
423
  "epoch": 1.65,
424
+ "learning_rate": 3.7216828478964405e-05,
425
+ "loss": 0.4475,
426
  "step": 680
427
  },
428
  {
429
  "epoch": 1.67,
430
+ "learning_rate": 3.6947141316073356e-05,
431
+ "loss": 0.469,
432
  "step": 690
433
  },
434
  {
435
  "epoch": 1.7,
436
+ "learning_rate": 3.6677454153182314e-05,
437
+ "loss": 0.448,
438
  "step": 700
439
  },
440
  {
441
  "epoch": 1.72,
442
+ "learning_rate": 3.6407766990291265e-05,
443
+ "loss": 0.4411,
444
  "step": 710
445
  },
446
  {
447
  "epoch": 1.75,
448
+ "learning_rate": 3.6138079827400216e-05,
449
+ "loss": 0.4529,
450
  "step": 720
451
  },
452
  {
453
  "epoch": 1.77,
454
+ "learning_rate": 3.5868392664509174e-05,
455
+ "loss": 0.4553,
456
  "step": 730
457
  },
458
  {
459
  "epoch": 1.79,
460
+ "learning_rate": 3.5598705501618125e-05,
461
+ "loss": 0.4558,
462
  "step": 740
463
  },
464
  {
465
  "epoch": 1.82,
466
+ "learning_rate": 3.5329018338727076e-05,
467
+ "loss": 0.4976,
468
  "step": 750
469
  },
470
  {
471
  "epoch": 1.84,
472
+ "learning_rate": 3.505933117583603e-05,
473
+ "loss": 0.4474,
474
  "step": 760
475
  },
476
  {
477
  "epoch": 1.87,
478
+ "learning_rate": 3.4789644012944984e-05,
479
+ "loss": 0.4646,
480
  "step": 770
481
  },
482
  {
483
  "epoch": 1.89,
484
+ "learning_rate": 3.451995685005394e-05,
485
+ "loss": 0.4537,
486
  "step": 780
487
  },
488
  {
489
  "epoch": 1.92,
490
+ "learning_rate": 3.425026968716289e-05,
491
+ "loss": 0.4649,
492
  "step": 790
493
  },
494
  {
495
  "epoch": 1.94,
496
+ "learning_rate": 3.398058252427185e-05,
497
+ "loss": 0.4751,
498
  "step": 800
499
  },
500
  {
501
  "epoch": 1.96,
502
+ "learning_rate": 3.37108953613808e-05,
503
+ "loss": 0.4954,
504
  "step": 810
505
  },
506
  {
507
  "epoch": 1.99,
508
+ "learning_rate": 3.344120819848975e-05,
509
+ "loss": 0.4958,
510
  "step": 820
511
  },
512
  {
513
  "epoch": 2.0,
514
+ "eval_accuracy": 0.7569041936583703,
515
+ "eval_loss": 0.440324068069458,
516
+ "eval_runtime": 44.2872,
517
+ "eval_samples_per_second": 132.454,
518
+ "eval_steps_per_second": 4.155,
519
  "step": 825
520
  },
521
  {
522
  "epoch": 2.01,
523
+ "learning_rate": 3.3171521035598704e-05,
524
+ "loss": 0.4511,
525
  "step": 830
526
  },
527
  {
528
  "epoch": 2.04,
529
+ "learning_rate": 3.290183387270766e-05,
530
+ "loss": 0.4175,
531
  "step": 840
532
  },
533
  {
534
  "epoch": 2.06,
535
+ "learning_rate": 3.263214670981661e-05,
536
+ "loss": 0.4511,
537
  "step": 850
538
  },
539
  {
540
  "epoch": 2.08,
541
+ "learning_rate": 3.2362459546925564e-05,
542
+ "loss": 0.4188,
543
  "step": 860
544
  },
545
  {
546
  "epoch": 2.11,
547
+ "learning_rate": 3.209277238403452e-05,
548
+ "loss": 0.453,
549
  "step": 870
550
  },
551
  {
552
  "epoch": 2.13,
553
+ "learning_rate": 3.182308522114347e-05,
554
+ "loss": 0.4392,
555
  "step": 880
556
  },
557
  {
558
  "epoch": 2.16,
559
+ "learning_rate": 3.155339805825243e-05,
560
+ "loss": 0.4585,
561
  "step": 890
562
  },
563
  {
564
  "epoch": 2.18,
565
+ "learning_rate": 3.128371089536138e-05,
566
+ "loss": 0.434,
567
  "step": 900
568
  },
569
  {
570
  "epoch": 2.21,
571
+ "learning_rate": 3.101402373247034e-05,
572
+ "loss": 0.4336,
573
  "step": 910
574
  },
575
  {
576
  "epoch": 2.23,
577
+ "learning_rate": 3.074433656957929e-05,
578
+ "loss": 0.446,
579
  "step": 920
580
  },
581
  {
582
  "epoch": 2.25,
583
+ "learning_rate": 3.0474649406688245e-05,
584
+ "loss": 0.4559,
585
  "step": 930
586
  },
587
  {
588
  "epoch": 2.28,
589
+ "learning_rate": 3.0204962243797196e-05,
590
+ "loss": 0.4529,
591
  "step": 940
592
  },
593
  {
594
  "epoch": 2.3,
595
+ "learning_rate": 2.993527508090615e-05,
596
+ "loss": 0.4681,
597
  "step": 950
598
  },
599
  {
600
  "epoch": 2.33,
601
+ "learning_rate": 2.96655879180151e-05,
602
+ "loss": 0.4512,
603
  "step": 960
604
  },
605
  {
606
  "epoch": 2.35,
607
+ "learning_rate": 2.9395900755124056e-05,
608
+ "loss": 0.4373,
609
  "step": 970
610
  },
611
  {
612
  "epoch": 2.38,
613
+ "learning_rate": 2.912621359223301e-05,
614
+ "loss": 0.4706,
615
  "step": 980
616
  },
617
  {
618
  "epoch": 2.4,
619
+ "learning_rate": 2.885652642934196e-05,
620
+ "loss": 0.4582,
621
  "step": 990
622
  },
623
  {
624
  "epoch": 2.42,
625
+ "learning_rate": 2.8586839266450923e-05,
626
+ "loss": 0.4442,
627
  "step": 1000
628
  },
629
  {
630
  "epoch": 2.45,
631
+ "learning_rate": 2.8317152103559874e-05,
632
+ "loss": 0.4593,
633
  "step": 1010
634
  },
635
  {
636
  "epoch": 2.47,
637
+ "learning_rate": 2.8047464940668828e-05,
638
+ "loss": 0.4525,
639
  "step": 1020
640
  },
641
  {
642
  "epoch": 2.5,
643
+ "learning_rate": 2.777777777777778e-05,
644
+ "loss": 0.4512,
645
  "step": 1030
646
  },
647
  {
648
  "epoch": 2.52,
649
+ "learning_rate": 2.7508090614886734e-05,
650
+ "loss": 0.4668,
651
  "step": 1040
652
  },
653
  {
654
  "epoch": 2.55,
655
+ "learning_rate": 2.7238403451995688e-05,
656
+ "loss": 0.4669,
657
  "step": 1050
658
  },
659
  {
660
  "epoch": 2.57,
661
+ "learning_rate": 2.696871628910464e-05,
662
+ "loss": 0.4513,
663
  "step": 1060
664
  },
665
  {
666
  "epoch": 2.59,
667
+ "learning_rate": 2.6699029126213593e-05,
668
+ "loss": 0.4727,
669
  "step": 1070
670
  },
671
  {
672
  "epoch": 2.62,
673
+ "learning_rate": 2.6429341963322544e-05,
674
+ "loss": 0.4638,
675
  "step": 1080
676
  },
677
  {
678
  "epoch": 2.64,
679
+ "learning_rate": 2.61596548004315e-05,
680
+ "loss": 0.473,
681
  "step": 1090
682
  },
683
  {
684
  "epoch": 2.67,
685
+ "learning_rate": 2.5889967637540453e-05,
686
+ "loss": 0.4535,
687
  "step": 1100
688
  },
689
  {
690
  "epoch": 2.69,
691
+ "learning_rate": 2.5620280474649404e-05,
692
+ "loss": 0.4874,
693
  "step": 1110
694
  },
695
  {
696
  "epoch": 2.72,
697
+ "learning_rate": 2.5350593311758362e-05,
698
+ "loss": 0.4746,
699
  "step": 1120
700
  },
701
  {
702
  "epoch": 2.74,
703
+ "learning_rate": 2.5080906148867317e-05,
704
+ "loss": 0.4474,
705
  "step": 1130
706
  },
707
  {
708
  "epoch": 2.76,
709
+ "learning_rate": 2.481121898597627e-05,
710
+ "loss": 0.4717,
711
  "step": 1140
712
  },
713
  {
714
  "epoch": 2.79,
715
+ "learning_rate": 2.4541531823085222e-05,
716
+ "loss": 0.4634,
717
  "step": 1150
718
  },
719
  {
720
  "epoch": 2.81,
721
+ "learning_rate": 2.4271844660194176e-05,
722
+ "loss": 0.478,
723
  "step": 1160
724
  },
725
  {
726
  "epoch": 2.84,
727
+ "learning_rate": 2.400215749730313e-05,
728
+ "loss": 0.5012,
729
  "step": 1170
730
  },
731
  {
732
  "epoch": 2.86,
733
+ "learning_rate": 2.3732470334412082e-05,
734
+ "loss": 0.4592,
735
  "step": 1180
736
  },
737
  {
738
  "epoch": 2.88,
739
+ "learning_rate": 2.3462783171521036e-05,
740
+ "loss": 0.519,
741
  "step": 1190
742
  },
743
  {
744
  "epoch": 2.91,
745
+ "learning_rate": 2.319309600862999e-05,
746
+ "loss": 0.5202,
747
  "step": 1200
748
  },
749
  {
750
  "epoch": 2.93,
751
+ "learning_rate": 2.2923408845738945e-05,
752
+ "loss": 0.4819,
753
  "step": 1210
754
  },
755
  {
756
  "epoch": 2.96,
757
+ "learning_rate": 2.2653721682847896e-05,
758
+ "loss": 0.4969,
759
  "step": 1220
760
  },
761
  {
762
  "epoch": 2.98,
763
+ "learning_rate": 2.238403451995685e-05,
764
+ "loss": 0.473,
765
  "step": 1230
766
  },
767
  {
768
  "epoch": 3.0,
769
+ "eval_accuracy": 0.7659393112853733,
770
+ "eval_loss": 0.40928295254707336,
771
+ "eval_runtime": 44.1748,
772
+ "eval_samples_per_second": 132.79,
773
+ "eval_steps_per_second": 4.165,
774
+ "step": 1237
775
  },
776
  {
777
+ "epoch": 3.01,
778
+ "learning_rate": 2.2114347357065805e-05,
779
+ "loss": 0.4556,
780
+ "step": 1240
781
+ },
782
+ {
783
+ "epoch": 3.03,
784
+ "learning_rate": 2.1844660194174756e-05,
785
+ "loss": 0.4626,
786
+ "step": 1250
787
+ },
788
+ {
789
+ "epoch": 3.05,
790
+ "learning_rate": 2.1574973031283714e-05,
791
+ "loss": 0.4897,
792
+ "step": 1260
793
+ },
794
+ {
795
+ "epoch": 3.08,
796
+ "learning_rate": 2.1305285868392665e-05,
797
+ "loss": 0.4705,
798
+ "step": 1270
799
+ },
800
+ {
801
+ "epoch": 3.1,
802
+ "learning_rate": 2.103559870550162e-05,
803
+ "loss": 0.4876,
804
+ "step": 1280
805
+ },
806
+ {
807
+ "epoch": 3.13,
808
+ "learning_rate": 2.0765911542610574e-05,
809
+ "loss": 0.4844,
810
+ "step": 1290
811
+ },
812
+ {
813
+ "epoch": 3.15,
814
+ "learning_rate": 2.0496224379719525e-05,
815
+ "loss": 0.4737,
816
+ "step": 1300
817
+ },
818
+ {
819
+ "epoch": 3.18,
820
+ "learning_rate": 2.0226537216828483e-05,
821
+ "loss": 0.4978,
822
+ "step": 1310
823
+ },
824
+ {
825
+ "epoch": 3.2,
826
+ "learning_rate": 1.9956850053937434e-05,
827
+ "loss": 0.4744,
828
+ "step": 1320
829
+ },
830
+ {
831
+ "epoch": 3.22,
832
+ "learning_rate": 1.9687162891046388e-05,
833
+ "loss": 0.4588,
834
+ "step": 1330
835
+ },
836
+ {
837
+ "epoch": 3.25,
838
+ "learning_rate": 1.941747572815534e-05,
839
+ "loss": 0.4344,
840
+ "step": 1340
841
+ },
842
+ {
843
+ "epoch": 3.27,
844
+ "learning_rate": 1.9147788565264294e-05,
845
+ "loss": 0.4651,
846
+ "step": 1350
847
+ },
848
+ {
849
+ "epoch": 3.3,
850
+ "learning_rate": 1.8878101402373248e-05,
851
+ "loss": 0.4813,
852
+ "step": 1360
853
+ },
854
+ {
855
+ "epoch": 3.32,
856
+ "learning_rate": 1.8608414239482202e-05,
857
+ "loss": 0.4614,
858
+ "step": 1370
859
+ },
860
+ {
861
+ "epoch": 3.35,
862
+ "learning_rate": 1.8338727076591157e-05,
863
+ "loss": 0.4687,
864
+ "step": 1380
865
+ },
866
+ {
867
+ "epoch": 3.37,
868
+ "learning_rate": 1.8069039913700108e-05,
869
+ "loss": 0.4735,
870
+ "step": 1390
871
+ },
872
+ {
873
+ "epoch": 3.39,
874
+ "learning_rate": 1.7799352750809062e-05,
875
+ "loss": 0.4798,
876
+ "step": 1400
877
+ },
878
+ {
879
+ "epoch": 3.42,
880
+ "learning_rate": 1.7529665587918013e-05,
881
+ "loss": 0.4288,
882
+ "step": 1410
883
+ },
884
+ {
885
+ "epoch": 3.44,
886
+ "learning_rate": 1.725997842502697e-05,
887
+ "loss": 0.4592,
888
+ "step": 1420
889
+ },
890
+ {
891
+ "epoch": 3.47,
892
+ "learning_rate": 1.6990291262135926e-05,
893
+ "loss": 0.4694,
894
+ "step": 1430
895
+ },
896
+ {
897
+ "epoch": 3.49,
898
+ "learning_rate": 1.6720604099244877e-05,
899
+ "loss": 0.4409,
900
+ "step": 1440
901
+ },
902
+ {
903
+ "epoch": 3.52,
904
+ "learning_rate": 1.645091693635383e-05,
905
+ "loss": 0.4536,
906
+ "step": 1450
907
+ },
908
+ {
909
+ "epoch": 3.54,
910
+ "learning_rate": 1.6181229773462782e-05,
911
+ "loss": 0.4635,
912
+ "step": 1460
913
+ },
914
+ {
915
+ "epoch": 3.56,
916
+ "learning_rate": 1.5911542610571736e-05,
917
+ "loss": 0.4554,
918
+ "step": 1470
919
+ },
920
+ {
921
+ "epoch": 3.59,
922
+ "learning_rate": 1.564185544768069e-05,
923
+ "loss": 0.4504,
924
+ "step": 1480
925
+ },
926
+ {
927
+ "epoch": 3.61,
928
+ "learning_rate": 1.5372168284789645e-05,
929
+ "loss": 0.445,
930
+ "step": 1490
931
+ },
932
+ {
933
+ "epoch": 3.64,
934
+ "learning_rate": 1.5102481121898598e-05,
935
+ "loss": 0.4476,
936
+ "step": 1500
937
+ },
938
+ {
939
+ "epoch": 3.66,
940
+ "learning_rate": 1.483279395900755e-05,
941
+ "loss": 0.4828,
942
+ "step": 1510
943
+ },
944
+ {
945
+ "epoch": 3.68,
946
+ "learning_rate": 1.4563106796116505e-05,
947
+ "loss": 0.4698,
948
+ "step": 1520
949
+ },
950
+ {
951
+ "epoch": 3.71,
952
+ "learning_rate": 1.4293419633225461e-05,
953
+ "loss": 0.4603,
954
+ "step": 1530
955
+ },
956
+ {
957
+ "epoch": 3.73,
958
+ "learning_rate": 1.4023732470334414e-05,
959
+ "loss": 0.4558,
960
+ "step": 1540
961
+ },
962
+ {
963
+ "epoch": 3.76,
964
+ "learning_rate": 1.3754045307443367e-05,
965
+ "loss": 0.4632,
966
+ "step": 1550
967
+ },
968
+ {
969
+ "epoch": 3.78,
970
+ "learning_rate": 1.348435814455232e-05,
971
+ "loss": 0.4785,
972
+ "step": 1560
973
+ },
974
+ {
975
+ "epoch": 3.81,
976
+ "learning_rate": 1.3214670981661272e-05,
977
+ "loss": 0.4593,
978
+ "step": 1570
979
+ },
980
+ {
981
+ "epoch": 3.83,
982
+ "learning_rate": 1.2944983818770227e-05,
983
+ "loss": 0.439,
984
+ "step": 1580
985
+ },
986
+ {
987
+ "epoch": 3.85,
988
+ "learning_rate": 1.2675296655879181e-05,
989
+ "loss": 0.437,
990
+ "step": 1590
991
+ },
992
+ {
993
+ "epoch": 3.88,
994
+ "learning_rate": 1.2405609492988135e-05,
995
+ "loss": 0.4649,
996
+ "step": 1600
997
+ },
998
+ {
999
+ "epoch": 3.9,
1000
+ "learning_rate": 1.2135922330097088e-05,
1001
+ "loss": 0.4976,
1002
+ "step": 1610
1003
+ },
1004
+ {
1005
+ "epoch": 3.93,
1006
+ "learning_rate": 1.1866235167206041e-05,
1007
+ "loss": 0.4667,
1008
+ "step": 1620
1009
+ },
1010
+ {
1011
+ "epoch": 3.95,
1012
+ "learning_rate": 1.1596548004314995e-05,
1013
+ "loss": 0.4591,
1014
+ "step": 1630
1015
+ },
1016
+ {
1017
+ "epoch": 3.98,
1018
+ "learning_rate": 1.1326860841423948e-05,
1019
+ "loss": 0.4113,
1020
+ "step": 1640
1021
+ },
1022
+ {
1023
+ "epoch": 4.0,
1024
+ "learning_rate": 1.1057173678532903e-05,
1025
+ "loss": 0.4476,
1026
+ "step": 1650
1027
+ },
1028
+ {
1029
+ "epoch": 4.0,
1030
+ "eval_accuracy": 0.7734401636549608,
1031
+ "eval_loss": 0.4010549485683441,
1032
+ "eval_runtime": 43.1679,
1033
+ "eval_samples_per_second": 135.888,
1034
+ "eval_steps_per_second": 4.262,
1035
+ "step": 1650
1036
+ },
1037
+ {
1038
+ "epoch": 4.02,
1039
+ "learning_rate": 1.0787486515641857e-05,
1040
+ "loss": 0.4337,
1041
+ "step": 1660
1042
+ },
1043
+ {
1044
+ "epoch": 4.05,
1045
+ "learning_rate": 1.051779935275081e-05,
1046
+ "loss": 0.429,
1047
+ "step": 1670
1048
+ },
1049
+ {
1050
+ "epoch": 4.07,
1051
+ "learning_rate": 1.0248112189859762e-05,
1052
+ "loss": 0.4329,
1053
+ "step": 1680
1054
+ },
1055
+ {
1056
+ "epoch": 4.1,
1057
+ "learning_rate": 9.978425026968717e-06,
1058
+ "loss": 0.451,
1059
+ "step": 1690
1060
+ },
1061
+ {
1062
+ "epoch": 4.12,
1063
+ "learning_rate": 9.70873786407767e-06,
1064
+ "loss": 0.4556,
1065
+ "step": 1700
1066
+ },
1067
+ {
1068
+ "epoch": 4.15,
1069
+ "learning_rate": 9.439050701186624e-06,
1070
+ "loss": 0.466,
1071
+ "step": 1710
1072
+ },
1073
+ {
1074
+ "epoch": 4.17,
1075
+ "learning_rate": 9.169363538295578e-06,
1076
+ "loss": 0.4192,
1077
+ "step": 1720
1078
+ },
1079
+ {
1080
+ "epoch": 4.19,
1081
+ "learning_rate": 8.899676375404531e-06,
1082
+ "loss": 0.4304,
1083
+ "step": 1730
1084
+ },
1085
+ {
1086
+ "epoch": 4.22,
1087
+ "learning_rate": 8.629989212513486e-06,
1088
+ "loss": 0.4474,
1089
+ "step": 1740
1090
+ },
1091
+ {
1092
+ "epoch": 4.24,
1093
+ "learning_rate": 8.360302049622438e-06,
1094
+ "loss": 0.4011,
1095
+ "step": 1750
1096
+ },
1097
+ {
1098
+ "epoch": 4.27,
1099
+ "learning_rate": 8.090614886731391e-06,
1100
+ "loss": 0.4566,
1101
+ "step": 1760
1102
+ },
1103
+ {
1104
+ "epoch": 4.29,
1105
+ "learning_rate": 7.820927723840345e-06,
1106
+ "loss": 0.417,
1107
+ "step": 1770
1108
+ },
1109
+ {
1110
+ "epoch": 4.32,
1111
+ "learning_rate": 7.551240560949299e-06,
1112
+ "loss": 0.4359,
1113
+ "step": 1780
1114
+ },
1115
+ {
1116
+ "epoch": 4.34,
1117
+ "learning_rate": 7.281553398058253e-06,
1118
+ "loss": 0.4322,
1119
+ "step": 1790
1120
+ },
1121
+ {
1122
+ "epoch": 4.36,
1123
+ "learning_rate": 7.011866235167207e-06,
1124
+ "loss": 0.4457,
1125
+ "step": 1800
1126
+ },
1127
+ {
1128
+ "epoch": 4.39,
1129
+ "learning_rate": 6.74217907227616e-06,
1130
+ "loss": 0.4896,
1131
+ "step": 1810
1132
+ },
1133
+ {
1134
+ "epoch": 4.41,
1135
+ "learning_rate": 6.472491909385113e-06,
1136
+ "loss": 0.4384,
1137
+ "step": 1820
1138
+ },
1139
+ {
1140
+ "epoch": 4.44,
1141
+ "learning_rate": 6.202804746494068e-06,
1142
+ "loss": 0.4464,
1143
+ "step": 1830
1144
+ },
1145
+ {
1146
+ "epoch": 4.46,
1147
+ "learning_rate": 5.9331175836030205e-06,
1148
+ "loss": 0.4671,
1149
+ "step": 1840
1150
+ },
1151
+ {
1152
+ "epoch": 4.48,
1153
+ "learning_rate": 5.663430420711974e-06,
1154
+ "loss": 0.4436,
1155
+ "step": 1850
1156
+ },
1157
+ {
1158
+ "epoch": 4.51,
1159
+ "learning_rate": 5.3937432578209285e-06,
1160
+ "loss": 0.4403,
1161
+ "step": 1860
1162
+ },
1163
+ {
1164
+ "epoch": 4.53,
1165
+ "learning_rate": 5.124056094929881e-06,
1166
+ "loss": 0.4329,
1167
+ "step": 1870
1168
+ },
1169
+ {
1170
+ "epoch": 4.56,
1171
+ "learning_rate": 4.854368932038835e-06,
1172
+ "loss": 0.4651,
1173
+ "step": 1880
1174
+ },
1175
+ {
1176
+ "epoch": 4.58,
1177
+ "learning_rate": 4.584681769147789e-06,
1178
+ "loss": 0.424,
1179
+ "step": 1890
1180
+ },
1181
+ {
1182
+ "epoch": 4.61,
1183
+ "learning_rate": 4.314994606256743e-06,
1184
+ "loss": 0.4539,
1185
+ "step": 1900
1186
+ },
1187
+ {
1188
+ "epoch": 4.63,
1189
+ "learning_rate": 4.0453074433656955e-06,
1190
+ "loss": 0.4237,
1191
+ "step": 1910
1192
+ },
1193
+ {
1194
+ "epoch": 4.65,
1195
+ "learning_rate": 3.7756202804746495e-06,
1196
+ "loss": 0.4337,
1197
+ "step": 1920
1198
+ },
1199
+ {
1200
+ "epoch": 4.68,
1201
+ "learning_rate": 3.5059331175836035e-06,
1202
+ "loss": 0.4322,
1203
+ "step": 1930
1204
+ },
1205
+ {
1206
+ "epoch": 4.7,
1207
+ "learning_rate": 3.2362459546925567e-06,
1208
+ "loss": 0.441,
1209
+ "step": 1940
1210
+ },
1211
+ {
1212
+ "epoch": 4.73,
1213
+ "learning_rate": 2.9665587918015102e-06,
1214
+ "loss": 0.4151,
1215
+ "step": 1950
1216
+ },
1217
+ {
1218
+ "epoch": 4.75,
1219
+ "learning_rate": 2.6968716289104642e-06,
1220
+ "loss": 0.4437,
1221
+ "step": 1960
1222
+ },
1223
+ {
1224
+ "epoch": 4.78,
1225
+ "learning_rate": 2.4271844660194174e-06,
1226
+ "loss": 0.4187,
1227
+ "step": 1970
1228
+ },
1229
+ {
1230
+ "epoch": 4.8,
1231
+ "learning_rate": 2.1574973031283714e-06,
1232
+ "loss": 0.4216,
1233
+ "step": 1980
1234
+ },
1235
+ {
1236
+ "epoch": 4.82,
1237
+ "learning_rate": 1.8878101402373248e-06,
1238
+ "loss": 0.4153,
1239
+ "step": 1990
1240
+ },
1241
+ {
1242
+ "epoch": 4.85,
1243
+ "learning_rate": 1.6181229773462783e-06,
1244
+ "loss": 0.4495,
1245
+ "step": 2000
1246
+ },
1247
+ {
1248
+ "epoch": 4.87,
1249
+ "learning_rate": 1.3484358144552321e-06,
1250
+ "loss": 0.4395,
1251
+ "step": 2010
1252
+ },
1253
+ {
1254
+ "epoch": 4.9,
1255
+ "learning_rate": 1.0787486515641857e-06,
1256
+ "loss": 0.4226,
1257
+ "step": 2020
1258
+ },
1259
+ {
1260
+ "epoch": 4.92,
1261
+ "learning_rate": 8.090614886731392e-07,
1262
+ "loss": 0.4334,
1263
+ "step": 2030
1264
+ },
1265
+ {
1266
+ "epoch": 4.95,
1267
+ "learning_rate": 5.393743257820928e-07,
1268
+ "loss": 0.4169,
1269
+ "step": 2040
1270
+ },
1271
+ {
1272
+ "epoch": 4.97,
1273
+ "learning_rate": 2.696871628910464e-07,
1274
+ "loss": 0.4196,
1275
+ "step": 2050
1276
+ },
1277
+ {
1278
+ "epoch": 4.99,
1279
+ "learning_rate": 0.0,
1280
+ "loss": 0.4093,
1281
+ "step": 2060
1282
+ },
1283
+ {
1284
+ "epoch": 4.99,
1285
+ "eval_accuracy": 0.7797476986021139,
1286
+ "eval_loss": 0.3965856432914734,
1287
+ "eval_runtime": 45.954,
1288
+ "eval_samples_per_second": 127.649,
1289
+ "eval_steps_per_second": 4.004,
1290
+ "step": 2060
1291
+ },
1292
+ {
1293
+ "epoch": 4.99,
1294
+ "step": 2060,
1295
+ "total_flos": 6.553200292249928e+18,
1296
+ "train_loss": 0.4588474294514332,
1297
+ "train_runtime": 4338.0094,
1298
+ "train_samples_per_second": 60.844,
1299
+ "train_steps_per_second": 0.475
1300
  }
1301
  ],
1302
  "logging_steps": 10,
1303
+ "max_steps": 2060,
1304
+ "num_train_epochs": 5,
1305
  "save_steps": 500,
1306
+ "total_flos": 6.553200292249928e+18,
1307
  "trial_name": null,
1308
  "trial_params": null
1309
  }