mansee commited on
Commit
50a389a
1 Parent(s): 8df56d6

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.95,
3
- "eval_accuracy": 0.9738372093023255,
4
- "eval_loss": 0.10162452608346939,
5
- "eval_runtime": 6.5939,
6
- "eval_samples_per_second": 156.509,
7
- "eval_steps_per_second": 5.005,
8
- "total_flos": 1.1422818298339983e+18,
9
- "train_loss": 0.08449313590923944,
10
- "train_runtime": 710.4711,
11
- "train_samples_per_second": 65.316,
12
- "train_steps_per_second": 0.507
13
  }
 
1
  {
2
+ "epoch": 4.99,
3
+ "eval_accuracy": 0.9796511627906976,
4
+ "eval_loss": 0.07256749272346497,
5
+ "eval_runtime": 7.0579,
6
+ "eval_samples_per_second": 146.218,
7
+ "eval_steps_per_second": 4.676,
8
+ "total_flos": 1.151826529604567e+18,
9
+ "train_loss": 0.13382491601838006,
10
+ "train_runtime": 783.9355,
11
+ "train_samples_per_second": 59.195,
12
+ "train_steps_per_second": 0.459
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.95,
3
- "eval_accuracy": 0.9738372093023255,
4
- "eval_loss": 0.10162452608346939,
5
- "eval_runtime": 6.5939,
6
- "eval_samples_per_second": 156.509,
7
- "eval_steps_per_second": 5.005
8
  }
 
1
  {
2
+ "epoch": 4.99,
3
+ "eval_accuracy": 0.9796511627906976,
4
+ "eval_loss": 0.07256749272346497,
5
+ "eval_runtime": 7.0579,
6
+ "eval_samples_per_second": 146.218,
7
+ "eval_steps_per_second": 4.676
8
  }
runs/Dec18_11-49-22_61f348adfad0/events.out.tfevents.1702901007.61f348adfad0.741.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48270b8e93dc15081da5a1919720ac014262971b9532e657996bc8b891714e2b
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.95,
3
- "total_flos": 1.1422818298339983e+18,
4
- "train_loss": 0.08449313590923944,
5
- "train_runtime": 710.4711,
6
- "train_samples_per_second": 65.316,
7
- "train_steps_per_second": 0.507
8
  }
 
1
  {
2
+ "epoch": 4.99,
3
+ "total_flos": 1.151826529604567e+18,
4
+ "train_loss": 0.13382491601838006,
5
+ "train_runtime": 783.9355,
6
+ "train_samples_per_second": 59.195,
7
+ "train_steps_per_second": 0.459
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "best_metric": 0.9738372093023255,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-blank_img/checkpoint-360",
4
- "epoch": 4.948453608247423,
5
- "eval_steps": 500,
6
  "global_step": 360,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -11,495 +10,493 @@
11
  {
12
  "epoch": 0.07,
13
  "learning_rate": 6.944444444444445e-06,
14
- "loss": 0.0881,
15
  "step": 5
16
  },
17
  {
18
  "epoch": 0.14,
19
  "learning_rate": 1.388888888888889e-05,
20
- "loss": 0.0624,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.21,
25
  "learning_rate": 2.0833333333333336e-05,
26
- "loss": 0.063,
27
  "step": 15
28
  },
29
  {
30
  "epoch": 0.27,
31
  "learning_rate": 2.777777777777778e-05,
32
- "loss": 0.0901,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 0.34,
37
  "learning_rate": 3.472222222222222e-05,
38
- "loss": 0.0872,
39
  "step": 25
40
  },
41
  {
42
  "epoch": 0.41,
43
  "learning_rate": 4.166666666666667e-05,
44
- "loss": 0.1016,
45
  "step": 30
46
  },
47
  {
48
  "epoch": 0.48,
49
  "learning_rate": 4.8611111111111115e-05,
50
- "loss": 0.0905,
51
  "step": 35
52
  },
53
  {
54
  "epoch": 0.55,
55
  "learning_rate": 4.938271604938271e-05,
56
- "loss": 0.1495,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.62,
61
  "learning_rate": 4.8611111111111115e-05,
62
- "loss": 0.0949,
63
  "step": 45
64
  },
65
  {
66
  "epoch": 0.69,
67
  "learning_rate": 4.783950617283951e-05,
68
- "loss": 0.1178,
69
  "step": 50
70
  },
71
  {
72
  "epoch": 0.76,
73
  "learning_rate": 4.70679012345679e-05,
74
- "loss": 0.08,
75
  "step": 55
76
  },
77
  {
78
  "epoch": 0.82,
79
  "learning_rate": 4.62962962962963e-05,
80
- "loss": 0.0919,
81
  "step": 60
82
  },
83
  {
84
  "epoch": 0.89,
85
  "learning_rate": 4.5524691358024696e-05,
86
- "loss": 0.0773,
87
  "step": 65
88
  },
89
  {
90
  "epoch": 0.96,
91
  "learning_rate": 4.4753086419753084e-05,
92
- "loss": 0.0502,
93
  "step": 70
94
  },
95
  {
96
  "epoch": 0.99,
97
- "eval_accuracy": 0.9651162790697675,
98
- "eval_loss": 0.12999847531318665,
99
- "eval_runtime": 6.6038,
100
- "eval_samples_per_second": 156.273,
101
- "eval_steps_per_second": 4.997,
102
  "step": 72
103
  },
104
  {
105
- "epoch": 1.03,
106
  "learning_rate": 4.3981481481481486e-05,
107
- "loss": 0.0838,
108
  "step": 75
109
  },
110
  {
111
- "epoch": 1.1,
112
  "learning_rate": 4.3209876543209875e-05,
113
- "loss": 0.067,
114
  "step": 80
115
  },
116
  {
117
- "epoch": 1.17,
118
  "learning_rate": 4.243827160493827e-05,
119
- "loss": 0.0932,
120
  "step": 85
121
  },
122
  {
123
- "epoch": 1.24,
124
  "learning_rate": 4.166666666666667e-05,
125
- "loss": 0.1037,
126
  "step": 90
127
  },
128
  {
129
- "epoch": 1.31,
130
  "learning_rate": 4.089506172839506e-05,
131
- "loss": 0.1057,
132
  "step": 95
133
  },
134
  {
135
- "epoch": 1.37,
136
  "learning_rate": 4.012345679012346e-05,
137
- "loss": 0.0774,
138
  "step": 100
139
  },
140
  {
141
- "epoch": 1.44,
142
  "learning_rate": 3.935185185185186e-05,
143
- "loss": 0.1107,
144
  "step": 105
145
  },
146
  {
147
- "epoch": 1.51,
148
  "learning_rate": 3.8580246913580246e-05,
149
- "loss": 0.1109,
150
  "step": 110
151
  },
152
  {
153
- "epoch": 1.58,
154
  "learning_rate": 3.780864197530865e-05,
155
- "loss": 0.1078,
156
  "step": 115
157
  },
158
  {
159
- "epoch": 1.65,
160
  "learning_rate": 3.7037037037037037e-05,
161
- "loss": 0.0935,
162
  "step": 120
163
  },
164
  {
165
- "epoch": 1.72,
166
  "learning_rate": 3.626543209876543e-05,
167
- "loss": 0.0741,
168
  "step": 125
169
  },
170
  {
171
- "epoch": 1.79,
172
  "learning_rate": 3.5493827160493834e-05,
173
- "loss": 0.1031,
174
  "step": 130
175
  },
176
  {
177
- "epoch": 1.86,
178
  "learning_rate": 3.472222222222222e-05,
179
- "loss": 0.0638,
180
  "step": 135
181
  },
182
  {
183
- "epoch": 1.92,
184
  "learning_rate": 3.395061728395062e-05,
185
- "loss": 0.0915,
186
  "step": 140
187
  },
188
  {
189
  "epoch": 1.99,
190
- "learning_rate": 3.317901234567901e-05,
191
- "loss": 0.1107,
192
- "step": 145
 
 
 
193
  },
194
  {
195
- "epoch": 1.99,
196
- "eval_accuracy": 0.9728682170542635,
197
- "eval_loss": 0.10234300047159195,
198
- "eval_runtime": 7.16,
199
- "eval_samples_per_second": 144.135,
200
- "eval_steps_per_second": 4.609,
201
  "step": 145
202
  },
203
  {
204
- "epoch": 2.06,
205
  "learning_rate": 3.240740740740741e-05,
206
- "loss": 0.0784,
207
  "step": 150
208
  },
209
  {
210
- "epoch": 2.13,
211
  "learning_rate": 3.16358024691358e-05,
212
- "loss": 0.0672,
213
  "step": 155
214
  },
215
  {
216
- "epoch": 2.2,
217
  "learning_rate": 3.08641975308642e-05,
218
- "loss": 0.0598,
219
  "step": 160
220
  },
221
  {
222
- "epoch": 2.27,
223
  "learning_rate": 3.0092592592592593e-05,
224
- "loss": 0.0641,
225
  "step": 165
226
  },
227
  {
228
- "epoch": 2.34,
229
  "learning_rate": 2.9320987654320992e-05,
230
- "loss": 0.0805,
231
  "step": 170
232
  },
233
  {
234
- "epoch": 2.41,
235
  "learning_rate": 2.8549382716049384e-05,
236
- "loss": 0.0864,
237
  "step": 175
238
  },
239
  {
240
- "epoch": 2.47,
241
  "learning_rate": 2.777777777777778e-05,
242
- "loss": 0.0954,
243
  "step": 180
244
  },
245
  {
246
- "epoch": 2.54,
247
  "learning_rate": 2.700617283950617e-05,
248
- "loss": 0.0717,
249
  "step": 185
250
  },
251
  {
252
- "epoch": 2.61,
253
  "learning_rate": 2.623456790123457e-05,
254
- "loss": 0.0634,
255
  "step": 190
256
  },
257
  {
258
- "epoch": 2.68,
259
  "learning_rate": 2.5462962962962965e-05,
260
- "loss": 0.1082,
261
  "step": 195
262
  },
263
  {
264
- "epoch": 2.75,
265
  "learning_rate": 2.4691358024691357e-05,
266
- "loss": 0.0748,
267
  "step": 200
268
  },
269
  {
270
- "epoch": 2.82,
271
  "learning_rate": 2.3919753086419755e-05,
272
- "loss": 0.1221,
273
  "step": 205
274
  },
275
  {
276
- "epoch": 2.89,
277
  "learning_rate": 2.314814814814815e-05,
278
- "loss": 0.0757,
279
  "step": 210
280
  },
281
  {
282
- "epoch": 2.96,
283
  "learning_rate": 2.2376543209876542e-05,
284
- "loss": 0.0917,
285
  "step": 215
286
  },
287
  {
288
- "epoch": 3.0,
289
- "eval_accuracy": 0.9651162790697675,
290
- "eval_loss": 0.12774048745632172,
291
- "eval_runtime": 6.6558,
292
- "eval_samples_per_second": 155.053,
293
- "eval_steps_per_second": 4.958,
294
- "step": 218
295
  },
296
  {
297
- "epoch": 3.02,
298
  "learning_rate": 2.1604938271604937e-05,
299
- "loss": 0.0553,
300
  "step": 220
301
  },
302
  {
303
- "epoch": 3.09,
304
  "learning_rate": 2.0833333333333336e-05,
305
- "loss": 0.0942,
306
  "step": 225
307
  },
308
  {
309
- "epoch": 3.16,
310
  "learning_rate": 2.006172839506173e-05,
311
- "loss": 0.0834,
312
  "step": 230
313
  },
314
  {
315
- "epoch": 3.23,
316
  "learning_rate": 1.9290123456790123e-05,
317
- "loss": 0.057,
318
  "step": 235
319
  },
320
  {
321
- "epoch": 3.3,
322
  "learning_rate": 1.8518518518518518e-05,
323
- "loss": 0.0773,
324
  "step": 240
325
  },
326
  {
327
- "epoch": 3.37,
328
  "learning_rate": 1.7746913580246917e-05,
329
- "loss": 0.0617,
330
  "step": 245
331
  },
332
  {
333
- "epoch": 3.44,
334
  "learning_rate": 1.697530864197531e-05,
335
- "loss": 0.0785,
336
  "step": 250
337
  },
338
  {
339
- "epoch": 3.51,
340
  "learning_rate": 1.6203703703703704e-05,
341
- "loss": 0.057,
342
  "step": 255
343
  },
344
  {
345
- "epoch": 3.57,
346
  "learning_rate": 1.54320987654321e-05,
347
- "loss": 0.0746,
348
  "step": 260
349
  },
350
  {
351
- "epoch": 3.64,
352
  "learning_rate": 1.4660493827160496e-05,
353
- "loss": 0.1117,
354
  "step": 265
355
  },
356
  {
357
- "epoch": 3.71,
358
  "learning_rate": 1.388888888888889e-05,
359
- "loss": 0.0923,
360
  "step": 270
361
  },
362
  {
363
- "epoch": 3.78,
364
  "learning_rate": 1.3117283950617285e-05,
365
- "loss": 0.0599,
366
  "step": 275
367
  },
368
  {
369
- "epoch": 3.85,
370
  "learning_rate": 1.2345679012345678e-05,
371
- "loss": 0.0895,
372
  "step": 280
373
  },
374
  {
375
- "epoch": 3.92,
376
  "learning_rate": 1.1574074074074075e-05,
377
- "loss": 0.0933,
378
  "step": 285
379
  },
380
  {
381
  "epoch": 3.99,
382
- "learning_rate": 1.0802469135802469e-05,
383
- "loss": 0.1022,
384
- "step": 290
 
 
 
385
  },
386
  {
387
- "epoch": 4.0,
388
- "eval_accuracy": 0.9718992248062015,
389
- "eval_loss": 0.12579788267612457,
390
- "eval_runtime": 7.0589,
391
- "eval_samples_per_second": 146.199,
392
- "eval_steps_per_second": 4.675,
393
- "step": 291
394
  },
395
  {
396
- "epoch": 4.05,
397
  "learning_rate": 1.0030864197530866e-05,
398
- "loss": 0.0889,
399
  "step": 295
400
  },
401
  {
402
- "epoch": 4.12,
403
  "learning_rate": 9.259259259259259e-06,
404
- "loss": 0.088,
405
  "step": 300
406
  },
407
  {
408
- "epoch": 4.19,
409
  "learning_rate": 8.487654320987654e-06,
410
- "loss": 0.0679,
411
  "step": 305
412
  },
413
  {
414
- "epoch": 4.26,
415
  "learning_rate": 7.71604938271605e-06,
416
- "loss": 0.0632,
417
  "step": 310
418
  },
419
  {
420
- "epoch": 4.33,
421
  "learning_rate": 6.944444444444445e-06,
422
- "loss": 0.095,
423
  "step": 315
424
  },
425
  {
426
- "epoch": 4.4,
427
  "learning_rate": 6.172839506172839e-06,
428
- "loss": 0.084,
429
  "step": 320
430
  },
431
  {
432
- "epoch": 4.47,
433
  "learning_rate": 5.401234567901234e-06,
434
- "loss": 0.07,
435
  "step": 325
436
  },
437
  {
438
- "epoch": 4.54,
439
  "learning_rate": 4.6296296296296296e-06,
440
- "loss": 0.0692,
441
  "step": 330
442
  },
443
  {
444
- "epoch": 4.6,
445
  "learning_rate": 3.858024691358025e-06,
446
- "loss": 0.0575,
447
  "step": 335
448
  },
449
  {
450
- "epoch": 4.67,
451
  "learning_rate": 3.0864197530864196e-06,
452
- "loss": 0.0625,
453
  "step": 340
454
  },
455
  {
456
- "epoch": 4.74,
457
  "learning_rate": 2.3148148148148148e-06,
458
- "loss": 0.0715,
459
  "step": 345
460
  },
461
  {
462
- "epoch": 4.81,
463
  "learning_rate": 1.5432098765432098e-06,
464
- "loss": 0.0651,
465
  "step": 350
466
  },
467
  {
468
- "epoch": 4.88,
469
  "learning_rate": 7.716049382716049e-07,
470
- "loss": 0.1432,
471
  "step": 355
472
  },
473
  {
474
- "epoch": 4.95,
475
  "learning_rate": 0.0,
476
- "loss": 0.0888,
477
  "step": 360
478
  },
479
  {
480
- "epoch": 4.95,
481
- "eval_accuracy": 0.9738372093023255,
482
- "eval_loss": 0.10162452608346939,
483
- "eval_runtime": 6.9642,
484
- "eval_samples_per_second": 148.186,
485
- "eval_steps_per_second": 4.739,
486
  "step": 360
487
  },
488
  {
489
- "epoch": 4.95,
490
  "step": 360,
491
- "total_flos": 1.1422818298339983e+18,
492
- "train_loss": 0.08449313590923944,
493
- "train_runtime": 710.4711,
494
- "train_samples_per_second": 65.316,
495
- "train_steps_per_second": 0.507
496
  }
497
  ],
498
- "logging_steps": 5,
499
  "max_steps": 360,
500
  "num_train_epochs": 5,
501
- "save_steps": 500,
502
- "total_flos": 1.1422818298339983e+18,
503
  "trial_name": null,
504
  "trial_params": null
505
  }
 
1
  {
2
+ "best_metric": 0.9796511627906976,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-blank_img/checkpoint-288",
4
+ "epoch": 4.989690721649485,
 
5
  "global_step": 360,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
 
10
  {
11
  "epoch": 0.07,
12
  "learning_rate": 6.944444444444445e-06,
13
+ "loss": 0.7256,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.14,
18
  "learning_rate": 1.388888888888889e-05,
19
+ "loss": 0.6125,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.21,
24
  "learning_rate": 2.0833333333333336e-05,
25
+ "loss": 0.4648,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.27,
30
  "learning_rate": 2.777777777777778e-05,
31
+ "loss": 0.3041,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 0.34,
36
  "learning_rate": 3.472222222222222e-05,
37
+ "loss": 0.2065,
38
  "step": 25
39
  },
40
  {
41
  "epoch": 0.41,
42
  "learning_rate": 4.166666666666667e-05,
43
+ "loss": 0.1469,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.48,
48
  "learning_rate": 4.8611111111111115e-05,
49
+ "loss": 0.1734,
50
  "step": 35
51
  },
52
  {
53
  "epoch": 0.55,
54
  "learning_rate": 4.938271604938271e-05,
55
+ "loss": 0.1736,
56
  "step": 40
57
  },
58
  {
59
  "epoch": 0.62,
60
  "learning_rate": 4.8611111111111115e-05,
61
+ "loss": 0.1559,
62
  "step": 45
63
  },
64
  {
65
  "epoch": 0.69,
66
  "learning_rate": 4.783950617283951e-05,
67
+ "loss": 0.1355,
68
  "step": 50
69
  },
70
  {
71
  "epoch": 0.76,
72
  "learning_rate": 4.70679012345679e-05,
73
+ "loss": 0.0896,
74
  "step": 55
75
  },
76
  {
77
  "epoch": 0.82,
78
  "learning_rate": 4.62962962962963e-05,
79
+ "loss": 0.0994,
80
  "step": 60
81
  },
82
  {
83
  "epoch": 0.89,
84
  "learning_rate": 4.5524691358024696e-05,
85
+ "loss": 0.127,
86
  "step": 65
87
  },
88
  {
89
  "epoch": 0.96,
90
  "learning_rate": 4.4753086419753084e-05,
91
+ "loss": 0.1329,
92
  "step": 70
93
  },
94
  {
95
  "epoch": 0.99,
96
+ "eval_accuracy": 0.9767441860465116,
97
+ "eval_loss": 0.08817464113235474,
98
+ "eval_runtime": 7.3528,
99
+ "eval_samples_per_second": 140.354,
100
+ "eval_steps_per_second": 4.488,
101
  "step": 72
102
  },
103
  {
104
+ "epoch": 1.04,
105
  "learning_rate": 4.3981481481481486e-05,
106
+ "loss": 0.1512,
107
  "step": 75
108
  },
109
  {
110
+ "epoch": 1.11,
111
  "learning_rate": 4.3209876543209875e-05,
112
+ "loss": 0.1248,
113
  "step": 80
114
  },
115
  {
116
+ "epoch": 1.18,
117
  "learning_rate": 4.243827160493827e-05,
118
+ "loss": 0.1396,
119
  "step": 85
120
  },
121
  {
122
+ "epoch": 1.25,
123
  "learning_rate": 4.166666666666667e-05,
124
+ "loss": 0.1231,
125
  "step": 90
126
  },
127
  {
128
+ "epoch": 1.32,
129
  "learning_rate": 4.089506172839506e-05,
130
+ "loss": 0.1228,
131
  "step": 95
132
  },
133
  {
134
+ "epoch": 1.38,
135
  "learning_rate": 4.012345679012346e-05,
136
+ "loss": 0.1277,
137
  "step": 100
138
  },
139
  {
140
+ "epoch": 1.45,
141
  "learning_rate": 3.935185185185186e-05,
142
+ "loss": 0.1116,
143
  "step": 105
144
  },
145
  {
146
+ "epoch": 1.52,
147
  "learning_rate": 3.8580246913580246e-05,
148
+ "loss": 0.1236,
149
  "step": 110
150
  },
151
  {
152
+ "epoch": 1.59,
153
  "learning_rate": 3.780864197530865e-05,
154
+ "loss": 0.1254,
155
  "step": 115
156
  },
157
  {
158
+ "epoch": 1.66,
159
  "learning_rate": 3.7037037037037037e-05,
160
+ "loss": 0.1004,
161
  "step": 120
162
  },
163
  {
164
+ "epoch": 1.73,
165
  "learning_rate": 3.626543209876543e-05,
166
+ "loss": 0.1199,
167
  "step": 125
168
  },
169
  {
170
+ "epoch": 1.8,
171
  "learning_rate": 3.5493827160493834e-05,
172
+ "loss": 0.1098,
173
  "step": 130
174
  },
175
  {
176
+ "epoch": 1.87,
177
  "learning_rate": 3.472222222222222e-05,
178
+ "loss": 0.0787,
179
  "step": 135
180
  },
181
  {
182
+ "epoch": 1.93,
183
  "learning_rate": 3.395061728395062e-05,
184
+ "loss": 0.1247,
185
  "step": 140
186
  },
187
  {
188
  "epoch": 1.99,
189
+ "eval_accuracy": 0.9767441860465116,
190
+ "eval_loss": 0.08047417551279068,
191
+ "eval_runtime": 7.6368,
192
+ "eval_samples_per_second": 135.135,
193
+ "eval_steps_per_second": 4.321,
194
+ "step": 144
195
  },
196
  {
197
+ "epoch": 2.01,
198
+ "learning_rate": 3.317901234567901e-05,
199
+ "loss": 0.1116,
 
 
 
200
  "step": 145
201
  },
202
  {
203
+ "epoch": 2.08,
204
  "learning_rate": 3.240740740740741e-05,
205
+ "loss": 0.122,
206
  "step": 150
207
  },
208
  {
209
+ "epoch": 2.15,
210
  "learning_rate": 3.16358024691358e-05,
211
+ "loss": 0.1109,
212
  "step": 155
213
  },
214
  {
215
+ "epoch": 2.22,
216
  "learning_rate": 3.08641975308642e-05,
217
+ "loss": 0.109,
218
  "step": 160
219
  },
220
  {
221
+ "epoch": 2.29,
222
  "learning_rate": 3.0092592592592593e-05,
223
+ "loss": 0.104,
224
  "step": 165
225
  },
226
  {
227
+ "epoch": 2.36,
228
  "learning_rate": 2.9320987654320992e-05,
229
+ "loss": 0.0849,
230
  "step": 170
231
  },
232
  {
233
+ "epoch": 2.43,
234
  "learning_rate": 2.8549382716049384e-05,
235
+ "loss": 0.1088,
236
  "step": 175
237
  },
238
  {
239
+ "epoch": 2.49,
240
  "learning_rate": 2.777777777777778e-05,
241
+ "loss": 0.0988,
242
  "step": 180
243
  },
244
  {
245
+ "epoch": 2.56,
246
  "learning_rate": 2.700617283950617e-05,
247
+ "loss": 0.1072,
248
  "step": 185
249
  },
250
  {
251
+ "epoch": 2.63,
252
  "learning_rate": 2.623456790123457e-05,
253
+ "loss": 0.1098,
254
  "step": 190
255
  },
256
  {
257
+ "epoch": 2.7,
258
  "learning_rate": 2.5462962962962965e-05,
259
+ "loss": 0.1288,
260
  "step": 195
261
  },
262
  {
263
+ "epoch": 2.77,
264
  "learning_rate": 2.4691358024691357e-05,
265
+ "loss": 0.1401,
266
  "step": 200
267
  },
268
  {
269
+ "epoch": 2.84,
270
  "learning_rate": 2.3919753086419755e-05,
271
+ "loss": 0.1237,
272
  "step": 205
273
  },
274
  {
275
+ "epoch": 2.91,
276
  "learning_rate": 2.314814814814815e-05,
277
+ "loss": 0.1063,
278
  "step": 210
279
  },
280
  {
281
+ "epoch": 2.98,
282
  "learning_rate": 2.2376543209876542e-05,
283
+ "loss": 0.0742,
284
  "step": 215
285
  },
286
  {
287
+ "epoch": 2.99,
288
+ "eval_accuracy": 0.9767441860465116,
289
+ "eval_loss": 0.07210300117731094,
290
+ "eval_runtime": 7.3216,
291
+ "eval_samples_per_second": 140.953,
292
+ "eval_steps_per_second": 4.507,
293
+ "step": 216
294
  },
295
  {
296
+ "epoch": 3.05,
297
  "learning_rate": 2.1604938271604937e-05,
298
+ "loss": 0.1008,
299
  "step": 220
300
  },
301
  {
302
+ "epoch": 3.12,
303
  "learning_rate": 2.0833333333333336e-05,
304
+ "loss": 0.1196,
305
  "step": 225
306
  },
307
  {
308
+ "epoch": 3.19,
309
  "learning_rate": 2.006172839506173e-05,
310
+ "loss": 0.0712,
311
  "step": 230
312
  },
313
  {
314
+ "epoch": 3.26,
315
  "learning_rate": 1.9290123456790123e-05,
316
+ "loss": 0.0726,
317
  "step": 235
318
  },
319
  {
320
+ "epoch": 3.33,
321
  "learning_rate": 1.8518518518518518e-05,
322
+ "loss": 0.0964,
323
  "step": 240
324
  },
325
  {
326
+ "epoch": 3.4,
327
  "learning_rate": 1.7746913580246917e-05,
328
+ "loss": 0.0932,
329
  "step": 245
330
  },
331
  {
332
+ "epoch": 3.47,
333
  "learning_rate": 1.697530864197531e-05,
334
+ "loss": 0.1022,
335
  "step": 250
336
  },
337
  {
338
+ "epoch": 3.54,
339
  "learning_rate": 1.6203703703703704e-05,
340
+ "loss": 0.1033,
341
  "step": 255
342
  },
343
  {
344
+ "epoch": 3.6,
345
  "learning_rate": 1.54320987654321e-05,
346
+ "loss": 0.0913,
347
  "step": 260
348
  },
349
  {
350
+ "epoch": 3.67,
351
  "learning_rate": 1.4660493827160496e-05,
352
+ "loss": 0.0873,
353
  "step": 265
354
  },
355
  {
356
+ "epoch": 3.74,
357
  "learning_rate": 1.388888888888889e-05,
358
+ "loss": 0.1095,
359
  "step": 270
360
  },
361
  {
362
+ "epoch": 3.81,
363
  "learning_rate": 1.3117283950617285e-05,
364
+ "loss": 0.1047,
365
  "step": 275
366
  },
367
  {
368
+ "epoch": 3.88,
369
  "learning_rate": 1.2345679012345678e-05,
370
+ "loss": 0.0707,
371
  "step": 280
372
  },
373
  {
374
+ "epoch": 3.95,
375
  "learning_rate": 1.1574074074074075e-05,
376
+ "loss": 0.0745,
377
  "step": 285
378
  },
379
  {
380
  "epoch": 3.99,
381
+ "eval_accuracy": 0.9796511627906976,
382
+ "eval_loss": 0.07256749272346497,
383
+ "eval_runtime": 6.7881,
384
+ "eval_samples_per_second": 152.03,
385
+ "eval_steps_per_second": 4.861,
386
+ "step": 288
387
  },
388
  {
389
+ "epoch": 4.03,
390
+ "learning_rate": 1.0802469135802469e-05,
391
+ "loss": 0.129,
392
+ "step": 290
 
 
 
393
  },
394
  {
395
+ "epoch": 4.1,
396
  "learning_rate": 1.0030864197530866e-05,
397
+ "loss": 0.1298,
398
  "step": 295
399
  },
400
  {
401
+ "epoch": 4.16,
402
  "learning_rate": 9.259259259259259e-06,
403
+ "loss": 0.089,
404
  "step": 300
405
  },
406
  {
407
+ "epoch": 4.23,
408
  "learning_rate": 8.487654320987654e-06,
409
+ "loss": 0.0907,
410
  "step": 305
411
  },
412
  {
413
+ "epoch": 4.3,
414
  "learning_rate": 7.71604938271605e-06,
415
+ "loss": 0.0983,
416
  "step": 310
417
  },
418
  {
419
+ "epoch": 4.37,
420
  "learning_rate": 6.944444444444445e-06,
421
+ "loss": 0.0769,
422
  "step": 315
423
  },
424
  {
425
+ "epoch": 4.44,
426
  "learning_rate": 6.172839506172839e-06,
427
+ "loss": 0.1115,
428
  "step": 320
429
  },
430
  {
431
+ "epoch": 4.51,
432
  "learning_rate": 5.401234567901234e-06,
433
+ "loss": 0.058,
434
  "step": 325
435
  },
436
  {
437
+ "epoch": 4.58,
438
  "learning_rate": 4.6296296296296296e-06,
439
+ "loss": 0.0972,
440
  "step": 330
441
  },
442
  {
443
+ "epoch": 4.65,
444
  "learning_rate": 3.858024691358025e-06,
445
+ "loss": 0.1036,
446
  "step": 335
447
  },
448
  {
449
+ "epoch": 4.71,
450
  "learning_rate": 3.0864197530864196e-06,
451
+ "loss": 0.0675,
452
  "step": 340
453
  },
454
  {
455
+ "epoch": 4.78,
456
  "learning_rate": 2.3148148148148148e-06,
457
+ "loss": 0.0909,
458
  "step": 345
459
  },
460
  {
461
+ "epoch": 4.85,
462
  "learning_rate": 1.5432098765432098e-06,
463
+ "loss": 0.1148,
464
  "step": 350
465
  },
466
  {
467
+ "epoch": 4.92,
468
  "learning_rate": 7.716049382716049e-07,
469
+ "loss": 0.0802,
470
  "step": 355
471
  },
472
  {
473
+ "epoch": 4.99,
474
  "learning_rate": 0.0,
475
+ "loss": 0.1289,
476
  "step": 360
477
  },
478
  {
479
+ "epoch": 4.99,
480
+ "eval_accuracy": 0.9728682170542635,
481
+ "eval_loss": 0.08483530580997467,
482
+ "eval_runtime": 6.8918,
483
+ "eval_samples_per_second": 149.743,
484
+ "eval_steps_per_second": 4.788,
485
  "step": 360
486
  },
487
  {
488
+ "epoch": 4.99,
489
  "step": 360,
490
+ "total_flos": 1.151826529604567e+18,
491
+ "train_loss": 0.13382491601838006,
492
+ "train_runtime": 783.9355,
493
+ "train_samples_per_second": 59.195,
494
+ "train_steps_per_second": 0.459
495
  }
496
  ],
 
497
  "max_steps": 360,
498
  "num_train_epochs": 5,
499
+ "total_flos": 1.151826529604567e+18,
 
500
  "trial_name": null,
501
  "trial_params": null
502
  }