sbaner24 commited on
Commit
6a14190
1 Parent(s): 39fc751

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +769 -514
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.2847636938095093,
5
- "eval_runtime": 0.2092,
6
- "eval_samples_per_second": 114.718,
7
- "eval_steps_per_second": 4.78,
8
- "total_flos": 8.33038888356864e+17,
9
- "train_loss": 0.15836017400026323,
10
- "train_runtime": 489.483,
11
- "train_samples_per_second": 21.962,
12
- "train_steps_per_second": 0.102
13
  }
 
1
  {
2
+ "epoch": 44.44,
3
+ "eval_accuracy": 0.9814814814814815,
4
+ "eval_loss": 0.11719384789466858,
5
+ "eval_runtime": 0.3127,
6
+ "eval_samples_per_second": 172.705,
7
+ "eval_steps_per_second": 3.198,
8
+ "total_flos": 1.6586385457107272e+18,
9
+ "train_loss": 0.13114935230463742,
10
+ "train_runtime": 687.5457,
11
+ "train_samples_per_second": 34.979,
12
+ "train_steps_per_second": 0.145
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_accuracy": 1.0,
4
- "eval_loss": 0.2847636938095093,
5
- "eval_runtime": 0.2092,
6
- "eval_samples_per_second": 114.718,
7
- "eval_steps_per_second": 4.78
8
  }
 
1
  {
2
+ "epoch": 44.44,
3
+ "eval_accuracy": 0.9814814814814815,
4
+ "eval_loss": 0.11719384789466858,
5
+ "eval_runtime": 0.3127,
6
+ "eval_samples_per_second": 172.705,
7
+ "eval_steps_per_second": 3.198
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "total_flos": 8.33038888356864e+17,
4
- "train_loss": 0.15836017400026323,
5
- "train_runtime": 489.483,
6
- "train_samples_per_second": 21.962,
7
- "train_steps_per_second": 0.102
8
  }
 
1
  {
2
+ "epoch": 44.44,
3
+ "total_flos": 1.6586385457107272e+18,
4
+ "train_loss": 0.13114935230463742,
5
+ "train_runtime": 687.5457,
6
+ "train_samples_per_second": 34.979,
7
+ "train_steps_per_second": 0.145
8
  }
trainer_state.json CHANGED
@@ -1,775 +1,1030 @@
1
  {
2
- "best_metric": 1.0,
3
- "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM2/checkpoint-5",
4
- "epoch": 50.0,
5
- "global_step": 50,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.0,
12
- "learning_rate": 1e-05,
13
- "loss": 0.8085,
14
  "step": 1
15
  },
16
  {
17
- "epoch": 1.0,
18
- "eval_accuracy": 0.7083333333333334,
19
- "eval_loss": 0.6816200613975525,
20
- "eval_runtime": 0.2076,
21
- "eval_samples_per_second": 115.596,
22
- "eval_steps_per_second": 4.817,
23
- "step": 1
24
  },
25
  {
26
- "epoch": 2.0,
27
- "learning_rate": 2e-05,
28
- "loss": 0.8019,
 
 
 
29
  "step": 2
30
  },
31
  {
32
- "epoch": 2.0,
33
- "eval_accuracy": 0.7083333333333334,
34
- "eval_loss": 0.6144432425498962,
35
- "eval_runtime": 0.1574,
36
- "eval_samples_per_second": 152.515,
37
- "eval_steps_per_second": 6.355,
38
- "step": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  },
40
  {
41
- "epoch": 3.0,
42
  "learning_rate": 3e-05,
43
- "loss": 0.7399,
44
- "step": 3
45
  },
46
  {
47
- "epoch": 3.0,
48
- "eval_accuracy": 0.9166666666666666,
49
- "eval_loss": 0.5203752517700195,
50
- "eval_runtime": 0.1645,
51
- "eval_samples_per_second": 145.862,
52
- "eval_steps_per_second": 6.078,
53
- "step": 3
54
  },
55
  {
56
- "epoch": 4.0,
 
 
 
 
 
 
57
  "learning_rate": 4e-05,
58
- "loss": 0.619,
59
- "step": 4
60
  },
61
  {
62
  "epoch": 4.0,
63
- "eval_accuracy": 0.9583333333333334,
64
- "eval_loss": 0.4258662760257721,
65
- "eval_runtime": 0.1642,
66
- "eval_samples_per_second": 146.161,
67
- "eval_steps_per_second": 6.09,
68
- "step": 4
69
  },
70
  {
71
- "epoch": 5.0,
 
 
 
 
 
 
 
 
 
72
  "learning_rate": 5e-05,
73
- "loss": 0.4802,
74
- "step": 5
75
  },
76
  {
77
- "epoch": 5.0,
78
- "eval_accuracy": 1.0,
79
- "eval_loss": 0.2847636938095093,
80
- "eval_runtime": 0.1622,
81
- "eval_samples_per_second": 147.972,
82
- "eval_steps_per_second": 6.166,
83
- "step": 5
84
  },
85
  {
86
- "epoch": 6.0,
 
 
 
 
 
 
 
 
 
87
  "learning_rate": 4.888888888888889e-05,
88
- "loss": 0.3923,
89
- "step": 6
90
  },
91
  {
92
- "epoch": 6.0,
93
- "eval_accuracy": 0.9583333333333334,
94
- "eval_loss": 0.20102162659168243,
95
- "eval_runtime": 0.166,
96
- "eval_samples_per_second": 144.536,
97
- "eval_steps_per_second": 6.022,
98
- "step": 6
99
  },
100
  {
101
- "epoch": 7.0,
 
 
 
 
 
 
 
 
 
102
  "learning_rate": 4.7777777777777784e-05,
103
- "loss": 0.2963,
104
- "step": 7
105
  },
106
  {
107
- "epoch": 7.0,
108
- "eval_accuracy": 1.0,
109
- "eval_loss": 0.13336171209812164,
110
- "eval_runtime": 0.16,
111
- "eval_samples_per_second": 149.967,
112
- "eval_steps_per_second": 6.249,
113
- "step": 7
114
  },
115
  {
116
- "epoch": 8.0,
 
 
 
 
 
 
 
 
 
117
  "learning_rate": 4.666666666666667e-05,
118
- "loss": 0.2228,
119
- "step": 8
120
  },
121
  {
122
- "epoch": 8.0,
123
- "eval_accuracy": 0.9583333333333334,
124
- "eval_loss": 0.13366058468818665,
125
- "eval_runtime": 0.1586,
126
- "eval_samples_per_second": 151.334,
127
- "eval_steps_per_second": 6.306,
128
- "step": 8
129
  },
130
  {
131
- "epoch": 9.0,
132
  "learning_rate": 4.555555555555556e-05,
133
- "loss": 0.2053,
134
- "step": 9
135
  },
136
  {
137
- "epoch": 9.0,
138
- "eval_accuracy": 0.9583333333333334,
139
- "eval_loss": 0.10479468107223511,
140
- "eval_runtime": 0.1633,
141
- "eval_samples_per_second": 146.936,
142
- "eval_steps_per_second": 6.122,
143
- "step": 9
144
  },
145
  {
146
- "epoch": 10.0,
 
 
 
 
 
 
147
  "learning_rate": 4.4444444444444447e-05,
148
- "loss": 0.146,
149
- "step": 10
150
  },
151
  {
152
- "epoch": 10.0,
153
- "eval_accuracy": 1.0,
154
- "eval_loss": 0.05621153116226196,
155
- "eval_runtime": 0.1604,
156
- "eval_samples_per_second": 149.586,
157
- "eval_steps_per_second": 6.233,
158
- "step": 10
 
 
 
 
 
 
159
  },
160
  {
161
- "epoch": 11.0,
162
  "learning_rate": 4.3333333333333334e-05,
163
- "loss": 0.1088,
164
- "step": 11
165
  },
166
  {
167
- "epoch": 11.0,
168
- "eval_accuracy": 1.0,
169
- "eval_loss": 0.04064302146434784,
170
- "eval_runtime": 0.1649,
171
- "eval_samples_per_second": 145.512,
172
- "eval_steps_per_second": 6.063,
173
- "step": 11
174
  },
175
  {
176
- "epoch": 12.0,
 
 
 
 
 
 
177
  "learning_rate": 4.222222222222222e-05,
178
- "loss": 0.126,
179
- "step": 12
180
  },
181
  {
182
- "epoch": 12.0,
183
- "eval_accuracy": 1.0,
184
- "eval_loss": 0.02910543978214264,
185
- "eval_runtime": 0.1658,
186
- "eval_samples_per_second": 144.782,
187
- "eval_steps_per_second": 6.033,
188
- "step": 12
 
 
 
 
 
 
189
  },
190
  {
191
- "epoch": 13.0,
192
  "learning_rate": 4.111111111111111e-05,
193
- "loss": 0.1165,
194
- "step": 13
195
  },
196
  {
197
- "epoch": 13.0,
198
- "eval_accuracy": 1.0,
199
- "eval_loss": 0.03404092416167259,
200
- "eval_runtime": 0.1612,
201
- "eval_samples_per_second": 148.926,
202
- "eval_steps_per_second": 6.205,
203
- "step": 13
 
 
 
 
 
 
204
  },
205
  {
206
- "epoch": 14.0,
207
  "learning_rate": 4e-05,
208
- "loss": 0.1197,
209
- "step": 14
210
  },
211
  {
212
- "epoch": 14.0,
213
- "eval_accuracy": 1.0,
214
- "eval_loss": 0.030657896772027016,
215
- "eval_runtime": 0.165,
216
- "eval_samples_per_second": 145.474,
217
- "eval_steps_per_second": 6.061,
218
- "step": 14
219
  },
220
  {
221
- "epoch": 15.0,
 
 
 
 
 
 
 
 
 
222
  "learning_rate": 3.888888888888889e-05,
223
- "loss": 0.0658,
224
- "step": 15
225
  },
226
  {
227
- "epoch": 15.0,
228
- "eval_accuracy": 1.0,
229
- "eval_loss": 0.021686667576432228,
230
- "eval_runtime": 0.1609,
231
- "eval_samples_per_second": 149.201,
232
- "eval_steps_per_second": 6.217,
233
- "step": 15
234
  },
235
  {
236
- "epoch": 16.0,
 
 
 
 
 
 
 
 
 
237
  "learning_rate": 3.777777777777778e-05,
238
- "loss": 0.1109,
239
- "step": 16
240
  },
241
  {
242
- "epoch": 16.0,
243
- "eval_accuracy": 1.0,
244
- "eval_loss": 0.011111200787127018,
245
- "eval_runtime": 0.1575,
246
- "eval_samples_per_second": 152.39,
247
- "eval_steps_per_second": 6.35,
248
- "step": 16
249
  },
250
  {
251
- "epoch": 17.0,
 
 
 
 
 
 
 
 
 
252
  "learning_rate": 3.6666666666666666e-05,
253
- "loss": 0.1,
254
- "step": 17
255
  },
256
  {
257
- "epoch": 17.0,
258
- "eval_accuracy": 1.0,
259
- "eval_loss": 0.007706010714173317,
260
- "eval_runtime": 0.1612,
261
- "eval_samples_per_second": 148.841,
262
- "eval_steps_per_second": 6.202,
263
- "step": 17
264
  },
265
  {
266
- "epoch": 18.0,
267
  "learning_rate": 3.555555555555556e-05,
268
- "loss": 0.0811,
269
- "step": 18
270
  },
271
  {
272
- "epoch": 18.0,
273
- "eval_accuracy": 1.0,
274
- "eval_loss": 0.00625573331490159,
275
- "eval_runtime": 0.1686,
276
- "eval_samples_per_second": 142.39,
277
- "eval_steps_per_second": 5.933,
278
- "step": 18
 
 
 
 
 
 
279
  },
280
  {
281
- "epoch": 19.0,
282
  "learning_rate": 3.444444444444445e-05,
283
- "loss": 0.0829,
284
- "step": 19
285
  },
286
  {
287
- "epoch": 19.0,
288
- "eval_accuracy": 1.0,
289
- "eval_loss": 0.004591991659253836,
290
- "eval_runtime": 0.1603,
291
- "eval_samples_per_second": 149.685,
292
- "eval_steps_per_second": 6.237,
293
- "step": 19
294
  },
295
  {
296
- "epoch": 20.0,
 
 
 
 
 
 
297
  "learning_rate": 3.3333333333333335e-05,
298
- "loss": 0.07,
299
- "step": 20
300
  },
301
  {
302
- "epoch": 20.0,
303
- "eval_accuracy": 1.0,
304
- "eval_loss": 0.003371628001332283,
305
- "eval_runtime": 0.1648,
306
- "eval_samples_per_second": 145.588,
307
- "eval_steps_per_second": 6.066,
308
- "step": 20
 
 
 
 
 
 
309
  },
310
  {
311
- "epoch": 21.0,
312
  "learning_rate": 3.222222222222223e-05,
313
- "loss": 0.0517,
314
- "step": 21
315
  },
316
  {
317
- "epoch": 21.0,
318
- "eval_accuracy": 1.0,
319
- "eval_loss": 0.002988017164170742,
320
- "eval_runtime": 0.1649,
321
- "eval_samples_per_second": 145.521,
322
- "eval_steps_per_second": 6.063,
323
- "step": 21
324
  },
325
  {
326
- "epoch": 22.0,
 
 
 
 
 
 
327
  "learning_rate": 3.111111111111111e-05,
328
- "loss": 0.0974,
329
- "step": 22
330
  },
331
  {
332
- "epoch": 22.0,
333
- "eval_accuracy": 1.0,
334
- "eval_loss": 0.0038543986156582832,
335
- "eval_runtime": 0.1601,
336
- "eval_samples_per_second": 149.952,
337
- "eval_steps_per_second": 6.248,
338
- "step": 22
 
 
 
 
 
 
339
  },
340
  {
341
- "epoch": 23.0,
342
  "learning_rate": 3e-05,
343
- "loss": 0.1085,
344
- "step": 23
345
  },
346
  {
347
- "epoch": 23.0,
348
- "eval_accuracy": 1.0,
349
- "eval_loss": 0.005148293450474739,
350
- "eval_runtime": 0.159,
351
- "eval_samples_per_second": 150.916,
352
- "eval_steps_per_second": 6.288,
353
- "step": 23
354
  },
355
  {
356
- "epoch": 24.0,
 
 
 
 
 
 
 
 
 
357
  "learning_rate": 2.8888888888888888e-05,
358
- "loss": 0.0744,
359
- "step": 24
360
  },
361
  {
362
- "epoch": 24.0,
363
- "eval_accuracy": 1.0,
364
- "eval_loss": 0.005187243223190308,
365
- "eval_runtime": 0.1599,
366
- "eval_samples_per_second": 150.121,
367
- "eval_steps_per_second": 6.255,
368
- "step": 24
369
  },
370
  {
371
- "epoch": 25.0,
 
 
 
 
 
 
 
 
 
372
  "learning_rate": 2.777777777777778e-05,
373
- "loss": 0.058,
374
- "step": 25
375
  },
376
  {
377
- "epoch": 25.0,
378
- "eval_accuracy": 1.0,
379
- "eval_loss": 0.00444122264161706,
380
- "eval_runtime": 0.1641,
381
- "eval_samples_per_second": 146.237,
382
- "eval_steps_per_second": 6.093,
383
- "step": 25
 
 
 
 
 
 
384
  },
385
  {
386
- "epoch": 26.0,
387
  "learning_rate": 2.6666666666666667e-05,
388
- "loss": 0.0767,
389
- "step": 26
390
  },
391
  {
392
- "epoch": 26.0,
393
- "eval_accuracy": 1.0,
394
- "eval_loss": 0.0041044591926038265,
395
- "eval_runtime": 0.1581,
396
- "eval_samples_per_second": 151.782,
397
- "eval_steps_per_second": 6.324,
398
- "step": 26
399
  },
400
  {
401
- "epoch": 27.0,
402
  "learning_rate": 2.5555555555555554e-05,
403
- "loss": 0.0909,
404
- "step": 27
405
  },
406
  {
407
- "epoch": 27.0,
408
- "eval_accuracy": 1.0,
409
- "eval_loss": 0.0028256354853510857,
410
- "eval_runtime": 0.161,
411
- "eval_samples_per_second": 149.037,
412
- "eval_steps_per_second": 6.21,
413
- "step": 27
414
  },
415
  {
416
- "epoch": 28.0,
 
 
 
 
 
 
417
  "learning_rate": 2.4444444444444445e-05,
418
- "loss": 0.0844,
419
- "step": 28
420
  },
421
  {
422
- "epoch": 28.0,
423
- "eval_accuracy": 1.0,
424
- "eval_loss": 0.0023171473294496536,
425
- "eval_runtime": 0.1579,
426
- "eval_samples_per_second": 152.033,
427
- "eval_steps_per_second": 6.335,
428
- "step": 28
429
  },
430
  {
431
- "epoch": 29.0,
 
 
 
 
 
 
432
  "learning_rate": 2.3333333333333336e-05,
433
- "loss": 0.0798,
434
- "step": 29
435
  },
436
  {
437
- "epoch": 29.0,
438
- "eval_accuracy": 1.0,
439
- "eval_loss": 0.002017478458583355,
440
- "eval_runtime": 0.1638,
441
- "eval_samples_per_second": 146.521,
442
- "eval_steps_per_second": 6.105,
443
- "step": 29
 
 
 
 
 
 
444
  },
445
  {
446
- "epoch": 30.0,
447
  "learning_rate": 2.2222222222222223e-05,
448
- "loss": 0.0643,
449
- "step": 30
450
  },
451
  {
452
- "epoch": 30.0,
453
- "eval_accuracy": 1.0,
454
- "eval_loss": 0.0018245158717036247,
455
- "eval_runtime": 0.1702,
456
- "eval_samples_per_second": 141.046,
457
- "eval_steps_per_second": 5.877,
458
- "step": 30
 
 
 
 
 
 
459
  },
460
  {
461
- "epoch": 31.0,
462
  "learning_rate": 2.111111111111111e-05,
463
- "loss": 0.0767,
464
- "step": 31
465
  },
466
  {
467
- "epoch": 31.0,
468
- "eval_accuracy": 1.0,
469
- "eval_loss": 0.0018319872906431556,
470
- "eval_runtime": 0.1571,
471
- "eval_samples_per_second": 152.797,
472
- "eval_steps_per_second": 6.367,
473
- "step": 31
474
  },
475
  {
476
- "epoch": 32.0,
 
 
 
 
 
 
 
 
 
477
  "learning_rate": 2e-05,
478
- "loss": 0.081,
479
- "step": 32
480
  },
481
  {
482
- "epoch": 32.0,
483
- "eval_accuracy": 1.0,
484
- "eval_loss": 0.001833016169257462,
485
- "eval_runtime": 0.1667,
486
- "eval_samples_per_second": 143.959,
487
- "eval_steps_per_second": 5.998,
488
- "step": 32
489
  },
490
  {
491
- "epoch": 33.0,
 
 
 
 
 
 
 
 
 
492
  "learning_rate": 1.888888888888889e-05,
493
- "loss": 0.0817,
494
- "step": 33
495
  },
496
  {
497
- "epoch": 33.0,
498
- "eval_accuracy": 1.0,
499
- "eval_loss": 0.0017682599136605859,
500
- "eval_runtime": 0.1588,
501
- "eval_samples_per_second": 151.166,
502
- "eval_steps_per_second": 6.299,
503
- "step": 33
 
 
 
 
 
 
504
  },
505
  {
506
- "epoch": 34.0,
507
  "learning_rate": 1.777777777777778e-05,
508
- "loss": 0.0746,
509
- "step": 34
510
  },
511
  {
512
- "epoch": 34.0,
513
- "eval_accuracy": 1.0,
514
- "eval_loss": 0.0017119242111220956,
515
- "eval_runtime": 0.1641,
516
- "eval_samples_per_second": 146.273,
517
- "eval_steps_per_second": 6.095,
518
- "step": 34
 
 
 
 
 
 
519
  },
520
  {
521
- "epoch": 35.0,
522
  "learning_rate": 1.6666666666666667e-05,
523
- "loss": 0.0591,
524
- "step": 35
525
  },
526
  {
527
- "epoch": 35.0,
528
- "eval_accuracy": 1.0,
529
- "eval_loss": 0.0016080854693427682,
530
- "eval_runtime": 0.1578,
531
- "eval_samples_per_second": 152.087,
532
- "eval_steps_per_second": 6.337,
533
- "step": 35
534
  },
535
  {
536
- "epoch": 36.0,
537
  "learning_rate": 1.5555555555555555e-05,
538
- "loss": 0.0609,
539
- "step": 36
540
  },
541
  {
542
- "epoch": 36.0,
543
- "eval_accuracy": 1.0,
544
- "eval_loss": 0.001627271412871778,
545
- "eval_runtime": 0.1581,
546
- "eval_samples_per_second": 151.817,
547
- "eval_steps_per_second": 6.326,
548
- "step": 36
 
 
 
 
 
 
549
  },
550
  {
551
- "epoch": 37.0,
552
  "learning_rate": 1.4444444444444444e-05,
553
- "loss": 0.0652,
554
- "step": 37
555
  },
556
  {
557
- "epoch": 37.0,
558
- "eval_accuracy": 1.0,
559
- "eval_loss": 0.001705991686321795,
560
- "eval_runtime": 0.1582,
561
- "eval_samples_per_second": 151.676,
562
- "eval_steps_per_second": 6.32,
563
- "step": 37
564
  },
565
  {
566
- "epoch": 38.0,
 
 
 
 
 
 
567
  "learning_rate": 1.3333333333333333e-05,
568
- "loss": 0.0798,
569
- "step": 38
570
  },
571
  {
572
- "epoch": 38.0,
573
- "eval_accuracy": 1.0,
574
- "eval_loss": 0.0017671029781922698,
575
- "eval_runtime": 0.1661,
576
- "eval_samples_per_second": 144.466,
577
- "eval_steps_per_second": 6.019,
578
- "step": 38
 
 
 
 
 
 
579
  },
580
  {
581
- "epoch": 39.0,
582
  "learning_rate": 1.2222222222222222e-05,
583
- "loss": 0.0537,
584
- "step": 39
585
  },
586
  {
587
- "epoch": 39.0,
588
- "eval_accuracy": 1.0,
589
- "eval_loss": 0.0017081074183806777,
590
- "eval_runtime": 0.163,
591
- "eval_samples_per_second": 147.252,
592
- "eval_steps_per_second": 6.135,
593
- "step": 39
594
  },
595
  {
596
- "epoch": 40.0,
 
 
 
 
 
 
597
  "learning_rate": 1.1111111111111112e-05,
598
- "loss": 0.0772,
599
- "step": 40
600
  },
601
  {
602
- "epoch": 40.0,
603
- "eval_accuracy": 1.0,
604
- "eval_loss": 0.0016276226378977299,
605
- "eval_runtime": 0.2469,
606
- "eval_samples_per_second": 97.204,
607
- "eval_steps_per_second": 4.05,
608
- "step": 40
 
 
 
 
 
 
609
  },
610
  {
611
- "epoch": 41.0,
612
  "learning_rate": 1e-05,
613
- "loss": 0.0595,
614
- "step": 41
615
  },
616
  {
617
- "epoch": 41.0,
618
- "eval_accuracy": 1.0,
619
- "eval_loss": 0.0015062604798004031,
620
- "eval_runtime": 0.1595,
621
- "eval_samples_per_second": 150.501,
622
- "eval_steps_per_second": 6.271,
623
- "step": 41
 
 
 
 
 
 
624
  },
625
  {
626
- "epoch": 42.0,
627
  "learning_rate": 8.88888888888889e-06,
628
- "loss": 0.0696,
629
- "step": 42
630
  },
631
  {
632
- "epoch": 42.0,
633
- "eval_accuracy": 1.0,
634
- "eval_loss": 0.0013803044566884637,
635
- "eval_runtime": 0.16,
636
- "eval_samples_per_second": 150.011,
637
- "eval_steps_per_second": 6.25,
638
- "step": 42
639
  },
640
  {
641
- "epoch": 43.0,
 
 
 
 
 
 
 
 
 
642
  "learning_rate": 7.777777777777777e-06,
643
- "loss": 0.0654,
644
- "step": 43
645
  },
646
  {
647
- "epoch": 43.0,
648
- "eval_accuracy": 1.0,
649
- "eval_loss": 0.0012544242199510336,
650
- "eval_runtime": 0.1604,
651
- "eval_samples_per_second": 149.654,
652
- "eval_steps_per_second": 6.236,
653
- "step": 43
654
  },
655
  {
656
- "epoch": 44.0,
 
 
 
 
 
 
 
 
 
657
  "learning_rate": 6.666666666666667e-06,
658
- "loss": 0.0714,
659
- "step": 44
660
  },
661
  {
662
- "epoch": 44.0,
663
- "eval_accuracy": 1.0,
664
- "eval_loss": 0.0011552057694643736,
665
- "eval_runtime": 0.1588,
666
- "eval_samples_per_second": 151.139,
667
- "eval_steps_per_second": 6.297,
668
- "step": 44
669
  },
670
  {
671
- "epoch": 45.0,
672
  "learning_rate": 5.555555555555556e-06,
673
- "loss": 0.0691,
674
- "step": 45
675
  },
676
  {
677
- "epoch": 45.0,
678
- "eval_accuracy": 1.0,
679
- "eval_loss": 0.0010549549479037523,
680
- "eval_runtime": 0.1592,
681
- "eval_samples_per_second": 150.723,
682
- "eval_steps_per_second": 6.28,
683
- "step": 45
 
 
 
 
 
 
684
  },
685
  {
686
- "epoch": 46.0,
687
  "learning_rate": 4.444444444444445e-06,
688
- "loss": 0.0737,
689
- "step": 46
690
  },
691
  {
692
- "epoch": 46.0,
693
- "eval_accuracy": 1.0,
694
- "eval_loss": 0.0009879550198093057,
695
- "eval_runtime": 0.1602,
696
- "eval_samples_per_second": 149.848,
697
- "eval_steps_per_second": 6.244,
698
- "step": 46
 
 
 
 
 
 
699
  },
700
  {
701
- "epoch": 47.0,
702
  "learning_rate": 3.3333333333333333e-06,
703
- "loss": 0.0578,
704
- "step": 47
705
  },
706
  {
707
- "epoch": 47.0,
708
- "eval_accuracy": 1.0,
709
- "eval_loss": 0.0009586880914866924,
710
- "eval_runtime": 0.1859,
711
- "eval_samples_per_second": 129.102,
712
- "eval_steps_per_second": 5.379,
713
- "step": 47
714
  },
715
  {
716
- "epoch": 48.0,
 
 
 
 
 
 
717
  "learning_rate": 2.2222222222222225e-06,
718
- "loss": 0.0708,
719
- "step": 48
720
  },
721
  {
722
- "epoch": 48.0,
723
- "eval_accuracy": 1.0,
724
- "eval_loss": 0.0009338347590528429,
725
- "eval_runtime": 0.1596,
726
- "eval_samples_per_second": 150.35,
727
- "eval_steps_per_second": 6.265,
728
- "step": 48
729
  },
730
  {
731
- "epoch": 49.0,
732
- "learning_rate": 1.1111111111111112e-06,
733
- "loss": 0.0621,
734
- "step": 49
735
  },
736
  {
737
- "epoch": 49.0,
738
- "eval_accuracy": 1.0,
739
- "eval_loss": 0.0009125128271989524,
740
- "eval_runtime": 0.1587,
741
- "eval_samples_per_second": 151.272,
742
- "eval_steps_per_second": 6.303,
743
- "step": 49
744
  },
745
  {
746
- "epoch": 50.0,
747
- "learning_rate": 0.0,
748
- "loss": 0.1288,
749
- "step": 50
750
  },
751
  {
752
- "epoch": 50.0,
753
- "eval_accuracy": 1.0,
754
- "eval_loss": 0.0009006464970298111,
755
- "eval_runtime": 0.164,
756
- "eval_samples_per_second": 146.315,
757
- "eval_steps_per_second": 6.096,
758
- "step": 50
759
  },
760
  {
761
- "epoch": 50.0,
762
- "step": 50,
763
- "total_flos": 8.33038888356864e+17,
764
- "train_loss": 0.15836017400026323,
765
- "train_runtime": 489.483,
766
- "train_samples_per_second": 21.962,
767
- "train_steps_per_second": 0.102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
768
  }
769
  ],
770
- "max_steps": 50,
771
  "num_train_epochs": 50,
772
- "total_flos": 8.33038888356864e+17,
773
  "trial_name": null,
774
  "trial_params": null
775
  }
 
1
  {
2
+ "best_metric": 0.9814814814814815,
3
+ "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM2/checkpoint-20",
4
+ "epoch": 44.44444444444444,
5
+ "global_step": 100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.44,
12
+ "learning_rate": 5e-06,
13
+ "loss": 0.6465,
14
  "step": 1
15
  },
16
  {
17
+ "epoch": 0.89,
18
+ "learning_rate": 1e-05,
19
+ "loss": 0.6676,
20
+ "step": 2
 
 
 
21
  },
22
  {
23
+ "epoch": 0.89,
24
+ "eval_accuracy": 0.7222222222222222,
25
+ "eval_loss": 0.6180152297019958,
26
+ "eval_runtime": 0.2619,
27
+ "eval_samples_per_second": 206.218,
28
+ "eval_steps_per_second": 3.819,
29
  "step": 2
30
  },
31
  {
32
+ "epoch": 1.33,
33
+ "learning_rate": 1.5e-05,
34
+ "loss": 0.5752,
35
+ "step": 3
36
+ },
37
+ {
38
+ "epoch": 1.78,
39
+ "learning_rate": 2e-05,
40
+ "loss": 0.5805,
41
+ "step": 4
42
+ },
43
+ {
44
+ "epoch": 1.78,
45
+ "eval_accuracy": 0.7592592592592593,
46
+ "eval_loss": 0.5003750920295715,
47
+ "eval_runtime": 0.2607,
48
+ "eval_samples_per_second": 207.107,
49
+ "eval_steps_per_second": 3.835,
50
+ "step": 4
51
+ },
52
+ {
53
+ "epoch": 2.22,
54
+ "learning_rate": 2.5e-05,
55
+ "loss": 0.4833,
56
+ "step": 5
57
  },
58
  {
59
+ "epoch": 2.67,
60
  "learning_rate": 3e-05,
61
+ "loss": 0.5012,
62
+ "step": 6
63
  },
64
  {
65
+ "epoch": 2.67,
66
+ "eval_accuracy": 0.9629629629629629,
67
+ "eval_loss": 0.37833932042121887,
68
+ "eval_runtime": 0.2715,
69
+ "eval_samples_per_second": 198.886,
70
+ "eval_steps_per_second": 3.683,
71
+ "step": 6
72
  },
73
  {
74
+ "epoch": 3.11,
75
+ "learning_rate": 3.5e-05,
76
+ "loss": 0.3602,
77
+ "step": 7
78
+ },
79
+ {
80
+ "epoch": 3.56,
81
  "learning_rate": 4e-05,
82
+ "loss": 0.396,
83
+ "step": 8
84
  },
85
  {
86
  "epoch": 4.0,
87
+ "learning_rate": 4.5e-05,
88
+ "loss": 0.2794,
89
+ "step": 9
 
 
 
90
  },
91
  {
92
+ "epoch": 4.0,
93
+ "eval_accuracy": 0.9629629629629629,
94
+ "eval_loss": 0.22847579419612885,
95
+ "eval_runtime": 0.269,
96
+ "eval_samples_per_second": 200.773,
97
+ "eval_steps_per_second": 3.718,
98
+ "step": 9
99
+ },
100
+ {
101
+ "epoch": 4.44,
102
  "learning_rate": 5e-05,
103
+ "loss": 0.3094,
104
+ "step": 10
105
  },
106
  {
107
+ "epoch": 4.89,
108
+ "learning_rate": 4.9444444444444446e-05,
109
+ "loss": 0.2695,
110
+ "step": 11
 
 
 
111
  },
112
  {
113
+ "epoch": 4.89,
114
+ "eval_accuracy": 0.8888888888888888,
115
+ "eval_loss": 0.25508877635002136,
116
+ "eval_runtime": 0.2701,
117
+ "eval_samples_per_second": 199.957,
118
+ "eval_steps_per_second": 3.703,
119
+ "step": 11
120
+ },
121
+ {
122
+ "epoch": 5.33,
123
  "learning_rate": 4.888888888888889e-05,
124
+ "loss": 0.2595,
125
+ "step": 12
126
  },
127
  {
128
+ "epoch": 5.78,
129
+ "learning_rate": 4.8333333333333334e-05,
130
+ "loss": 0.2782,
131
+ "step": 13
 
 
 
132
  },
133
  {
134
+ "epoch": 5.78,
135
+ "eval_accuracy": 0.9629629629629629,
136
+ "eval_loss": 0.10787578672170639,
137
+ "eval_runtime": 0.2599,
138
+ "eval_samples_per_second": 207.776,
139
+ "eval_steps_per_second": 3.848,
140
+ "step": 13
141
+ },
142
+ {
143
+ "epoch": 6.22,
144
  "learning_rate": 4.7777777777777784e-05,
145
+ "loss": 0.1556,
146
+ "step": 14
147
  },
148
  {
149
+ "epoch": 6.67,
150
+ "learning_rate": 4.722222222222222e-05,
151
+ "loss": 0.2131,
152
+ "step": 15
 
 
 
153
  },
154
  {
155
+ "epoch": 6.67,
156
+ "eval_accuracy": 0.9629629629629629,
157
+ "eval_loss": 0.12049588561058044,
158
+ "eval_runtime": 0.2817,
159
+ "eval_samples_per_second": 191.698,
160
+ "eval_steps_per_second": 3.55,
161
+ "step": 15
162
+ },
163
+ {
164
+ "epoch": 7.11,
165
  "learning_rate": 4.666666666666667e-05,
166
+ "loss": 0.1482,
167
+ "step": 16
168
  },
169
  {
170
+ "epoch": 7.56,
171
+ "learning_rate": 4.6111111111111115e-05,
172
+ "loss": 0.1872,
173
+ "step": 17
 
 
 
174
  },
175
  {
176
+ "epoch": 8.0,
177
  "learning_rate": 4.555555555555556e-05,
178
+ "loss": 0.1537,
179
+ "step": 18
180
  },
181
  {
182
+ "epoch": 8.0,
183
+ "eval_accuracy": 0.9629629629629629,
184
+ "eval_loss": 0.18612359464168549,
185
+ "eval_runtime": 0.2613,
186
+ "eval_samples_per_second": 206.693,
187
+ "eval_steps_per_second": 3.828,
188
+ "step": 18
189
  },
190
  {
191
+ "epoch": 8.44,
192
+ "learning_rate": 4.5e-05,
193
+ "loss": 0.1043,
194
+ "step": 19
195
+ },
196
+ {
197
+ "epoch": 8.89,
198
  "learning_rate": 4.4444444444444447e-05,
199
+ "loss": 0.1739,
200
+ "step": 20
201
  },
202
  {
203
+ "epoch": 8.89,
204
+ "eval_accuracy": 0.9814814814814815,
205
+ "eval_loss": 0.11719384789466858,
206
+ "eval_runtime": 0.2622,
207
+ "eval_samples_per_second": 205.974,
208
+ "eval_steps_per_second": 3.814,
209
+ "step": 20
210
+ },
211
+ {
212
+ "epoch": 9.33,
213
+ "learning_rate": 4.388888888888889e-05,
214
+ "loss": 0.3945,
215
+ "step": 21
216
  },
217
  {
218
+ "epoch": 9.78,
219
  "learning_rate": 4.3333333333333334e-05,
220
+ "loss": 0.1059,
221
+ "step": 22
222
  },
223
  {
224
+ "epoch": 9.78,
225
+ "eval_accuracy": 0.9814814814814815,
226
+ "eval_loss": 0.10919703543186188,
227
+ "eval_runtime": 0.2607,
228
+ "eval_samples_per_second": 207.114,
229
+ "eval_steps_per_second": 3.835,
230
+ "step": 22
231
  },
232
  {
233
+ "epoch": 10.22,
234
+ "learning_rate": 4.277777777777778e-05,
235
+ "loss": 0.1378,
236
+ "step": 23
237
+ },
238
+ {
239
+ "epoch": 10.67,
240
  "learning_rate": 4.222222222222222e-05,
241
+ "loss": 0.146,
242
+ "step": 24
243
  },
244
  {
245
+ "epoch": 10.67,
246
+ "eval_accuracy": 0.9814814814814815,
247
+ "eval_loss": 0.10719860345125198,
248
+ "eval_runtime": 0.2613,
249
+ "eval_samples_per_second": 206.624,
250
+ "eval_steps_per_second": 3.826,
251
+ "step": 24
252
+ },
253
+ {
254
+ "epoch": 11.11,
255
+ "learning_rate": 4.166666666666667e-05,
256
+ "loss": 0.1155,
257
+ "step": 25
258
  },
259
  {
260
+ "epoch": 11.56,
261
  "learning_rate": 4.111111111111111e-05,
262
+ "loss": 0.0854,
263
+ "step": 26
264
  },
265
  {
266
+ "epoch": 12.0,
267
+ "learning_rate": 4.055555555555556e-05,
268
+ "loss": 0.088,
269
+ "step": 27
270
+ },
271
+ {
272
+ "epoch": 12.0,
273
+ "eval_accuracy": 0.9814814814814815,
274
+ "eval_loss": 0.10145124793052673,
275
+ "eval_runtime": 0.2622,
276
+ "eval_samples_per_second": 205.964,
277
+ "eval_steps_per_second": 3.814,
278
+ "step": 27
279
  },
280
  {
281
+ "epoch": 12.44,
282
  "learning_rate": 4e-05,
283
+ "loss": 0.09,
284
+ "step": 28
285
  },
286
  {
287
+ "epoch": 12.89,
288
+ "learning_rate": 3.944444444444445e-05,
289
+ "loss": 0.1304,
290
+ "step": 29
 
 
 
291
  },
292
  {
293
+ "epoch": 12.89,
294
+ "eval_accuracy": 0.9814814814814815,
295
+ "eval_loss": 0.11511386185884476,
296
+ "eval_runtime": 0.2626,
297
+ "eval_samples_per_second": 205.674,
298
+ "eval_steps_per_second": 3.809,
299
+ "step": 29
300
+ },
301
+ {
302
+ "epoch": 13.33,
303
  "learning_rate": 3.888888888888889e-05,
304
+ "loss": 0.0742,
305
+ "step": 30
306
  },
307
  {
308
+ "epoch": 13.78,
309
+ "learning_rate": 3.8333333333333334e-05,
310
+ "loss": 0.0924,
311
+ "step": 31
 
 
 
312
  },
313
  {
314
+ "epoch": 13.78,
315
+ "eval_accuracy": 0.9814814814814815,
316
+ "eval_loss": 0.131294846534729,
317
+ "eval_runtime": 0.2663,
318
+ "eval_samples_per_second": 202.811,
319
+ "eval_steps_per_second": 3.756,
320
+ "step": 31
321
+ },
322
+ {
323
+ "epoch": 14.22,
324
  "learning_rate": 3.777777777777778e-05,
325
+ "loss": 0.0868,
326
+ "step": 32
327
  },
328
  {
329
+ "epoch": 14.67,
330
+ "learning_rate": 3.722222222222222e-05,
331
+ "loss": 0.091,
332
+ "step": 33
 
 
 
333
  },
334
  {
335
+ "epoch": 14.67,
336
+ "eval_accuracy": 0.9814814814814815,
337
+ "eval_loss": 0.11780886352062225,
338
+ "eval_runtime": 0.2589,
339
+ "eval_samples_per_second": 208.602,
340
+ "eval_steps_per_second": 3.863,
341
+ "step": 33
342
+ },
343
+ {
344
+ "epoch": 15.11,
345
  "learning_rate": 3.6666666666666666e-05,
346
+ "loss": 0.1066,
347
+ "step": 34
348
  },
349
  {
350
+ "epoch": 15.56,
351
+ "learning_rate": 3.611111111111111e-05,
352
+ "loss": 0.1028,
353
+ "step": 35
 
 
 
354
  },
355
  {
356
+ "epoch": 16.0,
357
  "learning_rate": 3.555555555555556e-05,
358
+ "loss": 0.0508,
359
+ "step": 36
360
  },
361
  {
362
+ "epoch": 16.0,
363
+ "eval_accuracy": 0.9814814814814815,
364
+ "eval_loss": 0.09711939841508865,
365
+ "eval_runtime": 0.2672,
366
+ "eval_samples_per_second": 202.064,
367
+ "eval_steps_per_second": 3.742,
368
+ "step": 36
369
+ },
370
+ {
371
+ "epoch": 16.44,
372
+ "learning_rate": 3.5e-05,
373
+ "loss": 0.1105,
374
+ "step": 37
375
  },
376
  {
377
+ "epoch": 16.89,
378
  "learning_rate": 3.444444444444445e-05,
379
+ "loss": 0.1004,
380
+ "step": 38
381
  },
382
  {
383
+ "epoch": 16.89,
384
+ "eval_accuracy": 0.9814814814814815,
385
+ "eval_loss": 0.11753766983747482,
386
+ "eval_runtime": 0.2602,
387
+ "eval_samples_per_second": 207.572,
388
+ "eval_steps_per_second": 3.844,
389
+ "step": 38
390
  },
391
  {
392
+ "epoch": 17.33,
393
+ "learning_rate": 3.388888888888889e-05,
394
+ "loss": 0.0688,
395
+ "step": 39
396
+ },
397
+ {
398
+ "epoch": 17.78,
399
  "learning_rate": 3.3333333333333335e-05,
400
+ "loss": 0.1097,
401
+ "step": 40
402
  },
403
  {
404
+ "epoch": 17.78,
405
+ "eval_accuracy": 0.9629629629629629,
406
+ "eval_loss": 0.14230388402938843,
407
+ "eval_runtime": 0.2664,
408
+ "eval_samples_per_second": 202.739,
409
+ "eval_steps_per_second": 3.754,
410
+ "step": 40
411
+ },
412
+ {
413
+ "epoch": 18.22,
414
+ "learning_rate": 3.277777777777778e-05,
415
+ "loss": 0.1049,
416
+ "step": 41
417
  },
418
  {
419
+ "epoch": 18.67,
420
  "learning_rate": 3.222222222222223e-05,
421
+ "loss": 0.0758,
422
+ "step": 42
423
  },
424
  {
425
+ "epoch": 18.67,
426
+ "eval_accuracy": 0.9629629629629629,
427
+ "eval_loss": 0.15965422987937927,
428
+ "eval_runtime": 0.2786,
429
+ "eval_samples_per_second": 193.807,
430
+ "eval_steps_per_second": 3.589,
431
+ "step": 42
432
  },
433
  {
434
+ "epoch": 19.11,
435
+ "learning_rate": 3.1666666666666666e-05,
436
+ "loss": 0.0947,
437
+ "step": 43
438
+ },
439
+ {
440
+ "epoch": 19.56,
441
  "learning_rate": 3.111111111111111e-05,
442
+ "loss": 0.0425,
443
+ "step": 44
444
  },
445
  {
446
+ "epoch": 20.0,
447
+ "learning_rate": 3.055555555555556e-05,
448
+ "loss": 0.0687,
449
+ "step": 45
450
+ },
451
+ {
452
+ "epoch": 20.0,
453
+ "eval_accuracy": 0.9814814814814815,
454
+ "eval_loss": 0.12045230716466904,
455
+ "eval_runtime": 0.2678,
456
+ "eval_samples_per_second": 201.68,
457
+ "eval_steps_per_second": 3.735,
458
+ "step": 45
459
  },
460
  {
461
+ "epoch": 20.44,
462
  "learning_rate": 3e-05,
463
+ "loss": 0.0722,
464
+ "step": 46
465
  },
466
  {
467
+ "epoch": 20.89,
468
+ "learning_rate": 2.9444444444444448e-05,
469
+ "loss": 0.0513,
470
+ "step": 47
 
 
 
471
  },
472
  {
473
+ "epoch": 20.89,
474
+ "eval_accuracy": 0.9814814814814815,
475
+ "eval_loss": 0.1106700748205185,
476
+ "eval_runtime": 0.2701,
477
+ "eval_samples_per_second": 199.943,
478
+ "eval_steps_per_second": 3.703,
479
+ "step": 47
480
+ },
481
+ {
482
+ "epoch": 21.33,
483
  "learning_rate": 2.8888888888888888e-05,
484
+ "loss": 0.1179,
485
+ "step": 48
486
  },
487
  {
488
+ "epoch": 21.78,
489
+ "learning_rate": 2.8333333333333335e-05,
490
+ "loss": 0.0755,
491
+ "step": 49
 
 
 
492
  },
493
  {
494
+ "epoch": 21.78,
495
+ "eval_accuracy": 0.9814814814814815,
496
+ "eval_loss": 0.11499401181936264,
497
+ "eval_runtime": 0.2594,
498
+ "eval_samples_per_second": 208.189,
499
+ "eval_steps_per_second": 3.855,
500
+ "step": 49
501
+ },
502
+ {
503
+ "epoch": 22.22,
504
  "learning_rate": 2.777777777777778e-05,
505
+ "loss": 0.0559,
506
+ "step": 50
507
  },
508
  {
509
+ "epoch": 22.67,
510
+ "learning_rate": 2.7222222222222223e-05,
511
+ "loss": 0.0897,
512
+ "step": 51
513
+ },
514
+ {
515
+ "epoch": 22.67,
516
+ "eval_accuracy": 0.9629629629629629,
517
+ "eval_loss": 0.13321787118911743,
518
+ "eval_runtime": 0.2629,
519
+ "eval_samples_per_second": 205.413,
520
+ "eval_steps_per_second": 3.804,
521
+ "step": 51
522
  },
523
  {
524
+ "epoch": 23.11,
525
  "learning_rate": 2.6666666666666667e-05,
526
+ "loss": 0.0669,
527
+ "step": 52
528
  },
529
  {
530
+ "epoch": 23.56,
531
+ "learning_rate": 2.6111111111111114e-05,
532
+ "loss": 0.0994,
533
+ "step": 53
 
 
 
534
  },
535
  {
536
+ "epoch": 24.0,
537
  "learning_rate": 2.5555555555555554e-05,
538
+ "loss": 0.0439,
539
+ "step": 54
540
  },
541
  {
542
+ "epoch": 24.0,
543
+ "eval_accuracy": 0.9814814814814815,
544
+ "eval_loss": 0.12632034718990326,
545
+ "eval_runtime": 0.263,
546
+ "eval_samples_per_second": 205.294,
547
+ "eval_steps_per_second": 3.802,
548
+ "step": 54
549
  },
550
  {
551
+ "epoch": 24.44,
552
+ "learning_rate": 2.5e-05,
553
+ "loss": 0.0317,
554
+ "step": 55
555
+ },
556
+ {
557
+ "epoch": 24.89,
558
  "learning_rate": 2.4444444444444445e-05,
559
+ "loss": 0.0607,
560
+ "step": 56
561
  },
562
  {
563
+ "epoch": 24.89,
564
+ "eval_accuracy": 0.9814814814814815,
565
+ "eval_loss": 0.11113300919532776,
566
+ "eval_runtime": 0.2772,
567
+ "eval_samples_per_second": 194.823,
568
+ "eval_steps_per_second": 3.608,
569
+ "step": 56
570
  },
571
  {
572
+ "epoch": 25.33,
573
+ "learning_rate": 2.3888888888888892e-05,
574
+ "loss": 0.0379,
575
+ "step": 57
576
+ },
577
+ {
578
+ "epoch": 25.78,
579
  "learning_rate": 2.3333333333333336e-05,
580
+ "loss": 0.0719,
581
+ "step": 58
582
  },
583
  {
584
+ "epoch": 25.78,
585
+ "eval_accuracy": 0.9814814814814815,
586
+ "eval_loss": 0.10038212686777115,
587
+ "eval_runtime": 0.2604,
588
+ "eval_samples_per_second": 207.335,
589
+ "eval_steps_per_second": 3.84,
590
+ "step": 58
591
+ },
592
+ {
593
+ "epoch": 26.22,
594
+ "learning_rate": 2.277777777777778e-05,
595
+ "loss": 0.069,
596
+ "step": 59
597
  },
598
  {
599
+ "epoch": 26.67,
600
  "learning_rate": 2.2222222222222223e-05,
601
+ "loss": 0.0599,
602
+ "step": 60
603
  },
604
  {
605
+ "epoch": 26.67,
606
+ "eval_accuracy": 0.9814814814814815,
607
+ "eval_loss": 0.10639392584562302,
608
+ "eval_runtime": 0.2743,
609
+ "eval_samples_per_second": 196.877,
610
+ "eval_steps_per_second": 3.646,
611
+ "step": 60
612
+ },
613
+ {
614
+ "epoch": 27.11,
615
+ "learning_rate": 2.1666666666666667e-05,
616
+ "loss": 0.0882,
617
+ "step": 61
618
  },
619
  {
620
+ "epoch": 27.56,
621
  "learning_rate": 2.111111111111111e-05,
622
+ "loss": 0.0658,
623
+ "step": 62
624
  },
625
  {
626
+ "epoch": 28.0,
627
+ "learning_rate": 2.0555555555555555e-05,
628
+ "loss": 0.0613,
629
+ "step": 63
 
 
 
630
  },
631
  {
632
+ "epoch": 28.0,
633
+ "eval_accuracy": 0.9814814814814815,
634
+ "eval_loss": 0.1355225145816803,
635
+ "eval_runtime": 0.2608,
636
+ "eval_samples_per_second": 207.094,
637
+ "eval_steps_per_second": 3.835,
638
+ "step": 63
639
+ },
640
+ {
641
+ "epoch": 28.44,
642
  "learning_rate": 2e-05,
643
+ "loss": 0.0841,
644
+ "step": 64
645
  },
646
  {
647
+ "epoch": 28.89,
648
+ "learning_rate": 1.9444444444444445e-05,
649
+ "loss": 0.0689,
650
+ "step": 65
 
 
 
651
  },
652
  {
653
+ "epoch": 28.89,
654
+ "eval_accuracy": 0.9814814814814815,
655
+ "eval_loss": 0.14443787932395935,
656
+ "eval_runtime": 0.2612,
657
+ "eval_samples_per_second": 206.722,
658
+ "eval_steps_per_second": 3.828,
659
+ "step": 65
660
+ },
661
+ {
662
+ "epoch": 29.33,
663
  "learning_rate": 1.888888888888889e-05,
664
+ "loss": 0.0677,
665
+ "step": 66
666
  },
667
  {
668
+ "epoch": 29.78,
669
+ "learning_rate": 1.8333333333333333e-05,
670
+ "loss": 0.0754,
671
+ "step": 67
672
+ },
673
+ {
674
+ "epoch": 29.78,
675
+ "eval_accuracy": 0.9814814814814815,
676
+ "eval_loss": 0.13980631530284882,
677
+ "eval_runtime": 0.2608,
678
+ "eval_samples_per_second": 207.038,
679
+ "eval_steps_per_second": 3.834,
680
+ "step": 67
681
  },
682
  {
683
+ "epoch": 30.22,
684
  "learning_rate": 1.777777777777778e-05,
685
+ "loss": 0.0538,
686
+ "step": 68
687
  },
688
  {
689
+ "epoch": 30.67,
690
+ "learning_rate": 1.7222222222222224e-05,
691
+ "loss": 0.0835,
692
+ "step": 69
693
+ },
694
+ {
695
+ "epoch": 30.67,
696
+ "eval_accuracy": 0.9814814814814815,
697
+ "eval_loss": 0.13446056842803955,
698
+ "eval_runtime": 0.2647,
699
+ "eval_samples_per_second": 204.022,
700
+ "eval_steps_per_second": 3.778,
701
+ "step": 69
702
  },
703
  {
704
+ "epoch": 31.11,
705
  "learning_rate": 1.6666666666666667e-05,
706
+ "loss": 0.0378,
707
+ "step": 70
708
  },
709
  {
710
+ "epoch": 31.56,
711
+ "learning_rate": 1.6111111111111115e-05,
712
+ "loss": 0.1164,
713
+ "step": 71
 
 
 
714
  },
715
  {
716
+ "epoch": 32.0,
717
  "learning_rate": 1.5555555555555555e-05,
718
+ "loss": 0.0801,
719
+ "step": 72
720
  },
721
  {
722
+ "epoch": 32.0,
723
+ "eval_accuracy": 0.9814814814814815,
724
+ "eval_loss": 0.13478641211986542,
725
+ "eval_runtime": 0.2726,
726
+ "eval_samples_per_second": 198.093,
727
+ "eval_steps_per_second": 3.668,
728
+ "step": 72
729
+ },
730
+ {
731
+ "epoch": 32.44,
732
+ "learning_rate": 1.5e-05,
733
+ "loss": 0.0586,
734
+ "step": 73
735
  },
736
  {
737
+ "epoch": 32.89,
738
  "learning_rate": 1.4444444444444444e-05,
739
+ "loss": 0.0701,
740
+ "step": 74
741
  },
742
  {
743
+ "epoch": 32.89,
744
+ "eval_accuracy": 0.9814814814814815,
745
+ "eval_loss": 0.13650549948215485,
746
+ "eval_runtime": 0.2625,
747
+ "eval_samples_per_second": 205.719,
748
+ "eval_steps_per_second": 3.81,
749
+ "step": 74
750
  },
751
  {
752
+ "epoch": 33.33,
753
+ "learning_rate": 1.388888888888889e-05,
754
+ "loss": 0.0728,
755
+ "step": 75
756
+ },
757
+ {
758
+ "epoch": 33.78,
759
  "learning_rate": 1.3333333333333333e-05,
760
+ "loss": 0.0647,
761
+ "step": 76
762
  },
763
  {
764
+ "epoch": 33.78,
765
+ "eval_accuracy": 0.9814814814814815,
766
+ "eval_loss": 0.13482581079006195,
767
+ "eval_runtime": 0.2622,
768
+ "eval_samples_per_second": 205.981,
769
+ "eval_steps_per_second": 3.814,
770
+ "step": 76
771
+ },
772
+ {
773
+ "epoch": 34.22,
774
+ "learning_rate": 1.2777777777777777e-05,
775
+ "loss": 0.0499,
776
+ "step": 77
777
  },
778
  {
779
+ "epoch": 34.67,
780
  "learning_rate": 1.2222222222222222e-05,
781
+ "loss": 0.0982,
782
+ "step": 78
783
  },
784
  {
785
+ "epoch": 34.67,
786
+ "eval_accuracy": 0.9814814814814815,
787
+ "eval_loss": 0.1346072554588318,
788
+ "eval_runtime": 0.264,
789
+ "eval_samples_per_second": 204.546,
790
+ "eval_steps_per_second": 3.788,
791
+ "step": 78
792
  },
793
  {
794
+ "epoch": 35.11,
795
+ "learning_rate": 1.1666666666666668e-05,
796
+ "loss": 0.0999,
797
+ "step": 79
798
+ },
799
+ {
800
+ "epoch": 35.56,
801
  "learning_rate": 1.1111111111111112e-05,
802
+ "loss": 0.0657,
803
+ "step": 80
804
  },
805
  {
806
+ "epoch": 36.0,
807
+ "learning_rate": 1.0555555555555555e-05,
808
+ "loss": 0.0671,
809
+ "step": 81
810
+ },
811
+ {
812
+ "epoch": 36.0,
813
+ "eval_accuracy": 0.9814814814814815,
814
+ "eval_loss": 0.1377686858177185,
815
+ "eval_runtime": 0.2607,
816
+ "eval_samples_per_second": 207.162,
817
+ "eval_steps_per_second": 3.836,
818
+ "step": 81
819
  },
820
  {
821
+ "epoch": 36.44,
822
  "learning_rate": 1e-05,
823
+ "loss": 0.0674,
824
+ "step": 82
825
  },
826
  {
827
+ "epoch": 36.89,
828
+ "learning_rate": 9.444444444444445e-06,
829
+ "loss": 0.054,
830
+ "step": 83
831
+ },
832
+ {
833
+ "epoch": 36.89,
834
+ "eval_accuracy": 0.9814814814814815,
835
+ "eval_loss": 0.1371222585439682,
836
+ "eval_runtime": 0.2634,
837
+ "eval_samples_per_second": 205.012,
838
+ "eval_steps_per_second": 3.797,
839
+ "step": 83
840
  },
841
  {
842
+ "epoch": 37.33,
843
  "learning_rate": 8.88888888888889e-06,
844
+ "loss": 0.0314,
845
+ "step": 84
846
  },
847
  {
848
+ "epoch": 37.78,
849
+ "learning_rate": 8.333333333333334e-06,
850
+ "loss": 0.0735,
851
+ "step": 85
 
 
 
852
  },
853
  {
854
+ "epoch": 37.78,
855
+ "eval_accuracy": 0.9814814814814815,
856
+ "eval_loss": 0.13551822304725647,
857
+ "eval_runtime": 0.2625,
858
+ "eval_samples_per_second": 205.737,
859
+ "eval_steps_per_second": 3.81,
860
+ "step": 85
861
+ },
862
+ {
863
+ "epoch": 38.22,
864
  "learning_rate": 7.777777777777777e-06,
865
+ "loss": 0.0881,
866
+ "step": 86
867
  },
868
  {
869
+ "epoch": 38.67,
870
+ "learning_rate": 7.222222222222222e-06,
871
+ "loss": 0.0736,
872
+ "step": 87
 
 
 
873
  },
874
  {
875
+ "epoch": 38.67,
876
+ "eval_accuracy": 0.9814814814814815,
877
+ "eval_loss": 0.13492508232593536,
878
+ "eval_runtime": 0.2758,
879
+ "eval_samples_per_second": 195.796,
880
+ "eval_steps_per_second": 3.626,
881
+ "step": 87
882
+ },
883
+ {
884
+ "epoch": 39.11,
885
  "learning_rate": 6.666666666666667e-06,
886
+ "loss": 0.0648,
887
+ "step": 88
888
  },
889
  {
890
+ "epoch": 39.56,
891
+ "learning_rate": 6.111111111111111e-06,
892
+ "loss": 0.0543,
893
+ "step": 89
 
 
 
894
  },
895
  {
896
+ "epoch": 40.0,
897
  "learning_rate": 5.555555555555556e-06,
898
+ "loss": 0.0287,
899
+ "step": 90
900
  },
901
  {
902
+ "epoch": 40.0,
903
+ "eval_accuracy": 0.9814814814814815,
904
+ "eval_loss": 0.13293945789337158,
905
+ "eval_runtime": 0.2609,
906
+ "eval_samples_per_second": 207.01,
907
+ "eval_steps_per_second": 3.834,
908
+ "step": 90
909
+ },
910
+ {
911
+ "epoch": 40.44,
912
+ "learning_rate": 5e-06,
913
+ "loss": 0.0417,
914
+ "step": 91
915
  },
916
  {
917
+ "epoch": 40.89,
918
  "learning_rate": 4.444444444444445e-06,
919
+ "loss": 0.0539,
920
+ "step": 92
921
  },
922
  {
923
+ "epoch": 40.89,
924
+ "eval_accuracy": 0.9814814814814815,
925
+ "eval_loss": 0.1322045475244522,
926
+ "eval_runtime": 0.2653,
927
+ "eval_samples_per_second": 203.574,
928
+ "eval_steps_per_second": 3.77,
929
+ "step": 92
930
+ },
931
+ {
932
+ "epoch": 41.33,
933
+ "learning_rate": 3.888888888888889e-06,
934
+ "loss": 0.0602,
935
+ "step": 93
936
  },
937
  {
938
+ "epoch": 41.78,
939
  "learning_rate": 3.3333333333333333e-06,
940
+ "loss": 0.0483,
941
+ "step": 94
942
  },
943
  {
944
+ "epoch": 41.78,
945
+ "eval_accuracy": 0.9814814814814815,
946
+ "eval_loss": 0.13241925835609436,
947
+ "eval_runtime": 0.2756,
948
+ "eval_samples_per_second": 195.953,
949
+ "eval_steps_per_second": 3.629,
950
+ "step": 94
951
  },
952
  {
953
+ "epoch": 42.22,
954
+ "learning_rate": 2.777777777777778e-06,
955
+ "loss": 0.0855,
956
+ "step": 95
957
+ },
958
+ {
959
+ "epoch": 42.67,
960
  "learning_rate": 2.2222222222222225e-06,
961
+ "loss": 0.083,
962
+ "step": 96
963
  },
964
  {
965
+ "epoch": 42.67,
966
+ "eval_accuracy": 0.9814814814814815,
967
+ "eval_loss": 0.13193319737911224,
968
+ "eval_runtime": 0.2638,
969
+ "eval_samples_per_second": 204.69,
970
+ "eval_steps_per_second": 3.791,
971
+ "step": 96
972
  },
973
  {
974
+ "epoch": 43.11,
975
+ "learning_rate": 1.6666666666666667e-06,
976
+ "loss": 0.0501,
977
+ "step": 97
978
  },
979
  {
980
+ "epoch": 43.56,
981
+ "learning_rate": 1.1111111111111112e-06,
982
+ "loss": 0.0751,
983
+ "step": 98
 
 
 
984
  },
985
  {
986
+ "epoch": 44.0,
987
+ "learning_rate": 5.555555555555556e-07,
988
+ "loss": 0.0558,
989
+ "step": 99
990
  },
991
  {
992
+ "epoch": 44.0,
993
+ "eval_accuracy": 0.9814814814814815,
994
+ "eval_loss": 0.1318960040807724,
995
+ "eval_runtime": 0.2634,
996
+ "eval_samples_per_second": 204.99,
997
+ "eval_steps_per_second": 3.796,
998
+ "step": 99
999
  },
1000
  {
1001
+ "epoch": 44.44,
1002
+ "learning_rate": 0.0,
1003
+ "loss": 0.0752,
1004
+ "step": 100
1005
+ },
1006
+ {
1007
+ "epoch": 44.44,
1008
+ "eval_accuracy": 0.9814814814814815,
1009
+ "eval_loss": 0.13192817568778992,
1010
+ "eval_runtime": 0.2735,
1011
+ "eval_samples_per_second": 197.452,
1012
+ "eval_steps_per_second": 3.657,
1013
+ "step": 100
1014
+ },
1015
+ {
1016
+ "epoch": 44.44,
1017
+ "step": 100,
1018
+ "total_flos": 1.6586385457107272e+18,
1019
+ "train_loss": 0.13114935230463742,
1020
+ "train_runtime": 687.5457,
1021
+ "train_samples_per_second": 34.979,
1022
+ "train_steps_per_second": 0.145
1023
  }
1024
  ],
1025
+ "max_steps": 100,
1026
  "num_train_epochs": 50,
1027
+ "total_flos": 1.6586385457107272e+18,
1028
  "trial_name": null,
1029
  "trial_params": null
1030
  }