Prot10 committed on
Commit
285604a
1 Parent(s): cab8335

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. all_results.json +11 -11
  3. eval_results.json +6 -6
  4. train_results.json +6 -6
  5. trainer_state.json +440 -314
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [facebook/convnextv2-base-1k-224](https://huggingface.co/facebook/convnextv2-base-1k-224) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.7475
21
- - Accuracy: 0.3520
22
 
23
  ## Model description
24
 
 
17
 
18
  This model is a fine-tuned version of [facebook/convnextv2-base-1k-224](https://huggingface.co/facebook/convnextv2-base-1k-224) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.3599
21
+ - Accuracy: 0.4190
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 27.91,
3
- "eval_accuracy": 0.4382284382284382,
4
- "eval_loss": 1.447946548461914,
5
- "eval_runtime": 8.404,
6
- "eval_samples_per_second": 51.047,
7
- "eval_steps_per_second": 1.666,
8
- "total_flos": 3.007291871298355e+18,
9
- "train_loss": 1.1542485936482747,
10
- "train_runtime": 2419.37,
11
- "train_samples_per_second": 16.864,
12
- "train_steps_per_second": 0.124
13
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.41899441340782123,
4
+ "eval_loss": 1.359870195388794,
5
+ "eval_runtime": 7.4764,
6
+ "eval_samples_per_second": 47.884,
7
+ "eval_steps_per_second": 1.605,
8
+ "total_flos": 4.840276186658304e+18,
9
+ "train_loss": 1.0325976332028708,
10
+ "train_runtime": 3493.542,
11
+ "train_samples_per_second": 17.492,
12
+ "train_steps_per_second": 0.137
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 27.91,
3
- "eval_accuracy": 0.4382284382284382,
4
- "eval_loss": 1.447946548461914,
5
- "eval_runtime": 8.404,
6
- "eval_samples_per_second": 51.047,
7
- "eval_steps_per_second": 1.666
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.41899441340782123,
4
+ "eval_loss": 1.359870195388794,
5
+ "eval_runtime": 7.4764,
6
+ "eval_samples_per_second": 47.884,
7
+ "eval_steps_per_second": 1.605
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 27.91,
3
- "total_flos": 3.007291871298355e+18,
4
- "train_loss": 1.1542485936482747,
5
- "train_runtime": 2419.37,
6
- "train_samples_per_second": 16.864,
7
- "train_steps_per_second": 0.124
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "total_flos": 4.840276186658304e+18,
4
+ "train_loss": 1.0325976332028708,
5
+ "train_runtime": 3493.542,
6
+ "train_samples_per_second": 17.492,
7
+ "train_steps_per_second": 0.137
8
  }
trainer_state.json CHANGED
@@ -1,460 +1,586 @@
1
  {
2
- "best_metric": 0.4382284382284382,
3
- "best_model_checkpoint": "convnextv2-base-1k-224-for-pre_evaluation/checkpoint-268",
4
- "epoch": 27.906976744186046,
5
  "eval_steps": 500,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.93,
13
- "learning_rate": 1.6666666666666667e-05,
14
- "loss": 1.5952,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.93,
19
- "eval_accuracy": 0.29603729603729606,
20
- "eval_loss": 1.5510554313659668,
21
- "eval_runtime": 8.4324,
22
- "eval_samples_per_second": 50.875,
23
- "eval_steps_per_second": 1.66,
24
- "step": 10
25
  },
26
  {
27
- "epoch": 1.86,
28
- "learning_rate": 3.3333333333333335e-05,
29
- "loss": 1.5238,
30
  "step": 20
31
  },
32
  {
33
- "epoch": 1.95,
34
- "eval_accuracy": 0.34265734265734266,
35
- "eval_loss": 1.5091006755828857,
36
- "eval_runtime": 8.9811,
37
- "eval_samples_per_second": 47.767,
38
- "eval_steps_per_second": 1.559,
39
- "step": 21
40
- },
41
- {
42
- "epoch": 2.79,
43
- "learning_rate": 5e-05,
44
- "loss": 1.4881,
45
  "step": 30
46
  },
47
  {
48
- "epoch": 2.98,
49
- "eval_accuracy": 0.34498834498834496,
50
- "eval_loss": 1.4853538274765015,
51
- "eval_runtime": 8.6991,
52
- "eval_samples_per_second": 49.315,
53
- "eval_steps_per_second": 1.609,
54
  "step": 32
55
  },
56
  {
57
- "epoch": 3.72,
58
- "learning_rate": 4.814814814814815e-05,
59
- "loss": 1.4708,
60
  "step": 40
61
  },
62
  {
63
- "epoch": 4.0,
64
- "eval_accuracy": 0.3473193473193473,
65
- "eval_loss": 1.4616328477859497,
66
- "eval_runtime": 8.3701,
67
- "eval_samples_per_second": 51.254,
68
- "eval_steps_per_second": 1.673,
69
- "step": 43
70
  },
71
  {
72
- "epoch": 4.65,
73
- "learning_rate": 4.62962962962963e-05,
74
- "loss": 1.4361,
75
  "step": 50
76
  },
77
  {
78
- "epoch": 4.93,
79
- "eval_accuracy": 0.34498834498834496,
80
- "eval_loss": 1.4416619539260864,
81
- "eval_runtime": 8.7032,
82
- "eval_samples_per_second": 49.292,
83
- "eval_steps_per_second": 1.609,
84
- "step": 53
85
- },
86
- {
87
- "epoch": 5.58,
88
- "learning_rate": 4.4444444444444447e-05,
89
- "loss": 1.3764,
90
  "step": 60
91
  },
92
  {
93
- "epoch": 5.95,
94
- "eval_accuracy": 0.3752913752913753,
95
- "eval_loss": 1.4134629964828491,
96
- "eval_runtime": 8.9398,
97
- "eval_samples_per_second": 47.988,
98
- "eval_steps_per_second": 1.566,
99
  "step": 64
100
  },
101
  {
102
- "epoch": 6.51,
103
- "learning_rate": 4.259259259259259e-05,
104
- "loss": 1.3333,
105
  "step": 70
106
  },
107
  {
108
- "epoch": 6.98,
109
- "eval_accuracy": 0.3986013986013986,
110
- "eval_loss": 1.3822472095489502,
111
- "eval_runtime": 8.7499,
112
- "eval_samples_per_second": 49.029,
113
- "eval_steps_per_second": 1.6,
114
- "step": 75
115
- },
116
- {
117
- "epoch": 7.44,
118
- "learning_rate": 4.074074074074074e-05,
119
- "loss": 1.3296,
120
  "step": 80
121
  },
122
  {
123
- "epoch": 8.0,
124
- "eval_accuracy": 0.36363636363636365,
125
- "eval_loss": 1.4111592769622803,
126
- "eval_runtime": 8.8496,
127
- "eval_samples_per_second": 48.477,
128
- "eval_steps_per_second": 1.582,
129
- "step": 86
130
  },
131
  {
132
- "epoch": 8.37,
133
- "learning_rate": 3.888888888888889e-05,
134
- "loss": 1.2798,
135
  "step": 90
136
  },
137
  {
138
- "epoch": 8.93,
139
- "eval_accuracy": 0.38927738927738925,
140
- "eval_loss": 1.4038037061691284,
141
- "eval_runtime": 8.5853,
142
- "eval_samples_per_second": 49.969,
143
- "eval_steps_per_second": 1.631,
144
  "step": 96
145
  },
146
  {
147
- "epoch": 9.3,
148
- "learning_rate": 3.7037037037037037e-05,
149
- "loss": 1.3129,
150
  "step": 100
151
  },
152
  {
153
- "epoch": 9.95,
154
- "eval_accuracy": 0.3776223776223776,
155
- "eval_loss": 1.424072265625,
156
- "eval_runtime": 8.6578,
157
- "eval_samples_per_second": 49.551,
158
- "eval_steps_per_second": 1.617,
159
- "step": 107
160
- },
161
- {
162
- "epoch": 10.23,
163
- "learning_rate": 3.518518518518519e-05,
164
- "loss": 1.3014,
165
  "step": 110
166
  },
167
  {
168
- "epoch": 10.98,
169
- "eval_accuracy": 0.38927738927738925,
170
- "eval_loss": 1.356952428817749,
171
- "eval_runtime": 8.5422,
172
- "eval_samples_per_second": 50.221,
173
- "eval_steps_per_second": 1.639,
174
- "step": 118
175
  },
176
  {
177
- "epoch": 11.16,
178
- "learning_rate": 3.3333333333333335e-05,
179
- "loss": 1.2332,
180
  "step": 120
181
  },
182
  {
183
- "epoch": 12.0,
184
- "eval_accuracy": 0.38927738927738925,
185
- "eval_loss": 1.4072706699371338,
186
- "eval_runtime": 8.3607,
187
- "eval_samples_per_second": 51.312,
188
- "eval_steps_per_second": 1.675,
189
- "step": 129
190
  },
191
  {
192
- "epoch": 12.09,
193
- "learning_rate": 3.148148148148148e-05,
194
- "loss": 1.212,
195
  "step": 130
196
  },
197
  {
198
- "epoch": 12.93,
199
- "eval_accuracy": 0.40326340326340326,
200
- "eval_loss": 1.376956820487976,
201
- "eval_runtime": 8.3675,
202
- "eval_samples_per_second": 51.27,
203
- "eval_steps_per_second": 1.673,
204
- "step": 139
205
- },
206
- {
207
- "epoch": 13.02,
208
- "learning_rate": 2.962962962962963e-05,
209
- "loss": 1.1844,
210
  "step": 140
211
  },
212
  {
213
- "epoch": 13.95,
214
- "learning_rate": 2.777777777777778e-05,
215
- "loss": 1.1763,
216
- "step": 150
 
 
 
217
  },
218
  {
219
- "epoch": 13.95,
220
- "eval_accuracy": 0.3962703962703963,
221
- "eval_loss": 1.3891488313674927,
222
- "eval_runtime": 8.5717,
223
- "eval_samples_per_second": 50.049,
224
- "eval_steps_per_second": 1.633,
225
  "step": 150
226
  },
227
  {
228
- "epoch": 14.88,
229
- "learning_rate": 2.5925925925925925e-05,
230
- "loss": 1.124,
231
  "step": 160
232
  },
233
  {
234
- "epoch": 14.98,
235
- "eval_accuracy": 0.4125874125874126,
236
- "eval_loss": 1.3915237188339233,
237
- "eval_runtime": 8.5638,
238
- "eval_samples_per_second": 50.095,
239
- "eval_steps_per_second": 1.635,
240
- "step": 161
241
  },
242
  {
243
- "epoch": 15.81,
244
- "learning_rate": 2.4074074074074074e-05,
245
- "loss": 1.0963,
246
  "step": 170
247
  },
248
  {
249
- "epoch": 16.0,
250
- "eval_accuracy": 0.4149184149184149,
251
- "eval_loss": 1.4098657369613647,
252
- "eval_runtime": 8.8116,
253
- "eval_samples_per_second": 48.686,
254
- "eval_steps_per_second": 1.589,
255
- "step": 172
256
  },
257
  {
258
- "epoch": 16.74,
259
- "learning_rate": 2.2222222222222223e-05,
260
- "loss": 1.0547,
261
  "step": 180
262
  },
263
  {
264
- "epoch": 16.93,
265
- "eval_accuracy": 0.40326340326340326,
266
- "eval_loss": 1.4206278324127197,
267
- "eval_runtime": 8.7717,
268
- "eval_samples_per_second": 48.907,
269
- "eval_steps_per_second": 1.596,
270
- "step": 182
271
- },
272
- {
273
- "epoch": 17.67,
274
- "learning_rate": 2.037037037037037e-05,
275
- "loss": 1.0631,
276
  "step": 190
277
  },
278
  {
279
- "epoch": 17.95,
280
- "eval_accuracy": 0.4195804195804196,
281
- "eval_loss": 1.4040827751159668,
282
- "eval_runtime": 8.3983,
283
- "eval_samples_per_second": 51.082,
284
- "eval_steps_per_second": 1.667,
285
- "step": 193
286
  },
287
  {
288
- "epoch": 18.6,
289
- "learning_rate": 1.8518518518518518e-05,
290
- "loss": 0.9911,
291
  "step": 200
292
  },
293
  {
294
- "epoch": 18.98,
295
- "eval_accuracy": 0.4149184149184149,
296
- "eval_loss": 1.4271957874298096,
297
- "eval_runtime": 8.2919,
298
- "eval_samples_per_second": 51.737,
299
- "eval_steps_per_second": 1.688,
300
- "step": 204
301
  },
302
  {
303
- "epoch": 19.53,
304
- "learning_rate": 1.6666666666666667e-05,
305
- "loss": 1.005,
306
  "step": 210
307
  },
308
  {
309
- "epoch": 20.0,
310
- "eval_accuracy": 0.4219114219114219,
311
- "eval_loss": 1.42105233669281,
312
- "eval_runtime": 8.2769,
313
- "eval_samples_per_second": 51.831,
314
- "eval_steps_per_second": 1.691,
315
- "step": 215
316
- },
317
- {
318
- "epoch": 20.47,
319
- "learning_rate": 1.4814814814814815e-05,
320
- "loss": 0.9663,
321
  "step": 220
322
  },
323
  {
324
- "epoch": 20.93,
325
- "eval_accuracy": 0.40093240093240096,
326
- "eval_loss": 1.466171145439148,
327
- "eval_runtime": 9.4718,
328
- "eval_samples_per_second": 45.292,
329
- "eval_steps_per_second": 1.478,
330
- "step": 225
331
  },
332
  {
333
- "epoch": 21.4,
334
- "learning_rate": 1.2962962962962962e-05,
335
- "loss": 0.9533,
336
  "step": 230
337
  },
338
  {
339
- "epoch": 21.95,
340
- "eval_accuracy": 0.43356643356643354,
341
- "eval_loss": 1.428614616394043,
342
- "eval_runtime": 8.343,
343
- "eval_samples_per_second": 51.42,
344
- "eval_steps_per_second": 1.678,
345
- "step": 236
346
- },
347
- {
348
- "epoch": 22.33,
349
- "learning_rate": 1.1111111111111112e-05,
350
- "loss": 0.9506,
351
  "step": 240
352
  },
353
  {
354
- "epoch": 22.98,
355
- "eval_accuracy": 0.43123543123543123,
356
- "eval_loss": 1.413465976715088,
357
- "eval_runtime": 8.7694,
358
- "eval_samples_per_second": 48.92,
359
- "eval_steps_per_second": 1.596,
360
- "step": 247
361
  },
362
  {
363
- "epoch": 23.26,
364
- "learning_rate": 9.259259259259259e-06,
365
- "loss": 0.8973,
366
  "step": 250
367
  },
368
  {
369
- "epoch": 24.0,
370
- "eval_accuracy": 0.42657342657342656,
371
- "eval_loss": 1.442847728729248,
372
- "eval_runtime": 8.7464,
373
- "eval_samples_per_second": 49.049,
374
- "eval_steps_per_second": 1.601,
375
- "step": 258
376
  },
377
  {
378
- "epoch": 24.19,
379
- "learning_rate": 7.4074074074074075e-06,
380
- "loss": 0.8807,
381
  "step": 260
382
  },
383
  {
384
- "epoch": 24.93,
385
- "eval_accuracy": 0.4382284382284382,
386
- "eval_loss": 1.447946548461914,
387
- "eval_runtime": 8.3339,
388
- "eval_samples_per_second": 51.476,
389
- "eval_steps_per_second": 1.68,
390
- "step": 268
391
- },
392
- {
393
- "epoch": 25.12,
394
- "learning_rate": 5.555555555555556e-06,
395
- "loss": 0.8731,
396
  "step": 270
397
  },
398
  {
399
- "epoch": 25.95,
400
- "eval_accuracy": 0.4289044289044289,
401
- "eval_loss": 1.4429428577423096,
402
- "eval_runtime": 8.7998,
403
- "eval_samples_per_second": 48.751,
404
- "eval_steps_per_second": 1.591,
405
- "step": 279
406
  },
407
  {
408
- "epoch": 26.05,
409
- "learning_rate": 3.7037037037037037e-06,
410
- "loss": 0.8366,
411
  "step": 280
412
  },
413
  {
414
- "epoch": 26.98,
415
- "learning_rate": 1.8518518518518519e-06,
416
- "loss": 0.8472,
417
- "step": 290
 
 
 
418
  },
419
  {
420
- "epoch": 26.98,
421
- "eval_accuracy": 0.43123543123543123,
422
- "eval_loss": 1.4461231231689453,
423
- "eval_runtime": 8.7766,
424
- "eval_samples_per_second": 48.88,
425
- "eval_steps_per_second": 1.595,
426
  "step": 290
427
  },
428
  {
429
- "epoch": 27.91,
430
- "learning_rate": 0.0,
431
- "loss": 0.8348,
432
  "step": 300
433
  },
434
  {
435
- "epoch": 27.91,
436
- "eval_accuracy": 0.43356643356643354,
437
- "eval_loss": 1.453087568283081,
438
- "eval_runtime": 8.3523,
439
- "eval_samples_per_second": 51.363,
440
- "eval_steps_per_second": 1.676,
441
- "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  },
443
  {
444
- "epoch": 27.91,
445
- "step": 300,
446
- "total_flos": 3.007291871298355e+18,
447
- "train_loss": 1.1542485936482747,
448
- "train_runtime": 2419.37,
449
- "train_samples_per_second": 16.864,
450
- "train_steps_per_second": 0.124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  }
452
  ],
453
  "logging_steps": 10,
454
- "max_steps": 300,
455
  "num_train_epochs": 30,
456
  "save_steps": 500,
457
- "total_flos": 3.007291871298355e+18,
458
  "trial_name": null,
459
  "trial_params": null
460
  }
 
1
  {
2
+ "best_metric": 0.41899441340782123,
3
+ "best_model_checkpoint": "convnextv2-base-1k-224-for-pre_evaluation/checkpoint-128",
4
+ "epoch": 30.0,
5
  "eval_steps": 500,
6
+ "global_step": 480,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.62,
13
+ "learning_rate": 1.0416666666666668e-05,
14
+ "loss": 1.6,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 1.0,
19
+ "eval_accuracy": 0.29608938547486036,
20
+ "eval_loss": 1.5315604209899902,
21
+ "eval_runtime": 6.8342,
22
+ "eval_samples_per_second": 52.384,
23
+ "eval_steps_per_second": 1.756,
24
+ "step": 16
25
  },
26
  {
27
+ "epoch": 1.25,
28
+ "learning_rate": 2.0833333333333336e-05,
29
+ "loss": 1.535,
30
  "step": 20
31
  },
32
  {
33
+ "epoch": 1.88,
34
+ "learning_rate": 3.125e-05,
35
+ "loss": 1.5084,
 
 
 
 
 
 
 
 
 
36
  "step": 30
37
  },
38
  {
39
+ "epoch": 2.0,
40
+ "eval_accuracy": 0.2849162011173184,
41
+ "eval_loss": 1.5060781240463257,
42
+ "eval_runtime": 7.4696,
43
+ "eval_samples_per_second": 47.928,
44
+ "eval_steps_per_second": 1.607,
45
  "step": 32
46
  },
47
  {
48
+ "epoch": 2.5,
49
+ "learning_rate": 4.166666666666667e-05,
50
+ "loss": 1.5134,
51
  "step": 40
52
  },
53
  {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.3240223463687151,
56
+ "eval_loss": 1.4968407154083252,
57
+ "eval_runtime": 7.1172,
58
+ "eval_samples_per_second": 50.301,
59
+ "eval_steps_per_second": 1.686,
60
+ "step": 48
61
  },
62
  {
63
+ "epoch": 3.12,
64
+ "learning_rate": 4.976851851851852e-05,
65
+ "loss": 1.4694,
66
  "step": 50
67
  },
68
  {
69
+ "epoch": 3.75,
70
+ "learning_rate": 4.8611111111111115e-05,
71
+ "loss": 1.4663,
 
 
 
 
 
 
 
 
 
72
  "step": 60
73
  },
74
  {
75
+ "epoch": 4.0,
76
+ "eval_accuracy": 0.33519553072625696,
77
+ "eval_loss": 1.4607384204864502,
78
+ "eval_runtime": 7.4013,
79
+ "eval_samples_per_second": 48.37,
80
+ "eval_steps_per_second": 1.621,
81
  "step": 64
82
  },
83
  {
84
+ "epoch": 4.38,
85
+ "learning_rate": 4.745370370370371e-05,
86
+ "loss": 1.4375,
87
  "step": 70
88
  },
89
  {
90
+ "epoch": 5.0,
91
+ "learning_rate": 4.62962962962963e-05,
92
+ "loss": 1.4046,
 
 
 
 
 
 
 
 
 
93
  "step": 80
94
  },
95
  {
96
+ "epoch": 5.0,
97
+ "eval_accuracy": 0.3268156424581006,
98
+ "eval_loss": 1.4509011507034302,
99
+ "eval_runtime": 6.83,
100
+ "eval_samples_per_second": 52.416,
101
+ "eval_steps_per_second": 1.757,
102
+ "step": 80
103
  },
104
  {
105
+ "epoch": 5.62,
106
+ "learning_rate": 4.5138888888888894e-05,
107
+ "loss": 1.4085,
108
  "step": 90
109
  },
110
  {
111
+ "epoch": 6.0,
112
+ "eval_accuracy": 0.388268156424581,
113
+ "eval_loss": 1.4423185586929321,
114
+ "eval_runtime": 7.3683,
115
+ "eval_samples_per_second": 48.587,
116
+ "eval_steps_per_second": 1.629,
117
  "step": 96
118
  },
119
  {
120
+ "epoch": 6.25,
121
+ "learning_rate": 4.3981481481481486e-05,
122
+ "loss": 1.3765,
123
  "step": 100
124
  },
125
  {
126
+ "epoch": 6.88,
127
+ "learning_rate": 4.282407407407408e-05,
128
+ "loss": 1.3443,
 
 
 
 
 
 
 
 
 
129
  "step": 110
130
  },
131
  {
132
+ "epoch": 7.0,
133
+ "eval_accuracy": 0.4022346368715084,
134
+ "eval_loss": 1.400512456893921,
135
+ "eval_runtime": 6.9156,
136
+ "eval_samples_per_second": 51.767,
137
+ "eval_steps_per_second": 1.735,
138
+ "step": 112
139
  },
140
  {
141
+ "epoch": 7.5,
142
+ "learning_rate": 4.166666666666667e-05,
143
+ "loss": 1.3025,
144
  "step": 120
145
  },
146
  {
147
+ "epoch": 8.0,
148
+ "eval_accuracy": 0.41899441340782123,
149
+ "eval_loss": 1.359870195388794,
150
+ "eval_runtime": 7.041,
151
+ "eval_samples_per_second": 50.845,
152
+ "eval_steps_per_second": 1.704,
153
+ "step": 128
154
  },
155
  {
156
+ "epoch": 8.12,
157
+ "learning_rate": 4.0509259259259265e-05,
158
+ "loss": 1.2668,
159
  "step": 130
160
  },
161
  {
162
+ "epoch": 8.75,
163
+ "learning_rate": 3.935185185185186e-05,
164
+ "loss": 1.2627,
 
 
 
 
 
 
 
 
 
165
  "step": 140
166
  },
167
  {
168
+ "epoch": 9.0,
169
+ "eval_accuracy": 0.39106145251396646,
170
+ "eval_loss": 1.3638169765472412,
171
+ "eval_runtime": 7.2532,
172
+ "eval_samples_per_second": 49.357,
173
+ "eval_steps_per_second": 1.654,
174
+ "step": 144
175
  },
176
  {
177
+ "epoch": 9.38,
178
+ "learning_rate": 3.8194444444444444e-05,
179
+ "loss": 1.2427,
 
 
 
180
  "step": 150
181
  },
182
  {
183
+ "epoch": 10.0,
184
+ "learning_rate": 3.7037037037037037e-05,
185
+ "loss": 1.2099,
186
  "step": 160
187
  },
188
  {
189
+ "epoch": 10.0,
190
+ "eval_accuracy": 0.34916201117318435,
191
+ "eval_loss": 1.4057648181915283,
192
+ "eval_runtime": 7.508,
193
+ "eval_samples_per_second": 47.682,
194
+ "eval_steps_per_second": 1.598,
195
+ "step": 160
196
  },
197
  {
198
+ "epoch": 10.62,
199
+ "learning_rate": 3.587962962962963e-05,
200
+ "loss": 1.2086,
201
  "step": 170
202
  },
203
  {
204
+ "epoch": 11.0,
205
+ "eval_accuracy": 0.3407821229050279,
206
+ "eval_loss": 1.443146824836731,
207
+ "eval_runtime": 7.4283,
208
+ "eval_samples_per_second": 48.194,
209
+ "eval_steps_per_second": 1.615,
210
+ "step": 176
211
  },
212
  {
213
+ "epoch": 11.25,
214
+ "learning_rate": 3.472222222222222e-05,
215
+ "loss": 1.1684,
216
  "step": 180
217
  },
218
  {
219
+ "epoch": 11.88,
220
+ "learning_rate": 3.3564814814814815e-05,
221
+ "loss": 1.1393,
 
 
 
 
 
 
 
 
 
222
  "step": 190
223
  },
224
  {
225
+ "epoch": 12.0,
226
+ "eval_accuracy": 0.34916201117318435,
227
+ "eval_loss": 1.4143450260162354,
228
+ "eval_runtime": 7.1396,
229
+ "eval_samples_per_second": 50.143,
230
+ "eval_steps_per_second": 1.681,
231
+ "step": 192
232
  },
233
  {
234
+ "epoch": 12.5,
235
+ "learning_rate": 3.240740740740741e-05,
236
+ "loss": 1.1039,
237
  "step": 200
238
  },
239
  {
240
+ "epoch": 13.0,
241
+ "eval_accuracy": 0.388268156424581,
242
+ "eval_loss": 1.4305065870285034,
243
+ "eval_runtime": 6.8442,
244
+ "eval_samples_per_second": 52.307,
245
+ "eval_steps_per_second": 1.753,
246
+ "step": 208
247
  },
248
  {
249
+ "epoch": 13.12,
250
+ "learning_rate": 3.125e-05,
251
+ "loss": 1.0641,
252
  "step": 210
253
  },
254
  {
255
+ "epoch": 13.75,
256
+ "learning_rate": 3.0092592592592593e-05,
257
+ "loss": 1.0551,
 
 
 
 
 
 
 
 
 
258
  "step": 220
259
  },
260
  {
261
+ "epoch": 14.0,
262
+ "eval_accuracy": 0.35195530726256985,
263
+ "eval_loss": 1.5202596187591553,
264
+ "eval_runtime": 7.2925,
265
+ "eval_samples_per_second": 49.091,
266
+ "eval_steps_per_second": 1.646,
267
+ "step": 224
268
  },
269
  {
270
+ "epoch": 14.38,
271
+ "learning_rate": 2.8935185185185186e-05,
272
+ "loss": 1.0686,
273
  "step": 230
274
  },
275
  {
276
+ "epoch": 15.0,
277
+ "learning_rate": 2.777777777777778e-05,
278
+ "loss": 1.0368,
 
 
 
 
 
 
 
 
 
279
  "step": 240
280
  },
281
  {
282
+ "epoch": 15.0,
283
+ "eval_accuracy": 0.3324022346368715,
284
+ "eval_loss": 1.5117393732070923,
285
+ "eval_runtime": 6.8377,
286
+ "eval_samples_per_second": 52.356,
287
+ "eval_steps_per_second": 1.755,
288
+ "step": 240
289
  },
290
  {
291
+ "epoch": 15.62,
292
+ "learning_rate": 2.6620370370370372e-05,
293
+ "loss": 0.9753,
294
  "step": 250
295
  },
296
  {
297
+ "epoch": 16.0,
298
+ "eval_accuracy": 0.3770949720670391,
299
+ "eval_loss": 1.4545259475708008,
300
+ "eval_runtime": 7.0796,
301
+ "eval_samples_per_second": 50.568,
302
+ "eval_steps_per_second": 1.695,
303
+ "step": 256
304
  },
305
  {
306
+ "epoch": 16.25,
307
+ "learning_rate": 2.5462962962962965e-05,
308
+ "loss": 0.9677,
309
  "step": 260
310
  },
311
  {
312
+ "epoch": 16.88,
313
+ "learning_rate": 2.4305555555555558e-05,
314
+ "loss": 0.938,
 
 
 
 
 
 
 
 
 
315
  "step": 270
316
  },
317
  {
318
+ "epoch": 17.0,
319
+ "eval_accuracy": 0.33519553072625696,
320
+ "eval_loss": 1.5396308898925781,
321
+ "eval_runtime": 7.1168,
322
+ "eval_samples_per_second": 50.304,
323
+ "eval_steps_per_second": 1.686,
324
+ "step": 272
325
  },
326
  {
327
+ "epoch": 17.5,
328
+ "learning_rate": 2.314814814814815e-05,
329
+ "loss": 0.899,
330
  "step": 280
331
  },
332
  {
333
+ "epoch": 18.0,
334
+ "eval_accuracy": 0.3407821229050279,
335
+ "eval_loss": 1.5770219564437866,
336
+ "eval_runtime": 6.8822,
337
+ "eval_samples_per_second": 52.018,
338
+ "eval_steps_per_second": 1.744,
339
+ "step": 288
340
  },
341
  {
342
+ "epoch": 18.12,
343
+ "learning_rate": 2.1990740740740743e-05,
344
+ "loss": 0.9047,
 
 
 
345
  "step": 290
346
  },
347
  {
348
+ "epoch": 18.75,
349
+ "learning_rate": 2.0833333333333336e-05,
350
+ "loss": 0.8629,
351
  "step": 300
352
  },
353
  {
354
+ "epoch": 19.0,
355
+ "eval_accuracy": 0.3128491620111732,
356
+ "eval_loss": 1.7105906009674072,
357
+ "eval_runtime": 7.3502,
358
+ "eval_samples_per_second": 48.706,
359
+ "eval_steps_per_second": 1.633,
360
+ "step": 304
361
+ },
362
+ {
363
+ "epoch": 19.38,
364
+ "learning_rate": 1.967592592592593e-05,
365
+ "loss": 0.8624,
366
+ "step": 310
367
+ },
368
+ {
369
+ "epoch": 20.0,
370
+ "learning_rate": 1.8518518518518518e-05,
371
+ "loss": 0.8674,
372
+ "step": 320
373
+ },
374
+ {
375
+ "epoch": 20.0,
376
+ "eval_accuracy": 0.33519553072625696,
377
+ "eval_loss": 1.5864217281341553,
378
+ "eval_runtime": 7.1963,
379
+ "eval_samples_per_second": 49.748,
380
+ "eval_steps_per_second": 1.668,
381
+ "step": 320
382
+ },
383
+ {
384
+ "epoch": 20.62,
385
+ "learning_rate": 1.736111111111111e-05,
386
+ "loss": 0.7789,
387
+ "step": 330
388
+ },
389
+ {
390
+ "epoch": 21.0,
391
+ "eval_accuracy": 0.3407821229050279,
392
+ "eval_loss": 1.6129050254821777,
393
+ "eval_runtime": 7.1496,
394
+ "eval_samples_per_second": 50.073,
395
+ "eval_steps_per_second": 1.678,
396
+ "step": 336
397
+ },
398
+ {
399
+ "epoch": 21.25,
400
+ "learning_rate": 1.6203703703703704e-05,
401
+ "loss": 0.8161,
402
+ "step": 340
403
+ },
404
+ {
405
+ "epoch": 21.88,
406
+ "learning_rate": 1.5046296296296297e-05,
407
+ "loss": 0.7426,
408
+ "step": 350
409
+ },
410
+ {
411
+ "epoch": 22.0,
412
+ "eval_accuracy": 0.36033519553072624,
413
+ "eval_loss": 1.6353477239608765,
414
+ "eval_runtime": 7.4456,
415
+ "eval_samples_per_second": 48.082,
416
+ "eval_steps_per_second": 1.612,
417
+ "step": 352
418
+ },
419
+ {
420
+ "epoch": 22.5,
421
+ "learning_rate": 1.388888888888889e-05,
422
+ "loss": 0.7677,
423
+ "step": 360
424
+ },
425
+ {
426
+ "epoch": 23.0,
427
+ "eval_accuracy": 0.3463687150837989,
428
+ "eval_loss": 1.6793445348739624,
429
+ "eval_runtime": 6.994,
430
+ "eval_samples_per_second": 51.187,
431
+ "eval_steps_per_second": 1.716,
432
+ "step": 368
433
+ },
434
+ {
435
+ "epoch": 23.12,
436
+ "learning_rate": 1.2731481481481482e-05,
437
+ "loss": 0.7327,
438
+ "step": 370
439
+ },
440
+ {
441
+ "epoch": 23.75,
442
+ "learning_rate": 1.1574074074074075e-05,
443
+ "loss": 0.7172,
444
+ "step": 380
445
+ },
446
+ {
447
+ "epoch": 24.0,
448
+ "eval_accuracy": 0.3575418994413408,
449
+ "eval_loss": 1.6759321689605713,
450
+ "eval_runtime": 7.4394,
451
+ "eval_samples_per_second": 48.122,
452
+ "eval_steps_per_second": 1.613,
453
+ "step": 384
454
+ },
455
+ {
456
+ "epoch": 24.38,
457
+ "learning_rate": 1.0416666666666668e-05,
458
+ "loss": 0.6759,
459
+ "step": 390
460
+ },
461
+ {
462
+ "epoch": 25.0,
463
+ "learning_rate": 9.259259259259259e-06,
464
+ "loss": 0.6809,
465
+ "step": 400
466
+ },
467
+ {
468
+ "epoch": 25.0,
469
+ "eval_accuracy": 0.3659217877094972,
470
+ "eval_loss": 1.701292634010315,
471
+ "eval_runtime": 7.4138,
472
+ "eval_samples_per_second": 48.288,
473
+ "eval_steps_per_second": 1.619,
474
+ "step": 400
475
+ },
476
+ {
477
+ "epoch": 25.62,
478
+ "learning_rate": 8.101851851851852e-06,
479
+ "loss": 0.6619,
480
+ "step": 410
481
+ },
482
+ {
483
+ "epoch": 26.0,
484
+ "eval_accuracy": 0.36312849162011174,
485
+ "eval_loss": 1.7108293771743774,
486
+ "eval_runtime": 7.238,
487
+ "eval_samples_per_second": 49.461,
488
+ "eval_steps_per_second": 1.658,
489
+ "step": 416
490
+ },
491
+ {
492
+ "epoch": 26.25,
493
+ "learning_rate": 6.944444444444445e-06,
494
+ "loss": 0.6773,
495
+ "step": 420
496
+ },
497
+ {
498
+ "epoch": 26.88,
499
+ "learning_rate": 5.787037037037038e-06,
500
+ "loss": 0.6656,
501
+ "step": 430
502
+ },
503
+ {
504
+ "epoch": 27.0,
505
+ "eval_accuracy": 0.3715083798882682,
506
+ "eval_loss": 1.7327028512954712,
507
+ "eval_runtime": 6.8416,
508
+ "eval_samples_per_second": 52.327,
509
+ "eval_steps_per_second": 1.754,
510
+ "step": 432
511
  },
512
  {
513
+ "epoch": 27.5,
514
+ "learning_rate": 4.6296296296296296e-06,
515
+ "loss": 0.6258,
516
+ "step": 440
517
+ },
518
+ {
519
+ "epoch": 28.0,
520
+ "eval_accuracy": 0.3547486033519553,
521
+ "eval_loss": 1.7377949953079224,
522
+ "eval_runtime": 7.2785,
523
+ "eval_samples_per_second": 49.186,
524
+ "eval_steps_per_second": 1.649,
525
+ "step": 448
526
+ },
527
+ {
528
+ "epoch": 28.12,
529
+ "learning_rate": 3.4722222222222224e-06,
530
+ "loss": 0.6646,
531
+ "step": 450
532
+ },
533
+ {
534
+ "epoch": 28.75,
535
+ "learning_rate": 2.3148148148148148e-06,
536
+ "loss": 0.6173,
537
+ "step": 460
538
+ },
539
+ {
540
+ "epoch": 29.0,
541
+ "eval_accuracy": 0.36033519553072624,
542
+ "eval_loss": 1.7461235523223877,
543
+ "eval_runtime": 6.8622,
544
+ "eval_samples_per_second": 52.17,
545
+ "eval_steps_per_second": 1.749,
546
+ "step": 464
547
+ },
548
+ {
549
+ "epoch": 29.38,
550
+ "learning_rate": 1.1574074074074074e-06,
551
+ "loss": 0.6482,
552
+ "step": 470
553
+ },
554
+ {
555
+ "epoch": 30.0,
556
+ "learning_rate": 0.0,
557
+ "loss": 0.6214,
558
+ "step": 480
559
+ },
560
+ {
561
+ "epoch": 30.0,
562
+ "eval_accuracy": 0.35195530726256985,
563
+ "eval_loss": 1.7475444078445435,
564
+ "eval_runtime": 7.4355,
565
+ "eval_samples_per_second": 48.148,
566
+ "eval_steps_per_second": 1.614,
567
+ "step": 480
568
+ },
569
+ {
570
+ "epoch": 30.0,
571
+ "step": 480,
572
+ "total_flos": 4.840276186658304e+18,
573
+ "train_loss": 1.0325976332028708,
574
+ "train_runtime": 3493.542,
575
+ "train_samples_per_second": 17.492,
576
+ "train_steps_per_second": 0.137
577
  }
578
  ],
579
  "logging_steps": 10,
580
+ "max_steps": 480,
581
  "num_train_epochs": 30,
582
  "save_steps": 500,
583
+ "total_flos": 4.840276186658304e+18,
584
  "trial_name": null,
585
  "trial_params": null
586
  }