nicolasdupuisroy commited on
Commit
fee8e38
1 Parent(s): ae1d126

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - imagefolder
@@ -22,7 +24,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.5538461538461539
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +34,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 2.8622
36
- - Accuracy: 0.5538
37
 
38
  ## Model description
39
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - imagefolder
 
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
+ value: 0.5461538461538461
28
  ---
29
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
36
  It achieves the following results on the evaluation set:
37
+ - Loss: 2.8599
38
+ - Accuracy: 0.5462
39
 
40
  ## Model description
41
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_accuracy": 0.15306122448979592,
4
- "eval_loss": 3.643535614013672,
5
- "eval_runtime": 1.6218,
6
- "eval_samples_per_second": 60.427,
7
- "eval_steps_per_second": 1.233,
8
- "train_loss": 3.289549721309117,
9
- "train_runtime": 4059.8356,
10
- "train_samples_per_second": 13.597,
11
- "train_steps_per_second": 0.172
12
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_accuracy": 0.5461538461538461,
4
+ "eval_loss": 2.859877347946167,
5
+ "eval_runtime": 2.5644,
6
+ "eval_samples_per_second": 50.694,
7
+ "eval_steps_per_second": 0.78,
8
+ "train_loss": 2.6316724504743303,
9
+ "train_runtime": 4344.3701,
10
+ "train_samples_per_second": 11.97,
11
+ "train_steps_per_second": 0.161
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_accuracy": 0.15306122448979592,
4
- "eval_loss": 3.643535614013672,
5
- "eval_runtime": 1.6218,
6
- "eval_samples_per_second": 60.427,
7
- "eval_steps_per_second": 1.233
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_accuracy": 0.5461538461538461,
4
+ "eval_loss": 2.859877347946167,
5
+ "eval_runtime": 2.5644,
6
+ "eval_samples_per_second": 50.694,
7
+ "eval_steps_per_second": 0.78
8
  }
runs/Jan17_18-13-33_d06676088071/events.out.tfevents.1705519839.d06676088071.1503.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a6efa2ac983c8a4ef9eed4231219ac5348c0a7d1f235e72d3287697daddb4
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 3.289549721309117,
4
- "train_runtime": 4059.8356,
5
- "train_samples_per_second": 13.597,
6
- "train_steps_per_second": 0.172
7
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 2.6316724504743303,
4
+ "train_runtime": 4344.3701,
5
+ "train_samples_per_second": 11.97,
6
+ "train_steps_per_second": 0.161
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 3.643535614013672,
3
- "best_model_checkpoint": "./drive/MyDrive/repositories/torch_example_image-classification/outputs_letter3/checkpoint-700",
4
  "epoch": 100.0,
5
  "eval_steps": 500,
6
  "global_step": 700,
@@ -10,1332 +10,1332 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.030612244897959183,
14
- "eval_loss": 3.945223808288574,
15
- "eval_runtime": 35.9416,
16
- "eval_samples_per_second": 2.727,
17
- "eval_steps_per_second": 0.056,
18
  "step": 7
19
  },
20
  {
21
  "epoch": 1.43,
22
  "learning_rate": 1.9714285714285718e-05,
23
- "loss": 3.9498,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.05102040816326531,
29
- "eval_loss": 3.943814277648926,
30
- "eval_runtime": 1.6003,
31
- "eval_samples_per_second": 61.237,
32
- "eval_steps_per_second": 1.25,
33
  "step": 14
34
  },
35
  {
36
  "epoch": 2.86,
37
  "learning_rate": 1.942857142857143e-05,
38
- "loss": 3.9413,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.0,
43
- "eval_accuracy": 0.04081632653061224,
44
- "eval_loss": 3.9436657428741455,
45
- "eval_runtime": 1.5584,
46
- "eval_samples_per_second": 62.886,
47
- "eval_steps_per_second": 1.283,
48
  "step": 21
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.04081632653061224,
53
- "eval_loss": 3.9431352615356445,
54
- "eval_runtime": 1.6475,
55
- "eval_samples_per_second": 59.483,
56
- "eval_steps_per_second": 1.214,
57
  "step": 28
58
  },
59
  {
60
  "epoch": 4.29,
61
  "learning_rate": 1.9142857142857146e-05,
62
- "loss": 3.9255,
63
  "step": 30
64
  },
65
  {
66
  "epoch": 5.0,
67
- "eval_accuracy": 0.04081632653061224,
68
- "eval_loss": 3.9424338340759277,
69
- "eval_runtime": 1.5527,
70
- "eval_samples_per_second": 63.114,
71
- "eval_steps_per_second": 1.288,
72
  "step": 35
73
  },
74
  {
75
  "epoch": 5.71,
76
  "learning_rate": 1.885714285714286e-05,
77
- "loss": 3.9132,
78
  "step": 40
79
  },
80
  {
81
  "epoch": 6.0,
82
- "eval_accuracy": 0.030612244897959183,
83
- "eval_loss": 3.9400839805603027,
84
- "eval_runtime": 1.6728,
85
- "eval_samples_per_second": 58.585,
86
- "eval_steps_per_second": 1.196,
87
  "step": 42
88
  },
89
  {
90
  "epoch": 7.0,
91
- "eval_accuracy": 0.030612244897959183,
92
- "eval_loss": 3.937299966812134,
93
- "eval_runtime": 1.6864,
94
- "eval_samples_per_second": 58.111,
95
- "eval_steps_per_second": 1.186,
96
  "step": 49
97
  },
98
  {
99
  "epoch": 7.14,
100
  "learning_rate": 1.8571428571428575e-05,
101
- "loss": 3.8913,
102
  "step": 50
103
  },
104
  {
105
  "epoch": 8.0,
106
- "eval_accuracy": 0.02040816326530612,
107
- "eval_loss": 3.9351377487182617,
108
- "eval_runtime": 1.7475,
109
- "eval_samples_per_second": 56.08,
110
- "eval_steps_per_second": 1.144,
111
  "step": 56
112
  },
113
  {
114
  "epoch": 8.57,
115
  "learning_rate": 1.8285714285714288e-05,
116
- "loss": 3.8685,
117
  "step": 60
118
  },
119
  {
120
  "epoch": 9.0,
121
- "eval_accuracy": 0.02040816326530612,
122
- "eval_loss": 3.931184768676758,
123
- "eval_runtime": 1.7165,
124
- "eval_samples_per_second": 57.092,
125
- "eval_steps_per_second": 1.165,
126
  "step": 63
127
  },
128
  {
129
  "epoch": 10.0,
130
  "learning_rate": 1.8e-05,
131
- "loss": 3.8413,
132
  "step": 70
133
  },
134
  {
135
  "epoch": 10.0,
136
- "eval_accuracy": 0.030612244897959183,
137
- "eval_loss": 3.9259071350097656,
138
- "eval_runtime": 1.7086,
139
- "eval_samples_per_second": 57.357,
140
- "eval_steps_per_second": 1.171,
141
  "step": 70
142
  },
143
  {
144
  "epoch": 11.0,
145
- "eval_accuracy": 0.030612244897959183,
146
- "eval_loss": 3.9218833446502686,
147
- "eval_runtime": 1.6006,
148
- "eval_samples_per_second": 61.228,
149
- "eval_steps_per_second": 1.25,
150
  "step": 77
151
  },
152
  {
153
  "epoch": 11.43,
154
  "learning_rate": 1.7714285714285717e-05,
155
- "loss": 3.8163,
156
  "step": 80
157
  },
158
  {
159
  "epoch": 12.0,
160
- "eval_accuracy": 0.02040816326530612,
161
- "eval_loss": 3.9182682037353516,
162
- "eval_runtime": 1.8155,
163
- "eval_samples_per_second": 53.98,
164
- "eval_steps_per_second": 1.102,
165
  "step": 84
166
  },
167
  {
168
  "epoch": 12.86,
169
  "learning_rate": 1.742857142857143e-05,
170
- "loss": 3.7912,
171
  "step": 90
172
  },
173
  {
174
  "epoch": 13.0,
175
- "eval_accuracy": 0.04081632653061224,
176
- "eval_loss": 3.9151253700256348,
177
- "eval_runtime": 1.704,
178
- "eval_samples_per_second": 57.511,
179
- "eval_steps_per_second": 1.174,
180
  "step": 91
181
  },
182
  {
183
  "epoch": 14.0,
184
- "eval_accuracy": 0.030612244897959183,
185
- "eval_loss": 3.911611795425415,
186
- "eval_runtime": 1.7037,
187
- "eval_samples_per_second": 57.521,
188
- "eval_steps_per_second": 1.174,
189
  "step": 98
190
  },
191
  {
192
  "epoch": 14.29,
193
  "learning_rate": 1.7142857142857142e-05,
194
- "loss": 3.7616,
195
  "step": 100
196
  },
197
  {
198
  "epoch": 15.0,
199
- "eval_accuracy": 0.04081632653061224,
200
- "eval_loss": 3.9074392318725586,
201
- "eval_runtime": 1.6801,
202
- "eval_samples_per_second": 58.331,
203
- "eval_steps_per_second": 1.19,
204
  "step": 105
205
  },
206
  {
207
  "epoch": 15.71,
208
  "learning_rate": 1.6857142857142858e-05,
209
- "loss": 3.734,
210
  "step": 110
211
  },
212
  {
213
  "epoch": 16.0,
214
- "eval_accuracy": 0.04081632653061224,
215
- "eval_loss": 3.9029488563537598,
216
- "eval_runtime": 1.7659,
217
- "eval_samples_per_second": 55.496,
218
- "eval_steps_per_second": 1.133,
219
  "step": 112
220
  },
221
  {
222
  "epoch": 17.0,
223
- "eval_accuracy": 0.061224489795918366,
224
- "eval_loss": 3.8968818187713623,
225
- "eval_runtime": 1.6451,
226
- "eval_samples_per_second": 59.57,
227
- "eval_steps_per_second": 1.216,
228
  "step": 119
229
  },
230
  {
231
  "epoch": 17.14,
232
  "learning_rate": 1.6571428571428574e-05,
233
- "loss": 3.7014,
234
  "step": 120
235
  },
236
  {
237
  "epoch": 18.0,
238
- "eval_accuracy": 0.07142857142857142,
239
- "eval_loss": 3.890695095062256,
240
- "eval_runtime": 1.6901,
241
- "eval_samples_per_second": 57.983,
242
- "eval_steps_per_second": 1.183,
243
  "step": 126
244
  },
245
  {
246
  "epoch": 18.57,
247
  "learning_rate": 1.6285714285714287e-05,
248
- "loss": 3.6707,
249
  "step": 130
250
  },
251
  {
252
  "epoch": 19.0,
253
- "eval_accuracy": 0.07142857142857142,
254
- "eval_loss": 3.8844714164733887,
255
- "eval_runtime": 1.6505,
256
- "eval_samples_per_second": 59.374,
257
- "eval_steps_per_second": 1.212,
258
  "step": 133
259
  },
260
  {
261
  "epoch": 20.0,
262
  "learning_rate": 1.6000000000000003e-05,
263
- "loss": 3.6307,
264
  "step": 140
265
  },
266
  {
267
  "epoch": 20.0,
268
- "eval_accuracy": 0.08163265306122448,
269
- "eval_loss": 3.877913475036621,
270
- "eval_runtime": 1.8041,
271
- "eval_samples_per_second": 54.322,
272
- "eval_steps_per_second": 1.109,
273
  "step": 140
274
  },
275
  {
276
  "epoch": 21.0,
277
- "eval_accuracy": 0.08163265306122448,
278
- "eval_loss": 3.8703930377960205,
279
- "eval_runtime": 1.7575,
280
- "eval_samples_per_second": 55.762,
281
- "eval_steps_per_second": 1.138,
282
  "step": 147
283
  },
284
  {
285
  "epoch": 21.43,
286
  "learning_rate": 1.5714285714285715e-05,
287
- "loss": 3.596,
288
  "step": 150
289
  },
290
  {
291
  "epoch": 22.0,
292
- "eval_accuracy": 0.09183673469387756,
293
- "eval_loss": 3.8646252155303955,
294
- "eval_runtime": 1.6598,
295
- "eval_samples_per_second": 59.042,
296
- "eval_steps_per_second": 1.205,
297
  "step": 154
298
  },
299
  {
300
  "epoch": 22.86,
301
  "learning_rate": 1.542857142857143e-05,
302
- "loss": 3.5875,
303
  "step": 160
304
  },
305
  {
306
  "epoch": 23.0,
307
- "eval_accuracy": 0.09183673469387756,
308
- "eval_loss": 3.8603618144989014,
309
- "eval_runtime": 1.6878,
310
- "eval_samples_per_second": 58.065,
311
- "eval_steps_per_second": 1.185,
312
  "step": 161
313
  },
314
  {
315
  "epoch": 24.0,
316
- "eval_accuracy": 0.09183673469387756,
317
- "eval_loss": 3.8561482429504395,
318
- "eval_runtime": 1.6677,
319
- "eval_samples_per_second": 58.764,
320
- "eval_steps_per_second": 1.199,
321
  "step": 168
322
  },
323
  {
324
  "epoch": 24.29,
325
  "learning_rate": 1.5142857142857144e-05,
326
- "loss": 3.5532,
327
  "step": 170
328
  },
329
  {
330
  "epoch": 25.0,
331
- "eval_accuracy": 0.09183673469387756,
332
- "eval_loss": 3.8509910106658936,
333
- "eval_runtime": 1.7363,
334
- "eval_samples_per_second": 56.441,
335
- "eval_steps_per_second": 1.152,
336
  "step": 175
337
  },
338
  {
339
  "epoch": 25.71,
340
  "learning_rate": 1.4857142857142858e-05,
341
- "loss": 3.5374,
342
  "step": 180
343
  },
344
  {
345
  "epoch": 26.0,
346
- "eval_accuracy": 0.09183673469387756,
347
- "eval_loss": 3.844221353530884,
348
- "eval_runtime": 1.7776,
349
- "eval_samples_per_second": 55.131,
350
- "eval_steps_per_second": 1.125,
351
  "step": 182
352
  },
353
  {
354
  "epoch": 27.0,
355
- "eval_accuracy": 0.10204081632653061,
356
- "eval_loss": 3.83986496925354,
357
- "eval_runtime": 1.6514,
358
- "eval_samples_per_second": 59.342,
359
- "eval_steps_per_second": 1.211,
360
  "step": 189
361
  },
362
  {
363
  "epoch": 27.14,
364
  "learning_rate": 1.4571428571428573e-05,
365
- "loss": 3.51,
366
  "step": 190
367
  },
368
  {
369
  "epoch": 28.0,
370
- "eval_accuracy": 0.11224489795918367,
371
- "eval_loss": 3.8350086212158203,
372
- "eval_runtime": 1.6793,
373
- "eval_samples_per_second": 58.357,
374
- "eval_steps_per_second": 1.191,
375
  "step": 196
376
  },
377
  {
378
  "epoch": 28.57,
379
  "learning_rate": 1.4285714285714287e-05,
380
- "loss": 3.4842,
381
  "step": 200
382
  },
383
  {
384
  "epoch": 29.0,
385
- "eval_accuracy": 0.12244897959183673,
386
- "eval_loss": 3.8296377658843994,
387
- "eval_runtime": 1.7517,
388
- "eval_samples_per_second": 55.946,
389
- "eval_steps_per_second": 1.142,
390
  "step": 203
391
  },
392
  {
393
  "epoch": 30.0,
394
  "learning_rate": 1.4e-05,
395
- "loss": 3.4495,
396
  "step": 210
397
  },
398
  {
399
  "epoch": 30.0,
400
- "eval_accuracy": 0.12244897959183673,
401
- "eval_loss": 3.8243255615234375,
402
- "eval_runtime": 1.7837,
403
- "eval_samples_per_second": 54.942,
404
- "eval_steps_per_second": 1.121,
405
  "step": 210
406
  },
407
  {
408
  "epoch": 31.0,
409
- "eval_accuracy": 0.12244897959183673,
410
- "eval_loss": 3.8213043212890625,
411
- "eval_runtime": 1.6545,
412
- "eval_samples_per_second": 59.233,
413
- "eval_steps_per_second": 1.209,
414
  "step": 217
415
  },
416
  {
417
  "epoch": 31.43,
418
  "learning_rate": 1.3714285714285716e-05,
419
- "loss": 3.4155,
420
  "step": 220
421
  },
422
  {
423
  "epoch": 32.0,
424
- "eval_accuracy": 0.12244897959183673,
425
- "eval_loss": 3.815812110900879,
426
- "eval_runtime": 1.6721,
427
- "eval_samples_per_second": 58.61,
428
- "eval_steps_per_second": 1.196,
429
  "step": 224
430
  },
431
  {
432
  "epoch": 32.86,
433
  "learning_rate": 1.3428571428571429e-05,
434
- "loss": 3.4257,
435
  "step": 230
436
  },
437
  {
438
  "epoch": 33.0,
439
- "eval_accuracy": 0.12244897959183673,
440
- "eval_loss": 3.8117594718933105,
441
- "eval_runtime": 1.6581,
442
- "eval_samples_per_second": 59.102,
443
- "eval_steps_per_second": 1.206,
444
  "step": 231
445
  },
446
  {
447
  "epoch": 34.0,
448
- "eval_accuracy": 0.1326530612244898,
449
- "eval_loss": 3.8061439990997314,
450
- "eval_runtime": 1.6272,
451
- "eval_samples_per_second": 60.227,
452
- "eval_steps_per_second": 1.229,
453
  "step": 238
454
  },
455
  {
456
  "epoch": 34.29,
457
  "learning_rate": 1.3142857142857145e-05,
458
- "loss": 3.395,
459
  "step": 240
460
  },
461
  {
462
  "epoch": 35.0,
463
- "eval_accuracy": 0.1326530612244898,
464
- "eval_loss": 3.8029837608337402,
465
- "eval_runtime": 1.6449,
466
- "eval_samples_per_second": 59.577,
467
- "eval_steps_per_second": 1.216,
468
  "step": 245
469
  },
470
  {
471
  "epoch": 35.71,
472
  "learning_rate": 1.2857142857142859e-05,
473
- "loss": 3.3693,
474
  "step": 250
475
  },
476
  {
477
  "epoch": 36.0,
478
- "eval_accuracy": 0.14285714285714285,
479
- "eval_loss": 3.795672655105591,
480
- "eval_runtime": 1.6624,
481
- "eval_samples_per_second": 58.953,
482
- "eval_steps_per_second": 1.203,
483
  "step": 252
484
  },
485
  {
486
  "epoch": 37.0,
487
- "eval_accuracy": 0.12244897959183673,
488
- "eval_loss": 3.790419578552246,
489
- "eval_runtime": 1.6455,
490
- "eval_samples_per_second": 59.556,
491
- "eval_steps_per_second": 1.215,
492
  "step": 259
493
  },
494
  {
495
  "epoch": 37.14,
496
  "learning_rate": 1.2571428571428572e-05,
497
- "loss": 3.35,
498
  "step": 260
499
  },
500
  {
501
  "epoch": 38.0,
502
- "eval_accuracy": 0.12244897959183673,
503
- "eval_loss": 3.783411741256714,
504
- "eval_runtime": 1.6562,
505
- "eval_samples_per_second": 59.172,
506
- "eval_steps_per_second": 1.208,
507
  "step": 266
508
  },
509
  {
510
  "epoch": 38.57,
511
  "learning_rate": 1.2285714285714288e-05,
512
- "loss": 3.3453,
513
  "step": 270
514
  },
515
  {
516
  "epoch": 39.0,
517
- "eval_accuracy": 0.12244897959183673,
518
- "eval_loss": 3.778721332550049,
519
- "eval_runtime": 1.7932,
520
- "eval_samples_per_second": 54.651,
521
- "eval_steps_per_second": 1.115,
522
  "step": 273
523
  },
524
  {
525
  "epoch": 40.0,
526
  "learning_rate": 1.2e-05,
527
- "loss": 3.2977,
528
  "step": 280
529
  },
530
  {
531
  "epoch": 40.0,
532
- "eval_accuracy": 0.12244897959183673,
533
- "eval_loss": 3.772735357284546,
534
- "eval_runtime": 1.6336,
535
- "eval_samples_per_second": 59.991,
536
- "eval_steps_per_second": 1.224,
537
  "step": 280
538
  },
539
  {
540
  "epoch": 41.0,
541
- "eval_accuracy": 0.12244897959183673,
542
- "eval_loss": 3.7680680751800537,
543
- "eval_runtime": 1.7171,
544
- "eval_samples_per_second": 57.072,
545
- "eval_steps_per_second": 1.165,
546
  "step": 287
547
  },
548
  {
549
  "epoch": 41.43,
550
  "learning_rate": 1.1714285714285716e-05,
551
- "loss": 3.2875,
552
  "step": 290
553
  },
554
  {
555
  "epoch": 42.0,
556
- "eval_accuracy": 0.12244897959183673,
557
- "eval_loss": 3.762765407562256,
558
- "eval_runtime": 1.6284,
559
- "eval_samples_per_second": 60.181,
560
- "eval_steps_per_second": 1.228,
561
  "step": 294
562
  },
563
  {
564
  "epoch": 42.86,
565
  "learning_rate": 1.1428571428571429e-05,
566
- "loss": 3.2504,
567
  "step": 300
568
  },
569
  {
570
  "epoch": 43.0,
571
- "eval_accuracy": 0.12244897959183673,
572
- "eval_loss": 3.758150100708008,
573
- "eval_runtime": 1.6731,
574
- "eval_samples_per_second": 58.575,
575
- "eval_steps_per_second": 1.195,
576
  "step": 301
577
  },
578
  {
579
  "epoch": 44.0,
580
- "eval_accuracy": 0.12244897959183673,
581
- "eval_loss": 3.7527263164520264,
582
- "eval_runtime": 1.7657,
583
- "eval_samples_per_second": 55.502,
584
- "eval_steps_per_second": 1.133,
585
  "step": 308
586
  },
587
  {
588
  "epoch": 44.29,
589
  "learning_rate": 1.1142857142857143e-05,
590
- "loss": 3.2772,
591
  "step": 310
592
  },
593
  {
594
  "epoch": 45.0,
595
- "eval_accuracy": 0.12244897959183673,
596
- "eval_loss": 3.749258041381836,
597
- "eval_runtime": 1.6842,
598
- "eval_samples_per_second": 58.188,
599
- "eval_steps_per_second": 1.188,
600
  "step": 315
601
  },
602
  {
603
  "epoch": 45.71,
604
  "learning_rate": 1.0857142857142858e-05,
605
- "loss": 3.2353,
606
  "step": 320
607
  },
608
  {
609
  "epoch": 46.0,
610
- "eval_accuracy": 0.11224489795918367,
611
- "eval_loss": 3.7462167739868164,
612
- "eval_runtime": 1.6536,
613
- "eval_samples_per_second": 59.264,
614
- "eval_steps_per_second": 1.209,
615
  "step": 322
616
  },
617
  {
618
  "epoch": 47.0,
619
- "eval_accuracy": 0.1326530612244898,
620
- "eval_loss": 3.743089437484741,
621
- "eval_runtime": 1.639,
622
- "eval_samples_per_second": 59.793,
623
- "eval_steps_per_second": 1.22,
624
  "step": 329
625
  },
626
  {
627
  "epoch": 47.14,
628
  "learning_rate": 1.0571428571428572e-05,
629
- "loss": 3.2198,
630
  "step": 330
631
  },
632
  {
633
  "epoch": 48.0,
634
- "eval_accuracy": 0.1326530612244898,
635
- "eval_loss": 3.7392406463623047,
636
- "eval_runtime": 1.6659,
637
- "eval_samples_per_second": 58.827,
638
- "eval_steps_per_second": 1.201,
639
  "step": 336
640
  },
641
  {
642
  "epoch": 48.57,
643
  "learning_rate": 1.0285714285714285e-05,
644
- "loss": 3.204,
645
  "step": 340
646
  },
647
  {
648
  "epoch": 49.0,
649
- "eval_accuracy": 0.14285714285714285,
650
- "eval_loss": 3.73702073097229,
651
- "eval_runtime": 1.7576,
652
- "eval_samples_per_second": 55.758,
653
- "eval_steps_per_second": 1.138,
654
  "step": 343
655
  },
656
  {
657
  "epoch": 50.0,
658
  "learning_rate": 1e-05,
659
- "loss": 3.1762,
660
  "step": 350
661
  },
662
  {
663
  "epoch": 50.0,
664
- "eval_accuracy": 0.14285714285714285,
665
- "eval_loss": 3.733855962753296,
666
- "eval_runtime": 1.6472,
667
- "eval_samples_per_second": 59.494,
668
- "eval_steps_per_second": 1.214,
669
  "step": 350
670
  },
671
  {
672
  "epoch": 51.0,
673
- "eval_accuracy": 0.14285714285714285,
674
- "eval_loss": 3.730581283569336,
675
- "eval_runtime": 1.6737,
676
- "eval_samples_per_second": 58.554,
677
- "eval_steps_per_second": 1.195,
678
  "step": 357
679
  },
680
  {
681
  "epoch": 51.43,
682
  "learning_rate": 9.714285714285715e-06,
683
- "loss": 3.1741,
684
  "step": 360
685
  },
686
  {
687
  "epoch": 52.0,
688
- "eval_accuracy": 0.16326530612244897,
689
- "eval_loss": 3.7267162799835205,
690
- "eval_runtime": 1.669,
691
- "eval_samples_per_second": 58.718,
692
- "eval_steps_per_second": 1.198,
693
  "step": 364
694
  },
695
  {
696
  "epoch": 52.86,
697
  "learning_rate": 9.42857142857143e-06,
698
- "loss": 3.1757,
699
  "step": 370
700
  },
701
  {
702
  "epoch": 53.0,
703
- "eval_accuracy": 0.16326530612244897,
704
- "eval_loss": 3.7221927642822266,
705
- "eval_runtime": 1.6595,
706
- "eval_samples_per_second": 59.054,
707
- "eval_steps_per_second": 1.205,
708
  "step": 371
709
  },
710
  {
711
  "epoch": 54.0,
712
- "eval_accuracy": 0.15306122448979592,
713
- "eval_loss": 3.717994451522827,
714
- "eval_runtime": 1.6603,
715
- "eval_samples_per_second": 59.027,
716
- "eval_steps_per_second": 1.205,
717
  "step": 378
718
  },
719
  {
720
  "epoch": 54.29,
721
  "learning_rate": 9.142857142857144e-06,
722
- "loss": 3.1492,
723
  "step": 380
724
  },
725
  {
726
  "epoch": 55.0,
727
- "eval_accuracy": 0.15306122448979592,
728
- "eval_loss": 3.7148733139038086,
729
- "eval_runtime": 1.6452,
730
- "eval_samples_per_second": 59.566,
731
- "eval_steps_per_second": 1.216,
732
  "step": 385
733
  },
734
  {
735
  "epoch": 55.71,
736
  "learning_rate": 8.857142857142858e-06,
737
- "loss": 3.1442,
738
  "step": 390
739
  },
740
  {
741
  "epoch": 56.0,
742
- "eval_accuracy": 0.15306122448979592,
743
- "eval_loss": 3.7107248306274414,
744
- "eval_runtime": 1.6947,
745
- "eval_samples_per_second": 57.827,
746
- "eval_steps_per_second": 1.18,
747
  "step": 392
748
  },
749
  {
750
  "epoch": 57.0,
751
- "eval_accuracy": 0.15306122448979592,
752
- "eval_loss": 3.7084951400756836,
753
- "eval_runtime": 1.6087,
754
- "eval_samples_per_second": 60.919,
755
- "eval_steps_per_second": 1.243,
756
  "step": 399
757
  },
758
  {
759
  "epoch": 57.14,
760
  "learning_rate": 8.571428571428571e-06,
761
- "loss": 3.1174,
762
  "step": 400
763
  },
764
  {
765
  "epoch": 58.0,
766
- "eval_accuracy": 0.15306122448979592,
767
- "eval_loss": 3.705909013748169,
768
- "eval_runtime": 1.7081,
769
- "eval_samples_per_second": 57.372,
770
- "eval_steps_per_second": 1.171,
771
  "step": 406
772
  },
773
  {
774
  "epoch": 58.57,
775
  "learning_rate": 8.285714285714287e-06,
776
- "loss": 3.0962,
777
  "step": 410
778
  },
779
  {
780
  "epoch": 59.0,
781
- "eval_accuracy": 0.15306122448979592,
782
- "eval_loss": 3.7031126022338867,
783
- "eval_runtime": 1.5815,
784
- "eval_samples_per_second": 61.967,
785
- "eval_steps_per_second": 1.265,
786
  "step": 413
787
  },
788
  {
789
  "epoch": 60.0,
790
  "learning_rate": 8.000000000000001e-06,
791
- "loss": 3.1237,
792
  "step": 420
793
  },
794
  {
795
  "epoch": 60.0,
796
- "eval_accuracy": 0.15306122448979592,
797
- "eval_loss": 3.701946258544922,
798
- "eval_runtime": 1.7026,
799
- "eval_samples_per_second": 57.559,
800
- "eval_steps_per_second": 1.175,
801
  "step": 420
802
  },
803
  {
804
  "epoch": 61.0,
805
- "eval_accuracy": 0.15306122448979592,
806
- "eval_loss": 3.6996471881866455,
807
- "eval_runtime": 1.6975,
808
- "eval_samples_per_second": 57.732,
809
- "eval_steps_per_second": 1.178,
810
  "step": 427
811
  },
812
  {
813
  "epoch": 61.43,
814
  "learning_rate": 7.714285714285716e-06,
815
- "loss": 3.1229,
816
  "step": 430
817
  },
818
  {
819
  "epoch": 62.0,
820
- "eval_accuracy": 0.15306122448979592,
821
- "eval_loss": 3.6955974102020264,
822
- "eval_runtime": 1.6826,
823
- "eval_samples_per_second": 58.242,
824
- "eval_steps_per_second": 1.189,
825
  "step": 434
826
  },
827
  {
828
  "epoch": 62.86,
829
  "learning_rate": 7.428571428571429e-06,
830
- "loss": 3.0946,
831
  "step": 440
832
  },
833
  {
834
  "epoch": 63.0,
835
- "eval_accuracy": 0.15306122448979592,
836
- "eval_loss": 3.692981719970703,
837
- "eval_runtime": 1.7683,
838
- "eval_samples_per_second": 55.42,
839
- "eval_steps_per_second": 1.131,
840
  "step": 441
841
  },
842
  {
843
  "epoch": 64.0,
844
- "eval_accuracy": 0.15306122448979592,
845
- "eval_loss": 3.6915957927703857,
846
- "eval_runtime": 1.6752,
847
- "eval_samples_per_second": 58.501,
848
- "eval_steps_per_second": 1.194,
849
  "step": 448
850
  },
851
  {
852
  "epoch": 64.29,
853
  "learning_rate": 7.1428571428571436e-06,
854
- "loss": 3.0861,
855
  "step": 450
856
  },
857
  {
858
  "epoch": 65.0,
859
- "eval_accuracy": 0.15306122448979592,
860
- "eval_loss": 3.6893138885498047,
861
- "eval_runtime": 1.6711,
862
- "eval_samples_per_second": 58.644,
863
- "eval_steps_per_second": 1.197,
864
  "step": 455
865
  },
866
  {
867
  "epoch": 65.71,
868
  "learning_rate": 6.857142857142858e-06,
869
- "loss": 3.0406,
870
  "step": 460
871
  },
872
  {
873
  "epoch": 66.0,
874
- "eval_accuracy": 0.15306122448979592,
875
- "eval_loss": 3.6859352588653564,
876
- "eval_runtime": 1.7238,
877
- "eval_samples_per_second": 56.85,
878
- "eval_steps_per_second": 1.16,
879
  "step": 462
880
  },
881
  {
882
  "epoch": 67.0,
883
- "eval_accuracy": 0.15306122448979592,
884
- "eval_loss": 3.6839077472686768,
885
- "eval_runtime": 1.6549,
886
- "eval_samples_per_second": 59.218,
887
- "eval_steps_per_second": 1.209,
888
  "step": 469
889
  },
890
  {
891
  "epoch": 67.14,
892
  "learning_rate": 6.571428571428572e-06,
893
- "loss": 3.077,
894
  "step": 470
895
  },
896
  {
897
  "epoch": 68.0,
898
- "eval_accuracy": 0.15306122448979592,
899
- "eval_loss": 3.6815552711486816,
900
- "eval_runtime": 1.673,
901
- "eval_samples_per_second": 58.578,
902
- "eval_steps_per_second": 1.195,
903
  "step": 476
904
  },
905
  {
906
  "epoch": 68.57,
907
  "learning_rate": 6.285714285714286e-06,
908
- "loss": 3.0555,
909
  "step": 480
910
  },
911
  {
912
  "epoch": 69.0,
913
- "eval_accuracy": 0.15306122448979592,
914
- "eval_loss": 3.678163766860962,
915
- "eval_runtime": 1.6518,
916
- "eval_samples_per_second": 59.329,
917
- "eval_steps_per_second": 1.211,
918
  "step": 483
919
  },
920
  {
921
  "epoch": 70.0,
922
  "learning_rate": 6e-06,
923
- "loss": 3.035,
924
  "step": 490
925
  },
926
  {
927
  "epoch": 70.0,
928
- "eval_accuracy": 0.15306122448979592,
929
- "eval_loss": 3.6762583255767822,
930
- "eval_runtime": 1.6591,
931
- "eval_samples_per_second": 59.069,
932
- "eval_steps_per_second": 1.205,
933
  "step": 490
934
  },
935
  {
936
  "epoch": 71.0,
937
- "eval_accuracy": 0.15306122448979592,
938
- "eval_loss": 3.672853469848633,
939
- "eval_runtime": 1.6495,
940
- "eval_samples_per_second": 59.412,
941
- "eval_steps_per_second": 1.212,
942
  "step": 497
943
  },
944
  {
945
  "epoch": 71.43,
946
  "learning_rate": 5.7142857142857145e-06,
947
- "loss": 3.0246,
948
  "step": 500
949
  },
950
  {
951
  "epoch": 72.0,
952
- "eval_accuracy": 0.15306122448979592,
953
- "eval_loss": 3.6719117164611816,
954
- "eval_runtime": 1.6901,
955
- "eval_samples_per_second": 57.986,
956
- "eval_steps_per_second": 1.183,
957
  "step": 504
958
  },
959
  {
960
  "epoch": 72.86,
961
  "learning_rate": 5.428571428571429e-06,
962
- "loss": 3.0282,
963
  "step": 510
964
  },
965
  {
966
  "epoch": 73.0,
967
- "eval_accuracy": 0.15306122448979592,
968
- "eval_loss": 3.670848846435547,
969
- "eval_runtime": 1.7293,
970
- "eval_samples_per_second": 56.669,
971
- "eval_steps_per_second": 1.157,
972
  "step": 511
973
  },
974
  {
975
  "epoch": 74.0,
976
- "eval_accuracy": 0.14285714285714285,
977
- "eval_loss": 3.6683461666107178,
978
- "eval_runtime": 1.6959,
979
- "eval_samples_per_second": 57.785,
980
- "eval_steps_per_second": 1.179,
981
  "step": 518
982
  },
983
  {
984
  "epoch": 74.29,
985
  "learning_rate": 5.142857142857142e-06,
986
- "loss": 3.0293,
987
  "step": 520
988
  },
989
  {
990
  "epoch": 75.0,
991
- "eval_accuracy": 0.14285714285714285,
992
- "eval_loss": 3.665170669555664,
993
- "eval_runtime": 1.6438,
994
- "eval_samples_per_second": 59.617,
995
- "eval_steps_per_second": 1.217,
996
  "step": 525
997
  },
998
  {
999
  "epoch": 75.71,
1000
  "learning_rate": 4.857142857142858e-06,
1001
- "loss": 2.9893,
1002
  "step": 530
1003
  },
1004
  {
1005
  "epoch": 76.0,
1006
- "eval_accuracy": 0.14285714285714285,
1007
- "eval_loss": 3.66402268409729,
1008
- "eval_runtime": 1.6852,
1009
- "eval_samples_per_second": 58.155,
1010
- "eval_steps_per_second": 1.187,
1011
  "step": 532
1012
  },
1013
  {
1014
  "epoch": 77.0,
1015
- "eval_accuracy": 0.14285714285714285,
1016
- "eval_loss": 3.6634998321533203,
1017
- "eval_runtime": 1.7537,
1018
- "eval_samples_per_second": 55.882,
1019
- "eval_steps_per_second": 1.14,
1020
  "step": 539
1021
  },
1022
  {
1023
  "epoch": 77.14,
1024
  "learning_rate": 4.571428571428572e-06,
1025
- "loss": 2.9888,
1026
  "step": 540
1027
  },
1028
  {
1029
  "epoch": 78.0,
1030
- "eval_accuracy": 0.14285714285714285,
1031
- "eval_loss": 3.6618170738220215,
1032
- "eval_runtime": 1.7774,
1033
- "eval_samples_per_second": 55.135,
1034
- "eval_steps_per_second": 1.125,
1035
  "step": 546
1036
  },
1037
  {
1038
  "epoch": 78.57,
1039
  "learning_rate": 4.2857142857142855e-06,
1040
- "loss": 2.9833,
1041
  "step": 550
1042
  },
1043
  {
1044
  "epoch": 79.0,
1045
- "eval_accuracy": 0.14285714285714285,
1046
- "eval_loss": 3.659451484680176,
1047
- "eval_runtime": 1.6474,
1048
- "eval_samples_per_second": 59.487,
1049
- "eval_steps_per_second": 1.214,
1050
  "step": 553
1051
  },
1052
  {
1053
  "epoch": 80.0,
1054
  "learning_rate": 4.000000000000001e-06,
1055
- "loss": 2.9739,
1056
  "step": 560
1057
  },
1058
  {
1059
  "epoch": 80.0,
1060
- "eval_accuracy": 0.14285714285714285,
1061
- "eval_loss": 3.6578376293182373,
1062
- "eval_runtime": 1.6531,
1063
- "eval_samples_per_second": 59.283,
1064
- "eval_steps_per_second": 1.21,
1065
  "step": 560
1066
  },
1067
  {
1068
  "epoch": 81.0,
1069
- "eval_accuracy": 0.14285714285714285,
1070
- "eval_loss": 3.656160593032837,
1071
- "eval_runtime": 1.7409,
1072
- "eval_samples_per_second": 56.294,
1073
- "eval_steps_per_second": 1.149,
1074
  "step": 567
1075
  },
1076
  {
1077
  "epoch": 81.43,
1078
  "learning_rate": 3.7142857142857146e-06,
1079
- "loss": 2.9513,
1080
  "step": 570
1081
  },
1082
  {
1083
  "epoch": 82.0,
1084
- "eval_accuracy": 0.14285714285714285,
1085
- "eval_loss": 3.655242681503296,
1086
- "eval_runtime": 1.6811,
1087
- "eval_samples_per_second": 58.294,
1088
- "eval_steps_per_second": 1.19,
1089
  "step": 574
1090
  },
1091
  {
1092
  "epoch": 82.86,
1093
  "learning_rate": 3.428571428571429e-06,
1094
- "loss": 2.9503,
1095
  "step": 580
1096
  },
1097
  {
1098
  "epoch": 83.0,
1099
- "eval_accuracy": 0.14285714285714285,
1100
- "eval_loss": 3.6539218425750732,
1101
- "eval_runtime": 1.7574,
1102
- "eval_samples_per_second": 55.764,
1103
- "eval_steps_per_second": 1.138,
1104
  "step": 581
1105
  },
1106
  {
1107
  "epoch": 84.0,
1108
- "eval_accuracy": 0.15306122448979592,
1109
- "eval_loss": 3.653193950653076,
1110
- "eval_runtime": 1.6302,
1111
- "eval_samples_per_second": 60.114,
1112
- "eval_steps_per_second": 1.227,
1113
  "step": 588
1114
  },
1115
  {
1116
  "epoch": 84.29,
1117
  "learning_rate": 3.142857142857143e-06,
1118
- "loss": 2.9792,
1119
  "step": 590
1120
  },
1121
  {
1122
  "epoch": 85.0,
1123
- "eval_accuracy": 0.15306122448979592,
1124
- "eval_loss": 3.651691436767578,
1125
- "eval_runtime": 1.7277,
1126
- "eval_samples_per_second": 56.724,
1127
- "eval_steps_per_second": 1.158,
1128
  "step": 595
1129
  },
1130
  {
1131
  "epoch": 85.71,
1132
  "learning_rate": 2.8571428571428573e-06,
1133
- "loss": 2.9561,
1134
  "step": 600
1135
  },
1136
  {
1137
  "epoch": 86.0,
1138
- "eval_accuracy": 0.15306122448979592,
1139
- "eval_loss": 3.6496691703796387,
1140
- "eval_runtime": 1.6772,
1141
- "eval_samples_per_second": 58.432,
1142
- "eval_steps_per_second": 1.192,
1143
  "step": 602
1144
  },
1145
  {
1146
  "epoch": 87.0,
1147
- "eval_accuracy": 0.15306122448979592,
1148
- "eval_loss": 3.6485908031463623,
1149
- "eval_runtime": 1.664,
1150
- "eval_samples_per_second": 58.893,
1151
- "eval_steps_per_second": 1.202,
1152
  "step": 609
1153
  },
1154
  {
1155
  "epoch": 87.14,
1156
  "learning_rate": 2.571428571428571e-06,
1157
- "loss": 2.964,
1158
  "step": 610
1159
  },
1160
  {
1161
  "epoch": 88.0,
1162
- "eval_accuracy": 0.15306122448979592,
1163
- "eval_loss": 3.647573709487915,
1164
- "eval_runtime": 1.7778,
1165
- "eval_samples_per_second": 55.124,
1166
- "eval_steps_per_second": 1.125,
1167
  "step": 616
1168
  },
1169
  {
1170
  "epoch": 88.57,
1171
  "learning_rate": 2.285714285714286e-06,
1172
- "loss": 2.9665,
1173
  "step": 620
1174
  },
1175
  {
1176
  "epoch": 89.0,
1177
- "eval_accuracy": 0.15306122448979592,
1178
- "eval_loss": 3.6470184326171875,
1179
- "eval_runtime": 1.6689,
1180
- "eval_samples_per_second": 58.722,
1181
- "eval_steps_per_second": 1.198,
1182
  "step": 623
1183
  },
1184
  {
1185
  "epoch": 90.0,
1186
  "learning_rate": 2.0000000000000003e-06,
1187
- "loss": 2.9439,
1188
  "step": 630
1189
  },
1190
  {
1191
  "epoch": 90.0,
1192
- "eval_accuracy": 0.15306122448979592,
1193
- "eval_loss": 3.646164894104004,
1194
- "eval_runtime": 2.2025,
1195
- "eval_samples_per_second": 44.495,
1196
- "eval_steps_per_second": 0.908,
1197
  "step": 630
1198
  },
1199
  {
1200
  "epoch": 91.0,
1201
- "eval_accuracy": 0.15306122448979592,
1202
- "eval_loss": 3.6452953815460205,
1203
- "eval_runtime": 1.6826,
1204
- "eval_samples_per_second": 58.242,
1205
- "eval_steps_per_second": 1.189,
1206
  "step": 637
1207
  },
1208
  {
1209
  "epoch": 91.43,
1210
  "learning_rate": 1.7142857142857145e-06,
1211
- "loss": 2.9369,
1212
  "step": 640
1213
  },
1214
  {
1215
  "epoch": 92.0,
1216
- "eval_accuracy": 0.15306122448979592,
1217
- "eval_loss": 3.645128011703491,
1218
- "eval_runtime": 1.646,
1219
- "eval_samples_per_second": 59.539,
1220
- "eval_steps_per_second": 1.215,
1221
  "step": 644
1222
  },
1223
  {
1224
  "epoch": 92.86,
1225
  "learning_rate": 1.4285714285714286e-06,
1226
- "loss": 2.9619,
1227
  "step": 650
1228
  },
1229
  {
1230
  "epoch": 93.0,
1231
- "eval_accuracy": 0.15306122448979592,
1232
- "eval_loss": 3.64505934715271,
1233
- "eval_runtime": 1.7663,
1234
- "eval_samples_per_second": 55.483,
1235
- "eval_steps_per_second": 1.132,
1236
  "step": 651
1237
  },
1238
  {
1239
  "epoch": 94.0,
1240
- "eval_accuracy": 0.15306122448979592,
1241
- "eval_loss": 3.6448814868927,
1242
- "eval_runtime": 1.6466,
1243
- "eval_samples_per_second": 59.516,
1244
- "eval_steps_per_second": 1.215,
1245
  "step": 658
1246
  },
1247
  {
1248
  "epoch": 94.29,
1249
  "learning_rate": 1.142857142857143e-06,
1250
- "loss": 2.955,
1251
  "step": 660
1252
  },
1253
  {
1254
  "epoch": 95.0,
1255
- "eval_accuracy": 0.15306122448979592,
1256
- "eval_loss": 3.6444060802459717,
1257
- "eval_runtime": 1.6863,
1258
- "eval_samples_per_second": 58.115,
1259
- "eval_steps_per_second": 1.186,
1260
  "step": 665
1261
  },
1262
  {
1263
  "epoch": 95.71,
1264
  "learning_rate": 8.571428571428572e-07,
1265
- "loss": 2.9323,
1266
  "step": 670
1267
  },
1268
  {
1269
  "epoch": 96.0,
1270
- "eval_accuracy": 0.15306122448979592,
1271
- "eval_loss": 3.6440815925598145,
1272
- "eval_runtime": 1.626,
1273
- "eval_samples_per_second": 60.271,
1274
- "eval_steps_per_second": 1.23,
1275
  "step": 672
1276
  },
1277
  {
1278
  "epoch": 97.0,
1279
- "eval_accuracy": 0.15306122448979592,
1280
- "eval_loss": 3.6438138484954834,
1281
- "eval_runtime": 1.7724,
1282
- "eval_samples_per_second": 55.292,
1283
- "eval_steps_per_second": 1.128,
1284
  "step": 679
1285
  },
1286
  {
1287
  "epoch": 97.14,
1288
  "learning_rate": 5.714285714285715e-07,
1289
- "loss": 2.9466,
1290
  "step": 680
1291
  },
1292
  {
1293
  "epoch": 98.0,
1294
- "eval_accuracy": 0.15306122448979592,
1295
- "eval_loss": 3.643672466278076,
1296
- "eval_runtime": 1.7638,
1297
- "eval_samples_per_second": 55.56,
1298
- "eval_steps_per_second": 1.134,
1299
  "step": 686
1300
  },
1301
  {
1302
  "epoch": 98.57,
1303
  "learning_rate": 2.8571428571428575e-07,
1304
- "loss": 2.945,
1305
  "step": 690
1306
  },
1307
  {
1308
  "epoch": 99.0,
1309
- "eval_accuracy": 0.15306122448979592,
1310
- "eval_loss": 3.6435706615448,
1311
- "eval_runtime": 1.6937,
1312
- "eval_samples_per_second": 57.862,
1313
- "eval_steps_per_second": 1.181,
1314
  "step": 693
1315
  },
1316
  {
1317
  "epoch": 100.0,
1318
  "learning_rate": 0.0,
1319
- "loss": 2.9665,
1320
  "step": 700
1321
  },
1322
  {
1323
  "epoch": 100.0,
1324
- "eval_accuracy": 0.15306122448979592,
1325
- "eval_loss": 3.643535614013672,
1326
- "eval_runtime": 1.66,
1327
- "eval_samples_per_second": 59.037,
1328
- "eval_steps_per_second": 1.205,
1329
  "step": 700
1330
  },
1331
  {
1332
  "epoch": 100.0,
1333
  "step": 700,
1334
- "total_flos": 4.2794747466153984e+18,
1335
- "train_loss": 3.289549721309117,
1336
- "train_runtime": 4059.8356,
1337
- "train_samples_per_second": 13.597,
1338
- "train_steps_per_second": 0.172
1339
  }
1340
  ],
1341
  "logging_steps": 10,
@@ -1343,7 +1343,7 @@
1343
  "num_input_tokens_seen": 0,
1344
  "num_train_epochs": 100,
1345
  "save_steps": 500,
1346
- "total_flos": 4.2794747466153984e+18,
1347
  "train_batch_size": 80,
1348
  "trial_name": null,
1349
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.859877347946167,
3
+ "best_model_checkpoint": "./drive/MyDrive/repositories/torch_example_image-classification/outputs_letter3/checkpoint-644",
4
  "epoch": 100.0,
5
  "eval_steps": 500,
6
  "global_step": 700,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.015384615384615385,
14
+ "eval_loss": 3.9449453353881836,
15
+ "eval_runtime": 2.0983,
16
+ "eval_samples_per_second": 61.955,
17
+ "eval_steps_per_second": 0.953,
18
  "step": 7
19
  },
20
  {
21
  "epoch": 1.43,
22
  "learning_rate": 1.9714285714285718e-05,
23
+ "loss": 3.9333,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.023076923076923078,
29
+ "eval_loss": 3.9366512298583984,
30
+ "eval_runtime": 2.2606,
31
+ "eval_samples_per_second": 57.506,
32
+ "eval_steps_per_second": 0.885,
33
  "step": 14
34
  },
35
  {
36
  "epoch": 2.86,
37
  "learning_rate": 1.942857142857143e-05,
38
+ "loss": 3.8939,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 3.0,
43
+ "eval_accuracy": 0.03076923076923077,
44
+ "eval_loss": 3.9279532432556152,
45
+ "eval_runtime": 2.3275,
46
+ "eval_samples_per_second": 55.855,
47
+ "eval_steps_per_second": 0.859,
48
  "step": 21
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.046153846153846156,
53
+ "eval_loss": 3.9167158603668213,
54
+ "eval_runtime": 2.2361,
55
+ "eval_samples_per_second": 58.137,
56
+ "eval_steps_per_second": 0.894,
57
  "step": 28
58
  },
59
  {
60
  "epoch": 4.29,
61
  "learning_rate": 1.9142857142857146e-05,
62
+ "loss": 3.8562,
63
  "step": 30
64
  },
65
  {
66
  "epoch": 5.0,
67
+ "eval_accuracy": 0.06923076923076923,
68
+ "eval_loss": 3.9032633304595947,
69
+ "eval_runtime": 2.4923,
70
+ "eval_samples_per_second": 52.161,
71
+ "eval_steps_per_second": 0.802,
72
  "step": 35
73
  },
74
  {
75
  "epoch": 5.71,
76
  "learning_rate": 1.885714285714286e-05,
77
+ "loss": 3.8008,
78
  "step": 40
79
  },
80
  {
81
  "epoch": 6.0,
82
+ "eval_accuracy": 0.07692307692307693,
83
+ "eval_loss": 3.8873634338378906,
84
+ "eval_runtime": 2.5262,
85
+ "eval_samples_per_second": 51.461,
86
+ "eval_steps_per_second": 0.792,
87
  "step": 42
88
  },
89
  {
90
  "epoch": 7.0,
91
+ "eval_accuracy": 0.1076923076923077,
92
+ "eval_loss": 3.867002010345459,
93
+ "eval_runtime": 2.5898,
94
+ "eval_samples_per_second": 50.196,
95
+ "eval_steps_per_second": 0.772,
96
  "step": 49
97
  },
98
  {
99
  "epoch": 7.14,
100
  "learning_rate": 1.8571428571428575e-05,
101
+ "loss": 3.7555,
102
  "step": 50
103
  },
104
  {
105
  "epoch": 8.0,
106
+ "eval_accuracy": 0.1,
107
+ "eval_loss": 3.8494887351989746,
108
+ "eval_runtime": 2.6867,
109
+ "eval_samples_per_second": 48.387,
110
+ "eval_steps_per_second": 0.744,
111
  "step": 56
112
  },
113
  {
114
  "epoch": 8.57,
115
  "learning_rate": 1.8285714285714288e-05,
116
+ "loss": 3.6917,
117
  "step": 60
118
  },
119
  {
120
  "epoch": 9.0,
121
+ "eval_accuracy": 0.11538461538461539,
122
+ "eval_loss": 3.8304965496063232,
123
+ "eval_runtime": 2.4963,
124
+ "eval_samples_per_second": 52.077,
125
+ "eval_steps_per_second": 0.801,
126
  "step": 63
127
  },
128
  {
129
  "epoch": 10.0,
130
  "learning_rate": 1.8e-05,
131
+ "loss": 3.6372,
132
  "step": 70
133
  },
134
  {
135
  "epoch": 10.0,
136
+ "eval_accuracy": 0.13846153846153847,
137
+ "eval_loss": 3.8138267993927,
138
+ "eval_runtime": 2.3942,
139
+ "eval_samples_per_second": 54.298,
140
+ "eval_steps_per_second": 0.835,
141
  "step": 70
142
  },
143
  {
144
  "epoch": 11.0,
145
+ "eval_accuracy": 0.12307692307692308,
146
+ "eval_loss": 3.796628952026367,
147
+ "eval_runtime": 2.3929,
148
+ "eval_samples_per_second": 54.328,
149
+ "eval_steps_per_second": 0.836,
150
  "step": 77
151
  },
152
  {
153
  "epoch": 11.43,
154
  "learning_rate": 1.7714285714285717e-05,
155
+ "loss": 3.5846,
156
  "step": 80
157
  },
158
  {
159
  "epoch": 12.0,
160
+ "eval_accuracy": 0.15384615384615385,
161
+ "eval_loss": 3.7766666412353516,
162
+ "eval_runtime": 2.4605,
163
+ "eval_samples_per_second": 52.835,
164
+ "eval_steps_per_second": 0.813,
165
  "step": 84
166
  },
167
  {
168
  "epoch": 12.86,
169
  "learning_rate": 1.742857142857143e-05,
170
+ "loss": 3.5047,
171
  "step": 90
172
  },
173
  {
174
  "epoch": 13.0,
175
+ "eval_accuracy": 0.23076923076923078,
176
+ "eval_loss": 3.751601219177246,
177
+ "eval_runtime": 2.5029,
178
+ "eval_samples_per_second": 51.94,
179
+ "eval_steps_per_second": 0.799,
180
  "step": 91
181
  },
182
  {
183
  "epoch": 14.0,
184
+ "eval_accuracy": 0.23846153846153847,
185
+ "eval_loss": 3.7278525829315186,
186
+ "eval_runtime": 2.6158,
187
+ "eval_samples_per_second": 49.699,
188
+ "eval_steps_per_second": 0.765,
189
  "step": 98
190
  },
191
  {
192
  "epoch": 14.29,
193
  "learning_rate": 1.7142857142857142e-05,
194
+ "loss": 3.4547,
195
  "step": 100
196
  },
197
  {
198
  "epoch": 15.0,
199
+ "eval_accuracy": 0.23846153846153847,
200
+ "eval_loss": 3.7031078338623047,
201
+ "eval_runtime": 2.5457,
202
+ "eval_samples_per_second": 51.066,
203
+ "eval_steps_per_second": 0.786,
204
  "step": 105
205
  },
206
  {
207
  "epoch": 15.71,
208
  "learning_rate": 1.6857142857142858e-05,
209
+ "loss": 3.3796,
210
  "step": 110
211
  },
212
  {
213
  "epoch": 16.0,
214
+ "eval_accuracy": 0.2692307692307692,
215
+ "eval_loss": 3.672494649887085,
216
+ "eval_runtime": 2.368,
217
+ "eval_samples_per_second": 54.899,
218
+ "eval_steps_per_second": 0.845,
219
  "step": 112
220
  },
221
  {
222
  "epoch": 17.0,
223
+ "eval_accuracy": 0.27692307692307694,
224
+ "eval_loss": 3.646197557449341,
225
+ "eval_runtime": 2.3752,
226
+ "eval_samples_per_second": 54.731,
227
+ "eval_steps_per_second": 0.842,
228
  "step": 119
229
  },
230
  {
231
  "epoch": 17.14,
232
  "learning_rate": 1.6571428571428574e-05,
233
+ "loss": 3.3283,
234
  "step": 120
235
  },
236
  {
237
  "epoch": 18.0,
238
+ "eval_accuracy": 0.2923076923076923,
239
+ "eval_loss": 3.622591495513916,
240
+ "eval_runtime": 2.3748,
241
+ "eval_samples_per_second": 54.741,
242
+ "eval_steps_per_second": 0.842,
243
  "step": 126
244
  },
245
  {
246
  "epoch": 18.57,
247
  "learning_rate": 1.6285714285714287e-05,
248
+ "loss": 3.2728,
249
  "step": 130
250
  },
251
  {
252
  "epoch": 19.0,
253
+ "eval_accuracy": 0.2846153846153846,
254
+ "eval_loss": 3.6022486686706543,
255
+ "eval_runtime": 2.4033,
256
+ "eval_samples_per_second": 54.091,
257
+ "eval_steps_per_second": 0.832,
258
  "step": 133
259
  },
260
  {
261
  "epoch": 20.0,
262
  "learning_rate": 1.6000000000000003e-05,
263
+ "loss": 3.2229,
264
  "step": 140
265
  },
266
  {
267
  "epoch": 20.0,
268
+ "eval_accuracy": 0.27692307692307694,
269
+ "eval_loss": 3.5929768085479736,
270
+ "eval_runtime": 2.4687,
271
+ "eval_samples_per_second": 52.66,
272
+ "eval_steps_per_second": 0.81,
273
  "step": 140
274
  },
275
  {
276
  "epoch": 21.0,
277
+ "eval_accuracy": 0.33076923076923076,
278
+ "eval_loss": 3.574838876724243,
279
+ "eval_runtime": 2.5603,
280
+ "eval_samples_per_second": 50.775,
281
+ "eval_steps_per_second": 0.781,
282
  "step": 147
283
  },
284
  {
285
  "epoch": 21.43,
286
  "learning_rate": 1.5714285714285715e-05,
287
+ "loss": 3.1514,
288
  "step": 150
289
  },
290
  {
291
  "epoch": 22.0,
292
+ "eval_accuracy": 0.3384615384615385,
293
+ "eval_loss": 3.5403976440429688,
294
+ "eval_runtime": 2.3284,
295
+ "eval_samples_per_second": 55.833,
296
+ "eval_steps_per_second": 0.859,
297
  "step": 154
298
  },
299
  {
300
  "epoch": 22.86,
301
  "learning_rate": 1.542857142857143e-05,
302
+ "loss": 3.1179,
303
  "step": 160
304
  },
305
  {
306
  "epoch": 23.0,
307
+ "eval_accuracy": 0.3384615384615385,
308
+ "eval_loss": 3.514573335647583,
309
+ "eval_runtime": 2.3901,
310
+ "eval_samples_per_second": 54.39,
311
+ "eval_steps_per_second": 0.837,
312
  "step": 161
313
  },
314
  {
315
  "epoch": 24.0,
316
+ "eval_accuracy": 0.34615384615384615,
317
+ "eval_loss": 3.491649866104126,
318
+ "eval_runtime": 2.3495,
319
+ "eval_samples_per_second": 55.331,
320
+ "eval_steps_per_second": 0.851,
321
  "step": 168
322
  },
323
  {
324
  "epoch": 24.29,
325
  "learning_rate": 1.5142857142857144e-05,
326
+ "loss": 3.0559,
327
  "step": 170
328
  },
329
  {
330
  "epoch": 25.0,
331
+ "eval_accuracy": 0.3384615384615385,
332
+ "eval_loss": 3.473316192626953,
333
+ "eval_runtime": 2.387,
334
+ "eval_samples_per_second": 54.461,
335
+ "eval_steps_per_second": 0.838,
336
  "step": 175
337
  },
338
  {
339
  "epoch": 25.71,
340
  "learning_rate": 1.4857142857142858e-05,
341
+ "loss": 3.0051,
342
  "step": 180
343
  },
344
  {
345
  "epoch": 26.0,
346
+ "eval_accuracy": 0.36153846153846153,
347
+ "eval_loss": 3.453972339630127,
348
+ "eval_runtime": 2.5367,
349
+ "eval_samples_per_second": 51.249,
350
+ "eval_steps_per_second": 0.788,
351
  "step": 182
352
  },
353
  {
354
  "epoch": 27.0,
355
+ "eval_accuracy": 0.36923076923076925,
356
+ "eval_loss": 3.4498963356018066,
357
+ "eval_runtime": 2.5601,
358
+ "eval_samples_per_second": 50.779,
359
+ "eval_steps_per_second": 0.781,
360
  "step": 189
361
  },
362
  {
363
  "epoch": 27.14,
364
  "learning_rate": 1.4571428571428573e-05,
365
+ "loss": 2.9775,
366
  "step": 190
367
  },
368
  {
369
  "epoch": 28.0,
370
+ "eval_accuracy": 0.3769230769230769,
371
+ "eval_loss": 3.435525417327881,
372
+ "eval_runtime": 2.3663,
373
+ "eval_samples_per_second": 54.939,
374
+ "eval_steps_per_second": 0.845,
375
  "step": 196
376
  },
377
  {
378
  "epoch": 28.57,
379
  "learning_rate": 1.4285714285714287e-05,
380
+ "loss": 2.9277,
381
  "step": 200
382
  },
383
  {
384
  "epoch": 29.0,
385
+ "eval_accuracy": 0.38461538461538464,
386
+ "eval_loss": 3.4166250228881836,
387
+ "eval_runtime": 2.3557,
388
+ "eval_samples_per_second": 55.185,
389
+ "eval_steps_per_second": 0.849,
390
  "step": 203
391
  },
392
  {
393
  "epoch": 30.0,
394
  "learning_rate": 1.4e-05,
395
+ "loss": 2.9066,
396
  "step": 210
397
  },
398
  {
399
  "epoch": 30.0,
400
+ "eval_accuracy": 0.4,
401
+ "eval_loss": 3.40069842338562,
402
+ "eval_runtime": 2.3914,
403
+ "eval_samples_per_second": 54.362,
404
+ "eval_steps_per_second": 0.836,
405
  "step": 210
406
  },
407
  {
408
  "epoch": 31.0,
409
+ "eval_accuracy": 0.36923076923076925,
410
+ "eval_loss": 3.3825573921203613,
411
+ "eval_runtime": 2.5239,
412
+ "eval_samples_per_second": 51.509,
413
+ "eval_steps_per_second": 0.792,
414
  "step": 217
415
  },
416
  {
417
  "epoch": 31.43,
418
  "learning_rate": 1.3714285714285716e-05,
419
+ "loss": 2.8464,
420
  "step": 220
421
  },
422
  {
423
  "epoch": 32.0,
424
+ "eval_accuracy": 0.4076923076923077,
425
+ "eval_loss": 3.369760274887085,
426
+ "eval_runtime": 2.3953,
427
+ "eval_samples_per_second": 54.273,
428
+ "eval_steps_per_second": 0.835,
429
  "step": 224
430
  },
431
  {
432
  "epoch": 32.86,
433
  "learning_rate": 1.3428571428571429e-05,
434
+ "loss": 2.8044,
435
  "step": 230
436
  },
437
  {
438
  "epoch": 33.0,
439
+ "eval_accuracy": 0.4076923076923077,
440
+ "eval_loss": 3.350893497467041,
441
+ "eval_runtime": 2.5141,
442
+ "eval_samples_per_second": 51.708,
443
+ "eval_steps_per_second": 0.796,
444
  "step": 231
445
  },
446
  {
447
  "epoch": 34.0,
448
+ "eval_accuracy": 0.3769230769230769,
449
+ "eval_loss": 3.324298620223999,
450
+ "eval_runtime": 2.5107,
451
+ "eval_samples_per_second": 51.778,
452
+ "eval_steps_per_second": 0.797,
453
  "step": 238
454
  },
455
  {
456
  "epoch": 34.29,
457
  "learning_rate": 1.3142857142857145e-05,
458
+ "loss": 2.7699,
459
  "step": 240
460
  },
461
  {
462
  "epoch": 35.0,
463
+ "eval_accuracy": 0.3923076923076923,
464
+ "eval_loss": 3.320146322250366,
465
+ "eval_runtime": 2.3695,
466
+ "eval_samples_per_second": 54.864,
467
+ "eval_steps_per_second": 0.844,
468
  "step": 245
469
  },
470
  {
471
  "epoch": 35.71,
472
  "learning_rate": 1.2857142857142859e-05,
473
+ "loss": 2.7251,
474
  "step": 250
475
  },
476
  {
477
  "epoch": 36.0,
478
+ "eval_accuracy": 0.4,
479
+ "eval_loss": 3.301337242126465,
480
+ "eval_runtime": 2.3948,
481
+ "eval_samples_per_second": 54.285,
482
+ "eval_steps_per_second": 0.835,
483
  "step": 252
484
  },
485
  {
486
  "epoch": 37.0,
487
+ "eval_accuracy": 0.4230769230769231,
488
+ "eval_loss": 3.2935874462127686,
489
+ "eval_runtime": 2.3921,
490
+ "eval_samples_per_second": 54.345,
491
+ "eval_steps_per_second": 0.836,
492
  "step": 259
493
  },
494
  {
495
  "epoch": 37.14,
496
  "learning_rate": 1.2571428571428572e-05,
497
+ "loss": 2.6915,
498
  "step": 260
499
  },
500
  {
501
  "epoch": 38.0,
502
+ "eval_accuracy": 0.45384615384615384,
503
+ "eval_loss": 3.2827184200286865,
504
+ "eval_runtime": 2.4835,
505
+ "eval_samples_per_second": 52.345,
506
+ "eval_steps_per_second": 0.805,
507
  "step": 266
508
  },
509
  {
510
  "epoch": 38.57,
511
  "learning_rate": 1.2285714285714288e-05,
512
+ "loss": 2.6527,
513
  "step": 270
514
  },
515
  {
516
  "epoch": 39.0,
517
+ "eval_accuracy": 0.46153846153846156,
518
+ "eval_loss": 3.262709379196167,
519
+ "eval_runtime": 2.5052,
520
+ "eval_samples_per_second": 51.892,
521
+ "eval_steps_per_second": 0.798,
522
  "step": 273
523
  },
524
  {
525
  "epoch": 40.0,
526
  "learning_rate": 1.2e-05,
527
+ "loss": 2.6541,
528
  "step": 280
529
  },
530
  {
531
  "epoch": 40.0,
532
+ "eval_accuracy": 0.46153846153846156,
533
+ "eval_loss": 3.2581355571746826,
534
+ "eval_runtime": 2.5207,
535
+ "eval_samples_per_second": 51.573,
536
+ "eval_steps_per_second": 0.793,
537
  "step": 280
538
  },
539
  {
540
  "epoch": 41.0,
541
+ "eval_accuracy": 0.4230769230769231,
542
+ "eval_loss": 3.2342007160186768,
543
+ "eval_runtime": 2.363,
544
+ "eval_samples_per_second": 55.016,
545
+ "eval_steps_per_second": 0.846,
546
  "step": 287
547
  },
548
  {
549
  "epoch": 41.43,
550
  "learning_rate": 1.1714285714285716e-05,
551
+ "loss": 2.5968,
552
  "step": 290
553
  },
554
  {
555
  "epoch": 42.0,
556
+ "eval_accuracy": 0.43846153846153846,
557
+ "eval_loss": 3.2211241722106934,
558
+ "eval_runtime": 2.5033,
559
+ "eval_samples_per_second": 51.932,
560
+ "eval_steps_per_second": 0.799,
561
  "step": 294
562
  },
563
  {
564
  "epoch": 42.86,
565
  "learning_rate": 1.1428571428571429e-05,
566
+ "loss": 2.573,
567
  "step": 300
568
  },
569
  {
570
  "epoch": 43.0,
571
+ "eval_accuracy": 0.4076923076923077,
572
+ "eval_loss": 3.212242603302002,
573
+ "eval_runtime": 2.4013,
574
+ "eval_samples_per_second": 54.137,
575
+ "eval_steps_per_second": 0.833,
576
  "step": 301
577
  },
578
  {
579
  "epoch": 44.0,
580
+ "eval_accuracy": 0.46153846153846156,
581
+ "eval_loss": 3.2259228229522705,
582
+ "eval_runtime": 2.5062,
583
+ "eval_samples_per_second": 51.871,
584
+ "eval_steps_per_second": 0.798,
585
  "step": 308
586
  },
587
  {
588
  "epoch": 44.29,
589
  "learning_rate": 1.1142857142857143e-05,
590
+ "loss": 2.554,
591
  "step": 310
592
  },
593
  {
594
  "epoch": 45.0,
595
+ "eval_accuracy": 0.4307692307692308,
596
+ "eval_loss": 3.2271060943603516,
597
+ "eval_runtime": 2.357,
598
+ "eval_samples_per_second": 55.154,
599
+ "eval_steps_per_second": 0.849,
600
  "step": 315
601
  },
602
  {
603
  "epoch": 45.71,
604
  "learning_rate": 1.0857142857142858e-05,
605
+ "loss": 2.5222,
606
  "step": 320
607
  },
608
  {
609
  "epoch": 46.0,
610
+ "eval_accuracy": 0.4461538461538462,
611
+ "eval_loss": 3.2208163738250732,
612
+ "eval_runtime": 2.7528,
613
+ "eval_samples_per_second": 47.225,
614
+ "eval_steps_per_second": 0.727,
615
  "step": 322
616
  },
617
  {
618
  "epoch": 47.0,
619
+ "eval_accuracy": 0.4461538461538462,
620
+ "eval_loss": 3.2139155864715576,
621
+ "eval_runtime": 2.3673,
622
+ "eval_samples_per_second": 54.915,
623
+ "eval_steps_per_second": 0.845,
624
  "step": 329
625
  },
626
  {
627
  "epoch": 47.14,
628
  "learning_rate": 1.0571428571428572e-05,
629
+ "loss": 2.5085,
630
  "step": 330
631
  },
632
  {
633
  "epoch": 48.0,
634
+ "eval_accuracy": 0.45384615384615384,
635
+ "eval_loss": 3.204026460647583,
636
+ "eval_runtime": 2.3343,
637
+ "eval_samples_per_second": 55.691,
638
+ "eval_steps_per_second": 0.857,
639
  "step": 336
640
  },
641
  {
642
  "epoch": 48.57,
643
  "learning_rate": 1.0285714285714285e-05,
644
+ "loss": 2.4593,
645
  "step": 340
646
  },
647
  {
648
  "epoch": 49.0,
649
+ "eval_accuracy": 0.49230769230769234,
650
+ "eval_loss": 3.205333948135376,
651
+ "eval_runtime": 2.3891,
652
+ "eval_samples_per_second": 54.414,
653
+ "eval_steps_per_second": 0.837,
654
  "step": 343
655
  },
656
  {
657
  "epoch": 50.0,
658
  "learning_rate": 1e-05,
659
+ "loss": 2.4585,
660
  "step": 350
661
  },
662
  {
663
  "epoch": 50.0,
664
+ "eval_accuracy": 0.47692307692307695,
665
+ "eval_loss": 3.1822495460510254,
666
+ "eval_runtime": 2.3305,
667
+ "eval_samples_per_second": 55.781,
668
+ "eval_steps_per_second": 0.858,
669
  "step": 350
670
  },
671
  {
672
  "epoch": 51.0,
673
+ "eval_accuracy": 0.46923076923076923,
674
+ "eval_loss": 3.1696622371673584,
675
+ "eval_runtime": 2.3113,
676
+ "eval_samples_per_second": 56.246,
677
+ "eval_steps_per_second": 0.865,
678
  "step": 357
679
  },
680
  {
681
  "epoch": 51.43,
682
  "learning_rate": 9.714285714285715e-06,
683
+ "loss": 2.4228,
684
  "step": 360
685
  },
686
  {
687
  "epoch": 52.0,
688
+ "eval_accuracy": 0.46923076923076923,
689
+ "eval_loss": 3.1589303016662598,
690
+ "eval_runtime": 2.5283,
691
+ "eval_samples_per_second": 51.419,
692
+ "eval_steps_per_second": 0.791,
693
  "step": 364
694
  },
695
  {
696
  "epoch": 52.86,
697
  "learning_rate": 9.42857142857143e-06,
698
+ "loss": 2.3954,
699
  "step": 370
700
  },
701
  {
702
  "epoch": 53.0,
703
+ "eval_accuracy": 0.47692307692307695,
704
+ "eval_loss": 3.1375324726104736,
705
+ "eval_runtime": 2.3577,
706
+ "eval_samples_per_second": 55.139,
707
+ "eval_steps_per_second": 0.848,
708
  "step": 371
709
  },
710
  {
711
  "epoch": 54.0,
712
+ "eval_accuracy": 0.45384615384615384,
713
+ "eval_loss": 3.1092171669006348,
714
+ "eval_runtime": 2.3799,
715
+ "eval_samples_per_second": 54.625,
716
+ "eval_steps_per_second": 0.84,
717
  "step": 378
718
  },
719
  {
720
  "epoch": 54.29,
721
  "learning_rate": 9.142857142857144e-06,
722
+ "loss": 2.3641,
723
  "step": 380
724
  },
725
  {
726
  "epoch": 55.0,
727
+ "eval_accuracy": 0.47692307692307695,
728
+ "eval_loss": 3.099919319152832,
729
+ "eval_runtime": 2.3488,
730
+ "eval_samples_per_second": 55.348,
731
+ "eval_steps_per_second": 0.852,
732
  "step": 385
733
  },
734
  {
735
  "epoch": 55.71,
736
  "learning_rate": 8.857142857142858e-06,
737
+ "loss": 2.3651,
738
  "step": 390
739
  },
740
  {
741
  "epoch": 56.0,
742
+ "eval_accuracy": 0.46153846153846156,
743
+ "eval_loss": 3.0859622955322266,
744
+ "eval_runtime": 2.5127,
745
+ "eval_samples_per_second": 51.737,
746
+ "eval_steps_per_second": 0.796,
747
  "step": 392
748
  },
749
  {
750
  "epoch": 57.0,
751
+ "eval_accuracy": 0.46153846153846156,
752
+ "eval_loss": 3.0813159942626953,
753
+ "eval_runtime": 2.449,
754
+ "eval_samples_per_second": 53.082,
755
+ "eval_steps_per_second": 0.817,
756
  "step": 399
757
  },
758
  {
759
  "epoch": 57.14,
760
  "learning_rate": 8.571428571428571e-06,
761
+ "loss": 2.3182,
762
  "step": 400
763
  },
764
  {
765
  "epoch": 58.0,
766
+ "eval_accuracy": 0.49230769230769234,
767
+ "eval_loss": 3.0692226886749268,
768
+ "eval_runtime": 2.3556,
769
+ "eval_samples_per_second": 55.188,
770
+ "eval_steps_per_second": 0.849,
771
  "step": 406
772
  },
773
  {
774
  "epoch": 58.57,
775
  "learning_rate": 8.285714285714287e-06,
776
+ "loss": 2.3029,
777
  "step": 410
778
  },
779
  {
780
  "epoch": 59.0,
781
+ "eval_accuracy": 0.4846153846153846,
782
+ "eval_loss": 3.061020612716675,
783
+ "eval_runtime": 2.4546,
784
+ "eval_samples_per_second": 52.961,
785
+ "eval_steps_per_second": 0.815,
786
  "step": 413
787
  },
788
  {
789
  "epoch": 60.0,
790
  "learning_rate": 8.000000000000001e-06,
791
+ "loss": 2.2988,
792
  "step": 420
793
  },
794
  {
795
  "epoch": 60.0,
796
+ "eval_accuracy": 0.46153846153846156,
797
+ "eval_loss": 3.0626943111419678,
798
+ "eval_runtime": 2.3415,
799
+ "eval_samples_per_second": 55.521,
800
+ "eval_steps_per_second": 0.854,
801
  "step": 420
802
  },
803
  {
804
  "epoch": 61.0,
805
+ "eval_accuracy": 0.46923076923076923,
806
+ "eval_loss": 3.051997184753418,
807
+ "eval_runtime": 2.492,
808
+ "eval_samples_per_second": 52.167,
809
+ "eval_steps_per_second": 0.803,
810
  "step": 427
811
  },
812
  {
813
  "epoch": 61.43,
814
  "learning_rate": 7.714285714285716e-06,
815
+ "loss": 2.2865,
816
  "step": 430
817
  },
818
  {
819
  "epoch": 62.0,
820
+ "eval_accuracy": 0.45384615384615384,
821
+ "eval_loss": 3.039459705352783,
822
+ "eval_runtime": 2.4542,
823
+ "eval_samples_per_second": 52.97,
824
+ "eval_steps_per_second": 0.815,
825
  "step": 434
826
  },
827
  {
828
  "epoch": 62.86,
829
  "learning_rate": 7.428571428571429e-06,
830
+ "loss": 2.2623,
831
  "step": 440
832
  },
833
  {
834
  "epoch": 63.0,
835
+ "eval_accuracy": 0.46153846153846156,
836
+ "eval_loss": 3.0356502532958984,
837
+ "eval_runtime": 2.3341,
838
+ "eval_samples_per_second": 55.697,
839
+ "eval_steps_per_second": 0.857,
840
  "step": 441
841
  },
842
  {
843
  "epoch": 64.0,
844
+ "eval_accuracy": 0.46153846153846156,
845
+ "eval_loss": 3.0332510471343994,
846
+ "eval_runtime": 2.3535,
847
+ "eval_samples_per_second": 55.236,
848
+ "eval_steps_per_second": 0.85,
849
  "step": 448
850
  },
851
  {
852
  "epoch": 64.29,
853
  "learning_rate": 7.1428571428571436e-06,
854
+ "loss": 2.2252,
855
  "step": 450
856
  },
857
  {
858
  "epoch": 65.0,
859
+ "eval_accuracy": 0.47692307692307695,
860
+ "eval_loss": 3.022916316986084,
861
+ "eval_runtime": 2.5362,
862
+ "eval_samples_per_second": 51.258,
863
+ "eval_steps_per_second": 0.789,
864
  "step": 455
865
  },
866
  {
867
  "epoch": 65.71,
868
  "learning_rate": 6.857142857142858e-06,
869
+ "loss": 2.2339,
870
  "step": 460
871
  },
872
  {
873
  "epoch": 66.0,
874
+ "eval_accuracy": 0.47692307692307695,
875
+ "eval_loss": 3.020322799682617,
876
+ "eval_runtime": 2.3519,
877
+ "eval_samples_per_second": 55.275,
878
+ "eval_steps_per_second": 0.85,
879
  "step": 462
880
  },
881
  {
882
  "epoch": 67.0,
883
+ "eval_accuracy": 0.49230769230769234,
884
+ "eval_loss": 3.0076119899749756,
885
+ "eval_runtime": 2.359,
886
+ "eval_samples_per_second": 55.107,
887
+ "eval_steps_per_second": 0.848,
888
  "step": 469
889
  },
890
  {
891
  "epoch": 67.14,
892
  "learning_rate": 6.571428571428572e-06,
893
+ "loss": 2.2017,
894
  "step": 470
895
  },
896
  {
897
  "epoch": 68.0,
898
+ "eval_accuracy": 0.4846153846153846,
899
+ "eval_loss": 2.9876248836517334,
900
+ "eval_runtime": 2.372,
901
+ "eval_samples_per_second": 54.805,
902
+ "eval_steps_per_second": 0.843,
903
  "step": 476
904
  },
905
  {
906
  "epoch": 68.57,
907
  "learning_rate": 6.285714285714286e-06,
908
+ "loss": 2.1972,
909
  "step": 480
910
  },
911
  {
912
  "epoch": 69.0,
913
+ "eval_accuracy": 0.49230769230769234,
914
+ "eval_loss": 2.971572160720825,
915
+ "eval_runtime": 2.3479,
916
+ "eval_samples_per_second": 55.368,
917
+ "eval_steps_per_second": 0.852,
918
  "step": 483
919
  },
920
  {
921
  "epoch": 70.0,
922
  "learning_rate": 6e-06,
923
+ "loss": 2.1964,
924
  "step": 490
925
  },
926
  {
927
  "epoch": 70.0,
928
+ "eval_accuracy": 0.5,
929
+ "eval_loss": 2.96323823928833,
930
+ "eval_runtime": 2.4351,
931
+ "eval_samples_per_second": 53.387,
932
+ "eval_steps_per_second": 0.821,
933
  "step": 490
934
  },
935
  {
936
  "epoch": 71.0,
937
+ "eval_accuracy": 0.49230769230769234,
938
+ "eval_loss": 2.9597153663635254,
939
+ "eval_runtime": 2.4929,
940
+ "eval_samples_per_second": 52.149,
941
+ "eval_steps_per_second": 0.802,
942
  "step": 497
943
  },
944
  {
945
  "epoch": 71.43,
946
  "learning_rate": 5.7142857142857145e-06,
947
+ "loss": 2.1775,
948
  "step": 500
949
  },
950
  {
951
  "epoch": 72.0,
952
+ "eval_accuracy": 0.5,
953
+ "eval_loss": 2.9580914974212646,
954
+ "eval_runtime": 2.5027,
955
+ "eval_samples_per_second": 51.943,
956
+ "eval_steps_per_second": 0.799,
957
  "step": 504
958
  },
959
  {
960
  "epoch": 72.86,
961
  "learning_rate": 5.428571428571429e-06,
962
+ "loss": 2.1619,
963
  "step": 510
964
  },
965
  {
966
  "epoch": 73.0,
967
+ "eval_accuracy": 0.5076923076923077,
968
+ "eval_loss": 2.9516122341156006,
969
+ "eval_runtime": 2.4828,
970
+ "eval_samples_per_second": 52.361,
971
+ "eval_steps_per_second": 0.806,
972
  "step": 511
973
  },
974
  {
975
  "epoch": 74.0,
976
+ "eval_accuracy": 0.5153846153846153,
977
+ "eval_loss": 2.935607433319092,
978
+ "eval_runtime": 2.3758,
979
+ "eval_samples_per_second": 54.719,
980
+ "eval_steps_per_second": 0.842,
981
  "step": 518
982
  },
983
  {
984
  "epoch": 74.29,
985
  "learning_rate": 5.142857142857142e-06,
986
+ "loss": 2.1633,
987
  "step": 520
988
  },
989
  {
990
  "epoch": 75.0,
991
+ "eval_accuracy": 0.5076923076923077,
992
+ "eval_loss": 2.928561210632324,
993
+ "eval_runtime": 2.3327,
994
+ "eval_samples_per_second": 55.729,
995
+ "eval_steps_per_second": 0.857,
996
  "step": 525
997
  },
998
  {
999
  "epoch": 75.71,
1000
  "learning_rate": 4.857142857142858e-06,
1001
+ "loss": 2.1207,
1002
  "step": 530
1003
  },
1004
  {
1005
  "epoch": 76.0,
1006
+ "eval_accuracy": 0.5153846153846153,
1007
+ "eval_loss": 2.9266109466552734,
1008
+ "eval_runtime": 2.3813,
1009
+ "eval_samples_per_second": 54.591,
1010
+ "eval_steps_per_second": 0.84,
1011
  "step": 532
1012
  },
1013
  {
1014
  "epoch": 77.0,
1015
+ "eval_accuracy": 0.5230769230769231,
1016
+ "eval_loss": 2.920504093170166,
1017
+ "eval_runtime": 2.4777,
1018
+ "eval_samples_per_second": 52.468,
1019
+ "eval_steps_per_second": 0.807,
1020
  "step": 539
1021
  },
1022
  {
1023
  "epoch": 77.14,
1024
  "learning_rate": 4.571428571428572e-06,
1025
+ "loss": 2.1353,
1026
  "step": 540
1027
  },
1028
  {
1029
  "epoch": 78.0,
1030
+ "eval_accuracy": 0.5153846153846153,
1031
+ "eval_loss": 2.9131386280059814,
1032
+ "eval_runtime": 2.3865,
1033
+ "eval_samples_per_second": 54.473,
1034
+ "eval_steps_per_second": 0.838,
1035
  "step": 546
1036
  },
1037
  {
1038
  "epoch": 78.57,
1039
  "learning_rate": 4.2857142857142855e-06,
1040
+ "loss": 2.1075,
1041
  "step": 550
1042
  },
1043
  {
1044
  "epoch": 79.0,
1045
+ "eval_accuracy": 0.5230769230769231,
1046
+ "eval_loss": 2.907454252243042,
1047
+ "eval_runtime": 2.3713,
1048
+ "eval_samples_per_second": 54.823,
1049
+ "eval_steps_per_second": 0.843,
1050
  "step": 553
1051
  },
1052
  {
1053
  "epoch": 80.0,
1054
  "learning_rate": 4.000000000000001e-06,
1055
+ "loss": 2.1025,
1056
  "step": 560
1057
  },
1058
  {
1059
  "epoch": 80.0,
1060
+ "eval_accuracy": 0.5230769230769231,
1061
+ "eval_loss": 2.907280921936035,
1062
+ "eval_runtime": 2.3736,
1063
+ "eval_samples_per_second": 54.768,
1064
+ "eval_steps_per_second": 0.843,
1065
  "step": 560
1066
  },
1067
  {
1068
  "epoch": 81.0,
1069
+ "eval_accuracy": 0.5153846153846153,
1070
+ "eval_loss": 2.9174439907073975,
1071
+ "eval_runtime": 2.3602,
1072
+ "eval_samples_per_second": 55.079,
1073
+ "eval_steps_per_second": 0.847,
1074
  "step": 567
1075
  },
1076
  {
1077
  "epoch": 81.43,
1078
  "learning_rate": 3.7142857142857146e-06,
1079
+ "loss": 2.1031,
1080
  "step": 570
1081
  },
1082
  {
1083
  "epoch": 82.0,
1084
+ "eval_accuracy": 0.5307692307692308,
1085
+ "eval_loss": 2.913137197494507,
1086
+ "eval_runtime": 2.4712,
1087
+ "eval_samples_per_second": 52.605,
1088
+ "eval_steps_per_second": 0.809,
1089
  "step": 574
1090
  },
1091
  {
1092
  "epoch": 82.86,
1093
  "learning_rate": 3.428571428571429e-06,
1094
+ "loss": 2.0932,
1095
  "step": 580
1096
  },
1097
  {
1098
  "epoch": 83.0,
1099
+ "eval_accuracy": 0.5307692307692308,
1100
+ "eval_loss": 2.909245491027832,
1101
+ "eval_runtime": 2.3787,
1102
+ "eval_samples_per_second": 54.651,
1103
+ "eval_steps_per_second": 0.841,
1104
  "step": 581
1105
  },
1106
  {
1107
  "epoch": 84.0,
1108
+ "eval_accuracy": 0.5307692307692308,
1109
+ "eval_loss": 2.8977668285369873,
1110
+ "eval_runtime": 4.0301,
1111
+ "eval_samples_per_second": 32.257,
1112
+ "eval_steps_per_second": 0.496,
1113
  "step": 588
1114
  },
1115
  {
1116
  "epoch": 84.29,
1117
  "learning_rate": 3.142857142857143e-06,
1118
+ "loss": 2.0861,
1119
  "step": 590
1120
  },
1121
  {
1122
  "epoch": 85.0,
1123
+ "eval_accuracy": 0.5307692307692308,
1124
+ "eval_loss": 2.8871328830718994,
1125
+ "eval_runtime": 3.7091,
1126
+ "eval_samples_per_second": 35.049,
1127
+ "eval_steps_per_second": 0.539,
1128
  "step": 595
1129
  },
1130
  {
1131
  "epoch": 85.71,
1132
  "learning_rate": 2.8571428571428573e-06,
1133
+ "loss": 2.0478,
1134
  "step": 600
1135
  },
1136
  {
1137
  "epoch": 86.0,
1138
+ "eval_accuracy": 0.5384615384615384,
1139
+ "eval_loss": 2.882897138595581,
1140
+ "eval_runtime": 4.3893,
1141
+ "eval_samples_per_second": 29.617,
1142
+ "eval_steps_per_second": 0.456,
1143
  "step": 602
1144
  },
1145
  {
1146
  "epoch": 87.0,
1147
+ "eval_accuracy": 0.5461538461538461,
1148
+ "eval_loss": 2.880449056625366,
1149
+ "eval_runtime": 2.9082,
1150
+ "eval_samples_per_second": 44.701,
1151
+ "eval_steps_per_second": 0.688,
1152
  "step": 609
1153
  },
1154
  {
1155
  "epoch": 87.14,
1156
  "learning_rate": 2.571428571428571e-06,
1157
+ "loss": 2.0815,
1158
  "step": 610
1159
  },
1160
  {
1161
  "epoch": 88.0,
1162
+ "eval_accuracy": 0.5461538461538461,
1163
+ "eval_loss": 2.872459888458252,
1164
+ "eval_runtime": 2.8921,
1165
+ "eval_samples_per_second": 44.95,
1166
+ "eval_steps_per_second": 0.692,
1167
  "step": 616
1168
  },
1169
  {
1170
  "epoch": 88.57,
1171
  "learning_rate": 2.285714285714286e-06,
1172
+ "loss": 2.0756,
1173
  "step": 620
1174
  },
1175
  {
1176
  "epoch": 89.0,
1177
+ "eval_accuracy": 0.5461538461538461,
1178
+ "eval_loss": 2.8694419860839844,
1179
+ "eval_runtime": 3.9643,
1180
+ "eval_samples_per_second": 32.793,
1181
+ "eval_steps_per_second": 0.505,
1182
  "step": 623
1183
  },
1184
  {
1185
  "epoch": 90.0,
1186
  "learning_rate": 2.0000000000000003e-06,
1187
+ "loss": 2.065,
1188
  "step": 630
1189
  },
1190
  {
1191
  "epoch": 90.0,
1192
+ "eval_accuracy": 0.5461538461538461,
1193
+ "eval_loss": 2.866473913192749,
1194
+ "eval_runtime": 3.1035,
1195
+ "eval_samples_per_second": 41.888,
1196
+ "eval_steps_per_second": 0.644,
1197
  "step": 630
1198
  },
1199
  {
1200
  "epoch": 91.0,
1201
+ "eval_accuracy": 0.5461538461538461,
1202
+ "eval_loss": 2.861470937728882,
1203
+ "eval_runtime": 3.1207,
1204
+ "eval_samples_per_second": 41.657,
1205
+ "eval_steps_per_second": 0.641,
1206
  "step": 637
1207
  },
1208
  {
1209
  "epoch": 91.43,
1210
  "learning_rate": 1.7142857142857145e-06,
1211
+ "loss": 2.0572,
1212
  "step": 640
1213
  },
1214
  {
1215
  "epoch": 92.0,
1216
+ "eval_accuracy": 0.5461538461538461,
1217
+ "eval_loss": 2.859877347946167,
1218
+ "eval_runtime": 2.6425,
1219
+ "eval_samples_per_second": 49.197,
1220
+ "eval_steps_per_second": 0.757,
1221
  "step": 644
1222
  },
1223
  {
1224
  "epoch": 92.86,
1225
  "learning_rate": 1.4285714285714286e-06,
1226
+ "loss": 2.0358,
1227
  "step": 650
1228
  },
1229
  {
1230
  "epoch": 93.0,
1231
+ "eval_accuracy": 0.5461538461538461,
1232
+ "eval_loss": 2.8619918823242188,
1233
+ "eval_runtime": 2.4174,
1234
+ "eval_samples_per_second": 53.776,
1235
+ "eval_steps_per_second": 0.827,
1236
  "step": 651
1237
  },
1238
  {
1239
  "epoch": 94.0,
1240
+ "eval_accuracy": 0.5461538461538461,
1241
+ "eval_loss": 2.862945079803467,
1242
+ "eval_runtime": 2.4216,
1243
+ "eval_samples_per_second": 53.684,
1244
+ "eval_steps_per_second": 0.826,
1245
  "step": 658
1246
  },
1247
  {
1248
  "epoch": 94.29,
1249
  "learning_rate": 1.142857142857143e-06,
1250
+ "loss": 2.0663,
1251
  "step": 660
1252
  },
1253
  {
1254
  "epoch": 95.0,
1255
+ "eval_accuracy": 0.5538461538461539,
1256
+ "eval_loss": 2.862516403198242,
1257
+ "eval_runtime": 2.3749,
1258
+ "eval_samples_per_second": 54.739,
1259
+ "eval_steps_per_second": 0.842,
1260
  "step": 665
1261
  },
1262
  {
1263
  "epoch": 95.71,
1264
  "learning_rate": 8.571428571428572e-07,
1265
+ "loss": 2.0353,
1266
  "step": 670
1267
  },
1268
  {
1269
  "epoch": 96.0,
1270
+ "eval_accuracy": 0.5538461538461539,
1271
+ "eval_loss": 2.8627583980560303,
1272
+ "eval_runtime": 2.3527,
1273
+ "eval_samples_per_second": 55.257,
1274
+ "eval_steps_per_second": 0.85,
1275
  "step": 672
1276
  },
1277
  {
1278
  "epoch": 97.0,
1279
+ "eval_accuracy": 0.5538461538461539,
1280
+ "eval_loss": 2.8628671169281006,
1281
+ "eval_runtime": 2.5748,
1282
+ "eval_samples_per_second": 50.49,
1283
+ "eval_steps_per_second": 0.777,
1284
  "step": 679
1285
  },
1286
  {
1287
  "epoch": 97.14,
1288
  "learning_rate": 5.714285714285715e-07,
1289
+ "loss": 2.0506,
1290
  "step": 680
1291
  },
1292
  {
1293
  "epoch": 98.0,
1294
+ "eval_accuracy": 0.5538461538461539,
1295
+ "eval_loss": 2.862170457839966,
1296
+ "eval_runtime": 2.3987,
1297
+ "eval_samples_per_second": 54.196,
1298
+ "eval_steps_per_second": 0.834,
1299
  "step": 686
1300
  },
1301
  {
1302
  "epoch": 98.57,
1303
  "learning_rate": 2.8571428571428575e-07,
1304
+ "loss": 2.0494,
1305
  "step": 690
1306
  },
1307
  {
1308
  "epoch": 99.0,
1309
+ "eval_accuracy": 0.5538461538461539,
1310
+ "eval_loss": 2.8621606826782227,
1311
+ "eval_runtime": 2.676,
1312
+ "eval_samples_per_second": 48.58,
1313
+ "eval_steps_per_second": 0.747,
1314
  "step": 693
1315
  },
1316
  {
1317
  "epoch": 100.0,
1318
  "learning_rate": 0.0,
1319
+ "loss": 2.0566,
1320
  "step": 700
1321
  },
1322
  {
1323
  "epoch": 100.0,
1324
+ "eval_accuracy": 0.5538461538461539,
1325
+ "eval_loss": 2.862189292907715,
1326
+ "eval_runtime": 2.8132,
1327
+ "eval_samples_per_second": 46.211,
1328
+ "eval_steps_per_second": 0.711,
1329
  "step": 700
1330
  },
1331
  {
1332
  "epoch": 100.0,
1333
  "step": 700,
1334
+ "total_flos": 4.031389254057984e+18,
1335
+ "train_loss": 2.6316724504743303,
1336
+ "train_runtime": 4344.3701,
1337
+ "train_samples_per_second": 11.97,
1338
+ "train_steps_per_second": 0.161
1339
  }
1340
  ],
1341
  "logging_steps": 10,
 
1343
  "num_input_tokens_seen": 0,
1344
  "num_train_epochs": 100,
1345
  "save_steps": 500,
1346
+ "total_flos": 4.031389254057984e+18,
1347
  "train_batch_size": 80,
1348
  "trial_name": null,
1349
  "trial_params": null