farleyknight commited on
Commit
1632508
1 Parent(s): 53efc5b

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9947777777777778,
4
- "eval_loss": 0.023610670119524002,
5
- "eval_runtime": 37.1335,
6
- "eval_samples_per_second": 242.369,
7
- "eval_steps_per_second": 30.296,
8
- "train_loss": 0.37173893965178845,
9
- "train_runtime": 3249.9087,
10
- "train_samples_per_second": 78.464,
11
- "train_steps_per_second": 9.808
12
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.8308823529411765,
4
+ "eval_loss": 0.689139723777771,
5
+ "eval_runtime": 1.7925,
6
+ "eval_samples_per_second": 227.609,
7
+ "eval_steps_per_second": 28.451,
8
+ "train_loss": 1.2808735091793495,
9
+ "train_runtime": 453.7657,
10
+ "train_samples_per_second": 25.443,
11
+ "train_steps_per_second": 3.184
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9947777777777778,
4
- "eval_loss": 0.023610670119524002,
5
- "eval_runtime": 37.1335,
6
- "eval_samples_per_second": 242.369,
7
- "eval_steps_per_second": 30.296
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.8308823529411765,
4
+ "eval_loss": 0.689139723777771,
5
+ "eval_runtime": 1.7925,
6
+ "eval_samples_per_second": 227.609,
7
+ "eval_steps_per_second": 28.451
8
  }
runs/Aug24_02-01-57_psiguc7f9/events.out.tfevents.1661307090.psiguc7f9.1058.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08a88b208f077ff5531adf774fb7454f59f06938e494b358f96944181df7dcd7
3
+ size 363
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.37173893965178845,
4
- "train_runtime": 3249.9087,
5
- "train_samples_per_second": 78.464,
6
- "train_steps_per_second": 9.808
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 1.2808735091793495,
4
+ "train_runtime": 453.7657,
5
+ "train_samples_per_second": 25.443,
6
+ "train_steps_per_second": 3.184
7
  }
trainer_state.json CHANGED
@@ -1,1978 +1,154 @@
1
  {
2
- "best_metric": 0.023610670119524002,
3
- "best_model_checkpoint": "/storage/img-cls-data/mnist_outputs/checkpoint-31875",
4
  "epoch": 5.0,
5
- "global_step": 31875,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 1.9937254901960785e-05,
13
- "loss": 2.0426,
14
- "step": 100
15
- },
16
- {
17
- "epoch": 0.03,
18
- "learning_rate": 1.987450980392157e-05,
19
- "loss": 1.5416,
20
- "step": 200
21
- },
22
- {
23
- "epoch": 0.05,
24
- "learning_rate": 1.9811764705882354e-05,
25
- "loss": 1.2198,
26
- "step": 300
27
- },
28
- {
29
- "epoch": 0.06,
30
- "learning_rate": 1.9749019607843137e-05,
31
- "loss": 1.0002,
32
- "step": 400
33
- },
34
- {
35
- "epoch": 0.08,
36
- "learning_rate": 1.9686274509803924e-05,
37
- "loss": 0.9311,
38
- "step": 500
39
- },
40
- {
41
- "epoch": 0.09,
42
- "learning_rate": 1.9623529411764707e-05,
43
- "loss": 0.8639,
44
- "step": 600
45
- },
46
- {
47
- "epoch": 0.11,
48
- "learning_rate": 1.9560784313725493e-05,
49
- "loss": 0.7643,
50
- "step": 700
51
- },
52
- {
53
- "epoch": 0.13,
54
- "learning_rate": 1.9498039215686276e-05,
55
- "loss": 0.7039,
56
- "step": 800
57
- },
58
- {
59
- "epoch": 0.14,
60
- "learning_rate": 1.9435294117647063e-05,
61
- "loss": 0.6675,
62
- "step": 900
63
- },
64
- {
65
- "epoch": 0.16,
66
- "learning_rate": 1.9372549019607846e-05,
67
- "loss": 0.6378,
68
- "step": 1000
69
- },
70
- {
71
- "epoch": 0.17,
72
- "learning_rate": 1.930980392156863e-05,
73
- "loss": 0.6793,
74
- "step": 1100
75
- },
76
- {
77
- "epoch": 0.19,
78
- "learning_rate": 1.9247058823529415e-05,
79
- "loss": 0.5975,
80
- "step": 1200
81
- },
82
- {
83
- "epoch": 0.2,
84
- "learning_rate": 1.9184313725490198e-05,
85
- "loss": 0.6072,
86
- "step": 1300
87
- },
88
- {
89
- "epoch": 0.22,
90
- "learning_rate": 1.912156862745098e-05,
91
- "loss": 0.6145,
92
- "step": 1400
93
- },
94
- {
95
- "epoch": 0.24,
96
- "learning_rate": 1.9058823529411764e-05,
97
- "loss": 0.5963,
98
- "step": 1500
99
- },
100
- {
101
- "epoch": 0.25,
102
- "learning_rate": 1.899607843137255e-05,
103
- "loss": 0.5764,
104
- "step": 1600
105
- },
106
- {
107
- "epoch": 0.27,
108
- "learning_rate": 1.8933333333333334e-05,
109
- "loss": 0.5801,
110
- "step": 1700
111
- },
112
- {
113
- "epoch": 0.28,
114
- "learning_rate": 1.8870588235294117e-05,
115
- "loss": 0.5679,
116
- "step": 1800
117
- },
118
- {
119
- "epoch": 0.3,
120
- "learning_rate": 1.8807843137254903e-05,
121
- "loss": 0.5489,
122
- "step": 1900
123
- },
124
- {
125
- "epoch": 0.31,
126
- "learning_rate": 1.8745098039215686e-05,
127
- "loss": 0.5423,
128
- "step": 2000
129
- },
130
- {
131
- "epoch": 0.33,
132
- "learning_rate": 1.8682352941176473e-05,
133
- "loss": 0.5429,
134
- "step": 2100
135
- },
136
  {
137
  "epoch": 0.35,
138
- "learning_rate": 1.8619607843137256e-05,
139
- "loss": 0.502,
140
- "step": 2200
141
- },
142
- {
143
- "epoch": 0.36,
144
- "learning_rate": 1.8556862745098042e-05,
145
- "loss": 0.4131,
146
- "step": 2300
147
- },
148
- {
149
- "epoch": 0.38,
150
- "learning_rate": 1.8494117647058825e-05,
151
- "loss": 0.4839,
152
- "step": 2400
153
- },
154
- {
155
- "epoch": 0.39,
156
- "learning_rate": 1.843137254901961e-05,
157
- "loss": 0.515,
158
- "step": 2500
159
- },
160
- {
161
- "epoch": 0.41,
162
- "learning_rate": 1.8368627450980395e-05,
163
- "loss": 0.4417,
164
- "step": 2600
165
- },
166
- {
167
- "epoch": 0.42,
168
- "learning_rate": 1.8305882352941178e-05,
169
- "loss": 0.4844,
170
- "step": 2700
171
- },
172
- {
173
- "epoch": 0.44,
174
- "learning_rate": 1.824313725490196e-05,
175
- "loss": 0.4979,
176
- "step": 2800
177
- },
178
- {
179
- "epoch": 0.45,
180
- "learning_rate": 1.8180392156862747e-05,
181
- "loss": 0.4589,
182
- "step": 2900
183
- },
184
- {
185
- "epoch": 0.47,
186
- "learning_rate": 1.811764705882353e-05,
187
- "loss": 0.4334,
188
- "step": 3000
189
- },
190
- {
191
- "epoch": 0.49,
192
- "learning_rate": 1.8054901960784314e-05,
193
- "loss": 0.512,
194
- "step": 3100
195
- },
196
- {
197
- "epoch": 0.5,
198
- "learning_rate": 1.79921568627451e-05,
199
- "loss": 0.4219,
200
- "step": 3200
201
- },
202
- {
203
- "epoch": 0.52,
204
- "learning_rate": 1.7929411764705883e-05,
205
- "loss": 0.4288,
206
- "step": 3300
207
- },
208
- {
209
- "epoch": 0.53,
210
- "learning_rate": 1.7866666666666666e-05,
211
- "loss": 0.4818,
212
- "step": 3400
213
- },
214
- {
215
- "epoch": 0.55,
216
- "learning_rate": 1.7803921568627453e-05,
217
- "loss": 0.427,
218
- "step": 3500
219
- },
220
- {
221
- "epoch": 0.56,
222
- "learning_rate": 1.7741176470588236e-05,
223
- "loss": 0.4187,
224
- "step": 3600
225
- },
226
- {
227
- "epoch": 0.58,
228
- "learning_rate": 1.7678431372549022e-05,
229
- "loss": 0.4482,
230
- "step": 3700
231
- },
232
- {
233
- "epoch": 0.6,
234
- "learning_rate": 1.7615686274509805e-05,
235
- "loss": 0.4352,
236
- "step": 3800
237
- },
238
- {
239
- "epoch": 0.61,
240
- "learning_rate": 1.755294117647059e-05,
241
- "loss": 0.5184,
242
- "step": 3900
243
- },
244
- {
245
- "epoch": 0.63,
246
- "learning_rate": 1.7490196078431375e-05,
247
- "loss": 0.3797,
248
- "step": 4000
249
- },
250
- {
251
- "epoch": 0.64,
252
- "learning_rate": 1.7427450980392158e-05,
253
- "loss": 0.4232,
254
- "step": 4100
255
- },
256
- {
257
- "epoch": 0.66,
258
- "learning_rate": 1.7364705882352944e-05,
259
- "loss": 0.4047,
260
- "step": 4200
261
- },
262
- {
263
- "epoch": 0.67,
264
- "learning_rate": 1.7301960784313727e-05,
265
- "loss": 0.4213,
266
- "step": 4300
267
  },
268
  {
269
  "epoch": 0.69,
270
- "learning_rate": 1.723921568627451e-05,
271
- "loss": 0.3669,
272
- "step": 4400
273
- },
274
- {
275
- "epoch": 0.71,
276
- "learning_rate": 1.7176470588235293e-05,
277
- "loss": 0.4331,
278
- "step": 4500
279
- },
280
- {
281
- "epoch": 0.72,
282
- "learning_rate": 1.711372549019608e-05,
283
- "loss": 0.4496,
284
- "step": 4600
285
- },
286
- {
287
- "epoch": 0.74,
288
- "learning_rate": 1.7050980392156863e-05,
289
- "loss": 0.4153,
290
- "step": 4700
291
- },
292
- {
293
- "epoch": 0.75,
294
- "learning_rate": 1.698823529411765e-05,
295
- "loss": 0.4602,
296
- "step": 4800
297
- },
298
- {
299
- "epoch": 0.77,
300
- "learning_rate": 1.6925490196078432e-05,
301
- "loss": 0.4452,
302
- "step": 4900
303
- },
304
- {
305
- "epoch": 0.78,
306
- "learning_rate": 1.686274509803922e-05,
307
- "loss": 0.3844,
308
- "step": 5000
309
- },
310
- {
311
- "epoch": 0.8,
312
- "learning_rate": 1.6800000000000002e-05,
313
- "loss": 0.4109,
314
- "step": 5100
315
- },
316
- {
317
- "epoch": 0.82,
318
- "learning_rate": 1.6737254901960788e-05,
319
- "loss": 0.4972,
320
- "step": 5200
321
- },
322
- {
323
- "epoch": 0.83,
324
- "learning_rate": 1.667450980392157e-05,
325
- "loss": 0.3655,
326
- "step": 5300
327
- },
328
- {
329
- "epoch": 0.85,
330
- "learning_rate": 1.6611764705882354e-05,
331
- "loss": 0.4499,
332
- "step": 5400
333
- },
334
- {
335
- "epoch": 0.86,
336
- "learning_rate": 1.6549019607843137e-05,
337
- "loss": 0.4042,
338
- "step": 5500
339
- },
340
- {
341
- "epoch": 0.88,
342
- "learning_rate": 1.6486274509803924e-05,
343
- "loss": 0.4236,
344
- "step": 5600
345
- },
346
- {
347
- "epoch": 0.89,
348
- "learning_rate": 1.6423529411764707e-05,
349
- "loss": 0.4216,
350
- "step": 5700
351
- },
352
- {
353
- "epoch": 0.91,
354
- "learning_rate": 1.636078431372549e-05,
355
- "loss": 0.4134,
356
- "step": 5800
357
- },
358
- {
359
- "epoch": 0.93,
360
- "learning_rate": 1.6298039215686276e-05,
361
- "loss": 0.3641,
362
- "step": 5900
363
- },
364
- {
365
- "epoch": 0.94,
366
- "learning_rate": 1.623529411764706e-05,
367
- "loss": 0.4342,
368
- "step": 6000
369
- },
370
- {
371
- "epoch": 0.96,
372
- "learning_rate": 1.6172549019607842e-05,
373
- "loss": 0.4604,
374
- "step": 6100
375
- },
376
- {
377
- "epoch": 0.97,
378
- "learning_rate": 1.610980392156863e-05,
379
- "loss": 0.4144,
380
- "step": 6200
381
- },
382
- {
383
- "epoch": 0.99,
384
- "learning_rate": 1.6047058823529412e-05,
385
- "loss": 0.379,
386
- "step": 6300
387
- },
388
- {
389
- "epoch": 1.0,
390
- "eval_accuracy": 0.9895555555555555,
391
- "eval_loss": 0.0506252758204937,
392
- "eval_runtime": 37.8747,
393
- "eval_samples_per_second": 237.626,
394
- "eval_steps_per_second": 29.703,
395
- "step": 6375
396
  },
397
  {
398
  "epoch": 1.0,
399
- "learning_rate": 1.59843137254902e-05,
400
- "loss": 0.3759,
401
- "step": 6400
402
- },
403
- {
404
- "epoch": 1.02,
405
- "learning_rate": 1.592156862745098e-05,
406
- "loss": 0.4208,
407
- "step": 6500
408
  },
409
  {
410
  "epoch": 1.04,
411
- "learning_rate": 1.5858823529411768e-05,
412
- "loss": 0.3947,
413
- "step": 6600
414
- },
415
- {
416
- "epoch": 1.05,
417
- "learning_rate": 1.579607843137255e-05,
418
- "loss": 0.3622,
419
- "step": 6700
420
- },
421
- {
422
- "epoch": 1.07,
423
- "learning_rate": 1.5733333333333334e-05,
424
- "loss": 0.4034,
425
- "step": 6800
426
- },
427
- {
428
- "epoch": 1.08,
429
- "learning_rate": 1.567058823529412e-05,
430
- "loss": 0.3489,
431
- "step": 6900
432
- },
433
- {
434
- "epoch": 1.1,
435
- "learning_rate": 1.5607843137254904e-05,
436
- "loss": 0.3794,
437
- "step": 7000
438
- },
439
- {
440
- "epoch": 1.11,
441
- "learning_rate": 1.5545098039215687e-05,
442
- "loss": 0.3644,
443
- "step": 7100
444
- },
445
- {
446
- "epoch": 1.13,
447
- "learning_rate": 1.5482352941176473e-05,
448
- "loss": 0.5059,
449
- "step": 7200
450
- },
451
- {
452
- "epoch": 1.15,
453
- "learning_rate": 1.5419607843137256e-05,
454
- "loss": 0.348,
455
- "step": 7300
456
- },
457
- {
458
- "epoch": 1.16,
459
- "learning_rate": 1.535686274509804e-05,
460
- "loss": 0.402,
461
- "step": 7400
462
- },
463
- {
464
- "epoch": 1.18,
465
- "learning_rate": 1.5294117647058822e-05,
466
- "loss": 0.3457,
467
- "step": 7500
468
- },
469
- {
470
- "epoch": 1.19,
471
- "learning_rate": 1.523137254901961e-05,
472
- "loss": 0.3608,
473
- "step": 7600
474
- },
475
- {
476
- "epoch": 1.21,
477
- "learning_rate": 1.5168627450980393e-05,
478
- "loss": 0.3808,
479
- "step": 7700
480
- },
481
- {
482
- "epoch": 1.22,
483
- "learning_rate": 1.5105882352941176e-05,
484
- "loss": 0.3887,
485
- "step": 7800
486
- },
487
- {
488
- "epoch": 1.24,
489
- "learning_rate": 1.5043137254901963e-05,
490
- "loss": 0.354,
491
- "step": 7900
492
- },
493
- {
494
- "epoch": 1.25,
495
- "learning_rate": 1.4980392156862746e-05,
496
- "loss": 0.3918,
497
- "step": 8000
498
- },
499
- {
500
- "epoch": 1.27,
501
- "learning_rate": 1.491764705882353e-05,
502
- "loss": 0.3946,
503
- "step": 8100
504
- },
505
- {
506
- "epoch": 1.29,
507
- "learning_rate": 1.4854901960784315e-05,
508
- "loss": 0.3348,
509
- "step": 8200
510
- },
511
- {
512
- "epoch": 1.3,
513
- "learning_rate": 1.47921568627451e-05,
514
- "loss": 0.3523,
515
- "step": 8300
516
- },
517
- {
518
- "epoch": 1.32,
519
- "learning_rate": 1.4729411764705883e-05,
520
- "loss": 0.4209,
521
- "step": 8400
522
- },
523
- {
524
- "epoch": 1.33,
525
- "learning_rate": 1.4666666666666666e-05,
526
- "loss": 0.4297,
527
- "step": 8500
528
- },
529
- {
530
- "epoch": 1.35,
531
- "learning_rate": 1.4603921568627453e-05,
532
- "loss": 0.319,
533
- "step": 8600
534
- },
535
- {
536
- "epoch": 1.36,
537
- "learning_rate": 1.4541176470588236e-05,
538
- "loss": 0.367,
539
- "step": 8700
540
  },
541
  {
542
  "epoch": 1.38,
543
- "learning_rate": 1.447843137254902e-05,
544
- "loss": 0.4016,
545
- "step": 8800
546
- },
547
- {
548
- "epoch": 1.4,
549
- "learning_rate": 1.4415686274509805e-05,
550
- "loss": 0.3831,
551
- "step": 8900
552
- },
553
- {
554
- "epoch": 1.41,
555
- "learning_rate": 1.435294117647059e-05,
556
- "loss": 0.4467,
557
- "step": 9000
558
- },
559
- {
560
- "epoch": 1.43,
561
- "learning_rate": 1.4290196078431373e-05,
562
- "loss": 0.3631,
563
- "step": 9100
564
- },
565
- {
566
- "epoch": 1.44,
567
- "learning_rate": 1.422745098039216e-05,
568
- "loss": 0.3893,
569
- "step": 9200
570
- },
571
- {
572
- "epoch": 1.46,
573
- "learning_rate": 1.4164705882352943e-05,
574
- "loss": 0.3574,
575
- "step": 9300
576
- },
577
- {
578
- "epoch": 1.47,
579
- "learning_rate": 1.4101960784313726e-05,
580
- "loss": 0.3498,
581
- "step": 9400
582
- },
583
- {
584
- "epoch": 1.49,
585
- "learning_rate": 1.403921568627451e-05,
586
- "loss": 0.3911,
587
- "step": 9500
588
- },
589
- {
590
- "epoch": 1.51,
591
- "learning_rate": 1.3976470588235295e-05,
592
- "loss": 0.3566,
593
- "step": 9600
594
- },
595
- {
596
- "epoch": 1.52,
597
- "learning_rate": 1.391372549019608e-05,
598
- "loss": 0.3766,
599
- "step": 9700
600
- },
601
- {
602
- "epoch": 1.54,
603
- "learning_rate": 1.3850980392156863e-05,
604
- "loss": 0.394,
605
- "step": 9800
606
- },
607
- {
608
- "epoch": 1.55,
609
- "learning_rate": 1.378823529411765e-05,
610
- "loss": 0.3985,
611
- "step": 9900
612
- },
613
- {
614
- "epoch": 1.57,
615
- "learning_rate": 1.3725490196078432e-05,
616
- "loss": 0.3957,
617
- "step": 10000
618
- },
619
- {
620
- "epoch": 1.58,
621
- "learning_rate": 1.3662745098039215e-05,
622
- "loss": 0.3097,
623
- "step": 10100
624
- },
625
- {
626
- "epoch": 1.6,
627
- "learning_rate": 1.3600000000000002e-05,
628
- "loss": 0.346,
629
- "step": 10200
630
- },
631
- {
632
- "epoch": 1.62,
633
- "learning_rate": 1.3537254901960785e-05,
634
- "loss": 0.3856,
635
- "step": 10300
636
- },
637
- {
638
- "epoch": 1.63,
639
- "learning_rate": 1.347450980392157e-05,
640
- "loss": 0.3609,
641
- "step": 10400
642
- },
643
- {
644
- "epoch": 1.65,
645
- "learning_rate": 1.3411764705882353e-05,
646
- "loss": 0.3631,
647
- "step": 10500
648
- },
649
- {
650
- "epoch": 1.66,
651
- "learning_rate": 1.334901960784314e-05,
652
- "loss": 0.3309,
653
- "step": 10600
654
- },
655
- {
656
- "epoch": 1.68,
657
- "learning_rate": 1.3286274509803922e-05,
658
- "loss": 0.4049,
659
- "step": 10700
660
- },
661
- {
662
- "epoch": 1.69,
663
- "learning_rate": 1.3223529411764705e-05,
664
- "loss": 0.3737,
665
- "step": 10800
666
- },
667
- {
668
- "epoch": 1.71,
669
- "learning_rate": 1.3160784313725492e-05,
670
- "loss": 0.358,
671
- "step": 10900
672
  },
673
  {
674
  "epoch": 1.73,
675
- "learning_rate": 1.3098039215686275e-05,
676
- "loss": 0.313,
677
- "step": 11000
678
- },
679
- {
680
- "epoch": 1.74,
681
- "learning_rate": 1.303529411764706e-05,
682
- "loss": 0.3648,
683
- "step": 11100
684
- },
685
- {
686
- "epoch": 1.76,
687
- "learning_rate": 1.2972549019607846e-05,
688
- "loss": 0.3757,
689
- "step": 11200
690
- },
691
- {
692
- "epoch": 1.77,
693
- "learning_rate": 1.2909803921568629e-05,
694
- "loss": 0.321,
695
- "step": 11300
696
- },
697
- {
698
- "epoch": 1.79,
699
- "learning_rate": 1.2847058823529412e-05,
700
- "loss": 0.385,
701
- "step": 11400
702
- },
703
- {
704
- "epoch": 1.8,
705
- "learning_rate": 1.2784313725490197e-05,
706
- "loss": 0.3515,
707
- "step": 11500
708
  },
709
  {
710
- "epoch": 1.82,
711
- "learning_rate": 1.2721568627450982e-05,
712
- "loss": 0.3449,
713
- "step": 11600
 
 
 
714
  },
715
  {
716
- "epoch": 1.84,
717
- "learning_rate": 1.2658823529411766e-05,
718
- "loss": 0.3392,
719
- "step": 11700
720
  },
721
  {
722
- "epoch": 1.85,
723
- "learning_rate": 1.259607843137255e-05,
724
- "loss": 0.3581,
725
- "step": 11800
726
  },
727
  {
728
- "epoch": 1.87,
729
- "learning_rate": 1.2533333333333336e-05,
730
- "loss": 0.3761,
731
- "step": 11900
732
  },
733
  {
734
- "epoch": 1.88,
735
- "learning_rate": 1.2470588235294119e-05,
736
- "loss": 0.3392,
737
- "step": 12000
 
 
 
738
  },
739
  {
740
- "epoch": 1.9,
741
- "learning_rate": 1.2407843137254902e-05,
742
- "loss": 0.3809,
743
- "step": 12100
744
  },
745
  {
746
- "epoch": 1.91,
747
- "learning_rate": 1.2345098039215688e-05,
748
- "loss": 0.3388,
749
- "step": 12200
750
  },
751
  {
752
- "epoch": 1.93,
753
- "learning_rate": 1.2282352941176471e-05,
754
- "loss": 0.3665,
755
- "step": 12300
756
  },
757
  {
758
- "epoch": 1.95,
759
- "learning_rate": 1.2219607843137256e-05,
760
- "loss": 0.3331,
761
- "step": 12400
 
 
 
762
  },
763
  {
764
- "epoch": 1.96,
765
- "learning_rate": 1.215686274509804e-05,
766
- "loss": 0.3819,
767
- "step": 12500
768
  },
769
  {
770
- "epoch": 1.98,
771
- "learning_rate": 1.2094117647058826e-05,
772
- "loss": 0.3106,
773
- "step": 12600
774
  },
775
  {
776
- "epoch": 1.99,
777
- "learning_rate": 1.2031372549019609e-05,
778
- "loss": 0.3384,
779
- "step": 12700
780
- },
781
- {
782
- "epoch": 2.0,
783
- "eval_accuracy": 0.9905555555555555,
784
- "eval_loss": 0.03622434660792351,
785
- "eval_runtime": 36.0542,
786
- "eval_samples_per_second": 249.624,
787
- "eval_steps_per_second": 31.203,
788
- "step": 12750
789
- },
790
- {
791
- "epoch": 2.01,
792
- "learning_rate": 1.1968627450980392e-05,
793
- "loss": 0.3693,
794
- "step": 12800
795
- },
796
- {
797
- "epoch": 2.02,
798
- "learning_rate": 1.1905882352941178e-05,
799
- "loss": 0.3453,
800
- "step": 12900
801
- },
802
- {
803
- "epoch": 2.04,
804
- "learning_rate": 1.1843137254901961e-05,
805
- "loss": 0.3708,
806
- "step": 13000
807
- },
808
- {
809
- "epoch": 2.05,
810
- "learning_rate": 1.1780392156862746e-05,
811
- "loss": 0.3384,
812
- "step": 13100
813
- },
814
- {
815
- "epoch": 2.07,
816
- "learning_rate": 1.171764705882353e-05,
817
- "loss": 0.3415,
818
- "step": 13200
819
- },
820
- {
821
- "epoch": 2.09,
822
- "learning_rate": 1.1654901960784316e-05,
823
- "loss": 0.3503,
824
- "step": 13300
825
- },
826
- {
827
- "epoch": 2.1,
828
- "learning_rate": 1.1592156862745099e-05,
829
- "loss": 0.3107,
830
- "step": 13400
831
- },
832
- {
833
- "epoch": 2.12,
834
- "learning_rate": 1.1529411764705882e-05,
835
- "loss": 0.3516,
836
- "step": 13500
837
- },
838
- {
839
- "epoch": 2.13,
840
- "learning_rate": 1.1466666666666668e-05,
841
- "loss": 0.3555,
842
- "step": 13600
843
- },
844
- {
845
- "epoch": 2.15,
846
- "learning_rate": 1.1403921568627451e-05,
847
- "loss": 0.361,
848
- "step": 13700
849
- },
850
- {
851
- "epoch": 2.16,
852
- "learning_rate": 1.1341176470588236e-05,
853
- "loss": 0.3284,
854
- "step": 13800
855
- },
856
- {
857
- "epoch": 2.18,
858
- "learning_rate": 1.127843137254902e-05,
859
- "loss": 0.3398,
860
- "step": 13900
861
- },
862
- {
863
- "epoch": 2.2,
864
- "learning_rate": 1.1215686274509805e-05,
865
- "loss": 0.3075,
866
- "step": 14000
867
- },
868
- {
869
- "epoch": 2.21,
870
- "learning_rate": 1.1152941176470588e-05,
871
- "loss": 0.2937,
872
- "step": 14100
873
- },
874
- {
875
- "epoch": 2.23,
876
- "learning_rate": 1.1090196078431375e-05,
877
- "loss": 0.2807,
878
- "step": 14200
879
- },
880
- {
881
- "epoch": 2.24,
882
- "learning_rate": 1.1027450980392158e-05,
883
- "loss": 0.3763,
884
- "step": 14300
885
- },
886
- {
887
- "epoch": 2.26,
888
- "learning_rate": 1.0964705882352941e-05,
889
- "loss": 0.2695,
890
- "step": 14400
891
- },
892
- {
893
- "epoch": 2.27,
894
- "learning_rate": 1.0901960784313726e-05,
895
- "loss": 0.379,
896
- "step": 14500
897
- },
898
- {
899
- "epoch": 2.29,
900
- "learning_rate": 1.083921568627451e-05,
901
- "loss": 0.3548,
902
- "step": 14600
903
- },
904
- {
905
- "epoch": 2.31,
906
- "learning_rate": 1.0776470588235295e-05,
907
- "loss": 0.3215,
908
- "step": 14700
909
- },
910
- {
911
- "epoch": 2.32,
912
- "learning_rate": 1.0713725490196078e-05,
913
- "loss": 0.3178,
914
- "step": 14800
915
- },
916
- {
917
- "epoch": 2.34,
918
- "learning_rate": 1.0650980392156865e-05,
919
- "loss": 0.3556,
920
- "step": 14900
921
- },
922
- {
923
- "epoch": 2.35,
924
- "learning_rate": 1.0588235294117648e-05,
925
- "loss": 0.3235,
926
- "step": 15000
927
- },
928
- {
929
- "epoch": 2.37,
930
- "learning_rate": 1.0525490196078431e-05,
931
- "loss": 0.3363,
932
- "step": 15100
933
- },
934
- {
935
- "epoch": 2.38,
936
- "learning_rate": 1.0462745098039217e-05,
937
- "loss": 0.3251,
938
- "step": 15200
939
- },
940
- {
941
- "epoch": 2.4,
942
- "learning_rate": 1.04e-05,
943
- "loss": 0.3177,
944
- "step": 15300
945
- },
946
- {
947
- "epoch": 2.42,
948
- "learning_rate": 1.0337254901960785e-05,
949
- "loss": 0.3387,
950
- "step": 15400
951
- },
952
- {
953
- "epoch": 2.43,
954
- "learning_rate": 1.0274509803921568e-05,
955
- "loss": 0.3551,
956
- "step": 15500
957
- },
958
- {
959
- "epoch": 2.45,
960
- "learning_rate": 1.0211764705882355e-05,
961
- "loss": 0.3471,
962
- "step": 15600
963
- },
964
- {
965
- "epoch": 2.46,
966
- "learning_rate": 1.0149019607843138e-05,
967
- "loss": 0.3212,
968
- "step": 15700
969
- },
970
- {
971
- "epoch": 2.48,
972
- "learning_rate": 1.0086274509803922e-05,
973
- "loss": 0.3222,
974
- "step": 15800
975
- },
976
- {
977
- "epoch": 2.49,
978
- "learning_rate": 1.0023529411764707e-05,
979
- "loss": 0.4107,
980
- "step": 15900
981
- },
982
- {
983
- "epoch": 2.51,
984
- "learning_rate": 9.960784313725492e-06,
985
- "loss": 0.2782,
986
- "step": 16000
987
- },
988
- {
989
- "epoch": 2.53,
990
- "learning_rate": 9.898039215686275e-06,
991
- "loss": 0.3176,
992
- "step": 16100
993
- },
994
- {
995
- "epoch": 2.54,
996
- "learning_rate": 9.83529411764706e-06,
997
- "loss": 0.3233,
998
- "step": 16200
999
- },
1000
- {
1001
- "epoch": 2.56,
1002
- "learning_rate": 9.772549019607844e-06,
1003
- "loss": 0.3402,
1004
- "step": 16300
1005
- },
1006
- {
1007
- "epoch": 2.57,
1008
- "learning_rate": 9.709803921568628e-06,
1009
- "loss": 0.3766,
1010
- "step": 16400
1011
- },
1012
- {
1013
- "epoch": 2.59,
1014
- "learning_rate": 9.647058823529412e-06,
1015
- "loss": 0.266,
1016
- "step": 16500
1017
- },
1018
- {
1019
- "epoch": 2.6,
1020
- "learning_rate": 9.584313725490197e-06,
1021
- "loss": 0.2861,
1022
- "step": 16600
1023
- },
1024
- {
1025
- "epoch": 2.62,
1026
- "learning_rate": 9.521568627450982e-06,
1027
- "loss": 0.3104,
1028
- "step": 16700
1029
- },
1030
- {
1031
- "epoch": 2.64,
1032
- "learning_rate": 9.458823529411767e-06,
1033
- "loss": 0.3355,
1034
- "step": 16800
1035
- },
1036
- {
1037
- "epoch": 2.65,
1038
- "learning_rate": 9.39607843137255e-06,
1039
- "loss": 0.3552,
1040
- "step": 16900
1041
- },
1042
- {
1043
- "epoch": 2.67,
1044
- "learning_rate": 9.333333333333334e-06,
1045
- "loss": 0.2709,
1046
- "step": 17000
1047
- },
1048
- {
1049
- "epoch": 2.68,
1050
- "learning_rate": 9.270588235294117e-06,
1051
- "loss": 0.3766,
1052
- "step": 17100
1053
- },
1054
- {
1055
- "epoch": 2.7,
1056
- "learning_rate": 9.207843137254902e-06,
1057
- "loss": 0.3349,
1058
- "step": 17200
1059
- },
1060
- {
1061
- "epoch": 2.71,
1062
- "learning_rate": 9.145098039215687e-06,
1063
- "loss": 0.3093,
1064
- "step": 17300
1065
- },
1066
- {
1067
- "epoch": 2.73,
1068
- "learning_rate": 9.082352941176472e-06,
1069
- "loss": 0.3151,
1070
- "step": 17400
1071
- },
1072
- {
1073
- "epoch": 2.75,
1074
- "learning_rate": 9.019607843137256e-06,
1075
- "loss": 0.3199,
1076
- "step": 17500
1077
- },
1078
- {
1079
- "epoch": 2.76,
1080
- "learning_rate": 8.95686274509804e-06,
1081
- "loss": 0.3569,
1082
- "step": 17600
1083
- },
1084
- {
1085
- "epoch": 2.78,
1086
- "learning_rate": 8.894117647058824e-06,
1087
- "loss": 0.3279,
1088
- "step": 17700
1089
- },
1090
- {
1091
- "epoch": 2.79,
1092
- "learning_rate": 8.831372549019609e-06,
1093
- "loss": 0.4161,
1094
- "step": 17800
1095
- },
1096
- {
1097
- "epoch": 2.81,
1098
- "learning_rate": 8.768627450980392e-06,
1099
- "loss": 0.3488,
1100
- "step": 17900
1101
- },
1102
- {
1103
- "epoch": 2.82,
1104
- "learning_rate": 8.705882352941177e-06,
1105
- "loss": 0.4203,
1106
- "step": 18000
1107
- },
1108
- {
1109
- "epoch": 2.84,
1110
- "learning_rate": 8.643137254901961e-06,
1111
- "loss": 0.3567,
1112
- "step": 18100
1113
- },
1114
- {
1115
- "epoch": 2.85,
1116
- "learning_rate": 8.580392156862746e-06,
1117
- "loss": 0.3649,
1118
- "step": 18200
1119
- },
1120
- {
1121
- "epoch": 2.87,
1122
- "learning_rate": 8.517647058823531e-06,
1123
- "loss": 0.2921,
1124
- "step": 18300
1125
- },
1126
- {
1127
- "epoch": 2.89,
1128
- "learning_rate": 8.454901960784314e-06,
1129
- "loss": 0.326,
1130
- "step": 18400
1131
- },
1132
- {
1133
- "epoch": 2.9,
1134
- "learning_rate": 8.392156862745099e-06,
1135
- "loss": 0.3584,
1136
- "step": 18500
1137
- },
1138
- {
1139
- "epoch": 2.92,
1140
- "learning_rate": 8.329411764705882e-06,
1141
- "loss": 0.3613,
1142
- "step": 18600
1143
- },
1144
- {
1145
- "epoch": 2.93,
1146
- "learning_rate": 8.266666666666667e-06,
1147
- "loss": 0.3316,
1148
- "step": 18700
1149
- },
1150
- {
1151
- "epoch": 2.95,
1152
- "learning_rate": 8.203921568627451e-06,
1153
- "loss": 0.3124,
1154
- "step": 18800
1155
- },
1156
- {
1157
- "epoch": 2.96,
1158
- "learning_rate": 8.141176470588236e-06,
1159
- "loss": 0.3319,
1160
- "step": 18900
1161
- },
1162
- {
1163
- "epoch": 2.98,
1164
- "learning_rate": 8.07843137254902e-06,
1165
- "loss": 0.3028,
1166
- "step": 19000
1167
- },
1168
- {
1169
- "epoch": 3.0,
1170
- "learning_rate": 8.015686274509804e-06,
1171
- "loss": 0.3605,
1172
- "step": 19100
1173
- },
1174
- {
1175
- "epoch": 3.0,
1176
- "eval_accuracy": 0.9923333333333333,
1177
- "eval_loss": 0.03128869831562042,
1178
- "eval_runtime": 35.5438,
1179
- "eval_samples_per_second": 253.208,
1180
- "eval_steps_per_second": 31.651,
1181
- "step": 19125
1182
- },
1183
- {
1184
- "epoch": 3.01,
1185
- "learning_rate": 7.952941176470589e-06,
1186
- "loss": 0.3341,
1187
- "step": 19200
1188
- },
1189
- {
1190
- "epoch": 3.03,
1191
- "learning_rate": 7.890196078431373e-06,
1192
- "loss": 0.3268,
1193
- "step": 19300
1194
- },
1195
- {
1196
- "epoch": 3.04,
1197
- "learning_rate": 7.827450980392156e-06,
1198
- "loss": 0.3216,
1199
- "step": 19400
1200
- },
1201
- {
1202
- "epoch": 3.06,
1203
- "learning_rate": 7.764705882352941e-06,
1204
- "loss": 0.3544,
1205
- "step": 19500
1206
- },
1207
- {
1208
- "epoch": 3.07,
1209
- "learning_rate": 7.701960784313726e-06,
1210
- "loss": 0.288,
1211
- "step": 19600
1212
- },
1213
- {
1214
- "epoch": 3.09,
1215
- "learning_rate": 7.63921568627451e-06,
1216
- "loss": 0.3547,
1217
- "step": 19700
1218
- },
1219
- {
1220
- "epoch": 3.11,
1221
- "learning_rate": 7.576470588235295e-06,
1222
- "loss": 0.2793,
1223
- "step": 19800
1224
- },
1225
- {
1226
- "epoch": 3.12,
1227
- "learning_rate": 7.5137254901960785e-06,
1228
- "loss": 0.3052,
1229
- "step": 19900
1230
- },
1231
- {
1232
- "epoch": 3.14,
1233
- "learning_rate": 7.450980392156863e-06,
1234
- "loss": 0.2737,
1235
- "step": 20000
1236
- },
1237
- {
1238
- "epoch": 3.15,
1239
- "learning_rate": 7.388235294117647e-06,
1240
- "loss": 0.2816,
1241
- "step": 20100
1242
- },
1243
- {
1244
- "epoch": 3.17,
1245
- "learning_rate": 7.325490196078432e-06,
1246
- "loss": 0.2758,
1247
- "step": 20200
1248
- },
1249
- {
1250
- "epoch": 3.18,
1251
- "learning_rate": 7.262745098039217e-06,
1252
- "loss": 0.3412,
1253
- "step": 20300
1254
- },
1255
- {
1256
- "epoch": 3.2,
1257
- "learning_rate": 7.2000000000000005e-06,
1258
- "loss": 0.3241,
1259
- "step": 20400
1260
- },
1261
- {
1262
- "epoch": 3.22,
1263
- "learning_rate": 7.137254901960785e-06,
1264
- "loss": 0.3032,
1265
- "step": 20500
1266
- },
1267
- {
1268
- "epoch": 3.23,
1269
- "learning_rate": 7.074509803921568e-06,
1270
- "loss": 0.3231,
1271
- "step": 20600
1272
- },
1273
- {
1274
- "epoch": 3.25,
1275
- "learning_rate": 7.011764705882353e-06,
1276
- "loss": 0.3038,
1277
- "step": 20700
1278
- },
1279
- {
1280
- "epoch": 3.26,
1281
- "learning_rate": 6.949019607843138e-06,
1282
- "loss": 0.2632,
1283
- "step": 20800
1284
- },
1285
- {
1286
- "epoch": 3.28,
1287
- "learning_rate": 6.886274509803922e-06,
1288
- "loss": 0.2852,
1289
- "step": 20900
1290
- },
1291
- {
1292
- "epoch": 3.29,
1293
- "learning_rate": 6.8235294117647065e-06,
1294
- "loss": 0.2977,
1295
- "step": 21000
1296
- },
1297
- {
1298
- "epoch": 3.31,
1299
- "learning_rate": 6.76078431372549e-06,
1300
- "loss": 0.3352,
1301
- "step": 21100
1302
- },
1303
- {
1304
- "epoch": 3.33,
1305
- "learning_rate": 6.698039215686275e-06,
1306
- "loss": 0.3002,
1307
- "step": 21200
1308
- },
1309
- {
1310
- "epoch": 3.34,
1311
- "learning_rate": 6.63529411764706e-06,
1312
- "loss": 0.306,
1313
- "step": 21300
1314
- },
1315
- {
1316
- "epoch": 3.36,
1317
- "learning_rate": 6.572549019607844e-06,
1318
- "loss": 0.2627,
1319
- "step": 21400
1320
- },
1321
- {
1322
- "epoch": 3.37,
1323
- "learning_rate": 6.5098039215686285e-06,
1324
- "loss": 0.3526,
1325
- "step": 21500
1326
- },
1327
- {
1328
- "epoch": 3.39,
1329
- "learning_rate": 6.4470588235294116e-06,
1330
- "loss": 0.3441,
1331
- "step": 21600
1332
- },
1333
- {
1334
- "epoch": 3.4,
1335
- "learning_rate": 6.384313725490196e-06,
1336
- "loss": 0.2755,
1337
- "step": 21700
1338
- },
1339
- {
1340
- "epoch": 3.42,
1341
- "learning_rate": 6.321568627450981e-06,
1342
- "loss": 0.3685,
1343
- "step": 21800
1344
- },
1345
- {
1346
- "epoch": 3.44,
1347
- "learning_rate": 6.258823529411765e-06,
1348
- "loss": 0.2978,
1349
- "step": 21900
1350
- },
1351
- {
1352
- "epoch": 3.45,
1353
- "learning_rate": 6.19607843137255e-06,
1354
- "loss": 0.3128,
1355
- "step": 22000
1356
- },
1357
- {
1358
- "epoch": 3.47,
1359
- "learning_rate": 6.133333333333334e-06,
1360
- "loss": 0.3221,
1361
- "step": 22100
1362
- },
1363
- {
1364
- "epoch": 3.48,
1365
- "learning_rate": 6.070588235294118e-06,
1366
- "loss": 0.3258,
1367
- "step": 22200
1368
- },
1369
- {
1370
- "epoch": 3.5,
1371
- "learning_rate": 6.007843137254903e-06,
1372
- "loss": 0.2819,
1373
- "step": 22300
1374
- },
1375
- {
1376
- "epoch": 3.51,
1377
- "learning_rate": 5.945098039215686e-06,
1378
- "loss": 0.2855,
1379
- "step": 22400
1380
- },
1381
- {
1382
- "epoch": 3.53,
1383
- "learning_rate": 5.882352941176471e-06,
1384
- "loss": 0.3273,
1385
- "step": 22500
1386
- },
1387
- {
1388
- "epoch": 3.55,
1389
- "learning_rate": 5.819607843137255e-06,
1390
- "loss": 0.3031,
1391
- "step": 22600
1392
- },
1393
- {
1394
- "epoch": 3.56,
1395
- "learning_rate": 5.7568627450980396e-06,
1396
- "loss": 0.3269,
1397
- "step": 22700
1398
- },
1399
- {
1400
- "epoch": 3.58,
1401
- "learning_rate": 5.694117647058824e-06,
1402
- "loss": 0.3031,
1403
- "step": 22800
1404
- },
1405
- {
1406
- "epoch": 3.59,
1407
- "learning_rate": 5.631372549019608e-06,
1408
- "loss": 0.3113,
1409
- "step": 22900
1410
- },
1411
- {
1412
- "epoch": 3.61,
1413
- "learning_rate": 5.568627450980393e-06,
1414
- "loss": 0.3704,
1415
- "step": 23000
1416
- },
1417
- {
1418
- "epoch": 3.62,
1419
- "learning_rate": 5.505882352941177e-06,
1420
- "loss": 0.3338,
1421
- "step": 23100
1422
- },
1423
- {
1424
- "epoch": 3.64,
1425
- "learning_rate": 5.443137254901962e-06,
1426
- "loss": 0.3142,
1427
- "step": 23200
1428
- },
1429
- {
1430
- "epoch": 3.65,
1431
- "learning_rate": 5.380392156862746e-06,
1432
- "loss": 0.3333,
1433
- "step": 23300
1434
- },
1435
- {
1436
- "epoch": 3.67,
1437
- "learning_rate": 5.317647058823529e-06,
1438
- "loss": 0.3285,
1439
- "step": 23400
1440
- },
1441
- {
1442
- "epoch": 3.69,
1443
- "learning_rate": 5.254901960784314e-06,
1444
- "loss": 0.2986,
1445
- "step": 23500
1446
- },
1447
- {
1448
- "epoch": 3.7,
1449
- "learning_rate": 5.192156862745098e-06,
1450
- "loss": 0.3304,
1451
- "step": 23600
1452
- },
1453
- {
1454
- "epoch": 3.72,
1455
- "learning_rate": 5.129411764705883e-06,
1456
- "loss": 0.3997,
1457
- "step": 23700
1458
- },
1459
- {
1460
- "epoch": 3.73,
1461
- "learning_rate": 5.0666666666666676e-06,
1462
- "loss": 0.3033,
1463
- "step": 23800
1464
- },
1465
- {
1466
- "epoch": 3.75,
1467
- "learning_rate": 5.0039215686274515e-06,
1468
- "loss": 0.3083,
1469
- "step": 23900
1470
- },
1471
- {
1472
- "epoch": 3.76,
1473
- "learning_rate": 4.941176470588236e-06,
1474
- "loss": 0.2966,
1475
- "step": 24000
1476
- },
1477
- {
1478
- "epoch": 3.78,
1479
- "learning_rate": 4.87843137254902e-06,
1480
- "loss": 0.3007,
1481
- "step": 24100
1482
- },
1483
- {
1484
- "epoch": 3.8,
1485
- "learning_rate": 4.815686274509804e-06,
1486
- "loss": 0.2544,
1487
- "step": 24200
1488
- },
1489
- {
1490
- "epoch": 3.81,
1491
- "learning_rate": 4.752941176470589e-06,
1492
- "loss": 0.2299,
1493
- "step": 24300
1494
- },
1495
- {
1496
- "epoch": 3.83,
1497
- "learning_rate": 4.690196078431373e-06,
1498
- "loss": 0.2575,
1499
- "step": 24400
1500
- },
1501
- {
1502
- "epoch": 3.84,
1503
- "learning_rate": 4.627450980392157e-06,
1504
- "loss": 0.2813,
1505
- "step": 24500
1506
- },
1507
- {
1508
- "epoch": 3.86,
1509
- "learning_rate": 4.564705882352941e-06,
1510
- "loss": 0.3245,
1511
- "step": 24600
1512
- },
1513
- {
1514
- "epoch": 3.87,
1515
- "learning_rate": 4.501960784313726e-06,
1516
- "loss": 0.3666,
1517
- "step": 24700
1518
- },
1519
- {
1520
- "epoch": 3.89,
1521
- "learning_rate": 4.43921568627451e-06,
1522
- "loss": 0.2552,
1523
- "step": 24800
1524
- },
1525
- {
1526
- "epoch": 3.91,
1527
- "learning_rate": 4.376470588235294e-06,
1528
- "loss": 0.3075,
1529
- "step": 24900
1530
- },
1531
- {
1532
- "epoch": 3.92,
1533
- "learning_rate": 4.313725490196079e-06,
1534
- "loss": 0.3015,
1535
- "step": 25000
1536
- },
1537
- {
1538
- "epoch": 3.94,
1539
- "learning_rate": 4.250980392156863e-06,
1540
- "loss": 0.3143,
1541
- "step": 25100
1542
- },
1543
- {
1544
- "epoch": 3.95,
1545
- "learning_rate": 4.188235294117647e-06,
1546
- "loss": 0.3493,
1547
- "step": 25200
1548
- },
1549
- {
1550
- "epoch": 3.97,
1551
- "learning_rate": 4.125490196078432e-06,
1552
- "loss": 0.2725,
1553
- "step": 25300
1554
- },
1555
- {
1556
- "epoch": 3.98,
1557
- "learning_rate": 4.062745098039216e-06,
1558
- "loss": 0.2831,
1559
- "step": 25400
1560
- },
1561
- {
1562
- "epoch": 4.0,
1563
- "learning_rate": 4.000000000000001e-06,
1564
- "loss": 0.3252,
1565
- "step": 25500
1566
- },
1567
- {
1568
- "epoch": 4.0,
1569
- "eval_accuracy": 0.9937777777777778,
1570
- "eval_loss": 0.026244711130857468,
1571
- "eval_runtime": 37.8619,
1572
- "eval_samples_per_second": 237.706,
1573
- "eval_steps_per_second": 29.713,
1574
- "step": 25500
1575
- },
1576
- {
1577
- "epoch": 4.02,
1578
- "learning_rate": 3.9372549019607846e-06,
1579
- "loss": 0.2856,
1580
- "step": 25600
1581
- },
1582
- {
1583
- "epoch": 4.03,
1584
- "learning_rate": 3.874509803921569e-06,
1585
- "loss": 0.2335,
1586
- "step": 25700
1587
- },
1588
- {
1589
- "epoch": 4.05,
1590
- "learning_rate": 3.8117647058823532e-06,
1591
- "loss": 0.2649,
1592
- "step": 25800
1593
- },
1594
- {
1595
- "epoch": 4.06,
1596
- "learning_rate": 3.7490196078431375e-06,
1597
- "loss": 0.2944,
1598
- "step": 25900
1599
- },
1600
- {
1601
- "epoch": 4.08,
1602
- "learning_rate": 3.6862745098039223e-06,
1603
- "loss": 0.274,
1604
- "step": 26000
1605
- },
1606
- {
1607
- "epoch": 4.09,
1608
- "learning_rate": 3.623529411764706e-06,
1609
- "loss": 0.2954,
1610
- "step": 26100
1611
- },
1612
- {
1613
- "epoch": 4.11,
1614
- "learning_rate": 3.5607843137254905e-06,
1615
- "loss": 0.3167,
1616
- "step": 26200
1617
- },
1618
- {
1619
- "epoch": 4.13,
1620
- "learning_rate": 3.498039215686275e-06,
1621
- "loss": 0.2988,
1622
- "step": 26300
1623
- },
1624
- {
1625
- "epoch": 4.14,
1626
- "learning_rate": 3.4352941176470587e-06,
1627
- "loss": 0.2723,
1628
- "step": 26400
1629
- },
1630
- {
1631
- "epoch": 4.16,
1632
- "learning_rate": 3.3725490196078435e-06,
1633
- "loss": 0.2515,
1634
- "step": 26500
1635
- },
1636
- {
1637
- "epoch": 4.17,
1638
- "learning_rate": 3.309803921568628e-06,
1639
- "loss": 0.3059,
1640
- "step": 26600
1641
- },
1642
- {
1643
- "epoch": 4.19,
1644
- "learning_rate": 3.247058823529412e-06,
1645
- "loss": 0.2851,
1646
- "step": 26700
1647
- },
1648
- {
1649
- "epoch": 4.2,
1650
- "learning_rate": 3.184313725490196e-06,
1651
- "loss": 0.2909,
1652
- "step": 26800
1653
- },
1654
- {
1655
- "epoch": 4.22,
1656
- "learning_rate": 3.1215686274509804e-06,
1657
- "loss": 0.2882,
1658
- "step": 26900
1659
- },
1660
- {
1661
- "epoch": 4.24,
1662
- "learning_rate": 3.058823529411765e-06,
1663
- "loss": 0.245,
1664
- "step": 27000
1665
- },
1666
- {
1667
- "epoch": 4.25,
1668
- "learning_rate": 2.9960784313725494e-06,
1669
- "loss": 0.2893,
1670
- "step": 27100
1671
- },
1672
- {
1673
- "epoch": 4.27,
1674
- "learning_rate": 2.9333333333333338e-06,
1675
- "loss": 0.2718,
1676
- "step": 27200
1677
- },
1678
- {
1679
- "epoch": 4.28,
1680
- "learning_rate": 2.8705882352941177e-06,
1681
- "loss": 0.2973,
1682
- "step": 27300
1683
- },
1684
- {
1685
- "epoch": 4.3,
1686
- "learning_rate": 2.807843137254902e-06,
1687
- "loss": 0.2615,
1688
- "step": 27400
1689
- },
1690
- {
1691
- "epoch": 4.31,
1692
- "learning_rate": 2.7450980392156867e-06,
1693
- "loss": 0.2875,
1694
- "step": 27500
1695
- },
1696
- {
1697
- "epoch": 4.33,
1698
- "learning_rate": 2.682352941176471e-06,
1699
- "loss": 0.3191,
1700
- "step": 27600
1701
- },
1702
- {
1703
- "epoch": 4.35,
1704
- "learning_rate": 2.619607843137255e-06,
1705
- "loss": 0.2995,
1706
- "step": 27700
1707
- },
1708
- {
1709
- "epoch": 4.36,
1710
- "learning_rate": 2.5568627450980393e-06,
1711
- "loss": 0.3184,
1712
- "step": 27800
1713
- },
1714
- {
1715
- "epoch": 4.38,
1716
- "learning_rate": 2.4941176470588236e-06,
1717
- "loss": 0.3621,
1718
- "step": 27900
1719
- },
1720
- {
1721
- "epoch": 4.39,
1722
- "learning_rate": 2.431372549019608e-06,
1723
- "loss": 0.3315,
1724
- "step": 28000
1725
- },
1726
- {
1727
- "epoch": 4.41,
1728
- "learning_rate": 2.3686274509803927e-06,
1729
- "loss": 0.2731,
1730
- "step": 28100
1731
- },
1732
- {
1733
- "epoch": 4.42,
1734
- "learning_rate": 2.3058823529411766e-06,
1735
- "loss": 0.3311,
1736
- "step": 28200
1737
- },
1738
- {
1739
- "epoch": 4.44,
1740
- "learning_rate": 2.243137254901961e-06,
1741
- "loss": 0.3231,
1742
- "step": 28300
1743
- },
1744
- {
1745
- "epoch": 4.45,
1746
- "learning_rate": 2.1803921568627452e-06,
1747
- "loss": 0.2697,
1748
- "step": 28400
1749
- },
1750
- {
1751
- "epoch": 4.47,
1752
- "learning_rate": 2.1176470588235296e-06,
1753
- "loss": 0.2057,
1754
- "step": 28500
1755
- },
1756
- {
1757
- "epoch": 4.49,
1758
- "learning_rate": 2.054901960784314e-06,
1759
- "loss": 0.301,
1760
- "step": 28600
1761
- },
1762
- {
1763
- "epoch": 4.5,
1764
- "learning_rate": 1.9921568627450982e-06,
1765
- "loss": 0.2742,
1766
- "step": 28700
1767
- },
1768
- {
1769
- "epoch": 4.52,
1770
- "learning_rate": 1.9294117647058825e-06,
1771
- "loss": 0.3332,
1772
- "step": 28800
1773
- },
1774
- {
1775
- "epoch": 4.53,
1776
- "learning_rate": 1.8666666666666669e-06,
1777
- "loss": 0.2852,
1778
- "step": 28900
1779
- },
1780
- {
1781
- "epoch": 4.55,
1782
- "learning_rate": 1.8039215686274512e-06,
1783
- "loss": 0.2685,
1784
- "step": 29000
1785
- },
1786
- {
1787
- "epoch": 4.56,
1788
- "learning_rate": 1.7411764705882353e-06,
1789
- "loss": 0.2576,
1790
- "step": 29100
1791
- },
1792
- {
1793
- "epoch": 4.58,
1794
- "learning_rate": 1.6784313725490198e-06,
1795
- "loss": 0.2498,
1796
- "step": 29200
1797
- },
1798
- {
1799
- "epoch": 4.6,
1800
- "learning_rate": 1.615686274509804e-06,
1801
- "loss": 0.305,
1802
- "step": 29300
1803
- },
1804
- {
1805
- "epoch": 4.61,
1806
- "learning_rate": 1.5529411764705885e-06,
1807
- "loss": 0.333,
1808
- "step": 29400
1809
- },
1810
- {
1811
- "epoch": 4.63,
1812
- "learning_rate": 1.4901960784313726e-06,
1813
- "loss": 0.2751,
1814
- "step": 29500
1815
- },
1816
- {
1817
- "epoch": 4.64,
1818
- "learning_rate": 1.427450980392157e-06,
1819
- "loss": 0.2952,
1820
- "step": 29600
1821
- },
1822
- {
1823
- "epoch": 4.66,
1824
- "learning_rate": 1.3647058823529413e-06,
1825
- "loss": 0.2523,
1826
- "step": 29700
1827
- },
1828
- {
1829
- "epoch": 4.67,
1830
- "learning_rate": 1.3019607843137256e-06,
1831
- "loss": 0.3139,
1832
- "step": 29800
1833
- },
1834
- {
1835
- "epoch": 4.69,
1836
- "learning_rate": 1.23921568627451e-06,
1837
- "loss": 0.3337,
1838
- "step": 29900
1839
- },
1840
- {
1841
- "epoch": 4.71,
1842
- "learning_rate": 1.1764705882352942e-06,
1843
- "loss": 0.2828,
1844
- "step": 30000
1845
- },
1846
- {
1847
- "epoch": 4.72,
1848
- "learning_rate": 1.1137254901960786e-06,
1849
- "loss": 0.2612,
1850
- "step": 30100
1851
- },
1852
- {
1853
- "epoch": 4.74,
1854
- "learning_rate": 1.0509803921568629e-06,
1855
- "loss": 0.2731,
1856
- "step": 30200
1857
- },
1858
- {
1859
- "epoch": 4.75,
1860
- "learning_rate": 9.88235294117647e-07,
1861
- "loss": 0.2368,
1862
- "step": 30300
1863
- },
1864
- {
1865
- "epoch": 4.77,
1866
- "learning_rate": 9.254901960784314e-07,
1867
- "loss": 0.294,
1868
- "step": 30400
1869
- },
1870
- {
1871
- "epoch": 4.78,
1872
- "learning_rate": 8.627450980392157e-07,
1873
- "loss": 0.2785,
1874
- "step": 30500
1875
- },
1876
- {
1877
- "epoch": 4.8,
1878
- "learning_rate": 8.000000000000001e-07,
1879
- "loss": 0.2766,
1880
- "step": 30600
1881
- },
1882
- {
1883
- "epoch": 4.82,
1884
- "learning_rate": 7.372549019607844e-07,
1885
- "loss": 0.2555,
1886
- "step": 30700
1887
- },
1888
- {
1889
- "epoch": 4.83,
1890
- "learning_rate": 6.745098039215686e-07,
1891
- "loss": 0.304,
1892
- "step": 30800
1893
- },
1894
- {
1895
- "epoch": 4.85,
1896
- "learning_rate": 6.11764705882353e-07,
1897
- "loss": 0.3045,
1898
- "step": 30900
1899
- },
1900
- {
1901
- "epoch": 4.86,
1902
- "learning_rate": 5.490196078431373e-07,
1903
- "loss": 0.28,
1904
- "step": 31000
1905
- },
1906
- {
1907
- "epoch": 4.88,
1908
- "learning_rate": 4.862745098039216e-07,
1909
- "loss": 0.3639,
1910
- "step": 31100
1911
- },
1912
- {
1913
- "epoch": 4.89,
1914
- "learning_rate": 4.235294117647059e-07,
1915
- "loss": 0.2899,
1916
- "step": 31200
1917
- },
1918
- {
1919
- "epoch": 4.91,
1920
- "learning_rate": 3.6078431372549024e-07,
1921
- "loss": 0.3196,
1922
- "step": 31300
1923
- },
1924
- {
1925
- "epoch": 4.93,
1926
- "learning_rate": 2.9803921568627456e-07,
1927
- "loss": 0.2737,
1928
- "step": 31400
1929
- },
1930
- {
1931
- "epoch": 4.94,
1932
- "learning_rate": 2.3529411764705883e-07,
1933
- "loss": 0.2781,
1934
- "step": 31500
1935
- },
1936
- {
1937
- "epoch": 4.96,
1938
- "learning_rate": 1.7254901960784313e-07,
1939
- "loss": 0.264,
1940
- "step": 31600
1941
- },
1942
- {
1943
- "epoch": 4.97,
1944
- "learning_rate": 1.0980392156862746e-07,
1945
- "loss": 0.2394,
1946
- "step": 31700
1947
- },
1948
- {
1949
- "epoch": 4.99,
1950
- "learning_rate": 4.705882352941176e-08,
1951
- "loss": 0.2885,
1952
- "step": 31800
1953
  },
1954
  {
1955
  "epoch": 5.0,
1956
- "eval_accuracy": 0.9947777777777778,
1957
- "eval_loss": 0.023610670119524002,
1958
- "eval_runtime": 38.1337,
1959
- "eval_samples_per_second": 236.012,
1960
- "eval_steps_per_second": 29.501,
1961
- "step": 31875
1962
  },
1963
  {
1964
  "epoch": 5.0,
1965
- "step": 31875,
1966
- "total_flos": 1.976187420555264e+19,
1967
- "train_loss": 0.37173893965178845,
1968
- "train_runtime": 3249.9087,
1969
- "train_samples_per_second": 78.464,
1970
- "train_steps_per_second": 9.808
1971
  }
1972
  ],
1973
- "max_steps": 31875,
1974
  "num_train_epochs": 5,
1975
- "total_flos": 1.976187420555264e+19,
1976
  "trial_name": null,
1977
  "trial_params": null
1978
  }
 
1
  {
2
+ "best_metric": 0.689139723777771,
3
+ "best_model_checkpoint": "/storage/img-cls-data/roman_numeral_outputs/checkpoint-1445",
4
  "epoch": 5.0,
5
+ "global_step": 1445,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.35,
12
+ "learning_rate": 1.8615916955017305e-05,
13
+ "loss": 2.2093,
14
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.69,
18
+ "learning_rate": 1.7231833910034604e-05,
19
+ "loss": 1.9053,
20
+ "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 1.0,
24
+ "eval_accuracy": 0.7107843137254902,
25
+ "eval_loss": 1.324064016342163,
26
+ "eval_runtime": 2.1357,
27
+ "eval_samples_per_second": 191.039,
28
+ "eval_steps_per_second": 23.88,
29
+ "step": 289
 
 
 
30
  },
31
  {
32
  "epoch": 1.04,
33
+ "learning_rate": 1.5847750865051904e-05,
34
+ "loss": 1.6347,
35
+ "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  },
37
  {
38
  "epoch": 1.38,
39
+ "learning_rate": 1.4463667820069205e-05,
40
+ "loss": 1.4087,
41
+ "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  {
44
  "epoch": 1.73,
45
+ "learning_rate": 1.3079584775086506e-05,
46
+ "loss": 1.3293,
47
+ "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  },
49
  {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.7892156862745098,
52
+ "eval_loss": 0.933335542678833,
53
+ "eval_runtime": 1.9774,
54
+ "eval_samples_per_second": 206.327,
55
+ "eval_steps_per_second": 25.791,
56
+ "step": 578
57
  },
58
  {
59
+ "epoch": 2.08,
60
+ "learning_rate": 1.1695501730103806e-05,
61
+ "loss": 1.2134,
62
+ "step": 600
63
  },
64
  {
65
+ "epoch": 2.42,
66
+ "learning_rate": 1.0311418685121109e-05,
67
+ "loss": 1.1315,
68
+ "step": 700
69
  },
70
  {
71
+ "epoch": 2.77,
72
+ "learning_rate": 8.92733564013841e-06,
73
+ "loss": 1.1251,
74
+ "step": 800
75
  },
76
  {
77
+ "epoch": 3.0,
78
+ "eval_accuracy": 0.7843137254901961,
79
+ "eval_loss": 0.7988855838775635,
80
+ "eval_runtime": 2.1384,
81
+ "eval_samples_per_second": 190.795,
82
+ "eval_steps_per_second": 23.849,
83
+ "step": 867
84
  },
85
  {
86
+ "epoch": 3.11,
87
+ "learning_rate": 7.5432525951557104e-06,
88
+ "loss": 1.0903,
89
+ "step": 900
90
  },
91
  {
92
+ "epoch": 3.46,
93
+ "learning_rate": 6.159169550173011e-06,
94
+ "loss": 1.05,
95
+ "step": 1000
96
  },
97
  {
98
+ "epoch": 3.81,
99
+ "learning_rate": 4.775086505190312e-06,
100
+ "loss": 0.9837,
101
+ "step": 1100
102
  },
103
  {
104
+ "epoch": 4.0,
105
+ "eval_accuracy": 0.8186274509803921,
106
+ "eval_loss": 0.6956034898757935,
107
+ "eval_runtime": 2.0398,
108
+ "eval_samples_per_second": 200.024,
109
+ "eval_steps_per_second": 25.003,
110
+ "step": 1156
111
  },
112
  {
113
+ "epoch": 4.15,
114
+ "learning_rate": 3.3910034602076125e-06,
115
+ "loss": 1.0053,
116
+ "step": 1200
117
  },
118
  {
119
+ "epoch": 4.5,
120
+ "learning_rate": 2.0069204152249138e-06,
121
+ "loss": 0.9802,
122
+ "step": 1300
123
  },
124
  {
125
+ "epoch": 4.84,
126
+ "learning_rate": 6.228373702422146e-07,
127
+ "loss": 0.999,
128
+ "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  },
130
  {
131
  "epoch": 5.0,
132
+ "eval_accuracy": 0.8308823529411765,
133
+ "eval_loss": 0.689139723777771,
134
+ "eval_runtime": 1.9257,
135
+ "eval_samples_per_second": 211.874,
136
+ "eval_steps_per_second": 26.484,
137
+ "step": 1445
138
  },
139
  {
140
  "epoch": 5.0,
141
+ "step": 1445,
142
+ "total_flos": 8.947091674631578e+17,
143
+ "train_loss": 1.2808735091793495,
144
+ "train_runtime": 453.7657,
145
+ "train_samples_per_second": 25.443,
146
+ "train_steps_per_second": 3.184
147
  }
148
  ],
149
+ "max_steps": 1445,
150
  "num_train_epochs": 5,
151
+ "total_flos": 8.947091674631578e+17,
152
  "trial_name": null,
153
  "trial_params": null
154
  }