alkzar90 commited on
Commit
f22f7b0
1 Parent(s): 92682ec

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 50.0,
3
  "eval_accuracy": 0.7471264367816092,
4
- "eval_loss": 0.847703754901886,
5
- "eval_runtime": 1.6712,
6
- "eval_samples_per_second": 104.114,
7
- "eval_steps_per_second": 13.164,
8
- "total_flos": 5.622144623740109e+18,
9
- "train_loss": 0.2358846340533141,
10
- "train_runtime": 1979.1189,
11
- "train_samples_per_second": 36.658,
12
- "train_steps_per_second": 2.299
13
  }
 
1
  {
2
+ "epoch": 5.0,
3
  "eval_accuracy": 0.7471264367816092,
4
+ "eval_loss": 0.7583417296409607,
5
+ "eval_runtime": 1.9455,
6
+ "eval_samples_per_second": 89.438,
7
+ "eval_steps_per_second": 11.308,
8
+ "total_flos": 5.622144623740109e+17,
9
+ "train_loss": 0.5662190929873959,
10
+ "train_runtime": 205.6317,
11
+ "train_samples_per_second": 35.282,
12
+ "train_steps_per_second": 2.213
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_accuracy": 0.7470588235294118,
4
- "eval_loss": 0.7239104509353638,
5
- "eval_runtime": 1.5971,
6
- "eval_samples_per_second": 106.444,
7
- "eval_steps_per_second": 13.775
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.7941176470588235,
4
+ "eval_loss": 0.6151905059814453,
5
+ "eval_runtime": 1.752,
6
+ "eval_samples_per_second": 97.032,
7
+ "eval_steps_per_second": 12.557
8
  }
runs/Aug08_02-43-29_ef222df47c0f/events.out.tfevents.1659927535.ef222df47c0f.73.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0271c042d22fa8eedacf11924d245bf724b36c676e92fc39de262ccc25a407ca
3
+ size 686
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
  "eval_accuracy": 0.7471264367816092,
4
- "eval_loss": 0.847703754901886,
5
- "eval_runtime": 1.6712,
6
- "eval_samples_per_second": 104.114,
7
- "eval_steps_per_second": 13.164
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
  "eval_accuracy": 0.7471264367816092,
4
+ "eval_loss": 0.7583417296409607,
5
+ "eval_runtime": 1.9455,
6
+ "eval_samples_per_second": 89.438,
7
+ "eval_steps_per_second": 11.308
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "total_flos": 5.622144623740109e+18,
4
- "train_loss": 0.2358846340533141,
5
- "train_runtime": 1979.1189,
6
- "train_samples_per_second": 36.658,
7
- "train_steps_per_second": 2.299
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 5.622144623740109e+17,
4
+ "train_loss": 0.5662190929873959,
5
+ "train_runtime": 205.6317,
6
+ "train_samples_per_second": 35.282,
7
+ "train_steps_per_second": 2.213
8
  }
trainer_state.json CHANGED
@@ -1,3160 +1,331 @@
1
  {
2
- "best_metric": 0.6692019104957581,
3
- "best_model_checkpoint": "./croupier-creature-classifier/checkpoint-2000",
4
- "epoch": 50.0,
5
- "global_step": 4550,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.11,
12
- "learning_rate": 2.9934065934065934e-05,
13
- "loss": 1.3796,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.22,
18
- "learning_rate": 2.9868131868131868e-05,
19
- "loss": 1.3597,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.33,
24
- "learning_rate": 2.9802197802197805e-05,
25
- "loss": 1.3427,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.44,
30
- "learning_rate": 2.973626373626374e-05,
31
- "loss": 1.3331,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.55,
36
- "learning_rate": 2.9670329670329673e-05,
37
- "loss": 1.275,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.66,
42
- "learning_rate": 2.9604395604395606e-05,
43
- "loss": 1.2765,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.77,
48
- "learning_rate": 2.953846153846154e-05,
49
- "loss": 1.2306,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.88,
54
- "learning_rate": 2.947252747252747e-05,
55
- "loss": 1.2621,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.99,
60
- "learning_rate": 2.9406593406593407e-05,
61
- "loss": 1.1997,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 1.1,
66
- "learning_rate": 2.934065934065934e-05,
67
- "loss": 1.1159,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 1.1,
72
- "eval_accuracy": 0.611764705882353,
73
- "eval_loss": 1.1144014596939087,
74
- "eval_runtime": 1.5221,
75
- "eval_samples_per_second": 111.687,
76
- "eval_steps_per_second": 14.454,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.21,
81
- "learning_rate": 2.9274725274725275e-05,
82
- "loss": 1.0839,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.32,
87
- "learning_rate": 2.920879120879121e-05,
88
- "loss": 1.0689,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 1.43,
93
- "learning_rate": 2.9142857142857142e-05,
94
- "loss": 0.9898,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.54,
99
- "learning_rate": 2.907692307692308e-05,
100
- "loss": 0.9495,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.65,
105
- "learning_rate": 2.9010989010989013e-05,
106
- "loss": 0.9943,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.76,
111
- "learning_rate": 2.8945054945054947e-05,
112
- "loss": 0.98,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.87,
117
- "learning_rate": 2.887912087912088e-05,
118
- "loss": 0.9986,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.98,
123
- "learning_rate": 2.8813186813186814e-05,
124
- "loss": 0.9522,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 2.09,
129
- "learning_rate": 2.8747252747252748e-05,
130
- "loss": 0.8533,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 2.2,
135
- "learning_rate": 2.8681318681318685e-05,
136
- "loss": 0.8183,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 2.2,
141
- "eval_accuracy": 0.6882352941176471,
142
- "eval_loss": 0.9109101891517639,
143
- "eval_runtime": 1.6142,
144
- "eval_samples_per_second": 105.317,
145
- "eval_steps_per_second": 13.629,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 2.31,
150
- "learning_rate": 2.861538461538462e-05,
151
- "loss": 0.7843,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 2.42,
156
- "learning_rate": 2.854945054945055e-05,
157
- "loss": 0.8093,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 2.53,
162
- "learning_rate": 2.8483516483516482e-05,
163
- "loss": 0.8541,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 2.64,
168
- "learning_rate": 2.8417582417582416e-05,
169
- "loss": 0.7405,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 2.75,
174
- "learning_rate": 2.8351648351648353e-05,
175
- "loss": 0.7858,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 2.86,
180
- "learning_rate": 2.8285714285714287e-05,
181
- "loss": 0.7966,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 2.97,
186
- "learning_rate": 2.821978021978022e-05,
187
- "loss": 0.695,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 3.08,
192
- "learning_rate": 2.8153846153846154e-05,
193
- "loss": 0.6649,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 3.19,
198
- "learning_rate": 2.8087912087912088e-05,
199
- "loss": 0.605,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 3.3,
204
- "learning_rate": 2.802197802197802e-05,
205
- "loss": 0.6829,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 3.3,
210
- "eval_accuracy": 0.7235294117647059,
211
- "eval_loss": 0.7676671743392944,
212
- "eval_runtime": 1.6247,
213
- "eval_samples_per_second": 104.633,
214
- "eval_steps_per_second": 13.541,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 3.41,
219
- "learning_rate": 2.795604395604396e-05,
220
- "loss": 0.5722,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 3.52,
225
- "learning_rate": 2.7890109890109892e-05,
226
- "loss": 0.654,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 3.63,
231
- "learning_rate": 2.7824175824175826e-05,
232
- "loss": 0.7001,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 3.74,
237
- "learning_rate": 2.775824175824176e-05,
238
- "loss": 0.5804,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 3.85,
243
- "learning_rate": 2.7692307692307694e-05,
244
- "loss": 0.6638,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 3.96,
249
- "learning_rate": 2.7626373626373624e-05,
250
- "loss": 0.6668,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 4.07,
255
- "learning_rate": 2.756043956043956e-05,
256
- "loss": 0.5599,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 4.18,
261
- "learning_rate": 2.7494505494505495e-05,
262
- "loss": 0.5317,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 4.29,
267
- "learning_rate": 2.7428571428571428e-05,
268
- "loss": 0.5403,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 4.4,
273
- "learning_rate": 2.7362637362637362e-05,
274
- "loss": 0.5575,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 4.4,
279
- "eval_accuracy": 0.6764705882352942,
280
- "eval_loss": 0.7670463919639587,
281
- "eval_runtime": 1.6237,
282
- "eval_samples_per_second": 104.698,
283
- "eval_steps_per_second": 13.549,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 4.51,
288
- "learning_rate": 2.7296703296703296e-05,
289
- "loss": 0.5018,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 4.62,
294
- "learning_rate": 2.7230769230769233e-05,
295
- "loss": 0.5715,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 4.73,
300
- "learning_rate": 2.7164835164835166e-05,
301
- "loss": 0.4884,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 4.84,
306
- "learning_rate": 2.7105494505494504e-05,
307
- "loss": 0.5533,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 4.95,
312
- "learning_rate": 2.703956043956044e-05,
313
- "loss": 0.5183,
314
  "step": 450
315
  },
316
  {
317
- "epoch": 5.05,
318
- "learning_rate": 2.6973626373626375e-05,
319
- "loss": 0.4451,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 5.16,
324
- "learning_rate": 2.690769230769231e-05,
325
- "loss": 0.4605,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 5.27,
330
- "learning_rate": 2.6841758241758243e-05,
331
- "loss": 0.4289,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 5.38,
336
- "learning_rate": 2.6775824175824176e-05,
337
- "loss": 0.4433,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 5.49,
342
- "learning_rate": 2.670989010989011e-05,
343
- "loss": 0.4644,
344
- "step": 500
345
- },
346
- {
347
- "epoch": 5.49,
348
- "eval_accuracy": 0.6647058823529411,
349
- "eval_loss": 0.8459996581077576,
350
- "eval_runtime": 1.6386,
351
- "eval_samples_per_second": 103.747,
352
- "eval_steps_per_second": 13.426,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 5.6,
357
- "learning_rate": 2.6643956043956047e-05,
358
- "loss": 0.4732,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 5.71,
363
- "learning_rate": 2.657802197802198e-05,
364
- "loss": 0.4939,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 5.82,
369
- "learning_rate": 2.651208791208791e-05,
370
- "loss": 0.4209,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 5.93,
375
- "learning_rate": 2.6446153846153845e-05,
376
- "loss": 0.3783,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 6.04,
381
- "learning_rate": 2.638021978021978e-05,
382
- "loss": 0.4665,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 6.15,
387
- "learning_rate": 2.6314285714285715e-05,
388
- "loss": 0.2572,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 6.26,
393
- "learning_rate": 2.624835164835165e-05,
394
- "loss": 0.3752,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 6.37,
399
- "learning_rate": 2.6182417582417583e-05,
400
- "loss": 0.2943,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 6.48,
405
- "learning_rate": 2.6116483516483517e-05,
406
- "loss": 0.3949,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 6.59,
411
- "learning_rate": 2.605054945054945e-05,
412
- "loss": 0.3096,
413
- "step": 600
414
- },
415
- {
416
- "epoch": 6.59,
417
- "eval_accuracy": 0.7529411764705882,
418
- "eval_loss": 0.7081632614135742,
419
- "eval_runtime": 1.6198,
420
- "eval_samples_per_second": 104.953,
421
- "eval_steps_per_second": 13.582,
422
- "step": 600
423
- },
424
- {
425
- "epoch": 6.7,
426
- "learning_rate": 2.5984615384615384e-05,
427
- "loss": 0.4314,
428
- "step": 610
429
- },
430
- {
431
- "epoch": 6.81,
432
- "learning_rate": 2.591868131868132e-05,
433
- "loss": 0.3389,
434
- "step": 620
435
- },
436
- {
437
- "epoch": 6.92,
438
- "learning_rate": 2.5852747252747255e-05,
439
- "loss": 0.399,
440
- "step": 630
441
- },
442
- {
443
- "epoch": 7.03,
444
- "learning_rate": 2.578681318681319e-05,
445
- "loss": 0.3425,
446
- "step": 640
447
- },
448
- {
449
- "epoch": 7.14,
450
- "learning_rate": 2.5720879120879122e-05,
451
- "loss": 0.3395,
452
- "step": 650
453
- },
454
- {
455
- "epoch": 7.25,
456
- "learning_rate": 2.5654945054945056e-05,
457
- "loss": 0.3558,
458
- "step": 660
459
- },
460
- {
461
- "epoch": 7.36,
462
- "learning_rate": 2.558901098901099e-05,
463
- "loss": 0.3773,
464
- "step": 670
465
- },
466
- {
467
- "epoch": 7.47,
468
- "learning_rate": 2.5523076923076923e-05,
469
- "loss": 0.3493,
470
- "step": 680
471
- },
472
- {
473
- "epoch": 7.58,
474
- "learning_rate": 2.5457142857142857e-05,
475
- "loss": 0.402,
476
- "step": 690
477
- },
478
- {
479
- "epoch": 7.69,
480
- "learning_rate": 2.539120879120879e-05,
481
- "loss": 0.305,
482
- "step": 700
483
- },
484
- {
485
- "epoch": 7.69,
486
- "eval_accuracy": 0.7647058823529411,
487
- "eval_loss": 0.693938136100769,
488
- "eval_runtime": 1.7847,
489
- "eval_samples_per_second": 95.254,
490
- "eval_steps_per_second": 12.327,
491
- "step": 700
492
- },
493
- {
494
- "epoch": 7.8,
495
- "learning_rate": 2.5325274725274724e-05,
496
- "loss": 0.3702,
497
- "step": 710
498
- },
499
- {
500
- "epoch": 7.91,
501
- "learning_rate": 2.5259340659340658e-05,
502
- "loss": 0.374,
503
- "step": 720
504
- },
505
- {
506
- "epoch": 8.02,
507
- "learning_rate": 2.5193406593406595e-05,
508
- "loss": 0.3374,
509
- "step": 730
510
- },
511
- {
512
- "epoch": 8.13,
513
- "learning_rate": 2.512747252747253e-05,
514
- "loss": 0.2479,
515
- "step": 740
516
- },
517
- {
518
- "epoch": 8.24,
519
- "learning_rate": 2.5061538461538462e-05,
520
- "loss": 0.3161,
521
- "step": 750
522
- },
523
- {
524
- "epoch": 8.35,
525
- "learning_rate": 2.4995604395604396e-05,
526
- "loss": 0.3229,
527
- "step": 760
528
- },
529
- {
530
- "epoch": 8.46,
531
- "learning_rate": 2.492967032967033e-05,
532
- "loss": 0.2871,
533
- "step": 770
534
- },
535
- {
536
- "epoch": 8.57,
537
- "learning_rate": 2.4863736263736267e-05,
538
- "loss": 0.3116,
539
- "step": 780
540
- },
541
- {
542
- "epoch": 8.68,
543
- "learning_rate": 2.47978021978022e-05,
544
- "loss": 0.3632,
545
- "step": 790
546
- },
547
- {
548
- "epoch": 8.79,
549
- "learning_rate": 2.4731868131868134e-05,
550
- "loss": 0.3349,
551
- "step": 800
552
- },
553
- {
554
- "epoch": 8.79,
555
- "eval_accuracy": 0.7235294117647059,
556
- "eval_loss": 0.7285170555114746,
557
- "eval_runtime": 1.7358,
558
- "eval_samples_per_second": 97.94,
559
- "eval_steps_per_second": 12.675,
560
- "step": 800
561
- },
562
- {
563
- "epoch": 8.9,
564
- "learning_rate": 2.4665934065934068e-05,
565
- "loss": 0.3465,
566
- "step": 810
567
- },
568
- {
569
- "epoch": 9.01,
570
- "learning_rate": 2.4599999999999998e-05,
571
- "loss": 0.2579,
572
- "step": 820
573
- },
574
- {
575
- "epoch": 9.12,
576
- "learning_rate": 2.4534065934065932e-05,
577
- "loss": 0.324,
578
- "step": 830
579
- },
580
- {
581
- "epoch": 9.23,
582
- "learning_rate": 2.446813186813187e-05,
583
- "loss": 0.3192,
584
- "step": 840
585
- },
586
- {
587
- "epoch": 9.34,
588
- "learning_rate": 2.4402197802197803e-05,
589
- "loss": 0.2076,
590
- "step": 850
591
- },
592
- {
593
- "epoch": 9.45,
594
- "learning_rate": 2.4336263736263736e-05,
595
- "loss": 0.2456,
596
- "step": 860
597
- },
598
- {
599
- "epoch": 9.56,
600
- "learning_rate": 2.427032967032967e-05,
601
- "loss": 0.3255,
602
- "step": 870
603
- },
604
- {
605
- "epoch": 9.67,
606
- "learning_rate": 2.4204395604395604e-05,
607
- "loss": 0.3601,
608
- "step": 880
609
- },
610
- {
611
- "epoch": 9.78,
612
- "learning_rate": 2.413846153846154e-05,
613
- "loss": 0.1986,
614
- "step": 890
615
- },
616
- {
617
- "epoch": 9.89,
618
- "learning_rate": 2.4072527472527475e-05,
619
- "loss": 0.36,
620
- "step": 900
621
- },
622
- {
623
- "epoch": 9.89,
624
- "eval_accuracy": 0.7294117647058823,
625
- "eval_loss": 0.7664376497268677,
626
- "eval_runtime": 1.6387,
627
- "eval_samples_per_second": 103.742,
628
- "eval_steps_per_second": 13.425,
629
- "step": 900
630
- },
631
- {
632
- "epoch": 10.0,
633
- "learning_rate": 2.4006593406593408e-05,
634
- "loss": 0.259,
635
- "step": 910
636
- },
637
- {
638
- "epoch": 10.11,
639
- "learning_rate": 2.3940659340659342e-05,
640
- "loss": 0.2984,
641
- "step": 920
642
- },
643
- {
644
- "epoch": 10.22,
645
- "learning_rate": 2.3874725274725276e-05,
646
- "loss": 0.2073,
647
- "step": 930
648
- },
649
- {
650
- "epoch": 10.33,
651
- "learning_rate": 2.380879120879121e-05,
652
- "loss": 0.3265,
653
- "step": 940
654
- },
655
- {
656
- "epoch": 10.44,
657
- "learning_rate": 2.3742857142857146e-05,
658
- "loss": 0.3372,
659
- "step": 950
660
- },
661
- {
662
- "epoch": 10.55,
663
- "learning_rate": 2.3676923076923077e-05,
664
- "loss": 0.3518,
665
- "step": 960
666
- },
667
- {
668
- "epoch": 10.66,
669
- "learning_rate": 2.361098901098901e-05,
670
- "loss": 0.2707,
671
- "step": 970
672
- },
673
- {
674
- "epoch": 10.77,
675
- "learning_rate": 2.3545054945054944e-05,
676
- "loss": 0.2529,
677
- "step": 980
678
- },
679
- {
680
- "epoch": 10.88,
681
- "learning_rate": 2.3479120879120878e-05,
682
- "loss": 0.258,
683
- "step": 990
684
- },
685
- {
686
- "epoch": 10.99,
687
- "learning_rate": 2.341978021978022e-05,
688
- "loss": 0.3184,
689
- "step": 1000
690
- },
691
- {
692
- "epoch": 10.99,
693
- "eval_accuracy": 0.7588235294117647,
694
- "eval_loss": 0.6806656718254089,
695
- "eval_runtime": 1.6209,
696
- "eval_samples_per_second": 104.882,
697
- "eval_steps_per_second": 13.573,
698
- "step": 1000
699
- },
700
- {
701
- "epoch": 11.1,
702
- "learning_rate": 2.3353846153846153e-05,
703
- "loss": 0.2439,
704
- "step": 1010
705
- },
706
- {
707
- "epoch": 11.21,
708
- "learning_rate": 2.3287912087912087e-05,
709
- "loss": 0.1827,
710
- "step": 1020
711
- },
712
- {
713
- "epoch": 11.32,
714
- "learning_rate": 2.3221978021978024e-05,
715
- "loss": 0.2845,
716
- "step": 1030
717
- },
718
- {
719
- "epoch": 11.43,
720
- "learning_rate": 2.3156043956043957e-05,
721
- "loss": 0.2503,
722
- "step": 1040
723
- },
724
- {
725
- "epoch": 11.54,
726
- "learning_rate": 2.309010989010989e-05,
727
- "loss": 0.2009,
728
- "step": 1050
729
- },
730
- {
731
- "epoch": 11.65,
732
- "learning_rate": 2.3024175824175825e-05,
733
- "loss": 0.2486,
734
- "step": 1060
735
- },
736
- {
737
- "epoch": 11.76,
738
- "learning_rate": 2.295824175824176e-05,
739
- "loss": 0.1877,
740
- "step": 1070
741
- },
742
- {
743
- "epoch": 11.87,
744
- "learning_rate": 2.2892307692307692e-05,
745
- "loss": 0.3497,
746
- "step": 1080
747
- },
748
- {
749
- "epoch": 11.98,
750
- "learning_rate": 2.282637362637363e-05,
751
- "loss": 0.2559,
752
- "step": 1090
753
- },
754
- {
755
- "epoch": 12.09,
756
- "learning_rate": 2.2760439560439563e-05,
757
- "loss": 0.2815,
758
- "step": 1100
759
- },
760
- {
761
- "epoch": 12.09,
762
- "eval_accuracy": 0.7352941176470589,
763
- "eval_loss": 0.7407693266868591,
764
- "eval_runtime": 1.6974,
765
- "eval_samples_per_second": 100.156,
766
- "eval_steps_per_second": 12.961,
767
- "step": 1100
768
- },
769
- {
770
- "epoch": 12.2,
771
- "learning_rate": 2.2694505494505497e-05,
772
- "loss": 0.2352,
773
- "step": 1110
774
- },
775
- {
776
- "epoch": 12.31,
777
- "learning_rate": 2.262857142857143e-05,
778
- "loss": 0.2422,
779
- "step": 1120
780
- },
781
- {
782
- "epoch": 12.42,
783
- "learning_rate": 2.256263736263736e-05,
784
- "loss": 0.2287,
785
- "step": 1130
786
- },
787
- {
788
- "epoch": 12.53,
789
- "learning_rate": 2.2496703296703298e-05,
790
- "loss": 0.282,
791
- "step": 1140
792
- },
793
- {
794
- "epoch": 12.64,
795
- "learning_rate": 2.243076923076923e-05,
796
- "loss": 0.2579,
797
- "step": 1150
798
- },
799
- {
800
- "epoch": 12.75,
801
- "learning_rate": 2.2364835164835165e-05,
802
- "loss": 0.2907,
803
- "step": 1160
804
- },
805
- {
806
- "epoch": 12.86,
807
- "learning_rate": 2.22989010989011e-05,
808
- "loss": 0.3221,
809
- "step": 1170
810
- },
811
- {
812
- "epoch": 12.97,
813
- "learning_rate": 2.2232967032967032e-05,
814
- "loss": 0.2657,
815
- "step": 1180
816
- },
817
- {
818
- "epoch": 13.08,
819
- "learning_rate": 2.2167032967032966e-05,
820
- "loss": 0.2456,
821
- "step": 1190
822
- },
823
- {
824
- "epoch": 13.19,
825
- "learning_rate": 2.2101098901098903e-05,
826
- "loss": 0.1745,
827
- "step": 1200
828
- },
829
- {
830
- "epoch": 13.19,
831
- "eval_accuracy": 0.7294117647058823,
832
- "eval_loss": 0.7527948617935181,
833
- "eval_runtime": 1.6243,
834
- "eval_samples_per_second": 104.661,
835
- "eval_steps_per_second": 13.544,
836
- "step": 1200
837
- },
838
- {
839
- "epoch": 13.3,
840
- "learning_rate": 2.2035164835164837e-05,
841
- "loss": 0.202,
842
- "step": 1210
843
- },
844
- {
845
- "epoch": 13.41,
846
- "learning_rate": 2.196923076923077e-05,
847
- "loss": 0.1601,
848
- "step": 1220
849
- },
850
- {
851
- "epoch": 13.52,
852
- "learning_rate": 2.1903296703296704e-05,
853
- "loss": 0.1467,
854
- "step": 1230
855
- },
856
- {
857
- "epoch": 13.63,
858
- "learning_rate": 2.1837362637362638e-05,
859
- "loss": 0.287,
860
- "step": 1240
861
- },
862
- {
863
- "epoch": 13.74,
864
- "learning_rate": 2.177142857142857e-05,
865
- "loss": 0.2805,
866
- "step": 1250
867
- },
868
- {
869
- "epoch": 13.85,
870
- "learning_rate": 2.170549450549451e-05,
871
- "loss": 0.2405,
872
- "step": 1260
873
- },
874
- {
875
- "epoch": 13.96,
876
- "learning_rate": 2.163956043956044e-05,
877
- "loss": 0.3035,
878
- "step": 1270
879
- },
880
- {
881
- "epoch": 14.07,
882
- "learning_rate": 2.1573626373626373e-05,
883
- "loss": 0.2273,
884
- "step": 1280
885
- },
886
- {
887
- "epoch": 14.18,
888
- "learning_rate": 2.1507692307692306e-05,
889
- "loss": 0.1933,
890
- "step": 1290
891
- },
892
- {
893
- "epoch": 14.29,
894
- "learning_rate": 2.144175824175824e-05,
895
- "loss": 0.1894,
896
- "step": 1300
897
- },
898
- {
899
- "epoch": 14.29,
900
- "eval_accuracy": 0.7470588235294118,
901
- "eval_loss": 0.7634099721908569,
902
- "eval_runtime": 1.6355,
903
- "eval_samples_per_second": 103.947,
904
- "eval_steps_per_second": 13.452,
905
- "step": 1300
906
- },
907
- {
908
- "epoch": 14.4,
909
- "learning_rate": 2.1375824175824177e-05,
910
- "loss": 0.1862,
911
- "step": 1310
912
- },
913
- {
914
- "epoch": 14.51,
915
- "learning_rate": 2.130989010989011e-05,
916
- "loss": 0.258,
917
- "step": 1320
918
- },
919
- {
920
- "epoch": 14.62,
921
- "learning_rate": 2.1243956043956045e-05,
922
- "loss": 0.231,
923
- "step": 1330
924
- },
925
- {
926
- "epoch": 14.73,
927
- "learning_rate": 2.1178021978021978e-05,
928
- "loss": 0.2558,
929
- "step": 1340
930
- },
931
- {
932
- "epoch": 14.84,
933
- "learning_rate": 2.1112087912087912e-05,
934
- "loss": 0.1848,
935
- "step": 1350
936
- },
937
- {
938
- "epoch": 14.95,
939
- "learning_rate": 2.1046153846153846e-05,
940
- "loss": 0.2495,
941
- "step": 1360
942
- },
943
- {
944
- "epoch": 15.05,
945
- "learning_rate": 2.0980219780219783e-05,
946
- "loss": 0.2079,
947
- "step": 1370
948
- },
949
- {
950
- "epoch": 15.16,
951
- "learning_rate": 2.0914285714285716e-05,
952
- "loss": 0.1803,
953
- "step": 1380
954
- },
955
- {
956
- "epoch": 15.27,
957
- "learning_rate": 2.084835164835165e-05,
958
- "loss": 0.2434,
959
- "step": 1390
960
- },
961
- {
962
- "epoch": 15.38,
963
- "learning_rate": 2.0782417582417584e-05,
964
- "loss": 0.1641,
965
- "step": 1400
966
- },
967
- {
968
- "epoch": 15.38,
969
- "eval_accuracy": 0.7647058823529411,
970
- "eval_loss": 0.7208631634712219,
971
- "eval_runtime": 1.7468,
972
- "eval_samples_per_second": 97.323,
973
- "eval_steps_per_second": 12.595,
974
- "step": 1400
975
- },
976
- {
977
- "epoch": 15.49,
978
- "learning_rate": 2.0716483516483514e-05,
979
- "loss": 0.2347,
980
- "step": 1410
981
- },
982
- {
983
- "epoch": 15.6,
984
- "learning_rate": 2.065054945054945e-05,
985
- "loss": 0.1929,
986
- "step": 1420
987
- },
988
- {
989
- "epoch": 15.71,
990
- "learning_rate": 2.0584615384615385e-05,
991
- "loss": 0.1588,
992
- "step": 1430
993
- },
994
- {
995
- "epoch": 15.82,
996
- "learning_rate": 2.051868131868132e-05,
997
- "loss": 0.2216,
998
- "step": 1440
999
- },
1000
- {
1001
- "epoch": 15.93,
1002
- "learning_rate": 2.0452747252747252e-05,
1003
- "loss": 0.1755,
1004
- "step": 1450
1005
- },
1006
- {
1007
- "epoch": 16.04,
1008
- "learning_rate": 2.0386813186813186e-05,
1009
- "loss": 0.2623,
1010
- "step": 1460
1011
- },
1012
- {
1013
- "epoch": 16.15,
1014
- "learning_rate": 2.032087912087912e-05,
1015
- "loss": 0.2498,
1016
- "step": 1470
1017
- },
1018
- {
1019
- "epoch": 16.26,
1020
- "learning_rate": 2.0254945054945057e-05,
1021
- "loss": 0.2103,
1022
- "step": 1480
1023
- },
1024
- {
1025
- "epoch": 16.37,
1026
- "learning_rate": 2.018901098901099e-05,
1027
- "loss": 0.1713,
1028
- "step": 1490
1029
- },
1030
- {
1031
- "epoch": 16.48,
1032
- "learning_rate": 2.0123076923076924e-05,
1033
- "loss": 0.1932,
1034
- "step": 1500
1035
- },
1036
- {
1037
- "epoch": 16.48,
1038
- "eval_accuracy": 0.7,
1039
- "eval_loss": 0.9090902805328369,
1040
- "eval_runtime": 1.6369,
1041
- "eval_samples_per_second": 103.854,
1042
- "eval_steps_per_second": 13.44,
1043
- "step": 1500
1044
- },
1045
- {
1046
- "epoch": 16.59,
1047
- "learning_rate": 2.0057142857142858e-05,
1048
- "loss": 0.256,
1049
- "step": 1510
1050
- },
1051
- {
1052
- "epoch": 16.7,
1053
- "learning_rate": 1.999120879120879e-05,
1054
- "loss": 0.1954,
1055
- "step": 1520
1056
- },
1057
- {
1058
- "epoch": 16.81,
1059
- "learning_rate": 1.992527472527473e-05,
1060
- "loss": 0.2644,
1061
- "step": 1530
1062
- },
1063
- {
1064
- "epoch": 16.92,
1065
- "learning_rate": 1.9859340659340662e-05,
1066
- "loss": 0.2049,
1067
- "step": 1540
1068
- },
1069
- {
1070
- "epoch": 17.03,
1071
- "learning_rate": 1.9793406593406596e-05,
1072
- "loss": 0.1822,
1073
- "step": 1550
1074
- },
1075
- {
1076
- "epoch": 17.14,
1077
- "learning_rate": 1.9727472527472526e-05,
1078
- "loss": 0.1925,
1079
- "step": 1560
1080
- },
1081
- {
1082
- "epoch": 17.25,
1083
- "learning_rate": 1.966153846153846e-05,
1084
- "loss": 0.2075,
1085
- "step": 1570
1086
- },
1087
- {
1088
- "epoch": 17.36,
1089
- "learning_rate": 1.9595604395604394e-05,
1090
- "loss": 0.2604,
1091
- "step": 1580
1092
- },
1093
- {
1094
- "epoch": 17.47,
1095
- "learning_rate": 1.952967032967033e-05,
1096
- "loss": 0.1662,
1097
- "step": 1590
1098
- },
1099
- {
1100
- "epoch": 17.58,
1101
- "learning_rate": 1.9463736263736264e-05,
1102
- "loss": 0.1609,
1103
- "step": 1600
1104
- },
1105
- {
1106
- "epoch": 17.58,
1107
- "eval_accuracy": 0.7588235294117647,
1108
- "eval_loss": 0.7208409309387207,
1109
- "eval_runtime": 1.6376,
1110
- "eval_samples_per_second": 103.809,
1111
- "eval_steps_per_second": 13.434,
1112
- "step": 1600
1113
- },
1114
- {
1115
- "epoch": 17.69,
1116
- "learning_rate": 1.9397802197802198e-05,
1117
- "loss": 0.1817,
1118
- "step": 1610
1119
- },
1120
- {
1121
- "epoch": 17.8,
1122
- "learning_rate": 1.9331868131868132e-05,
1123
- "loss": 0.2423,
1124
- "step": 1620
1125
- },
1126
- {
1127
- "epoch": 17.91,
1128
- "learning_rate": 1.9265934065934065e-05,
1129
- "loss": 0.1722,
1130
- "step": 1630
1131
- },
1132
- {
1133
- "epoch": 18.02,
1134
- "learning_rate": 1.9200000000000003e-05,
1135
- "loss": 0.1544,
1136
- "step": 1640
1137
- },
1138
- {
1139
- "epoch": 18.13,
1140
- "learning_rate": 1.9134065934065936e-05,
1141
- "loss": 0.1976,
1142
- "step": 1650
1143
- },
1144
- {
1145
- "epoch": 18.24,
1146
- "learning_rate": 1.906813186813187e-05,
1147
- "loss": 0.2191,
1148
- "step": 1660
1149
- },
1150
- {
1151
- "epoch": 18.35,
1152
- "learning_rate": 1.9002197802197804e-05,
1153
- "loss": 0.1458,
1154
- "step": 1670
1155
- },
1156
- {
1157
- "epoch": 18.46,
1158
- "learning_rate": 1.8936263736263737e-05,
1159
- "loss": 0.2027,
1160
- "step": 1680
1161
- },
1162
- {
1163
- "epoch": 18.57,
1164
- "learning_rate": 1.887032967032967e-05,
1165
- "loss": 0.1637,
1166
- "step": 1690
1167
- },
1168
- {
1169
- "epoch": 18.68,
1170
- "learning_rate": 1.8804395604395605e-05,
1171
- "loss": 0.132,
1172
- "step": 1700
1173
- },
1174
- {
1175
- "epoch": 18.68,
1176
- "eval_accuracy": 0.7588235294117647,
1177
- "eval_loss": 0.8486713171005249,
1178
- "eval_runtime": 1.6357,
1179
- "eval_samples_per_second": 103.93,
1180
- "eval_steps_per_second": 13.45,
1181
- "step": 1700
1182
- },
1183
- {
1184
- "epoch": 18.79,
1185
- "learning_rate": 1.873846153846154e-05,
1186
- "loss": 0.1319,
1187
- "step": 1710
1188
- },
1189
- {
1190
- "epoch": 18.9,
1191
- "learning_rate": 1.8672527472527472e-05,
1192
- "loss": 0.2005,
1193
- "step": 1720
1194
- },
1195
- {
1196
- "epoch": 19.01,
1197
- "learning_rate": 1.8606593406593406e-05,
1198
- "loss": 0.2234,
1199
- "step": 1730
1200
- },
1201
- {
1202
- "epoch": 19.12,
1203
- "learning_rate": 1.854065934065934e-05,
1204
- "loss": 0.188,
1205
- "step": 1740
1206
- },
1207
- {
1208
- "epoch": 19.23,
1209
- "learning_rate": 1.8474725274725277e-05,
1210
- "loss": 0.1322,
1211
- "step": 1750
1212
- },
1213
- {
1214
- "epoch": 19.34,
1215
- "learning_rate": 1.840879120879121e-05,
1216
- "loss": 0.2137,
1217
- "step": 1760
1218
- },
1219
- {
1220
- "epoch": 19.45,
1221
- "learning_rate": 1.8342857142857144e-05,
1222
- "loss": 0.2314,
1223
- "step": 1770
1224
- },
1225
- {
1226
- "epoch": 19.56,
1227
- "learning_rate": 1.8276923076923078e-05,
1228
- "loss": 0.1987,
1229
- "step": 1780
1230
- },
1231
- {
1232
- "epoch": 19.67,
1233
- "learning_rate": 1.821098901098901e-05,
1234
- "loss": 0.1955,
1235
- "step": 1790
1236
- },
1237
- {
1238
- "epoch": 19.78,
1239
- "learning_rate": 1.8145054945054945e-05,
1240
- "loss": 0.1903,
1241
- "step": 1800
1242
- },
1243
- {
1244
- "epoch": 19.78,
1245
- "eval_accuracy": 0.7470588235294118,
1246
- "eval_loss": 0.7911531329154968,
1247
- "eval_runtime": 1.6371,
1248
- "eval_samples_per_second": 103.842,
1249
- "eval_steps_per_second": 13.438,
1250
- "step": 1800
1251
- },
1252
- {
1253
- "epoch": 19.89,
1254
- "learning_rate": 1.8079120879120882e-05,
1255
- "loss": 0.1333,
1256
- "step": 1810
1257
- },
1258
- {
1259
- "epoch": 20.0,
1260
- "learning_rate": 1.8013186813186816e-05,
1261
- "loss": 0.2119,
1262
- "step": 1820
1263
- },
1264
- {
1265
- "epoch": 20.11,
1266
- "learning_rate": 1.794725274725275e-05,
1267
- "loss": 0.1784,
1268
- "step": 1830
1269
- },
1270
- {
1271
- "epoch": 20.22,
1272
- "learning_rate": 1.788131868131868e-05,
1273
- "loss": 0.1821,
1274
- "step": 1840
1275
- },
1276
- {
1277
- "epoch": 20.33,
1278
- "learning_rate": 1.7815384615384613e-05,
1279
- "loss": 0.2508,
1280
- "step": 1850
1281
- },
1282
- {
1283
- "epoch": 20.44,
1284
- "learning_rate": 1.7749450549450547e-05,
1285
- "loss": 0.1599,
1286
- "step": 1860
1287
- },
1288
- {
1289
- "epoch": 20.55,
1290
- "learning_rate": 1.7683516483516484e-05,
1291
- "loss": 0.133,
1292
- "step": 1870
1293
- },
1294
- {
1295
- "epoch": 20.66,
1296
- "learning_rate": 1.7617582417582418e-05,
1297
- "loss": 0.1344,
1298
- "step": 1880
1299
- },
1300
- {
1301
- "epoch": 20.77,
1302
- "learning_rate": 1.755164835164835e-05,
1303
- "loss": 0.163,
1304
- "step": 1890
1305
- },
1306
- {
1307
- "epoch": 20.88,
1308
- "learning_rate": 1.7485714285714285e-05,
1309
- "loss": 0.121,
1310
- "step": 1900
1311
- },
1312
- {
1313
- "epoch": 20.88,
1314
- "eval_accuracy": 0.7470588235294118,
1315
- "eval_loss": 0.6734881401062012,
1316
- "eval_runtime": 1.6331,
1317
- "eval_samples_per_second": 104.095,
1318
- "eval_steps_per_second": 13.471,
1319
- "step": 1900
1320
- },
1321
- {
1322
- "epoch": 20.99,
1323
- "learning_rate": 1.741978021978022e-05,
1324
- "loss": 0.1984,
1325
- "step": 1910
1326
- },
1327
- {
1328
- "epoch": 21.1,
1329
- "learning_rate": 1.7353846153846156e-05,
1330
- "loss": 0.15,
1331
- "step": 1920
1332
- },
1333
- {
1334
- "epoch": 21.21,
1335
- "learning_rate": 1.728791208791209e-05,
1336
- "loss": 0.11,
1337
- "step": 1930
1338
- },
1339
- {
1340
- "epoch": 21.32,
1341
- "learning_rate": 1.7221978021978023e-05,
1342
- "loss": 0.1753,
1343
- "step": 1940
1344
- },
1345
- {
1346
- "epoch": 21.43,
1347
- "learning_rate": 1.7156043956043957e-05,
1348
- "loss": 0.1535,
1349
- "step": 1950
1350
- },
1351
- {
1352
- "epoch": 21.54,
1353
- "learning_rate": 1.709010989010989e-05,
1354
- "loss": 0.1269,
1355
- "step": 1960
1356
- },
1357
- {
1358
- "epoch": 21.65,
1359
- "learning_rate": 1.7024175824175825e-05,
1360
- "loss": 0.1711,
1361
- "step": 1970
1362
- },
1363
- {
1364
- "epoch": 21.76,
1365
- "learning_rate": 1.6958241758241758e-05,
1366
- "loss": 0.1871,
1367
- "step": 1980
1368
- },
1369
- {
1370
- "epoch": 21.87,
1371
- "learning_rate": 1.6892307692307692e-05,
1372
- "loss": 0.1544,
1373
- "step": 1990
1374
- },
1375
- {
1376
- "epoch": 21.98,
1377
- "learning_rate": 1.6826373626373626e-05,
1378
- "loss": 0.1903,
1379
- "step": 2000
1380
- },
1381
- {
1382
- "epoch": 21.98,
1383
- "eval_accuracy": 0.7823529411764706,
1384
- "eval_loss": 0.6692019104957581,
1385
- "eval_runtime": 1.6545,
1386
- "eval_samples_per_second": 102.753,
1387
- "eval_steps_per_second": 13.297,
1388
- "step": 2000
1389
- },
1390
- {
1391
- "epoch": 22.09,
1392
- "learning_rate": 1.676043956043956e-05,
1393
- "loss": 0.1258,
1394
- "step": 2010
1395
- },
1396
- {
1397
- "epoch": 22.2,
1398
- "learning_rate": 1.6694505494505493e-05,
1399
- "loss": 0.1057,
1400
- "step": 2020
1401
- },
1402
- {
1403
- "epoch": 22.31,
1404
- "learning_rate": 1.662857142857143e-05,
1405
- "loss": 0.0954,
1406
- "step": 2030
1407
- },
1408
- {
1409
- "epoch": 22.42,
1410
- "learning_rate": 1.6562637362637364e-05,
1411
- "loss": 0.2413,
1412
- "step": 2040
1413
- },
1414
- {
1415
- "epoch": 22.53,
1416
- "learning_rate": 1.6496703296703297e-05,
1417
- "loss": 0.1073,
1418
- "step": 2050
1419
- },
1420
- {
1421
- "epoch": 22.64,
1422
- "learning_rate": 1.643076923076923e-05,
1423
- "loss": 0.2491,
1424
- "step": 2060
1425
- },
1426
- {
1427
- "epoch": 22.75,
1428
- "learning_rate": 1.6364835164835165e-05,
1429
- "loss": 0.1972,
1430
- "step": 2070
1431
- },
1432
- {
1433
- "epoch": 22.86,
1434
- "learning_rate": 1.62989010989011e-05,
1435
- "loss": 0.1852,
1436
- "step": 2080
1437
- },
1438
- {
1439
- "epoch": 22.97,
1440
- "learning_rate": 1.6232967032967036e-05,
1441
- "loss": 0.135,
1442
- "step": 2090
1443
- },
1444
- {
1445
- "epoch": 23.08,
1446
- "learning_rate": 1.616703296703297e-05,
1447
- "loss": 0.176,
1448
- "step": 2100
1449
- },
1450
- {
1451
- "epoch": 23.08,
1452
- "eval_accuracy": 0.7176470588235294,
1453
- "eval_loss": 0.8350964784622192,
1454
- "eval_runtime": 1.6244,
1455
- "eval_samples_per_second": 104.652,
1456
- "eval_steps_per_second": 13.543,
1457
- "step": 2100
1458
- },
1459
- {
1460
- "epoch": 23.19,
1461
- "learning_rate": 1.6101098901098903e-05,
1462
- "loss": 0.1485,
1463
- "step": 2110
1464
- },
1465
- {
1466
- "epoch": 23.3,
1467
- "learning_rate": 1.6035164835164837e-05,
1468
- "loss": 0.1436,
1469
- "step": 2120
1470
- },
1471
- {
1472
- "epoch": 23.41,
1473
- "learning_rate": 1.5969230769230767e-05,
1474
- "loss": 0.1783,
1475
- "step": 2130
1476
- },
1477
- {
1478
- "epoch": 23.52,
1479
- "learning_rate": 1.5903296703296704e-05,
1480
- "loss": 0.1655,
1481
- "step": 2140
1482
- },
1483
- {
1484
- "epoch": 23.63,
1485
- "learning_rate": 1.5837362637362638e-05,
1486
- "loss": 0.1274,
1487
- "step": 2150
1488
- },
1489
- {
1490
- "epoch": 23.74,
1491
- "learning_rate": 1.577142857142857e-05,
1492
- "loss": 0.1261,
1493
- "step": 2160
1494
- },
1495
- {
1496
- "epoch": 23.85,
1497
- "learning_rate": 1.5705494505494505e-05,
1498
- "loss": 0.2141,
1499
- "step": 2170
1500
- },
1501
- {
1502
- "epoch": 23.96,
1503
- "learning_rate": 1.563956043956044e-05,
1504
- "loss": 0.2215,
1505
- "step": 2180
1506
- },
1507
- {
1508
- "epoch": 24.07,
1509
- "learning_rate": 1.5573626373626373e-05,
1510
- "loss": 0.2243,
1511
- "step": 2190
1512
- },
1513
- {
1514
- "epoch": 24.18,
1515
- "learning_rate": 1.550769230769231e-05,
1516
- "loss": 0.1186,
1517
- "step": 2200
1518
- },
1519
- {
1520
- "epoch": 24.18,
1521
- "eval_accuracy": 0.7470588235294118,
1522
- "eval_loss": 0.7317846417427063,
1523
- "eval_runtime": 1.63,
1524
- "eval_samples_per_second": 104.295,
1525
- "eval_steps_per_second": 13.497,
1526
- "step": 2200
1527
- },
1528
- {
1529
- "epoch": 24.29,
1530
- "learning_rate": 1.5441758241758243e-05,
1531
- "loss": 0.2046,
1532
- "step": 2210
1533
- },
1534
- {
1535
- "epoch": 24.4,
1536
- "learning_rate": 1.5375824175824177e-05,
1537
- "loss": 0.1992,
1538
- "step": 2220
1539
- },
1540
- {
1541
- "epoch": 24.51,
1542
- "learning_rate": 1.530989010989011e-05,
1543
- "loss": 0.1289,
1544
- "step": 2230
1545
- },
1546
- {
1547
- "epoch": 24.62,
1548
- "learning_rate": 1.5243956043956046e-05,
1549
- "loss": 0.1033,
1550
- "step": 2240
1551
- },
1552
- {
1553
- "epoch": 24.73,
1554
- "learning_rate": 1.517802197802198e-05,
1555
- "loss": 0.2098,
1556
- "step": 2250
1557
- },
1558
- {
1559
- "epoch": 24.84,
1560
- "learning_rate": 1.5112087912087913e-05,
1561
- "loss": 0.0928,
1562
- "step": 2260
1563
- },
1564
- {
1565
- "epoch": 24.95,
1566
- "learning_rate": 1.5046153846153845e-05,
1567
- "loss": 0.1469,
1568
- "step": 2270
1569
- },
1570
- {
1571
- "epoch": 25.05,
1572
- "learning_rate": 1.498021978021978e-05,
1573
- "loss": 0.0645,
1574
- "step": 2280
1575
- },
1576
- {
1577
- "epoch": 25.16,
1578
- "learning_rate": 1.4914285714285715e-05,
1579
- "loss": 0.1786,
1580
- "step": 2290
1581
- },
1582
- {
1583
- "epoch": 25.27,
1584
- "learning_rate": 1.4848351648351648e-05,
1585
- "loss": 0.1424,
1586
- "step": 2300
1587
- },
1588
- {
1589
- "epoch": 25.27,
1590
- "eval_accuracy": 0.7588235294117647,
1591
- "eval_loss": 0.7859658598899841,
1592
- "eval_runtime": 1.6259,
1593
- "eval_samples_per_second": 104.556,
1594
- "eval_steps_per_second": 13.531,
1595
- "step": 2300
1596
- },
1597
- {
1598
- "epoch": 25.38,
1599
- "learning_rate": 1.4782417582417582e-05,
1600
- "loss": 0.0742,
1601
- "step": 2310
1602
- },
1603
- {
1604
- "epoch": 25.49,
1605
- "learning_rate": 1.4716483516483517e-05,
1606
- "loss": 0.1224,
1607
- "step": 2320
1608
- },
1609
- {
1610
- "epoch": 25.6,
1611
- "learning_rate": 1.4650549450549451e-05,
1612
- "loss": 0.1334,
1613
- "step": 2330
1614
- },
1615
- {
1616
- "epoch": 25.71,
1617
- "learning_rate": 1.4584615384615385e-05,
1618
- "loss": 0.1924,
1619
- "step": 2340
1620
- },
1621
- {
1622
- "epoch": 25.82,
1623
- "learning_rate": 1.451868131868132e-05,
1624
- "loss": 0.1386,
1625
- "step": 2350
1626
- },
1627
- {
1628
- "epoch": 25.93,
1629
- "learning_rate": 1.4452747252747254e-05,
1630
- "loss": 0.1982,
1631
- "step": 2360
1632
- },
1633
- {
1634
- "epoch": 26.04,
1635
- "learning_rate": 1.4386813186813186e-05,
1636
- "loss": 0.1341,
1637
- "step": 2370
1638
- },
1639
- {
1640
- "epoch": 26.15,
1641
- "learning_rate": 1.4320879120879121e-05,
1642
- "loss": 0.1664,
1643
- "step": 2380
1644
- },
1645
- {
1646
- "epoch": 26.26,
1647
- "learning_rate": 1.4254945054945055e-05,
1648
- "loss": 0.0903,
1649
- "step": 2390
1650
- },
1651
- {
1652
- "epoch": 26.37,
1653
- "learning_rate": 1.4189010989010989e-05,
1654
- "loss": 0.144,
1655
- "step": 2400
1656
- },
1657
- {
1658
- "epoch": 26.37,
1659
- "eval_accuracy": 0.788235294117647,
1660
- "eval_loss": 0.7021328806877136,
1661
- "eval_runtime": 1.618,
1662
- "eval_samples_per_second": 105.065,
1663
- "eval_steps_per_second": 13.597,
1664
- "step": 2400
1665
- },
1666
- {
1667
- "epoch": 26.48,
1668
- "learning_rate": 1.4123076923076924e-05,
1669
- "loss": 0.1747,
1670
- "step": 2410
1671
- },
1672
- {
1673
- "epoch": 26.59,
1674
- "learning_rate": 1.4057142857142858e-05,
1675
- "loss": 0.1655,
1676
- "step": 2420
1677
- },
1678
- {
1679
- "epoch": 26.7,
1680
- "learning_rate": 1.3991208791208793e-05,
1681
- "loss": 0.1003,
1682
- "step": 2430
1683
- },
1684
- {
1685
- "epoch": 26.81,
1686
- "learning_rate": 1.3925274725274727e-05,
1687
- "loss": 0.142,
1688
- "step": 2440
1689
- },
1690
- {
1691
- "epoch": 26.92,
1692
- "learning_rate": 1.3859340659340659e-05,
1693
- "loss": 0.089,
1694
- "step": 2450
1695
- },
1696
- {
1697
- "epoch": 27.03,
1698
- "learning_rate": 1.3793406593406594e-05,
1699
- "loss": 0.2091,
1700
- "step": 2460
1701
- },
1702
- {
1703
- "epoch": 27.14,
1704
- "learning_rate": 1.3727472527472528e-05,
1705
- "loss": 0.1794,
1706
- "step": 2470
1707
- },
1708
- {
1709
- "epoch": 27.25,
1710
- "learning_rate": 1.3661538461538461e-05,
1711
- "loss": 0.133,
1712
- "step": 2480
1713
- },
1714
- {
1715
- "epoch": 27.36,
1716
- "learning_rate": 1.3595604395604397e-05,
1717
- "loss": 0.1039,
1718
- "step": 2490
1719
- },
1720
- {
1721
- "epoch": 27.47,
1722
- "learning_rate": 1.352967032967033e-05,
1723
- "loss": 0.1088,
1724
- "step": 2500
1725
- },
1726
- {
1727
- "epoch": 27.47,
1728
- "eval_accuracy": 0.7470588235294118,
1729
- "eval_loss": 0.8109354972839355,
1730
- "eval_runtime": 1.7131,
1731
- "eval_samples_per_second": 99.233,
1732
- "eval_steps_per_second": 12.842,
1733
- "step": 2500
1734
- },
1735
- {
1736
- "epoch": 27.58,
1737
- "learning_rate": 1.3463736263736264e-05,
1738
- "loss": 0.1443,
1739
- "step": 2510
1740
- },
1741
- {
1742
- "epoch": 27.69,
1743
- "learning_rate": 1.3397802197802198e-05,
1744
- "loss": 0.1106,
1745
- "step": 2520
1746
- },
1747
- {
1748
- "epoch": 27.8,
1749
- "learning_rate": 1.3331868131868132e-05,
1750
- "loss": 0.0908,
1751
- "step": 2530
1752
- },
1753
- {
1754
- "epoch": 27.91,
1755
- "learning_rate": 1.3265934065934065e-05,
1756
- "loss": 0.1268,
1757
- "step": 2540
1758
- },
1759
- {
1760
- "epoch": 28.02,
1761
- "learning_rate": 1.32e-05,
1762
- "loss": 0.194,
1763
- "step": 2550
1764
- },
1765
- {
1766
- "epoch": 28.13,
1767
- "learning_rate": 1.3134065934065934e-05,
1768
- "loss": 0.1085,
1769
- "step": 2560
1770
- },
1771
- {
1772
- "epoch": 28.24,
1773
- "learning_rate": 1.306813186813187e-05,
1774
- "loss": 0.0856,
1775
- "step": 2570
1776
- },
1777
- {
1778
- "epoch": 28.35,
1779
- "learning_rate": 1.3002197802197803e-05,
1780
- "loss": 0.1157,
1781
- "step": 2580
1782
- },
1783
- {
1784
- "epoch": 28.46,
1785
- "learning_rate": 1.2936263736263735e-05,
1786
- "loss": 0.0941,
1787
- "step": 2590
1788
- },
1789
- {
1790
- "epoch": 28.57,
1791
- "learning_rate": 1.287032967032967e-05,
1792
- "loss": 0.1019,
1793
- "step": 2600
1794
- },
1795
- {
1796
- "epoch": 28.57,
1797
- "eval_accuracy": 0.7470588235294118,
1798
- "eval_loss": 0.8157252669334412,
1799
- "eval_runtime": 1.6087,
1800
- "eval_samples_per_second": 105.676,
1801
- "eval_steps_per_second": 13.676,
1802
- "step": 2600
1803
- },
1804
- {
1805
- "epoch": 28.68,
1806
- "learning_rate": 1.2804395604395605e-05,
1807
- "loss": 0.1466,
1808
- "step": 2610
1809
- },
1810
- {
1811
- "epoch": 28.79,
1812
- "learning_rate": 1.2738461538461538e-05,
1813
- "loss": 0.0661,
1814
- "step": 2620
1815
- },
1816
- {
1817
- "epoch": 28.9,
1818
- "learning_rate": 1.2672527472527474e-05,
1819
- "loss": 0.2147,
1820
- "step": 2630
1821
- },
1822
- {
1823
- "epoch": 29.01,
1824
- "learning_rate": 1.2606593406593407e-05,
1825
- "loss": 0.2561,
1826
- "step": 2640
1827
- },
1828
- {
1829
- "epoch": 29.12,
1830
- "learning_rate": 1.2540659340659341e-05,
1831
- "loss": 0.1408,
1832
- "step": 2650
1833
- },
1834
- {
1835
- "epoch": 29.23,
1836
- "learning_rate": 1.2474725274725275e-05,
1837
- "loss": 0.0896,
1838
- "step": 2660
1839
- },
1840
- {
1841
- "epoch": 29.34,
1842
- "learning_rate": 1.2408791208791208e-05,
1843
- "loss": 0.1356,
1844
- "step": 2670
1845
- },
1846
- {
1847
- "epoch": 29.45,
1848
- "learning_rate": 1.2342857142857144e-05,
1849
- "loss": 0.0914,
1850
- "step": 2680
1851
- },
1852
- {
1853
- "epoch": 29.56,
1854
- "learning_rate": 1.2276923076923077e-05,
1855
- "loss": 0.1355,
1856
- "step": 2690
1857
- },
1858
- {
1859
- "epoch": 29.67,
1860
- "learning_rate": 1.2210989010989011e-05,
1861
- "loss": 0.0947,
1862
- "step": 2700
1863
- },
1864
- {
1865
- "epoch": 29.67,
1866
- "eval_accuracy": 0.7588235294117647,
1867
- "eval_loss": 0.8027762174606323,
1868
- "eval_runtime": 1.6188,
1869
- "eval_samples_per_second": 105.014,
1870
- "eval_steps_per_second": 13.59,
1871
- "step": 2700
1872
- },
1873
- {
1874
- "epoch": 29.78,
1875
- "learning_rate": 1.2145054945054947e-05,
1876
- "loss": 0.0745,
1877
- "step": 2710
1878
- },
1879
- {
1880
- "epoch": 29.89,
1881
- "learning_rate": 1.207912087912088e-05,
1882
- "loss": 0.172,
1883
- "step": 2720
1884
- },
1885
- {
1886
- "epoch": 30.0,
1887
- "learning_rate": 1.2013186813186812e-05,
1888
- "loss": 0.1363,
1889
- "step": 2730
1890
- },
1891
- {
1892
- "epoch": 30.11,
1893
- "learning_rate": 1.1947252747252748e-05,
1894
- "loss": 0.1109,
1895
- "step": 2740
1896
- },
1897
- {
1898
- "epoch": 30.22,
1899
- "learning_rate": 1.1881318681318681e-05,
1900
- "loss": 0.0705,
1901
- "step": 2750
1902
- },
1903
- {
1904
- "epoch": 30.33,
1905
- "learning_rate": 1.1815384615384615e-05,
1906
- "loss": 0.1153,
1907
- "step": 2760
1908
- },
1909
- {
1910
- "epoch": 30.44,
1911
- "learning_rate": 1.174945054945055e-05,
1912
- "loss": 0.0552,
1913
- "step": 2770
1914
- },
1915
- {
1916
- "epoch": 30.55,
1917
- "learning_rate": 1.1683516483516484e-05,
1918
- "loss": 0.1828,
1919
- "step": 2780
1920
- },
1921
- {
1922
- "epoch": 30.66,
1923
- "learning_rate": 1.1617582417582418e-05,
1924
- "loss": 0.1159,
1925
- "step": 2790
1926
- },
1927
- {
1928
- "epoch": 30.77,
1929
- "learning_rate": 1.1551648351648351e-05,
1930
- "loss": 0.1715,
1931
- "step": 2800
1932
- },
1933
- {
1934
- "epoch": 30.77,
1935
- "eval_accuracy": 0.7470588235294118,
1936
- "eval_loss": 0.8344667553901672,
1937
- "eval_runtime": 1.6594,
1938
- "eval_samples_per_second": 102.448,
1939
- "eval_steps_per_second": 13.258,
1940
- "step": 2800
1941
- },
1942
- {
1943
- "epoch": 30.88,
1944
- "learning_rate": 1.1485714285714285e-05,
1945
- "loss": 0.2173,
1946
- "step": 2810
1947
- },
1948
- {
1949
- "epoch": 30.99,
1950
- "learning_rate": 1.141978021978022e-05,
1951
- "loss": 0.0776,
1952
- "step": 2820
1953
- },
1954
- {
1955
- "epoch": 31.1,
1956
- "learning_rate": 1.1353846153846154e-05,
1957
- "loss": 0.0849,
1958
- "step": 2830
1959
- },
1960
- {
1961
- "epoch": 31.21,
1962
- "learning_rate": 1.1287912087912088e-05,
1963
- "loss": 0.1367,
1964
- "step": 2840
1965
- },
1966
- {
1967
- "epoch": 31.32,
1968
- "learning_rate": 1.1221978021978023e-05,
1969
- "loss": 0.1146,
1970
- "step": 2850
1971
- },
1972
- {
1973
- "epoch": 31.43,
1974
- "learning_rate": 1.1156043956043957e-05,
1975
- "loss": 0.2185,
1976
- "step": 2860
1977
- },
1978
- {
1979
- "epoch": 31.54,
1980
- "learning_rate": 1.1090109890109889e-05,
1981
- "loss": 0.1426,
1982
- "step": 2870
1983
- },
1984
- {
1985
- "epoch": 31.65,
1986
- "learning_rate": 1.1024175824175824e-05,
1987
- "loss": 0.1934,
1988
- "step": 2880
1989
- },
1990
- {
1991
- "epoch": 31.76,
1992
- "learning_rate": 1.0958241758241758e-05,
1993
- "loss": 0.1535,
1994
- "step": 2890
1995
- },
1996
- {
1997
- "epoch": 31.87,
1998
- "learning_rate": 1.0892307692307692e-05,
1999
- "loss": 0.1046,
2000
- "step": 2900
2001
- },
2002
- {
2003
- "epoch": 31.87,
2004
- "eval_accuracy": 0.7411764705882353,
2005
- "eval_loss": 0.8577510118484497,
2006
- "eval_runtime": 1.6297,
2007
- "eval_samples_per_second": 104.313,
2008
- "eval_steps_per_second": 13.499,
2009
- "step": 2900
2010
- },
2011
- {
2012
- "epoch": 31.98,
2013
- "learning_rate": 1.0826373626373627e-05,
2014
- "loss": 0.1664,
2015
- "step": 2910
2016
- },
2017
- {
2018
- "epoch": 32.09,
2019
- "learning_rate": 1.076043956043956e-05,
2020
- "loss": 0.1792,
2021
- "step": 2920
2022
- },
2023
- {
2024
- "epoch": 32.2,
2025
- "learning_rate": 1.0694505494505496e-05,
2026
- "loss": 0.1166,
2027
- "step": 2930
2028
- },
2029
- {
2030
- "epoch": 32.31,
2031
- "learning_rate": 1.0628571428571428e-05,
2032
- "loss": 0.078,
2033
- "step": 2940
2034
- },
2035
- {
2036
- "epoch": 32.42,
2037
- "learning_rate": 1.0562637362637362e-05,
2038
- "loss": 0.0868,
2039
- "step": 2950
2040
- },
2041
- {
2042
- "epoch": 32.53,
2043
- "learning_rate": 1.0496703296703297e-05,
2044
- "loss": 0.0976,
2045
- "step": 2960
2046
- },
2047
- {
2048
- "epoch": 32.64,
2049
- "learning_rate": 1.0430769230769231e-05,
2050
- "loss": 0.2388,
2051
- "step": 2970
2052
- },
2053
- {
2054
- "epoch": 32.75,
2055
- "learning_rate": 1.0364835164835165e-05,
2056
- "loss": 0.1135,
2057
- "step": 2980
2058
- },
2059
- {
2060
- "epoch": 32.86,
2061
- "learning_rate": 1.02989010989011e-05,
2062
- "loss": 0.1377,
2063
- "step": 2990
2064
- },
2065
- {
2066
- "epoch": 32.97,
2067
- "learning_rate": 1.0232967032967034e-05,
2068
- "loss": 0.1367,
2069
- "step": 3000
2070
- },
2071
- {
2072
- "epoch": 32.97,
2073
- "eval_accuracy": 0.788235294117647,
2074
- "eval_loss": 0.7669554948806763,
2075
- "eval_runtime": 1.6159,
2076
- "eval_samples_per_second": 105.202,
2077
- "eval_steps_per_second": 13.614,
2078
- "step": 3000
2079
- },
2080
- {
2081
- "epoch": 33.08,
2082
- "learning_rate": 1.0167032967032966e-05,
2083
- "loss": 0.1076,
2084
- "step": 3010
2085
- },
2086
- {
2087
- "epoch": 33.19,
2088
- "learning_rate": 1.0101098901098901e-05,
2089
- "loss": 0.1596,
2090
- "step": 3020
2091
- },
2092
- {
2093
- "epoch": 33.3,
2094
- "learning_rate": 1.0035164835164835e-05,
2095
- "loss": 0.1152,
2096
- "step": 3030
2097
- },
2098
- {
2099
- "epoch": 33.41,
2100
- "learning_rate": 9.96923076923077e-06,
2101
- "loss": 0.1093,
2102
- "step": 3040
2103
- },
2104
- {
2105
- "epoch": 33.52,
2106
- "learning_rate": 9.903296703296704e-06,
2107
- "loss": 0.1465,
2108
- "step": 3050
2109
- },
2110
- {
2111
- "epoch": 33.63,
2112
- "learning_rate": 9.843956043956044e-06,
2113
- "loss": 0.1281,
2114
- "step": 3060
2115
- },
2116
- {
2117
- "epoch": 33.74,
2118
- "learning_rate": 9.778021978021979e-06,
2119
- "loss": 0.0871,
2120
- "step": 3070
2121
- },
2122
- {
2123
- "epoch": 33.85,
2124
- "learning_rate": 9.712087912087913e-06,
2125
- "loss": 0.0713,
2126
- "step": 3080
2127
- },
2128
- {
2129
- "epoch": 33.96,
2130
- "learning_rate": 9.646153846153846e-06,
2131
- "loss": 0.0605,
2132
- "step": 3090
2133
- },
2134
- {
2135
- "epoch": 34.07,
2136
- "learning_rate": 9.580219780219782e-06,
2137
- "loss": 0.1339,
2138
- "step": 3100
2139
- },
2140
- {
2141
- "epoch": 34.07,
2142
- "eval_accuracy": 0.7647058823529411,
2143
- "eval_loss": 0.776252269744873,
2144
- "eval_runtime": 1.6196,
2145
- "eval_samples_per_second": 104.967,
2146
- "eval_steps_per_second": 13.584,
2147
- "step": 3100
2148
- },
2149
- {
2150
- "epoch": 34.18,
2151
- "learning_rate": 9.514285714285714e-06,
2152
- "loss": 0.1543,
2153
- "step": 3110
2154
- },
2155
- {
2156
- "epoch": 34.29,
2157
- "learning_rate": 9.448351648351647e-06,
2158
- "loss": 0.0526,
2159
- "step": 3120
2160
- },
2161
- {
2162
- "epoch": 34.4,
2163
- "learning_rate": 9.382417582417583e-06,
2164
- "loss": 0.1342,
2165
- "step": 3130
2166
- },
2167
- {
2168
- "epoch": 34.51,
2169
- "learning_rate": 9.316483516483516e-06,
2170
- "loss": 0.0849,
2171
- "step": 3140
2172
- },
2173
- {
2174
- "epoch": 34.62,
2175
- "learning_rate": 9.25054945054945e-06,
2176
- "loss": 0.0898,
2177
- "step": 3150
2178
- },
2179
- {
2180
- "epoch": 34.73,
2181
- "learning_rate": 9.184615384615386e-06,
2182
- "loss": 0.1102,
2183
- "step": 3160
2184
- },
2185
- {
2186
- "epoch": 34.84,
2187
- "learning_rate": 9.11868131868132e-06,
2188
- "loss": 0.1245,
2189
- "step": 3170
2190
- },
2191
- {
2192
- "epoch": 34.95,
2193
- "learning_rate": 9.052747252747255e-06,
2194
- "loss": 0.1517,
2195
- "step": 3180
2196
- },
2197
- {
2198
- "epoch": 35.05,
2199
- "learning_rate": 8.986813186813187e-06,
2200
- "loss": 0.15,
2201
- "step": 3190
2202
- },
2203
- {
2204
- "epoch": 35.16,
2205
- "learning_rate": 8.92087912087912e-06,
2206
- "loss": 0.1194,
2207
- "step": 3200
2208
- },
2209
- {
2210
- "epoch": 35.16,
2211
- "eval_accuracy": 0.7705882352941177,
2212
- "eval_loss": 0.7726542949676514,
2213
- "eval_runtime": 1.6435,
2214
- "eval_samples_per_second": 103.438,
2215
- "eval_steps_per_second": 13.386,
2216
- "step": 3200
2217
- },
2218
- {
2219
- "epoch": 35.27,
2220
- "learning_rate": 8.854945054945056e-06,
2221
- "loss": 0.0909,
2222
- "step": 3210
2223
- },
2224
- {
2225
- "epoch": 35.38,
2226
- "learning_rate": 8.78901098901099e-06,
2227
- "loss": 0.0842,
2228
- "step": 3220
2229
- },
2230
- {
2231
- "epoch": 35.49,
2232
- "learning_rate": 8.723076923076923e-06,
2233
- "loss": 0.1693,
2234
- "step": 3230
2235
- },
2236
- {
2237
- "epoch": 35.6,
2238
- "learning_rate": 8.657142857142858e-06,
2239
- "loss": 0.1059,
2240
- "step": 3240
2241
- },
2242
- {
2243
- "epoch": 35.71,
2244
- "learning_rate": 8.591208791208792e-06,
2245
- "loss": 0.1426,
2246
- "step": 3250
2247
- },
2248
- {
2249
- "epoch": 35.82,
2250
- "learning_rate": 8.525274725274724e-06,
2251
- "loss": 0.1127,
2252
- "step": 3260
2253
- },
2254
- {
2255
- "epoch": 35.93,
2256
- "learning_rate": 8.45934065934066e-06,
2257
- "loss": 0.0513,
2258
- "step": 3270
2259
- },
2260
- {
2261
- "epoch": 36.04,
2262
- "learning_rate": 8.393406593406593e-06,
2263
- "loss": 0.1005,
2264
- "step": 3280
2265
- },
2266
- {
2267
- "epoch": 36.15,
2268
- "learning_rate": 8.327472527472527e-06,
2269
- "loss": 0.0906,
2270
- "step": 3290
2271
- },
2272
- {
2273
- "epoch": 36.26,
2274
- "learning_rate": 8.261538461538462e-06,
2275
- "loss": 0.151,
2276
- "step": 3300
2277
- },
2278
- {
2279
- "epoch": 36.26,
2280
- "eval_accuracy": 0.7470588235294118,
2281
- "eval_loss": 0.8271679878234863,
2282
- "eval_runtime": 1.8428,
2283
- "eval_samples_per_second": 92.249,
2284
- "eval_steps_per_second": 11.938,
2285
- "step": 3300
2286
- },
2287
- {
2288
- "epoch": 36.37,
2289
- "learning_rate": 8.195604395604396e-06,
2290
- "loss": 0.0569,
2291
- "step": 3310
2292
- },
2293
- {
2294
- "epoch": 36.48,
2295
- "learning_rate": 8.129670329670331e-06,
2296
- "loss": 0.1806,
2297
- "step": 3320
2298
- },
2299
- {
2300
- "epoch": 36.59,
2301
- "learning_rate": 8.063736263736263e-06,
2302
- "loss": 0.1399,
2303
- "step": 3330
2304
- },
2305
- {
2306
- "epoch": 36.7,
2307
- "learning_rate": 7.997802197802197e-06,
2308
- "loss": 0.1186,
2309
- "step": 3340
2310
- },
2311
- {
2312
- "epoch": 36.81,
2313
- "learning_rate": 7.931868131868132e-06,
2314
- "loss": 0.0971,
2315
- "step": 3350
2316
- },
2317
- {
2318
- "epoch": 36.92,
2319
- "learning_rate": 7.865934065934066e-06,
2320
- "loss": 0.1759,
2321
- "step": 3360
2322
- },
2323
- {
2324
- "epoch": 37.03,
2325
- "learning_rate": 7.8e-06,
2326
- "loss": 0.0609,
2327
- "step": 3370
2328
- },
2329
- {
2330
- "epoch": 37.14,
2331
- "learning_rate": 7.734065934065935e-06,
2332
- "loss": 0.101,
2333
- "step": 3380
2334
- },
2335
- {
2336
- "epoch": 37.25,
2337
- "learning_rate": 7.668131868131869e-06,
2338
- "loss": 0.0826,
2339
- "step": 3390
2340
- },
2341
- {
2342
- "epoch": 37.36,
2343
- "learning_rate": 7.602197802197802e-06,
2344
- "loss": 0.0646,
2345
- "step": 3400
2346
- },
2347
- {
2348
- "epoch": 37.36,
2349
- "eval_accuracy": 0.7764705882352941,
2350
- "eval_loss": 0.7721197009086609,
2351
- "eval_runtime": 1.6753,
2352
- "eval_samples_per_second": 101.472,
2353
- "eval_steps_per_second": 13.132,
2354
- "step": 3400
2355
- },
2356
- {
2357
- "epoch": 37.47,
2358
- "learning_rate": 7.536263736263736e-06,
2359
- "loss": 0.1707,
2360
- "step": 3410
2361
- },
2362
- {
2363
- "epoch": 37.58,
2364
- "learning_rate": 7.47032967032967e-06,
2365
- "loss": 0.114,
2366
- "step": 3420
2367
- },
2368
- {
2369
- "epoch": 37.69,
2370
- "learning_rate": 7.4043956043956046e-06,
2371
- "loss": 0.1188,
2372
- "step": 3430
2373
- },
2374
- {
2375
- "epoch": 37.8,
2376
- "learning_rate": 7.338461538461539e-06,
2377
- "loss": 0.1788,
2378
- "step": 3440
2379
- },
2380
- {
2381
- "epoch": 37.91,
2382
- "learning_rate": 7.272527472527472e-06,
2383
- "loss": 0.1241,
2384
- "step": 3450
2385
- },
2386
- {
2387
- "epoch": 38.02,
2388
- "learning_rate": 7.2065934065934065e-06,
2389
- "loss": 0.1308,
2390
- "step": 3460
2391
- },
2392
- {
2393
- "epoch": 38.13,
2394
- "learning_rate": 7.140659340659341e-06,
2395
- "loss": 0.1154,
2396
- "step": 3470
2397
- },
2398
- {
2399
- "epoch": 38.24,
2400
- "learning_rate": 7.0747252747252756e-06,
2401
- "loss": 0.1787,
2402
- "step": 3480
2403
- },
2404
- {
2405
- "epoch": 38.35,
2406
- "learning_rate": 7.008791208791208e-06,
2407
- "loss": 0.1262,
2408
- "step": 3490
2409
- },
2410
- {
2411
- "epoch": 38.46,
2412
- "learning_rate": 6.942857142857143e-06,
2413
- "loss": 0.0801,
2414
- "step": 3500
2415
- },
2416
- {
2417
- "epoch": 38.46,
2418
- "eval_accuracy": 0.7529411764705882,
2419
- "eval_loss": 0.8170506358146667,
2420
- "eval_runtime": 1.6607,
2421
- "eval_samples_per_second": 102.365,
2422
- "eval_steps_per_second": 13.247,
2423
- "step": 3500
2424
- },
2425
- {
2426
- "epoch": 38.57,
2427
- "learning_rate": 6.8769230769230775e-06,
2428
- "loss": 0.098,
2429
- "step": 3510
2430
- },
2431
- {
2432
- "epoch": 38.68,
2433
- "learning_rate": 6.810989010989011e-06,
2434
- "loss": 0.1109,
2435
- "step": 3520
2436
- },
2437
- {
2438
- "epoch": 38.79,
2439
- "learning_rate": 6.745054945054945e-06,
2440
- "loss": 0.1078,
2441
- "step": 3530
2442
- },
2443
- {
2444
- "epoch": 38.9,
2445
- "learning_rate": 6.679120879120879e-06,
2446
- "loss": 0.0767,
2447
- "step": 3540
2448
- },
2449
- {
2450
- "epoch": 39.01,
2451
- "learning_rate": 6.613186813186814e-06,
2452
- "loss": 0.0886,
2453
- "step": 3550
2454
- },
2455
- {
2456
- "epoch": 39.12,
2457
- "learning_rate": 6.547252747252747e-06,
2458
- "loss": 0.0631,
2459
- "step": 3560
2460
- },
2461
- {
2462
- "epoch": 39.23,
2463
- "learning_rate": 6.481318681318681e-06,
2464
- "loss": 0.0737,
2465
- "step": 3570
2466
- },
2467
- {
2468
- "epoch": 39.34,
2469
- "learning_rate": 6.415384615384616e-06,
2470
- "loss": 0.1207,
2471
- "step": 3580
2472
- },
2473
- {
2474
- "epoch": 39.45,
2475
- "learning_rate": 6.3494505494505496e-06,
2476
- "loss": 0.0831,
2477
- "step": 3590
2478
- },
2479
- {
2480
- "epoch": 39.56,
2481
- "learning_rate": 6.283516483516483e-06,
2482
- "loss": 0.1038,
2483
- "step": 3600
2484
- },
2485
- {
2486
- "epoch": 39.56,
2487
- "eval_accuracy": 0.7058823529411765,
2488
- "eval_loss": 0.9464375376701355,
2489
- "eval_runtime": 1.6231,
2490
- "eval_samples_per_second": 104.738,
2491
- "eval_steps_per_second": 13.554,
2492
- "step": 3600
2493
- },
2494
- {
2495
- "epoch": 39.67,
2496
- "learning_rate": 6.217582417582418e-06,
2497
- "loss": 0.1151,
2498
- "step": 3610
2499
- },
2500
- {
2501
- "epoch": 39.78,
2502
- "learning_rate": 6.151648351648352e-06,
2503
- "loss": 0.113,
2504
- "step": 3620
2505
- },
2506
- {
2507
- "epoch": 39.89,
2508
- "learning_rate": 6.085714285714285e-06,
2509
- "loss": 0.1226,
2510
- "step": 3630
2511
- },
2512
- {
2513
- "epoch": 40.0,
2514
- "learning_rate": 6.01978021978022e-06,
2515
- "loss": 0.0645,
2516
- "step": 3640
2517
- },
2518
- {
2519
- "epoch": 40.11,
2520
- "learning_rate": 5.953846153846154e-06,
2521
- "loss": 0.1398,
2522
- "step": 3650
2523
- },
2524
- {
2525
- "epoch": 40.22,
2526
- "learning_rate": 5.887912087912089e-06,
2527
- "loss": 0.0801,
2528
- "step": 3660
2529
- },
2530
- {
2531
- "epoch": 40.33,
2532
- "learning_rate": 5.821978021978022e-06,
2533
- "loss": 0.0787,
2534
- "step": 3670
2535
- },
2536
- {
2537
- "epoch": 40.44,
2538
- "learning_rate": 5.756043956043956e-06,
2539
- "loss": 0.1388,
2540
- "step": 3680
2541
- },
2542
- {
2543
- "epoch": 40.55,
2544
- "learning_rate": 5.690109890109891e-06,
2545
- "loss": 0.0356,
2546
- "step": 3690
2547
- },
2548
- {
2549
- "epoch": 40.66,
2550
- "learning_rate": 5.624175824175824e-06,
2551
- "loss": 0.16,
2552
- "step": 3700
2553
- },
2554
- {
2555
- "epoch": 40.66,
2556
- "eval_accuracy": 0.7705882352941177,
2557
- "eval_loss": 0.8004665374755859,
2558
- "eval_runtime": 1.6262,
2559
- "eval_samples_per_second": 104.537,
2560
- "eval_steps_per_second": 13.528,
2561
- "step": 3700
2562
- },
2563
- {
2564
- "epoch": 40.77,
2565
- "learning_rate": 5.558241758241758e-06,
2566
- "loss": 0.1363,
2567
- "step": 3710
2568
- },
2569
- {
2570
- "epoch": 40.88,
2571
- "learning_rate": 5.492307692307693e-06,
2572
- "loss": 0.0803,
2573
- "step": 3720
2574
- },
2575
- {
2576
- "epoch": 40.99,
2577
- "learning_rate": 5.426373626373627e-06,
2578
- "loss": 0.077,
2579
- "step": 3730
2580
- },
2581
- {
2582
- "epoch": 41.1,
2583
- "learning_rate": 5.36043956043956e-06,
2584
- "loss": 0.0912,
2585
- "step": 3740
2586
- },
2587
- {
2588
- "epoch": 41.21,
2589
- "learning_rate": 5.2945054945054946e-06,
2590
- "loss": 0.1425,
2591
- "step": 3750
2592
- },
2593
- {
2594
- "epoch": 41.32,
2595
- "learning_rate": 5.228571428571429e-06,
2596
- "loss": 0.0815,
2597
- "step": 3760
2598
- },
2599
- {
2600
- "epoch": 41.43,
2601
- "learning_rate": 5.162637362637363e-06,
2602
- "loss": 0.0987,
2603
- "step": 3770
2604
- },
2605
- {
2606
- "epoch": 41.54,
2607
- "learning_rate": 5.0967032967032965e-06,
2608
- "loss": 0.0457,
2609
- "step": 3780
2610
- },
2611
- {
2612
- "epoch": 41.65,
2613
- "learning_rate": 5.030769230769231e-06,
2614
- "loss": 0.1181,
2615
- "step": 3790
2616
- },
2617
- {
2618
- "epoch": 41.76,
2619
- "learning_rate": 4.9648351648351656e-06,
2620
- "loss": 0.1151,
2621
- "step": 3800
2622
- },
2623
- {
2624
- "epoch": 41.76,
2625
- "eval_accuracy": 0.7470588235294118,
2626
- "eval_loss": 0.8784206509590149,
2627
- "eval_runtime": 1.6555,
2628
- "eval_samples_per_second": 102.688,
2629
- "eval_steps_per_second": 13.289,
2630
- "step": 3800
2631
- },
2632
- {
2633
- "epoch": 41.87,
2634
- "learning_rate": 4.898901098901098e-06,
2635
- "loss": 0.1219,
2636
- "step": 3810
2637
- },
2638
- {
2639
- "epoch": 41.98,
2640
- "learning_rate": 4.832967032967033e-06,
2641
- "loss": 0.0719,
2642
- "step": 3820
2643
- },
2644
- {
2645
- "epoch": 42.09,
2646
- "learning_rate": 4.7670329670329675e-06,
2647
- "loss": 0.0647,
2648
- "step": 3830
2649
- },
2650
- {
2651
- "epoch": 42.2,
2652
- "learning_rate": 4.701098901098901e-06,
2653
- "loss": 0.1066,
2654
- "step": 3840
2655
- },
2656
- {
2657
- "epoch": 42.31,
2658
- "learning_rate": 4.635164835164835e-06,
2659
- "loss": 0.052,
2660
- "step": 3850
2661
- },
2662
- {
2663
- "epoch": 42.42,
2664
- "learning_rate": 4.569230769230769e-06,
2665
- "loss": 0.0658,
2666
- "step": 3860
2667
- },
2668
- {
2669
- "epoch": 42.53,
2670
- "learning_rate": 4.503296703296704e-06,
2671
- "loss": 0.0851,
2672
- "step": 3870
2673
- },
2674
- {
2675
- "epoch": 42.64,
2676
- "learning_rate": 4.437362637362637e-06,
2677
- "loss": 0.071,
2678
- "step": 3880
2679
- },
2680
- {
2681
- "epoch": 42.75,
2682
- "learning_rate": 4.371428571428571e-06,
2683
- "loss": 0.0922,
2684
- "step": 3890
2685
- },
2686
- {
2687
- "epoch": 42.86,
2688
- "learning_rate": 4.305494505494506e-06,
2689
- "loss": 0.1159,
2690
- "step": 3900
2691
- },
2692
- {
2693
- "epoch": 42.86,
2694
- "eval_accuracy": 0.7470588235294118,
2695
- "eval_loss": 0.85979163646698,
2696
- "eval_runtime": 1.6327,
2697
- "eval_samples_per_second": 104.124,
2698
- "eval_steps_per_second": 13.475,
2699
- "step": 3900
2700
- },
2701
- {
2702
- "epoch": 42.97,
2703
- "learning_rate": 4.2395604395604396e-06,
2704
- "loss": 0.1347,
2705
- "step": 3910
2706
- },
2707
- {
2708
- "epoch": 43.08,
2709
- "learning_rate": 4.173626373626373e-06,
2710
- "loss": 0.0455,
2711
- "step": 3920
2712
- },
2713
- {
2714
- "epoch": 43.19,
2715
- "learning_rate": 4.107692307692308e-06,
2716
- "loss": 0.054,
2717
- "step": 3930
2718
- },
2719
- {
2720
- "epoch": 43.3,
2721
- "learning_rate": 4.041758241758242e-06,
2722
- "loss": 0.1078,
2723
- "step": 3940
2724
- },
2725
- {
2726
- "epoch": 43.41,
2727
- "learning_rate": 3.975824175824176e-06,
2728
- "loss": 0.0738,
2729
- "step": 3950
2730
- },
2731
- {
2732
- "epoch": 43.52,
2733
- "learning_rate": 3.90989010989011e-06,
2734
- "loss": 0.0672,
2735
- "step": 3960
2736
- },
2737
- {
2738
- "epoch": 43.63,
2739
- "learning_rate": 3.843956043956044e-06,
2740
- "loss": 0.0476,
2741
- "step": 3970
2742
- },
2743
- {
2744
- "epoch": 43.74,
2745
- "learning_rate": 3.7780219780219784e-06,
2746
- "loss": 0.1283,
2747
- "step": 3980
2748
- },
2749
- {
2750
- "epoch": 43.85,
2751
- "learning_rate": 3.712087912087912e-06,
2752
- "loss": 0.1116,
2753
- "step": 3990
2754
- },
2755
- {
2756
- "epoch": 43.96,
2757
- "learning_rate": 3.646153846153846e-06,
2758
- "loss": 0.0575,
2759
- "step": 4000
2760
- },
2761
- {
2762
- "epoch": 43.96,
2763
- "eval_accuracy": 0.7529411764705882,
2764
- "eval_loss": 0.8543334007263184,
2765
- "eval_runtime": 1.6405,
2766
- "eval_samples_per_second": 103.629,
2767
- "eval_steps_per_second": 13.411,
2768
- "step": 4000
2769
- },
2770
- {
2771
- "epoch": 44.07,
2772
- "learning_rate": 3.5802197802197803e-06,
2773
- "loss": 0.079,
2774
- "step": 4010
2775
- },
2776
- {
2777
- "epoch": 44.18,
2778
- "learning_rate": 3.5142857142857144e-06,
2779
- "loss": 0.0763,
2780
- "step": 4020
2781
- },
2782
- {
2783
- "epoch": 44.29,
2784
- "learning_rate": 3.4483516483516485e-06,
2785
- "loss": 0.0445,
2786
- "step": 4030
2787
- },
2788
- {
2789
- "epoch": 44.4,
2790
- "learning_rate": 3.3824175824175826e-06,
2791
- "loss": 0.0939,
2792
- "step": 4040
2793
- },
2794
- {
2795
- "epoch": 44.51,
2796
- "learning_rate": 3.3164835164835163e-06,
2797
- "loss": 0.1475,
2798
- "step": 4050
2799
- },
2800
- {
2801
- "epoch": 44.62,
2802
- "learning_rate": 3.250549450549451e-06,
2803
- "loss": 0.1596,
2804
- "step": 4060
2805
- },
2806
- {
2807
- "epoch": 44.73,
2808
- "learning_rate": 3.1846153846153846e-06,
2809
- "loss": 0.1322,
2810
- "step": 4070
2811
- },
2812
- {
2813
- "epoch": 44.84,
2814
- "learning_rate": 3.1186813186813187e-06,
2815
- "loss": 0.1158,
2816
- "step": 4080
2817
- },
2818
- {
2819
- "epoch": 44.95,
2820
- "learning_rate": 3.0527472527472528e-06,
2821
- "loss": 0.075,
2822
- "step": 4090
2823
- },
2824
- {
2825
- "epoch": 45.05,
2826
- "learning_rate": 2.986813186813187e-06,
2827
- "loss": 0.164,
2828
- "step": 4100
2829
- },
2830
- {
2831
- "epoch": 45.05,
2832
- "eval_accuracy": 0.7588235294117647,
2833
- "eval_loss": 0.8658636808395386,
2834
- "eval_runtime": 1.518,
2835
- "eval_samples_per_second": 111.989,
2836
- "eval_steps_per_second": 14.493,
2837
- "step": 4100
2838
- },
2839
- {
2840
- "epoch": 45.16,
2841
- "learning_rate": 2.920879120879121e-06,
2842
- "loss": 0.0847,
2843
- "step": 4110
2844
- },
2845
- {
2846
- "epoch": 45.27,
2847
- "learning_rate": 2.854945054945055e-06,
2848
- "loss": 0.1098,
2849
- "step": 4120
2850
- },
2851
- {
2852
- "epoch": 45.38,
2853
- "learning_rate": 2.7890109890109892e-06,
2854
- "loss": 0.1282,
2855
- "step": 4130
2856
- },
2857
- {
2858
- "epoch": 45.49,
2859
- "learning_rate": 2.723076923076923e-06,
2860
- "loss": 0.0736,
2861
- "step": 4140
2862
- },
2863
- {
2864
- "epoch": 45.6,
2865
- "learning_rate": 2.657142857142857e-06,
2866
- "loss": 0.1344,
2867
- "step": 4150
2868
- },
2869
- {
2870
- "epoch": 45.71,
2871
- "learning_rate": 2.591208791208791e-06,
2872
- "loss": 0.1191,
2873
- "step": 4160
2874
- },
2875
- {
2876
- "epoch": 45.82,
2877
- "learning_rate": 2.5252747252747253e-06,
2878
- "loss": 0.0721,
2879
- "step": 4170
2880
- },
2881
- {
2882
- "epoch": 45.93,
2883
- "learning_rate": 2.4593406593406594e-06,
2884
- "loss": 0.0371,
2885
- "step": 4180
2886
- },
2887
- {
2888
- "epoch": 46.04,
2889
- "learning_rate": 2.3934065934065935e-06,
2890
- "loss": 0.0407,
2891
- "step": 4190
2892
- },
2893
- {
2894
- "epoch": 46.15,
2895
- "learning_rate": 2.3274725274725276e-06,
2896
- "loss": 0.1319,
2897
- "step": 4200
2898
- },
2899
- {
2900
- "epoch": 46.15,
2901
- "eval_accuracy": 0.7411764705882353,
2902
- "eval_loss": 0.885351300239563,
2903
- "eval_runtime": 1.4998,
2904
- "eval_samples_per_second": 113.347,
2905
- "eval_steps_per_second": 14.668,
2906
- "step": 4200
2907
- },
2908
- {
2909
- "epoch": 46.26,
2910
- "learning_rate": 2.2615384615384617e-06,
2911
- "loss": 0.1159,
2912
- "step": 4210
2913
- },
2914
- {
2915
- "epoch": 46.37,
2916
- "learning_rate": 2.195604395604396e-06,
2917
- "loss": 0.0659,
2918
- "step": 4220
2919
- },
2920
- {
2921
- "epoch": 46.48,
2922
- "learning_rate": 2.1296703296703296e-06,
2923
- "loss": 0.0948,
2924
- "step": 4230
2925
- },
2926
- {
2927
- "epoch": 46.59,
2928
- "learning_rate": 2.0637362637362637e-06,
2929
- "loss": 0.0886,
2930
- "step": 4240
2931
- },
2932
- {
2933
- "epoch": 46.7,
2934
- "learning_rate": 1.9978021978021978e-06,
2935
- "loss": 0.0889,
2936
- "step": 4250
2937
- },
2938
- {
2939
- "epoch": 46.81,
2940
- "learning_rate": 1.931868131868132e-06,
2941
- "loss": 0.0339,
2942
- "step": 4260
2943
- },
2944
- {
2945
- "epoch": 46.92,
2946
- "learning_rate": 1.8659340659340658e-06,
2947
- "loss": 0.0697,
2948
- "step": 4270
2949
- },
2950
- {
2951
- "epoch": 47.03,
2952
- "learning_rate": 1.8e-06,
2953
- "loss": 0.0777,
2954
- "step": 4280
2955
- },
2956
- {
2957
- "epoch": 47.14,
2958
- "learning_rate": 1.734065934065934e-06,
2959
- "loss": 0.0546,
2960
- "step": 4290
2961
- },
2962
- {
2963
- "epoch": 47.25,
2964
- "learning_rate": 1.6681318681318681e-06,
2965
- "loss": 0.0489,
2966
- "step": 4300
2967
- },
2968
- {
2969
- "epoch": 47.25,
2970
- "eval_accuracy": 0.7588235294117647,
2971
- "eval_loss": 0.7507675886154175,
2972
- "eval_runtime": 1.5139,
2973
- "eval_samples_per_second": 112.29,
2974
- "eval_steps_per_second": 14.532,
2975
- "step": 4300
2976
- },
2977
- {
2978
- "epoch": 47.36,
2979
- "learning_rate": 1.6021978021978023e-06,
2980
- "loss": 0.087,
2981
- "step": 4310
2982
- },
2983
- {
2984
- "epoch": 47.47,
2985
- "learning_rate": 1.5362637362637364e-06,
2986
- "loss": 0.0452,
2987
- "step": 4320
2988
- },
2989
- {
2990
- "epoch": 47.58,
2991
- "learning_rate": 1.4703296703296705e-06,
2992
- "loss": 0.0508,
2993
- "step": 4330
2994
- },
2995
- {
2996
- "epoch": 47.69,
2997
- "learning_rate": 1.4043956043956046e-06,
2998
- "loss": 0.0812,
2999
- "step": 4340
3000
- },
3001
- {
3002
- "epoch": 47.8,
3003
- "learning_rate": 1.3384615384615383e-06,
3004
- "loss": 0.0721,
3005
- "step": 4350
3006
- },
3007
- {
3008
- "epoch": 47.91,
3009
- "learning_rate": 1.2725274725274724e-06,
3010
- "loss": 0.0835,
3011
- "step": 4360
3012
- },
3013
- {
3014
- "epoch": 48.02,
3015
- "learning_rate": 1.2065934065934065e-06,
3016
- "loss": 0.1077,
3017
- "step": 4370
3018
- },
3019
- {
3020
- "epoch": 48.13,
3021
- "learning_rate": 1.1406593406593406e-06,
3022
- "loss": 0.0636,
3023
- "step": 4380
3024
- },
3025
- {
3026
- "epoch": 48.24,
3027
- "learning_rate": 1.0747252747252748e-06,
3028
- "loss": 0.1026,
3029
- "step": 4390
3030
- },
3031
- {
3032
- "epoch": 48.35,
3033
- "learning_rate": 1.0087912087912089e-06,
3034
- "loss": 0.0678,
3035
- "step": 4400
3036
- },
3037
- {
3038
- "epoch": 48.35,
3039
- "eval_accuracy": 0.7352941176470589,
3040
- "eval_loss": 0.8784447312355042,
3041
- "eval_runtime": 1.5075,
3042
- "eval_samples_per_second": 112.768,
3043
- "eval_steps_per_second": 14.594,
3044
- "step": 4400
3045
- },
3046
- {
3047
- "epoch": 48.46,
3048
- "learning_rate": 9.42857142857143e-07,
3049
- "loss": 0.072,
3050
- "step": 4410
3051
- },
3052
- {
3053
- "epoch": 48.57,
3054
- "learning_rate": 8.769230769230769e-07,
3055
- "loss": 0.0526,
3056
- "step": 4420
3057
- },
3058
- {
3059
- "epoch": 48.68,
3060
- "learning_rate": 8.10989010989011e-07,
3061
- "loss": 0.0802,
3062
- "step": 4430
3063
- },
3064
- {
3065
- "epoch": 48.79,
3066
- "learning_rate": 7.450549450549451e-07,
3067
- "loss": 0.046,
3068
- "step": 4440
3069
- },
3070
- {
3071
- "epoch": 48.9,
3072
- "learning_rate": 6.791208791208791e-07,
3073
- "loss": 0.0656,
3074
- "step": 4450
3075
- },
3076
- {
3077
- "epoch": 49.01,
3078
- "learning_rate": 6.131868131868131e-07,
3079
- "loss": 0.139,
3080
- "step": 4460
3081
- },
3082
- {
3083
- "epoch": 49.12,
3084
- "learning_rate": 5.472527472527473e-07,
3085
- "loss": 0.0576,
3086
- "step": 4470
3087
- },
3088
- {
3089
- "epoch": 49.23,
3090
- "learning_rate": 4.813186813186814e-07,
3091
- "loss": 0.0686,
3092
- "step": 4480
3093
- },
3094
- {
3095
- "epoch": 49.34,
3096
- "learning_rate": 4.153846153846154e-07,
3097
- "loss": 0.0658,
3098
- "step": 4490
3099
- },
3100
- {
3101
- "epoch": 49.45,
3102
- "learning_rate": 3.4945054945054945e-07,
3103
- "loss": 0.0832,
3104
- "step": 4500
3105
- },
3106
- {
3107
- "epoch": 49.45,
3108
- "eval_accuracy": 0.7764705882352941,
3109
- "eval_loss": 0.7247602343559265,
3110
- "eval_runtime": 1.5084,
3111
- "eval_samples_per_second": 112.699,
3112
- "eval_steps_per_second": 14.585,
3113
- "step": 4500
3114
- },
3115
- {
3116
- "epoch": 49.56,
3117
- "learning_rate": 2.835164835164835e-07,
3118
- "loss": 0.0692,
3119
- "step": 4510
3120
- },
3121
- {
3122
- "epoch": 49.67,
3123
- "learning_rate": 2.175824175824176e-07,
3124
- "loss": 0.1135,
3125
- "step": 4520
3126
- },
3127
- {
3128
- "epoch": 49.78,
3129
- "learning_rate": 1.5164835164835167e-07,
3130
- "loss": 0.1512,
3131
- "step": 4530
3132
- },
3133
- {
3134
- "epoch": 49.89,
3135
- "learning_rate": 8.571428571428572e-08,
3136
- "loss": 0.1339,
3137
- "step": 4540
3138
- },
3139
- {
3140
- "epoch": 50.0,
3141
- "learning_rate": 1.9780219780219782e-08,
3142
- "loss": 0.1045,
3143
- "step": 4550
3144
- },
3145
- {
3146
- "epoch": 50.0,
3147
- "step": 4550,
3148
- "total_flos": 5.622144623740109e+18,
3149
- "train_loss": 0.2358846340533141,
3150
- "train_runtime": 1979.1189,
3151
- "train_samples_per_second": 36.658,
3152
- "train_steps_per_second": 2.299
3153
  }
3154
  ],
3155
- "max_steps": 4550,
3156
- "num_train_epochs": 50,
3157
- "total_flos": 5.622144623740109e+18,
3158
  "trial_name": null,
3159
  "trial_params": null
3160
  }
 
1
  {
2
+ "best_metric": 0.5748319625854492,
3
+ "best_model_checkpoint": "./croupier-creature-classifier/checkpoint-400",
4
+ "epoch": 5.0,
5
+ "global_step": 455,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.11,
12
+ "learning_rate": 0.00019560439560439562,
13
+ "loss": 1.3464,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.22,
18
+ "learning_rate": 0.00019120879120879122,
19
+ "loss": 1.2608,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.33,
24
+ "learning_rate": 0.00018681318681318683,
25
+ "loss": 1.1356,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.44,
30
+ "learning_rate": 0.0001824175824175824,
31
+ "loss": 1.1795,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.55,
36
+ "learning_rate": 0.00017802197802197802,
37
+ "loss": 0.9829,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.66,
42
+ "learning_rate": 0.00017362637362637365,
43
+ "loss": 1.049,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.77,
48
+ "learning_rate": 0.00016923076923076923,
49
+ "loss": 1.0607,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.88,
54
+ "learning_rate": 0.00016483516483516484,
55
+ "loss": 0.9264,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.99,
60
+ "learning_rate": 0.00016043956043956044,
61
+ "loss": 0.8931,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 1.1,
66
+ "learning_rate": 0.00015604395604395605,
67
+ "loss": 0.6663,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 1.1,
72
+ "eval_accuracy": 0.5941176470588235,
73
+ "eval_loss": 1.0179336071014404,
74
+ "eval_runtime": 1.6282,
75
+ "eval_samples_per_second": 104.41,
76
+ "eval_steps_per_second": 13.512,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 1.21,
81
+ "learning_rate": 0.00015164835164835165,
82
+ "loss": 0.7923,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 1.32,
87
+ "learning_rate": 0.00014725274725274726,
88
+ "loss": 0.83,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 1.43,
93
+ "learning_rate": 0.00014285714285714287,
94
+ "loss": 0.6796,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.54,
99
+ "learning_rate": 0.00013846153846153847,
100
+ "loss": 0.7702,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.65,
105
+ "learning_rate": 0.00013406593406593405,
106
+ "loss": 0.8231,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.76,
111
+ "learning_rate": 0.0001296703296703297,
112
+ "loss": 0.7022,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.87,
117
+ "learning_rate": 0.00012527472527472527,
118
+ "loss": 0.6745,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.98,
123
+ "learning_rate": 0.00012087912087912087,
124
+ "loss": 0.7639,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 2.09,
129
+ "learning_rate": 0.0001164835164835165,
130
+ "loss": 0.5086,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 2.2,
135
+ "learning_rate": 0.0001120879120879121,
136
+ "loss": 0.4924,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 2.2,
141
+ "eval_accuracy": 0.7529411764705882,
142
+ "eval_loss": 0.7035554647445679,
143
+ "eval_runtime": 2.1312,
144
+ "eval_samples_per_second": 79.768,
145
+ "eval_steps_per_second": 10.323,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 2.31,
150
+ "learning_rate": 0.0001076923076923077,
151
+ "loss": 0.4121,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 2.42,
156
+ "learning_rate": 0.00010329670329670331,
157
+ "loss": 0.451,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 2.53,
162
+ "learning_rate": 9.89010989010989e-05,
163
+ "loss": 0.5021,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 2.64,
168
+ "learning_rate": 9.450549450549451e-05,
169
+ "loss": 0.3728,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 2.75,
174
+ "learning_rate": 9.010989010989012e-05,
175
+ "loss": 0.5141,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 2.86,
180
+ "learning_rate": 8.571428571428571e-05,
181
+ "loss": 0.4065,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 2.97,
186
+ "learning_rate": 8.131868131868132e-05,
187
+ "loss": 0.434,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 3.08,
192
+ "learning_rate": 7.692307692307693e-05,
193
+ "loss": 0.3047,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 3.19,
198
+ "learning_rate": 7.252747252747253e-05,
199
+ "loss": 0.2544,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 3.3,
204
+ "learning_rate": 6.813186813186814e-05,
205
+ "loss": 0.4552,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 3.3,
210
+ "eval_accuracy": 0.7823529411764706,
211
+ "eval_loss": 0.612335741519928,
212
+ "eval_runtime": 1.7201,
213
+ "eval_samples_per_second": 98.833,
214
+ "eval_steps_per_second": 12.79,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 3.41,
219
+ "learning_rate": 6.373626373626373e-05,
220
+ "loss": 0.2929,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 3.52,
225
+ "learning_rate": 5.9340659340659345e-05,
226
+ "loss": 0.297,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 3.63,
231
+ "learning_rate": 5.494505494505495e-05,
232
+ "loss": 0.2667,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 3.74,
237
+ "learning_rate": 5.054945054945055e-05,
238
+ "loss": 0.2297,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 3.85,
243
+ "learning_rate": 4.615384615384616e-05,
244
+ "loss": 0.2997,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 3.96,
249
+ "learning_rate": 4.1758241758241765e-05,
250
+ "loss": 0.4482,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 4.07,
255
+ "learning_rate": 3.7362637362637365e-05,
256
+ "loss": 0.2956,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 4.18,
261
+ "learning_rate": 3.296703296703297e-05,
262
+ "loss": 0.1919,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 4.29,
267
+ "learning_rate": 2.857142857142857e-05,
268
+ "loss": 0.2353,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 4.4,
273
+ "learning_rate": 2.4175824175824177e-05,
274
+ "loss": 0.2355,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 4.4,
279
+ "eval_accuracy": 0.7647058823529411,
280
+ "eval_loss": 0.5748319625854492,
281
+ "eval_runtime": 1.7042,
282
+ "eval_samples_per_second": 99.752,
283
+ "eval_steps_per_second": 12.909,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 4.51,
288
+ "learning_rate": 1.978021978021978e-05,
289
+ "loss": 0.1942,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 4.62,
294
+ "learning_rate": 1.5384615384615387e-05,
295
+ "loss": 0.231,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 4.73,
300
+ "learning_rate": 1.0989010989010989e-05,
301
+ "loss": 0.2007,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 4.84,
306
+ "learning_rate": 6.5934065934065935e-06,
307
+ "loss": 0.2592,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 4.95,
312
+ "learning_rate": 2.197802197802198e-06,
313
+ "loss": 0.2764,
314
  "step": 450
315
  },
316
  {
317
+ "epoch": 5.0,
318
+ "step": 455,
319
+ "total_flos": 5.622144623740109e+17,
320
+ "train_loss": 0.5662190929873959,
321
+ "train_runtime": 205.6317,
322
+ "train_samples_per_second": 35.282,
323
+ "train_steps_per_second": 2.213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  }
325
  ],
326
+ "max_steps": 455,
327
+ "num_train_epochs": 5,
328
+ "total_flos": 5.622144623740109e+17,
329
  "trial_name": null,
330
  "trial_params": null
331
  }