erlingh commited on
Commit
cb32814
·
1 Parent(s): 697aa39
Files changed (4) hide show
  1. rng_state.pth +0 -3
  2. scheduler.pt +0 -3
  3. trainer_state.json +0 -4018
  4. training_args.bin +0 -3
rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fb3c2a0207824c015d8574cb83fb4ba24e6eed8381855bfb3876d3d041f7d1f
3
- size 13611
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6086e80de9a299e8ebc9c0d037531136a1d7f8bb7c5020275636e6f38cde4e15
3
- size 623
 
 
 
 
trainer_state.json DELETED
@@ -1,4018 +0,0 @@
1
- {
2
- "best_metric": 1.634265422821045,
3
- "best_model_checkpoint": "./results/checkpoint-5800",
4
- "epoch": 0.2142434988179669,
5
- "global_step": 5800,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 0.00019996,
13
- "loss": 4.2732,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.0,
18
- "learning_rate": 0.00019992000000000002,
19
- "loss": 3.4267,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.0,
24
- "learning_rate": 0.00019988,
25
- "loss": 3.1308,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.0,
30
- "learning_rate": 0.00019984,
31
- "loss": 2.9584,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.0,
36
- "learning_rate": 0.0001998,
37
- "loss": 2.8321,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.0,
42
- "learning_rate": 0.00019976000000000003,
43
- "loss": 2.8353,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.0,
48
- "learning_rate": 0.00019972000000000002,
49
- "loss": 2.7275,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.0,
54
- "learning_rate": 0.00019968,
55
- "loss": 2.7678,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.0,
60
- "learning_rate": 0.00019964,
61
- "loss": 2.6752,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.0,
66
- "learning_rate": 0.0001996,
67
- "loss": 2.7207,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 0.0,
72
- "eval_accuracy": 0.6036520746718839,
73
- "eval_loss": 2.011291980743408,
74
- "eval_runtime": 48.3273,
75
- "eval_samples_per_second": 8.753,
76
- "eval_steps_per_second": 1.097,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.0,
81
- "learning_rate": 0.00019956000000000002,
82
- "loss": 2.6655,
83
- "step": 110
84
- },
85
- {
86
- "epoch": 0.0,
87
- "learning_rate": 0.00019952000000000001,
88
- "loss": 2.5982,
89
- "step": 120
90
- },
91
- {
92
- "epoch": 0.0,
93
- "learning_rate": 0.00019948,
94
- "loss": 2.5936,
95
- "step": 130
96
- },
97
- {
98
- "epoch": 0.01,
99
- "learning_rate": 0.00019944,
100
- "loss": 2.625,
101
- "step": 140
102
- },
103
- {
104
- "epoch": 0.01,
105
- "learning_rate": 0.00019940000000000002,
106
- "loss": 2.6117,
107
- "step": 150
108
- },
109
- {
110
- "epoch": 0.01,
111
- "learning_rate": 0.00019936000000000002,
112
- "loss": 2.5167,
113
- "step": 160
114
- },
115
- {
116
- "epoch": 0.01,
117
- "learning_rate": 0.00019932,
118
- "loss": 2.4674,
119
- "step": 170
120
- },
121
- {
122
- "epoch": 0.01,
123
- "learning_rate": 0.00019928,
124
- "loss": 2.5246,
125
- "step": 180
126
- },
127
- {
128
- "epoch": 0.01,
129
- "learning_rate": 0.00019924,
130
- "loss": 2.5045,
131
- "step": 190
132
- },
133
- {
134
- "epoch": 0.01,
135
- "learning_rate": 0.00019920000000000002,
136
- "loss": 2.5222,
137
- "step": 200
138
- },
139
- {
140
- "epoch": 0.01,
141
- "eval_accuracy": 0.5987609032363251,
142
- "eval_loss": 2.0005741119384766,
143
- "eval_runtime": 47.4839,
144
- "eval_samples_per_second": 8.908,
145
- "eval_steps_per_second": 1.116,
146
- "step": 200
147
- },
148
- {
149
- "epoch": 0.01,
150
- "learning_rate": 0.00019916,
151
- "loss": 2.5048,
152
- "step": 210
153
- },
154
- {
155
- "epoch": 0.01,
156
- "learning_rate": 0.00019912,
157
- "loss": 2.4859,
158
- "step": 220
159
- },
160
- {
161
- "epoch": 0.01,
162
- "learning_rate": 0.00019908,
163
- "loss": 2.439,
164
- "step": 230
165
- },
166
- {
167
- "epoch": 0.01,
168
- "learning_rate": 0.00019904,
169
- "loss": 2.489,
170
- "step": 240
171
- },
172
- {
173
- "epoch": 0.01,
174
- "learning_rate": 0.000199,
175
- "loss": 2.5181,
176
- "step": 250
177
- },
178
- {
179
- "epoch": 0.01,
180
- "learning_rate": 0.00019896,
181
- "loss": 2.4656,
182
- "step": 260
183
- },
184
- {
185
- "epoch": 0.01,
186
- "learning_rate": 0.00019892000000000003,
187
- "loss": 2.4849,
188
- "step": 270
189
- },
190
- {
191
- "epoch": 0.01,
192
- "learning_rate": 0.00019888,
193
- "loss": 2.4934,
194
- "step": 280
195
- },
196
- {
197
- "epoch": 0.01,
198
- "learning_rate": 0.00019884000000000001,
199
- "loss": 2.4359,
200
- "step": 290
201
- },
202
- {
203
- "epoch": 0.01,
204
- "learning_rate": 0.0001988,
205
- "loss": 2.4123,
206
- "step": 300
207
- },
208
- {
209
- "epoch": 0.01,
210
- "eval_accuracy": 0.6184886280264124,
211
- "eval_loss": 1.9406265020370483,
212
- "eval_runtime": 50.1664,
213
- "eval_samples_per_second": 8.432,
214
- "eval_steps_per_second": 1.056,
215
- "step": 300
216
- },
217
- {
218
- "epoch": 0.01,
219
- "learning_rate": 0.00019876,
220
- "loss": 2.4585,
221
- "step": 310
222
- },
223
- {
224
- "epoch": 0.01,
225
- "learning_rate": 0.00019872000000000002,
226
- "loss": 2.436,
227
- "step": 320
228
- },
229
- {
230
- "epoch": 0.01,
231
- "learning_rate": 0.00019868,
232
- "loss": 2.4361,
233
- "step": 330
234
- },
235
- {
236
- "epoch": 0.01,
237
- "learning_rate": 0.00019864,
238
- "loss": 2.4193,
239
- "step": 340
240
- },
241
- {
242
- "epoch": 0.01,
243
- "learning_rate": 0.0001986,
244
- "loss": 2.4404,
245
- "step": 350
246
- },
247
- {
248
- "epoch": 0.01,
249
- "learning_rate": 0.00019856000000000002,
250
- "loss": 2.4109,
251
- "step": 360
252
- },
253
- {
254
- "epoch": 0.01,
255
- "learning_rate": 0.00019852000000000002,
256
- "loss": 2.4257,
257
- "step": 370
258
- },
259
- {
260
- "epoch": 0.01,
261
- "learning_rate": 0.00019848,
262
- "loss": 2.423,
263
- "step": 380
264
- },
265
- {
266
- "epoch": 0.01,
267
- "learning_rate": 0.00019844,
268
- "loss": 2.3701,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 0.01,
273
- "learning_rate": 0.0001984,
274
- "loss": 2.4469,
275
- "step": 400
276
- },
277
- {
278
- "epoch": 0.01,
279
- "eval_accuracy": 0.6204450966006358,
280
- "eval_loss": 1.9681750535964966,
281
- "eval_runtime": 47.2918,
282
- "eval_samples_per_second": 8.944,
283
- "eval_steps_per_second": 1.121,
284
- "step": 400
285
- },
286
- {
287
- "epoch": 0.02,
288
- "learning_rate": 0.00019836000000000002,
289
- "loss": 2.3953,
290
- "step": 410
291
- },
292
- {
293
- "epoch": 0.02,
294
- "learning_rate": 0.00019832,
295
- "loss": 2.3848,
296
- "step": 420
297
- },
298
- {
299
- "epoch": 0.02,
300
- "learning_rate": 0.00019828,
301
- "loss": 2.3802,
302
- "step": 430
303
- },
304
- {
305
- "epoch": 0.02,
306
- "learning_rate": 0.00019824,
307
- "loss": 2.4174,
308
- "step": 440
309
- },
310
- {
311
- "epoch": 0.02,
312
- "learning_rate": 0.00019820000000000002,
313
- "loss": 2.4125,
314
- "step": 450
315
- },
316
- {
317
- "epoch": 0.02,
318
- "learning_rate": 0.00019816000000000001,
319
- "loss": 2.387,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 0.02,
324
- "learning_rate": 0.00019812,
325
- "loss": 2.3491,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 0.02,
330
- "learning_rate": 0.00019808,
331
- "loss": 2.3997,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 0.02,
336
- "learning_rate": 0.00019804,
337
- "loss": 2.3107,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 0.02,
342
- "learning_rate": 0.00019800000000000002,
343
- "loss": 2.3367,
344
- "step": 500
345
- },
346
- {
347
- "epoch": 0.02,
348
- "eval_accuracy": 0.6377272356729436,
349
- "eval_loss": 1.8571785688400269,
350
- "eval_runtime": 47.2012,
351
- "eval_samples_per_second": 8.962,
352
- "eval_steps_per_second": 1.123,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 0.02,
357
- "learning_rate": 0.00019796,
358
- "loss": 2.3571,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 0.02,
363
- "learning_rate": 0.00019792000000000003,
364
- "loss": 2.325,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 0.02,
369
- "learning_rate": 0.00019788,
370
- "loss": 2.39,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 0.02,
375
- "learning_rate": 0.00019784,
376
- "loss": 2.3508,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 0.02,
381
- "learning_rate": 0.0001978,
382
- "loss": 2.3736,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 0.02,
387
- "learning_rate": 0.00019776,
388
- "loss": 2.2891,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 0.02,
393
- "learning_rate": 0.00019772000000000002,
394
- "loss": 2.3026,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 0.02,
399
- "learning_rate": 0.00019768,
400
- "loss": 2.2965,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 0.02,
405
- "learning_rate": 0.00019764,
406
- "loss": 2.3467,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 0.02,
411
- "learning_rate": 0.0001976,
412
- "loss": 2.2691,
413
- "step": 600
414
- },
415
- {
416
- "epoch": 0.02,
417
- "eval_accuracy": 0.6315317518545692,
418
- "eval_loss": 1.8730525970458984,
419
- "eval_runtime": 46.6226,
420
- "eval_samples_per_second": 9.073,
421
- "eval_steps_per_second": 1.137,
422
- "step": 600
423
- },
424
- {
425
- "epoch": 0.02,
426
- "learning_rate": 0.00019756,
427
- "loss": 2.3046,
428
- "step": 610
429
- },
430
- {
431
- "epoch": 0.02,
432
- "learning_rate": 0.00019752000000000002,
433
- "loss": 2.2575,
434
- "step": 620
435
- },
436
- {
437
- "epoch": 0.02,
438
- "learning_rate": 0.00019748,
439
- "loss": 2.2895,
440
- "step": 630
441
- },
442
- {
443
- "epoch": 0.02,
444
- "learning_rate": 0.00019744,
445
- "loss": 2.2699,
446
- "step": 640
447
- },
448
- {
449
- "epoch": 0.02,
450
- "learning_rate": 0.0001974,
451
- "loss": 2.2235,
452
- "step": 650
453
- },
454
- {
455
- "epoch": 0.02,
456
- "learning_rate": 0.00019736000000000002,
457
- "loss": 2.2322,
458
- "step": 660
459
- },
460
- {
461
- "epoch": 0.02,
462
- "learning_rate": 0.00019732000000000001,
463
- "loss": 2.2743,
464
- "step": 670
465
- },
466
- {
467
- "epoch": 0.03,
468
- "learning_rate": 0.00019728,
469
- "loss": 2.2625,
470
- "step": 680
471
- },
472
- {
473
- "epoch": 0.03,
474
- "learning_rate": 0.00019724,
475
- "loss": 2.2843,
476
- "step": 690
477
- },
478
- {
479
- "epoch": 0.03,
480
- "learning_rate": 0.0001972,
481
- "loss": 2.2715,
482
- "step": 700
483
- },
484
- {
485
- "epoch": 0.03,
486
- "eval_accuracy": 0.632428466617755,
487
- "eval_loss": 1.8891419172286987,
488
- "eval_runtime": 47.1404,
489
- "eval_samples_per_second": 8.973,
490
- "eval_steps_per_second": 1.124,
491
- "step": 700
492
- },
493
- {
494
- "epoch": 0.03,
495
- "learning_rate": 0.00019716000000000002,
496
- "loss": 2.2755,
497
- "step": 710
498
- },
499
- {
500
- "epoch": 0.03,
501
- "learning_rate": 0.00019712,
502
- "loss": 2.2597,
503
- "step": 720
504
- },
505
- {
506
- "epoch": 0.03,
507
- "learning_rate": 0.00019708000000000003,
508
- "loss": 2.2185,
509
- "step": 730
510
- },
511
- {
512
- "epoch": 0.03,
513
- "learning_rate": 0.00019704,
514
- "loss": 2.3046,
515
- "step": 740
516
- },
517
- {
518
- "epoch": 0.03,
519
- "learning_rate": 0.00019700000000000002,
520
- "loss": 2.3451,
521
- "step": 750
522
- },
523
- {
524
- "epoch": 0.03,
525
- "learning_rate": 0.00019696,
526
- "loss": 2.2956,
527
- "step": 760
528
- },
529
- {
530
- "epoch": 0.03,
531
- "learning_rate": 0.00019692,
532
- "loss": 2.2371,
533
- "step": 770
534
- },
535
- {
536
- "epoch": 0.03,
537
- "learning_rate": 0.00019688000000000003,
538
- "loss": 2.2705,
539
- "step": 780
540
- },
541
- {
542
- "epoch": 0.03,
543
- "learning_rate": 0.00019684,
544
- "loss": 2.2141,
545
- "step": 790
546
- },
547
- {
548
- "epoch": 0.03,
549
- "learning_rate": 0.0001968,
550
- "loss": 2.2344,
551
- "step": 800
552
- },
553
- {
554
- "epoch": 0.03,
555
- "eval_accuracy": 0.6379717942447216,
556
- "eval_loss": 1.8463128805160522,
557
- "eval_runtime": 47.4067,
558
- "eval_samples_per_second": 8.923,
559
- "eval_steps_per_second": 1.118,
560
- "step": 800
561
- },
562
- {
563
- "epoch": 0.03,
564
- "learning_rate": 0.00019676,
565
- "loss": 2.221,
566
- "step": 810
567
- },
568
- {
569
- "epoch": 0.03,
570
- "learning_rate": 0.00019672000000000003,
571
- "loss": 2.3338,
572
- "step": 820
573
- },
574
- {
575
- "epoch": 0.03,
576
- "learning_rate": 0.00019668000000000002,
577
- "loss": 2.1978,
578
- "step": 830
579
- },
580
- {
581
- "epoch": 0.03,
582
- "learning_rate": 0.00019664000000000001,
583
- "loss": 2.2532,
584
- "step": 840
585
- },
586
- {
587
- "epoch": 0.03,
588
- "learning_rate": 0.0001966,
589
- "loss": 2.2765,
590
- "step": 850
591
- },
592
- {
593
- "epoch": 0.03,
594
- "learning_rate": 0.00019656,
595
- "loss": 2.2643,
596
- "step": 860
597
- },
598
- {
599
- "epoch": 0.03,
600
- "learning_rate": 0.00019652000000000002,
601
- "loss": 2.2794,
602
- "step": 870
603
- },
604
- {
605
- "epoch": 0.03,
606
- "learning_rate": 0.00019648000000000002,
607
- "loss": 2.2269,
608
- "step": 880
609
- },
610
- {
611
- "epoch": 0.03,
612
- "learning_rate": 0.00019644,
613
- "loss": 2.2576,
614
- "step": 890
615
- },
616
- {
617
- "epoch": 0.03,
618
- "learning_rate": 0.0001964,
619
- "loss": 2.2234,
620
- "step": 900
621
- },
622
- {
623
- "epoch": 0.03,
624
- "eval_accuracy": 0.632183908045977,
625
- "eval_loss": 1.866058588027954,
626
- "eval_runtime": 46.4721,
627
- "eval_samples_per_second": 9.102,
628
- "eval_steps_per_second": 1.14,
629
- "step": 900
630
- },
631
- {
632
- "epoch": 0.03,
633
- "learning_rate": 0.00019636000000000002,
634
- "loss": 2.2282,
635
- "step": 910
636
- },
637
- {
638
- "epoch": 0.03,
639
- "learning_rate": 0.00019632000000000002,
640
- "loss": 2.2236,
641
- "step": 920
642
- },
643
- {
644
- "epoch": 0.03,
645
- "learning_rate": 0.00019628,
646
- "loss": 2.2008,
647
- "step": 930
648
- },
649
- {
650
- "epoch": 0.03,
651
- "learning_rate": 0.00019624,
652
- "loss": 2.2731,
653
- "step": 940
654
- },
655
- {
656
- "epoch": 0.04,
657
- "learning_rate": 0.0001962,
658
- "loss": 2.2424,
659
- "step": 950
660
- },
661
- {
662
- "epoch": 0.04,
663
- "learning_rate": 0.00019616000000000002,
664
- "loss": 2.1774,
665
- "step": 960
666
- },
667
- {
668
- "epoch": 0.04,
669
- "learning_rate": 0.00019612,
670
- "loss": 2.1652,
671
- "step": 970
672
- },
673
- {
674
- "epoch": 0.04,
675
- "learning_rate": 0.00019608,
676
- "loss": 2.1703,
677
- "step": 980
678
- },
679
- {
680
- "epoch": 0.04,
681
- "learning_rate": 0.00019604,
682
- "loss": 2.185,
683
- "step": 990
684
- },
685
- {
686
- "epoch": 0.04,
687
- "learning_rate": 0.000196,
688
- "loss": 2.1818,
689
- "step": 1000
690
- },
691
- {
692
- "epoch": 0.04,
693
- "eval_accuracy": 0.6382163528164996,
694
- "eval_loss": 1.81806218624115,
695
- "eval_runtime": 60.7049,
696
- "eval_samples_per_second": 6.968,
697
- "eval_steps_per_second": 0.873,
698
- "step": 1000
699
- },
700
- {
701
- "epoch": 0.04,
702
- "learning_rate": 0.00019596000000000001,
703
- "loss": 2.1961,
704
- "step": 1010
705
- },
706
- {
707
- "epoch": 0.04,
708
- "learning_rate": 0.00019592,
709
- "loss": 2.1806,
710
- "step": 1020
711
- },
712
- {
713
- "epoch": 0.04,
714
- "learning_rate": 0.00019588000000000003,
715
- "loss": 2.1523,
716
- "step": 1030
717
- },
718
- {
719
- "epoch": 0.04,
720
- "learning_rate": 0.00019584,
721
- "loss": 2.1305,
722
- "step": 1040
723
- },
724
- {
725
- "epoch": 0.04,
726
- "learning_rate": 0.00019580000000000002,
727
- "loss": 2.1467,
728
- "step": 1050
729
- },
730
- {
731
- "epoch": 0.04,
732
- "learning_rate": 0.00019576,
733
- "loss": 2.1324,
734
- "step": 1060
735
- },
736
- {
737
- "epoch": 0.04,
738
- "learning_rate": 0.00019572,
739
- "loss": 2.211,
740
- "step": 1070
741
- },
742
- {
743
- "epoch": 0.04,
744
- "learning_rate": 0.00019568000000000002,
745
- "loss": 2.1626,
746
- "step": 1080
747
- },
748
- {
749
- "epoch": 0.04,
750
- "learning_rate": 0.00019564,
751
- "loss": 2.1327,
752
- "step": 1090
753
- },
754
- {
755
- "epoch": 0.04,
756
- "learning_rate": 0.0001956,
757
- "loss": 2.1808,
758
- "step": 1100
759
- },
760
- {
761
- "epoch": 0.04,
762
- "eval_accuracy": 0.6419662509170947,
763
- "eval_loss": 1.838297724723816,
764
- "eval_runtime": 61.0821,
765
- "eval_samples_per_second": 6.925,
766
- "eval_steps_per_second": 0.868,
767
- "step": 1100
768
- },
769
- {
770
- "epoch": 0.04,
771
- "learning_rate": 0.00019556,
772
- "loss": 2.2277,
773
- "step": 1110
774
- },
775
- {
776
- "epoch": 0.04,
777
- "learning_rate": 0.00019552000000000003,
778
- "loss": 2.1227,
779
- "step": 1120
780
- },
781
- {
782
- "epoch": 0.04,
783
- "learning_rate": 0.00019548000000000002,
784
- "loss": 2.2336,
785
- "step": 1130
786
- },
787
- {
788
- "epoch": 0.04,
789
- "learning_rate": 0.00019544,
790
- "loss": 2.1708,
791
- "step": 1140
792
- },
793
- {
794
- "epoch": 0.04,
795
- "learning_rate": 0.0001954,
796
- "loss": 2.1549,
797
- "step": 1150
798
- },
799
- {
800
- "epoch": 0.04,
801
- "learning_rate": 0.00019536,
802
- "loss": 2.1776,
803
- "step": 1160
804
- },
805
- {
806
- "epoch": 0.04,
807
- "learning_rate": 0.00019532000000000002,
808
- "loss": 2.1634,
809
- "step": 1170
810
- },
811
- {
812
- "epoch": 0.04,
813
- "learning_rate": 0.00019528000000000001,
814
- "loss": 2.1891,
815
- "step": 1180
816
- },
817
- {
818
- "epoch": 0.04,
819
- "learning_rate": 0.00019524,
820
- "loss": 2.2221,
821
- "step": 1190
822
- },
823
- {
824
- "epoch": 0.04,
825
- "learning_rate": 0.0001952,
826
- "loss": 2.1885,
827
- "step": 1200
828
- },
829
- {
830
- "epoch": 0.04,
831
- "eval_accuracy": 0.6440857585391702,
832
- "eval_loss": 1.8327720165252686,
833
- "eval_runtime": 61.1648,
834
- "eval_samples_per_second": 6.916,
835
- "eval_steps_per_second": 0.867,
836
- "step": 1200
837
- },
838
- {
839
- "epoch": 0.04,
840
- "learning_rate": 0.00019516000000000002,
841
- "loss": 2.1854,
842
- "step": 1210
843
- },
844
- {
845
- "epoch": 0.05,
846
- "learning_rate": 0.00019512000000000002,
847
- "loss": 2.1554,
848
- "step": 1220
849
- },
850
- {
851
- "epoch": 0.05,
852
- "learning_rate": 0.00019508,
853
- "loss": 2.1365,
854
- "step": 1230
855
- },
856
- {
857
- "epoch": 0.05,
858
- "learning_rate": 0.00019504,
859
- "loss": 2.1783,
860
- "step": 1240
861
- },
862
- {
863
- "epoch": 0.05,
864
- "learning_rate": 0.000195,
865
- "loss": 2.1918,
866
- "step": 1250
867
- },
868
- {
869
- "epoch": 0.05,
870
- "learning_rate": 0.00019496000000000002,
871
- "loss": 2.2186,
872
- "step": 1260
873
- },
874
- {
875
- "epoch": 0.05,
876
- "learning_rate": 0.00019492,
877
- "loss": 2.1508,
878
- "step": 1270
879
- },
880
- {
881
- "epoch": 0.05,
882
- "learning_rate": 0.00019488000000000003,
883
- "loss": 2.1997,
884
- "step": 1280
885
- },
886
- {
887
- "epoch": 0.05,
888
- "learning_rate": 0.00019484,
889
- "loss": 2.0978,
890
- "step": 1290
891
- },
892
- {
893
- "epoch": 0.05,
894
- "learning_rate": 0.0001948,
895
- "loss": 2.1547,
896
- "step": 1300
897
- },
898
- {
899
- "epoch": 0.05,
900
- "eval_accuracy": 0.648650851879025,
901
- "eval_loss": 1.7967556715011597,
902
- "eval_runtime": 60.331,
903
- "eval_samples_per_second": 7.011,
904
- "eval_steps_per_second": 0.878,
905
- "step": 1300
906
- },
907
- {
908
- "epoch": 0.05,
909
- "learning_rate": 0.00019476,
910
- "loss": 2.1202,
911
- "step": 1310
912
- },
913
- {
914
- "epoch": 0.05,
915
- "learning_rate": 0.00019472,
916
- "loss": 2.1406,
917
- "step": 1320
918
- },
919
- {
920
- "epoch": 0.05,
921
- "learning_rate": 0.00019468000000000003,
922
- "loss": 2.149,
923
- "step": 1330
924
- },
925
- {
926
- "epoch": 0.05,
927
- "learning_rate": 0.00019464,
928
- "loss": 2.1915,
929
- "step": 1340
930
- },
931
- {
932
- "epoch": 0.05,
933
- "learning_rate": 0.00019460000000000001,
934
- "loss": 2.1721,
935
- "step": 1350
936
- },
937
- {
938
- "epoch": 0.05,
939
- "learning_rate": 0.00019456,
940
- "loss": 2.1155,
941
- "step": 1360
942
- },
943
- {
944
- "epoch": 0.05,
945
- "learning_rate": 0.00019452,
946
- "loss": 2.138,
947
- "step": 1370
948
- },
949
- {
950
- "epoch": 0.05,
951
- "learning_rate": 0.00019448000000000002,
952
- "loss": 2.0946,
953
- "step": 1380
954
- },
955
- {
956
- "epoch": 0.05,
957
- "learning_rate": 0.00019444,
958
- "loss": 2.1564,
959
- "step": 1390
960
- },
961
- {
962
- "epoch": 0.05,
963
- "learning_rate": 0.0001944,
964
- "loss": 2.1261,
965
- "step": 1400
966
- },
967
- {
968
- "epoch": 0.05,
969
- "eval_accuracy": 0.6471835004483574,
970
- "eval_loss": 1.8035624027252197,
971
- "eval_runtime": 59.6014,
972
- "eval_samples_per_second": 7.097,
973
- "eval_steps_per_second": 0.889,
974
- "step": 1400
975
- },
976
- {
977
- "epoch": 0.05,
978
- "learning_rate": 0.00019436,
979
- "loss": 2.1554,
980
- "step": 1410
981
- },
982
- {
983
- "epoch": 0.05,
984
- "learning_rate": 0.00019432000000000002,
985
- "loss": 2.1857,
986
- "step": 1420
987
- },
988
- {
989
- "epoch": 0.05,
990
- "learning_rate": 0.00019428000000000002,
991
- "loss": 2.1381,
992
- "step": 1430
993
- },
994
- {
995
- "epoch": 0.05,
996
- "learning_rate": 0.00019424,
997
- "loss": 2.1638,
998
- "step": 1440
999
- },
1000
- {
1001
- "epoch": 0.05,
1002
- "learning_rate": 0.0001942,
1003
- "loss": 2.1491,
1004
- "step": 1450
1005
- },
1006
- {
1007
- "epoch": 0.05,
1008
- "learning_rate": 0.00019416,
1009
- "loss": 2.209,
1010
- "step": 1460
1011
- },
1012
- {
1013
- "epoch": 0.05,
1014
- "learning_rate": 0.00019412000000000002,
1015
- "loss": 2.2035,
1016
- "step": 1470
1017
- },
1018
- {
1019
- "epoch": 0.05,
1020
- "learning_rate": 0.00019408,
1021
- "loss": 2.079,
1022
- "step": 1480
1023
- },
1024
- {
1025
- "epoch": 0.06,
1026
- "learning_rate": 0.00019404,
1027
- "loss": 2.1535,
1028
- "step": 1490
1029
- },
1030
- {
1031
- "epoch": 0.06,
1032
- "learning_rate": 0.000194,
1033
- "loss": 2.1074,
1034
- "step": 1500
1035
- },
1036
- {
1037
- "epoch": 0.06,
1038
- "eval_accuracy": 0.6522377109317682,
1039
- "eval_loss": 1.7710509300231934,
1040
- "eval_runtime": 45.8627,
1041
- "eval_samples_per_second": 9.223,
1042
- "eval_steps_per_second": 1.156,
1043
- "step": 1500
1044
- },
1045
- {
1046
- "epoch": 0.06,
1047
- "learning_rate": 0.00019396000000000002,
1048
- "loss": 2.0692,
1049
- "step": 1510
1050
- },
1051
- {
1052
- "epoch": 0.06,
1053
- "learning_rate": 0.00019392000000000001,
1054
- "loss": 2.106,
1055
- "step": 1520
1056
- },
1057
- {
1058
- "epoch": 0.06,
1059
- "learning_rate": 0.00019388,
1060
- "loss": 2.133,
1061
- "step": 1530
1062
- },
1063
- {
1064
- "epoch": 0.06,
1065
- "learning_rate": 0.00019384,
1066
- "loss": 2.0844,
1067
- "step": 1540
1068
- },
1069
- {
1070
- "epoch": 0.06,
1071
- "learning_rate": 0.0001938,
1072
- "loss": 2.0839,
1073
- "step": 1550
1074
- },
1075
- {
1076
- "epoch": 0.06,
1077
- "learning_rate": 0.00019376000000000002,
1078
- "loss": 2.1023,
1079
- "step": 1560
1080
- },
1081
- {
1082
- "epoch": 0.06,
1083
- "learning_rate": 0.00019372,
1084
- "loss": 2.124,
1085
- "step": 1570
1086
- },
1087
- {
1088
- "epoch": 0.06,
1089
- "learning_rate": 0.00019368000000000003,
1090
- "loss": 2.1142,
1091
- "step": 1580
1092
- },
1093
- {
1094
- "epoch": 0.06,
1095
- "learning_rate": 0.00019364,
1096
- "loss": 2.0142,
1097
- "step": 1590
1098
- },
1099
- {
1100
- "epoch": 0.06,
1101
- "learning_rate": 0.00019360000000000002,
1102
- "loss": 2.1537,
1103
- "step": 1600
1104
- },
1105
- {
1106
- "epoch": 0.06,
1107
- "eval_accuracy": 0.6504442814053966,
1108
- "eval_loss": 1.7606570720672607,
1109
- "eval_runtime": 46.4971,
1110
- "eval_samples_per_second": 9.097,
1111
- "eval_steps_per_second": 1.14,
1112
- "step": 1600
1113
- },
1114
- {
1115
- "epoch": 0.06,
1116
- "learning_rate": 0.00019356,
1117
- "loss": 2.0624,
1118
- "step": 1610
1119
- },
1120
- {
1121
- "epoch": 0.06,
1122
- "learning_rate": 0.00019352,
1123
- "loss": 2.1308,
1124
- "step": 1620
1125
- },
1126
- {
1127
- "epoch": 0.06,
1128
- "learning_rate": 0.00019348000000000002,
1129
- "loss": 2.0587,
1130
- "step": 1630
1131
- },
1132
- {
1133
- "epoch": 0.06,
1134
- "learning_rate": 0.00019344,
1135
- "loss": 2.1031,
1136
- "step": 1640
1137
- },
1138
- {
1139
- "epoch": 0.06,
1140
- "learning_rate": 0.0001934,
1141
- "loss": 2.0765,
1142
- "step": 1650
1143
- },
1144
- {
1145
- "epoch": 0.06,
1146
- "learning_rate": 0.00019336,
1147
- "loss": 2.0972,
1148
- "step": 1660
1149
- },
1150
- {
1151
- "epoch": 0.06,
1152
- "learning_rate": 0.00019332,
1153
- "loss": 2.0875,
1154
- "step": 1670
1155
- },
1156
- {
1157
- "epoch": 0.06,
1158
- "learning_rate": 0.00019328000000000002,
1159
- "loss": 2.1277,
1160
- "step": 1680
1161
- },
1162
- {
1163
- "epoch": 0.06,
1164
- "learning_rate": 0.00019323999999999999,
1165
- "loss": 2.088,
1166
- "step": 1690
1167
- },
1168
- {
1169
- "epoch": 0.06,
1170
- "learning_rate": 0.0001932,
1171
- "loss": 2.1085,
1172
- "step": 1700
1173
- },
1174
- {
1175
- "epoch": 0.06,
1176
- "eval_accuracy": 0.6466943833048014,
1177
- "eval_loss": 1.7533295154571533,
1178
- "eval_runtime": 45.9496,
1179
- "eval_samples_per_second": 9.206,
1180
- "eval_steps_per_second": 1.153,
1181
- "step": 1700
1182
- },
1183
- {
1184
- "epoch": 0.06,
1185
- "learning_rate": 0.00019316,
1186
- "loss": 2.159,
1187
- "step": 1710
1188
- },
1189
- {
1190
- "epoch": 0.06,
1191
- "learning_rate": 0.00019312000000000002,
1192
- "loss": 2.0808,
1193
- "step": 1720
1194
- },
1195
- {
1196
- "epoch": 0.06,
1197
- "learning_rate": 0.00019308000000000001,
1198
- "loss": 2.1481,
1199
- "step": 1730
1200
- },
1201
- {
1202
- "epoch": 0.06,
1203
- "learning_rate": 0.00019304,
1204
- "loss": 2.0853,
1205
- "step": 1740
1206
- },
1207
- {
1208
- "epoch": 0.06,
1209
- "learning_rate": 0.000193,
1210
- "loss": 2.0692,
1211
- "step": 1750
1212
- },
1213
- {
1214
- "epoch": 0.07,
1215
- "learning_rate": 0.00019296,
1216
- "loss": 2.2094,
1217
- "step": 1760
1218
- },
1219
- {
1220
- "epoch": 0.07,
1221
- "learning_rate": 0.00019292000000000002,
1222
- "loss": 2.0793,
1223
- "step": 1770
1224
- },
1225
- {
1226
- "epoch": 0.07,
1227
- "learning_rate": 0.00019288,
1228
- "loss": 2.0469,
1229
- "step": 1780
1230
- },
1231
- {
1232
- "epoch": 0.07,
1233
- "learning_rate": 0.00019284,
1234
- "loss": 2.1105,
1235
- "step": 1790
1236
- },
1237
- {
1238
- "epoch": 0.07,
1239
- "learning_rate": 0.0001928,
1240
- "loss": 2.1268,
1241
- "step": 1800
1242
- },
1243
- {
1244
- "epoch": 0.07,
1245
- "eval_accuracy": 0.6550093747452514,
1246
- "eval_loss": 1.7450594902038574,
1247
- "eval_runtime": 46.1382,
1248
- "eval_samples_per_second": 9.168,
1249
- "eval_steps_per_second": 1.149,
1250
- "step": 1800
1251
- },
1252
- {
1253
- "epoch": 0.07,
1254
- "learning_rate": 0.00019276000000000002,
1255
- "loss": 2.0895,
1256
- "step": 1810
1257
- },
1258
- {
1259
- "epoch": 0.07,
1260
- "learning_rate": 0.00019272,
1261
- "loss": 2.1127,
1262
- "step": 1820
1263
- },
1264
- {
1265
- "epoch": 0.07,
1266
- "learning_rate": 0.00019268,
1267
- "loss": 2.0435,
1268
- "step": 1830
1269
- },
1270
- {
1271
- "epoch": 0.07,
1272
- "learning_rate": 0.00019264,
1273
- "loss": 2.0799,
1274
- "step": 1840
1275
- },
1276
- {
1277
- "epoch": 0.07,
1278
- "learning_rate": 0.0001926,
1279
- "loss": 2.0886,
1280
- "step": 1850
1281
- },
1282
- {
1283
- "epoch": 0.07,
1284
- "learning_rate": 0.00019256,
1285
- "loss": 2.0726,
1286
- "step": 1860
1287
- },
1288
- {
1289
- "epoch": 0.07,
1290
- "learning_rate": 0.00019252,
1291
- "loss": 2.1095,
1292
- "step": 1870
1293
- },
1294
- {
1295
- "epoch": 0.07,
1296
- "learning_rate": 0.00019248000000000003,
1297
- "loss": 2.0732,
1298
- "step": 1880
1299
- },
1300
- {
1301
- "epoch": 0.07,
1302
- "learning_rate": 0.00019244000000000002,
1303
- "loss": 2.0577,
1304
- "step": 1890
1305
- },
1306
- {
1307
- "epoch": 0.07,
1308
- "learning_rate": 0.00019240000000000001,
1309
- "loss": 2.0991,
1310
- "step": 1900
1311
- },
1312
- {
1313
- "epoch": 0.07,
1314
- "eval_accuracy": 0.6504442814053966,
1315
- "eval_loss": 1.7744449377059937,
1316
- "eval_runtime": 46.1319,
1317
- "eval_samples_per_second": 9.169,
1318
- "eval_steps_per_second": 1.149,
1319
- "step": 1900
1320
- },
1321
- {
1322
- "epoch": 0.07,
1323
- "learning_rate": 0.00019236,
1324
- "loss": 2.113,
1325
- "step": 1910
1326
- },
1327
- {
1328
- "epoch": 0.07,
1329
- "learning_rate": 0.00019232,
1330
- "loss": 2.0054,
1331
- "step": 1920
1332
- },
1333
- {
1334
- "epoch": 0.07,
1335
- "learning_rate": 0.00019228000000000002,
1336
- "loss": 2.0531,
1337
- "step": 1930
1338
- },
1339
- {
1340
- "epoch": 0.07,
1341
- "learning_rate": 0.00019224000000000002,
1342
- "loss": 2.0268,
1343
- "step": 1940
1344
- },
1345
- {
1346
- "epoch": 0.07,
1347
- "learning_rate": 0.0001922,
1348
- "loss": 2.0703,
1349
- "step": 1950
1350
- },
1351
- {
1352
- "epoch": 0.07,
1353
- "learning_rate": 0.00019216,
1354
- "loss": 2.0101,
1355
- "step": 1960
1356
- },
1357
- {
1358
- "epoch": 0.07,
1359
- "learning_rate": 0.00019212000000000002,
1360
- "loss": 2.0598,
1361
- "step": 1970
1362
- },
1363
- {
1364
- "epoch": 0.07,
1365
- "learning_rate": 0.00019208000000000002,
1366
- "loss": 2.0731,
1367
- "step": 1980
1368
- },
1369
- {
1370
- "epoch": 0.07,
1371
- "learning_rate": 0.00019204,
1372
- "loss": 2.0695,
1373
- "step": 1990
1374
- },
1375
- {
1376
- "epoch": 0.07,
1377
- "learning_rate": 0.000192,
1378
- "loss": 2.0982,
1379
- "step": 2000
1380
- },
1381
- {
1382
- "epoch": 0.07,
1383
- "eval_accuracy": 0.6515855547403603,
1384
- "eval_loss": 1.738739252090454,
1385
- "eval_runtime": 45.7921,
1386
- "eval_samples_per_second": 9.237,
1387
- "eval_steps_per_second": 1.157,
1388
- "step": 2000
1389
- },
1390
- {
1391
- "epoch": 0.07,
1392
- "learning_rate": 0.00019196,
1393
- "loss": 2.0565,
1394
- "step": 2010
1395
- },
1396
- {
1397
- "epoch": 0.07,
1398
- "learning_rate": 0.00019192000000000002,
1399
- "loss": 2.0358,
1400
- "step": 2020
1401
- },
1402
- {
1403
- "epoch": 0.07,
1404
- "learning_rate": 0.00019188,
1405
- "loss": 2.0407,
1406
- "step": 2030
1407
- },
1408
- {
1409
- "epoch": 0.08,
1410
- "learning_rate": 0.00019184,
1411
- "loss": 2.0584,
1412
- "step": 2040
1413
- },
1414
- {
1415
- "epoch": 0.08,
1416
- "learning_rate": 0.0001918,
1417
- "loss": 2.0313,
1418
- "step": 2050
1419
- },
1420
- {
1421
- "epoch": 0.08,
1422
- "learning_rate": 0.00019176,
1423
- "loss": 2.0852,
1424
- "step": 2060
1425
- },
1426
- {
1427
- "epoch": 0.08,
1428
- "learning_rate": 0.00019172000000000001,
1429
- "loss": 2.1305,
1430
- "step": 2070
1431
- },
1432
- {
1433
- "epoch": 0.08,
1434
- "learning_rate": 0.00019168,
1435
- "loss": 2.8885,
1436
- "step": 2080
1437
- },
1438
- {
1439
- "epoch": 0.08,
1440
- "learning_rate": 0.00019164000000000003,
1441
- "loss": 5.2587,
1442
- "step": 2090
1443
- },
1444
- {
1445
- "epoch": 0.08,
1446
- "learning_rate": 0.0001916,
1447
- "loss": 5.5295,
1448
- "step": 2100
1449
- },
1450
- {
1451
- "epoch": 0.08,
1452
- "eval_accuracy": 0.31556207711746964,
1453
- "eval_loss": 4.5144548416137695,
1454
- "eval_runtime": 46.4657,
1455
- "eval_samples_per_second": 9.103,
1456
- "eval_steps_per_second": 1.141,
1457
- "step": 2100
1458
- },
1459
- {
1460
- "epoch": 0.08,
1461
- "learning_rate": 0.00019156000000000002,
1462
- "loss": 4.2499,
1463
- "step": 2110
1464
- },
1465
- {
1466
- "epoch": 0.08,
1467
- "learning_rate": 0.00019152,
1468
- "loss": 2.2078,
1469
- "step": 2120
1470
- },
1471
- {
1472
- "epoch": 0.08,
1473
- "learning_rate": 0.00019148,
1474
- "loss": 2.1916,
1475
- "step": 2130
1476
- },
1477
- {
1478
- "epoch": 0.08,
1479
- "learning_rate": 0.00019144000000000002,
1480
- "loss": 2.1247,
1481
- "step": 2140
1482
- },
1483
- {
1484
- "epoch": 0.08,
1485
- "learning_rate": 0.0001914,
1486
- "loss": 2.0517,
1487
- "step": 2150
1488
- },
1489
- {
1490
- "epoch": 0.08,
1491
- "learning_rate": 0.00019136,
1492
- "loss": 2.1537,
1493
- "step": 2160
1494
- },
1495
- {
1496
- "epoch": 0.08,
1497
- "learning_rate": 0.00019132,
1498
- "loss": 2.1611,
1499
- "step": 2170
1500
- },
1501
- {
1502
- "epoch": 0.08,
1503
- "learning_rate": 0.00019128000000000003,
1504
- "loss": 2.1015,
1505
- "step": 2180
1506
- },
1507
- {
1508
- "epoch": 0.08,
1509
- "learning_rate": 0.00019124000000000002,
1510
- "loss": 2.191,
1511
- "step": 2190
1512
- },
1513
- {
1514
- "epoch": 0.08,
1515
- "learning_rate": 0.0001912,
1516
- "loss": 2.0354,
1517
- "step": 2200
1518
- },
1519
- {
1520
- "epoch": 0.08,
1521
- "eval_accuracy": 0.652645308551398,
1522
- "eval_loss": 1.74555242061615,
1523
- "eval_runtime": 45.8199,
1524
- "eval_samples_per_second": 9.232,
1525
- "eval_steps_per_second": 1.157,
1526
- "step": 2200
1527
- },
1528
- {
1529
- "epoch": 0.08,
1530
- "learning_rate": 0.00019116,
1531
- "loss": 2.0853,
1532
- "step": 2210
1533
- },
1534
- {
1535
- "epoch": 0.08,
1536
- "learning_rate": 0.00019112,
1537
- "loss": 2.0091,
1538
- "step": 2220
1539
- },
1540
- {
1541
- "epoch": 0.08,
1542
- "learning_rate": 0.00019108000000000002,
1543
- "loss": 2.093,
1544
- "step": 2230
1545
- },
1546
- {
1547
- "epoch": 0.08,
1548
- "learning_rate": 0.00019104000000000001,
1549
- "loss": 1.9993,
1550
- "step": 2240
1551
- },
1552
- {
1553
- "epoch": 0.08,
1554
- "learning_rate": 0.000191,
1555
- "loss": 2.0933,
1556
- "step": 2250
1557
- },
1558
- {
1559
- "epoch": 0.08,
1560
- "learning_rate": 0.00019096,
1561
- "loss": 2.0847,
1562
- "step": 2260
1563
- },
1564
- {
1565
- "epoch": 0.08,
1566
- "learning_rate": 0.00019092000000000002,
1567
- "loss": 2.1092,
1568
- "step": 2270
1569
- },
1570
- {
1571
- "epoch": 0.08,
1572
- "learning_rate": 0.00019088000000000002,
1573
- "loss": 2.1634,
1574
- "step": 2280
1575
- },
1576
- {
1577
- "epoch": 0.08,
1578
- "learning_rate": 0.00019084,
1579
- "loss": 2.048,
1580
- "step": 2290
1581
- },
1582
- {
1583
- "epoch": 0.08,
1584
- "learning_rate": 0.0001908,
1585
- "loss": 2.0382,
1586
- "step": 2300
1587
- },
1588
- {
1589
- "epoch": 0.08,
1590
- "eval_accuracy": 0.6545202576016956,
1591
- "eval_loss": 1.7376079559326172,
1592
- "eval_runtime": 46.305,
1593
- "eval_samples_per_second": 9.135,
1594
- "eval_steps_per_second": 1.145,
1595
- "step": 2300
1596
- },
1597
- {
1598
- "epoch": 0.09,
1599
- "learning_rate": 0.00019076,
1600
- "loss": 2.0105,
1601
- "step": 2310
1602
- },
1603
- {
1604
- "epoch": 0.09,
1605
- "learning_rate": 0.00019072000000000002,
1606
- "loss": 1.9905,
1607
- "step": 2320
1608
- },
1609
- {
1610
- "epoch": 0.09,
1611
- "learning_rate": 0.00019068,
1612
- "loss": 2.1397,
1613
- "step": 2330
1614
- },
1615
- {
1616
- "epoch": 0.09,
1617
- "learning_rate": 0.00019064000000000003,
1618
- "loss": 1.9861,
1619
- "step": 2340
1620
- },
1621
- {
1622
- "epoch": 0.09,
1623
- "learning_rate": 0.0001906,
1624
- "loss": 2.0603,
1625
- "step": 2350
1626
- },
1627
- {
1628
- "epoch": 0.09,
1629
- "learning_rate": 0.00019056000000000002,
1630
- "loss": 2.0137,
1631
- "step": 2360
1632
- },
1633
- {
1634
- "epoch": 0.09,
1635
- "learning_rate": 0.00019052,
1636
- "loss": 2.0223,
1637
- "step": 2370
1638
- },
1639
- {
1640
- "epoch": 0.09,
1641
- "learning_rate": 0.00019048,
1642
- "loss": 2.0917,
1643
- "step": 2380
1644
- },
1645
- {
1646
- "epoch": 0.09,
1647
- "learning_rate": 0.00019044000000000003,
1648
- "loss": 2.0426,
1649
- "step": 2390
1650
- },
1651
- {
1652
- "epoch": 0.09,
1653
- "learning_rate": 0.0001904,
1654
- "loss": 2.0427,
1655
- "step": 2400
1656
- },
1657
- {
1658
- "epoch": 0.09,
1659
- "eval_accuracy": 0.6534605037906579,
1660
- "eval_loss": 1.7286646366119385,
1661
- "eval_runtime": 46.0656,
1662
- "eval_samples_per_second": 9.183,
1663
- "eval_steps_per_second": 1.151,
1664
- "step": 2400
1665
- },
1666
- {
1667
- "epoch": 0.09,
1668
- "learning_rate": 0.00019036000000000001,
1669
- "loss": 1.9983,
1670
- "step": 2410
1671
- },
1672
- {
1673
- "epoch": 0.09,
1674
- "learning_rate": 0.00019032,
1675
- "loss": 2.0404,
1676
- "step": 2420
1677
- },
1678
- {
1679
- "epoch": 0.09,
1680
- "learning_rate": 0.00019028,
1681
- "loss": 2.0165,
1682
- "step": 2430
1683
- },
1684
- {
1685
- "epoch": 0.09,
1686
- "learning_rate": 0.00019024000000000002,
1687
- "loss": 2.0585,
1688
- "step": 2440
1689
- },
1690
- {
1691
- "epoch": 0.09,
1692
- "learning_rate": 0.0001902,
1693
- "loss": 2.0517,
1694
- "step": 2450
1695
- },
1696
- {
1697
- "epoch": 0.09,
1698
- "learning_rate": 0.00019016,
1699
- "loss": 2.041,
1700
- "step": 2460
1701
- },
1702
- {
1703
- "epoch": 0.09,
1704
- "learning_rate": 0.00019012,
1705
- "loss": 1.9899,
1706
- "step": 2470
1707
- },
1708
- {
1709
- "epoch": 0.09,
1710
- "learning_rate": 0.00019008000000000002,
1711
- "loss": 1.9965,
1712
- "step": 2480
1713
- },
1714
- {
1715
- "epoch": 0.09,
1716
- "learning_rate": 0.00019004000000000002,
1717
- "loss": 2.0312,
1718
- "step": 2490
1719
- },
1720
- {
1721
- "epoch": 0.09,
1722
- "learning_rate": 0.00019,
1723
- "loss": 1.9543,
1724
- "step": 2500
1725
- },
1726
- {
1727
- "epoch": 0.09,
1728
- "eval_accuracy": 0.6536235428385099,
1729
- "eval_loss": 1.7141377925872803,
1730
- "eval_runtime": 46.1754,
1731
- "eval_samples_per_second": 9.161,
1732
- "eval_steps_per_second": 1.148,
1733
- "step": 2500
1734
- },
1735
- {
1736
- "epoch": 0.09,
1737
- "learning_rate": 0.00018996,
1738
- "loss": 2.0157,
1739
- "step": 2510
1740
- },
1741
- {
1742
- "epoch": 0.09,
1743
- "learning_rate": 0.00018992,
1744
- "loss": 2.0398,
1745
- "step": 2520
1746
- },
1747
- {
1748
- "epoch": 0.09,
1749
- "learning_rate": 0.00018988000000000002,
1750
- "loss": 2.0151,
1751
- "step": 2530
1752
- },
1753
- {
1754
- "epoch": 0.09,
1755
- "learning_rate": 0.00018984,
1756
- "loss": 2.0104,
1757
- "step": 2540
1758
- },
1759
- {
1760
- "epoch": 0.09,
1761
- "learning_rate": 0.0001898,
1762
- "loss": 2.1258,
1763
- "step": 2550
1764
- },
1765
- {
1766
- "epoch": 0.09,
1767
- "learning_rate": 0.00018976,
1768
- "loss": 2.0673,
1769
- "step": 2560
1770
- },
1771
- {
1772
- "epoch": 0.09,
1773
- "learning_rate": 0.00018972000000000002,
1774
- "loss": 1.9949,
1775
- "step": 2570
1776
- },
1777
- {
1778
- "epoch": 0.1,
1779
- "learning_rate": 0.00018968,
1780
- "loss": 2.0162,
1781
- "step": 2580
1782
- },
1783
- {
1784
- "epoch": 0.1,
1785
- "learning_rate": 0.00018964,
1786
- "loss": 2.065,
1787
- "step": 2590
1788
- },
1789
- {
1790
- "epoch": 0.1,
1791
- "learning_rate": 0.0001896,
1792
- "loss": 1.9798,
1793
- "step": 2600
1794
- },
1795
- {
1796
- "epoch": 0.1,
1797
- "eval_accuracy": 0.6568843237955491,
1798
- "eval_loss": 1.7275162935256958,
1799
- "eval_runtime": 46.0053,
1800
- "eval_samples_per_second": 9.195,
1801
- "eval_steps_per_second": 1.152,
1802
- "step": 2600
1803
- },
1804
- {
1805
- "epoch": 0.1,
1806
- "learning_rate": 0.00018956,
1807
- "loss": 2.0334,
1808
- "step": 2610
1809
- },
1810
- {
1811
- "epoch": 0.1,
1812
- "learning_rate": 0.00018952000000000002,
1813
- "loss": 2.0242,
1814
- "step": 2620
1815
- },
1816
- {
1817
- "epoch": 0.1,
1818
- "learning_rate": 0.00018948,
1819
- "loss": 2.0321,
1820
- "step": 2630
1821
- },
1822
- {
1823
- "epoch": 0.1,
1824
- "learning_rate": 0.00018944000000000003,
1825
- "loss": 2.0455,
1826
- "step": 2640
1827
- },
1828
- {
1829
- "epoch": 0.1,
1830
- "learning_rate": 0.0001894,
1831
- "loss": 2.0152,
1832
- "step": 2650
1833
- },
1834
- {
1835
- "epoch": 0.1,
1836
- "learning_rate": 0.00018936000000000002,
1837
- "loss": 2.0764,
1838
- "step": 2660
1839
- },
1840
- {
1841
- "epoch": 0.1,
1842
- "learning_rate": 0.00018932,
1843
- "loss": 2.0572,
1844
- "step": 2670
1845
- },
1846
- {
1847
- "epoch": 0.1,
1848
- "learning_rate": 0.00018928,
1849
- "loss": 1.9985,
1850
- "step": 2680
1851
- },
1852
- {
1853
- "epoch": 0.1,
1854
- "learning_rate": 0.00018924000000000002,
1855
- "loss": 2.0185,
1856
- "step": 2690
1857
- },
1858
- {
1859
- "epoch": 0.1,
1860
- "learning_rate": 0.0001892,
1861
- "loss": 2.0054,
1862
- "step": 2700
1863
- },
1864
- {
1865
- "epoch": 0.1,
1866
- "eval_accuracy": 0.6598190266568843,
1867
- "eval_loss": 1.6908519268035889,
1868
- "eval_runtime": 45.7104,
1869
- "eval_samples_per_second": 9.254,
1870
- "eval_steps_per_second": 1.159,
1871
- "step": 2700
1872
- },
1873
- {
1874
- "epoch": 0.1,
1875
- "learning_rate": 0.00018916,
1876
- "loss": 2.0133,
1877
- "step": 2710
1878
- },
1879
- {
1880
- "epoch": 0.1,
1881
- "learning_rate": 0.00018912,
1882
- "loss": 2.0096,
1883
- "step": 2720
1884
- },
1885
- {
1886
- "epoch": 0.1,
1887
- "learning_rate": 0.00018908000000000003,
1888
- "loss": 2.0126,
1889
- "step": 2730
1890
- },
1891
- {
1892
- "epoch": 0.1,
1893
- "learning_rate": 0.00018904000000000002,
1894
- "loss": 1.9772,
1895
- "step": 2740
1896
- },
1897
- {
1898
- "epoch": 0.1,
1899
- "learning_rate": 0.00018899999999999999,
1900
- "loss": 2.0467,
1901
- "step": 2750
1902
- },
1903
- {
1904
- "epoch": 0.1,
1905
- "learning_rate": 0.00018896,
1906
- "loss": 1.9178,
1907
- "step": 2760
1908
- },
1909
- {
1910
- "epoch": 0.1,
1911
- "learning_rate": 0.00018892,
1912
- "loss": 2.0297,
1913
- "step": 2770
1914
- },
1915
- {
1916
- "epoch": 0.1,
1917
- "learning_rate": 0.00018888000000000002,
1918
- "loss": 2.0253,
1919
- "step": 2780
1920
- },
1921
- {
1922
- "epoch": 0.1,
1923
- "learning_rate": 0.00018884000000000001,
1924
- "loss": 1.9633,
1925
- "step": 2790
1926
- },
1927
- {
1928
- "epoch": 0.1,
1929
- "learning_rate": 0.0001888,
1930
- "loss": 2.0391,
1931
- "step": 2800
1932
- },
1933
- {
1934
- "epoch": 0.1,
1935
- "eval_accuracy": 0.657128882367327,
1936
- "eval_loss": 1.7396591901779175,
1937
- "eval_runtime": 46.5669,
1938
- "eval_samples_per_second": 9.084,
1939
- "eval_steps_per_second": 1.138,
1940
- "step": 2800
1941
- },
1942
- {
1943
- "epoch": 0.1,
1944
- "learning_rate": 0.00018876,
1945
- "loss": 2.0096,
1946
- "step": 2810
1947
- },
1948
- {
1949
- "epoch": 0.1,
1950
- "learning_rate": 0.00018872,
1951
- "loss": 1.9861,
1952
- "step": 2820
1953
- },
1954
- {
1955
- "epoch": 0.1,
1956
- "learning_rate": 0.00018868000000000002,
1957
- "loss": 1.9337,
1958
- "step": 2830
1959
- },
1960
- {
1961
- "epoch": 0.1,
1962
- "learning_rate": 0.00018864,
1963
- "loss": 2.0316,
1964
- "step": 2840
1965
- },
1966
- {
1967
- "epoch": 0.11,
1968
- "learning_rate": 0.0001886,
1969
- "loss": 2.0544,
1970
- "step": 2850
1971
- },
1972
- {
1973
- "epoch": 0.11,
1974
- "learning_rate": 0.00018856,
1975
- "loss": 1.9592,
1976
- "step": 2860
1977
- },
1978
- {
1979
- "epoch": 0.11,
1980
- "learning_rate": 0.00018852000000000002,
1981
- "loss": 1.9677,
1982
- "step": 2870
1983
- },
1984
- {
1985
- "epoch": 0.11,
1986
- "learning_rate": 0.00018848,
1987
- "loss": 2.0595,
1988
- "step": 2880
1989
- },
1990
- {
1991
- "epoch": 0.11,
1992
- "learning_rate": 0.00018844,
1993
- "loss": 1.9924,
1994
- "step": 2890
1995
- },
1996
- {
1997
- "epoch": 0.11,
1998
- "learning_rate": 0.0001884,
1999
- "loss": 1.988,
2000
- "step": 2900
2001
- },
2002
- {
2003
- "epoch": 0.11,
2004
- "eval_accuracy": 0.6597375071329583,
2005
- "eval_loss": 1.7008874416351318,
2006
- "eval_runtime": 46.6514,
2007
- "eval_samples_per_second": 9.067,
2008
- "eval_steps_per_second": 1.136,
2009
- "step": 2900
2010
- },
2011
- {
2012
- "epoch": 0.11,
2013
- "learning_rate": 0.00018836,
2014
- "loss": 2.0202,
2015
- "step": 2910
2016
- },
2017
- {
2018
- "epoch": 0.11,
2019
- "learning_rate": 0.00018832,
2020
- "loss": 1.9864,
2021
- "step": 2920
2022
- },
2023
- {
2024
- "epoch": 0.11,
2025
- "learning_rate": 0.00018828,
2026
- "loss": 1.9831,
2027
- "step": 2930
2028
- },
2029
- {
2030
- "epoch": 0.11,
2031
- "learning_rate": 0.00018824000000000003,
2032
- "loss": 1.9967,
2033
- "step": 2940
2034
- },
2035
- {
2036
- "epoch": 0.11,
2037
- "learning_rate": 0.0001882,
2038
- "loss": 1.9784,
2039
- "step": 2950
2040
- },
2041
- {
2042
- "epoch": 0.11,
2043
- "learning_rate": 0.00018816000000000001,
2044
- "loss": 1.9662,
2045
- "step": 2960
2046
- },
2047
- {
2048
- "epoch": 0.11,
2049
- "learning_rate": 0.00018812,
2050
- "loss": 1.9302,
2051
- "step": 2970
2052
- },
2053
- {
2054
- "epoch": 0.11,
2055
- "learning_rate": 0.00018808,
2056
- "loss": 2.0279,
2057
- "step": 2980
2058
- },
2059
- {
2060
- "epoch": 0.11,
2061
- "learning_rate": 0.00018804000000000002,
2062
- "loss": 2.0217,
2063
- "step": 2990
2064
- },
2065
- {
2066
- "epoch": 0.11,
2067
- "learning_rate": 0.000188,
2068
- "loss": 2.0044,
2069
- "step": 3000
2070
- },
2071
- {
2072
- "epoch": 0.11,
2073
- "eval_accuracy": 0.6644656395206652,
2074
- "eval_loss": 1.6991885900497437,
2075
- "eval_runtime": 45.8483,
2076
- "eval_samples_per_second": 9.226,
2077
- "eval_steps_per_second": 1.156,
2078
- "step": 3000
2079
- },
2080
- {
2081
- "epoch": 0.11,
2082
- "learning_rate": 0.00018796,
2083
- "loss": 2.0481,
2084
- "step": 3010
2085
- },
2086
- {
2087
- "epoch": 0.11,
2088
- "learning_rate": 0.00018792,
2089
- "loss": 1.9341,
2090
- "step": 3020
2091
- },
2092
- {
2093
- "epoch": 0.11,
2094
- "learning_rate": 0.00018788000000000002,
2095
- "loss": 1.9922,
2096
- "step": 3030
2097
- },
2098
- {
2099
- "epoch": 0.11,
2100
- "learning_rate": 0.00018784000000000002,
2101
- "loss": 1.9681,
2102
- "step": 3040
2103
- },
2104
- {
2105
- "epoch": 0.11,
2106
- "learning_rate": 0.0001878,
2107
- "loss": 1.97,
2108
- "step": 3050
2109
- },
2110
- {
2111
- "epoch": 0.11,
2112
- "learning_rate": 0.00018776,
2113
- "loss": 1.9463,
2114
- "step": 3060
2115
- },
2116
- {
2117
- "epoch": 0.11,
2118
- "learning_rate": 0.00018772,
2119
- "loss": 1.9919,
2120
- "step": 3070
2121
- },
2122
- {
2123
- "epoch": 0.11,
2124
- "learning_rate": 0.00018768000000000002,
2125
- "loss": 1.9762,
2126
- "step": 3080
2127
- },
2128
- {
2129
- "epoch": 0.11,
2130
- "learning_rate": 0.00018764,
2131
- "loss": 1.9491,
2132
- "step": 3090
2133
- },
2134
- {
2135
- "epoch": 0.11,
2136
- "learning_rate": 0.0001876,
2137
- "loss": 2.0099,
2138
- "step": 3100
2139
- },
2140
- {
2141
- "epoch": 0.11,
2142
- "eval_accuracy": 0.6629167685660716,
2143
- "eval_loss": 1.6818691492080688,
2144
- "eval_runtime": 47.0155,
2145
- "eval_samples_per_second": 8.997,
2146
- "eval_steps_per_second": 1.127,
2147
- "step": 3100
2148
- },
2149
- {
2150
- "epoch": 0.11,
2151
- "learning_rate": 0.00018756,
2152
- "loss": 1.9804,
2153
- "step": 3110
2154
- },
2155
- {
2156
- "epoch": 0.12,
2157
- "learning_rate": 0.00018752,
2158
- "loss": 1.9109,
2159
- "step": 3120
2160
- },
2161
- {
2162
- "epoch": 0.12,
2163
- "learning_rate": 0.00018748000000000001,
2164
- "loss": 1.9892,
2165
- "step": 3130
2166
- },
2167
- {
2168
- "epoch": 0.12,
2169
- "learning_rate": 0.00018744,
2170
- "loss": 1.9546,
2171
- "step": 3140
2172
- },
2173
- {
2174
- "epoch": 0.12,
2175
- "learning_rate": 0.00018740000000000003,
2176
- "loss": 1.9803,
2177
- "step": 3150
2178
- },
2179
- {
2180
- "epoch": 0.12,
2181
- "learning_rate": 0.00018736,
2182
- "loss": 1.9629,
2183
- "step": 3160
2184
- },
2185
- {
2186
- "epoch": 0.12,
2187
- "learning_rate": 0.00018732000000000002,
2188
- "loss": 1.9725,
2189
- "step": 3170
2190
- },
2191
- {
2192
- "epoch": 0.12,
2193
- "learning_rate": 0.00018728,
2194
- "loss": 2.0243,
2195
- "step": 3180
2196
- },
2197
- {
2198
- "epoch": 0.12,
2199
- "learning_rate": 0.00018724,
2200
- "loss": 2.0426,
2201
- "step": 3190
2202
- },
2203
- {
2204
- "epoch": 0.12,
2205
- "learning_rate": 0.00018720000000000002,
2206
- "loss": 1.9622,
2207
- "step": 3200
2208
- },
2209
- {
2210
- "epoch": 0.12,
2211
- "eval_accuracy": 0.6634058857096274,
2212
- "eval_loss": 1.6796367168426514,
2213
- "eval_runtime": 45.6554,
2214
- "eval_samples_per_second": 9.265,
2215
- "eval_steps_per_second": 1.161,
2216
- "step": 3200
2217
- },
2218
- {
2219
- "epoch": 0.12,
2220
- "learning_rate": 0.00018716,
2221
- "loss": 2.0019,
2222
- "step": 3210
2223
- },
2224
- {
2225
- "epoch": 0.12,
2226
- "learning_rate": 0.00018712,
2227
- "loss": 1.9587,
2228
- "step": 3220
2229
- },
2230
- {
2231
- "epoch": 0.12,
2232
- "learning_rate": 0.00018708,
2233
- "loss": 1.9816,
2234
- "step": 3230
2235
- },
2236
- {
2237
- "epoch": 0.12,
2238
- "learning_rate": 0.00018704000000000003,
2239
- "loss": 1.9877,
2240
- "step": 3240
2241
- },
2242
- {
2243
- "epoch": 0.12,
2244
- "learning_rate": 0.00018700000000000002,
2245
- "loss": 2.0077,
2246
- "step": 3250
2247
- },
2248
- {
2249
- "epoch": 0.12,
2250
- "learning_rate": 0.00018696,
2251
- "loss": 2.043,
2252
- "step": 3260
2253
- },
2254
- {
2255
- "epoch": 0.12,
2256
- "learning_rate": 0.00018692,
2257
- "loss": 1.9861,
2258
- "step": 3270
2259
- },
2260
- {
2261
- "epoch": 0.12,
2262
- "learning_rate": 0.00018688,
2263
- "loss": 1.9399,
2264
- "step": 3280
2265
- },
2266
- {
2267
- "epoch": 0.12,
2268
- "learning_rate": 0.00018684000000000002,
2269
- "loss": 1.9473,
2270
- "step": 3290
2271
- },
2272
- {
2273
- "epoch": 0.12,
2274
- "learning_rate": 0.00018680000000000001,
2275
- "loss": 1.9716,
2276
- "step": 3300
2277
- },
2278
- {
2279
- "epoch": 0.12,
2280
- "eval_accuracy": 0.657128882367327,
2281
- "eval_loss": 1.7205617427825928,
2282
- "eval_runtime": 46.0038,
2283
- "eval_samples_per_second": 9.195,
2284
- "eval_steps_per_second": 1.152,
2285
- "step": 3300
2286
- },
2287
- {
2288
- "epoch": 0.12,
2289
- "learning_rate": 0.00018676,
2290
- "loss": 1.9388,
2291
- "step": 3310
2292
- },
2293
- {
2294
- "epoch": 0.12,
2295
- "learning_rate": 0.00018672,
2296
- "loss": 1.9062,
2297
- "step": 3320
2298
- },
2299
- {
2300
- "epoch": 0.12,
2301
- "learning_rate": 0.00018668000000000002,
2302
- "loss": 2.0283,
2303
- "step": 3330
2304
- },
2305
- {
2306
- "epoch": 0.12,
2307
- "learning_rate": 0.00018664000000000002,
2308
- "loss": 1.9347,
2309
- "step": 3340
2310
- },
2311
- {
2312
- "epoch": 0.12,
2313
- "learning_rate": 0.0001866,
2314
- "loss": 1.9422,
2315
- "step": 3350
2316
- },
2317
- {
2318
- "epoch": 0.12,
2319
- "learning_rate": 0.00018656,
2320
- "loss": 1.935,
2321
- "step": 3360
2322
- },
2323
- {
2324
- "epoch": 0.12,
2325
- "learning_rate": 0.00018652,
2326
- "loss": 1.9642,
2327
- "step": 3370
2328
- },
2329
- {
2330
- "epoch": 0.12,
2331
- "learning_rate": 0.00018648000000000002,
2332
- "loss": 1.9179,
2333
- "step": 3380
2334
- },
2335
- {
2336
- "epoch": 0.13,
2337
- "learning_rate": 0.00018644,
2338
- "loss": 1.9948,
2339
- "step": 3390
2340
- },
2341
- {
2342
- "epoch": 0.13,
2343
- "learning_rate": 0.00018640000000000003,
2344
- "loss": 1.9136,
2345
- "step": 3400
2346
- },
2347
- {
2348
- "epoch": 0.13,
2349
- "eval_accuracy": 0.6654438738077769,
2350
- "eval_loss": 1.6819649934768677,
2351
- "eval_runtime": 45.6304,
2352
- "eval_samples_per_second": 9.27,
2353
- "eval_steps_per_second": 1.162,
2354
- "step": 3400
2355
- },
2356
- {
2357
- "epoch": 0.13,
2358
- "learning_rate": 0.00018636,
2359
- "loss": 1.8923,
2360
- "step": 3410
2361
- },
2362
- {
2363
- "epoch": 0.13,
2364
- "learning_rate": 0.00018632000000000002,
2365
- "loss": 1.9587,
2366
- "step": 3420
2367
- },
2368
- {
2369
- "epoch": 0.13,
2370
- "learning_rate": 0.00018628,
2371
- "loss": 1.9211,
2372
- "step": 3430
2373
- },
2374
- {
2375
- "epoch": 0.13,
2376
- "learning_rate": 0.00018624,
2377
- "loss": 1.9302,
2378
- "step": 3440
2379
- },
2380
- {
2381
- "epoch": 0.13,
2382
- "learning_rate": 0.00018620000000000003,
2383
- "loss": 1.9764,
2384
- "step": 3450
2385
- },
2386
- {
2387
- "epoch": 0.13,
2388
- "learning_rate": 0.00018616,
2389
- "loss": 1.9956,
2390
- "step": 3460
2391
- },
2392
- {
2393
- "epoch": 0.13,
2394
- "learning_rate": 0.00018612000000000001,
2395
- "loss": 1.9196,
2396
- "step": 3470
2397
- },
2398
- {
2399
- "epoch": 0.13,
2400
- "learning_rate": 0.00018608,
2401
- "loss": 1.955,
2402
- "step": 3480
2403
- },
2404
- {
2405
- "epoch": 0.13,
2406
- "learning_rate": 0.00018604,
2407
- "loss": 1.9914,
2408
- "step": 3490
2409
- },
2410
- {
2411
- "epoch": 0.13,
2412
- "learning_rate": 0.00018600000000000002,
2413
- "loss": 1.9277,
2414
- "step": 3500
2415
- },
2416
- {
2417
- "epoch": 0.13,
2418
- "eval_accuracy": 0.6650362761881471,
2419
- "eval_loss": 1.6978471279144287,
2420
- "eval_runtime": 45.6548,
2421
- "eval_samples_per_second": 9.265,
2422
- "eval_steps_per_second": 1.161,
2423
- "step": 3500
2424
- },
2425
- {
2426
- "epoch": 0.13,
2427
- "learning_rate": 0.00018596,
2428
- "loss": 1.949,
2429
- "step": 3510
2430
- },
2431
- {
2432
- "epoch": 0.13,
2433
- "learning_rate": 0.00018592,
2434
- "loss": 1.927,
2435
- "step": 3520
2436
- },
2437
- {
2438
- "epoch": 0.13,
2439
- "learning_rate": 0.00018588,
2440
- "loss": 2.0058,
2441
- "step": 3530
2442
- },
2443
- {
2444
- "epoch": 0.13,
2445
- "learning_rate": 0.00018584000000000002,
2446
- "loss": 1.9854,
2447
- "step": 3540
2448
- },
2449
- {
2450
- "epoch": 0.13,
2451
- "learning_rate": 0.00018580000000000002,
2452
- "loss": 1.9646,
2453
- "step": 3550
2454
- },
2455
- {
2456
- "epoch": 0.13,
2457
- "learning_rate": 0.00018576,
2458
- "loss": 1.9517,
2459
- "step": 3560
2460
- },
2461
- {
2462
- "epoch": 0.13,
2463
- "learning_rate": 0.00018572,
2464
- "loss": 2.0103,
2465
- "step": 3570
2466
- },
2467
- {
2468
- "epoch": 0.13,
2469
- "learning_rate": 0.00018568,
2470
- "loss": 1.9367,
2471
- "step": 3580
2472
- },
2473
- {
2474
- "epoch": 0.13,
2475
- "learning_rate": 0.00018564000000000002,
2476
- "loss": 1.9968,
2477
- "step": 3590
2478
- },
2479
- {
2480
- "epoch": 0.13,
2481
- "learning_rate": 0.0001856,
2482
- "loss": 1.9727,
2483
- "step": 3600
2484
- },
2485
- {
2486
- "epoch": 0.13,
2487
- "eval_accuracy": 0.6593299095133285,
2488
- "eval_loss": 1.6777493953704834,
2489
- "eval_runtime": 60.7874,
2490
- "eval_samples_per_second": 6.959,
2491
- "eval_steps_per_second": 0.872,
2492
- "step": 3600
2493
- },
2494
- {
2495
- "epoch": 0.13,
2496
- "learning_rate": 0.00018556,
2497
- "loss": 1.9233,
2498
- "step": 3610
2499
- },
2500
- {
2501
- "epoch": 0.13,
2502
- "learning_rate": 0.00018552,
2503
- "loss": 1.926,
2504
- "step": 3620
2505
- },
2506
- {
2507
- "epoch": 0.13,
2508
- "learning_rate": 0.00018548000000000002,
2509
- "loss": 1.9538,
2510
- "step": 3630
2511
- },
2512
- {
2513
- "epoch": 0.13,
2514
- "learning_rate": 0.00018544,
2515
- "loss": 1.9795,
2516
- "step": 3640
2517
- },
2518
- {
2519
- "epoch": 0.13,
2520
- "learning_rate": 0.0001854,
2521
- "loss": 1.9675,
2522
- "step": 3650
2523
- },
2524
- {
2525
- "epoch": 0.14,
2526
- "learning_rate": 0.00018536,
2527
- "loss": 1.8831,
2528
- "step": 3660
2529
- },
2530
- {
2531
- "epoch": 0.14,
2532
- "learning_rate": 0.00018532,
2533
- "loss": 1.9082,
2534
- "step": 3670
2535
- },
2536
- {
2537
- "epoch": 0.14,
2538
- "learning_rate": 0.00018528000000000001,
2539
- "loss": 1.9514,
2540
- "step": 3680
2541
- },
2542
- {
2543
- "epoch": 0.14,
2544
- "learning_rate": 0.00018524,
2545
- "loss": 1.9714,
2546
- "step": 3690
2547
- },
2548
- {
2549
- "epoch": 0.14,
2550
- "learning_rate": 0.00018520000000000003,
2551
- "loss": 2.0391,
2552
- "step": 3700
2553
- },
2554
- {
2555
- "epoch": 0.14,
2556
- "eval_accuracy": 0.6623461318985897,
2557
- "eval_loss": 1.6935285329818726,
2558
- "eval_runtime": 59.3856,
2559
- "eval_samples_per_second": 7.123,
2560
- "eval_steps_per_second": 0.892,
2561
- "step": 3700
2562
- },
2563
- {
2564
- "epoch": 0.14,
2565
- "learning_rate": 0.00018516,
2566
- "loss": 1.9065,
2567
- "step": 3710
2568
- },
2569
- {
2570
- "epoch": 0.14,
2571
- "learning_rate": 0.00018512000000000002,
2572
- "loss": 1.9062,
2573
- "step": 3720
2574
- },
2575
- {
2576
- "epoch": 0.14,
2577
- "learning_rate": 0.00018508,
2578
- "loss": 1.9187,
2579
- "step": 3730
2580
- },
2581
- {
2582
- "epoch": 0.14,
2583
- "learning_rate": 0.00018504,
2584
- "loss": 1.9407,
2585
- "step": 3740
2586
- },
2587
- {
2588
- "epoch": 0.14,
2589
- "learning_rate": 0.00018500000000000002,
2590
- "loss": 1.9684,
2591
- "step": 3750
2592
- },
2593
- {
2594
- "epoch": 0.14,
2595
- "learning_rate": 0.00018496,
2596
- "loss": 1.9124,
2597
- "step": 3760
2598
- },
2599
- {
2600
- "epoch": 0.14,
2601
- "learning_rate": 0.00018492,
2602
- "loss": 1.9626,
2603
- "step": 3770
2604
- },
2605
- {
2606
- "epoch": 0.14,
2607
- "learning_rate": 0.00018488,
2608
- "loss": 1.8665,
2609
- "step": 3780
2610
- },
2611
- {
2612
- "epoch": 0.14,
2613
- "learning_rate": 0.00018484000000000003,
2614
- "loss": 1.9432,
2615
- "step": 3790
2616
- },
2617
- {
2618
- "epoch": 0.14,
2619
- "learning_rate": 0.00018480000000000002,
2620
- "loss": 1.9367,
2621
- "step": 3800
2622
- },
2623
- {
2624
- "epoch": 0.14,
2625
- "eval_accuracy": 0.6560691285562892,
2626
- "eval_loss": 1.7309006452560425,
2627
- "eval_runtime": 61.2631,
2628
- "eval_samples_per_second": 6.905,
2629
- "eval_steps_per_second": 0.865,
2630
- "step": 3800
2631
- },
2632
- {
2633
- "epoch": 0.14,
2634
- "learning_rate": 0.00018476,
2635
- "loss": 1.9483,
2636
- "step": 3810
2637
- },
2638
- {
2639
- "epoch": 0.14,
2640
- "learning_rate": 0.00018472,
2641
- "loss": 1.9067,
2642
- "step": 3820
2643
- },
2644
- {
2645
- "epoch": 0.14,
2646
- "learning_rate": 0.00018468,
2647
- "loss": 1.9485,
2648
- "step": 3830
2649
- },
2650
- {
2651
- "epoch": 0.14,
2652
- "learning_rate": 0.00018464000000000002,
2653
- "loss": 1.9529,
2654
- "step": 3840
2655
- },
2656
- {
2657
- "epoch": 0.14,
2658
- "learning_rate": 0.00018460000000000001,
2659
- "loss": 1.9463,
2660
- "step": 3850
2661
- },
2662
- {
2663
- "epoch": 0.14,
2664
- "learning_rate": 0.00018456,
2665
- "loss": 2.0001,
2666
- "step": 3860
2667
- },
2668
- {
2669
- "epoch": 0.14,
2670
- "learning_rate": 0.00018452,
2671
- "loss": 1.8994,
2672
- "step": 3870
2673
- },
2674
- {
2675
- "epoch": 0.14,
2676
- "learning_rate": 0.00018448,
2677
- "loss": 1.8648,
2678
- "step": 3880
2679
- },
2680
- {
2681
- "epoch": 0.14,
2682
- "learning_rate": 0.00018444000000000002,
2683
- "loss": 1.9028,
2684
- "step": 3890
2685
- },
2686
- {
2687
- "epoch": 0.14,
2688
- "learning_rate": 0.0001844,
2689
- "loss": 1.9146,
2690
- "step": 3900
2691
- },
2692
- {
2693
- "epoch": 0.14,
2694
- "eval_accuracy": 0.6616939757071819,
2695
- "eval_loss": 1.7054038047790527,
2696
- "eval_runtime": 60.7176,
2697
- "eval_samples_per_second": 6.967,
2698
- "eval_steps_per_second": 0.873,
2699
- "step": 3900
2700
- },
2701
- {
2702
- "epoch": 0.14,
2703
- "learning_rate": 0.00018436,
2704
- "loss": 1.945,
2705
- "step": 3910
2706
- },
2707
- {
2708
- "epoch": 0.14,
2709
- "learning_rate": 0.00018432,
2710
- "loss": 1.9027,
2711
- "step": 3920
2712
- },
2713
- {
2714
- "epoch": 0.15,
2715
- "learning_rate": 0.00018428000000000002,
2716
- "loss": 1.959,
2717
- "step": 3930
2718
- },
2719
- {
2720
- "epoch": 0.15,
2721
- "learning_rate": 0.00018424,
2722
- "loss": 1.9469,
2723
- "step": 3940
2724
- },
2725
- {
2726
- "epoch": 0.15,
2727
- "learning_rate": 0.0001842,
2728
- "loss": 1.9149,
2729
- "step": 3950
2730
- },
2731
- {
2732
- "epoch": 0.15,
2733
- "learning_rate": 0.00018416,
2734
- "loss": 1.9151,
2735
- "step": 3960
2736
- },
2737
- {
2738
- "epoch": 0.15,
2739
- "learning_rate": 0.00018412,
2740
- "loss": 1.9054,
2741
- "step": 3970
2742
- },
2743
- {
2744
- "epoch": 0.15,
2745
- "learning_rate": 0.00018408,
2746
- "loss": 1.8939,
2747
- "step": 3980
2748
- },
2749
- {
2750
- "epoch": 0.15,
2751
- "learning_rate": 0.00018404,
2752
- "loss": 1.9331,
2753
- "step": 3990
2754
- },
2755
- {
2756
- "epoch": 0.15,
2757
- "learning_rate": 0.00018400000000000003,
2758
- "loss": 1.957,
2759
- "step": 4000
2760
- },
2761
- {
2762
- "epoch": 0.15,
2763
- "eval_accuracy": 0.6670742642862966,
2764
- "eval_loss": 1.6463383436203003,
2765
- "eval_runtime": 46.0232,
2766
- "eval_samples_per_second": 9.191,
2767
- "eval_steps_per_second": 1.152,
2768
- "step": 4000
2769
- },
2770
- {
2771
- "epoch": 0.15,
2772
- "learning_rate": 0.00018396,
2773
- "loss": 1.9497,
2774
- "step": 4010
2775
- },
2776
- {
2777
- "epoch": 0.15,
2778
- "learning_rate": 0.00018392000000000001,
2779
- "loss": 1.9271,
2780
- "step": 4020
2781
- },
2782
- {
2783
- "epoch": 0.15,
2784
- "learning_rate": 0.00018388,
2785
- "loss": 1.9087,
2786
- "step": 4030
2787
- },
2788
- {
2789
- "epoch": 0.15,
2790
- "learning_rate": 0.00018384,
2791
- "loss": 1.9161,
2792
- "step": 4040
2793
- },
2794
- {
2795
- "epoch": 0.15,
2796
- "learning_rate": 0.00018380000000000002,
2797
- "loss": 1.9423,
2798
- "step": 4050
2799
- },
2800
- {
2801
- "epoch": 0.15,
2802
- "learning_rate": 0.00018376,
2803
- "loss": 1.8995,
2804
- "step": 4060
2805
- },
2806
- {
2807
- "epoch": 0.15,
2808
- "learning_rate": 0.00018372,
2809
- "loss": 1.9421,
2810
- "step": 4070
2811
- },
2812
- {
2813
- "epoch": 0.15,
2814
- "learning_rate": 0.00018368,
2815
- "loss": 1.9013,
2816
- "step": 4080
2817
- },
2818
- {
2819
- "epoch": 0.15,
2820
- "learning_rate": 0.00018364000000000002,
2821
- "loss": 1.9092,
2822
- "step": 4090
2823
- },
2824
- {
2825
- "epoch": 0.15,
2826
- "learning_rate": 0.00018360000000000002,
2827
- "loss": 1.882,
2828
- "step": 4100
2829
- },
2830
- {
2831
- "epoch": 0.15,
2832
- "eval_accuracy": 0.6641395614249613,
2833
- "eval_loss": 1.6783875226974487,
2834
- "eval_runtime": 47.0079,
2835
- "eval_samples_per_second": 8.998,
2836
- "eval_steps_per_second": 1.127,
2837
- "step": 4100
2838
- },
2839
- {
2840
- "epoch": 0.15,
2841
- "learning_rate": 0.00018356,
2842
- "loss": 1.9424,
2843
- "step": 4110
2844
- },
2845
- {
2846
- "epoch": 0.15,
2847
- "learning_rate": 0.00018352,
2848
- "loss": 1.9477,
2849
- "step": 4120
2850
- },
2851
- {
2852
- "epoch": 0.15,
2853
- "learning_rate": 0.00018348,
2854
- "loss": 1.9255,
2855
- "step": 4130
2856
- },
2857
- {
2858
- "epoch": 0.15,
2859
- "learning_rate": 0.00018344000000000002,
2860
- "loss": 1.9456,
2861
- "step": 4140
2862
- },
2863
- {
2864
- "epoch": 0.15,
2865
- "learning_rate": 0.0001834,
2866
- "loss": 1.9288,
2867
- "step": 4150
2868
- },
2869
- {
2870
- "epoch": 0.15,
2871
- "learning_rate": 0.00018336,
2872
- "loss": 1.9088,
2873
- "step": 4160
2874
- },
2875
- {
2876
- "epoch": 0.15,
2877
- "learning_rate": 0.00018332,
2878
- "loss": 1.959,
2879
- "step": 4170
2880
- },
2881
- {
2882
- "epoch": 0.15,
2883
- "learning_rate": 0.00018328000000000002,
2884
- "loss": 1.9619,
2885
- "step": 4180
2886
- },
2887
- {
2888
- "epoch": 0.15,
2889
- "learning_rate": 0.00018324000000000001,
2890
- "loss": 1.9511,
2891
- "step": 4190
2892
- },
2893
- {
2894
- "epoch": 0.16,
2895
- "learning_rate": 0.0001832,
2896
- "loss": 1.9489,
2897
- "step": 4200
2898
- },
2899
- {
2900
- "epoch": 0.16,
2901
- "eval_accuracy": 0.6603081438004402,
2902
- "eval_loss": 1.676958680152893,
2903
- "eval_runtime": 46.3451,
2904
- "eval_samples_per_second": 9.127,
2905
- "eval_steps_per_second": 1.144,
2906
- "step": 4200
2907
- },
2908
- {
2909
- "epoch": 0.16,
2910
- "learning_rate": 0.00018316,
2911
- "loss": 1.8862,
2912
- "step": 4210
2913
- },
2914
- {
2915
- "epoch": 0.16,
2916
- "learning_rate": 0.00018312,
2917
- "loss": 1.8631,
2918
- "step": 4220
2919
- },
2920
- {
2921
- "epoch": 0.16,
2922
- "learning_rate": 0.00018308000000000002,
2923
- "loss": 1.9649,
2924
- "step": 4230
2925
- },
2926
- {
2927
- "epoch": 0.16,
2928
- "learning_rate": 0.00018304,
2929
- "loss": 1.9592,
2930
- "step": 4240
2931
- },
2932
- {
2933
- "epoch": 0.16,
2934
- "learning_rate": 0.000183,
2935
- "loss": 1.9317,
2936
- "step": 4250
2937
- },
2938
- {
2939
- "epoch": 0.16,
2940
- "learning_rate": 0.00018296,
2941
- "loss": 1.9138,
2942
- "step": 4260
2943
- },
2944
- {
2945
- "epoch": 0.16,
2946
- "learning_rate": 0.00018292,
2947
- "loss": 1.8876,
2948
- "step": 4270
2949
- },
2950
- {
2951
- "epoch": 0.16,
2952
- "learning_rate": 0.00018288,
2953
- "loss": 1.859,
2954
- "step": 4280
2955
- },
2956
- {
2957
- "epoch": 0.16,
2958
- "learning_rate": 0.00018284,
2959
- "loss": 1.9496,
2960
- "step": 4290
2961
- },
2962
- {
2963
- "epoch": 0.16,
2964
- "learning_rate": 0.00018280000000000003,
2965
- "loss": 1.9407,
2966
- "step": 4300
2967
- },
2968
- {
2969
- "epoch": 0.16,
2970
- "eval_accuracy": 0.669601369528002,
2971
- "eval_loss": 1.641465187072754,
2972
- "eval_runtime": 46.8321,
2973
- "eval_samples_per_second": 9.032,
2974
- "eval_steps_per_second": 1.132,
2975
- "step": 4300
2976
- },
2977
- {
2978
- "epoch": 0.16,
2979
- "learning_rate": 0.00018276,
2980
- "loss": 1.9654,
2981
- "step": 4310
2982
- },
2983
- {
2984
- "epoch": 0.16,
2985
- "learning_rate": 0.00018272,
2986
- "loss": 1.9223,
2987
- "step": 4320
2988
- },
2989
- {
2990
- "epoch": 0.16,
2991
- "learning_rate": 0.00018268,
2992
- "loss": 1.9163,
2993
- "step": 4330
2994
- },
2995
- {
2996
- "epoch": 0.16,
2997
- "learning_rate": 0.00018264,
2998
- "loss": 1.9175,
2999
- "step": 4340
3000
- },
3001
- {
3002
- "epoch": 0.16,
3003
- "learning_rate": 0.00018260000000000002,
3004
- "loss": 1.9255,
3005
- "step": 4350
3006
- },
3007
- {
3008
- "epoch": 0.16,
3009
- "learning_rate": 0.00018256,
3010
- "loss": 1.8736,
3011
- "step": 4360
3012
- },
3013
- {
3014
- "epoch": 0.16,
3015
- "learning_rate": 0.00018252,
3016
- "loss": 1.8851,
3017
- "step": 4370
3018
- },
3019
- {
3020
- "epoch": 0.16,
3021
- "learning_rate": 0.00018248,
3022
- "loss": 1.9125,
3023
- "step": 4380
3024
- },
3025
- {
3026
- "epoch": 0.16,
3027
- "learning_rate": 0.00018244000000000002,
3028
- "loss": 1.888,
3029
- "step": 4390
3030
- },
3031
- {
3032
- "epoch": 0.16,
3033
- "learning_rate": 0.00018240000000000002,
3034
- "loss": 1.9402,
3035
- "step": 4400
3036
- },
3037
- {
3038
- "epoch": 0.16,
3039
- "eval_accuracy": 0.6654438738077769,
3040
- "eval_loss": 1.681833267211914,
3041
- "eval_runtime": 46.6168,
3042
- "eval_samples_per_second": 9.074,
3043
- "eval_steps_per_second": 1.137,
3044
- "step": 4400
3045
- },
3046
- {
3047
- "epoch": 0.16,
3048
- "learning_rate": 0.00018236,
3049
- "loss": 1.8771,
3050
- "step": 4410
3051
- },
3052
- {
3053
- "epoch": 0.16,
3054
- "learning_rate": 0.00018232,
3055
- "loss": 1.9207,
3056
- "step": 4420
3057
- },
3058
- {
3059
- "epoch": 0.16,
3060
- "learning_rate": 0.00018228,
3061
- "loss": 1.8753,
3062
- "step": 4430
3063
- },
3064
- {
3065
- "epoch": 0.16,
3066
- "learning_rate": 0.00018224000000000002,
3067
- "loss": 1.8738,
3068
- "step": 4440
3069
- },
3070
- {
3071
- "epoch": 0.16,
3072
- "learning_rate": 0.0001822,
3073
- "loss": 1.9238,
3074
- "step": 4450
3075
- },
3076
- {
3077
- "epoch": 0.16,
3078
- "learning_rate": 0.00018216000000000003,
3079
- "loss": 1.8845,
3080
- "step": 4460
3081
- },
3082
- {
3083
- "epoch": 0.17,
3084
- "learning_rate": 0.00018212,
3085
- "loss": 1.9076,
3086
- "step": 4470
3087
- },
3088
- {
3089
- "epoch": 0.17,
3090
- "learning_rate": 0.00018208000000000002,
3091
- "loss": 1.9064,
3092
- "step": 4480
3093
- },
3094
- {
3095
- "epoch": 0.17,
3096
- "learning_rate": 0.00018204,
3097
- "loss": 1.9555,
3098
- "step": 4490
3099
- },
3100
- {
3101
- "epoch": 0.17,
3102
- "learning_rate": 0.000182,
3103
- "loss": 1.894,
3104
- "step": 4500
3105
- },
3106
- {
3107
- "epoch": 0.17,
3108
- "eval_accuracy": 0.6659329909513328,
3109
- "eval_loss": 1.664554476737976,
3110
- "eval_runtime": 46.5128,
3111
- "eval_samples_per_second": 9.094,
3112
- "eval_steps_per_second": 1.139,
3113
- "step": 4500
3114
- },
3115
- {
3116
- "epoch": 0.17,
3117
- "learning_rate": 0.00018196000000000003,
3118
- "loss": 1.9158,
3119
- "step": 4510
3120
- },
3121
- {
3122
- "epoch": 0.17,
3123
- "learning_rate": 0.00018192,
3124
- "loss": 1.9474,
3125
- "step": 4520
3126
- },
3127
- {
3128
- "epoch": 0.17,
3129
- "learning_rate": 0.00018188000000000001,
3130
- "loss": 1.9463,
3131
- "step": 4530
3132
- },
3133
- {
3134
- "epoch": 0.17,
3135
- "learning_rate": 0.00018184,
3136
- "loss": 1.8755,
3137
- "step": 4540
3138
- },
3139
- {
3140
- "epoch": 0.17,
3141
- "learning_rate": 0.00018180000000000003,
3142
- "loss": 1.9637,
3143
- "step": 4550
3144
- },
3145
- {
3146
- "epoch": 0.17,
3147
- "learning_rate": 0.00018176000000000002,
3148
- "loss": 1.9052,
3149
- "step": 4560
3150
- },
3151
- {
3152
- "epoch": 0.17,
3153
- "learning_rate": 0.00018172,
3154
- "loss": 1.9231,
3155
- "step": 4570
3156
- },
3157
- {
3158
- "epoch": 0.17,
3159
- "learning_rate": 0.00018168,
3160
- "loss": 1.9216,
3161
- "step": 4580
3162
- },
3163
- {
3164
- "epoch": 0.17,
3165
- "learning_rate": 0.00018164,
3166
- "loss": 1.8753,
3167
- "step": 4590
3168
- },
3169
- {
3170
- "epoch": 0.17,
3171
- "learning_rate": 0.00018160000000000002,
3172
- "loss": 1.9149,
3173
- "step": 4600
3174
- },
3175
- {
3176
- "epoch": 0.17,
3177
- "eval_accuracy": 0.6674818619059265,
3178
- "eval_loss": 1.6451318264007568,
3179
- "eval_runtime": 46.9277,
3180
- "eval_samples_per_second": 9.014,
3181
- "eval_steps_per_second": 1.129,
3182
- "step": 4600
3183
- },
3184
- {
3185
- "epoch": 0.17,
3186
- "learning_rate": 0.00018156000000000002,
3187
- "loss": 1.8871,
3188
- "step": 4610
3189
- },
3190
- {
3191
- "epoch": 0.17,
3192
- "learning_rate": 0.00018152,
3193
- "loss": 1.9246,
3194
- "step": 4620
3195
- },
3196
- {
3197
- "epoch": 0.17,
3198
- "learning_rate": 0.00018148,
3199
- "loss": 1.9086,
3200
- "step": 4630
3201
- },
3202
- {
3203
- "epoch": 0.17,
3204
- "learning_rate": 0.00018144,
3205
- "loss": 1.9105,
3206
- "step": 4640
3207
- },
3208
- {
3209
- "epoch": 0.17,
3210
- "learning_rate": 0.00018140000000000002,
3211
- "loss": 1.9686,
3212
- "step": 4650
3213
- },
3214
- {
3215
- "epoch": 0.17,
3216
- "learning_rate": 0.00018136,
3217
- "loss": 1.8944,
3218
- "step": 4660
3219
- },
3220
- {
3221
- "epoch": 0.17,
3222
- "learning_rate": 0.00018132,
3223
- "loss": 1.8953,
3224
- "step": 4670
3225
- },
3226
- {
3227
- "epoch": 0.17,
3228
- "learning_rate": 0.00018128,
3229
- "loss": 1.898,
3230
- "step": 4680
3231
- },
3232
- {
3233
- "epoch": 0.17,
3234
- "learning_rate": 0.00018124000000000002,
3235
- "loss": 1.934,
3236
- "step": 4690
3237
- },
3238
- {
3239
- "epoch": 0.17,
3240
- "learning_rate": 0.0001812,
3241
- "loss": 1.8868,
3242
- "step": 4700
3243
- },
3244
- {
3245
- "epoch": 0.17,
3246
- "eval_accuracy": 0.6653623542838509,
3247
- "eval_loss": 1.6440030336380005,
3248
- "eval_runtime": 47.0909,
3249
- "eval_samples_per_second": 8.983,
3250
- "eval_steps_per_second": 1.125,
3251
- "step": 4700
3252
- },
3253
- {
3254
- "epoch": 0.17,
3255
- "learning_rate": 0.00018116,
3256
- "loss": 1.8573,
3257
- "step": 4710
3258
- },
3259
- {
3260
- "epoch": 0.17,
3261
- "learning_rate": 0.00018112,
3262
- "loss": 1.8907,
3263
- "step": 4720
3264
- },
3265
- {
3266
- "epoch": 0.17,
3267
- "learning_rate": 0.00018108,
3268
- "loss": 1.8429,
3269
- "step": 4730
3270
- },
3271
- {
3272
- "epoch": 0.18,
3273
- "learning_rate": 0.00018104000000000001,
3274
- "loss": 1.8393,
3275
- "step": 4740
3276
- },
3277
- {
3278
- "epoch": 0.18,
3279
- "learning_rate": 0.000181,
3280
- "loss": 2.0048,
3281
- "step": 4750
3282
- },
3283
- {
3284
- "epoch": 0.18,
3285
- "learning_rate": 0.00018096000000000003,
3286
- "loss": 1.8358,
3287
- "step": 4760
3288
- },
3289
- {
3290
- "epoch": 0.18,
3291
- "learning_rate": 0.00018092,
3292
- "loss": 1.9148,
3293
- "step": 4770
3294
- },
3295
- {
3296
- "epoch": 0.18,
3297
- "learning_rate": 0.00018088000000000002,
3298
- "loss": 1.8835,
3299
- "step": 4780
3300
- },
3301
- {
3302
- "epoch": 0.18,
3303
- "learning_rate": 0.00018084,
3304
- "loss": 1.8832,
3305
- "step": 4790
3306
- },
3307
- {
3308
- "epoch": 0.18,
3309
- "learning_rate": 0.0001808,
3310
- "loss": 1.9157,
3311
- "step": 4800
3312
- },
3313
- {
3314
- "epoch": 0.18,
3315
- "eval_accuracy": 0.6643841199967392,
3316
- "eval_loss": 1.6713789701461792,
3317
- "eval_runtime": 47.3501,
3318
- "eval_samples_per_second": 8.933,
3319
- "eval_steps_per_second": 1.119,
3320
- "step": 4800
3321
- },
3322
- {
3323
- "epoch": 0.18,
3324
- "learning_rate": 0.00018076000000000002,
3325
- "loss": 1.9131,
3326
- "step": 4810
3327
- },
3328
- {
3329
- "epoch": 0.18,
3330
- "learning_rate": 0.00018072,
3331
- "loss": 1.8841,
3332
- "step": 4820
3333
- },
3334
- {
3335
- "epoch": 0.18,
3336
- "learning_rate": 0.00018068,
3337
- "loss": 1.9479,
3338
- "step": 4830
3339
- },
3340
- {
3341
- "epoch": 0.18,
3342
- "learning_rate": 0.00018064,
3343
- "loss": 1.9109,
3344
- "step": 4840
3345
- },
3346
- {
3347
- "epoch": 0.18,
3348
- "learning_rate": 0.00018060000000000003,
3349
- "loss": 1.9122,
3350
- "step": 4850
3351
- },
3352
- {
3353
- "epoch": 0.18,
3354
- "learning_rate": 0.00018056000000000002,
3355
- "loss": 1.867,
3356
- "step": 4860
3357
- },
3358
- {
3359
- "epoch": 0.18,
3360
- "learning_rate": 0.00018052,
3361
- "loss": 1.9297,
3362
- "step": 4870
3363
- },
3364
- {
3365
- "epoch": 0.18,
3366
- "learning_rate": 0.00018048,
3367
- "loss": 1.8987,
3368
- "step": 4880
3369
- },
3370
- {
3371
- "epoch": 0.18,
3372
- "learning_rate": 0.00018044,
3373
- "loss": 1.8791,
3374
- "step": 4890
3375
- },
3376
- {
3377
- "epoch": 0.18,
3378
- "learning_rate": 0.00018040000000000002,
3379
- "loss": 1.884,
3380
- "step": 4900
3381
- },
3382
- {
3383
- "epoch": 0.18,
3384
- "eval_accuracy": 0.669764408575854,
3385
- "eval_loss": 1.6372376680374146,
3386
- "eval_runtime": 45.8594,
3387
- "eval_samples_per_second": 9.224,
3388
- "eval_steps_per_second": 1.156,
3389
- "step": 4900
3390
- },
3391
- {
3392
- "epoch": 0.18,
3393
- "learning_rate": 0.00018036000000000001,
3394
- "loss": 1.9257,
3395
- "step": 4910
3396
- },
3397
- {
3398
- "epoch": 0.18,
3399
- "learning_rate": 0.00018032,
3400
- "loss": 1.8184,
3401
- "step": 4920
3402
- },
3403
- {
3404
- "epoch": 0.18,
3405
- "learning_rate": 0.00018028,
3406
- "loss": 1.9063,
3407
- "step": 4930
3408
- },
3409
- {
3410
- "epoch": 0.18,
3411
- "learning_rate": 0.00018024,
3412
- "loss": 1.9058,
3413
- "step": 4940
3414
- },
3415
- {
3416
- "epoch": 0.18,
3417
- "learning_rate": 0.00018020000000000002,
3418
- "loss": 1.8876,
3419
- "step": 4950
3420
- },
3421
- {
3422
- "epoch": 0.18,
3423
- "learning_rate": 0.00018016,
3424
- "loss": 1.8873,
3425
- "step": 4960
3426
- },
3427
- {
3428
- "epoch": 0.18,
3429
- "learning_rate": 0.00018012,
3430
- "loss": 1.8623,
3431
- "step": 4970
3432
- },
3433
- {
3434
- "epoch": 0.18,
3435
- "learning_rate": 0.00018008,
3436
- "loss": 1.8649,
3437
- "step": 4980
3438
- },
3439
- {
3440
- "epoch": 0.18,
3441
- "learning_rate": 0.00018004000000000002,
3442
- "loss": 1.9113,
3443
- "step": 4990
3444
- },
3445
- {
3446
- "epoch": 0.18,
3447
- "learning_rate": 0.00018,
3448
- "loss": 1.861,
3449
- "step": 5000
3450
- },
3451
- {
3452
- "epoch": 0.18,
3453
- "eval_accuracy": 0.6632428466617755,
3454
- "eval_loss": 1.6630749702453613,
3455
- "eval_runtime": 46.3295,
3456
- "eval_samples_per_second": 9.13,
3457
- "eval_steps_per_second": 1.144,
3458
- "step": 5000
3459
- },
3460
- {
3461
- "epoch": 0.19,
3462
- "learning_rate": 0.00017996,
3463
- "loss": 1.95,
3464
- "step": 5010
3465
- },
3466
- {
3467
- "epoch": 0.19,
3468
- "learning_rate": 0.00017992,
3469
- "loss": 1.8517,
3470
- "step": 5020
3471
- },
3472
- {
3473
- "epoch": 0.19,
3474
- "learning_rate": 0.00017988,
3475
- "loss": 1.8582,
3476
- "step": 5030
3477
- },
3478
- {
3479
- "epoch": 0.19,
3480
- "learning_rate": 0.00017984,
3481
- "loss": 1.8658,
3482
- "step": 5040
3483
- },
3484
- {
3485
- "epoch": 0.19,
3486
- "learning_rate": 0.0001798,
3487
- "loss": 1.9111,
3488
- "step": 5050
3489
- },
3490
- {
3491
- "epoch": 0.19,
3492
- "learning_rate": 0.00017976000000000003,
3493
- "loss": 1.8874,
3494
- "step": 5060
3495
- },
3496
- {
3497
- "epoch": 0.19,
3498
- "learning_rate": 0.00017972,
3499
- "loss": 1.8853,
3500
- "step": 5070
3501
- },
3502
- {
3503
- "epoch": 0.19,
3504
- "learning_rate": 0.00017968000000000001,
3505
- "loss": 1.8797,
3506
- "step": 5080
3507
- },
3508
- {
3509
- "epoch": 0.19,
3510
- "learning_rate": 0.00017964,
3511
- "loss": 1.8727,
3512
- "step": 5090
3513
- },
3514
- {
3515
- "epoch": 0.19,
3516
- "learning_rate": 0.0001796,
3517
- "loss": 1.9064,
3518
- "step": 5100
3519
- },
3520
- {
3521
- "epoch": 0.19,
3522
- "eval_accuracy": 0.6621015733268117,
3523
- "eval_loss": 1.689902424812317,
3524
- "eval_runtime": 47.432,
3525
- "eval_samples_per_second": 8.918,
3526
- "eval_steps_per_second": 1.117,
3527
- "step": 5100
3528
- },
3529
- {
3530
- "epoch": 0.19,
3531
- "learning_rate": 0.00017956000000000002,
3532
- "loss": 1.8958,
3533
- "step": 5110
3534
- },
3535
- {
3536
- "epoch": 0.19,
3537
- "learning_rate": 0.00017952,
3538
- "loss": 1.8912,
3539
- "step": 5120
3540
- },
3541
- {
3542
- "epoch": 0.19,
3543
- "learning_rate": 0.00017948,
3544
- "loss": 1.8342,
3545
- "step": 5130
3546
- },
3547
- {
3548
- "epoch": 0.19,
3549
- "learning_rate": 0.00017944,
3550
- "loss": 1.864,
3551
- "step": 5140
3552
- },
3553
- {
3554
- "epoch": 0.19,
3555
- "learning_rate": 0.00017940000000000002,
3556
- "loss": 1.8842,
3557
- "step": 5150
3558
- },
3559
- {
3560
- "epoch": 0.19,
3561
- "learning_rate": 0.00017936000000000002,
3562
- "loss": 1.8981,
3563
- "step": 5160
3564
- },
3565
- {
3566
- "epoch": 0.19,
3567
- "learning_rate": 0.00017932,
3568
- "loss": 1.8746,
3569
- "step": 5170
3570
- },
3571
- {
3572
- "epoch": 0.19,
3573
- "learning_rate": 0.00017928,
3574
- "loss": 1.8151,
3575
- "step": 5180
3576
- },
3577
- {
3578
- "epoch": 0.19,
3579
- "learning_rate": 0.00017924,
3580
- "loss": 1.8965,
3581
- "step": 5190
3582
- },
3583
- {
3584
- "epoch": 0.19,
3585
- "learning_rate": 0.00017920000000000002,
3586
- "loss": 1.9178,
3587
- "step": 5200
3588
- },
3589
- {
3590
- "epoch": 0.19,
3591
- "eval_accuracy": 0.669682889051928,
3592
- "eval_loss": 1.654188632965088,
3593
- "eval_runtime": 47.2488,
3594
- "eval_samples_per_second": 8.953,
3595
- "eval_steps_per_second": 1.122,
3596
- "step": 5200
3597
- },
3598
- {
3599
- "epoch": 0.19,
3600
- "learning_rate": 0.00017916,
3601
- "loss": 1.8615,
3602
- "step": 5210
3603
- },
3604
- {
3605
- "epoch": 0.19,
3606
- "learning_rate": 0.00017912,
3607
- "loss": 1.8374,
3608
- "step": 5220
3609
- },
3610
- {
3611
- "epoch": 0.19,
3612
- "learning_rate": 0.00017908,
3613
- "loss": 1.9621,
3614
- "step": 5230
3615
- },
3616
- {
3617
- "epoch": 0.19,
3618
- "learning_rate": 0.00017904000000000002,
3619
- "loss": 1.8499,
3620
- "step": 5240
3621
- },
3622
- {
3623
- "epoch": 0.19,
3624
- "learning_rate": 0.00017900000000000001,
3625
- "loss": 1.8441,
3626
- "step": 5250
3627
- },
3628
- {
3629
- "epoch": 0.19,
3630
- "learning_rate": 0.00017896,
3631
- "loss": 1.9126,
3632
- "step": 5260
3633
- },
3634
- {
3635
- "epoch": 0.19,
3636
- "learning_rate": 0.00017892,
3637
- "loss": 1.874,
3638
- "step": 5270
3639
- },
3640
- {
3641
- "epoch": 0.2,
3642
- "learning_rate": 0.00017888,
3643
- "loss": 1.9262,
3644
- "step": 5280
3645
- },
3646
- {
3647
- "epoch": 0.2,
3648
- "learning_rate": 0.00017884000000000002,
3649
- "loss": 1.8436,
3650
- "step": 5290
3651
- },
3652
- {
3653
- "epoch": 0.2,
3654
- "learning_rate": 0.0001788,
3655
- "loss": 1.9086,
3656
- "step": 5300
3657
- },
3658
- {
3659
- "epoch": 0.2,
3660
- "eval_accuracy": 0.6702535257194098,
3661
- "eval_loss": 1.6423653364181519,
3662
- "eval_runtime": 47.3319,
3663
- "eval_samples_per_second": 8.937,
3664
- "eval_steps_per_second": 1.12,
3665
- "step": 5300
3666
- },
3667
- {
3668
- "epoch": 0.2,
3669
- "learning_rate": 0.00017876,
3670
- "loss": 1.9283,
3671
- "step": 5310
3672
- },
3673
- {
3674
- "epoch": 0.2,
3675
- "learning_rate": 0.00017872,
3676
- "loss": 1.8879,
3677
- "step": 5320
3678
- },
3679
- {
3680
- "epoch": 0.2,
3681
- "learning_rate": 0.00017868,
3682
- "loss": 1.9249,
3683
- "step": 5330
3684
- },
3685
- {
3686
- "epoch": 0.2,
3687
- "learning_rate": 0.00017864,
3688
- "loss": 1.866,
3689
- "step": 5340
3690
- },
3691
- {
3692
- "epoch": 0.2,
3693
- "learning_rate": 0.0001786,
3694
- "loss": 1.8631,
3695
- "step": 5350
3696
- },
3697
- {
3698
- "epoch": 0.2,
3699
- "learning_rate": 0.00017856000000000003,
3700
- "loss": 1.8883,
3701
- "step": 5360
3702
- },
3703
- {
3704
- "epoch": 0.2,
3705
- "learning_rate": 0.00017852,
3706
- "loss": 1.9085,
3707
- "step": 5370
3708
- },
3709
- {
3710
- "epoch": 0.2,
3711
- "learning_rate": 0.00017848,
3712
- "loss": 1.9506,
3713
- "step": 5380
3714
- },
3715
- {
3716
- "epoch": 0.2,
3717
- "learning_rate": 0.00017844,
3718
- "loss": 1.894,
3719
- "step": 5390
3720
- },
3721
- {
3722
- "epoch": 0.2,
3723
- "learning_rate": 0.0001784,
3724
- "loss": 1.9128,
3725
- "step": 5400
3726
- },
3727
- {
3728
- "epoch": 0.2,
3729
- "eval_accuracy": 0.6704165647672617,
3730
- "eval_loss": 1.65791916847229,
3731
- "eval_runtime": 46.7984,
3732
- "eval_samples_per_second": 9.039,
3733
- "eval_steps_per_second": 1.133,
3734
- "step": 5400
3735
- },
3736
- {
3737
- "epoch": 0.2,
3738
- "learning_rate": 0.00017836000000000002,
3739
- "loss": 1.8842,
3740
- "step": 5410
3741
- },
3742
- {
3743
- "epoch": 0.2,
3744
- "learning_rate": 0.00017832,
3745
- "loss": 1.8935,
3746
- "step": 5420
3747
- },
3748
- {
3749
- "epoch": 0.2,
3750
- "learning_rate": 0.00017828,
3751
- "loss": 1.84,
3752
- "step": 5430
3753
- },
3754
- {
3755
- "epoch": 0.2,
3756
- "learning_rate": 0.00017824,
3757
- "loss": 1.8379,
3758
- "step": 5440
3759
- },
3760
- {
3761
- "epoch": 0.2,
3762
- "learning_rate": 0.00017820000000000002,
3763
- "loss": 1.8661,
3764
- "step": 5450
3765
- },
3766
- {
3767
- "epoch": 0.2,
3768
- "learning_rate": 0.00017816000000000002,
3769
- "loss": 1.8418,
3770
- "step": 5460
3771
- },
3772
- {
3773
- "epoch": 0.2,
3774
- "learning_rate": 0.00017812,
3775
- "loss": 1.8773,
3776
- "step": 5470
3777
- },
3778
- {
3779
- "epoch": 0.2,
3780
- "learning_rate": 0.00017808,
3781
- "loss": 1.939,
3782
- "step": 5480
3783
- },
3784
- {
3785
- "epoch": 0.2,
3786
- "learning_rate": 0.00017804,
3787
- "loss": 1.911,
3788
- "step": 5490
3789
- },
3790
- {
3791
- "epoch": 0.2,
3792
- "learning_rate": 0.00017800000000000002,
3793
- "loss": 1.8751,
3794
- "step": 5500
3795
- },
3796
- {
3797
- "epoch": 0.2,
3798
- "eval_accuracy": 0.6681340180973343,
3799
- "eval_loss": 1.6451387405395508,
3800
- "eval_runtime": 46.943,
3801
- "eval_samples_per_second": 9.011,
3802
- "eval_steps_per_second": 1.129,
3803
- "step": 5500
3804
- },
3805
- {
3806
- "epoch": 0.2,
3807
- "learning_rate": 0.00017796,
3808
- "loss": 1.8385,
3809
- "step": 5510
3810
- },
3811
- {
3812
- "epoch": 0.2,
3813
- "learning_rate": 0.00017792,
3814
- "loss": 1.9321,
3815
- "step": 5520
3816
- },
3817
- {
3818
- "epoch": 0.2,
3819
- "learning_rate": 0.00017788,
3820
- "loss": 1.8533,
3821
- "step": 5530
3822
- },
3823
- {
3824
- "epoch": 0.2,
3825
- "learning_rate": 0.00017784000000000002,
3826
- "loss": 1.8428,
3827
- "step": 5540
3828
- },
3829
- {
3830
- "epoch": 0.21,
3831
- "learning_rate": 0.0001778,
3832
- "loss": 1.8833,
3833
- "step": 5550
3834
- },
3835
- {
3836
- "epoch": 0.21,
3837
- "learning_rate": 0.00017776,
3838
- "loss": 1.8387,
3839
- "step": 5560
3840
- },
3841
- {
3842
- "epoch": 0.21,
3843
- "learning_rate": 0.00017772,
3844
- "loss": 1.8742,
3845
- "step": 5570
3846
- },
3847
- {
3848
- "epoch": 0.21,
3849
- "learning_rate": 0.00017768,
3850
- "loss": 1.8835,
3851
- "step": 5580
3852
- },
3853
- {
3854
- "epoch": 0.21,
3855
- "learning_rate": 0.00017764000000000001,
3856
- "loss": 1.9084,
3857
- "step": 5590
3858
- },
3859
- {
3860
- "epoch": 0.21,
3861
- "learning_rate": 0.0001776,
3862
- "loss": 1.8289,
3863
- "step": 5600
3864
- },
3865
- {
3866
- "epoch": 0.21,
3867
- "eval_accuracy": 0.6712317600065215,
3868
- "eval_loss": 1.649842619895935,
3869
- "eval_runtime": 47.4408,
3870
- "eval_samples_per_second": 8.916,
3871
- "eval_steps_per_second": 1.117,
3872
- "step": 5600
3873
- },
3874
- {
3875
- "epoch": 0.21,
3876
- "learning_rate": 0.00017756000000000003,
3877
- "loss": 1.9069,
3878
- "step": 5610
3879
- },
3880
- {
3881
- "epoch": 0.21,
3882
- "learning_rate": 0.00017752,
3883
- "loss": 1.8689,
3884
- "step": 5620
3885
- },
3886
- {
3887
- "epoch": 0.21,
3888
- "learning_rate": 0.00017748000000000002,
3889
- "loss": 1.8327,
3890
- "step": 5630
3891
- },
3892
- {
3893
- "epoch": 0.21,
3894
- "learning_rate": 0.00017744,
3895
- "loss": 1.8061,
3896
- "step": 5640
3897
- },
3898
- {
3899
- "epoch": 0.21,
3900
- "learning_rate": 0.0001774,
3901
- "loss": 1.8382,
3902
- "step": 5650
3903
- },
3904
- {
3905
- "epoch": 0.21,
3906
- "learning_rate": 0.00017736000000000002,
3907
- "loss": 1.8691,
3908
- "step": 5660
3909
- },
3910
- {
3911
- "epoch": 0.21,
3912
- "learning_rate": 0.00017732000000000002,
3913
- "loss": 1.8348,
3914
- "step": 5670
3915
- },
3916
- {
3917
- "epoch": 0.21,
3918
- "learning_rate": 0.00017728,
3919
- "loss": 1.8645,
3920
- "step": 5680
3921
- },
3922
- {
3923
- "epoch": 0.21,
3924
- "learning_rate": 0.00017724,
3925
- "loss": 1.9094,
3926
- "step": 5690
3927
- },
3928
- {
3929
- "epoch": 0.21,
3930
- "learning_rate": 0.0001772,
3931
- "loss": 1.8597,
3932
- "step": 5700
3933
- },
3934
- {
3935
- "epoch": 0.21,
3936
- "eval_accuracy": 0.669601369528002,
3937
- "eval_loss": 1.6287590265274048,
3938
- "eval_runtime": 46.725,
3939
- "eval_samples_per_second": 9.053,
3940
- "eval_steps_per_second": 1.134,
3941
- "step": 5700
3942
- },
3943
- {
3944
- "epoch": 0.21,
3945
- "learning_rate": 0.00017716000000000002,
3946
- "loss": 1.8063,
3947
- "step": 5710
3948
- },
3949
- {
3950
- "epoch": 0.21,
3951
- "learning_rate": 0.00017712,
3952
- "loss": 1.8771,
3953
- "step": 5720
3954
- },
3955
- {
3956
- "epoch": 0.21,
3957
- "learning_rate": 0.00017708,
3958
- "loss": 1.9203,
3959
- "step": 5730
3960
- },
3961
- {
3962
- "epoch": 0.21,
3963
- "learning_rate": 0.00017704,
3964
- "loss": 1.9039,
3965
- "step": 5740
3966
- },
3967
- {
3968
- "epoch": 0.21,
3969
- "learning_rate": 0.00017700000000000002,
3970
- "loss": 1.8713,
3971
- "step": 5750
3972
- },
3973
- {
3974
- "epoch": 0.21,
3975
- "learning_rate": 0.00017696,
3976
- "loss": 1.8446,
3977
- "step": 5760
3978
- },
3979
- {
3980
- "epoch": 0.21,
3981
- "learning_rate": 0.00017692,
3982
- "loss": 1.8771,
3983
- "step": 5770
3984
- },
3985
- {
3986
- "epoch": 0.21,
3987
- "learning_rate": 0.00017688,
3988
- "loss": 1.8959,
3989
- "step": 5780
3990
- },
3991
- {
3992
- "epoch": 0.21,
3993
- "learning_rate": 0.00017684,
3994
- "loss": 1.915,
3995
- "step": 5790
3996
- },
3997
- {
3998
- "epoch": 0.21,
3999
- "learning_rate": 0.00017680000000000001,
4000
- "loss": 1.8873,
4001
- "step": 5800
4002
- },
4003
- {
4004
- "epoch": 0.21,
4005
- "eval_accuracy": 0.6718023966740034,
4006
- "eval_loss": 1.634265422821045,
4007
- "eval_runtime": 47.1163,
4008
- "eval_samples_per_second": 8.978,
4009
- "eval_steps_per_second": 1.125,
4010
- "step": 5800
4011
- }
4012
- ],
4013
- "max_steps": 50000,
4014
- "num_train_epochs": 2,
4015
- "total_flos": 1.066306262532096e+17,
4016
- "trial_name": null,
4017
- "trial_params": null
4018
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a9cd9cf01a1ff40ca4c7f3b787db9616f0da4cd4bacee79ce247ef18fa96834
3
- size 4015