CreatorPhan commited on
Commit
327a8e8
1 Parent(s): e26fe7a

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. adapter_model.bin +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. tokenizer.json +2 -2
  6. trainer_state.json +802 -1402
  7. training_args.bin +2 -2
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bfdd727772683806731fba15bf8f4caf691af6184bc5b21e2ea5a4f390f41dc
3
  size 39409357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7e192ed60fe7cea96046128cc1ca5931f6a086e81cbe23d17658e39615bd7f7
3
  size 39409357
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80d296d52ab4d4c11a0695940c0dadb792928e126cde3111312316dd3d33438
3
  size 78844421
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a00e12954c764ca3596c339dd9d6eade6a658e22d14b6a8627f2ebe664cb83
3
  size 78844421
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e46ce4eb16240da9f3a8b3066acb6f59a234249ee2a3052f3323786da479838
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:370c3a07f37a8aae6ea141b54ca992b21699546baf7407eb587b6056f787333b
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64035578adb33297a89b3be176cbd0b7d7adb0f34d904267be99bb01d2a849d0
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea1baddcec85a868bf4a5d1391543e44800a635b2f938b33bc9378075f5e0851
3
  size 627
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d81d9b2c9d9db79ea02c00d4c7e79bb77a718dc57ab01f5f3b1cd6649f08993
3
- size 14500569
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a208233d2ee8d8c83b23bc214df737c44806a1919f444e89b31e586cd956ba
3
+ size 14500471
trainer_state.json CHANGED
@@ -1,3019 +1,2419 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.285714285714286,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03,
13
- "learning_rate": 0.002998214285714286,
14
  "loss": 3.0944,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.06,
19
- "learning_rate": 0.0029964285714285713,
20
- "loss": 2.8734,
21
  "step": 2
22
  },
23
  {
24
  "epoch": 0.09,
25
- "learning_rate": 0.002994642857142857,
26
- "loss": 8.7265,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 0.11,
31
- "learning_rate": 0.002992857142857143,
32
- "loss": 4.7587,
33
  "step": 4
34
  },
35
  {
36
  "epoch": 0.14,
37
- "learning_rate": 0.002991071428571429,
38
- "loss": 4.3637,
39
  "step": 5
40
  },
41
  {
42
  "epoch": 0.17,
43
- "learning_rate": 0.0029892857142857143,
44
- "loss": 3.315,
45
  "step": 6
46
  },
47
  {
48
  "epoch": 0.2,
49
- "learning_rate": 0.0029875,
50
- "loss": 3.1726,
51
  "step": 7
52
  },
53
  {
54
  "epoch": 0.23,
55
- "learning_rate": 0.002985714285714286,
56
- "loss": 2.9938,
57
  "step": 8
58
  },
59
  {
60
  "epoch": 0.26,
61
- "learning_rate": 0.0029839285714285714,
62
- "loss": 3.0509,
63
  "step": 9
64
  },
65
  {
66
  "epoch": 0.29,
67
- "learning_rate": 0.0029821428571428573,
68
- "loss": 2.93,
69
  "step": 10
70
  },
71
  {
72
  "epoch": 0.31,
73
- "learning_rate": 0.002980357142857143,
74
- "loss": 2.8436,
75
  "step": 11
76
  },
77
  {
78
  "epoch": 0.34,
79
- "learning_rate": 0.0029785714285714285,
80
- "loss": 2.8756,
81
  "step": 12
82
  },
83
  {
84
  "epoch": 0.37,
85
- "learning_rate": 0.0029767857142857144,
86
- "loss": 2.8528,
87
  "step": 13
88
  },
89
  {
90
  "epoch": 0.4,
91
- "learning_rate": 0.002975,
92
- "loss": 2.8405,
93
  "step": 14
94
  },
95
  {
96
  "epoch": 0.43,
97
- "learning_rate": 0.002973214285714286,
98
- "loss": 2.8022,
99
  "step": 15
100
  },
101
  {
102
  "epoch": 0.46,
103
- "learning_rate": 0.0029714285714285715,
104
- "loss": 2.8356,
105
  "step": 16
106
  },
107
  {
108
  "epoch": 0.49,
109
- "learning_rate": 0.0029696428571428573,
110
- "loss": 2.7915,
111
  "step": 17
112
  },
113
  {
114
  "epoch": 0.51,
115
- "learning_rate": 0.002967857142857143,
116
- "loss": 2.7848,
117
  "step": 18
118
  },
119
  {
120
  "epoch": 0.54,
121
- "learning_rate": 0.0029660714285714286,
122
- "loss": 2.6604,
123
  "step": 19
124
  },
125
  {
126
  "epoch": 0.57,
127
- "learning_rate": 0.0029642857142857144,
128
- "loss": 2.736,
129
  "step": 20
130
  },
131
  {
132
  "epoch": 0.6,
133
- "learning_rate": 0.0029625000000000003,
134
- "loss": 2.7747,
135
  "step": 21
136
  },
137
  {
138
  "epoch": 0.63,
139
- "learning_rate": 0.0029607142857142857,
140
- "loss": 2.6958,
141
  "step": 22
142
  },
143
  {
144
  "epoch": 0.66,
145
- "learning_rate": 0.0029589285714285716,
146
- "loss": 2.7309,
147
  "step": 23
148
  },
149
  {
150
  "epoch": 0.69,
151
- "learning_rate": 0.0029571428571428574,
152
- "loss": 2.7294,
153
  "step": 24
154
  },
155
  {
156
  "epoch": 0.71,
157
- "learning_rate": 0.0029553571428571433,
158
- "loss": 2.7493,
159
  "step": 25
160
  },
161
  {
162
  "epoch": 0.74,
163
- "learning_rate": 0.0029535714285714287,
164
- "loss": 2.7351,
165
  "step": 26
166
  },
167
  {
168
  "epoch": 0.77,
169
- "learning_rate": 0.002951785714285714,
170
- "loss": 2.6825,
171
  "step": 27
172
  },
173
  {
174
  "epoch": 0.8,
175
- "learning_rate": 0.00295,
176
- "loss": 2.6658,
177
  "step": 28
178
  },
179
  {
180
  "epoch": 0.83,
181
- "learning_rate": 0.0029482142857142858,
182
- "loss": 2.6127,
183
  "step": 29
184
  },
185
  {
186
  "epoch": 0.86,
187
- "learning_rate": 0.002946428571428571,
188
- "loss": 2.6338,
189
  "step": 30
190
  },
191
  {
192
  "epoch": 0.89,
193
- "learning_rate": 0.002944642857142857,
194
- "loss": 2.7094,
195
  "step": 31
196
  },
197
  {
198
  "epoch": 0.91,
199
- "learning_rate": 0.002942857142857143,
200
- "loss": 2.6943,
201
  "step": 32
202
  },
203
  {
204
  "epoch": 0.94,
205
- "learning_rate": 0.0029410714285714283,
206
- "loss": 2.7222,
207
  "step": 33
208
  },
209
  {
210
  "epoch": 0.97,
211
- "learning_rate": 0.002939285714285714,
212
- "loss": 2.6541,
213
  "step": 34
214
  },
215
  {
216
  "epoch": 1.0,
217
- "learning_rate": 0.0029375,
218
- "loss": 2.7111,
219
  "step": 35
220
  },
221
  {
222
  "epoch": 1.03,
223
- "learning_rate": 0.002935714285714286,
224
- "loss": 2.5123,
225
  "step": 36
226
  },
227
  {
228
  "epoch": 1.06,
229
- "learning_rate": 0.0029339285714285713,
230
- "loss": 2.493,
231
  "step": 37
232
  },
233
  {
234
  "epoch": 1.09,
235
- "learning_rate": 0.002932142857142857,
236
- "loss": 2.4434,
237
  "step": 38
238
  },
239
  {
240
  "epoch": 1.11,
241
- "learning_rate": 0.002930357142857143,
242
- "loss": 2.4883,
243
  "step": 39
244
  },
245
  {
246
  "epoch": 1.14,
247
- "learning_rate": 0.0029285714285714284,
248
- "loss": 2.5004,
249
  "step": 40
250
  },
251
  {
252
  "epoch": 1.17,
253
- "learning_rate": 0.0029267857142857142,
254
- "loss": 2.4667,
255
  "step": 41
256
  },
257
  {
258
  "epoch": 1.2,
259
- "learning_rate": 0.002925,
260
- "loss": 2.4461,
261
  "step": 42
262
  },
263
  {
264
  "epoch": 1.23,
265
- "learning_rate": 0.002923214285714286,
266
- "loss": 2.5361,
267
  "step": 43
268
  },
269
  {
270
  "epoch": 1.26,
271
- "learning_rate": 0.0029214285714285713,
272
- "loss": 2.4597,
273
  "step": 44
274
  },
275
  {
276
  "epoch": 1.29,
277
- "learning_rate": 0.002919642857142857,
278
- "loss": 2.4006,
279
  "step": 45
280
  },
281
  {
282
  "epoch": 1.31,
283
- "learning_rate": 0.002917857142857143,
284
- "loss": 2.5019,
285
  "step": 46
286
  },
287
  {
288
  "epoch": 1.34,
289
- "learning_rate": 0.0029160714285714285,
290
- "loss": 2.5209,
291
  "step": 47
292
  },
293
  {
294
  "epoch": 1.37,
295
- "learning_rate": 0.0029142857142857143,
296
- "loss": 2.4753,
297
  "step": 48
298
  },
299
  {
300
  "epoch": 1.4,
301
- "learning_rate": 0.0029125,
302
- "loss": 2.4104,
303
  "step": 49
304
  },
305
  {
306
  "epoch": 1.43,
307
- "learning_rate": 0.0029107142857142856,
308
- "loss": 2.3938,
309
  "step": 50
310
  },
311
  {
312
  "epoch": 1.46,
313
- "learning_rate": 0.0029089285714285714,
314
- "loss": 2.4999,
315
  "step": 51
316
  },
317
  {
318
  "epoch": 1.49,
319
- "learning_rate": 0.0029071428571428573,
320
- "loss": 2.4292,
321
  "step": 52
322
  },
323
  {
324
  "epoch": 1.51,
325
- "learning_rate": 0.002905357142857143,
326
- "loss": 2.4636,
327
  "step": 53
328
  },
329
  {
330
  "epoch": 1.54,
331
- "learning_rate": 0.0029035714285714285,
332
- "loss": 2.5163,
333
  "step": 54
334
  },
335
  {
336
  "epoch": 1.57,
337
- "learning_rate": 0.0029017857142857144,
338
- "loss": 2.5098,
339
  "step": 55
340
  },
341
  {
342
  "epoch": 1.6,
343
- "learning_rate": 0.0029000000000000002,
344
- "loss": 2.447,
345
  "step": 56
346
  },
347
  {
348
  "epoch": 1.63,
349
- "learning_rate": 0.0028982142857142856,
350
- "loss": 2.4262,
351
  "step": 57
352
  },
353
  {
354
  "epoch": 1.66,
355
- "learning_rate": 0.0028964285714285715,
356
- "loss": 2.5146,
357
  "step": 58
358
  },
359
  {
360
  "epoch": 1.69,
361
- "learning_rate": 0.0028946428571428573,
362
- "loss": 2.4225,
363
  "step": 59
364
  },
365
  {
366
  "epoch": 1.71,
367
- "learning_rate": 0.0028928571428571428,
368
- "loss": 2.452,
369
  "step": 60
370
  },
371
  {
372
  "epoch": 1.74,
373
- "learning_rate": 0.0028910714285714286,
374
- "loss": 2.4449,
375
  "step": 61
376
  },
377
  {
378
  "epoch": 1.77,
379
- "learning_rate": 0.0028892857142857145,
380
- "loss": 2.4984,
381
  "step": 62
382
  },
383
  {
384
  "epoch": 1.8,
385
- "learning_rate": 0.0028875000000000003,
386
- "loss": 2.4493,
387
  "step": 63
388
  },
389
  {
390
  "epoch": 1.83,
391
- "learning_rate": 0.0028857142857142857,
392
- "loss": 2.4187,
393
  "step": 64
394
  },
395
  {
396
  "epoch": 1.86,
397
- "learning_rate": 0.0028839285714285716,
398
- "loss": 2.5019,
399
  "step": 65
400
  },
401
  {
402
  "epoch": 1.89,
403
- "learning_rate": 0.0028821428571428574,
404
- "loss": 2.4274,
405
  "step": 66
406
  },
407
  {
408
  "epoch": 1.91,
409
- "learning_rate": 0.002880357142857143,
410
- "loss": 2.4485,
411
  "step": 67
412
  },
413
  {
414
  "epoch": 1.94,
415
- "learning_rate": 0.0028785714285714287,
416
- "loss": 2.5096,
417
  "step": 68
418
  },
419
  {
420
  "epoch": 1.97,
421
- "learning_rate": 0.0028767857142857145,
422
- "loss": 2.4862,
423
  "step": 69
424
  },
425
  {
426
  "epoch": 2.0,
427
- "learning_rate": 0.0028750000000000004,
428
- "loss": 2.469,
429
  "step": 70
430
  },
431
  {
432
  "epoch": 2.03,
433
- "learning_rate": 0.002873214285714286,
434
- "loss": 2.1795,
435
  "step": 71
436
  },
437
  {
438
  "epoch": 2.06,
439
- "learning_rate": 0.0028714285714285716,
440
- "loss": 2.1106,
441
  "step": 72
442
  },
443
  {
444
  "epoch": 2.09,
445
- "learning_rate": 0.0028696428571428575,
446
- "loss": 2.0896,
447
  "step": 73
448
  },
449
  {
450
  "epoch": 2.11,
451
- "learning_rate": 0.002867857142857143,
452
- "loss": 2.2018,
453
  "step": 74
454
  },
455
  {
456
  "epoch": 2.14,
457
- "learning_rate": 0.0028660714285714288,
458
- "loss": 2.0803,
459
  "step": 75
460
  },
461
  {
462
  "epoch": 2.17,
463
- "learning_rate": 0.0028642857142857146,
464
- "loss": 2.1395,
465
  "step": 76
466
  },
467
  {
468
  "epoch": 2.2,
469
- "learning_rate": 0.0028625,
470
- "loss": 2.1019,
471
  "step": 77
472
  },
473
  {
474
  "epoch": 2.23,
475
- "learning_rate": 0.002860714285714286,
476
- "loss": 2.1383,
477
  "step": 78
478
  },
479
  {
480
  "epoch": 2.26,
481
- "learning_rate": 0.0028589285714285713,
482
- "loss": 2.1109,
483
  "step": 79
484
  },
485
  {
486
  "epoch": 2.29,
487
- "learning_rate": 0.002857142857142857,
488
- "loss": 2.0854,
489
  "step": 80
490
  },
491
  {
492
  "epoch": 2.31,
493
- "learning_rate": 0.0028553571428571426,
494
- "loss": 2.1667,
495
  "step": 81
496
  },
497
  {
498
  "epoch": 2.34,
499
- "learning_rate": 0.0028535714285714284,
500
- "loss": 2.1111,
501
  "step": 82
502
  },
503
  {
504
  "epoch": 2.37,
505
- "learning_rate": 0.0028517857142857143,
506
- "loss": 2.1032,
507
  "step": 83
508
  },
509
  {
510
  "epoch": 2.4,
511
- "learning_rate": 0.00285,
512
- "loss": 2.1708,
513
  "step": 84
514
  },
515
  {
516
  "epoch": 2.43,
517
- "learning_rate": 0.0028482142857142855,
518
- "loss": 2.1118,
519
  "step": 85
520
  },
521
  {
522
  "epoch": 2.46,
523
- "learning_rate": 0.0028464285714285714,
524
- "loss": 2.1481,
525
  "step": 86
526
  },
527
  {
528
  "epoch": 2.49,
529
- "learning_rate": 0.002844642857142857,
530
- "loss": 2.1538,
531
  "step": 87
532
  },
533
  {
534
  "epoch": 2.51,
535
- "learning_rate": 0.0028428571428571426,
536
- "loss": 2.1843,
537
  "step": 88
538
  },
539
  {
540
  "epoch": 2.54,
541
- "learning_rate": 0.0028410714285714285,
542
- "loss": 2.1828,
543
  "step": 89
544
  },
545
  {
546
  "epoch": 2.57,
547
- "learning_rate": 0.0028392857142857143,
548
- "loss": 2.2151,
549
  "step": 90
550
  },
551
  {
552
  "epoch": 2.6,
553
- "learning_rate": 0.0028375,
554
- "loss": 2.1969,
555
  "step": 91
556
  },
557
  {
558
  "epoch": 2.63,
559
- "learning_rate": 0.0028357142857142856,
560
- "loss": 2.1509,
561
  "step": 92
562
  },
563
  {
564
  "epoch": 2.66,
565
- "learning_rate": 0.0028339285714285714,
566
- "loss": 2.2636,
567
  "step": 93
568
  },
569
  {
570
  "epoch": 2.69,
571
- "learning_rate": 0.0028321428571428573,
572
- "loss": 2.2809,
573
  "step": 94
574
  },
575
  {
576
  "epoch": 2.71,
577
- "learning_rate": 0.0028303571428571427,
578
- "loss": 2.2044,
579
  "step": 95
580
  },
581
  {
582
  "epoch": 2.74,
583
- "learning_rate": 0.0028285714285714286,
584
- "loss": 2.2064,
585
  "step": 96
586
  },
587
  {
588
  "epoch": 2.77,
589
- "learning_rate": 0.0028267857142857144,
590
- "loss": 2.2408,
591
  "step": 97
592
  },
593
  {
594
  "epoch": 2.8,
595
- "learning_rate": 0.002825,
596
- "loss": 2.2446,
597
  "step": 98
598
  },
599
  {
600
  "epoch": 2.83,
601
- "learning_rate": 0.0028232142857142857,
602
- "loss": 2.1965,
603
  "step": 99
604
  },
605
  {
606
  "epoch": 2.86,
607
- "learning_rate": 0.0028214285714285715,
608
- "loss": 2.3093,
609
  "step": 100
610
  },
611
  {
612
  "epoch": 2.89,
613
- "learning_rate": 0.0028196428571428574,
614
- "loss": 2.2188,
615
  "step": 101
616
  },
617
  {
618
  "epoch": 2.91,
619
- "learning_rate": 0.0028178571428571428,
620
- "loss": 2.3098,
621
  "step": 102
622
  },
623
  {
624
  "epoch": 2.94,
625
- "learning_rate": 0.0028160714285714286,
626
- "loss": 2.2268,
627
  "step": 103
628
  },
629
  {
630
  "epoch": 2.97,
631
- "learning_rate": 0.0028142857142857145,
632
- "loss": 2.2574,
633
  "step": 104
634
  },
635
  {
636
  "epoch": 3.0,
637
- "learning_rate": 0.0028125,
638
- "loss": 2.2982,
639
  "step": 105
640
  },
641
  {
642
  "epoch": 3.03,
643
- "learning_rate": 0.0028107142857142857,
644
- "loss": 1.879,
645
  "step": 106
646
  },
647
  {
648
  "epoch": 3.06,
649
- "learning_rate": 0.0028089285714285716,
650
- "loss": 1.8074,
651
  "step": 107
652
  },
653
  {
654
  "epoch": 3.09,
655
- "learning_rate": 0.002807142857142857,
656
- "loss": 1.8174,
657
  "step": 108
658
  },
659
  {
660
  "epoch": 3.11,
661
- "learning_rate": 0.002805357142857143,
662
- "loss": 1.784,
663
  "step": 109
664
  },
665
  {
666
  "epoch": 3.14,
667
- "learning_rate": 0.0028035714285714287,
668
- "loss": 1.8469,
669
  "step": 110
670
  },
671
  {
672
  "epoch": 3.17,
673
- "learning_rate": 0.0028017857142857146,
674
- "loss": 1.8614,
675
  "step": 111
676
  },
677
  {
678
  "epoch": 3.2,
679
- "learning_rate": 0.0028,
680
- "loss": 1.8648,
681
  "step": 112
682
  },
683
  {
684
  "epoch": 3.23,
685
- "learning_rate": 0.002798214285714286,
686
- "loss": 1.8172,
687
  "step": 113
688
  },
689
  {
690
  "epoch": 3.26,
691
- "learning_rate": 0.0027964285714285717,
692
- "loss": 1.8579,
693
  "step": 114
694
  },
695
  {
696
  "epoch": 3.29,
697
- "learning_rate": 0.002794642857142857,
698
- "loss": 1.8261,
699
  "step": 115
700
  },
701
  {
702
  "epoch": 3.31,
703
- "learning_rate": 0.002792857142857143,
704
- "loss": 1.8993,
705
  "step": 116
706
  },
707
  {
708
  "epoch": 3.34,
709
- "learning_rate": 0.0027910714285714288,
710
- "loss": 1.8144,
711
  "step": 117
712
  },
713
  {
714
  "epoch": 3.37,
715
- "learning_rate": 0.0027892857142857146,
716
- "loss": 1.8583,
717
  "step": 118
718
  },
719
  {
720
  "epoch": 3.4,
721
- "learning_rate": 0.0027875,
722
- "loss": 1.8589,
723
  "step": 119
724
  },
725
  {
726
  "epoch": 3.43,
727
- "learning_rate": 0.002785714285714286,
728
- "loss": 1.9069,
729
  "step": 120
730
  },
731
  {
732
  "epoch": 3.46,
733
- "learning_rate": 0.0027839285714285717,
734
- "loss": 1.9004,
735
  "step": 121
736
  },
737
  {
738
  "epoch": 3.49,
739
- "learning_rate": 0.002782142857142857,
740
- "loss": 1.9378,
741
  "step": 122
742
  },
743
  {
744
  "epoch": 3.51,
745
- "learning_rate": 0.002780357142857143,
746
- "loss": 1.9161,
747
  "step": 123
748
  },
749
  {
750
  "epoch": 3.54,
751
- "learning_rate": 0.002778571428571429,
752
- "loss": 1.9886,
753
  "step": 124
754
  },
755
  {
756
  "epoch": 3.57,
757
- "learning_rate": 0.0027767857142857143,
758
- "loss": 1.8636,
759
  "step": 125
760
  },
761
  {
762
  "epoch": 3.6,
763
- "learning_rate": 0.002775,
764
- "loss": 1.9642,
765
  "step": 126
766
  },
767
  {
768
  "epoch": 3.63,
769
- "learning_rate": 0.002773214285714286,
770
- "loss": 1.959,
771
  "step": 127
772
  },
773
  {
774
  "epoch": 3.66,
775
- "learning_rate": 0.002771428571428572,
776
- "loss": 1.9787,
777
  "step": 128
778
  },
779
  {
780
  "epoch": 3.69,
781
- "learning_rate": 0.0027696428571428572,
782
- "loss": 2.0272,
783
  "step": 129
784
  },
785
  {
786
  "epoch": 3.71,
787
- "learning_rate": 0.002767857142857143,
788
- "loss": 2.0362,
789
  "step": 130
790
  },
791
  {
792
  "epoch": 3.74,
793
- "learning_rate": 0.002766071428571429,
794
- "loss": 2.0369,
795
  "step": 131
796
  },
797
  {
798
  "epoch": 3.77,
799
- "learning_rate": 0.0027642857142857143,
800
- "loss": 2.0721,
801
  "step": 132
802
  },
803
  {
804
  "epoch": 3.8,
805
- "learning_rate": 0.0027624999999999998,
806
- "loss": 1.9939,
807
  "step": 133
808
  },
809
  {
810
  "epoch": 3.83,
811
- "learning_rate": 0.0027607142857142856,
812
- "loss": 2.0403,
813
  "step": 134
814
  },
815
  {
816
  "epoch": 3.86,
817
- "learning_rate": 0.0027589285714285715,
818
- "loss": 2.1132,
819
  "step": 135
820
  },
821
  {
822
  "epoch": 3.89,
823
- "learning_rate": 0.002757142857142857,
824
- "loss": 2.0741,
825
  "step": 136
826
  },
827
  {
828
  "epoch": 3.91,
829
- "learning_rate": 0.0027553571428571427,
830
- "loss": 2.0754,
831
  "step": 137
832
  },
833
  {
834
  "epoch": 3.94,
835
- "learning_rate": 0.0027535714285714286,
836
- "loss": 2.1321,
837
  "step": 138
838
  },
839
  {
840
  "epoch": 3.97,
841
- "learning_rate": 0.0027517857142857144,
842
- "loss": 2.0665,
843
  "step": 139
844
  },
845
  {
846
  "epoch": 4.0,
847
- "learning_rate": 0.00275,
848
- "loss": 2.1085,
849
  "step": 140
850
  },
851
  {
852
  "epoch": 4.03,
853
- "learning_rate": 0.0027482142857142857,
854
- "loss": 1.653,
855
  "step": 141
856
  },
857
  {
858
  "epoch": 4.06,
859
- "learning_rate": 0.0027464285714285715,
860
- "loss": 1.5934,
861
  "step": 142
862
  },
863
  {
864
  "epoch": 4.09,
865
- "learning_rate": 0.002744642857142857,
866
- "loss": 1.6795,
867
  "step": 143
868
  },
869
  {
870
  "epoch": 4.11,
871
- "learning_rate": 0.002742857142857143,
872
- "loss": 1.6043,
873
  "step": 144
874
  },
875
  {
876
  "epoch": 4.14,
877
- "learning_rate": 0.0027410714285714287,
878
- "loss": 1.586,
879
  "step": 145
880
  },
881
  {
882
  "epoch": 4.17,
883
- "learning_rate": 0.002739285714285714,
884
- "loss": 1.6061,
885
  "step": 146
886
  },
887
  {
888
  "epoch": 4.2,
889
- "learning_rate": 0.0027375,
890
- "loss": 1.6438,
891
  "step": 147
892
  },
893
  {
894
  "epoch": 4.23,
895
- "learning_rate": 0.0027357142857142858,
896
- "loss": 1.6097,
897
  "step": 148
898
  },
899
  {
900
  "epoch": 4.26,
901
- "learning_rate": 0.0027339285714285716,
902
- "loss": 1.7163,
903
  "step": 149
904
  },
905
  {
906
  "epoch": 4.29,
907
- "learning_rate": 0.002732142857142857,
908
- "loss": 1.6485,
909
  "step": 150
910
  },
911
  {
912
  "epoch": 4.31,
913
- "learning_rate": 0.002730357142857143,
914
- "loss": 1.6555,
915
  "step": 151
916
  },
917
  {
918
  "epoch": 4.34,
919
- "learning_rate": 0.0027285714285714287,
920
- "loss": 1.689,
921
  "step": 152
922
  },
923
  {
924
  "epoch": 4.37,
925
- "learning_rate": 0.002726785714285714,
926
- "loss": 1.7174,
927
  "step": 153
928
  },
929
  {
930
  "epoch": 4.4,
931
- "learning_rate": 0.002725,
932
- "loss": 1.7205,
933
  "step": 154
934
  },
935
  {
936
  "epoch": 4.43,
937
- "learning_rate": 0.002723214285714286,
938
- "loss": 1.7064,
939
  "step": 155
940
  },
941
  {
942
  "epoch": 4.46,
943
- "learning_rate": 0.0027214285714285717,
944
- "loss": 1.7045,
945
  "step": 156
946
  },
947
  {
948
  "epoch": 4.49,
949
- "learning_rate": 0.002719642857142857,
950
- "loss": 1.7749,
951
  "step": 157
952
  },
953
  {
954
  "epoch": 4.51,
955
- "learning_rate": 0.002717857142857143,
956
- "loss": 1.7826,
957
  "step": 158
958
  },
959
  {
960
  "epoch": 4.54,
961
- "learning_rate": 0.002716071428571429,
962
- "loss": 1.7882,
963
  "step": 159
964
  },
965
  {
966
  "epoch": 4.57,
967
- "learning_rate": 0.0027142857142857142,
968
- "loss": 1.8073,
969
  "step": 160
970
  },
971
  {
972
  "epoch": 4.6,
973
- "learning_rate": 0.0027125,
974
- "loss": 1.7931,
975
  "step": 161
976
  },
977
  {
978
  "epoch": 4.63,
979
- "learning_rate": 0.002710714285714286,
980
- "loss": 1.8388,
981
  "step": 162
982
  },
983
  {
984
  "epoch": 4.66,
985
- "learning_rate": 0.0027089285714285713,
986
- "loss": 1.8267,
987
  "step": 163
988
  },
989
  {
990
  "epoch": 4.69,
991
- "learning_rate": 0.002707142857142857,
992
- "loss": 1.8208,
993
  "step": 164
994
  },
995
  {
996
  "epoch": 4.71,
997
- "learning_rate": 0.002705357142857143,
998
- "loss": 1.8404,
999
  "step": 165
1000
  },
1001
  {
1002
  "epoch": 4.74,
1003
- "learning_rate": 0.002703571428571429,
1004
- "loss": 1.8375,
1005
  "step": 166
1006
  },
1007
  {
1008
  "epoch": 4.77,
1009
- "learning_rate": 0.0027017857142857143,
1010
- "loss": 1.9444,
1011
  "step": 167
1012
  },
1013
  {
1014
  "epoch": 4.8,
1015
- "learning_rate": 0.0027,
1016
- "loss": 1.8325,
1017
  "step": 168
1018
  },
1019
  {
1020
  "epoch": 4.83,
1021
- "learning_rate": 0.002698214285714286,
1022
- "loss": 1.8705,
1023
  "step": 169
1024
  },
1025
  {
1026
  "epoch": 4.86,
1027
- "learning_rate": 0.0026964285714285714,
1028
- "loss": 1.9368,
1029
  "step": 170
1030
  },
1031
  {
1032
  "epoch": 4.89,
1033
- "learning_rate": 0.0026946428571428573,
1034
- "loss": 1.8758,
1035
  "step": 171
1036
  },
1037
  {
1038
  "epoch": 4.91,
1039
- "learning_rate": 0.002692857142857143,
1040
- "loss": 1.999,
1041
  "step": 172
1042
  },
1043
  {
1044
  "epoch": 4.94,
1045
- "learning_rate": 0.0026910714285714285,
1046
- "loss": 1.9547,
1047
  "step": 173
1048
  },
1049
  {
1050
  "epoch": 4.97,
1051
- "learning_rate": 0.0026892857142857144,
1052
- "loss": 1.9332,
1053
  "step": 174
1054
  },
1055
  {
1056
  "epoch": 5.0,
1057
- "learning_rate": 0.0026875000000000002,
1058
- "loss": 1.9704,
1059
  "step": 175
1060
  },
1061
  {
1062
  "epoch": 5.03,
1063
- "learning_rate": 0.002685714285714286,
1064
- "loss": 1.4766,
1065
  "step": 176
1066
  },
1067
  {
1068
  "epoch": 5.06,
1069
- "learning_rate": 0.0026839285714285715,
1070
- "loss": 1.4459,
1071
  "step": 177
1072
  },
1073
  {
1074
  "epoch": 5.09,
1075
- "learning_rate": 0.0026821428571428573,
1076
- "loss": 1.4451,
1077
  "step": 178
1078
  },
1079
  {
1080
  "epoch": 5.11,
1081
- "learning_rate": 0.002680357142857143,
1082
- "loss": 1.4761,
1083
  "step": 179
1084
  },
1085
  {
1086
  "epoch": 5.14,
1087
- "learning_rate": 0.0026785714285714286,
1088
- "loss": 1.5092,
1089
  "step": 180
1090
  },
1091
  {
1092
  "epoch": 5.17,
1093
- "learning_rate": 0.0026767857142857144,
1094
- "loss": 1.4439,
1095
  "step": 181
1096
  },
1097
  {
1098
  "epoch": 5.2,
1099
- "learning_rate": 0.0026750000000000003,
1100
- "loss": 1.4539,
1101
  "step": 182
1102
  },
1103
  {
1104
  "epoch": 5.23,
1105
- "learning_rate": 0.002673214285714286,
1106
- "loss": 1.4804,
1107
  "step": 183
1108
  },
1109
  {
1110
  "epoch": 5.26,
1111
- "learning_rate": 0.002671428571428571,
1112
- "loss": 1.4958,
1113
  "step": 184
1114
  },
1115
  {
1116
  "epoch": 5.29,
1117
- "learning_rate": 0.002669642857142857,
1118
- "loss": 1.5054,
1119
  "step": 185
1120
  },
1121
  {
1122
  "epoch": 5.31,
1123
- "learning_rate": 0.002667857142857143,
1124
- "loss": 1.4673,
1125
  "step": 186
1126
  },
1127
  {
1128
  "epoch": 5.34,
1129
- "learning_rate": 0.0026660714285714287,
1130
- "loss": 1.5703,
1131
  "step": 187
1132
  },
1133
  {
1134
  "epoch": 5.37,
1135
- "learning_rate": 0.002664285714285714,
1136
- "loss": 1.5504,
1137
  "step": 188
1138
  },
1139
  {
1140
  "epoch": 5.4,
1141
- "learning_rate": 0.0026625,
1142
- "loss": 1.6126,
1143
  "step": 189
1144
  },
1145
  {
1146
  "epoch": 5.43,
1147
- "learning_rate": 0.002660714285714286,
1148
- "loss": 1.5777,
1149
  "step": 190
1150
  },
1151
  {
1152
  "epoch": 5.46,
1153
- "learning_rate": 0.002658928571428571,
1154
- "loss": 1.5994,
1155
  "step": 191
1156
  },
1157
  {
1158
  "epoch": 5.49,
1159
- "learning_rate": 0.002657142857142857,
1160
- "loss": 1.5939,
1161
  "step": 192
1162
  },
1163
  {
1164
  "epoch": 5.51,
1165
- "learning_rate": 0.002655357142857143,
1166
- "loss": 1.6297,
1167
  "step": 193
1168
  },
1169
  {
1170
  "epoch": 5.54,
1171
- "learning_rate": 0.0026535714285714283,
1172
- "loss": 1.6749,
1173
  "step": 194
1174
  },
1175
  {
1176
  "epoch": 5.57,
1177
- "learning_rate": 0.002651785714285714,
1178
- "loss": 1.6525,
1179
  "step": 195
1180
  },
1181
  {
1182
  "epoch": 5.6,
1183
- "learning_rate": 0.00265,
1184
- "loss": 1.657,
1185
  "step": 196
1186
  },
1187
  {
1188
  "epoch": 5.63,
1189
- "learning_rate": 0.002648214285714286,
1190
- "loss": 1.6818,
1191
  "step": 197
1192
  },
1193
  {
1194
  "epoch": 5.66,
1195
- "learning_rate": 0.0026464285714285713,
1196
- "loss": 1.6985,
1197
  "step": 198
1198
  },
1199
  {
1200
  "epoch": 5.69,
1201
- "learning_rate": 0.002644642857142857,
1202
- "loss": 1.7456,
1203
  "step": 199
1204
  },
1205
  {
1206
  "epoch": 5.71,
1207
- "learning_rate": 0.002642857142857143,
1208
- "loss": 1.678,
1209
  "step": 200
1210
  },
1211
  {
1212
  "epoch": 5.74,
1213
- "learning_rate": 0.0026410714285714284,
1214
- "loss": 1.7613,
1215
  "step": 201
1216
  },
1217
  {
1218
  "epoch": 5.77,
1219
- "learning_rate": 0.0026392857142857142,
1220
- "loss": 1.7541,
1221
  "step": 202
1222
  },
1223
  {
1224
  "epoch": 5.8,
1225
- "learning_rate": 0.0026375,
1226
- "loss": 1.798,
1227
  "step": 203
1228
  },
1229
  {
1230
  "epoch": 5.83,
1231
- "learning_rate": 0.002635714285714286,
1232
- "loss": 1.821,
1233
  "step": 204
1234
  },
1235
  {
1236
  "epoch": 5.86,
1237
- "learning_rate": 0.0026339285714285714,
1238
- "loss": 1.8385,
1239
  "step": 205
1240
  },
1241
  {
1242
  "epoch": 5.89,
1243
- "learning_rate": 0.002632142857142857,
1244
- "loss": 1.8613,
1245
  "step": 206
1246
  },
1247
  {
1248
  "epoch": 5.91,
1249
- "learning_rate": 0.002630357142857143,
1250
- "loss": 1.902,
1251
  "step": 207
1252
  },
1253
  {
1254
  "epoch": 5.94,
1255
- "learning_rate": 0.0026285714285714285,
1256
- "loss": 2.0848,
1257
  "step": 208
1258
  },
1259
  {
1260
  "epoch": 5.97,
1261
- "learning_rate": 0.0026267857142857143,
1262
- "loss": 2.3277,
1263
  "step": 209
1264
  },
1265
  {
1266
  "epoch": 6.0,
1267
- "learning_rate": 0.002625,
1268
- "loss": 2.8535,
1269
  "step": 210
1270
  },
1271
  {
1272
  "epoch": 6.03,
1273
- "learning_rate": 0.0026232142857142856,
1274
- "loss": 6.2197,
1275
  "step": 211
1276
  },
1277
  {
1278
  "epoch": 6.06,
1279
- "learning_rate": 0.0026214285714285714,
1280
- "loss": 10.2288,
1281
  "step": 212
1282
  },
1283
  {
1284
  "epoch": 6.09,
1285
- "learning_rate": 0.0026196428571428573,
1286
- "loss": 12.5006,
1287
  "step": 213
1288
  },
1289
  {
1290
  "epoch": 6.11,
1291
- "learning_rate": 0.002617857142857143,
1292
- "loss": 10.5184,
1293
  "step": 214
1294
  },
1295
  {
1296
  "epoch": 6.14,
1297
- "learning_rate": 0.0026160714285714285,
1298
- "loss": 9.4834,
1299
  "step": 215
1300
  },
1301
  {
1302
  "epoch": 6.17,
1303
- "learning_rate": 0.0026142857142857144,
1304
- "loss": 16.0513,
1305
  "step": 216
1306
  },
1307
  {
1308
  "epoch": 6.2,
1309
- "learning_rate": 0.0026125000000000002,
1310
- "loss": 11.0576,
1311
  "step": 217
1312
  },
1313
  {
1314
  "epoch": 6.23,
1315
- "learning_rate": 0.0026107142857142857,
1316
- "loss": 15.3574,
1317
  "step": 218
1318
  },
1319
  {
1320
  "epoch": 6.26,
1321
- "learning_rate": 0.0026089285714285715,
1322
- "loss": 15.5239,
1323
  "step": 219
1324
  },
1325
  {
1326
  "epoch": 6.29,
1327
- "learning_rate": 0.0026071428571428574,
1328
- "loss": 15.3973,
1329
  "step": 220
1330
  },
1331
  {
1332
  "epoch": 6.31,
1333
- "learning_rate": 0.0026053571428571428,
1334
- "loss": 12.059,
1335
  "step": 221
1336
  },
1337
  {
1338
  "epoch": 6.34,
1339
- "learning_rate": 0.0026035714285714286,
1340
- "loss": 10.8352,
1341
  "step": 222
1342
  },
1343
  {
1344
  "epoch": 6.37,
1345
- "learning_rate": 0.0026017857142857145,
1346
- "loss": 10.1507,
1347
  "step": 223
1348
  },
1349
  {
1350
  "epoch": 6.4,
1351
- "learning_rate": 0.0026000000000000003,
1352
- "loss": 10.651,
1353
  "step": 224
1354
  },
1355
  {
1356
  "epoch": 6.43,
1357
- "learning_rate": 0.0025982142857142857,
1358
- "loss": 9.8363,
1359
  "step": 225
1360
  },
1361
  {
1362
  "epoch": 6.46,
1363
- "learning_rate": 0.0025964285714285716,
1364
- "loss": 9.3673,
1365
  "step": 226
1366
  },
1367
  {
1368
  "epoch": 6.49,
1369
- "learning_rate": 0.0025946428571428574,
1370
- "loss": 9.5433,
1371
  "step": 227
1372
  },
1373
  {
1374
  "epoch": 6.51,
1375
- "learning_rate": 0.002592857142857143,
1376
- "loss": 9.9206,
1377
  "step": 228
1378
  },
1379
  {
1380
  "epoch": 6.54,
1381
- "learning_rate": 0.0025910714285714287,
1382
- "loss": 9.5516,
1383
  "step": 229
1384
  },
1385
  {
1386
  "epoch": 6.57,
1387
- "learning_rate": 0.0025892857142857145,
1388
- "loss": 9.2165,
1389
  "step": 230
1390
  },
1391
  {
1392
  "epoch": 6.6,
1393
- "learning_rate": 0.0025875000000000004,
1394
- "loss": 9.0825,
1395
  "step": 231
1396
  },
1397
  {
1398
  "epoch": 6.63,
1399
- "learning_rate": 0.002585714285714286,
1400
- "loss": 8.7437,
1401
  "step": 232
1402
  },
1403
  {
1404
  "epoch": 6.66,
1405
- "learning_rate": 0.0025839285714285717,
1406
- "loss": 8.6366,
1407
  "step": 233
1408
  },
1409
  {
1410
  "epoch": 6.69,
1411
- "learning_rate": 0.0025821428571428575,
1412
- "loss": 9.7431,
1413
  "step": 234
1414
  },
1415
  {
1416
  "epoch": 6.71,
1417
- "learning_rate": 0.002580357142857143,
1418
- "loss": 8.1876,
1419
  "step": 235
1420
  },
1421
  {
1422
  "epoch": 6.74,
1423
- "learning_rate": 0.0025785714285714288,
1424
- "loss": 8.4559,
1425
  "step": 236
1426
  },
1427
  {
1428
  "epoch": 6.77,
1429
- "learning_rate": 0.002576785714285714,
1430
- "loss": 8.0092,
1431
  "step": 237
1432
  },
1433
  {
1434
  "epoch": 6.8,
1435
- "learning_rate": 0.002575,
1436
- "loss": 8.028,
1437
  "step": 238
1438
  },
1439
  {
1440
  "epoch": 6.83,
1441
- "learning_rate": 0.0025732142857142854,
1442
- "loss": 7.8379,
1443
  "step": 239
1444
  },
1445
  {
1446
  "epoch": 6.86,
1447
- "learning_rate": 0.0025714285714285713,
1448
- "loss": 7.8127,
1449
  "step": 240
1450
  },
1451
  {
1452
  "epoch": 6.89,
1453
- "learning_rate": 0.002569642857142857,
1454
- "loss": 7.8252,
1455
  "step": 241
1456
  },
1457
  {
1458
  "epoch": 6.91,
1459
- "learning_rate": 0.002567857142857143,
1460
- "loss": 7.7094,
1461
  "step": 242
1462
  },
1463
  {
1464
  "epoch": 6.94,
1465
- "learning_rate": 0.0025660714285714284,
1466
- "loss": 7.7962,
1467
  "step": 243
1468
  },
1469
  {
1470
  "epoch": 6.97,
1471
- "learning_rate": 0.0025642857142857143,
1472
- "loss": 7.4966,
1473
  "step": 244
1474
  },
1475
  {
1476
  "epoch": 7.0,
1477
- "learning_rate": 0.0025625,
1478
- "loss": 7.4851,
1479
  "step": 245
1480
  },
1481
  {
1482
  "epoch": 7.03,
1483
- "learning_rate": 0.0025607142857142855,
1484
- "loss": 7.5188,
1485
  "step": 246
1486
  },
1487
  {
1488
  "epoch": 7.06,
1489
- "learning_rate": 0.0025589285714285714,
1490
- "loss": 7.7866,
1491
  "step": 247
1492
  },
1493
  {
1494
  "epoch": 7.09,
1495
- "learning_rate": 0.0025571428571428572,
1496
- "loss": 7.5743,
1497
  "step": 248
1498
  },
1499
  {
1500
  "epoch": 7.11,
1501
- "learning_rate": 0.0025553571428571426,
1502
- "loss": 7.4608,
1503
  "step": 249
1504
  },
1505
  {
1506
  "epoch": 7.14,
1507
- "learning_rate": 0.0025535714285714285,
1508
- "loss": 7.4655,
1509
  "step": 250
1510
  },
1511
  {
1512
  "epoch": 7.17,
1513
- "learning_rate": 0.0025517857142857143,
1514
- "loss": 7.5474,
1515
  "step": 251
1516
  },
1517
  {
1518
  "epoch": 7.2,
1519
- "learning_rate": 0.00255,
1520
- "loss": 7.6983,
1521
  "step": 252
1522
  },
1523
  {
1524
  "epoch": 7.23,
1525
- "learning_rate": 0.0025482142857142856,
1526
- "loss": 7.4936,
1527
  "step": 253
1528
  },
1529
  {
1530
  "epoch": 7.26,
1531
- "learning_rate": 0.0025464285714285714,
1532
- "loss": 7.6966,
1533
  "step": 254
1534
  },
1535
  {
1536
  "epoch": 7.29,
1537
- "learning_rate": 0.0025446428571428573,
1538
- "loss": 7.4701,
1539
  "step": 255
1540
  },
1541
  {
1542
  "epoch": 7.31,
1543
- "learning_rate": 0.0025428571428571427,
1544
- "loss": 7.511,
1545
  "step": 256
1546
  },
1547
  {
1548
  "epoch": 7.34,
1549
- "learning_rate": 0.0025410714285714286,
1550
- "loss": 7.3709,
1551
  "step": 257
1552
  },
1553
  {
1554
  "epoch": 7.37,
1555
- "learning_rate": 0.0025392857142857144,
1556
- "loss": 7.4582,
1557
  "step": 258
1558
  },
1559
  {
1560
  "epoch": 7.4,
1561
- "learning_rate": 0.0025375,
1562
- "loss": 7.4263,
1563
  "step": 259
1564
  },
1565
  {
1566
  "epoch": 7.43,
1567
- "learning_rate": 0.0025357142857142857,
1568
- "loss": 7.3134,
1569
  "step": 260
1570
  },
1571
  {
1572
  "epoch": 7.46,
1573
- "learning_rate": 0.0025339285714285715,
1574
- "loss": 7.3849,
1575
  "step": 261
1576
  },
1577
  {
1578
  "epoch": 7.49,
1579
- "learning_rate": 0.0025321428571428574,
1580
- "loss": 7.292,
1581
  "step": 262
1582
  },
1583
  {
1584
  "epoch": 7.51,
1585
- "learning_rate": 0.002530357142857143,
1586
- "loss": 7.343,
1587
  "step": 263
1588
  },
1589
  {
1590
  "epoch": 7.54,
1591
- "learning_rate": 0.0025285714285714286,
1592
- "loss": 7.3166,
1593
  "step": 264
1594
  },
1595
  {
1596
  "epoch": 7.57,
1597
- "learning_rate": 0.0025267857142857145,
1598
- "loss": 7.2676,
1599
  "step": 265
1600
  },
1601
  {
1602
  "epoch": 7.6,
1603
- "learning_rate": 0.002525,
1604
- "loss": 7.2955,
1605
  "step": 266
1606
  },
1607
  {
1608
  "epoch": 7.63,
1609
- "learning_rate": 0.0025232142857142857,
1610
- "loss": 7.3386,
1611
  "step": 267
1612
  },
1613
  {
1614
  "epoch": 7.66,
1615
- "learning_rate": 0.0025214285714285716,
1616
- "loss": 7.2682,
1617
  "step": 268
1618
  },
1619
  {
1620
  "epoch": 7.69,
1621
- "learning_rate": 0.0025196428571428574,
1622
- "loss": 7.2359,
1623
  "step": 269
1624
  },
1625
  {
1626
  "epoch": 7.71,
1627
- "learning_rate": 0.002517857142857143,
1628
- "loss": 7.1849,
1629
  "step": 270
1630
  },
1631
  {
1632
  "epoch": 7.74,
1633
- "learning_rate": 0.0025160714285714287,
1634
- "loss": 7.2421,
1635
  "step": 271
1636
  },
1637
  {
1638
  "epoch": 7.77,
1639
- "learning_rate": 0.0025142857142857146,
1640
- "loss": 7.2341,
1641
  "step": 272
1642
  },
1643
  {
1644
  "epoch": 7.8,
1645
- "learning_rate": 0.0025125,
1646
- "loss": 7.2901,
1647
  "step": 273
1648
  },
1649
  {
1650
  "epoch": 7.83,
1651
- "learning_rate": 0.002510714285714286,
1652
- "loss": 7.1931,
1653
  "step": 274
1654
  },
1655
  {
1656
  "epoch": 7.86,
1657
- "learning_rate": 0.0025089285714285717,
1658
- "loss": 7.1907,
1659
  "step": 275
1660
  },
1661
  {
1662
  "epoch": 7.89,
1663
- "learning_rate": 0.002507142857142857,
1664
- "loss": 7.2369,
1665
  "step": 276
1666
  },
1667
  {
1668
  "epoch": 7.91,
1669
- "learning_rate": 0.002505357142857143,
1670
- "loss": 7.1764,
1671
  "step": 277
1672
  },
1673
  {
1674
  "epoch": 7.94,
1675
- "learning_rate": 0.002503571428571429,
1676
- "loss": 7.1928,
1677
  "step": 278
1678
  },
1679
  {
1680
  "epoch": 7.97,
1681
- "learning_rate": 0.0025017857142857146,
1682
- "loss": 7.2114,
1683
  "step": 279
1684
  },
1685
  {
1686
  "epoch": 8.0,
1687
- "learning_rate": 0.0025,
1688
- "loss": 7.2307,
1689
  "step": 280
1690
  },
1691
  {
1692
  "epoch": 8.03,
1693
- "learning_rate": 0.002498214285714286,
1694
- "loss": 7.2477,
1695
  "step": 281
1696
  },
1697
  {
1698
  "epoch": 8.06,
1699
- "learning_rate": 0.0024964285714285718,
1700
- "loss": 7.2069,
1701
  "step": 282
1702
  },
1703
  {
1704
  "epoch": 8.09,
1705
- "learning_rate": 0.002494642857142857,
1706
- "loss": 7.1484,
1707
  "step": 283
1708
  },
1709
  {
1710
  "epoch": 8.11,
1711
- "learning_rate": 0.002492857142857143,
1712
- "loss": 7.1076,
1713
  "step": 284
1714
  },
1715
  {
1716
  "epoch": 8.14,
1717
- "learning_rate": 0.002491071428571429,
1718
- "loss": 7.0819,
1719
  "step": 285
1720
  },
1721
  {
1722
  "epoch": 8.17,
1723
- "learning_rate": 0.0024892857142857143,
1724
- "loss": 7.0708,
1725
  "step": 286
1726
  },
1727
  {
1728
  "epoch": 8.2,
1729
- "learning_rate": 0.0024875,
1730
- "loss": 7.0763,
1731
  "step": 287
1732
  },
1733
  {
1734
  "epoch": 8.23,
1735
- "learning_rate": 0.002485714285714286,
1736
- "loss": 7.0792,
1737
  "step": 288
1738
  },
1739
  {
1740
  "epoch": 8.26,
1741
- "learning_rate": 0.0024839285714285714,
1742
- "loss": 7.1397,
1743
  "step": 289
1744
  },
1745
  {
1746
  "epoch": 8.29,
1747
- "learning_rate": 0.0024821428571428572,
1748
- "loss": 7.0893,
1749
  "step": 290
1750
  },
1751
  {
1752
  "epoch": 8.31,
1753
- "learning_rate": 0.0024803571428571427,
1754
- "loss": 7.1263,
1755
  "step": 291
1756
  },
1757
  {
1758
  "epoch": 8.34,
1759
- "learning_rate": 0.0024785714285714285,
1760
- "loss": 7.0226,
1761
  "step": 292
1762
  },
1763
  {
1764
  "epoch": 8.37,
1765
- "learning_rate": 0.0024767857142857144,
1766
- "loss": 7.1017,
1767
  "step": 293
1768
  },
1769
  {
1770
  "epoch": 8.4,
1771
- "learning_rate": 0.0024749999999999998,
1772
- "loss": 7.0161,
1773
  "step": 294
1774
  },
1775
  {
1776
  "epoch": 8.43,
1777
- "learning_rate": 0.0024732142857142856,
1778
- "loss": 7.117,
1779
  "step": 295
1780
  },
1781
  {
1782
  "epoch": 8.46,
1783
- "learning_rate": 0.0024714285714285715,
1784
- "loss": 7.0234,
1785
  "step": 296
1786
  },
1787
  {
1788
  "epoch": 8.49,
1789
- "learning_rate": 0.002469642857142857,
1790
- "loss": 7.0663,
1791
  "step": 297
1792
  },
1793
  {
1794
  "epoch": 8.51,
1795
- "learning_rate": 0.0024678571428571427,
1796
- "loss": 7.1604,
1797
  "step": 298
1798
  },
1799
  {
1800
  "epoch": 8.54,
1801
- "learning_rate": 0.0024660714285714286,
1802
- "loss": 7.0543,
1803
  "step": 299
1804
  },
1805
  {
1806
  "epoch": 8.57,
1807
- "learning_rate": 0.0024642857142857144,
1808
- "loss": 7.0131,
1809
  "step": 300
1810
  },
1811
  {
1812
  "epoch": 8.6,
1813
- "learning_rate": 0.0024625,
1814
- "loss": 7.0294,
1815
  "step": 301
1816
  },
1817
  {
1818
  "epoch": 8.63,
1819
- "learning_rate": 0.0024607142857142857,
1820
- "loss": 7.0273,
1821
  "step": 302
1822
  },
1823
  {
1824
  "epoch": 8.66,
1825
- "learning_rate": 0.0024589285714285715,
1826
- "loss": 7.0074,
1827
  "step": 303
1828
  },
1829
  {
1830
  "epoch": 8.69,
1831
- "learning_rate": 0.002457142857142857,
1832
- "loss": 6.9747,
1833
  "step": 304
1834
  },
1835
  {
1836
  "epoch": 8.71,
1837
- "learning_rate": 0.002455357142857143,
1838
- "loss": 7.0617,
1839
  "step": 305
1840
  },
1841
  {
1842
  "epoch": 8.74,
1843
- "learning_rate": 0.0024535714285714287,
1844
- "loss": 7.0907,
1845
  "step": 306
1846
  },
1847
  {
1848
  "epoch": 8.77,
1849
- "learning_rate": 0.002451785714285714,
1850
- "loss": 7.0037,
1851
  "step": 307
1852
  },
1853
  {
1854
  "epoch": 8.8,
1855
- "learning_rate": 0.00245,
1856
- "loss": 6.969,
1857
  "step": 308
1858
  },
1859
  {
1860
  "epoch": 8.83,
1861
- "learning_rate": 0.0024482142857142858,
1862
- "loss": 7.0575,
1863
  "step": 309
1864
  },
1865
  {
1866
  "epoch": 8.86,
1867
- "learning_rate": 0.0024464285714285716,
1868
- "loss": 6.9494,
1869
  "step": 310
1870
  },
1871
  {
1872
  "epoch": 8.89,
1873
- "learning_rate": 0.002444642857142857,
1874
- "loss": 6.969,
1875
  "step": 311
1876
  },
1877
  {
1878
  "epoch": 8.91,
1879
- "learning_rate": 0.002442857142857143,
1880
- "loss": 6.8827,
1881
  "step": 312
1882
  },
1883
  {
1884
  "epoch": 8.94,
1885
- "learning_rate": 0.0024410714285714287,
1886
- "loss": 6.9058,
1887
  "step": 313
1888
  },
1889
  {
1890
  "epoch": 8.97,
1891
- "learning_rate": 0.002439285714285714,
1892
- "loss": 6.8808,
1893
  "step": 314
1894
  },
1895
  {
1896
  "epoch": 9.0,
1897
- "learning_rate": 0.0024375,
1898
- "loss": 6.9516,
1899
  "step": 315
1900
  },
1901
  {
1902
  "epoch": 9.03,
1903
- "learning_rate": 0.002435714285714286,
1904
- "loss": 6.9132,
1905
  "step": 316
1906
  },
1907
  {
1908
  "epoch": 9.06,
1909
- "learning_rate": 0.0024339285714285717,
1910
- "loss": 6.9058,
1911
  "step": 317
1912
  },
1913
  {
1914
  "epoch": 9.09,
1915
- "learning_rate": 0.002432142857142857,
1916
- "loss": 6.9332,
1917
  "step": 318
1918
  },
1919
  {
1920
  "epoch": 9.11,
1921
- "learning_rate": 0.002430357142857143,
1922
- "loss": 6.9757,
1923
  "step": 319
1924
  },
1925
  {
1926
  "epoch": 9.14,
1927
- "learning_rate": 0.002428571428571429,
1928
- "loss": 6.8261,
1929
  "step": 320
1930
  },
1931
  {
1932
  "epoch": 9.17,
1933
- "learning_rate": 0.0024267857142857142,
1934
- "loss": 6.8571,
1935
  "step": 321
1936
  },
1937
  {
1938
  "epoch": 9.2,
1939
- "learning_rate": 0.002425,
1940
- "loss": 6.8435,
1941
  "step": 322
1942
  },
1943
  {
1944
  "epoch": 9.23,
1945
- "learning_rate": 0.002423214285714286,
1946
- "loss": 6.9033,
1947
  "step": 323
1948
  },
1949
  {
1950
  "epoch": 9.26,
1951
- "learning_rate": 0.0024214285714285713,
1952
- "loss": 6.8042,
1953
  "step": 324
1954
  },
1955
  {
1956
  "epoch": 9.29,
1957
- "learning_rate": 0.002419642857142857,
1958
- "loss": 6.8732,
1959
  "step": 325
1960
  },
1961
  {
1962
  "epoch": 9.31,
1963
- "learning_rate": 0.002417857142857143,
1964
- "loss": 6.752,
1965
  "step": 326
1966
  },
1967
  {
1968
  "epoch": 9.34,
1969
- "learning_rate": 0.002416071428571429,
1970
- "loss": 6.8016,
1971
  "step": 327
1972
  },
1973
  {
1974
  "epoch": 9.37,
1975
- "learning_rate": 0.0024142857142857143,
1976
- "loss": 6.8879,
1977
  "step": 328
1978
  },
1979
  {
1980
  "epoch": 9.4,
1981
- "learning_rate": 0.0024125,
1982
- "loss": 6.7643,
1983
  "step": 329
1984
  },
1985
  {
1986
  "epoch": 9.43,
1987
- "learning_rate": 0.002410714285714286,
1988
- "loss": 6.7084,
1989
  "step": 330
1990
  },
1991
  {
1992
  "epoch": 9.46,
1993
- "learning_rate": 0.0024089285714285714,
1994
- "loss": 6.8049,
1995
  "step": 331
1996
  },
1997
  {
1998
  "epoch": 9.49,
1999
- "learning_rate": 0.0024071428571428573,
2000
- "loss": 6.7925,
2001
  "step": 332
2002
  },
2003
  {
2004
  "epoch": 9.51,
2005
- "learning_rate": 0.002405357142857143,
2006
- "loss": 6.7289,
2007
  "step": 333
2008
  },
2009
  {
2010
  "epoch": 9.54,
2011
- "learning_rate": 0.0024035714285714285,
2012
- "loss": 6.7439,
2013
  "step": 334
2014
  },
2015
  {
2016
  "epoch": 9.57,
2017
- "learning_rate": 0.0024017857142857144,
2018
- "loss": 6.7119,
2019
  "step": 335
2020
  },
2021
  {
2022
  "epoch": 9.6,
2023
- "learning_rate": 0.0024000000000000002,
2024
- "loss": 6.7251,
2025
  "step": 336
2026
  },
2027
  {
2028
  "epoch": 9.63,
2029
- "learning_rate": 0.002398214285714286,
2030
- "loss": 6.6659,
2031
  "step": 337
2032
  },
2033
  {
2034
  "epoch": 9.66,
2035
- "learning_rate": 0.0023964285714285715,
2036
- "loss": 6.7422,
2037
  "step": 338
2038
  },
2039
  {
2040
  "epoch": 9.69,
2041
- "learning_rate": 0.0023946428571428573,
2042
- "loss": 6.7852,
2043
  "step": 339
2044
  },
2045
  {
2046
  "epoch": 9.71,
2047
- "learning_rate": 0.002392857142857143,
2048
- "loss": 6.6828,
2049
  "step": 340
2050
  },
2051
  {
2052
  "epoch": 9.74,
2053
- "learning_rate": 0.0023910714285714286,
2054
- "loss": 6.686,
2055
  "step": 341
2056
  },
2057
  {
2058
  "epoch": 9.77,
2059
- "learning_rate": 0.002389285714285714,
2060
- "loss": 6.7326,
2061
  "step": 342
2062
  },
2063
  {
2064
  "epoch": 9.8,
2065
- "learning_rate": 0.0023875,
2066
- "loss": 6.5601,
2067
  "step": 343
2068
  },
2069
  {
2070
  "epoch": 9.83,
2071
- "learning_rate": 0.0023857142857142857,
2072
- "loss": 6.6646,
2073
  "step": 344
2074
  },
2075
  {
2076
  "epoch": 9.86,
2077
- "learning_rate": 0.002383928571428571,
2078
- "loss": 6.5673,
2079
  "step": 345
2080
  },
2081
  {
2082
  "epoch": 9.89,
2083
- "learning_rate": 0.002382142857142857,
2084
- "loss": 6.6227,
2085
  "step": 346
2086
  },
2087
  {
2088
  "epoch": 9.91,
2089
- "learning_rate": 0.002380357142857143,
2090
- "loss": 6.5526,
2091
  "step": 347
2092
  },
2093
  {
2094
  "epoch": 9.94,
2095
- "learning_rate": 0.0023785714285714287,
2096
- "loss": 6.6842,
2097
  "step": 348
2098
  },
2099
  {
2100
  "epoch": 9.97,
2101
- "learning_rate": 0.002376785714285714,
2102
- "loss": 6.6211,
2103
  "step": 349
2104
  },
2105
  {
2106
  "epoch": 10.0,
2107
- "learning_rate": 0.002375,
2108
- "loss": 6.6952,
2109
  "step": 350
2110
  },
2111
  {
2112
  "epoch": 10.03,
2113
- "learning_rate": 0.002373214285714286,
2114
- "loss": 6.5324,
2115
  "step": 351
2116
  },
2117
  {
2118
  "epoch": 10.06,
2119
- "learning_rate": 0.002371428571428571,
2120
- "loss": 6.5792,
2121
  "step": 352
2122
  },
2123
  {
2124
  "epoch": 10.09,
2125
- "learning_rate": 0.002369642857142857,
2126
- "loss": 6.5276,
2127
  "step": 353
2128
  },
2129
  {
2130
  "epoch": 10.11,
2131
- "learning_rate": 0.002367857142857143,
2132
- "loss": 6.5634,
2133
  "step": 354
2134
  },
2135
  {
2136
  "epoch": 10.14,
2137
- "learning_rate": 0.0023660714285714288,
2138
- "loss": 6.5385,
2139
  "step": 355
2140
  },
2141
  {
2142
  "epoch": 10.17,
2143
- "learning_rate": 0.002364285714285714,
2144
- "loss": 6.4516,
2145
  "step": 356
2146
  },
2147
  {
2148
  "epoch": 10.2,
2149
- "learning_rate": 0.0023625,
2150
- "loss": 6.5641,
2151
  "step": 357
2152
  },
2153
  {
2154
  "epoch": 10.23,
2155
- "learning_rate": 0.002360714285714286,
2156
- "loss": 6.5001,
2157
  "step": 358
2158
  },
2159
  {
2160
  "epoch": 10.26,
2161
- "learning_rate": 0.0023589285714285713,
2162
- "loss": 6.4846,
2163
  "step": 359
2164
  },
2165
  {
2166
  "epoch": 10.29,
2167
- "learning_rate": 0.002357142857142857,
2168
- "loss": 6.4638,
2169
  "step": 360
2170
  },
2171
  {
2172
  "epoch": 10.31,
2173
- "learning_rate": 0.002355357142857143,
2174
- "loss": 6.5217,
2175
  "step": 361
2176
  },
2177
  {
2178
  "epoch": 10.34,
2179
- "learning_rate": 0.0023535714285714284,
2180
- "loss": 6.5444,
2181
  "step": 362
2182
  },
2183
  {
2184
  "epoch": 10.37,
2185
- "learning_rate": 0.0023517857142857142,
2186
- "loss": 6.496,
2187
  "step": 363
2188
  },
2189
  {
2190
  "epoch": 10.4,
2191
- "learning_rate": 0.00235,
2192
- "loss": 6.5345,
2193
  "step": 364
2194
  },
2195
  {
2196
  "epoch": 10.43,
2197
- "learning_rate": 0.002348214285714286,
2198
- "loss": 6.4732,
2199
  "step": 365
2200
  },
2201
  {
2202
  "epoch": 10.46,
2203
- "learning_rate": 0.0023464285714285714,
2204
- "loss": 6.4765,
2205
  "step": 366
2206
  },
2207
  {
2208
  "epoch": 10.49,
2209
- "learning_rate": 0.002344642857142857,
2210
- "loss": 6.3881,
2211
  "step": 367
2212
  },
2213
  {
2214
  "epoch": 10.51,
2215
- "learning_rate": 0.002342857142857143,
2216
- "loss": 6.4908,
2217
  "step": 368
2218
  },
2219
  {
2220
  "epoch": 10.54,
2221
- "learning_rate": 0.0023410714285714285,
2222
- "loss": 6.4593,
2223
  "step": 369
2224
  },
2225
  {
2226
  "epoch": 10.57,
2227
- "learning_rate": 0.0023392857142857143,
2228
- "loss": 6.5006,
2229
  "step": 370
2230
  },
2231
  {
2232
  "epoch": 10.6,
2233
- "learning_rate": 0.0023375,
2234
- "loss": 6.4495,
2235
  "step": 371
2236
  },
2237
  {
2238
  "epoch": 10.63,
2239
- "learning_rate": 0.0023357142857142856,
2240
- "loss": 6.3569,
2241
  "step": 372
2242
  },
2243
  {
2244
  "epoch": 10.66,
2245
- "learning_rate": 0.0023339285714285714,
2246
- "loss": 6.3592,
2247
  "step": 373
2248
  },
2249
  {
2250
  "epoch": 10.69,
2251
- "learning_rate": 0.0023321428571428573,
2252
- "loss": 6.3258,
2253
  "step": 374
2254
  },
2255
  {
2256
  "epoch": 10.71,
2257
- "learning_rate": 0.002330357142857143,
2258
- "loss": 6.3216,
2259
  "step": 375
2260
  },
2261
  {
2262
  "epoch": 10.74,
2263
- "learning_rate": 0.0023285714285714285,
2264
- "loss": 6.4878,
2265
  "step": 376
2266
  },
2267
  {
2268
  "epoch": 10.77,
2269
- "learning_rate": 0.0023267857142857144,
2270
- "loss": 6.3412,
2271
  "step": 377
2272
  },
2273
  {
2274
  "epoch": 10.8,
2275
- "learning_rate": 0.0023250000000000002,
2276
- "loss": 6.3925,
2277
  "step": 378
2278
  },
2279
  {
2280
  "epoch": 10.83,
2281
- "learning_rate": 0.0023232142857142857,
2282
- "loss": 6.275,
2283
  "step": 379
2284
  },
2285
  {
2286
  "epoch": 10.86,
2287
- "learning_rate": 0.0023214285714285715,
2288
- "loss": 6.3575,
2289
  "step": 380
2290
  },
2291
  {
2292
  "epoch": 10.89,
2293
- "learning_rate": 0.0023196428571428574,
2294
- "loss": 6.3259,
2295
  "step": 381
2296
  },
2297
  {
2298
  "epoch": 10.91,
2299
- "learning_rate": 0.002317857142857143,
2300
- "loss": 6.315,
2301
  "step": 382
2302
  },
2303
  {
2304
  "epoch": 10.94,
2305
- "learning_rate": 0.0023160714285714286,
2306
- "loss": 6.277,
2307
  "step": 383
2308
  },
2309
  {
2310
  "epoch": 10.97,
2311
- "learning_rate": 0.0023142857142857145,
2312
- "loss": 6.3259,
2313
  "step": 384
2314
  },
2315
  {
2316
  "epoch": 11.0,
2317
- "learning_rate": 0.0023125000000000003,
2318
- "loss": 6.3747,
2319
  "step": 385
2320
  },
2321
  {
2322
  "epoch": 11.03,
2323
- "learning_rate": 0.0023107142857142857,
2324
- "loss": 6.3646,
2325
  "step": 386
2326
  },
2327
  {
2328
  "epoch": 11.06,
2329
- "learning_rate": 0.0023089285714285716,
2330
- "loss": 6.3687,
2331
  "step": 387
2332
  },
2333
  {
2334
  "epoch": 11.09,
2335
- "learning_rate": 0.0023071428571428574,
2336
- "loss": 6.3374,
2337
  "step": 388
2338
  },
2339
  {
2340
  "epoch": 11.11,
2341
- "learning_rate": 0.002305357142857143,
2342
- "loss": 6.3129,
2343
  "step": 389
2344
  },
2345
  {
2346
  "epoch": 11.14,
2347
- "learning_rate": 0.0023035714285714287,
2348
- "loss": 6.3425,
2349
  "step": 390
2350
  },
2351
  {
2352
  "epoch": 11.17,
2353
- "learning_rate": 0.0023017857142857145,
2354
- "loss": 6.2122,
2355
  "step": 391
2356
  },
2357
  {
2358
  "epoch": 11.2,
2359
- "learning_rate": 0.0023000000000000004,
2360
- "loss": 6.2768,
2361
  "step": 392
2362
  },
2363
  {
2364
  "epoch": 11.23,
2365
- "learning_rate": 0.002298214285714286,
2366
- "loss": 6.2853,
2367
  "step": 393
2368
  },
2369
  {
2370
  "epoch": 11.26,
2371
- "learning_rate": 0.0022964285714285712,
2372
- "loss": 6.3215,
2373
  "step": 394
2374
  },
2375
  {
2376
  "epoch": 11.29,
2377
- "learning_rate": 0.002294642857142857,
2378
- "loss": 6.3244,
2379
  "step": 395
2380
  },
2381
  {
2382
  "epoch": 11.31,
2383
- "learning_rate": 0.002292857142857143,
2384
- "loss": 6.2399,
2385
  "step": 396
2386
  },
2387
  {
2388
  "epoch": 11.34,
2389
- "learning_rate": 0.0022910714285714283,
2390
- "loss": 6.2457,
2391
  "step": 397
2392
  },
2393
  {
2394
  "epoch": 11.37,
2395
- "learning_rate": 0.002289285714285714,
2396
- "loss": 6.2018,
2397
  "step": 398
2398
  },
2399
  {
2400
  "epoch": 11.4,
2401
- "learning_rate": 0.0022875,
2402
- "loss": 6.2101,
2403
  "step": 399
2404
  },
2405
  {
2406
  "epoch": 11.43,
2407
- "learning_rate": 0.0022857142857142855,
2408
- "loss": 6.2257,
2409
  "step": 400
2410
- },
2411
- {
2412
- "epoch": 11.46,
2413
- "learning_rate": 0.0022839285714285713,
2414
- "loss": 6.3029,
2415
- "step": 401
2416
- },
2417
- {
2418
- "epoch": 11.49,
2419
- "learning_rate": 0.002282142857142857,
2420
- "loss": 6.2312,
2421
- "step": 402
2422
- },
2423
- {
2424
- "epoch": 11.51,
2425
- "learning_rate": 0.002280357142857143,
2426
- "loss": 6.203,
2427
- "step": 403
2428
- },
2429
- {
2430
- "epoch": 11.54,
2431
- "learning_rate": 0.0022785714285714284,
2432
- "loss": 6.2881,
2433
- "step": 404
2434
- },
2435
- {
2436
- "epoch": 11.57,
2437
- "learning_rate": 0.0022767857142857143,
2438
- "loss": 6.3466,
2439
- "step": 405
2440
- },
2441
- {
2442
- "epoch": 11.6,
2443
- "learning_rate": 0.002275,
2444
- "loss": 6.1908,
2445
- "step": 406
2446
- },
2447
- {
2448
- "epoch": 11.63,
2449
- "learning_rate": 0.0022732142857142855,
2450
- "loss": 6.196,
2451
- "step": 407
2452
- },
2453
- {
2454
- "epoch": 11.66,
2455
- "learning_rate": 0.0022714285714285714,
2456
- "loss": 6.1726,
2457
- "step": 408
2458
- },
2459
- {
2460
- "epoch": 11.69,
2461
- "learning_rate": 0.0022696428571428572,
2462
- "loss": 6.1207,
2463
- "step": 409
2464
- },
2465
- {
2466
- "epoch": 11.71,
2467
- "learning_rate": 0.0022678571428571426,
2468
- "loss": 6.2382,
2469
- "step": 410
2470
- },
2471
- {
2472
- "epoch": 11.74,
2473
- "learning_rate": 0.0022660714285714285,
2474
- "loss": 6.1757,
2475
- "step": 411
2476
- },
2477
- {
2478
- "epoch": 11.77,
2479
- "learning_rate": 0.0022642857142857143,
2480
- "loss": 6.1153,
2481
- "step": 412
2482
- },
2483
- {
2484
- "epoch": 11.8,
2485
- "learning_rate": 0.0022625,
2486
- "loss": 6.1261,
2487
- "step": 413
2488
- },
2489
- {
2490
- "epoch": 11.83,
2491
- "learning_rate": 0.0022607142857142856,
2492
- "loss": 6.0762,
2493
- "step": 414
2494
- },
2495
- {
2496
- "epoch": 11.86,
2497
- "learning_rate": 0.0022589285714285715,
2498
- "loss": 6.1386,
2499
- "step": 415
2500
- },
2501
- {
2502
- "epoch": 11.89,
2503
- "learning_rate": 0.0022571428571428573,
2504
- "loss": 6.1204,
2505
- "step": 416
2506
- },
2507
- {
2508
- "epoch": 11.91,
2509
- "learning_rate": 0.0022553571428571427,
2510
- "loss": 6.1059,
2511
- "step": 417
2512
- },
2513
- {
2514
- "epoch": 11.94,
2515
- "learning_rate": 0.0022535714285714286,
2516
- "loss": 6.0591,
2517
- "step": 418
2518
- },
2519
- {
2520
- "epoch": 11.97,
2521
- "learning_rate": 0.0022517857142857144,
2522
- "loss": 6.1713,
2523
- "step": 419
2524
- },
2525
- {
2526
- "epoch": 12.0,
2527
- "learning_rate": 0.0022500000000000003,
2528
- "loss": 6.2039,
2529
- "step": 420
2530
- },
2531
- {
2532
- "epoch": 12.03,
2533
- "learning_rate": 0.0022482142857142857,
2534
- "loss": 6.0168,
2535
- "step": 421
2536
- },
2537
- {
2538
- "epoch": 12.06,
2539
- "learning_rate": 0.0022464285714285715,
2540
- "loss": 6.0206,
2541
- "step": 422
2542
- },
2543
- {
2544
- "epoch": 12.09,
2545
- "learning_rate": 0.0022446428571428574,
2546
- "loss": 6.0642,
2547
- "step": 423
2548
- },
2549
- {
2550
- "epoch": 12.11,
2551
- "learning_rate": 0.002242857142857143,
2552
- "loss": 6.0665,
2553
- "step": 424
2554
- },
2555
- {
2556
- "epoch": 12.14,
2557
- "learning_rate": 0.0022410714285714286,
2558
- "loss": 5.9766,
2559
- "step": 425
2560
- },
2561
- {
2562
- "epoch": 12.17,
2563
- "learning_rate": 0.0022392857142857145,
2564
- "loss": 6.2167,
2565
- "step": 426
2566
- },
2567
- {
2568
- "epoch": 12.2,
2569
- "learning_rate": 0.0022375,
2570
- "loss": 6.002,
2571
- "step": 427
2572
- },
2573
- {
2574
- "epoch": 12.23,
2575
- "learning_rate": 0.0022357142857142858,
2576
- "loss": 6.0266,
2577
- "step": 428
2578
- },
2579
- {
2580
- "epoch": 12.26,
2581
- "learning_rate": 0.0022339285714285716,
2582
- "loss": 5.9339,
2583
- "step": 429
2584
- },
2585
- {
2586
- "epoch": 12.29,
2587
- "learning_rate": 0.0022321428571428575,
2588
- "loss": 6.1066,
2589
- "step": 430
2590
- },
2591
- {
2592
- "epoch": 12.31,
2593
- "learning_rate": 0.002230357142857143,
2594
- "loss": 5.9262,
2595
- "step": 431
2596
- },
2597
- {
2598
- "epoch": 12.34,
2599
- "learning_rate": 0.0022285714285714287,
2600
- "loss": 6.0696,
2601
- "step": 432
2602
- },
2603
- {
2604
- "epoch": 12.37,
2605
- "learning_rate": 0.0022267857142857146,
2606
- "loss": 5.9181,
2607
- "step": 433
2608
- },
2609
- {
2610
- "epoch": 12.4,
2611
- "learning_rate": 0.002225,
2612
- "loss": 6.0291,
2613
- "step": 434
2614
- },
2615
- {
2616
- "epoch": 12.43,
2617
- "learning_rate": 0.002223214285714286,
2618
- "loss": 5.9493,
2619
- "step": 435
2620
- },
2621
- {
2622
- "epoch": 12.46,
2623
- "learning_rate": 0.0022214285714285717,
2624
- "loss": 5.9639,
2625
- "step": 436
2626
- },
2627
- {
2628
- "epoch": 12.49,
2629
- "learning_rate": 0.002219642857142857,
2630
- "loss": 6.0303,
2631
- "step": 437
2632
- },
2633
- {
2634
- "epoch": 12.51,
2635
- "learning_rate": 0.002217857142857143,
2636
- "loss": 6.0157,
2637
- "step": 438
2638
- },
2639
- {
2640
- "epoch": 12.54,
2641
- "learning_rate": 0.002216071428571429,
2642
- "loss": 5.9309,
2643
- "step": 439
2644
- },
2645
- {
2646
- "epoch": 12.57,
2647
- "learning_rate": 0.0022142857142857146,
2648
- "loss": 5.9554,
2649
- "step": 440
2650
- },
2651
- {
2652
- "epoch": 12.6,
2653
- "learning_rate": 0.0022125,
2654
- "loss": 5.9761,
2655
- "step": 441
2656
- },
2657
- {
2658
- "epoch": 12.63,
2659
- "learning_rate": 0.002210714285714286,
2660
- "loss": 5.9042,
2661
- "step": 442
2662
- },
2663
- {
2664
- "epoch": 12.66,
2665
- "learning_rate": 0.0022089285714285718,
2666
- "loss": 6.0009,
2667
- "step": 443
2668
- },
2669
- {
2670
- "epoch": 12.69,
2671
- "learning_rate": 0.002207142857142857,
2672
- "loss": 5.9199,
2673
- "step": 444
2674
- },
2675
- {
2676
- "epoch": 12.71,
2677
- "learning_rate": 0.002205357142857143,
2678
- "loss": 5.9472,
2679
- "step": 445
2680
- },
2681
- {
2682
- "epoch": 12.74,
2683
- "learning_rate": 0.002203571428571429,
2684
- "loss": 6.0478,
2685
- "step": 446
2686
- },
2687
- {
2688
- "epoch": 12.77,
2689
- "learning_rate": 0.0022017857142857143,
2690
- "loss": 6.0131,
2691
- "step": 447
2692
- },
2693
- {
2694
- "epoch": 12.8,
2695
- "learning_rate": 0.0021999999999999997,
2696
- "loss": 5.9161,
2697
- "step": 448
2698
- },
2699
- {
2700
- "epoch": 12.83,
2701
- "learning_rate": 0.0021982142857142855,
2702
- "loss": 5.935,
2703
- "step": 449
2704
- },
2705
- {
2706
- "epoch": 12.86,
2707
- "learning_rate": 0.0021964285714285714,
2708
- "loss": 5.9035,
2709
- "step": 450
2710
- },
2711
- {
2712
- "epoch": 12.89,
2713
- "learning_rate": 0.0021946428571428572,
2714
- "loss": 5.9422,
2715
- "step": 451
2716
- },
2717
- {
2718
- "epoch": 12.91,
2719
- "learning_rate": 0.0021928571428571427,
2720
- "loss": 6.0135,
2721
- "step": 452
2722
- },
2723
- {
2724
- "epoch": 12.94,
2725
- "learning_rate": 0.0021910714285714285,
2726
- "loss": 5.9757,
2727
- "step": 453
2728
- },
2729
- {
2730
- "epoch": 12.97,
2731
- "learning_rate": 0.0021892857142857144,
2732
- "loss": 5.942,
2733
- "step": 454
2734
- },
2735
- {
2736
- "epoch": 13.0,
2737
- "learning_rate": 0.0021874999999999998,
2738
- "loss": 5.943,
2739
- "step": 455
2740
- },
2741
- {
2742
- "epoch": 13.03,
2743
- "learning_rate": 0.0021857142857142856,
2744
- "loss": 5.8982,
2745
- "step": 456
2746
- },
2747
- {
2748
- "epoch": 13.06,
2749
- "learning_rate": 0.0021839285714285715,
2750
- "loss": 5.9874,
2751
- "step": 457
2752
- },
2753
- {
2754
- "epoch": 13.09,
2755
- "learning_rate": 0.002182142857142857,
2756
- "loss": 5.8677,
2757
- "step": 458
2758
- },
2759
- {
2760
- "epoch": 13.11,
2761
- "learning_rate": 0.0021803571428571427,
2762
- "loss": 5.8782,
2763
- "step": 459
2764
- },
2765
- {
2766
- "epoch": 13.14,
2767
- "learning_rate": 0.0021785714285714286,
2768
- "loss": 5.787,
2769
- "step": 460
2770
- },
2771
- {
2772
- "epoch": 13.17,
2773
- "learning_rate": 0.0021767857142857144,
2774
- "loss": 5.8339,
2775
- "step": 461
2776
- },
2777
- {
2778
- "epoch": 13.2,
2779
- "learning_rate": 0.002175,
2780
- "loss": 5.8303,
2781
- "step": 462
2782
- },
2783
- {
2784
- "epoch": 13.23,
2785
- "learning_rate": 0.0021732142857142857,
2786
- "loss": 5.8187,
2787
- "step": 463
2788
- },
2789
- {
2790
- "epoch": 13.26,
2791
- "learning_rate": 0.0021714285714285715,
2792
- "loss": 5.7448,
2793
- "step": 464
2794
- },
2795
- {
2796
- "epoch": 13.29,
2797
- "learning_rate": 0.002169642857142857,
2798
- "loss": 5.8681,
2799
- "step": 465
2800
- },
2801
- {
2802
- "epoch": 13.31,
2803
- "learning_rate": 0.002167857142857143,
2804
- "loss": 5.8039,
2805
- "step": 466
2806
- },
2807
- {
2808
- "epoch": 13.34,
2809
- "learning_rate": 0.0021660714285714287,
2810
- "loss": 5.8511,
2811
- "step": 467
2812
- },
2813
- {
2814
- "epoch": 13.37,
2815
- "learning_rate": 0.0021642857142857145,
2816
- "loss": 5.8184,
2817
- "step": 468
2818
- },
2819
- {
2820
- "epoch": 13.4,
2821
- "learning_rate": 0.0021625,
2822
- "loss": 5.7656,
2823
- "step": 469
2824
- },
2825
- {
2826
- "epoch": 13.43,
2827
- "learning_rate": 0.0021607142857142858,
2828
- "loss": 5.8613,
2829
- "step": 470
2830
- },
2831
- {
2832
- "epoch": 13.46,
2833
- "learning_rate": 0.0021589285714285716,
2834
- "loss": 5.849,
2835
- "step": 471
2836
- },
2837
- {
2838
- "epoch": 13.49,
2839
- "learning_rate": 0.002157142857142857,
2840
- "loss": 5.8011,
2841
- "step": 472
2842
- },
2843
- {
2844
- "epoch": 13.51,
2845
- "learning_rate": 0.002155357142857143,
2846
- "loss": 5.7813,
2847
- "step": 473
2848
- },
2849
- {
2850
- "epoch": 13.54,
2851
- "learning_rate": 0.0021535714285714287,
2852
- "loss": 5.8186,
2853
- "step": 474
2854
- },
2855
- {
2856
- "epoch": 13.57,
2857
- "learning_rate": 0.002151785714285714,
2858
- "loss": 5.8303,
2859
- "step": 475
2860
- },
2861
- {
2862
- "epoch": 13.6,
2863
- "learning_rate": 0.00215,
2864
- "loss": 5.7879,
2865
- "step": 476
2866
- },
2867
- {
2868
- "epoch": 13.63,
2869
- "learning_rate": 0.002148214285714286,
2870
- "loss": 5.6829,
2871
- "step": 477
2872
- },
2873
- {
2874
- "epoch": 13.66,
2875
- "learning_rate": 0.0021464285714285717,
2876
- "loss": 5.7869,
2877
- "step": 478
2878
- },
2879
- {
2880
- "epoch": 13.69,
2881
- "learning_rate": 0.002144642857142857,
2882
- "loss": 5.6489,
2883
- "step": 479
2884
- },
2885
- {
2886
- "epoch": 13.71,
2887
- "learning_rate": 0.002142857142857143,
2888
- "loss": 5.8708,
2889
- "step": 480
2890
- },
2891
- {
2892
- "epoch": 13.74,
2893
- "learning_rate": 0.002141071428571429,
2894
- "loss": 5.7791,
2895
- "step": 481
2896
- },
2897
- {
2898
- "epoch": 13.77,
2899
- "learning_rate": 0.0021392857142857142,
2900
- "loss": 5.7497,
2901
- "step": 482
2902
- },
2903
- {
2904
- "epoch": 13.8,
2905
- "learning_rate": 0.0021375,
2906
- "loss": 5.827,
2907
- "step": 483
2908
- },
2909
- {
2910
- "epoch": 13.83,
2911
- "learning_rate": 0.002135714285714286,
2912
- "loss": 5.7286,
2913
- "step": 484
2914
- },
2915
- {
2916
- "epoch": 13.86,
2917
- "learning_rate": 0.0021339285714285713,
2918
- "loss": 5.8183,
2919
- "step": 485
2920
- },
2921
- {
2922
- "epoch": 13.89,
2923
- "learning_rate": 0.002132142857142857,
2924
- "loss": 5.7191,
2925
- "step": 486
2926
- },
2927
- {
2928
- "epoch": 13.91,
2929
- "learning_rate": 0.002130357142857143,
2930
- "loss": 5.7647,
2931
- "step": 487
2932
- },
2933
- {
2934
- "epoch": 13.94,
2935
- "learning_rate": 0.002128571428571429,
2936
- "loss": 5.799,
2937
- "step": 488
2938
- },
2939
- {
2940
- "epoch": 13.97,
2941
- "learning_rate": 0.0021267857142857143,
2942
- "loss": 5.7583,
2943
- "step": 489
2944
- },
2945
- {
2946
- "epoch": 14.0,
2947
- "learning_rate": 0.002125,
2948
- "loss": 5.6326,
2949
- "step": 490
2950
- },
2951
- {
2952
- "epoch": 14.03,
2953
- "learning_rate": 0.002123214285714286,
2954
- "loss": 5.614,
2955
- "step": 491
2956
- },
2957
- {
2958
- "epoch": 14.06,
2959
- "learning_rate": 0.0021214285714285714,
2960
- "loss": 5.7278,
2961
- "step": 492
2962
- },
2963
- {
2964
- "epoch": 14.09,
2965
- "learning_rate": 0.0021196428571428573,
2966
- "loss": 5.6661,
2967
- "step": 493
2968
- },
2969
- {
2970
- "epoch": 14.11,
2971
- "learning_rate": 0.002117857142857143,
2972
- "loss": 5.6822,
2973
- "step": 494
2974
- },
2975
- {
2976
- "epoch": 14.14,
2977
- "learning_rate": 0.002116071428571429,
2978
- "loss": 5.7356,
2979
- "step": 495
2980
- },
2981
- {
2982
- "epoch": 14.17,
2983
- "learning_rate": 0.0021142857142857144,
2984
- "loss": 5.6169,
2985
- "step": 496
2986
- },
2987
- {
2988
- "epoch": 14.2,
2989
- "learning_rate": 0.0021125000000000002,
2990
- "loss": 5.7203,
2991
- "step": 497
2992
- },
2993
- {
2994
- "epoch": 14.23,
2995
- "learning_rate": 0.002110714285714286,
2996
- "loss": 5.6377,
2997
- "step": 498
2998
- },
2999
- {
3000
- "epoch": 14.26,
3001
- "learning_rate": 0.0021089285714285715,
3002
- "loss": 5.6836,
3003
- "step": 499
3004
- },
3005
- {
3006
- "epoch": 14.29,
3007
- "learning_rate": 0.002107142857142857,
3008
- "loss": 5.6531,
3009
- "step": 500
3010
  }
3011
  ],
3012
  "logging_steps": 1,
3013
  "max_steps": 1680,
3014
  "num_train_epochs": 48,
3015
  "save_steps": 100,
3016
- "total_flos": 2.884638740186112e+17,
3017
  "trial_name": null,
3018
  "trial_params": null
3019
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.428571428571429,
5
  "eval_steps": 500,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03,
13
+ "learning_rate": 0.000999404761904762,
14
  "loss": 3.0944,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.06,
19
+ "learning_rate": 0.0009988095238095238,
20
+ "loss": 2.7802,
21
  "step": 2
22
  },
23
  {
24
  "epoch": 0.09,
25
+ "learning_rate": 0.0009982142857142857,
26
+ "loss": 2.7798,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 0.11,
31
+ "learning_rate": 0.0009976190476190477,
32
+ "loss": 2.6729,
33
  "step": 4
34
  },
35
  {
36
  "epoch": 0.14,
37
+ "learning_rate": 0.0009970238095238096,
38
+ "loss": 2.7544,
39
  "step": 5
40
  },
41
  {
42
  "epoch": 0.17,
43
+ "learning_rate": 0.0009964285714285715,
44
+ "loss": 2.7115,
45
  "step": 6
46
  },
47
  {
48
  "epoch": 0.2,
49
+ "learning_rate": 0.0009958333333333334,
50
+ "loss": 2.7491,
51
  "step": 7
52
  },
53
  {
54
  "epoch": 0.23,
55
+ "learning_rate": 0.0009952380952380953,
56
+ "loss": 2.681,
57
  "step": 8
58
  },
59
  {
60
  "epoch": 0.26,
61
+ "learning_rate": 0.0009946428571428571,
62
+ "loss": 2.7396,
63
  "step": 9
64
  },
65
  {
66
  "epoch": 0.29,
67
+ "learning_rate": 0.000994047619047619,
68
+ "loss": 2.6911,
69
  "step": 10
70
  },
71
  {
72
  "epoch": 0.31,
73
+ "learning_rate": 0.0009934523809523809,
74
+ "loss": 2.6725,
75
  "step": 11
76
  },
77
  {
78
  "epoch": 0.34,
79
+ "learning_rate": 0.000992857142857143,
80
+ "loss": 2.6951,
81
  "step": 12
82
  },
83
  {
84
  "epoch": 0.37,
85
+ "learning_rate": 0.0009922619047619049,
86
+ "loss": 2.6741,
87
  "step": 13
88
  },
89
  {
90
  "epoch": 0.4,
91
+ "learning_rate": 0.0009916666666666667,
92
+ "loss": 2.6623,
93
  "step": 14
94
  },
95
  {
96
  "epoch": 0.43,
97
+ "learning_rate": 0.0009910714285714286,
98
+ "loss": 2.6268,
99
  "step": 15
100
  },
101
  {
102
  "epoch": 0.46,
103
+ "learning_rate": 0.0009904761904761905,
104
+ "loss": 2.6727,
105
  "step": 16
106
  },
107
  {
108
  "epoch": 0.49,
109
+ "learning_rate": 0.0009898809523809524,
110
+ "loss": 2.6174,
111
  "step": 17
112
  },
113
  {
114
  "epoch": 0.51,
115
+ "learning_rate": 0.0009892857142857142,
116
+ "loss": 2.6544,
117
  "step": 18
118
  },
119
  {
120
  "epoch": 0.54,
121
+ "learning_rate": 0.0009886904761904763,
122
+ "loss": 2.5404,
123
  "step": 19
124
  },
125
  {
126
  "epoch": 0.57,
127
+ "learning_rate": 0.0009880952380952382,
128
+ "loss": 2.6286,
129
  "step": 20
130
  },
131
  {
132
  "epoch": 0.6,
133
+ "learning_rate": 0.0009875,
134
+ "loss": 2.6824,
135
  "step": 21
136
  },
137
  {
138
  "epoch": 0.63,
139
+ "learning_rate": 0.000986904761904762,
140
+ "loss": 2.6012,
141
  "step": 22
142
  },
143
  {
144
  "epoch": 0.66,
145
+ "learning_rate": 0.0009863095238095239,
146
+ "loss": 2.6446,
147
  "step": 23
148
  },
149
  {
150
  "epoch": 0.69,
151
+ "learning_rate": 0.0009857142857142857,
152
+ "loss": 2.6437,
153
  "step": 24
154
  },
155
  {
156
  "epoch": 0.71,
157
+ "learning_rate": 0.0009851190476190476,
158
+ "loss": 2.6596,
159
  "step": 25
160
  },
161
  {
162
  "epoch": 0.74,
163
+ "learning_rate": 0.0009845238095238097,
164
+ "loss": 2.6554,
165
  "step": 26
166
  },
167
  {
168
  "epoch": 0.77,
169
+ "learning_rate": 0.0009839285714285714,
170
+ "loss": 2.6064,
171
  "step": 27
172
  },
173
  {
174
  "epoch": 0.8,
175
+ "learning_rate": 0.0009833333333333332,
176
+ "loss": 2.5994,
177
  "step": 28
178
  },
179
  {
180
  "epoch": 0.83,
181
+ "learning_rate": 0.0009827380952380951,
182
+ "loss": 2.545,
183
  "step": 29
184
  },
185
  {
186
  "epoch": 0.86,
187
+ "learning_rate": 0.0009821428571428572,
188
+ "loss": 2.5704,
189
  "step": 30
190
  },
191
  {
192
  "epoch": 0.89,
193
+ "learning_rate": 0.000981547619047619,
194
+ "loss": 2.6461,
195
  "step": 31
196
  },
197
  {
198
  "epoch": 0.91,
199
+ "learning_rate": 0.000980952380952381,
200
+ "loss": 2.631,
201
  "step": 32
202
  },
203
  {
204
  "epoch": 0.94,
205
+ "learning_rate": 0.0009803571428571428,
206
+ "loss": 2.6678,
207
  "step": 33
208
  },
209
  {
210
  "epoch": 0.97,
211
+ "learning_rate": 0.0009797619047619047,
212
+ "loss": 2.5964,
213
  "step": 34
214
  },
215
  {
216
  "epoch": 1.0,
217
+ "learning_rate": 0.0009791666666666666,
218
+ "loss": 2.6566,
219
  "step": 35
220
  },
221
  {
222
  "epoch": 1.03,
223
+ "learning_rate": 0.0009785714285714285,
224
+ "loss": 2.4962,
225
  "step": 36
226
  },
227
  {
228
  "epoch": 1.06,
229
+ "learning_rate": 0.0009779761904761906,
230
+ "loss": 2.4815,
231
  "step": 37
232
  },
233
  {
234
  "epoch": 1.09,
235
+ "learning_rate": 0.0009773809523809524,
236
+ "loss": 2.4172,
237
  "step": 38
238
  },
239
  {
240
  "epoch": 1.11,
241
+ "learning_rate": 0.0009767857142857143,
242
+ "loss": 2.4641,
243
  "step": 39
244
  },
245
  {
246
  "epoch": 1.14,
247
+ "learning_rate": 0.0009761904761904762,
248
+ "loss": 2.4875,
249
  "step": 40
250
  },
251
  {
252
  "epoch": 1.17,
253
+ "learning_rate": 0.0009755952380952381,
254
+ "loss": 2.4486,
255
  "step": 41
256
  },
257
  {
258
  "epoch": 1.2,
259
+ "learning_rate": 0.000975,
260
+ "loss": 2.4463,
261
  "step": 42
262
  },
263
  {
264
  "epoch": 1.23,
265
+ "learning_rate": 0.0009744047619047619,
266
+ "loss": 2.5063,
267
  "step": 43
268
  },
269
  {
270
  "epoch": 1.26,
271
+ "learning_rate": 0.0009738095238095238,
272
+ "loss": 2.4506,
273
  "step": 44
274
  },
275
  {
276
  "epoch": 1.29,
277
+ "learning_rate": 0.0009732142857142857,
278
+ "loss": 2.3862,
279
  "step": 45
280
  },
281
  {
282
  "epoch": 1.31,
283
+ "learning_rate": 0.0009726190476190476,
284
+ "loss": 2.4925,
285
  "step": 46
286
  },
287
  {
288
  "epoch": 1.34,
289
+ "learning_rate": 0.0009720238095238096,
290
+ "loss": 2.5,
291
  "step": 47
292
  },
293
  {
294
  "epoch": 1.37,
295
+ "learning_rate": 0.0009714285714285714,
296
+ "loss": 2.4658,
297
  "step": 48
298
  },
299
  {
300
  "epoch": 1.4,
301
+ "learning_rate": 0.0009708333333333333,
302
+ "loss": 2.3983,
303
  "step": 49
304
  },
305
  {
306
  "epoch": 1.43,
307
+ "learning_rate": 0.0009702380952380953,
308
+ "loss": 2.369,
309
  "step": 50
310
  },
311
  {
312
  "epoch": 1.46,
313
+ "learning_rate": 0.0009696428571428572,
314
+ "loss": 2.4849,
315
  "step": 51
316
  },
317
  {
318
  "epoch": 1.49,
319
+ "learning_rate": 0.0009690476190476191,
320
+ "loss": 2.4106,
321
  "step": 52
322
  },
323
  {
324
  "epoch": 1.51,
325
+ "learning_rate": 0.0009684523809523809,
326
+ "loss": 2.4363,
327
  "step": 53
328
  },
329
  {
330
  "epoch": 1.54,
331
+ "learning_rate": 0.0009678571428571429,
332
+ "loss": 2.4935,
333
  "step": 54
334
  },
335
  {
336
  "epoch": 1.57,
337
+ "learning_rate": 0.0009672619047619048,
338
+ "loss": 2.479,
339
  "step": 55
340
  },
341
  {
342
  "epoch": 1.6,
343
+ "learning_rate": 0.0009666666666666667,
344
+ "loss": 2.4299,
345
  "step": 56
346
  },
347
  {
348
  "epoch": 1.63,
349
+ "learning_rate": 0.0009660714285714285,
350
+ "loss": 2.3964,
351
  "step": 57
352
  },
353
  {
354
  "epoch": 1.66,
355
+ "learning_rate": 0.0009654761904761905,
356
+ "loss": 2.4865,
357
  "step": 58
358
  },
359
  {
360
  "epoch": 1.69,
361
+ "learning_rate": 0.0009648809523809524,
362
+ "loss": 2.3831,
363
  "step": 59
364
  },
365
  {
366
  "epoch": 1.71,
367
+ "learning_rate": 0.0009642857142857143,
368
+ "loss": 2.4304,
369
  "step": 60
370
  },
371
  {
372
  "epoch": 1.74,
373
+ "learning_rate": 0.0009636904761904763,
374
+ "loss": 2.4273,
375
  "step": 61
376
  },
377
  {
378
  "epoch": 1.77,
379
+ "learning_rate": 0.0009630952380952382,
380
+ "loss": 2.4427,
381
  "step": 62
382
  },
383
  {
384
  "epoch": 1.8,
385
+ "learning_rate": 0.0009625,
386
+ "loss": 2.4191,
387
  "step": 63
388
  },
389
  {
390
  "epoch": 1.83,
391
+ "learning_rate": 0.0009619047619047619,
392
+ "loss": 2.3902,
393
  "step": 64
394
  },
395
  {
396
  "epoch": 1.86,
397
+ "learning_rate": 0.0009613095238095239,
398
+ "loss": 2.4699,
399
  "step": 65
400
  },
401
  {
402
  "epoch": 1.89,
403
+ "learning_rate": 0.0009607142857142858,
404
+ "loss": 2.3978,
405
  "step": 66
406
  },
407
  {
408
  "epoch": 1.91,
409
+ "learning_rate": 0.0009601190476190476,
410
+ "loss": 2.4128,
411
  "step": 67
412
  },
413
  {
414
  "epoch": 1.94,
415
+ "learning_rate": 0.0009595238095238095,
416
+ "loss": 2.4585,
417
  "step": 68
418
  },
419
  {
420
  "epoch": 1.97,
421
+ "learning_rate": 0.0009589285714285715,
422
+ "loss": 2.4476,
423
  "step": 69
424
  },
425
  {
426
  "epoch": 2.0,
427
+ "learning_rate": 0.0009583333333333334,
428
+ "loss": 2.4231,
429
  "step": 70
430
  },
431
  {
432
  "epoch": 2.03,
433
+ "learning_rate": 0.0009577380952380953,
434
+ "loss": 2.2655,
435
  "step": 71
436
  },
437
  {
438
  "epoch": 2.06,
439
+ "learning_rate": 0.0009571428571428573,
440
+ "loss": 2.208,
441
  "step": 72
442
  },
443
  {
444
  "epoch": 2.09,
445
+ "learning_rate": 0.0009565476190476191,
446
+ "loss": 2.1758,
447
  "step": 73
448
  },
449
  {
450
  "epoch": 2.11,
451
+ "learning_rate": 0.000955952380952381,
452
+ "loss": 2.3113,
453
  "step": 74
454
  },
455
  {
456
  "epoch": 2.14,
457
+ "learning_rate": 0.0009553571428571429,
458
+ "loss": 2.1739,
459
  "step": 75
460
  },
461
  {
462
  "epoch": 2.17,
463
+ "learning_rate": 0.0009547619047619049,
464
+ "loss": 2.2234,
465
  "step": 76
466
  },
467
  {
468
  "epoch": 2.2,
469
+ "learning_rate": 0.0009541666666666667,
470
+ "loss": 2.21,
471
  "step": 77
472
  },
473
  {
474
  "epoch": 2.23,
475
+ "learning_rate": 0.0009535714285714286,
476
+ "loss": 2.2316,
477
  "step": 78
478
  },
479
  {
480
  "epoch": 2.26,
481
+ "learning_rate": 0.0009529761904761904,
482
+ "loss": 2.2044,
483
  "step": 79
484
  },
485
  {
486
  "epoch": 2.29,
487
+ "learning_rate": 0.0009523809523809524,
488
+ "loss": 2.1784,
489
  "step": 80
490
  },
491
  {
492
  "epoch": 2.31,
493
+ "learning_rate": 0.0009517857142857143,
494
+ "loss": 2.2489,
495
  "step": 81
496
  },
497
  {
498
  "epoch": 2.34,
499
+ "learning_rate": 0.0009511904761904761,
500
+ "loss": 2.2003,
501
  "step": 82
502
  },
503
  {
504
  "epoch": 2.37,
505
+ "learning_rate": 0.0009505952380952381,
506
+ "loss": 2.169,
507
  "step": 83
508
  },
509
  {
510
  "epoch": 2.4,
511
+ "learning_rate": 0.00095,
512
+ "loss": 2.2303,
513
  "step": 84
514
  },
515
  {
516
  "epoch": 2.43,
517
+ "learning_rate": 0.0009494047619047619,
518
+ "loss": 2.1744,
519
  "step": 85
520
  },
521
  {
522
  "epoch": 2.46,
523
+ "learning_rate": 0.0009488095238095238,
524
+ "loss": 2.1904,
525
  "step": 86
526
  },
527
  {
528
  "epoch": 2.49,
529
+ "learning_rate": 0.0009482142857142857,
530
+ "loss": 2.222,
531
  "step": 87
532
  },
533
  {
534
  "epoch": 2.51,
535
+ "learning_rate": 0.0009476190476190476,
536
+ "loss": 2.2467,
537
  "step": 88
538
  },
539
  {
540
  "epoch": 2.54,
541
+ "learning_rate": 0.0009470238095238095,
542
+ "loss": 2.2241,
543
  "step": 89
544
  },
545
  {
546
  "epoch": 2.57,
547
+ "learning_rate": 0.0009464285714285714,
548
+ "loss": 2.2339,
549
  "step": 90
550
  },
551
  {
552
  "epoch": 2.6,
553
+ "learning_rate": 0.0009458333333333334,
554
+ "loss": 2.2339,
555
  "step": 91
556
  },
557
  {
558
  "epoch": 2.63,
559
+ "learning_rate": 0.0009452380952380952,
560
+ "loss": 2.1653,
561
  "step": 92
562
  },
563
  {
564
  "epoch": 2.66,
565
+ "learning_rate": 0.0009446428571428571,
566
+ "loss": 2.2954,
567
  "step": 93
568
  },
569
  {
570
  "epoch": 2.69,
571
+ "learning_rate": 0.0009440476190476191,
572
+ "loss": 2.2859,
573
  "step": 94
574
  },
575
  {
576
  "epoch": 2.71,
577
+ "learning_rate": 0.000943452380952381,
578
+ "loss": 2.2107,
579
  "step": 95
580
  },
581
  {
582
  "epoch": 2.74,
583
+ "learning_rate": 0.0009428571428571429,
584
+ "loss": 2.2142,
585
  "step": 96
586
  },
587
  {
588
  "epoch": 2.77,
589
+ "learning_rate": 0.0009422619047619047,
590
+ "loss": 2.2433,
591
  "step": 97
592
  },
593
  {
594
  "epoch": 2.8,
595
+ "learning_rate": 0.0009416666666666667,
596
+ "loss": 2.2417,
597
  "step": 98
598
  },
599
  {
600
  "epoch": 2.83,
601
+ "learning_rate": 0.0009410714285714286,
602
+ "loss": 2.2045,
603
  "step": 99
604
  },
605
  {
606
  "epoch": 2.86,
607
+ "learning_rate": 0.0009404761904761905,
608
+ "loss": 2.293,
609
  "step": 100
610
  },
611
  {
612
  "epoch": 2.89,
613
+ "learning_rate": 0.0009398809523809523,
614
+ "loss": 2.2051,
615
  "step": 101
616
  },
617
  {
618
  "epoch": 2.91,
619
+ "learning_rate": 0.0009392857142857143,
620
+ "loss": 2.2889,
621
  "step": 102
622
  },
623
  {
624
  "epoch": 2.94,
625
+ "learning_rate": 0.0009386904761904762,
626
+ "loss": 2.2,
627
  "step": 103
628
  },
629
  {
630
  "epoch": 2.97,
631
+ "learning_rate": 0.0009380952380952381,
632
+ "loss": 2.2298,
633
  "step": 104
634
  },
635
  {
636
  "epoch": 3.0,
637
+ "learning_rate": 0.0009375,
638
+ "loss": 2.2722,
639
  "step": 105
640
  },
641
  {
642
  "epoch": 3.03,
643
+ "learning_rate": 0.000936904761904762,
644
+ "loss": 2.0032,
645
  "step": 106
646
  },
647
  {
648
  "epoch": 3.06,
649
+ "learning_rate": 0.0009363095238095238,
650
+ "loss": 1.9269,
651
  "step": 107
652
  },
653
  {
654
  "epoch": 3.09,
655
+ "learning_rate": 0.0009357142857142857,
656
+ "loss": 1.916,
657
  "step": 108
658
  },
659
  {
660
  "epoch": 3.11,
661
+ "learning_rate": 0.0009351190476190477,
662
+ "loss": 1.9165,
663
  "step": 109
664
  },
665
  {
666
  "epoch": 3.14,
667
+ "learning_rate": 0.0009345238095238096,
668
+ "loss": 1.9296,
669
  "step": 110
670
  },
671
  {
672
  "epoch": 3.17,
673
+ "learning_rate": 0.0009339285714285714,
674
+ "loss": 1.954,
675
  "step": 111
676
  },
677
  {
678
  "epoch": 3.2,
679
+ "learning_rate": 0.0009333333333333333,
680
+ "loss": 1.9457,
681
  "step": 112
682
  },
683
  {
684
  "epoch": 3.23,
685
+ "learning_rate": 0.0009327380952380953,
686
+ "loss": 1.9135,
687
  "step": 113
688
  },
689
  {
690
  "epoch": 3.26,
691
+ "learning_rate": 0.0009321428571428572,
692
+ "loss": 1.9383,
693
  "step": 114
694
  },
695
  {
696
  "epoch": 3.29,
697
+ "learning_rate": 0.0009315476190476191,
698
+ "loss": 1.9057,
699
  "step": 115
700
  },
701
  {
702
  "epoch": 3.31,
703
+ "learning_rate": 0.0009309523809523809,
704
+ "loss": 1.9541,
705
  "step": 116
706
  },
707
  {
708
  "epoch": 3.34,
709
+ "learning_rate": 0.0009303571428571429,
710
+ "loss": 1.8827,
711
  "step": 117
712
  },
713
  {
714
  "epoch": 3.37,
715
+ "learning_rate": 0.0009297619047619048,
716
+ "loss": 1.899,
717
  "step": 118
718
  },
719
  {
720
  "epoch": 3.4,
721
+ "learning_rate": 0.0009291666666666667,
722
+ "loss": 1.9095,
723
  "step": 119
724
  },
725
  {
726
  "epoch": 3.43,
727
+ "learning_rate": 0.0009285714285714287,
728
+ "loss": 1.936,
729
  "step": 120
730
  },
731
  {
732
  "epoch": 3.46,
733
+ "learning_rate": 0.0009279761904761905,
734
+ "loss": 1.9224,
735
  "step": 121
736
  },
737
  {
738
  "epoch": 3.49,
739
+ "learning_rate": 0.0009273809523809524,
740
+ "loss": 1.9547,
741
  "step": 122
742
  },
743
  {
744
  "epoch": 3.51,
745
+ "learning_rate": 0.0009267857142857143,
746
+ "loss": 1.9323,
747
  "step": 123
748
  },
749
  {
750
  "epoch": 3.54,
751
+ "learning_rate": 0.0009261904761904763,
752
+ "loss": 1.9938,
753
  "step": 124
754
  },
755
  {
756
  "epoch": 3.57,
757
+ "learning_rate": 0.0009255952380952382,
758
+ "loss": 1.8674,
759
  "step": 125
760
  },
761
  {
762
  "epoch": 3.6,
763
+ "learning_rate": 0.000925,
764
+ "loss": 1.95,
765
  "step": 126
766
  },
767
  {
768
  "epoch": 3.63,
769
+ "learning_rate": 0.0009244047619047619,
770
+ "loss": 1.9374,
771
  "step": 127
772
  },
773
  {
774
  "epoch": 3.66,
775
+ "learning_rate": 0.0009238095238095239,
776
+ "loss": 1.9456,
777
  "step": 128
778
  },
779
  {
780
  "epoch": 3.69,
781
+ "learning_rate": 0.0009232142857142858,
782
+ "loss": 1.9791,
783
  "step": 129
784
  },
785
  {
786
  "epoch": 3.71,
787
+ "learning_rate": 0.0009226190476190477,
788
+ "loss": 2.0047,
789
  "step": 130
790
  },
791
  {
792
  "epoch": 3.74,
793
+ "learning_rate": 0.0009220238095238096,
794
+ "loss": 1.9971,
795
  "step": 131
796
  },
797
  {
798
  "epoch": 3.77,
799
+ "learning_rate": 0.0009214285714285714,
800
+ "loss": 2.0267,
801
  "step": 132
802
  },
803
  {
804
  "epoch": 3.8,
805
+ "learning_rate": 0.0009208333333333333,
806
+ "loss": 1.9374,
807
  "step": 133
808
  },
809
  {
810
  "epoch": 3.83,
811
+ "learning_rate": 0.0009202380952380952,
812
+ "loss": 1.9793,
813
  "step": 134
814
  },
815
  {
816
  "epoch": 3.86,
817
+ "learning_rate": 0.0009196428571428572,
818
+ "loss": 2.0483,
819
  "step": 135
820
  },
821
  {
822
  "epoch": 3.89,
823
+ "learning_rate": 0.000919047619047619,
824
+ "loss": 2.0003,
825
  "step": 136
826
  },
827
  {
828
  "epoch": 3.91,
829
+ "learning_rate": 0.0009184523809523809,
830
+ "loss": 2.0185,
831
  "step": 137
832
  },
833
  {
834
  "epoch": 3.94,
835
+ "learning_rate": 0.0009178571428571428,
836
+ "loss": 2.0517,
837
  "step": 138
838
  },
839
  {
840
  "epoch": 3.97,
841
+ "learning_rate": 0.0009172619047619048,
842
+ "loss": 1.9824,
843
  "step": 139
844
  },
845
  {
846
  "epoch": 4.0,
847
+ "learning_rate": 0.0009166666666666666,
848
+ "loss": 2.0383,
849
  "step": 140
850
  },
851
  {
852
  "epoch": 4.03,
853
+ "learning_rate": 0.0009160714285714285,
854
+ "loss": 1.6818,
855
  "step": 141
856
  },
857
  {
858
  "epoch": 4.06,
859
+ "learning_rate": 0.0009154761904761905,
860
+ "loss": 1.6208,
861
  "step": 142
862
  },
863
  {
864
  "epoch": 4.09,
865
+ "learning_rate": 0.0009148809523809524,
866
+ "loss": 1.6843,
867
  "step": 143
868
  },
869
  {
870
  "epoch": 4.11,
871
+ "learning_rate": 0.0009142857142857143,
872
+ "loss": 1.5885,
873
  "step": 144
874
  },
875
  {
876
  "epoch": 4.14,
877
+ "learning_rate": 0.0009136904761904761,
878
+ "loss": 1.5799,
879
  "step": 145
880
  },
881
  {
882
  "epoch": 4.17,
883
+ "learning_rate": 0.0009130952380952381,
884
+ "loss": 1.6334,
885
  "step": 146
886
  },
887
  {
888
  "epoch": 4.2,
889
+ "learning_rate": 0.0009125,
890
+ "loss": 1.6297,
891
  "step": 147
892
  },
893
  {
894
  "epoch": 4.23,
895
+ "learning_rate": 0.0009119047619047619,
896
+ "loss": 1.5929,
897
  "step": 148
898
  },
899
  {
900
  "epoch": 4.26,
901
+ "learning_rate": 0.0009113095238095238,
902
+ "loss": 1.6621,
903
  "step": 149
904
  },
905
  {
906
  "epoch": 4.29,
907
+ "learning_rate": 0.0009107142857142857,
908
+ "loss": 1.626,
909
  "step": 150
910
  },
911
  {
912
  "epoch": 4.31,
913
+ "learning_rate": 0.0009101190476190476,
914
+ "loss": 1.6138,
915
  "step": 151
916
  },
917
  {
918
  "epoch": 4.34,
919
+ "learning_rate": 0.0009095238095238095,
920
+ "loss": 1.6465,
921
  "step": 152
922
  },
923
  {
924
  "epoch": 4.37,
925
+ "learning_rate": 0.0009089285714285715,
926
+ "loss": 1.6622,
927
  "step": 153
928
  },
929
  {
930
  "epoch": 4.4,
931
+ "learning_rate": 0.0009083333333333334,
932
+ "loss": 1.6662,
933
  "step": 154
934
  },
935
  {
936
  "epoch": 4.43,
937
+ "learning_rate": 0.0009077380952380952,
938
+ "loss": 1.6348,
939
  "step": 155
940
  },
941
  {
942
  "epoch": 4.46,
943
+ "learning_rate": 0.0009071428571428571,
944
+ "loss": 1.6196,
945
  "step": 156
946
  },
947
  {
948
  "epoch": 4.49,
949
+ "learning_rate": 0.0009065476190476191,
950
+ "loss": 1.6766,
951
  "step": 157
952
  },
953
  {
954
  "epoch": 4.51,
955
+ "learning_rate": 0.000905952380952381,
956
+ "loss": 1.7069,
957
  "step": 158
958
  },
959
  {
960
  "epoch": 4.54,
961
+ "learning_rate": 0.0009053571428571429,
962
+ "loss": 1.6848,
963
  "step": 159
964
  },
965
  {
966
  "epoch": 4.57,
967
+ "learning_rate": 0.0009047619047619047,
968
+ "loss": 1.6884,
969
  "step": 160
970
  },
971
  {
972
  "epoch": 4.6,
973
+ "learning_rate": 0.0009041666666666667,
974
+ "loss": 1.6721,
975
  "step": 161
976
  },
977
  {
978
  "epoch": 4.63,
979
+ "learning_rate": 0.0009035714285714286,
980
+ "loss": 1.7116,
981
  "step": 162
982
  },
983
  {
984
  "epoch": 4.66,
985
+ "learning_rate": 0.0009029761904761905,
986
+ "loss": 1.693,
987
  "step": 163
988
  },
989
  {
990
  "epoch": 4.69,
991
+ "learning_rate": 0.0009023809523809525,
992
+ "loss": 1.6826,
993
  "step": 164
994
  },
995
  {
996
  "epoch": 4.71,
997
+ "learning_rate": 0.0009017857142857143,
998
+ "loss": 1.7061,
999
  "step": 165
1000
  },
1001
  {
1002
  "epoch": 4.74,
1003
+ "learning_rate": 0.0009011904761904762,
1004
+ "loss": 1.6964,
1005
  "step": 166
1006
  },
1007
  {
1008
  "epoch": 4.77,
1009
+ "learning_rate": 0.0009005952380952381,
1010
+ "loss": 1.7903,
1011
  "step": 167
1012
  },
1013
  {
1014
  "epoch": 4.8,
1015
+ "learning_rate": 0.0009000000000000001,
1016
+ "loss": 1.6829,
1017
  "step": 168
1018
  },
1019
  {
1020
  "epoch": 4.83,
1021
+ "learning_rate": 0.000899404761904762,
1022
+ "loss": 1.7047,
1023
  "step": 169
1024
  },
1025
  {
1026
  "epoch": 4.86,
1027
+ "learning_rate": 0.0008988095238095238,
1028
+ "loss": 1.7671,
1029
  "step": 170
1030
  },
1031
  {
1032
  "epoch": 4.89,
1033
+ "learning_rate": 0.0008982142857142857,
1034
+ "loss": 1.7184,
1035
  "step": 171
1036
  },
1037
  {
1038
  "epoch": 4.91,
1039
+ "learning_rate": 0.0008976190476190477,
1040
+ "loss": 1.8213,
1041
  "step": 172
1042
  },
1043
  {
1044
  "epoch": 4.94,
1045
+ "learning_rate": 0.0008970238095238096,
1046
+ "loss": 1.7688,
1047
  "step": 173
1048
  },
1049
  {
1050
  "epoch": 4.97,
1051
+ "learning_rate": 0.0008964285714285715,
1052
+ "loss": 1.7522,
1053
  "step": 174
1054
  },
1055
  {
1056
  "epoch": 5.0,
1057
+ "learning_rate": 0.0008958333333333334,
1058
+ "loss": 1.7862,
1059
  "step": 175
1060
  },
1061
  {
1062
  "epoch": 5.03,
1063
+ "learning_rate": 0.0008952380952380953,
1064
+ "loss": 1.3871,
1065
  "step": 176
1066
  },
1067
  {
1068
  "epoch": 5.06,
1069
+ "learning_rate": 0.0008946428571428572,
1070
+ "loss": 1.3491,
1071
  "step": 177
1072
  },
1073
  {
1074
  "epoch": 5.09,
1075
+ "learning_rate": 0.0008940476190476191,
1076
+ "loss": 1.3399,
1077
  "step": 178
1078
  },
1079
  {
1080
  "epoch": 5.11,
1081
+ "learning_rate": 0.0008934523809523811,
1082
+ "loss": 1.3569,
1083
  "step": 179
1084
  },
1085
  {
1086
  "epoch": 5.14,
1087
+ "learning_rate": 0.0008928571428571429,
1088
+ "loss": 1.3734,
1089
  "step": 180
1090
  },
1091
  {
1092
  "epoch": 5.17,
1093
+ "learning_rate": 0.0008922619047619048,
1094
+ "loss": 1.3151,
1095
  "step": 181
1096
  },
1097
  {
1098
  "epoch": 5.2,
1099
+ "learning_rate": 0.0008916666666666667,
1100
+ "loss": 1.3243,
1101
  "step": 182
1102
  },
1103
  {
1104
  "epoch": 5.23,
1105
+ "learning_rate": 0.0008910714285714287,
1106
+ "loss": 1.342,
1107
  "step": 183
1108
  },
1109
  {
1110
  "epoch": 5.26,
1111
+ "learning_rate": 0.0008904761904761904,
1112
+ "loss": 1.3664,
1113
  "step": 184
1114
  },
1115
  {
1116
  "epoch": 5.29,
1117
+ "learning_rate": 0.0008898809523809523,
1118
+ "loss": 1.3493,
1119
  "step": 185
1120
  },
1121
  {
1122
  "epoch": 5.31,
1123
+ "learning_rate": 0.0008892857142857142,
1124
+ "loss": 1.32,
1125
  "step": 186
1126
  },
1127
  {
1128
  "epoch": 5.34,
1129
+ "learning_rate": 0.0008886904761904762,
1130
+ "loss": 1.3978,
1131
  "step": 187
1132
  },
1133
  {
1134
  "epoch": 5.37,
1135
+ "learning_rate": 0.0008880952380952381,
1136
+ "loss": 1.3762,
1137
  "step": 188
1138
  },
1139
  {
1140
  "epoch": 5.4,
1141
+ "learning_rate": 0.0008874999999999999,
1142
+ "loss": 1.4172,
1143
  "step": 189
1144
  },
1145
  {
1146
  "epoch": 5.43,
1147
+ "learning_rate": 0.0008869047619047619,
1148
+ "loss": 1.3817,
1149
  "step": 190
1150
  },
1151
  {
1152
  "epoch": 5.46,
1153
+ "learning_rate": 0.0008863095238095238,
1154
+ "loss": 1.3779,
1155
  "step": 191
1156
  },
1157
  {
1158
  "epoch": 5.49,
1159
+ "learning_rate": 0.0008857142857142857,
1160
+ "loss": 1.378,
1161
  "step": 192
1162
  },
1163
  {
1164
  "epoch": 5.51,
1165
+ "learning_rate": 0.0008851190476190476,
1166
+ "loss": 1.4245,
1167
  "step": 193
1168
  },
1169
  {
1170
  "epoch": 5.54,
1171
+ "learning_rate": 0.0008845238095238095,
1172
+ "loss": 1.4425,
1173
  "step": 194
1174
  },
1175
  {
1176
  "epoch": 5.57,
1177
+ "learning_rate": 0.0008839285714285714,
1178
+ "loss": 1.4324,
1179
  "step": 195
1180
  },
1181
  {
1182
  "epoch": 5.6,
1183
+ "learning_rate": 0.0008833333333333333,
1184
+ "loss": 1.4264,
1185
  "step": 196
1186
  },
1187
  {
1188
  "epoch": 5.63,
1189
+ "learning_rate": 0.0008827380952380952,
1190
+ "loss": 1.4395,
1191
  "step": 197
1192
  },
1193
  {
1194
  "epoch": 5.66,
1195
+ "learning_rate": 0.0008821428571428572,
1196
+ "loss": 1.4549,
1197
  "step": 198
1198
  },
1199
  {
1200
  "epoch": 5.69,
1201
+ "learning_rate": 0.000881547619047619,
1202
+ "loss": 1.501,
1203
  "step": 199
1204
  },
1205
  {
1206
  "epoch": 5.71,
1207
+ "learning_rate": 0.0008809523809523809,
1208
+ "loss": 1.432,
1209
  "step": 200
1210
  },
1211
  {
1212
  "epoch": 5.74,
1213
+ "learning_rate": 0.0008803571428571429,
1214
+ "loss": 1.4922,
1215
  "step": 201
1216
  },
1217
  {
1218
  "epoch": 5.77,
1219
+ "learning_rate": 0.0008797619047619048,
1220
+ "loss": 1.4622,
1221
  "step": 202
1222
  },
1223
  {
1224
  "epoch": 5.8,
1225
+ "learning_rate": 0.0008791666666666667,
1226
+ "loss": 1.4794,
1227
  "step": 203
1228
  },
1229
  {
1230
  "epoch": 5.83,
1231
+ "learning_rate": 0.0008785714285714285,
1232
+ "loss": 1.4938,
1233
  "step": 204
1234
  },
1235
  {
1236
  "epoch": 5.86,
1237
+ "learning_rate": 0.0008779761904761905,
1238
+ "loss": 1.4792,
1239
  "step": 205
1240
  },
1241
  {
1242
  "epoch": 5.89,
1243
+ "learning_rate": 0.0008773809523809524,
1244
+ "loss": 1.5192,
1245
  "step": 206
1246
  },
1247
  {
1248
  "epoch": 5.91,
1249
+ "learning_rate": 0.0008767857142857143,
1250
+ "loss": 1.5055,
1251
  "step": 207
1252
  },
1253
  {
1254
  "epoch": 5.94,
1255
+ "learning_rate": 0.0008761904761904762,
1256
+ "loss": 1.5484,
1257
  "step": 208
1258
  },
1259
  {
1260
  "epoch": 5.97,
1261
+ "learning_rate": 0.0008755952380952381,
1262
+ "loss": 1.5096,
1263
  "step": 209
1264
  },
1265
  {
1266
  "epoch": 6.0,
1267
+ "learning_rate": 0.000875,
1268
+ "loss": 1.5298,
1269
  "step": 210
1270
  },
1271
  {
1272
  "epoch": 6.03,
1273
+ "learning_rate": 0.0008744047619047619,
1274
+ "loss": 1.1704,
1275
  "step": 211
1276
  },
1277
  {
1278
  "epoch": 6.06,
1279
+ "learning_rate": 0.0008738095238095239,
1280
+ "loss": 1.1261,
1281
  "step": 212
1282
  },
1283
  {
1284
  "epoch": 6.09,
1285
+ "learning_rate": 0.0008732142857142858,
1286
+ "loss": 1.1144,
1287
  "step": 213
1288
  },
1289
  {
1290
  "epoch": 6.11,
1291
+ "learning_rate": 0.0008726190476190476,
1292
+ "loss": 1.0984,
1293
  "step": 214
1294
  },
1295
  {
1296
  "epoch": 6.14,
1297
+ "learning_rate": 0.0008720238095238095,
1298
+ "loss": 1.0704,
1299
  "step": 215
1300
  },
1301
  {
1302
  "epoch": 6.17,
1303
+ "learning_rate": 0.0008714285714285715,
1304
+ "loss": 1.0655,
1305
  "step": 216
1306
  },
1307
  {
1308
  "epoch": 6.2,
1309
+ "learning_rate": 0.0008708333333333334,
1310
+ "loss": 1.09,
1311
  "step": 217
1312
  },
1313
  {
1314
  "epoch": 6.23,
1315
+ "learning_rate": 0.0008702380952380953,
1316
+ "loss": 1.0619,
1317
  "step": 218
1318
  },
1319
  {
1320
  "epoch": 6.26,
1321
+ "learning_rate": 0.0008696428571428571,
1322
+ "loss": 1.1633,
1323
  "step": 219
1324
  },
1325
  {
1326
  "epoch": 6.29,
1327
+ "learning_rate": 0.0008690476190476191,
1328
+ "loss": 1.1022,
1329
  "step": 220
1330
  },
1331
  {
1332
  "epoch": 6.31,
1333
+ "learning_rate": 0.000868452380952381,
1334
+ "loss": 1.1057,
1335
  "step": 221
1336
  },
1337
  {
1338
  "epoch": 6.34,
1339
+ "learning_rate": 0.0008678571428571429,
1340
+ "loss": 1.1279,
1341
  "step": 222
1342
  },
1343
  {
1344
  "epoch": 6.37,
1345
+ "learning_rate": 0.0008672619047619049,
1346
+ "loss": 1.0915,
1347
  "step": 223
1348
  },
1349
  {
1350
  "epoch": 6.4,
1351
+ "learning_rate": 0.0008666666666666667,
1352
+ "loss": 1.1731,
1353
  "step": 224
1354
  },
1355
  {
1356
  "epoch": 6.43,
1357
+ "learning_rate": 0.0008660714285714286,
1358
+ "loss": 1.1352,
1359
  "step": 225
1360
  },
1361
  {
1362
  "epoch": 6.46,
1363
+ "learning_rate": 0.0008654761904761905,
1364
+ "loss": 1.1632,
1365
  "step": 226
1366
  },
1367
  {
1368
  "epoch": 6.49,
1369
+ "learning_rate": 0.0008648809523809525,
1370
+ "loss": 1.1691,
1371
  "step": 227
1372
  },
1373
  {
1374
  "epoch": 6.51,
1375
+ "learning_rate": 0.0008642857142857144,
1376
+ "loss": 1.181,
1377
  "step": 228
1378
  },
1379
  {
1380
  "epoch": 6.54,
1381
+ "learning_rate": 0.0008636904761904762,
1382
+ "loss": 1.1635,
1383
  "step": 229
1384
  },
1385
  {
1386
  "epoch": 6.57,
1387
+ "learning_rate": 0.0008630952380952381,
1388
+ "loss": 1.1802,
1389
  "step": 230
1390
  },
1391
  {
1392
  "epoch": 6.6,
1393
+ "learning_rate": 0.0008625000000000001,
1394
+ "loss": 1.2111,
1395
  "step": 231
1396
  },
1397
  {
1398
  "epoch": 6.63,
1399
+ "learning_rate": 0.000861904761904762,
1400
+ "loss": 1.2503,
1401
  "step": 232
1402
  },
1403
  {
1404
  "epoch": 6.66,
1405
+ "learning_rate": 0.0008613095238095238,
1406
+ "loss": 1.2305,
1407
  "step": 233
1408
  },
1409
  {
1410
  "epoch": 6.69,
1411
+ "learning_rate": 0.0008607142857142858,
1412
+ "loss": 1.2446,
1413
  "step": 234
1414
  },
1415
  {
1416
  "epoch": 6.71,
1417
+ "learning_rate": 0.0008601190476190477,
1418
+ "loss": 1.263,
1419
  "step": 235
1420
  },
1421
  {
1422
  "epoch": 6.74,
1423
+ "learning_rate": 0.0008595238095238096,
1424
+ "loss": 1.2407,
1425
  "step": 236
1426
  },
1427
  {
1428
  "epoch": 6.77,
1429
+ "learning_rate": 0.0008589285714285714,
1430
+ "loss": 1.303,
1431
  "step": 237
1432
  },
1433
  {
1434
  "epoch": 6.8,
1435
+ "learning_rate": 0.0008583333333333333,
1436
+ "loss": 1.2309,
1437
  "step": 238
1438
  },
1439
  {
1440
  "epoch": 6.83,
1441
+ "learning_rate": 0.0008577380952380952,
1442
+ "loss": 1.2669,
1443
  "step": 239
1444
  },
1445
  {
1446
  "epoch": 6.86,
1447
+ "learning_rate": 0.0008571428571428571,
1448
+ "loss": 1.226,
1449
  "step": 240
1450
  },
1451
  {
1452
  "epoch": 6.89,
1453
+ "learning_rate": 0.000856547619047619,
1454
+ "loss": 1.2862,
1455
  "step": 241
1456
  },
1457
  {
1458
  "epoch": 6.91,
1459
+ "learning_rate": 0.000855952380952381,
1460
+ "loss": 1.2472,
1461
  "step": 242
1462
  },
1463
  {
1464
  "epoch": 6.94,
1465
+ "learning_rate": 0.0008553571428571428,
1466
+ "loss": 1.2928,
1467
  "step": 243
1468
  },
1469
  {
1470
  "epoch": 6.97,
1471
+ "learning_rate": 0.0008547619047619047,
1472
+ "loss": 1.2427,
1473
  "step": 244
1474
  },
1475
  {
1476
  "epoch": 7.0,
1477
+ "learning_rate": 0.0008541666666666666,
1478
+ "loss": 1.3195,
1479
  "step": 245
1480
  },
1481
  {
1482
  "epoch": 7.03,
1483
+ "learning_rate": 0.0008535714285714286,
1484
+ "loss": 0.8949,
1485
  "step": 246
1486
  },
1487
  {
1488
  "epoch": 7.06,
1489
+ "learning_rate": 0.0008529761904761905,
1490
+ "loss": 0.8907,
1491
  "step": 247
1492
  },
1493
  {
1494
  "epoch": 7.09,
1495
+ "learning_rate": 0.0008523809523809523,
1496
+ "loss": 0.8813,
1497
  "step": 248
1498
  },
1499
  {
1500
  "epoch": 7.11,
1501
+ "learning_rate": 0.0008517857142857143,
1502
+ "loss": 0.8702,
1503
  "step": 249
1504
  },
1505
  {
1506
  "epoch": 7.14,
1507
+ "learning_rate": 0.0008511904761904762,
1508
+ "loss": 0.9105,
1509
  "step": 250
1510
  },
1511
  {
1512
  "epoch": 7.17,
1513
+ "learning_rate": 0.0008505952380952381,
1514
+ "loss": 0.9096,
1515
  "step": 251
1516
  },
1517
  {
1518
  "epoch": 7.2,
1519
+ "learning_rate": 0.00085,
1520
+ "loss": 0.9121,
1521
  "step": 252
1522
  },
1523
  {
1524
  "epoch": 7.23,
1525
+ "learning_rate": 0.0008494047619047619,
1526
+ "loss": 0.9063,
1527
  "step": 253
1528
  },
1529
  {
1530
  "epoch": 7.26,
1531
+ "learning_rate": 0.0008488095238095238,
1532
+ "loss": 0.8976,
1533
  "step": 254
1534
  },
1535
  {
1536
  "epoch": 7.29,
1537
+ "learning_rate": 0.0008482142857142857,
1538
+ "loss": 0.9283,
1539
  "step": 255
1540
  },
1541
  {
1542
  "epoch": 7.31,
1543
+ "learning_rate": 0.0008476190476190476,
1544
+ "loss": 0.9409,
1545
  "step": 256
1546
  },
1547
  {
1548
  "epoch": 7.34,
1549
+ "learning_rate": 0.0008470238095238096,
1550
+ "loss": 0.9311,
1551
  "step": 257
1552
  },
1553
  {
1554
  "epoch": 7.37,
1555
+ "learning_rate": 0.0008464285714285714,
1556
+ "loss": 0.926,
1557
  "step": 258
1558
  },
1559
  {
1560
  "epoch": 7.4,
1561
+ "learning_rate": 0.0008458333333333333,
1562
+ "loss": 0.9704,
1563
  "step": 259
1564
  },
1565
  {
1566
  "epoch": 7.43,
1567
+ "learning_rate": 0.0008452380952380953,
1568
+ "loss": 0.9515,
1569
  "step": 260
1570
  },
1571
  {
1572
  "epoch": 7.46,
1573
+ "learning_rate": 0.0008446428571428572,
1574
+ "loss": 0.9069,
1575
  "step": 261
1576
  },
1577
  {
1578
  "epoch": 7.49,
1579
+ "learning_rate": 0.000844047619047619,
1580
+ "loss": 0.9359,
1581
  "step": 262
1582
  },
1583
  {
1584
  "epoch": 7.51,
1585
+ "learning_rate": 0.0008434523809523809,
1586
+ "loss": 0.9482,
1587
  "step": 263
1588
  },
1589
  {
1590
  "epoch": 7.54,
1591
+ "learning_rate": 0.0008428571428571429,
1592
+ "loss": 0.9717,
1593
  "step": 264
1594
  },
1595
  {
1596
  "epoch": 7.57,
1597
+ "learning_rate": 0.0008422619047619048,
1598
+ "loss": 0.9869,
1599
  "step": 265
1600
  },
1601
  {
1602
  "epoch": 7.6,
1603
+ "learning_rate": 0.0008416666666666667,
1604
+ "loss": 0.9728,
1605
  "step": 266
1606
  },
1607
  {
1608
  "epoch": 7.63,
1609
+ "learning_rate": 0.0008410714285714285,
1610
+ "loss": 0.9516,
1611
  "step": 267
1612
  },
1613
  {
1614
  "epoch": 7.66,
1615
+ "learning_rate": 0.0008404761904761905,
1616
+ "loss": 0.9838,
1617
  "step": 268
1618
  },
1619
  {
1620
  "epoch": 7.69,
1621
+ "learning_rate": 0.0008398809523809524,
1622
+ "loss": 1.0044,
1623
  "step": 269
1624
  },
1625
  {
1626
  "epoch": 7.71,
1627
+ "learning_rate": 0.0008392857142857143,
1628
+ "loss": 1.0153,
1629
  "step": 270
1630
  },
1631
  {
1632
  "epoch": 7.74,
1633
+ "learning_rate": 0.0008386904761904763,
1634
+ "loss": 1.0382,
1635
  "step": 271
1636
  },
1637
  {
1638
  "epoch": 7.77,
1639
+ "learning_rate": 0.0008380952380952382,
1640
+ "loss": 1.0109,
1641
  "step": 272
1642
  },
1643
  {
1644
  "epoch": 7.8,
1645
+ "learning_rate": 0.0008375,
1646
+ "loss": 0.9989,
1647
  "step": 273
1648
  },
1649
  {
1650
  "epoch": 7.83,
1651
+ "learning_rate": 0.0008369047619047619,
1652
+ "loss": 1.0631,
1653
  "step": 274
1654
  },
1655
  {
1656
  "epoch": 7.86,
1657
+ "learning_rate": 0.0008363095238095239,
1658
+ "loss": 1.0546,
1659
  "step": 275
1660
  },
1661
  {
1662
  "epoch": 7.89,
1663
+ "learning_rate": 0.0008357142857142858,
1664
+ "loss": 1.0827,
1665
  "step": 276
1666
  },
1667
  {
1668
  "epoch": 7.91,
1669
+ "learning_rate": 0.0008351190476190476,
1670
+ "loss": 1.087,
1671
  "step": 277
1672
  },
1673
  {
1674
  "epoch": 7.94,
1675
+ "learning_rate": 0.0008345238095238095,
1676
+ "loss": 1.041,
1677
  "step": 278
1678
  },
1679
  {
1680
  "epoch": 7.97,
1681
+ "learning_rate": 0.0008339285714285715,
1682
+ "loss": 1.0633,
1683
  "step": 279
1684
  },
1685
  {
1686
  "epoch": 8.0,
1687
+ "learning_rate": 0.0008333333333333334,
1688
+ "loss": 1.0709,
1689
  "step": 280
1690
  },
1691
  {
1692
  "epoch": 8.03,
1693
+ "learning_rate": 0.0008327380952380953,
1694
+ "loss": 0.7273,
1695
  "step": 281
1696
  },
1697
  {
1698
  "epoch": 8.06,
1699
+ "learning_rate": 0.0008321428571428573,
1700
+ "loss": 0.726,
1701
  "step": 282
1702
  },
1703
  {
1704
  "epoch": 8.09,
1705
+ "learning_rate": 0.0008315476190476191,
1706
+ "loss": 0.6943,
1707
  "step": 283
1708
  },
1709
  {
1710
  "epoch": 8.11,
1711
+ "learning_rate": 0.000830952380952381,
1712
+ "loss": 0.7127,
1713
  "step": 284
1714
  },
1715
  {
1716
  "epoch": 8.14,
1717
+ "learning_rate": 0.0008303571428571429,
1718
+ "loss": 0.6915,
1719
  "step": 285
1720
  },
1721
  {
1722
  "epoch": 8.17,
1723
+ "learning_rate": 0.0008297619047619049,
1724
+ "loss": 0.7138,
1725
  "step": 286
1726
  },
1727
  {
1728
  "epoch": 8.2,
1729
+ "learning_rate": 0.0008291666666666667,
1730
+ "loss": 0.7356,
1731
  "step": 287
1732
  },
1733
  {
1734
  "epoch": 8.23,
1735
+ "learning_rate": 0.0008285714285714286,
1736
+ "loss": 0.678,
1737
  "step": 288
1738
  },
1739
  {
1740
  "epoch": 8.26,
1741
+ "learning_rate": 0.0008279761904761904,
1742
+ "loss": 0.7375,
1743
  "step": 289
1744
  },
1745
  {
1746
  "epoch": 8.29,
1747
+ "learning_rate": 0.0008273809523809524,
1748
+ "loss": 0.7284,
1749
  "step": 290
1750
  },
1751
  {
1752
  "epoch": 8.31,
1753
+ "learning_rate": 0.0008267857142857143,
1754
+ "loss": 0.7304,
1755
  "step": 291
1756
  },
1757
  {
1758
  "epoch": 8.34,
1759
+ "learning_rate": 0.0008261904761904761,
1760
+ "loss": 0.7633,
1761
  "step": 292
1762
  },
1763
  {
1764
  "epoch": 8.37,
1765
+ "learning_rate": 0.0008255952380952381,
1766
+ "loss": 0.7416,
1767
  "step": 293
1768
  },
1769
  {
1770
  "epoch": 8.4,
1771
+ "learning_rate": 0.000825,
1772
+ "loss": 0.7895,
1773
  "step": 294
1774
  },
1775
  {
1776
  "epoch": 8.43,
1777
+ "learning_rate": 0.0008244047619047619,
1778
+ "loss": 0.8037,
1779
  "step": 295
1780
  },
1781
  {
1782
  "epoch": 8.46,
1783
+ "learning_rate": 0.0008238095238095238,
1784
+ "loss": 0.7736,
1785
  "step": 296
1786
  },
1787
  {
1788
  "epoch": 8.49,
1789
+ "learning_rate": 0.0008232142857142857,
1790
+ "loss": 0.778,
1791
  "step": 297
1792
  },
1793
  {
1794
  "epoch": 8.51,
1795
+ "learning_rate": 0.0008226190476190476,
1796
+ "loss": 0.7644,
1797
  "step": 298
1798
  },
1799
  {
1800
  "epoch": 8.54,
1801
+ "learning_rate": 0.0008220238095238095,
1802
+ "loss": 0.7942,
1803
  "step": 299
1804
  },
1805
  {
1806
  "epoch": 8.57,
1807
+ "learning_rate": 0.0008214285714285714,
1808
+ "loss": 0.7715,
1809
  "step": 300
1810
  },
1811
  {
1812
  "epoch": 8.6,
1813
+ "learning_rate": 0.0008208333333333334,
1814
+ "loss": 0.8288,
1815
  "step": 301
1816
  },
1817
  {
1818
  "epoch": 8.63,
1819
+ "learning_rate": 0.0008202380952380952,
1820
+ "loss": 0.8263,
1821
  "step": 302
1822
  },
1823
  {
1824
  "epoch": 8.66,
1825
+ "learning_rate": 0.0008196428571428571,
1826
+ "loss": 0.7923,
1827
  "step": 303
1828
  },
1829
  {
1830
  "epoch": 8.69,
1831
+ "learning_rate": 0.0008190476190476191,
1832
+ "loss": 0.8063,
1833
  "step": 304
1834
  },
1835
  {
1836
  "epoch": 8.71,
1837
+ "learning_rate": 0.000818452380952381,
1838
+ "loss": 0.8016,
1839
  "step": 305
1840
  },
1841
  {
1842
  "epoch": 8.74,
1843
+ "learning_rate": 0.0008178571428571428,
1844
+ "loss": 0.8467,
1845
  "step": 306
1846
  },
1847
  {
1848
  "epoch": 8.77,
1849
+ "learning_rate": 0.0008172619047619047,
1850
+ "loss": 0.8353,
1851
  "step": 307
1852
  },
1853
  {
1854
  "epoch": 8.8,
1855
+ "learning_rate": 0.0008166666666666667,
1856
+ "loss": 0.8272,
1857
  "step": 308
1858
  },
1859
  {
1860
  "epoch": 8.83,
1861
+ "learning_rate": 0.0008160714285714286,
1862
+ "loss": 0.8852,
1863
  "step": 309
1864
  },
1865
  {
1866
  "epoch": 8.86,
1867
+ "learning_rate": 0.0008154761904761905,
1868
+ "loss": 0.8541,
1869
  "step": 310
1870
  },
1871
  {
1872
  "epoch": 8.89,
1873
+ "learning_rate": 0.0008148809523809523,
1874
+ "loss": 0.8236,
1875
  "step": 311
1876
  },
1877
  {
1878
  "epoch": 8.91,
1879
+ "learning_rate": 0.0008142857142857143,
1880
+ "loss": 0.8609,
1881
  "step": 312
1882
  },
1883
  {
1884
  "epoch": 8.94,
1885
+ "learning_rate": 0.0008136904761904762,
1886
+ "loss": 0.8802,
1887
  "step": 313
1888
  },
1889
  {
1890
  "epoch": 8.97,
1891
+ "learning_rate": 0.0008130952380952381,
1892
+ "loss": 0.8615,
1893
  "step": 314
1894
  },
1895
  {
1896
  "epoch": 9.0,
1897
+ "learning_rate": 0.0008125000000000001,
1898
+ "loss": 0.8514,
1899
  "step": 315
1900
  },
1901
  {
1902
  "epoch": 9.03,
1903
+ "learning_rate": 0.000811904761904762,
1904
+ "loss": 0.5529,
1905
  "step": 316
1906
  },
1907
  {
1908
  "epoch": 9.06,
1909
+ "learning_rate": 0.0008113095238095238,
1910
+ "loss": 0.5736,
1911
  "step": 317
1912
  },
1913
  {
1914
  "epoch": 9.09,
1915
+ "learning_rate": 0.0008107142857142857,
1916
+ "loss": 0.5647,
1917
  "step": 318
1918
  },
1919
  {
1920
  "epoch": 9.11,
1921
+ "learning_rate": 0.0008101190476190477,
1922
+ "loss": 0.5677,
1923
  "step": 319
1924
  },
1925
  {
1926
  "epoch": 9.14,
1927
+ "learning_rate": 0.0008095238095238096,
1928
+ "loss": 0.5991,
1929
  "step": 320
1930
  },
1931
  {
1932
  "epoch": 9.17,
1933
+ "learning_rate": 0.0008089285714285714,
1934
+ "loss": 0.5666,
1935
  "step": 321
1936
  },
1937
  {
1938
  "epoch": 9.2,
1939
+ "learning_rate": 0.0008083333333333333,
1940
+ "loss": 0.5902,
1941
  "step": 322
1942
  },
1943
  {
1944
  "epoch": 9.23,
1945
+ "learning_rate": 0.0008077380952380953,
1946
+ "loss": 0.5961,
1947
  "step": 323
1948
  },
1949
  {
1950
  "epoch": 9.26,
1951
+ "learning_rate": 0.0008071428571428572,
1952
+ "loss": 0.5684,
1953
  "step": 324
1954
  },
1955
  {
1956
  "epoch": 9.29,
1957
+ "learning_rate": 0.0008065476190476191,
1958
+ "loss": 0.5976,
1959
  "step": 325
1960
  },
1961
  {
1962
  "epoch": 9.31,
1963
+ "learning_rate": 0.0008059523809523809,
1964
+ "loss": 0.6033,
1965
  "step": 326
1966
  },
1967
  {
1968
  "epoch": 9.34,
1969
+ "learning_rate": 0.0008053571428571429,
1970
+ "loss": 0.5877,
1971
  "step": 327
1972
  },
1973
  {
1974
  "epoch": 9.37,
1975
+ "learning_rate": 0.0008047619047619048,
1976
+ "loss": 0.5943,
1977
  "step": 328
1978
  },
1979
  {
1980
  "epoch": 9.4,
1981
+ "learning_rate": 0.0008041666666666667,
1982
+ "loss": 0.6176,
1983
  "step": 329
1984
  },
1985
  {
1986
  "epoch": 9.43,
1987
+ "learning_rate": 0.0008035714285714287,
1988
+ "loss": 0.6143,
1989
  "step": 330
1990
  },
1991
  {
1992
  "epoch": 9.46,
1993
+ "learning_rate": 0.0008029761904761905,
1994
+ "loss": 0.597,
1995
  "step": 331
1996
  },
1997
  {
1998
  "epoch": 9.49,
1999
+ "learning_rate": 0.0008023809523809524,
2000
+ "loss": 0.604,
2001
  "step": 332
2002
  },
2003
  {
2004
  "epoch": 9.51,
2005
+ "learning_rate": 0.0008017857142857143,
2006
+ "loss": 0.6036,
2007
  "step": 333
2008
  },
2009
  {
2010
  "epoch": 9.54,
2011
+ "learning_rate": 0.0008011904761904763,
2012
+ "loss": 0.6243,
2013
  "step": 334
2014
  },
2015
  {
2016
  "epoch": 9.57,
2017
+ "learning_rate": 0.0008005952380952382,
2018
+ "loss": 0.6301,
2019
  "step": 335
2020
  },
2021
  {
2022
  "epoch": 9.6,
2023
+ "learning_rate": 0.0008,
2024
+ "loss": 0.6271,
2025
  "step": 336
2026
  },
2027
  {
2028
  "epoch": 9.63,
2029
+ "learning_rate": 0.0007994047619047619,
2030
+ "loss": 0.6246,
2031
  "step": 337
2032
  },
2033
  {
2034
  "epoch": 9.66,
2035
+ "learning_rate": 0.0007988095238095239,
2036
+ "loss": 0.6597,
2037
  "step": 338
2038
  },
2039
  {
2040
  "epoch": 9.69,
2041
+ "learning_rate": 0.0007982142857142858,
2042
+ "loss": 0.6517,
2043
  "step": 339
2044
  },
2045
  {
2046
  "epoch": 9.71,
2047
+ "learning_rate": 0.0007976190476190477,
2048
+ "loss": 0.6645,
2049
  "step": 340
2050
  },
2051
  {
2052
  "epoch": 9.74,
2053
+ "learning_rate": 0.0007970238095238096,
2054
+ "loss": 0.6542,
2055
  "step": 341
2056
  },
2057
  {
2058
  "epoch": 9.77,
2059
+ "learning_rate": 0.0007964285714285714,
2060
+ "loss": 0.6496,
2061
  "step": 342
2062
  },
2063
  {
2064
  "epoch": 9.8,
2065
+ "learning_rate": 0.0007958333333333333,
2066
+ "loss": 0.6309,
2067
  "step": 343
2068
  },
2069
  {
2070
  "epoch": 9.83,
2071
+ "learning_rate": 0.0007952380952380952,
2072
+ "loss": 0.6668,
2073
  "step": 344
2074
  },
2075
  {
2076
  "epoch": 9.86,
2077
+ "learning_rate": 0.0007946428571428572,
2078
+ "loss": 0.6841,
2079
  "step": 345
2080
  },
2081
  {
2082
  "epoch": 9.89,
2083
+ "learning_rate": 0.000794047619047619,
2084
+ "loss": 0.6958,
2085
  "step": 346
2086
  },
2087
  {
2088
  "epoch": 9.91,
2089
+ "learning_rate": 0.0007934523809523809,
2090
+ "loss": 0.6592,
2091
  "step": 347
2092
  },
2093
  {
2094
  "epoch": 9.94,
2095
+ "learning_rate": 0.0007928571428571428,
2096
+ "loss": 0.6968,
2097
  "step": 348
2098
  },
2099
  {
2100
  "epoch": 9.97,
2101
+ "learning_rate": 0.0007922619047619048,
2102
+ "loss": 0.6916,
2103
  "step": 349
2104
  },
2105
  {
2106
  "epoch": 10.0,
2107
+ "learning_rate": 0.0007916666666666666,
2108
+ "loss": 0.7155,
2109
  "step": 350
2110
  },
2111
  {
2112
  "epoch": 10.03,
2113
+ "learning_rate": 0.0007910714285714285,
2114
+ "loss": 0.4288,
2115
  "step": 351
2116
  },
2117
  {
2118
  "epoch": 10.06,
2119
+ "learning_rate": 0.0007904761904761905,
2120
+ "loss": 0.4493,
2121
  "step": 352
2122
  },
2123
  {
2124
  "epoch": 10.09,
2125
+ "learning_rate": 0.0007898809523809524,
2126
+ "loss": 0.4152,
2127
  "step": 353
2128
  },
2129
  {
2130
  "epoch": 10.11,
2131
+ "learning_rate": 0.0007892857142857143,
2132
+ "loss": 0.4324,
2133
  "step": 354
2134
  },
2135
  {
2136
  "epoch": 10.14,
2137
+ "learning_rate": 0.0007886904761904761,
2138
+ "loss": 0.4334,
2139
  "step": 355
2140
  },
2141
  {
2142
  "epoch": 10.17,
2143
+ "learning_rate": 0.0007880952380952381,
2144
+ "loss": 0.4479,
2145
  "step": 356
2146
  },
2147
  {
2148
  "epoch": 10.2,
2149
+ "learning_rate": 0.0007875,
2150
+ "loss": 0.4391,
2151
  "step": 357
2152
  },
2153
  {
2154
  "epoch": 10.23,
2155
+ "learning_rate": 0.0007869047619047619,
2156
+ "loss": 0.4534,
2157
  "step": 358
2158
  },
2159
  {
2160
  "epoch": 10.26,
2161
+ "learning_rate": 0.0007863095238095238,
2162
+ "loss": 0.4494,
2163
  "step": 359
2164
  },
2165
  {
2166
  "epoch": 10.29,
2167
+ "learning_rate": 0.0007857142857142857,
2168
+ "loss": 0.4519,
2169
  "step": 360
2170
  },
2171
  {
2172
  "epoch": 10.31,
2173
+ "learning_rate": 0.0007851190476190476,
2174
+ "loss": 0.4673,
2175
  "step": 361
2176
  },
2177
  {
2178
  "epoch": 10.34,
2179
+ "learning_rate": 0.0007845238095238095,
2180
+ "loss": 0.4628,
2181
  "step": 362
2182
  },
2183
  {
2184
  "epoch": 10.37,
2185
+ "learning_rate": 0.0007839285714285715,
2186
+ "loss": 0.4608,
2187
  "step": 363
2188
  },
2189
  {
2190
  "epoch": 10.4,
2191
+ "learning_rate": 0.0007833333333333334,
2192
+ "loss": 0.4755,
2193
  "step": 364
2194
  },
2195
  {
2196
  "epoch": 10.43,
2197
+ "learning_rate": 0.0007827380952380952,
2198
+ "loss": 0.4771,
2199
  "step": 365
2200
  },
2201
  {
2202
  "epoch": 10.46,
2203
+ "learning_rate": 0.0007821428571428571,
2204
+ "loss": 0.4679,
2205
  "step": 366
2206
  },
2207
  {
2208
  "epoch": 10.49,
2209
+ "learning_rate": 0.0007815476190476191,
2210
+ "loss": 0.4985,
2211
  "step": 367
2212
  },
2213
  {
2214
  "epoch": 10.51,
2215
+ "learning_rate": 0.000780952380952381,
2216
+ "loss": 0.5242,
2217
  "step": 368
2218
  },
2219
  {
2220
  "epoch": 10.54,
2221
+ "learning_rate": 0.0007803571428571429,
2222
+ "loss": 0.478,
2223
  "step": 369
2224
  },
2225
  {
2226
  "epoch": 10.57,
2227
+ "learning_rate": 0.0007797619047619047,
2228
+ "loss": 0.5072,
2229
  "step": 370
2230
  },
2231
  {
2232
  "epoch": 10.6,
2233
+ "learning_rate": 0.0007791666666666667,
2234
+ "loss": 0.5001,
2235
  "step": 371
2236
  },
2237
  {
2238
  "epoch": 10.63,
2239
+ "learning_rate": 0.0007785714285714286,
2240
+ "loss": 0.5119,
2241
  "step": 372
2242
  },
2243
  {
2244
  "epoch": 10.66,
2245
+ "learning_rate": 0.0007779761904761905,
2246
+ "loss": 0.5212,
2247
  "step": 373
2248
  },
2249
  {
2250
  "epoch": 10.69,
2251
+ "learning_rate": 0.0007773809523809525,
2252
+ "loss": 0.5073,
2253
  "step": 374
2254
  },
2255
  {
2256
  "epoch": 10.71,
2257
+ "learning_rate": 0.0007767857142857143,
2258
+ "loss": 0.5089,
2259
  "step": 375
2260
  },
2261
  {
2262
  "epoch": 10.74,
2263
+ "learning_rate": 0.0007761904761904762,
2264
+ "loss": 0.5161,
2265
  "step": 376
2266
  },
2267
  {
2268
  "epoch": 10.77,
2269
+ "learning_rate": 0.0007755952380952381,
2270
+ "loss": 0.4861,
2271
  "step": 377
2272
  },
2273
  {
2274
  "epoch": 10.8,
2275
+ "learning_rate": 0.0007750000000000001,
2276
+ "loss": 0.531,
2277
  "step": 378
2278
  },
2279
  {
2280
  "epoch": 10.83,
2281
+ "learning_rate": 0.000774404761904762,
2282
+ "loss": 0.5244,
2283
  "step": 379
2284
  },
2285
  {
2286
  "epoch": 10.86,
2287
+ "learning_rate": 0.0007738095238095238,
2288
+ "loss": 0.5446,
2289
  "step": 380
2290
  },
2291
  {
2292
  "epoch": 10.89,
2293
+ "learning_rate": 0.0007732142857142857,
2294
+ "loss": 0.5515,
2295
  "step": 381
2296
  },
2297
  {
2298
  "epoch": 10.91,
2299
+ "learning_rate": 0.0007726190476190477,
2300
+ "loss": 0.5345,
2301
  "step": 382
2302
  },
2303
  {
2304
  "epoch": 10.94,
2305
+ "learning_rate": 0.0007720238095238096,
2306
+ "loss": 0.537,
2307
  "step": 383
2308
  },
2309
  {
2310
  "epoch": 10.97,
2311
+ "learning_rate": 0.0007714285714285715,
2312
+ "loss": 0.5589,
2313
  "step": 384
2314
  },
2315
  {
2316
  "epoch": 11.0,
2317
+ "learning_rate": 0.0007708333333333334,
2318
+ "loss": 0.5459,
2319
  "step": 385
2320
  },
2321
  {
2322
  "epoch": 11.03,
2323
+ "learning_rate": 0.0007702380952380953,
2324
+ "loss": 0.3344,
2325
  "step": 386
2326
  },
2327
  {
2328
  "epoch": 11.06,
2329
+ "learning_rate": 0.0007696428571428572,
2330
+ "loss": 0.3352,
2331
  "step": 387
2332
  },
2333
  {
2334
  "epoch": 11.09,
2335
+ "learning_rate": 0.0007690476190476191,
2336
+ "loss": 0.3263,
2337
  "step": 388
2338
  },
2339
  {
2340
  "epoch": 11.11,
2341
+ "learning_rate": 0.0007684523809523811,
2342
+ "loss": 0.3501,
2343
  "step": 389
2344
  },
2345
  {
2346
  "epoch": 11.14,
2347
+ "learning_rate": 0.0007678571428571429,
2348
+ "loss": 0.3523,
2349
  "step": 390
2350
  },
2351
  {
2352
  "epoch": 11.17,
2353
+ "learning_rate": 0.0007672619047619048,
2354
+ "loss": 0.3379,
2355
  "step": 391
2356
  },
2357
  {
2358
  "epoch": 11.2,
2359
+ "learning_rate": 0.0007666666666666667,
2360
+ "loss": 0.3456,
2361
  "step": 392
2362
  },
2363
  {
2364
  "epoch": 11.23,
2365
+ "learning_rate": 0.0007660714285714287,
2366
+ "loss": 0.347,
2367
  "step": 393
2368
  },
2369
  {
2370
  "epoch": 11.26,
2371
+ "learning_rate": 0.0007654761904761904,
2372
+ "loss": 0.3622,
2373
  "step": 394
2374
  },
2375
  {
2376
  "epoch": 11.29,
2377
+ "learning_rate": 0.0007648809523809523,
2378
+ "loss": 0.3612,
2379
  "step": 395
2380
  },
2381
  {
2382
  "epoch": 11.31,
2383
+ "learning_rate": 0.0007642857142857142,
2384
+ "loss": 0.3789,
2385
  "step": 396
2386
  },
2387
  {
2388
  "epoch": 11.34,
2389
+ "learning_rate": 0.0007636904761904762,
2390
+ "loss": 0.3491,
2391
  "step": 397
2392
  },
2393
  {
2394
  "epoch": 11.37,
2395
+ "learning_rate": 0.0007630952380952381,
2396
+ "loss": 0.3578,
2397
  "step": 398
2398
  },
2399
  {
2400
  "epoch": 11.4,
2401
+ "learning_rate": 0.0007624999999999999,
2402
+ "loss": 0.3524,
2403
  "step": 399
2404
  },
2405
  {
2406
  "epoch": 11.43,
2407
+ "learning_rate": 0.0007619047619047619,
2408
+ "loss": 0.3671,
2409
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2410
  }
2411
  ],
2412
  "logging_steps": 1,
2413
  "max_steps": 1680,
2414
  "num_train_epochs": 48,
2415
  "save_steps": 100,
2416
+ "total_flos": 2.3080655169481728e+17,
2417
  "trial_name": null,
2418
  "trial_params": null
2419
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8697dec26118787553757d80401d29ed990405410ca911afb8c282e3da6934e3
3
- size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8d3d0c26b3d7e124a186eae5c4ec193759b71982a0e47131ab43fa25d3a439
3
+ size 4155