jimypbr commited on
Commit
67386e3
1 Parent(s): b81cd13

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_exact_match": 87.01040681173131,
4
- "eval_f1": 92.97352589089498,
5
  "eval_samples": 10788,
6
- "train_loss": 1.088201597354519,
7
- "train_runtime": 725.1829,
8
  "train_samples": 88550,
9
- "train_samples_per_second": 244.214,
10
- "train_steps_per_second": 1.906
11
  }
1
  {
2
  "epoch": 2.0,
3
+ "eval_exact_match": 87.00094607379376,
4
+ "eval_f1": 93.1162823709701,
5
  "eval_samples": 10788,
6
+ "train_loss": 1.0848320548337724,
7
+ "train_runtime": 1094.3482,
8
  "train_samples": 88550,
9
+ "train_samples_per_second": 161.831,
10
+ "train_steps_per_second": 1.263
11
  }
eval_nbest_predictions.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6db3c582affb4fc1a6cccc9f461721100b335e4094fc6f4b5708b14e49d3cd2
3
- size 45927734
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb0aa620a0b1ec322f781d7a9df316ec3cea4c9fa0b4162f942e5b3e1f8b1e8d
3
+ size 45928944
eval_predictions.json CHANGED
The diff for this file is too large to render. See raw diff
eval_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_exact_match": 87.01040681173131,
4
- "eval_f1": 92.97352589089498,
5
  "eval_samples": 10788
6
  }
1
  {
2
  "epoch": 2.0,
3
+ "eval_exact_match": 87.00094607379376,
4
+ "eval_f1": 93.1162823709701,
5
  "eval_samples": 10788
6
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af932ffb7604732af50a824ab59a5b609d3327fcc26203ca5c10d44652fff9ca
3
  size 277274547
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee26c6d455fb91793d4a3bc2e0206194107de7f421eaffeb1cc2df085bd7cd0
3
  size 277274547
runs/Jun22_10-14-47_gbnwp-pod015-2.ipu.graphcore.ai/events.out.tfevents.1655890263.gbnwp-pod015-2.ipu.graphcore.ai CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4535428b7b57a3488b1e5ad6a7187a628ba59f4314082a3c0b42d2595ab29c75
3
- size 18621
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a22a96511a82a943f72d21a239dc1bbab02399201b4683a7baebc3517bbb2c
3
+ size 24991
runs/Jun22_10-14-47_gbnwp-pod015-2.ipu.graphcore.ai/events.out.tfevents.1655891928.gbnwp-pod015-2.ipu.graphcore.ai ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fbcdf9519fefda9471bd7a2219914dcdb23e3896953111de96bebc162112f42
3
+ size 40
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "train_loss": 1.088201597354519,
4
- "train_runtime": 725.1829,
5
  "train_samples": 88550,
6
- "train_samples_per_second": 244.214,
7
- "train_steps_per_second": 1.906
8
  }
1
  {
2
  "epoch": 2.0,
3
+ "train_loss": 1.0848320548337724,
4
+ "train_runtime": 1094.3482,
5
  "train_samples": 88550,
6
+ "train_samples_per_second": 161.831,
7
+ "train_steps_per_second": 1.263
8
  }
trainer_state.json CHANGED
@@ -10,839 +10,839 @@
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 1.7341040462427746e-06,
13
- "loss": 6.1875,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.03,
18
  "learning_rate": 3.468208092485549e-06,
19
- "loss": 5.6434,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.04,
24
  "learning_rate": 5.202312138728324e-06,
25
- "loss": 5.1445,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.06,
30
  "learning_rate": 6.936416184971098e-06,
31
- "loss": 4.9082,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.07,
36
  "learning_rate": 8.670520231213873e-06,
37
- "loss": 4.6742,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.09,
42
  "learning_rate": 1.0404624277456647e-05,
43
- "loss": 4.0891,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.1,
48
  "learning_rate": 1.2138728323699422e-05,
49
- "loss": 3.7152,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.12,
54
  "learning_rate": 1.3872832369942197e-05,
55
- "loss": 3.3334,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.13,
60
  "learning_rate": 1.560693641618497e-05,
61
- "loss": 2.2468,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.14,
66
  "learning_rate": 1.7341040462427746e-05,
67
- "loss": 1.7937,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.16,
72
  "learning_rate": 1.9075144508670522e-05,
73
- "loss": 1.8321,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.17,
78
  "learning_rate": 2.0809248554913295e-05,
79
- "loss": 1.4568,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.19,
84
  "learning_rate": 2.2543352601156068e-05,
85
- "loss": 1.3031,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.2,
90
  "learning_rate": 2.4277456647398844e-05,
91
- "loss": 1.0809,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.22,
96
  "learning_rate": 2.601156069364162e-05,
97
- "loss": 1.4456,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.23,
102
  "learning_rate": 2.7745664739884393e-05,
103
- "loss": 1.5199,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.25,
108
  "learning_rate": 2.9479768786127166e-05,
109
- "loss": 0.9764,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.26,
114
  "learning_rate": 3.121387283236994e-05,
115
- "loss": 1.2064,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.27,
120
  "learning_rate": 3.294797687861272e-05,
121
- "loss": 1.0915,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.29,
126
  "learning_rate": 3.468208092485549e-05,
127
- "loss": 1.3032,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.3,
132
  "learning_rate": 3.6416184971098265e-05,
133
- "loss": 1.0709,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.32,
138
  "learning_rate": 3.8150289017341044e-05,
139
- "loss": 1.6387,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.33,
144
  "learning_rate": 3.988439306358381e-05,
145
- "loss": 1.2317,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.35,
150
  "learning_rate": 4.161849710982659e-05,
151
- "loss": 1.0637,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.36,
156
  "learning_rate": 4.335260115606937e-05,
157
- "loss": 1.0287,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.38,
162
  "learning_rate": 4.5086705202312136e-05,
163
- "loss": 1.074,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.39,
168
  "learning_rate": 4.6820809248554915e-05,
169
- "loss": 0.868,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.41,
174
  "learning_rate": 4.855491329479769e-05,
175
- "loss": 0.8482,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.42,
180
  "learning_rate": 5.028901734104046e-05,
181
- "loss": 1.2787,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.43,
186
  "learning_rate": 5.202312138728324e-05,
187
- "loss": 1.1221,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.45,
192
  "learning_rate": 5.3757225433526014e-05,
193
- "loss": 0.9836,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.46,
198
  "learning_rate": 5.5491329479768787e-05,
199
- "loss": 1.5154,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.48,
204
  "learning_rate": 5.7225433526011566e-05,
205
- "loss": 0.9228,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.49,
210
  "learning_rate": 5.895953757225433e-05,
211
- "loss": 0.5593,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.51,
216
  "learning_rate": 5.976833976833977e-05,
217
- "loss": 0.8908,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.52,
222
  "learning_rate": 5.9189189189189195e-05,
223
- "loss": 0.9526,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.54,
228
  "learning_rate": 5.861003861003862e-05,
229
- "loss": 1.2775,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.55,
234
  "learning_rate": 5.8030888030888026e-05,
235
- "loss": 1.1524,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 0.56,
240
  "learning_rate": 5.745173745173745e-05,
241
- "loss": 1.2414,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 0.58,
246
  "learning_rate": 5.687258687258687e-05,
247
- "loss": 1.1137,
248
  "step": 400
249
  },
250
  {
251
  "epoch": 0.59,
252
  "learning_rate": 5.629343629343629e-05,
253
- "loss": 0.9212,
254
  "step": 410
255
  },
256
  {
257
  "epoch": 0.61,
258
  "learning_rate": 5.5714285714285715e-05,
259
- "loss": 1.0503,
260
  "step": 420
261
  },
262
  {
263
  "epoch": 0.62,
264
  "learning_rate": 5.513513513513514e-05,
265
- "loss": 0.9738,
266
  "step": 430
267
  },
268
  {
269
  "epoch": 0.64,
270
  "learning_rate": 5.455598455598455e-05,
271
- "loss": 1.2703,
272
  "step": 440
273
  },
274
  {
275
  "epoch": 0.65,
276
  "learning_rate": 5.3976833976833975e-05,
277
- "loss": 1.086,
278
  "step": 450
279
  },
280
  {
281
  "epoch": 0.67,
282
  "learning_rate": 5.33976833976834e-05,
283
- "loss": 0.8879,
284
  "step": 460
285
  },
286
  {
287
  "epoch": 0.68,
288
  "learning_rate": 5.281853281853282e-05,
289
- "loss": 0.8404,
290
  "step": 470
291
  },
292
  {
293
  "epoch": 0.69,
294
  "learning_rate": 5.223938223938224e-05,
295
- "loss": 0.775,
296
  "step": 480
297
  },
298
  {
299
  "epoch": 0.71,
300
  "learning_rate": 5.166023166023166e-05,
301
- "loss": 1.2602,
302
  "step": 490
303
  },
304
  {
305
  "epoch": 0.72,
306
  "learning_rate": 5.108108108108108e-05,
307
- "loss": 1.2931,
308
  "step": 500
309
  },
310
  {
311
  "epoch": 0.74,
312
  "learning_rate": 5.05019305019305e-05,
313
- "loss": 1.1383,
314
  "step": 510
315
  },
316
  {
317
  "epoch": 0.75,
318
  "learning_rate": 4.9922779922779924e-05,
319
- "loss": 0.7263,
320
  "step": 520
321
  },
322
  {
323
  "epoch": 0.77,
324
  "learning_rate": 4.9343629343629346e-05,
325
- "loss": 0.5444,
326
  "step": 530
327
  },
328
  {
329
  "epoch": 0.78,
330
  "learning_rate": 4.876447876447877e-05,
331
- "loss": 0.8266,
332
  "step": 540
333
  },
334
  {
335
  "epoch": 0.8,
336
  "learning_rate": 4.8185328185328184e-05,
337
- "loss": 1.0101,
338
  "step": 550
339
  },
340
  {
341
  "epoch": 0.81,
342
  "learning_rate": 4.7606177606177606e-05,
343
- "loss": 0.8983,
344
  "step": 560
345
  },
346
  {
347
  "epoch": 0.82,
348
  "learning_rate": 4.702702702702703e-05,
349
- "loss": 0.5349,
350
  "step": 570
351
  },
352
  {
353
  "epoch": 0.84,
354
  "learning_rate": 4.644787644787645e-05,
355
- "loss": 0.7786,
356
  "step": 580
357
  },
358
  {
359
  "epoch": 0.85,
360
  "learning_rate": 4.586872586872587e-05,
361
- "loss": 0.8275,
362
  "step": 590
363
  },
364
  {
365
  "epoch": 0.87,
366
  "learning_rate": 4.5289575289575295e-05,
367
- "loss": 0.8844,
368
  "step": 600
369
  },
370
  {
371
  "epoch": 0.88,
372
  "learning_rate": 4.471042471042471e-05,
373
- "loss": 1.1885,
374
  "step": 610
375
  },
376
  {
377
  "epoch": 0.9,
378
  "learning_rate": 4.413127413127413e-05,
379
- "loss": 0.8453,
380
  "step": 620
381
  },
382
  {
383
  "epoch": 0.91,
384
  "learning_rate": 4.3552123552123555e-05,
385
- "loss": 0.7368,
386
  "step": 630
387
  },
388
  {
389
  "epoch": 0.93,
390
  "learning_rate": 4.297297297297298e-05,
391
- "loss": 1.0366,
392
  "step": 640
393
  },
394
  {
395
  "epoch": 0.94,
396
  "learning_rate": 4.23938223938224e-05,
397
- "loss": 0.4579,
398
  "step": 650
399
  },
400
  {
401
  "epoch": 0.96,
402
  "learning_rate": 4.181467181467182e-05,
403
- "loss": 0.873,
404
  "step": 660
405
  },
406
  {
407
  "epoch": 0.97,
408
  "learning_rate": 4.123552123552124e-05,
409
- "loss": 1.0861,
410
  "step": 670
411
  },
412
  {
413
  "epoch": 0.98,
414
  "learning_rate": 4.065637065637066e-05,
415
- "loss": 0.7572,
416
  "step": 680
417
  },
418
  {
419
  "epoch": 1.0,
420
  "learning_rate": 4.0077220077220075e-05,
421
- "loss": 0.7174,
422
  "step": 690
423
  },
424
  {
425
  "epoch": 1.01,
426
  "learning_rate": 3.94980694980695e-05,
427
- "loss": 0.4577,
428
  "step": 700
429
  },
430
  {
431
  "epoch": 1.03,
432
  "learning_rate": 3.891891891891892e-05,
433
- "loss": 0.6622,
434
  "step": 710
435
  },
436
  {
437
  "epoch": 1.04,
438
  "learning_rate": 3.833976833976834e-05,
439
- "loss": 0.3438,
440
  "step": 720
441
  },
442
  {
443
  "epoch": 1.06,
444
  "learning_rate": 3.776061776061776e-05,
445
- "loss": 0.7274,
446
  "step": 730
447
  },
448
  {
449
  "epoch": 1.07,
450
  "learning_rate": 3.718146718146718e-05,
451
- "loss": 0.6925,
452
  "step": 740
453
  },
454
  {
455
  "epoch": 1.09,
456
  "learning_rate": 3.66023166023166e-05,
457
- "loss": 0.6156,
458
  "step": 750
459
  },
460
  {
461
  "epoch": 1.1,
462
  "learning_rate": 3.6023166023166024e-05,
463
- "loss": 0.8509,
464
  "step": 760
465
  },
466
  {
467
  "epoch": 1.11,
468
  "learning_rate": 3.5444015444015446e-05,
469
- "loss": 0.5898,
470
  "step": 770
471
  },
472
  {
473
  "epoch": 1.13,
474
  "learning_rate": 3.486486486486486e-05,
475
- "loss": 0.7896,
476
  "step": 780
477
  },
478
  {
479
  "epoch": 1.14,
480
  "learning_rate": 3.4285714285714284e-05,
481
- "loss": 0.6095,
482
  "step": 790
483
  },
484
  {
485
  "epoch": 1.16,
486
  "learning_rate": 3.3706563706563706e-05,
487
- "loss": 0.7593,
488
  "step": 800
489
  },
490
  {
491
  "epoch": 1.17,
492
  "learning_rate": 3.312741312741313e-05,
493
- "loss": 0.7934,
494
  "step": 810
495
  },
496
  {
497
  "epoch": 1.19,
498
  "learning_rate": 3.254826254826255e-05,
499
- "loss": 0.6189,
500
  "step": 820
501
  },
502
  {
503
  "epoch": 1.2,
504
  "learning_rate": 3.196911196911197e-05,
505
- "loss": 0.8726,
506
  "step": 830
507
  },
508
  {
509
  "epoch": 1.22,
510
  "learning_rate": 3.138996138996139e-05,
511
- "loss": 1.0715,
512
  "step": 840
513
  },
514
  {
515
  "epoch": 1.23,
516
  "learning_rate": 3.081081081081081e-05,
517
- "loss": 0.8481,
518
  "step": 850
519
  },
520
  {
521
  "epoch": 1.24,
522
  "learning_rate": 3.0231660231660233e-05,
523
- "loss": 0.906,
524
  "step": 860
525
  },
526
  {
527
  "epoch": 1.26,
528
  "learning_rate": 2.9652509652509655e-05,
529
- "loss": 0.727,
530
  "step": 870
531
  },
532
  {
533
  "epoch": 1.27,
534
  "learning_rate": 2.9073359073359074e-05,
535
- "loss": 1.1817,
536
  "step": 880
537
  },
538
  {
539
  "epoch": 1.29,
540
  "learning_rate": 2.8494208494208496e-05,
541
- "loss": 0.8072,
542
  "step": 890
543
  },
544
  {
545
  "epoch": 1.3,
546
  "learning_rate": 2.7915057915057918e-05,
547
- "loss": 0.9308,
548
  "step": 900
549
  },
550
  {
551
  "epoch": 1.32,
552
  "learning_rate": 2.7335907335907337e-05,
553
- "loss": 0.2952,
554
  "step": 910
555
  },
556
  {
557
  "epoch": 1.33,
558
  "learning_rate": 2.6756756756756756e-05,
559
- "loss": 0.7024,
560
  "step": 920
561
  },
562
  {
563
  "epoch": 1.35,
564
  "learning_rate": 2.6177606177606178e-05,
565
- "loss": 0.6619,
566
  "step": 930
567
  },
568
  {
569
  "epoch": 1.36,
570
  "learning_rate": 2.5598455598455597e-05,
571
- "loss": 0.5304,
572
  "step": 940
573
  },
574
  {
575
  "epoch": 1.37,
576
  "learning_rate": 2.501930501930502e-05,
577
- "loss": 0.4559,
578
  "step": 950
579
  },
580
  {
581
  "epoch": 1.39,
582
  "learning_rate": 2.444015444015444e-05,
583
- "loss": 0.7422,
584
  "step": 960
585
  },
586
  {
587
  "epoch": 1.4,
588
  "learning_rate": 2.386100386100386e-05,
589
- "loss": 0.7257,
590
  "step": 970
591
  },
592
  {
593
  "epoch": 1.42,
594
  "learning_rate": 2.3281853281853282e-05,
595
- "loss": 0.6573,
596
  "step": 980
597
  },
598
  {
599
  "epoch": 1.43,
600
  "learning_rate": 2.2702702702702705e-05,
601
- "loss": 0.7282,
602
  "step": 990
603
  },
604
  {
605
  "epoch": 1.45,
606
  "learning_rate": 2.2123552123552123e-05,
607
- "loss": 0.6065,
608
  "step": 1000
609
  },
610
  {
611
  "epoch": 1.46,
612
  "learning_rate": 2.1544401544401546e-05,
613
- "loss": 0.5585,
614
  "step": 1010
615
  },
616
  {
617
  "epoch": 1.48,
618
  "learning_rate": 2.0965250965250968e-05,
619
- "loss": 0.8342,
620
  "step": 1020
621
  },
622
  {
623
  "epoch": 1.49,
624
  "learning_rate": 2.0386100386100387e-05,
625
- "loss": 0.7599,
626
  "step": 1030
627
  },
628
  {
629
  "epoch": 1.51,
630
  "learning_rate": 1.980694980694981e-05,
631
- "loss": 0.7969,
632
  "step": 1040
633
  },
634
  {
635
  "epoch": 1.52,
636
  "learning_rate": 1.922779922779923e-05,
637
- "loss": 0.5474,
638
  "step": 1050
639
  },
640
  {
641
  "epoch": 1.53,
642
  "learning_rate": 1.864864864864865e-05,
643
- "loss": 0.8421,
644
  "step": 1060
645
  },
646
  {
647
  "epoch": 1.55,
648
  "learning_rate": 1.806949806949807e-05,
649
- "loss": 0.565,
650
  "step": 1070
651
  },
652
  {
653
  "epoch": 1.56,
654
  "learning_rate": 1.7490347490347488e-05,
655
- "loss": 0.3672,
656
  "step": 1080
657
  },
658
  {
659
  "epoch": 1.58,
660
  "learning_rate": 1.691119691119691e-05,
661
- "loss": 0.505,
662
  "step": 1090
663
  },
664
  {
665
  "epoch": 1.59,
666
  "learning_rate": 1.6332046332046332e-05,
667
- "loss": 0.5057,
668
  "step": 1100
669
  },
670
  {
671
  "epoch": 1.61,
672
  "learning_rate": 1.575289575289575e-05,
673
- "loss": 0.7104,
674
  "step": 1110
675
  },
676
  {
677
  "epoch": 1.62,
678
  "learning_rate": 1.5173745173745173e-05,
679
- "loss": 0.6253,
680
  "step": 1120
681
  },
682
  {
683
  "epoch": 1.64,
684
  "learning_rate": 1.4594594594594596e-05,
685
- "loss": 0.9309,
686
  "step": 1130
687
  },
688
  {
689
  "epoch": 1.65,
690
  "learning_rate": 1.4015444015444016e-05,
691
- "loss": 0.7422,
692
  "step": 1140
693
  },
694
  {
695
  "epoch": 1.66,
696
  "learning_rate": 1.3436293436293437e-05,
697
- "loss": 0.6471,
698
  "step": 1150
699
  },
700
  {
701
  "epoch": 1.68,
702
  "learning_rate": 1.2857142857142857e-05,
703
- "loss": 0.7338,
704
  "step": 1160
705
  },
706
  {
707
  "epoch": 1.69,
708
  "learning_rate": 1.227799227799228e-05,
709
- "loss": 0.874,
710
  "step": 1170
711
  },
712
  {
713
  "epoch": 1.71,
714
  "learning_rate": 1.1698841698841698e-05,
715
- "loss": 0.7624,
716
  "step": 1180
717
  },
718
  {
719
  "epoch": 1.72,
720
  "learning_rate": 1.1119691119691119e-05,
721
- "loss": 0.7777,
722
  "step": 1190
723
  },
724
  {
725
  "epoch": 1.74,
726
  "learning_rate": 1.0540540540540541e-05,
727
- "loss": 0.8091,
728
  "step": 1200
729
  },
730
  {
731
  "epoch": 1.75,
732
  "learning_rate": 9.961389961389962e-06,
733
- "loss": 0.9177,
734
  "step": 1210
735
  },
736
  {
737
  "epoch": 1.77,
738
  "learning_rate": 9.382239382239382e-06,
739
- "loss": 1.0223,
740
  "step": 1220
741
  },
742
  {
743
  "epoch": 1.78,
744
  "learning_rate": 8.803088803088804e-06,
745
- "loss": 0.2531,
746
  "step": 1230
747
  },
748
  {
749
  "epoch": 1.79,
750
  "learning_rate": 8.223938223938225e-06,
751
- "loss": 0.6171,
752
  "step": 1240
753
  },
754
  {
755
  "epoch": 1.81,
756
  "learning_rate": 7.644787644787645e-06,
757
- "loss": 0.6996,
758
  "step": 1250
759
  },
760
  {
761
  "epoch": 1.82,
762
  "learning_rate": 7.065637065637066e-06,
763
- "loss": 0.5205,
764
  "step": 1260
765
  },
766
  {
767
  "epoch": 1.84,
768
  "learning_rate": 6.486486486486487e-06,
769
- "loss": 0.65,
770
  "step": 1270
771
  },
772
  {
773
  "epoch": 1.85,
774
  "learning_rate": 5.907335907335907e-06,
775
- "loss": 0.5435,
776
  "step": 1280
777
  },
778
  {
779
  "epoch": 1.87,
780
  "learning_rate": 5.3281853281853285e-06,
781
- "loss": 0.5957,
782
  "step": 1290
783
  },
784
  {
785
  "epoch": 1.88,
786
  "learning_rate": 4.749034749034749e-06,
787
- "loss": 0.5854,
788
  "step": 1300
789
  },
790
  {
791
  "epoch": 1.9,
792
  "learning_rate": 4.1698841698841696e-06,
793
- "loss": 0.779,
794
  "step": 1310
795
  },
796
  {
797
  "epoch": 1.91,
798
  "learning_rate": 3.5907335907335905e-06,
799
- "loss": 0.4551,
800
  "step": 1320
801
  },
802
  {
803
  "epoch": 1.92,
804
  "learning_rate": 3.0115830115830115e-06,
805
- "loss": 0.7719,
806
  "step": 1330
807
  },
808
  {
809
  "epoch": 1.94,
810
  "learning_rate": 2.4324324324324325e-06,
811
- "loss": 0.5182,
812
  "step": 1340
813
  },
814
  {
815
  "epoch": 1.95,
816
  "learning_rate": 1.8532818532818534e-06,
817
- "loss": 0.6974,
818
  "step": 1350
819
  },
820
  {
821
  "epoch": 1.97,
822
  "learning_rate": 1.2741312741312742e-06,
823
- "loss": 0.5345,
824
  "step": 1360
825
  },
826
  {
827
  "epoch": 1.98,
828
  "learning_rate": 6.949806949806949e-07,
829
- "loss": 0.6196,
830
  "step": 1370
831
  },
832
  {
833
  "epoch": 2.0,
834
  "learning_rate": 1.1583011583011584e-07,
835
- "loss": 0.6318,
836
  "step": 1380
837
  },
838
  {
839
  "epoch": 2.0,
840
  "step": 1382,
841
  "total_flos": 5.175834887457866e+18,
842
- "train_loss": 1.088201597354519,
843
- "train_runtime": 725.1829,
844
- "train_samples_per_second": 244.214,
845
- "train_steps_per_second": 1.906
846
  }
847
  ],
848
  "max_steps": 1382,
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 1.7341040462427746e-06,
13
+ "loss": 6.193,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.03,
18
  "learning_rate": 3.468208092485549e-06,
19
+ "loss": 5.641,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.04,
24
  "learning_rate": 5.202312138728324e-06,
25
+ "loss": 5.1531,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.06,
30
  "learning_rate": 6.936416184971098e-06,
31
+ "loss": 4.9047,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.07,
36
  "learning_rate": 8.670520231213873e-06,
37
+ "loss": 4.6805,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.09,
42
  "learning_rate": 1.0404624277456647e-05,
43
+ "loss": 4.0871,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.1,
48
  "learning_rate": 1.2138728323699422e-05,
49
+ "loss": 3.7475,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.12,
54
  "learning_rate": 1.3872832369942197e-05,
55
+ "loss": 3.3551,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.13,
60
  "learning_rate": 1.560693641618497e-05,
61
+ "loss": 2.201,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.14,
66
  "learning_rate": 1.7341040462427746e-05,
67
+ "loss": 1.7477,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.16,
72
  "learning_rate": 1.9075144508670522e-05,
73
+ "loss": 1.8617,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.17,
78
  "learning_rate": 2.0809248554913295e-05,
79
+ "loss": 1.4377,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.19,
84
  "learning_rate": 2.2543352601156068e-05,
85
+ "loss": 1.3061,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.2,
90
  "learning_rate": 2.4277456647398844e-05,
91
+ "loss": 1.0828,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.22,
96
  "learning_rate": 2.601156069364162e-05,
97
+ "loss": 1.4315,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.23,
102
  "learning_rate": 2.7745664739884393e-05,
103
+ "loss": 1.5473,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.25,
108
  "learning_rate": 2.9479768786127166e-05,
109
+ "loss": 1.0229,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.26,
114
  "learning_rate": 3.121387283236994e-05,
115
+ "loss": 1.1775,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 0.27,
120
  "learning_rate": 3.294797687861272e-05,
121
+ "loss": 1.1478,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 0.29,
126
  "learning_rate": 3.468208092485549e-05,
127
+ "loss": 1.3083,
128
  "step": 200
129
  },
130
  {
131
  "epoch": 0.3,
132
  "learning_rate": 3.6416184971098265e-05,
133
+ "loss": 1.087,
134
  "step": 210
135
  },
136
  {
137
  "epoch": 0.32,
138
  "learning_rate": 3.8150289017341044e-05,
139
+ "loss": 1.6076,
140
  "step": 220
141
  },
142
  {
143
  "epoch": 0.33,
144
  "learning_rate": 3.988439306358381e-05,
145
+ "loss": 1.2098,
146
  "step": 230
147
  },
148
  {
149
  "epoch": 0.35,
150
  "learning_rate": 4.161849710982659e-05,
151
+ "loss": 1.12,
152
  "step": 240
153
  },
154
  {
155
  "epoch": 0.36,
156
  "learning_rate": 4.335260115606937e-05,
157
+ "loss": 1.0021,
158
  "step": 250
159
  },
160
  {
161
  "epoch": 0.38,
162
  "learning_rate": 4.5086705202312136e-05,
163
+ "loss": 1.0726,
164
  "step": 260
165
  },
166
  {
167
  "epoch": 0.39,
168
  "learning_rate": 4.6820809248554915e-05,
169
+ "loss": 0.9225,
170
  "step": 270
171
  },
172
  {
173
  "epoch": 0.41,
174
  "learning_rate": 4.855491329479769e-05,
175
+ "loss": 0.9474,
176
  "step": 280
177
  },
178
  {
179
  "epoch": 0.42,
180
  "learning_rate": 5.028901734104046e-05,
181
+ "loss": 1.2694,
182
  "step": 290
183
  },
184
  {
185
  "epoch": 0.43,
186
  "learning_rate": 5.202312138728324e-05,
187
+ "loss": 1.0835,
188
  "step": 300
189
  },
190
  {
191
  "epoch": 0.45,
192
  "learning_rate": 5.3757225433526014e-05,
193
+ "loss": 0.9174,
194
  "step": 310
195
  },
196
  {
197
  "epoch": 0.46,
198
  "learning_rate": 5.5491329479768787e-05,
199
+ "loss": 1.5421,
200
  "step": 320
201
  },
202
  {
203
  "epoch": 0.48,
204
  "learning_rate": 5.7225433526011566e-05,
205
+ "loss": 0.9618,
206
  "step": 330
207
  },
208
  {
209
  "epoch": 0.49,
210
  "learning_rate": 5.895953757225433e-05,
211
+ "loss": 0.5463,
212
  "step": 340
213
  },
214
  {
215
  "epoch": 0.51,
216
  "learning_rate": 5.976833976833977e-05,
217
+ "loss": 0.8392,
218
  "step": 350
219
  },
220
  {
221
  "epoch": 0.52,
222
  "learning_rate": 5.9189189189189195e-05,
223
+ "loss": 0.9226,
224
  "step": 360
225
  },
226
  {
227
  "epoch": 0.54,
228
  "learning_rate": 5.861003861003862e-05,
229
+ "loss": 1.3196,
230
  "step": 370
231
  },
232
  {
233
  "epoch": 0.55,
234
  "learning_rate": 5.8030888030888026e-05,
235
+ "loss": 1.1419,
236
  "step": 380
237
  },
238
  {
239
  "epoch": 0.56,
240
  "learning_rate": 5.745173745173745e-05,
241
+ "loss": 1.3268,
242
  "step": 390
243
  },
244
  {
245
  "epoch": 0.58,
246
  "learning_rate": 5.687258687258687e-05,
247
+ "loss": 0.9793,
248
  "step": 400
249
  },
250
  {
251
  "epoch": 0.59,
252
  "learning_rate": 5.629343629343629e-05,
253
+ "loss": 0.8298,
254
  "step": 410
255
  },
256
  {
257
  "epoch": 0.61,
258
  "learning_rate": 5.5714285714285715e-05,
259
+ "loss": 1.0074,
260
  "step": 420
261
  },
262
  {
263
  "epoch": 0.62,
264
  "learning_rate": 5.513513513513514e-05,
265
+ "loss": 0.8881,
266
  "step": 430
267
  },
268
  {
269
  "epoch": 0.64,
270
  "learning_rate": 5.455598455598455e-05,
271
+ "loss": 1.2019,
272
  "step": 440
273
  },
274
  {
275
  "epoch": 0.65,
276
  "learning_rate": 5.3976833976833975e-05,
277
+ "loss": 0.9811,
278
  "step": 450
279
  },
280
  {
281
  "epoch": 0.67,
282
  "learning_rate": 5.33976833976834e-05,
283
+ "loss": 0.8954,
284
  "step": 460
285
  },
286
  {
287
  "epoch": 0.68,
288
  "learning_rate": 5.281853281853282e-05,
289
+ "loss": 0.8566,
290
  "step": 470
291
  },
292
  {
293
  "epoch": 0.69,
294
  "learning_rate": 5.223938223938224e-05,
295
+ "loss": 0.8032,
296
  "step": 480
297
  },
298
  {
299
  "epoch": 0.71,
300
  "learning_rate": 5.166023166023166e-05,
301
+ "loss": 1.1888,
302
  "step": 490
303
  },
304
  {
305
  "epoch": 0.72,
306
  "learning_rate": 5.108108108108108e-05,
307
+ "loss": 1.2299,
308
  "step": 500
309
  },
310
  {
311
  "epoch": 0.74,
312
  "learning_rate": 5.05019305019305e-05,
313
+ "loss": 1.1262,
314
  "step": 510
315
  },
316
  {
317
  "epoch": 0.75,
318
  "learning_rate": 4.9922779922779924e-05,
319
+ "loss": 0.7842,
320
  "step": 520
321
  },
322
  {
323
  "epoch": 0.77,
324
  "learning_rate": 4.9343629343629346e-05,
325
+ "loss": 0.5281,
326
  "step": 530
327
  },
328
  {
329
  "epoch": 0.78,
330
  "learning_rate": 4.876447876447877e-05,
331
+ "loss": 0.7704,
332
  "step": 540
333
  },
334
  {
335
  "epoch": 0.8,
336
  "learning_rate": 4.8185328185328184e-05,
337
+ "loss": 1.0325,
338
  "step": 550
339
  },
340
  {
341
  "epoch": 0.81,
342
  "learning_rate": 4.7606177606177606e-05,
343
+ "loss": 0.9719,
344
  "step": 560
345
  },
346
  {
347
  "epoch": 0.82,
348
  "learning_rate": 4.702702702702703e-05,
349
+ "loss": 0.5723,
350
  "step": 570
351
  },
352
  {
353
  "epoch": 0.84,
354
  "learning_rate": 4.644787644787645e-05,
355
+ "loss": 0.7936,
356
  "step": 580
357
  },
358
  {
359
  "epoch": 0.85,
360
  "learning_rate": 4.586872586872587e-05,
361
+ "loss": 0.7755,
362
  "step": 590
363
  },
364
  {
365
  "epoch": 0.87,
366
  "learning_rate": 4.5289575289575295e-05,
367
+ "loss": 0.8309,
368
  "step": 600
369
  },
370
  {
371
  "epoch": 0.88,
372
  "learning_rate": 4.471042471042471e-05,
373
+ "loss": 1.1389,
374
  "step": 610
375
  },
376
  {
377
  "epoch": 0.9,
378
  "learning_rate": 4.413127413127413e-05,
379
+ "loss": 0.8725,
380
  "step": 620
381
  },
382
  {
383
  "epoch": 0.91,
384
  "learning_rate": 4.3552123552123555e-05,
385
+ "loss": 0.7626,
386
  "step": 630
387
  },
388
  {
389
  "epoch": 0.93,
390
  "learning_rate": 4.297297297297298e-05,
391
+ "loss": 0.9825,
392
  "step": 640
393
  },
394
  {
395
  "epoch": 0.94,
396
  "learning_rate": 4.23938223938224e-05,
397
+ "loss": 0.3986,
398
  "step": 650
399
  },
400
  {
401
  "epoch": 0.96,
402
  "learning_rate": 4.181467181467182e-05,
403
+ "loss": 0.8921,
404
  "step": 660
405
  },
406
  {
407
  "epoch": 0.97,
408
  "learning_rate": 4.123552123552124e-05,
409
+ "loss": 0.9631,
410
  "step": 670
411
  },
412
  {
413
  "epoch": 0.98,
414
  "learning_rate": 4.065637065637066e-05,
415
+ "loss": 0.782,
416
  "step": 680
417
  },
418
  {
419
  "epoch": 1.0,
420
  "learning_rate": 4.0077220077220075e-05,
421
+ "loss": 0.8098,
422
  "step": 690
423
  },
424
  {
425
  "epoch": 1.01,
426
  "learning_rate": 3.94980694980695e-05,
427
+ "loss": 0.4539,
428
  "step": 700
429
  },
430
  {
431
  "epoch": 1.03,
432
  "learning_rate": 3.891891891891892e-05,
433
+ "loss": 0.6934,
434
  "step": 710
435
  },
436
  {
437
  "epoch": 1.04,
438
  "learning_rate": 3.833976833976834e-05,
439
+ "loss": 0.3759,
440
  "step": 720
441
  },
442
  {
443
  "epoch": 1.06,
444
  "learning_rate": 3.776061776061776e-05,
445
+ "loss": 0.6816,
446
  "step": 730
447
  },
448
  {
449
  "epoch": 1.07,
450
  "learning_rate": 3.718146718146718e-05,
451
+ "loss": 0.6174,
452
  "step": 740
453
  },
454
  {
455
  "epoch": 1.09,
456
  "learning_rate": 3.66023166023166e-05,
457
+ "loss": 0.5918,
458
  "step": 750
459
  },
460
  {
461
  "epoch": 1.1,
462
  "learning_rate": 3.6023166023166024e-05,
463
+ "loss": 0.8005,
464
  "step": 760
465
  },
466
  {
467
  "epoch": 1.11,
468
  "learning_rate": 3.5444015444015446e-05,
469
+ "loss": 0.5895,
470
  "step": 770
471
  },
472
  {
473
  "epoch": 1.13,
474
  "learning_rate": 3.486486486486486e-05,
475
+ "loss": 0.7898,
476
  "step": 780
477
  },
478
  {
479
  "epoch": 1.14,
480
  "learning_rate": 3.4285714285714284e-05,
481
+ "loss": 0.6744,
482
  "step": 790
483
  },
484
  {
485
  "epoch": 1.16,
486
  "learning_rate": 3.3706563706563706e-05,
487
+ "loss": 0.7501,
488
  "step": 800
489
  },
490
  {
491
  "epoch": 1.17,
492
  "learning_rate": 3.312741312741313e-05,
493
+ "loss": 0.8325,
494
  "step": 810
495
  },
496
  {
497
  "epoch": 1.19,
498
  "learning_rate": 3.254826254826255e-05,
499
+ "loss": 0.5695,
500
  "step": 820
501
  },
502
  {
503
  "epoch": 1.2,
504
  "learning_rate": 3.196911196911197e-05,
505
+ "loss": 0.8383,
506
  "step": 830
507
  },
508
  {
509
  "epoch": 1.22,
510
  "learning_rate": 3.138996138996139e-05,
511
+ "loss": 1.0377,
512
  "step": 840
513
  },
514
  {
515
  "epoch": 1.23,
516
  "learning_rate": 3.081081081081081e-05,
517
+ "loss": 0.7829,
518
  "step": 850
519
  },
520
  {
521
  "epoch": 1.24,
522
  "learning_rate": 3.0231660231660233e-05,
523
+ "loss": 0.8918,
524
  "step": 860
525
  },
526
  {
527
  "epoch": 1.26,
528
  "learning_rate": 2.9652509652509655e-05,
529
+ "loss": 0.8774,
530
  "step": 870
531
  },
532
  {
533
  "epoch": 1.27,
534
  "learning_rate": 2.9073359073359074e-05,
535
+ "loss": 1.1418,
536
  "step": 880
537
  },
538
  {
539
  "epoch": 1.29,
540
  "learning_rate": 2.8494208494208496e-05,
541
+ "loss": 0.7277,
542
  "step": 890
543
  },
544
  {
545
  "epoch": 1.3,
546
  "learning_rate": 2.7915057915057918e-05,
547
+ "loss": 0.931,
548
  "step": 900
549
  },
550
  {
551
  "epoch": 1.32,
552
  "learning_rate": 2.7335907335907337e-05,
553
+ "loss": 0.2893,
554
  "step": 910
555
  },
556
  {
557
  "epoch": 1.33,
558
  "learning_rate": 2.6756756756756756e-05,
559
+ "loss": 0.7633,
560
  "step": 920
561
  },
562
  {
563
  "epoch": 1.35,
564
  "learning_rate": 2.6177606177606178e-05,
565
+ "loss": 0.6092,
566
  "step": 930
567
  },
568
  {
569
  "epoch": 1.36,
570
  "learning_rate": 2.5598455598455597e-05,
571
+ "loss": 0.5479,
572
  "step": 940
573
  },
574
  {
575
  "epoch": 1.37,
576
  "learning_rate": 2.501930501930502e-05,
577
+ "loss": 0.4822,
578
  "step": 950
579
  },
580
  {
581
  "epoch": 1.39,
582
  "learning_rate": 2.444015444015444e-05,
583
+ "loss": 0.822,
584
  "step": 960
585
  },
586
  {
587
  "epoch": 1.4,
588
  "learning_rate": 2.386100386100386e-05,
589
+ "loss": 0.8179,
590
  "step": 970
591
  },
592
  {
593
  "epoch": 1.42,
594
  "learning_rate": 2.3281853281853282e-05,
595
+ "loss": 0.6271,
596
  "step": 980
597
  },
598
  {
599
  "epoch": 1.43,
600
  "learning_rate": 2.2702702702702705e-05,
601
+ "loss": 0.7264,
602
  "step": 990
603
  },
604
  {
605
  "epoch": 1.45,
606
  "learning_rate": 2.2123552123552123e-05,
607
+ "loss": 0.6068,
608
  "step": 1000
609
  },
610
  {
611
  "epoch": 1.46,
612
  "learning_rate": 2.1544401544401546e-05,
613
+ "loss": 0.5542,
614
  "step": 1010
615
  },
616
  {
617
  "epoch": 1.48,
618
  "learning_rate": 2.0965250965250968e-05,
619
+ "loss": 0.895,
620
  "step": 1020
621
  },
622
  {
623
  "epoch": 1.49,
624
  "learning_rate": 2.0386100386100387e-05,
625
+ "loss": 0.7742,
626
  "step": 1030
627
  },
628
  {
629
  "epoch": 1.51,
630
  "learning_rate": 1.980694980694981e-05,
631
+ "loss": 0.8531,
632
  "step": 1040
633
  },
634
  {
635
  "epoch": 1.52,
636
  "learning_rate": 1.922779922779923e-05,
637
+ "loss": 0.5329,
638
  "step": 1050
639
  },
640
  {
641
  "epoch": 1.53,
642
  "learning_rate": 1.864864864864865e-05,
643
+ "loss": 0.8632,
644
  "step": 1060
645
  },
646
  {
647
  "epoch": 1.55,
648
  "learning_rate": 1.806949806949807e-05,
649
+ "loss": 0.5655,
650
  "step": 1070
651
  },
652
  {
653
  "epoch": 1.56,
654
  "learning_rate": 1.7490347490347488e-05,
655
+ "loss": 0.3663,
656
  "step": 1080
657
  },
658
  {
659
  "epoch": 1.58,
660
  "learning_rate": 1.691119691119691e-05,
661
+ "loss": 0.5332,
662
  "step": 1090
663
  },
664
  {
665
  "epoch": 1.59,
666
  "learning_rate": 1.6332046332046332e-05,
667
+ "loss": 0.4962,
668
  "step": 1100
669
  },
670
  {
671
  "epoch": 1.61,
672
  "learning_rate": 1.575289575289575e-05,
673
+ "loss": 0.725,
674
  "step": 1110
675
  },
676
  {
677
  "epoch": 1.62,
678
  "learning_rate": 1.5173745173745173e-05,
679
+ "loss": 0.6943,
680
  "step": 1120
681
  },
682
  {
683
  "epoch": 1.64,
684
  "learning_rate": 1.4594594594594596e-05,
685
+ "loss": 1.003,
686
  "step": 1130
687
  },
688
  {
689
  "epoch": 1.65,
690
  "learning_rate": 1.4015444015444016e-05,
691
+ "loss": 0.7074,
692
  "step": 1140
693
  },
694
  {
695
  "epoch": 1.66,
696
  "learning_rate": 1.3436293436293437e-05,
697
+ "loss": 0.6233,
698
  "step": 1150
699
  },
700
  {
701
  "epoch": 1.68,
702
  "learning_rate": 1.2857142857142857e-05,
703
+ "loss": 0.7505,
704
  "step": 1160
705
  },
706
  {
707
  "epoch": 1.69,
708
  "learning_rate": 1.227799227799228e-05,
709
+ "loss": 0.8814,
710
  "step": 1170
711
  },
712
  {
713
  "epoch": 1.71,
714
  "learning_rate": 1.1698841698841698e-05,
715
+ "loss": 0.6993,
716
  "step": 1180
717
  },
718
  {
719
  "epoch": 1.72,
720
  "learning_rate": 1.1119691119691119e-05,
721
+ "loss": 0.8268,
722
  "step": 1190
723
  },
724
  {
725
  "epoch": 1.74,
726
  "learning_rate": 1.0540540540540541e-05,
727
+ "loss": 0.7882,
728
  "step": 1200
729
  },
730
  {
731
  "epoch": 1.75,
732
  "learning_rate": 9.961389961389962e-06,
733
+ "loss": 0.833,
734
  "step": 1210
735
  },
736
  {
737
  "epoch": 1.77,
738
  "learning_rate": 9.382239382239382e-06,
739
+ "loss": 0.9814,
740
  "step": 1220
741
  },
742
  {
743
  "epoch": 1.78,
744
  "learning_rate": 8.803088803088804e-06,
745
+ "loss": 0.2646,
746
  "step": 1230
747
  },
748
  {
749
  "epoch": 1.79,
750
  "learning_rate": 8.223938223938225e-06,
751
+ "loss": 0.6311,
752
  "step": 1240
753
  },
754
  {
755
  "epoch": 1.81,
756
  "learning_rate": 7.644787644787645e-06,
757
+ "loss": 0.6489,
758
  "step": 1250
759
  },
760
  {
761
  "epoch": 1.82,
762
  "learning_rate": 7.065637065637066e-06,
763
+ "loss": 0.5292,
764
  "step": 1260
765
  },
766
  {
767
  "epoch": 1.84,
768
  "learning_rate": 6.486486486486487e-06,
769
+ "loss": 0.6609,
770
  "step": 1270
771
  },
772
  {
773
  "epoch": 1.85,
774
  "learning_rate": 5.907335907335907e-06,
775
+ "loss": 0.6312,
776
  "step": 1280
777
  },
778
  {
779
  "epoch": 1.87,
780
  "learning_rate": 5.3281853281853285e-06,
781
+ "loss": 0.5062,
782
  "step": 1290
783
  },
784
  {
785
  "epoch": 1.88,
786
  "learning_rate": 4.749034749034749e-06,
787
+ "loss": 0.6065,
788
  "step": 1300
789
  },
790
  {
791
  "epoch": 1.9,
792
  "learning_rate": 4.1698841698841696e-06,
793
+ "loss": 0.7586,
794
  "step": 1310
795
  },
796
  {
797
  "epoch": 1.91,
798
  "learning_rate": 3.5907335907335905e-06,
799
+ "loss": 0.5006,
800
  "step": 1320
801
  },
802
  {
803
  "epoch": 1.92,
804
  "learning_rate": 3.0115830115830115e-06,
805
+ "loss": 0.7565,
806
  "step": 1330
807
  },
808
  {
809
  "epoch": 1.94,
810
  "learning_rate": 2.4324324324324325e-06,
811
+ "loss": 0.5601,
812
  "step": 1340
813
  },
814
  {
815
  "epoch": 1.95,
816
  "learning_rate": 1.8532818532818534e-06,
817
+ "loss": 0.5827,
818
  "step": 1350
819
  },
820
  {
821
  "epoch": 1.97,
822
  "learning_rate": 1.2741312741312742e-06,
823
+ "loss": 0.5409,
824
  "step": 1360
825
  },
826
  {
827
  "epoch": 1.98,
828
  "learning_rate": 6.949806949806949e-07,
829
+ "loss": 0.6359,
830
  "step": 1370
831
  },
832
  {
833
  "epoch": 2.0,
834
  "learning_rate": 1.1583011583011584e-07,
835
+ "loss": 0.6002,
836
  "step": 1380
837
  },
838
  {
839
  "epoch": 2.0,
840
  "step": 1382,
841
  "total_flos": 5.175834887457866e+18,
842
+ "train_loss": 1.0848320548337724,
843
+ "train_runtime": 1094.3482,
844
+ "train_samples_per_second": 161.831,
845
+ "train_steps_per_second": 1.263
846
  }
847
  ],
848
  "max_steps": 1382,