S2312dal commited on
Commit
bbd68d1
1 Parent(s): 86f452c

End of training

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
checkpoint-131/config.json CHANGED
@@ -34,7 +34,7 @@
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.20.0",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
 
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
+ "transformers_version": "4.20.1",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
checkpoint-131/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97679e9c1d65d927cfc8925d9d3bc0650d65d3502a479c5d831c02ad2e1adc6f
3
  size 93489987
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b113d04bbcfe6145a4bdb14fe7bca0fcc723633e8f13f12ab5ae4d4264972d95
3
  size 93489987
checkpoint-131/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76fba4cdc57dc74a4a78327d0ee13de9a386bdd62a74c9d772573f5f297c46ce
3
  size 46750353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182a4caf595ba030b4d405415b239626ce1e9b532e001b46928c480cc4e8a055
3
  size 46750353
checkpoint-131/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f09daf6d7cf8238d63ce48ddb4eb1749daab6a9bf2cf83605fe226a548a848fd
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aff9de7abb62bcccab55b648ee4014a3baf88dd1252535f9693c228f065c5c8
3
  size 559
checkpoint-131/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0b53835ac22bcbd217e33a03420f87d07a93b139e16e457d064aad3c02dc4f2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c0e78f078ae85d8a3cdd8f56373ea696003cb961f1beccf16bf22704ce46f1
3
  size 623
checkpoint-131/trainer_state.json CHANGED
@@ -10,797 +10,797 @@
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 0.0,
13
- "loss": 0.3588,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 0.0,
19
- "loss": 0.3226,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.02,
24
  "learning_rate": 2.5e-06,
25
- "loss": 0.2542,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.03,
30
  "learning_rate": 5e-06,
31
- "loss": 0.2879,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
- "learning_rate": 5e-06,
37
- "loss": 0.5216,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
- "learning_rate": 7.500000000000001e-06,
43
- "loss": 0.341,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.05,
48
- "learning_rate": 1e-05,
49
- "loss": 0.1838,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.06,
54
- "learning_rate": 1.25e-05,
55
- "loss": 0.3373,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.07,
60
- "learning_rate": 1.5000000000000002e-05,
61
- "loss": 0.2025,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.08,
66
- "learning_rate": 1.7500000000000002e-05,
67
- "loss": 0.2342,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.08,
72
- "learning_rate": 2e-05,
73
- "loss": 0.1436,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.09,
78
- "learning_rate": 1.994805194805195e-05,
79
- "loss": 0.4132,
80
  "step": 12
81
  },
82
  {
83
  "epoch": 0.1,
84
- "learning_rate": 1.98961038961039e-05,
85
- "loss": 0.279,
86
  "step": 13
87
  },
88
  {
89
  "epoch": 0.11,
90
- "learning_rate": 1.9844155844155846e-05,
91
- "loss": 0.1597,
92
  "step": 14
93
  },
94
  {
95
  "epoch": 0.11,
96
- "learning_rate": 1.9792207792207794e-05,
97
- "loss": 0.2237,
98
  "step": 15
99
  },
100
  {
101
  "epoch": 0.12,
102
- "learning_rate": 1.974025974025974e-05,
103
- "loss": 0.3902,
104
  "step": 16
105
  },
106
  {
107
  "epoch": 0.13,
108
- "learning_rate": 1.968831168831169e-05,
109
- "loss": 0.1849,
110
  "step": 17
111
  },
112
  {
113
  "epoch": 0.14,
114
- "learning_rate": 1.963636363636364e-05,
115
- "loss": 0.2283,
116
  "step": 18
117
  },
118
  {
119
  "epoch": 0.15,
120
- "learning_rate": 1.9584415584415586e-05,
121
- "loss": 0.1571,
122
  "step": 19
123
  },
124
  {
125
  "epoch": 0.15,
126
- "learning_rate": 1.9532467532467533e-05,
127
- "loss": 0.2266,
128
  "step": 20
129
  },
130
  {
131
  "epoch": 0.16,
132
- "learning_rate": 1.9480519480519483e-05,
133
- "loss": 0.1266,
134
  "step": 21
135
  },
136
  {
137
  "epoch": 0.17,
138
- "learning_rate": 1.942857142857143e-05,
139
- "loss": 0.1642,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 0.18,
144
- "learning_rate": 1.9376623376623377e-05,
145
- "loss": 0.1877,
146
  "step": 23
147
  },
148
  {
149
  "epoch": 0.18,
150
- "learning_rate": 1.9324675324675325e-05,
151
- "loss": 0.1765,
152
  "step": 24
153
  },
154
  {
155
  "epoch": 0.19,
156
- "learning_rate": 1.9272727272727275e-05,
157
- "loss": 0.1325,
158
  "step": 25
159
  },
160
  {
161
  "epoch": 0.2,
162
- "learning_rate": 1.9220779220779222e-05,
163
- "loss": 0.1093,
164
  "step": 26
165
  },
166
  {
167
  "epoch": 0.21,
168
- "learning_rate": 1.916883116883117e-05,
169
- "loss": 0.082,
170
  "step": 27
171
  },
172
  {
173
  "epoch": 0.21,
174
- "learning_rate": 1.9116883116883117e-05,
175
- "loss": 0.1399,
176
  "step": 28
177
  },
178
  {
179
  "epoch": 0.22,
180
- "learning_rate": 1.9064935064935067e-05,
181
- "loss": 0.0833,
182
  "step": 29
183
  },
184
  {
185
  "epoch": 0.23,
186
- "learning_rate": 1.9012987012987014e-05,
187
- "loss": 0.122,
188
  "step": 30
189
  },
190
  {
191
  "epoch": 0.24,
192
- "learning_rate": 1.896103896103896e-05,
193
- "loss": 0.1042,
194
  "step": 31
195
  },
196
  {
197
  "epoch": 0.24,
198
- "learning_rate": 1.8909090909090912e-05,
199
- "loss": 0.1439,
200
  "step": 32
201
  },
202
  {
203
  "epoch": 0.25,
204
- "learning_rate": 1.885714285714286e-05,
205
- "loss": 0.045,
206
  "step": 33
207
  },
208
  {
209
  "epoch": 0.26,
210
- "learning_rate": 1.8805194805194806e-05,
211
- "loss": 0.1221,
212
  "step": 34
213
  },
214
  {
215
  "epoch": 0.27,
216
- "learning_rate": 1.8753246753246753e-05,
217
- "loss": 0.1128,
218
  "step": 35
219
  },
220
  {
221
  "epoch": 0.27,
222
- "learning_rate": 1.8701298701298704e-05,
223
- "loss": 0.1142,
224
  "step": 36
225
  },
226
  {
227
  "epoch": 0.28,
228
- "learning_rate": 1.864935064935065e-05,
229
- "loss": 0.095,
230
  "step": 37
231
  },
232
  {
233
  "epoch": 0.29,
234
- "learning_rate": 1.8597402597402598e-05,
235
- "loss": 0.0696,
236
  "step": 38
237
  },
238
  {
239
  "epoch": 0.3,
240
- "learning_rate": 1.8545454545454545e-05,
241
- "loss": 0.1131,
242
  "step": 39
243
  },
244
  {
245
  "epoch": 0.31,
246
- "learning_rate": 1.8493506493506496e-05,
247
- "loss": 0.2079,
248
  "step": 40
249
  },
250
  {
251
  "epoch": 0.31,
252
- "learning_rate": 1.8441558441558443e-05,
253
- "loss": 0.2369,
254
  "step": 41
255
  },
256
  {
257
  "epoch": 0.32,
258
- "learning_rate": 1.838961038961039e-05,
259
- "loss": 0.1422,
260
  "step": 42
261
  },
262
  {
263
  "epoch": 0.33,
264
- "learning_rate": 1.8337662337662337e-05,
265
- "loss": 0.0955,
266
  "step": 43
267
  },
268
  {
269
  "epoch": 0.34,
270
- "learning_rate": 1.8285714285714288e-05,
271
- "loss": 0.1609,
272
  "step": 44
273
  },
274
  {
275
  "epoch": 0.34,
276
- "learning_rate": 1.8233766233766235e-05,
277
- "loss": 0.1931,
278
  "step": 45
279
  },
280
  {
281
  "epoch": 0.35,
282
- "learning_rate": 1.8181818181818182e-05,
283
- "loss": 0.1338,
284
  "step": 46
285
  },
286
  {
287
  "epoch": 0.36,
288
- "learning_rate": 1.812987012987013e-05,
289
- "loss": 0.0927,
290
  "step": 47
291
  },
292
  {
293
  "epoch": 0.37,
294
- "learning_rate": 1.807792207792208e-05,
295
- "loss": 0.0717,
296
  "step": 48
297
  },
298
  {
299
  "epoch": 0.37,
300
- "learning_rate": 1.8025974025974027e-05,
301
- "loss": 0.0478,
302
  "step": 49
303
  },
304
  {
305
  "epoch": 0.38,
306
- "learning_rate": 1.7974025974025974e-05,
307
- "loss": 0.1131,
308
  "step": 50
309
  },
310
  {
311
  "epoch": 0.39,
312
- "learning_rate": 1.792207792207792e-05,
313
- "loss": 0.202,
314
  "step": 51
315
  },
316
  {
317
  "epoch": 0.4,
318
- "learning_rate": 1.7870129870129872e-05,
319
- "loss": 0.0677,
320
  "step": 52
321
  },
322
  {
323
  "epoch": 0.4,
324
- "learning_rate": 1.781818181818182e-05,
325
- "loss": 0.1057,
326
  "step": 53
327
  },
328
  {
329
  "epoch": 0.41,
330
- "learning_rate": 1.7766233766233766e-05,
331
- "loss": 0.0773,
332
  "step": 54
333
  },
334
  {
335
  "epoch": 0.42,
336
- "learning_rate": 1.7714285714285717e-05,
337
- "loss": 0.0599,
338
  "step": 55
339
  },
340
  {
341
  "epoch": 0.43,
342
- "learning_rate": 1.7662337662337664e-05,
343
- "loss": 0.2749,
344
  "step": 56
345
  },
346
  {
347
  "epoch": 0.44,
348
- "learning_rate": 1.761038961038961e-05,
349
- "loss": 0.1707,
350
  "step": 57
351
  },
352
  {
353
  "epoch": 0.44,
354
- "learning_rate": 1.7558441558441558e-05,
355
- "loss": 0.1129,
356
  "step": 58
357
  },
358
  {
359
  "epoch": 0.45,
360
- "learning_rate": 1.750649350649351e-05,
361
- "loss": 0.0496,
362
  "step": 59
363
  },
364
  {
365
  "epoch": 0.46,
366
- "learning_rate": 1.7454545454545456e-05,
367
- "loss": 0.0708,
368
  "step": 60
369
  },
370
  {
371
  "epoch": 0.47,
372
- "learning_rate": 1.7402597402597403e-05,
373
- "loss": 0.0694,
374
  "step": 61
375
  },
376
  {
377
  "epoch": 0.47,
378
- "learning_rate": 1.735064935064935e-05,
379
- "loss": 0.1964,
380
  "step": 62
381
  },
382
  {
383
  "epoch": 0.48,
384
- "learning_rate": 1.72987012987013e-05,
385
- "loss": 0.1586,
386
  "step": 63
387
  },
388
  {
389
  "epoch": 0.49,
390
- "learning_rate": 1.7246753246753248e-05,
391
- "loss": 0.2173,
392
  "step": 64
393
  },
394
  {
395
  "epoch": 0.5,
396
- "learning_rate": 1.7194805194805195e-05,
397
- "loss": 0.162,
398
  "step": 65
399
  },
400
  {
401
  "epoch": 0.5,
402
- "learning_rate": 1.7142857142857142e-05,
403
- "loss": 0.0896,
404
  "step": 66
405
  },
406
  {
407
  "epoch": 0.51,
408
- "learning_rate": 1.7090909090909092e-05,
409
- "loss": 0.0503,
410
  "step": 67
411
  },
412
  {
413
  "epoch": 0.52,
414
- "learning_rate": 1.703896103896104e-05,
415
- "loss": 0.1161,
416
  "step": 68
417
  },
418
  {
419
  "epoch": 0.53,
420
- "learning_rate": 1.6987012987012987e-05,
421
- "loss": 0.0854,
422
  "step": 69
423
  },
424
  {
425
  "epoch": 0.53,
426
- "learning_rate": 1.6935064935064934e-05,
427
- "loss": 0.1247,
428
  "step": 70
429
  },
430
  {
431
  "epoch": 0.54,
432
- "learning_rate": 1.6883116883116884e-05,
433
- "loss": 0.1368,
434
  "step": 71
435
  },
436
  {
437
  "epoch": 0.55,
438
- "learning_rate": 1.683116883116883e-05,
439
- "loss": 0.1041,
440
  "step": 72
441
  },
442
  {
443
  "epoch": 0.56,
444
- "learning_rate": 1.677922077922078e-05,
445
- "loss": 0.0546,
446
  "step": 73
447
  },
448
  {
449
  "epoch": 0.56,
450
- "learning_rate": 1.672727272727273e-05,
451
- "loss": 0.0866,
452
  "step": 74
453
  },
454
  {
455
  "epoch": 0.57,
456
- "learning_rate": 1.6675324675324676e-05,
457
- "loss": 0.0624,
458
  "step": 75
459
  },
460
  {
461
  "epoch": 0.58,
462
- "learning_rate": 1.6623376623376627e-05,
463
- "loss": 0.1149,
464
  "step": 76
465
  },
466
  {
467
  "epoch": 0.59,
468
- "learning_rate": 1.6571428571428574e-05,
469
- "loss": 0.0752,
470
  "step": 77
471
  },
472
  {
473
  "epoch": 0.6,
474
- "learning_rate": 1.651948051948052e-05,
475
- "loss": 0.0457,
476
  "step": 78
477
  },
478
  {
479
  "epoch": 0.6,
480
- "learning_rate": 1.646753246753247e-05,
481
- "loss": 0.0705,
482
  "step": 79
483
  },
484
  {
485
  "epoch": 0.61,
486
- "learning_rate": 1.641558441558442e-05,
487
- "loss": 0.0827,
488
  "step": 80
489
  },
490
  {
491
  "epoch": 0.62,
492
- "learning_rate": 1.6363636363636366e-05,
493
- "loss": 0.0717,
494
  "step": 81
495
  },
496
  {
497
  "epoch": 0.63,
498
- "learning_rate": 1.6311688311688313e-05,
499
- "loss": 0.0804,
500
  "step": 82
501
  },
502
  {
503
  "epoch": 0.63,
504
- "learning_rate": 1.6259740259740264e-05,
505
- "loss": 0.0188,
506
  "step": 83
507
  },
508
  {
509
  "epoch": 0.64,
510
- "learning_rate": 1.620779220779221e-05,
511
- "loss": 0.0457,
512
  "step": 84
513
  },
514
  {
515
  "epoch": 0.65,
516
- "learning_rate": 1.6155844155844158e-05,
517
- "loss": 0.0422,
518
  "step": 85
519
  },
520
  {
521
  "epoch": 0.66,
522
- "learning_rate": 1.6103896103896105e-05,
523
- "loss": 0.0297,
524
  "step": 86
525
  },
526
  {
527
  "epoch": 0.66,
528
- "learning_rate": 1.6051948051948056e-05,
529
- "loss": 0.0408,
530
  "step": 87
531
  },
532
  {
533
  "epoch": 0.67,
534
- "learning_rate": 1.6000000000000003e-05,
535
- "loss": 0.0527,
536
  "step": 88
537
  },
538
  {
539
  "epoch": 0.68,
540
- "learning_rate": 1.594805194805195e-05,
541
- "loss": 0.0234,
542
  "step": 89
543
  },
544
  {
545
  "epoch": 0.69,
546
- "learning_rate": 1.5896103896103897e-05,
547
- "loss": 0.0368,
548
  "step": 90
549
  },
550
  {
551
  "epoch": 0.69,
552
- "learning_rate": 1.5844155844155847e-05,
553
- "loss": 0.0731,
554
  "step": 91
555
  },
556
  {
557
  "epoch": 0.7,
558
- "learning_rate": 1.5792207792207795e-05,
559
- "loss": 0.1104,
560
  "step": 92
561
  },
562
  {
563
  "epoch": 0.71,
564
- "learning_rate": 1.5740259740259742e-05,
565
- "loss": 0.0546,
566
  "step": 93
567
  },
568
  {
569
  "epoch": 0.72,
570
- "learning_rate": 1.568831168831169e-05,
571
- "loss": 0.0226,
572
  "step": 94
573
  },
574
  {
575
  "epoch": 0.73,
576
- "learning_rate": 1.563636363636364e-05,
577
- "loss": 0.0366,
578
  "step": 95
579
  },
580
  {
581
  "epoch": 0.73,
582
- "learning_rate": 1.5584415584415587e-05,
583
- "loss": 0.038,
584
  "step": 96
585
  },
586
  {
587
  "epoch": 0.74,
588
- "learning_rate": 1.5532467532467534e-05,
589
- "loss": 0.041,
590
  "step": 97
591
  },
592
  {
593
  "epoch": 0.75,
594
- "learning_rate": 1.548051948051948e-05,
595
- "loss": 0.0375,
596
  "step": 98
597
  },
598
  {
599
  "epoch": 0.76,
600
- "learning_rate": 1.542857142857143e-05,
601
- "loss": 0.0389,
602
  "step": 99
603
  },
604
  {
605
  "epoch": 0.76,
606
- "learning_rate": 1.537662337662338e-05,
607
- "loss": 0.022,
608
  "step": 100
609
  },
610
  {
611
  "epoch": 0.77,
612
- "learning_rate": 1.5324675324675326e-05,
613
- "loss": 0.0569,
614
  "step": 101
615
  },
616
  {
617
  "epoch": 0.78,
618
- "learning_rate": 1.5272727272727276e-05,
619
- "loss": 0.0172,
620
  "step": 102
621
  },
622
  {
623
  "epoch": 0.79,
624
- "learning_rate": 1.5220779220779223e-05,
625
- "loss": 0.0754,
626
  "step": 103
627
  },
628
  {
629
  "epoch": 0.79,
630
- "learning_rate": 1.516883116883117e-05,
631
- "loss": 0.0626,
632
  "step": 104
633
  },
634
  {
635
  "epoch": 0.8,
636
- "learning_rate": 1.511688311688312e-05,
637
- "loss": 0.0515,
638
  "step": 105
639
  },
640
  {
641
  "epoch": 0.81,
642
- "learning_rate": 1.5064935064935066e-05,
643
- "loss": 0.0345,
644
  "step": 106
645
  },
646
  {
647
  "epoch": 0.82,
648
- "learning_rate": 1.5012987012987015e-05,
649
- "loss": 0.0504,
650
  "step": 107
651
  },
652
  {
653
  "epoch": 0.82,
654
- "learning_rate": 1.4961038961038962e-05,
655
- "loss": 0.0329,
656
  "step": 108
657
  },
658
  {
659
  "epoch": 0.83,
660
- "learning_rate": 1.4909090909090911e-05,
661
- "loss": 0.0719,
662
  "step": 109
663
  },
664
  {
665
  "epoch": 0.84,
666
- "learning_rate": 1.4857142857142858e-05,
667
- "loss": 0.0417,
668
  "step": 110
669
  },
670
  {
671
  "epoch": 0.85,
672
- "learning_rate": 1.4805194805194807e-05,
673
- "loss": 0.0551,
674
  "step": 111
675
  },
676
  {
677
  "epoch": 0.85,
678
- "learning_rate": 1.4753246753246754e-05,
679
- "loss": 0.0299,
680
  "step": 112
681
  },
682
  {
683
  "epoch": 0.86,
684
- "learning_rate": 1.4701298701298703e-05,
685
- "loss": 0.024,
686
  "step": 113
687
  },
688
  {
689
  "epoch": 0.87,
690
- "learning_rate": 1.464935064935065e-05,
691
- "loss": 0.0328,
692
  "step": 114
693
  },
694
  {
695
  "epoch": 0.88,
696
- "learning_rate": 1.45974025974026e-05,
697
- "loss": 0.0462,
698
  "step": 115
699
  },
700
  {
701
  "epoch": 0.89,
702
- "learning_rate": 1.4545454545454546e-05,
703
- "loss": 0.0286,
704
  "step": 116
705
  },
706
  {
707
  "epoch": 0.89,
708
- "learning_rate": 1.4493506493506495e-05,
709
- "loss": 0.0259,
710
  "step": 117
711
  },
712
  {
713
  "epoch": 0.9,
714
- "learning_rate": 1.4441558441558442e-05,
715
- "loss": 0.0321,
716
  "step": 118
717
  },
718
  {
719
  "epoch": 0.91,
720
- "learning_rate": 1.4389610389610391e-05,
721
- "loss": 0.0324,
722
  "step": 119
723
  },
724
  {
725
  "epoch": 0.92,
726
- "learning_rate": 1.433766233766234e-05,
727
- "loss": 0.1605,
728
  "step": 120
729
  },
730
  {
731
  "epoch": 0.92,
732
- "learning_rate": 1.4285714285714287e-05,
733
- "loss": 0.0466,
734
  "step": 121
735
  },
736
  {
737
  "epoch": 0.93,
738
- "learning_rate": 1.4233766233766236e-05,
739
- "loss": 0.0628,
740
  "step": 122
741
  },
742
  {
743
  "epoch": 0.94,
744
- "learning_rate": 1.4181818181818183e-05,
745
- "loss": 0.0276,
746
  "step": 123
747
  },
748
  {
749
  "epoch": 0.95,
750
- "learning_rate": 1.4129870129870132e-05,
751
- "loss": 0.0371,
752
  "step": 124
753
  },
754
  {
755
  "epoch": 0.95,
756
- "learning_rate": 1.4077922077922079e-05,
757
- "loss": 0.0115,
758
  "step": 125
759
  },
760
  {
761
  "epoch": 0.96,
762
- "learning_rate": 1.4025974025974028e-05,
763
- "loss": 0.0399,
764
  "step": 126
765
  },
766
  {
767
  "epoch": 0.97,
768
- "learning_rate": 1.3974025974025975e-05,
769
- "loss": 0.043,
770
  "step": 127
771
  },
772
  {
773
  "epoch": 0.98,
774
- "learning_rate": 1.3922077922077924e-05,
775
- "loss": 0.0543,
776
  "step": 128
777
  },
778
  {
779
  "epoch": 0.98,
780
- "learning_rate": 1.3870129870129871e-05,
781
- "loss": 0.0232,
782
  "step": 129
783
  },
784
  {
785
  "epoch": 0.99,
786
- "learning_rate": 1.381818181818182e-05,
787
- "loss": 0.0542,
788
  "step": 130
789
  },
790
  {
791
  "epoch": 1.0,
792
- "learning_rate": 1.3766233766233767e-05,
793
- "loss": 0.0232,
794
  "step": 131
795
  },
796
  {
797
  "epoch": 1.0,
798
- "eval_loss": 0.029073596000671387,
799
- "eval_pearson": 0.9236611784671435,
800
- "eval_runtime": 5.5585,
801
- "eval_samples_per_second": 41.738,
802
- "eval_spearmanr": 0.8832765384834576,
803
- "eval_steps_per_second": 2.699,
804
  "step": 131
805
  }
806
  ],
 
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 0.0,
13
+ "loss": 0.4278,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 0.0,
19
+ "loss": 0.409,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.02,
24
  "learning_rate": 2.5e-06,
25
+ "loss": 0.3399,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.03,
30
  "learning_rate": 5e-06,
31
+ "loss": 0.3277,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
+ "learning_rate": 7.500000000000001e-06,
37
+ "loss": 0.3771,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
+ "learning_rate": 1e-05,
43
+ "loss": 0.2081,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.05,
48
+ "learning_rate": 1.25e-05,
49
+ "loss": 0.3177,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.06,
54
+ "learning_rate": 1.5000000000000002e-05,
55
+ "loss": 0.3049,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.07,
60
+ "learning_rate": 1.7500000000000002e-05,
61
+ "loss": 0.1694,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.08,
66
+ "learning_rate": 2e-05,
67
+ "loss": 0.2039,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.08,
72
+ "learning_rate": 1.994805194805195e-05,
73
+ "loss": 0.1651,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.09,
78
+ "learning_rate": 1.98961038961039e-05,
79
+ "loss": 0.3483,
80
  "step": 12
81
  },
82
  {
83
  "epoch": 0.1,
84
+ "learning_rate": 1.9844155844155846e-05,
85
+ "loss": 0.1532,
86
  "step": 13
87
  },
88
  {
89
  "epoch": 0.11,
90
+ "learning_rate": 1.9792207792207794e-05,
91
+ "loss": 0.2102,
92
  "step": 14
93
  },
94
  {
95
  "epoch": 0.11,
96
+ "learning_rate": 1.974025974025974e-05,
97
+ "loss": 0.0953,
98
  "step": 15
99
  },
100
  {
101
  "epoch": 0.12,
102
+ "learning_rate": 1.968831168831169e-05,
103
+ "loss": 0.0761,
104
  "step": 16
105
  },
106
  {
107
  "epoch": 0.13,
108
+ "learning_rate": 1.963636363636364e-05,
109
+ "loss": 0.1778,
110
  "step": 17
111
  },
112
  {
113
  "epoch": 0.14,
114
+ "learning_rate": 1.9584415584415586e-05,
115
+ "loss": 0.138,
116
  "step": 18
117
  },
118
  {
119
  "epoch": 0.15,
120
+ "learning_rate": 1.9532467532467533e-05,
121
+ "loss": 0.1523,
122
  "step": 19
123
  },
124
  {
125
  "epoch": 0.15,
126
+ "learning_rate": 1.9480519480519483e-05,
127
+ "loss": 0.1351,
128
  "step": 20
129
  },
130
  {
131
  "epoch": 0.16,
132
+ "learning_rate": 1.942857142857143e-05,
133
+ "loss": 0.0846,
134
  "step": 21
135
  },
136
  {
137
  "epoch": 0.17,
138
+ "learning_rate": 1.9376623376623377e-05,
139
+ "loss": 0.1725,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 0.18,
144
+ "learning_rate": 1.9324675324675325e-05,
145
+ "loss": 0.0751,
146
  "step": 23
147
  },
148
  {
149
  "epoch": 0.18,
150
+ "learning_rate": 1.9272727272727275e-05,
151
+ "loss": 0.1553,
152
  "step": 24
153
  },
154
  {
155
  "epoch": 0.19,
156
+ "learning_rate": 1.9220779220779222e-05,
157
+ "loss": 0.1551,
158
  "step": 25
159
  },
160
  {
161
  "epoch": 0.2,
162
+ "learning_rate": 1.916883116883117e-05,
163
+ "loss": 0.1263,
164
  "step": 26
165
  },
166
  {
167
  "epoch": 0.21,
168
+ "learning_rate": 1.9116883116883117e-05,
169
+ "loss": 0.0627,
170
  "step": 27
171
  },
172
  {
173
  "epoch": 0.21,
174
+ "learning_rate": 1.9064935064935067e-05,
175
+ "loss": 0.109,
176
  "step": 28
177
  },
178
  {
179
  "epoch": 0.22,
180
+ "learning_rate": 1.9012987012987014e-05,
181
+ "loss": 0.0426,
182
  "step": 29
183
  },
184
  {
185
  "epoch": 0.23,
186
+ "learning_rate": 1.896103896103896e-05,
187
+ "loss": 0.0864,
188
  "step": 30
189
  },
190
  {
191
  "epoch": 0.24,
192
+ "learning_rate": 1.8909090909090912e-05,
193
+ "loss": 0.0477,
194
  "step": 31
195
  },
196
  {
197
  "epoch": 0.24,
198
+ "learning_rate": 1.885714285714286e-05,
199
+ "loss": 0.1584,
200
  "step": 32
201
  },
202
  {
203
  "epoch": 0.25,
204
+ "learning_rate": 1.8805194805194806e-05,
205
+ "loss": 0.06,
206
  "step": 33
207
  },
208
  {
209
  "epoch": 0.26,
210
+ "learning_rate": 1.8753246753246753e-05,
211
+ "loss": 0.0972,
212
  "step": 34
213
  },
214
  {
215
  "epoch": 0.27,
216
+ "learning_rate": 1.8701298701298704e-05,
217
+ "loss": 0.0658,
218
  "step": 35
219
  },
220
  {
221
  "epoch": 0.27,
222
+ "learning_rate": 1.864935064935065e-05,
223
+ "loss": 0.0729,
224
  "step": 36
225
  },
226
  {
227
  "epoch": 0.28,
228
+ "learning_rate": 1.8597402597402598e-05,
229
+ "loss": 0.0851,
230
  "step": 37
231
  },
232
  {
233
  "epoch": 0.29,
234
+ "learning_rate": 1.8545454545454545e-05,
235
+ "loss": 0.1193,
236
  "step": 38
237
  },
238
  {
239
  "epoch": 0.3,
240
+ "learning_rate": 1.8493506493506496e-05,
241
+ "loss": 0.0194,
242
  "step": 39
243
  },
244
  {
245
  "epoch": 0.31,
246
+ "learning_rate": 1.8441558441558443e-05,
247
+ "loss": 0.0994,
248
  "step": 40
249
  },
250
  {
251
  "epoch": 0.31,
252
+ "learning_rate": 1.838961038961039e-05,
253
+ "loss": 0.0719,
254
  "step": 41
255
  },
256
  {
257
  "epoch": 0.32,
258
+ "learning_rate": 1.8337662337662337e-05,
259
+ "loss": 0.0915,
260
  "step": 42
261
  },
262
  {
263
  "epoch": 0.33,
264
+ "learning_rate": 1.8285714285714288e-05,
265
+ "loss": 0.0823,
266
  "step": 43
267
  },
268
  {
269
  "epoch": 0.34,
270
+ "learning_rate": 1.8233766233766235e-05,
271
+ "loss": 0.1922,
272
  "step": 44
273
  },
274
  {
275
  "epoch": 0.34,
276
+ "learning_rate": 1.8181818181818182e-05,
277
+ "loss": 0.1452,
278
  "step": 45
279
  },
280
  {
281
  "epoch": 0.35,
282
+ "learning_rate": 1.812987012987013e-05,
283
+ "loss": 0.0233,
284
  "step": 46
285
  },
286
  {
287
  "epoch": 0.36,
288
+ "learning_rate": 1.807792207792208e-05,
289
+ "loss": 0.0448,
290
  "step": 47
291
  },
292
  {
293
  "epoch": 0.37,
294
+ "learning_rate": 1.8025974025974027e-05,
295
+ "loss": 0.0708,
296
  "step": 48
297
  },
298
  {
299
  "epoch": 0.37,
300
+ "learning_rate": 1.7974025974025974e-05,
301
+ "loss": 0.0441,
302
  "step": 49
303
  },
304
  {
305
  "epoch": 0.38,
306
+ "learning_rate": 1.792207792207792e-05,
307
+ "loss": 0.0498,
308
  "step": 50
309
  },
310
  {
311
  "epoch": 0.39,
312
+ "learning_rate": 1.7870129870129872e-05,
313
+ "loss": 0.0645,
314
  "step": 51
315
  },
316
  {
317
  "epoch": 0.4,
318
+ "learning_rate": 1.781818181818182e-05,
319
+ "loss": 0.0718,
320
  "step": 52
321
  },
322
  {
323
  "epoch": 0.4,
324
+ "learning_rate": 1.7766233766233766e-05,
325
+ "loss": 0.0605,
326
  "step": 53
327
  },
328
  {
329
  "epoch": 0.41,
330
+ "learning_rate": 1.7714285714285717e-05,
331
+ "loss": 0.0752,
332
  "step": 54
333
  },
334
  {
335
  "epoch": 0.42,
336
+ "learning_rate": 1.7662337662337664e-05,
337
+ "loss": 0.0312,
338
  "step": 55
339
  },
340
  {
341
  "epoch": 0.43,
342
+ "learning_rate": 1.761038961038961e-05,
343
+ "loss": 0.1122,
344
  "step": 56
345
  },
346
  {
347
  "epoch": 0.44,
348
+ "learning_rate": 1.7558441558441558e-05,
349
+ "loss": 0.0579,
350
  "step": 57
351
  },
352
  {
353
  "epoch": 0.44,
354
+ "learning_rate": 1.750649350649351e-05,
355
+ "loss": 0.0664,
356
  "step": 58
357
  },
358
  {
359
  "epoch": 0.45,
360
+ "learning_rate": 1.7454545454545456e-05,
361
+ "loss": 0.059,
362
  "step": 59
363
  },
364
  {
365
  "epoch": 0.46,
366
+ "learning_rate": 1.7402597402597403e-05,
367
+ "loss": 0.0602,
368
  "step": 60
369
  },
370
  {
371
  "epoch": 0.47,
372
+ "learning_rate": 1.735064935064935e-05,
373
+ "loss": 0.0322,
374
  "step": 61
375
  },
376
  {
377
  "epoch": 0.47,
378
+ "learning_rate": 1.72987012987013e-05,
379
+ "loss": 0.1271,
380
  "step": 62
381
  },
382
  {
383
  "epoch": 0.48,
384
+ "learning_rate": 1.7246753246753248e-05,
385
+ "loss": 0.2168,
386
  "step": 63
387
  },
388
  {
389
  "epoch": 0.49,
390
+ "learning_rate": 1.7194805194805195e-05,
391
+ "loss": 0.1552,
392
  "step": 64
393
  },
394
  {
395
  "epoch": 0.5,
396
+ "learning_rate": 1.7142857142857142e-05,
397
+ "loss": 0.1742,
398
  "step": 65
399
  },
400
  {
401
  "epoch": 0.5,
402
+ "learning_rate": 1.7090909090909092e-05,
403
+ "loss": 0.0751,
404
  "step": 66
405
  },
406
  {
407
  "epoch": 0.51,
408
+ "learning_rate": 1.703896103896104e-05,
409
+ "loss": 0.0379,
410
  "step": 67
411
  },
412
  {
413
  "epoch": 0.52,
414
+ "learning_rate": 1.6987012987012987e-05,
415
+ "loss": 0.185,
416
  "step": 68
417
  },
418
  {
419
  "epoch": 0.53,
420
+ "learning_rate": 1.6935064935064934e-05,
421
+ "loss": 0.1504,
422
  "step": 69
423
  },
424
  {
425
  "epoch": 0.53,
426
+ "learning_rate": 1.6883116883116884e-05,
427
+ "loss": 0.1169,
428
  "step": 70
429
  },
430
  {
431
  "epoch": 0.54,
432
+ "learning_rate": 1.683116883116883e-05,
433
+ "loss": 0.0771,
434
  "step": 71
435
  },
436
  {
437
  "epoch": 0.55,
438
+ "learning_rate": 1.677922077922078e-05,
439
+ "loss": 0.0374,
440
  "step": 72
441
  },
442
  {
443
  "epoch": 0.56,
444
+ "learning_rate": 1.672727272727273e-05,
445
+ "loss": 0.1039,
446
  "step": 73
447
  },
448
  {
449
  "epoch": 0.56,
450
+ "learning_rate": 1.6675324675324676e-05,
451
+ "loss": 0.101,
452
  "step": 74
453
  },
454
  {
455
  "epoch": 0.57,
456
+ "learning_rate": 1.6623376623376627e-05,
457
+ "loss": 0.0917,
458
  "step": 75
459
  },
460
  {
461
  "epoch": 0.58,
462
+ "learning_rate": 1.6571428571428574e-05,
463
+ "loss": 0.089,
464
  "step": 76
465
  },
466
  {
467
  "epoch": 0.59,
468
+ "learning_rate": 1.651948051948052e-05,
469
+ "loss": 0.0307,
470
  "step": 77
471
  },
472
  {
473
  "epoch": 0.6,
474
+ "learning_rate": 1.646753246753247e-05,
475
+ "loss": 0.1496,
476
  "step": 78
477
  },
478
  {
479
  "epoch": 0.6,
480
+ "learning_rate": 1.641558441558442e-05,
481
+ "loss": 0.1121,
482
  "step": 79
483
  },
484
  {
485
  "epoch": 0.61,
486
+ "learning_rate": 1.6363636363636366e-05,
487
+ "loss": 0.1882,
488
  "step": 80
489
  },
490
  {
491
  "epoch": 0.62,
492
+ "learning_rate": 1.6311688311688313e-05,
493
+ "loss": 0.136,
494
  "step": 81
495
  },
496
  {
497
  "epoch": 0.63,
498
+ "learning_rate": 1.6259740259740264e-05,
499
+ "loss": 0.1114,
500
  "step": 82
501
  },
502
  {
503
  "epoch": 0.63,
504
+ "learning_rate": 1.620779220779221e-05,
505
+ "loss": 0.0095,
506
  "step": 83
507
  },
508
  {
509
  "epoch": 0.64,
510
+ "learning_rate": 1.6155844155844158e-05,
511
+ "loss": 0.0559,
512
  "step": 84
513
  },
514
  {
515
  "epoch": 0.65,
516
+ "learning_rate": 1.6103896103896105e-05,
517
+ "loss": 0.0528,
518
  "step": 85
519
  },
520
  {
521
  "epoch": 0.66,
522
+ "learning_rate": 1.6051948051948056e-05,
523
+ "loss": 0.0298,
524
  "step": 86
525
  },
526
  {
527
  "epoch": 0.66,
528
+ "learning_rate": 1.6000000000000003e-05,
529
+ "loss": 0.0436,
530
  "step": 87
531
  },
532
  {
533
  "epoch": 0.67,
534
+ "learning_rate": 1.594805194805195e-05,
535
+ "loss": 0.0515,
536
  "step": 88
537
  },
538
  {
539
  "epoch": 0.68,
540
+ "learning_rate": 1.5896103896103897e-05,
541
+ "loss": 0.0588,
542
  "step": 89
543
  },
544
  {
545
  "epoch": 0.69,
546
+ "learning_rate": 1.5844155844155847e-05,
547
+ "loss": 0.071,
548
  "step": 90
549
  },
550
  {
551
  "epoch": 0.69,
552
+ "learning_rate": 1.5792207792207795e-05,
553
+ "loss": 0.0457,
554
  "step": 91
555
  },
556
  {
557
  "epoch": 0.7,
558
+ "learning_rate": 1.5740259740259742e-05,
559
+ "loss": 0.0383,
560
  "step": 92
561
  },
562
  {
563
  "epoch": 0.71,
564
+ "learning_rate": 1.568831168831169e-05,
565
+ "loss": 0.0686,
566
  "step": 93
567
  },
568
  {
569
  "epoch": 0.72,
570
+ "learning_rate": 1.563636363636364e-05,
571
+ "loss": 0.1018,
572
  "step": 94
573
  },
574
  {
575
  "epoch": 0.73,
576
+ "learning_rate": 1.5584415584415587e-05,
577
+ "loss": 0.1044,
578
  "step": 95
579
  },
580
  {
581
  "epoch": 0.73,
582
+ "learning_rate": 1.5532467532467534e-05,
583
+ "loss": 0.0549,
584
  "step": 96
585
  },
586
  {
587
  "epoch": 0.74,
588
+ "learning_rate": 1.548051948051948e-05,
589
+ "loss": 0.0321,
590
  "step": 97
591
  },
592
  {
593
  "epoch": 0.75,
594
+ "learning_rate": 1.542857142857143e-05,
595
+ "loss": 0.0248,
596
  "step": 98
597
  },
598
  {
599
  "epoch": 0.76,
600
+ "learning_rate": 1.537662337662338e-05,
601
+ "loss": 0.0508,
602
  "step": 99
603
  },
604
  {
605
  "epoch": 0.76,
606
+ "learning_rate": 1.5324675324675326e-05,
607
+ "loss": 0.0279,
608
  "step": 100
609
  },
610
  {
611
  "epoch": 0.77,
612
+ "learning_rate": 1.5272727272727276e-05,
613
+ "loss": 0.0499,
614
  "step": 101
615
  },
616
  {
617
  "epoch": 0.78,
618
+ "learning_rate": 1.5220779220779223e-05,
619
+ "loss": 0.035,
620
  "step": 102
621
  },
622
  {
623
  "epoch": 0.79,
624
+ "learning_rate": 1.516883116883117e-05,
625
+ "loss": 0.0767,
626
  "step": 103
627
  },
628
  {
629
  "epoch": 0.79,
630
+ "learning_rate": 1.511688311688312e-05,
631
+ "loss": 0.0712,
632
  "step": 104
633
  },
634
  {
635
  "epoch": 0.8,
636
+ "learning_rate": 1.5064935064935066e-05,
637
+ "loss": 0.0458,
638
  "step": 105
639
  },
640
  {
641
  "epoch": 0.81,
642
+ "learning_rate": 1.5012987012987015e-05,
643
+ "loss": 0.0362,
644
  "step": 106
645
  },
646
  {
647
  "epoch": 0.82,
648
+ "learning_rate": 1.4961038961038962e-05,
649
+ "loss": 0.0651,
650
  "step": 107
651
  },
652
  {
653
  "epoch": 0.82,
654
+ "learning_rate": 1.4909090909090911e-05,
655
+ "loss": 0.0447,
656
  "step": 108
657
  },
658
  {
659
  "epoch": 0.83,
660
+ "learning_rate": 1.4857142857142858e-05,
661
+ "loss": 0.1035,
662
  "step": 109
663
  },
664
  {
665
  "epoch": 0.84,
666
+ "learning_rate": 1.4805194805194807e-05,
667
+ "loss": 0.0608,
668
  "step": 110
669
  },
670
  {
671
  "epoch": 0.85,
672
+ "learning_rate": 1.4753246753246754e-05,
673
+ "loss": 0.0407,
674
  "step": 111
675
  },
676
  {
677
  "epoch": 0.85,
678
+ "learning_rate": 1.4701298701298703e-05,
679
+ "loss": 0.0183,
680
  "step": 112
681
  },
682
  {
683
  "epoch": 0.86,
684
+ "learning_rate": 1.464935064935065e-05,
685
+ "loss": 0.0281,
686
  "step": 113
687
  },
688
  {
689
  "epoch": 0.87,
690
+ "learning_rate": 1.45974025974026e-05,
691
+ "loss": 0.0289,
692
  "step": 114
693
  },
694
  {
695
  "epoch": 0.88,
696
+ "learning_rate": 1.4545454545454546e-05,
697
+ "loss": 0.0725,
698
  "step": 115
699
  },
700
  {
701
  "epoch": 0.89,
702
+ "learning_rate": 1.4493506493506495e-05,
703
+ "loss": 0.0287,
704
  "step": 116
705
  },
706
  {
707
  "epoch": 0.89,
708
+ "learning_rate": 1.4441558441558442e-05,
709
+ "loss": 0.018,
710
  "step": 117
711
  },
712
  {
713
  "epoch": 0.9,
714
+ "learning_rate": 1.4389610389610391e-05,
715
+ "loss": 0.0227,
716
  "step": 118
717
  },
718
  {
719
  "epoch": 0.91,
720
+ "learning_rate": 1.433766233766234e-05,
721
+ "loss": 0.0315,
722
  "step": 119
723
  },
724
  {
725
  "epoch": 0.92,
726
+ "learning_rate": 1.4285714285714287e-05,
727
+ "loss": 0.1159,
728
  "step": 120
729
  },
730
  {
731
  "epoch": 0.92,
732
+ "learning_rate": 1.4233766233766236e-05,
733
+ "loss": 0.0419,
734
  "step": 121
735
  },
736
  {
737
  "epoch": 0.93,
738
+ "learning_rate": 1.4181818181818183e-05,
739
+ "loss": 0.0668,
740
  "step": 122
741
  },
742
  {
743
  "epoch": 0.94,
744
+ "learning_rate": 1.4129870129870132e-05,
745
+ "loss": 0.0357,
746
  "step": 123
747
  },
748
  {
749
  "epoch": 0.95,
750
+ "learning_rate": 1.4077922077922079e-05,
751
+ "loss": 0.0288,
752
  "step": 124
753
  },
754
  {
755
  "epoch": 0.95,
756
+ "learning_rate": 1.4025974025974028e-05,
757
+ "loss": 0.0135,
758
  "step": 125
759
  },
760
  {
761
  "epoch": 0.96,
762
+ "learning_rate": 1.3974025974025975e-05,
763
+ "loss": 0.0308,
764
  "step": 126
765
  },
766
  {
767
  "epoch": 0.97,
768
+ "learning_rate": 1.3922077922077924e-05,
769
+ "loss": 0.025,
770
  "step": 127
771
  },
772
  {
773
  "epoch": 0.98,
774
+ "learning_rate": 1.3870129870129871e-05,
775
+ "loss": 0.0375,
776
  "step": 128
777
  },
778
  {
779
  "epoch": 0.98,
780
+ "learning_rate": 1.381818181818182e-05,
781
+ "loss": 0.0182,
782
  "step": 129
783
  },
784
  {
785
  "epoch": 0.99,
786
+ "learning_rate": 1.3766233766233767e-05,
787
+ "loss": 0.0417,
788
  "step": 130
789
  },
790
  {
791
  "epoch": 1.0,
792
+ "learning_rate": 1.3714285714285716e-05,
793
+ "loss": 0.0094,
794
  "step": 131
795
  },
796
  {
797
  "epoch": 1.0,
798
+ "eval_loss": 0.034164465963840485,
799
+ "eval_pearson": 0.9208741871943605,
800
+ "eval_runtime": 5.5147,
801
+ "eval_samples_per_second": 42.069,
802
+ "eval_spearmanr": 0.8739346623789532,
803
+ "eval_steps_per_second": 2.72,
804
  "step": 131
805
  }
806
  ],
checkpoint-131/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa3be555654ef58dc4291062e451cc4f3b395b85f9521a093bdc71de5f5c2938
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c355b2a51bf6c9af2253d00c618ad523567bd7357682948505578a36e2e9f8f2
3
  size 3311
checkpoint-262/config.json CHANGED
@@ -34,7 +34,7 @@
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.20.0",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
 
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
+ "transformers_version": "4.20.1",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
checkpoint-262/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70a62e99ccc6f7136ea65a4aa4d29621c5a1ae4fbe56eeed636d0535a4e56caf
3
  size 93490051
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ccb8a7a8b8e5ba6164105b9d9d030f583b6c09748d6330d8f67df6d81261300
3
  size 93490051
checkpoint-262/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cdc6504aa0540f996b73c710e67e68a2602d5d34ac923fc95f2c70e0ad858a5
3
  size 46750353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1d15690a936faf0a21a6faf23269e51cb7a96a58378e9cc5383f238cca709e0
3
  size 46750353
checkpoint-262/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f522c7eb6e3c2b203c614ad074395772b3ba4de07537d0c54a03fe0ca59c79a
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bd434c261237391b236d99d6b6625730cbceeceef66707924e0b604fa04729e
3
  size 559
checkpoint-262/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:720a232b73b34e4a6ec8c90c7b6dee9d4c40dc3fb46e6b103f0b2b08c2d3fd94
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e58e62e190d1deac7ac4ea88e1ca0b35ee12c02532cedd5724c51c8bd46a7613
3
  size 623
checkpoint-262/trainer_state.json CHANGED
@@ -10,1593 +10,1593 @@
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 0.0,
13
- "loss": 0.3588,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 0.0,
19
- "loss": 0.3226,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.02,
24
  "learning_rate": 2.5e-06,
25
- "loss": 0.2542,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.03,
30
  "learning_rate": 5e-06,
31
- "loss": 0.2879,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
- "learning_rate": 5e-06,
37
- "loss": 0.5216,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
- "learning_rate": 7.500000000000001e-06,
43
- "loss": 0.341,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.05,
48
- "learning_rate": 1e-05,
49
- "loss": 0.1838,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.06,
54
- "learning_rate": 1.25e-05,
55
- "loss": 0.3373,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.07,
60
- "learning_rate": 1.5000000000000002e-05,
61
- "loss": 0.2025,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.08,
66
- "learning_rate": 1.7500000000000002e-05,
67
- "loss": 0.2342,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.08,
72
- "learning_rate": 2e-05,
73
- "loss": 0.1436,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.09,
78
- "learning_rate": 1.994805194805195e-05,
79
- "loss": 0.4132,
80
  "step": 12
81
  },
82
  {
83
  "epoch": 0.1,
84
- "learning_rate": 1.98961038961039e-05,
85
- "loss": 0.279,
86
  "step": 13
87
  },
88
  {
89
  "epoch": 0.11,
90
- "learning_rate": 1.9844155844155846e-05,
91
- "loss": 0.1597,
92
  "step": 14
93
  },
94
  {
95
  "epoch": 0.11,
96
- "learning_rate": 1.9792207792207794e-05,
97
- "loss": 0.2237,
98
  "step": 15
99
  },
100
  {
101
  "epoch": 0.12,
102
- "learning_rate": 1.974025974025974e-05,
103
- "loss": 0.3902,
104
  "step": 16
105
  },
106
  {
107
  "epoch": 0.13,
108
- "learning_rate": 1.968831168831169e-05,
109
- "loss": 0.1849,
110
  "step": 17
111
  },
112
  {
113
  "epoch": 0.14,
114
- "learning_rate": 1.963636363636364e-05,
115
- "loss": 0.2283,
116
  "step": 18
117
  },
118
  {
119
  "epoch": 0.15,
120
- "learning_rate": 1.9584415584415586e-05,
121
- "loss": 0.1571,
122
  "step": 19
123
  },
124
  {
125
  "epoch": 0.15,
126
- "learning_rate": 1.9532467532467533e-05,
127
- "loss": 0.2266,
128
  "step": 20
129
  },
130
  {
131
  "epoch": 0.16,
132
- "learning_rate": 1.9480519480519483e-05,
133
- "loss": 0.1266,
134
  "step": 21
135
  },
136
  {
137
  "epoch": 0.17,
138
- "learning_rate": 1.942857142857143e-05,
139
- "loss": 0.1642,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 0.18,
144
- "learning_rate": 1.9376623376623377e-05,
145
- "loss": 0.1877,
146
  "step": 23
147
  },
148
  {
149
  "epoch": 0.18,
150
- "learning_rate": 1.9324675324675325e-05,
151
- "loss": 0.1765,
152
  "step": 24
153
  },
154
  {
155
  "epoch": 0.19,
156
- "learning_rate": 1.9272727272727275e-05,
157
- "loss": 0.1325,
158
  "step": 25
159
  },
160
  {
161
  "epoch": 0.2,
162
- "learning_rate": 1.9220779220779222e-05,
163
- "loss": 0.1093,
164
  "step": 26
165
  },
166
  {
167
  "epoch": 0.21,
168
- "learning_rate": 1.916883116883117e-05,
169
- "loss": 0.082,
170
  "step": 27
171
  },
172
  {
173
  "epoch": 0.21,
174
- "learning_rate": 1.9116883116883117e-05,
175
- "loss": 0.1399,
176
  "step": 28
177
  },
178
  {
179
  "epoch": 0.22,
180
- "learning_rate": 1.9064935064935067e-05,
181
- "loss": 0.0833,
182
  "step": 29
183
  },
184
  {
185
  "epoch": 0.23,
186
- "learning_rate": 1.9012987012987014e-05,
187
- "loss": 0.122,
188
  "step": 30
189
  },
190
  {
191
  "epoch": 0.24,
192
- "learning_rate": 1.896103896103896e-05,
193
- "loss": 0.1042,
194
  "step": 31
195
  },
196
  {
197
  "epoch": 0.24,
198
- "learning_rate": 1.8909090909090912e-05,
199
- "loss": 0.1439,
200
  "step": 32
201
  },
202
  {
203
  "epoch": 0.25,
204
- "learning_rate": 1.885714285714286e-05,
205
- "loss": 0.045,
206
  "step": 33
207
  },
208
  {
209
  "epoch": 0.26,
210
- "learning_rate": 1.8805194805194806e-05,
211
- "loss": 0.1221,
212
  "step": 34
213
  },
214
  {
215
  "epoch": 0.27,
216
- "learning_rate": 1.8753246753246753e-05,
217
- "loss": 0.1128,
218
  "step": 35
219
  },
220
  {
221
  "epoch": 0.27,
222
- "learning_rate": 1.8701298701298704e-05,
223
- "loss": 0.1142,
224
  "step": 36
225
  },
226
  {
227
  "epoch": 0.28,
228
- "learning_rate": 1.864935064935065e-05,
229
- "loss": 0.095,
230
  "step": 37
231
  },
232
  {
233
  "epoch": 0.29,
234
- "learning_rate": 1.8597402597402598e-05,
235
- "loss": 0.0696,
236
  "step": 38
237
  },
238
  {
239
  "epoch": 0.3,
240
- "learning_rate": 1.8545454545454545e-05,
241
- "loss": 0.1131,
242
  "step": 39
243
  },
244
  {
245
  "epoch": 0.31,
246
- "learning_rate": 1.8493506493506496e-05,
247
- "loss": 0.2079,
248
  "step": 40
249
  },
250
  {
251
  "epoch": 0.31,
252
- "learning_rate": 1.8441558441558443e-05,
253
- "loss": 0.2369,
254
  "step": 41
255
  },
256
  {
257
  "epoch": 0.32,
258
- "learning_rate": 1.838961038961039e-05,
259
- "loss": 0.1422,
260
  "step": 42
261
  },
262
  {
263
  "epoch": 0.33,
264
- "learning_rate": 1.8337662337662337e-05,
265
- "loss": 0.0955,
266
  "step": 43
267
  },
268
  {
269
  "epoch": 0.34,
270
- "learning_rate": 1.8285714285714288e-05,
271
- "loss": 0.1609,
272
  "step": 44
273
  },
274
  {
275
  "epoch": 0.34,
276
- "learning_rate": 1.8233766233766235e-05,
277
- "loss": 0.1931,
278
  "step": 45
279
  },
280
  {
281
  "epoch": 0.35,
282
- "learning_rate": 1.8181818181818182e-05,
283
- "loss": 0.1338,
284
  "step": 46
285
  },
286
  {
287
  "epoch": 0.36,
288
- "learning_rate": 1.812987012987013e-05,
289
- "loss": 0.0927,
290
  "step": 47
291
  },
292
  {
293
  "epoch": 0.37,
294
- "learning_rate": 1.807792207792208e-05,
295
- "loss": 0.0717,
296
  "step": 48
297
  },
298
  {
299
  "epoch": 0.37,
300
- "learning_rate": 1.8025974025974027e-05,
301
- "loss": 0.0478,
302
  "step": 49
303
  },
304
  {
305
  "epoch": 0.38,
306
- "learning_rate": 1.7974025974025974e-05,
307
- "loss": 0.1131,
308
  "step": 50
309
  },
310
  {
311
  "epoch": 0.39,
312
- "learning_rate": 1.792207792207792e-05,
313
- "loss": 0.202,
314
  "step": 51
315
  },
316
  {
317
  "epoch": 0.4,
318
- "learning_rate": 1.7870129870129872e-05,
319
- "loss": 0.0677,
320
  "step": 52
321
  },
322
  {
323
  "epoch": 0.4,
324
- "learning_rate": 1.781818181818182e-05,
325
- "loss": 0.1057,
326
  "step": 53
327
  },
328
  {
329
  "epoch": 0.41,
330
- "learning_rate": 1.7766233766233766e-05,
331
- "loss": 0.0773,
332
  "step": 54
333
  },
334
  {
335
  "epoch": 0.42,
336
- "learning_rate": 1.7714285714285717e-05,
337
- "loss": 0.0599,
338
  "step": 55
339
  },
340
  {
341
  "epoch": 0.43,
342
- "learning_rate": 1.7662337662337664e-05,
343
- "loss": 0.2749,
344
  "step": 56
345
  },
346
  {
347
  "epoch": 0.44,
348
- "learning_rate": 1.761038961038961e-05,
349
- "loss": 0.1707,
350
  "step": 57
351
  },
352
  {
353
  "epoch": 0.44,
354
- "learning_rate": 1.7558441558441558e-05,
355
- "loss": 0.1129,
356
  "step": 58
357
  },
358
  {
359
  "epoch": 0.45,
360
- "learning_rate": 1.750649350649351e-05,
361
- "loss": 0.0496,
362
  "step": 59
363
  },
364
  {
365
  "epoch": 0.46,
366
- "learning_rate": 1.7454545454545456e-05,
367
- "loss": 0.0708,
368
  "step": 60
369
  },
370
  {
371
  "epoch": 0.47,
372
- "learning_rate": 1.7402597402597403e-05,
373
- "loss": 0.0694,
374
  "step": 61
375
  },
376
  {
377
  "epoch": 0.47,
378
- "learning_rate": 1.735064935064935e-05,
379
- "loss": 0.1964,
380
  "step": 62
381
  },
382
  {
383
  "epoch": 0.48,
384
- "learning_rate": 1.72987012987013e-05,
385
- "loss": 0.1586,
386
  "step": 63
387
  },
388
  {
389
  "epoch": 0.49,
390
- "learning_rate": 1.7246753246753248e-05,
391
- "loss": 0.2173,
392
  "step": 64
393
  },
394
  {
395
  "epoch": 0.5,
396
- "learning_rate": 1.7194805194805195e-05,
397
- "loss": 0.162,
398
  "step": 65
399
  },
400
  {
401
  "epoch": 0.5,
402
- "learning_rate": 1.7142857142857142e-05,
403
- "loss": 0.0896,
404
  "step": 66
405
  },
406
  {
407
  "epoch": 0.51,
408
- "learning_rate": 1.7090909090909092e-05,
409
- "loss": 0.0503,
410
  "step": 67
411
  },
412
  {
413
  "epoch": 0.52,
414
- "learning_rate": 1.703896103896104e-05,
415
- "loss": 0.1161,
416
  "step": 68
417
  },
418
  {
419
  "epoch": 0.53,
420
- "learning_rate": 1.6987012987012987e-05,
421
- "loss": 0.0854,
422
  "step": 69
423
  },
424
  {
425
  "epoch": 0.53,
426
- "learning_rate": 1.6935064935064934e-05,
427
- "loss": 0.1247,
428
  "step": 70
429
  },
430
  {
431
  "epoch": 0.54,
432
- "learning_rate": 1.6883116883116884e-05,
433
- "loss": 0.1368,
434
  "step": 71
435
  },
436
  {
437
  "epoch": 0.55,
438
- "learning_rate": 1.683116883116883e-05,
439
- "loss": 0.1041,
440
  "step": 72
441
  },
442
  {
443
  "epoch": 0.56,
444
- "learning_rate": 1.677922077922078e-05,
445
- "loss": 0.0546,
446
  "step": 73
447
  },
448
  {
449
  "epoch": 0.56,
450
- "learning_rate": 1.672727272727273e-05,
451
- "loss": 0.0866,
452
  "step": 74
453
  },
454
  {
455
  "epoch": 0.57,
456
- "learning_rate": 1.6675324675324676e-05,
457
- "loss": 0.0624,
458
  "step": 75
459
  },
460
  {
461
  "epoch": 0.58,
462
- "learning_rate": 1.6623376623376627e-05,
463
- "loss": 0.1149,
464
  "step": 76
465
  },
466
  {
467
  "epoch": 0.59,
468
- "learning_rate": 1.6571428571428574e-05,
469
- "loss": 0.0752,
470
  "step": 77
471
  },
472
  {
473
  "epoch": 0.6,
474
- "learning_rate": 1.651948051948052e-05,
475
- "loss": 0.0457,
476
  "step": 78
477
  },
478
  {
479
  "epoch": 0.6,
480
- "learning_rate": 1.646753246753247e-05,
481
- "loss": 0.0705,
482
  "step": 79
483
  },
484
  {
485
  "epoch": 0.61,
486
- "learning_rate": 1.641558441558442e-05,
487
- "loss": 0.0827,
488
  "step": 80
489
  },
490
  {
491
  "epoch": 0.62,
492
- "learning_rate": 1.6363636363636366e-05,
493
- "loss": 0.0717,
494
  "step": 81
495
  },
496
  {
497
  "epoch": 0.63,
498
- "learning_rate": 1.6311688311688313e-05,
499
- "loss": 0.0804,
500
  "step": 82
501
  },
502
  {
503
  "epoch": 0.63,
504
- "learning_rate": 1.6259740259740264e-05,
505
- "loss": 0.0188,
506
  "step": 83
507
  },
508
  {
509
  "epoch": 0.64,
510
- "learning_rate": 1.620779220779221e-05,
511
- "loss": 0.0457,
512
  "step": 84
513
  },
514
  {
515
  "epoch": 0.65,
516
- "learning_rate": 1.6155844155844158e-05,
517
- "loss": 0.0422,
518
  "step": 85
519
  },
520
  {
521
  "epoch": 0.66,
522
- "learning_rate": 1.6103896103896105e-05,
523
- "loss": 0.0297,
524
  "step": 86
525
  },
526
  {
527
  "epoch": 0.66,
528
- "learning_rate": 1.6051948051948056e-05,
529
- "loss": 0.0408,
530
  "step": 87
531
  },
532
  {
533
  "epoch": 0.67,
534
- "learning_rate": 1.6000000000000003e-05,
535
- "loss": 0.0527,
536
  "step": 88
537
  },
538
  {
539
  "epoch": 0.68,
540
- "learning_rate": 1.594805194805195e-05,
541
- "loss": 0.0234,
542
  "step": 89
543
  },
544
  {
545
  "epoch": 0.69,
546
- "learning_rate": 1.5896103896103897e-05,
547
- "loss": 0.0368,
548
  "step": 90
549
  },
550
  {
551
  "epoch": 0.69,
552
- "learning_rate": 1.5844155844155847e-05,
553
- "loss": 0.0731,
554
  "step": 91
555
  },
556
  {
557
  "epoch": 0.7,
558
- "learning_rate": 1.5792207792207795e-05,
559
- "loss": 0.1104,
560
  "step": 92
561
  },
562
  {
563
  "epoch": 0.71,
564
- "learning_rate": 1.5740259740259742e-05,
565
- "loss": 0.0546,
566
  "step": 93
567
  },
568
  {
569
  "epoch": 0.72,
570
- "learning_rate": 1.568831168831169e-05,
571
- "loss": 0.0226,
572
  "step": 94
573
  },
574
  {
575
  "epoch": 0.73,
576
- "learning_rate": 1.563636363636364e-05,
577
- "loss": 0.0366,
578
  "step": 95
579
  },
580
  {
581
  "epoch": 0.73,
582
- "learning_rate": 1.5584415584415587e-05,
583
- "loss": 0.038,
584
  "step": 96
585
  },
586
  {
587
  "epoch": 0.74,
588
- "learning_rate": 1.5532467532467534e-05,
589
- "loss": 0.041,
590
  "step": 97
591
  },
592
  {
593
  "epoch": 0.75,
594
- "learning_rate": 1.548051948051948e-05,
595
- "loss": 0.0375,
596
  "step": 98
597
  },
598
  {
599
  "epoch": 0.76,
600
- "learning_rate": 1.542857142857143e-05,
601
- "loss": 0.0389,
602
  "step": 99
603
  },
604
  {
605
  "epoch": 0.76,
606
- "learning_rate": 1.537662337662338e-05,
607
- "loss": 0.022,
608
  "step": 100
609
  },
610
  {
611
  "epoch": 0.77,
612
- "learning_rate": 1.5324675324675326e-05,
613
- "loss": 0.0569,
614
  "step": 101
615
  },
616
  {
617
  "epoch": 0.78,
618
- "learning_rate": 1.5272727272727276e-05,
619
- "loss": 0.0172,
620
  "step": 102
621
  },
622
  {
623
  "epoch": 0.79,
624
- "learning_rate": 1.5220779220779223e-05,
625
- "loss": 0.0754,
626
  "step": 103
627
  },
628
  {
629
  "epoch": 0.79,
630
- "learning_rate": 1.516883116883117e-05,
631
- "loss": 0.0626,
632
  "step": 104
633
  },
634
  {
635
  "epoch": 0.8,
636
- "learning_rate": 1.511688311688312e-05,
637
- "loss": 0.0515,
638
  "step": 105
639
  },
640
  {
641
  "epoch": 0.81,
642
- "learning_rate": 1.5064935064935066e-05,
643
- "loss": 0.0345,
644
  "step": 106
645
  },
646
  {
647
  "epoch": 0.82,
648
- "learning_rate": 1.5012987012987015e-05,
649
- "loss": 0.0504,
650
  "step": 107
651
  },
652
  {
653
  "epoch": 0.82,
654
- "learning_rate": 1.4961038961038962e-05,
655
- "loss": 0.0329,
656
  "step": 108
657
  },
658
  {
659
  "epoch": 0.83,
660
- "learning_rate": 1.4909090909090911e-05,
661
- "loss": 0.0719,
662
  "step": 109
663
  },
664
  {
665
  "epoch": 0.84,
666
- "learning_rate": 1.4857142857142858e-05,
667
- "loss": 0.0417,
668
  "step": 110
669
  },
670
  {
671
  "epoch": 0.85,
672
- "learning_rate": 1.4805194805194807e-05,
673
- "loss": 0.0551,
674
  "step": 111
675
  },
676
  {
677
  "epoch": 0.85,
678
- "learning_rate": 1.4753246753246754e-05,
679
- "loss": 0.0299,
680
  "step": 112
681
  },
682
  {
683
  "epoch": 0.86,
684
- "learning_rate": 1.4701298701298703e-05,
685
- "loss": 0.024,
686
  "step": 113
687
  },
688
  {
689
  "epoch": 0.87,
690
- "learning_rate": 1.464935064935065e-05,
691
- "loss": 0.0328,
692
  "step": 114
693
  },
694
  {
695
  "epoch": 0.88,
696
- "learning_rate": 1.45974025974026e-05,
697
- "loss": 0.0462,
698
  "step": 115
699
  },
700
  {
701
  "epoch": 0.89,
702
- "learning_rate": 1.4545454545454546e-05,
703
- "loss": 0.0286,
704
  "step": 116
705
  },
706
  {
707
  "epoch": 0.89,
708
- "learning_rate": 1.4493506493506495e-05,
709
- "loss": 0.0259,
710
  "step": 117
711
  },
712
  {
713
  "epoch": 0.9,
714
- "learning_rate": 1.4441558441558442e-05,
715
- "loss": 0.0321,
716
  "step": 118
717
  },
718
  {
719
  "epoch": 0.91,
720
- "learning_rate": 1.4389610389610391e-05,
721
- "loss": 0.0324,
722
  "step": 119
723
  },
724
  {
725
  "epoch": 0.92,
726
- "learning_rate": 1.433766233766234e-05,
727
- "loss": 0.1605,
728
  "step": 120
729
  },
730
  {
731
  "epoch": 0.92,
732
- "learning_rate": 1.4285714285714287e-05,
733
- "loss": 0.0466,
734
  "step": 121
735
  },
736
  {
737
  "epoch": 0.93,
738
- "learning_rate": 1.4233766233766236e-05,
739
- "loss": 0.0628,
740
  "step": 122
741
  },
742
  {
743
  "epoch": 0.94,
744
- "learning_rate": 1.4181818181818183e-05,
745
- "loss": 0.0276,
746
  "step": 123
747
  },
748
  {
749
  "epoch": 0.95,
750
- "learning_rate": 1.4129870129870132e-05,
751
- "loss": 0.0371,
752
  "step": 124
753
  },
754
  {
755
  "epoch": 0.95,
756
- "learning_rate": 1.4077922077922079e-05,
757
- "loss": 0.0115,
758
  "step": 125
759
  },
760
  {
761
  "epoch": 0.96,
762
- "learning_rate": 1.4025974025974028e-05,
763
- "loss": 0.0399,
764
  "step": 126
765
  },
766
  {
767
  "epoch": 0.97,
768
- "learning_rate": 1.3974025974025975e-05,
769
- "loss": 0.043,
770
  "step": 127
771
  },
772
  {
773
  "epoch": 0.98,
774
- "learning_rate": 1.3922077922077924e-05,
775
- "loss": 0.0543,
776
  "step": 128
777
  },
778
  {
779
  "epoch": 0.98,
780
- "learning_rate": 1.3870129870129871e-05,
781
- "loss": 0.0232,
782
  "step": 129
783
  },
784
  {
785
  "epoch": 0.99,
786
- "learning_rate": 1.381818181818182e-05,
787
- "loss": 0.0542,
788
  "step": 130
789
  },
790
  {
791
  "epoch": 1.0,
792
- "learning_rate": 1.3766233766233767e-05,
793
- "loss": 0.0232,
794
  "step": 131
795
  },
796
  {
797
  "epoch": 1.0,
798
- "eval_loss": 0.029073596000671387,
799
- "eval_pearson": 0.9236611784671435,
800
- "eval_runtime": 5.5585,
801
- "eval_samples_per_second": 41.738,
802
- "eval_spearmanr": 0.8832765384834576,
803
- "eval_steps_per_second": 2.699,
804
  "step": 131
805
  },
806
  {
807
  "epoch": 1.01,
808
- "learning_rate": 1.3714285714285716e-05,
809
- "loss": 0.0361,
810
  "step": 132
811
  },
812
  {
813
  "epoch": 1.02,
814
- "learning_rate": 1.3662337662337663e-05,
815
- "loss": 0.0235,
816
  "step": 133
817
  },
818
  {
819
  "epoch": 1.02,
820
- "learning_rate": 1.3610389610389612e-05,
821
- "loss": 0.0195,
822
  "step": 134
823
  },
824
  {
825
  "epoch": 1.03,
826
- "learning_rate": 1.3558441558441559e-05,
827
- "loss": 0.0482,
828
  "step": 135
829
  },
830
  {
831
  "epoch": 1.04,
832
- "learning_rate": 1.3506493506493508e-05,
833
- "loss": 0.0332,
834
  "step": 136
835
  },
836
  {
837
  "epoch": 1.05,
838
- "learning_rate": 1.3454545454545455e-05,
839
- "loss": 0.0261,
840
  "step": 137
841
  },
842
  {
843
  "epoch": 1.05,
844
- "learning_rate": 1.3402597402597404e-05,
845
- "loss": 0.0681,
846
  "step": 138
847
  },
848
  {
849
  "epoch": 1.06,
850
- "learning_rate": 1.3350649350649351e-05,
851
- "loss": 0.0216,
852
  "step": 139
853
  },
854
  {
855
  "epoch": 1.07,
856
- "learning_rate": 1.32987012987013e-05,
857
- "loss": 0.0326,
858
  "step": 140
859
  },
860
  {
861
  "epoch": 1.08,
862
- "learning_rate": 1.3246753246753249e-05,
863
- "loss": 0.0253,
864
  "step": 141
865
  },
866
  {
867
  "epoch": 1.08,
868
- "learning_rate": 1.3194805194805196e-05,
869
- "loss": 0.0286,
870
  "step": 142
871
  },
872
  {
873
  "epoch": 1.09,
874
- "learning_rate": 1.3142857142857145e-05,
875
- "loss": 0.0096,
876
  "step": 143
877
  },
878
  {
879
  "epoch": 1.1,
880
- "learning_rate": 1.3090909090909092e-05,
881
- "loss": 0.0412,
882
  "step": 144
883
  },
884
  {
885
  "epoch": 1.11,
886
- "learning_rate": 1.303896103896104e-05,
887
- "loss": 0.0444,
888
  "step": 145
889
  },
890
  {
891
  "epoch": 1.11,
892
- "learning_rate": 1.2987012987012988e-05,
893
- "loss": 0.0393,
894
  "step": 146
895
  },
896
  {
897
  "epoch": 1.12,
898
- "learning_rate": 1.2935064935064937e-05,
899
- "loss": 0.0186,
900
  "step": 147
901
  },
902
  {
903
  "epoch": 1.13,
904
- "learning_rate": 1.2883116883116884e-05,
905
- "loss": 0.0301,
906
  "step": 148
907
  },
908
  {
909
  "epoch": 1.14,
910
- "learning_rate": 1.2831168831168832e-05,
911
- "loss": 0.0099,
912
  "step": 149
913
  },
914
  {
915
  "epoch": 1.15,
916
- "learning_rate": 1.277922077922078e-05,
917
- "loss": 0.0252,
918
  "step": 150
919
  },
920
  {
921
  "epoch": 1.15,
922
- "learning_rate": 1.2727272727272728e-05,
923
- "loss": 0.0108,
924
  "step": 151
925
  },
926
  {
927
  "epoch": 1.16,
928
- "learning_rate": 1.2675324675324676e-05,
929
- "loss": 0.0447,
930
  "step": 152
931
  },
932
  {
933
  "epoch": 1.17,
934
- "learning_rate": 1.2623376623376624e-05,
935
- "loss": 0.0567,
936
  "step": 153
937
  },
938
  {
939
  "epoch": 1.18,
940
- "learning_rate": 1.2571428571428572e-05,
941
- "loss": 0.0347,
942
  "step": 154
943
  },
944
  {
945
  "epoch": 1.18,
946
- "learning_rate": 1.251948051948052e-05,
947
- "loss": 0.0186,
948
  "step": 155
949
  },
950
  {
951
  "epoch": 1.19,
952
- "learning_rate": 1.2467532467532468e-05,
953
- "loss": 0.0303,
954
  "step": 156
955
  },
956
  {
957
  "epoch": 1.2,
958
- "learning_rate": 1.2415584415584416e-05,
959
- "loss": 0.0286,
960
  "step": 157
961
  },
962
  {
963
  "epoch": 1.21,
964
- "learning_rate": 1.2363636363636364e-05,
965
- "loss": 0.0182,
966
  "step": 158
967
  },
968
  {
969
  "epoch": 1.21,
970
- "learning_rate": 1.2311688311688312e-05,
971
- "loss": 0.0155,
972
  "step": 159
973
  },
974
  {
975
  "epoch": 1.22,
976
- "learning_rate": 1.2259740259740261e-05,
977
- "loss": 0.0253,
978
  "step": 160
979
  },
980
  {
981
  "epoch": 1.23,
982
- "learning_rate": 1.2207792207792208e-05,
983
- "loss": 0.0128,
984
  "step": 161
985
  },
986
  {
987
  "epoch": 1.24,
988
- "learning_rate": 1.2155844155844157e-05,
989
- "loss": 0.0074,
990
  "step": 162
991
  },
992
  {
993
  "epoch": 1.24,
994
- "learning_rate": 1.2103896103896104e-05,
995
- "loss": 0.0284,
996
  "step": 163
997
  },
998
  {
999
  "epoch": 1.25,
1000
- "learning_rate": 1.2051948051948053e-05,
1001
- "loss": 0.0389,
1002
  "step": 164
1003
  },
1004
  {
1005
  "epoch": 1.26,
1006
- "learning_rate": 1.2e-05,
1007
- "loss": 0.0165,
1008
  "step": 165
1009
  },
1010
  {
1011
  "epoch": 1.27,
1012
- "learning_rate": 1.1948051948051949e-05,
1013
- "loss": 0.0307,
1014
  "step": 166
1015
  },
1016
  {
1017
  "epoch": 1.27,
1018
- "learning_rate": 1.1896103896103896e-05,
1019
- "loss": 0.016,
1020
  "step": 167
1021
  },
1022
  {
1023
  "epoch": 1.28,
1024
- "learning_rate": 1.1844155844155845e-05,
1025
- "loss": 0.031,
1026
  "step": 168
1027
  },
1028
  {
1029
  "epoch": 1.29,
1030
- "learning_rate": 1.1792207792207792e-05,
1031
- "loss": 0.0083,
1032
  "step": 169
1033
  },
1034
  {
1035
  "epoch": 1.3,
1036
- "learning_rate": 1.1740259740259741e-05,
1037
- "loss": 0.0577,
1038
  "step": 170
1039
  },
1040
  {
1041
  "epoch": 1.31,
1042
- "learning_rate": 1.1688311688311688e-05,
1043
- "loss": 0.0133,
1044
  "step": 171
1045
  },
1046
  {
1047
  "epoch": 1.31,
1048
- "learning_rate": 1.1636363636363637e-05,
1049
- "loss": 0.017,
1050
  "step": 172
1051
  },
1052
  {
1053
  "epoch": 1.32,
1054
- "learning_rate": 1.1584415584415584e-05,
1055
- "loss": 0.0206,
1056
  "step": 173
1057
  },
1058
  {
1059
  "epoch": 1.33,
1060
- "learning_rate": 1.1532467532467533e-05,
1061
- "loss": 0.0482,
1062
  "step": 174
1063
  },
1064
  {
1065
  "epoch": 1.34,
1066
- "learning_rate": 1.148051948051948e-05,
1067
- "loss": 0.0467,
1068
  "step": 175
1069
  },
1070
  {
1071
  "epoch": 1.34,
1072
- "learning_rate": 1.1428571428571429e-05,
1073
- "loss": 0.0195,
1074
  "step": 176
1075
  },
1076
  {
1077
  "epoch": 1.35,
1078
- "learning_rate": 1.1376623376623376e-05,
1079
- "loss": 0.0481,
1080
  "step": 177
1081
  },
1082
  {
1083
  "epoch": 1.36,
1084
- "learning_rate": 1.1324675324675325e-05,
1085
- "loss": 0.0189,
1086
  "step": 178
1087
  },
1088
  {
1089
  "epoch": 1.37,
1090
- "learning_rate": 1.1272727272727272e-05,
1091
- "loss": 0.0498,
1092
  "step": 179
1093
  },
1094
  {
1095
  "epoch": 1.37,
1096
- "learning_rate": 1.1220779220779221e-05,
1097
- "loss": 0.0351,
1098
  "step": 180
1099
  },
1100
  {
1101
  "epoch": 1.38,
1102
- "learning_rate": 1.116883116883117e-05,
1103
- "loss": 0.0341,
1104
  "step": 181
1105
  },
1106
  {
1107
  "epoch": 1.39,
1108
- "learning_rate": 1.1116883116883117e-05,
1109
- "loss": 0.0659,
1110
  "step": 182
1111
  },
1112
  {
1113
  "epoch": 1.4,
1114
- "learning_rate": 1.1064935064935066e-05,
1115
- "loss": 0.0198,
1116
  "step": 183
1117
  },
1118
  {
1119
  "epoch": 1.4,
1120
- "learning_rate": 1.1012987012987013e-05,
1121
- "loss": 0.0352,
1122
  "step": 184
1123
  },
1124
  {
1125
  "epoch": 1.41,
1126
- "learning_rate": 1.0961038961038962e-05,
1127
- "loss": 0.0129,
1128
  "step": 185
1129
  },
1130
  {
1131
  "epoch": 1.42,
1132
- "learning_rate": 1.0909090909090909e-05,
1133
- "loss": 0.0353,
1134
  "step": 186
1135
  },
1136
  {
1137
  "epoch": 1.43,
1138
- "learning_rate": 1.0857142857142858e-05,
1139
- "loss": 0.0084,
1140
  "step": 187
1141
  },
1142
  {
1143
  "epoch": 1.44,
1144
- "learning_rate": 1.0805194805194805e-05,
1145
- "loss": 0.02,
1146
  "step": 188
1147
  },
1148
  {
1149
  "epoch": 1.44,
1150
- "learning_rate": 1.0753246753246754e-05,
1151
- "loss": 0.0106,
1152
  "step": 189
1153
  },
1154
  {
1155
  "epoch": 1.45,
1156
- "learning_rate": 1.0701298701298701e-05,
1157
- "loss": 0.0253,
1158
  "step": 190
1159
  },
1160
  {
1161
  "epoch": 1.46,
1162
- "learning_rate": 1.064935064935065e-05,
1163
- "loss": 0.0116,
1164
  "step": 191
1165
  },
1166
  {
1167
  "epoch": 1.47,
1168
- "learning_rate": 1.0597402597402597e-05,
1169
- "loss": 0.0099,
1170
  "step": 192
1171
  },
1172
  {
1173
  "epoch": 1.47,
1174
- "learning_rate": 1.0545454545454546e-05,
1175
- "loss": 0.0845,
1176
  "step": 193
1177
  },
1178
  {
1179
  "epoch": 1.48,
1180
- "learning_rate": 1.0493506493506493e-05,
1181
- "loss": 0.0236,
1182
  "step": 194
1183
  },
1184
  {
1185
  "epoch": 1.49,
1186
- "learning_rate": 1.0441558441558442e-05,
1187
- "loss": 0.0299,
1188
  "step": 195
1189
  },
1190
  {
1191
  "epoch": 1.5,
1192
- "learning_rate": 1.0389610389610389e-05,
1193
- "loss": 0.01,
1194
  "step": 196
1195
  },
1196
  {
1197
  "epoch": 1.5,
1198
- "learning_rate": 1.0337662337662338e-05,
1199
- "loss": 0.0154,
1200
  "step": 197
1201
  },
1202
  {
1203
  "epoch": 1.51,
1204
- "learning_rate": 1.0285714285714285e-05,
1205
- "loss": 0.0095,
1206
  "step": 198
1207
  },
1208
  {
1209
  "epoch": 1.52,
1210
- "learning_rate": 1.0233766233766234e-05,
1211
- "loss": 0.0278,
1212
  "step": 199
1213
  },
1214
  {
1215
  "epoch": 1.53,
1216
- "learning_rate": 1.0181818181818182e-05,
1217
- "loss": 0.0228,
1218
  "step": 200
1219
  },
1220
  {
1221
  "epoch": 1.53,
1222
- "learning_rate": 1.012987012987013e-05,
1223
- "loss": 0.0252,
1224
  "step": 201
1225
  },
1226
  {
1227
  "epoch": 1.54,
1228
- "learning_rate": 1.0077922077922078e-05,
1229
- "loss": 0.085,
1230
  "step": 202
1231
  },
1232
  {
1233
  "epoch": 1.55,
1234
- "learning_rate": 1.0025974025974026e-05,
1235
- "loss": 0.0046,
1236
  "step": 203
1237
  },
1238
  {
1239
  "epoch": 1.56,
1240
- "learning_rate": 9.974025974025974e-06,
1241
- "loss": 0.0206,
1242
  "step": 204
1243
  },
1244
  {
1245
  "epoch": 1.56,
1246
- "learning_rate": 9.922077922077923e-06,
1247
- "loss": 0.046,
1248
  "step": 205
1249
  },
1250
  {
1251
  "epoch": 1.57,
1252
- "learning_rate": 9.87012987012987e-06,
1253
- "loss": 0.0137,
1254
  "step": 206
1255
  },
1256
  {
1257
  "epoch": 1.58,
1258
- "learning_rate": 9.81818181818182e-06,
1259
- "loss": 0.012,
1260
  "step": 207
1261
  },
1262
  {
1263
  "epoch": 1.59,
1264
- "learning_rate": 9.766233766233766e-06,
1265
- "loss": 0.0504,
1266
  "step": 208
1267
  },
1268
  {
1269
  "epoch": 1.6,
1270
- "learning_rate": 9.714285714285715e-06,
1271
- "loss": 0.0137,
1272
  "step": 209
1273
  },
1274
  {
1275
  "epoch": 1.6,
1276
- "learning_rate": 9.662337662337662e-06,
1277
- "loss": 0.0191,
1278
  "step": 210
1279
  },
1280
  {
1281
  "epoch": 1.61,
1282
- "learning_rate": 9.610389610389611e-06,
1283
- "loss": 0.016,
1284
  "step": 211
1285
  },
1286
  {
1287
  "epoch": 1.62,
1288
- "learning_rate": 9.558441558441558e-06,
1289
- "loss": 0.0511,
1290
  "step": 212
1291
  },
1292
  {
1293
  "epoch": 1.63,
1294
- "learning_rate": 9.506493506493507e-06,
1295
- "loss": 0.0233,
1296
  "step": 213
1297
  },
1298
  {
1299
  "epoch": 1.63,
1300
- "learning_rate": 9.454545454545456e-06,
1301
- "loss": 0.0301,
1302
  "step": 214
1303
  },
1304
  {
1305
  "epoch": 1.64,
1306
- "learning_rate": 9.402597402597403e-06,
1307
- "loss": 0.0087,
1308
  "step": 215
1309
  },
1310
  {
1311
  "epoch": 1.65,
1312
- "learning_rate": 9.350649350649352e-06,
1313
- "loss": 0.0112,
1314
  "step": 216
1315
  },
1316
  {
1317
  "epoch": 1.66,
1318
- "learning_rate": 9.298701298701299e-06,
1319
- "loss": 0.016,
1320
  "step": 217
1321
  },
1322
  {
1323
  "epoch": 1.66,
1324
- "learning_rate": 9.246753246753248e-06,
1325
- "loss": 0.0158,
1326
  "step": 218
1327
  },
1328
  {
1329
  "epoch": 1.67,
1330
- "learning_rate": 9.194805194805195e-06,
1331
- "loss": 0.0235,
1332
  "step": 219
1333
  },
1334
  {
1335
  "epoch": 1.68,
1336
- "learning_rate": 9.142857142857144e-06,
1337
- "loss": 0.0137,
1338
  "step": 220
1339
  },
1340
  {
1341
  "epoch": 1.69,
1342
- "learning_rate": 9.090909090909091e-06,
1343
- "loss": 0.0099,
1344
  "step": 221
1345
  },
1346
  {
1347
  "epoch": 1.69,
1348
- "learning_rate": 9.03896103896104e-06,
1349
- "loss": 0.0494,
1350
  "step": 222
1351
  },
1352
  {
1353
  "epoch": 1.7,
1354
- "learning_rate": 8.987012987012987e-06,
1355
- "loss": 0.0184,
1356
  "step": 223
1357
  },
1358
  {
1359
  "epoch": 1.71,
1360
- "learning_rate": 8.935064935064936e-06,
1361
- "loss": 0.0238,
1362
  "step": 224
1363
  },
1364
  {
1365
  "epoch": 1.72,
1366
- "learning_rate": 8.883116883116883e-06,
1367
- "loss": 0.0168,
1368
  "step": 225
1369
  },
1370
  {
1371
  "epoch": 1.73,
1372
- "learning_rate": 8.831168831168832e-06,
1373
- "loss": 0.0187,
1374
  "step": 226
1375
  },
1376
  {
1377
  "epoch": 1.73,
1378
- "learning_rate": 8.779220779220779e-06,
1379
- "loss": 0.029,
1380
  "step": 227
1381
  },
1382
  {
1383
  "epoch": 1.74,
1384
- "learning_rate": 8.727272727272728e-06,
1385
- "loss": 0.0259,
1386
  "step": 228
1387
  },
1388
  {
1389
  "epoch": 1.75,
1390
- "learning_rate": 8.675324675324675e-06,
1391
- "loss": 0.0128,
1392
  "step": 229
1393
  },
1394
  {
1395
  "epoch": 1.76,
1396
- "learning_rate": 8.623376623376624e-06,
1397
- "loss": 0.0235,
1398
  "step": 230
1399
  },
1400
  {
1401
  "epoch": 1.76,
1402
- "learning_rate": 8.571428571428571e-06,
1403
  "loss": 0.0514,
1404
  "step": 231
1405
  },
1406
  {
1407
  "epoch": 1.77,
1408
- "learning_rate": 8.51948051948052e-06,
1409
- "loss": 0.0147,
1410
  "step": 232
1411
  },
1412
  {
1413
  "epoch": 1.78,
1414
- "learning_rate": 8.467532467532467e-06,
1415
- "loss": 0.0107,
1416
  "step": 233
1417
  },
1418
  {
1419
  "epoch": 1.79,
1420
- "learning_rate": 8.415584415584416e-06,
1421
- "loss": 0.0391,
1422
  "step": 234
1423
  },
1424
  {
1425
  "epoch": 1.79,
1426
- "learning_rate": 8.363636363636365e-06,
1427
- "loss": 0.0082,
1428
  "step": 235
1429
  },
1430
  {
1431
  "epoch": 1.8,
1432
- "learning_rate": 8.311688311688313e-06,
1433
- "loss": 0.0744,
1434
  "step": 236
1435
  },
1436
  {
1437
  "epoch": 1.81,
1438
- "learning_rate": 8.25974025974026e-06,
1439
- "loss": 0.0284,
1440
  "step": 237
1441
  },
1442
  {
1443
  "epoch": 1.82,
1444
- "learning_rate": 8.20779220779221e-06,
1445
- "loss": 0.0066,
1446
  "step": 238
1447
  },
1448
  {
1449
  "epoch": 1.82,
1450
- "learning_rate": 8.155844155844157e-06,
1451
- "loss": 0.0103,
1452
  "step": 239
1453
  },
1454
  {
1455
  "epoch": 1.83,
1456
- "learning_rate": 8.103896103896105e-06,
1457
- "loss": 0.0096,
1458
  "step": 240
1459
  },
1460
  {
1461
  "epoch": 1.84,
1462
- "learning_rate": 8.051948051948052e-06,
1463
- "loss": 0.0167,
1464
  "step": 241
1465
  },
1466
  {
1467
  "epoch": 1.85,
1468
- "learning_rate": 8.000000000000001e-06,
1469
- "loss": 0.0107,
1470
  "step": 242
1471
  },
1472
  {
1473
  "epoch": 1.85,
1474
- "learning_rate": 7.948051948051948e-06,
1475
- "loss": 0.009,
1476
  "step": 243
1477
  },
1478
  {
1479
  "epoch": 1.86,
1480
- "learning_rate": 7.896103896103897e-06,
1481
- "loss": 0.0153,
1482
  "step": 244
1483
  },
1484
  {
1485
  "epoch": 1.87,
1486
- "learning_rate": 7.844155844155844e-06,
1487
- "loss": 0.0178,
1488
  "step": 245
1489
  },
1490
  {
1491
  "epoch": 1.88,
1492
- "learning_rate": 7.792207792207793e-06,
1493
- "loss": 0.0057,
1494
  "step": 246
1495
  },
1496
  {
1497
  "epoch": 1.89,
1498
- "learning_rate": 7.74025974025974e-06,
1499
- "loss": 0.0267,
1500
  "step": 247
1501
  },
1502
  {
1503
  "epoch": 1.89,
1504
- "learning_rate": 7.68831168831169e-06,
1505
- "loss": 0.0199,
1506
  "step": 248
1507
  },
1508
  {
1509
  "epoch": 1.9,
1510
- "learning_rate": 7.636363636363638e-06,
1511
- "loss": 0.0298,
1512
  "step": 249
1513
  },
1514
  {
1515
  "epoch": 1.91,
1516
- "learning_rate": 7.584415584415585e-06,
1517
- "loss": 0.0187,
1518
  "step": 250
1519
  },
1520
  {
1521
  "epoch": 1.92,
1522
- "learning_rate": 7.532467532467533e-06,
1523
- "loss": 0.0218,
1524
  "step": 251
1525
  },
1526
  {
1527
  "epoch": 1.92,
1528
- "learning_rate": 7.480519480519481e-06,
1529
- "loss": 0.0058,
1530
  "step": 252
1531
  },
1532
  {
1533
  "epoch": 1.93,
1534
- "learning_rate": 7.428571428571429e-06,
1535
- "loss": 0.0172,
1536
  "step": 253
1537
  },
1538
  {
1539
  "epoch": 1.94,
1540
- "learning_rate": 7.376623376623377e-06,
1541
- "loss": 0.0095,
1542
  "step": 254
1543
  },
1544
  {
1545
  "epoch": 1.95,
1546
- "learning_rate": 7.324675324675325e-06,
1547
- "loss": 0.0243,
1548
  "step": 255
1549
  },
1550
  {
1551
  "epoch": 1.95,
1552
- "learning_rate": 7.272727272727273e-06,
1553
- "loss": 0.0137,
1554
  "step": 256
1555
  },
1556
  {
1557
  "epoch": 1.96,
1558
- "learning_rate": 7.220779220779221e-06,
1559
- "loss": 0.0328,
1560
  "step": 257
1561
  },
1562
  {
1563
  "epoch": 1.97,
1564
- "learning_rate": 7.16883116883117e-06,
1565
- "loss": 0.023,
1566
  "step": 258
1567
  },
1568
  {
1569
  "epoch": 1.98,
1570
- "learning_rate": 7.116883116883118e-06,
1571
- "loss": 0.0512,
1572
  "step": 259
1573
  },
1574
  {
1575
  "epoch": 1.98,
1576
- "learning_rate": 7.064935064935066e-06,
1577
- "loss": 0.0106,
1578
  "step": 260
1579
  },
1580
  {
1581
  "epoch": 1.99,
1582
- "learning_rate": 7.012987012987014e-06,
1583
- "loss": 0.0226,
1584
  "step": 261
1585
  },
1586
  {
1587
  "epoch": 2.0,
1588
- "learning_rate": 6.961038961038962e-06,
1589
- "loss": 0.0024,
1590
  "step": 262
1591
  },
1592
  {
1593
  "epoch": 2.0,
1594
- "eval_loss": 0.021491218358278275,
1595
- "eval_pearson": 0.9619413875691463,
1596
- "eval_runtime": 5.3773,
1597
- "eval_samples_per_second": 43.144,
1598
- "eval_spearmanr": 0.9085946142766584,
1599
- "eval_steps_per_second": 2.79,
1600
  "step": 262
1601
  }
1602
  ],
 
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 0.0,
13
+ "loss": 0.4278,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 0.0,
19
+ "loss": 0.409,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.02,
24
  "learning_rate": 2.5e-06,
25
+ "loss": 0.3399,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.03,
30
  "learning_rate": 5e-06,
31
+ "loss": 0.3277,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
+ "learning_rate": 7.500000000000001e-06,
37
+ "loss": 0.3771,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
+ "learning_rate": 1e-05,
43
+ "loss": 0.2081,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.05,
48
+ "learning_rate": 1.25e-05,
49
+ "loss": 0.3177,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.06,
54
+ "learning_rate": 1.5000000000000002e-05,
55
+ "loss": 0.3049,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.07,
60
+ "learning_rate": 1.7500000000000002e-05,
61
+ "loss": 0.1694,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.08,
66
+ "learning_rate": 2e-05,
67
+ "loss": 0.2039,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.08,
72
+ "learning_rate": 1.994805194805195e-05,
73
+ "loss": 0.1651,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.09,
78
+ "learning_rate": 1.98961038961039e-05,
79
+ "loss": 0.3483,
80
  "step": 12
81
  },
82
  {
83
  "epoch": 0.1,
84
+ "learning_rate": 1.9844155844155846e-05,
85
+ "loss": 0.1532,
86
  "step": 13
87
  },
88
  {
89
  "epoch": 0.11,
90
+ "learning_rate": 1.9792207792207794e-05,
91
+ "loss": 0.2102,
92
  "step": 14
93
  },
94
  {
95
  "epoch": 0.11,
96
+ "learning_rate": 1.974025974025974e-05,
97
+ "loss": 0.0953,
98
  "step": 15
99
  },
100
  {
101
  "epoch": 0.12,
102
+ "learning_rate": 1.968831168831169e-05,
103
+ "loss": 0.0761,
104
  "step": 16
105
  },
106
  {
107
  "epoch": 0.13,
108
+ "learning_rate": 1.963636363636364e-05,
109
+ "loss": 0.1778,
110
  "step": 17
111
  },
112
  {
113
  "epoch": 0.14,
114
+ "learning_rate": 1.9584415584415586e-05,
115
+ "loss": 0.138,
116
  "step": 18
117
  },
118
  {
119
  "epoch": 0.15,
120
+ "learning_rate": 1.9532467532467533e-05,
121
+ "loss": 0.1523,
122
  "step": 19
123
  },
124
  {
125
  "epoch": 0.15,
126
+ "learning_rate": 1.9480519480519483e-05,
127
+ "loss": 0.1351,
128
  "step": 20
129
  },
130
  {
131
  "epoch": 0.16,
132
+ "learning_rate": 1.942857142857143e-05,
133
+ "loss": 0.0846,
134
  "step": 21
135
  },
136
  {
137
  "epoch": 0.17,
138
+ "learning_rate": 1.9376623376623377e-05,
139
+ "loss": 0.1725,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 0.18,
144
+ "learning_rate": 1.9324675324675325e-05,
145
+ "loss": 0.0751,
146
  "step": 23
147
  },
148
  {
149
  "epoch": 0.18,
150
+ "learning_rate": 1.9272727272727275e-05,
151
+ "loss": 0.1553,
152
  "step": 24
153
  },
154
  {
155
  "epoch": 0.19,
156
+ "learning_rate": 1.9220779220779222e-05,
157
+ "loss": 0.1551,
158
  "step": 25
159
  },
160
  {
161
  "epoch": 0.2,
162
+ "learning_rate": 1.916883116883117e-05,
163
+ "loss": 0.1263,
164
  "step": 26
165
  },
166
  {
167
  "epoch": 0.21,
168
+ "learning_rate": 1.9116883116883117e-05,
169
+ "loss": 0.0627,
170
  "step": 27
171
  },
172
  {
173
  "epoch": 0.21,
174
+ "learning_rate": 1.9064935064935067e-05,
175
+ "loss": 0.109,
176
  "step": 28
177
  },
178
  {
179
  "epoch": 0.22,
180
+ "learning_rate": 1.9012987012987014e-05,
181
+ "loss": 0.0426,
182
  "step": 29
183
  },
184
  {
185
  "epoch": 0.23,
186
+ "learning_rate": 1.896103896103896e-05,
187
+ "loss": 0.0864,
188
  "step": 30
189
  },
190
  {
191
  "epoch": 0.24,
192
+ "learning_rate": 1.8909090909090912e-05,
193
+ "loss": 0.0477,
194
  "step": 31
195
  },
196
  {
197
  "epoch": 0.24,
198
+ "learning_rate": 1.885714285714286e-05,
199
+ "loss": 0.1584,
200
  "step": 32
201
  },
202
  {
203
  "epoch": 0.25,
204
+ "learning_rate": 1.8805194805194806e-05,
205
+ "loss": 0.06,
206
  "step": 33
207
  },
208
  {
209
  "epoch": 0.26,
210
+ "learning_rate": 1.8753246753246753e-05,
211
+ "loss": 0.0972,
212
  "step": 34
213
  },
214
  {
215
  "epoch": 0.27,
216
+ "learning_rate": 1.8701298701298704e-05,
217
+ "loss": 0.0658,
218
  "step": 35
219
  },
220
  {
221
  "epoch": 0.27,
222
+ "learning_rate": 1.864935064935065e-05,
223
+ "loss": 0.0729,
224
  "step": 36
225
  },
226
  {
227
  "epoch": 0.28,
228
+ "learning_rate": 1.8597402597402598e-05,
229
+ "loss": 0.0851,
230
  "step": 37
231
  },
232
  {
233
  "epoch": 0.29,
234
+ "learning_rate": 1.8545454545454545e-05,
235
+ "loss": 0.1193,
236
  "step": 38
237
  },
238
  {
239
  "epoch": 0.3,
240
+ "learning_rate": 1.8493506493506496e-05,
241
+ "loss": 0.0194,
242
  "step": 39
243
  },
244
  {
245
  "epoch": 0.31,
246
+ "learning_rate": 1.8441558441558443e-05,
247
+ "loss": 0.0994,
248
  "step": 40
249
  },
250
  {
251
  "epoch": 0.31,
252
+ "learning_rate": 1.838961038961039e-05,
253
+ "loss": 0.0719,
254
  "step": 41
255
  },
256
  {
257
  "epoch": 0.32,
258
+ "learning_rate": 1.8337662337662337e-05,
259
+ "loss": 0.0915,
260
  "step": 42
261
  },
262
  {
263
  "epoch": 0.33,
264
+ "learning_rate": 1.8285714285714288e-05,
265
+ "loss": 0.0823,
266
  "step": 43
267
  },
268
  {
269
  "epoch": 0.34,
270
+ "learning_rate": 1.8233766233766235e-05,
271
+ "loss": 0.1922,
272
  "step": 44
273
  },
274
  {
275
  "epoch": 0.34,
276
+ "learning_rate": 1.8181818181818182e-05,
277
+ "loss": 0.1452,
278
  "step": 45
279
  },
280
  {
281
  "epoch": 0.35,
282
+ "learning_rate": 1.812987012987013e-05,
283
+ "loss": 0.0233,
284
  "step": 46
285
  },
286
  {
287
  "epoch": 0.36,
288
+ "learning_rate": 1.807792207792208e-05,
289
+ "loss": 0.0448,
290
  "step": 47
291
  },
292
  {
293
  "epoch": 0.37,
294
+ "learning_rate": 1.8025974025974027e-05,
295
+ "loss": 0.0708,
296
  "step": 48
297
  },
298
  {
299
  "epoch": 0.37,
300
+ "learning_rate": 1.7974025974025974e-05,
301
+ "loss": 0.0441,
302
  "step": 49
303
  },
304
  {
305
  "epoch": 0.38,
306
+ "learning_rate": 1.792207792207792e-05,
307
+ "loss": 0.0498,
308
  "step": 50
309
  },
310
  {
311
  "epoch": 0.39,
312
+ "learning_rate": 1.7870129870129872e-05,
313
+ "loss": 0.0645,
314
  "step": 51
315
  },
316
  {
317
  "epoch": 0.4,
318
+ "learning_rate": 1.781818181818182e-05,
319
+ "loss": 0.0718,
320
  "step": 52
321
  },
322
  {
323
  "epoch": 0.4,
324
+ "learning_rate": 1.7766233766233766e-05,
325
+ "loss": 0.0605,
326
  "step": 53
327
  },
328
  {
329
  "epoch": 0.41,
330
+ "learning_rate": 1.7714285714285717e-05,
331
+ "loss": 0.0752,
332
  "step": 54
333
  },
334
  {
335
  "epoch": 0.42,
336
+ "learning_rate": 1.7662337662337664e-05,
337
+ "loss": 0.0312,
338
  "step": 55
339
  },
340
  {
341
  "epoch": 0.43,
342
+ "learning_rate": 1.761038961038961e-05,
343
+ "loss": 0.1122,
344
  "step": 56
345
  },
346
  {
347
  "epoch": 0.44,
348
+ "learning_rate": 1.7558441558441558e-05,
349
+ "loss": 0.0579,
350
  "step": 57
351
  },
352
  {
353
  "epoch": 0.44,
354
+ "learning_rate": 1.750649350649351e-05,
355
+ "loss": 0.0664,
356
  "step": 58
357
  },
358
  {
359
  "epoch": 0.45,
360
+ "learning_rate": 1.7454545454545456e-05,
361
+ "loss": 0.059,
362
  "step": 59
363
  },
364
  {
365
  "epoch": 0.46,
366
+ "learning_rate": 1.7402597402597403e-05,
367
+ "loss": 0.0602,
368
  "step": 60
369
  },
370
  {
371
  "epoch": 0.47,
372
+ "learning_rate": 1.735064935064935e-05,
373
+ "loss": 0.0322,
374
  "step": 61
375
  },
376
  {
377
  "epoch": 0.47,
378
+ "learning_rate": 1.72987012987013e-05,
379
+ "loss": 0.1271,
380
  "step": 62
381
  },
382
  {
383
  "epoch": 0.48,
384
+ "learning_rate": 1.7246753246753248e-05,
385
+ "loss": 0.2168,
386
  "step": 63
387
  },
388
  {
389
  "epoch": 0.49,
390
+ "learning_rate": 1.7194805194805195e-05,
391
+ "loss": 0.1552,
392
  "step": 64
393
  },
394
  {
395
  "epoch": 0.5,
396
+ "learning_rate": 1.7142857142857142e-05,
397
+ "loss": 0.1742,
398
  "step": 65
399
  },
400
  {
401
  "epoch": 0.5,
402
+ "learning_rate": 1.7090909090909092e-05,
403
+ "loss": 0.0751,
404
  "step": 66
405
  },
406
  {
407
  "epoch": 0.51,
408
+ "learning_rate": 1.703896103896104e-05,
409
+ "loss": 0.0379,
410
  "step": 67
411
  },
412
  {
413
  "epoch": 0.52,
414
+ "learning_rate": 1.6987012987012987e-05,
415
+ "loss": 0.185,
416
  "step": 68
417
  },
418
  {
419
  "epoch": 0.53,
420
+ "learning_rate": 1.6935064935064934e-05,
421
+ "loss": 0.1504,
422
  "step": 69
423
  },
424
  {
425
  "epoch": 0.53,
426
+ "learning_rate": 1.6883116883116884e-05,
427
+ "loss": 0.1169,
428
  "step": 70
429
  },
430
  {
431
  "epoch": 0.54,
432
+ "learning_rate": 1.683116883116883e-05,
433
+ "loss": 0.0771,
434
  "step": 71
435
  },
436
  {
437
  "epoch": 0.55,
438
+ "learning_rate": 1.677922077922078e-05,
439
+ "loss": 0.0374,
440
  "step": 72
441
  },
442
  {
443
  "epoch": 0.56,
444
+ "learning_rate": 1.672727272727273e-05,
445
+ "loss": 0.1039,
446
  "step": 73
447
  },
448
  {
449
  "epoch": 0.56,
450
+ "learning_rate": 1.6675324675324676e-05,
451
+ "loss": 0.101,
452
  "step": 74
453
  },
454
  {
455
  "epoch": 0.57,
456
+ "learning_rate": 1.6623376623376627e-05,
457
+ "loss": 0.0917,
458
  "step": 75
459
  },
460
  {
461
  "epoch": 0.58,
462
+ "learning_rate": 1.6571428571428574e-05,
463
+ "loss": 0.089,
464
  "step": 76
465
  },
466
  {
467
  "epoch": 0.59,
468
+ "learning_rate": 1.651948051948052e-05,
469
+ "loss": 0.0307,
470
  "step": 77
471
  },
472
  {
473
  "epoch": 0.6,
474
+ "learning_rate": 1.646753246753247e-05,
475
+ "loss": 0.1496,
476
  "step": 78
477
  },
478
  {
479
  "epoch": 0.6,
480
+ "learning_rate": 1.641558441558442e-05,
481
+ "loss": 0.1121,
482
  "step": 79
483
  },
484
  {
485
  "epoch": 0.61,
486
+ "learning_rate": 1.6363636363636366e-05,
487
+ "loss": 0.1882,
488
  "step": 80
489
  },
490
  {
491
  "epoch": 0.62,
492
+ "learning_rate": 1.6311688311688313e-05,
493
+ "loss": 0.136,
494
  "step": 81
495
  },
496
  {
497
  "epoch": 0.63,
498
+ "learning_rate": 1.6259740259740264e-05,
499
+ "loss": 0.1114,
500
  "step": 82
501
  },
502
  {
503
  "epoch": 0.63,
504
+ "learning_rate": 1.620779220779221e-05,
505
+ "loss": 0.0095,
506
  "step": 83
507
  },
508
  {
509
  "epoch": 0.64,
510
+ "learning_rate": 1.6155844155844158e-05,
511
+ "loss": 0.0559,
512
  "step": 84
513
  },
514
  {
515
  "epoch": 0.65,
516
+ "learning_rate": 1.6103896103896105e-05,
517
+ "loss": 0.0528,
518
  "step": 85
519
  },
520
  {
521
  "epoch": 0.66,
522
+ "learning_rate": 1.6051948051948056e-05,
523
+ "loss": 0.0298,
524
  "step": 86
525
  },
526
  {
527
  "epoch": 0.66,
528
+ "learning_rate": 1.6000000000000003e-05,
529
+ "loss": 0.0436,
530
  "step": 87
531
  },
532
  {
533
  "epoch": 0.67,
534
+ "learning_rate": 1.594805194805195e-05,
535
+ "loss": 0.0515,
536
  "step": 88
537
  },
538
  {
539
  "epoch": 0.68,
540
+ "learning_rate": 1.5896103896103897e-05,
541
+ "loss": 0.0588,
542
  "step": 89
543
  },
544
  {
545
  "epoch": 0.69,
546
+ "learning_rate": 1.5844155844155847e-05,
547
+ "loss": 0.071,
548
  "step": 90
549
  },
550
  {
551
  "epoch": 0.69,
552
+ "learning_rate": 1.5792207792207795e-05,
553
+ "loss": 0.0457,
554
  "step": 91
555
  },
556
  {
557
  "epoch": 0.7,
558
+ "learning_rate": 1.5740259740259742e-05,
559
+ "loss": 0.0383,
560
  "step": 92
561
  },
562
  {
563
  "epoch": 0.71,
564
+ "learning_rate": 1.568831168831169e-05,
565
+ "loss": 0.0686,
566
  "step": 93
567
  },
568
  {
569
  "epoch": 0.72,
570
+ "learning_rate": 1.563636363636364e-05,
571
+ "loss": 0.1018,
572
  "step": 94
573
  },
574
  {
575
  "epoch": 0.73,
576
+ "learning_rate": 1.5584415584415587e-05,
577
+ "loss": 0.1044,
578
  "step": 95
579
  },
580
  {
581
  "epoch": 0.73,
582
+ "learning_rate": 1.5532467532467534e-05,
583
+ "loss": 0.0549,
584
  "step": 96
585
  },
586
  {
587
  "epoch": 0.74,
588
+ "learning_rate": 1.548051948051948e-05,
589
+ "loss": 0.0321,
590
  "step": 97
591
  },
592
  {
593
  "epoch": 0.75,
594
+ "learning_rate": 1.542857142857143e-05,
595
+ "loss": 0.0248,
596
  "step": 98
597
  },
598
  {
599
  "epoch": 0.76,
600
+ "learning_rate": 1.537662337662338e-05,
601
+ "loss": 0.0508,
602
  "step": 99
603
  },
604
  {
605
  "epoch": 0.76,
606
+ "learning_rate": 1.5324675324675326e-05,
607
+ "loss": 0.0279,
608
  "step": 100
609
  },
610
  {
611
  "epoch": 0.77,
612
+ "learning_rate": 1.5272727272727276e-05,
613
+ "loss": 0.0499,
614
  "step": 101
615
  },
616
  {
617
  "epoch": 0.78,
618
+ "learning_rate": 1.5220779220779223e-05,
619
+ "loss": 0.035,
620
  "step": 102
621
  },
622
  {
623
  "epoch": 0.79,
624
+ "learning_rate": 1.516883116883117e-05,
625
+ "loss": 0.0767,
626
  "step": 103
627
  },
628
  {
629
  "epoch": 0.79,
630
+ "learning_rate": 1.511688311688312e-05,
631
+ "loss": 0.0712,
632
  "step": 104
633
  },
634
  {
635
  "epoch": 0.8,
636
+ "learning_rate": 1.5064935064935066e-05,
637
+ "loss": 0.0458,
638
  "step": 105
639
  },
640
  {
641
  "epoch": 0.81,
642
+ "learning_rate": 1.5012987012987015e-05,
643
+ "loss": 0.0362,
644
  "step": 106
645
  },
646
  {
647
  "epoch": 0.82,
648
+ "learning_rate": 1.4961038961038962e-05,
649
+ "loss": 0.0651,
650
  "step": 107
651
  },
652
  {
653
  "epoch": 0.82,
654
+ "learning_rate": 1.4909090909090911e-05,
655
+ "loss": 0.0447,
656
  "step": 108
657
  },
658
  {
659
  "epoch": 0.83,
660
+ "learning_rate": 1.4857142857142858e-05,
661
+ "loss": 0.1035,
662
  "step": 109
663
  },
664
  {
665
  "epoch": 0.84,
666
+ "learning_rate": 1.4805194805194807e-05,
667
+ "loss": 0.0608,
668
  "step": 110
669
  },
670
  {
671
  "epoch": 0.85,
672
+ "learning_rate": 1.4753246753246754e-05,
673
+ "loss": 0.0407,
674
  "step": 111
675
  },
676
  {
677
  "epoch": 0.85,
678
+ "learning_rate": 1.4701298701298703e-05,
679
+ "loss": 0.0183,
680
  "step": 112
681
  },
682
  {
683
  "epoch": 0.86,
684
+ "learning_rate": 1.464935064935065e-05,
685
+ "loss": 0.0281,
686
  "step": 113
687
  },
688
  {
689
  "epoch": 0.87,
690
+ "learning_rate": 1.45974025974026e-05,
691
+ "loss": 0.0289,
692
  "step": 114
693
  },
694
  {
695
  "epoch": 0.88,
696
+ "learning_rate": 1.4545454545454546e-05,
697
+ "loss": 0.0725,
698
  "step": 115
699
  },
700
  {
701
  "epoch": 0.89,
702
+ "learning_rate": 1.4493506493506495e-05,
703
+ "loss": 0.0287,
704
  "step": 116
705
  },
706
  {
707
  "epoch": 0.89,
708
+ "learning_rate": 1.4441558441558442e-05,
709
+ "loss": 0.018,
710
  "step": 117
711
  },
712
  {
713
  "epoch": 0.9,
714
+ "learning_rate": 1.4389610389610391e-05,
715
+ "loss": 0.0227,
716
  "step": 118
717
  },
718
  {
719
  "epoch": 0.91,
720
+ "learning_rate": 1.433766233766234e-05,
721
+ "loss": 0.0315,
722
  "step": 119
723
  },
724
  {
725
  "epoch": 0.92,
726
+ "learning_rate": 1.4285714285714287e-05,
727
+ "loss": 0.1159,
728
  "step": 120
729
  },
730
  {
731
  "epoch": 0.92,
732
+ "learning_rate": 1.4233766233766236e-05,
733
+ "loss": 0.0419,
734
  "step": 121
735
  },
736
  {
737
  "epoch": 0.93,
738
+ "learning_rate": 1.4181818181818183e-05,
739
+ "loss": 0.0668,
740
  "step": 122
741
  },
742
  {
743
  "epoch": 0.94,
744
+ "learning_rate": 1.4129870129870132e-05,
745
+ "loss": 0.0357,
746
  "step": 123
747
  },
748
  {
749
  "epoch": 0.95,
750
+ "learning_rate": 1.4077922077922079e-05,
751
+ "loss": 0.0288,
752
  "step": 124
753
  },
754
  {
755
  "epoch": 0.95,
756
+ "learning_rate": 1.4025974025974028e-05,
757
+ "loss": 0.0135,
758
  "step": 125
759
  },
760
  {
761
  "epoch": 0.96,
762
+ "learning_rate": 1.3974025974025975e-05,
763
+ "loss": 0.0308,
764
  "step": 126
765
  },
766
  {
767
  "epoch": 0.97,
768
+ "learning_rate": 1.3922077922077924e-05,
769
+ "loss": 0.025,
770
  "step": 127
771
  },
772
  {
773
  "epoch": 0.98,
774
+ "learning_rate": 1.3870129870129871e-05,
775
+ "loss": 0.0375,
776
  "step": 128
777
  },
778
  {
779
  "epoch": 0.98,
780
+ "learning_rate": 1.381818181818182e-05,
781
+ "loss": 0.0182,
782
  "step": 129
783
  },
784
  {
785
  "epoch": 0.99,
786
+ "learning_rate": 1.3766233766233767e-05,
787
+ "loss": 0.0417,
788
  "step": 130
789
  },
790
  {
791
  "epoch": 1.0,
792
+ "learning_rate": 1.3714285714285716e-05,
793
+ "loss": 0.0094,
794
  "step": 131
795
  },
796
  {
797
  "epoch": 1.0,
798
+ "eval_loss": 0.034164465963840485,
799
+ "eval_pearson": 0.9208741871943605,
800
+ "eval_runtime": 5.5147,
801
+ "eval_samples_per_second": 42.069,
802
+ "eval_spearmanr": 0.8739346623789532,
803
+ "eval_steps_per_second": 2.72,
804
  "step": 131
805
  },
806
  {
807
  "epoch": 1.01,
808
+ "learning_rate": 1.3662337662337663e-05,
809
+ "loss": 0.0628,
810
  "step": 132
811
  },
812
  {
813
  "epoch": 1.02,
814
+ "learning_rate": 1.3610389610389612e-05,
815
+ "loss": 0.0295,
816
  "step": 133
817
  },
818
  {
819
  "epoch": 1.02,
820
+ "learning_rate": 1.3558441558441559e-05,
821
+ "loss": 0.0132,
822
  "step": 134
823
  },
824
  {
825
  "epoch": 1.03,
826
+ "learning_rate": 1.3506493506493508e-05,
827
+ "loss": 0.0651,
828
  "step": 135
829
  },
830
  {
831
  "epoch": 1.04,
832
+ "learning_rate": 1.3454545454545455e-05,
833
+ "loss": 0.0453,
834
  "step": 136
835
  },
836
  {
837
  "epoch": 1.05,
838
+ "learning_rate": 1.3402597402597404e-05,
839
+ "loss": 0.02,
840
  "step": 137
841
  },
842
  {
843
  "epoch": 1.05,
844
+ "learning_rate": 1.3350649350649351e-05,
845
+ "loss": 0.088,
846
  "step": 138
847
  },
848
  {
849
  "epoch": 1.06,
850
+ "learning_rate": 1.32987012987013e-05,
851
+ "loss": 0.0337,
852
  "step": 139
853
  },
854
  {
855
  "epoch": 1.07,
856
+ "learning_rate": 1.3246753246753249e-05,
857
+ "loss": 0.03,
858
  "step": 140
859
  },
860
  {
861
  "epoch": 1.08,
862
+ "learning_rate": 1.3194805194805196e-05,
863
+ "loss": 0.0261,
864
  "step": 141
865
  },
866
  {
867
  "epoch": 1.08,
868
+ "learning_rate": 1.3142857142857145e-05,
869
+ "loss": 0.0245,
870
  "step": 142
871
  },
872
  {
873
  "epoch": 1.09,
874
+ "learning_rate": 1.3090909090909092e-05,
875
+ "loss": 0.0675,
876
  "step": 143
877
  },
878
  {
879
  "epoch": 1.1,
880
+ "learning_rate": 1.303896103896104e-05,
881
+ "loss": 0.1051,
882
  "step": 144
883
  },
884
  {
885
  "epoch": 1.11,
886
+ "learning_rate": 1.2987012987012988e-05,
887
+ "loss": 0.0985,
888
  "step": 145
889
  },
890
  {
891
  "epoch": 1.11,
892
+ "learning_rate": 1.2935064935064937e-05,
893
+ "loss": 0.077,
894
  "step": 146
895
  },
896
  {
897
  "epoch": 1.12,
898
+ "learning_rate": 1.2883116883116884e-05,
899
+ "loss": 0.0332,
900
  "step": 147
901
  },
902
  {
903
  "epoch": 1.13,
904
+ "learning_rate": 1.2831168831168832e-05,
905
+ "loss": 0.0324,
906
  "step": 148
907
  },
908
  {
909
  "epoch": 1.14,
910
+ "learning_rate": 1.277922077922078e-05,
911
+ "loss": 0.0265,
912
  "step": 149
913
  },
914
  {
915
  "epoch": 1.15,
916
+ "learning_rate": 1.2727272727272728e-05,
917
+ "loss": 0.0313,
918
  "step": 150
919
  },
920
  {
921
  "epoch": 1.15,
922
+ "learning_rate": 1.2675324675324676e-05,
923
+ "loss": 0.0368,
924
  "step": 151
925
  },
926
  {
927
  "epoch": 1.16,
928
+ "learning_rate": 1.2623376623376624e-05,
929
+ "loss": 0.0313,
930
  "step": 152
931
  },
932
  {
933
  "epoch": 1.17,
934
+ "learning_rate": 1.2571428571428572e-05,
935
+ "loss": 0.0388,
936
  "step": 153
937
  },
938
  {
939
  "epoch": 1.18,
940
+ "learning_rate": 1.251948051948052e-05,
941
+ "loss": 0.0474,
942
  "step": 154
943
  },
944
  {
945
  "epoch": 1.18,
946
+ "learning_rate": 1.2467532467532468e-05,
947
+ "loss": 0.0294,
948
  "step": 155
949
  },
950
  {
951
  "epoch": 1.19,
952
+ "learning_rate": 1.2415584415584416e-05,
953
+ "loss": 0.0156,
954
  "step": 156
955
  },
956
  {
957
  "epoch": 1.2,
958
+ "learning_rate": 1.2363636363636364e-05,
959
+ "loss": 0.0245,
960
  "step": 157
961
  },
962
  {
963
  "epoch": 1.21,
964
+ "learning_rate": 1.2311688311688312e-05,
965
+ "loss": 0.0066,
966
  "step": 158
967
  },
968
  {
969
  "epoch": 1.21,
970
+ "learning_rate": 1.2259740259740261e-05,
971
+ "loss": 0.0254,
972
  "step": 159
973
  },
974
  {
975
  "epoch": 1.22,
976
+ "learning_rate": 1.2207792207792208e-05,
977
+ "loss": 0.0229,
978
  "step": 160
979
  },
980
  {
981
  "epoch": 1.23,
982
+ "learning_rate": 1.2155844155844157e-05,
983
+ "loss": 0.0169,
984
  "step": 161
985
  },
986
  {
987
  "epoch": 1.24,
988
+ "learning_rate": 1.2103896103896104e-05,
989
+ "loss": 0.0355,
990
  "step": 162
991
  },
992
  {
993
  "epoch": 1.24,
994
+ "learning_rate": 1.2051948051948053e-05,
995
+ "loss": 0.0137,
996
  "step": 163
997
  },
998
  {
999
  "epoch": 1.25,
1000
+ "learning_rate": 1.2e-05,
1001
+ "loss": 0.0271,
1002
  "step": 164
1003
  },
1004
  {
1005
  "epoch": 1.26,
1006
+ "learning_rate": 1.1948051948051949e-05,
1007
+ "loss": 0.021,
1008
  "step": 165
1009
  },
1010
  {
1011
  "epoch": 1.27,
1012
+ "learning_rate": 1.1896103896103896e-05,
1013
+ "loss": 0.0214,
1014
  "step": 166
1015
  },
1016
  {
1017
  "epoch": 1.27,
1018
+ "learning_rate": 1.1844155844155845e-05,
1019
+ "loss": 0.0313,
1020
  "step": 167
1021
  },
1022
  {
1023
  "epoch": 1.28,
1024
+ "learning_rate": 1.1792207792207792e-05,
1025
+ "loss": 0.0335,
1026
  "step": 168
1027
  },
1028
  {
1029
  "epoch": 1.29,
1030
+ "learning_rate": 1.1740259740259741e-05,
1031
+ "loss": 0.0169,
1032
  "step": 169
1033
  },
1034
  {
1035
  "epoch": 1.3,
1036
+ "learning_rate": 1.1688311688311688e-05,
1037
+ "loss": 0.0613,
1038
  "step": 170
1039
  },
1040
  {
1041
  "epoch": 1.31,
1042
+ "learning_rate": 1.1636363636363637e-05,
1043
+ "loss": 0.0141,
1044
  "step": 171
1045
  },
1046
  {
1047
  "epoch": 1.31,
1048
+ "learning_rate": 1.1584415584415584e-05,
1049
+ "loss": 0.0126,
1050
  "step": 172
1051
  },
1052
  {
1053
  "epoch": 1.32,
1054
+ "learning_rate": 1.1532467532467533e-05,
1055
+ "loss": 0.0161,
1056
  "step": 173
1057
  },
1058
  {
1059
  "epoch": 1.33,
1060
+ "learning_rate": 1.148051948051948e-05,
1061
+ "loss": 0.0192,
1062
  "step": 174
1063
  },
1064
  {
1065
  "epoch": 1.34,
1066
+ "learning_rate": 1.1428571428571429e-05,
1067
+ "loss": 0.016,
1068
  "step": 175
1069
  },
1070
  {
1071
  "epoch": 1.34,
1072
+ "learning_rate": 1.1376623376623376e-05,
1073
+ "loss": 0.0169,
1074
  "step": 176
1075
  },
1076
  {
1077
  "epoch": 1.35,
1078
+ "learning_rate": 1.1324675324675325e-05,
1079
+ "loss": 0.0201,
1080
  "step": 177
1081
  },
1082
  {
1083
  "epoch": 1.36,
1084
+ "learning_rate": 1.1272727272727272e-05,
1085
+ "loss": 0.0141,
1086
  "step": 178
1087
  },
1088
  {
1089
  "epoch": 1.37,
1090
+ "learning_rate": 1.1220779220779221e-05,
1091
+ "loss": 0.0122,
1092
  "step": 179
1093
  },
1094
  {
1095
  "epoch": 1.37,
1096
+ "learning_rate": 1.116883116883117e-05,
1097
+ "loss": 0.0226,
1098
  "step": 180
1099
  },
1100
  {
1101
  "epoch": 1.38,
1102
+ "learning_rate": 1.1116883116883117e-05,
1103
+ "loss": 0.026,
1104
  "step": 181
1105
  },
1106
  {
1107
  "epoch": 1.39,
1108
+ "learning_rate": 1.1064935064935066e-05,
1109
+ "loss": 0.0718,
1110
  "step": 182
1111
  },
1112
  {
1113
  "epoch": 1.4,
1114
+ "learning_rate": 1.1012987012987013e-05,
1115
+ "loss": 0.0199,
1116
  "step": 183
1117
  },
1118
  {
1119
  "epoch": 1.4,
1120
+ "learning_rate": 1.0961038961038962e-05,
1121
+ "loss": 0.0222,
1122
  "step": 184
1123
  },
1124
  {
1125
  "epoch": 1.41,
1126
+ "learning_rate": 1.0909090909090909e-05,
1127
+ "loss": 0.0155,
1128
  "step": 185
1129
  },
1130
  {
1131
  "epoch": 1.42,
1132
+ "learning_rate": 1.0857142857142858e-05,
1133
+ "loss": 0.0244,
1134
  "step": 186
1135
  },
1136
  {
1137
  "epoch": 1.43,
1138
+ "learning_rate": 1.0805194805194805e-05,
1139
+ "loss": 0.0191,
1140
  "step": 187
1141
  },
1142
  {
1143
  "epoch": 1.44,
1144
+ "learning_rate": 1.0753246753246754e-05,
1145
+ "loss": 0.0319,
1146
  "step": 188
1147
  },
1148
  {
1149
  "epoch": 1.44,
1150
+ "learning_rate": 1.0701298701298701e-05,
1151
+ "loss": 0.0144,
1152
  "step": 189
1153
  },
1154
  {
1155
  "epoch": 1.45,
1156
+ "learning_rate": 1.064935064935065e-05,
1157
+ "loss": 0.022,
1158
  "step": 190
1159
  },
1160
  {
1161
  "epoch": 1.46,
1162
+ "learning_rate": 1.0597402597402597e-05,
1163
+ "loss": 0.0118,
1164
  "step": 191
1165
  },
1166
  {
1167
  "epoch": 1.47,
1168
+ "learning_rate": 1.0545454545454546e-05,
1169
+ "loss": 0.0228,
1170
  "step": 192
1171
  },
1172
  {
1173
  "epoch": 1.47,
1174
+ "learning_rate": 1.0493506493506493e-05,
1175
+ "loss": 0.0329,
1176
  "step": 193
1177
  },
1178
  {
1179
  "epoch": 1.48,
1180
+ "learning_rate": 1.0441558441558442e-05,
1181
+ "loss": 0.0401,
1182
  "step": 194
1183
  },
1184
  {
1185
  "epoch": 1.49,
1186
+ "learning_rate": 1.0389610389610389e-05,
1187
+ "loss": 0.044,
1188
  "step": 195
1189
  },
1190
  {
1191
  "epoch": 1.5,
1192
+ "learning_rate": 1.0337662337662338e-05,
1193
+ "loss": 0.0226,
1194
  "step": 196
1195
  },
1196
  {
1197
  "epoch": 1.5,
1198
+ "learning_rate": 1.0285714285714285e-05,
1199
+ "loss": 0.0236,
1200
  "step": 197
1201
  },
1202
  {
1203
  "epoch": 1.51,
1204
+ "learning_rate": 1.0233766233766234e-05,
1205
+ "loss": 0.0348,
1206
  "step": 198
1207
  },
1208
  {
1209
  "epoch": 1.52,
1210
+ "learning_rate": 1.0181818181818182e-05,
1211
+ "loss": 0.0614,
1212
  "step": 199
1213
  },
1214
  {
1215
  "epoch": 1.53,
1216
+ "learning_rate": 1.012987012987013e-05,
1217
+ "loss": 0.0188,
1218
  "step": 200
1219
  },
1220
  {
1221
  "epoch": 1.53,
1222
+ "learning_rate": 1.0077922077922078e-05,
1223
+ "loss": 0.0231,
1224
  "step": 201
1225
  },
1226
  {
1227
  "epoch": 1.54,
1228
+ "learning_rate": 1.0025974025974026e-05,
1229
+ "loss": 0.0584,
1230
  "step": 202
1231
  },
1232
  {
1233
  "epoch": 1.55,
1234
+ "learning_rate": 9.974025974025974e-06,
1235
+ "loss": 0.0106,
1236
  "step": 203
1237
  },
1238
  {
1239
  "epoch": 1.56,
1240
+ "learning_rate": 9.922077922077923e-06,
1241
+ "loss": 0.0251,
1242
  "step": 204
1243
  },
1244
  {
1245
  "epoch": 1.56,
1246
+ "learning_rate": 9.87012987012987e-06,
1247
+ "loss": 0.0555,
1248
  "step": 205
1249
  },
1250
  {
1251
  "epoch": 1.57,
1252
+ "learning_rate": 9.81818181818182e-06,
1253
+ "loss": 0.0745,
1254
  "step": 206
1255
  },
1256
  {
1257
  "epoch": 1.58,
1258
+ "learning_rate": 9.766233766233766e-06,
1259
+ "loss": 0.0586,
1260
  "step": 207
1261
  },
1262
  {
1263
  "epoch": 1.59,
1264
+ "learning_rate": 9.714285714285715e-06,
1265
+ "loss": 0.0738,
1266
  "step": 208
1267
  },
1268
  {
1269
  "epoch": 1.6,
1270
+ "learning_rate": 9.662337662337662e-06,
1271
+ "loss": 0.0209,
1272
  "step": 209
1273
  },
1274
  {
1275
  "epoch": 1.6,
1276
+ "learning_rate": 9.610389610389611e-06,
1277
+ "loss": 0.0472,
1278
  "step": 210
1279
  },
1280
  {
1281
  "epoch": 1.61,
1282
+ "learning_rate": 9.558441558441558e-06,
1283
+ "loss": 0.0182,
1284
  "step": 211
1285
  },
1286
  {
1287
  "epoch": 1.62,
1288
+ "learning_rate": 9.506493506493507e-06,
1289
+ "loss": 0.0532,
1290
  "step": 212
1291
  },
1292
  {
1293
  "epoch": 1.63,
1294
+ "learning_rate": 9.454545454545456e-06,
1295
+ "loss": 0.0269,
1296
  "step": 213
1297
  },
1298
  {
1299
  "epoch": 1.63,
1300
+ "learning_rate": 9.402597402597403e-06,
1301
+ "loss": 0.014,
1302
  "step": 214
1303
  },
1304
  {
1305
  "epoch": 1.64,
1306
+ "learning_rate": 9.350649350649352e-06,
1307
+ "loss": 0.0248,
1308
  "step": 215
1309
  },
1310
  {
1311
  "epoch": 1.65,
1312
+ "learning_rate": 9.298701298701299e-06,
1313
+ "loss": 0.0237,
1314
  "step": 216
1315
  },
1316
  {
1317
  "epoch": 1.66,
1318
+ "learning_rate": 9.246753246753248e-06,
1319
+ "loss": 0.0132,
1320
  "step": 217
1321
  },
1322
  {
1323
  "epoch": 1.66,
1324
+ "learning_rate": 9.194805194805195e-06,
1325
+ "loss": 0.0124,
1326
  "step": 218
1327
  },
1328
  {
1329
  "epoch": 1.67,
1330
+ "learning_rate": 9.142857142857144e-06,
1331
+ "loss": 0.0197,
1332
  "step": 219
1333
  },
1334
  {
1335
  "epoch": 1.68,
1336
+ "learning_rate": 9.090909090909091e-06,
1337
+ "loss": 0.0088,
1338
  "step": 220
1339
  },
1340
  {
1341
  "epoch": 1.69,
1342
+ "learning_rate": 9.03896103896104e-06,
1343
+ "loss": 0.011,
1344
  "step": 221
1345
  },
1346
  {
1347
  "epoch": 1.69,
1348
+ "learning_rate": 8.987012987012987e-06,
1349
+ "loss": 0.0072,
1350
  "step": 222
1351
  },
1352
  {
1353
  "epoch": 1.7,
1354
+ "learning_rate": 8.935064935064936e-06,
1355
+ "loss": 0.0129,
1356
  "step": 223
1357
  },
1358
  {
1359
  "epoch": 1.71,
1360
+ "learning_rate": 8.883116883116883e-06,
1361
+ "loss": 0.0103,
1362
  "step": 224
1363
  },
1364
  {
1365
  "epoch": 1.72,
1366
+ "learning_rate": 8.831168831168832e-06,
1367
+ "loss": 0.0273,
1368
  "step": 225
1369
  },
1370
  {
1371
  "epoch": 1.73,
1372
+ "learning_rate": 8.779220779220779e-06,
1373
+ "loss": 0.0081,
1374
  "step": 226
1375
  },
1376
  {
1377
  "epoch": 1.73,
1378
+ "learning_rate": 8.727272727272728e-06,
1379
+ "loss": 0.0519,
1380
  "step": 227
1381
  },
1382
  {
1383
  "epoch": 1.74,
1384
+ "learning_rate": 8.675324675324675e-06,
1385
+ "loss": 0.053,
1386
  "step": 228
1387
  },
1388
  {
1389
  "epoch": 1.75,
1390
+ "learning_rate": 8.623376623376624e-06,
1391
+ "loss": 0.0055,
1392
  "step": 229
1393
  },
1394
  {
1395
  "epoch": 1.76,
1396
+ "learning_rate": 8.571428571428571e-06,
1397
+ "loss": 0.0244,
1398
  "step": 230
1399
  },
1400
  {
1401
  "epoch": 1.76,
1402
+ "learning_rate": 8.51948051948052e-06,
1403
  "loss": 0.0514,
1404
  "step": 231
1405
  },
1406
  {
1407
  "epoch": 1.77,
1408
+ "learning_rate": 8.467532467532467e-06,
1409
+ "loss": 0.013,
1410
  "step": 232
1411
  },
1412
  {
1413
  "epoch": 1.78,
1414
+ "learning_rate": 8.415584415584416e-06,
1415
+ "loss": 0.0054,
1416
  "step": 233
1417
  },
1418
  {
1419
  "epoch": 1.79,
1420
+ "learning_rate": 8.363636363636365e-06,
1421
+ "loss": 0.0235,
1422
  "step": 234
1423
  },
1424
  {
1425
  "epoch": 1.79,
1426
+ "learning_rate": 8.311688311688313e-06,
1427
+ "loss": 0.0177,
1428
  "step": 235
1429
  },
1430
  {
1431
  "epoch": 1.8,
1432
+ "learning_rate": 8.25974025974026e-06,
1433
+ "loss": 0.047,
1434
  "step": 236
1435
  },
1436
  {
1437
  "epoch": 1.81,
1438
+ "learning_rate": 8.20779220779221e-06,
1439
+ "loss": 0.0328,
1440
  "step": 237
1441
  },
1442
  {
1443
  "epoch": 1.82,
1444
+ "learning_rate": 8.155844155844157e-06,
1445
+ "loss": 0.0046,
1446
  "step": 238
1447
  },
1448
  {
1449
  "epoch": 1.82,
1450
+ "learning_rate": 8.103896103896105e-06,
1451
+ "loss": 0.0106,
1452
  "step": 239
1453
  },
1454
  {
1455
  "epoch": 1.83,
1456
+ "learning_rate": 8.051948051948052e-06,
1457
+ "loss": 0.0081,
1458
  "step": 240
1459
  },
1460
  {
1461
  "epoch": 1.84,
1462
+ "learning_rate": 8.000000000000001e-06,
1463
+ "loss": 0.0077,
1464
  "step": 241
1465
  },
1466
  {
1467
  "epoch": 1.85,
1468
+ "learning_rate": 7.948051948051948e-06,
1469
+ "loss": 0.0106,
1470
  "step": 242
1471
  },
1472
  {
1473
  "epoch": 1.85,
1474
+ "learning_rate": 7.896103896103897e-06,
1475
+ "loss": 0.0135,
1476
  "step": 243
1477
  },
1478
  {
1479
  "epoch": 1.86,
1480
+ "learning_rate": 7.844155844155844e-06,
1481
+ "loss": 0.0066,
1482
  "step": 244
1483
  },
1484
  {
1485
  "epoch": 1.87,
1486
+ "learning_rate": 7.792207792207793e-06,
1487
+ "loss": 0.0255,
1488
  "step": 245
1489
  },
1490
  {
1491
  "epoch": 1.88,
1492
+ "learning_rate": 7.74025974025974e-06,
1493
+ "loss": 0.0126,
1494
  "step": 246
1495
  },
1496
  {
1497
  "epoch": 1.89,
1498
+ "learning_rate": 7.68831168831169e-06,
1499
+ "loss": 0.0245,
1500
  "step": 247
1501
  },
1502
  {
1503
  "epoch": 1.89,
1504
+ "learning_rate": 7.636363636363638e-06,
1505
+ "loss": 0.0209,
1506
  "step": 248
1507
  },
1508
  {
1509
  "epoch": 1.9,
1510
+ "learning_rate": 7.584415584415585e-06,
1511
+ "loss": 0.0183,
1512
  "step": 249
1513
  },
1514
  {
1515
  "epoch": 1.91,
1516
+ "learning_rate": 7.532467532467533e-06,
1517
+ "loss": 0.0165,
1518
  "step": 250
1519
  },
1520
  {
1521
  "epoch": 1.92,
1522
+ "learning_rate": 7.480519480519481e-06,
1523
+ "loss": 0.0106,
1524
  "step": 251
1525
  },
1526
  {
1527
  "epoch": 1.92,
1528
+ "learning_rate": 7.428571428571429e-06,
1529
+ "loss": 0.0091,
1530
  "step": 252
1531
  },
1532
  {
1533
  "epoch": 1.93,
1534
+ "learning_rate": 7.376623376623377e-06,
1535
+ "loss": 0.0169,
1536
  "step": 253
1537
  },
1538
  {
1539
  "epoch": 1.94,
1540
+ "learning_rate": 7.324675324675325e-06,
1541
+ "loss": 0.0079,
1542
  "step": 254
1543
  },
1544
  {
1545
  "epoch": 1.95,
1546
+ "learning_rate": 7.272727272727273e-06,
1547
+ "loss": 0.0328,
1548
  "step": 255
1549
  },
1550
  {
1551
  "epoch": 1.95,
1552
+ "learning_rate": 7.220779220779221e-06,
1553
+ "loss": 0.0202,
1554
  "step": 256
1555
  },
1556
  {
1557
  "epoch": 1.96,
1558
+ "learning_rate": 7.16883116883117e-06,
1559
+ "loss": 0.0233,
1560
  "step": 257
1561
  },
1562
  {
1563
  "epoch": 1.97,
1564
+ "learning_rate": 7.116883116883118e-06,
1565
+ "loss": 0.0179,
1566
  "step": 258
1567
  },
1568
  {
1569
  "epoch": 1.98,
1570
+ "learning_rate": 7.064935064935066e-06,
1571
+ "loss": 0.0271,
1572
  "step": 259
1573
  },
1574
  {
1575
  "epoch": 1.98,
1576
+ "learning_rate": 7.012987012987014e-06,
1577
+ "loss": 0.0209,
1578
  "step": 260
1579
  },
1580
  {
1581
  "epoch": 1.99,
1582
+ "learning_rate": 6.961038961038962e-06,
1583
+ "loss": 0.0225,
1584
  "step": 261
1585
  },
1586
  {
1587
  "epoch": 2.0,
1588
+ "learning_rate": 6.90909090909091e-06,
1589
+ "loss": 0.0091,
1590
  "step": 262
1591
  },
1592
  {
1593
  "epoch": 2.0,
1594
+ "eval_loss": 0.015746144577860832,
1595
+ "eval_pearson": 0.9584851350897383,
1596
+ "eval_runtime": 5.2391,
1597
+ "eval_samples_per_second": 44.282,
1598
+ "eval_spearmanr": 0.9039676698999922,
1599
+ "eval_steps_per_second": 2.863,
1600
  "step": 262
1601
  }
1602
  ],
checkpoint-262/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa3be555654ef58dc4291062e451cc4f3b395b85f9521a093bdc71de5f5c2938
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c355b2a51bf6c9af2253d00c618ad523567bd7357682948505578a36e2e9f8f2
3
  size 3311
checkpoint-393/config.json CHANGED
@@ -34,7 +34,7 @@
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.20.0",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
 
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
+ "transformers_version": "4.20.1",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
checkpoint-393/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e214675144e3fcad8aa76ec3ad8b88a665fb4546bf76993660476faa52baa6f
3
  size 93490051
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cf9ae268c36291839d31b59624fa0b998839c4cd7ad21d34fd76883c6110ee0
3
  size 93490051
checkpoint-393/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83c13b311191c9fd24a89163aa7cf7e2077e631cc20cf7fc2b7abfcd3d37b033
3
  size 46750353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37faaea797d87287140583e74f091daf924c82a659dc7dfdec40c9b0c5c0be0
3
  size 46750353
checkpoint-393/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d00b3e4a66a0d317028d9da2db9b48af0244ab901f9c82525322c84bbdfbc6dc
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:054a25044b42ad1a22a1f7a5ee50f3b7e1d526e3ec864e58ae835ba1d135ff83
3
  size 559
checkpoint-393/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5defaac178f5f69503b53f740dbc2dd079b96a604024bebc5a10b74a6f5f9c7b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b8940b18643503acb48a8b02af44b4e2ec0c740f1cd6441602e404dddce6bb9
3
  size 623
checkpoint-393/trainer_state.json CHANGED
@@ -10,2389 +10,2389 @@
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 0.0,
13
- "loss": 0.3588,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 0.0,
19
- "loss": 0.3226,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.02,
24
  "learning_rate": 2.5e-06,
25
- "loss": 0.2542,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.03,
30
  "learning_rate": 5e-06,
31
- "loss": 0.2879,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
- "learning_rate": 5e-06,
37
- "loss": 0.5216,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
- "learning_rate": 7.500000000000001e-06,
43
- "loss": 0.341,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.05,
48
- "learning_rate": 1e-05,
49
- "loss": 0.1838,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.06,
54
- "learning_rate": 1.25e-05,
55
- "loss": 0.3373,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.07,
60
- "learning_rate": 1.5000000000000002e-05,
61
- "loss": 0.2025,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.08,
66
- "learning_rate": 1.7500000000000002e-05,
67
- "loss": 0.2342,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.08,
72
- "learning_rate": 2e-05,
73
- "loss": 0.1436,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.09,
78
- "learning_rate": 1.994805194805195e-05,
79
- "loss": 0.4132,
80
  "step": 12
81
  },
82
  {
83
  "epoch": 0.1,
84
- "learning_rate": 1.98961038961039e-05,
85
- "loss": 0.279,
86
  "step": 13
87
  },
88
  {
89
  "epoch": 0.11,
90
- "learning_rate": 1.9844155844155846e-05,
91
- "loss": 0.1597,
92
  "step": 14
93
  },
94
  {
95
  "epoch": 0.11,
96
- "learning_rate": 1.9792207792207794e-05,
97
- "loss": 0.2237,
98
  "step": 15
99
  },
100
  {
101
  "epoch": 0.12,
102
- "learning_rate": 1.974025974025974e-05,
103
- "loss": 0.3902,
104
  "step": 16
105
  },
106
  {
107
  "epoch": 0.13,
108
- "learning_rate": 1.968831168831169e-05,
109
- "loss": 0.1849,
110
  "step": 17
111
  },
112
  {
113
  "epoch": 0.14,
114
- "learning_rate": 1.963636363636364e-05,
115
- "loss": 0.2283,
116
  "step": 18
117
  },
118
  {
119
  "epoch": 0.15,
120
- "learning_rate": 1.9584415584415586e-05,
121
- "loss": 0.1571,
122
  "step": 19
123
  },
124
  {
125
  "epoch": 0.15,
126
- "learning_rate": 1.9532467532467533e-05,
127
- "loss": 0.2266,
128
  "step": 20
129
  },
130
  {
131
  "epoch": 0.16,
132
- "learning_rate": 1.9480519480519483e-05,
133
- "loss": 0.1266,
134
  "step": 21
135
  },
136
  {
137
  "epoch": 0.17,
138
- "learning_rate": 1.942857142857143e-05,
139
- "loss": 0.1642,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 0.18,
144
- "learning_rate": 1.9376623376623377e-05,
145
- "loss": 0.1877,
146
  "step": 23
147
  },
148
  {
149
  "epoch": 0.18,
150
- "learning_rate": 1.9324675324675325e-05,
151
- "loss": 0.1765,
152
  "step": 24
153
  },
154
  {
155
  "epoch": 0.19,
156
- "learning_rate": 1.9272727272727275e-05,
157
- "loss": 0.1325,
158
  "step": 25
159
  },
160
  {
161
  "epoch": 0.2,
162
- "learning_rate": 1.9220779220779222e-05,
163
- "loss": 0.1093,
164
  "step": 26
165
  },
166
  {
167
  "epoch": 0.21,
168
- "learning_rate": 1.916883116883117e-05,
169
- "loss": 0.082,
170
  "step": 27
171
  },
172
  {
173
  "epoch": 0.21,
174
- "learning_rate": 1.9116883116883117e-05,
175
- "loss": 0.1399,
176
  "step": 28
177
  },
178
  {
179
  "epoch": 0.22,
180
- "learning_rate": 1.9064935064935067e-05,
181
- "loss": 0.0833,
182
  "step": 29
183
  },
184
  {
185
  "epoch": 0.23,
186
- "learning_rate": 1.9012987012987014e-05,
187
- "loss": 0.122,
188
  "step": 30
189
  },
190
  {
191
  "epoch": 0.24,
192
- "learning_rate": 1.896103896103896e-05,
193
- "loss": 0.1042,
194
  "step": 31
195
  },
196
  {
197
  "epoch": 0.24,
198
- "learning_rate": 1.8909090909090912e-05,
199
- "loss": 0.1439,
200
  "step": 32
201
  },
202
  {
203
  "epoch": 0.25,
204
- "learning_rate": 1.885714285714286e-05,
205
- "loss": 0.045,
206
  "step": 33
207
  },
208
  {
209
  "epoch": 0.26,
210
- "learning_rate": 1.8805194805194806e-05,
211
- "loss": 0.1221,
212
  "step": 34
213
  },
214
  {
215
  "epoch": 0.27,
216
- "learning_rate": 1.8753246753246753e-05,
217
- "loss": 0.1128,
218
  "step": 35
219
  },
220
  {
221
  "epoch": 0.27,
222
- "learning_rate": 1.8701298701298704e-05,
223
- "loss": 0.1142,
224
  "step": 36
225
  },
226
  {
227
  "epoch": 0.28,
228
- "learning_rate": 1.864935064935065e-05,
229
- "loss": 0.095,
230
  "step": 37
231
  },
232
  {
233
  "epoch": 0.29,
234
- "learning_rate": 1.8597402597402598e-05,
235
- "loss": 0.0696,
236
  "step": 38
237
  },
238
  {
239
  "epoch": 0.3,
240
- "learning_rate": 1.8545454545454545e-05,
241
- "loss": 0.1131,
242
  "step": 39
243
  },
244
  {
245
  "epoch": 0.31,
246
- "learning_rate": 1.8493506493506496e-05,
247
- "loss": 0.2079,
248
  "step": 40
249
  },
250
  {
251
  "epoch": 0.31,
252
- "learning_rate": 1.8441558441558443e-05,
253
- "loss": 0.2369,
254
  "step": 41
255
  },
256
  {
257
  "epoch": 0.32,
258
- "learning_rate": 1.838961038961039e-05,
259
- "loss": 0.1422,
260
  "step": 42
261
  },
262
  {
263
  "epoch": 0.33,
264
- "learning_rate": 1.8337662337662337e-05,
265
- "loss": 0.0955,
266
  "step": 43
267
  },
268
  {
269
  "epoch": 0.34,
270
- "learning_rate": 1.8285714285714288e-05,
271
- "loss": 0.1609,
272
  "step": 44
273
  },
274
  {
275
  "epoch": 0.34,
276
- "learning_rate": 1.8233766233766235e-05,
277
- "loss": 0.1931,
278
  "step": 45
279
  },
280
  {
281
  "epoch": 0.35,
282
- "learning_rate": 1.8181818181818182e-05,
283
- "loss": 0.1338,
284
  "step": 46
285
  },
286
  {
287
  "epoch": 0.36,
288
- "learning_rate": 1.812987012987013e-05,
289
- "loss": 0.0927,
290
  "step": 47
291
  },
292
  {
293
  "epoch": 0.37,
294
- "learning_rate": 1.807792207792208e-05,
295
- "loss": 0.0717,
296
  "step": 48
297
  },
298
  {
299
  "epoch": 0.37,
300
- "learning_rate": 1.8025974025974027e-05,
301
- "loss": 0.0478,
302
  "step": 49
303
  },
304
  {
305
  "epoch": 0.38,
306
- "learning_rate": 1.7974025974025974e-05,
307
- "loss": 0.1131,
308
  "step": 50
309
  },
310
  {
311
  "epoch": 0.39,
312
- "learning_rate": 1.792207792207792e-05,
313
- "loss": 0.202,
314
  "step": 51
315
  },
316
  {
317
  "epoch": 0.4,
318
- "learning_rate": 1.7870129870129872e-05,
319
- "loss": 0.0677,
320
  "step": 52
321
  },
322
  {
323
  "epoch": 0.4,
324
- "learning_rate": 1.781818181818182e-05,
325
- "loss": 0.1057,
326
  "step": 53
327
  },
328
  {
329
  "epoch": 0.41,
330
- "learning_rate": 1.7766233766233766e-05,
331
- "loss": 0.0773,
332
  "step": 54
333
  },
334
  {
335
  "epoch": 0.42,
336
- "learning_rate": 1.7714285714285717e-05,
337
- "loss": 0.0599,
338
  "step": 55
339
  },
340
  {
341
  "epoch": 0.43,
342
- "learning_rate": 1.7662337662337664e-05,
343
- "loss": 0.2749,
344
  "step": 56
345
  },
346
  {
347
  "epoch": 0.44,
348
- "learning_rate": 1.761038961038961e-05,
349
- "loss": 0.1707,
350
  "step": 57
351
  },
352
  {
353
  "epoch": 0.44,
354
- "learning_rate": 1.7558441558441558e-05,
355
- "loss": 0.1129,
356
  "step": 58
357
  },
358
  {
359
  "epoch": 0.45,
360
- "learning_rate": 1.750649350649351e-05,
361
- "loss": 0.0496,
362
  "step": 59
363
  },
364
  {
365
  "epoch": 0.46,
366
- "learning_rate": 1.7454545454545456e-05,
367
- "loss": 0.0708,
368
  "step": 60
369
  },
370
  {
371
  "epoch": 0.47,
372
- "learning_rate": 1.7402597402597403e-05,
373
- "loss": 0.0694,
374
  "step": 61
375
  },
376
  {
377
  "epoch": 0.47,
378
- "learning_rate": 1.735064935064935e-05,
379
- "loss": 0.1964,
380
  "step": 62
381
  },
382
  {
383
  "epoch": 0.48,
384
- "learning_rate": 1.72987012987013e-05,
385
- "loss": 0.1586,
386
  "step": 63
387
  },
388
  {
389
  "epoch": 0.49,
390
- "learning_rate": 1.7246753246753248e-05,
391
- "loss": 0.2173,
392
  "step": 64
393
  },
394
  {
395
  "epoch": 0.5,
396
- "learning_rate": 1.7194805194805195e-05,
397
- "loss": 0.162,
398
  "step": 65
399
  },
400
  {
401
  "epoch": 0.5,
402
- "learning_rate": 1.7142857142857142e-05,
403
- "loss": 0.0896,
404
  "step": 66
405
  },
406
  {
407
  "epoch": 0.51,
408
- "learning_rate": 1.7090909090909092e-05,
409
- "loss": 0.0503,
410
  "step": 67
411
  },
412
  {
413
  "epoch": 0.52,
414
- "learning_rate": 1.703896103896104e-05,
415
- "loss": 0.1161,
416
  "step": 68
417
  },
418
  {
419
  "epoch": 0.53,
420
- "learning_rate": 1.6987012987012987e-05,
421
- "loss": 0.0854,
422
  "step": 69
423
  },
424
  {
425
  "epoch": 0.53,
426
- "learning_rate": 1.6935064935064934e-05,
427
- "loss": 0.1247,
428
  "step": 70
429
  },
430
  {
431
  "epoch": 0.54,
432
- "learning_rate": 1.6883116883116884e-05,
433
- "loss": 0.1368,
434
  "step": 71
435
  },
436
  {
437
  "epoch": 0.55,
438
- "learning_rate": 1.683116883116883e-05,
439
- "loss": 0.1041,
440
  "step": 72
441
  },
442
  {
443
  "epoch": 0.56,
444
- "learning_rate": 1.677922077922078e-05,
445
- "loss": 0.0546,
446
  "step": 73
447
  },
448
  {
449
  "epoch": 0.56,
450
- "learning_rate": 1.672727272727273e-05,
451
- "loss": 0.0866,
452
  "step": 74
453
  },
454
  {
455
  "epoch": 0.57,
456
- "learning_rate": 1.6675324675324676e-05,
457
- "loss": 0.0624,
458
  "step": 75
459
  },
460
  {
461
  "epoch": 0.58,
462
- "learning_rate": 1.6623376623376627e-05,
463
- "loss": 0.1149,
464
  "step": 76
465
  },
466
  {
467
  "epoch": 0.59,
468
- "learning_rate": 1.6571428571428574e-05,
469
- "loss": 0.0752,
470
  "step": 77
471
  },
472
  {
473
  "epoch": 0.6,
474
- "learning_rate": 1.651948051948052e-05,
475
- "loss": 0.0457,
476
  "step": 78
477
  },
478
  {
479
  "epoch": 0.6,
480
- "learning_rate": 1.646753246753247e-05,
481
- "loss": 0.0705,
482
  "step": 79
483
  },
484
  {
485
  "epoch": 0.61,
486
- "learning_rate": 1.641558441558442e-05,
487
- "loss": 0.0827,
488
  "step": 80
489
  },
490
  {
491
  "epoch": 0.62,
492
- "learning_rate": 1.6363636363636366e-05,
493
- "loss": 0.0717,
494
  "step": 81
495
  },
496
  {
497
  "epoch": 0.63,
498
- "learning_rate": 1.6311688311688313e-05,
499
- "loss": 0.0804,
500
  "step": 82
501
  },
502
  {
503
  "epoch": 0.63,
504
- "learning_rate": 1.6259740259740264e-05,
505
- "loss": 0.0188,
506
  "step": 83
507
  },
508
  {
509
  "epoch": 0.64,
510
- "learning_rate": 1.620779220779221e-05,
511
- "loss": 0.0457,
512
  "step": 84
513
  },
514
  {
515
  "epoch": 0.65,
516
- "learning_rate": 1.6155844155844158e-05,
517
- "loss": 0.0422,
518
  "step": 85
519
  },
520
  {
521
  "epoch": 0.66,
522
- "learning_rate": 1.6103896103896105e-05,
523
- "loss": 0.0297,
524
  "step": 86
525
  },
526
  {
527
  "epoch": 0.66,
528
- "learning_rate": 1.6051948051948056e-05,
529
- "loss": 0.0408,
530
  "step": 87
531
  },
532
  {
533
  "epoch": 0.67,
534
- "learning_rate": 1.6000000000000003e-05,
535
- "loss": 0.0527,
536
  "step": 88
537
  },
538
  {
539
  "epoch": 0.68,
540
- "learning_rate": 1.594805194805195e-05,
541
- "loss": 0.0234,
542
  "step": 89
543
  },
544
  {
545
  "epoch": 0.69,
546
- "learning_rate": 1.5896103896103897e-05,
547
- "loss": 0.0368,
548
  "step": 90
549
  },
550
  {
551
  "epoch": 0.69,
552
- "learning_rate": 1.5844155844155847e-05,
553
- "loss": 0.0731,
554
  "step": 91
555
  },
556
  {
557
  "epoch": 0.7,
558
- "learning_rate": 1.5792207792207795e-05,
559
- "loss": 0.1104,
560
  "step": 92
561
  },
562
  {
563
  "epoch": 0.71,
564
- "learning_rate": 1.5740259740259742e-05,
565
- "loss": 0.0546,
566
  "step": 93
567
  },
568
  {
569
  "epoch": 0.72,
570
- "learning_rate": 1.568831168831169e-05,
571
- "loss": 0.0226,
572
  "step": 94
573
  },
574
  {
575
  "epoch": 0.73,
576
- "learning_rate": 1.563636363636364e-05,
577
- "loss": 0.0366,
578
  "step": 95
579
  },
580
  {
581
  "epoch": 0.73,
582
- "learning_rate": 1.5584415584415587e-05,
583
- "loss": 0.038,
584
  "step": 96
585
  },
586
  {
587
  "epoch": 0.74,
588
- "learning_rate": 1.5532467532467534e-05,
589
- "loss": 0.041,
590
  "step": 97
591
  },
592
  {
593
  "epoch": 0.75,
594
- "learning_rate": 1.548051948051948e-05,
595
- "loss": 0.0375,
596
  "step": 98
597
  },
598
  {
599
  "epoch": 0.76,
600
- "learning_rate": 1.542857142857143e-05,
601
- "loss": 0.0389,
602
  "step": 99
603
  },
604
  {
605
  "epoch": 0.76,
606
- "learning_rate": 1.537662337662338e-05,
607
- "loss": 0.022,
608
  "step": 100
609
  },
610
  {
611
  "epoch": 0.77,
612
- "learning_rate": 1.5324675324675326e-05,
613
- "loss": 0.0569,
614
  "step": 101
615
  },
616
  {
617
  "epoch": 0.78,
618
- "learning_rate": 1.5272727272727276e-05,
619
- "loss": 0.0172,
620
  "step": 102
621
  },
622
  {
623
  "epoch": 0.79,
624
- "learning_rate": 1.5220779220779223e-05,
625
- "loss": 0.0754,
626
  "step": 103
627
  },
628
  {
629
  "epoch": 0.79,
630
- "learning_rate": 1.516883116883117e-05,
631
- "loss": 0.0626,
632
  "step": 104
633
  },
634
  {
635
  "epoch": 0.8,
636
- "learning_rate": 1.511688311688312e-05,
637
- "loss": 0.0515,
638
  "step": 105
639
  },
640
  {
641
  "epoch": 0.81,
642
- "learning_rate": 1.5064935064935066e-05,
643
- "loss": 0.0345,
644
  "step": 106
645
  },
646
  {
647
  "epoch": 0.82,
648
- "learning_rate": 1.5012987012987015e-05,
649
- "loss": 0.0504,
650
  "step": 107
651
  },
652
  {
653
  "epoch": 0.82,
654
- "learning_rate": 1.4961038961038962e-05,
655
- "loss": 0.0329,
656
  "step": 108
657
  },
658
  {
659
  "epoch": 0.83,
660
- "learning_rate": 1.4909090909090911e-05,
661
- "loss": 0.0719,
662
  "step": 109
663
  },
664
  {
665
  "epoch": 0.84,
666
- "learning_rate": 1.4857142857142858e-05,
667
- "loss": 0.0417,
668
  "step": 110
669
  },
670
  {
671
  "epoch": 0.85,
672
- "learning_rate": 1.4805194805194807e-05,
673
- "loss": 0.0551,
674
  "step": 111
675
  },
676
  {
677
  "epoch": 0.85,
678
- "learning_rate": 1.4753246753246754e-05,
679
- "loss": 0.0299,
680
  "step": 112
681
  },
682
  {
683
  "epoch": 0.86,
684
- "learning_rate": 1.4701298701298703e-05,
685
- "loss": 0.024,
686
  "step": 113
687
  },
688
  {
689
  "epoch": 0.87,
690
- "learning_rate": 1.464935064935065e-05,
691
- "loss": 0.0328,
692
  "step": 114
693
  },
694
  {
695
  "epoch": 0.88,
696
- "learning_rate": 1.45974025974026e-05,
697
- "loss": 0.0462,
698
  "step": 115
699
  },
700
  {
701
  "epoch": 0.89,
702
- "learning_rate": 1.4545454545454546e-05,
703
- "loss": 0.0286,
704
  "step": 116
705
  },
706
  {
707
  "epoch": 0.89,
708
- "learning_rate": 1.4493506493506495e-05,
709
- "loss": 0.0259,
710
- "step": 117
711
  },
712
  {
713
  "epoch": 0.9,
714
- "learning_rate": 1.4441558441558442e-05,
715
- "loss": 0.0321,
716
  "step": 118
717
  },
718
  {
719
  "epoch": 0.91,
720
- "learning_rate": 1.4389610389610391e-05,
721
- "loss": 0.0324,
722
  "step": 119
723
  },
724
  {
725
  "epoch": 0.92,
726
- "learning_rate": 1.433766233766234e-05,
727
- "loss": 0.1605,
728
  "step": 120
729
  },
730
  {
731
  "epoch": 0.92,
732
- "learning_rate": 1.4285714285714287e-05,
733
- "loss": 0.0466,
734
  "step": 121
735
  },
736
  {
737
  "epoch": 0.93,
738
- "learning_rate": 1.4233766233766236e-05,
739
- "loss": 0.0628,
740
  "step": 122
741
  },
742
  {
743
  "epoch": 0.94,
744
- "learning_rate": 1.4181818181818183e-05,
745
- "loss": 0.0276,
746
  "step": 123
747
  },
748
  {
749
  "epoch": 0.95,
750
- "learning_rate": 1.4129870129870132e-05,
751
- "loss": 0.0371,
752
  "step": 124
753
  },
754
  {
755
  "epoch": 0.95,
756
- "learning_rate": 1.4077922077922079e-05,
757
- "loss": 0.0115,
758
  "step": 125
759
  },
760
  {
761
  "epoch": 0.96,
762
- "learning_rate": 1.4025974025974028e-05,
763
- "loss": 0.0399,
764
  "step": 126
765
  },
766
  {
767
  "epoch": 0.97,
768
- "learning_rate": 1.3974025974025975e-05,
769
- "loss": 0.043,
770
  "step": 127
771
  },
772
  {
773
  "epoch": 0.98,
774
- "learning_rate": 1.3922077922077924e-05,
775
- "loss": 0.0543,
776
  "step": 128
777
  },
778
  {
779
  "epoch": 0.98,
780
- "learning_rate": 1.3870129870129871e-05,
781
- "loss": 0.0232,
782
  "step": 129
783
  },
784
  {
785
  "epoch": 0.99,
786
- "learning_rate": 1.381818181818182e-05,
787
- "loss": 0.0542,
788
  "step": 130
789
  },
790
  {
791
  "epoch": 1.0,
792
- "learning_rate": 1.3766233766233767e-05,
793
- "loss": 0.0232,
794
  "step": 131
795
  },
796
  {
797
  "epoch": 1.0,
798
- "eval_loss": 0.029073596000671387,
799
- "eval_pearson": 0.9236611784671435,
800
- "eval_runtime": 5.5585,
801
- "eval_samples_per_second": 41.738,
802
- "eval_spearmanr": 0.8832765384834576,
803
- "eval_steps_per_second": 2.699,
804
  "step": 131
805
  },
806
  {
807
  "epoch": 1.01,
808
- "learning_rate": 1.3714285714285716e-05,
809
- "loss": 0.0361,
810
  "step": 132
811
  },
812
  {
813
  "epoch": 1.02,
814
- "learning_rate": 1.3662337662337663e-05,
815
- "loss": 0.0235,
816
  "step": 133
817
  },
818
  {
819
  "epoch": 1.02,
820
- "learning_rate": 1.3610389610389612e-05,
821
- "loss": 0.0195,
822
  "step": 134
823
  },
824
  {
825
  "epoch": 1.03,
826
- "learning_rate": 1.3558441558441559e-05,
827
- "loss": 0.0482,
828
  "step": 135
829
  },
830
  {
831
  "epoch": 1.04,
832
- "learning_rate": 1.3506493506493508e-05,
833
- "loss": 0.0332,
834
  "step": 136
835
  },
836
  {
837
  "epoch": 1.05,
838
- "learning_rate": 1.3454545454545455e-05,
839
- "loss": 0.0261,
840
  "step": 137
841
  },
842
  {
843
  "epoch": 1.05,
844
- "learning_rate": 1.3402597402597404e-05,
845
- "loss": 0.0681,
846
  "step": 138
847
  },
848
  {
849
  "epoch": 1.06,
850
- "learning_rate": 1.3350649350649351e-05,
851
- "loss": 0.0216,
852
  "step": 139
853
  },
854
  {
855
  "epoch": 1.07,
856
- "learning_rate": 1.32987012987013e-05,
857
- "loss": 0.0326,
858
  "step": 140
859
  },
860
  {
861
  "epoch": 1.08,
862
- "learning_rate": 1.3246753246753249e-05,
863
- "loss": 0.0253,
864
  "step": 141
865
  },
866
  {
867
  "epoch": 1.08,
868
- "learning_rate": 1.3194805194805196e-05,
869
- "loss": 0.0286,
870
  "step": 142
871
  },
872
  {
873
  "epoch": 1.09,
874
- "learning_rate": 1.3142857142857145e-05,
875
- "loss": 0.0096,
876
  "step": 143
877
  },
878
  {
879
  "epoch": 1.1,
880
- "learning_rate": 1.3090909090909092e-05,
881
- "loss": 0.0412,
882
  "step": 144
883
  },
884
  {
885
  "epoch": 1.11,
886
- "learning_rate": 1.303896103896104e-05,
887
- "loss": 0.0444,
888
  "step": 145
889
  },
890
  {
891
  "epoch": 1.11,
892
- "learning_rate": 1.2987012987012988e-05,
893
- "loss": 0.0393,
894
  "step": 146
895
  },
896
  {
897
  "epoch": 1.12,
898
- "learning_rate": 1.2935064935064937e-05,
899
- "loss": 0.0186,
900
  "step": 147
901
  },
902
  {
903
  "epoch": 1.13,
904
- "learning_rate": 1.2883116883116884e-05,
905
- "loss": 0.0301,
906
  "step": 148
907
  },
908
  {
909
  "epoch": 1.14,
910
- "learning_rate": 1.2831168831168832e-05,
911
- "loss": 0.0099,
912
  "step": 149
913
  },
914
  {
915
  "epoch": 1.15,
916
- "learning_rate": 1.277922077922078e-05,
917
- "loss": 0.0252,
918
  "step": 150
919
  },
920
  {
921
  "epoch": 1.15,
922
- "learning_rate": 1.2727272727272728e-05,
923
- "loss": 0.0108,
924
  "step": 151
925
  },
926
  {
927
  "epoch": 1.16,
928
- "learning_rate": 1.2675324675324676e-05,
929
- "loss": 0.0447,
930
  "step": 152
931
  },
932
  {
933
  "epoch": 1.17,
934
- "learning_rate": 1.2623376623376624e-05,
935
- "loss": 0.0567,
936
  "step": 153
937
  },
938
  {
939
  "epoch": 1.18,
940
- "learning_rate": 1.2571428571428572e-05,
941
- "loss": 0.0347,
942
  "step": 154
943
  },
944
  {
945
  "epoch": 1.18,
946
- "learning_rate": 1.251948051948052e-05,
947
- "loss": 0.0186,
948
  "step": 155
949
  },
950
  {
951
  "epoch": 1.19,
952
- "learning_rate": 1.2467532467532468e-05,
953
- "loss": 0.0303,
954
  "step": 156
955
  },
956
  {
957
  "epoch": 1.2,
958
- "learning_rate": 1.2415584415584416e-05,
959
- "loss": 0.0286,
960
  "step": 157
961
  },
962
  {
963
  "epoch": 1.21,
964
- "learning_rate": 1.2363636363636364e-05,
965
- "loss": 0.0182,
966
  "step": 158
967
  },
968
  {
969
  "epoch": 1.21,
970
- "learning_rate": 1.2311688311688312e-05,
971
- "loss": 0.0155,
972
  "step": 159
973
  },
974
  {
975
  "epoch": 1.22,
976
- "learning_rate": 1.2259740259740261e-05,
977
- "loss": 0.0253,
978
  "step": 160
979
  },
980
  {
981
  "epoch": 1.23,
982
- "learning_rate": 1.2207792207792208e-05,
983
- "loss": 0.0128,
984
  "step": 161
985
  },
986
  {
987
  "epoch": 1.24,
988
- "learning_rate": 1.2155844155844157e-05,
989
- "loss": 0.0074,
990
  "step": 162
991
  },
992
  {
993
  "epoch": 1.24,
994
- "learning_rate": 1.2103896103896104e-05,
995
- "loss": 0.0284,
996
  "step": 163
997
  },
998
  {
999
  "epoch": 1.25,
1000
- "learning_rate": 1.2051948051948053e-05,
1001
- "loss": 0.0389,
1002
  "step": 164
1003
  },
1004
  {
1005
  "epoch": 1.26,
1006
- "learning_rate": 1.2e-05,
1007
- "loss": 0.0165,
1008
  "step": 165
1009
  },
1010
  {
1011
  "epoch": 1.27,
1012
- "learning_rate": 1.1948051948051949e-05,
1013
- "loss": 0.0307,
1014
  "step": 166
1015
  },
1016
  {
1017
  "epoch": 1.27,
1018
- "learning_rate": 1.1896103896103896e-05,
1019
- "loss": 0.016,
1020
  "step": 167
1021
  },
1022
  {
1023
  "epoch": 1.28,
1024
- "learning_rate": 1.1844155844155845e-05,
1025
- "loss": 0.031,
1026
  "step": 168
1027
  },
1028
  {
1029
  "epoch": 1.29,
1030
- "learning_rate": 1.1792207792207792e-05,
1031
- "loss": 0.0083,
1032
  "step": 169
1033
  },
1034
  {
1035
  "epoch": 1.3,
1036
- "learning_rate": 1.1740259740259741e-05,
1037
- "loss": 0.0577,
1038
  "step": 170
1039
  },
1040
  {
1041
  "epoch": 1.31,
1042
- "learning_rate": 1.1688311688311688e-05,
1043
- "loss": 0.0133,
1044
  "step": 171
1045
  },
1046
  {
1047
  "epoch": 1.31,
1048
- "learning_rate": 1.1636363636363637e-05,
1049
- "loss": 0.017,
1050
  "step": 172
1051
  },
1052
  {
1053
  "epoch": 1.32,
1054
- "learning_rate": 1.1584415584415584e-05,
1055
- "loss": 0.0206,
1056
  "step": 173
1057
  },
1058
  {
1059
  "epoch": 1.33,
1060
- "learning_rate": 1.1532467532467533e-05,
1061
- "loss": 0.0482,
1062
  "step": 174
1063
  },
1064
  {
1065
  "epoch": 1.34,
1066
- "learning_rate": 1.148051948051948e-05,
1067
- "loss": 0.0467,
1068
  "step": 175
1069
  },
1070
  {
1071
  "epoch": 1.34,
1072
- "learning_rate": 1.1428571428571429e-05,
1073
- "loss": 0.0195,
1074
  "step": 176
1075
  },
1076
  {
1077
  "epoch": 1.35,
1078
- "learning_rate": 1.1376623376623376e-05,
1079
- "loss": 0.0481,
1080
  "step": 177
1081
  },
1082
  {
1083
  "epoch": 1.36,
1084
- "learning_rate": 1.1324675324675325e-05,
1085
- "loss": 0.0189,
1086
  "step": 178
1087
  },
1088
  {
1089
  "epoch": 1.37,
1090
- "learning_rate": 1.1272727272727272e-05,
1091
- "loss": 0.0498,
1092
  "step": 179
1093
  },
1094
  {
1095
  "epoch": 1.37,
1096
- "learning_rate": 1.1220779220779221e-05,
1097
- "loss": 0.0351,
1098
  "step": 180
1099
  },
1100
  {
1101
  "epoch": 1.38,
1102
- "learning_rate": 1.116883116883117e-05,
1103
- "loss": 0.0341,
1104
  "step": 181
1105
  },
1106
  {
1107
  "epoch": 1.39,
1108
- "learning_rate": 1.1116883116883117e-05,
1109
- "loss": 0.0659,
1110
  "step": 182
1111
  },
1112
  {
1113
  "epoch": 1.4,
1114
- "learning_rate": 1.1064935064935066e-05,
1115
- "loss": 0.0198,
1116
  "step": 183
1117
  },
1118
  {
1119
  "epoch": 1.4,
1120
- "learning_rate": 1.1012987012987013e-05,
1121
- "loss": 0.0352,
1122
  "step": 184
1123
  },
1124
  {
1125
  "epoch": 1.41,
1126
- "learning_rate": 1.0961038961038962e-05,
1127
- "loss": 0.0129,
1128
  "step": 185
1129
  },
1130
  {
1131
  "epoch": 1.42,
1132
- "learning_rate": 1.0909090909090909e-05,
1133
- "loss": 0.0353,
1134
  "step": 186
1135
  },
1136
  {
1137
  "epoch": 1.43,
1138
- "learning_rate": 1.0857142857142858e-05,
1139
- "loss": 0.0084,
1140
  "step": 187
1141
  },
1142
  {
1143
  "epoch": 1.44,
1144
- "learning_rate": 1.0805194805194805e-05,
1145
- "loss": 0.02,
1146
  "step": 188
1147
  },
1148
  {
1149
  "epoch": 1.44,
1150
- "learning_rate": 1.0753246753246754e-05,
1151
- "loss": 0.0106,
1152
  "step": 189
1153
  },
1154
  {
1155
  "epoch": 1.45,
1156
- "learning_rate": 1.0701298701298701e-05,
1157
- "loss": 0.0253,
1158
  "step": 190
1159
  },
1160
  {
1161
  "epoch": 1.46,
1162
- "learning_rate": 1.064935064935065e-05,
1163
- "loss": 0.0116,
1164
  "step": 191
1165
  },
1166
  {
1167
  "epoch": 1.47,
1168
- "learning_rate": 1.0597402597402597e-05,
1169
- "loss": 0.0099,
1170
  "step": 192
1171
  },
1172
  {
1173
  "epoch": 1.47,
1174
- "learning_rate": 1.0545454545454546e-05,
1175
- "loss": 0.0845,
1176
  "step": 193
1177
  },
1178
  {
1179
  "epoch": 1.48,
1180
- "learning_rate": 1.0493506493506493e-05,
1181
- "loss": 0.0236,
1182
  "step": 194
1183
  },
1184
  {
1185
  "epoch": 1.49,
1186
- "learning_rate": 1.0441558441558442e-05,
1187
- "loss": 0.0299,
1188
  "step": 195
1189
  },
1190
  {
1191
  "epoch": 1.5,
1192
- "learning_rate": 1.0389610389610389e-05,
1193
- "loss": 0.01,
1194
  "step": 196
1195
  },
1196
  {
1197
  "epoch": 1.5,
1198
- "learning_rate": 1.0337662337662338e-05,
1199
- "loss": 0.0154,
1200
  "step": 197
1201
  },
1202
  {
1203
  "epoch": 1.51,
1204
- "learning_rate": 1.0285714285714285e-05,
1205
- "loss": 0.0095,
1206
  "step": 198
1207
  },
1208
  {
1209
  "epoch": 1.52,
1210
- "learning_rate": 1.0233766233766234e-05,
1211
- "loss": 0.0278,
1212
  "step": 199
1213
  },
1214
  {
1215
  "epoch": 1.53,
1216
- "learning_rate": 1.0181818181818182e-05,
1217
- "loss": 0.0228,
1218
  "step": 200
1219
  },
1220
  {
1221
  "epoch": 1.53,
1222
- "learning_rate": 1.012987012987013e-05,
1223
- "loss": 0.0252,
1224
  "step": 201
1225
  },
1226
  {
1227
  "epoch": 1.54,
1228
- "learning_rate": 1.0077922077922078e-05,
1229
- "loss": 0.085,
1230
  "step": 202
1231
  },
1232
  {
1233
  "epoch": 1.55,
1234
- "learning_rate": 1.0025974025974026e-05,
1235
- "loss": 0.0046,
1236
  "step": 203
1237
  },
1238
  {
1239
  "epoch": 1.56,
1240
- "learning_rate": 9.974025974025974e-06,
1241
- "loss": 0.0206,
1242
  "step": 204
1243
  },
1244
  {
1245
  "epoch": 1.56,
1246
- "learning_rate": 9.922077922077923e-06,
1247
- "loss": 0.046,
1248
  "step": 205
1249
  },
1250
  {
1251
  "epoch": 1.57,
1252
- "learning_rate": 9.87012987012987e-06,
1253
- "loss": 0.0137,
1254
  "step": 206
1255
  },
1256
  {
1257
  "epoch": 1.58,
1258
- "learning_rate": 9.81818181818182e-06,
1259
- "loss": 0.012,
1260
  "step": 207
1261
  },
1262
  {
1263
  "epoch": 1.59,
1264
- "learning_rate": 9.766233766233766e-06,
1265
- "loss": 0.0504,
1266
  "step": 208
1267
  },
1268
  {
1269
  "epoch": 1.6,
1270
- "learning_rate": 9.714285714285715e-06,
1271
- "loss": 0.0137,
1272
  "step": 209
1273
  },
1274
  {
1275
  "epoch": 1.6,
1276
- "learning_rate": 9.662337662337662e-06,
1277
- "loss": 0.0191,
1278
  "step": 210
1279
  },
1280
  {
1281
  "epoch": 1.61,
1282
- "learning_rate": 9.610389610389611e-06,
1283
- "loss": 0.016,
1284
  "step": 211
1285
  },
1286
  {
1287
  "epoch": 1.62,
1288
- "learning_rate": 9.558441558441558e-06,
1289
- "loss": 0.0511,
1290
  "step": 212
1291
  },
1292
  {
1293
  "epoch": 1.63,
1294
- "learning_rate": 9.506493506493507e-06,
1295
- "loss": 0.0233,
1296
  "step": 213
1297
  },
1298
  {
1299
  "epoch": 1.63,
1300
- "learning_rate": 9.454545454545456e-06,
1301
- "loss": 0.0301,
1302
  "step": 214
1303
  },
1304
  {
1305
  "epoch": 1.64,
1306
- "learning_rate": 9.402597402597403e-06,
1307
- "loss": 0.0087,
1308
  "step": 215
1309
  },
1310
  {
1311
  "epoch": 1.65,
1312
- "learning_rate": 9.350649350649352e-06,
1313
- "loss": 0.0112,
1314
  "step": 216
1315
  },
1316
  {
1317
  "epoch": 1.66,
1318
- "learning_rate": 9.298701298701299e-06,
1319
- "loss": 0.016,
1320
  "step": 217
1321
  },
1322
  {
1323
  "epoch": 1.66,
1324
- "learning_rate": 9.246753246753248e-06,
1325
- "loss": 0.0158,
1326
  "step": 218
1327
  },
1328
  {
1329
  "epoch": 1.67,
1330
- "learning_rate": 9.194805194805195e-06,
1331
- "loss": 0.0235,
1332
  "step": 219
1333
  },
1334
  {
1335
  "epoch": 1.68,
1336
- "learning_rate": 9.142857142857144e-06,
1337
- "loss": 0.0137,
1338
  "step": 220
1339
  },
1340
  {
1341
  "epoch": 1.69,
1342
- "learning_rate": 9.090909090909091e-06,
1343
- "loss": 0.0099,
1344
  "step": 221
1345
  },
1346
  {
1347
  "epoch": 1.69,
1348
- "learning_rate": 9.03896103896104e-06,
1349
- "loss": 0.0494,
1350
  "step": 222
1351
  },
1352
  {
1353
  "epoch": 1.7,
1354
- "learning_rate": 8.987012987012987e-06,
1355
- "loss": 0.0184,
1356
  "step": 223
1357
  },
1358
  {
1359
  "epoch": 1.71,
1360
- "learning_rate": 8.935064935064936e-06,
1361
- "loss": 0.0238,
1362
  "step": 224
1363
  },
1364
  {
1365
  "epoch": 1.72,
1366
- "learning_rate": 8.883116883116883e-06,
1367
- "loss": 0.0168,
1368
  "step": 225
1369
  },
1370
  {
1371
  "epoch": 1.73,
1372
- "learning_rate": 8.831168831168832e-06,
1373
- "loss": 0.0187,
1374
  "step": 226
1375
  },
1376
  {
1377
  "epoch": 1.73,
1378
- "learning_rate": 8.779220779220779e-06,
1379
- "loss": 0.029,
1380
  "step": 227
1381
  },
1382
  {
1383
  "epoch": 1.74,
1384
- "learning_rate": 8.727272727272728e-06,
1385
- "loss": 0.0259,
1386
  "step": 228
1387
  },
1388
  {
1389
  "epoch": 1.75,
1390
- "learning_rate": 8.675324675324675e-06,
1391
- "loss": 0.0128,
1392
  "step": 229
1393
  },
1394
  {
1395
  "epoch": 1.76,
1396
- "learning_rate": 8.623376623376624e-06,
1397
- "loss": 0.0235,
1398
  "step": 230
1399
  },
1400
  {
1401
  "epoch": 1.76,
1402
- "learning_rate": 8.571428571428571e-06,
1403
  "loss": 0.0514,
1404
  "step": 231
1405
  },
1406
  {
1407
  "epoch": 1.77,
1408
- "learning_rate": 8.51948051948052e-06,
1409
- "loss": 0.0147,
1410
  "step": 232
1411
  },
1412
  {
1413
  "epoch": 1.78,
1414
- "learning_rate": 8.467532467532467e-06,
1415
- "loss": 0.0107,
1416
  "step": 233
1417
  },
1418
  {
1419
  "epoch": 1.79,
1420
- "learning_rate": 8.415584415584416e-06,
1421
- "loss": 0.0391,
1422
  "step": 234
1423
  },
1424
  {
1425
  "epoch": 1.79,
1426
- "learning_rate": 8.363636363636365e-06,
1427
- "loss": 0.0082,
1428
  "step": 235
1429
  },
1430
  {
1431
  "epoch": 1.8,
1432
- "learning_rate": 8.311688311688313e-06,
1433
- "loss": 0.0744,
1434
  "step": 236
1435
  },
1436
  {
1437
  "epoch": 1.81,
1438
- "learning_rate": 8.25974025974026e-06,
1439
- "loss": 0.0284,
1440
  "step": 237
1441
  },
1442
  {
1443
  "epoch": 1.82,
1444
- "learning_rate": 8.20779220779221e-06,
1445
- "loss": 0.0066,
1446
  "step": 238
1447
  },
1448
  {
1449
  "epoch": 1.82,
1450
- "learning_rate": 8.155844155844157e-06,
1451
- "loss": 0.0103,
1452
  "step": 239
1453
  },
1454
  {
1455
  "epoch": 1.83,
1456
- "learning_rate": 8.103896103896105e-06,
1457
- "loss": 0.0096,
1458
  "step": 240
1459
  },
1460
  {
1461
  "epoch": 1.84,
1462
- "learning_rate": 8.051948051948052e-06,
1463
- "loss": 0.0167,
1464
  "step": 241
1465
  },
1466
  {
1467
  "epoch": 1.85,
1468
- "learning_rate": 8.000000000000001e-06,
1469
- "loss": 0.0107,
1470
  "step": 242
1471
  },
1472
  {
1473
  "epoch": 1.85,
1474
- "learning_rate": 7.948051948051948e-06,
1475
- "loss": 0.009,
1476
  "step": 243
1477
  },
1478
  {
1479
  "epoch": 1.86,
1480
- "learning_rate": 7.896103896103897e-06,
1481
- "loss": 0.0153,
1482
  "step": 244
1483
  },
1484
  {
1485
  "epoch": 1.87,
1486
- "learning_rate": 7.844155844155844e-06,
1487
- "loss": 0.0178,
1488
  "step": 245
1489
  },
1490
  {
1491
  "epoch": 1.88,
1492
- "learning_rate": 7.792207792207793e-06,
1493
- "loss": 0.0057,
1494
  "step": 246
1495
  },
1496
  {
1497
  "epoch": 1.89,
1498
- "learning_rate": 7.74025974025974e-06,
1499
- "loss": 0.0267,
1500
  "step": 247
1501
  },
1502
  {
1503
  "epoch": 1.89,
1504
- "learning_rate": 7.68831168831169e-06,
1505
- "loss": 0.0199,
1506
  "step": 248
1507
  },
1508
  {
1509
  "epoch": 1.9,
1510
- "learning_rate": 7.636363636363638e-06,
1511
- "loss": 0.0298,
1512
  "step": 249
1513
  },
1514
  {
1515
  "epoch": 1.91,
1516
- "learning_rate": 7.584415584415585e-06,
1517
- "loss": 0.0187,
1518
  "step": 250
1519
  },
1520
  {
1521
  "epoch": 1.92,
1522
- "learning_rate": 7.532467532467533e-06,
1523
- "loss": 0.0218,
1524
  "step": 251
1525
  },
1526
  {
1527
  "epoch": 1.92,
1528
- "learning_rate": 7.480519480519481e-06,
1529
- "loss": 0.0058,
1530
  "step": 252
1531
  },
1532
  {
1533
  "epoch": 1.93,
1534
- "learning_rate": 7.428571428571429e-06,
1535
- "loss": 0.0172,
1536
  "step": 253
1537
  },
1538
  {
1539
  "epoch": 1.94,
1540
- "learning_rate": 7.376623376623377e-06,
1541
- "loss": 0.0095,
1542
  "step": 254
1543
  },
1544
  {
1545
  "epoch": 1.95,
1546
- "learning_rate": 7.324675324675325e-06,
1547
- "loss": 0.0243,
1548
  "step": 255
1549
  },
1550
  {
1551
  "epoch": 1.95,
1552
- "learning_rate": 7.272727272727273e-06,
1553
- "loss": 0.0137,
1554
  "step": 256
1555
  },
1556
  {
1557
  "epoch": 1.96,
1558
- "learning_rate": 7.220779220779221e-06,
1559
- "loss": 0.0328,
1560
  "step": 257
1561
  },
1562
  {
1563
  "epoch": 1.97,
1564
- "learning_rate": 7.16883116883117e-06,
1565
- "loss": 0.023,
1566
  "step": 258
1567
  },
1568
  {
1569
  "epoch": 1.98,
1570
- "learning_rate": 7.116883116883118e-06,
1571
- "loss": 0.0512,
1572
  "step": 259
1573
  },
1574
  {
1575
  "epoch": 1.98,
1576
- "learning_rate": 7.064935064935066e-06,
1577
- "loss": 0.0106,
1578
  "step": 260
1579
  },
1580
  {
1581
  "epoch": 1.99,
1582
- "learning_rate": 7.012987012987014e-06,
1583
- "loss": 0.0226,
1584
  "step": 261
1585
  },
1586
  {
1587
  "epoch": 2.0,
1588
- "learning_rate": 6.961038961038962e-06,
1589
- "loss": 0.0024,
1590
  "step": 262
1591
  },
1592
  {
1593
  "epoch": 2.0,
1594
- "eval_loss": 0.021491218358278275,
1595
- "eval_pearson": 0.9619413875691463,
1596
- "eval_runtime": 5.3773,
1597
- "eval_samples_per_second": 43.144,
1598
- "eval_spearmanr": 0.9085946142766584,
1599
- "eval_steps_per_second": 2.79,
1600
  "step": 262
1601
  },
1602
  {
1603
  "epoch": 2.01,
1604
- "learning_rate": 6.90909090909091e-06,
1605
- "loss": 0.0118,
1606
  "step": 263
1607
  },
1608
  {
1609
  "epoch": 2.02,
1610
- "learning_rate": 6.857142857142858e-06,
1611
- "loss": 0.029,
1612
  "step": 264
1613
  },
1614
  {
1615
  "epoch": 2.02,
1616
- "learning_rate": 6.805194805194806e-06,
1617
- "loss": 0.0178,
1618
  "step": 265
1619
  },
1620
  {
1621
  "epoch": 2.03,
1622
- "learning_rate": 6.753246753246754e-06,
1623
- "loss": 0.0163,
1624
  "step": 266
1625
  },
1626
  {
1627
  "epoch": 2.04,
1628
- "learning_rate": 6.701298701298702e-06,
1629
- "loss": 0.018,
1630
  "step": 267
1631
  },
1632
  {
1633
  "epoch": 2.05,
1634
- "learning_rate": 6.64935064935065e-06,
1635
- "loss": 0.016,
1636
  "step": 268
1637
  },
1638
  {
1639
  "epoch": 2.05,
1640
- "learning_rate": 6.597402597402598e-06,
1641
- "loss": 0.0132,
1642
  "step": 269
1643
  },
1644
  {
1645
  "epoch": 2.06,
1646
- "learning_rate": 6.545454545454546e-06,
1647
- "loss": 0.0195,
1648
  "step": 270
1649
  },
1650
  {
1651
  "epoch": 2.07,
1652
- "learning_rate": 6.493506493506494e-06,
1653
- "loss": 0.0133,
1654
  "step": 271
1655
  },
1656
  {
1657
  "epoch": 2.08,
1658
- "learning_rate": 6.441558441558442e-06,
1659
- "loss": 0.0065,
1660
  "step": 272
1661
  },
1662
  {
1663
  "epoch": 2.08,
1664
- "learning_rate": 6.38961038961039e-06,
1665
- "loss": 0.0154,
1666
  "step": 273
1667
  },
1668
  {
1669
  "epoch": 2.09,
1670
- "learning_rate": 6.337662337662338e-06,
1671
- "loss": 0.0136,
1672
  "step": 274
1673
  },
1674
  {
1675
  "epoch": 2.1,
1676
- "learning_rate": 6.285714285714286e-06,
1677
- "loss": 0.0155,
1678
  "step": 275
1679
  },
1680
  {
1681
  "epoch": 2.11,
1682
- "learning_rate": 6.233766233766234e-06,
1683
- "loss": 0.0081,
1684
  "step": 276
1685
  },
1686
  {
1687
  "epoch": 2.11,
1688
- "learning_rate": 6.181818181818182e-06,
1689
- "loss": 0.0104,
1690
  "step": 277
1691
  },
1692
  {
1693
  "epoch": 2.12,
1694
- "learning_rate": 6.129870129870131e-06,
1695
- "loss": 0.0183,
1696
  "step": 278
1697
  },
1698
  {
1699
  "epoch": 2.13,
1700
- "learning_rate": 6.077922077922079e-06,
1701
- "loss": 0.0223,
1702
  "step": 279
1703
  },
1704
  {
1705
  "epoch": 2.14,
1706
- "learning_rate": 6.025974025974027e-06,
1707
- "loss": 0.0142,
1708
  "step": 280
1709
  },
1710
  {
1711
  "epoch": 2.15,
1712
- "learning_rate": 5.9740259740259746e-06,
1713
- "loss": 0.049,
1714
  "step": 281
1715
  },
1716
  {
1717
  "epoch": 2.15,
1718
- "learning_rate": 5.9220779220779226e-06,
1719
- "loss": 0.0039,
1720
  "step": 282
1721
  },
1722
  {
1723
  "epoch": 2.16,
1724
- "learning_rate": 5.8701298701298705e-06,
1725
- "loss": 0.0139,
1726
  "step": 283
1727
  },
1728
  {
1729
  "epoch": 2.17,
1730
- "learning_rate": 5.8181818181818185e-06,
1731
- "loss": 0.0056,
1732
  "step": 284
1733
  },
1734
  {
1735
  "epoch": 2.18,
1736
- "learning_rate": 5.7662337662337665e-06,
1737
- "loss": 0.0088,
1738
  "step": 285
1739
  },
1740
  {
1741
  "epoch": 2.18,
1742
- "learning_rate": 5.7142857142857145e-06,
1743
- "loss": 0.0144,
1744
  "step": 286
1745
  },
1746
  {
1747
  "epoch": 2.19,
1748
- "learning_rate": 5.6623376623376625e-06,
1749
- "loss": 0.0127,
1750
  "step": 287
1751
  },
1752
  {
1753
  "epoch": 2.2,
1754
- "learning_rate": 5.6103896103896105e-06,
1755
- "loss": 0.0056,
1756
  "step": 288
1757
  },
1758
  {
1759
  "epoch": 2.21,
1760
- "learning_rate": 5.5584415584415585e-06,
1761
- "loss": 0.0048,
1762
  "step": 289
1763
  },
1764
  {
1765
  "epoch": 2.21,
1766
- "learning_rate": 5.5064935064935065e-06,
1767
- "loss": 0.007,
1768
  "step": 290
1769
  },
1770
  {
1771
  "epoch": 2.22,
1772
- "learning_rate": 5.4545454545454545e-06,
1773
- "loss": 0.0097,
1774
  "step": 291
1775
  },
1776
  {
1777
  "epoch": 2.23,
1778
- "learning_rate": 5.4025974025974024e-06,
1779
- "loss": 0.0044,
1780
  "step": 292
1781
  },
1782
  {
1783
  "epoch": 2.24,
1784
- "learning_rate": 5.3506493506493504e-06,
1785
- "loss": 0.0108,
1786
  "step": 293
1787
  },
1788
  {
1789
  "epoch": 2.24,
1790
- "learning_rate": 5.298701298701298e-06,
1791
- "loss": 0.0116,
1792
  "step": 294
1793
  },
1794
  {
1795
  "epoch": 2.25,
1796
- "learning_rate": 5.246753246753246e-06,
1797
- "loss": 0.0111,
1798
  "step": 295
1799
  },
1800
  {
1801
  "epoch": 2.26,
1802
- "learning_rate": 5.194805194805194e-06,
1803
- "loss": 0.0057,
1804
  "step": 296
1805
  },
1806
  {
1807
  "epoch": 2.27,
1808
- "learning_rate": 5.142857142857142e-06,
1809
- "loss": 0.0043,
1810
  "step": 297
1811
  },
1812
  {
1813
  "epoch": 2.27,
1814
- "learning_rate": 5.090909090909091e-06,
1815
- "loss": 0.0051,
1816
  "step": 298
1817
  },
1818
  {
1819
  "epoch": 2.28,
1820
- "learning_rate": 5.038961038961039e-06,
1821
- "loss": 0.0313,
1822
  "step": 299
1823
  },
1824
  {
1825
  "epoch": 2.29,
1826
- "learning_rate": 4.987012987012987e-06,
1827
- "loss": 0.0058,
1828
  "step": 300
1829
  },
1830
  {
1831
  "epoch": 2.3,
1832
- "learning_rate": 4.935064935064935e-06,
1833
- "loss": 0.008,
1834
  "step": 301
1835
  },
1836
  {
1837
  "epoch": 2.31,
1838
- "learning_rate": 4.883116883116883e-06,
1839
- "loss": 0.0131,
1840
  "step": 302
1841
  },
1842
  {
1843
  "epoch": 2.31,
1844
- "learning_rate": 4.831168831168831e-06,
1845
- "loss": 0.0109,
1846
  "step": 303
1847
  },
1848
  {
1849
  "epoch": 2.32,
1850
- "learning_rate": 4.779220779220779e-06,
1851
- "loss": 0.0093,
1852
  "step": 304
1853
  },
1854
  {
1855
  "epoch": 2.33,
1856
- "learning_rate": 4.727272727272728e-06,
1857
- "loss": 0.0097,
1858
  "step": 305
1859
  },
1860
  {
1861
  "epoch": 2.34,
1862
- "learning_rate": 4.675324675324676e-06,
1863
- "loss": 0.0075,
1864
  "step": 306
1865
  },
1866
  {
1867
  "epoch": 2.34,
1868
- "learning_rate": 4.623376623376624e-06,
1869
- "loss": 0.0157,
1870
  "step": 307
1871
  },
1872
  {
1873
  "epoch": 2.35,
1874
- "learning_rate": 4.571428571428572e-06,
1875
- "loss": 0.0045,
1876
  "step": 308
1877
  },
1878
  {
1879
  "epoch": 2.36,
1880
- "learning_rate": 4.51948051948052e-06,
1881
- "loss": 0.0146,
1882
  "step": 309
1883
  },
1884
  {
1885
  "epoch": 2.37,
1886
- "learning_rate": 4.467532467532468e-06,
1887
- "loss": 0.024,
1888
  "step": 310
1889
  },
1890
  {
1891
  "epoch": 2.37,
1892
- "learning_rate": 4.415584415584416e-06,
1893
- "loss": 0.0168,
1894
  "step": 311
1895
  },
1896
  {
1897
  "epoch": 2.38,
1898
- "learning_rate": 4.363636363636364e-06,
1899
- "loss": 0.0191,
1900
  "step": 312
1901
  },
1902
  {
1903
  "epoch": 2.39,
1904
- "learning_rate": 4.311688311688312e-06,
1905
- "loss": 0.009,
1906
  "step": 313
1907
  },
1908
  {
1909
  "epoch": 2.4,
1910
- "learning_rate": 4.25974025974026e-06,
1911
- "loss": 0.0093,
1912
  "step": 314
1913
  },
1914
  {
1915
  "epoch": 2.4,
1916
- "learning_rate": 4.207792207792208e-06,
1917
- "loss": 0.0133,
1918
  "step": 315
1919
  },
1920
  {
1921
  "epoch": 2.41,
1922
- "learning_rate": 4.155844155844157e-06,
1923
- "loss": 0.0114,
1924
  "step": 316
1925
  },
1926
  {
1927
  "epoch": 2.42,
1928
- "learning_rate": 4.103896103896105e-06,
1929
- "loss": 0.0165,
1930
  "step": 317
1931
  },
1932
  {
1933
  "epoch": 2.43,
1934
- "learning_rate": 4.051948051948053e-06,
1935
- "loss": 0.0162,
1936
  "step": 318
1937
  },
1938
  {
1939
  "epoch": 2.44,
1940
- "learning_rate": 4.000000000000001e-06,
1941
- "loss": 0.013,
1942
  "step": 319
1943
  },
1944
  {
1945
  "epoch": 2.44,
1946
- "learning_rate": 3.948051948051949e-06,
1947
- "loss": 0.0069,
1948
  "step": 320
1949
  },
1950
  {
1951
  "epoch": 2.45,
1952
- "learning_rate": 3.896103896103897e-06,
1953
- "loss": 0.0161,
1954
  "step": 321
1955
  },
1956
  {
1957
  "epoch": 2.46,
1958
- "learning_rate": 3.844155844155845e-06,
1959
- "loss": 0.0022,
1960
  "step": 322
1961
  },
1962
  {
1963
  "epoch": 2.47,
1964
- "learning_rate": 3.7922077922077926e-06,
1965
- "loss": 0.0078,
1966
  "step": 323
1967
  },
1968
  {
1969
  "epoch": 2.47,
1970
- "learning_rate": 3.7402597402597406e-06,
1971
- "loss": 0.0149,
1972
  "step": 324
1973
  },
1974
  {
1975
  "epoch": 2.48,
1976
- "learning_rate": 3.6883116883116886e-06,
1977
- "loss": 0.0203,
1978
  "step": 325
1979
  },
1980
  {
1981
  "epoch": 2.49,
1982
- "learning_rate": 3.6363636363636366e-06,
1983
- "loss": 0.0093,
1984
  "step": 326
1985
  },
1986
  {
1987
  "epoch": 2.5,
1988
- "learning_rate": 3.584415584415585e-06,
1989
- "loss": 0.0271,
1990
  "step": 327
1991
  },
1992
  {
1993
  "epoch": 2.5,
1994
- "learning_rate": 3.532467532467533e-06,
1995
- "loss": 0.0087,
1996
  "step": 328
1997
  },
1998
  {
1999
  "epoch": 2.51,
2000
- "learning_rate": 3.480519480519481e-06,
2001
- "loss": 0.0123,
2002
  "step": 329
2003
  },
2004
  {
2005
  "epoch": 2.52,
2006
- "learning_rate": 3.428571428571429e-06,
2007
- "loss": 0.0056,
2008
  "step": 330
2009
  },
2010
  {
2011
  "epoch": 2.53,
2012
- "learning_rate": 3.376623376623377e-06,
2013
- "loss": 0.0095,
2014
  "step": 331
2015
  },
2016
  {
2017
  "epoch": 2.53,
2018
- "learning_rate": 3.324675324675325e-06,
2019
- "loss": 0.0112,
2020
  "step": 332
2021
  },
2022
  {
2023
  "epoch": 2.54,
2024
- "learning_rate": 3.272727272727273e-06,
2025
- "loss": 0.0062,
2026
  "step": 333
2027
  },
2028
  {
2029
  "epoch": 2.55,
2030
- "learning_rate": 3.220779220779221e-06,
2031
- "loss": 0.0087,
2032
  "step": 334
2033
  },
2034
  {
2035
  "epoch": 2.56,
2036
- "learning_rate": 3.168831168831169e-06,
2037
- "loss": 0.0326,
2038
  "step": 335
2039
  },
2040
  {
2041
  "epoch": 2.56,
2042
- "learning_rate": 3.116883116883117e-06,
2043
- "loss": 0.0272,
2044
  "step": 336
2045
  },
2046
  {
2047
  "epoch": 2.57,
2048
- "learning_rate": 3.0649350649350653e-06,
2049
- "loss": 0.0287,
2050
  "step": 337
2051
  },
2052
  {
2053
  "epoch": 2.58,
2054
- "learning_rate": 3.0129870129870133e-06,
2055
- "loss": 0.0109,
2056
  "step": 338
2057
  },
2058
  {
2059
  "epoch": 2.59,
2060
- "learning_rate": 2.9610389610389613e-06,
2061
- "loss": 0.01,
2062
  "step": 339
2063
  },
2064
  {
2065
  "epoch": 2.6,
2066
- "learning_rate": 2.9090909090909093e-06,
2067
- "loss": 0.0074,
2068
  "step": 340
2069
  },
2070
  {
2071
  "epoch": 2.6,
2072
- "learning_rate": 2.8571428571428573e-06,
2073
- "loss": 0.0096,
2074
  "step": 341
2075
  },
2076
  {
2077
  "epoch": 2.61,
2078
- "learning_rate": 2.8051948051948052e-06,
2079
- "loss": 0.0055,
2080
  "step": 342
2081
  },
2082
  {
2083
  "epoch": 2.62,
2084
- "learning_rate": 2.7532467532467532e-06,
2085
- "loss": 0.0027,
2086
  "step": 343
2087
  },
2088
  {
2089
  "epoch": 2.63,
2090
- "learning_rate": 2.7012987012987012e-06,
2091
- "loss": 0.0089,
2092
  "step": 344
2093
  },
2094
  {
2095
  "epoch": 2.63,
2096
- "learning_rate": 2.649350649350649e-06,
2097
- "loss": 0.0106,
2098
  "step": 345
2099
  },
2100
  {
2101
  "epoch": 2.64,
2102
- "learning_rate": 2.597402597402597e-06,
2103
- "loss": 0.0067,
2104
  "step": 346
2105
  },
2106
  {
2107
  "epoch": 2.65,
2108
- "learning_rate": 2.5454545454545456e-06,
2109
- "loss": 0.0045,
2110
  "step": 347
2111
  },
2112
  {
2113
  "epoch": 2.66,
2114
- "learning_rate": 2.4935064935064936e-06,
2115
- "loss": 0.004,
2116
  "step": 348
2117
  },
2118
  {
2119
  "epoch": 2.66,
2120
- "learning_rate": 2.4415584415584416e-06,
2121
- "loss": 0.0083,
2122
  "step": 349
2123
  },
2124
  {
2125
  "epoch": 2.67,
2126
- "learning_rate": 2.3896103896103896e-06,
2127
- "loss": 0.0109,
2128
  "step": 350
2129
  },
2130
  {
2131
  "epoch": 2.68,
2132
- "learning_rate": 2.337662337662338e-06,
2133
- "loss": 0.0049,
2134
  "step": 351
2135
  },
2136
  {
2137
  "epoch": 2.69,
2138
- "learning_rate": 2.285714285714286e-06,
2139
- "loss": 0.0166,
2140
  "step": 352
2141
  },
2142
  {
2143
  "epoch": 2.69,
2144
- "learning_rate": 2.233766233766234e-06,
2145
- "loss": 0.0044,
2146
  "step": 353
2147
  },
2148
  {
2149
  "epoch": 2.7,
2150
- "learning_rate": 2.181818181818182e-06,
2151
- "loss": 0.0054,
2152
  "step": 354
2153
  },
2154
  {
2155
  "epoch": 2.71,
2156
- "learning_rate": 2.12987012987013e-06,
2157
- "loss": 0.006,
2158
  "step": 355
2159
  },
2160
  {
2161
  "epoch": 2.72,
2162
- "learning_rate": 2.0779220779220784e-06,
2163
- "loss": 0.0064,
2164
  "step": 356
2165
  },
2166
  {
2167
  "epoch": 2.73,
2168
- "learning_rate": 2.0259740259740263e-06,
2169
- "loss": 0.0029,
2170
  "step": 357
2171
  },
2172
  {
2173
  "epoch": 2.73,
2174
- "learning_rate": 1.9740259740259743e-06,
2175
- "loss": 0.0122,
2176
  "step": 358
2177
  },
2178
  {
2179
  "epoch": 2.74,
2180
- "learning_rate": 1.9220779220779223e-06,
2181
- "loss": 0.0086,
2182
  "step": 359
2183
  },
2184
  {
2185
  "epoch": 2.75,
2186
- "learning_rate": 1.8701298701298703e-06,
2187
- "loss": 0.0069,
2188
  "step": 360
2189
  },
2190
  {
2191
  "epoch": 2.76,
2192
- "learning_rate": 1.8181818181818183e-06,
2193
- "loss": 0.0066,
2194
  "step": 361
2195
  },
2196
  {
2197
  "epoch": 2.76,
2198
- "learning_rate": 1.7662337662337665e-06,
2199
- "loss": 0.0058,
2200
  "step": 362
2201
  },
2202
  {
2203
  "epoch": 2.77,
2204
- "learning_rate": 1.7142857142857145e-06,
2205
- "loss": 0.0106,
2206
  "step": 363
2207
  },
2208
  {
2209
  "epoch": 2.78,
2210
- "learning_rate": 1.6623376623376625e-06,
2211
- "loss": 0.009,
2212
  "step": 364
2213
  },
2214
  {
2215
  "epoch": 2.79,
2216
- "learning_rate": 1.6103896103896105e-06,
2217
- "loss": 0.0051,
2218
  "step": 365
2219
  },
2220
  {
2221
  "epoch": 2.79,
2222
- "learning_rate": 1.5584415584415584e-06,
2223
- "loss": 0.0072,
2224
  "step": 366
2225
  },
2226
  {
2227
  "epoch": 2.8,
2228
- "learning_rate": 1.5064935064935066e-06,
2229
- "loss": 0.0052,
2230
  "step": 367
2231
  },
2232
  {
2233
  "epoch": 2.81,
2234
- "learning_rate": 1.4545454545454546e-06,
2235
- "loss": 0.0055,
2236
  "step": 368
2237
  },
2238
  {
2239
  "epoch": 2.82,
2240
- "learning_rate": 1.4025974025974026e-06,
2241
- "loss": 0.0065,
2242
  "step": 369
2243
  },
2244
  {
2245
  "epoch": 2.82,
2246
- "learning_rate": 1.3506493506493506e-06,
2247
- "loss": 0.0061,
2248
  "step": 370
2249
  },
2250
  {
2251
  "epoch": 2.83,
2252
- "learning_rate": 1.2987012987012986e-06,
2253
- "loss": 0.0087,
2254
  "step": 371
2255
  },
2256
  {
2257
  "epoch": 2.84,
2258
- "learning_rate": 1.2467532467532468e-06,
2259
- "loss": 0.0068,
2260
  "step": 372
2261
  },
2262
  {
2263
  "epoch": 2.85,
2264
- "learning_rate": 1.1948051948051948e-06,
2265
- "loss": 0.006,
2266
  "step": 373
2267
  },
2268
  {
2269
  "epoch": 2.85,
2270
- "learning_rate": 1.142857142857143e-06,
2271
- "loss": 0.0098,
2272
  "step": 374
2273
  },
2274
  {
2275
  "epoch": 2.86,
2276
- "learning_rate": 1.090909090909091e-06,
2277
- "loss": 0.0062,
2278
  "step": 375
2279
  },
2280
  {
2281
  "epoch": 2.87,
2282
- "learning_rate": 1.0389610389610392e-06,
2283
- "loss": 0.0081,
2284
  "step": 376
2285
  },
2286
  {
2287
  "epoch": 2.88,
2288
- "learning_rate": 9.870129870129872e-07,
2289
- "loss": 0.0078,
2290
  "step": 377
2291
  },
2292
  {
2293
  "epoch": 2.89,
2294
- "learning_rate": 9.350649350649352e-07,
2295
- "loss": 0.0078,
2296
  "step": 378
2297
  },
2298
  {
2299
  "epoch": 2.89,
2300
- "learning_rate": 8.831168831168832e-07,
2301
- "loss": 0.0082,
2302
  "step": 379
2303
  },
2304
  {
2305
  "epoch": 2.9,
2306
- "learning_rate": 8.311688311688312e-07,
2307
- "loss": 0.0047,
2308
  "step": 380
2309
  },
2310
  {
2311
  "epoch": 2.91,
2312
- "learning_rate": 7.792207792207792e-07,
2313
- "loss": 0.0072,
2314
  "step": 381
2315
  },
2316
  {
2317
  "epoch": 2.92,
2318
- "learning_rate": 7.272727272727273e-07,
2319
- "loss": 0.0134,
2320
  "step": 382
2321
  },
2322
  {
2323
  "epoch": 2.92,
2324
- "learning_rate": 6.753246753246753e-07,
2325
- "loss": 0.0041,
2326
  "step": 383
2327
  },
2328
  {
2329
  "epoch": 2.93,
2330
- "learning_rate": 6.233766233766234e-07,
2331
- "loss": 0.0056,
2332
  "step": 384
2333
  },
2334
  {
2335
  "epoch": 2.94,
2336
- "learning_rate": 5.714285714285715e-07,
2337
- "loss": 0.0048,
2338
  "step": 385
2339
  },
2340
  {
2341
  "epoch": 2.95,
2342
- "learning_rate": 5.194805194805196e-07,
2343
- "loss": 0.0053,
2344
  "step": 386
2345
  },
2346
  {
2347
  "epoch": 2.95,
2348
- "learning_rate": 4.675324675324676e-07,
2349
- "loss": 0.0116,
2350
  "step": 387
2351
  },
2352
  {
2353
  "epoch": 2.96,
2354
- "learning_rate": 4.155844155844156e-07,
2355
- "loss": 0.0069,
2356
  "step": 388
2357
  },
2358
  {
2359
  "epoch": 2.97,
2360
- "learning_rate": 3.6363636363636366e-07,
2361
- "loss": 0.0086,
2362
  "step": 389
2363
  },
2364
  {
2365
  "epoch": 2.98,
2366
- "learning_rate": 3.116883116883117e-07,
2367
- "loss": 0.009,
2368
  "step": 390
2369
  },
2370
  {
2371
  "epoch": 2.98,
2372
- "learning_rate": 2.597402597402598e-07,
2373
- "loss": 0.0082,
2374
  "step": 391
2375
  },
2376
  {
2377
  "epoch": 2.99,
2378
- "learning_rate": 2.077922077922078e-07,
2379
- "loss": 0.0068,
2380
  "step": 392
2381
  },
2382
  {
2383
  "epoch": 3.0,
2384
- "learning_rate": 1.5584415584415585e-07,
2385
- "loss": 0.0101,
2386
  "step": 393
2387
  },
2388
  {
2389
  "epoch": 3.0,
2390
- "eval_loss": 0.010308222845196724,
2391
- "eval_pearson": 0.974361418206129,
2392
- "eval_runtime": 5.3535,
2393
- "eval_samples_per_second": 43.336,
2394
- "eval_spearmanr": 0.9111814010591314,
2395
- "eval_steps_per_second": 2.802,
2396
  "step": 393
2397
  }
2398
  ],
 
10
  {
11
  "epoch": 0.01,
12
  "learning_rate": 0.0,
13
+ "loss": 0.4278,
14
  "step": 1
15
  },
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 0.0,
19
+ "loss": 0.409,
20
  "step": 2
21
  },
22
  {
23
  "epoch": 0.02,
24
  "learning_rate": 2.5e-06,
25
+ "loss": 0.3399,
26
  "step": 3
27
  },
28
  {
29
  "epoch": 0.03,
30
  "learning_rate": 5e-06,
31
+ "loss": 0.3277,
32
  "step": 4
33
  },
34
  {
35
  "epoch": 0.04,
36
+ "learning_rate": 7.500000000000001e-06,
37
+ "loss": 0.3771,
38
  "step": 5
39
  },
40
  {
41
  "epoch": 0.05,
42
+ "learning_rate": 1e-05,
43
+ "loss": 0.2081,
44
  "step": 6
45
  },
46
  {
47
  "epoch": 0.05,
48
+ "learning_rate": 1.25e-05,
49
+ "loss": 0.3177,
50
  "step": 7
51
  },
52
  {
53
  "epoch": 0.06,
54
+ "learning_rate": 1.5000000000000002e-05,
55
+ "loss": 0.3049,
56
  "step": 8
57
  },
58
  {
59
  "epoch": 0.07,
60
+ "learning_rate": 1.7500000000000002e-05,
61
+ "loss": 0.1694,
62
  "step": 9
63
  },
64
  {
65
  "epoch": 0.08,
66
+ "learning_rate": 2e-05,
67
+ "loss": 0.2039,
68
  "step": 10
69
  },
70
  {
71
  "epoch": 0.08,
72
+ "learning_rate": 1.994805194805195e-05,
73
+ "loss": 0.1651,
74
  "step": 11
75
  },
76
  {
77
  "epoch": 0.09,
78
+ "learning_rate": 1.98961038961039e-05,
79
+ "loss": 0.3483,
80
  "step": 12
81
  },
82
  {
83
  "epoch": 0.1,
84
+ "learning_rate": 1.9844155844155846e-05,
85
+ "loss": 0.1532,
86
  "step": 13
87
  },
88
  {
89
  "epoch": 0.11,
90
+ "learning_rate": 1.9792207792207794e-05,
91
+ "loss": 0.2102,
92
  "step": 14
93
  },
94
  {
95
  "epoch": 0.11,
96
+ "learning_rate": 1.974025974025974e-05,
97
+ "loss": 0.0953,
98
  "step": 15
99
  },
100
  {
101
  "epoch": 0.12,
102
+ "learning_rate": 1.968831168831169e-05,
103
+ "loss": 0.0761,
104
  "step": 16
105
  },
106
  {
107
  "epoch": 0.13,
108
+ "learning_rate": 1.963636363636364e-05,
109
+ "loss": 0.1778,
110
  "step": 17
111
  },
112
  {
113
  "epoch": 0.14,
114
+ "learning_rate": 1.9584415584415586e-05,
115
+ "loss": 0.138,
116
  "step": 18
117
  },
118
  {
119
  "epoch": 0.15,
120
+ "learning_rate": 1.9532467532467533e-05,
121
+ "loss": 0.1523,
122
  "step": 19
123
  },
124
  {
125
  "epoch": 0.15,
126
+ "learning_rate": 1.9480519480519483e-05,
127
+ "loss": 0.1351,
128
  "step": 20
129
  },
130
  {
131
  "epoch": 0.16,
132
+ "learning_rate": 1.942857142857143e-05,
133
+ "loss": 0.0846,
134
  "step": 21
135
  },
136
  {
137
  "epoch": 0.17,
138
+ "learning_rate": 1.9376623376623377e-05,
139
+ "loss": 0.1725,
140
  "step": 22
141
  },
142
  {
143
  "epoch": 0.18,
144
+ "learning_rate": 1.9324675324675325e-05,
145
+ "loss": 0.0751,
146
  "step": 23
147
  },
148
  {
149
  "epoch": 0.18,
150
+ "learning_rate": 1.9272727272727275e-05,
151
+ "loss": 0.1553,
152
  "step": 24
153
  },
154
  {
155
  "epoch": 0.19,
156
+ "learning_rate": 1.9220779220779222e-05,
157
+ "loss": 0.1551,
158
  "step": 25
159
  },
160
  {
161
  "epoch": 0.2,
162
+ "learning_rate": 1.916883116883117e-05,
163
+ "loss": 0.1263,
164
  "step": 26
165
  },
166
  {
167
  "epoch": 0.21,
168
+ "learning_rate": 1.9116883116883117e-05,
169
+ "loss": 0.0627,
170
  "step": 27
171
  },
172
  {
173
  "epoch": 0.21,
174
+ "learning_rate": 1.9064935064935067e-05,
175
+ "loss": 0.109,
176
  "step": 28
177
  },
178
  {
179
  "epoch": 0.22,
180
+ "learning_rate": 1.9012987012987014e-05,
181
+ "loss": 0.0426,
182
  "step": 29
183
  },
184
  {
185
  "epoch": 0.23,
186
+ "learning_rate": 1.896103896103896e-05,
187
+ "loss": 0.0864,
188
  "step": 30
189
  },
190
  {
191
  "epoch": 0.24,
192
+ "learning_rate": 1.8909090909090912e-05,
193
+ "loss": 0.0477,
194
  "step": 31
195
  },
196
  {
197
  "epoch": 0.24,
198
+ "learning_rate": 1.885714285714286e-05,
199
+ "loss": 0.1584,
200
  "step": 32
201
  },
202
  {
203
  "epoch": 0.25,
204
+ "learning_rate": 1.8805194805194806e-05,
205
+ "loss": 0.06,
206
  "step": 33
207
  },
208
  {
209
  "epoch": 0.26,
210
+ "learning_rate": 1.8753246753246753e-05,
211
+ "loss": 0.0972,
212
  "step": 34
213
  },
214
  {
215
  "epoch": 0.27,
216
+ "learning_rate": 1.8701298701298704e-05,
217
+ "loss": 0.0658,
218
  "step": 35
219
  },
220
  {
221
  "epoch": 0.27,
222
+ "learning_rate": 1.864935064935065e-05,
223
+ "loss": 0.0729,
224
  "step": 36
225
  },
226
  {
227
  "epoch": 0.28,
228
+ "learning_rate": 1.8597402597402598e-05,
229
+ "loss": 0.0851,
230
  "step": 37
231
  },
232
  {
233
  "epoch": 0.29,
234
+ "learning_rate": 1.8545454545454545e-05,
235
+ "loss": 0.1193,
236
  "step": 38
237
  },
238
  {
239
  "epoch": 0.3,
240
+ "learning_rate": 1.8493506493506496e-05,
241
+ "loss": 0.0194,
242
  "step": 39
243
  },
244
  {
245
  "epoch": 0.31,
246
+ "learning_rate": 1.8441558441558443e-05,
247
+ "loss": 0.0994,
248
  "step": 40
249
  },
250
  {
251
  "epoch": 0.31,
252
+ "learning_rate": 1.838961038961039e-05,
253
+ "loss": 0.0719,
254
  "step": 41
255
  },
256
  {
257
  "epoch": 0.32,
258
+ "learning_rate": 1.8337662337662337e-05,
259
+ "loss": 0.0915,
260
  "step": 42
261
  },
262
  {
263
  "epoch": 0.33,
264
+ "learning_rate": 1.8285714285714288e-05,
265
+ "loss": 0.0823,
266
  "step": 43
267
  },
268
  {
269
  "epoch": 0.34,
270
+ "learning_rate": 1.8233766233766235e-05,
271
+ "loss": 0.1922,
272
  "step": 44
273
  },
274
  {
275
  "epoch": 0.34,
276
+ "learning_rate": 1.8181818181818182e-05,
277
+ "loss": 0.1452,
278
  "step": 45
279
  },
280
  {
281
  "epoch": 0.35,
282
+ "learning_rate": 1.812987012987013e-05,
283
+ "loss": 0.0233,
284
  "step": 46
285
  },
286
  {
287
  "epoch": 0.36,
288
+ "learning_rate": 1.807792207792208e-05,
289
+ "loss": 0.0448,
290
  "step": 47
291
  },
292
  {
293
  "epoch": 0.37,
294
+ "learning_rate": 1.8025974025974027e-05,
295
+ "loss": 0.0708,
296
  "step": 48
297
  },
298
  {
299
  "epoch": 0.37,
300
+ "learning_rate": 1.7974025974025974e-05,
301
+ "loss": 0.0441,
302
  "step": 49
303
  },
304
  {
305
  "epoch": 0.38,
306
+ "learning_rate": 1.792207792207792e-05,
307
+ "loss": 0.0498,
308
  "step": 50
309
  },
310
  {
311
  "epoch": 0.39,
312
+ "learning_rate": 1.7870129870129872e-05,
313
+ "loss": 0.0645,
314
  "step": 51
315
  },
316
  {
317
  "epoch": 0.4,
318
+ "learning_rate": 1.781818181818182e-05,
319
+ "loss": 0.0718,
320
  "step": 52
321
  },
322
  {
323
  "epoch": 0.4,
324
+ "learning_rate": 1.7766233766233766e-05,
325
+ "loss": 0.0605,
326
  "step": 53
327
  },
328
  {
329
  "epoch": 0.41,
330
+ "learning_rate": 1.7714285714285717e-05,
331
+ "loss": 0.0752,
332
  "step": 54
333
  },
334
  {
335
  "epoch": 0.42,
336
+ "learning_rate": 1.7662337662337664e-05,
337
+ "loss": 0.0312,
338
  "step": 55
339
  },
340
  {
341
  "epoch": 0.43,
342
+ "learning_rate": 1.761038961038961e-05,
343
+ "loss": 0.1122,
344
  "step": 56
345
  },
346
  {
347
  "epoch": 0.44,
348
+ "learning_rate": 1.7558441558441558e-05,
349
+ "loss": 0.0579,
350
  "step": 57
351
  },
352
  {
353
  "epoch": 0.44,
354
+ "learning_rate": 1.750649350649351e-05,
355
+ "loss": 0.0664,
356
  "step": 58
357
  },
358
  {
359
  "epoch": 0.45,
360
+ "learning_rate": 1.7454545454545456e-05,
361
+ "loss": 0.059,
362
  "step": 59
363
  },
364
  {
365
  "epoch": 0.46,
366
+ "learning_rate": 1.7402597402597403e-05,
367
+ "loss": 0.0602,
368
  "step": 60
369
  },
370
  {
371
  "epoch": 0.47,
372
+ "learning_rate": 1.735064935064935e-05,
373
+ "loss": 0.0322,
374
  "step": 61
375
  },
376
  {
377
  "epoch": 0.47,
378
+ "learning_rate": 1.72987012987013e-05,
379
+ "loss": 0.1271,
380
  "step": 62
381
  },
382
  {
383
  "epoch": 0.48,
384
+ "learning_rate": 1.7246753246753248e-05,
385
+ "loss": 0.2168,
386
  "step": 63
387
  },
388
  {
389
  "epoch": 0.49,
390
+ "learning_rate": 1.7194805194805195e-05,
391
+ "loss": 0.1552,
392
  "step": 64
393
  },
394
  {
395
  "epoch": 0.5,
396
+ "learning_rate": 1.7142857142857142e-05,
397
+ "loss": 0.1742,
398
  "step": 65
399
  },
400
  {
401
  "epoch": 0.5,
402
+ "learning_rate": 1.7090909090909092e-05,
403
+ "loss": 0.0751,
404
  "step": 66
405
  },
406
  {
407
  "epoch": 0.51,
408
+ "learning_rate": 1.703896103896104e-05,
409
+ "loss": 0.0379,
410
  "step": 67
411
  },
412
  {
413
  "epoch": 0.52,
414
+ "learning_rate": 1.6987012987012987e-05,
415
+ "loss": 0.185,
416
  "step": 68
417
  },
418
  {
419
  "epoch": 0.53,
420
+ "learning_rate": 1.6935064935064934e-05,
421
+ "loss": 0.1504,
422
  "step": 69
423
  },
424
  {
425
  "epoch": 0.53,
426
+ "learning_rate": 1.6883116883116884e-05,
427
+ "loss": 0.1169,
428
  "step": 70
429
  },
430
  {
431
  "epoch": 0.54,
432
+ "learning_rate": 1.683116883116883e-05,
433
+ "loss": 0.0771,
434
  "step": 71
435
  },
436
  {
437
  "epoch": 0.55,
438
+ "learning_rate": 1.677922077922078e-05,
439
+ "loss": 0.0374,
440
  "step": 72
441
  },
442
  {
443
  "epoch": 0.56,
444
+ "learning_rate": 1.672727272727273e-05,
445
+ "loss": 0.1039,
446
  "step": 73
447
  },
448
  {
449
  "epoch": 0.56,
450
+ "learning_rate": 1.6675324675324676e-05,
451
+ "loss": 0.101,
452
  "step": 74
453
  },
454
  {
455
  "epoch": 0.57,
456
+ "learning_rate": 1.6623376623376627e-05,
457
+ "loss": 0.0917,
458
  "step": 75
459
  },
460
  {
461
  "epoch": 0.58,
462
+ "learning_rate": 1.6571428571428574e-05,
463
+ "loss": 0.089,
464
  "step": 76
465
  },
466
  {
467
  "epoch": 0.59,
468
+ "learning_rate": 1.651948051948052e-05,
469
+ "loss": 0.0307,
470
  "step": 77
471
  },
472
  {
473
  "epoch": 0.6,
474
+ "learning_rate": 1.646753246753247e-05,
475
+ "loss": 0.1496,
476
  "step": 78
477
  },
478
  {
479
  "epoch": 0.6,
480
+ "learning_rate": 1.641558441558442e-05,
481
+ "loss": 0.1121,
482
  "step": 79
483
  },
484
  {
485
  "epoch": 0.61,
486
+ "learning_rate": 1.6363636363636366e-05,
487
+ "loss": 0.1882,
488
  "step": 80
489
  },
490
  {
491
  "epoch": 0.62,
492
+ "learning_rate": 1.6311688311688313e-05,
493
+ "loss": 0.136,
494
  "step": 81
495
  },
496
  {
497
  "epoch": 0.63,
498
+ "learning_rate": 1.6259740259740264e-05,
499
+ "loss": 0.1114,
500
  "step": 82
501
  },
502
  {
503
  "epoch": 0.63,
504
+ "learning_rate": 1.620779220779221e-05,
505
+ "loss": 0.0095,
506
  "step": 83
507
  },
508
  {
509
  "epoch": 0.64,
510
+ "learning_rate": 1.6155844155844158e-05,
511
+ "loss": 0.0559,
512
  "step": 84
513
  },
514
  {
515
  "epoch": 0.65,
516
+ "learning_rate": 1.6103896103896105e-05,
517
+ "loss": 0.0528,
518
  "step": 85
519
  },
520
  {
521
  "epoch": 0.66,
522
+ "learning_rate": 1.6051948051948056e-05,
523
+ "loss": 0.0298,
524
  "step": 86
525
  },
526
  {
527
  "epoch": 0.66,
528
+ "learning_rate": 1.6000000000000003e-05,
529
+ "loss": 0.0436,
530
  "step": 87
531
  },
532
  {
533
  "epoch": 0.67,
534
+ "learning_rate": 1.594805194805195e-05,
535
+ "loss": 0.0515,
536
  "step": 88
537
  },
538
  {
539
  "epoch": 0.68,
540
+ "learning_rate": 1.5896103896103897e-05,
541
+ "loss": 0.0588,
542
  "step": 89
543
  },
544
  {
545
  "epoch": 0.69,
546
+ "learning_rate": 1.5844155844155847e-05,
547
+ "loss": 0.071,
548
  "step": 90
549
  },
550
  {
551
  "epoch": 0.69,
552
+ "learning_rate": 1.5792207792207795e-05,
553
+ "loss": 0.0457,
554
  "step": 91
555
  },
556
  {
557
  "epoch": 0.7,
558
+ "learning_rate": 1.5740259740259742e-05,
559
+ "loss": 0.0383,
560
  "step": 92
561
  },
562
  {
563
  "epoch": 0.71,
564
+ "learning_rate": 1.568831168831169e-05,
565
+ "loss": 0.0686,
566
  "step": 93
567
  },
568
  {
569
  "epoch": 0.72,
570
+ "learning_rate": 1.563636363636364e-05,
571
+ "loss": 0.1018,
572
  "step": 94
573
  },
574
  {
575
  "epoch": 0.73,
576
+ "learning_rate": 1.5584415584415587e-05,
577
+ "loss": 0.1044,
578
  "step": 95
579
  },
580
  {
581
  "epoch": 0.73,
582
+ "learning_rate": 1.5532467532467534e-05,
583
+ "loss": 0.0549,
584
  "step": 96
585
  },
586
  {
587
  "epoch": 0.74,
588
+ "learning_rate": 1.548051948051948e-05,
589
+ "loss": 0.0321,
590
  "step": 97
591
  },
592
  {
593
  "epoch": 0.75,
594
+ "learning_rate": 1.542857142857143e-05,
595
+ "loss": 0.0248,
596
  "step": 98
597
  },
598
  {
599
  "epoch": 0.76,
600
+ "learning_rate": 1.537662337662338e-05,
601
+ "loss": 0.0508,
602
  "step": 99
603
  },
604
  {
605
  "epoch": 0.76,
606
+ "learning_rate": 1.5324675324675326e-05,
607
+ "loss": 0.0279,
608
  "step": 100
609
  },
610
  {
611
  "epoch": 0.77,
612
+ "learning_rate": 1.5272727272727276e-05,
613
+ "loss": 0.0499,
614
  "step": 101
615
  },
616
  {
617
  "epoch": 0.78,
618
+ "learning_rate": 1.5220779220779223e-05,
619
+ "loss": 0.035,
620
  "step": 102
621
  },
622
  {
623
  "epoch": 0.79,
624
+ "learning_rate": 1.516883116883117e-05,
625
+ "loss": 0.0767,
626
  "step": 103
627
  },
628
  {
629
  "epoch": 0.79,
630
+ "learning_rate": 1.511688311688312e-05,
631
+ "loss": 0.0712,
632
  "step": 104
633
  },
634
  {
635
  "epoch": 0.8,
636
+ "learning_rate": 1.5064935064935066e-05,
637
+ "loss": 0.0458,
638
  "step": 105
639
  },
640
  {
641
  "epoch": 0.81,
642
+ "learning_rate": 1.5012987012987015e-05,
643
+ "loss": 0.0362,
644
  "step": 106
645
  },
646
  {
647
  "epoch": 0.82,
648
+ "learning_rate": 1.4961038961038962e-05,
649
+ "loss": 0.0651,
650
  "step": 107
651
  },
652
  {
653
  "epoch": 0.82,
654
+ "learning_rate": 1.4909090909090911e-05,
655
+ "loss": 0.0447,
656
  "step": 108
657
  },
658
  {
659
  "epoch": 0.83,
660
+ "learning_rate": 1.4857142857142858e-05,
661
+ "loss": 0.1035,
662
  "step": 109
663
  },
664
  {
665
  "epoch": 0.84,
666
+ "learning_rate": 1.4805194805194807e-05,
667
+ "loss": 0.0608,
668
  "step": 110
669
  },
670
  {
671
  "epoch": 0.85,
672
+ "learning_rate": 1.4753246753246754e-05,
673
+ "loss": 0.0407,
674
  "step": 111
675
  },
676
  {
677
  "epoch": 0.85,
678
+ "learning_rate": 1.4701298701298703e-05,
679
+ "loss": 0.0183,
680
  "step": 112
681
  },
682
  {
683
  "epoch": 0.86,
684
+ "learning_rate": 1.464935064935065e-05,
685
+ "loss": 0.0281,
686
  "step": 113
687
  },
688
  {
689
  "epoch": 0.87,
690
+ "learning_rate": 1.45974025974026e-05,
691
+ "loss": 0.0289,
692
  "step": 114
693
  },
694
  {
695
  "epoch": 0.88,
696
+ "learning_rate": 1.4545454545454546e-05,
697
+ "loss": 0.0725,
698
  "step": 115
699
  },
700
  {
701
  "epoch": 0.89,
702
+ "learning_rate": 1.4493506493506495e-05,
703
+ "loss": 0.0287,
704
  "step": 116
705
  },
706
  {
707
  "epoch": 0.89,
708
+ "learning_rate": 1.4441558441558442e-05,
709
+ "loss": 0.018,
710
+ "step": 117
711
  },
712
  {
713
  "epoch": 0.9,
714
+ "learning_rate": 1.4389610389610391e-05,
715
+ "loss": 0.0227,
716
  "step": 118
717
  },
718
  {
719
  "epoch": 0.91,
720
+ "learning_rate": 1.433766233766234e-05,
721
+ "loss": 0.0315,
722
  "step": 119
723
  },
724
  {
725
  "epoch": 0.92,
726
+ "learning_rate": 1.4285714285714287e-05,
727
+ "loss": 0.1159,
728
  "step": 120
729
  },
730
  {
731
  "epoch": 0.92,
732
+ "learning_rate": 1.4233766233766236e-05,
733
+ "loss": 0.0419,
734
  "step": 121
735
  },
736
  {
737
  "epoch": 0.93,
738
+ "learning_rate": 1.4181818181818183e-05,
739
+ "loss": 0.0668,
740
  "step": 122
741
  },
742
  {
743
  "epoch": 0.94,
744
+ "learning_rate": 1.4129870129870132e-05,
745
+ "loss": 0.0357,
746
  "step": 123
747
  },
748
  {
749
  "epoch": 0.95,
750
+ "learning_rate": 1.4077922077922079e-05,
751
+ "loss": 0.0288,
752
  "step": 124
753
  },
754
  {
755
  "epoch": 0.95,
756
+ "learning_rate": 1.4025974025974028e-05,
757
+ "loss": 0.0135,
758
  "step": 125
759
  },
760
  {
761
  "epoch": 0.96,
762
+ "learning_rate": 1.3974025974025975e-05,
763
+ "loss": 0.0308,
764
  "step": 126
765
  },
766
  {
767
  "epoch": 0.97,
768
+ "learning_rate": 1.3922077922077924e-05,
769
+ "loss": 0.025,
770
  "step": 127
771
  },
772
  {
773
  "epoch": 0.98,
774
+ "learning_rate": 1.3870129870129871e-05,
775
+ "loss": 0.0375,
776
  "step": 128
777
  },
778
  {
779
  "epoch": 0.98,
780
+ "learning_rate": 1.381818181818182e-05,
781
+ "loss": 0.0182,
782
  "step": 129
783
  },
784
  {
785
  "epoch": 0.99,
786
+ "learning_rate": 1.3766233766233767e-05,
787
+ "loss": 0.0417,
788
  "step": 130
789
  },
790
  {
791
  "epoch": 1.0,
792
+ "learning_rate": 1.3714285714285716e-05,
793
+ "loss": 0.0094,
794
  "step": 131
795
  },
796
  {
797
  "epoch": 1.0,
798
+ "eval_loss": 0.034164465963840485,
799
+ "eval_pearson": 0.9208741871943605,
800
+ "eval_runtime": 5.5147,
801
+ "eval_samples_per_second": 42.069,
802
+ "eval_spearmanr": 0.8739346623789532,
803
+ "eval_steps_per_second": 2.72,
804
  "step": 131
805
  },
806
  {
807
  "epoch": 1.01,
808
+ "learning_rate": 1.3662337662337663e-05,
809
+ "loss": 0.0628,
810
  "step": 132
811
  },
812
  {
813
  "epoch": 1.02,
814
+ "learning_rate": 1.3610389610389612e-05,
815
+ "loss": 0.0295,
816
  "step": 133
817
  },
818
  {
819
  "epoch": 1.02,
820
+ "learning_rate": 1.3558441558441559e-05,
821
+ "loss": 0.0132,
822
  "step": 134
823
  },
824
  {
825
  "epoch": 1.03,
826
+ "learning_rate": 1.3506493506493508e-05,
827
+ "loss": 0.0651,
828
  "step": 135
829
  },
830
  {
831
  "epoch": 1.04,
832
+ "learning_rate": 1.3454545454545455e-05,
833
+ "loss": 0.0453,
834
  "step": 136
835
  },
836
  {
837
  "epoch": 1.05,
838
+ "learning_rate": 1.3402597402597404e-05,
839
+ "loss": 0.02,
840
  "step": 137
841
  },
842
  {
843
  "epoch": 1.05,
844
+ "learning_rate": 1.3350649350649351e-05,
845
+ "loss": 0.088,
846
  "step": 138
847
  },
848
  {
849
  "epoch": 1.06,
850
+ "learning_rate": 1.32987012987013e-05,
851
+ "loss": 0.0337,
852
  "step": 139
853
  },
854
  {
855
  "epoch": 1.07,
856
+ "learning_rate": 1.3246753246753249e-05,
857
+ "loss": 0.03,
858
  "step": 140
859
  },
860
  {
861
  "epoch": 1.08,
862
+ "learning_rate": 1.3194805194805196e-05,
863
+ "loss": 0.0261,
864
  "step": 141
865
  },
866
  {
867
  "epoch": 1.08,
868
+ "learning_rate": 1.3142857142857145e-05,
869
+ "loss": 0.0245,
870
  "step": 142
871
  },
872
  {
873
  "epoch": 1.09,
874
+ "learning_rate": 1.3090909090909092e-05,
875
+ "loss": 0.0675,
876
  "step": 143
877
  },
878
  {
879
  "epoch": 1.1,
880
+ "learning_rate": 1.303896103896104e-05,
881
+ "loss": 0.1051,
882
  "step": 144
883
  },
884
  {
885
  "epoch": 1.11,
886
+ "learning_rate": 1.2987012987012988e-05,
887
+ "loss": 0.0985,
888
  "step": 145
889
  },
890
  {
891
  "epoch": 1.11,
892
+ "learning_rate": 1.2935064935064937e-05,
893
+ "loss": 0.077,
894
  "step": 146
895
  },
896
  {
897
  "epoch": 1.12,
898
+ "learning_rate": 1.2883116883116884e-05,
899
+ "loss": 0.0332,
900
  "step": 147
901
  },
902
  {
903
  "epoch": 1.13,
904
+ "learning_rate": 1.2831168831168832e-05,
905
+ "loss": 0.0324,
906
  "step": 148
907
  },
908
  {
909
  "epoch": 1.14,
910
+ "learning_rate": 1.277922077922078e-05,
911
+ "loss": 0.0265,
912
  "step": 149
913
  },
914
  {
915
  "epoch": 1.15,
916
+ "learning_rate": 1.2727272727272728e-05,
917
+ "loss": 0.0313,
918
  "step": 150
919
  },
920
  {
921
  "epoch": 1.15,
922
+ "learning_rate": 1.2675324675324676e-05,
923
+ "loss": 0.0368,
924
  "step": 151
925
  },
926
  {
927
  "epoch": 1.16,
928
+ "learning_rate": 1.2623376623376624e-05,
929
+ "loss": 0.0313,
930
  "step": 152
931
  },
932
  {
933
  "epoch": 1.17,
934
+ "learning_rate": 1.2571428571428572e-05,
935
+ "loss": 0.0388,
936
  "step": 153
937
  },
938
  {
939
  "epoch": 1.18,
940
+ "learning_rate": 1.251948051948052e-05,
941
+ "loss": 0.0474,
942
  "step": 154
943
  },
944
  {
945
  "epoch": 1.18,
946
+ "learning_rate": 1.2467532467532468e-05,
947
+ "loss": 0.0294,
948
  "step": 155
949
  },
950
  {
951
  "epoch": 1.19,
952
+ "learning_rate": 1.2415584415584416e-05,
953
+ "loss": 0.0156,
954
  "step": 156
955
  },
956
  {
957
  "epoch": 1.2,
958
+ "learning_rate": 1.2363636363636364e-05,
959
+ "loss": 0.0245,
960
  "step": 157
961
  },
962
  {
963
  "epoch": 1.21,
964
+ "learning_rate": 1.2311688311688312e-05,
965
+ "loss": 0.0066,
966
  "step": 158
967
  },
968
  {
969
  "epoch": 1.21,
970
+ "learning_rate": 1.2259740259740261e-05,
971
+ "loss": 0.0254,
972
  "step": 159
973
  },
974
  {
975
  "epoch": 1.22,
976
+ "learning_rate": 1.2207792207792208e-05,
977
+ "loss": 0.0229,
978
  "step": 160
979
  },
980
  {
981
  "epoch": 1.23,
982
+ "learning_rate": 1.2155844155844157e-05,
983
+ "loss": 0.0169,
984
  "step": 161
985
  },
986
  {
987
  "epoch": 1.24,
988
+ "learning_rate": 1.2103896103896104e-05,
989
+ "loss": 0.0355,
990
  "step": 162
991
  },
992
  {
993
  "epoch": 1.24,
994
+ "learning_rate": 1.2051948051948053e-05,
995
+ "loss": 0.0137,
996
  "step": 163
997
  },
998
  {
999
  "epoch": 1.25,
1000
+ "learning_rate": 1.2e-05,
1001
+ "loss": 0.0271,
1002
  "step": 164
1003
  },
1004
  {
1005
  "epoch": 1.26,
1006
+ "learning_rate": 1.1948051948051949e-05,
1007
+ "loss": 0.021,
1008
  "step": 165
1009
  },
1010
  {
1011
  "epoch": 1.27,
1012
+ "learning_rate": 1.1896103896103896e-05,
1013
+ "loss": 0.0214,
1014
  "step": 166
1015
  },
1016
  {
1017
  "epoch": 1.27,
1018
+ "learning_rate": 1.1844155844155845e-05,
1019
+ "loss": 0.0313,
1020
  "step": 167
1021
  },
1022
  {
1023
  "epoch": 1.28,
1024
+ "learning_rate": 1.1792207792207792e-05,
1025
+ "loss": 0.0335,
1026
  "step": 168
1027
  },
1028
  {
1029
  "epoch": 1.29,
1030
+ "learning_rate": 1.1740259740259741e-05,
1031
+ "loss": 0.0169,
1032
  "step": 169
1033
  },
1034
  {
1035
  "epoch": 1.3,
1036
+ "learning_rate": 1.1688311688311688e-05,
1037
+ "loss": 0.0613,
1038
  "step": 170
1039
  },
1040
  {
1041
  "epoch": 1.31,
1042
+ "learning_rate": 1.1636363636363637e-05,
1043
+ "loss": 0.0141,
1044
  "step": 171
1045
  },
1046
  {
1047
  "epoch": 1.31,
1048
+ "learning_rate": 1.1584415584415584e-05,
1049
+ "loss": 0.0126,
1050
  "step": 172
1051
  },
1052
  {
1053
  "epoch": 1.32,
1054
+ "learning_rate": 1.1532467532467533e-05,
1055
+ "loss": 0.0161,
1056
  "step": 173
1057
  },
1058
  {
1059
  "epoch": 1.33,
1060
+ "learning_rate": 1.148051948051948e-05,
1061
+ "loss": 0.0192,
1062
  "step": 174
1063
  },
1064
  {
1065
  "epoch": 1.34,
1066
+ "learning_rate": 1.1428571428571429e-05,
1067
+ "loss": 0.016,
1068
  "step": 175
1069
  },
1070
  {
1071
  "epoch": 1.34,
1072
+ "learning_rate": 1.1376623376623376e-05,
1073
+ "loss": 0.0169,
1074
  "step": 176
1075
  },
1076
  {
1077
  "epoch": 1.35,
1078
+ "learning_rate": 1.1324675324675325e-05,
1079
+ "loss": 0.0201,
1080
  "step": 177
1081
  },
1082
  {
1083
  "epoch": 1.36,
1084
+ "learning_rate": 1.1272727272727272e-05,
1085
+ "loss": 0.0141,
1086
  "step": 178
1087
  },
1088
  {
1089
  "epoch": 1.37,
1090
+ "learning_rate": 1.1220779220779221e-05,
1091
+ "loss": 0.0122,
1092
  "step": 179
1093
  },
1094
  {
1095
  "epoch": 1.37,
1096
+ "learning_rate": 1.116883116883117e-05,
1097
+ "loss": 0.0226,
1098
  "step": 180
1099
  },
1100
  {
1101
  "epoch": 1.38,
1102
+ "learning_rate": 1.1116883116883117e-05,
1103
+ "loss": 0.026,
1104
  "step": 181
1105
  },
1106
  {
1107
  "epoch": 1.39,
1108
+ "learning_rate": 1.1064935064935066e-05,
1109
+ "loss": 0.0718,
1110
  "step": 182
1111
  },
1112
  {
1113
  "epoch": 1.4,
1114
+ "learning_rate": 1.1012987012987013e-05,
1115
+ "loss": 0.0199,
1116
  "step": 183
1117
  },
1118
  {
1119
  "epoch": 1.4,
1120
+ "learning_rate": 1.0961038961038962e-05,
1121
+ "loss": 0.0222,
1122
  "step": 184
1123
  },
1124
  {
1125
  "epoch": 1.41,
1126
+ "learning_rate": 1.0909090909090909e-05,
1127
+ "loss": 0.0155,
1128
  "step": 185
1129
  },
1130
  {
1131
  "epoch": 1.42,
1132
+ "learning_rate": 1.0857142857142858e-05,
1133
+ "loss": 0.0244,
1134
  "step": 186
1135
  },
1136
  {
1137
  "epoch": 1.43,
1138
+ "learning_rate": 1.0805194805194805e-05,
1139
+ "loss": 0.0191,
1140
  "step": 187
1141
  },
1142
  {
1143
  "epoch": 1.44,
1144
+ "learning_rate": 1.0753246753246754e-05,
1145
+ "loss": 0.0319,
1146
  "step": 188
1147
  },
1148
  {
1149
  "epoch": 1.44,
1150
+ "learning_rate": 1.0701298701298701e-05,
1151
+ "loss": 0.0144,
1152
  "step": 189
1153
  },
1154
  {
1155
  "epoch": 1.45,
1156
+ "learning_rate": 1.064935064935065e-05,
1157
+ "loss": 0.022,
1158
  "step": 190
1159
  },
1160
  {
1161
  "epoch": 1.46,
1162
+ "learning_rate": 1.0597402597402597e-05,
1163
+ "loss": 0.0118,
1164
  "step": 191
1165
  },
1166
  {
1167
  "epoch": 1.47,
1168
+ "learning_rate": 1.0545454545454546e-05,
1169
+ "loss": 0.0228,
1170
  "step": 192
1171
  },
1172
  {
1173
  "epoch": 1.47,
1174
+ "learning_rate": 1.0493506493506493e-05,
1175
+ "loss": 0.0329,
1176
  "step": 193
1177
  },
1178
  {
1179
  "epoch": 1.48,
1180
+ "learning_rate": 1.0441558441558442e-05,
1181
+ "loss": 0.0401,
1182
  "step": 194
1183
  },
1184
  {
1185
  "epoch": 1.49,
1186
+ "learning_rate": 1.0389610389610389e-05,
1187
+ "loss": 0.044,
1188
  "step": 195
1189
  },
1190
  {
1191
  "epoch": 1.5,
1192
+ "learning_rate": 1.0337662337662338e-05,
1193
+ "loss": 0.0226,
1194
  "step": 196
1195
  },
1196
  {
1197
  "epoch": 1.5,
1198
+ "learning_rate": 1.0285714285714285e-05,
1199
+ "loss": 0.0236,
1200
  "step": 197
1201
  },
1202
  {
1203
  "epoch": 1.51,
1204
+ "learning_rate": 1.0233766233766234e-05,
1205
+ "loss": 0.0348,
1206
  "step": 198
1207
  },
1208
  {
1209
  "epoch": 1.52,
1210
+ "learning_rate": 1.0181818181818182e-05,
1211
+ "loss": 0.0614,
1212
  "step": 199
1213
  },
1214
  {
1215
  "epoch": 1.53,
1216
+ "learning_rate": 1.012987012987013e-05,
1217
+ "loss": 0.0188,
1218
  "step": 200
1219
  },
1220
  {
1221
  "epoch": 1.53,
1222
+ "learning_rate": 1.0077922077922078e-05,
1223
+ "loss": 0.0231,
1224
  "step": 201
1225
  },
1226
  {
1227
  "epoch": 1.54,
1228
+ "learning_rate": 1.0025974025974026e-05,
1229
+ "loss": 0.0584,
1230
  "step": 202
1231
  },
1232
  {
1233
  "epoch": 1.55,
1234
+ "learning_rate": 9.974025974025974e-06,
1235
+ "loss": 0.0106,
1236
  "step": 203
1237
  },
1238
  {
1239
  "epoch": 1.56,
1240
+ "learning_rate": 9.922077922077923e-06,
1241
+ "loss": 0.0251,
1242
  "step": 204
1243
  },
1244
  {
1245
  "epoch": 1.56,
1246
+ "learning_rate": 9.87012987012987e-06,
1247
+ "loss": 0.0555,
1248
  "step": 205
1249
  },
1250
  {
1251
  "epoch": 1.57,
1252
+ "learning_rate": 9.81818181818182e-06,
1253
+ "loss": 0.0745,
1254
  "step": 206
1255
  },
1256
  {
1257
  "epoch": 1.58,
1258
+ "learning_rate": 9.766233766233766e-06,
1259
+ "loss": 0.0586,
1260
  "step": 207
1261
  },
1262
  {
1263
  "epoch": 1.59,
1264
+ "learning_rate": 9.714285714285715e-06,
1265
+ "loss": 0.0738,
1266
  "step": 208
1267
  },
1268
  {
1269
  "epoch": 1.6,
1270
+ "learning_rate": 9.662337662337662e-06,
1271
+ "loss": 0.0209,
1272
  "step": 209
1273
  },
1274
  {
1275
  "epoch": 1.6,
1276
+ "learning_rate": 9.610389610389611e-06,
1277
+ "loss": 0.0472,
1278
  "step": 210
1279
  },
1280
  {
1281
  "epoch": 1.61,
1282
+ "learning_rate": 9.558441558441558e-06,
1283
+ "loss": 0.0182,
1284
  "step": 211
1285
  },
1286
  {
1287
  "epoch": 1.62,
1288
+ "learning_rate": 9.506493506493507e-06,
1289
+ "loss": 0.0532,
1290
  "step": 212
1291
  },
1292
  {
1293
  "epoch": 1.63,
1294
+ "learning_rate": 9.454545454545456e-06,
1295
+ "loss": 0.0269,
1296
  "step": 213
1297
  },
1298
  {
1299
  "epoch": 1.63,
1300
+ "learning_rate": 9.402597402597403e-06,
1301
+ "loss": 0.014,
1302
  "step": 214
1303
  },
1304
  {
1305
  "epoch": 1.64,
1306
+ "learning_rate": 9.350649350649352e-06,
1307
+ "loss": 0.0248,
1308
  "step": 215
1309
  },
1310
  {
1311
  "epoch": 1.65,
1312
+ "learning_rate": 9.298701298701299e-06,
1313
+ "loss": 0.0237,
1314
  "step": 216
1315
  },
1316
  {
1317
  "epoch": 1.66,
1318
+ "learning_rate": 9.246753246753248e-06,
1319
+ "loss": 0.0132,
1320
  "step": 217
1321
  },
1322
  {
1323
  "epoch": 1.66,
1324
+ "learning_rate": 9.194805194805195e-06,
1325
+ "loss": 0.0124,
1326
  "step": 218
1327
  },
1328
  {
1329
  "epoch": 1.67,
1330
+ "learning_rate": 9.142857142857144e-06,
1331
+ "loss": 0.0197,
1332
  "step": 219
1333
  },
1334
  {
1335
  "epoch": 1.68,
1336
+ "learning_rate": 9.090909090909091e-06,
1337
+ "loss": 0.0088,
1338
  "step": 220
1339
  },
1340
  {
1341
  "epoch": 1.69,
1342
+ "learning_rate": 9.03896103896104e-06,
1343
+ "loss": 0.011,
1344
  "step": 221
1345
  },
1346
  {
1347
  "epoch": 1.69,
1348
+ "learning_rate": 8.987012987012987e-06,
1349
+ "loss": 0.0072,
1350
  "step": 222
1351
  },
1352
  {
1353
  "epoch": 1.7,
1354
+ "learning_rate": 8.935064935064936e-06,
1355
+ "loss": 0.0129,
1356
  "step": 223
1357
  },
1358
  {
1359
  "epoch": 1.71,
1360
+ "learning_rate": 8.883116883116883e-06,
1361
+ "loss": 0.0103,
1362
  "step": 224
1363
  },
1364
  {
1365
  "epoch": 1.72,
1366
+ "learning_rate": 8.831168831168832e-06,
1367
+ "loss": 0.0273,
1368
  "step": 225
1369
  },
1370
  {
1371
  "epoch": 1.73,
1372
+ "learning_rate": 8.779220779220779e-06,
1373
+ "loss": 0.0081,
1374
  "step": 226
1375
  },
1376
  {
1377
  "epoch": 1.73,
1378
+ "learning_rate": 8.727272727272728e-06,
1379
+ "loss": 0.0519,
1380
  "step": 227
1381
  },
1382
  {
1383
  "epoch": 1.74,
1384
+ "learning_rate": 8.675324675324675e-06,
1385
+ "loss": 0.053,
1386
  "step": 228
1387
  },
1388
  {
1389
  "epoch": 1.75,
1390
+ "learning_rate": 8.623376623376624e-06,
1391
+ "loss": 0.0055,
1392
  "step": 229
1393
  },
1394
  {
1395
  "epoch": 1.76,
1396
+ "learning_rate": 8.571428571428571e-06,
1397
+ "loss": 0.0244,
1398
  "step": 230
1399
  },
1400
  {
1401
  "epoch": 1.76,
1402
+ "learning_rate": 8.51948051948052e-06,
1403
  "loss": 0.0514,
1404
  "step": 231
1405
  },
1406
  {
1407
  "epoch": 1.77,
1408
+ "learning_rate": 8.467532467532467e-06,
1409
+ "loss": 0.013,
1410
  "step": 232
1411
  },
1412
  {
1413
  "epoch": 1.78,
1414
+ "learning_rate": 8.415584415584416e-06,
1415
+ "loss": 0.0054,
1416
  "step": 233
1417
  },
1418
  {
1419
  "epoch": 1.79,
1420
+ "learning_rate": 8.363636363636365e-06,
1421
+ "loss": 0.0235,
1422
  "step": 234
1423
  },
1424
  {
1425
  "epoch": 1.79,
1426
+ "learning_rate": 8.311688311688313e-06,
1427
+ "loss": 0.0177,
1428
  "step": 235
1429
  },
1430
  {
1431
  "epoch": 1.8,
1432
+ "learning_rate": 8.25974025974026e-06,
1433
+ "loss": 0.047,
1434
  "step": 236
1435
  },
1436
  {
1437
  "epoch": 1.81,
1438
+ "learning_rate": 8.20779220779221e-06,
1439
+ "loss": 0.0328,
1440
  "step": 237
1441
  },
1442
  {
1443
  "epoch": 1.82,
1444
+ "learning_rate": 8.155844155844157e-06,
1445
+ "loss": 0.0046,
1446
  "step": 238
1447
  },
1448
  {
1449
  "epoch": 1.82,
1450
+ "learning_rate": 8.103896103896105e-06,
1451
+ "loss": 0.0106,
1452
  "step": 239
1453
  },
1454
  {
1455
  "epoch": 1.83,
1456
+ "learning_rate": 8.051948051948052e-06,
1457
+ "loss": 0.0081,
1458
  "step": 240
1459
  },
1460
  {
1461
  "epoch": 1.84,
1462
+ "learning_rate": 8.000000000000001e-06,
1463
+ "loss": 0.0077,
1464
  "step": 241
1465
  },
1466
  {
1467
  "epoch": 1.85,
1468
+ "learning_rate": 7.948051948051948e-06,
1469
+ "loss": 0.0106,
1470
  "step": 242
1471
  },
1472
  {
1473
  "epoch": 1.85,
1474
+ "learning_rate": 7.896103896103897e-06,
1475
+ "loss": 0.0135,
1476
  "step": 243
1477
  },
1478
  {
1479
  "epoch": 1.86,
1480
+ "learning_rate": 7.844155844155844e-06,
1481
+ "loss": 0.0066,
1482
  "step": 244
1483
  },
1484
  {
1485
  "epoch": 1.87,
1486
+ "learning_rate": 7.792207792207793e-06,
1487
+ "loss": 0.0255,
1488
  "step": 245
1489
  },
1490
  {
1491
  "epoch": 1.88,
1492
+ "learning_rate": 7.74025974025974e-06,
1493
+ "loss": 0.0126,
1494
  "step": 246
1495
  },
1496
  {
1497
  "epoch": 1.89,
1498
+ "learning_rate": 7.68831168831169e-06,
1499
+ "loss": 0.0245,
1500
  "step": 247
1501
  },
1502
  {
1503
  "epoch": 1.89,
1504
+ "learning_rate": 7.636363636363638e-06,
1505
+ "loss": 0.0209,
1506
  "step": 248
1507
  },
1508
  {
1509
  "epoch": 1.9,
1510
+ "learning_rate": 7.584415584415585e-06,
1511
+ "loss": 0.0183,
1512
  "step": 249
1513
  },
1514
  {
1515
  "epoch": 1.91,
1516
+ "learning_rate": 7.532467532467533e-06,
1517
+ "loss": 0.0165,
1518
  "step": 250
1519
  },
1520
  {
1521
  "epoch": 1.92,
1522
+ "learning_rate": 7.480519480519481e-06,
1523
+ "loss": 0.0106,
1524
  "step": 251
1525
  },
1526
  {
1527
  "epoch": 1.92,
1528
+ "learning_rate": 7.428571428571429e-06,
1529
+ "loss": 0.0091,
1530
  "step": 252
1531
  },
1532
  {
1533
  "epoch": 1.93,
1534
+ "learning_rate": 7.376623376623377e-06,
1535
+ "loss": 0.0169,
1536
  "step": 253
1537
  },
1538
  {
1539
  "epoch": 1.94,
1540
+ "learning_rate": 7.324675324675325e-06,
1541
+ "loss": 0.0079,
1542
  "step": 254
1543
  },
1544
  {
1545
  "epoch": 1.95,
1546
+ "learning_rate": 7.272727272727273e-06,
1547
+ "loss": 0.0328,
1548
  "step": 255
1549
  },
1550
  {
1551
  "epoch": 1.95,
1552
+ "learning_rate": 7.220779220779221e-06,
1553
+ "loss": 0.0202,
1554
  "step": 256
1555
  },
1556
  {
1557
  "epoch": 1.96,
1558
+ "learning_rate": 7.16883116883117e-06,
1559
+ "loss": 0.0233,
1560
  "step": 257
1561
  },
1562
  {
1563
  "epoch": 1.97,
1564
+ "learning_rate": 7.116883116883118e-06,
1565
+ "loss": 0.0179,
1566
  "step": 258
1567
  },
1568
  {
1569
  "epoch": 1.98,
1570
+ "learning_rate": 7.064935064935066e-06,
1571
+ "loss": 0.0271,
1572
  "step": 259
1573
  },
1574
  {
1575
  "epoch": 1.98,
1576
+ "learning_rate": 7.012987012987014e-06,
1577
+ "loss": 0.0209,
1578
  "step": 260
1579
  },
1580
  {
1581
  "epoch": 1.99,
1582
+ "learning_rate": 6.961038961038962e-06,
1583
+ "loss": 0.0225,
1584
  "step": 261
1585
  },
1586
  {
1587
  "epoch": 2.0,
1588
+ "learning_rate": 6.90909090909091e-06,
1589
+ "loss": 0.0091,
1590
  "step": 262
1591
  },
1592
  {
1593
  "epoch": 2.0,
1594
+ "eval_loss": 0.015746144577860832,
1595
+ "eval_pearson": 0.9584851350897383,
1596
+ "eval_runtime": 5.2391,
1597
+ "eval_samples_per_second": 44.282,
1598
+ "eval_spearmanr": 0.9039676698999922,
1599
+ "eval_steps_per_second": 2.863,
1600
  "step": 262
1601
  },
1602
  {
1603
  "epoch": 2.01,
1604
+ "learning_rate": 6.857142857142858e-06,
1605
+ "loss": 0.0102,
1606
  "step": 263
1607
  },
1608
  {
1609
  "epoch": 2.02,
1610
+ "learning_rate": 6.805194805194806e-06,
1611
+ "loss": 0.0135,
1612
  "step": 264
1613
  },
1614
  {
1615
  "epoch": 2.02,
1616
+ "learning_rate": 6.753246753246754e-06,
1617
+ "loss": 0.0083,
1618
  "step": 265
1619
  },
1620
  {
1621
  "epoch": 2.03,
1622
+ "learning_rate": 6.701298701298702e-06,
1623
+ "loss": 0.0214,
1624
  "step": 266
1625
  },
1626
  {
1627
  "epoch": 2.04,
1628
+ "learning_rate": 6.64935064935065e-06,
1629
+ "loss": 0.0156,
1630
  "step": 267
1631
  },
1632
  {
1633
  "epoch": 2.05,
1634
+ "learning_rate": 6.597402597402598e-06,
1635
+ "loss": 0.0095,
1636
  "step": 268
1637
  },
1638
  {
1639
  "epoch": 2.05,
1640
+ "learning_rate": 6.545454545454546e-06,
1641
+ "loss": 0.0126,
1642
  "step": 269
1643
  },
1644
  {
1645
  "epoch": 2.06,
1646
+ "learning_rate": 6.493506493506494e-06,
1647
+ "loss": 0.0025,
1648
  "step": 270
1649
  },
1650
  {
1651
  "epoch": 2.07,
1652
+ "learning_rate": 6.441558441558442e-06,
1653
+ "loss": 0.015,
1654
  "step": 271
1655
  },
1656
  {
1657
  "epoch": 2.08,
1658
+ "learning_rate": 6.38961038961039e-06,
1659
+ "loss": 0.0108,
1660
  "step": 272
1661
  },
1662
  {
1663
  "epoch": 2.08,
1664
+ "learning_rate": 6.337662337662338e-06,
1665
+ "loss": 0.0308,
1666
  "step": 273
1667
  },
1668
  {
1669
  "epoch": 2.09,
1670
+ "learning_rate": 6.285714285714286e-06,
1671
+ "loss": 0.0269,
1672
  "step": 274
1673
  },
1674
  {
1675
  "epoch": 2.1,
1676
+ "learning_rate": 6.233766233766234e-06,
1677
+ "loss": 0.0135,
1678
  "step": 275
1679
  },
1680
  {
1681
  "epoch": 2.11,
1682
+ "learning_rate": 6.181818181818182e-06,
1683
+ "loss": 0.0107,
1684
  "step": 276
1685
  },
1686
  {
1687
  "epoch": 2.11,
1688
+ "learning_rate": 6.129870129870131e-06,
1689
+ "loss": 0.0073,
1690
  "step": 277
1691
  },
1692
  {
1693
  "epoch": 2.12,
1694
+ "learning_rate": 6.077922077922079e-06,
1695
+ "loss": 0.0232,
1696
  "step": 278
1697
  },
1698
  {
1699
  "epoch": 2.13,
1700
+ "learning_rate": 6.025974025974027e-06,
1701
+ "loss": 0.0156,
1702
  "step": 279
1703
  },
1704
  {
1705
  "epoch": 2.14,
1706
+ "learning_rate": 5.9740259740259746e-06,
1707
+ "loss": 0.0091,
1708
  "step": 280
1709
  },
1710
  {
1711
  "epoch": 2.15,
1712
+ "learning_rate": 5.9220779220779226e-06,
1713
+ "loss": 0.025,
1714
  "step": 281
1715
  },
1716
  {
1717
  "epoch": 2.15,
1718
+ "learning_rate": 5.8701298701298705e-06,
1719
+ "loss": 0.0034,
1720
  "step": 282
1721
  },
1722
  {
1723
  "epoch": 2.16,
1724
+ "learning_rate": 5.8181818181818185e-06,
1725
+ "loss": 0.0122,
1726
  "step": 283
1727
  },
1728
  {
1729
  "epoch": 2.17,
1730
+ "learning_rate": 5.7662337662337665e-06,
1731
+ "loss": 0.0182,
1732
  "step": 284
1733
  },
1734
  {
1735
  "epoch": 2.18,
1736
+ "learning_rate": 5.7142857142857145e-06,
1737
+ "loss": 0.0044,
1738
  "step": 285
1739
  },
1740
  {
1741
  "epoch": 2.18,
1742
+ "learning_rate": 5.6623376623376625e-06,
1743
+ "loss": 0.0047,
1744
  "step": 286
1745
  },
1746
  {
1747
  "epoch": 2.19,
1748
+ "learning_rate": 5.6103896103896105e-06,
1749
+ "loss": 0.0155,
1750
  "step": 287
1751
  },
1752
  {
1753
  "epoch": 2.2,
1754
+ "learning_rate": 5.5584415584415585e-06,
1755
+ "loss": 0.0069,
1756
  "step": 288
1757
  },
1758
  {
1759
  "epoch": 2.21,
1760
+ "learning_rate": 5.5064935064935065e-06,
1761
+ "loss": 0.0102,
1762
  "step": 289
1763
  },
1764
  {
1765
  "epoch": 2.21,
1766
+ "learning_rate": 5.4545454545454545e-06,
1767
+ "loss": 0.0085,
1768
  "step": 290
1769
  },
1770
  {
1771
  "epoch": 2.22,
1772
+ "learning_rate": 5.4025974025974024e-06,
1773
+ "loss": 0.0066,
1774
  "step": 291
1775
  },
1776
  {
1777
  "epoch": 2.23,
1778
+ "learning_rate": 5.3506493506493504e-06,
1779
+ "loss": 0.0082,
1780
  "step": 292
1781
  },
1782
  {
1783
  "epoch": 2.24,
1784
+ "learning_rate": 5.298701298701298e-06,
1785
+ "loss": 0.0121,
1786
  "step": 293
1787
  },
1788
  {
1789
  "epoch": 2.24,
1790
+ "learning_rate": 5.246753246753246e-06,
1791
+ "loss": 0.0096,
1792
  "step": 294
1793
  },
1794
  {
1795
  "epoch": 2.25,
1796
+ "learning_rate": 5.194805194805194e-06,
1797
+ "loss": 0.0088,
1798
  "step": 295
1799
  },
1800
  {
1801
  "epoch": 2.26,
1802
+ "learning_rate": 5.142857142857142e-06,
1803
+ "loss": 0.0172,
1804
  "step": 296
1805
  },
1806
  {
1807
  "epoch": 2.27,
1808
+ "learning_rate": 5.090909090909091e-06,
1809
+ "loss": 0.0073,
1810
  "step": 297
1811
  },
1812
  {
1813
  "epoch": 2.27,
1814
+ "learning_rate": 5.038961038961039e-06,
1815
+ "loss": 0.0125,
1816
  "step": 298
1817
  },
1818
  {
1819
  "epoch": 2.28,
1820
+ "learning_rate": 4.987012987012987e-06,
1821
+ "loss": 0.0631,
1822
  "step": 299
1823
  },
1824
  {
1825
  "epoch": 2.29,
1826
+ "learning_rate": 4.935064935064935e-06,
1827
+ "loss": 0.0036,
1828
  "step": 300
1829
  },
1830
  {
1831
  "epoch": 2.3,
1832
+ "learning_rate": 4.883116883116883e-06,
1833
+ "loss": 0.0085,
1834
  "step": 301
1835
  },
1836
  {
1837
  "epoch": 2.31,
1838
+ "learning_rate": 4.831168831168831e-06,
1839
+ "loss": 0.0224,
1840
  "step": 302
1841
  },
1842
  {
1843
  "epoch": 2.31,
1844
+ "learning_rate": 4.779220779220779e-06,
1845
+ "loss": 0.0073,
1846
  "step": 303
1847
  },
1848
  {
1849
  "epoch": 2.32,
1850
+ "learning_rate": 4.727272727272728e-06,
1851
+ "loss": 0.0054,
1852
  "step": 304
1853
  },
1854
  {
1855
  "epoch": 2.33,
1856
+ "learning_rate": 4.675324675324676e-06,
1857
+ "loss": 0.0076,
1858
  "step": 305
1859
  },
1860
  {
1861
  "epoch": 2.34,
1862
+ "learning_rate": 4.623376623376624e-06,
1863
+ "loss": 0.0116,
1864
  "step": 306
1865
  },
1866
  {
1867
  "epoch": 2.34,
1868
+ "learning_rate": 4.571428571428572e-06,
1869
+ "loss": 0.0046,
1870
  "step": 307
1871
  },
1872
  {
1873
  "epoch": 2.35,
1874
+ "learning_rate": 4.51948051948052e-06,
1875
+ "loss": 0.01,
1876
  "step": 308
1877
  },
1878
  {
1879
  "epoch": 2.36,
1880
+ "learning_rate": 4.467532467532468e-06,
1881
+ "loss": 0.0062,
1882
  "step": 309
1883
  },
1884
  {
1885
  "epoch": 2.37,
1886
+ "learning_rate": 4.415584415584416e-06,
1887
+ "loss": 0.0298,
1888
  "step": 310
1889
  },
1890
  {
1891
  "epoch": 2.37,
1892
+ "learning_rate": 4.363636363636364e-06,
1893
+ "loss": 0.0045,
1894
  "step": 311
1895
  },
1896
  {
1897
  "epoch": 2.38,
1898
+ "learning_rate": 4.311688311688312e-06,
1899
+ "loss": 0.0107,
1900
  "step": 312
1901
  },
1902
  {
1903
  "epoch": 2.39,
1904
+ "learning_rate": 4.25974025974026e-06,
1905
+ "loss": 0.004,
1906
  "step": 313
1907
  },
1908
  {
1909
  "epoch": 2.4,
1910
+ "learning_rate": 4.207792207792208e-06,
1911
+ "loss": 0.0038,
1912
  "step": 314
1913
  },
1914
  {
1915
  "epoch": 2.4,
1916
+ "learning_rate": 4.155844155844157e-06,
1917
+ "loss": 0.0108,
1918
  "step": 315
1919
  },
1920
  {
1921
  "epoch": 2.41,
1922
+ "learning_rate": 4.103896103896105e-06,
1923
+ "loss": 0.006,
1924
  "step": 316
1925
  },
1926
  {
1927
  "epoch": 2.42,
1928
+ "learning_rate": 4.051948051948053e-06,
1929
+ "loss": 0.0094,
1930
  "step": 317
1931
  },
1932
  {
1933
  "epoch": 2.43,
1934
+ "learning_rate": 4.000000000000001e-06,
1935
+ "loss": 0.009,
1936
  "step": 318
1937
  },
1938
  {
1939
  "epoch": 2.44,
1940
+ "learning_rate": 3.948051948051949e-06,
1941
+ "loss": 0.008,
1942
  "step": 319
1943
  },
1944
  {
1945
  "epoch": 2.44,
1946
+ "learning_rate": 3.896103896103897e-06,
1947
+ "loss": 0.007,
1948
  "step": 320
1949
  },
1950
  {
1951
  "epoch": 2.45,
1952
+ "learning_rate": 3.844155844155845e-06,
1953
+ "loss": 0.0112,
1954
  "step": 321
1955
  },
1956
  {
1957
  "epoch": 2.46,
1958
+ "learning_rate": 3.7922077922077926e-06,
1959
+ "loss": 0.005,
1960
  "step": 322
1961
  },
1962
  {
1963
  "epoch": 2.47,
1964
+ "learning_rate": 3.7402597402597406e-06,
1965
+ "loss": 0.0039,
1966
  "step": 323
1967
  },
1968
  {
1969
  "epoch": 2.47,
1970
+ "learning_rate": 3.6883116883116886e-06,
1971
+ "loss": 0.0128,
1972
  "step": 324
1973
  },
1974
  {
1975
  "epoch": 2.48,
1976
+ "learning_rate": 3.6363636363636366e-06,
1977
+ "loss": 0.038,
1978
  "step": 325
1979
  },
1980
  {
1981
  "epoch": 2.49,
1982
+ "learning_rate": 3.584415584415585e-06,
1983
+ "loss": 0.0036,
1984
  "step": 326
1985
  },
1986
  {
1987
  "epoch": 2.5,
1988
+ "learning_rate": 3.532467532467533e-06,
1989
+ "loss": 0.0189,
1990
  "step": 327
1991
  },
1992
  {
1993
  "epoch": 2.5,
1994
+ "learning_rate": 3.480519480519481e-06,
1995
+ "loss": 0.0127,
1996
  "step": 328
1997
  },
1998
  {
1999
  "epoch": 2.51,
2000
+ "learning_rate": 3.428571428571429e-06,
2001
+ "loss": 0.0063,
2002
  "step": 329
2003
  },
2004
  {
2005
  "epoch": 2.52,
2006
+ "learning_rate": 3.376623376623377e-06,
2007
+ "loss": 0.0069,
2008
  "step": 330
2009
  },
2010
  {
2011
  "epoch": 2.53,
2012
+ "learning_rate": 3.324675324675325e-06,
2013
+ "loss": 0.0067,
2014
  "step": 331
2015
  },
2016
  {
2017
  "epoch": 2.53,
2018
+ "learning_rate": 3.272727272727273e-06,
2019
+ "loss": 0.0146,
2020
  "step": 332
2021
  },
2022
  {
2023
  "epoch": 2.54,
2024
+ "learning_rate": 3.220779220779221e-06,
2025
+ "loss": 0.0044,
2026
  "step": 333
2027
  },
2028
  {
2029
  "epoch": 2.55,
2030
+ "learning_rate": 3.168831168831169e-06,
2031
+ "loss": 0.0166,
2032
  "step": 334
2033
  },
2034
  {
2035
  "epoch": 2.56,
2036
+ "learning_rate": 3.116883116883117e-06,
2037
+ "loss": 0.0319,
2038
  "step": 335
2039
  },
2040
  {
2041
  "epoch": 2.56,
2042
+ "learning_rate": 3.0649350649350653e-06,
2043
+ "loss": 0.006,
2044
  "step": 336
2045
  },
2046
  {
2047
  "epoch": 2.57,
2048
+ "learning_rate": 3.0129870129870133e-06,
2049
+ "loss": 0.0332,
2050
  "step": 337
2051
  },
2052
  {
2053
  "epoch": 2.58,
2054
+ "learning_rate": 2.9610389610389613e-06,
2055
+ "loss": 0.0056,
2056
  "step": 338
2057
  },
2058
  {
2059
  "epoch": 2.59,
2060
+ "learning_rate": 2.9090909090909093e-06,
2061
+ "loss": 0.0041,
2062
  "step": 339
2063
  },
2064
  {
2065
  "epoch": 2.6,
2066
+ "learning_rate": 2.8571428571428573e-06,
2067
+ "loss": 0.0032,
2068
  "step": 340
2069
  },
2070
  {
2071
  "epoch": 2.6,
2072
+ "learning_rate": 2.8051948051948052e-06,
2073
+ "loss": 0.006,
2074
  "step": 341
2075
  },
2076
  {
2077
  "epoch": 2.61,
2078
+ "learning_rate": 2.7532467532467532e-06,
2079
+ "loss": 0.0063,
2080
  "step": 342
2081
  },
2082
  {
2083
  "epoch": 2.62,
2084
+ "learning_rate": 2.7012987012987012e-06,
2085
+ "loss": 0.0036,
2086
  "step": 343
2087
  },
2088
  {
2089
  "epoch": 2.63,
2090
+ "learning_rate": 2.649350649350649e-06,
2091
+ "loss": 0.0076,
2092
  "step": 344
2093
  },
2094
  {
2095
  "epoch": 2.63,
2096
+ "learning_rate": 2.597402597402597e-06,
2097
+ "loss": 0.0062,
2098
  "step": 345
2099
  },
2100
  {
2101
  "epoch": 2.64,
2102
+ "learning_rate": 2.5454545454545456e-06,
2103
+ "loss": 0.0081,
2104
  "step": 346
2105
  },
2106
  {
2107
  "epoch": 2.65,
2108
+ "learning_rate": 2.4935064935064936e-06,
2109
+ "loss": 0.0051,
2110
  "step": 347
2111
  },
2112
  {
2113
  "epoch": 2.66,
2114
+ "learning_rate": 2.4415584415584416e-06,
2115
+ "loss": 0.0088,
2116
  "step": 348
2117
  },
2118
  {
2119
  "epoch": 2.66,
2120
+ "learning_rate": 2.3896103896103896e-06,
2121
+ "loss": 0.0085,
2122
  "step": 349
2123
  },
2124
  {
2125
  "epoch": 2.67,
2126
+ "learning_rate": 2.337662337662338e-06,
2127
+ "loss": 0.0057,
2128
  "step": 350
2129
  },
2130
  {
2131
  "epoch": 2.68,
2132
+ "learning_rate": 2.285714285714286e-06,
2133
+ "loss": 0.004,
2134
  "step": 351
2135
  },
2136
  {
2137
  "epoch": 2.69,
2138
+ "learning_rate": 2.233766233766234e-06,
2139
+ "loss": 0.0042,
2140
  "step": 352
2141
  },
2142
  {
2143
  "epoch": 2.69,
2144
+ "learning_rate": 2.181818181818182e-06,
2145
+ "loss": 0.0065,
2146
  "step": 353
2147
  },
2148
  {
2149
  "epoch": 2.7,
2150
+ "learning_rate": 2.12987012987013e-06,
2151
+ "loss": 0.0067,
2152
  "step": 354
2153
  },
2154
  {
2155
  "epoch": 2.71,
2156
+ "learning_rate": 2.0779220779220784e-06,
2157
+ "loss": 0.0111,
2158
  "step": 355
2159
  },
2160
  {
2161
  "epoch": 2.72,
2162
+ "learning_rate": 2.0259740259740263e-06,
2163
+ "loss": 0.0161,
2164
  "step": 356
2165
  },
2166
  {
2167
  "epoch": 2.73,
2168
+ "learning_rate": 1.9740259740259743e-06,
2169
+ "loss": 0.0034,
2170
  "step": 357
2171
  },
2172
  {
2173
  "epoch": 2.73,
2174
+ "learning_rate": 1.9220779220779223e-06,
2175
+ "loss": 0.0048,
2176
  "step": 358
2177
  },
2178
  {
2179
  "epoch": 2.74,
2180
+ "learning_rate": 1.8701298701298703e-06,
2181
+ "loss": 0.0132,
2182
  "step": 359
2183
  },
2184
  {
2185
  "epoch": 2.75,
2186
+ "learning_rate": 1.8181818181818183e-06,
2187
+ "loss": 0.0039,
2188
  "step": 360
2189
  },
2190
  {
2191
  "epoch": 2.76,
2192
+ "learning_rate": 1.7662337662337665e-06,
2193
+ "loss": 0.0075,
2194
  "step": 361
2195
  },
2196
  {
2197
  "epoch": 2.76,
2198
+ "learning_rate": 1.7142857142857145e-06,
2199
+ "loss": 0.0043,
2200
  "step": 362
2201
  },
2202
  {
2203
  "epoch": 2.77,
2204
+ "learning_rate": 1.6623376623376625e-06,
2205
+ "loss": 0.0043,
2206
  "step": 363
2207
  },
2208
  {
2209
  "epoch": 2.78,
2210
+ "learning_rate": 1.6103896103896105e-06,
2211
+ "loss": 0.0071,
2212
  "step": 364
2213
  },
2214
  {
2215
  "epoch": 2.79,
2216
+ "learning_rate": 1.5584415584415584e-06,
2217
+ "loss": 0.0109,
2218
  "step": 365
2219
  },
2220
  {
2221
  "epoch": 2.79,
2222
+ "learning_rate": 1.5064935064935066e-06,
2223
+ "loss": 0.0093,
2224
  "step": 366
2225
  },
2226
  {
2227
  "epoch": 2.8,
2228
+ "learning_rate": 1.4545454545454546e-06,
2229
+ "loss": 0.0057,
2230
  "step": 367
2231
  },
2232
  {
2233
  "epoch": 2.81,
2234
+ "learning_rate": 1.4025974025974026e-06,
2235
+ "loss": 0.0141,
2236
  "step": 368
2237
  },
2238
  {
2239
  "epoch": 2.82,
2240
+ "learning_rate": 1.3506493506493506e-06,
2241
+ "loss": 0.0029,
2242
  "step": 369
2243
  },
2244
  {
2245
  "epoch": 2.82,
2246
+ "learning_rate": 1.2987012987012986e-06,
2247
+ "loss": 0.0031,
2248
  "step": 370
2249
  },
2250
  {
2251
  "epoch": 2.83,
2252
+ "learning_rate": 1.2467532467532468e-06,
2253
+ "loss": 0.0049,
2254
  "step": 371
2255
  },
2256
  {
2257
  "epoch": 2.84,
2258
+ "learning_rate": 1.1948051948051948e-06,
2259
+ "loss": 0.0037,
2260
  "step": 372
2261
  },
2262
  {
2263
  "epoch": 2.85,
2264
+ "learning_rate": 1.142857142857143e-06,
2265
+ "loss": 0.0108,
2266
  "step": 373
2267
  },
2268
  {
2269
  "epoch": 2.85,
2270
+ "learning_rate": 1.090909090909091e-06,
2271
+ "loss": 0.0045,
2272
  "step": 374
2273
  },
2274
  {
2275
  "epoch": 2.86,
2276
+ "learning_rate": 1.0389610389610392e-06,
2277
+ "loss": 0.0103,
2278
  "step": 375
2279
  },
2280
  {
2281
  "epoch": 2.87,
2282
+ "learning_rate": 9.870129870129872e-07,
2283
+ "loss": 0.0049,
2284
  "step": 376
2285
  },
2286
  {
2287
  "epoch": 2.88,
2288
+ "learning_rate": 9.350649350649352e-07,
2289
+ "loss": 0.0061,
2290
  "step": 377
2291
  },
2292
  {
2293
  "epoch": 2.89,
2294
+ "learning_rate": 8.831168831168832e-07,
2295
+ "loss": 0.015,
2296
  "step": 378
2297
  },
2298
  {
2299
  "epoch": 2.89,
2300
+ "learning_rate": 8.311688311688312e-07,
2301
+ "loss": 0.0071,
2302
  "step": 379
2303
  },
2304
  {
2305
  "epoch": 2.9,
2306
+ "learning_rate": 7.792207792207792e-07,
2307
+ "loss": 0.0058,
2308
  "step": 380
2309
  },
2310
  {
2311
  "epoch": 2.91,
2312
+ "learning_rate": 7.272727272727273e-07,
2313
+ "loss": 0.0052,
2314
  "step": 381
2315
  },
2316
  {
2317
  "epoch": 2.92,
2318
+ "learning_rate": 6.753246753246753e-07,
2319
+ "loss": 0.0064,
2320
  "step": 382
2321
  },
2322
  {
2323
  "epoch": 2.92,
2324
+ "learning_rate": 6.233766233766234e-07,
2325
+ "loss": 0.0029,
2326
  "step": 383
2327
  },
2328
  {
2329
  "epoch": 2.93,
2330
+ "learning_rate": 5.714285714285715e-07,
2331
+ "loss": 0.0053,
2332
  "step": 384
2333
  },
2334
  {
2335
  "epoch": 2.94,
2336
+ "learning_rate": 5.194805194805196e-07,
2337
+ "loss": 0.0046,
2338
  "step": 385
2339
  },
2340
  {
2341
  "epoch": 2.95,
2342
+ "learning_rate": 4.675324675324676e-07,
2343
+ "loss": 0.0195,
2344
  "step": 386
2345
  },
2346
  {
2347
  "epoch": 2.95,
2348
+ "learning_rate": 4.155844155844156e-07,
2349
+ "loss": 0.0087,
2350
  "step": 387
2351
  },
2352
  {
2353
  "epoch": 2.96,
2354
+ "learning_rate": 3.6363636363636366e-07,
2355
+ "loss": 0.0043,
2356
  "step": 388
2357
  },
2358
  {
2359
  "epoch": 2.97,
2360
+ "learning_rate": 3.116883116883117e-07,
2361
+ "loss": 0.0056,
2362
  "step": 389
2363
  },
2364
  {
2365
  "epoch": 2.98,
2366
+ "learning_rate": 2.597402597402598e-07,
2367
+ "loss": 0.0088,
2368
  "step": 390
2369
  },
2370
  {
2371
  "epoch": 2.98,
2372
+ "learning_rate": 2.077922077922078e-07,
2373
+ "loss": 0.0163,
2374
  "step": 391
2375
  },
2376
  {
2377
  "epoch": 2.99,
2378
+ "learning_rate": 1.5584415584415585e-07,
2379
+ "loss": 0.0047,
2380
  "step": 392
2381
  },
2382
  {
2383
  "epoch": 3.0,
2384
+ "learning_rate": 1.038961038961039e-07,
2385
+ "loss": 0.0018,
2386
  "step": 393
2387
  },
2388
  {
2389
  "epoch": 3.0,
2390
+ "eval_loss": 0.010622967034578323,
2391
+ "eval_pearson": 0.9723387124637607,
2392
+ "eval_runtime": 5.2392,
2393
+ "eval_samples_per_second": 44.282,
2394
+ "eval_spearmanr": 0.9112422452507688,
2395
+ "eval_steps_per_second": 2.863,
2396
  "step": 393
2397
  }
2398
  ],
checkpoint-393/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa3be555654ef58dc4291062e451cc4f3b395b85f9521a093bdc71de5f5c2938
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c355b2a51bf6c9af2253d00c618ad523567bd7357682948505578a36e2e9f8f2
3
  size 3311
config.json CHANGED
@@ -34,7 +34,7 @@
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.20.0",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
 
34
  "position_embedding_type": "absolute",
35
  "problem_type": "regression",
36
  "torch_dtype": "float32",
37
+ "transformers_version": "4.20.1",
38
  "type_vocab_size": 2,
39
  "vocab_size": 30000
40
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83c13b311191c9fd24a89163aa7cf7e2077e631cc20cf7fc2b7abfcd3d37b033
3
  size 46750353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37faaea797d87287140583e74f091daf924c82a659dc7dfdec40c9b0c5c0be0
3
  size 46750353
runs/Jun21_21-19-13_2b301f7dd448/1655846468.5608478/events.out.tfevents.1655846468.2b301f7dd448.79.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7a457fdb7a7c00a6c8fb86c4a4bcddafe3b89205766eee8fc12791751421ef
3
+ size 5302
runs/Jun21_21-19-13_2b301f7dd448/events.out.tfevents.1655846468.2b301f7dd448.79.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08bdf94d41124dfae5521a92d2bc0db67e8bdd7e4f0a462e2f4ba19cfba4af30
3
+ size 66646
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa3be555654ef58dc4291062e451cc4f3b395b85f9521a093bdc71de5f5c2938
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c355b2a51bf6c9af2253d00c618ad523567bd7357682948505578a36e2e9f8f2
3
  size 3311