File size: 39,882 Bytes
b9cab03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 735,
  "global_step": 5874,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02502553626149132,
      "grad_norm": 65.55949401855469,
      "learning_rate": 4.834865509022812e-07,
      "loss": 16.851,
      "step": 147
    },
    {
      "epoch": 0.05005107252298264,
      "grad_norm": 23.207971572875977,
      "learning_rate": 9.805924412665985e-07,
      "loss": 11.2787,
      "step": 294
    },
    {
      "epoch": 0.07507660878447395,
      "grad_norm": 176.1532440185547,
      "learning_rate": 1.481103166496425e-06,
      "loss": 8.9166,
      "step": 441
    },
    {
      "epoch": 0.10010214504596528,
      "grad_norm": 22.1564998626709,
      "learning_rate": 1.981613891726251e-06,
      "loss": 7.9463,
      "step": 588
    },
    {
      "epoch": 0.12512768130745658,
      "grad_norm": 20.11876106262207,
      "learning_rate": 2.4821246169560777e-06,
      "loss": 7.2108,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_nli-pairs_loss": 6.905651569366455,
      "eval_nli-pairs_runtime": 4.0844,
      "eval_nli-pairs_samples_per_second": 36.725,
      "eval_nli-pairs_steps_per_second": 1.224,
      "eval_sts-test_pearson_cosine": 0.3740256550072784,
      "eval_sts-test_pearson_dot": 0.13384893803205677,
      "eval_sts-test_pearson_euclidean": 0.3912387619869807,
      "eval_sts-test_pearson_manhattan": 0.4202605137823524,
      "eval_sts-test_pearson_max": 0.4202605137823524,
      "eval_sts-test_spearman_cosine": 0.37210107338950205,
      "eval_sts-test_spearman_dot": 0.12092409843417483,
      "eval_sts-test_spearman_euclidean": 0.39172287978780546,
      "eval_sts-test_spearman_manhattan": 0.4169664738563951,
      "eval_sts-test_spearman_max": 0.4169664738563951,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_vitaminc-pairs_loss": 5.720878601074219,
      "eval_vitaminc-pairs_runtime": 2.1703,
      "eval_vitaminc-pairs_samples_per_second": 69.115,
      "eval_vitaminc-pairs_steps_per_second": 2.304,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_qnli-contrastive_loss": 8.1649751663208,
      "eval_qnli-contrastive_runtime": 0.4937,
      "eval_qnli-contrastive_samples_per_second": 303.841,
      "eval_qnli-contrastive_steps_per_second": 10.128,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_scitail-pairs-qa_loss": 3.7859296798706055,
      "eval_scitail-pairs-qa_runtime": 1.1509,
      "eval_scitail-pairs-qa_samples_per_second": 130.329,
      "eval_scitail-pairs-qa_steps_per_second": 4.344,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_scitail-pairs-pos_loss": 3.9919917583465576,
      "eval_scitail-pairs-pos_runtime": 2.1442,
      "eval_scitail-pairs-pos_samples_per_second": 69.956,
      "eval_scitail-pairs-pos_steps_per_second": 2.332,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_xsum-pairs_loss": 4.600368976593018,
      "eval_xsum-pairs_runtime": 2.26,
      "eval_xsum-pairs_samples_per_second": 66.371,
      "eval_xsum-pairs_steps_per_second": 2.212,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_compression-pairs_loss": 3.3037569522857666,
      "eval_compression-pairs_runtime": 0.449,
      "eval_compression-pairs_samples_per_second": 334.078,
      "eval_compression-pairs_steps_per_second": 11.136,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_sciq_pairs_loss": 10.214456558227539,
      "eval_sciq_pairs_runtime": 7.1179,
      "eval_sciq_pairs_samples_per_second": 21.074,
      "eval_sciq_pairs_steps_per_second": 0.702,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_qasc_pairs_loss": 10.58031940460205,
      "eval_qasc_pairs_runtime": 2.0175,
      "eval_qasc_pairs_samples_per_second": 74.348,
      "eval_qasc_pairs_steps_per_second": 2.478,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_openbookqa_pairs_loss": 7.862658977508545,
      "eval_openbookqa_pairs_runtime": 0.8571,
      "eval_openbookqa_pairs_samples_per_second": 120.168,
      "eval_openbookqa_pairs_steps_per_second": 4.667,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_msmarco_pairs_loss": 8.754273414611816,
      "eval_msmarco_pairs_runtime": 2.7533,
      "eval_msmarco_pairs_samples_per_second": 54.481,
      "eval_msmarco_pairs_steps_per_second": 1.816,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_nq_pairs_loss": 8.415486335754395,
      "eval_nq_pairs_runtime": 5.0894,
      "eval_nq_pairs_samples_per_second": 29.473,
      "eval_nq_pairs_steps_per_second": 0.982,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_trivia_pairs_loss": 9.051105499267578,
      "eval_trivia_pairs_runtime": 9.5498,
      "eval_trivia_pairs_samples_per_second": 15.707,
      "eval_trivia_pairs_steps_per_second": 0.524,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_quora_pairs_loss": 4.5232110023498535,
      "eval_quora_pairs_runtime": 1.1469,
      "eval_quora_pairs_samples_per_second": 130.785,
      "eval_quora_pairs_steps_per_second": 4.36,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_gooaq_pairs_loss": 7.579105854034424,
      "eval_gooaq_pairs_runtime": 2.0491,
      "eval_gooaq_pairs_samples_per_second": 73.203,
      "eval_gooaq_pairs_steps_per_second": 2.44,
      "step": 735
    },
    {
      "epoch": 0.1501532175689479,
      "grad_norm": 31.7736759185791,
      "learning_rate": 2.982635342185904e-06,
      "loss": 6.7709,
      "step": 882
    },
    {
      "epoch": 0.1751787538304392,
      "grad_norm": 31.57339096069336,
      "learning_rate": 3.4831460674157306e-06,
      "loss": 6.1746,
      "step": 1029
    },
    {
      "epoch": 0.20020429009193055,
      "grad_norm": 25.392702102661133,
      "learning_rate": 3.9836567926455565e-06,
      "loss": 5.7706,
      "step": 1176
    },
    {
      "epoch": 0.22522982635342187,
      "grad_norm": 32.390472412109375,
      "learning_rate": 4.484167517875383e-06,
      "loss": 5.7283,
      "step": 1323
    },
    {
      "epoch": 0.25025536261491316,
      "grad_norm": 18.85039520263672,
      "learning_rate": 4.98467824310521e-06,
      "loss": 5.1856,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_nli-pairs_loss": 4.352054119110107,
      "eval_nli-pairs_runtime": 4.1476,
      "eval_nli-pairs_samples_per_second": 36.165,
      "eval_nli-pairs_steps_per_second": 1.206,
      "eval_sts-test_pearson_cosine": 0.6694155778571752,
      "eval_sts-test_pearson_dot": 0.5201102118957572,
      "eval_sts-test_pearson_euclidean": 0.6613028243200022,
      "eval_sts-test_pearson_manhattan": 0.6670710500315469,
      "eval_sts-test_pearson_max": 0.6694155778571752,
      "eval_sts-test_spearman_cosine": 0.6367853204388882,
      "eval_sts-test_spearman_dot": 0.4940207180607985,
      "eval_sts-test_spearman_euclidean": 0.6391132775161348,
      "eval_sts-test_spearman_manhattan": 0.6446159957787251,
      "eval_sts-test_spearman_max": 0.6446159957787251,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_vitaminc-pairs_loss": 3.4987735748291016,
      "eval_vitaminc-pairs_runtime": 2.1678,
      "eval_vitaminc-pairs_samples_per_second": 69.194,
      "eval_vitaminc-pairs_steps_per_second": 2.306,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_qnli-contrastive_loss": 12.915559768676758,
      "eval_qnli-contrastive_runtime": 0.4918,
      "eval_qnli-contrastive_samples_per_second": 304.99,
      "eval_qnli-contrastive_steps_per_second": 10.166,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_scitail-pairs-qa_loss": 1.3250077962875366,
      "eval_scitail-pairs-qa_runtime": 1.154,
      "eval_scitail-pairs-qa_samples_per_second": 129.984,
      "eval_scitail-pairs-qa_steps_per_second": 4.333,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_scitail-pairs-pos_loss": 2.457335948944092,
      "eval_scitail-pairs-pos_runtime": 2.1475,
      "eval_scitail-pairs-pos_samples_per_second": 69.85,
      "eval_scitail-pairs-pos_steps_per_second": 2.328,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_xsum-pairs_loss": 3.071201801300049,
      "eval_xsum-pairs_runtime": 2.2634,
      "eval_xsum-pairs_samples_per_second": 66.271,
      "eval_xsum-pairs_steps_per_second": 2.209,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_compression-pairs_loss": 2.0629916191101074,
      "eval_compression-pairs_runtime": 0.4529,
      "eval_compression-pairs_samples_per_second": 331.23,
      "eval_compression-pairs_steps_per_second": 11.041,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_sciq_pairs_loss": 9.06814193725586,
      "eval_sciq_pairs_runtime": 7.1445,
      "eval_sciq_pairs_samples_per_second": 20.995,
      "eval_sciq_pairs_steps_per_second": 0.7,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_qasc_pairs_loss": 9.245658874511719,
      "eval_qasc_pairs_runtime": 2.0471,
      "eval_qasc_pairs_samples_per_second": 73.274,
      "eval_qasc_pairs_steps_per_second": 2.442,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_openbookqa_pairs_loss": 5.652446746826172,
      "eval_openbookqa_pairs_runtime": 0.8946,
      "eval_openbookqa_pairs_samples_per_second": 115.14,
      "eval_openbookqa_pairs_steps_per_second": 4.471,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_msmarco_pairs_loss": 4.844855785369873,
      "eval_msmarco_pairs_runtime": 2.7887,
      "eval_msmarco_pairs_samples_per_second": 53.788,
      "eval_msmarco_pairs_steps_per_second": 1.793,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_nq_pairs_loss": 5.023958206176758,
      "eval_nq_pairs_runtime": 5.0823,
      "eval_nq_pairs_samples_per_second": 29.514,
      "eval_nq_pairs_steps_per_second": 0.984,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_trivia_pairs_loss": 5.2907304763793945,
      "eval_trivia_pairs_runtime": 9.6673,
      "eval_trivia_pairs_samples_per_second": 15.516,
      "eval_trivia_pairs_steps_per_second": 0.517,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_quora_pairs_loss": 1.5572240352630615,
      "eval_quora_pairs_runtime": 1.1979,
      "eval_quora_pairs_samples_per_second": 125.218,
      "eval_quora_pairs_steps_per_second": 4.174,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_gooaq_pairs_loss": 3.970768928527832,
      "eval_gooaq_pairs_runtime": 2.117,
      "eval_gooaq_pairs_samples_per_second": 70.855,
      "eval_gooaq_pairs_steps_per_second": 2.362,
      "step": 1470
    },
    {
      "epoch": 0.2752808988764045,
      "grad_norm": 40.67585754394531,
      "learning_rate": 5.4851889683350365e-06,
      "loss": 4.185,
      "step": 1617
    },
    {
      "epoch": 0.3003064351378958,
      "grad_norm": 45.92570495605469,
      "learning_rate": 5.985699693564862e-06,
      "loss": 4.6367,
      "step": 1764
    },
    {
      "epoch": 0.32533197139938713,
      "grad_norm": 13.566838264465332,
      "learning_rate": 6.486210418794688e-06,
      "loss": 4.3615,
      "step": 1911
    },
    {
      "epoch": 0.3503575076608784,
      "grad_norm": 9.495999336242676,
      "learning_rate": 6.986721144024515e-06,
      "loss": 4.1791,
      "step": 2058
    },
    {
      "epoch": 0.37538304392236976,
      "grad_norm": 32.735416412353516,
      "learning_rate": 7.487231869254341e-06,
      "loss": 4.1051,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_nli-pairs_loss": 3.2717113494873047,
      "eval_nli-pairs_runtime": 4.0124,
      "eval_nli-pairs_samples_per_second": 37.384,
      "eval_nli-pairs_steps_per_second": 1.246,
      "eval_sts-test_pearson_cosine": 0.6958570089637609,
      "eval_sts-test_pearson_dot": 0.5824298957890577,
      "eval_sts-test_pearson_euclidean": 0.6893962819387462,
      "eval_sts-test_pearson_manhattan": 0.6993681181979946,
      "eval_sts-test_pearson_max": 0.6993681181979946,
      "eval_sts-test_spearman_cosine": 0.6652712160836801,
      "eval_sts-test_spearman_dot": 0.5536505624407877,
      "eval_sts-test_spearman_euclidean": 0.6659844314307678,
      "eval_sts-test_spearman_manhattan": 0.675740852112121,
      "eval_sts-test_spearman_max": 0.675740852112121,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_vitaminc-pairs_loss": 2.7197911739349365,
      "eval_vitaminc-pairs_runtime": 2.1625,
      "eval_vitaminc-pairs_samples_per_second": 69.365,
      "eval_vitaminc-pairs_steps_per_second": 2.312,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_qnli-contrastive_loss": 9.638714790344238,
      "eval_qnli-contrastive_runtime": 0.4877,
      "eval_qnli-contrastive_samples_per_second": 307.567,
      "eval_qnli-contrastive_steps_per_second": 10.252,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_scitail-pairs-qa_loss": 0.8106752634048462,
      "eval_scitail-pairs-qa_runtime": 1.1588,
      "eval_scitail-pairs-qa_samples_per_second": 129.449,
      "eval_scitail-pairs-qa_steps_per_second": 4.315,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_scitail-pairs-pos_loss": 1.8894625902175903,
      "eval_scitail-pairs-pos_runtime": 2.1181,
      "eval_scitail-pairs-pos_samples_per_second": 70.817,
      "eval_scitail-pairs-pos_steps_per_second": 2.361,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_xsum-pairs_loss": 2.262718439102173,
      "eval_xsum-pairs_runtime": 2.2585,
      "eval_xsum-pairs_samples_per_second": 66.416,
      "eval_xsum-pairs_steps_per_second": 2.214,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_compression-pairs_loss": 1.4910633563995361,
      "eval_compression-pairs_runtime": 0.4462,
      "eval_compression-pairs_samples_per_second": 336.204,
      "eval_compression-pairs_steps_per_second": 11.207,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_sciq_pairs_loss": 8.59740161895752,
      "eval_sciq_pairs_runtime": 7.1845,
      "eval_sciq_pairs_samples_per_second": 20.878,
      "eval_sciq_pairs_steps_per_second": 0.696,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_qasc_pairs_loss": 8.103879928588867,
      "eval_qasc_pairs_runtime": 2.0762,
      "eval_qasc_pairs_samples_per_second": 72.246,
      "eval_qasc_pairs_steps_per_second": 2.408,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_openbookqa_pairs_loss": 5.090969562530518,
      "eval_openbookqa_pairs_runtime": 0.89,
      "eval_openbookqa_pairs_samples_per_second": 115.726,
      "eval_openbookqa_pairs_steps_per_second": 4.494,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_msmarco_pairs_loss": 3.9566943645477295,
      "eval_msmarco_pairs_runtime": 2.8183,
      "eval_msmarco_pairs_samples_per_second": 53.223,
      "eval_msmarco_pairs_steps_per_second": 1.774,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_nq_pairs_loss": 4.009054183959961,
      "eval_nq_pairs_runtime": 5.0219,
      "eval_nq_pairs_samples_per_second": 29.869,
      "eval_nq_pairs_steps_per_second": 0.996,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_trivia_pairs_loss": 4.286431312561035,
      "eval_trivia_pairs_runtime": 9.4975,
      "eval_trivia_pairs_samples_per_second": 15.794,
      "eval_trivia_pairs_steps_per_second": 0.526,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_quora_pairs_loss": 1.123273491859436,
      "eval_quora_pairs_runtime": 1.1487,
      "eval_quora_pairs_samples_per_second": 130.586,
      "eval_quora_pairs_steps_per_second": 4.353,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_gooaq_pairs_loss": 3.222414255142212,
      "eval_gooaq_pairs_runtime": 2.0173,
      "eval_gooaq_pairs_samples_per_second": 74.357,
      "eval_gooaq_pairs_steps_per_second": 2.479,
      "step": 2205
    },
    {
      "epoch": 0.4004085801838611,
      "grad_norm": 218.56105041503906,
      "learning_rate": 7.987742594484168e-06,
      "loss": 3.7674,
      "step": 2352
    },
    {
      "epoch": 0.4254341164453524,
      "grad_norm": 27.877609252929688,
      "learning_rate": 8.488253319713993e-06,
      "loss": 3.8729,
      "step": 2499
    },
    {
      "epoch": 0.45045965270684374,
      "grad_norm": 33.50013732910156,
      "learning_rate": 8.988764044943822e-06,
      "loss": 3.4527,
      "step": 2646
    },
    {
      "epoch": 0.475485188968335,
      "grad_norm": 14.015911102294922,
      "learning_rate": 9.489274770173647e-06,
      "loss": 3.3545,
      "step": 2793
    },
    {
      "epoch": 0.5005107252298263,
      "grad_norm": 33.59694290161133,
      "learning_rate": 9.989785495403473e-06,
      "loss": 3.3247,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_nli-pairs_loss": 2.7121565341949463,
      "eval_nli-pairs_runtime": 4.1564,
      "eval_nli-pairs_samples_per_second": 36.089,
      "eval_nli-pairs_steps_per_second": 1.203,
      "eval_sts-test_pearson_cosine": 0.716623047702725,
      "eval_sts-test_pearson_dot": 0.6128451070598809,
      "eval_sts-test_pearson_euclidean": 0.7138791236031807,
      "eval_sts-test_pearson_manhattan": 0.7213151818687454,
      "eval_sts-test_pearson_max": 0.7213151818687454,
      "eval_sts-test_spearman_cosine": 0.6919792400941177,
      "eval_sts-test_spearman_dot": 0.5867158357121192,
      "eval_sts-test_spearman_euclidean": 0.6925037259567834,
      "eval_sts-test_spearman_manhattan": 0.7008895667910079,
      "eval_sts-test_spearman_max": 0.7008895667910079,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_vitaminc-pairs_loss": 2.225992441177368,
      "eval_vitaminc-pairs_runtime": 2.253,
      "eval_vitaminc-pairs_samples_per_second": 66.577,
      "eval_vitaminc-pairs_steps_per_second": 2.219,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_qnli-contrastive_loss": 4.92629861831665,
      "eval_qnli-contrastive_runtime": 0.5005,
      "eval_qnli-contrastive_samples_per_second": 299.691,
      "eval_qnli-contrastive_steps_per_second": 9.99,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_scitail-pairs-qa_loss": 0.5898066163063049,
      "eval_scitail-pairs-qa_runtime": 1.2227,
      "eval_scitail-pairs-qa_samples_per_second": 122.682,
      "eval_scitail-pairs-qa_steps_per_second": 4.089,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_scitail-pairs-pos_loss": 1.4237287044525146,
      "eval_scitail-pairs-pos_runtime": 2.4409,
      "eval_scitail-pairs-pos_samples_per_second": 61.452,
      "eval_scitail-pairs-pos_steps_per_second": 2.048,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_xsum-pairs_loss": 1.8388895988464355,
      "eval_xsum-pairs_runtime": 2.2831,
      "eval_xsum-pairs_samples_per_second": 65.7,
      "eval_xsum-pairs_steps_per_second": 2.19,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_compression-pairs_loss": 1.1590967178344727,
      "eval_compression-pairs_runtime": 0.5152,
      "eval_compression-pairs_samples_per_second": 291.165,
      "eval_compression-pairs_steps_per_second": 9.706,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_sciq_pairs_loss": 8.282496452331543,
      "eval_sciq_pairs_runtime": 7.2871,
      "eval_sciq_pairs_samples_per_second": 20.584,
      "eval_sciq_pairs_steps_per_second": 0.686,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_qasc_pairs_loss": 7.817965507507324,
      "eval_qasc_pairs_runtime": 2.0211,
      "eval_qasc_pairs_samples_per_second": 74.218,
      "eval_qasc_pairs_steps_per_second": 2.474,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_openbookqa_pairs_loss": 4.619383811950684,
      "eval_openbookqa_pairs_runtime": 0.8531,
      "eval_openbookqa_pairs_samples_per_second": 120.731,
      "eval_openbookqa_pairs_steps_per_second": 4.689,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_msmarco_pairs_loss": 3.478559970855713,
      "eval_msmarco_pairs_runtime": 2.7512,
      "eval_msmarco_pairs_samples_per_second": 54.522,
      "eval_msmarco_pairs_steps_per_second": 1.817,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_nq_pairs_loss": 3.3449866771698,
      "eval_nq_pairs_runtime": 5.0591,
      "eval_nq_pairs_samples_per_second": 29.649,
      "eval_nq_pairs_steps_per_second": 0.988,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_trivia_pairs_loss": 3.524484872817993,
      "eval_trivia_pairs_runtime": 9.662,
      "eval_trivia_pairs_samples_per_second": 15.525,
      "eval_trivia_pairs_steps_per_second": 0.517,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_quora_pairs_loss": 0.9095575213432312,
      "eval_quora_pairs_runtime": 1.2482,
      "eval_quora_pairs_samples_per_second": 120.175,
      "eval_quora_pairs_steps_per_second": 4.006,
      "step": 2940
    },
    {
      "epoch": 0.5005107252298263,
      "eval_gooaq_pairs_loss": 2.6586034297943115,
      "eval_gooaq_pairs_runtime": 2.1091,
      "eval_gooaq_pairs_samples_per_second": 71.12,
      "eval_gooaq_pairs_steps_per_second": 2.371,
      "step": 2940
    },
    {
      "epoch": 0.5255362614913177,
      "grad_norm": 35.33409118652344,
      "learning_rate": 1.04902962206333e-05,
      "loss": 3.116,
      "step": 3087
    },
    {
      "epoch": 0.550561797752809,
      "grad_norm": 22.29003143310547,
      "learning_rate": 1.0990806945863125e-05,
      "loss": 3.2418,
      "step": 3234
    },
    {
      "epoch": 0.5755873340143003,
      "grad_norm": 31.277965545654297,
      "learning_rate": 1.1491317671092953e-05,
      "loss": 3.0757,
      "step": 3381
    },
    {
      "epoch": 0.6006128702757916,
      "grad_norm": 24.612506866455078,
      "learning_rate": 1.1991828396322778e-05,
      "loss": 2.8524,
      "step": 3528
    },
    {
      "epoch": 0.625638406537283,
      "grad_norm": 25.11741065979004,
      "learning_rate": 1.2492339121552605e-05,
      "loss": 2.6875,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_nli-pairs_loss": 2.479051113128662,
      "eval_nli-pairs_runtime": 3.9943,
      "eval_nli-pairs_samples_per_second": 37.553,
      "eval_nli-pairs_steps_per_second": 1.252,
      "eval_sts-test_pearson_cosine": 0.7278742453545186,
      "eval_sts-test_pearson_dot": 0.6217650825208566,
      "eval_sts-test_pearson_euclidean": 0.7243228472931561,
      "eval_sts-test_pearson_manhattan": 0.7333297580184588,
      "eval_sts-test_pearson_max": 0.7333297580184588,
      "eval_sts-test_spearman_cosine": 0.7013110457844404,
      "eval_sts-test_spearman_dot": 0.5970993074902947,
      "eval_sts-test_spearman_euclidean": 0.701564129266252,
      "eval_sts-test_spearman_manhattan": 0.7116482009924582,
      "eval_sts-test_spearman_max": 0.7116482009924582,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_vitaminc-pairs_loss": 1.974273681640625,
      "eval_vitaminc-pairs_runtime": 2.1754,
      "eval_vitaminc-pairs_samples_per_second": 68.953,
      "eval_vitaminc-pairs_steps_per_second": 2.298,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_qnli-contrastive_loss": 1.7706010341644287,
      "eval_qnli-contrastive_runtime": 0.4866,
      "eval_qnli-contrastive_samples_per_second": 308.244,
      "eval_qnli-contrastive_steps_per_second": 10.275,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_scitail-pairs-qa_loss": 0.4400452673435211,
      "eval_scitail-pairs-qa_runtime": 1.1519,
      "eval_scitail-pairs-qa_samples_per_second": 130.222,
      "eval_scitail-pairs-qa_steps_per_second": 4.341,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_scitail-pairs-pos_loss": 1.1909903287887573,
      "eval_scitail-pairs-pos_runtime": 2.1319,
      "eval_scitail-pairs-pos_samples_per_second": 70.36,
      "eval_scitail-pairs-pos_steps_per_second": 2.345,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_xsum-pairs_loss": 1.4811985492706299,
      "eval_xsum-pairs_runtime": 2.254,
      "eval_xsum-pairs_samples_per_second": 66.548,
      "eval_xsum-pairs_steps_per_second": 2.218,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_compression-pairs_loss": 0.8453781008720398,
      "eval_compression-pairs_runtime": 0.4401,
      "eval_compression-pairs_samples_per_second": 340.826,
      "eval_compression-pairs_steps_per_second": 11.361,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_sciq_pairs_loss": 8.014656066894531,
      "eval_sciq_pairs_runtime": 7.0707,
      "eval_sciq_pairs_samples_per_second": 21.214,
      "eval_sciq_pairs_steps_per_second": 0.707,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_qasc_pairs_loss": 6.9316277503967285,
      "eval_qasc_pairs_runtime": 2.0338,
      "eval_qasc_pairs_samples_per_second": 73.752,
      "eval_qasc_pairs_steps_per_second": 2.458,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_openbookqa_pairs_loss": 4.21690034866333,
      "eval_openbookqa_pairs_runtime": 0.918,
      "eval_openbookqa_pairs_samples_per_second": 112.202,
      "eval_openbookqa_pairs_steps_per_second": 4.357,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_msmarco_pairs_loss": 3.0209598541259766,
      "eval_msmarco_pairs_runtime": 2.7749,
      "eval_msmarco_pairs_samples_per_second": 54.056,
      "eval_msmarco_pairs_steps_per_second": 1.802,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_nq_pairs_loss": 2.956088066101074,
      "eval_nq_pairs_runtime": 5.0024,
      "eval_nq_pairs_samples_per_second": 29.986,
      "eval_nq_pairs_steps_per_second": 1.0,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_trivia_pairs_loss": 3.17364501953125,
      "eval_trivia_pairs_runtime": 9.4856,
      "eval_trivia_pairs_samples_per_second": 15.813,
      "eval_trivia_pairs_steps_per_second": 0.527,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_quora_pairs_loss": 0.763593852519989,
      "eval_quora_pairs_runtime": 1.1441,
      "eval_quora_pairs_samples_per_second": 131.104,
      "eval_quora_pairs_steps_per_second": 4.37,
      "step": 3675
    },
    {
      "epoch": 0.625638406537283,
      "eval_gooaq_pairs_loss": 2.3524909019470215,
      "eval_gooaq_pairs_runtime": 2.0161,
      "eval_gooaq_pairs_samples_per_second": 74.4,
      "eval_gooaq_pairs_steps_per_second": 2.48,
      "step": 3675
    },
    {
      "epoch": 0.6506639427987743,
      "grad_norm": 31.163997650146484,
      "learning_rate": 1.2992849846782432e-05,
      "loss": 2.7808,
      "step": 3822
    },
    {
      "epoch": 0.6756894790602656,
      "grad_norm": 14.883658409118652,
      "learning_rate": 1.3493360572012258e-05,
      "loss": 2.5687,
      "step": 3969
    },
    {
      "epoch": 0.7007150153217568,
      "grad_norm": 5.874042987823486,
      "learning_rate": 1.3993871297242083e-05,
      "loss": 2.3034,
      "step": 4116
    },
    {
      "epoch": 0.7257405515832482,
      "grad_norm": 31.464054107666016,
      "learning_rate": 1.4494382022471912e-05,
      "loss": 2.4412,
      "step": 4263
    },
    {
      "epoch": 0.7507660878447395,
      "grad_norm": 16.43915367126465,
      "learning_rate": 1.4994892747701737e-05,
      "loss": 2.3293,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_nli-pairs_loss": 2.3226094245910645,
      "eval_nli-pairs_runtime": 4.113,
      "eval_nli-pairs_samples_per_second": 36.47,
      "eval_nli-pairs_steps_per_second": 1.216,
      "eval_sts-test_pearson_cosine": 0.7356971966139032,
      "eval_sts-test_pearson_dot": 0.6150809513049869,
      "eval_sts-test_pearson_euclidean": 0.7330733579988641,
      "eval_sts-test_pearson_manhattan": 0.7423412248131348,
      "eval_sts-test_pearson_max": 0.7423412248131348,
      "eval_sts-test_spearman_cosine": 0.7121899723082045,
      "eval_sts-test_spearman_dot": 0.5926505936679538,
      "eval_sts-test_spearman_euclidean": 0.7130179905407037,
      "eval_sts-test_spearman_manhattan": 0.7227257562995023,
      "eval_sts-test_spearman_max": 0.7227257562995023,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_vitaminc-pairs_loss": 1.7956713438034058,
      "eval_vitaminc-pairs_runtime": 2.174,
      "eval_vitaminc-pairs_samples_per_second": 68.996,
      "eval_vitaminc-pairs_steps_per_second": 2.3,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_qnli-contrastive_loss": 1.0078614950180054,
      "eval_qnli-contrastive_runtime": 0.4874,
      "eval_qnli-contrastive_samples_per_second": 307.763,
      "eval_qnli-contrastive_steps_per_second": 10.259,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_scitail-pairs-qa_loss": 0.36971578001976013,
      "eval_scitail-pairs-qa_runtime": 1.164,
      "eval_scitail-pairs-qa_samples_per_second": 128.863,
      "eval_scitail-pairs-qa_steps_per_second": 4.295,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_scitail-pairs-pos_loss": 1.0497769117355347,
      "eval_scitail-pairs-pos_runtime": 2.1205,
      "eval_scitail-pairs-pos_samples_per_second": 70.74,
      "eval_scitail-pairs-pos_steps_per_second": 2.358,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_xsum-pairs_loss": 1.1691261529922485,
      "eval_xsum-pairs_runtime": 2.259,
      "eval_xsum-pairs_samples_per_second": 66.401,
      "eval_xsum-pairs_steps_per_second": 2.213,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_compression-pairs_loss": 0.5027483105659485,
      "eval_compression-pairs_runtime": 0.4403,
      "eval_compression-pairs_samples_per_second": 340.682,
      "eval_compression-pairs_steps_per_second": 11.356,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_sciq_pairs_loss": 7.823739528656006,
      "eval_sciq_pairs_runtime": 7.0738,
      "eval_sciq_pairs_samples_per_second": 21.205,
      "eval_sciq_pairs_steps_per_second": 0.707,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_qasc_pairs_loss": 6.404655933380127,
      "eval_qasc_pairs_runtime": 2.0346,
      "eval_qasc_pairs_samples_per_second": 73.723,
      "eval_qasc_pairs_steps_per_second": 2.457,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_openbookqa_pairs_loss": 3.857389211654663,
      "eval_openbookqa_pairs_runtime": 0.8544,
      "eval_openbookqa_pairs_samples_per_second": 120.547,
      "eval_openbookqa_pairs_steps_per_second": 4.681,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_msmarco_pairs_loss": 2.7028510570526123,
      "eval_msmarco_pairs_runtime": 2.7448,
      "eval_msmarco_pairs_samples_per_second": 54.649,
      "eval_msmarco_pairs_steps_per_second": 1.822,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_nq_pairs_loss": 2.679351329803467,
      "eval_nq_pairs_runtime": 5.067,
      "eval_nq_pairs_samples_per_second": 29.603,
      "eval_nq_pairs_steps_per_second": 0.987,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_trivia_pairs_loss": 2.8798065185546875,
      "eval_trivia_pairs_runtime": 9.5449,
      "eval_trivia_pairs_samples_per_second": 15.715,
      "eval_trivia_pairs_steps_per_second": 0.524,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_quora_pairs_loss": 0.6825175285339355,
      "eval_quora_pairs_runtime": 1.1431,
      "eval_quora_pairs_samples_per_second": 131.221,
      "eval_quora_pairs_steps_per_second": 4.374,
      "step": 4410
    },
    {
      "epoch": 0.7507660878447395,
      "eval_gooaq_pairs_loss": 2.0472166538238525,
      "eval_gooaq_pairs_runtime": 2.0218,
      "eval_gooaq_pairs_samples_per_second": 74.191,
      "eval_gooaq_pairs_steps_per_second": 2.473,
      "step": 4410
    },
    {
      "epoch": 0.7757916241062308,
      "grad_norm": 4.2425055503845215,
      "learning_rate": 1.5495403472931565e-05,
      "loss": 2.3651,
      "step": 4557
    },
    {
      "epoch": 0.8008171603677222,
      "grad_norm": 22.42776107788086,
      "learning_rate": 1.5995914198161388e-05,
      "loss": 2.6296,
      "step": 4704
    },
    {
      "epoch": 0.8258426966292135,
      "grad_norm": 21.169517517089844,
      "learning_rate": 1.6496424923391215e-05,
      "loss": 2.2108,
      "step": 4851
    },
    {
      "epoch": 0.8508682328907048,
      "grad_norm": 23.326181411743164,
      "learning_rate": 1.699693564862104e-05,
      "loss": 2.1852,
      "step": 4998
    },
    {
      "epoch": 0.8758937691521961,
      "grad_norm": 24.574176788330078,
      "learning_rate": 1.7497446373850868e-05,
      "loss": 2.2944,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_nli-pairs_loss": 2.0634915828704834,
      "eval_nli-pairs_runtime": 4.0019,
      "eval_nli-pairs_samples_per_second": 37.482,
      "eval_nli-pairs_steps_per_second": 1.249,
      "eval_sts-test_pearson_cosine": 0.7466390532977636,
      "eval_sts-test_pearson_dot": 0.612259458274589,
      "eval_sts-test_pearson_euclidean": 0.7432536346376271,
      "eval_sts-test_pearson_manhattan": 0.7500490179501229,
      "eval_sts-test_pearson_max": 0.7500490179501229,
      "eval_sts-test_spearman_cosine": 0.728273260456201,
      "eval_sts-test_spearman_dot": 0.5960115087190596,
      "eval_sts-test_spearman_euclidean": 0.7272394395622148,
      "eval_sts-test_spearman_manhattan": 0.7334149564445704,
      "eval_sts-test_spearman_max": 0.7334149564445704,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_vitaminc-pairs_loss": 1.638654112815857,
      "eval_vitaminc-pairs_runtime": 2.1637,
      "eval_vitaminc-pairs_samples_per_second": 69.327,
      "eval_vitaminc-pairs_steps_per_second": 2.311,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_qnli-contrastive_loss": 0.9639705419540405,
      "eval_qnli-contrastive_runtime": 0.4889,
      "eval_qnli-contrastive_samples_per_second": 306.825,
      "eval_qnli-contrastive_steps_per_second": 10.228,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_scitail-pairs-qa_loss": 0.31595128774642944,
      "eval_scitail-pairs-qa_runtime": 1.1467,
      "eval_scitail-pairs-qa_samples_per_second": 130.806,
      "eval_scitail-pairs-qa_steps_per_second": 4.36,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_scitail-pairs-pos_loss": 0.9187478423118591,
      "eval_scitail-pairs-pos_runtime": 2.1273,
      "eval_scitail-pairs-pos_samples_per_second": 70.512,
      "eval_scitail-pairs-pos_steps_per_second": 2.35,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_xsum-pairs_loss": 1.060194492340088,
      "eval_xsum-pairs_runtime": 2.2836,
      "eval_xsum-pairs_samples_per_second": 65.686,
      "eval_xsum-pairs_steps_per_second": 2.19,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_compression-pairs_loss": 0.41078585386276245,
      "eval_compression-pairs_runtime": 0.4434,
      "eval_compression-pairs_samples_per_second": 338.276,
      "eval_compression-pairs_steps_per_second": 11.276,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_sciq_pairs_loss": 7.577760696411133,
      "eval_sciq_pairs_runtime": 7.1025,
      "eval_sciq_pairs_samples_per_second": 21.119,
      "eval_sciq_pairs_steps_per_second": 0.704,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_qasc_pairs_loss": 6.353766918182373,
      "eval_qasc_pairs_runtime": 2.0113,
      "eval_qasc_pairs_samples_per_second": 74.58,
      "eval_qasc_pairs_steps_per_second": 2.486,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_openbookqa_pairs_loss": 3.7140932083129883,
      "eval_openbookqa_pairs_runtime": 0.8529,
      "eval_openbookqa_pairs_samples_per_second": 120.762,
      "eval_openbookqa_pairs_steps_per_second": 4.69,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_msmarco_pairs_loss": 2.3862576484680176,
      "eval_msmarco_pairs_runtime": 2.8953,
      "eval_msmarco_pairs_samples_per_second": 51.808,
      "eval_msmarco_pairs_steps_per_second": 1.727,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_nq_pairs_loss": 2.3543190956115723,
      "eval_nq_pairs_runtime": 5.0048,
      "eval_nq_pairs_samples_per_second": 29.971,
      "eval_nq_pairs_steps_per_second": 0.999,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_trivia_pairs_loss": 2.494807481765747,
      "eval_trivia_pairs_runtime": 9.5513,
      "eval_trivia_pairs_samples_per_second": 15.705,
      "eval_trivia_pairs_steps_per_second": 0.523,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_quora_pairs_loss": 0.6137441992759705,
      "eval_quora_pairs_runtime": 1.1541,
      "eval_quora_pairs_samples_per_second": 129.967,
      "eval_quora_pairs_steps_per_second": 4.332,
      "step": 5145
    },
    {
      "epoch": 0.8758937691521961,
      "eval_gooaq_pairs_loss": 1.8279658555984497,
      "eval_gooaq_pairs_runtime": 2.0951,
      "eval_gooaq_pairs_samples_per_second": 71.595,
      "eval_gooaq_pairs_steps_per_second": 2.387,
      "step": 5145
    },
    {
      "epoch": 0.9009193054136875,
      "grad_norm": 10.590804100036621,
      "learning_rate": 1.7997957099080695e-05,
      "loss": 2.2133,
      "step": 5292
    },
    {
      "epoch": 0.9259448416751788,
      "grad_norm": 18.527711868286133,
      "learning_rate": 1.849846782431052e-05,
      "loss": 2.2255,
      "step": 5439
    },
    {
      "epoch": 0.95097037793667,
      "grad_norm": 2.617710828781128,
      "learning_rate": 1.8995573714674838e-05,
      "loss": 2.3502,
      "step": 5586
    },
    {
      "epoch": 0.9759959141981613,
      "grad_norm": 19.551551818847656,
      "learning_rate": 1.9496084439904668e-05,
      "loss": 1.8964,
      "step": 5733
    }
  ],
  "logging_steps": 147,
  "max_steps": 29370,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2937,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}