File size: 17,929 Bytes
a6da7f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5,
  "eval_steps": 735,
  "global_step": 2937,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02502553626149132,
      "grad_norm": 65.55949401855469,
      "learning_rate": 4.834865509022812e-07,
      "loss": 16.851,
      "step": 147
    },
    {
      "epoch": 0.05005107252298264,
      "grad_norm": 23.207971572875977,
      "learning_rate": 9.805924412665985e-07,
      "loss": 11.2787,
      "step": 294
    },
    {
      "epoch": 0.07507660878447395,
      "grad_norm": 176.1532440185547,
      "learning_rate": 1.481103166496425e-06,
      "loss": 8.9166,
      "step": 441
    },
    {
      "epoch": 0.10010214504596528,
      "grad_norm": 22.1564998626709,
      "learning_rate": 1.981613891726251e-06,
      "loss": 7.9463,
      "step": 588
    },
    {
      "epoch": 0.12512768130745658,
      "grad_norm": 20.11876106262207,
      "learning_rate": 2.4821246169560777e-06,
      "loss": 7.2108,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_nli-pairs_loss": 6.905651569366455,
      "eval_nli-pairs_runtime": 4.0844,
      "eval_nli-pairs_samples_per_second": 36.725,
      "eval_nli-pairs_steps_per_second": 1.224,
      "eval_sts-test_pearson_cosine": 0.3740256550072784,
      "eval_sts-test_pearson_dot": 0.13384893803205677,
      "eval_sts-test_pearson_euclidean": 0.3912387619869807,
      "eval_sts-test_pearson_manhattan": 0.4202605137823524,
      "eval_sts-test_pearson_max": 0.4202605137823524,
      "eval_sts-test_spearman_cosine": 0.37210107338950205,
      "eval_sts-test_spearman_dot": 0.12092409843417483,
      "eval_sts-test_spearman_euclidean": 0.39172287978780546,
      "eval_sts-test_spearman_manhattan": 0.4169664738563951,
      "eval_sts-test_spearman_max": 0.4169664738563951,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_vitaminc-pairs_loss": 5.720878601074219,
      "eval_vitaminc-pairs_runtime": 2.1703,
      "eval_vitaminc-pairs_samples_per_second": 69.115,
      "eval_vitaminc-pairs_steps_per_second": 2.304,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_qnli-contrastive_loss": 8.1649751663208,
      "eval_qnli-contrastive_runtime": 0.4937,
      "eval_qnli-contrastive_samples_per_second": 303.841,
      "eval_qnli-contrastive_steps_per_second": 10.128,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_scitail-pairs-qa_loss": 3.7859296798706055,
      "eval_scitail-pairs-qa_runtime": 1.1509,
      "eval_scitail-pairs-qa_samples_per_second": 130.329,
      "eval_scitail-pairs-qa_steps_per_second": 4.344,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_scitail-pairs-pos_loss": 3.9919917583465576,
      "eval_scitail-pairs-pos_runtime": 2.1442,
      "eval_scitail-pairs-pos_samples_per_second": 69.956,
      "eval_scitail-pairs-pos_steps_per_second": 2.332,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_xsum-pairs_loss": 4.600368976593018,
      "eval_xsum-pairs_runtime": 2.26,
      "eval_xsum-pairs_samples_per_second": 66.371,
      "eval_xsum-pairs_steps_per_second": 2.212,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_compression-pairs_loss": 3.3037569522857666,
      "eval_compression-pairs_runtime": 0.449,
      "eval_compression-pairs_samples_per_second": 334.078,
      "eval_compression-pairs_steps_per_second": 11.136,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_sciq_pairs_loss": 10.214456558227539,
      "eval_sciq_pairs_runtime": 7.1179,
      "eval_sciq_pairs_samples_per_second": 21.074,
      "eval_sciq_pairs_steps_per_second": 0.702,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_qasc_pairs_loss": 10.58031940460205,
      "eval_qasc_pairs_runtime": 2.0175,
      "eval_qasc_pairs_samples_per_second": 74.348,
      "eval_qasc_pairs_steps_per_second": 2.478,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_openbookqa_pairs_loss": 7.862658977508545,
      "eval_openbookqa_pairs_runtime": 0.8571,
      "eval_openbookqa_pairs_samples_per_second": 120.168,
      "eval_openbookqa_pairs_steps_per_second": 4.667,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_msmarco_pairs_loss": 8.754273414611816,
      "eval_msmarco_pairs_runtime": 2.7533,
      "eval_msmarco_pairs_samples_per_second": 54.481,
      "eval_msmarco_pairs_steps_per_second": 1.816,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_nq_pairs_loss": 8.415486335754395,
      "eval_nq_pairs_runtime": 5.0894,
      "eval_nq_pairs_samples_per_second": 29.473,
      "eval_nq_pairs_steps_per_second": 0.982,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_trivia_pairs_loss": 9.051105499267578,
      "eval_trivia_pairs_runtime": 9.5498,
      "eval_trivia_pairs_samples_per_second": 15.707,
      "eval_trivia_pairs_steps_per_second": 0.524,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_quora_pairs_loss": 4.5232110023498535,
      "eval_quora_pairs_runtime": 1.1469,
      "eval_quora_pairs_samples_per_second": 130.785,
      "eval_quora_pairs_steps_per_second": 4.36,
      "step": 735
    },
    {
      "epoch": 0.12512768130745658,
      "eval_gooaq_pairs_loss": 7.579105854034424,
      "eval_gooaq_pairs_runtime": 2.0491,
      "eval_gooaq_pairs_samples_per_second": 73.203,
      "eval_gooaq_pairs_steps_per_second": 2.44,
      "step": 735
    },
    {
      "epoch": 0.1501532175689479,
      "grad_norm": 31.7736759185791,
      "learning_rate": 2.982635342185904e-06,
      "loss": 6.7709,
      "step": 882
    },
    {
      "epoch": 0.1751787538304392,
      "grad_norm": 31.57339096069336,
      "learning_rate": 3.4831460674157306e-06,
      "loss": 6.1746,
      "step": 1029
    },
    {
      "epoch": 0.20020429009193055,
      "grad_norm": 25.392702102661133,
      "learning_rate": 3.9836567926455565e-06,
      "loss": 5.7706,
      "step": 1176
    },
    {
      "epoch": 0.22522982635342187,
      "grad_norm": 32.390472412109375,
      "learning_rate": 4.484167517875383e-06,
      "loss": 5.7283,
      "step": 1323
    },
    {
      "epoch": 0.25025536261491316,
      "grad_norm": 18.85039520263672,
      "learning_rate": 4.98467824310521e-06,
      "loss": 5.1856,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_nli-pairs_loss": 4.352054119110107,
      "eval_nli-pairs_runtime": 4.1476,
      "eval_nli-pairs_samples_per_second": 36.165,
      "eval_nli-pairs_steps_per_second": 1.206,
      "eval_sts-test_pearson_cosine": 0.6694155778571752,
      "eval_sts-test_pearson_dot": 0.5201102118957572,
      "eval_sts-test_pearson_euclidean": 0.6613028243200022,
      "eval_sts-test_pearson_manhattan": 0.6670710500315469,
      "eval_sts-test_pearson_max": 0.6694155778571752,
      "eval_sts-test_spearman_cosine": 0.6367853204388882,
      "eval_sts-test_spearman_dot": 0.4940207180607985,
      "eval_sts-test_spearman_euclidean": 0.6391132775161348,
      "eval_sts-test_spearman_manhattan": 0.6446159957787251,
      "eval_sts-test_spearman_max": 0.6446159957787251,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_vitaminc-pairs_loss": 3.4987735748291016,
      "eval_vitaminc-pairs_runtime": 2.1678,
      "eval_vitaminc-pairs_samples_per_second": 69.194,
      "eval_vitaminc-pairs_steps_per_second": 2.306,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_qnli-contrastive_loss": 12.915559768676758,
      "eval_qnli-contrastive_runtime": 0.4918,
      "eval_qnli-contrastive_samples_per_second": 304.99,
      "eval_qnli-contrastive_steps_per_second": 10.166,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_scitail-pairs-qa_loss": 1.3250077962875366,
      "eval_scitail-pairs-qa_runtime": 1.154,
      "eval_scitail-pairs-qa_samples_per_second": 129.984,
      "eval_scitail-pairs-qa_steps_per_second": 4.333,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_scitail-pairs-pos_loss": 2.457335948944092,
      "eval_scitail-pairs-pos_runtime": 2.1475,
      "eval_scitail-pairs-pos_samples_per_second": 69.85,
      "eval_scitail-pairs-pos_steps_per_second": 2.328,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_xsum-pairs_loss": 3.071201801300049,
      "eval_xsum-pairs_runtime": 2.2634,
      "eval_xsum-pairs_samples_per_second": 66.271,
      "eval_xsum-pairs_steps_per_second": 2.209,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_compression-pairs_loss": 2.0629916191101074,
      "eval_compression-pairs_runtime": 0.4529,
      "eval_compression-pairs_samples_per_second": 331.23,
      "eval_compression-pairs_steps_per_second": 11.041,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_sciq_pairs_loss": 9.06814193725586,
      "eval_sciq_pairs_runtime": 7.1445,
      "eval_sciq_pairs_samples_per_second": 20.995,
      "eval_sciq_pairs_steps_per_second": 0.7,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_qasc_pairs_loss": 9.245658874511719,
      "eval_qasc_pairs_runtime": 2.0471,
      "eval_qasc_pairs_samples_per_second": 73.274,
      "eval_qasc_pairs_steps_per_second": 2.442,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_openbookqa_pairs_loss": 5.652446746826172,
      "eval_openbookqa_pairs_runtime": 0.8946,
      "eval_openbookqa_pairs_samples_per_second": 115.14,
      "eval_openbookqa_pairs_steps_per_second": 4.471,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_msmarco_pairs_loss": 4.844855785369873,
      "eval_msmarco_pairs_runtime": 2.7887,
      "eval_msmarco_pairs_samples_per_second": 53.788,
      "eval_msmarco_pairs_steps_per_second": 1.793,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_nq_pairs_loss": 5.023958206176758,
      "eval_nq_pairs_runtime": 5.0823,
      "eval_nq_pairs_samples_per_second": 29.514,
      "eval_nq_pairs_steps_per_second": 0.984,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_trivia_pairs_loss": 5.2907304763793945,
      "eval_trivia_pairs_runtime": 9.6673,
      "eval_trivia_pairs_samples_per_second": 15.516,
      "eval_trivia_pairs_steps_per_second": 0.517,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_quora_pairs_loss": 1.5572240352630615,
      "eval_quora_pairs_runtime": 1.1979,
      "eval_quora_pairs_samples_per_second": 125.218,
      "eval_quora_pairs_steps_per_second": 4.174,
      "step": 1470
    },
    {
      "epoch": 0.25025536261491316,
      "eval_gooaq_pairs_loss": 3.970768928527832,
      "eval_gooaq_pairs_runtime": 2.117,
      "eval_gooaq_pairs_samples_per_second": 70.855,
      "eval_gooaq_pairs_steps_per_second": 2.362,
      "step": 1470
    },
    {
      "epoch": 0.2752808988764045,
      "grad_norm": 40.67585754394531,
      "learning_rate": 5.4851889683350365e-06,
      "loss": 4.185,
      "step": 1617
    },
    {
      "epoch": 0.3003064351378958,
      "grad_norm": 45.92570495605469,
      "learning_rate": 5.985699693564862e-06,
      "loss": 4.6367,
      "step": 1764
    },
    {
      "epoch": 0.32533197139938713,
      "grad_norm": 13.566838264465332,
      "learning_rate": 6.486210418794688e-06,
      "loss": 4.3615,
      "step": 1911
    },
    {
      "epoch": 0.3503575076608784,
      "grad_norm": 9.495999336242676,
      "learning_rate": 6.986721144024515e-06,
      "loss": 4.1791,
      "step": 2058
    },
    {
      "epoch": 0.37538304392236976,
      "grad_norm": 32.735416412353516,
      "learning_rate": 7.487231869254341e-06,
      "loss": 4.1051,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_nli-pairs_loss": 3.2717113494873047,
      "eval_nli-pairs_runtime": 4.0124,
      "eval_nli-pairs_samples_per_second": 37.384,
      "eval_nli-pairs_steps_per_second": 1.246,
      "eval_sts-test_pearson_cosine": 0.6958570089637609,
      "eval_sts-test_pearson_dot": 0.5824298957890577,
      "eval_sts-test_pearson_euclidean": 0.6893962819387462,
      "eval_sts-test_pearson_manhattan": 0.6993681181979946,
      "eval_sts-test_pearson_max": 0.6993681181979946,
      "eval_sts-test_spearman_cosine": 0.6652712160836801,
      "eval_sts-test_spearman_dot": 0.5536505624407877,
      "eval_sts-test_spearman_euclidean": 0.6659844314307678,
      "eval_sts-test_spearman_manhattan": 0.675740852112121,
      "eval_sts-test_spearman_max": 0.675740852112121,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_vitaminc-pairs_loss": 2.7197911739349365,
      "eval_vitaminc-pairs_runtime": 2.1625,
      "eval_vitaminc-pairs_samples_per_second": 69.365,
      "eval_vitaminc-pairs_steps_per_second": 2.312,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_qnli-contrastive_loss": 9.638714790344238,
      "eval_qnli-contrastive_runtime": 0.4877,
      "eval_qnli-contrastive_samples_per_second": 307.567,
      "eval_qnli-contrastive_steps_per_second": 10.252,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_scitail-pairs-qa_loss": 0.8106752634048462,
      "eval_scitail-pairs-qa_runtime": 1.1588,
      "eval_scitail-pairs-qa_samples_per_second": 129.449,
      "eval_scitail-pairs-qa_steps_per_second": 4.315,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_scitail-pairs-pos_loss": 1.8894625902175903,
      "eval_scitail-pairs-pos_runtime": 2.1181,
      "eval_scitail-pairs-pos_samples_per_second": 70.817,
      "eval_scitail-pairs-pos_steps_per_second": 2.361,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_xsum-pairs_loss": 2.262718439102173,
      "eval_xsum-pairs_runtime": 2.2585,
      "eval_xsum-pairs_samples_per_second": 66.416,
      "eval_xsum-pairs_steps_per_second": 2.214,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_compression-pairs_loss": 1.4910633563995361,
      "eval_compression-pairs_runtime": 0.4462,
      "eval_compression-pairs_samples_per_second": 336.204,
      "eval_compression-pairs_steps_per_second": 11.207,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_sciq_pairs_loss": 8.59740161895752,
      "eval_sciq_pairs_runtime": 7.1845,
      "eval_sciq_pairs_samples_per_second": 20.878,
      "eval_sciq_pairs_steps_per_second": 0.696,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_qasc_pairs_loss": 8.103879928588867,
      "eval_qasc_pairs_runtime": 2.0762,
      "eval_qasc_pairs_samples_per_second": 72.246,
      "eval_qasc_pairs_steps_per_second": 2.408,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_openbookqa_pairs_loss": 5.090969562530518,
      "eval_openbookqa_pairs_runtime": 0.89,
      "eval_openbookqa_pairs_samples_per_second": 115.726,
      "eval_openbookqa_pairs_steps_per_second": 4.494,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_msmarco_pairs_loss": 3.9566943645477295,
      "eval_msmarco_pairs_runtime": 2.8183,
      "eval_msmarco_pairs_samples_per_second": 53.223,
      "eval_msmarco_pairs_steps_per_second": 1.774,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_nq_pairs_loss": 4.009054183959961,
      "eval_nq_pairs_runtime": 5.0219,
      "eval_nq_pairs_samples_per_second": 29.869,
      "eval_nq_pairs_steps_per_second": 0.996,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_trivia_pairs_loss": 4.286431312561035,
      "eval_trivia_pairs_runtime": 9.4975,
      "eval_trivia_pairs_samples_per_second": 15.794,
      "eval_trivia_pairs_steps_per_second": 0.526,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_quora_pairs_loss": 1.123273491859436,
      "eval_quora_pairs_runtime": 1.1487,
      "eval_quora_pairs_samples_per_second": 130.586,
      "eval_quora_pairs_steps_per_second": 4.353,
      "step": 2205
    },
    {
      "epoch": 0.37538304392236976,
      "eval_gooaq_pairs_loss": 3.222414255142212,
      "eval_gooaq_pairs_runtime": 2.0173,
      "eval_gooaq_pairs_samples_per_second": 74.357,
      "eval_gooaq_pairs_steps_per_second": 2.479,
      "step": 2205
    },
    {
      "epoch": 0.4004085801838611,
      "grad_norm": 218.56105041503906,
      "learning_rate": 7.987742594484168e-06,
      "loss": 3.7674,
      "step": 2352
    },
    {
      "epoch": 0.4254341164453524,
      "grad_norm": 27.877609252929688,
      "learning_rate": 8.488253319713993e-06,
      "loss": 3.8729,
      "step": 2499
    },
    {
      "epoch": 0.45045965270684374,
      "grad_norm": 33.50013732910156,
      "learning_rate": 8.988764044943822e-06,
      "loss": 3.4527,
      "step": 2646
    },
    {
      "epoch": 0.475485188968335,
      "grad_norm": 14.015911102294922,
      "learning_rate": 9.489274770173647e-06,
      "loss": 3.3545,
      "step": 2793
    }
  ],
  "logging_steps": 147,
  "max_steps": 29370,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2937,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}