bobox's picture
Training in progress, step 17622, checkpoint
1565e24 verified
raw
history blame
128 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 735,
"global_step": 17622,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02502553626149132,
"grad_norm": 65.55949401855469,
"learning_rate": 4.834865509022812e-07,
"loss": 16.851,
"step": 147
},
{
"epoch": 0.05005107252298264,
"grad_norm": 23.207971572875977,
"learning_rate": 9.805924412665985e-07,
"loss": 11.2787,
"step": 294
},
{
"epoch": 0.07507660878447395,
"grad_norm": 176.1532440185547,
"learning_rate": 1.481103166496425e-06,
"loss": 8.9166,
"step": 441
},
{
"epoch": 0.10010214504596528,
"grad_norm": 22.1564998626709,
"learning_rate": 1.981613891726251e-06,
"loss": 7.9463,
"step": 588
},
{
"epoch": 0.12512768130745658,
"grad_norm": 20.11876106262207,
"learning_rate": 2.4821246169560777e-06,
"loss": 7.2108,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_nli-pairs_loss": 6.905651569366455,
"eval_nli-pairs_runtime": 4.0844,
"eval_nli-pairs_samples_per_second": 36.725,
"eval_nli-pairs_steps_per_second": 1.224,
"eval_sts-test_pearson_cosine": 0.3740256550072784,
"eval_sts-test_pearson_dot": 0.13384893803205677,
"eval_sts-test_pearson_euclidean": 0.3912387619869807,
"eval_sts-test_pearson_manhattan": 0.4202605137823524,
"eval_sts-test_pearson_max": 0.4202605137823524,
"eval_sts-test_spearman_cosine": 0.37210107338950205,
"eval_sts-test_spearman_dot": 0.12092409843417483,
"eval_sts-test_spearman_euclidean": 0.39172287978780546,
"eval_sts-test_spearman_manhattan": 0.4169664738563951,
"eval_sts-test_spearman_max": 0.4169664738563951,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_vitaminc-pairs_loss": 5.720878601074219,
"eval_vitaminc-pairs_runtime": 2.1703,
"eval_vitaminc-pairs_samples_per_second": 69.115,
"eval_vitaminc-pairs_steps_per_second": 2.304,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_qnli-contrastive_loss": 8.1649751663208,
"eval_qnli-contrastive_runtime": 0.4937,
"eval_qnli-contrastive_samples_per_second": 303.841,
"eval_qnli-contrastive_steps_per_second": 10.128,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_scitail-pairs-qa_loss": 3.7859296798706055,
"eval_scitail-pairs-qa_runtime": 1.1509,
"eval_scitail-pairs-qa_samples_per_second": 130.329,
"eval_scitail-pairs-qa_steps_per_second": 4.344,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_scitail-pairs-pos_loss": 3.9919917583465576,
"eval_scitail-pairs-pos_runtime": 2.1442,
"eval_scitail-pairs-pos_samples_per_second": 69.956,
"eval_scitail-pairs-pos_steps_per_second": 2.332,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_xsum-pairs_loss": 4.600368976593018,
"eval_xsum-pairs_runtime": 2.26,
"eval_xsum-pairs_samples_per_second": 66.371,
"eval_xsum-pairs_steps_per_second": 2.212,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_compression-pairs_loss": 3.3037569522857666,
"eval_compression-pairs_runtime": 0.449,
"eval_compression-pairs_samples_per_second": 334.078,
"eval_compression-pairs_steps_per_second": 11.136,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_sciq_pairs_loss": 10.214456558227539,
"eval_sciq_pairs_runtime": 7.1179,
"eval_sciq_pairs_samples_per_second": 21.074,
"eval_sciq_pairs_steps_per_second": 0.702,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_qasc_pairs_loss": 10.58031940460205,
"eval_qasc_pairs_runtime": 2.0175,
"eval_qasc_pairs_samples_per_second": 74.348,
"eval_qasc_pairs_steps_per_second": 2.478,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_openbookqa_pairs_loss": 7.862658977508545,
"eval_openbookqa_pairs_runtime": 0.8571,
"eval_openbookqa_pairs_samples_per_second": 120.168,
"eval_openbookqa_pairs_steps_per_second": 4.667,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_msmarco_pairs_loss": 8.754273414611816,
"eval_msmarco_pairs_runtime": 2.7533,
"eval_msmarco_pairs_samples_per_second": 54.481,
"eval_msmarco_pairs_steps_per_second": 1.816,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_nq_pairs_loss": 8.415486335754395,
"eval_nq_pairs_runtime": 5.0894,
"eval_nq_pairs_samples_per_second": 29.473,
"eval_nq_pairs_steps_per_second": 0.982,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_trivia_pairs_loss": 9.051105499267578,
"eval_trivia_pairs_runtime": 9.5498,
"eval_trivia_pairs_samples_per_second": 15.707,
"eval_trivia_pairs_steps_per_second": 0.524,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_quora_pairs_loss": 4.5232110023498535,
"eval_quora_pairs_runtime": 1.1469,
"eval_quora_pairs_samples_per_second": 130.785,
"eval_quora_pairs_steps_per_second": 4.36,
"step": 735
},
{
"epoch": 0.12512768130745658,
"eval_gooaq_pairs_loss": 7.579105854034424,
"eval_gooaq_pairs_runtime": 2.0491,
"eval_gooaq_pairs_samples_per_second": 73.203,
"eval_gooaq_pairs_steps_per_second": 2.44,
"step": 735
},
{
"epoch": 0.1501532175689479,
"grad_norm": 31.7736759185791,
"learning_rate": 2.982635342185904e-06,
"loss": 6.7709,
"step": 882
},
{
"epoch": 0.1751787538304392,
"grad_norm": 31.57339096069336,
"learning_rate": 3.4831460674157306e-06,
"loss": 6.1746,
"step": 1029
},
{
"epoch": 0.20020429009193055,
"grad_norm": 25.392702102661133,
"learning_rate": 3.9836567926455565e-06,
"loss": 5.7706,
"step": 1176
},
{
"epoch": 0.22522982635342187,
"grad_norm": 32.390472412109375,
"learning_rate": 4.484167517875383e-06,
"loss": 5.7283,
"step": 1323
},
{
"epoch": 0.25025536261491316,
"grad_norm": 18.85039520263672,
"learning_rate": 4.98467824310521e-06,
"loss": 5.1856,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_nli-pairs_loss": 4.352054119110107,
"eval_nli-pairs_runtime": 4.1476,
"eval_nli-pairs_samples_per_second": 36.165,
"eval_nli-pairs_steps_per_second": 1.206,
"eval_sts-test_pearson_cosine": 0.6694155778571752,
"eval_sts-test_pearson_dot": 0.5201102118957572,
"eval_sts-test_pearson_euclidean": 0.6613028243200022,
"eval_sts-test_pearson_manhattan": 0.6670710500315469,
"eval_sts-test_pearson_max": 0.6694155778571752,
"eval_sts-test_spearman_cosine": 0.6367853204388882,
"eval_sts-test_spearman_dot": 0.4940207180607985,
"eval_sts-test_spearman_euclidean": 0.6391132775161348,
"eval_sts-test_spearman_manhattan": 0.6446159957787251,
"eval_sts-test_spearman_max": 0.6446159957787251,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_vitaminc-pairs_loss": 3.4987735748291016,
"eval_vitaminc-pairs_runtime": 2.1678,
"eval_vitaminc-pairs_samples_per_second": 69.194,
"eval_vitaminc-pairs_steps_per_second": 2.306,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_qnli-contrastive_loss": 12.915559768676758,
"eval_qnli-contrastive_runtime": 0.4918,
"eval_qnli-contrastive_samples_per_second": 304.99,
"eval_qnli-contrastive_steps_per_second": 10.166,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_scitail-pairs-qa_loss": 1.3250077962875366,
"eval_scitail-pairs-qa_runtime": 1.154,
"eval_scitail-pairs-qa_samples_per_second": 129.984,
"eval_scitail-pairs-qa_steps_per_second": 4.333,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_scitail-pairs-pos_loss": 2.457335948944092,
"eval_scitail-pairs-pos_runtime": 2.1475,
"eval_scitail-pairs-pos_samples_per_second": 69.85,
"eval_scitail-pairs-pos_steps_per_second": 2.328,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_xsum-pairs_loss": 3.071201801300049,
"eval_xsum-pairs_runtime": 2.2634,
"eval_xsum-pairs_samples_per_second": 66.271,
"eval_xsum-pairs_steps_per_second": 2.209,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_compression-pairs_loss": 2.0629916191101074,
"eval_compression-pairs_runtime": 0.4529,
"eval_compression-pairs_samples_per_second": 331.23,
"eval_compression-pairs_steps_per_second": 11.041,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_sciq_pairs_loss": 9.06814193725586,
"eval_sciq_pairs_runtime": 7.1445,
"eval_sciq_pairs_samples_per_second": 20.995,
"eval_sciq_pairs_steps_per_second": 0.7,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_qasc_pairs_loss": 9.245658874511719,
"eval_qasc_pairs_runtime": 2.0471,
"eval_qasc_pairs_samples_per_second": 73.274,
"eval_qasc_pairs_steps_per_second": 2.442,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_openbookqa_pairs_loss": 5.652446746826172,
"eval_openbookqa_pairs_runtime": 0.8946,
"eval_openbookqa_pairs_samples_per_second": 115.14,
"eval_openbookqa_pairs_steps_per_second": 4.471,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_msmarco_pairs_loss": 4.844855785369873,
"eval_msmarco_pairs_runtime": 2.7887,
"eval_msmarco_pairs_samples_per_second": 53.788,
"eval_msmarco_pairs_steps_per_second": 1.793,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_nq_pairs_loss": 5.023958206176758,
"eval_nq_pairs_runtime": 5.0823,
"eval_nq_pairs_samples_per_second": 29.514,
"eval_nq_pairs_steps_per_second": 0.984,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_trivia_pairs_loss": 5.2907304763793945,
"eval_trivia_pairs_runtime": 9.6673,
"eval_trivia_pairs_samples_per_second": 15.516,
"eval_trivia_pairs_steps_per_second": 0.517,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_quora_pairs_loss": 1.5572240352630615,
"eval_quora_pairs_runtime": 1.1979,
"eval_quora_pairs_samples_per_second": 125.218,
"eval_quora_pairs_steps_per_second": 4.174,
"step": 1470
},
{
"epoch": 0.25025536261491316,
"eval_gooaq_pairs_loss": 3.970768928527832,
"eval_gooaq_pairs_runtime": 2.117,
"eval_gooaq_pairs_samples_per_second": 70.855,
"eval_gooaq_pairs_steps_per_second": 2.362,
"step": 1470
},
{
"epoch": 0.2752808988764045,
"grad_norm": 40.67585754394531,
"learning_rate": 5.4851889683350365e-06,
"loss": 4.185,
"step": 1617
},
{
"epoch": 0.3003064351378958,
"grad_norm": 45.92570495605469,
"learning_rate": 5.985699693564862e-06,
"loss": 4.6367,
"step": 1764
},
{
"epoch": 0.32533197139938713,
"grad_norm": 13.566838264465332,
"learning_rate": 6.486210418794688e-06,
"loss": 4.3615,
"step": 1911
},
{
"epoch": 0.3503575076608784,
"grad_norm": 9.495999336242676,
"learning_rate": 6.986721144024515e-06,
"loss": 4.1791,
"step": 2058
},
{
"epoch": 0.37538304392236976,
"grad_norm": 32.735416412353516,
"learning_rate": 7.487231869254341e-06,
"loss": 4.1051,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_nli-pairs_loss": 3.2717113494873047,
"eval_nli-pairs_runtime": 4.0124,
"eval_nli-pairs_samples_per_second": 37.384,
"eval_nli-pairs_steps_per_second": 1.246,
"eval_sts-test_pearson_cosine": 0.6958570089637609,
"eval_sts-test_pearson_dot": 0.5824298957890577,
"eval_sts-test_pearson_euclidean": 0.6893962819387462,
"eval_sts-test_pearson_manhattan": 0.6993681181979946,
"eval_sts-test_pearson_max": 0.6993681181979946,
"eval_sts-test_spearman_cosine": 0.6652712160836801,
"eval_sts-test_spearman_dot": 0.5536505624407877,
"eval_sts-test_spearman_euclidean": 0.6659844314307678,
"eval_sts-test_spearman_manhattan": 0.675740852112121,
"eval_sts-test_spearman_max": 0.675740852112121,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_vitaminc-pairs_loss": 2.7197911739349365,
"eval_vitaminc-pairs_runtime": 2.1625,
"eval_vitaminc-pairs_samples_per_second": 69.365,
"eval_vitaminc-pairs_steps_per_second": 2.312,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_qnli-contrastive_loss": 9.638714790344238,
"eval_qnli-contrastive_runtime": 0.4877,
"eval_qnli-contrastive_samples_per_second": 307.567,
"eval_qnli-contrastive_steps_per_second": 10.252,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_scitail-pairs-qa_loss": 0.8106752634048462,
"eval_scitail-pairs-qa_runtime": 1.1588,
"eval_scitail-pairs-qa_samples_per_second": 129.449,
"eval_scitail-pairs-qa_steps_per_second": 4.315,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_scitail-pairs-pos_loss": 1.8894625902175903,
"eval_scitail-pairs-pos_runtime": 2.1181,
"eval_scitail-pairs-pos_samples_per_second": 70.817,
"eval_scitail-pairs-pos_steps_per_second": 2.361,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_xsum-pairs_loss": 2.262718439102173,
"eval_xsum-pairs_runtime": 2.2585,
"eval_xsum-pairs_samples_per_second": 66.416,
"eval_xsum-pairs_steps_per_second": 2.214,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_compression-pairs_loss": 1.4910633563995361,
"eval_compression-pairs_runtime": 0.4462,
"eval_compression-pairs_samples_per_second": 336.204,
"eval_compression-pairs_steps_per_second": 11.207,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_sciq_pairs_loss": 8.59740161895752,
"eval_sciq_pairs_runtime": 7.1845,
"eval_sciq_pairs_samples_per_second": 20.878,
"eval_sciq_pairs_steps_per_second": 0.696,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_qasc_pairs_loss": 8.103879928588867,
"eval_qasc_pairs_runtime": 2.0762,
"eval_qasc_pairs_samples_per_second": 72.246,
"eval_qasc_pairs_steps_per_second": 2.408,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_openbookqa_pairs_loss": 5.090969562530518,
"eval_openbookqa_pairs_runtime": 0.89,
"eval_openbookqa_pairs_samples_per_second": 115.726,
"eval_openbookqa_pairs_steps_per_second": 4.494,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_msmarco_pairs_loss": 3.9566943645477295,
"eval_msmarco_pairs_runtime": 2.8183,
"eval_msmarco_pairs_samples_per_second": 53.223,
"eval_msmarco_pairs_steps_per_second": 1.774,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_nq_pairs_loss": 4.009054183959961,
"eval_nq_pairs_runtime": 5.0219,
"eval_nq_pairs_samples_per_second": 29.869,
"eval_nq_pairs_steps_per_second": 0.996,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_trivia_pairs_loss": 4.286431312561035,
"eval_trivia_pairs_runtime": 9.4975,
"eval_trivia_pairs_samples_per_second": 15.794,
"eval_trivia_pairs_steps_per_second": 0.526,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_quora_pairs_loss": 1.123273491859436,
"eval_quora_pairs_runtime": 1.1487,
"eval_quora_pairs_samples_per_second": 130.586,
"eval_quora_pairs_steps_per_second": 4.353,
"step": 2205
},
{
"epoch": 0.37538304392236976,
"eval_gooaq_pairs_loss": 3.222414255142212,
"eval_gooaq_pairs_runtime": 2.0173,
"eval_gooaq_pairs_samples_per_second": 74.357,
"eval_gooaq_pairs_steps_per_second": 2.479,
"step": 2205
},
{
"epoch": 0.4004085801838611,
"grad_norm": 218.56105041503906,
"learning_rate": 7.987742594484168e-06,
"loss": 3.7674,
"step": 2352
},
{
"epoch": 0.4254341164453524,
"grad_norm": 27.877609252929688,
"learning_rate": 8.488253319713993e-06,
"loss": 3.8729,
"step": 2499
},
{
"epoch": 0.45045965270684374,
"grad_norm": 33.50013732910156,
"learning_rate": 8.988764044943822e-06,
"loss": 3.4527,
"step": 2646
},
{
"epoch": 0.475485188968335,
"grad_norm": 14.015911102294922,
"learning_rate": 9.489274770173647e-06,
"loss": 3.3545,
"step": 2793
},
{
"epoch": 0.5005107252298263,
"grad_norm": 33.59694290161133,
"learning_rate": 9.989785495403473e-06,
"loss": 3.3247,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_nli-pairs_loss": 2.7121565341949463,
"eval_nli-pairs_runtime": 4.1564,
"eval_nli-pairs_samples_per_second": 36.089,
"eval_nli-pairs_steps_per_second": 1.203,
"eval_sts-test_pearson_cosine": 0.716623047702725,
"eval_sts-test_pearson_dot": 0.6128451070598809,
"eval_sts-test_pearson_euclidean": 0.7138791236031807,
"eval_sts-test_pearson_manhattan": 0.7213151818687454,
"eval_sts-test_pearson_max": 0.7213151818687454,
"eval_sts-test_spearman_cosine": 0.6919792400941177,
"eval_sts-test_spearman_dot": 0.5867158357121192,
"eval_sts-test_spearman_euclidean": 0.6925037259567834,
"eval_sts-test_spearman_manhattan": 0.7008895667910079,
"eval_sts-test_spearman_max": 0.7008895667910079,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_vitaminc-pairs_loss": 2.225992441177368,
"eval_vitaminc-pairs_runtime": 2.253,
"eval_vitaminc-pairs_samples_per_second": 66.577,
"eval_vitaminc-pairs_steps_per_second": 2.219,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_qnli-contrastive_loss": 4.92629861831665,
"eval_qnli-contrastive_runtime": 0.5005,
"eval_qnli-contrastive_samples_per_second": 299.691,
"eval_qnli-contrastive_steps_per_second": 9.99,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_scitail-pairs-qa_loss": 0.5898066163063049,
"eval_scitail-pairs-qa_runtime": 1.2227,
"eval_scitail-pairs-qa_samples_per_second": 122.682,
"eval_scitail-pairs-qa_steps_per_second": 4.089,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_scitail-pairs-pos_loss": 1.4237287044525146,
"eval_scitail-pairs-pos_runtime": 2.4409,
"eval_scitail-pairs-pos_samples_per_second": 61.452,
"eval_scitail-pairs-pos_steps_per_second": 2.048,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_xsum-pairs_loss": 1.8388895988464355,
"eval_xsum-pairs_runtime": 2.2831,
"eval_xsum-pairs_samples_per_second": 65.7,
"eval_xsum-pairs_steps_per_second": 2.19,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_compression-pairs_loss": 1.1590967178344727,
"eval_compression-pairs_runtime": 0.5152,
"eval_compression-pairs_samples_per_second": 291.165,
"eval_compression-pairs_steps_per_second": 9.706,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_sciq_pairs_loss": 8.282496452331543,
"eval_sciq_pairs_runtime": 7.2871,
"eval_sciq_pairs_samples_per_second": 20.584,
"eval_sciq_pairs_steps_per_second": 0.686,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_qasc_pairs_loss": 7.817965507507324,
"eval_qasc_pairs_runtime": 2.0211,
"eval_qasc_pairs_samples_per_second": 74.218,
"eval_qasc_pairs_steps_per_second": 2.474,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_openbookqa_pairs_loss": 4.619383811950684,
"eval_openbookqa_pairs_runtime": 0.8531,
"eval_openbookqa_pairs_samples_per_second": 120.731,
"eval_openbookqa_pairs_steps_per_second": 4.689,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_msmarco_pairs_loss": 3.478559970855713,
"eval_msmarco_pairs_runtime": 2.7512,
"eval_msmarco_pairs_samples_per_second": 54.522,
"eval_msmarco_pairs_steps_per_second": 1.817,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_nq_pairs_loss": 3.3449866771698,
"eval_nq_pairs_runtime": 5.0591,
"eval_nq_pairs_samples_per_second": 29.649,
"eval_nq_pairs_steps_per_second": 0.988,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_trivia_pairs_loss": 3.524484872817993,
"eval_trivia_pairs_runtime": 9.662,
"eval_trivia_pairs_samples_per_second": 15.525,
"eval_trivia_pairs_steps_per_second": 0.517,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_quora_pairs_loss": 0.9095575213432312,
"eval_quora_pairs_runtime": 1.2482,
"eval_quora_pairs_samples_per_second": 120.175,
"eval_quora_pairs_steps_per_second": 4.006,
"step": 2940
},
{
"epoch": 0.5005107252298263,
"eval_gooaq_pairs_loss": 2.6586034297943115,
"eval_gooaq_pairs_runtime": 2.1091,
"eval_gooaq_pairs_samples_per_second": 71.12,
"eval_gooaq_pairs_steps_per_second": 2.371,
"step": 2940
},
{
"epoch": 0.5255362614913177,
"grad_norm": 35.33409118652344,
"learning_rate": 1.04902962206333e-05,
"loss": 3.116,
"step": 3087
},
{
"epoch": 0.550561797752809,
"grad_norm": 22.29003143310547,
"learning_rate": 1.0990806945863125e-05,
"loss": 3.2418,
"step": 3234
},
{
"epoch": 0.5755873340143003,
"grad_norm": 31.277965545654297,
"learning_rate": 1.1491317671092953e-05,
"loss": 3.0757,
"step": 3381
},
{
"epoch": 0.6006128702757916,
"grad_norm": 24.612506866455078,
"learning_rate": 1.1991828396322778e-05,
"loss": 2.8524,
"step": 3528
},
{
"epoch": 0.625638406537283,
"grad_norm": 25.11741065979004,
"learning_rate": 1.2492339121552605e-05,
"loss": 2.6875,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_nli-pairs_loss": 2.479051113128662,
"eval_nli-pairs_runtime": 3.9943,
"eval_nli-pairs_samples_per_second": 37.553,
"eval_nli-pairs_steps_per_second": 1.252,
"eval_sts-test_pearson_cosine": 0.7278742453545186,
"eval_sts-test_pearson_dot": 0.6217650825208566,
"eval_sts-test_pearson_euclidean": 0.7243228472931561,
"eval_sts-test_pearson_manhattan": 0.7333297580184588,
"eval_sts-test_pearson_max": 0.7333297580184588,
"eval_sts-test_spearman_cosine": 0.7013110457844404,
"eval_sts-test_spearman_dot": 0.5970993074902947,
"eval_sts-test_spearman_euclidean": 0.701564129266252,
"eval_sts-test_spearman_manhattan": 0.7116482009924582,
"eval_sts-test_spearman_max": 0.7116482009924582,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_vitaminc-pairs_loss": 1.974273681640625,
"eval_vitaminc-pairs_runtime": 2.1754,
"eval_vitaminc-pairs_samples_per_second": 68.953,
"eval_vitaminc-pairs_steps_per_second": 2.298,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_qnli-contrastive_loss": 1.7706010341644287,
"eval_qnli-contrastive_runtime": 0.4866,
"eval_qnli-contrastive_samples_per_second": 308.244,
"eval_qnli-contrastive_steps_per_second": 10.275,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_scitail-pairs-qa_loss": 0.4400452673435211,
"eval_scitail-pairs-qa_runtime": 1.1519,
"eval_scitail-pairs-qa_samples_per_second": 130.222,
"eval_scitail-pairs-qa_steps_per_second": 4.341,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_scitail-pairs-pos_loss": 1.1909903287887573,
"eval_scitail-pairs-pos_runtime": 2.1319,
"eval_scitail-pairs-pos_samples_per_second": 70.36,
"eval_scitail-pairs-pos_steps_per_second": 2.345,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_xsum-pairs_loss": 1.4811985492706299,
"eval_xsum-pairs_runtime": 2.254,
"eval_xsum-pairs_samples_per_second": 66.548,
"eval_xsum-pairs_steps_per_second": 2.218,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_compression-pairs_loss": 0.8453781008720398,
"eval_compression-pairs_runtime": 0.4401,
"eval_compression-pairs_samples_per_second": 340.826,
"eval_compression-pairs_steps_per_second": 11.361,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_sciq_pairs_loss": 8.014656066894531,
"eval_sciq_pairs_runtime": 7.0707,
"eval_sciq_pairs_samples_per_second": 21.214,
"eval_sciq_pairs_steps_per_second": 0.707,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_qasc_pairs_loss": 6.9316277503967285,
"eval_qasc_pairs_runtime": 2.0338,
"eval_qasc_pairs_samples_per_second": 73.752,
"eval_qasc_pairs_steps_per_second": 2.458,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_openbookqa_pairs_loss": 4.21690034866333,
"eval_openbookqa_pairs_runtime": 0.918,
"eval_openbookqa_pairs_samples_per_second": 112.202,
"eval_openbookqa_pairs_steps_per_second": 4.357,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_msmarco_pairs_loss": 3.0209598541259766,
"eval_msmarco_pairs_runtime": 2.7749,
"eval_msmarco_pairs_samples_per_second": 54.056,
"eval_msmarco_pairs_steps_per_second": 1.802,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_nq_pairs_loss": 2.956088066101074,
"eval_nq_pairs_runtime": 5.0024,
"eval_nq_pairs_samples_per_second": 29.986,
"eval_nq_pairs_steps_per_second": 1.0,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_trivia_pairs_loss": 3.17364501953125,
"eval_trivia_pairs_runtime": 9.4856,
"eval_trivia_pairs_samples_per_second": 15.813,
"eval_trivia_pairs_steps_per_second": 0.527,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_quora_pairs_loss": 0.763593852519989,
"eval_quora_pairs_runtime": 1.1441,
"eval_quora_pairs_samples_per_second": 131.104,
"eval_quora_pairs_steps_per_second": 4.37,
"step": 3675
},
{
"epoch": 0.625638406537283,
"eval_gooaq_pairs_loss": 2.3524909019470215,
"eval_gooaq_pairs_runtime": 2.0161,
"eval_gooaq_pairs_samples_per_second": 74.4,
"eval_gooaq_pairs_steps_per_second": 2.48,
"step": 3675
},
{
"epoch": 0.6506639427987743,
"grad_norm": 31.163997650146484,
"learning_rate": 1.2992849846782432e-05,
"loss": 2.7808,
"step": 3822
},
{
"epoch": 0.6756894790602656,
"grad_norm": 14.883658409118652,
"learning_rate": 1.3493360572012258e-05,
"loss": 2.5687,
"step": 3969
},
{
"epoch": 0.7007150153217568,
"grad_norm": 5.874042987823486,
"learning_rate": 1.3993871297242083e-05,
"loss": 2.3034,
"step": 4116
},
{
"epoch": 0.7257405515832482,
"grad_norm": 31.464054107666016,
"learning_rate": 1.4494382022471912e-05,
"loss": 2.4412,
"step": 4263
},
{
"epoch": 0.7507660878447395,
"grad_norm": 16.43915367126465,
"learning_rate": 1.4994892747701737e-05,
"loss": 2.3293,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_nli-pairs_loss": 2.3226094245910645,
"eval_nli-pairs_runtime": 4.113,
"eval_nli-pairs_samples_per_second": 36.47,
"eval_nli-pairs_steps_per_second": 1.216,
"eval_sts-test_pearson_cosine": 0.7356971966139032,
"eval_sts-test_pearson_dot": 0.6150809513049869,
"eval_sts-test_pearson_euclidean": 0.7330733579988641,
"eval_sts-test_pearson_manhattan": 0.7423412248131348,
"eval_sts-test_pearson_max": 0.7423412248131348,
"eval_sts-test_spearman_cosine": 0.7121899723082045,
"eval_sts-test_spearman_dot": 0.5926505936679538,
"eval_sts-test_spearman_euclidean": 0.7130179905407037,
"eval_sts-test_spearman_manhattan": 0.7227257562995023,
"eval_sts-test_spearman_max": 0.7227257562995023,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_vitaminc-pairs_loss": 1.7956713438034058,
"eval_vitaminc-pairs_runtime": 2.174,
"eval_vitaminc-pairs_samples_per_second": 68.996,
"eval_vitaminc-pairs_steps_per_second": 2.3,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_qnli-contrastive_loss": 1.0078614950180054,
"eval_qnli-contrastive_runtime": 0.4874,
"eval_qnli-contrastive_samples_per_second": 307.763,
"eval_qnli-contrastive_steps_per_second": 10.259,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_scitail-pairs-qa_loss": 0.36971578001976013,
"eval_scitail-pairs-qa_runtime": 1.164,
"eval_scitail-pairs-qa_samples_per_second": 128.863,
"eval_scitail-pairs-qa_steps_per_second": 4.295,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_scitail-pairs-pos_loss": 1.0497769117355347,
"eval_scitail-pairs-pos_runtime": 2.1205,
"eval_scitail-pairs-pos_samples_per_second": 70.74,
"eval_scitail-pairs-pos_steps_per_second": 2.358,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_xsum-pairs_loss": 1.1691261529922485,
"eval_xsum-pairs_runtime": 2.259,
"eval_xsum-pairs_samples_per_second": 66.401,
"eval_xsum-pairs_steps_per_second": 2.213,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_compression-pairs_loss": 0.5027483105659485,
"eval_compression-pairs_runtime": 0.4403,
"eval_compression-pairs_samples_per_second": 340.682,
"eval_compression-pairs_steps_per_second": 11.356,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_sciq_pairs_loss": 7.823739528656006,
"eval_sciq_pairs_runtime": 7.0738,
"eval_sciq_pairs_samples_per_second": 21.205,
"eval_sciq_pairs_steps_per_second": 0.707,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_qasc_pairs_loss": 6.404655933380127,
"eval_qasc_pairs_runtime": 2.0346,
"eval_qasc_pairs_samples_per_second": 73.723,
"eval_qasc_pairs_steps_per_second": 2.457,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_openbookqa_pairs_loss": 3.857389211654663,
"eval_openbookqa_pairs_runtime": 0.8544,
"eval_openbookqa_pairs_samples_per_second": 120.547,
"eval_openbookqa_pairs_steps_per_second": 4.681,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_msmarco_pairs_loss": 2.7028510570526123,
"eval_msmarco_pairs_runtime": 2.7448,
"eval_msmarco_pairs_samples_per_second": 54.649,
"eval_msmarco_pairs_steps_per_second": 1.822,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_nq_pairs_loss": 2.679351329803467,
"eval_nq_pairs_runtime": 5.067,
"eval_nq_pairs_samples_per_second": 29.603,
"eval_nq_pairs_steps_per_second": 0.987,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_trivia_pairs_loss": 2.8798065185546875,
"eval_trivia_pairs_runtime": 9.5449,
"eval_trivia_pairs_samples_per_second": 15.715,
"eval_trivia_pairs_steps_per_second": 0.524,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_quora_pairs_loss": 0.6825175285339355,
"eval_quora_pairs_runtime": 1.1431,
"eval_quora_pairs_samples_per_second": 131.221,
"eval_quora_pairs_steps_per_second": 4.374,
"step": 4410
},
{
"epoch": 0.7507660878447395,
"eval_gooaq_pairs_loss": 2.0472166538238525,
"eval_gooaq_pairs_runtime": 2.0218,
"eval_gooaq_pairs_samples_per_second": 74.191,
"eval_gooaq_pairs_steps_per_second": 2.473,
"step": 4410
},
{
"epoch": 0.7757916241062308,
"grad_norm": 4.2425055503845215,
"learning_rate": 1.5495403472931565e-05,
"loss": 2.3651,
"step": 4557
},
{
"epoch": 0.8008171603677222,
"grad_norm": 22.42776107788086,
"learning_rate": 1.5995914198161388e-05,
"loss": 2.6296,
"step": 4704
},
{
"epoch": 0.8258426966292135,
"grad_norm": 21.169517517089844,
"learning_rate": 1.6496424923391215e-05,
"loss": 2.2108,
"step": 4851
},
{
"epoch": 0.8508682328907048,
"grad_norm": 23.326181411743164,
"learning_rate": 1.699693564862104e-05,
"loss": 2.1852,
"step": 4998
},
{
"epoch": 0.8758937691521961,
"grad_norm": 24.574176788330078,
"learning_rate": 1.7497446373850868e-05,
"loss": 2.2944,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_nli-pairs_loss": 2.0634915828704834,
"eval_nli-pairs_runtime": 4.0019,
"eval_nli-pairs_samples_per_second": 37.482,
"eval_nli-pairs_steps_per_second": 1.249,
"eval_sts-test_pearson_cosine": 0.7466390532977636,
"eval_sts-test_pearson_dot": 0.612259458274589,
"eval_sts-test_pearson_euclidean": 0.7432536346376271,
"eval_sts-test_pearson_manhattan": 0.7500490179501229,
"eval_sts-test_pearson_max": 0.7500490179501229,
"eval_sts-test_spearman_cosine": 0.728273260456201,
"eval_sts-test_spearman_dot": 0.5960115087190596,
"eval_sts-test_spearman_euclidean": 0.7272394395622148,
"eval_sts-test_spearman_manhattan": 0.7334149564445704,
"eval_sts-test_spearman_max": 0.7334149564445704,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_vitaminc-pairs_loss": 1.638654112815857,
"eval_vitaminc-pairs_runtime": 2.1637,
"eval_vitaminc-pairs_samples_per_second": 69.327,
"eval_vitaminc-pairs_steps_per_second": 2.311,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_qnli-contrastive_loss": 0.9639705419540405,
"eval_qnli-contrastive_runtime": 0.4889,
"eval_qnli-contrastive_samples_per_second": 306.825,
"eval_qnli-contrastive_steps_per_second": 10.228,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_scitail-pairs-qa_loss": 0.31595128774642944,
"eval_scitail-pairs-qa_runtime": 1.1467,
"eval_scitail-pairs-qa_samples_per_second": 130.806,
"eval_scitail-pairs-qa_steps_per_second": 4.36,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_scitail-pairs-pos_loss": 0.9187478423118591,
"eval_scitail-pairs-pos_runtime": 2.1273,
"eval_scitail-pairs-pos_samples_per_second": 70.512,
"eval_scitail-pairs-pos_steps_per_second": 2.35,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_xsum-pairs_loss": 1.060194492340088,
"eval_xsum-pairs_runtime": 2.2836,
"eval_xsum-pairs_samples_per_second": 65.686,
"eval_xsum-pairs_steps_per_second": 2.19,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_compression-pairs_loss": 0.41078585386276245,
"eval_compression-pairs_runtime": 0.4434,
"eval_compression-pairs_samples_per_second": 338.276,
"eval_compression-pairs_steps_per_second": 11.276,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_sciq_pairs_loss": 7.577760696411133,
"eval_sciq_pairs_runtime": 7.1025,
"eval_sciq_pairs_samples_per_second": 21.119,
"eval_sciq_pairs_steps_per_second": 0.704,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_qasc_pairs_loss": 6.353766918182373,
"eval_qasc_pairs_runtime": 2.0113,
"eval_qasc_pairs_samples_per_second": 74.58,
"eval_qasc_pairs_steps_per_second": 2.486,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_openbookqa_pairs_loss": 3.7140932083129883,
"eval_openbookqa_pairs_runtime": 0.8529,
"eval_openbookqa_pairs_samples_per_second": 120.762,
"eval_openbookqa_pairs_steps_per_second": 4.69,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_msmarco_pairs_loss": 2.3862576484680176,
"eval_msmarco_pairs_runtime": 2.8953,
"eval_msmarco_pairs_samples_per_second": 51.808,
"eval_msmarco_pairs_steps_per_second": 1.727,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_nq_pairs_loss": 2.3543190956115723,
"eval_nq_pairs_runtime": 5.0048,
"eval_nq_pairs_samples_per_second": 29.971,
"eval_nq_pairs_steps_per_second": 0.999,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_trivia_pairs_loss": 2.494807481765747,
"eval_trivia_pairs_runtime": 9.5513,
"eval_trivia_pairs_samples_per_second": 15.705,
"eval_trivia_pairs_steps_per_second": 0.523,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_quora_pairs_loss": 0.6137441992759705,
"eval_quora_pairs_runtime": 1.1541,
"eval_quora_pairs_samples_per_second": 129.967,
"eval_quora_pairs_steps_per_second": 4.332,
"step": 5145
},
{
"epoch": 0.8758937691521961,
"eval_gooaq_pairs_loss": 1.8279658555984497,
"eval_gooaq_pairs_runtime": 2.0951,
"eval_gooaq_pairs_samples_per_second": 71.595,
"eval_gooaq_pairs_steps_per_second": 2.387,
"step": 5145
},
{
"epoch": 0.9009193054136875,
"grad_norm": 10.590804100036621,
"learning_rate": 1.7997957099080695e-05,
"loss": 2.2133,
"step": 5292
},
{
"epoch": 0.9259448416751788,
"grad_norm": 18.527711868286133,
"learning_rate": 1.849846782431052e-05,
"loss": 2.2255,
"step": 5439
},
{
"epoch": 0.95097037793667,
"grad_norm": 2.617710828781128,
"learning_rate": 1.8995573714674838e-05,
"loss": 2.3502,
"step": 5586
},
{
"epoch": 0.9759959141981613,
"grad_norm": 19.551551818847656,
"learning_rate": 1.9496084439904668e-05,
"loss": 1.8964,
"step": 5733
},
{
"epoch": 1.0010214504596526,
"grad_norm": 11.783225059509277,
"learning_rate": 1.999319033026898e-05,
"loss": 1.913,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_nli-pairs_loss": 1.9677053689956665,
"eval_nli-pairs_runtime": 4.3863,
"eval_nli-pairs_samples_per_second": 34.198,
"eval_nli-pairs_steps_per_second": 1.14,
"eval_sts-test_pearson_cosine": 0.7531824359441671,
"eval_sts-test_pearson_dot": 0.602579906515822,
"eval_sts-test_pearson_euclidean": 0.7486763477944213,
"eval_sts-test_pearson_manhattan": 0.7566220287347274,
"eval_sts-test_pearson_max": 0.7566220287347274,
"eval_sts-test_spearman_cosine": 0.7387792578665129,
"eval_sts-test_spearman_dot": 0.5926594656319394,
"eval_sts-test_spearman_euclidean": 0.733653805383597,
"eval_sts-test_spearman_manhattan": 0.7420657558603486,
"eval_sts-test_spearman_max": 0.7420657558603486,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_vitaminc-pairs_loss": 1.4394291639328003,
"eval_vitaminc-pairs_runtime": 2.2575,
"eval_vitaminc-pairs_samples_per_second": 66.446,
"eval_vitaminc-pairs_steps_per_second": 2.215,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_qnli-contrastive_loss": 0.45715218782424927,
"eval_qnli-contrastive_runtime": 0.501,
"eval_qnli-contrastive_samples_per_second": 299.385,
"eval_qnli-contrastive_steps_per_second": 9.979,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_scitail-pairs-qa_loss": 0.26679515838623047,
"eval_scitail-pairs-qa_runtime": 1.4342,
"eval_scitail-pairs-qa_samples_per_second": 104.587,
"eval_scitail-pairs-qa_steps_per_second": 3.486,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_scitail-pairs-pos_loss": 0.8628473281860352,
"eval_scitail-pairs-pos_runtime": 2.3485,
"eval_scitail-pairs-pos_samples_per_second": 63.871,
"eval_scitail-pairs-pos_steps_per_second": 2.129,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_xsum-pairs_loss": 0.9014443755149841,
"eval_xsum-pairs_runtime": 2.2896,
"eval_xsum-pairs_samples_per_second": 65.513,
"eval_xsum-pairs_steps_per_second": 2.184,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_compression-pairs_loss": 0.3047434389591217,
"eval_compression-pairs_runtime": 0.4852,
"eval_compression-pairs_samples_per_second": 309.163,
"eval_compression-pairs_steps_per_second": 10.305,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_sciq_pairs_loss": 1.091601848602295,
"eval_sciq_pairs_runtime": 7.3046,
"eval_sciq_pairs_samples_per_second": 20.535,
"eval_sciq_pairs_steps_per_second": 0.684,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_qasc_pairs_loss": 5.947833061218262,
"eval_qasc_pairs_runtime": 2.1787,
"eval_qasc_pairs_samples_per_second": 68.849,
"eval_qasc_pairs_steps_per_second": 2.295,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_openbookqa_pairs_loss": 3.4724366664886475,
"eval_openbookqa_pairs_runtime": 0.9106,
"eval_openbookqa_pairs_samples_per_second": 113.111,
"eval_openbookqa_pairs_steps_per_second": 4.393,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_msmarco_pairs_loss": 2.1638240814208984,
"eval_msmarco_pairs_runtime": 2.82,
"eval_msmarco_pairs_samples_per_second": 53.191,
"eval_msmarco_pairs_steps_per_second": 1.773,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_nq_pairs_loss": 2.110903739929199,
"eval_nq_pairs_runtime": 5.2303,
"eval_nq_pairs_samples_per_second": 28.679,
"eval_nq_pairs_steps_per_second": 0.956,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_trivia_pairs_loss": 2.3711097240448,
"eval_trivia_pairs_runtime": 9.6247,
"eval_trivia_pairs_samples_per_second": 15.585,
"eval_trivia_pairs_steps_per_second": 0.519,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_quora_pairs_loss": 0.5216041803359985,
"eval_quora_pairs_runtime": 1.3072,
"eval_quora_pairs_samples_per_second": 114.749,
"eval_quora_pairs_steps_per_second": 3.825,
"step": 5880
},
{
"epoch": 1.0010214504596526,
"eval_gooaq_pairs_loss": 1.7041363716125488,
"eval_gooaq_pairs_runtime": 2.0973,
"eval_gooaq_pairs_samples_per_second": 71.521,
"eval_gooaq_pairs_steps_per_second": 2.384,
"step": 5880
},
{
"epoch": 1.026046986721144,
"grad_norm": 17.308378219604492,
"learning_rate": 2.0493701055498808e-05,
"loss": 1.7772,
"step": 6027
},
{
"epoch": 1.0510725229826354,
"grad_norm": 20.248981475830078,
"learning_rate": 2.0994211780728634e-05,
"loss": 1.9079,
"step": 6174
},
{
"epoch": 1.0760980592441267,
"grad_norm": 6.012618064880371,
"learning_rate": 2.1494722505958464e-05,
"loss": 1.8657,
"step": 6321
},
{
"epoch": 1.101123595505618,
"grad_norm": 1.1185024976730347,
"learning_rate": 2.1995233231188288e-05,
"loss": 1.7144,
"step": 6468
},
{
"epoch": 1.1261491317671093,
"grad_norm": 1.2436251640319824,
"learning_rate": 2.2495743956418114e-05,
"loss": 1.7661,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_nli-pairs_loss": 1.7907973527908325,
"eval_nli-pairs_runtime": 4.0147,
"eval_nli-pairs_samples_per_second": 37.363,
"eval_nli-pairs_steps_per_second": 1.245,
"eval_sts-test_pearson_cosine": 0.755444461779583,
"eval_sts-test_pearson_dot": 0.5833168145328357,
"eval_sts-test_pearson_euclidean": 0.7437155007996056,
"eval_sts-test_pearson_manhattan": 0.7524938984567344,
"eval_sts-test_pearson_max": 0.755444461779583,
"eval_sts-test_spearman_cosine": 0.7446166596886566,
"eval_sts-test_spearman_dot": 0.5792340720766105,
"eval_sts-test_spearman_euclidean": 0.7317285388028532,
"eval_sts-test_spearman_manhattan": 0.7401637904976945,
"eval_sts-test_spearman_max": 0.7446166596886566,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_vitaminc-pairs_loss": 1.3403607606887817,
"eval_vitaminc-pairs_runtime": 2.168,
"eval_vitaminc-pairs_samples_per_second": 69.189,
"eval_vitaminc-pairs_steps_per_second": 2.306,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_qnli-contrastive_loss": 0.2736852467060089,
"eval_qnli-contrastive_runtime": 0.4913,
"eval_qnli-contrastive_samples_per_second": 305.336,
"eval_qnli-contrastive_steps_per_second": 10.178,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_scitail-pairs-qa_loss": 0.22441554069519043,
"eval_scitail-pairs-qa_runtime": 1.1614,
"eval_scitail-pairs-qa_samples_per_second": 129.152,
"eval_scitail-pairs-qa_steps_per_second": 4.305,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_scitail-pairs-pos_loss": 0.7723743915557861,
"eval_scitail-pairs-pos_runtime": 2.1567,
"eval_scitail-pairs-pos_samples_per_second": 69.55,
"eval_scitail-pairs-pos_steps_per_second": 2.318,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_xsum-pairs_loss": 0.8370540142059326,
"eval_xsum-pairs_runtime": 2.2569,
"eval_xsum-pairs_samples_per_second": 66.463,
"eval_xsum-pairs_steps_per_second": 2.215,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_compression-pairs_loss": 0.265947163105011,
"eval_compression-pairs_runtime": 0.4431,
"eval_compression-pairs_samples_per_second": 338.529,
"eval_compression-pairs_steps_per_second": 11.284,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_sciq_pairs_loss": 0.9383512735366821,
"eval_sciq_pairs_runtime": 7.1464,
"eval_sciq_pairs_samples_per_second": 20.99,
"eval_sciq_pairs_steps_per_second": 0.7,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_qasc_pairs_loss": 5.753899097442627,
"eval_qasc_pairs_runtime": 2.0099,
"eval_qasc_pairs_samples_per_second": 74.63,
"eval_qasc_pairs_steps_per_second": 2.488,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_openbookqa_pairs_loss": 3.3517918586730957,
"eval_openbookqa_pairs_runtime": 0.8594,
"eval_openbookqa_pairs_samples_per_second": 119.858,
"eval_openbookqa_pairs_steps_per_second": 4.655,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_msmarco_pairs_loss": 2.044360399246216,
"eval_msmarco_pairs_runtime": 2.7431,
"eval_msmarco_pairs_samples_per_second": 54.682,
"eval_msmarco_pairs_steps_per_second": 1.823,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_nq_pairs_loss": 1.9409464597702026,
"eval_nq_pairs_runtime": 5.028,
"eval_nq_pairs_samples_per_second": 29.833,
"eval_nq_pairs_steps_per_second": 0.994,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_trivia_pairs_loss": 2.369060754776001,
"eval_trivia_pairs_runtime": 9.5137,
"eval_trivia_pairs_samples_per_second": 15.767,
"eval_trivia_pairs_steps_per_second": 0.526,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_quora_pairs_loss": 0.47849634289741516,
"eval_quora_pairs_runtime": 1.1413,
"eval_quora_pairs_samples_per_second": 131.424,
"eval_quora_pairs_steps_per_second": 4.381,
"step": 6615
},
{
"epoch": 1.1261491317671093,
"eval_gooaq_pairs_loss": 1.5795674324035645,
"eval_gooaq_pairs_runtime": 2.0155,
"eval_gooaq_pairs_samples_per_second": 74.422,
"eval_gooaq_pairs_steps_per_second": 2.481,
"step": 6615
},
{
"epoch": 1.1511746680286006,
"grad_norm": 20.95261001586914,
"learning_rate": 2.299625468164794e-05,
"loss": 1.8066,
"step": 6762
},
{
"epoch": 1.1762002042900919,
"grad_norm": 20.31597900390625,
"learning_rate": 2.3496765406877764e-05,
"loss": 1.7438,
"step": 6909
},
{
"epoch": 1.2012257405515832,
"grad_norm": 28.363882064819336,
"learning_rate": 2.399727613210759e-05,
"loss": 2.0231,
"step": 7056
},
{
"epoch": 1.2262512768130747,
"grad_norm": 14.403656959533691,
"learning_rate": 2.449778685733742e-05,
"loss": 1.8966,
"step": 7203
},
{
"epoch": 1.251276813074566,
"grad_norm": 17.73562240600586,
"learning_rate": 2.4998297582567248e-05,
"loss": 1.7958,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_nli-pairs_loss": 1.5906368494033813,
"eval_nli-pairs_runtime": 4.0261,
"eval_nli-pairs_samples_per_second": 37.257,
"eval_nli-pairs_steps_per_second": 1.242,
"eval_sts-test_pearson_cosine": 0.7626661521495873,
"eval_sts-test_pearson_dot": 0.5632604768989181,
"eval_sts-test_pearson_euclidean": 0.7370060575260952,
"eval_sts-test_pearson_manhattan": 0.7472706980613159,
"eval_sts-test_pearson_max": 0.7626661521495873,
"eval_sts-test_spearman_cosine": 0.7535266725567149,
"eval_sts-test_spearman_dot": 0.5848997224802808,
"eval_sts-test_spearman_euclidean": 0.7290608032903477,
"eval_sts-test_spearman_manhattan": 0.739032087078249,
"eval_sts-test_spearman_max": 0.7535266725567149,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_vitaminc-pairs_loss": 1.222551941871643,
"eval_vitaminc-pairs_runtime": 2.1784,
"eval_vitaminc-pairs_samples_per_second": 68.857,
"eval_vitaminc-pairs_steps_per_second": 2.295,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_qnli-contrastive_loss": 0.3951484262943268,
"eval_qnli-contrastive_runtime": 0.4916,
"eval_qnli-contrastive_samples_per_second": 305.11,
"eval_qnli-contrastive_steps_per_second": 10.17,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_scitail-pairs-qa_loss": 0.17783091962337494,
"eval_scitail-pairs-qa_runtime": 1.1549,
"eval_scitail-pairs-qa_samples_per_second": 129.88,
"eval_scitail-pairs-qa_steps_per_second": 4.329,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_scitail-pairs-pos_loss": 0.7214661836624146,
"eval_scitail-pairs-pos_runtime": 2.132,
"eval_scitail-pairs-pos_samples_per_second": 70.357,
"eval_scitail-pairs-pos_steps_per_second": 2.345,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_xsum-pairs_loss": 0.7919928431510925,
"eval_xsum-pairs_runtime": 2.2579,
"eval_xsum-pairs_samples_per_second": 66.432,
"eval_xsum-pairs_steps_per_second": 2.214,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_compression-pairs_loss": 0.24975377321243286,
"eval_compression-pairs_runtime": 0.447,
"eval_compression-pairs_samples_per_second": 335.534,
"eval_compression-pairs_steps_per_second": 11.184,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_sciq_pairs_loss": 0.8343773484230042,
"eval_sciq_pairs_runtime": 7.1288,
"eval_sciq_pairs_samples_per_second": 21.042,
"eval_sciq_pairs_steps_per_second": 0.701,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_qasc_pairs_loss": 5.4840240478515625,
"eval_qasc_pairs_runtime": 2.025,
"eval_qasc_pairs_samples_per_second": 74.074,
"eval_qasc_pairs_steps_per_second": 2.469,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_openbookqa_pairs_loss": 3.1631176471710205,
"eval_openbookqa_pairs_runtime": 0.8612,
"eval_openbookqa_pairs_samples_per_second": 119.598,
"eval_openbookqa_pairs_steps_per_second": 4.645,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_msmarco_pairs_loss": 1.8952231407165527,
"eval_msmarco_pairs_runtime": 2.7585,
"eval_msmarco_pairs_samples_per_second": 54.378,
"eval_msmarco_pairs_steps_per_second": 1.813,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_nq_pairs_loss": 1.6934970617294312,
"eval_nq_pairs_runtime": 5.0253,
"eval_nq_pairs_samples_per_second": 29.849,
"eval_nq_pairs_steps_per_second": 0.995,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_trivia_pairs_loss": 1.9966663122177124,
"eval_trivia_pairs_runtime": 9.5675,
"eval_trivia_pairs_samples_per_second": 15.678,
"eval_trivia_pairs_steps_per_second": 0.523,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_quora_pairs_loss": 0.405385285615921,
"eval_quora_pairs_runtime": 1.1432,
"eval_quora_pairs_samples_per_second": 131.209,
"eval_quora_pairs_steps_per_second": 4.374,
"step": 7350
},
{
"epoch": 1.251276813074566,
"eval_gooaq_pairs_loss": 1.3951071500778198,
"eval_gooaq_pairs_runtime": 2.038,
"eval_gooaq_pairs_samples_per_second": 73.601,
"eval_gooaq_pairs_steps_per_second": 2.453,
"step": 7350
},
{
"epoch": 1.2763023493360572,
"grad_norm": 21.254159927368164,
"learning_rate": 2.549880830779707e-05,
"loss": 1.5109,
"step": 7497
},
{
"epoch": 1.3013278855975485,
"grad_norm": 20.08012580871582,
"learning_rate": 2.5999319033026898e-05,
"loss": 1.8119,
"step": 7644
},
{
"epoch": 1.3263534218590398,
"grad_norm": 0.6448306441307068,
"learning_rate": 2.6499829758256724e-05,
"loss": 1.6833,
"step": 7791
},
{
"epoch": 1.351378958120531,
"grad_norm": 16.65821647644043,
"learning_rate": 2.7000340483486554e-05,
"loss": 1.5917,
"step": 7938
},
{
"epoch": 1.3764044943820224,
"grad_norm": 14.949362754821777,
"learning_rate": 2.7500851208716378e-05,
"loss": 1.809,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_nli-pairs_loss": 1.5967836380004883,
"eval_nli-pairs_runtime": 4.0496,
"eval_nli-pairs_samples_per_second": 37.041,
"eval_nli-pairs_steps_per_second": 1.235,
"eval_sts-test_pearson_cosine": 0.7653416933913197,
"eval_sts-test_pearson_dot": 0.5401711611334493,
"eval_sts-test_pearson_euclidean": 0.7529907774019836,
"eval_sts-test_pearson_manhattan": 0.7605105025260754,
"eval_sts-test_pearson_max": 0.7653416933913197,
"eval_sts-test_spearman_cosine": 0.7593865234485873,
"eval_sts-test_spearman_dot": 0.5559615063301898,
"eval_sts-test_spearman_euclidean": 0.7436431053840061,
"eval_sts-test_spearman_manhattan": 0.7515978828464567,
"eval_sts-test_spearman_max": 0.7593865234485873,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_vitaminc-pairs_loss": 1.1434590816497803,
"eval_vitaminc-pairs_runtime": 2.2066,
"eval_vitaminc-pairs_samples_per_second": 67.977,
"eval_vitaminc-pairs_steps_per_second": 2.266,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_qnli-contrastive_loss": 0.3819103538990021,
"eval_qnli-contrastive_runtime": 0.4972,
"eval_qnli-contrastive_samples_per_second": 301.706,
"eval_qnli-contrastive_steps_per_second": 10.057,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_scitail-pairs-qa_loss": 0.15774373710155487,
"eval_scitail-pairs-qa_runtime": 1.1704,
"eval_scitail-pairs-qa_samples_per_second": 128.161,
"eval_scitail-pairs-qa_steps_per_second": 4.272,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_scitail-pairs-pos_loss": 0.6571963429450989,
"eval_scitail-pairs-pos_runtime": 2.1634,
"eval_scitail-pairs-pos_samples_per_second": 69.335,
"eval_scitail-pairs-pos_steps_per_second": 2.311,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_xsum-pairs_loss": 0.7028753757476807,
"eval_xsum-pairs_runtime": 2.2608,
"eval_xsum-pairs_samples_per_second": 66.347,
"eval_xsum-pairs_steps_per_second": 2.212,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_compression-pairs_loss": 0.23010987043380737,
"eval_compression-pairs_runtime": 0.4514,
"eval_compression-pairs_samples_per_second": 332.284,
"eval_compression-pairs_steps_per_second": 11.076,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_sciq_pairs_loss": 0.799666702747345,
"eval_sciq_pairs_runtime": 7.1816,
"eval_sciq_pairs_samples_per_second": 20.887,
"eval_sciq_pairs_steps_per_second": 0.696,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_qasc_pairs_loss": 5.433376789093018,
"eval_qasc_pairs_runtime": 2.0592,
"eval_qasc_pairs_samples_per_second": 72.843,
"eval_qasc_pairs_steps_per_second": 2.428,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_openbookqa_pairs_loss": 2.9010672569274902,
"eval_openbookqa_pairs_runtime": 0.865,
"eval_openbookqa_pairs_samples_per_second": 119.074,
"eval_openbookqa_pairs_steps_per_second": 4.624,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_msmarco_pairs_loss": 1.7567836046218872,
"eval_msmarco_pairs_runtime": 2.7812,
"eval_msmarco_pairs_samples_per_second": 53.933,
"eval_msmarco_pairs_steps_per_second": 1.798,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_nq_pairs_loss": 1.5407707691192627,
"eval_nq_pairs_runtime": 5.0607,
"eval_nq_pairs_samples_per_second": 29.64,
"eval_nq_pairs_steps_per_second": 0.988,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_trivia_pairs_loss": 1.8419283628463745,
"eval_trivia_pairs_runtime": 9.5535,
"eval_trivia_pairs_samples_per_second": 15.701,
"eval_trivia_pairs_steps_per_second": 0.523,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_quora_pairs_loss": 0.3561370372772217,
"eval_quora_pairs_runtime": 1.2005,
"eval_quora_pairs_samples_per_second": 124.946,
"eval_quora_pairs_steps_per_second": 4.165,
"step": 8085
},
{
"epoch": 1.3764044943820224,
"eval_gooaq_pairs_loss": 1.1745914220809937,
"eval_gooaq_pairs_runtime": 2.0463,
"eval_gooaq_pairs_samples_per_second": 73.305,
"eval_gooaq_pairs_steps_per_second": 2.443,
"step": 8085
},
{
"epoch": 1.401430030643514,
"grad_norm": 14.31106185913086,
"learning_rate": 2.8001361933946204e-05,
"loss": 1.5561,
"step": 8232
},
{
"epoch": 1.4264555669050052,
"grad_norm": 11.82392692565918,
"learning_rate": 2.850187265917603e-05,
"loss": 1.5325,
"step": 8379
},
{
"epoch": 1.4514811031664965,
"grad_norm": 21.716449737548828,
"learning_rate": 2.9002383384405858e-05,
"loss": 1.5085,
"step": 8526
},
{
"epoch": 1.4765066394279878,
"grad_norm": 6.5607147216796875,
"learning_rate": 2.950289410963568e-05,
"loss": 1.5634,
"step": 8673
},
{
"epoch": 1.501532175689479,
"grad_norm": 8.737595558166504,
"learning_rate": 2.9999998423842776e-05,
"loss": 1.3857,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_nli-pairs_loss": 1.454946517944336,
"eval_nli-pairs_runtime": 4.3786,
"eval_nli-pairs_samples_per_second": 34.257,
"eval_nli-pairs_steps_per_second": 1.142,
"eval_sts-test_pearson_cosine": 0.758856517299588,
"eval_sts-test_pearson_dot": 0.5254244903711445,
"eval_sts-test_pearson_euclidean": 0.7467439510002647,
"eval_sts-test_pearson_manhattan": 0.7525779346304055,
"eval_sts-test_pearson_max": 0.758856517299588,
"eval_sts-test_spearman_cosine": 0.7596605816446022,
"eval_sts-test_spearman_dot": 0.5600186533991508,
"eval_sts-test_spearman_euclidean": 0.7367598380547504,
"eval_sts-test_spearman_manhattan": 0.7440123650923844,
"eval_sts-test_spearman_max": 0.7596605816446022,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_vitaminc-pairs_loss": 1.1383781433105469,
"eval_vitaminc-pairs_runtime": 2.3314,
"eval_vitaminc-pairs_samples_per_second": 64.34,
"eval_vitaminc-pairs_steps_per_second": 2.145,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_qnli-contrastive_loss": 0.32092100381851196,
"eval_qnli-contrastive_runtime": 0.5002,
"eval_qnli-contrastive_samples_per_second": 299.881,
"eval_qnli-contrastive_steps_per_second": 9.996,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_scitail-pairs-qa_loss": 0.14513270556926727,
"eval_scitail-pairs-qa_runtime": 1.5154,
"eval_scitail-pairs-qa_samples_per_second": 98.985,
"eval_scitail-pairs-qa_steps_per_second": 3.3,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_scitail-pairs-pos_loss": 0.6857669353485107,
"eval_scitail-pairs-pos_runtime": 2.4178,
"eval_scitail-pairs-pos_samples_per_second": 62.041,
"eval_scitail-pairs-pos_steps_per_second": 2.068,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_xsum-pairs_loss": 0.683724045753479,
"eval_xsum-pairs_runtime": 2.2766,
"eval_xsum-pairs_samples_per_second": 65.887,
"eval_xsum-pairs_steps_per_second": 2.196,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_compression-pairs_loss": 0.20896266400814056,
"eval_compression-pairs_runtime": 0.4683,
"eval_compression-pairs_samples_per_second": 320.274,
"eval_compression-pairs_steps_per_second": 10.676,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_sciq_pairs_loss": 0.7911179661750793,
"eval_sciq_pairs_runtime": 7.3506,
"eval_sciq_pairs_samples_per_second": 20.407,
"eval_sciq_pairs_steps_per_second": 0.68,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_qasc_pairs_loss": 5.3092241287231445,
"eval_qasc_pairs_runtime": 2.1926,
"eval_qasc_pairs_samples_per_second": 68.411,
"eval_qasc_pairs_steps_per_second": 2.28,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_openbookqa_pairs_loss": 2.923464298248291,
"eval_openbookqa_pairs_runtime": 0.963,
"eval_openbookqa_pairs_samples_per_second": 106.961,
"eval_openbookqa_pairs_steps_per_second": 4.154,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_msmarco_pairs_loss": 1.674107313156128,
"eval_msmarco_pairs_runtime": 2.8516,
"eval_msmarco_pairs_samples_per_second": 52.602,
"eval_msmarco_pairs_steps_per_second": 1.753,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_nq_pairs_loss": 1.4419037103652954,
"eval_nq_pairs_runtime": 5.1485,
"eval_nq_pairs_samples_per_second": 29.135,
"eval_nq_pairs_steps_per_second": 0.971,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_trivia_pairs_loss": 1.7546964883804321,
"eval_trivia_pairs_runtime": 9.6901,
"eval_trivia_pairs_samples_per_second": 15.48,
"eval_trivia_pairs_steps_per_second": 0.516,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_quora_pairs_loss": 0.31785744428634644,
"eval_quora_pairs_runtime": 1.2699,
"eval_quora_pairs_samples_per_second": 118.117,
"eval_quora_pairs_steps_per_second": 3.937,
"step": 8820
},
{
"epoch": 1.501532175689479,
"eval_gooaq_pairs_loss": 1.1328644752502441,
"eval_gooaq_pairs_runtime": 2.1292,
"eval_gooaq_pairs_samples_per_second": 70.448,
"eval_gooaq_pairs_steps_per_second": 2.348,
"step": 8820
},
{
"epoch": 1.5265577119509703,
"grad_norm": 15.168123245239258,
"learning_rate": 2.9965489092992677e-05,
"loss": 1.6167,
"step": 8967
},
{
"epoch": 1.5515832482124616,
"grad_norm": 29.32268524169922,
"learning_rate": 2.9863043834895476e-05,
"loss": 1.6664,
"step": 9114
},
{
"epoch": 1.5766087844739531,
"grad_norm": 2.5159287452697754,
"learning_rate": 2.9693127700413034e-05,
"loss": 1.4785,
"step": 9261
},
{
"epoch": 1.6016343207354442,
"grad_norm": 17.4219970703125,
"learning_rate": 2.9456512024854113e-05,
"loss": 1.5881,
"step": 9408
},
{
"epoch": 1.6266598569969357,
"grad_norm": 15.60139274597168,
"learning_rate": 2.915427092649312e-05,
"loss": 1.3379,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_nli-pairs_loss": 1.3879741430282593,
"eval_nli-pairs_runtime": 4.1363,
"eval_nli-pairs_samples_per_second": 36.264,
"eval_nli-pairs_steps_per_second": 1.209,
"eval_sts-test_pearson_cosine": 0.7733483283639441,
"eval_sts-test_pearson_dot": 0.5424296843493538,
"eval_sts-test_pearson_euclidean": 0.7555770040784449,
"eval_sts-test_pearson_manhattan": 0.7604742759594404,
"eval_sts-test_pearson_max": 0.7733483283639441,
"eval_sts-test_spearman_cosine": 0.779671933510953,
"eval_sts-test_spearman_dot": 0.5784449139725663,
"eval_sts-test_spearman_euclidean": 0.7515003599642571,
"eval_sts-test_spearman_manhattan": 0.7568440288585417,
"eval_sts-test_spearman_max": 0.779671933510953,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_vitaminc-pairs_loss": 0.9942379593849182,
"eval_vitaminc-pairs_runtime": 2.2185,
"eval_vitaminc-pairs_samples_per_second": 67.613,
"eval_vitaminc-pairs_steps_per_second": 2.254,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_qnli-contrastive_loss": 0.2178214192390442,
"eval_qnli-contrastive_runtime": 0.4991,
"eval_qnli-contrastive_samples_per_second": 300.549,
"eval_qnli-contrastive_steps_per_second": 10.018,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_scitail-pairs-qa_loss": 0.13629749417304993,
"eval_scitail-pairs-qa_runtime": 1.1751,
"eval_scitail-pairs-qa_samples_per_second": 127.653,
"eval_scitail-pairs-qa_steps_per_second": 4.255,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_scitail-pairs-pos_loss": 0.5964671969413757,
"eval_scitail-pairs-pos_runtime": 2.1841,
"eval_scitail-pairs-pos_samples_per_second": 68.677,
"eval_scitail-pairs-pos_steps_per_second": 2.289,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_xsum-pairs_loss": 0.6746851205825806,
"eval_xsum-pairs_runtime": 2.2628,
"eval_xsum-pairs_samples_per_second": 66.291,
"eval_xsum-pairs_steps_per_second": 2.21,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_compression-pairs_loss": 0.17857055366039276,
"eval_compression-pairs_runtime": 0.4506,
"eval_compression-pairs_samples_per_second": 332.902,
"eval_compression-pairs_steps_per_second": 11.097,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_sciq_pairs_loss": 0.7349148988723755,
"eval_sciq_pairs_runtime": 7.116,
"eval_sciq_pairs_samples_per_second": 21.079,
"eval_sciq_pairs_steps_per_second": 0.703,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_qasc_pairs_loss": 5.115650177001953,
"eval_qasc_pairs_runtime": 2.0271,
"eval_qasc_pairs_samples_per_second": 73.997,
"eval_qasc_pairs_steps_per_second": 2.467,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_openbookqa_pairs_loss": 2.694535255432129,
"eval_openbookqa_pairs_runtime": 0.8634,
"eval_openbookqa_pairs_samples_per_second": 119.302,
"eval_openbookqa_pairs_steps_per_second": 4.633,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_msmarco_pairs_loss": 1.5184054374694824,
"eval_msmarco_pairs_runtime": 2.7561,
"eval_msmarco_pairs_samples_per_second": 54.424,
"eval_msmarco_pairs_steps_per_second": 1.814,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_nq_pairs_loss": 1.293426752090454,
"eval_nq_pairs_runtime": 5.0107,
"eval_nq_pairs_samples_per_second": 29.936,
"eval_nq_pairs_steps_per_second": 0.998,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_trivia_pairs_loss": 1.5939557552337646,
"eval_trivia_pairs_runtime": 9.5368,
"eval_trivia_pairs_samples_per_second": 15.728,
"eval_trivia_pairs_steps_per_second": 0.524,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_quora_pairs_loss": 0.31308451294898987,
"eval_quora_pairs_runtime": 1.1456,
"eval_quora_pairs_samples_per_second": 130.932,
"eval_quora_pairs_steps_per_second": 4.364,
"step": 9555
},
{
"epoch": 1.6266598569969357,
"eval_gooaq_pairs_loss": 1.0807112455368042,
"eval_gooaq_pairs_runtime": 2.0197,
"eval_gooaq_pairs_samples_per_second": 74.269,
"eval_gooaq_pairs_steps_per_second": 2.476,
"step": 9555
},
{
"epoch": 1.651685393258427,
"grad_norm": 0.7546759843826294,
"learning_rate": 2.878777643060379e-05,
"loss": 1.4469,
"step": 9702
},
{
"epoch": 1.6767109295199183,
"grad_norm": 0.8483991026878357,
"learning_rate": 2.835869224114224e-05,
"loss": 1.3878,
"step": 9849
},
{
"epoch": 1.7017364657814096,
"grad_norm": 20.814105987548828,
"learning_rate": 2.7868966188352908e-05,
"loss": 1.2764,
"step": 9996
},
{
"epoch": 1.7267620020429009,
"grad_norm": 3.1025094985961914,
"learning_rate": 2.73208213865815e-05,
"loss": 1.3884,
"step": 10143
},
{
"epoch": 1.7517875383043924,
"grad_norm": 14.80810260772705,
"learning_rate": 2.671674614243416e-05,
"loss": 1.2977,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_nli-pairs_loss": 1.3081562519073486,
"eval_nli-pairs_runtime": 4.0165,
"eval_nli-pairs_samples_per_second": 37.346,
"eval_nli-pairs_steps_per_second": 1.245,
"eval_sts-test_pearson_cosine": 0.7681143802843627,
"eval_sts-test_pearson_dot": 0.5287526695750702,
"eval_sts-test_pearson_euclidean": 0.7538805205317111,
"eval_sts-test_pearson_manhattan": 0.7596894203751682,
"eval_sts-test_pearson_max": 0.7681143802843627,
"eval_sts-test_spearman_cosine": 0.770908506196058,
"eval_sts-test_spearman_dot": 0.5670572774538138,
"eval_sts-test_spearman_euclidean": 0.7452730842318486,
"eval_sts-test_spearman_manhattan": 0.7517699916174685,
"eval_sts-test_spearman_max": 0.770908506196058,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_vitaminc-pairs_loss": 0.9676446318626404,
"eval_vitaminc-pairs_runtime": 2.1787,
"eval_vitaminc-pairs_samples_per_second": 68.85,
"eval_vitaminc-pairs_steps_per_second": 2.295,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_qnli-contrastive_loss": 0.244391530752182,
"eval_qnli-contrastive_runtime": 0.4884,
"eval_qnli-contrastive_samples_per_second": 307.113,
"eval_qnli-contrastive_steps_per_second": 10.237,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_scitail-pairs-qa_loss": 0.1264333575963974,
"eval_scitail-pairs-qa_runtime": 1.1536,
"eval_scitail-pairs-qa_samples_per_second": 130.03,
"eval_scitail-pairs-qa_steps_per_second": 4.334,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_scitail-pairs-pos_loss": 0.5472012162208557,
"eval_scitail-pairs-pos_runtime": 2.1213,
"eval_scitail-pairs-pos_samples_per_second": 70.711,
"eval_scitail-pairs-pos_steps_per_second": 2.357,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_xsum-pairs_loss": 0.5869634747505188,
"eval_xsum-pairs_runtime": 2.2876,
"eval_xsum-pairs_samples_per_second": 65.571,
"eval_xsum-pairs_steps_per_second": 2.186,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_compression-pairs_loss": 0.16663199663162231,
"eval_compression-pairs_runtime": 0.4431,
"eval_compression-pairs_samples_per_second": 338.526,
"eval_compression-pairs_steps_per_second": 11.284,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_sciq_pairs_loss": 0.6884138584136963,
"eval_sciq_pairs_runtime": 7.0451,
"eval_sciq_pairs_samples_per_second": 21.291,
"eval_sciq_pairs_steps_per_second": 0.71,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_qasc_pairs_loss": 5.099090099334717,
"eval_qasc_pairs_runtime": 2.0309,
"eval_qasc_pairs_samples_per_second": 73.86,
"eval_qasc_pairs_steps_per_second": 2.462,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_openbookqa_pairs_loss": 2.6562159061431885,
"eval_openbookqa_pairs_runtime": 0.8531,
"eval_openbookqa_pairs_samples_per_second": 120.74,
"eval_openbookqa_pairs_steps_per_second": 4.689,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_msmarco_pairs_loss": 1.3729219436645508,
"eval_msmarco_pairs_runtime": 2.7346,
"eval_msmarco_pairs_samples_per_second": 54.853,
"eval_msmarco_pairs_steps_per_second": 1.828,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_nq_pairs_loss": 1.2174726724624634,
"eval_nq_pairs_runtime": 4.9981,
"eval_nq_pairs_samples_per_second": 30.012,
"eval_nq_pairs_steps_per_second": 1.0,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_trivia_pairs_loss": 1.5839861631393433,
"eval_trivia_pairs_runtime": 9.4611,
"eval_trivia_pairs_samples_per_second": 15.854,
"eval_trivia_pairs_steps_per_second": 0.528,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_quora_pairs_loss": 0.2804078757762909,
"eval_quora_pairs_runtime": 1.1799,
"eval_quora_pairs_samples_per_second": 127.13,
"eval_quora_pairs_steps_per_second": 4.238,
"step": 10290
},
{
"epoch": 1.7517875383043924,
"eval_gooaq_pairs_loss": 0.9541385769844055,
"eval_gooaq_pairs_runtime": 2.1014,
"eval_gooaq_pairs_samples_per_second": 71.38,
"eval_gooaq_pairs_steps_per_second": 2.379,
"step": 10290
},
{
"epoch": 1.7768130745658834,
"grad_norm": 51.763004302978516,
"learning_rate": 2.6059482659094694e-05,
"loss": 1.4422,
"step": 10437
},
{
"epoch": 1.801838610827375,
"grad_norm": 3.4887988567352295,
"learning_rate": 2.5352014588076858e-05,
"loss": 1.4997,
"step": 10584
},
{
"epoch": 1.8268641470888662,
"grad_norm": 6.360722064971924,
"learning_rate": 2.4597553484920438e-05,
"loss": 1.2797,
"step": 10731
},
{
"epoch": 1.8518896833503575,
"grad_norm": 16.216428756713867,
"learning_rate": 2.3799524230315696e-05,
"loss": 1.2362,
"step": 10878
},
{
"epoch": 1.8769152196118488,
"grad_norm": 19.113628387451172,
"learning_rate": 2.2961549482836967e-05,
"loss": 1.2799,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_nli-pairs_loss": 1.2669230699539185,
"eval_nli-pairs_runtime": 4.0111,
"eval_nli-pairs_samples_per_second": 37.396,
"eval_nli-pairs_steps_per_second": 1.247,
"eval_sts-test_pearson_cosine": 0.774489523257569,
"eval_sts-test_pearson_dot": 0.5150859135257536,
"eval_sts-test_pearson_euclidean": 0.7570251269629877,
"eval_sts-test_pearson_manhattan": 0.7623769541465137,
"eval_sts-test_pearson_max": 0.774489523257569,
"eval_sts-test_spearman_cosine": 0.7816800005074528,
"eval_sts-test_spearman_dot": 0.565603897190929,
"eval_sts-test_spearman_euclidean": 0.7507848233553155,
"eval_sts-test_spearman_manhattan": 0.756029656784038,
"eval_sts-test_spearman_max": 0.7816800005074528,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_vitaminc-pairs_loss": 0.875577986240387,
"eval_vitaminc-pairs_runtime": 2.2185,
"eval_vitaminc-pairs_samples_per_second": 67.614,
"eval_vitaminc-pairs_steps_per_second": 2.254,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_qnli-contrastive_loss": 0.23095794022083282,
"eval_qnli-contrastive_runtime": 0.4906,
"eval_qnli-contrastive_samples_per_second": 305.756,
"eval_qnli-contrastive_steps_per_second": 10.192,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_scitail-pairs-qa_loss": 0.11762743443250656,
"eval_scitail-pairs-qa_runtime": 1.1505,
"eval_scitail-pairs-qa_samples_per_second": 130.379,
"eval_scitail-pairs-qa_steps_per_second": 4.346,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_scitail-pairs-pos_loss": 0.5055103898048401,
"eval_scitail-pairs-pos_runtime": 2.1912,
"eval_scitail-pairs-pos_samples_per_second": 68.456,
"eval_scitail-pairs-pos_steps_per_second": 2.282,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_xsum-pairs_loss": 0.5941822528839111,
"eval_xsum-pairs_runtime": 2.26,
"eval_xsum-pairs_samples_per_second": 66.371,
"eval_xsum-pairs_steps_per_second": 2.212,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_compression-pairs_loss": 0.16561630368232727,
"eval_compression-pairs_runtime": 0.4447,
"eval_compression-pairs_samples_per_second": 337.281,
"eval_compression-pairs_steps_per_second": 11.243,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_sciq_pairs_loss": 0.6859617233276367,
"eval_sciq_pairs_runtime": 7.2855,
"eval_sciq_pairs_samples_per_second": 20.589,
"eval_sciq_pairs_steps_per_second": 0.686,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_qasc_pairs_loss": 4.979205131530762,
"eval_qasc_pairs_runtime": 2.0332,
"eval_qasc_pairs_samples_per_second": 73.775,
"eval_qasc_pairs_steps_per_second": 2.459,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_openbookqa_pairs_loss": 2.5103061199188232,
"eval_openbookqa_pairs_runtime": 0.8673,
"eval_openbookqa_pairs_samples_per_second": 118.755,
"eval_openbookqa_pairs_steps_per_second": 4.612,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_msmarco_pairs_loss": 1.2753304243087769,
"eval_msmarco_pairs_runtime": 2.7942,
"eval_msmarco_pairs_samples_per_second": 53.683,
"eval_msmarco_pairs_steps_per_second": 1.789,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_nq_pairs_loss": 1.057248592376709,
"eval_nq_pairs_runtime": 5.0749,
"eval_nq_pairs_samples_per_second": 29.557,
"eval_nq_pairs_steps_per_second": 0.985,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_trivia_pairs_loss": 1.4893617630004883,
"eval_trivia_pairs_runtime": 9.5535,
"eval_trivia_pairs_samples_per_second": 15.701,
"eval_trivia_pairs_steps_per_second": 0.523,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_quora_pairs_loss": 0.27783504128456116,
"eval_quora_pairs_runtime": 1.1843,
"eval_quora_pairs_samples_per_second": 126.653,
"eval_quora_pairs_steps_per_second": 4.222,
"step": 11025
},
{
"epoch": 1.8769152196118488,
"eval_gooaq_pairs_loss": 0.8971360325813293,
"eval_gooaq_pairs_runtime": 2.0278,
"eval_gooaq_pairs_samples_per_second": 73.97,
"eval_gooaq_pairs_steps_per_second": 2.466,
"step": 11025
},
{
"epoch": 1.90194075587334,
"grad_norm": 14.915979385375977,
"learning_rate": 2.2087433233862403e-05,
"loss": 1.2292,
"step": 11172
},
{
"epoch": 1.9269662921348316,
"grad_norm": 13.753366470336914,
"learning_rate": 2.118740830659258e-05,
"loss": 1.0362,
"step": 11319
},
{
"epoch": 1.9519918283963227,
"grad_norm": 8.33267593383789,
"learning_rate": 2.0259676306932596e-05,
"loss": 1.1851,
"step": 11466
},
{
"epoch": 1.9770173646578142,
"grad_norm": 0.6671110987663269,
"learning_rate": 1.9301804508269106e-05,
"loss": 1.0248,
"step": 11613
},
{
"epoch": 2.0020429009193053,
"grad_norm": 71.1603775024414,
"learning_rate": 1.8331109675851356e-05,
"loss": 1.1305,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_nli-pairs_loss": 1.1981595754623413,
"eval_nli-pairs_runtime": 4.4194,
"eval_nli-pairs_samples_per_second": 33.941,
"eval_nli-pairs_steps_per_second": 1.131,
"eval_sts-test_pearson_cosine": 0.7691168917727959,
"eval_sts-test_pearson_dot": 0.5009080741883037,
"eval_sts-test_pearson_euclidean": 0.7565965590806436,
"eval_sts-test_pearson_manhattan": 0.7607578912460005,
"eval_sts-test_pearson_max": 0.7691168917727959,
"eval_sts-test_spearman_cosine": 0.7788020160239207,
"eval_sts-test_spearman_dot": 0.5543439729717182,
"eval_sts-test_spearman_euclidean": 0.7507099854871488,
"eval_sts-test_spearman_manhattan": 0.7550850801051086,
"eval_sts-test_spearman_max": 0.7788020160239207,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_vitaminc-pairs_loss": 0.8724684715270996,
"eval_vitaminc-pairs_runtime": 2.2855,
"eval_vitaminc-pairs_samples_per_second": 65.632,
"eval_vitaminc-pairs_steps_per_second": 2.188,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_qnli-contrastive_loss": 0.1063760370016098,
"eval_qnli-contrastive_runtime": 0.5211,
"eval_qnli-contrastive_samples_per_second": 287.861,
"eval_qnli-contrastive_steps_per_second": 9.595,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_scitail-pairs-qa_loss": 0.11115950345993042,
"eval_scitail-pairs-qa_runtime": 1.2652,
"eval_scitail-pairs-qa_samples_per_second": 118.561,
"eval_scitail-pairs-qa_steps_per_second": 3.952,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_scitail-pairs-pos_loss": 0.5056447386741638,
"eval_scitail-pairs-pos_runtime": 2.3265,
"eval_scitail-pairs-pos_samples_per_second": 64.475,
"eval_scitail-pairs-pos_steps_per_second": 2.149,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_xsum-pairs_loss": 0.5417940020561218,
"eval_xsum-pairs_runtime": 2.2651,
"eval_xsum-pairs_samples_per_second": 66.224,
"eval_xsum-pairs_steps_per_second": 2.207,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_compression-pairs_loss": 0.13307414948940277,
"eval_compression-pairs_runtime": 0.4568,
"eval_compression-pairs_samples_per_second": 328.338,
"eval_compression-pairs_steps_per_second": 10.945,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_sciq_pairs_loss": 0.5748575329780579,
"eval_sciq_pairs_runtime": 7.352,
"eval_sciq_pairs_samples_per_second": 20.403,
"eval_sciq_pairs_steps_per_second": 0.68,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_qasc_pairs_loss": 4.8620710372924805,
"eval_qasc_pairs_runtime": 2.2185,
"eval_qasc_pairs_samples_per_second": 67.614,
"eval_qasc_pairs_steps_per_second": 2.254,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_openbookqa_pairs_loss": 2.3957626819610596,
"eval_openbookqa_pairs_runtime": 0.9894,
"eval_openbookqa_pairs_samples_per_second": 104.099,
"eval_openbookqa_pairs_steps_per_second": 4.043,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_msmarco_pairs_loss": 1.2221691608428955,
"eval_msmarco_pairs_runtime": 2.8364,
"eval_msmarco_pairs_samples_per_second": 52.883,
"eval_msmarco_pairs_steps_per_second": 1.763,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_nq_pairs_loss": 1.056867241859436,
"eval_nq_pairs_runtime": 5.123,
"eval_nq_pairs_samples_per_second": 29.28,
"eval_nq_pairs_steps_per_second": 0.976,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_trivia_pairs_loss": 1.5130479335784912,
"eval_trivia_pairs_runtime": 9.628,
"eval_trivia_pairs_samples_per_second": 15.58,
"eval_trivia_pairs_steps_per_second": 0.519,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_quora_pairs_loss": 0.2696760296821594,
"eval_quora_pairs_runtime": 1.2258,
"eval_quora_pairs_samples_per_second": 122.368,
"eval_quora_pairs_steps_per_second": 4.079,
"step": 11760
},
{
"epoch": 2.0020429009193053,
"eval_gooaq_pairs_loss": 0.8714584112167358,
"eval_gooaq_pairs_runtime": 2.1309,
"eval_gooaq_pairs_samples_per_second": 70.391,
"eval_gooaq_pairs_steps_per_second": 2.346,
"step": 11760
},
{
"epoch": 2.0270684371807968,
"grad_norm": 0.7177102565765381,
"learning_rate": 1.7338706161920983e-05,
"loss": 0.9284,
"step": 11907
},
{
"epoch": 2.052093973442288,
"grad_norm": 11.534607887268066,
"learning_rate": 1.633568607738064e-05,
"loss": 1.0998,
"step": 12054
},
{
"epoch": 2.0771195097037793,
"grad_norm": 13.443835258483887,
"learning_rate": 1.5326602637903215e-05,
"loss": 1.1181,
"step": 12201
},
{
"epoch": 2.102145045965271,
"grad_norm": 1.4795461893081665,
"learning_rate": 1.431603658379759e-05,
"loss": 0.9978,
"step": 12348
},
{
"epoch": 2.127170582226762,
"grad_norm": 13.516138076782227,
"learning_rate": 1.3308575385710644e-05,
"loss": 1.0565,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_nli-pairs_loss": 1.210241675376892,
"eval_nli-pairs_runtime": 4.0103,
"eval_nli-pairs_samples_per_second": 37.404,
"eval_nli-pairs_steps_per_second": 1.247,
"eval_sts-test_pearson_cosine": 0.7762452815355265,
"eval_sts-test_pearson_dot": 0.48818071088823645,
"eval_sts-test_pearson_euclidean": 0.7572653656278441,
"eval_sts-test_pearson_manhattan": 0.7608519923908275,
"eval_sts-test_pearson_max": 0.7762452815355265,
"eval_sts-test_spearman_cosine": 0.7839057066535283,
"eval_sts-test_spearman_dot": 0.5570503640965535,
"eval_sts-test_spearman_euclidean": 0.7527973687121541,
"eval_sts-test_spearman_manhattan": 0.755831239077737,
"eval_sts-test_spearman_max": 0.7839057066535283,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_vitaminc-pairs_loss": 0.7919407486915588,
"eval_vitaminc-pairs_runtime": 2.1778,
"eval_vitaminc-pairs_samples_per_second": 68.877,
"eval_vitaminc-pairs_steps_per_second": 2.296,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_qnli-contrastive_loss": 0.10323584824800491,
"eval_qnli-contrastive_runtime": 0.4911,
"eval_qnli-contrastive_samples_per_second": 305.443,
"eval_qnli-contrastive_steps_per_second": 10.181,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_scitail-pairs-qa_loss": 0.1031724140048027,
"eval_scitail-pairs-qa_runtime": 1.1871,
"eval_scitail-pairs-qa_samples_per_second": 126.362,
"eval_scitail-pairs-qa_steps_per_second": 4.212,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_scitail-pairs-pos_loss": 0.49804234504699707,
"eval_scitail-pairs-pos_runtime": 2.1491,
"eval_scitail-pairs-pos_samples_per_second": 69.797,
"eval_scitail-pairs-pos_steps_per_second": 2.327,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_xsum-pairs_loss": 0.5050535202026367,
"eval_xsum-pairs_runtime": 2.2665,
"eval_xsum-pairs_samples_per_second": 66.182,
"eval_xsum-pairs_steps_per_second": 2.206,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_compression-pairs_loss": 0.12162226438522339,
"eval_compression-pairs_runtime": 0.4514,
"eval_compression-pairs_samples_per_second": 332.334,
"eval_compression-pairs_steps_per_second": 11.078,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_sciq_pairs_loss": 0.5630886554718018,
"eval_sciq_pairs_runtime": 7.0948,
"eval_sciq_pairs_samples_per_second": 21.142,
"eval_sciq_pairs_steps_per_second": 0.705,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_qasc_pairs_loss": 4.82968282699585,
"eval_qasc_pairs_runtime": 2.0203,
"eval_qasc_pairs_samples_per_second": 74.248,
"eval_qasc_pairs_steps_per_second": 2.475,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_openbookqa_pairs_loss": 2.291588544845581,
"eval_openbookqa_pairs_runtime": 0.8653,
"eval_openbookqa_pairs_samples_per_second": 119.034,
"eval_openbookqa_pairs_steps_per_second": 4.623,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_msmarco_pairs_loss": 1.179781198501587,
"eval_msmarco_pairs_runtime": 2.7463,
"eval_msmarco_pairs_samples_per_second": 54.619,
"eval_msmarco_pairs_steps_per_second": 1.821,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_nq_pairs_loss": 0.9966514110565186,
"eval_nq_pairs_runtime": 5.0621,
"eval_nq_pairs_samples_per_second": 29.632,
"eval_nq_pairs_steps_per_second": 0.988,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_trivia_pairs_loss": 1.4555574655532837,
"eval_trivia_pairs_runtime": 9.5288,
"eval_trivia_pairs_samples_per_second": 15.742,
"eval_trivia_pairs_steps_per_second": 0.525,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_quora_pairs_loss": 0.2517216205596924,
"eval_quora_pairs_runtime": 1.154,
"eval_quora_pairs_samples_per_second": 129.984,
"eval_quora_pairs_steps_per_second": 4.333,
"step": 12495
},
{
"epoch": 2.127170582226762,
"eval_gooaq_pairs_loss": 0.8206157684326172,
"eval_gooaq_pairs_runtime": 2.0213,
"eval_gooaq_pairs_samples_per_second": 74.209,
"eval_gooaq_pairs_steps_per_second": 2.474,
"step": 12495
},
{
"epoch": 2.1521961184882534,
"grad_norm": 10.220344543457031,
"learning_rate": 1.2308792419776779e-05,
"loss": 1.1317,
"step": 12642
},
{
"epoch": 2.1772216547497445,
"grad_norm": 6.893187046051025,
"learning_rate": 1.13212262067496e-05,
"loss": 1.0682,
"step": 12789
},
{
"epoch": 2.202247191011236,
"grad_norm": 14.389963150024414,
"learning_rate": 1.0350359809359845e-05,
"loss": 1.2708,
"step": 12936
},
{
"epoch": 2.227272727272727,
"grad_norm": 0.3779028058052063,
"learning_rate": 9.400600481425268e-06,
"loss": 1.2129,
"step": 13083
},
{
"epoch": 2.2522982635342186,
"grad_norm": 3.433882713317871,
"learning_rate": 8.476259661095597e-06,
"loss": 1.053,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_nli-pairs_loss": 1.1418253183364868,
"eval_nli-pairs_runtime": 4.1287,
"eval_nli-pairs_samples_per_second": 36.331,
"eval_nli-pairs_steps_per_second": 1.211,
"eval_sts-test_pearson_cosine": 0.7786789365004515,
"eval_sts-test_pearson_dot": 0.4855185680416273,
"eval_sts-test_pearson_euclidean": 0.7514151357124674,
"eval_sts-test_pearson_manhattan": 0.7548721969767885,
"eval_sts-test_pearson_max": 0.7786789365004515,
"eval_sts-test_spearman_cosine": 0.7870432894305359,
"eval_sts-test_spearman_dot": 0.5630314308020745,
"eval_sts-test_spearman_euclidean": 0.7495100025349075,
"eval_sts-test_spearman_manhattan": 0.7525107811391334,
"eval_sts-test_spearman_max": 0.7870432894305359,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_vitaminc-pairs_loss": 0.7908185720443726,
"eval_vitaminc-pairs_runtime": 2.1735,
"eval_vitaminc-pairs_samples_per_second": 69.012,
"eval_vitaminc-pairs_steps_per_second": 2.3,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_qnli-contrastive_loss": 0.17070643603801727,
"eval_qnli-contrastive_runtime": 0.4906,
"eval_qnli-contrastive_samples_per_second": 305.758,
"eval_qnli-contrastive_steps_per_second": 10.192,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_scitail-pairs-qa_loss": 0.09536581486463547,
"eval_scitail-pairs-qa_runtime": 1.1537,
"eval_scitail-pairs-qa_samples_per_second": 130.014,
"eval_scitail-pairs-qa_steps_per_second": 4.334,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_scitail-pairs-pos_loss": 0.4803718328475952,
"eval_scitail-pairs-pos_runtime": 2.1338,
"eval_scitail-pairs-pos_samples_per_second": 70.297,
"eval_scitail-pairs-pos_steps_per_second": 2.343,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_xsum-pairs_loss": 0.4886069595813751,
"eval_xsum-pairs_runtime": 2.2577,
"eval_xsum-pairs_samples_per_second": 66.438,
"eval_xsum-pairs_steps_per_second": 2.215,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_compression-pairs_loss": 0.12639394402503967,
"eval_compression-pairs_runtime": 0.4532,
"eval_compression-pairs_samples_per_second": 330.97,
"eval_compression-pairs_steps_per_second": 11.032,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_sciq_pairs_loss": 0.5328854918479919,
"eval_sciq_pairs_runtime": 7.1317,
"eval_sciq_pairs_samples_per_second": 21.033,
"eval_sciq_pairs_steps_per_second": 0.701,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_qasc_pairs_loss": 4.704314231872559,
"eval_qasc_pairs_runtime": 2.0312,
"eval_qasc_pairs_samples_per_second": 73.848,
"eval_qasc_pairs_steps_per_second": 2.462,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_openbookqa_pairs_loss": 2.2545013427734375,
"eval_openbookqa_pairs_runtime": 0.8657,
"eval_openbookqa_pairs_samples_per_second": 118.98,
"eval_openbookqa_pairs_steps_per_second": 4.621,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_msmarco_pairs_loss": 1.1227293014526367,
"eval_msmarco_pairs_runtime": 2.7855,
"eval_msmarco_pairs_samples_per_second": 53.851,
"eval_msmarco_pairs_steps_per_second": 1.795,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_nq_pairs_loss": 0.9163884520530701,
"eval_nq_pairs_runtime": 5.0251,
"eval_nq_pairs_samples_per_second": 29.85,
"eval_nq_pairs_steps_per_second": 0.995,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_trivia_pairs_loss": 1.2854268550872803,
"eval_trivia_pairs_runtime": 9.6199,
"eval_trivia_pairs_samples_per_second": 15.593,
"eval_trivia_pairs_steps_per_second": 0.52,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_quora_pairs_loss": 0.24444325268268585,
"eval_quora_pairs_runtime": 1.1606,
"eval_quora_pairs_samples_per_second": 129.238,
"eval_quora_pairs_steps_per_second": 4.308,
"step": 13230
},
{
"epoch": 2.2522982635342186,
"eval_gooaq_pairs_loss": 0.8153015971183777,
"eval_gooaq_pairs_runtime": 2.0348,
"eval_gooaq_pairs_samples_per_second": 73.718,
"eval_gooaq_pairs_steps_per_second": 2.457,
"step": 13230
},
{
"epoch": 2.27732379979571,
"grad_norm": 1.6442259550094604,
"learning_rate": 7.58153339905326e-06,
"loss": 0.8897,
"step": 13377
},
{
"epoch": 2.302349336057201,
"grad_norm": 3.068699598312378,
"learning_rate": 6.720483310516198e-06,
"loss": 1.181,
"step": 13524
},
{
"epoch": 2.3273748723186927,
"grad_norm": 15.221121788024902,
"learning_rate": 5.897018137511326e-06,
"loss": 1.0895,
"step": 13671
},
{
"epoch": 2.3524004085801837,
"grad_norm": 8.72175121307373,
"learning_rate": 5.114876005116682e-06,
"loss": 1.0347,
"step": 13818
},
{
"epoch": 2.3774259448416752,
"grad_norm": 15.892960548400879,
"learning_rate": 4.377607452220317e-06,
"loss": 1.1473,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_nli-pairs_loss": 1.1473166942596436,
"eval_nli-pairs_runtime": 4.0846,
"eval_nli-pairs_samples_per_second": 36.723,
"eval_nli-pairs_steps_per_second": 1.224,
"eval_sts-test_pearson_cosine": 0.7794293138100197,
"eval_sts-test_pearson_dot": 0.47438029525552705,
"eval_sts-test_pearson_euclidean": 0.751105924306521,
"eval_sts-test_pearson_manhattan": 0.755281014746346,
"eval_sts-test_pearson_max": 0.7794293138100197,
"eval_sts-test_spearman_cosine": 0.7872791214894774,
"eval_sts-test_spearman_dot": 0.5580180518636964,
"eval_sts-test_spearman_euclidean": 0.7478338358714589,
"eval_sts-test_spearman_manhattan": 0.7517708620916009,
"eval_sts-test_spearman_max": 0.7872791214894774,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_vitaminc-pairs_loss": 0.7656364440917969,
"eval_vitaminc-pairs_runtime": 2.1781,
"eval_vitaminc-pairs_samples_per_second": 68.869,
"eval_vitaminc-pairs_steps_per_second": 2.296,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_qnli-contrastive_loss": 0.18101921677589417,
"eval_qnli-contrastive_runtime": 0.4943,
"eval_qnli-contrastive_samples_per_second": 303.474,
"eval_qnli-contrastive_steps_per_second": 10.116,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_scitail-pairs-qa_loss": 0.09049389511346817,
"eval_scitail-pairs-qa_runtime": 1.1619,
"eval_scitail-pairs-qa_samples_per_second": 129.104,
"eval_scitail-pairs-qa_steps_per_second": 4.303,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_scitail-pairs-pos_loss": 0.47021567821502686,
"eval_scitail-pairs-pos_runtime": 2.1593,
"eval_scitail-pairs-pos_samples_per_second": 69.466,
"eval_scitail-pairs-pos_steps_per_second": 2.316,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_xsum-pairs_loss": 0.4638828933238983,
"eval_xsum-pairs_runtime": 2.2613,
"eval_xsum-pairs_samples_per_second": 66.334,
"eval_xsum-pairs_steps_per_second": 2.211,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_compression-pairs_loss": 0.12560921907424927,
"eval_compression-pairs_runtime": 0.4496,
"eval_compression-pairs_samples_per_second": 333.638,
"eval_compression-pairs_steps_per_second": 11.121,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_sciq_pairs_loss": 0.5231578946113586,
"eval_sciq_pairs_runtime": 7.1367,
"eval_sciq_pairs_samples_per_second": 21.018,
"eval_sciq_pairs_steps_per_second": 0.701,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_qasc_pairs_loss": 4.6708855628967285,
"eval_qasc_pairs_runtime": 2.0351,
"eval_qasc_pairs_samples_per_second": 73.705,
"eval_qasc_pairs_steps_per_second": 2.457,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_openbookqa_pairs_loss": 2.246180772781372,
"eval_openbookqa_pairs_runtime": 0.8632,
"eval_openbookqa_pairs_samples_per_second": 119.324,
"eval_openbookqa_pairs_steps_per_second": 4.634,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_msmarco_pairs_loss": 1.114973545074463,
"eval_msmarco_pairs_runtime": 2.7619,
"eval_msmarco_pairs_samples_per_second": 54.309,
"eval_msmarco_pairs_steps_per_second": 1.81,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_nq_pairs_loss": 0.8807224631309509,
"eval_nq_pairs_runtime": 5.0622,
"eval_nq_pairs_samples_per_second": 29.632,
"eval_nq_pairs_steps_per_second": 0.988,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_trivia_pairs_loss": 1.2553032636642456,
"eval_trivia_pairs_runtime": 9.5755,
"eval_trivia_pairs_samples_per_second": 15.665,
"eval_trivia_pairs_steps_per_second": 0.522,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_quora_pairs_loss": 0.2363266944885254,
"eval_quora_pairs_runtime": 1.1671,
"eval_quora_pairs_samples_per_second": 128.525,
"eval_quora_pairs_steps_per_second": 4.284,
"step": 13965
},
{
"epoch": 2.3774259448416752,
"eval_gooaq_pairs_loss": 0.7755452990531921,
"eval_gooaq_pairs_runtime": 2.0356,
"eval_gooaq_pairs_samples_per_second": 73.69,
"eval_gooaq_pairs_steps_per_second": 2.456,
"step": 13965
},
{
"epoch": 2.4024514811031663,
"grad_norm": 0.3262540102005005,
"learning_rate": 3.688559313827753e-06,
"loss": 1.0026,
"step": 14112
},
{
"epoch": 2.427477017364658,
"grad_norm": 10.04266357421875,
"learning_rate": 3.050859528084451e-06,
"loss": 1.0728,
"step": 14259
},
{
"epoch": 2.4525025536261493,
"grad_norm": 0.9428766369819641,
"learning_rate": 2.46740293698192e-06,
"loss": 0.8232,
"step": 14406
},
{
"epoch": 2.4775280898876404,
"grad_norm": 0.5977104902267456,
"learning_rate": 1.9408381452051525e-06,
"loss": 1.0261,
"step": 14553
},
{
"epoch": 2.502553626149132,
"grad_norm": 7.32331657409668,
"learning_rate": 1.4735554967758374e-06,
"loss": 0.7961,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_nli-pairs_loss": 1.1169875860214233,
"eval_nli-pairs_runtime": 4.3443,
"eval_nli-pairs_samples_per_second": 34.528,
"eval_nli-pairs_steps_per_second": 1.151,
"eval_sts-test_pearson_cosine": 0.7785596547461405,
"eval_sts-test_pearson_dot": 0.4820884354849637,
"eval_sts-test_pearson_euclidean": 0.7540328646347341,
"eval_sts-test_pearson_manhattan": 0.7573099162359008,
"eval_sts-test_pearson_max": 0.7785596547461405,
"eval_sts-test_spearman_cosine": 0.7876381439639152,
"eval_sts-test_spearman_dot": 0.5623065847013597,
"eval_sts-test_spearman_euclidean": 0.749918183890608,
"eval_sts-test_spearman_manhattan": 0.7531470748226545,
"eval_sts-test_spearman_max": 0.7876381439639152,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_vitaminc-pairs_loss": 0.7670332789421082,
"eval_vitaminc-pairs_runtime": 2.2702,
"eval_vitaminc-pairs_samples_per_second": 66.072,
"eval_vitaminc-pairs_steps_per_second": 2.202,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_qnli-contrastive_loss": 0.1765088140964508,
"eval_qnli-contrastive_runtime": 0.5066,
"eval_qnli-contrastive_samples_per_second": 296.085,
"eval_qnli-contrastive_steps_per_second": 9.87,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_scitail-pairs-qa_loss": 0.09172121435403824,
"eval_scitail-pairs-qa_runtime": 1.2477,
"eval_scitail-pairs-qa_samples_per_second": 120.219,
"eval_scitail-pairs-qa_steps_per_second": 4.007,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_scitail-pairs-pos_loss": 0.46642106771469116,
"eval_scitail-pairs-pos_runtime": 2.345,
"eval_scitail-pairs-pos_samples_per_second": 63.966,
"eval_scitail-pairs-pos_steps_per_second": 2.132,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_xsum-pairs_loss": 0.4636780917644501,
"eval_xsum-pairs_runtime": 2.2663,
"eval_xsum-pairs_samples_per_second": 66.186,
"eval_xsum-pairs_steps_per_second": 2.206,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_compression-pairs_loss": 0.11796586215496063,
"eval_compression-pairs_runtime": 0.4625,
"eval_compression-pairs_samples_per_second": 324.33,
"eval_compression-pairs_steps_per_second": 10.811,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_sciq_pairs_loss": 0.5210192203521729,
"eval_sciq_pairs_runtime": 7.5811,
"eval_sciq_pairs_samples_per_second": 19.786,
"eval_sciq_pairs_steps_per_second": 0.66,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_qasc_pairs_loss": 4.692019939422607,
"eval_qasc_pairs_runtime": 2.2569,
"eval_qasc_pairs_samples_per_second": 66.461,
"eval_qasc_pairs_steps_per_second": 2.215,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_openbookqa_pairs_loss": 2.2295894622802734,
"eval_openbookqa_pairs_runtime": 0.9771,
"eval_openbookqa_pairs_samples_per_second": 105.409,
"eval_openbookqa_pairs_steps_per_second": 4.094,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_msmarco_pairs_loss": 1.1003308296203613,
"eval_msmarco_pairs_runtime": 2.8235,
"eval_msmarco_pairs_samples_per_second": 53.126,
"eval_msmarco_pairs_steps_per_second": 1.771,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_nq_pairs_loss": 0.8704373240470886,
"eval_nq_pairs_runtime": 5.0895,
"eval_nq_pairs_samples_per_second": 29.473,
"eval_nq_pairs_steps_per_second": 0.982,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_trivia_pairs_loss": 1.2344694137573242,
"eval_trivia_pairs_runtime": 9.6823,
"eval_trivia_pairs_samples_per_second": 15.492,
"eval_trivia_pairs_steps_per_second": 0.516,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_quora_pairs_loss": 0.23405136168003082,
"eval_quora_pairs_runtime": 1.1581,
"eval_quora_pairs_samples_per_second": 129.521,
"eval_quora_pairs_steps_per_second": 4.317,
"step": 14700
},
{
"epoch": 2.502553626149132,
"eval_gooaq_pairs_loss": 0.7561784982681274,
"eval_gooaq_pairs_runtime": 2.048,
"eval_gooaq_pairs_samples_per_second": 73.241,
"eval_gooaq_pairs_steps_per_second": 2.441,
"step": 14700
},
{
"epoch": 2.527579162410623,
"grad_norm": 17.226215362548828,
"learning_rate": 1.0676762240713628e-06,
"loss": 1.1167,
"step": 14847
},
{
"epoch": 2.5526046986721145,
"grad_norm": 11.959600448608398,
"learning_rate": 7.250428184777619e-07,
"loss": 1.1546,
"step": 14994
},
{
"epoch": 2.577630234933606,
"grad_norm": 6.162104606628418,
"learning_rate": 4.4721066638903405e-07,
"loss": 0.9669,
"step": 15141
},
{
"epoch": 2.602655771195097,
"grad_norm": 1.557124137878418,
"learning_rate": 2.3544098852131546e-07,
"loss": 1.1057,
"step": 15288
},
{
"epoch": 2.627681307456588,
"grad_norm": 6.910587787628174,
"learning_rate": 9.069511459389502e-08,
"loss": 0.868,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_nli-pairs_loss": 1.1110929250717163,
"eval_nli-pairs_runtime": 4.0329,
"eval_nli-pairs_samples_per_second": 37.194,
"eval_nli-pairs_steps_per_second": 1.24,
"eval_sts-test_pearson_cosine": 0.7783243156342984,
"eval_sts-test_pearson_dot": 0.4788735179310955,
"eval_sts-test_pearson_euclidean": 0.7523796985987524,
"eval_sts-test_pearson_manhattan": 0.755715400774414,
"eval_sts-test_pearson_max": 0.7783243156342984,
"eval_sts-test_spearman_cosine": 0.787532153185639,
"eval_sts-test_spearman_dot": 0.5602968065359735,
"eval_sts-test_spearman_euclidean": 0.7486436044524005,
"eval_sts-test_spearman_manhattan": 0.7517248414986571,
"eval_sts-test_spearman_max": 0.787532153185639,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_vitaminc-pairs_loss": 0.7633076906204224,
"eval_vitaminc-pairs_runtime": 2.1822,
"eval_vitaminc-pairs_samples_per_second": 68.738,
"eval_vitaminc-pairs_steps_per_second": 2.291,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_qnli-contrastive_loss": 0.17258352041244507,
"eval_qnli-contrastive_runtime": 0.4919,
"eval_qnli-contrastive_samples_per_second": 304.937,
"eval_qnli-contrastive_steps_per_second": 10.165,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_scitail-pairs-qa_loss": 0.09059016406536102,
"eval_scitail-pairs-qa_runtime": 1.1561,
"eval_scitail-pairs-qa_samples_per_second": 129.748,
"eval_scitail-pairs-qa_steps_per_second": 4.325,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_scitail-pairs-pos_loss": 0.46606332063674927,
"eval_scitail-pairs-pos_runtime": 2.1432,
"eval_scitail-pairs-pos_samples_per_second": 69.988,
"eval_scitail-pairs-pos_steps_per_second": 2.333,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_xsum-pairs_loss": 0.4616774618625641,
"eval_xsum-pairs_runtime": 2.2623,
"eval_xsum-pairs_samples_per_second": 66.306,
"eval_xsum-pairs_steps_per_second": 2.21,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_compression-pairs_loss": 0.11741954833269119,
"eval_compression-pairs_runtime": 0.4508,
"eval_compression-pairs_samples_per_second": 332.731,
"eval_compression-pairs_steps_per_second": 11.091,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_sciq_pairs_loss": 0.5167393088340759,
"eval_sciq_pairs_runtime": 7.102,
"eval_sciq_pairs_samples_per_second": 21.121,
"eval_sciq_pairs_steps_per_second": 0.704,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_qasc_pairs_loss": 4.663302421569824,
"eval_qasc_pairs_runtime": 2.0987,
"eval_qasc_pairs_samples_per_second": 71.472,
"eval_qasc_pairs_steps_per_second": 2.382,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_openbookqa_pairs_loss": 2.2289419174194336,
"eval_openbookqa_pairs_runtime": 0.8981,
"eval_openbookqa_pairs_samples_per_second": 114.689,
"eval_openbookqa_pairs_steps_per_second": 4.454,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_msmarco_pairs_loss": 1.1020023822784424,
"eval_msmarco_pairs_runtime": 2.7621,
"eval_msmarco_pairs_samples_per_second": 54.306,
"eval_msmarco_pairs_steps_per_second": 1.81,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_nq_pairs_loss": 0.8701896071434021,
"eval_nq_pairs_runtime": 5.0219,
"eval_nq_pairs_samples_per_second": 29.869,
"eval_nq_pairs_steps_per_second": 0.996,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_trivia_pairs_loss": 1.231194257736206,
"eval_trivia_pairs_runtime": 9.5216,
"eval_trivia_pairs_samples_per_second": 15.754,
"eval_trivia_pairs_steps_per_second": 0.525,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_quora_pairs_loss": 0.23271657526493073,
"eval_quora_pairs_runtime": 1.1597,
"eval_quora_pairs_samples_per_second": 129.345,
"eval_quora_pairs_steps_per_second": 4.312,
"step": 15435
},
{
"epoch": 2.627681307456588,
"eval_gooaq_pairs_loss": 0.7600908279418945,
"eval_gooaq_pairs_runtime": 2.0644,
"eval_gooaq_pairs_samples_per_second": 72.659,
"eval_gooaq_pairs_steps_per_second": 2.422,
"step": 15435
},
{
"epoch": 2.6527068437180796,
"grad_norm": 1.7729548215866089,
"learning_rate": 1.363011936719949e-08,
"loss": 0.9528,
"step": 15582
},
{
"epoch": 2.677732379979571,
"grad_norm": 10.120983123779297,
"learning_rate": 2.999540416015201e-05,
"loss": 0.9067,
"step": 15729
},
{
"epoch": 2.702757916241062,
"grad_norm": 7.889310359954834,
"learning_rate": 2.9936366712797823e-05,
"loss": 0.9652,
"step": 15876
},
{
"epoch": 2.7277834525025537,
"grad_norm": 14.683575630187988,
"learning_rate": 2.98095255394156e-05,
"loss": 0.9666,
"step": 16023
},
{
"epoch": 2.752808988764045,
"grad_norm": 8.276579856872559,
"learning_rate": 2.9615456436270568e-05,
"loss": 0.9773,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_nli-pairs_loss": 1.1626721620559692,
"eval_nli-pairs_runtime": 4.0414,
"eval_nli-pairs_samples_per_second": 37.116,
"eval_nli-pairs_steps_per_second": 1.237,
"eval_sts-test_pearson_cosine": 0.7701324682629961,
"eval_sts-test_pearson_dot": 0.5036980743243168,
"eval_sts-test_pearson_euclidean": 0.7514501837169054,
"eval_sts-test_pearson_manhattan": 0.7545217281908033,
"eval_sts-test_pearson_max": 0.7701324682629961,
"eval_sts-test_spearman_cosine": 0.7814132001858928,
"eval_sts-test_spearman_dot": 0.559749596717011,
"eval_sts-test_spearman_euclidean": 0.7455646472517521,
"eval_sts-test_spearman_manhattan": 0.7491013309648271,
"eval_sts-test_spearman_max": 0.7814132001858928,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_vitaminc-pairs_loss": 0.7333893775939941,
"eval_vitaminc-pairs_runtime": 2.189,
"eval_vitaminc-pairs_samples_per_second": 68.525,
"eval_vitaminc-pairs_steps_per_second": 2.284,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_qnli-contrastive_loss": 0.2755473256111145,
"eval_qnli-contrastive_runtime": 0.4937,
"eval_qnli-contrastive_samples_per_second": 303.852,
"eval_qnli-contrastive_steps_per_second": 10.128,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_scitail-pairs-qa_loss": 0.094447560608387,
"eval_scitail-pairs-qa_runtime": 1.1614,
"eval_scitail-pairs-qa_samples_per_second": 129.149,
"eval_scitail-pairs-qa_steps_per_second": 4.305,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_scitail-pairs-pos_loss": 0.5057587623596191,
"eval_scitail-pairs-pos_runtime": 2.152,
"eval_scitail-pairs-pos_samples_per_second": 69.701,
"eval_scitail-pairs-pos_steps_per_second": 2.323,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_xsum-pairs_loss": 0.47404322028160095,
"eval_xsum-pairs_runtime": 2.2581,
"eval_xsum-pairs_samples_per_second": 66.428,
"eval_xsum-pairs_steps_per_second": 2.214,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_compression-pairs_loss": 0.13702698051929474,
"eval_compression-pairs_runtime": 0.4467,
"eval_compression-pairs_samples_per_second": 335.818,
"eval_compression-pairs_steps_per_second": 11.194,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_sciq_pairs_loss": 0.5540564060211182,
"eval_sciq_pairs_runtime": 7.0974,
"eval_sciq_pairs_samples_per_second": 21.134,
"eval_sciq_pairs_steps_per_second": 0.704,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_qasc_pairs_loss": 4.834662437438965,
"eval_qasc_pairs_runtime": 2.0303,
"eval_qasc_pairs_samples_per_second": 73.881,
"eval_qasc_pairs_steps_per_second": 2.463,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_openbookqa_pairs_loss": 2.2681949138641357,
"eval_openbookqa_pairs_runtime": 0.8722,
"eval_openbookqa_pairs_samples_per_second": 118.098,
"eval_openbookqa_pairs_steps_per_second": 4.586,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_msmarco_pairs_loss": 1.0872397422790527,
"eval_msmarco_pairs_runtime": 2.7843,
"eval_msmarco_pairs_samples_per_second": 53.873,
"eval_msmarco_pairs_steps_per_second": 1.796,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_nq_pairs_loss": 0.9480971097946167,
"eval_nq_pairs_runtime": 5.0477,
"eval_nq_pairs_samples_per_second": 29.717,
"eval_nq_pairs_steps_per_second": 0.991,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_trivia_pairs_loss": 1.3409621715545654,
"eval_trivia_pairs_runtime": 9.5305,
"eval_trivia_pairs_samples_per_second": 15.739,
"eval_trivia_pairs_steps_per_second": 0.525,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_quora_pairs_loss": 0.23218180239200592,
"eval_quora_pairs_runtime": 1.1513,
"eval_quora_pairs_samples_per_second": 130.282,
"eval_quora_pairs_steps_per_second": 4.343,
"step": 16170
},
{
"epoch": 2.752808988764045,
"eval_gooaq_pairs_loss": 0.8007516264915466,
"eval_gooaq_pairs_runtime": 2.0386,
"eval_gooaq_pairs_samples_per_second": 73.58,
"eval_gooaq_pairs_steps_per_second": 2.453,
"step": 16170
},
{
"epoch": 2.7778345250255363,
"grad_norm": 4.684284687042236,
"learning_rate": 2.935504038121719e-05,
"loss": 1.0145,
"step": 16317
},
{
"epoch": 2.802860061287028,
"grad_norm": 7.51400089263916,
"learning_rate": 2.9029459534494935e-05,
"loss": 1.1732,
"step": 16464
},
{
"epoch": 2.827885597548519,
"grad_norm": 2.8207240104675293,
"learning_rate": 2.8640191872304822e-05,
"loss": 0.884,
"step": 16611
},
{
"epoch": 2.8529111338100104,
"grad_norm": 0.4372667074203491,
"learning_rate": 2.8189004477527595e-05,
"loss": 0.9076,
"step": 16758
},
{
"epoch": 2.8779366700715014,
"grad_norm": 10.363112449645996,
"learning_rate": 2.7677945518040432e-05,
"loss": 0.9472,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_nli-pairs_loss": 1.1196931600570679,
"eval_nli-pairs_runtime": 4.0541,
"eval_nli-pairs_samples_per_second": 37.0,
"eval_nli-pairs_steps_per_second": 1.233,
"eval_sts-test_pearson_cosine": 0.7699051255542444,
"eval_sts-test_pearson_dot": 0.46895647260006346,
"eval_sts-test_pearson_euclidean": 0.757452845704679,
"eval_sts-test_pearson_manhattan": 0.76091917538426,
"eval_sts-test_pearson_max": 0.7699051255542444,
"eval_sts-test_spearman_cosine": 0.7830173030447911,
"eval_sts-test_spearman_dot": 0.55297440791417,
"eval_sts-test_spearman_euclidean": 0.7540269111333524,
"eval_sts-test_spearman_manhattan": 0.7581350404978112,
"eval_sts-test_spearman_max": 0.7830173030447911,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_vitaminc-pairs_loss": 0.712186872959137,
"eval_vitaminc-pairs_runtime": 2.1744,
"eval_vitaminc-pairs_samples_per_second": 68.985,
"eval_vitaminc-pairs_steps_per_second": 2.299,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_qnli-contrastive_loss": 0.22650264203548431,
"eval_qnli-contrastive_runtime": 0.4958,
"eval_qnli-contrastive_samples_per_second": 302.557,
"eval_qnli-contrastive_steps_per_second": 10.085,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_scitail-pairs-qa_loss": 0.09132811427116394,
"eval_scitail-pairs-qa_runtime": 1.1566,
"eval_scitail-pairs-qa_samples_per_second": 129.694,
"eval_scitail-pairs-qa_steps_per_second": 4.323,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_scitail-pairs-pos_loss": 0.467918336391449,
"eval_scitail-pairs-pos_runtime": 2.2104,
"eval_scitail-pairs-pos_samples_per_second": 67.862,
"eval_scitail-pairs-pos_steps_per_second": 2.262,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_xsum-pairs_loss": 0.47422775626182556,
"eval_xsum-pairs_runtime": 2.2599,
"eval_xsum-pairs_samples_per_second": 66.373,
"eval_xsum-pairs_steps_per_second": 2.212,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_compression-pairs_loss": 0.11817952245473862,
"eval_compression-pairs_runtime": 0.4608,
"eval_compression-pairs_samples_per_second": 325.531,
"eval_compression-pairs_steps_per_second": 10.851,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_sciq_pairs_loss": 0.5348854660987854,
"eval_sciq_pairs_runtime": 7.1388,
"eval_sciq_pairs_samples_per_second": 21.012,
"eval_sciq_pairs_steps_per_second": 0.7,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_qasc_pairs_loss": 4.7842535972595215,
"eval_qasc_pairs_runtime": 2.025,
"eval_qasc_pairs_samples_per_second": 74.075,
"eval_qasc_pairs_steps_per_second": 2.469,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_openbookqa_pairs_loss": 2.231882095336914,
"eval_openbookqa_pairs_runtime": 0.8604,
"eval_openbookqa_pairs_samples_per_second": 119.717,
"eval_openbookqa_pairs_steps_per_second": 4.649,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_msmarco_pairs_loss": 1.0365279912948608,
"eval_msmarco_pairs_runtime": 2.7537,
"eval_msmarco_pairs_samples_per_second": 54.473,
"eval_msmarco_pairs_steps_per_second": 1.816,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_nq_pairs_loss": 0.8606622219085693,
"eval_nq_pairs_runtime": 5.0268,
"eval_nq_pairs_samples_per_second": 29.84,
"eval_nq_pairs_steps_per_second": 0.995,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_trivia_pairs_loss": 1.3138747215270996,
"eval_trivia_pairs_runtime": 9.5468,
"eval_trivia_pairs_samples_per_second": 15.712,
"eval_trivia_pairs_steps_per_second": 0.524,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_quora_pairs_loss": 0.23666483163833618,
"eval_quora_pairs_runtime": 1.1556,
"eval_quora_pairs_samples_per_second": 129.805,
"eval_quora_pairs_steps_per_second": 4.327,
"step": 16905
},
{
"epoch": 2.8779366700715014,
"eval_gooaq_pairs_loss": 0.7752490043640137,
"eval_gooaq_pairs_runtime": 2.0295,
"eval_gooaq_pairs_samples_per_second": 73.909,
"eval_gooaq_pairs_steps_per_second": 2.464,
"step": 16905
},
{
"epoch": 2.902962206332993,
"grad_norm": 1.7985535860061646,
"learning_rate": 2.7109334949046588e-05,
"loss": 0.8681,
"step": 17052
},
{
"epoch": 2.927987742594484,
"grad_norm": 1.4242136478424072,
"learning_rate": 2.64857539816249e-05,
"loss": 0.7491,
"step": 17199
},
{
"epoch": 2.9530132788559755,
"grad_norm": 2.512678623199463,
"learning_rate": 2.5814799487800633e-05,
"loss": 0.8847,
"step": 17346
},
{
"epoch": 2.9780388151174666,
"grad_norm": 8.538130760192871,
"learning_rate": 2.50903296142385e-05,
"loss": 0.8441,
"step": 17493
}
],
"logging_steps": 147,
"max_steps": 29370,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2937,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}