{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5, "eval_steps": 735, "global_step": 8811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02502553626149132, "grad_norm": 65.55949401855469, "learning_rate": 4.834865509022812e-07, "loss": 16.851, "step": 147 }, { "epoch": 0.05005107252298264, "grad_norm": 23.207971572875977, "learning_rate": 9.805924412665985e-07, "loss": 11.2787, "step": 294 }, { "epoch": 0.07507660878447395, "grad_norm": 176.1532440185547, "learning_rate": 1.481103166496425e-06, "loss": 8.9166, "step": 441 }, { "epoch": 0.10010214504596528, "grad_norm": 22.1564998626709, "learning_rate": 1.981613891726251e-06, "loss": 7.9463, "step": 588 }, { "epoch": 0.12512768130745658, "grad_norm": 20.11876106262207, "learning_rate": 2.4821246169560777e-06, "loss": 7.2108, "step": 735 }, { "epoch": 0.12512768130745658, "eval_nli-pairs_loss": 6.905651569366455, "eval_nli-pairs_runtime": 4.0844, "eval_nli-pairs_samples_per_second": 36.725, "eval_nli-pairs_steps_per_second": 1.224, "eval_sts-test_pearson_cosine": 0.3740256550072784, "eval_sts-test_pearson_dot": 0.13384893803205677, "eval_sts-test_pearson_euclidean": 0.3912387619869807, "eval_sts-test_pearson_manhattan": 0.4202605137823524, "eval_sts-test_pearson_max": 0.4202605137823524, "eval_sts-test_spearman_cosine": 0.37210107338950205, "eval_sts-test_spearman_dot": 0.12092409843417483, "eval_sts-test_spearman_euclidean": 0.39172287978780546, "eval_sts-test_spearman_manhattan": 0.4169664738563951, "eval_sts-test_spearman_max": 0.4169664738563951, "step": 735 }, { "epoch": 0.12512768130745658, "eval_vitaminc-pairs_loss": 5.720878601074219, "eval_vitaminc-pairs_runtime": 2.1703, "eval_vitaminc-pairs_samples_per_second": 69.115, "eval_vitaminc-pairs_steps_per_second": 2.304, "step": 735 }, { "epoch": 0.12512768130745658, "eval_qnli-contrastive_loss": 8.1649751663208, "eval_qnli-contrastive_runtime": 0.4937, "eval_qnli-contrastive_samples_per_second": 303.841, "eval_qnli-contrastive_steps_per_second": 10.128, "step": 735 }, { "epoch": 0.12512768130745658, "eval_scitail-pairs-qa_loss": 3.7859296798706055, "eval_scitail-pairs-qa_runtime": 1.1509, "eval_scitail-pairs-qa_samples_per_second": 130.329, "eval_scitail-pairs-qa_steps_per_second": 4.344, "step": 735 }, { "epoch": 0.12512768130745658, "eval_scitail-pairs-pos_loss": 3.9919917583465576, "eval_scitail-pairs-pos_runtime": 2.1442, "eval_scitail-pairs-pos_samples_per_second": 69.956, "eval_scitail-pairs-pos_steps_per_second": 2.332, "step": 735 }, { "epoch": 0.12512768130745658, "eval_xsum-pairs_loss": 4.600368976593018, "eval_xsum-pairs_runtime": 2.26, "eval_xsum-pairs_samples_per_second": 66.371, "eval_xsum-pairs_steps_per_second": 2.212, "step": 735 }, { "epoch": 0.12512768130745658, "eval_compression-pairs_loss": 3.3037569522857666, "eval_compression-pairs_runtime": 0.449, "eval_compression-pairs_samples_per_second": 334.078, "eval_compression-pairs_steps_per_second": 11.136, "step": 735 }, { "epoch": 0.12512768130745658, "eval_sciq_pairs_loss": 10.214456558227539, "eval_sciq_pairs_runtime": 7.1179, "eval_sciq_pairs_samples_per_second": 21.074, "eval_sciq_pairs_steps_per_second": 0.702, "step": 735 }, { "epoch": 0.12512768130745658, "eval_qasc_pairs_loss": 10.58031940460205, "eval_qasc_pairs_runtime": 2.0175, "eval_qasc_pairs_samples_per_second": 74.348, "eval_qasc_pairs_steps_per_second": 2.478, "step": 735 }, { "epoch": 0.12512768130745658, "eval_openbookqa_pairs_loss": 7.862658977508545, "eval_openbookqa_pairs_runtime": 0.8571, "eval_openbookqa_pairs_samples_per_second": 120.168, "eval_openbookqa_pairs_steps_per_second": 4.667, "step": 735 }, { "epoch": 0.12512768130745658, "eval_msmarco_pairs_loss": 8.754273414611816, "eval_msmarco_pairs_runtime": 2.7533, "eval_msmarco_pairs_samples_per_second": 54.481, "eval_msmarco_pairs_steps_per_second": 1.816, "step": 735 }, { "epoch": 0.12512768130745658, "eval_nq_pairs_loss": 8.415486335754395, "eval_nq_pairs_runtime": 5.0894, "eval_nq_pairs_samples_per_second": 29.473, "eval_nq_pairs_steps_per_second": 0.982, "step": 735 }, { "epoch": 0.12512768130745658, "eval_trivia_pairs_loss": 9.051105499267578, "eval_trivia_pairs_runtime": 9.5498, "eval_trivia_pairs_samples_per_second": 15.707, "eval_trivia_pairs_steps_per_second": 0.524, "step": 735 }, { "epoch": 0.12512768130745658, "eval_quora_pairs_loss": 4.5232110023498535, "eval_quora_pairs_runtime": 1.1469, "eval_quora_pairs_samples_per_second": 130.785, "eval_quora_pairs_steps_per_second": 4.36, "step": 735 }, { "epoch": 0.12512768130745658, "eval_gooaq_pairs_loss": 7.579105854034424, "eval_gooaq_pairs_runtime": 2.0491, "eval_gooaq_pairs_samples_per_second": 73.203, "eval_gooaq_pairs_steps_per_second": 2.44, "step": 735 }, { "epoch": 0.1501532175689479, "grad_norm": 31.7736759185791, "learning_rate": 2.982635342185904e-06, "loss": 6.7709, "step": 882 }, { "epoch": 0.1751787538304392, "grad_norm": 31.57339096069336, "learning_rate": 3.4831460674157306e-06, "loss": 6.1746, "step": 1029 }, { "epoch": 0.20020429009193055, "grad_norm": 25.392702102661133, "learning_rate": 3.9836567926455565e-06, "loss": 5.7706, "step": 1176 }, { "epoch": 0.22522982635342187, "grad_norm": 32.390472412109375, "learning_rate": 4.484167517875383e-06, "loss": 5.7283, "step": 1323 }, { "epoch": 0.25025536261491316, "grad_norm": 18.85039520263672, "learning_rate": 4.98467824310521e-06, "loss": 5.1856, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_nli-pairs_loss": 4.352054119110107, "eval_nli-pairs_runtime": 4.1476, "eval_nli-pairs_samples_per_second": 36.165, "eval_nli-pairs_steps_per_second": 1.206, "eval_sts-test_pearson_cosine": 0.6694155778571752, "eval_sts-test_pearson_dot": 0.5201102118957572, "eval_sts-test_pearson_euclidean": 0.6613028243200022, "eval_sts-test_pearson_manhattan": 0.6670710500315469, "eval_sts-test_pearson_max": 0.6694155778571752, "eval_sts-test_spearman_cosine": 0.6367853204388882, "eval_sts-test_spearman_dot": 0.4940207180607985, "eval_sts-test_spearman_euclidean": 0.6391132775161348, "eval_sts-test_spearman_manhattan": 0.6446159957787251, "eval_sts-test_spearman_max": 0.6446159957787251, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_vitaminc-pairs_loss": 3.4987735748291016, "eval_vitaminc-pairs_runtime": 2.1678, "eval_vitaminc-pairs_samples_per_second": 69.194, "eval_vitaminc-pairs_steps_per_second": 2.306, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_qnli-contrastive_loss": 12.915559768676758, "eval_qnli-contrastive_runtime": 0.4918, "eval_qnli-contrastive_samples_per_second": 304.99, "eval_qnli-contrastive_steps_per_second": 10.166, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_scitail-pairs-qa_loss": 1.3250077962875366, "eval_scitail-pairs-qa_runtime": 1.154, "eval_scitail-pairs-qa_samples_per_second": 129.984, "eval_scitail-pairs-qa_steps_per_second": 4.333, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_scitail-pairs-pos_loss": 2.457335948944092, "eval_scitail-pairs-pos_runtime": 2.1475, "eval_scitail-pairs-pos_samples_per_second": 69.85, "eval_scitail-pairs-pos_steps_per_second": 2.328, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_xsum-pairs_loss": 3.071201801300049, "eval_xsum-pairs_runtime": 2.2634, "eval_xsum-pairs_samples_per_second": 66.271, "eval_xsum-pairs_steps_per_second": 2.209, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_compression-pairs_loss": 2.0629916191101074, "eval_compression-pairs_runtime": 0.4529, "eval_compression-pairs_samples_per_second": 331.23, "eval_compression-pairs_steps_per_second": 11.041, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_sciq_pairs_loss": 9.06814193725586, "eval_sciq_pairs_runtime": 7.1445, "eval_sciq_pairs_samples_per_second": 20.995, "eval_sciq_pairs_steps_per_second": 0.7, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_qasc_pairs_loss": 9.245658874511719, "eval_qasc_pairs_runtime": 2.0471, "eval_qasc_pairs_samples_per_second": 73.274, "eval_qasc_pairs_steps_per_second": 2.442, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_openbookqa_pairs_loss": 5.652446746826172, "eval_openbookqa_pairs_runtime": 0.8946, "eval_openbookqa_pairs_samples_per_second": 115.14, "eval_openbookqa_pairs_steps_per_second": 4.471, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_msmarco_pairs_loss": 4.844855785369873, "eval_msmarco_pairs_runtime": 2.7887, "eval_msmarco_pairs_samples_per_second": 53.788, "eval_msmarco_pairs_steps_per_second": 1.793, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_nq_pairs_loss": 5.023958206176758, "eval_nq_pairs_runtime": 5.0823, "eval_nq_pairs_samples_per_second": 29.514, "eval_nq_pairs_steps_per_second": 0.984, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_trivia_pairs_loss": 5.2907304763793945, "eval_trivia_pairs_runtime": 9.6673, "eval_trivia_pairs_samples_per_second": 15.516, "eval_trivia_pairs_steps_per_second": 0.517, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_quora_pairs_loss": 1.5572240352630615, "eval_quora_pairs_runtime": 1.1979, "eval_quora_pairs_samples_per_second": 125.218, "eval_quora_pairs_steps_per_second": 4.174, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_gooaq_pairs_loss": 3.970768928527832, "eval_gooaq_pairs_runtime": 2.117, "eval_gooaq_pairs_samples_per_second": 70.855, "eval_gooaq_pairs_steps_per_second": 2.362, "step": 1470 }, { "epoch": 0.2752808988764045, "grad_norm": 40.67585754394531, "learning_rate": 5.4851889683350365e-06, "loss": 4.185, "step": 1617 }, { "epoch": 0.3003064351378958, "grad_norm": 45.92570495605469, "learning_rate": 5.985699693564862e-06, "loss": 4.6367, "step": 1764 }, { "epoch": 0.32533197139938713, "grad_norm": 13.566838264465332, "learning_rate": 6.486210418794688e-06, "loss": 4.3615, "step": 1911 }, { "epoch": 0.3503575076608784, "grad_norm": 9.495999336242676, "learning_rate": 6.986721144024515e-06, "loss": 4.1791, "step": 2058 }, { "epoch": 0.37538304392236976, "grad_norm": 32.735416412353516, "learning_rate": 7.487231869254341e-06, "loss": 4.1051, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_nli-pairs_loss": 3.2717113494873047, "eval_nli-pairs_runtime": 4.0124, "eval_nli-pairs_samples_per_second": 37.384, "eval_nli-pairs_steps_per_second": 1.246, "eval_sts-test_pearson_cosine": 0.6958570089637609, "eval_sts-test_pearson_dot": 0.5824298957890577, "eval_sts-test_pearson_euclidean": 0.6893962819387462, "eval_sts-test_pearson_manhattan": 0.6993681181979946, "eval_sts-test_pearson_max": 0.6993681181979946, "eval_sts-test_spearman_cosine": 0.6652712160836801, "eval_sts-test_spearman_dot": 0.5536505624407877, "eval_sts-test_spearman_euclidean": 0.6659844314307678, "eval_sts-test_spearman_manhattan": 0.675740852112121, "eval_sts-test_spearman_max": 0.675740852112121, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_vitaminc-pairs_loss": 2.7197911739349365, "eval_vitaminc-pairs_runtime": 2.1625, "eval_vitaminc-pairs_samples_per_second": 69.365, "eval_vitaminc-pairs_steps_per_second": 2.312, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_qnli-contrastive_loss": 9.638714790344238, "eval_qnli-contrastive_runtime": 0.4877, "eval_qnli-contrastive_samples_per_second": 307.567, "eval_qnli-contrastive_steps_per_second": 10.252, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_scitail-pairs-qa_loss": 0.8106752634048462, "eval_scitail-pairs-qa_runtime": 1.1588, "eval_scitail-pairs-qa_samples_per_second": 129.449, "eval_scitail-pairs-qa_steps_per_second": 4.315, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_scitail-pairs-pos_loss": 1.8894625902175903, "eval_scitail-pairs-pos_runtime": 2.1181, "eval_scitail-pairs-pos_samples_per_second": 70.817, "eval_scitail-pairs-pos_steps_per_second": 2.361, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_xsum-pairs_loss": 2.262718439102173, "eval_xsum-pairs_runtime": 2.2585, "eval_xsum-pairs_samples_per_second": 66.416, "eval_xsum-pairs_steps_per_second": 2.214, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_compression-pairs_loss": 1.4910633563995361, "eval_compression-pairs_runtime": 0.4462, "eval_compression-pairs_samples_per_second": 336.204, "eval_compression-pairs_steps_per_second": 11.207, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_sciq_pairs_loss": 8.59740161895752, "eval_sciq_pairs_runtime": 7.1845, "eval_sciq_pairs_samples_per_second": 20.878, "eval_sciq_pairs_steps_per_second": 0.696, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_qasc_pairs_loss": 8.103879928588867, "eval_qasc_pairs_runtime": 2.0762, "eval_qasc_pairs_samples_per_second": 72.246, "eval_qasc_pairs_steps_per_second": 2.408, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_openbookqa_pairs_loss": 5.090969562530518, "eval_openbookqa_pairs_runtime": 0.89, "eval_openbookqa_pairs_samples_per_second": 115.726, "eval_openbookqa_pairs_steps_per_second": 4.494, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_msmarco_pairs_loss": 3.9566943645477295, "eval_msmarco_pairs_runtime": 2.8183, "eval_msmarco_pairs_samples_per_second": 53.223, "eval_msmarco_pairs_steps_per_second": 1.774, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_nq_pairs_loss": 4.009054183959961, "eval_nq_pairs_runtime": 5.0219, "eval_nq_pairs_samples_per_second": 29.869, "eval_nq_pairs_steps_per_second": 0.996, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_trivia_pairs_loss": 4.286431312561035, "eval_trivia_pairs_runtime": 9.4975, "eval_trivia_pairs_samples_per_second": 15.794, "eval_trivia_pairs_steps_per_second": 0.526, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_quora_pairs_loss": 1.123273491859436, "eval_quora_pairs_runtime": 1.1487, "eval_quora_pairs_samples_per_second": 130.586, "eval_quora_pairs_steps_per_second": 4.353, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_gooaq_pairs_loss": 3.222414255142212, "eval_gooaq_pairs_runtime": 2.0173, "eval_gooaq_pairs_samples_per_second": 74.357, "eval_gooaq_pairs_steps_per_second": 2.479, "step": 2205 }, { "epoch": 0.4004085801838611, "grad_norm": 218.56105041503906, "learning_rate": 7.987742594484168e-06, "loss": 3.7674, "step": 2352 }, { "epoch": 0.4254341164453524, "grad_norm": 27.877609252929688, "learning_rate": 8.488253319713993e-06, "loss": 3.8729, "step": 2499 }, { "epoch": 0.45045965270684374, "grad_norm": 33.50013732910156, "learning_rate": 8.988764044943822e-06, "loss": 3.4527, "step": 2646 }, { "epoch": 0.475485188968335, "grad_norm": 14.015911102294922, "learning_rate": 9.489274770173647e-06, "loss": 3.3545, "step": 2793 }, { "epoch": 0.5005107252298263, "grad_norm": 33.59694290161133, "learning_rate": 9.989785495403473e-06, "loss": 3.3247, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_nli-pairs_loss": 2.7121565341949463, "eval_nli-pairs_runtime": 4.1564, "eval_nli-pairs_samples_per_second": 36.089, "eval_nli-pairs_steps_per_second": 1.203, "eval_sts-test_pearson_cosine": 0.716623047702725, "eval_sts-test_pearson_dot": 0.6128451070598809, "eval_sts-test_pearson_euclidean": 0.7138791236031807, "eval_sts-test_pearson_manhattan": 0.7213151818687454, "eval_sts-test_pearson_max": 0.7213151818687454, "eval_sts-test_spearman_cosine": 0.6919792400941177, "eval_sts-test_spearman_dot": 0.5867158357121192, "eval_sts-test_spearman_euclidean": 0.6925037259567834, "eval_sts-test_spearman_manhattan": 0.7008895667910079, "eval_sts-test_spearman_max": 0.7008895667910079, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_vitaminc-pairs_loss": 2.225992441177368, "eval_vitaminc-pairs_runtime": 2.253, "eval_vitaminc-pairs_samples_per_second": 66.577, "eval_vitaminc-pairs_steps_per_second": 2.219, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_qnli-contrastive_loss": 4.92629861831665, "eval_qnli-contrastive_runtime": 0.5005, "eval_qnli-contrastive_samples_per_second": 299.691, "eval_qnli-contrastive_steps_per_second": 9.99, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_scitail-pairs-qa_loss": 0.5898066163063049, "eval_scitail-pairs-qa_runtime": 1.2227, "eval_scitail-pairs-qa_samples_per_second": 122.682, "eval_scitail-pairs-qa_steps_per_second": 4.089, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_scitail-pairs-pos_loss": 1.4237287044525146, "eval_scitail-pairs-pos_runtime": 2.4409, "eval_scitail-pairs-pos_samples_per_second": 61.452, "eval_scitail-pairs-pos_steps_per_second": 2.048, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_xsum-pairs_loss": 1.8388895988464355, "eval_xsum-pairs_runtime": 2.2831, "eval_xsum-pairs_samples_per_second": 65.7, "eval_xsum-pairs_steps_per_second": 2.19, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_compression-pairs_loss": 1.1590967178344727, "eval_compression-pairs_runtime": 0.5152, "eval_compression-pairs_samples_per_second": 291.165, "eval_compression-pairs_steps_per_second": 9.706, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_sciq_pairs_loss": 8.282496452331543, "eval_sciq_pairs_runtime": 7.2871, "eval_sciq_pairs_samples_per_second": 20.584, "eval_sciq_pairs_steps_per_second": 0.686, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_qasc_pairs_loss": 7.817965507507324, "eval_qasc_pairs_runtime": 2.0211, "eval_qasc_pairs_samples_per_second": 74.218, "eval_qasc_pairs_steps_per_second": 2.474, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_openbookqa_pairs_loss": 4.619383811950684, "eval_openbookqa_pairs_runtime": 0.8531, "eval_openbookqa_pairs_samples_per_second": 120.731, "eval_openbookqa_pairs_steps_per_second": 4.689, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_msmarco_pairs_loss": 3.478559970855713, "eval_msmarco_pairs_runtime": 2.7512, "eval_msmarco_pairs_samples_per_second": 54.522, "eval_msmarco_pairs_steps_per_second": 1.817, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_nq_pairs_loss": 3.3449866771698, "eval_nq_pairs_runtime": 5.0591, "eval_nq_pairs_samples_per_second": 29.649, "eval_nq_pairs_steps_per_second": 0.988, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_trivia_pairs_loss": 3.524484872817993, "eval_trivia_pairs_runtime": 9.662, "eval_trivia_pairs_samples_per_second": 15.525, "eval_trivia_pairs_steps_per_second": 0.517, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_quora_pairs_loss": 0.9095575213432312, "eval_quora_pairs_runtime": 1.2482, "eval_quora_pairs_samples_per_second": 120.175, "eval_quora_pairs_steps_per_second": 4.006, "step": 2940 }, { "epoch": 0.5005107252298263, "eval_gooaq_pairs_loss": 2.6586034297943115, "eval_gooaq_pairs_runtime": 2.1091, "eval_gooaq_pairs_samples_per_second": 71.12, "eval_gooaq_pairs_steps_per_second": 2.371, "step": 2940 }, { "epoch": 0.5255362614913177, "grad_norm": 35.33409118652344, "learning_rate": 1.04902962206333e-05, "loss": 3.116, "step": 3087 }, { "epoch": 0.550561797752809, "grad_norm": 22.29003143310547, "learning_rate": 1.0990806945863125e-05, "loss": 3.2418, "step": 3234 }, { "epoch": 0.5755873340143003, "grad_norm": 31.277965545654297, "learning_rate": 1.1491317671092953e-05, "loss": 3.0757, "step": 3381 }, { "epoch": 0.6006128702757916, "grad_norm": 24.612506866455078, "learning_rate": 1.1991828396322778e-05, "loss": 2.8524, "step": 3528 }, { "epoch": 0.625638406537283, "grad_norm": 25.11741065979004, "learning_rate": 1.2492339121552605e-05, "loss": 2.6875, "step": 3675 }, { "epoch": 0.625638406537283, "eval_nli-pairs_loss": 2.479051113128662, "eval_nli-pairs_runtime": 3.9943, "eval_nli-pairs_samples_per_second": 37.553, "eval_nli-pairs_steps_per_second": 1.252, "eval_sts-test_pearson_cosine": 0.7278742453545186, "eval_sts-test_pearson_dot": 0.6217650825208566, "eval_sts-test_pearson_euclidean": 0.7243228472931561, "eval_sts-test_pearson_manhattan": 0.7333297580184588, "eval_sts-test_pearson_max": 0.7333297580184588, "eval_sts-test_spearman_cosine": 0.7013110457844404, "eval_sts-test_spearman_dot": 0.5970993074902947, "eval_sts-test_spearman_euclidean": 0.701564129266252, "eval_sts-test_spearman_manhattan": 0.7116482009924582, "eval_sts-test_spearman_max": 0.7116482009924582, "step": 3675 }, { "epoch": 0.625638406537283, "eval_vitaminc-pairs_loss": 1.974273681640625, "eval_vitaminc-pairs_runtime": 2.1754, "eval_vitaminc-pairs_samples_per_second": 68.953, "eval_vitaminc-pairs_steps_per_second": 2.298, "step": 3675 }, { "epoch": 0.625638406537283, "eval_qnli-contrastive_loss": 1.7706010341644287, "eval_qnli-contrastive_runtime": 0.4866, "eval_qnli-contrastive_samples_per_second": 308.244, "eval_qnli-contrastive_steps_per_second": 10.275, "step": 3675 }, { "epoch": 0.625638406537283, "eval_scitail-pairs-qa_loss": 0.4400452673435211, "eval_scitail-pairs-qa_runtime": 1.1519, "eval_scitail-pairs-qa_samples_per_second": 130.222, "eval_scitail-pairs-qa_steps_per_second": 4.341, "step": 3675 }, { "epoch": 0.625638406537283, "eval_scitail-pairs-pos_loss": 1.1909903287887573, "eval_scitail-pairs-pos_runtime": 2.1319, "eval_scitail-pairs-pos_samples_per_second": 70.36, "eval_scitail-pairs-pos_steps_per_second": 2.345, "step": 3675 }, { "epoch": 0.625638406537283, "eval_xsum-pairs_loss": 1.4811985492706299, "eval_xsum-pairs_runtime": 2.254, "eval_xsum-pairs_samples_per_second": 66.548, "eval_xsum-pairs_steps_per_second": 2.218, "step": 3675 }, { "epoch": 0.625638406537283, "eval_compression-pairs_loss": 0.8453781008720398, "eval_compression-pairs_runtime": 0.4401, "eval_compression-pairs_samples_per_second": 340.826, "eval_compression-pairs_steps_per_second": 11.361, "step": 3675 }, { "epoch": 0.625638406537283, "eval_sciq_pairs_loss": 8.014656066894531, "eval_sciq_pairs_runtime": 7.0707, "eval_sciq_pairs_samples_per_second": 21.214, "eval_sciq_pairs_steps_per_second": 0.707, "step": 3675 }, { "epoch": 0.625638406537283, "eval_qasc_pairs_loss": 6.9316277503967285, "eval_qasc_pairs_runtime": 2.0338, "eval_qasc_pairs_samples_per_second": 73.752, "eval_qasc_pairs_steps_per_second": 2.458, "step": 3675 }, { "epoch": 0.625638406537283, "eval_openbookqa_pairs_loss": 4.21690034866333, "eval_openbookqa_pairs_runtime": 0.918, "eval_openbookqa_pairs_samples_per_second": 112.202, "eval_openbookqa_pairs_steps_per_second": 4.357, "step": 3675 }, { "epoch": 0.625638406537283, "eval_msmarco_pairs_loss": 3.0209598541259766, "eval_msmarco_pairs_runtime": 2.7749, "eval_msmarco_pairs_samples_per_second": 54.056, "eval_msmarco_pairs_steps_per_second": 1.802, "step": 3675 }, { "epoch": 0.625638406537283, "eval_nq_pairs_loss": 2.956088066101074, "eval_nq_pairs_runtime": 5.0024, "eval_nq_pairs_samples_per_second": 29.986, "eval_nq_pairs_steps_per_second": 1.0, "step": 3675 }, { "epoch": 0.625638406537283, "eval_trivia_pairs_loss": 3.17364501953125, "eval_trivia_pairs_runtime": 9.4856, "eval_trivia_pairs_samples_per_second": 15.813, "eval_trivia_pairs_steps_per_second": 0.527, "step": 3675 }, { "epoch": 0.625638406537283, "eval_quora_pairs_loss": 0.763593852519989, "eval_quora_pairs_runtime": 1.1441, "eval_quora_pairs_samples_per_second": 131.104, "eval_quora_pairs_steps_per_second": 4.37, "step": 3675 }, { "epoch": 0.625638406537283, "eval_gooaq_pairs_loss": 2.3524909019470215, "eval_gooaq_pairs_runtime": 2.0161, "eval_gooaq_pairs_samples_per_second": 74.4, "eval_gooaq_pairs_steps_per_second": 2.48, "step": 3675 }, { "epoch": 0.6506639427987743, "grad_norm": 31.163997650146484, "learning_rate": 1.2992849846782432e-05, "loss": 2.7808, "step": 3822 }, { "epoch": 0.6756894790602656, "grad_norm": 14.883658409118652, "learning_rate": 1.3493360572012258e-05, "loss": 2.5687, "step": 3969 }, { "epoch": 0.7007150153217568, "grad_norm": 5.874042987823486, "learning_rate": 1.3993871297242083e-05, "loss": 2.3034, "step": 4116 }, { "epoch": 0.7257405515832482, "grad_norm": 31.464054107666016, "learning_rate": 1.4494382022471912e-05, "loss": 2.4412, "step": 4263 }, { "epoch": 0.7507660878447395, "grad_norm": 16.43915367126465, "learning_rate": 1.4994892747701737e-05, "loss": 2.3293, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_nli-pairs_loss": 2.3226094245910645, "eval_nli-pairs_runtime": 4.113, "eval_nli-pairs_samples_per_second": 36.47, "eval_nli-pairs_steps_per_second": 1.216, "eval_sts-test_pearson_cosine": 0.7356971966139032, "eval_sts-test_pearson_dot": 0.6150809513049869, "eval_sts-test_pearson_euclidean": 0.7330733579988641, "eval_sts-test_pearson_manhattan": 0.7423412248131348, "eval_sts-test_pearson_max": 0.7423412248131348, "eval_sts-test_spearman_cosine": 0.7121899723082045, "eval_sts-test_spearman_dot": 0.5926505936679538, "eval_sts-test_spearman_euclidean": 0.7130179905407037, "eval_sts-test_spearman_manhattan": 0.7227257562995023, "eval_sts-test_spearman_max": 0.7227257562995023, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_vitaminc-pairs_loss": 1.7956713438034058, "eval_vitaminc-pairs_runtime": 2.174, "eval_vitaminc-pairs_samples_per_second": 68.996, "eval_vitaminc-pairs_steps_per_second": 2.3, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_qnli-contrastive_loss": 1.0078614950180054, "eval_qnli-contrastive_runtime": 0.4874, "eval_qnli-contrastive_samples_per_second": 307.763, "eval_qnli-contrastive_steps_per_second": 10.259, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_scitail-pairs-qa_loss": 0.36971578001976013, "eval_scitail-pairs-qa_runtime": 1.164, "eval_scitail-pairs-qa_samples_per_second": 128.863, "eval_scitail-pairs-qa_steps_per_second": 4.295, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_scitail-pairs-pos_loss": 1.0497769117355347, "eval_scitail-pairs-pos_runtime": 2.1205, "eval_scitail-pairs-pos_samples_per_second": 70.74, "eval_scitail-pairs-pos_steps_per_second": 2.358, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_xsum-pairs_loss": 1.1691261529922485, "eval_xsum-pairs_runtime": 2.259, "eval_xsum-pairs_samples_per_second": 66.401, "eval_xsum-pairs_steps_per_second": 2.213, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_compression-pairs_loss": 0.5027483105659485, "eval_compression-pairs_runtime": 0.4403, "eval_compression-pairs_samples_per_second": 340.682, "eval_compression-pairs_steps_per_second": 11.356, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_sciq_pairs_loss": 7.823739528656006, "eval_sciq_pairs_runtime": 7.0738, "eval_sciq_pairs_samples_per_second": 21.205, "eval_sciq_pairs_steps_per_second": 0.707, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_qasc_pairs_loss": 6.404655933380127, "eval_qasc_pairs_runtime": 2.0346, "eval_qasc_pairs_samples_per_second": 73.723, "eval_qasc_pairs_steps_per_second": 2.457, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_openbookqa_pairs_loss": 3.857389211654663, "eval_openbookqa_pairs_runtime": 0.8544, "eval_openbookqa_pairs_samples_per_second": 120.547, "eval_openbookqa_pairs_steps_per_second": 4.681, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_msmarco_pairs_loss": 2.7028510570526123, "eval_msmarco_pairs_runtime": 2.7448, "eval_msmarco_pairs_samples_per_second": 54.649, "eval_msmarco_pairs_steps_per_second": 1.822, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_nq_pairs_loss": 2.679351329803467, "eval_nq_pairs_runtime": 5.067, "eval_nq_pairs_samples_per_second": 29.603, "eval_nq_pairs_steps_per_second": 0.987, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_trivia_pairs_loss": 2.8798065185546875, "eval_trivia_pairs_runtime": 9.5449, "eval_trivia_pairs_samples_per_second": 15.715, "eval_trivia_pairs_steps_per_second": 0.524, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_quora_pairs_loss": 0.6825175285339355, "eval_quora_pairs_runtime": 1.1431, "eval_quora_pairs_samples_per_second": 131.221, "eval_quora_pairs_steps_per_second": 4.374, "step": 4410 }, { "epoch": 0.7507660878447395, "eval_gooaq_pairs_loss": 2.0472166538238525, "eval_gooaq_pairs_runtime": 2.0218, "eval_gooaq_pairs_samples_per_second": 74.191, "eval_gooaq_pairs_steps_per_second": 2.473, "step": 4410 }, { "epoch": 0.7757916241062308, "grad_norm": 4.2425055503845215, "learning_rate": 1.5495403472931565e-05, "loss": 2.3651, "step": 4557 }, { "epoch": 0.8008171603677222, "grad_norm": 22.42776107788086, "learning_rate": 1.5995914198161388e-05, "loss": 2.6296, "step": 4704 }, { "epoch": 0.8258426966292135, "grad_norm": 21.169517517089844, "learning_rate": 1.6496424923391215e-05, "loss": 2.2108, "step": 4851 }, { "epoch": 0.8508682328907048, "grad_norm": 23.326181411743164, "learning_rate": 1.699693564862104e-05, "loss": 2.1852, "step": 4998 }, { "epoch": 0.8758937691521961, "grad_norm": 24.574176788330078, "learning_rate": 1.7497446373850868e-05, "loss": 2.2944, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_nli-pairs_loss": 2.0634915828704834, "eval_nli-pairs_runtime": 4.0019, "eval_nli-pairs_samples_per_second": 37.482, "eval_nli-pairs_steps_per_second": 1.249, "eval_sts-test_pearson_cosine": 0.7466390532977636, "eval_sts-test_pearson_dot": 0.612259458274589, "eval_sts-test_pearson_euclidean": 0.7432536346376271, "eval_sts-test_pearson_manhattan": 0.7500490179501229, "eval_sts-test_pearson_max": 0.7500490179501229, "eval_sts-test_spearman_cosine": 0.728273260456201, "eval_sts-test_spearman_dot": 0.5960115087190596, "eval_sts-test_spearman_euclidean": 0.7272394395622148, "eval_sts-test_spearman_manhattan": 0.7334149564445704, "eval_sts-test_spearman_max": 0.7334149564445704, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_vitaminc-pairs_loss": 1.638654112815857, "eval_vitaminc-pairs_runtime": 2.1637, "eval_vitaminc-pairs_samples_per_second": 69.327, "eval_vitaminc-pairs_steps_per_second": 2.311, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_qnli-contrastive_loss": 0.9639705419540405, "eval_qnli-contrastive_runtime": 0.4889, "eval_qnli-contrastive_samples_per_second": 306.825, "eval_qnli-contrastive_steps_per_second": 10.228, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_scitail-pairs-qa_loss": 0.31595128774642944, "eval_scitail-pairs-qa_runtime": 1.1467, "eval_scitail-pairs-qa_samples_per_second": 130.806, "eval_scitail-pairs-qa_steps_per_second": 4.36, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_scitail-pairs-pos_loss": 0.9187478423118591, "eval_scitail-pairs-pos_runtime": 2.1273, "eval_scitail-pairs-pos_samples_per_second": 70.512, "eval_scitail-pairs-pos_steps_per_second": 2.35, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_xsum-pairs_loss": 1.060194492340088, "eval_xsum-pairs_runtime": 2.2836, "eval_xsum-pairs_samples_per_second": 65.686, "eval_xsum-pairs_steps_per_second": 2.19, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_compression-pairs_loss": 0.41078585386276245, "eval_compression-pairs_runtime": 0.4434, "eval_compression-pairs_samples_per_second": 338.276, "eval_compression-pairs_steps_per_second": 11.276, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_sciq_pairs_loss": 7.577760696411133, "eval_sciq_pairs_runtime": 7.1025, "eval_sciq_pairs_samples_per_second": 21.119, "eval_sciq_pairs_steps_per_second": 0.704, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_qasc_pairs_loss": 6.353766918182373, "eval_qasc_pairs_runtime": 2.0113, "eval_qasc_pairs_samples_per_second": 74.58, "eval_qasc_pairs_steps_per_second": 2.486, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_openbookqa_pairs_loss": 3.7140932083129883, "eval_openbookqa_pairs_runtime": 0.8529, "eval_openbookqa_pairs_samples_per_second": 120.762, "eval_openbookqa_pairs_steps_per_second": 4.69, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_msmarco_pairs_loss": 2.3862576484680176, "eval_msmarco_pairs_runtime": 2.8953, "eval_msmarco_pairs_samples_per_second": 51.808, "eval_msmarco_pairs_steps_per_second": 1.727, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_nq_pairs_loss": 2.3543190956115723, "eval_nq_pairs_runtime": 5.0048, "eval_nq_pairs_samples_per_second": 29.971, "eval_nq_pairs_steps_per_second": 0.999, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_trivia_pairs_loss": 2.494807481765747, "eval_trivia_pairs_runtime": 9.5513, "eval_trivia_pairs_samples_per_second": 15.705, "eval_trivia_pairs_steps_per_second": 0.523, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_quora_pairs_loss": 0.6137441992759705, "eval_quora_pairs_runtime": 1.1541, "eval_quora_pairs_samples_per_second": 129.967, "eval_quora_pairs_steps_per_second": 4.332, "step": 5145 }, { "epoch": 0.8758937691521961, "eval_gooaq_pairs_loss": 1.8279658555984497, "eval_gooaq_pairs_runtime": 2.0951, "eval_gooaq_pairs_samples_per_second": 71.595, "eval_gooaq_pairs_steps_per_second": 2.387, "step": 5145 }, { "epoch": 0.9009193054136875, "grad_norm": 10.590804100036621, "learning_rate": 1.7997957099080695e-05, "loss": 2.2133, "step": 5292 }, { "epoch": 0.9259448416751788, "grad_norm": 18.527711868286133, "learning_rate": 1.849846782431052e-05, "loss": 2.2255, "step": 5439 }, { "epoch": 0.95097037793667, "grad_norm": 2.617710828781128, "learning_rate": 1.8995573714674838e-05, "loss": 2.3502, "step": 5586 }, { "epoch": 0.9759959141981613, "grad_norm": 19.551551818847656, "learning_rate": 1.9496084439904668e-05, "loss": 1.8964, "step": 5733 }, { "epoch": 1.0010214504596526, "grad_norm": 11.783225059509277, "learning_rate": 1.999319033026898e-05, "loss": 1.913, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_nli-pairs_loss": 1.9677053689956665, "eval_nli-pairs_runtime": 4.3863, "eval_nli-pairs_samples_per_second": 34.198, "eval_nli-pairs_steps_per_second": 1.14, "eval_sts-test_pearson_cosine": 0.7531824359441671, "eval_sts-test_pearson_dot": 0.602579906515822, "eval_sts-test_pearson_euclidean": 0.7486763477944213, "eval_sts-test_pearson_manhattan": 0.7566220287347274, "eval_sts-test_pearson_max": 0.7566220287347274, "eval_sts-test_spearman_cosine": 0.7387792578665129, "eval_sts-test_spearman_dot": 0.5926594656319394, "eval_sts-test_spearman_euclidean": 0.733653805383597, "eval_sts-test_spearman_manhattan": 0.7420657558603486, "eval_sts-test_spearman_max": 0.7420657558603486, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_vitaminc-pairs_loss": 1.4394291639328003, "eval_vitaminc-pairs_runtime": 2.2575, "eval_vitaminc-pairs_samples_per_second": 66.446, "eval_vitaminc-pairs_steps_per_second": 2.215, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_qnli-contrastive_loss": 0.45715218782424927, "eval_qnli-contrastive_runtime": 0.501, "eval_qnli-contrastive_samples_per_second": 299.385, "eval_qnli-contrastive_steps_per_second": 9.979, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_scitail-pairs-qa_loss": 0.26679515838623047, "eval_scitail-pairs-qa_runtime": 1.4342, "eval_scitail-pairs-qa_samples_per_second": 104.587, "eval_scitail-pairs-qa_steps_per_second": 3.486, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_scitail-pairs-pos_loss": 0.8628473281860352, "eval_scitail-pairs-pos_runtime": 2.3485, "eval_scitail-pairs-pos_samples_per_second": 63.871, "eval_scitail-pairs-pos_steps_per_second": 2.129, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_xsum-pairs_loss": 0.9014443755149841, "eval_xsum-pairs_runtime": 2.2896, "eval_xsum-pairs_samples_per_second": 65.513, "eval_xsum-pairs_steps_per_second": 2.184, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_compression-pairs_loss": 0.3047434389591217, "eval_compression-pairs_runtime": 0.4852, "eval_compression-pairs_samples_per_second": 309.163, "eval_compression-pairs_steps_per_second": 10.305, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_sciq_pairs_loss": 1.091601848602295, "eval_sciq_pairs_runtime": 7.3046, "eval_sciq_pairs_samples_per_second": 20.535, "eval_sciq_pairs_steps_per_second": 0.684, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_qasc_pairs_loss": 5.947833061218262, "eval_qasc_pairs_runtime": 2.1787, "eval_qasc_pairs_samples_per_second": 68.849, "eval_qasc_pairs_steps_per_second": 2.295, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_openbookqa_pairs_loss": 3.4724366664886475, "eval_openbookqa_pairs_runtime": 0.9106, "eval_openbookqa_pairs_samples_per_second": 113.111, "eval_openbookqa_pairs_steps_per_second": 4.393, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_msmarco_pairs_loss": 2.1638240814208984, "eval_msmarco_pairs_runtime": 2.82, "eval_msmarco_pairs_samples_per_second": 53.191, "eval_msmarco_pairs_steps_per_second": 1.773, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_nq_pairs_loss": 2.110903739929199, "eval_nq_pairs_runtime": 5.2303, "eval_nq_pairs_samples_per_second": 28.679, "eval_nq_pairs_steps_per_second": 0.956, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_trivia_pairs_loss": 2.3711097240448, "eval_trivia_pairs_runtime": 9.6247, "eval_trivia_pairs_samples_per_second": 15.585, "eval_trivia_pairs_steps_per_second": 0.519, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_quora_pairs_loss": 0.5216041803359985, "eval_quora_pairs_runtime": 1.3072, "eval_quora_pairs_samples_per_second": 114.749, "eval_quora_pairs_steps_per_second": 3.825, "step": 5880 }, { "epoch": 1.0010214504596526, "eval_gooaq_pairs_loss": 1.7041363716125488, "eval_gooaq_pairs_runtime": 2.0973, "eval_gooaq_pairs_samples_per_second": 71.521, "eval_gooaq_pairs_steps_per_second": 2.384, "step": 5880 }, { "epoch": 1.026046986721144, "grad_norm": 17.308378219604492, "learning_rate": 2.0493701055498808e-05, "loss": 1.7772, "step": 6027 }, { "epoch": 1.0510725229826354, "grad_norm": 20.248981475830078, "learning_rate": 2.0994211780728634e-05, "loss": 1.9079, "step": 6174 }, { "epoch": 1.0760980592441267, "grad_norm": 6.012618064880371, "learning_rate": 2.1494722505958464e-05, "loss": 1.8657, "step": 6321 }, { "epoch": 1.101123595505618, "grad_norm": 1.1185024976730347, "learning_rate": 2.1995233231188288e-05, "loss": 1.7144, "step": 6468 }, { "epoch": 1.1261491317671093, "grad_norm": 1.2436251640319824, "learning_rate": 2.2495743956418114e-05, "loss": 1.7661, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_nli-pairs_loss": 1.7907973527908325, "eval_nli-pairs_runtime": 4.0147, "eval_nli-pairs_samples_per_second": 37.363, "eval_nli-pairs_steps_per_second": 1.245, "eval_sts-test_pearson_cosine": 0.755444461779583, "eval_sts-test_pearson_dot": 0.5833168145328357, "eval_sts-test_pearson_euclidean": 0.7437155007996056, "eval_sts-test_pearson_manhattan": 0.7524938984567344, "eval_sts-test_pearson_max": 0.755444461779583, "eval_sts-test_spearman_cosine": 0.7446166596886566, "eval_sts-test_spearman_dot": 0.5792340720766105, "eval_sts-test_spearman_euclidean": 0.7317285388028532, "eval_sts-test_spearman_manhattan": 0.7401637904976945, "eval_sts-test_spearman_max": 0.7446166596886566, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_vitaminc-pairs_loss": 1.3403607606887817, "eval_vitaminc-pairs_runtime": 2.168, "eval_vitaminc-pairs_samples_per_second": 69.189, "eval_vitaminc-pairs_steps_per_second": 2.306, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_qnli-contrastive_loss": 0.2736852467060089, "eval_qnli-contrastive_runtime": 0.4913, "eval_qnli-contrastive_samples_per_second": 305.336, "eval_qnli-contrastive_steps_per_second": 10.178, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_scitail-pairs-qa_loss": 0.22441554069519043, "eval_scitail-pairs-qa_runtime": 1.1614, "eval_scitail-pairs-qa_samples_per_second": 129.152, "eval_scitail-pairs-qa_steps_per_second": 4.305, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_scitail-pairs-pos_loss": 0.7723743915557861, "eval_scitail-pairs-pos_runtime": 2.1567, "eval_scitail-pairs-pos_samples_per_second": 69.55, "eval_scitail-pairs-pos_steps_per_second": 2.318, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_xsum-pairs_loss": 0.8370540142059326, "eval_xsum-pairs_runtime": 2.2569, "eval_xsum-pairs_samples_per_second": 66.463, "eval_xsum-pairs_steps_per_second": 2.215, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_compression-pairs_loss": 0.265947163105011, "eval_compression-pairs_runtime": 0.4431, "eval_compression-pairs_samples_per_second": 338.529, "eval_compression-pairs_steps_per_second": 11.284, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_sciq_pairs_loss": 0.9383512735366821, "eval_sciq_pairs_runtime": 7.1464, "eval_sciq_pairs_samples_per_second": 20.99, "eval_sciq_pairs_steps_per_second": 0.7, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_qasc_pairs_loss": 5.753899097442627, "eval_qasc_pairs_runtime": 2.0099, "eval_qasc_pairs_samples_per_second": 74.63, "eval_qasc_pairs_steps_per_second": 2.488, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_openbookqa_pairs_loss": 3.3517918586730957, "eval_openbookqa_pairs_runtime": 0.8594, "eval_openbookqa_pairs_samples_per_second": 119.858, "eval_openbookqa_pairs_steps_per_second": 4.655, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_msmarco_pairs_loss": 2.044360399246216, "eval_msmarco_pairs_runtime": 2.7431, "eval_msmarco_pairs_samples_per_second": 54.682, "eval_msmarco_pairs_steps_per_second": 1.823, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_nq_pairs_loss": 1.9409464597702026, "eval_nq_pairs_runtime": 5.028, "eval_nq_pairs_samples_per_second": 29.833, "eval_nq_pairs_steps_per_second": 0.994, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_trivia_pairs_loss": 2.369060754776001, "eval_trivia_pairs_runtime": 9.5137, "eval_trivia_pairs_samples_per_second": 15.767, "eval_trivia_pairs_steps_per_second": 0.526, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_quora_pairs_loss": 0.47849634289741516, "eval_quora_pairs_runtime": 1.1413, "eval_quora_pairs_samples_per_second": 131.424, "eval_quora_pairs_steps_per_second": 4.381, "step": 6615 }, { "epoch": 1.1261491317671093, "eval_gooaq_pairs_loss": 1.5795674324035645, "eval_gooaq_pairs_runtime": 2.0155, "eval_gooaq_pairs_samples_per_second": 74.422, "eval_gooaq_pairs_steps_per_second": 2.481, "step": 6615 }, { "epoch": 1.1511746680286006, "grad_norm": 20.95261001586914, "learning_rate": 2.299625468164794e-05, "loss": 1.8066, "step": 6762 }, { "epoch": 1.1762002042900919, "grad_norm": 20.31597900390625, "learning_rate": 2.3496765406877764e-05, "loss": 1.7438, "step": 6909 }, { "epoch": 1.2012257405515832, "grad_norm": 28.363882064819336, "learning_rate": 2.399727613210759e-05, "loss": 2.0231, "step": 7056 }, { "epoch": 1.2262512768130747, "grad_norm": 14.403656959533691, "learning_rate": 2.449778685733742e-05, "loss": 1.8966, "step": 7203 }, { "epoch": 1.251276813074566, "grad_norm": 17.73562240600586, "learning_rate": 2.4998297582567248e-05, "loss": 1.7958, "step": 7350 }, { "epoch": 1.251276813074566, "eval_nli-pairs_loss": 1.5906368494033813, "eval_nli-pairs_runtime": 4.0261, "eval_nli-pairs_samples_per_second": 37.257, "eval_nli-pairs_steps_per_second": 1.242, "eval_sts-test_pearson_cosine": 0.7626661521495873, "eval_sts-test_pearson_dot": 0.5632604768989181, "eval_sts-test_pearson_euclidean": 0.7370060575260952, "eval_sts-test_pearson_manhattan": 0.7472706980613159, "eval_sts-test_pearson_max": 0.7626661521495873, "eval_sts-test_spearman_cosine": 0.7535266725567149, "eval_sts-test_spearman_dot": 0.5848997224802808, "eval_sts-test_spearman_euclidean": 0.7290608032903477, "eval_sts-test_spearman_manhattan": 0.739032087078249, "eval_sts-test_spearman_max": 0.7535266725567149, "step": 7350 }, { "epoch": 1.251276813074566, "eval_vitaminc-pairs_loss": 1.222551941871643, "eval_vitaminc-pairs_runtime": 2.1784, "eval_vitaminc-pairs_samples_per_second": 68.857, "eval_vitaminc-pairs_steps_per_second": 2.295, "step": 7350 }, { "epoch": 1.251276813074566, "eval_qnli-contrastive_loss": 0.3951484262943268, "eval_qnli-contrastive_runtime": 0.4916, "eval_qnli-contrastive_samples_per_second": 305.11, "eval_qnli-contrastive_steps_per_second": 10.17, "step": 7350 }, { "epoch": 1.251276813074566, "eval_scitail-pairs-qa_loss": 0.17783091962337494, "eval_scitail-pairs-qa_runtime": 1.1549, "eval_scitail-pairs-qa_samples_per_second": 129.88, "eval_scitail-pairs-qa_steps_per_second": 4.329, "step": 7350 }, { "epoch": 1.251276813074566, "eval_scitail-pairs-pos_loss": 0.7214661836624146, "eval_scitail-pairs-pos_runtime": 2.132, "eval_scitail-pairs-pos_samples_per_second": 70.357, "eval_scitail-pairs-pos_steps_per_second": 2.345, "step": 7350 }, { "epoch": 1.251276813074566, "eval_xsum-pairs_loss": 0.7919928431510925, "eval_xsum-pairs_runtime": 2.2579, "eval_xsum-pairs_samples_per_second": 66.432, "eval_xsum-pairs_steps_per_second": 2.214, "step": 7350 }, { "epoch": 1.251276813074566, "eval_compression-pairs_loss": 0.24975377321243286, "eval_compression-pairs_runtime": 0.447, "eval_compression-pairs_samples_per_second": 335.534, "eval_compression-pairs_steps_per_second": 11.184, "step": 7350 }, { "epoch": 1.251276813074566, "eval_sciq_pairs_loss": 0.8343773484230042, "eval_sciq_pairs_runtime": 7.1288, "eval_sciq_pairs_samples_per_second": 21.042, "eval_sciq_pairs_steps_per_second": 0.701, "step": 7350 }, { "epoch": 1.251276813074566, "eval_qasc_pairs_loss": 5.4840240478515625, "eval_qasc_pairs_runtime": 2.025, "eval_qasc_pairs_samples_per_second": 74.074, "eval_qasc_pairs_steps_per_second": 2.469, "step": 7350 }, { "epoch": 1.251276813074566, "eval_openbookqa_pairs_loss": 3.1631176471710205, "eval_openbookqa_pairs_runtime": 0.8612, "eval_openbookqa_pairs_samples_per_second": 119.598, "eval_openbookqa_pairs_steps_per_second": 4.645, "step": 7350 }, { "epoch": 1.251276813074566, "eval_msmarco_pairs_loss": 1.8952231407165527, "eval_msmarco_pairs_runtime": 2.7585, "eval_msmarco_pairs_samples_per_second": 54.378, "eval_msmarco_pairs_steps_per_second": 1.813, "step": 7350 }, { "epoch": 1.251276813074566, "eval_nq_pairs_loss": 1.6934970617294312, "eval_nq_pairs_runtime": 5.0253, "eval_nq_pairs_samples_per_second": 29.849, "eval_nq_pairs_steps_per_second": 0.995, "step": 7350 }, { "epoch": 1.251276813074566, "eval_trivia_pairs_loss": 1.9966663122177124, "eval_trivia_pairs_runtime": 9.5675, "eval_trivia_pairs_samples_per_second": 15.678, "eval_trivia_pairs_steps_per_second": 0.523, "step": 7350 }, { "epoch": 1.251276813074566, "eval_quora_pairs_loss": 0.405385285615921, "eval_quora_pairs_runtime": 1.1432, "eval_quora_pairs_samples_per_second": 131.209, "eval_quora_pairs_steps_per_second": 4.374, "step": 7350 }, { "epoch": 1.251276813074566, "eval_gooaq_pairs_loss": 1.3951071500778198, "eval_gooaq_pairs_runtime": 2.038, "eval_gooaq_pairs_samples_per_second": 73.601, "eval_gooaq_pairs_steps_per_second": 2.453, "step": 7350 }, { "epoch": 1.2763023493360572, "grad_norm": 21.254159927368164, "learning_rate": 2.549880830779707e-05, "loss": 1.5109, "step": 7497 }, { "epoch": 1.3013278855975485, "grad_norm": 20.08012580871582, "learning_rate": 2.5999319033026898e-05, "loss": 1.8119, "step": 7644 }, { "epoch": 1.3263534218590398, "grad_norm": 0.6448306441307068, "learning_rate": 2.6499829758256724e-05, "loss": 1.6833, "step": 7791 }, { "epoch": 1.351378958120531, "grad_norm": 16.65821647644043, "learning_rate": 2.7000340483486554e-05, "loss": 1.5917, "step": 7938 }, { "epoch": 1.3764044943820224, "grad_norm": 14.949362754821777, "learning_rate": 2.7500851208716378e-05, "loss": 1.809, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_nli-pairs_loss": 1.5967836380004883, "eval_nli-pairs_runtime": 4.0496, "eval_nli-pairs_samples_per_second": 37.041, "eval_nli-pairs_steps_per_second": 1.235, "eval_sts-test_pearson_cosine": 0.7653416933913197, "eval_sts-test_pearson_dot": 0.5401711611334493, "eval_sts-test_pearson_euclidean": 0.7529907774019836, "eval_sts-test_pearson_manhattan": 0.7605105025260754, "eval_sts-test_pearson_max": 0.7653416933913197, "eval_sts-test_spearman_cosine": 0.7593865234485873, "eval_sts-test_spearman_dot": 0.5559615063301898, "eval_sts-test_spearman_euclidean": 0.7436431053840061, "eval_sts-test_spearman_manhattan": 0.7515978828464567, "eval_sts-test_spearman_max": 0.7593865234485873, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_vitaminc-pairs_loss": 1.1434590816497803, "eval_vitaminc-pairs_runtime": 2.2066, "eval_vitaminc-pairs_samples_per_second": 67.977, "eval_vitaminc-pairs_steps_per_second": 2.266, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_qnli-contrastive_loss": 0.3819103538990021, "eval_qnli-contrastive_runtime": 0.4972, "eval_qnli-contrastive_samples_per_second": 301.706, "eval_qnli-contrastive_steps_per_second": 10.057, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_scitail-pairs-qa_loss": 0.15774373710155487, "eval_scitail-pairs-qa_runtime": 1.1704, "eval_scitail-pairs-qa_samples_per_second": 128.161, "eval_scitail-pairs-qa_steps_per_second": 4.272, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_scitail-pairs-pos_loss": 0.6571963429450989, "eval_scitail-pairs-pos_runtime": 2.1634, "eval_scitail-pairs-pos_samples_per_second": 69.335, "eval_scitail-pairs-pos_steps_per_second": 2.311, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_xsum-pairs_loss": 0.7028753757476807, "eval_xsum-pairs_runtime": 2.2608, "eval_xsum-pairs_samples_per_second": 66.347, "eval_xsum-pairs_steps_per_second": 2.212, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_compression-pairs_loss": 0.23010987043380737, "eval_compression-pairs_runtime": 0.4514, "eval_compression-pairs_samples_per_second": 332.284, "eval_compression-pairs_steps_per_second": 11.076, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_sciq_pairs_loss": 0.799666702747345, "eval_sciq_pairs_runtime": 7.1816, "eval_sciq_pairs_samples_per_second": 20.887, "eval_sciq_pairs_steps_per_second": 0.696, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_qasc_pairs_loss": 5.433376789093018, "eval_qasc_pairs_runtime": 2.0592, "eval_qasc_pairs_samples_per_second": 72.843, "eval_qasc_pairs_steps_per_second": 2.428, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_openbookqa_pairs_loss": 2.9010672569274902, "eval_openbookqa_pairs_runtime": 0.865, "eval_openbookqa_pairs_samples_per_second": 119.074, "eval_openbookqa_pairs_steps_per_second": 4.624, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_msmarco_pairs_loss": 1.7567836046218872, "eval_msmarco_pairs_runtime": 2.7812, "eval_msmarco_pairs_samples_per_second": 53.933, "eval_msmarco_pairs_steps_per_second": 1.798, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_nq_pairs_loss": 1.5407707691192627, "eval_nq_pairs_runtime": 5.0607, "eval_nq_pairs_samples_per_second": 29.64, "eval_nq_pairs_steps_per_second": 0.988, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_trivia_pairs_loss": 1.8419283628463745, "eval_trivia_pairs_runtime": 9.5535, "eval_trivia_pairs_samples_per_second": 15.701, "eval_trivia_pairs_steps_per_second": 0.523, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_quora_pairs_loss": 0.3561370372772217, "eval_quora_pairs_runtime": 1.2005, "eval_quora_pairs_samples_per_second": 124.946, "eval_quora_pairs_steps_per_second": 4.165, "step": 8085 }, { "epoch": 1.3764044943820224, "eval_gooaq_pairs_loss": 1.1745914220809937, "eval_gooaq_pairs_runtime": 2.0463, "eval_gooaq_pairs_samples_per_second": 73.305, "eval_gooaq_pairs_steps_per_second": 2.443, "step": 8085 }, { "epoch": 1.401430030643514, "grad_norm": 14.31106185913086, "learning_rate": 2.8001361933946204e-05, "loss": 1.5561, "step": 8232 }, { "epoch": 1.4264555669050052, "grad_norm": 11.82392692565918, "learning_rate": 2.850187265917603e-05, "loss": 1.5325, "step": 8379 }, { "epoch": 1.4514811031664965, "grad_norm": 21.716449737548828, "learning_rate": 2.9002383384405858e-05, "loss": 1.5085, "step": 8526 }, { "epoch": 1.4765066394279878, "grad_norm": 6.5607147216796875, "learning_rate": 2.950289410963568e-05, "loss": 1.5634, "step": 8673 } ], "logging_steps": 147, "max_steps": 29370, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2937, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }