|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 735, |
|
"global_step": 17622, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02502553626149132, |
|
"grad_norm": 65.55949401855469, |
|
"learning_rate": 4.834865509022812e-07, |
|
"loss": 16.851, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.05005107252298264, |
|
"grad_norm": 23.207971572875977, |
|
"learning_rate": 9.805924412665985e-07, |
|
"loss": 11.2787, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.07507660878447395, |
|
"grad_norm": 176.1532440185547, |
|
"learning_rate": 1.481103166496425e-06, |
|
"loss": 8.9166, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.10010214504596528, |
|
"grad_norm": 22.1564998626709, |
|
"learning_rate": 1.981613891726251e-06, |
|
"loss": 7.9463, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"grad_norm": 20.11876106262207, |
|
"learning_rate": 2.4821246169560777e-06, |
|
"loss": 7.2108, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_nli-pairs_loss": 6.905651569366455, |
|
"eval_nli-pairs_runtime": 4.0844, |
|
"eval_nli-pairs_samples_per_second": 36.725, |
|
"eval_nli-pairs_steps_per_second": 1.224, |
|
"eval_sts-test_pearson_cosine": 0.3740256550072784, |
|
"eval_sts-test_pearson_dot": 0.13384893803205677, |
|
"eval_sts-test_pearson_euclidean": 0.3912387619869807, |
|
"eval_sts-test_pearson_manhattan": 0.4202605137823524, |
|
"eval_sts-test_pearson_max": 0.4202605137823524, |
|
"eval_sts-test_spearman_cosine": 0.37210107338950205, |
|
"eval_sts-test_spearman_dot": 0.12092409843417483, |
|
"eval_sts-test_spearman_euclidean": 0.39172287978780546, |
|
"eval_sts-test_spearman_manhattan": 0.4169664738563951, |
|
"eval_sts-test_spearman_max": 0.4169664738563951, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_vitaminc-pairs_loss": 5.720878601074219, |
|
"eval_vitaminc-pairs_runtime": 2.1703, |
|
"eval_vitaminc-pairs_samples_per_second": 69.115, |
|
"eval_vitaminc-pairs_steps_per_second": 2.304, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_qnli-contrastive_loss": 8.1649751663208, |
|
"eval_qnli-contrastive_runtime": 0.4937, |
|
"eval_qnli-contrastive_samples_per_second": 303.841, |
|
"eval_qnli-contrastive_steps_per_second": 10.128, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_scitail-pairs-qa_loss": 3.7859296798706055, |
|
"eval_scitail-pairs-qa_runtime": 1.1509, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.329, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.344, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_scitail-pairs-pos_loss": 3.9919917583465576, |
|
"eval_scitail-pairs-pos_runtime": 2.1442, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.956, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.332, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_xsum-pairs_loss": 4.600368976593018, |
|
"eval_xsum-pairs_runtime": 2.26, |
|
"eval_xsum-pairs_samples_per_second": 66.371, |
|
"eval_xsum-pairs_steps_per_second": 2.212, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_compression-pairs_loss": 3.3037569522857666, |
|
"eval_compression-pairs_runtime": 0.449, |
|
"eval_compression-pairs_samples_per_second": 334.078, |
|
"eval_compression-pairs_steps_per_second": 11.136, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_sciq_pairs_loss": 10.214456558227539, |
|
"eval_sciq_pairs_runtime": 7.1179, |
|
"eval_sciq_pairs_samples_per_second": 21.074, |
|
"eval_sciq_pairs_steps_per_second": 0.702, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_qasc_pairs_loss": 10.58031940460205, |
|
"eval_qasc_pairs_runtime": 2.0175, |
|
"eval_qasc_pairs_samples_per_second": 74.348, |
|
"eval_qasc_pairs_steps_per_second": 2.478, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_openbookqa_pairs_loss": 7.862658977508545, |
|
"eval_openbookqa_pairs_runtime": 0.8571, |
|
"eval_openbookqa_pairs_samples_per_second": 120.168, |
|
"eval_openbookqa_pairs_steps_per_second": 4.667, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_msmarco_pairs_loss": 8.754273414611816, |
|
"eval_msmarco_pairs_runtime": 2.7533, |
|
"eval_msmarco_pairs_samples_per_second": 54.481, |
|
"eval_msmarco_pairs_steps_per_second": 1.816, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_nq_pairs_loss": 8.415486335754395, |
|
"eval_nq_pairs_runtime": 5.0894, |
|
"eval_nq_pairs_samples_per_second": 29.473, |
|
"eval_nq_pairs_steps_per_second": 0.982, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_trivia_pairs_loss": 9.051105499267578, |
|
"eval_trivia_pairs_runtime": 9.5498, |
|
"eval_trivia_pairs_samples_per_second": 15.707, |
|
"eval_trivia_pairs_steps_per_second": 0.524, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_quora_pairs_loss": 4.5232110023498535, |
|
"eval_quora_pairs_runtime": 1.1469, |
|
"eval_quora_pairs_samples_per_second": 130.785, |
|
"eval_quora_pairs_steps_per_second": 4.36, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_gooaq_pairs_loss": 7.579105854034424, |
|
"eval_gooaq_pairs_runtime": 2.0491, |
|
"eval_gooaq_pairs_samples_per_second": 73.203, |
|
"eval_gooaq_pairs_steps_per_second": 2.44, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.1501532175689479, |
|
"grad_norm": 31.7736759185791, |
|
"learning_rate": 2.982635342185904e-06, |
|
"loss": 6.7709, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.1751787538304392, |
|
"grad_norm": 31.57339096069336, |
|
"learning_rate": 3.4831460674157306e-06, |
|
"loss": 6.1746, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.20020429009193055, |
|
"grad_norm": 25.392702102661133, |
|
"learning_rate": 3.9836567926455565e-06, |
|
"loss": 5.7706, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.22522982635342187, |
|
"grad_norm": 32.390472412109375, |
|
"learning_rate": 4.484167517875383e-06, |
|
"loss": 5.7283, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"grad_norm": 18.85039520263672, |
|
"learning_rate": 4.98467824310521e-06, |
|
"loss": 5.1856, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_nli-pairs_loss": 4.352054119110107, |
|
"eval_nli-pairs_runtime": 4.1476, |
|
"eval_nli-pairs_samples_per_second": 36.165, |
|
"eval_nli-pairs_steps_per_second": 1.206, |
|
"eval_sts-test_pearson_cosine": 0.6694155778571752, |
|
"eval_sts-test_pearson_dot": 0.5201102118957572, |
|
"eval_sts-test_pearson_euclidean": 0.6613028243200022, |
|
"eval_sts-test_pearson_manhattan": 0.6670710500315469, |
|
"eval_sts-test_pearson_max": 0.6694155778571752, |
|
"eval_sts-test_spearman_cosine": 0.6367853204388882, |
|
"eval_sts-test_spearman_dot": 0.4940207180607985, |
|
"eval_sts-test_spearman_euclidean": 0.6391132775161348, |
|
"eval_sts-test_spearman_manhattan": 0.6446159957787251, |
|
"eval_sts-test_spearman_max": 0.6446159957787251, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_vitaminc-pairs_loss": 3.4987735748291016, |
|
"eval_vitaminc-pairs_runtime": 2.1678, |
|
"eval_vitaminc-pairs_samples_per_second": 69.194, |
|
"eval_vitaminc-pairs_steps_per_second": 2.306, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_qnli-contrastive_loss": 12.915559768676758, |
|
"eval_qnli-contrastive_runtime": 0.4918, |
|
"eval_qnli-contrastive_samples_per_second": 304.99, |
|
"eval_qnli-contrastive_steps_per_second": 10.166, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_scitail-pairs-qa_loss": 1.3250077962875366, |
|
"eval_scitail-pairs-qa_runtime": 1.154, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.984, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.333, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_scitail-pairs-pos_loss": 2.457335948944092, |
|
"eval_scitail-pairs-pos_runtime": 2.1475, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.85, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.328, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_xsum-pairs_loss": 3.071201801300049, |
|
"eval_xsum-pairs_runtime": 2.2634, |
|
"eval_xsum-pairs_samples_per_second": 66.271, |
|
"eval_xsum-pairs_steps_per_second": 2.209, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_compression-pairs_loss": 2.0629916191101074, |
|
"eval_compression-pairs_runtime": 0.4529, |
|
"eval_compression-pairs_samples_per_second": 331.23, |
|
"eval_compression-pairs_steps_per_second": 11.041, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_sciq_pairs_loss": 9.06814193725586, |
|
"eval_sciq_pairs_runtime": 7.1445, |
|
"eval_sciq_pairs_samples_per_second": 20.995, |
|
"eval_sciq_pairs_steps_per_second": 0.7, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_qasc_pairs_loss": 9.245658874511719, |
|
"eval_qasc_pairs_runtime": 2.0471, |
|
"eval_qasc_pairs_samples_per_second": 73.274, |
|
"eval_qasc_pairs_steps_per_second": 2.442, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_openbookqa_pairs_loss": 5.652446746826172, |
|
"eval_openbookqa_pairs_runtime": 0.8946, |
|
"eval_openbookqa_pairs_samples_per_second": 115.14, |
|
"eval_openbookqa_pairs_steps_per_second": 4.471, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_msmarco_pairs_loss": 4.844855785369873, |
|
"eval_msmarco_pairs_runtime": 2.7887, |
|
"eval_msmarco_pairs_samples_per_second": 53.788, |
|
"eval_msmarco_pairs_steps_per_second": 1.793, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_nq_pairs_loss": 5.023958206176758, |
|
"eval_nq_pairs_runtime": 5.0823, |
|
"eval_nq_pairs_samples_per_second": 29.514, |
|
"eval_nq_pairs_steps_per_second": 0.984, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_trivia_pairs_loss": 5.2907304763793945, |
|
"eval_trivia_pairs_runtime": 9.6673, |
|
"eval_trivia_pairs_samples_per_second": 15.516, |
|
"eval_trivia_pairs_steps_per_second": 0.517, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_quora_pairs_loss": 1.5572240352630615, |
|
"eval_quora_pairs_runtime": 1.1979, |
|
"eval_quora_pairs_samples_per_second": 125.218, |
|
"eval_quora_pairs_steps_per_second": 4.174, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_gooaq_pairs_loss": 3.970768928527832, |
|
"eval_gooaq_pairs_runtime": 2.117, |
|
"eval_gooaq_pairs_samples_per_second": 70.855, |
|
"eval_gooaq_pairs_steps_per_second": 2.362, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.2752808988764045, |
|
"grad_norm": 40.67585754394531, |
|
"learning_rate": 5.4851889683350365e-06, |
|
"loss": 4.185, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 0.3003064351378958, |
|
"grad_norm": 45.92570495605469, |
|
"learning_rate": 5.985699693564862e-06, |
|
"loss": 4.6367, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 0.32533197139938713, |
|
"grad_norm": 13.566838264465332, |
|
"learning_rate": 6.486210418794688e-06, |
|
"loss": 4.3615, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 0.3503575076608784, |
|
"grad_norm": 9.495999336242676, |
|
"learning_rate": 6.986721144024515e-06, |
|
"loss": 4.1791, |
|
"step": 2058 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"grad_norm": 32.735416412353516, |
|
"learning_rate": 7.487231869254341e-06, |
|
"loss": 4.1051, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_nli-pairs_loss": 3.2717113494873047, |
|
"eval_nli-pairs_runtime": 4.0124, |
|
"eval_nli-pairs_samples_per_second": 37.384, |
|
"eval_nli-pairs_steps_per_second": 1.246, |
|
"eval_sts-test_pearson_cosine": 0.6958570089637609, |
|
"eval_sts-test_pearson_dot": 0.5824298957890577, |
|
"eval_sts-test_pearson_euclidean": 0.6893962819387462, |
|
"eval_sts-test_pearson_manhattan": 0.6993681181979946, |
|
"eval_sts-test_pearson_max": 0.6993681181979946, |
|
"eval_sts-test_spearman_cosine": 0.6652712160836801, |
|
"eval_sts-test_spearman_dot": 0.5536505624407877, |
|
"eval_sts-test_spearman_euclidean": 0.6659844314307678, |
|
"eval_sts-test_spearman_manhattan": 0.675740852112121, |
|
"eval_sts-test_spearman_max": 0.675740852112121, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_vitaminc-pairs_loss": 2.7197911739349365, |
|
"eval_vitaminc-pairs_runtime": 2.1625, |
|
"eval_vitaminc-pairs_samples_per_second": 69.365, |
|
"eval_vitaminc-pairs_steps_per_second": 2.312, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_qnli-contrastive_loss": 9.638714790344238, |
|
"eval_qnli-contrastive_runtime": 0.4877, |
|
"eval_qnli-contrastive_samples_per_second": 307.567, |
|
"eval_qnli-contrastive_steps_per_second": 10.252, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_scitail-pairs-qa_loss": 0.8106752634048462, |
|
"eval_scitail-pairs-qa_runtime": 1.1588, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.449, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.315, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_scitail-pairs-pos_loss": 1.8894625902175903, |
|
"eval_scitail-pairs-pos_runtime": 2.1181, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.817, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.361, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_xsum-pairs_loss": 2.262718439102173, |
|
"eval_xsum-pairs_runtime": 2.2585, |
|
"eval_xsum-pairs_samples_per_second": 66.416, |
|
"eval_xsum-pairs_steps_per_second": 2.214, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_compression-pairs_loss": 1.4910633563995361, |
|
"eval_compression-pairs_runtime": 0.4462, |
|
"eval_compression-pairs_samples_per_second": 336.204, |
|
"eval_compression-pairs_steps_per_second": 11.207, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_sciq_pairs_loss": 8.59740161895752, |
|
"eval_sciq_pairs_runtime": 7.1845, |
|
"eval_sciq_pairs_samples_per_second": 20.878, |
|
"eval_sciq_pairs_steps_per_second": 0.696, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_qasc_pairs_loss": 8.103879928588867, |
|
"eval_qasc_pairs_runtime": 2.0762, |
|
"eval_qasc_pairs_samples_per_second": 72.246, |
|
"eval_qasc_pairs_steps_per_second": 2.408, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_openbookqa_pairs_loss": 5.090969562530518, |
|
"eval_openbookqa_pairs_runtime": 0.89, |
|
"eval_openbookqa_pairs_samples_per_second": 115.726, |
|
"eval_openbookqa_pairs_steps_per_second": 4.494, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_msmarco_pairs_loss": 3.9566943645477295, |
|
"eval_msmarco_pairs_runtime": 2.8183, |
|
"eval_msmarco_pairs_samples_per_second": 53.223, |
|
"eval_msmarco_pairs_steps_per_second": 1.774, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_nq_pairs_loss": 4.009054183959961, |
|
"eval_nq_pairs_runtime": 5.0219, |
|
"eval_nq_pairs_samples_per_second": 29.869, |
|
"eval_nq_pairs_steps_per_second": 0.996, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_trivia_pairs_loss": 4.286431312561035, |
|
"eval_trivia_pairs_runtime": 9.4975, |
|
"eval_trivia_pairs_samples_per_second": 15.794, |
|
"eval_trivia_pairs_steps_per_second": 0.526, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_quora_pairs_loss": 1.123273491859436, |
|
"eval_quora_pairs_runtime": 1.1487, |
|
"eval_quora_pairs_samples_per_second": 130.586, |
|
"eval_quora_pairs_steps_per_second": 4.353, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_gooaq_pairs_loss": 3.222414255142212, |
|
"eval_gooaq_pairs_runtime": 2.0173, |
|
"eval_gooaq_pairs_samples_per_second": 74.357, |
|
"eval_gooaq_pairs_steps_per_second": 2.479, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.4004085801838611, |
|
"grad_norm": 218.56105041503906, |
|
"learning_rate": 7.987742594484168e-06, |
|
"loss": 3.7674, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 0.4254341164453524, |
|
"grad_norm": 27.877609252929688, |
|
"learning_rate": 8.488253319713993e-06, |
|
"loss": 3.8729, |
|
"step": 2499 |
|
}, |
|
{ |
|
"epoch": 0.45045965270684374, |
|
"grad_norm": 33.50013732910156, |
|
"learning_rate": 8.988764044943822e-06, |
|
"loss": 3.4527, |
|
"step": 2646 |
|
}, |
|
{ |
|
"epoch": 0.475485188968335, |
|
"grad_norm": 14.015911102294922, |
|
"learning_rate": 9.489274770173647e-06, |
|
"loss": 3.3545, |
|
"step": 2793 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"grad_norm": 33.59694290161133, |
|
"learning_rate": 9.989785495403473e-06, |
|
"loss": 3.3247, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_nli-pairs_loss": 2.7121565341949463, |
|
"eval_nli-pairs_runtime": 4.1564, |
|
"eval_nli-pairs_samples_per_second": 36.089, |
|
"eval_nli-pairs_steps_per_second": 1.203, |
|
"eval_sts-test_pearson_cosine": 0.716623047702725, |
|
"eval_sts-test_pearson_dot": 0.6128451070598809, |
|
"eval_sts-test_pearson_euclidean": 0.7138791236031807, |
|
"eval_sts-test_pearson_manhattan": 0.7213151818687454, |
|
"eval_sts-test_pearson_max": 0.7213151818687454, |
|
"eval_sts-test_spearman_cosine": 0.6919792400941177, |
|
"eval_sts-test_spearman_dot": 0.5867158357121192, |
|
"eval_sts-test_spearman_euclidean": 0.6925037259567834, |
|
"eval_sts-test_spearman_manhattan": 0.7008895667910079, |
|
"eval_sts-test_spearman_max": 0.7008895667910079, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_vitaminc-pairs_loss": 2.225992441177368, |
|
"eval_vitaminc-pairs_runtime": 2.253, |
|
"eval_vitaminc-pairs_samples_per_second": 66.577, |
|
"eval_vitaminc-pairs_steps_per_second": 2.219, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_qnli-contrastive_loss": 4.92629861831665, |
|
"eval_qnli-contrastive_runtime": 0.5005, |
|
"eval_qnli-contrastive_samples_per_second": 299.691, |
|
"eval_qnli-contrastive_steps_per_second": 9.99, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_scitail-pairs-qa_loss": 0.5898066163063049, |
|
"eval_scitail-pairs-qa_runtime": 1.2227, |
|
"eval_scitail-pairs-qa_samples_per_second": 122.682, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.089, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_scitail-pairs-pos_loss": 1.4237287044525146, |
|
"eval_scitail-pairs-pos_runtime": 2.4409, |
|
"eval_scitail-pairs-pos_samples_per_second": 61.452, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.048, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_xsum-pairs_loss": 1.8388895988464355, |
|
"eval_xsum-pairs_runtime": 2.2831, |
|
"eval_xsum-pairs_samples_per_second": 65.7, |
|
"eval_xsum-pairs_steps_per_second": 2.19, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_compression-pairs_loss": 1.1590967178344727, |
|
"eval_compression-pairs_runtime": 0.5152, |
|
"eval_compression-pairs_samples_per_second": 291.165, |
|
"eval_compression-pairs_steps_per_second": 9.706, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_sciq_pairs_loss": 8.282496452331543, |
|
"eval_sciq_pairs_runtime": 7.2871, |
|
"eval_sciq_pairs_samples_per_second": 20.584, |
|
"eval_sciq_pairs_steps_per_second": 0.686, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_qasc_pairs_loss": 7.817965507507324, |
|
"eval_qasc_pairs_runtime": 2.0211, |
|
"eval_qasc_pairs_samples_per_second": 74.218, |
|
"eval_qasc_pairs_steps_per_second": 2.474, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_openbookqa_pairs_loss": 4.619383811950684, |
|
"eval_openbookqa_pairs_runtime": 0.8531, |
|
"eval_openbookqa_pairs_samples_per_second": 120.731, |
|
"eval_openbookqa_pairs_steps_per_second": 4.689, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_msmarco_pairs_loss": 3.478559970855713, |
|
"eval_msmarco_pairs_runtime": 2.7512, |
|
"eval_msmarco_pairs_samples_per_second": 54.522, |
|
"eval_msmarco_pairs_steps_per_second": 1.817, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_nq_pairs_loss": 3.3449866771698, |
|
"eval_nq_pairs_runtime": 5.0591, |
|
"eval_nq_pairs_samples_per_second": 29.649, |
|
"eval_nq_pairs_steps_per_second": 0.988, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_trivia_pairs_loss": 3.524484872817993, |
|
"eval_trivia_pairs_runtime": 9.662, |
|
"eval_trivia_pairs_samples_per_second": 15.525, |
|
"eval_trivia_pairs_steps_per_second": 0.517, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_quora_pairs_loss": 0.9095575213432312, |
|
"eval_quora_pairs_runtime": 1.2482, |
|
"eval_quora_pairs_samples_per_second": 120.175, |
|
"eval_quora_pairs_steps_per_second": 4.006, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_gooaq_pairs_loss": 2.6586034297943115, |
|
"eval_gooaq_pairs_runtime": 2.1091, |
|
"eval_gooaq_pairs_samples_per_second": 71.12, |
|
"eval_gooaq_pairs_steps_per_second": 2.371, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5255362614913177, |
|
"grad_norm": 35.33409118652344, |
|
"learning_rate": 1.04902962206333e-05, |
|
"loss": 3.116, |
|
"step": 3087 |
|
}, |
|
{ |
|
"epoch": 0.550561797752809, |
|
"grad_norm": 22.29003143310547, |
|
"learning_rate": 1.0990806945863125e-05, |
|
"loss": 3.2418, |
|
"step": 3234 |
|
}, |
|
{ |
|
"epoch": 0.5755873340143003, |
|
"grad_norm": 31.277965545654297, |
|
"learning_rate": 1.1491317671092953e-05, |
|
"loss": 3.0757, |
|
"step": 3381 |
|
}, |
|
{ |
|
"epoch": 0.6006128702757916, |
|
"grad_norm": 24.612506866455078, |
|
"learning_rate": 1.1991828396322778e-05, |
|
"loss": 2.8524, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"grad_norm": 25.11741065979004, |
|
"learning_rate": 1.2492339121552605e-05, |
|
"loss": 2.6875, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_nli-pairs_loss": 2.479051113128662, |
|
"eval_nli-pairs_runtime": 3.9943, |
|
"eval_nli-pairs_samples_per_second": 37.553, |
|
"eval_nli-pairs_steps_per_second": 1.252, |
|
"eval_sts-test_pearson_cosine": 0.7278742453545186, |
|
"eval_sts-test_pearson_dot": 0.6217650825208566, |
|
"eval_sts-test_pearson_euclidean": 0.7243228472931561, |
|
"eval_sts-test_pearson_manhattan": 0.7333297580184588, |
|
"eval_sts-test_pearson_max": 0.7333297580184588, |
|
"eval_sts-test_spearman_cosine": 0.7013110457844404, |
|
"eval_sts-test_spearman_dot": 0.5970993074902947, |
|
"eval_sts-test_spearman_euclidean": 0.701564129266252, |
|
"eval_sts-test_spearman_manhattan": 0.7116482009924582, |
|
"eval_sts-test_spearman_max": 0.7116482009924582, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_vitaminc-pairs_loss": 1.974273681640625, |
|
"eval_vitaminc-pairs_runtime": 2.1754, |
|
"eval_vitaminc-pairs_samples_per_second": 68.953, |
|
"eval_vitaminc-pairs_steps_per_second": 2.298, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_qnli-contrastive_loss": 1.7706010341644287, |
|
"eval_qnli-contrastive_runtime": 0.4866, |
|
"eval_qnli-contrastive_samples_per_second": 308.244, |
|
"eval_qnli-contrastive_steps_per_second": 10.275, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_scitail-pairs-qa_loss": 0.4400452673435211, |
|
"eval_scitail-pairs-qa_runtime": 1.1519, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.222, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.341, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_scitail-pairs-pos_loss": 1.1909903287887573, |
|
"eval_scitail-pairs-pos_runtime": 2.1319, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.36, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.345, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_xsum-pairs_loss": 1.4811985492706299, |
|
"eval_xsum-pairs_runtime": 2.254, |
|
"eval_xsum-pairs_samples_per_second": 66.548, |
|
"eval_xsum-pairs_steps_per_second": 2.218, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_compression-pairs_loss": 0.8453781008720398, |
|
"eval_compression-pairs_runtime": 0.4401, |
|
"eval_compression-pairs_samples_per_second": 340.826, |
|
"eval_compression-pairs_steps_per_second": 11.361, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_sciq_pairs_loss": 8.014656066894531, |
|
"eval_sciq_pairs_runtime": 7.0707, |
|
"eval_sciq_pairs_samples_per_second": 21.214, |
|
"eval_sciq_pairs_steps_per_second": 0.707, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_qasc_pairs_loss": 6.9316277503967285, |
|
"eval_qasc_pairs_runtime": 2.0338, |
|
"eval_qasc_pairs_samples_per_second": 73.752, |
|
"eval_qasc_pairs_steps_per_second": 2.458, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_openbookqa_pairs_loss": 4.21690034866333, |
|
"eval_openbookqa_pairs_runtime": 0.918, |
|
"eval_openbookqa_pairs_samples_per_second": 112.202, |
|
"eval_openbookqa_pairs_steps_per_second": 4.357, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_msmarco_pairs_loss": 3.0209598541259766, |
|
"eval_msmarco_pairs_runtime": 2.7749, |
|
"eval_msmarco_pairs_samples_per_second": 54.056, |
|
"eval_msmarco_pairs_steps_per_second": 1.802, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_nq_pairs_loss": 2.956088066101074, |
|
"eval_nq_pairs_runtime": 5.0024, |
|
"eval_nq_pairs_samples_per_second": 29.986, |
|
"eval_nq_pairs_steps_per_second": 1.0, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_trivia_pairs_loss": 3.17364501953125, |
|
"eval_trivia_pairs_runtime": 9.4856, |
|
"eval_trivia_pairs_samples_per_second": 15.813, |
|
"eval_trivia_pairs_steps_per_second": 0.527, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_quora_pairs_loss": 0.763593852519989, |
|
"eval_quora_pairs_runtime": 1.1441, |
|
"eval_quora_pairs_samples_per_second": 131.104, |
|
"eval_quora_pairs_steps_per_second": 4.37, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_gooaq_pairs_loss": 2.3524909019470215, |
|
"eval_gooaq_pairs_runtime": 2.0161, |
|
"eval_gooaq_pairs_samples_per_second": 74.4, |
|
"eval_gooaq_pairs_steps_per_second": 2.48, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.6506639427987743, |
|
"grad_norm": 31.163997650146484, |
|
"learning_rate": 1.2992849846782432e-05, |
|
"loss": 2.7808, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 0.6756894790602656, |
|
"grad_norm": 14.883658409118652, |
|
"learning_rate": 1.3493360572012258e-05, |
|
"loss": 2.5687, |
|
"step": 3969 |
|
}, |
|
{ |
|
"epoch": 0.7007150153217568, |
|
"grad_norm": 5.874042987823486, |
|
"learning_rate": 1.3993871297242083e-05, |
|
"loss": 2.3034, |
|
"step": 4116 |
|
}, |
|
{ |
|
"epoch": 0.7257405515832482, |
|
"grad_norm": 31.464054107666016, |
|
"learning_rate": 1.4494382022471912e-05, |
|
"loss": 2.4412, |
|
"step": 4263 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"grad_norm": 16.43915367126465, |
|
"learning_rate": 1.4994892747701737e-05, |
|
"loss": 2.3293, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_nli-pairs_loss": 2.3226094245910645, |
|
"eval_nli-pairs_runtime": 4.113, |
|
"eval_nli-pairs_samples_per_second": 36.47, |
|
"eval_nli-pairs_steps_per_second": 1.216, |
|
"eval_sts-test_pearson_cosine": 0.7356971966139032, |
|
"eval_sts-test_pearson_dot": 0.6150809513049869, |
|
"eval_sts-test_pearson_euclidean": 0.7330733579988641, |
|
"eval_sts-test_pearson_manhattan": 0.7423412248131348, |
|
"eval_sts-test_pearson_max": 0.7423412248131348, |
|
"eval_sts-test_spearman_cosine": 0.7121899723082045, |
|
"eval_sts-test_spearman_dot": 0.5926505936679538, |
|
"eval_sts-test_spearman_euclidean": 0.7130179905407037, |
|
"eval_sts-test_spearman_manhattan": 0.7227257562995023, |
|
"eval_sts-test_spearman_max": 0.7227257562995023, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_vitaminc-pairs_loss": 1.7956713438034058, |
|
"eval_vitaminc-pairs_runtime": 2.174, |
|
"eval_vitaminc-pairs_samples_per_second": 68.996, |
|
"eval_vitaminc-pairs_steps_per_second": 2.3, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_qnli-contrastive_loss": 1.0078614950180054, |
|
"eval_qnli-contrastive_runtime": 0.4874, |
|
"eval_qnli-contrastive_samples_per_second": 307.763, |
|
"eval_qnli-contrastive_steps_per_second": 10.259, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_scitail-pairs-qa_loss": 0.36971578001976013, |
|
"eval_scitail-pairs-qa_runtime": 1.164, |
|
"eval_scitail-pairs-qa_samples_per_second": 128.863, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.295, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_scitail-pairs-pos_loss": 1.0497769117355347, |
|
"eval_scitail-pairs-pos_runtime": 2.1205, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.74, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.358, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_xsum-pairs_loss": 1.1691261529922485, |
|
"eval_xsum-pairs_runtime": 2.259, |
|
"eval_xsum-pairs_samples_per_second": 66.401, |
|
"eval_xsum-pairs_steps_per_second": 2.213, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_compression-pairs_loss": 0.5027483105659485, |
|
"eval_compression-pairs_runtime": 0.4403, |
|
"eval_compression-pairs_samples_per_second": 340.682, |
|
"eval_compression-pairs_steps_per_second": 11.356, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_sciq_pairs_loss": 7.823739528656006, |
|
"eval_sciq_pairs_runtime": 7.0738, |
|
"eval_sciq_pairs_samples_per_second": 21.205, |
|
"eval_sciq_pairs_steps_per_second": 0.707, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_qasc_pairs_loss": 6.404655933380127, |
|
"eval_qasc_pairs_runtime": 2.0346, |
|
"eval_qasc_pairs_samples_per_second": 73.723, |
|
"eval_qasc_pairs_steps_per_second": 2.457, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_openbookqa_pairs_loss": 3.857389211654663, |
|
"eval_openbookqa_pairs_runtime": 0.8544, |
|
"eval_openbookqa_pairs_samples_per_second": 120.547, |
|
"eval_openbookqa_pairs_steps_per_second": 4.681, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_msmarco_pairs_loss": 2.7028510570526123, |
|
"eval_msmarco_pairs_runtime": 2.7448, |
|
"eval_msmarco_pairs_samples_per_second": 54.649, |
|
"eval_msmarco_pairs_steps_per_second": 1.822, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_nq_pairs_loss": 2.679351329803467, |
|
"eval_nq_pairs_runtime": 5.067, |
|
"eval_nq_pairs_samples_per_second": 29.603, |
|
"eval_nq_pairs_steps_per_second": 0.987, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_trivia_pairs_loss": 2.8798065185546875, |
|
"eval_trivia_pairs_runtime": 9.5449, |
|
"eval_trivia_pairs_samples_per_second": 15.715, |
|
"eval_trivia_pairs_steps_per_second": 0.524, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_quora_pairs_loss": 0.6825175285339355, |
|
"eval_quora_pairs_runtime": 1.1431, |
|
"eval_quora_pairs_samples_per_second": 131.221, |
|
"eval_quora_pairs_steps_per_second": 4.374, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_gooaq_pairs_loss": 2.0472166538238525, |
|
"eval_gooaq_pairs_runtime": 2.0218, |
|
"eval_gooaq_pairs_samples_per_second": 74.191, |
|
"eval_gooaq_pairs_steps_per_second": 2.473, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7757916241062308, |
|
"grad_norm": 4.2425055503845215, |
|
"learning_rate": 1.5495403472931565e-05, |
|
"loss": 2.3651, |
|
"step": 4557 |
|
}, |
|
{ |
|
"epoch": 0.8008171603677222, |
|
"grad_norm": 22.42776107788086, |
|
"learning_rate": 1.5995914198161388e-05, |
|
"loss": 2.6296, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.8258426966292135, |
|
"grad_norm": 21.169517517089844, |
|
"learning_rate": 1.6496424923391215e-05, |
|
"loss": 2.2108, |
|
"step": 4851 |
|
}, |
|
{ |
|
"epoch": 0.8508682328907048, |
|
"grad_norm": 23.326181411743164, |
|
"learning_rate": 1.699693564862104e-05, |
|
"loss": 2.1852, |
|
"step": 4998 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"grad_norm": 24.574176788330078, |
|
"learning_rate": 1.7497446373850868e-05, |
|
"loss": 2.2944, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_nli-pairs_loss": 2.0634915828704834, |
|
"eval_nli-pairs_runtime": 4.0019, |
|
"eval_nli-pairs_samples_per_second": 37.482, |
|
"eval_nli-pairs_steps_per_second": 1.249, |
|
"eval_sts-test_pearson_cosine": 0.7466390532977636, |
|
"eval_sts-test_pearson_dot": 0.612259458274589, |
|
"eval_sts-test_pearson_euclidean": 0.7432536346376271, |
|
"eval_sts-test_pearson_manhattan": 0.7500490179501229, |
|
"eval_sts-test_pearson_max": 0.7500490179501229, |
|
"eval_sts-test_spearman_cosine": 0.728273260456201, |
|
"eval_sts-test_spearman_dot": 0.5960115087190596, |
|
"eval_sts-test_spearman_euclidean": 0.7272394395622148, |
|
"eval_sts-test_spearman_manhattan": 0.7334149564445704, |
|
"eval_sts-test_spearman_max": 0.7334149564445704, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_vitaminc-pairs_loss": 1.638654112815857, |
|
"eval_vitaminc-pairs_runtime": 2.1637, |
|
"eval_vitaminc-pairs_samples_per_second": 69.327, |
|
"eval_vitaminc-pairs_steps_per_second": 2.311, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_qnli-contrastive_loss": 0.9639705419540405, |
|
"eval_qnli-contrastive_runtime": 0.4889, |
|
"eval_qnli-contrastive_samples_per_second": 306.825, |
|
"eval_qnli-contrastive_steps_per_second": 10.228, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_scitail-pairs-qa_loss": 0.31595128774642944, |
|
"eval_scitail-pairs-qa_runtime": 1.1467, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.806, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.36, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_scitail-pairs-pos_loss": 0.9187478423118591, |
|
"eval_scitail-pairs-pos_runtime": 2.1273, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.512, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.35, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_xsum-pairs_loss": 1.060194492340088, |
|
"eval_xsum-pairs_runtime": 2.2836, |
|
"eval_xsum-pairs_samples_per_second": 65.686, |
|
"eval_xsum-pairs_steps_per_second": 2.19, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_compression-pairs_loss": 0.41078585386276245, |
|
"eval_compression-pairs_runtime": 0.4434, |
|
"eval_compression-pairs_samples_per_second": 338.276, |
|
"eval_compression-pairs_steps_per_second": 11.276, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_sciq_pairs_loss": 7.577760696411133, |
|
"eval_sciq_pairs_runtime": 7.1025, |
|
"eval_sciq_pairs_samples_per_second": 21.119, |
|
"eval_sciq_pairs_steps_per_second": 0.704, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_qasc_pairs_loss": 6.353766918182373, |
|
"eval_qasc_pairs_runtime": 2.0113, |
|
"eval_qasc_pairs_samples_per_second": 74.58, |
|
"eval_qasc_pairs_steps_per_second": 2.486, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_openbookqa_pairs_loss": 3.7140932083129883, |
|
"eval_openbookqa_pairs_runtime": 0.8529, |
|
"eval_openbookqa_pairs_samples_per_second": 120.762, |
|
"eval_openbookqa_pairs_steps_per_second": 4.69, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_msmarco_pairs_loss": 2.3862576484680176, |
|
"eval_msmarco_pairs_runtime": 2.8953, |
|
"eval_msmarco_pairs_samples_per_second": 51.808, |
|
"eval_msmarco_pairs_steps_per_second": 1.727, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_nq_pairs_loss": 2.3543190956115723, |
|
"eval_nq_pairs_runtime": 5.0048, |
|
"eval_nq_pairs_samples_per_second": 29.971, |
|
"eval_nq_pairs_steps_per_second": 0.999, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_trivia_pairs_loss": 2.494807481765747, |
|
"eval_trivia_pairs_runtime": 9.5513, |
|
"eval_trivia_pairs_samples_per_second": 15.705, |
|
"eval_trivia_pairs_steps_per_second": 0.523, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_quora_pairs_loss": 0.6137441992759705, |
|
"eval_quora_pairs_runtime": 1.1541, |
|
"eval_quora_pairs_samples_per_second": 129.967, |
|
"eval_quora_pairs_steps_per_second": 4.332, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_gooaq_pairs_loss": 1.8279658555984497, |
|
"eval_gooaq_pairs_runtime": 2.0951, |
|
"eval_gooaq_pairs_samples_per_second": 71.595, |
|
"eval_gooaq_pairs_steps_per_second": 2.387, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.9009193054136875, |
|
"grad_norm": 10.590804100036621, |
|
"learning_rate": 1.7997957099080695e-05, |
|
"loss": 2.2133, |
|
"step": 5292 |
|
}, |
|
{ |
|
"epoch": 0.9259448416751788, |
|
"grad_norm": 18.527711868286133, |
|
"learning_rate": 1.849846782431052e-05, |
|
"loss": 2.2255, |
|
"step": 5439 |
|
}, |
|
{ |
|
"epoch": 0.95097037793667, |
|
"grad_norm": 2.617710828781128, |
|
"learning_rate": 1.8995573714674838e-05, |
|
"loss": 2.3502, |
|
"step": 5586 |
|
}, |
|
{ |
|
"epoch": 0.9759959141981613, |
|
"grad_norm": 19.551551818847656, |
|
"learning_rate": 1.9496084439904668e-05, |
|
"loss": 1.8964, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"grad_norm": 11.783225059509277, |
|
"learning_rate": 1.999319033026898e-05, |
|
"loss": 1.913, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_nli-pairs_loss": 1.9677053689956665, |
|
"eval_nli-pairs_runtime": 4.3863, |
|
"eval_nli-pairs_samples_per_second": 34.198, |
|
"eval_nli-pairs_steps_per_second": 1.14, |
|
"eval_sts-test_pearson_cosine": 0.7531824359441671, |
|
"eval_sts-test_pearson_dot": 0.602579906515822, |
|
"eval_sts-test_pearson_euclidean": 0.7486763477944213, |
|
"eval_sts-test_pearson_manhattan": 0.7566220287347274, |
|
"eval_sts-test_pearson_max": 0.7566220287347274, |
|
"eval_sts-test_spearman_cosine": 0.7387792578665129, |
|
"eval_sts-test_spearman_dot": 0.5926594656319394, |
|
"eval_sts-test_spearman_euclidean": 0.733653805383597, |
|
"eval_sts-test_spearman_manhattan": 0.7420657558603486, |
|
"eval_sts-test_spearman_max": 0.7420657558603486, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_vitaminc-pairs_loss": 1.4394291639328003, |
|
"eval_vitaminc-pairs_runtime": 2.2575, |
|
"eval_vitaminc-pairs_samples_per_second": 66.446, |
|
"eval_vitaminc-pairs_steps_per_second": 2.215, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_qnli-contrastive_loss": 0.45715218782424927, |
|
"eval_qnli-contrastive_runtime": 0.501, |
|
"eval_qnli-contrastive_samples_per_second": 299.385, |
|
"eval_qnli-contrastive_steps_per_second": 9.979, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_scitail-pairs-qa_loss": 0.26679515838623047, |
|
"eval_scitail-pairs-qa_runtime": 1.4342, |
|
"eval_scitail-pairs-qa_samples_per_second": 104.587, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.486, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_scitail-pairs-pos_loss": 0.8628473281860352, |
|
"eval_scitail-pairs-pos_runtime": 2.3485, |
|
"eval_scitail-pairs-pos_samples_per_second": 63.871, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.129, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_xsum-pairs_loss": 0.9014443755149841, |
|
"eval_xsum-pairs_runtime": 2.2896, |
|
"eval_xsum-pairs_samples_per_second": 65.513, |
|
"eval_xsum-pairs_steps_per_second": 2.184, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_compression-pairs_loss": 0.3047434389591217, |
|
"eval_compression-pairs_runtime": 0.4852, |
|
"eval_compression-pairs_samples_per_second": 309.163, |
|
"eval_compression-pairs_steps_per_second": 10.305, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_sciq_pairs_loss": 1.091601848602295, |
|
"eval_sciq_pairs_runtime": 7.3046, |
|
"eval_sciq_pairs_samples_per_second": 20.535, |
|
"eval_sciq_pairs_steps_per_second": 0.684, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_qasc_pairs_loss": 5.947833061218262, |
|
"eval_qasc_pairs_runtime": 2.1787, |
|
"eval_qasc_pairs_samples_per_second": 68.849, |
|
"eval_qasc_pairs_steps_per_second": 2.295, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_openbookqa_pairs_loss": 3.4724366664886475, |
|
"eval_openbookqa_pairs_runtime": 0.9106, |
|
"eval_openbookqa_pairs_samples_per_second": 113.111, |
|
"eval_openbookqa_pairs_steps_per_second": 4.393, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_msmarco_pairs_loss": 2.1638240814208984, |
|
"eval_msmarco_pairs_runtime": 2.82, |
|
"eval_msmarco_pairs_samples_per_second": 53.191, |
|
"eval_msmarco_pairs_steps_per_second": 1.773, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_nq_pairs_loss": 2.110903739929199, |
|
"eval_nq_pairs_runtime": 5.2303, |
|
"eval_nq_pairs_samples_per_second": 28.679, |
|
"eval_nq_pairs_steps_per_second": 0.956, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_trivia_pairs_loss": 2.3711097240448, |
|
"eval_trivia_pairs_runtime": 9.6247, |
|
"eval_trivia_pairs_samples_per_second": 15.585, |
|
"eval_trivia_pairs_steps_per_second": 0.519, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_quora_pairs_loss": 0.5216041803359985, |
|
"eval_quora_pairs_runtime": 1.3072, |
|
"eval_quora_pairs_samples_per_second": 114.749, |
|
"eval_quora_pairs_steps_per_second": 3.825, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0010214504596526, |
|
"eval_gooaq_pairs_loss": 1.7041363716125488, |
|
"eval_gooaq_pairs_runtime": 2.0973, |
|
"eval_gooaq_pairs_samples_per_second": 71.521, |
|
"eval_gooaq_pairs_steps_per_second": 2.384, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.026046986721144, |
|
"grad_norm": 17.308378219604492, |
|
"learning_rate": 2.0493701055498808e-05, |
|
"loss": 1.7772, |
|
"step": 6027 |
|
}, |
|
{ |
|
"epoch": 1.0510725229826354, |
|
"grad_norm": 20.248981475830078, |
|
"learning_rate": 2.0994211780728634e-05, |
|
"loss": 1.9079, |
|
"step": 6174 |
|
}, |
|
{ |
|
"epoch": 1.0760980592441267, |
|
"grad_norm": 6.012618064880371, |
|
"learning_rate": 2.1494722505958464e-05, |
|
"loss": 1.8657, |
|
"step": 6321 |
|
}, |
|
{ |
|
"epoch": 1.101123595505618, |
|
"grad_norm": 1.1185024976730347, |
|
"learning_rate": 2.1995233231188288e-05, |
|
"loss": 1.7144, |
|
"step": 6468 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"grad_norm": 1.2436251640319824, |
|
"learning_rate": 2.2495743956418114e-05, |
|
"loss": 1.7661, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_nli-pairs_loss": 1.7907973527908325, |
|
"eval_nli-pairs_runtime": 4.0147, |
|
"eval_nli-pairs_samples_per_second": 37.363, |
|
"eval_nli-pairs_steps_per_second": 1.245, |
|
"eval_sts-test_pearson_cosine": 0.755444461779583, |
|
"eval_sts-test_pearson_dot": 0.5833168145328357, |
|
"eval_sts-test_pearson_euclidean": 0.7437155007996056, |
|
"eval_sts-test_pearson_manhattan": 0.7524938984567344, |
|
"eval_sts-test_pearson_max": 0.755444461779583, |
|
"eval_sts-test_spearman_cosine": 0.7446166596886566, |
|
"eval_sts-test_spearman_dot": 0.5792340720766105, |
|
"eval_sts-test_spearman_euclidean": 0.7317285388028532, |
|
"eval_sts-test_spearman_manhattan": 0.7401637904976945, |
|
"eval_sts-test_spearman_max": 0.7446166596886566, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_vitaminc-pairs_loss": 1.3403607606887817, |
|
"eval_vitaminc-pairs_runtime": 2.168, |
|
"eval_vitaminc-pairs_samples_per_second": 69.189, |
|
"eval_vitaminc-pairs_steps_per_second": 2.306, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_qnli-contrastive_loss": 0.2736852467060089, |
|
"eval_qnli-contrastive_runtime": 0.4913, |
|
"eval_qnli-contrastive_samples_per_second": 305.336, |
|
"eval_qnli-contrastive_steps_per_second": 10.178, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_scitail-pairs-qa_loss": 0.22441554069519043, |
|
"eval_scitail-pairs-qa_runtime": 1.1614, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.152, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.305, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_scitail-pairs-pos_loss": 0.7723743915557861, |
|
"eval_scitail-pairs-pos_runtime": 2.1567, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.55, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.318, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_xsum-pairs_loss": 0.8370540142059326, |
|
"eval_xsum-pairs_runtime": 2.2569, |
|
"eval_xsum-pairs_samples_per_second": 66.463, |
|
"eval_xsum-pairs_steps_per_second": 2.215, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_compression-pairs_loss": 0.265947163105011, |
|
"eval_compression-pairs_runtime": 0.4431, |
|
"eval_compression-pairs_samples_per_second": 338.529, |
|
"eval_compression-pairs_steps_per_second": 11.284, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_sciq_pairs_loss": 0.9383512735366821, |
|
"eval_sciq_pairs_runtime": 7.1464, |
|
"eval_sciq_pairs_samples_per_second": 20.99, |
|
"eval_sciq_pairs_steps_per_second": 0.7, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_qasc_pairs_loss": 5.753899097442627, |
|
"eval_qasc_pairs_runtime": 2.0099, |
|
"eval_qasc_pairs_samples_per_second": 74.63, |
|
"eval_qasc_pairs_steps_per_second": 2.488, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_openbookqa_pairs_loss": 3.3517918586730957, |
|
"eval_openbookqa_pairs_runtime": 0.8594, |
|
"eval_openbookqa_pairs_samples_per_second": 119.858, |
|
"eval_openbookqa_pairs_steps_per_second": 4.655, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_msmarco_pairs_loss": 2.044360399246216, |
|
"eval_msmarco_pairs_runtime": 2.7431, |
|
"eval_msmarco_pairs_samples_per_second": 54.682, |
|
"eval_msmarco_pairs_steps_per_second": 1.823, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_nq_pairs_loss": 1.9409464597702026, |
|
"eval_nq_pairs_runtime": 5.028, |
|
"eval_nq_pairs_samples_per_second": 29.833, |
|
"eval_nq_pairs_steps_per_second": 0.994, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_trivia_pairs_loss": 2.369060754776001, |
|
"eval_trivia_pairs_runtime": 9.5137, |
|
"eval_trivia_pairs_samples_per_second": 15.767, |
|
"eval_trivia_pairs_steps_per_second": 0.526, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_quora_pairs_loss": 0.47849634289741516, |
|
"eval_quora_pairs_runtime": 1.1413, |
|
"eval_quora_pairs_samples_per_second": 131.424, |
|
"eval_quora_pairs_steps_per_second": 4.381, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1261491317671093, |
|
"eval_gooaq_pairs_loss": 1.5795674324035645, |
|
"eval_gooaq_pairs_runtime": 2.0155, |
|
"eval_gooaq_pairs_samples_per_second": 74.422, |
|
"eval_gooaq_pairs_steps_per_second": 2.481, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 1.1511746680286006, |
|
"grad_norm": 20.95261001586914, |
|
"learning_rate": 2.299625468164794e-05, |
|
"loss": 1.8066, |
|
"step": 6762 |
|
}, |
|
{ |
|
"epoch": 1.1762002042900919, |
|
"grad_norm": 20.31597900390625, |
|
"learning_rate": 2.3496765406877764e-05, |
|
"loss": 1.7438, |
|
"step": 6909 |
|
}, |
|
{ |
|
"epoch": 1.2012257405515832, |
|
"grad_norm": 28.363882064819336, |
|
"learning_rate": 2.399727613210759e-05, |
|
"loss": 2.0231, |
|
"step": 7056 |
|
}, |
|
{ |
|
"epoch": 1.2262512768130747, |
|
"grad_norm": 14.403656959533691, |
|
"learning_rate": 2.449778685733742e-05, |
|
"loss": 1.8966, |
|
"step": 7203 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"grad_norm": 17.73562240600586, |
|
"learning_rate": 2.4998297582567248e-05, |
|
"loss": 1.7958, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_nli-pairs_loss": 1.5906368494033813, |
|
"eval_nli-pairs_runtime": 4.0261, |
|
"eval_nli-pairs_samples_per_second": 37.257, |
|
"eval_nli-pairs_steps_per_second": 1.242, |
|
"eval_sts-test_pearson_cosine": 0.7626661521495873, |
|
"eval_sts-test_pearson_dot": 0.5632604768989181, |
|
"eval_sts-test_pearson_euclidean": 0.7370060575260952, |
|
"eval_sts-test_pearson_manhattan": 0.7472706980613159, |
|
"eval_sts-test_pearson_max": 0.7626661521495873, |
|
"eval_sts-test_spearman_cosine": 0.7535266725567149, |
|
"eval_sts-test_spearman_dot": 0.5848997224802808, |
|
"eval_sts-test_spearman_euclidean": 0.7290608032903477, |
|
"eval_sts-test_spearman_manhattan": 0.739032087078249, |
|
"eval_sts-test_spearman_max": 0.7535266725567149, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_vitaminc-pairs_loss": 1.222551941871643, |
|
"eval_vitaminc-pairs_runtime": 2.1784, |
|
"eval_vitaminc-pairs_samples_per_second": 68.857, |
|
"eval_vitaminc-pairs_steps_per_second": 2.295, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_qnli-contrastive_loss": 0.3951484262943268, |
|
"eval_qnli-contrastive_runtime": 0.4916, |
|
"eval_qnli-contrastive_samples_per_second": 305.11, |
|
"eval_qnli-contrastive_steps_per_second": 10.17, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_scitail-pairs-qa_loss": 0.17783091962337494, |
|
"eval_scitail-pairs-qa_runtime": 1.1549, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.88, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.329, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_scitail-pairs-pos_loss": 0.7214661836624146, |
|
"eval_scitail-pairs-pos_runtime": 2.132, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.357, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.345, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_xsum-pairs_loss": 0.7919928431510925, |
|
"eval_xsum-pairs_runtime": 2.2579, |
|
"eval_xsum-pairs_samples_per_second": 66.432, |
|
"eval_xsum-pairs_steps_per_second": 2.214, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_compression-pairs_loss": 0.24975377321243286, |
|
"eval_compression-pairs_runtime": 0.447, |
|
"eval_compression-pairs_samples_per_second": 335.534, |
|
"eval_compression-pairs_steps_per_second": 11.184, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_sciq_pairs_loss": 0.8343773484230042, |
|
"eval_sciq_pairs_runtime": 7.1288, |
|
"eval_sciq_pairs_samples_per_second": 21.042, |
|
"eval_sciq_pairs_steps_per_second": 0.701, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_qasc_pairs_loss": 5.4840240478515625, |
|
"eval_qasc_pairs_runtime": 2.025, |
|
"eval_qasc_pairs_samples_per_second": 74.074, |
|
"eval_qasc_pairs_steps_per_second": 2.469, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_openbookqa_pairs_loss": 3.1631176471710205, |
|
"eval_openbookqa_pairs_runtime": 0.8612, |
|
"eval_openbookqa_pairs_samples_per_second": 119.598, |
|
"eval_openbookqa_pairs_steps_per_second": 4.645, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_msmarco_pairs_loss": 1.8952231407165527, |
|
"eval_msmarco_pairs_runtime": 2.7585, |
|
"eval_msmarco_pairs_samples_per_second": 54.378, |
|
"eval_msmarco_pairs_steps_per_second": 1.813, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_nq_pairs_loss": 1.6934970617294312, |
|
"eval_nq_pairs_runtime": 5.0253, |
|
"eval_nq_pairs_samples_per_second": 29.849, |
|
"eval_nq_pairs_steps_per_second": 0.995, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_trivia_pairs_loss": 1.9966663122177124, |
|
"eval_trivia_pairs_runtime": 9.5675, |
|
"eval_trivia_pairs_samples_per_second": 15.678, |
|
"eval_trivia_pairs_steps_per_second": 0.523, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_quora_pairs_loss": 0.405385285615921, |
|
"eval_quora_pairs_runtime": 1.1432, |
|
"eval_quora_pairs_samples_per_second": 131.209, |
|
"eval_quora_pairs_steps_per_second": 4.374, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.251276813074566, |
|
"eval_gooaq_pairs_loss": 1.3951071500778198, |
|
"eval_gooaq_pairs_runtime": 2.038, |
|
"eval_gooaq_pairs_samples_per_second": 73.601, |
|
"eval_gooaq_pairs_steps_per_second": 2.453, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.2763023493360572, |
|
"grad_norm": 21.254159927368164, |
|
"learning_rate": 2.549880830779707e-05, |
|
"loss": 1.5109, |
|
"step": 7497 |
|
}, |
|
{ |
|
"epoch": 1.3013278855975485, |
|
"grad_norm": 20.08012580871582, |
|
"learning_rate": 2.5999319033026898e-05, |
|
"loss": 1.8119, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 1.3263534218590398, |
|
"grad_norm": 0.6448306441307068, |
|
"learning_rate": 2.6499829758256724e-05, |
|
"loss": 1.6833, |
|
"step": 7791 |
|
}, |
|
{ |
|
"epoch": 1.351378958120531, |
|
"grad_norm": 16.65821647644043, |
|
"learning_rate": 2.7000340483486554e-05, |
|
"loss": 1.5917, |
|
"step": 7938 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"grad_norm": 14.949362754821777, |
|
"learning_rate": 2.7500851208716378e-05, |
|
"loss": 1.809, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_nli-pairs_loss": 1.5967836380004883, |
|
"eval_nli-pairs_runtime": 4.0496, |
|
"eval_nli-pairs_samples_per_second": 37.041, |
|
"eval_nli-pairs_steps_per_second": 1.235, |
|
"eval_sts-test_pearson_cosine": 0.7653416933913197, |
|
"eval_sts-test_pearson_dot": 0.5401711611334493, |
|
"eval_sts-test_pearson_euclidean": 0.7529907774019836, |
|
"eval_sts-test_pearson_manhattan": 0.7605105025260754, |
|
"eval_sts-test_pearson_max": 0.7653416933913197, |
|
"eval_sts-test_spearman_cosine": 0.7593865234485873, |
|
"eval_sts-test_spearman_dot": 0.5559615063301898, |
|
"eval_sts-test_spearman_euclidean": 0.7436431053840061, |
|
"eval_sts-test_spearman_manhattan": 0.7515978828464567, |
|
"eval_sts-test_spearman_max": 0.7593865234485873, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_vitaminc-pairs_loss": 1.1434590816497803, |
|
"eval_vitaminc-pairs_runtime": 2.2066, |
|
"eval_vitaminc-pairs_samples_per_second": 67.977, |
|
"eval_vitaminc-pairs_steps_per_second": 2.266, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_qnli-contrastive_loss": 0.3819103538990021, |
|
"eval_qnli-contrastive_runtime": 0.4972, |
|
"eval_qnli-contrastive_samples_per_second": 301.706, |
|
"eval_qnli-contrastive_steps_per_second": 10.057, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_scitail-pairs-qa_loss": 0.15774373710155487, |
|
"eval_scitail-pairs-qa_runtime": 1.1704, |
|
"eval_scitail-pairs-qa_samples_per_second": 128.161, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.272, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_scitail-pairs-pos_loss": 0.6571963429450989, |
|
"eval_scitail-pairs-pos_runtime": 2.1634, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.335, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.311, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_xsum-pairs_loss": 0.7028753757476807, |
|
"eval_xsum-pairs_runtime": 2.2608, |
|
"eval_xsum-pairs_samples_per_second": 66.347, |
|
"eval_xsum-pairs_steps_per_second": 2.212, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_compression-pairs_loss": 0.23010987043380737, |
|
"eval_compression-pairs_runtime": 0.4514, |
|
"eval_compression-pairs_samples_per_second": 332.284, |
|
"eval_compression-pairs_steps_per_second": 11.076, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_sciq_pairs_loss": 0.799666702747345, |
|
"eval_sciq_pairs_runtime": 7.1816, |
|
"eval_sciq_pairs_samples_per_second": 20.887, |
|
"eval_sciq_pairs_steps_per_second": 0.696, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_qasc_pairs_loss": 5.433376789093018, |
|
"eval_qasc_pairs_runtime": 2.0592, |
|
"eval_qasc_pairs_samples_per_second": 72.843, |
|
"eval_qasc_pairs_steps_per_second": 2.428, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_openbookqa_pairs_loss": 2.9010672569274902, |
|
"eval_openbookqa_pairs_runtime": 0.865, |
|
"eval_openbookqa_pairs_samples_per_second": 119.074, |
|
"eval_openbookqa_pairs_steps_per_second": 4.624, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_msmarco_pairs_loss": 1.7567836046218872, |
|
"eval_msmarco_pairs_runtime": 2.7812, |
|
"eval_msmarco_pairs_samples_per_second": 53.933, |
|
"eval_msmarco_pairs_steps_per_second": 1.798, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_nq_pairs_loss": 1.5407707691192627, |
|
"eval_nq_pairs_runtime": 5.0607, |
|
"eval_nq_pairs_samples_per_second": 29.64, |
|
"eval_nq_pairs_steps_per_second": 0.988, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_trivia_pairs_loss": 1.8419283628463745, |
|
"eval_trivia_pairs_runtime": 9.5535, |
|
"eval_trivia_pairs_samples_per_second": 15.701, |
|
"eval_trivia_pairs_steps_per_second": 0.523, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_quora_pairs_loss": 0.3561370372772217, |
|
"eval_quora_pairs_runtime": 1.2005, |
|
"eval_quora_pairs_samples_per_second": 124.946, |
|
"eval_quora_pairs_steps_per_second": 4.165, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.3764044943820224, |
|
"eval_gooaq_pairs_loss": 1.1745914220809937, |
|
"eval_gooaq_pairs_runtime": 2.0463, |
|
"eval_gooaq_pairs_samples_per_second": 73.305, |
|
"eval_gooaq_pairs_steps_per_second": 2.443, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 1.401430030643514, |
|
"grad_norm": 14.31106185913086, |
|
"learning_rate": 2.8001361933946204e-05, |
|
"loss": 1.5561, |
|
"step": 8232 |
|
}, |
|
{ |
|
"epoch": 1.4264555669050052, |
|
"grad_norm": 11.82392692565918, |
|
"learning_rate": 2.850187265917603e-05, |
|
"loss": 1.5325, |
|
"step": 8379 |
|
}, |
|
{ |
|
"epoch": 1.4514811031664965, |
|
"grad_norm": 21.716449737548828, |
|
"learning_rate": 2.9002383384405858e-05, |
|
"loss": 1.5085, |
|
"step": 8526 |
|
}, |
|
{ |
|
"epoch": 1.4765066394279878, |
|
"grad_norm": 6.5607147216796875, |
|
"learning_rate": 2.950289410963568e-05, |
|
"loss": 1.5634, |
|
"step": 8673 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"grad_norm": 8.737595558166504, |
|
"learning_rate": 2.9999998423842776e-05, |
|
"loss": 1.3857, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_nli-pairs_loss": 1.454946517944336, |
|
"eval_nli-pairs_runtime": 4.3786, |
|
"eval_nli-pairs_samples_per_second": 34.257, |
|
"eval_nli-pairs_steps_per_second": 1.142, |
|
"eval_sts-test_pearson_cosine": 0.758856517299588, |
|
"eval_sts-test_pearson_dot": 0.5254244903711445, |
|
"eval_sts-test_pearson_euclidean": 0.7467439510002647, |
|
"eval_sts-test_pearson_manhattan": 0.7525779346304055, |
|
"eval_sts-test_pearson_max": 0.758856517299588, |
|
"eval_sts-test_spearman_cosine": 0.7596605816446022, |
|
"eval_sts-test_spearman_dot": 0.5600186533991508, |
|
"eval_sts-test_spearman_euclidean": 0.7367598380547504, |
|
"eval_sts-test_spearman_manhattan": 0.7440123650923844, |
|
"eval_sts-test_spearman_max": 0.7596605816446022, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_vitaminc-pairs_loss": 1.1383781433105469, |
|
"eval_vitaminc-pairs_runtime": 2.3314, |
|
"eval_vitaminc-pairs_samples_per_second": 64.34, |
|
"eval_vitaminc-pairs_steps_per_second": 2.145, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_qnli-contrastive_loss": 0.32092100381851196, |
|
"eval_qnli-contrastive_runtime": 0.5002, |
|
"eval_qnli-contrastive_samples_per_second": 299.881, |
|
"eval_qnli-contrastive_steps_per_second": 9.996, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_scitail-pairs-qa_loss": 0.14513270556926727, |
|
"eval_scitail-pairs-qa_runtime": 1.5154, |
|
"eval_scitail-pairs-qa_samples_per_second": 98.985, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.3, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_scitail-pairs-pos_loss": 0.6857669353485107, |
|
"eval_scitail-pairs-pos_runtime": 2.4178, |
|
"eval_scitail-pairs-pos_samples_per_second": 62.041, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.068, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_xsum-pairs_loss": 0.683724045753479, |
|
"eval_xsum-pairs_runtime": 2.2766, |
|
"eval_xsum-pairs_samples_per_second": 65.887, |
|
"eval_xsum-pairs_steps_per_second": 2.196, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_compression-pairs_loss": 0.20896266400814056, |
|
"eval_compression-pairs_runtime": 0.4683, |
|
"eval_compression-pairs_samples_per_second": 320.274, |
|
"eval_compression-pairs_steps_per_second": 10.676, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_sciq_pairs_loss": 0.7911179661750793, |
|
"eval_sciq_pairs_runtime": 7.3506, |
|
"eval_sciq_pairs_samples_per_second": 20.407, |
|
"eval_sciq_pairs_steps_per_second": 0.68, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_qasc_pairs_loss": 5.3092241287231445, |
|
"eval_qasc_pairs_runtime": 2.1926, |
|
"eval_qasc_pairs_samples_per_second": 68.411, |
|
"eval_qasc_pairs_steps_per_second": 2.28, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_openbookqa_pairs_loss": 2.923464298248291, |
|
"eval_openbookqa_pairs_runtime": 0.963, |
|
"eval_openbookqa_pairs_samples_per_second": 106.961, |
|
"eval_openbookqa_pairs_steps_per_second": 4.154, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_msmarco_pairs_loss": 1.674107313156128, |
|
"eval_msmarco_pairs_runtime": 2.8516, |
|
"eval_msmarco_pairs_samples_per_second": 52.602, |
|
"eval_msmarco_pairs_steps_per_second": 1.753, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_nq_pairs_loss": 1.4419037103652954, |
|
"eval_nq_pairs_runtime": 5.1485, |
|
"eval_nq_pairs_samples_per_second": 29.135, |
|
"eval_nq_pairs_steps_per_second": 0.971, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_trivia_pairs_loss": 1.7546964883804321, |
|
"eval_trivia_pairs_runtime": 9.6901, |
|
"eval_trivia_pairs_samples_per_second": 15.48, |
|
"eval_trivia_pairs_steps_per_second": 0.516, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_quora_pairs_loss": 0.31785744428634644, |
|
"eval_quora_pairs_runtime": 1.2699, |
|
"eval_quora_pairs_samples_per_second": 118.117, |
|
"eval_quora_pairs_steps_per_second": 3.937, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.501532175689479, |
|
"eval_gooaq_pairs_loss": 1.1328644752502441, |
|
"eval_gooaq_pairs_runtime": 2.1292, |
|
"eval_gooaq_pairs_samples_per_second": 70.448, |
|
"eval_gooaq_pairs_steps_per_second": 2.348, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.5265577119509703, |
|
"grad_norm": 15.168123245239258, |
|
"learning_rate": 2.9965489092992677e-05, |
|
"loss": 1.6167, |
|
"step": 8967 |
|
}, |
|
{ |
|
"epoch": 1.5515832482124616, |
|
"grad_norm": 29.32268524169922, |
|
"learning_rate": 2.9863043834895476e-05, |
|
"loss": 1.6664, |
|
"step": 9114 |
|
}, |
|
{ |
|
"epoch": 1.5766087844739531, |
|
"grad_norm": 2.5159287452697754, |
|
"learning_rate": 2.9693127700413034e-05, |
|
"loss": 1.4785, |
|
"step": 9261 |
|
}, |
|
{ |
|
"epoch": 1.6016343207354442, |
|
"grad_norm": 17.4219970703125, |
|
"learning_rate": 2.9456512024854113e-05, |
|
"loss": 1.5881, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"grad_norm": 15.60139274597168, |
|
"learning_rate": 2.915427092649312e-05, |
|
"loss": 1.3379, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_nli-pairs_loss": 1.3879741430282593, |
|
"eval_nli-pairs_runtime": 4.1363, |
|
"eval_nli-pairs_samples_per_second": 36.264, |
|
"eval_nli-pairs_steps_per_second": 1.209, |
|
"eval_sts-test_pearson_cosine": 0.7733483283639441, |
|
"eval_sts-test_pearson_dot": 0.5424296843493538, |
|
"eval_sts-test_pearson_euclidean": 0.7555770040784449, |
|
"eval_sts-test_pearson_manhattan": 0.7604742759594404, |
|
"eval_sts-test_pearson_max": 0.7733483283639441, |
|
"eval_sts-test_spearman_cosine": 0.779671933510953, |
|
"eval_sts-test_spearman_dot": 0.5784449139725663, |
|
"eval_sts-test_spearman_euclidean": 0.7515003599642571, |
|
"eval_sts-test_spearman_manhattan": 0.7568440288585417, |
|
"eval_sts-test_spearman_max": 0.779671933510953, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_vitaminc-pairs_loss": 0.9942379593849182, |
|
"eval_vitaminc-pairs_runtime": 2.2185, |
|
"eval_vitaminc-pairs_samples_per_second": 67.613, |
|
"eval_vitaminc-pairs_steps_per_second": 2.254, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_qnli-contrastive_loss": 0.2178214192390442, |
|
"eval_qnli-contrastive_runtime": 0.4991, |
|
"eval_qnli-contrastive_samples_per_second": 300.549, |
|
"eval_qnli-contrastive_steps_per_second": 10.018, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_scitail-pairs-qa_loss": 0.13629749417304993, |
|
"eval_scitail-pairs-qa_runtime": 1.1751, |
|
"eval_scitail-pairs-qa_samples_per_second": 127.653, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.255, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_scitail-pairs-pos_loss": 0.5964671969413757, |
|
"eval_scitail-pairs-pos_runtime": 2.1841, |
|
"eval_scitail-pairs-pos_samples_per_second": 68.677, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.289, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_xsum-pairs_loss": 0.6746851205825806, |
|
"eval_xsum-pairs_runtime": 2.2628, |
|
"eval_xsum-pairs_samples_per_second": 66.291, |
|
"eval_xsum-pairs_steps_per_second": 2.21, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_compression-pairs_loss": 0.17857055366039276, |
|
"eval_compression-pairs_runtime": 0.4506, |
|
"eval_compression-pairs_samples_per_second": 332.902, |
|
"eval_compression-pairs_steps_per_second": 11.097, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_sciq_pairs_loss": 0.7349148988723755, |
|
"eval_sciq_pairs_runtime": 7.116, |
|
"eval_sciq_pairs_samples_per_second": 21.079, |
|
"eval_sciq_pairs_steps_per_second": 0.703, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_qasc_pairs_loss": 5.115650177001953, |
|
"eval_qasc_pairs_runtime": 2.0271, |
|
"eval_qasc_pairs_samples_per_second": 73.997, |
|
"eval_qasc_pairs_steps_per_second": 2.467, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_openbookqa_pairs_loss": 2.694535255432129, |
|
"eval_openbookqa_pairs_runtime": 0.8634, |
|
"eval_openbookqa_pairs_samples_per_second": 119.302, |
|
"eval_openbookqa_pairs_steps_per_second": 4.633, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_msmarco_pairs_loss": 1.5184054374694824, |
|
"eval_msmarco_pairs_runtime": 2.7561, |
|
"eval_msmarco_pairs_samples_per_second": 54.424, |
|
"eval_msmarco_pairs_steps_per_second": 1.814, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_nq_pairs_loss": 1.293426752090454, |
|
"eval_nq_pairs_runtime": 5.0107, |
|
"eval_nq_pairs_samples_per_second": 29.936, |
|
"eval_nq_pairs_steps_per_second": 0.998, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_trivia_pairs_loss": 1.5939557552337646, |
|
"eval_trivia_pairs_runtime": 9.5368, |
|
"eval_trivia_pairs_samples_per_second": 15.728, |
|
"eval_trivia_pairs_steps_per_second": 0.524, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_quora_pairs_loss": 0.31308451294898987, |
|
"eval_quora_pairs_runtime": 1.1456, |
|
"eval_quora_pairs_samples_per_second": 130.932, |
|
"eval_quora_pairs_steps_per_second": 4.364, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.6266598569969357, |
|
"eval_gooaq_pairs_loss": 1.0807112455368042, |
|
"eval_gooaq_pairs_runtime": 2.0197, |
|
"eval_gooaq_pairs_samples_per_second": 74.269, |
|
"eval_gooaq_pairs_steps_per_second": 2.476, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 1.651685393258427, |
|
"grad_norm": 0.7546759843826294, |
|
"learning_rate": 2.878777643060379e-05, |
|
"loss": 1.4469, |
|
"step": 9702 |
|
}, |
|
{ |
|
"epoch": 1.6767109295199183, |
|
"grad_norm": 0.8483991026878357, |
|
"learning_rate": 2.835869224114224e-05, |
|
"loss": 1.3878, |
|
"step": 9849 |
|
}, |
|
{ |
|
"epoch": 1.7017364657814096, |
|
"grad_norm": 20.814105987548828, |
|
"learning_rate": 2.7868966188352908e-05, |
|
"loss": 1.2764, |
|
"step": 9996 |
|
}, |
|
{ |
|
"epoch": 1.7267620020429009, |
|
"grad_norm": 3.1025094985961914, |
|
"learning_rate": 2.73208213865815e-05, |
|
"loss": 1.3884, |
|
"step": 10143 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"grad_norm": 14.80810260772705, |
|
"learning_rate": 2.671674614243416e-05, |
|
"loss": 1.2977, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_nli-pairs_loss": 1.3081562519073486, |
|
"eval_nli-pairs_runtime": 4.0165, |
|
"eval_nli-pairs_samples_per_second": 37.346, |
|
"eval_nli-pairs_steps_per_second": 1.245, |
|
"eval_sts-test_pearson_cosine": 0.7681143802843627, |
|
"eval_sts-test_pearson_dot": 0.5287526695750702, |
|
"eval_sts-test_pearson_euclidean": 0.7538805205317111, |
|
"eval_sts-test_pearson_manhattan": 0.7596894203751682, |
|
"eval_sts-test_pearson_max": 0.7681143802843627, |
|
"eval_sts-test_spearman_cosine": 0.770908506196058, |
|
"eval_sts-test_spearman_dot": 0.5670572774538138, |
|
"eval_sts-test_spearman_euclidean": 0.7452730842318486, |
|
"eval_sts-test_spearman_manhattan": 0.7517699916174685, |
|
"eval_sts-test_spearman_max": 0.770908506196058, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_vitaminc-pairs_loss": 0.9676446318626404, |
|
"eval_vitaminc-pairs_runtime": 2.1787, |
|
"eval_vitaminc-pairs_samples_per_second": 68.85, |
|
"eval_vitaminc-pairs_steps_per_second": 2.295, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_qnli-contrastive_loss": 0.244391530752182, |
|
"eval_qnli-contrastive_runtime": 0.4884, |
|
"eval_qnli-contrastive_samples_per_second": 307.113, |
|
"eval_qnli-contrastive_steps_per_second": 10.237, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_scitail-pairs-qa_loss": 0.1264333575963974, |
|
"eval_scitail-pairs-qa_runtime": 1.1536, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.03, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.334, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_scitail-pairs-pos_loss": 0.5472012162208557, |
|
"eval_scitail-pairs-pos_runtime": 2.1213, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.711, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.357, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_xsum-pairs_loss": 0.5869634747505188, |
|
"eval_xsum-pairs_runtime": 2.2876, |
|
"eval_xsum-pairs_samples_per_second": 65.571, |
|
"eval_xsum-pairs_steps_per_second": 2.186, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_compression-pairs_loss": 0.16663199663162231, |
|
"eval_compression-pairs_runtime": 0.4431, |
|
"eval_compression-pairs_samples_per_second": 338.526, |
|
"eval_compression-pairs_steps_per_second": 11.284, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_sciq_pairs_loss": 0.6884138584136963, |
|
"eval_sciq_pairs_runtime": 7.0451, |
|
"eval_sciq_pairs_samples_per_second": 21.291, |
|
"eval_sciq_pairs_steps_per_second": 0.71, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_qasc_pairs_loss": 5.099090099334717, |
|
"eval_qasc_pairs_runtime": 2.0309, |
|
"eval_qasc_pairs_samples_per_second": 73.86, |
|
"eval_qasc_pairs_steps_per_second": 2.462, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_openbookqa_pairs_loss": 2.6562159061431885, |
|
"eval_openbookqa_pairs_runtime": 0.8531, |
|
"eval_openbookqa_pairs_samples_per_second": 120.74, |
|
"eval_openbookqa_pairs_steps_per_second": 4.689, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_msmarco_pairs_loss": 1.3729219436645508, |
|
"eval_msmarco_pairs_runtime": 2.7346, |
|
"eval_msmarco_pairs_samples_per_second": 54.853, |
|
"eval_msmarco_pairs_steps_per_second": 1.828, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_nq_pairs_loss": 1.2174726724624634, |
|
"eval_nq_pairs_runtime": 4.9981, |
|
"eval_nq_pairs_samples_per_second": 30.012, |
|
"eval_nq_pairs_steps_per_second": 1.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_trivia_pairs_loss": 1.5839861631393433, |
|
"eval_trivia_pairs_runtime": 9.4611, |
|
"eval_trivia_pairs_samples_per_second": 15.854, |
|
"eval_trivia_pairs_steps_per_second": 0.528, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_quora_pairs_loss": 0.2804078757762909, |
|
"eval_quora_pairs_runtime": 1.1799, |
|
"eval_quora_pairs_samples_per_second": 127.13, |
|
"eval_quora_pairs_steps_per_second": 4.238, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7517875383043924, |
|
"eval_gooaq_pairs_loss": 0.9541385769844055, |
|
"eval_gooaq_pairs_runtime": 2.1014, |
|
"eval_gooaq_pairs_samples_per_second": 71.38, |
|
"eval_gooaq_pairs_steps_per_second": 2.379, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.7768130745658834, |
|
"grad_norm": 51.763004302978516, |
|
"learning_rate": 2.6059482659094694e-05, |
|
"loss": 1.4422, |
|
"step": 10437 |
|
}, |
|
{ |
|
"epoch": 1.801838610827375, |
|
"grad_norm": 3.4887988567352295, |
|
"learning_rate": 2.5352014588076858e-05, |
|
"loss": 1.4997, |
|
"step": 10584 |
|
}, |
|
{ |
|
"epoch": 1.8268641470888662, |
|
"grad_norm": 6.360722064971924, |
|
"learning_rate": 2.4597553484920438e-05, |
|
"loss": 1.2797, |
|
"step": 10731 |
|
}, |
|
{ |
|
"epoch": 1.8518896833503575, |
|
"grad_norm": 16.216428756713867, |
|
"learning_rate": 2.3799524230315696e-05, |
|
"loss": 1.2362, |
|
"step": 10878 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"grad_norm": 19.113628387451172, |
|
"learning_rate": 2.2961549482836967e-05, |
|
"loss": 1.2799, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_nli-pairs_loss": 1.2669230699539185, |
|
"eval_nli-pairs_runtime": 4.0111, |
|
"eval_nli-pairs_samples_per_second": 37.396, |
|
"eval_nli-pairs_steps_per_second": 1.247, |
|
"eval_sts-test_pearson_cosine": 0.774489523257569, |
|
"eval_sts-test_pearson_dot": 0.5150859135257536, |
|
"eval_sts-test_pearson_euclidean": 0.7570251269629877, |
|
"eval_sts-test_pearson_manhattan": 0.7623769541465137, |
|
"eval_sts-test_pearson_max": 0.774489523257569, |
|
"eval_sts-test_spearman_cosine": 0.7816800005074528, |
|
"eval_sts-test_spearman_dot": 0.565603897190929, |
|
"eval_sts-test_spearman_euclidean": 0.7507848233553155, |
|
"eval_sts-test_spearman_manhattan": 0.756029656784038, |
|
"eval_sts-test_spearman_max": 0.7816800005074528, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_vitaminc-pairs_loss": 0.875577986240387, |
|
"eval_vitaminc-pairs_runtime": 2.2185, |
|
"eval_vitaminc-pairs_samples_per_second": 67.614, |
|
"eval_vitaminc-pairs_steps_per_second": 2.254, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_qnli-contrastive_loss": 0.23095794022083282, |
|
"eval_qnli-contrastive_runtime": 0.4906, |
|
"eval_qnli-contrastive_samples_per_second": 305.756, |
|
"eval_qnli-contrastive_steps_per_second": 10.192, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_scitail-pairs-qa_loss": 0.11762743443250656, |
|
"eval_scitail-pairs-qa_runtime": 1.1505, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.379, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.346, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_scitail-pairs-pos_loss": 0.5055103898048401, |
|
"eval_scitail-pairs-pos_runtime": 2.1912, |
|
"eval_scitail-pairs-pos_samples_per_second": 68.456, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.282, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_xsum-pairs_loss": 0.5941822528839111, |
|
"eval_xsum-pairs_runtime": 2.26, |
|
"eval_xsum-pairs_samples_per_second": 66.371, |
|
"eval_xsum-pairs_steps_per_second": 2.212, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_compression-pairs_loss": 0.16561630368232727, |
|
"eval_compression-pairs_runtime": 0.4447, |
|
"eval_compression-pairs_samples_per_second": 337.281, |
|
"eval_compression-pairs_steps_per_second": 11.243, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_sciq_pairs_loss": 0.6859617233276367, |
|
"eval_sciq_pairs_runtime": 7.2855, |
|
"eval_sciq_pairs_samples_per_second": 20.589, |
|
"eval_sciq_pairs_steps_per_second": 0.686, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_qasc_pairs_loss": 4.979205131530762, |
|
"eval_qasc_pairs_runtime": 2.0332, |
|
"eval_qasc_pairs_samples_per_second": 73.775, |
|
"eval_qasc_pairs_steps_per_second": 2.459, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_openbookqa_pairs_loss": 2.5103061199188232, |
|
"eval_openbookqa_pairs_runtime": 0.8673, |
|
"eval_openbookqa_pairs_samples_per_second": 118.755, |
|
"eval_openbookqa_pairs_steps_per_second": 4.612, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_msmarco_pairs_loss": 1.2753304243087769, |
|
"eval_msmarco_pairs_runtime": 2.7942, |
|
"eval_msmarco_pairs_samples_per_second": 53.683, |
|
"eval_msmarco_pairs_steps_per_second": 1.789, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_nq_pairs_loss": 1.057248592376709, |
|
"eval_nq_pairs_runtime": 5.0749, |
|
"eval_nq_pairs_samples_per_second": 29.557, |
|
"eval_nq_pairs_steps_per_second": 0.985, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_trivia_pairs_loss": 1.4893617630004883, |
|
"eval_trivia_pairs_runtime": 9.5535, |
|
"eval_trivia_pairs_samples_per_second": 15.701, |
|
"eval_trivia_pairs_steps_per_second": 0.523, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_quora_pairs_loss": 0.27783504128456116, |
|
"eval_quora_pairs_runtime": 1.1843, |
|
"eval_quora_pairs_samples_per_second": 126.653, |
|
"eval_quora_pairs_steps_per_second": 4.222, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.8769152196118488, |
|
"eval_gooaq_pairs_loss": 0.8971360325813293, |
|
"eval_gooaq_pairs_runtime": 2.0278, |
|
"eval_gooaq_pairs_samples_per_second": 73.97, |
|
"eval_gooaq_pairs_steps_per_second": 2.466, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.90194075587334, |
|
"grad_norm": 14.915979385375977, |
|
"learning_rate": 2.2087433233862403e-05, |
|
"loss": 1.2292, |
|
"step": 11172 |
|
}, |
|
{ |
|
"epoch": 1.9269662921348316, |
|
"grad_norm": 13.753366470336914, |
|
"learning_rate": 2.118740830659258e-05, |
|
"loss": 1.0362, |
|
"step": 11319 |
|
}, |
|
{ |
|
"epoch": 1.9519918283963227, |
|
"grad_norm": 8.33267593383789, |
|
"learning_rate": 2.0259676306932596e-05, |
|
"loss": 1.1851, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 1.9770173646578142, |
|
"grad_norm": 0.6671110987663269, |
|
"learning_rate": 1.9301804508269106e-05, |
|
"loss": 1.0248, |
|
"step": 11613 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"grad_norm": 71.1603775024414, |
|
"learning_rate": 1.8331109675851356e-05, |
|
"loss": 1.1305, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_nli-pairs_loss": 1.1981595754623413, |
|
"eval_nli-pairs_runtime": 4.4194, |
|
"eval_nli-pairs_samples_per_second": 33.941, |
|
"eval_nli-pairs_steps_per_second": 1.131, |
|
"eval_sts-test_pearson_cosine": 0.7691168917727959, |
|
"eval_sts-test_pearson_dot": 0.5009080741883037, |
|
"eval_sts-test_pearson_euclidean": 0.7565965590806436, |
|
"eval_sts-test_pearson_manhattan": 0.7607578912460005, |
|
"eval_sts-test_pearson_max": 0.7691168917727959, |
|
"eval_sts-test_spearman_cosine": 0.7788020160239207, |
|
"eval_sts-test_spearman_dot": 0.5543439729717182, |
|
"eval_sts-test_spearman_euclidean": 0.7507099854871488, |
|
"eval_sts-test_spearman_manhattan": 0.7550850801051086, |
|
"eval_sts-test_spearman_max": 0.7788020160239207, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_vitaminc-pairs_loss": 0.8724684715270996, |
|
"eval_vitaminc-pairs_runtime": 2.2855, |
|
"eval_vitaminc-pairs_samples_per_second": 65.632, |
|
"eval_vitaminc-pairs_steps_per_second": 2.188, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_qnli-contrastive_loss": 0.1063760370016098, |
|
"eval_qnli-contrastive_runtime": 0.5211, |
|
"eval_qnli-contrastive_samples_per_second": 287.861, |
|
"eval_qnli-contrastive_steps_per_second": 9.595, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_scitail-pairs-qa_loss": 0.11115950345993042, |
|
"eval_scitail-pairs-qa_runtime": 1.2652, |
|
"eval_scitail-pairs-qa_samples_per_second": 118.561, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.952, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_scitail-pairs-pos_loss": 0.5056447386741638, |
|
"eval_scitail-pairs-pos_runtime": 2.3265, |
|
"eval_scitail-pairs-pos_samples_per_second": 64.475, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.149, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_xsum-pairs_loss": 0.5417940020561218, |
|
"eval_xsum-pairs_runtime": 2.2651, |
|
"eval_xsum-pairs_samples_per_second": 66.224, |
|
"eval_xsum-pairs_steps_per_second": 2.207, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_compression-pairs_loss": 0.13307414948940277, |
|
"eval_compression-pairs_runtime": 0.4568, |
|
"eval_compression-pairs_samples_per_second": 328.338, |
|
"eval_compression-pairs_steps_per_second": 10.945, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_sciq_pairs_loss": 0.5748575329780579, |
|
"eval_sciq_pairs_runtime": 7.352, |
|
"eval_sciq_pairs_samples_per_second": 20.403, |
|
"eval_sciq_pairs_steps_per_second": 0.68, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_qasc_pairs_loss": 4.8620710372924805, |
|
"eval_qasc_pairs_runtime": 2.2185, |
|
"eval_qasc_pairs_samples_per_second": 67.614, |
|
"eval_qasc_pairs_steps_per_second": 2.254, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_openbookqa_pairs_loss": 2.3957626819610596, |
|
"eval_openbookqa_pairs_runtime": 0.9894, |
|
"eval_openbookqa_pairs_samples_per_second": 104.099, |
|
"eval_openbookqa_pairs_steps_per_second": 4.043, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_msmarco_pairs_loss": 1.2221691608428955, |
|
"eval_msmarco_pairs_runtime": 2.8364, |
|
"eval_msmarco_pairs_samples_per_second": 52.883, |
|
"eval_msmarco_pairs_steps_per_second": 1.763, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_nq_pairs_loss": 1.056867241859436, |
|
"eval_nq_pairs_runtime": 5.123, |
|
"eval_nq_pairs_samples_per_second": 29.28, |
|
"eval_nq_pairs_steps_per_second": 0.976, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_trivia_pairs_loss": 1.5130479335784912, |
|
"eval_trivia_pairs_runtime": 9.628, |
|
"eval_trivia_pairs_samples_per_second": 15.58, |
|
"eval_trivia_pairs_steps_per_second": 0.519, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_quora_pairs_loss": 0.2696760296821594, |
|
"eval_quora_pairs_runtime": 1.2258, |
|
"eval_quora_pairs_samples_per_second": 122.368, |
|
"eval_quora_pairs_steps_per_second": 4.079, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0020429009193053, |
|
"eval_gooaq_pairs_loss": 0.8714584112167358, |
|
"eval_gooaq_pairs_runtime": 2.1309, |
|
"eval_gooaq_pairs_samples_per_second": 70.391, |
|
"eval_gooaq_pairs_steps_per_second": 2.346, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.0270684371807968, |
|
"grad_norm": 0.7177102565765381, |
|
"learning_rate": 1.7338706161920983e-05, |
|
"loss": 0.9284, |
|
"step": 11907 |
|
}, |
|
{ |
|
"epoch": 2.052093973442288, |
|
"grad_norm": 11.534607887268066, |
|
"learning_rate": 1.633568607738064e-05, |
|
"loss": 1.0998, |
|
"step": 12054 |
|
}, |
|
{ |
|
"epoch": 2.0771195097037793, |
|
"grad_norm": 13.443835258483887, |
|
"learning_rate": 1.5326602637903215e-05, |
|
"loss": 1.1181, |
|
"step": 12201 |
|
}, |
|
{ |
|
"epoch": 2.102145045965271, |
|
"grad_norm": 1.4795461893081665, |
|
"learning_rate": 1.431603658379759e-05, |
|
"loss": 0.9978, |
|
"step": 12348 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"grad_norm": 13.516138076782227, |
|
"learning_rate": 1.3308575385710644e-05, |
|
"loss": 1.0565, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_nli-pairs_loss": 1.210241675376892, |
|
"eval_nli-pairs_runtime": 4.0103, |
|
"eval_nli-pairs_samples_per_second": 37.404, |
|
"eval_nli-pairs_steps_per_second": 1.247, |
|
"eval_sts-test_pearson_cosine": 0.7762452815355265, |
|
"eval_sts-test_pearson_dot": 0.48818071088823645, |
|
"eval_sts-test_pearson_euclidean": 0.7572653656278441, |
|
"eval_sts-test_pearson_manhattan": 0.7608519923908275, |
|
"eval_sts-test_pearson_max": 0.7762452815355265, |
|
"eval_sts-test_spearman_cosine": 0.7839057066535283, |
|
"eval_sts-test_spearman_dot": 0.5570503640965535, |
|
"eval_sts-test_spearman_euclidean": 0.7527973687121541, |
|
"eval_sts-test_spearman_manhattan": 0.755831239077737, |
|
"eval_sts-test_spearman_max": 0.7839057066535283, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_vitaminc-pairs_loss": 0.7919407486915588, |
|
"eval_vitaminc-pairs_runtime": 2.1778, |
|
"eval_vitaminc-pairs_samples_per_second": 68.877, |
|
"eval_vitaminc-pairs_steps_per_second": 2.296, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_qnli-contrastive_loss": 0.10323584824800491, |
|
"eval_qnli-contrastive_runtime": 0.4911, |
|
"eval_qnli-contrastive_samples_per_second": 305.443, |
|
"eval_qnli-contrastive_steps_per_second": 10.181, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_scitail-pairs-qa_loss": 0.1031724140048027, |
|
"eval_scitail-pairs-qa_runtime": 1.1871, |
|
"eval_scitail-pairs-qa_samples_per_second": 126.362, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.212, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_scitail-pairs-pos_loss": 0.49804234504699707, |
|
"eval_scitail-pairs-pos_runtime": 2.1491, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.797, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.327, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_xsum-pairs_loss": 0.5050535202026367, |
|
"eval_xsum-pairs_runtime": 2.2665, |
|
"eval_xsum-pairs_samples_per_second": 66.182, |
|
"eval_xsum-pairs_steps_per_second": 2.206, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_compression-pairs_loss": 0.12162226438522339, |
|
"eval_compression-pairs_runtime": 0.4514, |
|
"eval_compression-pairs_samples_per_second": 332.334, |
|
"eval_compression-pairs_steps_per_second": 11.078, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_sciq_pairs_loss": 0.5630886554718018, |
|
"eval_sciq_pairs_runtime": 7.0948, |
|
"eval_sciq_pairs_samples_per_second": 21.142, |
|
"eval_sciq_pairs_steps_per_second": 0.705, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_qasc_pairs_loss": 4.82968282699585, |
|
"eval_qasc_pairs_runtime": 2.0203, |
|
"eval_qasc_pairs_samples_per_second": 74.248, |
|
"eval_qasc_pairs_steps_per_second": 2.475, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_openbookqa_pairs_loss": 2.291588544845581, |
|
"eval_openbookqa_pairs_runtime": 0.8653, |
|
"eval_openbookqa_pairs_samples_per_second": 119.034, |
|
"eval_openbookqa_pairs_steps_per_second": 4.623, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_msmarco_pairs_loss": 1.179781198501587, |
|
"eval_msmarco_pairs_runtime": 2.7463, |
|
"eval_msmarco_pairs_samples_per_second": 54.619, |
|
"eval_msmarco_pairs_steps_per_second": 1.821, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_nq_pairs_loss": 0.9966514110565186, |
|
"eval_nq_pairs_runtime": 5.0621, |
|
"eval_nq_pairs_samples_per_second": 29.632, |
|
"eval_nq_pairs_steps_per_second": 0.988, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_trivia_pairs_loss": 1.4555574655532837, |
|
"eval_trivia_pairs_runtime": 9.5288, |
|
"eval_trivia_pairs_samples_per_second": 15.742, |
|
"eval_trivia_pairs_steps_per_second": 0.525, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_quora_pairs_loss": 0.2517216205596924, |
|
"eval_quora_pairs_runtime": 1.154, |
|
"eval_quora_pairs_samples_per_second": 129.984, |
|
"eval_quora_pairs_steps_per_second": 4.333, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.127170582226762, |
|
"eval_gooaq_pairs_loss": 0.8206157684326172, |
|
"eval_gooaq_pairs_runtime": 2.0213, |
|
"eval_gooaq_pairs_samples_per_second": 74.209, |
|
"eval_gooaq_pairs_steps_per_second": 2.474, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 2.1521961184882534, |
|
"grad_norm": 10.220344543457031, |
|
"learning_rate": 1.2308792419776779e-05, |
|
"loss": 1.1317, |
|
"step": 12642 |
|
}, |
|
{ |
|
"epoch": 2.1772216547497445, |
|
"grad_norm": 6.893187046051025, |
|
"learning_rate": 1.13212262067496e-05, |
|
"loss": 1.0682, |
|
"step": 12789 |
|
}, |
|
{ |
|
"epoch": 2.202247191011236, |
|
"grad_norm": 14.389963150024414, |
|
"learning_rate": 1.0350359809359845e-05, |
|
"loss": 1.2708, |
|
"step": 12936 |
|
}, |
|
{ |
|
"epoch": 2.227272727272727, |
|
"grad_norm": 0.3779028058052063, |
|
"learning_rate": 9.400600481425268e-06, |
|
"loss": 1.2129, |
|
"step": 13083 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"grad_norm": 3.433882713317871, |
|
"learning_rate": 8.476259661095597e-06, |
|
"loss": 1.053, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_nli-pairs_loss": 1.1418253183364868, |
|
"eval_nli-pairs_runtime": 4.1287, |
|
"eval_nli-pairs_samples_per_second": 36.331, |
|
"eval_nli-pairs_steps_per_second": 1.211, |
|
"eval_sts-test_pearson_cosine": 0.7786789365004515, |
|
"eval_sts-test_pearson_dot": 0.4855185680416273, |
|
"eval_sts-test_pearson_euclidean": 0.7514151357124674, |
|
"eval_sts-test_pearson_manhattan": 0.7548721969767885, |
|
"eval_sts-test_pearson_max": 0.7786789365004515, |
|
"eval_sts-test_spearman_cosine": 0.7870432894305359, |
|
"eval_sts-test_spearman_dot": 0.5630314308020745, |
|
"eval_sts-test_spearman_euclidean": 0.7495100025349075, |
|
"eval_sts-test_spearman_manhattan": 0.7525107811391334, |
|
"eval_sts-test_spearman_max": 0.7870432894305359, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_vitaminc-pairs_loss": 0.7908185720443726, |
|
"eval_vitaminc-pairs_runtime": 2.1735, |
|
"eval_vitaminc-pairs_samples_per_second": 69.012, |
|
"eval_vitaminc-pairs_steps_per_second": 2.3, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_qnli-contrastive_loss": 0.17070643603801727, |
|
"eval_qnli-contrastive_runtime": 0.4906, |
|
"eval_qnli-contrastive_samples_per_second": 305.758, |
|
"eval_qnli-contrastive_steps_per_second": 10.192, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_scitail-pairs-qa_loss": 0.09536581486463547, |
|
"eval_scitail-pairs-qa_runtime": 1.1537, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.014, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.334, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_scitail-pairs-pos_loss": 0.4803718328475952, |
|
"eval_scitail-pairs-pos_runtime": 2.1338, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.297, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.343, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_xsum-pairs_loss": 0.4886069595813751, |
|
"eval_xsum-pairs_runtime": 2.2577, |
|
"eval_xsum-pairs_samples_per_second": 66.438, |
|
"eval_xsum-pairs_steps_per_second": 2.215, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_compression-pairs_loss": 0.12639394402503967, |
|
"eval_compression-pairs_runtime": 0.4532, |
|
"eval_compression-pairs_samples_per_second": 330.97, |
|
"eval_compression-pairs_steps_per_second": 11.032, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_sciq_pairs_loss": 0.5328854918479919, |
|
"eval_sciq_pairs_runtime": 7.1317, |
|
"eval_sciq_pairs_samples_per_second": 21.033, |
|
"eval_sciq_pairs_steps_per_second": 0.701, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_qasc_pairs_loss": 4.704314231872559, |
|
"eval_qasc_pairs_runtime": 2.0312, |
|
"eval_qasc_pairs_samples_per_second": 73.848, |
|
"eval_qasc_pairs_steps_per_second": 2.462, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_openbookqa_pairs_loss": 2.2545013427734375, |
|
"eval_openbookqa_pairs_runtime": 0.8657, |
|
"eval_openbookqa_pairs_samples_per_second": 118.98, |
|
"eval_openbookqa_pairs_steps_per_second": 4.621, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_msmarco_pairs_loss": 1.1227293014526367, |
|
"eval_msmarco_pairs_runtime": 2.7855, |
|
"eval_msmarco_pairs_samples_per_second": 53.851, |
|
"eval_msmarco_pairs_steps_per_second": 1.795, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_nq_pairs_loss": 0.9163884520530701, |
|
"eval_nq_pairs_runtime": 5.0251, |
|
"eval_nq_pairs_samples_per_second": 29.85, |
|
"eval_nq_pairs_steps_per_second": 0.995, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_trivia_pairs_loss": 1.2854268550872803, |
|
"eval_trivia_pairs_runtime": 9.6199, |
|
"eval_trivia_pairs_samples_per_second": 15.593, |
|
"eval_trivia_pairs_steps_per_second": 0.52, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_quora_pairs_loss": 0.24444325268268585, |
|
"eval_quora_pairs_runtime": 1.1606, |
|
"eval_quora_pairs_samples_per_second": 129.238, |
|
"eval_quora_pairs_steps_per_second": 4.308, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.2522982635342186, |
|
"eval_gooaq_pairs_loss": 0.8153015971183777, |
|
"eval_gooaq_pairs_runtime": 2.0348, |
|
"eval_gooaq_pairs_samples_per_second": 73.718, |
|
"eval_gooaq_pairs_steps_per_second": 2.457, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.27732379979571, |
|
"grad_norm": 1.6442259550094604, |
|
"learning_rate": 7.58153339905326e-06, |
|
"loss": 0.8897, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 2.302349336057201, |
|
"grad_norm": 3.068699598312378, |
|
"learning_rate": 6.720483310516198e-06, |
|
"loss": 1.181, |
|
"step": 13524 |
|
}, |
|
{ |
|
"epoch": 2.3273748723186927, |
|
"grad_norm": 15.221121788024902, |
|
"learning_rate": 5.897018137511326e-06, |
|
"loss": 1.0895, |
|
"step": 13671 |
|
}, |
|
{ |
|
"epoch": 2.3524004085801837, |
|
"grad_norm": 8.72175121307373, |
|
"learning_rate": 5.114876005116682e-06, |
|
"loss": 1.0347, |
|
"step": 13818 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"grad_norm": 15.892960548400879, |
|
"learning_rate": 4.377607452220317e-06, |
|
"loss": 1.1473, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_nli-pairs_loss": 1.1473166942596436, |
|
"eval_nli-pairs_runtime": 4.0846, |
|
"eval_nli-pairs_samples_per_second": 36.723, |
|
"eval_nli-pairs_steps_per_second": 1.224, |
|
"eval_sts-test_pearson_cosine": 0.7794293138100197, |
|
"eval_sts-test_pearson_dot": 0.47438029525552705, |
|
"eval_sts-test_pearson_euclidean": 0.751105924306521, |
|
"eval_sts-test_pearson_manhattan": 0.755281014746346, |
|
"eval_sts-test_pearson_max": 0.7794293138100197, |
|
"eval_sts-test_spearman_cosine": 0.7872791214894774, |
|
"eval_sts-test_spearman_dot": 0.5580180518636964, |
|
"eval_sts-test_spearman_euclidean": 0.7478338358714589, |
|
"eval_sts-test_spearman_manhattan": 0.7517708620916009, |
|
"eval_sts-test_spearman_max": 0.7872791214894774, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_vitaminc-pairs_loss": 0.7656364440917969, |
|
"eval_vitaminc-pairs_runtime": 2.1781, |
|
"eval_vitaminc-pairs_samples_per_second": 68.869, |
|
"eval_vitaminc-pairs_steps_per_second": 2.296, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_qnli-contrastive_loss": 0.18101921677589417, |
|
"eval_qnli-contrastive_runtime": 0.4943, |
|
"eval_qnli-contrastive_samples_per_second": 303.474, |
|
"eval_qnli-contrastive_steps_per_second": 10.116, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_scitail-pairs-qa_loss": 0.09049389511346817, |
|
"eval_scitail-pairs-qa_runtime": 1.1619, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.104, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.303, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_scitail-pairs-pos_loss": 0.47021567821502686, |
|
"eval_scitail-pairs-pos_runtime": 2.1593, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.466, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.316, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_xsum-pairs_loss": 0.4638828933238983, |
|
"eval_xsum-pairs_runtime": 2.2613, |
|
"eval_xsum-pairs_samples_per_second": 66.334, |
|
"eval_xsum-pairs_steps_per_second": 2.211, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_compression-pairs_loss": 0.12560921907424927, |
|
"eval_compression-pairs_runtime": 0.4496, |
|
"eval_compression-pairs_samples_per_second": 333.638, |
|
"eval_compression-pairs_steps_per_second": 11.121, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_sciq_pairs_loss": 0.5231578946113586, |
|
"eval_sciq_pairs_runtime": 7.1367, |
|
"eval_sciq_pairs_samples_per_second": 21.018, |
|
"eval_sciq_pairs_steps_per_second": 0.701, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_qasc_pairs_loss": 4.6708855628967285, |
|
"eval_qasc_pairs_runtime": 2.0351, |
|
"eval_qasc_pairs_samples_per_second": 73.705, |
|
"eval_qasc_pairs_steps_per_second": 2.457, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_openbookqa_pairs_loss": 2.246180772781372, |
|
"eval_openbookqa_pairs_runtime": 0.8632, |
|
"eval_openbookqa_pairs_samples_per_second": 119.324, |
|
"eval_openbookqa_pairs_steps_per_second": 4.634, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_msmarco_pairs_loss": 1.114973545074463, |
|
"eval_msmarco_pairs_runtime": 2.7619, |
|
"eval_msmarco_pairs_samples_per_second": 54.309, |
|
"eval_msmarco_pairs_steps_per_second": 1.81, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_nq_pairs_loss": 0.8807224631309509, |
|
"eval_nq_pairs_runtime": 5.0622, |
|
"eval_nq_pairs_samples_per_second": 29.632, |
|
"eval_nq_pairs_steps_per_second": 0.988, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_trivia_pairs_loss": 1.2553032636642456, |
|
"eval_trivia_pairs_runtime": 9.5755, |
|
"eval_trivia_pairs_samples_per_second": 15.665, |
|
"eval_trivia_pairs_steps_per_second": 0.522, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_quora_pairs_loss": 0.2363266944885254, |
|
"eval_quora_pairs_runtime": 1.1671, |
|
"eval_quora_pairs_samples_per_second": 128.525, |
|
"eval_quora_pairs_steps_per_second": 4.284, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.3774259448416752, |
|
"eval_gooaq_pairs_loss": 0.7755452990531921, |
|
"eval_gooaq_pairs_runtime": 2.0356, |
|
"eval_gooaq_pairs_samples_per_second": 73.69, |
|
"eval_gooaq_pairs_steps_per_second": 2.456, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 2.4024514811031663, |
|
"grad_norm": 0.3262540102005005, |
|
"learning_rate": 3.688559313827753e-06, |
|
"loss": 1.0026, |
|
"step": 14112 |
|
}, |
|
{ |
|
"epoch": 2.427477017364658, |
|
"grad_norm": 10.04266357421875, |
|
"learning_rate": 3.050859528084451e-06, |
|
"loss": 1.0728, |
|
"step": 14259 |
|
}, |
|
{ |
|
"epoch": 2.4525025536261493, |
|
"grad_norm": 0.9428766369819641, |
|
"learning_rate": 2.46740293698192e-06, |
|
"loss": 0.8232, |
|
"step": 14406 |
|
}, |
|
{ |
|
"epoch": 2.4775280898876404, |
|
"grad_norm": 0.5977104902267456, |
|
"learning_rate": 1.9408381452051525e-06, |
|
"loss": 1.0261, |
|
"step": 14553 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"grad_norm": 7.32331657409668, |
|
"learning_rate": 1.4735554967758374e-06, |
|
"loss": 0.7961, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_nli-pairs_loss": 1.1169875860214233, |
|
"eval_nli-pairs_runtime": 4.3443, |
|
"eval_nli-pairs_samples_per_second": 34.528, |
|
"eval_nli-pairs_steps_per_second": 1.151, |
|
"eval_sts-test_pearson_cosine": 0.7785596547461405, |
|
"eval_sts-test_pearson_dot": 0.4820884354849637, |
|
"eval_sts-test_pearson_euclidean": 0.7540328646347341, |
|
"eval_sts-test_pearson_manhattan": 0.7573099162359008, |
|
"eval_sts-test_pearson_max": 0.7785596547461405, |
|
"eval_sts-test_spearman_cosine": 0.7876381439639152, |
|
"eval_sts-test_spearman_dot": 0.5623065847013597, |
|
"eval_sts-test_spearman_euclidean": 0.749918183890608, |
|
"eval_sts-test_spearman_manhattan": 0.7531470748226545, |
|
"eval_sts-test_spearman_max": 0.7876381439639152, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_vitaminc-pairs_loss": 0.7670332789421082, |
|
"eval_vitaminc-pairs_runtime": 2.2702, |
|
"eval_vitaminc-pairs_samples_per_second": 66.072, |
|
"eval_vitaminc-pairs_steps_per_second": 2.202, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_qnli-contrastive_loss": 0.1765088140964508, |
|
"eval_qnli-contrastive_runtime": 0.5066, |
|
"eval_qnli-contrastive_samples_per_second": 296.085, |
|
"eval_qnli-contrastive_steps_per_second": 9.87, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_scitail-pairs-qa_loss": 0.09172121435403824, |
|
"eval_scitail-pairs-qa_runtime": 1.2477, |
|
"eval_scitail-pairs-qa_samples_per_second": 120.219, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.007, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_scitail-pairs-pos_loss": 0.46642106771469116, |
|
"eval_scitail-pairs-pos_runtime": 2.345, |
|
"eval_scitail-pairs-pos_samples_per_second": 63.966, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.132, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_xsum-pairs_loss": 0.4636780917644501, |
|
"eval_xsum-pairs_runtime": 2.2663, |
|
"eval_xsum-pairs_samples_per_second": 66.186, |
|
"eval_xsum-pairs_steps_per_second": 2.206, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_compression-pairs_loss": 0.11796586215496063, |
|
"eval_compression-pairs_runtime": 0.4625, |
|
"eval_compression-pairs_samples_per_second": 324.33, |
|
"eval_compression-pairs_steps_per_second": 10.811, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_sciq_pairs_loss": 0.5210192203521729, |
|
"eval_sciq_pairs_runtime": 7.5811, |
|
"eval_sciq_pairs_samples_per_second": 19.786, |
|
"eval_sciq_pairs_steps_per_second": 0.66, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_qasc_pairs_loss": 4.692019939422607, |
|
"eval_qasc_pairs_runtime": 2.2569, |
|
"eval_qasc_pairs_samples_per_second": 66.461, |
|
"eval_qasc_pairs_steps_per_second": 2.215, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_openbookqa_pairs_loss": 2.2295894622802734, |
|
"eval_openbookqa_pairs_runtime": 0.9771, |
|
"eval_openbookqa_pairs_samples_per_second": 105.409, |
|
"eval_openbookqa_pairs_steps_per_second": 4.094, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_msmarco_pairs_loss": 1.1003308296203613, |
|
"eval_msmarco_pairs_runtime": 2.8235, |
|
"eval_msmarco_pairs_samples_per_second": 53.126, |
|
"eval_msmarco_pairs_steps_per_second": 1.771, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_nq_pairs_loss": 0.8704373240470886, |
|
"eval_nq_pairs_runtime": 5.0895, |
|
"eval_nq_pairs_samples_per_second": 29.473, |
|
"eval_nq_pairs_steps_per_second": 0.982, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_trivia_pairs_loss": 1.2344694137573242, |
|
"eval_trivia_pairs_runtime": 9.6823, |
|
"eval_trivia_pairs_samples_per_second": 15.492, |
|
"eval_trivia_pairs_steps_per_second": 0.516, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_quora_pairs_loss": 0.23405136168003082, |
|
"eval_quora_pairs_runtime": 1.1581, |
|
"eval_quora_pairs_samples_per_second": 129.521, |
|
"eval_quora_pairs_steps_per_second": 4.317, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.502553626149132, |
|
"eval_gooaq_pairs_loss": 0.7561784982681274, |
|
"eval_gooaq_pairs_runtime": 2.048, |
|
"eval_gooaq_pairs_samples_per_second": 73.241, |
|
"eval_gooaq_pairs_steps_per_second": 2.441, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.527579162410623, |
|
"grad_norm": 17.226215362548828, |
|
"learning_rate": 1.0676762240713628e-06, |
|
"loss": 1.1167, |
|
"step": 14847 |
|
}, |
|
{ |
|
"epoch": 2.5526046986721145, |
|
"grad_norm": 11.959600448608398, |
|
"learning_rate": 7.250428184777619e-07, |
|
"loss": 1.1546, |
|
"step": 14994 |
|
}, |
|
{ |
|
"epoch": 2.577630234933606, |
|
"grad_norm": 6.162104606628418, |
|
"learning_rate": 4.4721066638903405e-07, |
|
"loss": 0.9669, |
|
"step": 15141 |
|
}, |
|
{ |
|
"epoch": 2.602655771195097, |
|
"grad_norm": 1.557124137878418, |
|
"learning_rate": 2.3544098852131546e-07, |
|
"loss": 1.1057, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"grad_norm": 6.910587787628174, |
|
"learning_rate": 9.069511459389502e-08, |
|
"loss": 0.868, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_nli-pairs_loss": 1.1110929250717163, |
|
"eval_nli-pairs_runtime": 4.0329, |
|
"eval_nli-pairs_samples_per_second": 37.194, |
|
"eval_nli-pairs_steps_per_second": 1.24, |
|
"eval_sts-test_pearson_cosine": 0.7783243156342984, |
|
"eval_sts-test_pearson_dot": 0.4788735179310955, |
|
"eval_sts-test_pearson_euclidean": 0.7523796985987524, |
|
"eval_sts-test_pearson_manhattan": 0.755715400774414, |
|
"eval_sts-test_pearson_max": 0.7783243156342984, |
|
"eval_sts-test_spearman_cosine": 0.787532153185639, |
|
"eval_sts-test_spearman_dot": 0.5602968065359735, |
|
"eval_sts-test_spearman_euclidean": 0.7486436044524005, |
|
"eval_sts-test_spearman_manhattan": 0.7517248414986571, |
|
"eval_sts-test_spearman_max": 0.787532153185639, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_vitaminc-pairs_loss": 0.7633076906204224, |
|
"eval_vitaminc-pairs_runtime": 2.1822, |
|
"eval_vitaminc-pairs_samples_per_second": 68.738, |
|
"eval_vitaminc-pairs_steps_per_second": 2.291, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_qnli-contrastive_loss": 0.17258352041244507, |
|
"eval_qnli-contrastive_runtime": 0.4919, |
|
"eval_qnli-contrastive_samples_per_second": 304.937, |
|
"eval_qnli-contrastive_steps_per_second": 10.165, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_scitail-pairs-qa_loss": 0.09059016406536102, |
|
"eval_scitail-pairs-qa_runtime": 1.1561, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.748, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.325, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_scitail-pairs-pos_loss": 0.46606332063674927, |
|
"eval_scitail-pairs-pos_runtime": 2.1432, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.988, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.333, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_xsum-pairs_loss": 0.4616774618625641, |
|
"eval_xsum-pairs_runtime": 2.2623, |
|
"eval_xsum-pairs_samples_per_second": 66.306, |
|
"eval_xsum-pairs_steps_per_second": 2.21, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_compression-pairs_loss": 0.11741954833269119, |
|
"eval_compression-pairs_runtime": 0.4508, |
|
"eval_compression-pairs_samples_per_second": 332.731, |
|
"eval_compression-pairs_steps_per_second": 11.091, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_sciq_pairs_loss": 0.5167393088340759, |
|
"eval_sciq_pairs_runtime": 7.102, |
|
"eval_sciq_pairs_samples_per_second": 21.121, |
|
"eval_sciq_pairs_steps_per_second": 0.704, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_qasc_pairs_loss": 4.663302421569824, |
|
"eval_qasc_pairs_runtime": 2.0987, |
|
"eval_qasc_pairs_samples_per_second": 71.472, |
|
"eval_qasc_pairs_steps_per_second": 2.382, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_openbookqa_pairs_loss": 2.2289419174194336, |
|
"eval_openbookqa_pairs_runtime": 0.8981, |
|
"eval_openbookqa_pairs_samples_per_second": 114.689, |
|
"eval_openbookqa_pairs_steps_per_second": 4.454, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_msmarco_pairs_loss": 1.1020023822784424, |
|
"eval_msmarco_pairs_runtime": 2.7621, |
|
"eval_msmarco_pairs_samples_per_second": 54.306, |
|
"eval_msmarco_pairs_steps_per_second": 1.81, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_nq_pairs_loss": 0.8701896071434021, |
|
"eval_nq_pairs_runtime": 5.0219, |
|
"eval_nq_pairs_samples_per_second": 29.869, |
|
"eval_nq_pairs_steps_per_second": 0.996, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_trivia_pairs_loss": 1.231194257736206, |
|
"eval_trivia_pairs_runtime": 9.5216, |
|
"eval_trivia_pairs_samples_per_second": 15.754, |
|
"eval_trivia_pairs_steps_per_second": 0.525, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_quora_pairs_loss": 0.23271657526493073, |
|
"eval_quora_pairs_runtime": 1.1597, |
|
"eval_quora_pairs_samples_per_second": 129.345, |
|
"eval_quora_pairs_steps_per_second": 4.312, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.627681307456588, |
|
"eval_gooaq_pairs_loss": 0.7600908279418945, |
|
"eval_gooaq_pairs_runtime": 2.0644, |
|
"eval_gooaq_pairs_samples_per_second": 72.659, |
|
"eval_gooaq_pairs_steps_per_second": 2.422, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 2.6527068437180796, |
|
"grad_norm": 1.7729548215866089, |
|
"learning_rate": 1.363011936719949e-08, |
|
"loss": 0.9528, |
|
"step": 15582 |
|
}, |
|
{ |
|
"epoch": 2.677732379979571, |
|
"grad_norm": 10.120983123779297, |
|
"learning_rate": 2.999540416015201e-05, |
|
"loss": 0.9067, |
|
"step": 15729 |
|
}, |
|
{ |
|
"epoch": 2.702757916241062, |
|
"grad_norm": 7.889310359954834, |
|
"learning_rate": 2.9936366712797823e-05, |
|
"loss": 0.9652, |
|
"step": 15876 |
|
}, |
|
{ |
|
"epoch": 2.7277834525025537, |
|
"grad_norm": 14.683575630187988, |
|
"learning_rate": 2.98095255394156e-05, |
|
"loss": 0.9666, |
|
"step": 16023 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"grad_norm": 8.276579856872559, |
|
"learning_rate": 2.9615456436270568e-05, |
|
"loss": 0.9773, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_nli-pairs_loss": 1.1626721620559692, |
|
"eval_nli-pairs_runtime": 4.0414, |
|
"eval_nli-pairs_samples_per_second": 37.116, |
|
"eval_nli-pairs_steps_per_second": 1.237, |
|
"eval_sts-test_pearson_cosine": 0.7701324682629961, |
|
"eval_sts-test_pearson_dot": 0.5036980743243168, |
|
"eval_sts-test_pearson_euclidean": 0.7514501837169054, |
|
"eval_sts-test_pearson_manhattan": 0.7545217281908033, |
|
"eval_sts-test_pearson_max": 0.7701324682629961, |
|
"eval_sts-test_spearman_cosine": 0.7814132001858928, |
|
"eval_sts-test_spearman_dot": 0.559749596717011, |
|
"eval_sts-test_spearman_euclidean": 0.7455646472517521, |
|
"eval_sts-test_spearman_manhattan": 0.7491013309648271, |
|
"eval_sts-test_spearman_max": 0.7814132001858928, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_vitaminc-pairs_loss": 0.7333893775939941, |
|
"eval_vitaminc-pairs_runtime": 2.189, |
|
"eval_vitaminc-pairs_samples_per_second": 68.525, |
|
"eval_vitaminc-pairs_steps_per_second": 2.284, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_qnli-contrastive_loss": 0.2755473256111145, |
|
"eval_qnli-contrastive_runtime": 0.4937, |
|
"eval_qnli-contrastive_samples_per_second": 303.852, |
|
"eval_qnli-contrastive_steps_per_second": 10.128, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_scitail-pairs-qa_loss": 0.094447560608387, |
|
"eval_scitail-pairs-qa_runtime": 1.1614, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.149, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.305, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_scitail-pairs-pos_loss": 0.5057587623596191, |
|
"eval_scitail-pairs-pos_runtime": 2.152, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.701, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.323, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_xsum-pairs_loss": 0.47404322028160095, |
|
"eval_xsum-pairs_runtime": 2.2581, |
|
"eval_xsum-pairs_samples_per_second": 66.428, |
|
"eval_xsum-pairs_steps_per_second": 2.214, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_compression-pairs_loss": 0.13702698051929474, |
|
"eval_compression-pairs_runtime": 0.4467, |
|
"eval_compression-pairs_samples_per_second": 335.818, |
|
"eval_compression-pairs_steps_per_second": 11.194, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_sciq_pairs_loss": 0.5540564060211182, |
|
"eval_sciq_pairs_runtime": 7.0974, |
|
"eval_sciq_pairs_samples_per_second": 21.134, |
|
"eval_sciq_pairs_steps_per_second": 0.704, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_qasc_pairs_loss": 4.834662437438965, |
|
"eval_qasc_pairs_runtime": 2.0303, |
|
"eval_qasc_pairs_samples_per_second": 73.881, |
|
"eval_qasc_pairs_steps_per_second": 2.463, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_openbookqa_pairs_loss": 2.2681949138641357, |
|
"eval_openbookqa_pairs_runtime": 0.8722, |
|
"eval_openbookqa_pairs_samples_per_second": 118.098, |
|
"eval_openbookqa_pairs_steps_per_second": 4.586, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_msmarco_pairs_loss": 1.0872397422790527, |
|
"eval_msmarco_pairs_runtime": 2.7843, |
|
"eval_msmarco_pairs_samples_per_second": 53.873, |
|
"eval_msmarco_pairs_steps_per_second": 1.796, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_nq_pairs_loss": 0.9480971097946167, |
|
"eval_nq_pairs_runtime": 5.0477, |
|
"eval_nq_pairs_samples_per_second": 29.717, |
|
"eval_nq_pairs_steps_per_second": 0.991, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_trivia_pairs_loss": 1.3409621715545654, |
|
"eval_trivia_pairs_runtime": 9.5305, |
|
"eval_trivia_pairs_samples_per_second": 15.739, |
|
"eval_trivia_pairs_steps_per_second": 0.525, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_quora_pairs_loss": 0.23218180239200592, |
|
"eval_quora_pairs_runtime": 1.1513, |
|
"eval_quora_pairs_samples_per_second": 130.282, |
|
"eval_quora_pairs_steps_per_second": 4.343, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.752808988764045, |
|
"eval_gooaq_pairs_loss": 0.8007516264915466, |
|
"eval_gooaq_pairs_runtime": 2.0386, |
|
"eval_gooaq_pairs_samples_per_second": 73.58, |
|
"eval_gooaq_pairs_steps_per_second": 2.453, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 2.7778345250255363, |
|
"grad_norm": 4.684284687042236, |
|
"learning_rate": 2.935504038121719e-05, |
|
"loss": 1.0145, |
|
"step": 16317 |
|
}, |
|
{ |
|
"epoch": 2.802860061287028, |
|
"grad_norm": 7.51400089263916, |
|
"learning_rate": 2.9029459534494935e-05, |
|
"loss": 1.1732, |
|
"step": 16464 |
|
}, |
|
{ |
|
"epoch": 2.827885597548519, |
|
"grad_norm": 2.8207240104675293, |
|
"learning_rate": 2.8640191872304822e-05, |
|
"loss": 0.884, |
|
"step": 16611 |
|
}, |
|
{ |
|
"epoch": 2.8529111338100104, |
|
"grad_norm": 0.4372667074203491, |
|
"learning_rate": 2.8189004477527595e-05, |
|
"loss": 0.9076, |
|
"step": 16758 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"grad_norm": 10.363112449645996, |
|
"learning_rate": 2.7677945518040432e-05, |
|
"loss": 0.9472, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_nli-pairs_loss": 1.1196931600570679, |
|
"eval_nli-pairs_runtime": 4.0541, |
|
"eval_nli-pairs_samples_per_second": 37.0, |
|
"eval_nli-pairs_steps_per_second": 1.233, |
|
"eval_sts-test_pearson_cosine": 0.7699051255542444, |
|
"eval_sts-test_pearson_dot": 0.46895647260006346, |
|
"eval_sts-test_pearson_euclidean": 0.757452845704679, |
|
"eval_sts-test_pearson_manhattan": 0.76091917538426, |
|
"eval_sts-test_pearson_max": 0.7699051255542444, |
|
"eval_sts-test_spearman_cosine": 0.7830173030447911, |
|
"eval_sts-test_spearman_dot": 0.55297440791417, |
|
"eval_sts-test_spearman_euclidean": 0.7540269111333524, |
|
"eval_sts-test_spearman_manhattan": 0.7581350404978112, |
|
"eval_sts-test_spearman_max": 0.7830173030447911, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_vitaminc-pairs_loss": 0.712186872959137, |
|
"eval_vitaminc-pairs_runtime": 2.1744, |
|
"eval_vitaminc-pairs_samples_per_second": 68.985, |
|
"eval_vitaminc-pairs_steps_per_second": 2.299, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_qnli-contrastive_loss": 0.22650264203548431, |
|
"eval_qnli-contrastive_runtime": 0.4958, |
|
"eval_qnli-contrastive_samples_per_second": 302.557, |
|
"eval_qnli-contrastive_steps_per_second": 10.085, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_scitail-pairs-qa_loss": 0.09132811427116394, |
|
"eval_scitail-pairs-qa_runtime": 1.1566, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.694, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.323, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_scitail-pairs-pos_loss": 0.467918336391449, |
|
"eval_scitail-pairs-pos_runtime": 2.2104, |
|
"eval_scitail-pairs-pos_samples_per_second": 67.862, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.262, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_xsum-pairs_loss": 0.47422775626182556, |
|
"eval_xsum-pairs_runtime": 2.2599, |
|
"eval_xsum-pairs_samples_per_second": 66.373, |
|
"eval_xsum-pairs_steps_per_second": 2.212, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_compression-pairs_loss": 0.11817952245473862, |
|
"eval_compression-pairs_runtime": 0.4608, |
|
"eval_compression-pairs_samples_per_second": 325.531, |
|
"eval_compression-pairs_steps_per_second": 10.851, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_sciq_pairs_loss": 0.5348854660987854, |
|
"eval_sciq_pairs_runtime": 7.1388, |
|
"eval_sciq_pairs_samples_per_second": 21.012, |
|
"eval_sciq_pairs_steps_per_second": 0.7, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_qasc_pairs_loss": 4.7842535972595215, |
|
"eval_qasc_pairs_runtime": 2.025, |
|
"eval_qasc_pairs_samples_per_second": 74.075, |
|
"eval_qasc_pairs_steps_per_second": 2.469, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_openbookqa_pairs_loss": 2.231882095336914, |
|
"eval_openbookqa_pairs_runtime": 0.8604, |
|
"eval_openbookqa_pairs_samples_per_second": 119.717, |
|
"eval_openbookqa_pairs_steps_per_second": 4.649, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_msmarco_pairs_loss": 1.0365279912948608, |
|
"eval_msmarco_pairs_runtime": 2.7537, |
|
"eval_msmarco_pairs_samples_per_second": 54.473, |
|
"eval_msmarco_pairs_steps_per_second": 1.816, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_nq_pairs_loss": 0.8606622219085693, |
|
"eval_nq_pairs_runtime": 5.0268, |
|
"eval_nq_pairs_samples_per_second": 29.84, |
|
"eval_nq_pairs_steps_per_second": 0.995, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_trivia_pairs_loss": 1.3138747215270996, |
|
"eval_trivia_pairs_runtime": 9.5468, |
|
"eval_trivia_pairs_samples_per_second": 15.712, |
|
"eval_trivia_pairs_steps_per_second": 0.524, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_quora_pairs_loss": 0.23666483163833618, |
|
"eval_quora_pairs_runtime": 1.1556, |
|
"eval_quora_pairs_samples_per_second": 129.805, |
|
"eval_quora_pairs_steps_per_second": 4.327, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.8779366700715014, |
|
"eval_gooaq_pairs_loss": 0.7752490043640137, |
|
"eval_gooaq_pairs_runtime": 2.0295, |
|
"eval_gooaq_pairs_samples_per_second": 73.909, |
|
"eval_gooaq_pairs_steps_per_second": 2.464, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 2.902962206332993, |
|
"grad_norm": 1.7985535860061646, |
|
"learning_rate": 2.7109334949046588e-05, |
|
"loss": 0.8681, |
|
"step": 17052 |
|
}, |
|
{ |
|
"epoch": 2.927987742594484, |
|
"grad_norm": 1.4242136478424072, |
|
"learning_rate": 2.64857539816249e-05, |
|
"loss": 0.7491, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 2.9530132788559755, |
|
"grad_norm": 2.512678623199463, |
|
"learning_rate": 2.5814799487800633e-05, |
|
"loss": 0.8847, |
|
"step": 17346 |
|
}, |
|
{ |
|
"epoch": 2.9780388151174666, |
|
"grad_norm": 8.538130760192871, |
|
"learning_rate": 2.50903296142385e-05, |
|
"loss": 0.8441, |
|
"step": 17493 |
|
} |
|
], |
|
"logging_steps": 147, |
|
"max_steps": 29370, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 2937, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|