|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 735, |
|
"global_step": 5874, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02502553626149132, |
|
"grad_norm": 65.55949401855469, |
|
"learning_rate": 4.834865509022812e-07, |
|
"loss": 16.851, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.05005107252298264, |
|
"grad_norm": 23.207971572875977, |
|
"learning_rate": 9.805924412665985e-07, |
|
"loss": 11.2787, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.07507660878447395, |
|
"grad_norm": 176.1532440185547, |
|
"learning_rate": 1.481103166496425e-06, |
|
"loss": 8.9166, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.10010214504596528, |
|
"grad_norm": 22.1564998626709, |
|
"learning_rate": 1.981613891726251e-06, |
|
"loss": 7.9463, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"grad_norm": 20.11876106262207, |
|
"learning_rate": 2.4821246169560777e-06, |
|
"loss": 7.2108, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_nli-pairs_loss": 6.905651569366455, |
|
"eval_nli-pairs_runtime": 4.0844, |
|
"eval_nli-pairs_samples_per_second": 36.725, |
|
"eval_nli-pairs_steps_per_second": 1.224, |
|
"eval_sts-test_pearson_cosine": 0.3740256550072784, |
|
"eval_sts-test_pearson_dot": 0.13384893803205677, |
|
"eval_sts-test_pearson_euclidean": 0.3912387619869807, |
|
"eval_sts-test_pearson_manhattan": 0.4202605137823524, |
|
"eval_sts-test_pearson_max": 0.4202605137823524, |
|
"eval_sts-test_spearman_cosine": 0.37210107338950205, |
|
"eval_sts-test_spearman_dot": 0.12092409843417483, |
|
"eval_sts-test_spearman_euclidean": 0.39172287978780546, |
|
"eval_sts-test_spearman_manhattan": 0.4169664738563951, |
|
"eval_sts-test_spearman_max": 0.4169664738563951, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_vitaminc-pairs_loss": 5.720878601074219, |
|
"eval_vitaminc-pairs_runtime": 2.1703, |
|
"eval_vitaminc-pairs_samples_per_second": 69.115, |
|
"eval_vitaminc-pairs_steps_per_second": 2.304, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_qnli-contrastive_loss": 8.1649751663208, |
|
"eval_qnli-contrastive_runtime": 0.4937, |
|
"eval_qnli-contrastive_samples_per_second": 303.841, |
|
"eval_qnli-contrastive_steps_per_second": 10.128, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_scitail-pairs-qa_loss": 3.7859296798706055, |
|
"eval_scitail-pairs-qa_runtime": 1.1509, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.329, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.344, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_scitail-pairs-pos_loss": 3.9919917583465576, |
|
"eval_scitail-pairs-pos_runtime": 2.1442, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.956, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.332, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_xsum-pairs_loss": 4.600368976593018, |
|
"eval_xsum-pairs_runtime": 2.26, |
|
"eval_xsum-pairs_samples_per_second": 66.371, |
|
"eval_xsum-pairs_steps_per_second": 2.212, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_compression-pairs_loss": 3.3037569522857666, |
|
"eval_compression-pairs_runtime": 0.449, |
|
"eval_compression-pairs_samples_per_second": 334.078, |
|
"eval_compression-pairs_steps_per_second": 11.136, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_sciq_pairs_loss": 10.214456558227539, |
|
"eval_sciq_pairs_runtime": 7.1179, |
|
"eval_sciq_pairs_samples_per_second": 21.074, |
|
"eval_sciq_pairs_steps_per_second": 0.702, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_qasc_pairs_loss": 10.58031940460205, |
|
"eval_qasc_pairs_runtime": 2.0175, |
|
"eval_qasc_pairs_samples_per_second": 74.348, |
|
"eval_qasc_pairs_steps_per_second": 2.478, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_openbookqa_pairs_loss": 7.862658977508545, |
|
"eval_openbookqa_pairs_runtime": 0.8571, |
|
"eval_openbookqa_pairs_samples_per_second": 120.168, |
|
"eval_openbookqa_pairs_steps_per_second": 4.667, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_msmarco_pairs_loss": 8.754273414611816, |
|
"eval_msmarco_pairs_runtime": 2.7533, |
|
"eval_msmarco_pairs_samples_per_second": 54.481, |
|
"eval_msmarco_pairs_steps_per_second": 1.816, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_nq_pairs_loss": 8.415486335754395, |
|
"eval_nq_pairs_runtime": 5.0894, |
|
"eval_nq_pairs_samples_per_second": 29.473, |
|
"eval_nq_pairs_steps_per_second": 0.982, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_trivia_pairs_loss": 9.051105499267578, |
|
"eval_trivia_pairs_runtime": 9.5498, |
|
"eval_trivia_pairs_samples_per_second": 15.707, |
|
"eval_trivia_pairs_steps_per_second": 0.524, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_quora_pairs_loss": 4.5232110023498535, |
|
"eval_quora_pairs_runtime": 1.1469, |
|
"eval_quora_pairs_samples_per_second": 130.785, |
|
"eval_quora_pairs_steps_per_second": 4.36, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.12512768130745658, |
|
"eval_gooaq_pairs_loss": 7.579105854034424, |
|
"eval_gooaq_pairs_runtime": 2.0491, |
|
"eval_gooaq_pairs_samples_per_second": 73.203, |
|
"eval_gooaq_pairs_steps_per_second": 2.44, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.1501532175689479, |
|
"grad_norm": 31.7736759185791, |
|
"learning_rate": 2.982635342185904e-06, |
|
"loss": 6.7709, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.1751787538304392, |
|
"grad_norm": 31.57339096069336, |
|
"learning_rate": 3.4831460674157306e-06, |
|
"loss": 6.1746, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.20020429009193055, |
|
"grad_norm": 25.392702102661133, |
|
"learning_rate": 3.9836567926455565e-06, |
|
"loss": 5.7706, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.22522982635342187, |
|
"grad_norm": 32.390472412109375, |
|
"learning_rate": 4.484167517875383e-06, |
|
"loss": 5.7283, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"grad_norm": 18.85039520263672, |
|
"learning_rate": 4.98467824310521e-06, |
|
"loss": 5.1856, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_nli-pairs_loss": 4.352054119110107, |
|
"eval_nli-pairs_runtime": 4.1476, |
|
"eval_nli-pairs_samples_per_second": 36.165, |
|
"eval_nli-pairs_steps_per_second": 1.206, |
|
"eval_sts-test_pearson_cosine": 0.6694155778571752, |
|
"eval_sts-test_pearson_dot": 0.5201102118957572, |
|
"eval_sts-test_pearson_euclidean": 0.6613028243200022, |
|
"eval_sts-test_pearson_manhattan": 0.6670710500315469, |
|
"eval_sts-test_pearson_max": 0.6694155778571752, |
|
"eval_sts-test_spearman_cosine": 0.6367853204388882, |
|
"eval_sts-test_spearman_dot": 0.4940207180607985, |
|
"eval_sts-test_spearman_euclidean": 0.6391132775161348, |
|
"eval_sts-test_spearman_manhattan": 0.6446159957787251, |
|
"eval_sts-test_spearman_max": 0.6446159957787251, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_vitaminc-pairs_loss": 3.4987735748291016, |
|
"eval_vitaminc-pairs_runtime": 2.1678, |
|
"eval_vitaminc-pairs_samples_per_second": 69.194, |
|
"eval_vitaminc-pairs_steps_per_second": 2.306, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_qnli-contrastive_loss": 12.915559768676758, |
|
"eval_qnli-contrastive_runtime": 0.4918, |
|
"eval_qnli-contrastive_samples_per_second": 304.99, |
|
"eval_qnli-contrastive_steps_per_second": 10.166, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_scitail-pairs-qa_loss": 1.3250077962875366, |
|
"eval_scitail-pairs-qa_runtime": 1.154, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.984, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.333, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_scitail-pairs-pos_loss": 2.457335948944092, |
|
"eval_scitail-pairs-pos_runtime": 2.1475, |
|
"eval_scitail-pairs-pos_samples_per_second": 69.85, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.328, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_xsum-pairs_loss": 3.071201801300049, |
|
"eval_xsum-pairs_runtime": 2.2634, |
|
"eval_xsum-pairs_samples_per_second": 66.271, |
|
"eval_xsum-pairs_steps_per_second": 2.209, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_compression-pairs_loss": 2.0629916191101074, |
|
"eval_compression-pairs_runtime": 0.4529, |
|
"eval_compression-pairs_samples_per_second": 331.23, |
|
"eval_compression-pairs_steps_per_second": 11.041, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_sciq_pairs_loss": 9.06814193725586, |
|
"eval_sciq_pairs_runtime": 7.1445, |
|
"eval_sciq_pairs_samples_per_second": 20.995, |
|
"eval_sciq_pairs_steps_per_second": 0.7, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_qasc_pairs_loss": 9.245658874511719, |
|
"eval_qasc_pairs_runtime": 2.0471, |
|
"eval_qasc_pairs_samples_per_second": 73.274, |
|
"eval_qasc_pairs_steps_per_second": 2.442, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_openbookqa_pairs_loss": 5.652446746826172, |
|
"eval_openbookqa_pairs_runtime": 0.8946, |
|
"eval_openbookqa_pairs_samples_per_second": 115.14, |
|
"eval_openbookqa_pairs_steps_per_second": 4.471, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_msmarco_pairs_loss": 4.844855785369873, |
|
"eval_msmarco_pairs_runtime": 2.7887, |
|
"eval_msmarco_pairs_samples_per_second": 53.788, |
|
"eval_msmarco_pairs_steps_per_second": 1.793, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_nq_pairs_loss": 5.023958206176758, |
|
"eval_nq_pairs_runtime": 5.0823, |
|
"eval_nq_pairs_samples_per_second": 29.514, |
|
"eval_nq_pairs_steps_per_second": 0.984, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_trivia_pairs_loss": 5.2907304763793945, |
|
"eval_trivia_pairs_runtime": 9.6673, |
|
"eval_trivia_pairs_samples_per_second": 15.516, |
|
"eval_trivia_pairs_steps_per_second": 0.517, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_quora_pairs_loss": 1.5572240352630615, |
|
"eval_quora_pairs_runtime": 1.1979, |
|
"eval_quora_pairs_samples_per_second": 125.218, |
|
"eval_quora_pairs_steps_per_second": 4.174, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.25025536261491316, |
|
"eval_gooaq_pairs_loss": 3.970768928527832, |
|
"eval_gooaq_pairs_runtime": 2.117, |
|
"eval_gooaq_pairs_samples_per_second": 70.855, |
|
"eval_gooaq_pairs_steps_per_second": 2.362, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.2752808988764045, |
|
"grad_norm": 40.67585754394531, |
|
"learning_rate": 5.4851889683350365e-06, |
|
"loss": 4.185, |
|
"step": 1617 |
|
}, |
|
{ |
|
"epoch": 0.3003064351378958, |
|
"grad_norm": 45.92570495605469, |
|
"learning_rate": 5.985699693564862e-06, |
|
"loss": 4.6367, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 0.32533197139938713, |
|
"grad_norm": 13.566838264465332, |
|
"learning_rate": 6.486210418794688e-06, |
|
"loss": 4.3615, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 0.3503575076608784, |
|
"grad_norm": 9.495999336242676, |
|
"learning_rate": 6.986721144024515e-06, |
|
"loss": 4.1791, |
|
"step": 2058 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"grad_norm": 32.735416412353516, |
|
"learning_rate": 7.487231869254341e-06, |
|
"loss": 4.1051, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_nli-pairs_loss": 3.2717113494873047, |
|
"eval_nli-pairs_runtime": 4.0124, |
|
"eval_nli-pairs_samples_per_second": 37.384, |
|
"eval_nli-pairs_steps_per_second": 1.246, |
|
"eval_sts-test_pearson_cosine": 0.6958570089637609, |
|
"eval_sts-test_pearson_dot": 0.5824298957890577, |
|
"eval_sts-test_pearson_euclidean": 0.6893962819387462, |
|
"eval_sts-test_pearson_manhattan": 0.6993681181979946, |
|
"eval_sts-test_pearson_max": 0.6993681181979946, |
|
"eval_sts-test_spearman_cosine": 0.6652712160836801, |
|
"eval_sts-test_spearman_dot": 0.5536505624407877, |
|
"eval_sts-test_spearman_euclidean": 0.6659844314307678, |
|
"eval_sts-test_spearman_manhattan": 0.675740852112121, |
|
"eval_sts-test_spearman_max": 0.675740852112121, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_vitaminc-pairs_loss": 2.7197911739349365, |
|
"eval_vitaminc-pairs_runtime": 2.1625, |
|
"eval_vitaminc-pairs_samples_per_second": 69.365, |
|
"eval_vitaminc-pairs_steps_per_second": 2.312, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_qnli-contrastive_loss": 9.638714790344238, |
|
"eval_qnli-contrastive_runtime": 0.4877, |
|
"eval_qnli-contrastive_samples_per_second": 307.567, |
|
"eval_qnli-contrastive_steps_per_second": 10.252, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_scitail-pairs-qa_loss": 0.8106752634048462, |
|
"eval_scitail-pairs-qa_runtime": 1.1588, |
|
"eval_scitail-pairs-qa_samples_per_second": 129.449, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.315, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_scitail-pairs-pos_loss": 1.8894625902175903, |
|
"eval_scitail-pairs-pos_runtime": 2.1181, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.817, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.361, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_xsum-pairs_loss": 2.262718439102173, |
|
"eval_xsum-pairs_runtime": 2.2585, |
|
"eval_xsum-pairs_samples_per_second": 66.416, |
|
"eval_xsum-pairs_steps_per_second": 2.214, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_compression-pairs_loss": 1.4910633563995361, |
|
"eval_compression-pairs_runtime": 0.4462, |
|
"eval_compression-pairs_samples_per_second": 336.204, |
|
"eval_compression-pairs_steps_per_second": 11.207, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_sciq_pairs_loss": 8.59740161895752, |
|
"eval_sciq_pairs_runtime": 7.1845, |
|
"eval_sciq_pairs_samples_per_second": 20.878, |
|
"eval_sciq_pairs_steps_per_second": 0.696, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_qasc_pairs_loss": 8.103879928588867, |
|
"eval_qasc_pairs_runtime": 2.0762, |
|
"eval_qasc_pairs_samples_per_second": 72.246, |
|
"eval_qasc_pairs_steps_per_second": 2.408, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_openbookqa_pairs_loss": 5.090969562530518, |
|
"eval_openbookqa_pairs_runtime": 0.89, |
|
"eval_openbookqa_pairs_samples_per_second": 115.726, |
|
"eval_openbookqa_pairs_steps_per_second": 4.494, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_msmarco_pairs_loss": 3.9566943645477295, |
|
"eval_msmarco_pairs_runtime": 2.8183, |
|
"eval_msmarco_pairs_samples_per_second": 53.223, |
|
"eval_msmarco_pairs_steps_per_second": 1.774, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_nq_pairs_loss": 4.009054183959961, |
|
"eval_nq_pairs_runtime": 5.0219, |
|
"eval_nq_pairs_samples_per_second": 29.869, |
|
"eval_nq_pairs_steps_per_second": 0.996, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_trivia_pairs_loss": 4.286431312561035, |
|
"eval_trivia_pairs_runtime": 9.4975, |
|
"eval_trivia_pairs_samples_per_second": 15.794, |
|
"eval_trivia_pairs_steps_per_second": 0.526, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_quora_pairs_loss": 1.123273491859436, |
|
"eval_quora_pairs_runtime": 1.1487, |
|
"eval_quora_pairs_samples_per_second": 130.586, |
|
"eval_quora_pairs_steps_per_second": 4.353, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.37538304392236976, |
|
"eval_gooaq_pairs_loss": 3.222414255142212, |
|
"eval_gooaq_pairs_runtime": 2.0173, |
|
"eval_gooaq_pairs_samples_per_second": 74.357, |
|
"eval_gooaq_pairs_steps_per_second": 2.479, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.4004085801838611, |
|
"grad_norm": 218.56105041503906, |
|
"learning_rate": 7.987742594484168e-06, |
|
"loss": 3.7674, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 0.4254341164453524, |
|
"grad_norm": 27.877609252929688, |
|
"learning_rate": 8.488253319713993e-06, |
|
"loss": 3.8729, |
|
"step": 2499 |
|
}, |
|
{ |
|
"epoch": 0.45045965270684374, |
|
"grad_norm": 33.50013732910156, |
|
"learning_rate": 8.988764044943822e-06, |
|
"loss": 3.4527, |
|
"step": 2646 |
|
}, |
|
{ |
|
"epoch": 0.475485188968335, |
|
"grad_norm": 14.015911102294922, |
|
"learning_rate": 9.489274770173647e-06, |
|
"loss": 3.3545, |
|
"step": 2793 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"grad_norm": 33.59694290161133, |
|
"learning_rate": 9.989785495403473e-06, |
|
"loss": 3.3247, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_nli-pairs_loss": 2.7121565341949463, |
|
"eval_nli-pairs_runtime": 4.1564, |
|
"eval_nli-pairs_samples_per_second": 36.089, |
|
"eval_nli-pairs_steps_per_second": 1.203, |
|
"eval_sts-test_pearson_cosine": 0.716623047702725, |
|
"eval_sts-test_pearson_dot": 0.6128451070598809, |
|
"eval_sts-test_pearson_euclidean": 0.7138791236031807, |
|
"eval_sts-test_pearson_manhattan": 0.7213151818687454, |
|
"eval_sts-test_pearson_max": 0.7213151818687454, |
|
"eval_sts-test_spearman_cosine": 0.6919792400941177, |
|
"eval_sts-test_spearman_dot": 0.5867158357121192, |
|
"eval_sts-test_spearman_euclidean": 0.6925037259567834, |
|
"eval_sts-test_spearman_manhattan": 0.7008895667910079, |
|
"eval_sts-test_spearman_max": 0.7008895667910079, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_vitaminc-pairs_loss": 2.225992441177368, |
|
"eval_vitaminc-pairs_runtime": 2.253, |
|
"eval_vitaminc-pairs_samples_per_second": 66.577, |
|
"eval_vitaminc-pairs_steps_per_second": 2.219, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_qnli-contrastive_loss": 4.92629861831665, |
|
"eval_qnli-contrastive_runtime": 0.5005, |
|
"eval_qnli-contrastive_samples_per_second": 299.691, |
|
"eval_qnli-contrastive_steps_per_second": 9.99, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_scitail-pairs-qa_loss": 0.5898066163063049, |
|
"eval_scitail-pairs-qa_runtime": 1.2227, |
|
"eval_scitail-pairs-qa_samples_per_second": 122.682, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.089, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_scitail-pairs-pos_loss": 1.4237287044525146, |
|
"eval_scitail-pairs-pos_runtime": 2.4409, |
|
"eval_scitail-pairs-pos_samples_per_second": 61.452, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.048, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_xsum-pairs_loss": 1.8388895988464355, |
|
"eval_xsum-pairs_runtime": 2.2831, |
|
"eval_xsum-pairs_samples_per_second": 65.7, |
|
"eval_xsum-pairs_steps_per_second": 2.19, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_compression-pairs_loss": 1.1590967178344727, |
|
"eval_compression-pairs_runtime": 0.5152, |
|
"eval_compression-pairs_samples_per_second": 291.165, |
|
"eval_compression-pairs_steps_per_second": 9.706, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_sciq_pairs_loss": 8.282496452331543, |
|
"eval_sciq_pairs_runtime": 7.2871, |
|
"eval_sciq_pairs_samples_per_second": 20.584, |
|
"eval_sciq_pairs_steps_per_second": 0.686, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_qasc_pairs_loss": 7.817965507507324, |
|
"eval_qasc_pairs_runtime": 2.0211, |
|
"eval_qasc_pairs_samples_per_second": 74.218, |
|
"eval_qasc_pairs_steps_per_second": 2.474, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_openbookqa_pairs_loss": 4.619383811950684, |
|
"eval_openbookqa_pairs_runtime": 0.8531, |
|
"eval_openbookqa_pairs_samples_per_second": 120.731, |
|
"eval_openbookqa_pairs_steps_per_second": 4.689, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_msmarco_pairs_loss": 3.478559970855713, |
|
"eval_msmarco_pairs_runtime": 2.7512, |
|
"eval_msmarco_pairs_samples_per_second": 54.522, |
|
"eval_msmarco_pairs_steps_per_second": 1.817, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_nq_pairs_loss": 3.3449866771698, |
|
"eval_nq_pairs_runtime": 5.0591, |
|
"eval_nq_pairs_samples_per_second": 29.649, |
|
"eval_nq_pairs_steps_per_second": 0.988, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_trivia_pairs_loss": 3.524484872817993, |
|
"eval_trivia_pairs_runtime": 9.662, |
|
"eval_trivia_pairs_samples_per_second": 15.525, |
|
"eval_trivia_pairs_steps_per_second": 0.517, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_quora_pairs_loss": 0.9095575213432312, |
|
"eval_quora_pairs_runtime": 1.2482, |
|
"eval_quora_pairs_samples_per_second": 120.175, |
|
"eval_quora_pairs_steps_per_second": 4.006, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5005107252298263, |
|
"eval_gooaq_pairs_loss": 2.6586034297943115, |
|
"eval_gooaq_pairs_runtime": 2.1091, |
|
"eval_gooaq_pairs_samples_per_second": 71.12, |
|
"eval_gooaq_pairs_steps_per_second": 2.371, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5255362614913177, |
|
"grad_norm": 35.33409118652344, |
|
"learning_rate": 1.04902962206333e-05, |
|
"loss": 3.116, |
|
"step": 3087 |
|
}, |
|
{ |
|
"epoch": 0.550561797752809, |
|
"grad_norm": 22.29003143310547, |
|
"learning_rate": 1.0990806945863125e-05, |
|
"loss": 3.2418, |
|
"step": 3234 |
|
}, |
|
{ |
|
"epoch": 0.5755873340143003, |
|
"grad_norm": 31.277965545654297, |
|
"learning_rate": 1.1491317671092953e-05, |
|
"loss": 3.0757, |
|
"step": 3381 |
|
}, |
|
{ |
|
"epoch": 0.6006128702757916, |
|
"grad_norm": 24.612506866455078, |
|
"learning_rate": 1.1991828396322778e-05, |
|
"loss": 2.8524, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"grad_norm": 25.11741065979004, |
|
"learning_rate": 1.2492339121552605e-05, |
|
"loss": 2.6875, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_nli-pairs_loss": 2.479051113128662, |
|
"eval_nli-pairs_runtime": 3.9943, |
|
"eval_nli-pairs_samples_per_second": 37.553, |
|
"eval_nli-pairs_steps_per_second": 1.252, |
|
"eval_sts-test_pearson_cosine": 0.7278742453545186, |
|
"eval_sts-test_pearson_dot": 0.6217650825208566, |
|
"eval_sts-test_pearson_euclidean": 0.7243228472931561, |
|
"eval_sts-test_pearson_manhattan": 0.7333297580184588, |
|
"eval_sts-test_pearson_max": 0.7333297580184588, |
|
"eval_sts-test_spearman_cosine": 0.7013110457844404, |
|
"eval_sts-test_spearman_dot": 0.5970993074902947, |
|
"eval_sts-test_spearman_euclidean": 0.701564129266252, |
|
"eval_sts-test_spearman_manhattan": 0.7116482009924582, |
|
"eval_sts-test_spearman_max": 0.7116482009924582, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_vitaminc-pairs_loss": 1.974273681640625, |
|
"eval_vitaminc-pairs_runtime": 2.1754, |
|
"eval_vitaminc-pairs_samples_per_second": 68.953, |
|
"eval_vitaminc-pairs_steps_per_second": 2.298, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_qnli-contrastive_loss": 1.7706010341644287, |
|
"eval_qnli-contrastive_runtime": 0.4866, |
|
"eval_qnli-contrastive_samples_per_second": 308.244, |
|
"eval_qnli-contrastive_steps_per_second": 10.275, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_scitail-pairs-qa_loss": 0.4400452673435211, |
|
"eval_scitail-pairs-qa_runtime": 1.1519, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.222, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.341, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_scitail-pairs-pos_loss": 1.1909903287887573, |
|
"eval_scitail-pairs-pos_runtime": 2.1319, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.36, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.345, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_xsum-pairs_loss": 1.4811985492706299, |
|
"eval_xsum-pairs_runtime": 2.254, |
|
"eval_xsum-pairs_samples_per_second": 66.548, |
|
"eval_xsum-pairs_steps_per_second": 2.218, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_compression-pairs_loss": 0.8453781008720398, |
|
"eval_compression-pairs_runtime": 0.4401, |
|
"eval_compression-pairs_samples_per_second": 340.826, |
|
"eval_compression-pairs_steps_per_second": 11.361, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_sciq_pairs_loss": 8.014656066894531, |
|
"eval_sciq_pairs_runtime": 7.0707, |
|
"eval_sciq_pairs_samples_per_second": 21.214, |
|
"eval_sciq_pairs_steps_per_second": 0.707, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_qasc_pairs_loss": 6.9316277503967285, |
|
"eval_qasc_pairs_runtime": 2.0338, |
|
"eval_qasc_pairs_samples_per_second": 73.752, |
|
"eval_qasc_pairs_steps_per_second": 2.458, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_openbookqa_pairs_loss": 4.21690034866333, |
|
"eval_openbookqa_pairs_runtime": 0.918, |
|
"eval_openbookqa_pairs_samples_per_second": 112.202, |
|
"eval_openbookqa_pairs_steps_per_second": 4.357, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_msmarco_pairs_loss": 3.0209598541259766, |
|
"eval_msmarco_pairs_runtime": 2.7749, |
|
"eval_msmarco_pairs_samples_per_second": 54.056, |
|
"eval_msmarco_pairs_steps_per_second": 1.802, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_nq_pairs_loss": 2.956088066101074, |
|
"eval_nq_pairs_runtime": 5.0024, |
|
"eval_nq_pairs_samples_per_second": 29.986, |
|
"eval_nq_pairs_steps_per_second": 1.0, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_trivia_pairs_loss": 3.17364501953125, |
|
"eval_trivia_pairs_runtime": 9.4856, |
|
"eval_trivia_pairs_samples_per_second": 15.813, |
|
"eval_trivia_pairs_steps_per_second": 0.527, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_quora_pairs_loss": 0.763593852519989, |
|
"eval_quora_pairs_runtime": 1.1441, |
|
"eval_quora_pairs_samples_per_second": 131.104, |
|
"eval_quora_pairs_steps_per_second": 4.37, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.625638406537283, |
|
"eval_gooaq_pairs_loss": 2.3524909019470215, |
|
"eval_gooaq_pairs_runtime": 2.0161, |
|
"eval_gooaq_pairs_samples_per_second": 74.4, |
|
"eval_gooaq_pairs_steps_per_second": 2.48, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.6506639427987743, |
|
"grad_norm": 31.163997650146484, |
|
"learning_rate": 1.2992849846782432e-05, |
|
"loss": 2.7808, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 0.6756894790602656, |
|
"grad_norm": 14.883658409118652, |
|
"learning_rate": 1.3493360572012258e-05, |
|
"loss": 2.5687, |
|
"step": 3969 |
|
}, |
|
{ |
|
"epoch": 0.7007150153217568, |
|
"grad_norm": 5.874042987823486, |
|
"learning_rate": 1.3993871297242083e-05, |
|
"loss": 2.3034, |
|
"step": 4116 |
|
}, |
|
{ |
|
"epoch": 0.7257405515832482, |
|
"grad_norm": 31.464054107666016, |
|
"learning_rate": 1.4494382022471912e-05, |
|
"loss": 2.4412, |
|
"step": 4263 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"grad_norm": 16.43915367126465, |
|
"learning_rate": 1.4994892747701737e-05, |
|
"loss": 2.3293, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_nli-pairs_loss": 2.3226094245910645, |
|
"eval_nli-pairs_runtime": 4.113, |
|
"eval_nli-pairs_samples_per_second": 36.47, |
|
"eval_nli-pairs_steps_per_second": 1.216, |
|
"eval_sts-test_pearson_cosine": 0.7356971966139032, |
|
"eval_sts-test_pearson_dot": 0.6150809513049869, |
|
"eval_sts-test_pearson_euclidean": 0.7330733579988641, |
|
"eval_sts-test_pearson_manhattan": 0.7423412248131348, |
|
"eval_sts-test_pearson_max": 0.7423412248131348, |
|
"eval_sts-test_spearman_cosine": 0.7121899723082045, |
|
"eval_sts-test_spearman_dot": 0.5926505936679538, |
|
"eval_sts-test_spearman_euclidean": 0.7130179905407037, |
|
"eval_sts-test_spearman_manhattan": 0.7227257562995023, |
|
"eval_sts-test_spearman_max": 0.7227257562995023, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_vitaminc-pairs_loss": 1.7956713438034058, |
|
"eval_vitaminc-pairs_runtime": 2.174, |
|
"eval_vitaminc-pairs_samples_per_second": 68.996, |
|
"eval_vitaminc-pairs_steps_per_second": 2.3, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_qnli-contrastive_loss": 1.0078614950180054, |
|
"eval_qnli-contrastive_runtime": 0.4874, |
|
"eval_qnli-contrastive_samples_per_second": 307.763, |
|
"eval_qnli-contrastive_steps_per_second": 10.259, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_scitail-pairs-qa_loss": 0.36971578001976013, |
|
"eval_scitail-pairs-qa_runtime": 1.164, |
|
"eval_scitail-pairs-qa_samples_per_second": 128.863, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.295, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_scitail-pairs-pos_loss": 1.0497769117355347, |
|
"eval_scitail-pairs-pos_runtime": 2.1205, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.74, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.358, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_xsum-pairs_loss": 1.1691261529922485, |
|
"eval_xsum-pairs_runtime": 2.259, |
|
"eval_xsum-pairs_samples_per_second": 66.401, |
|
"eval_xsum-pairs_steps_per_second": 2.213, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_compression-pairs_loss": 0.5027483105659485, |
|
"eval_compression-pairs_runtime": 0.4403, |
|
"eval_compression-pairs_samples_per_second": 340.682, |
|
"eval_compression-pairs_steps_per_second": 11.356, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_sciq_pairs_loss": 7.823739528656006, |
|
"eval_sciq_pairs_runtime": 7.0738, |
|
"eval_sciq_pairs_samples_per_second": 21.205, |
|
"eval_sciq_pairs_steps_per_second": 0.707, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_qasc_pairs_loss": 6.404655933380127, |
|
"eval_qasc_pairs_runtime": 2.0346, |
|
"eval_qasc_pairs_samples_per_second": 73.723, |
|
"eval_qasc_pairs_steps_per_second": 2.457, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_openbookqa_pairs_loss": 3.857389211654663, |
|
"eval_openbookqa_pairs_runtime": 0.8544, |
|
"eval_openbookqa_pairs_samples_per_second": 120.547, |
|
"eval_openbookqa_pairs_steps_per_second": 4.681, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_msmarco_pairs_loss": 2.7028510570526123, |
|
"eval_msmarco_pairs_runtime": 2.7448, |
|
"eval_msmarco_pairs_samples_per_second": 54.649, |
|
"eval_msmarco_pairs_steps_per_second": 1.822, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_nq_pairs_loss": 2.679351329803467, |
|
"eval_nq_pairs_runtime": 5.067, |
|
"eval_nq_pairs_samples_per_second": 29.603, |
|
"eval_nq_pairs_steps_per_second": 0.987, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_trivia_pairs_loss": 2.8798065185546875, |
|
"eval_trivia_pairs_runtime": 9.5449, |
|
"eval_trivia_pairs_samples_per_second": 15.715, |
|
"eval_trivia_pairs_steps_per_second": 0.524, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_quora_pairs_loss": 0.6825175285339355, |
|
"eval_quora_pairs_runtime": 1.1431, |
|
"eval_quora_pairs_samples_per_second": 131.221, |
|
"eval_quora_pairs_steps_per_second": 4.374, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7507660878447395, |
|
"eval_gooaq_pairs_loss": 2.0472166538238525, |
|
"eval_gooaq_pairs_runtime": 2.0218, |
|
"eval_gooaq_pairs_samples_per_second": 74.191, |
|
"eval_gooaq_pairs_steps_per_second": 2.473, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7757916241062308, |
|
"grad_norm": 4.2425055503845215, |
|
"learning_rate": 1.5495403472931565e-05, |
|
"loss": 2.3651, |
|
"step": 4557 |
|
}, |
|
{ |
|
"epoch": 0.8008171603677222, |
|
"grad_norm": 22.42776107788086, |
|
"learning_rate": 1.5995914198161388e-05, |
|
"loss": 2.6296, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.8258426966292135, |
|
"grad_norm": 21.169517517089844, |
|
"learning_rate": 1.6496424923391215e-05, |
|
"loss": 2.2108, |
|
"step": 4851 |
|
}, |
|
{ |
|
"epoch": 0.8508682328907048, |
|
"grad_norm": 23.326181411743164, |
|
"learning_rate": 1.699693564862104e-05, |
|
"loss": 2.1852, |
|
"step": 4998 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"grad_norm": 24.574176788330078, |
|
"learning_rate": 1.7497446373850868e-05, |
|
"loss": 2.2944, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_nli-pairs_loss": 2.0634915828704834, |
|
"eval_nli-pairs_runtime": 4.0019, |
|
"eval_nli-pairs_samples_per_second": 37.482, |
|
"eval_nli-pairs_steps_per_second": 1.249, |
|
"eval_sts-test_pearson_cosine": 0.7466390532977636, |
|
"eval_sts-test_pearson_dot": 0.612259458274589, |
|
"eval_sts-test_pearson_euclidean": 0.7432536346376271, |
|
"eval_sts-test_pearson_manhattan": 0.7500490179501229, |
|
"eval_sts-test_pearson_max": 0.7500490179501229, |
|
"eval_sts-test_spearman_cosine": 0.728273260456201, |
|
"eval_sts-test_spearman_dot": 0.5960115087190596, |
|
"eval_sts-test_spearman_euclidean": 0.7272394395622148, |
|
"eval_sts-test_spearman_manhattan": 0.7334149564445704, |
|
"eval_sts-test_spearman_max": 0.7334149564445704, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_vitaminc-pairs_loss": 1.638654112815857, |
|
"eval_vitaminc-pairs_runtime": 2.1637, |
|
"eval_vitaminc-pairs_samples_per_second": 69.327, |
|
"eval_vitaminc-pairs_steps_per_second": 2.311, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_qnli-contrastive_loss": 0.9639705419540405, |
|
"eval_qnli-contrastive_runtime": 0.4889, |
|
"eval_qnli-contrastive_samples_per_second": 306.825, |
|
"eval_qnli-contrastive_steps_per_second": 10.228, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_scitail-pairs-qa_loss": 0.31595128774642944, |
|
"eval_scitail-pairs-qa_runtime": 1.1467, |
|
"eval_scitail-pairs-qa_samples_per_second": 130.806, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.36, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_scitail-pairs-pos_loss": 0.9187478423118591, |
|
"eval_scitail-pairs-pos_runtime": 2.1273, |
|
"eval_scitail-pairs-pos_samples_per_second": 70.512, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.35, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_xsum-pairs_loss": 1.060194492340088, |
|
"eval_xsum-pairs_runtime": 2.2836, |
|
"eval_xsum-pairs_samples_per_second": 65.686, |
|
"eval_xsum-pairs_steps_per_second": 2.19, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_compression-pairs_loss": 0.41078585386276245, |
|
"eval_compression-pairs_runtime": 0.4434, |
|
"eval_compression-pairs_samples_per_second": 338.276, |
|
"eval_compression-pairs_steps_per_second": 11.276, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_sciq_pairs_loss": 7.577760696411133, |
|
"eval_sciq_pairs_runtime": 7.1025, |
|
"eval_sciq_pairs_samples_per_second": 21.119, |
|
"eval_sciq_pairs_steps_per_second": 0.704, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_qasc_pairs_loss": 6.353766918182373, |
|
"eval_qasc_pairs_runtime": 2.0113, |
|
"eval_qasc_pairs_samples_per_second": 74.58, |
|
"eval_qasc_pairs_steps_per_second": 2.486, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_openbookqa_pairs_loss": 3.7140932083129883, |
|
"eval_openbookqa_pairs_runtime": 0.8529, |
|
"eval_openbookqa_pairs_samples_per_second": 120.762, |
|
"eval_openbookqa_pairs_steps_per_second": 4.69, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_msmarco_pairs_loss": 2.3862576484680176, |
|
"eval_msmarco_pairs_runtime": 2.8953, |
|
"eval_msmarco_pairs_samples_per_second": 51.808, |
|
"eval_msmarco_pairs_steps_per_second": 1.727, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_nq_pairs_loss": 2.3543190956115723, |
|
"eval_nq_pairs_runtime": 5.0048, |
|
"eval_nq_pairs_samples_per_second": 29.971, |
|
"eval_nq_pairs_steps_per_second": 0.999, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_trivia_pairs_loss": 2.494807481765747, |
|
"eval_trivia_pairs_runtime": 9.5513, |
|
"eval_trivia_pairs_samples_per_second": 15.705, |
|
"eval_trivia_pairs_steps_per_second": 0.523, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_quora_pairs_loss": 0.6137441992759705, |
|
"eval_quora_pairs_runtime": 1.1541, |
|
"eval_quora_pairs_samples_per_second": 129.967, |
|
"eval_quora_pairs_steps_per_second": 4.332, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.8758937691521961, |
|
"eval_gooaq_pairs_loss": 1.8279658555984497, |
|
"eval_gooaq_pairs_runtime": 2.0951, |
|
"eval_gooaq_pairs_samples_per_second": 71.595, |
|
"eval_gooaq_pairs_steps_per_second": 2.387, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.9009193054136875, |
|
"grad_norm": 10.590804100036621, |
|
"learning_rate": 1.7997957099080695e-05, |
|
"loss": 2.2133, |
|
"step": 5292 |
|
}, |
|
{ |
|
"epoch": 0.9259448416751788, |
|
"grad_norm": 18.527711868286133, |
|
"learning_rate": 1.849846782431052e-05, |
|
"loss": 2.2255, |
|
"step": 5439 |
|
}, |
|
{ |
|
"epoch": 0.95097037793667, |
|
"grad_norm": 2.617710828781128, |
|
"learning_rate": 1.8995573714674838e-05, |
|
"loss": 2.3502, |
|
"step": 5586 |
|
}, |
|
{ |
|
"epoch": 0.9759959141981613, |
|
"grad_norm": 19.551551818847656, |
|
"learning_rate": 1.9496084439904668e-05, |
|
"loss": 1.8964, |
|
"step": 5733 |
|
} |
|
], |
|
"logging_steps": 147, |
|
"max_steps": 29370, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 2937, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|