{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5, "eval_steps": 735, "global_step": 2937, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02502553626149132, "grad_norm": 65.55949401855469, "learning_rate": 4.834865509022812e-07, "loss": 16.851, "step": 147 }, { "epoch": 0.05005107252298264, "grad_norm": 23.207971572875977, "learning_rate": 9.805924412665985e-07, "loss": 11.2787, "step": 294 }, { "epoch": 0.07507660878447395, "grad_norm": 176.1532440185547, "learning_rate": 1.481103166496425e-06, "loss": 8.9166, "step": 441 }, { "epoch": 0.10010214504596528, "grad_norm": 22.1564998626709, "learning_rate": 1.981613891726251e-06, "loss": 7.9463, "step": 588 }, { "epoch": 0.12512768130745658, "grad_norm": 20.11876106262207, "learning_rate": 2.4821246169560777e-06, "loss": 7.2108, "step": 735 }, { "epoch": 0.12512768130745658, "eval_nli-pairs_loss": 6.905651569366455, "eval_nli-pairs_runtime": 4.0844, "eval_nli-pairs_samples_per_second": 36.725, "eval_nli-pairs_steps_per_second": 1.224, "eval_sts-test_pearson_cosine": 0.3740256550072784, "eval_sts-test_pearson_dot": 0.13384893803205677, "eval_sts-test_pearson_euclidean": 0.3912387619869807, "eval_sts-test_pearson_manhattan": 0.4202605137823524, "eval_sts-test_pearson_max": 0.4202605137823524, "eval_sts-test_spearman_cosine": 0.37210107338950205, "eval_sts-test_spearman_dot": 0.12092409843417483, "eval_sts-test_spearman_euclidean": 0.39172287978780546, "eval_sts-test_spearman_manhattan": 0.4169664738563951, "eval_sts-test_spearman_max": 0.4169664738563951, "step": 735 }, { "epoch": 0.12512768130745658, "eval_vitaminc-pairs_loss": 5.720878601074219, "eval_vitaminc-pairs_runtime": 2.1703, "eval_vitaminc-pairs_samples_per_second": 69.115, "eval_vitaminc-pairs_steps_per_second": 2.304, "step": 735 }, { "epoch": 0.12512768130745658, "eval_qnli-contrastive_loss": 8.1649751663208, "eval_qnli-contrastive_runtime": 0.4937, "eval_qnli-contrastive_samples_per_second": 303.841, "eval_qnli-contrastive_steps_per_second": 10.128, "step": 735 }, { "epoch": 0.12512768130745658, "eval_scitail-pairs-qa_loss": 3.7859296798706055, "eval_scitail-pairs-qa_runtime": 1.1509, "eval_scitail-pairs-qa_samples_per_second": 130.329, "eval_scitail-pairs-qa_steps_per_second": 4.344, "step": 735 }, { "epoch": 0.12512768130745658, "eval_scitail-pairs-pos_loss": 3.9919917583465576, "eval_scitail-pairs-pos_runtime": 2.1442, "eval_scitail-pairs-pos_samples_per_second": 69.956, "eval_scitail-pairs-pos_steps_per_second": 2.332, "step": 735 }, { "epoch": 0.12512768130745658, "eval_xsum-pairs_loss": 4.600368976593018, "eval_xsum-pairs_runtime": 2.26, "eval_xsum-pairs_samples_per_second": 66.371, "eval_xsum-pairs_steps_per_second": 2.212, "step": 735 }, { "epoch": 0.12512768130745658, "eval_compression-pairs_loss": 3.3037569522857666, "eval_compression-pairs_runtime": 0.449, "eval_compression-pairs_samples_per_second": 334.078, "eval_compression-pairs_steps_per_second": 11.136, "step": 735 }, { "epoch": 0.12512768130745658, "eval_sciq_pairs_loss": 10.214456558227539, "eval_sciq_pairs_runtime": 7.1179, "eval_sciq_pairs_samples_per_second": 21.074, "eval_sciq_pairs_steps_per_second": 0.702, "step": 735 }, { "epoch": 0.12512768130745658, "eval_qasc_pairs_loss": 10.58031940460205, "eval_qasc_pairs_runtime": 2.0175, "eval_qasc_pairs_samples_per_second": 74.348, "eval_qasc_pairs_steps_per_second": 2.478, "step": 735 }, { "epoch": 0.12512768130745658, "eval_openbookqa_pairs_loss": 7.862658977508545, "eval_openbookqa_pairs_runtime": 0.8571, "eval_openbookqa_pairs_samples_per_second": 120.168, "eval_openbookqa_pairs_steps_per_second": 4.667, "step": 735 }, { "epoch": 0.12512768130745658, "eval_msmarco_pairs_loss": 8.754273414611816, "eval_msmarco_pairs_runtime": 2.7533, "eval_msmarco_pairs_samples_per_second": 54.481, "eval_msmarco_pairs_steps_per_second": 1.816, "step": 735 }, { "epoch": 0.12512768130745658, "eval_nq_pairs_loss": 8.415486335754395, "eval_nq_pairs_runtime": 5.0894, "eval_nq_pairs_samples_per_second": 29.473, "eval_nq_pairs_steps_per_second": 0.982, "step": 735 }, { "epoch": 0.12512768130745658, "eval_trivia_pairs_loss": 9.051105499267578, "eval_trivia_pairs_runtime": 9.5498, "eval_trivia_pairs_samples_per_second": 15.707, "eval_trivia_pairs_steps_per_second": 0.524, "step": 735 }, { "epoch": 0.12512768130745658, "eval_quora_pairs_loss": 4.5232110023498535, "eval_quora_pairs_runtime": 1.1469, "eval_quora_pairs_samples_per_second": 130.785, "eval_quora_pairs_steps_per_second": 4.36, "step": 735 }, { "epoch": 0.12512768130745658, "eval_gooaq_pairs_loss": 7.579105854034424, "eval_gooaq_pairs_runtime": 2.0491, "eval_gooaq_pairs_samples_per_second": 73.203, "eval_gooaq_pairs_steps_per_second": 2.44, "step": 735 }, { "epoch": 0.1501532175689479, "grad_norm": 31.7736759185791, "learning_rate": 2.982635342185904e-06, "loss": 6.7709, "step": 882 }, { "epoch": 0.1751787538304392, "grad_norm": 31.57339096069336, "learning_rate": 3.4831460674157306e-06, "loss": 6.1746, "step": 1029 }, { "epoch": 0.20020429009193055, "grad_norm": 25.392702102661133, "learning_rate": 3.9836567926455565e-06, "loss": 5.7706, "step": 1176 }, { "epoch": 0.22522982635342187, "grad_norm": 32.390472412109375, "learning_rate": 4.484167517875383e-06, "loss": 5.7283, "step": 1323 }, { "epoch": 0.25025536261491316, "grad_norm": 18.85039520263672, "learning_rate": 4.98467824310521e-06, "loss": 5.1856, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_nli-pairs_loss": 4.352054119110107, "eval_nli-pairs_runtime": 4.1476, "eval_nli-pairs_samples_per_second": 36.165, "eval_nli-pairs_steps_per_second": 1.206, "eval_sts-test_pearson_cosine": 0.6694155778571752, "eval_sts-test_pearson_dot": 0.5201102118957572, "eval_sts-test_pearson_euclidean": 0.6613028243200022, "eval_sts-test_pearson_manhattan": 0.6670710500315469, "eval_sts-test_pearson_max": 0.6694155778571752, "eval_sts-test_spearman_cosine": 0.6367853204388882, "eval_sts-test_spearman_dot": 0.4940207180607985, "eval_sts-test_spearman_euclidean": 0.6391132775161348, "eval_sts-test_spearman_manhattan": 0.6446159957787251, "eval_sts-test_spearman_max": 0.6446159957787251, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_vitaminc-pairs_loss": 3.4987735748291016, "eval_vitaminc-pairs_runtime": 2.1678, "eval_vitaminc-pairs_samples_per_second": 69.194, "eval_vitaminc-pairs_steps_per_second": 2.306, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_qnli-contrastive_loss": 12.915559768676758, "eval_qnli-contrastive_runtime": 0.4918, "eval_qnli-contrastive_samples_per_second": 304.99, "eval_qnli-contrastive_steps_per_second": 10.166, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_scitail-pairs-qa_loss": 1.3250077962875366, "eval_scitail-pairs-qa_runtime": 1.154, "eval_scitail-pairs-qa_samples_per_second": 129.984, "eval_scitail-pairs-qa_steps_per_second": 4.333, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_scitail-pairs-pos_loss": 2.457335948944092, "eval_scitail-pairs-pos_runtime": 2.1475, "eval_scitail-pairs-pos_samples_per_second": 69.85, "eval_scitail-pairs-pos_steps_per_second": 2.328, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_xsum-pairs_loss": 3.071201801300049, "eval_xsum-pairs_runtime": 2.2634, "eval_xsum-pairs_samples_per_second": 66.271, "eval_xsum-pairs_steps_per_second": 2.209, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_compression-pairs_loss": 2.0629916191101074, "eval_compression-pairs_runtime": 0.4529, "eval_compression-pairs_samples_per_second": 331.23, "eval_compression-pairs_steps_per_second": 11.041, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_sciq_pairs_loss": 9.06814193725586, "eval_sciq_pairs_runtime": 7.1445, "eval_sciq_pairs_samples_per_second": 20.995, "eval_sciq_pairs_steps_per_second": 0.7, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_qasc_pairs_loss": 9.245658874511719, "eval_qasc_pairs_runtime": 2.0471, "eval_qasc_pairs_samples_per_second": 73.274, "eval_qasc_pairs_steps_per_second": 2.442, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_openbookqa_pairs_loss": 5.652446746826172, "eval_openbookqa_pairs_runtime": 0.8946, "eval_openbookqa_pairs_samples_per_second": 115.14, "eval_openbookqa_pairs_steps_per_second": 4.471, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_msmarco_pairs_loss": 4.844855785369873, "eval_msmarco_pairs_runtime": 2.7887, "eval_msmarco_pairs_samples_per_second": 53.788, "eval_msmarco_pairs_steps_per_second": 1.793, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_nq_pairs_loss": 5.023958206176758, "eval_nq_pairs_runtime": 5.0823, "eval_nq_pairs_samples_per_second": 29.514, "eval_nq_pairs_steps_per_second": 0.984, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_trivia_pairs_loss": 5.2907304763793945, "eval_trivia_pairs_runtime": 9.6673, "eval_trivia_pairs_samples_per_second": 15.516, "eval_trivia_pairs_steps_per_second": 0.517, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_quora_pairs_loss": 1.5572240352630615, "eval_quora_pairs_runtime": 1.1979, "eval_quora_pairs_samples_per_second": 125.218, "eval_quora_pairs_steps_per_second": 4.174, "step": 1470 }, { "epoch": 0.25025536261491316, "eval_gooaq_pairs_loss": 3.970768928527832, "eval_gooaq_pairs_runtime": 2.117, "eval_gooaq_pairs_samples_per_second": 70.855, "eval_gooaq_pairs_steps_per_second": 2.362, "step": 1470 }, { "epoch": 0.2752808988764045, "grad_norm": 40.67585754394531, "learning_rate": 5.4851889683350365e-06, "loss": 4.185, "step": 1617 }, { "epoch": 0.3003064351378958, "grad_norm": 45.92570495605469, "learning_rate": 5.985699693564862e-06, "loss": 4.6367, "step": 1764 }, { "epoch": 0.32533197139938713, "grad_norm": 13.566838264465332, "learning_rate": 6.486210418794688e-06, "loss": 4.3615, "step": 1911 }, { "epoch": 0.3503575076608784, "grad_norm": 9.495999336242676, "learning_rate": 6.986721144024515e-06, "loss": 4.1791, "step": 2058 }, { "epoch": 0.37538304392236976, "grad_norm": 32.735416412353516, "learning_rate": 7.487231869254341e-06, "loss": 4.1051, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_nli-pairs_loss": 3.2717113494873047, "eval_nli-pairs_runtime": 4.0124, "eval_nli-pairs_samples_per_second": 37.384, "eval_nli-pairs_steps_per_second": 1.246, "eval_sts-test_pearson_cosine": 0.6958570089637609, "eval_sts-test_pearson_dot": 0.5824298957890577, "eval_sts-test_pearson_euclidean": 0.6893962819387462, "eval_sts-test_pearson_manhattan": 0.6993681181979946, "eval_sts-test_pearson_max": 0.6993681181979946, "eval_sts-test_spearman_cosine": 0.6652712160836801, "eval_sts-test_spearman_dot": 0.5536505624407877, "eval_sts-test_spearman_euclidean": 0.6659844314307678, "eval_sts-test_spearman_manhattan": 0.675740852112121, "eval_sts-test_spearman_max": 0.675740852112121, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_vitaminc-pairs_loss": 2.7197911739349365, "eval_vitaminc-pairs_runtime": 2.1625, "eval_vitaminc-pairs_samples_per_second": 69.365, "eval_vitaminc-pairs_steps_per_second": 2.312, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_qnli-contrastive_loss": 9.638714790344238, "eval_qnli-contrastive_runtime": 0.4877, "eval_qnli-contrastive_samples_per_second": 307.567, "eval_qnli-contrastive_steps_per_second": 10.252, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_scitail-pairs-qa_loss": 0.8106752634048462, "eval_scitail-pairs-qa_runtime": 1.1588, "eval_scitail-pairs-qa_samples_per_second": 129.449, "eval_scitail-pairs-qa_steps_per_second": 4.315, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_scitail-pairs-pos_loss": 1.8894625902175903, "eval_scitail-pairs-pos_runtime": 2.1181, "eval_scitail-pairs-pos_samples_per_second": 70.817, "eval_scitail-pairs-pos_steps_per_second": 2.361, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_xsum-pairs_loss": 2.262718439102173, "eval_xsum-pairs_runtime": 2.2585, "eval_xsum-pairs_samples_per_second": 66.416, "eval_xsum-pairs_steps_per_second": 2.214, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_compression-pairs_loss": 1.4910633563995361, "eval_compression-pairs_runtime": 0.4462, "eval_compression-pairs_samples_per_second": 336.204, "eval_compression-pairs_steps_per_second": 11.207, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_sciq_pairs_loss": 8.59740161895752, "eval_sciq_pairs_runtime": 7.1845, "eval_sciq_pairs_samples_per_second": 20.878, "eval_sciq_pairs_steps_per_second": 0.696, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_qasc_pairs_loss": 8.103879928588867, "eval_qasc_pairs_runtime": 2.0762, "eval_qasc_pairs_samples_per_second": 72.246, "eval_qasc_pairs_steps_per_second": 2.408, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_openbookqa_pairs_loss": 5.090969562530518, "eval_openbookqa_pairs_runtime": 0.89, "eval_openbookqa_pairs_samples_per_second": 115.726, "eval_openbookqa_pairs_steps_per_second": 4.494, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_msmarco_pairs_loss": 3.9566943645477295, "eval_msmarco_pairs_runtime": 2.8183, "eval_msmarco_pairs_samples_per_second": 53.223, "eval_msmarco_pairs_steps_per_second": 1.774, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_nq_pairs_loss": 4.009054183959961, "eval_nq_pairs_runtime": 5.0219, "eval_nq_pairs_samples_per_second": 29.869, "eval_nq_pairs_steps_per_second": 0.996, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_trivia_pairs_loss": 4.286431312561035, "eval_trivia_pairs_runtime": 9.4975, "eval_trivia_pairs_samples_per_second": 15.794, "eval_trivia_pairs_steps_per_second": 0.526, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_quora_pairs_loss": 1.123273491859436, "eval_quora_pairs_runtime": 1.1487, "eval_quora_pairs_samples_per_second": 130.586, "eval_quora_pairs_steps_per_second": 4.353, "step": 2205 }, { "epoch": 0.37538304392236976, "eval_gooaq_pairs_loss": 3.222414255142212, "eval_gooaq_pairs_runtime": 2.0173, "eval_gooaq_pairs_samples_per_second": 74.357, "eval_gooaq_pairs_steps_per_second": 2.479, "step": 2205 }, { "epoch": 0.4004085801838611, "grad_norm": 218.56105041503906, "learning_rate": 7.987742594484168e-06, "loss": 3.7674, "step": 2352 }, { "epoch": 0.4254341164453524, "grad_norm": 27.877609252929688, "learning_rate": 8.488253319713993e-06, "loss": 3.8729, "step": 2499 }, { "epoch": 0.45045965270684374, "grad_norm": 33.50013732910156, "learning_rate": 8.988764044943822e-06, "loss": 3.4527, "step": 2646 }, { "epoch": 0.475485188968335, "grad_norm": 14.015911102294922, "learning_rate": 9.489274770173647e-06, "loss": 3.3545, "step": 2793 } ], "logging_steps": 147, "max_steps": 29370, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2937, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }