diff --git "a/checkpoint-7662/trainer_state.json" "b/checkpoint-7662/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-7662/trainer_state.json" @@ -0,0 +1,5064 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2007522331922895, + "eval_steps": 320, + "global_step": 7662, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0050148879485973985, + "grad_norm": 14.771158218383789, + "learning_rate": 9.707724425887265e-07, + "loss": 0.6329, + "step": 32 + }, + { + "epoch": 0.010029775897194797, + "grad_norm": 11.052021980285645, + "learning_rate": 1.9728601252609606e-06, + "loss": 0.9693, + "step": 64 + }, + { + "epoch": 0.015044663845792195, + "grad_norm": 20.26296615600586, + "learning_rate": 2.9749478079331944e-06, + "loss": 0.6548, + "step": 96 + }, + { + "epoch": 0.020059551794389594, + "grad_norm": 12.62913703918457, + "learning_rate": 3.945720250521921e-06, + "loss": 1.1279, + "step": 128 + }, + { + "epoch": 0.025074439742986992, + "grad_norm": 12.316486358642578, + "learning_rate": 4.916492693110647e-06, + "loss": 1.0017, + "step": 160 + }, + { + "epoch": 0.03008932769158439, + "grad_norm": 64.25923919677734, + "learning_rate": 5.918580375782881e-06, + "loss": 0.7571, + "step": 192 + }, + { + "epoch": 0.03510421564018179, + "grad_norm": 0.8205029368400574, + "learning_rate": 6.920668058455115e-06, + "loss": 0.7304, + "step": 224 + }, + { + "epoch": 0.04011910358877919, + "grad_norm": 6.598870754241943, + "learning_rate": 7.922755741127349e-06, + "loss": 0.7636, + "step": 256 + }, + { + "epoch": 0.045133991537376586, + "grad_norm": 8.728073120117188, + "learning_rate": 8.924843423799583e-06, + "loss": 0.482, + "step": 288 + }, + { + "epoch": 0.050148879485973984, + "grad_norm": 7.645521640777588, + "learning_rate": 9.926931106471817e-06, + "loss": 0.6312, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_nli-pairs_loss": 1.0158467292785645, + "eval_nli-pairs_runtime": 3.7267, + "eval_nli-pairs_samples_per_second": 26.833, + "eval_nli-pairs_steps_per_second": 1.073, + "eval_sts-test_pearson_cosine": 0.7848265412179125, + "eval_sts-test_pearson_dot": 0.5437080705284749, + "eval_sts-test_pearson_euclidean": 0.7445845076364892, + "eval_sts-test_pearson_manhattan": 0.7429239204432232, + "eval_sts-test_pearson_max": 0.7848265412179125, + "eval_sts-test_spearman_cosine": 0.7989504707258924, + "eval_sts-test_spearman_dot": 0.5206855421174118, + "eval_sts-test_spearman_euclidean": 0.733568982260844, + "eval_sts-test_spearman_manhattan": 0.7349407257944446, + "eval_sts-test_spearman_max": 0.7989504707258924, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_vitaminc-pairs_loss": 4.692601680755615, + "eval_vitaminc-pairs_runtime": 1.1397, + "eval_vitaminc-pairs_samples_per_second": 74.578, + "eval_vitaminc-pairs_steps_per_second": 2.632, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_sts-label_loss": 3.5502490997314453, + "eval_sts-label_runtime": 0.28, + "eval_sts-label_samples_per_second": 357.117, + "eval_sts-label_steps_per_second": 14.285, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_qnli-contrastive_loss": 0.16079513728618622, + "eval_qnli-contrastive_runtime": 0.3646, + "eval_qnli-contrastive_samples_per_second": 274.299, + "eval_qnli-contrastive_steps_per_second": 10.972, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_scitail-pairs-qa_loss": 0.07610582560300827, + "eval_scitail-pairs-qa_runtime": 0.8885, + "eval_scitail-pairs-qa_samples_per_second": 112.548, + "eval_scitail-pairs-qa_steps_per_second": 4.502, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_scitail-pairs-pos_loss": 0.5141278505325317, + "eval_scitail-pairs-pos_runtime": 1.3498, + "eval_scitail-pairs-pos_samples_per_second": 74.085, + "eval_scitail-pairs-pos_steps_per_second": 2.963, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_xsum-pairs_loss": 0.25581496953964233, + "eval_xsum-pairs_runtime": 0.9407, + "eval_xsum-pairs_samples_per_second": 106.304, + "eval_xsum-pairs_steps_per_second": 4.252, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_compression-pairs_loss": 0.09814296662807465, + "eval_compression-pairs_runtime": 0.2758, + "eval_compression-pairs_samples_per_second": 362.517, + "eval_compression-pairs_steps_per_second": 14.501, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_sciq_pairs_loss": 0.25620242953300476, + "eval_sciq_pairs_runtime": 4.1155, + "eval_sciq_pairs_samples_per_second": 24.298, + "eval_sciq_pairs_steps_per_second": 0.972, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_qasc_pairs_loss": 0.2044612169265747, + "eval_qasc_pairs_runtime": 1.1029, + "eval_qasc_pairs_samples_per_second": 90.672, + "eval_qasc_pairs_steps_per_second": 3.627, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_openbookqa_pairs_loss": 1.7537646293640137, + "eval_openbookqa_pairs_runtime": 0.9037, + "eval_openbookqa_pairs_samples_per_second": 110.653, + "eval_openbookqa_pairs_steps_per_second": 4.426, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_msmarco_pairs_loss": 0.5138561725616455, + "eval_msmarco_pairs_runtime": 2.0511, + "eval_msmarco_pairs_samples_per_second": 48.754, + "eval_msmarco_pairs_steps_per_second": 1.95, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_nq_pairs_loss": 0.23510317504405975, + "eval_nq_pairs_runtime": 4.5293, + "eval_nq_pairs_samples_per_second": 22.078, + "eval_nq_pairs_steps_per_second": 0.883, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_trivia_pairs_loss": 0.7808571457862854, + "eval_trivia_pairs_runtime": 6.5065, + "eval_trivia_pairs_samples_per_second": 15.369, + "eval_trivia_pairs_steps_per_second": 0.615, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_quora_pairs_loss": 0.0392119362950325, + "eval_quora_pairs_runtime": 0.675, + "eval_quora_pairs_samples_per_second": 148.153, + "eval_quora_pairs_steps_per_second": 5.926, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_gooaq_pairs_loss": 0.4712902009487152, + "eval_gooaq_pairs_runtime": 1.4079, + "eval_gooaq_pairs_samples_per_second": 71.028, + "eval_gooaq_pairs_steps_per_second": 2.841, + "step": 320 + }, + { + "epoch": 0.050148879485973984, + "eval_mrpc_pairs_loss": 0.05498996376991272, + "eval_mrpc_pairs_runtime": 0.2623, + "eval_mrpc_pairs_samples_per_second": 381.172, + "eval_mrpc_pairs_steps_per_second": 15.247, + "step": 320 + }, + { + "epoch": 0.05516376743457138, + "grad_norm": 0.34924012422561646, + "learning_rate": 1.092901878914405e-05, + "loss": 0.5791, + "step": 352 + }, + { + "epoch": 0.06017865538316878, + "grad_norm": 0.36700841784477234, + "learning_rate": 1.1931106471816284e-05, + "loss": 0.6413, + "step": 384 + }, + { + "epoch": 0.06519354333176618, + "grad_norm": 7.559622764587402, + "learning_rate": 1.2933194154488518e-05, + "loss": 0.4319, + "step": 416 + }, + { + "epoch": 0.07020843128036358, + "grad_norm": 7.982416152954102, + "learning_rate": 1.3935281837160753e-05, + "loss": 0.6672, + "step": 448 + }, + { + "epoch": 0.07522331922896097, + "grad_norm": 0.6726166009902954, + "learning_rate": 1.4937369519832987e-05, + "loss": 0.459, + "step": 480 + }, + { + "epoch": 0.08023820717755838, + "grad_norm": 14.846123695373535, + "learning_rate": 1.593945720250522e-05, + "loss": 0.7621, + "step": 512 + }, + { + "epoch": 0.08525309512615578, + "grad_norm": 0.7846627831459045, + "learning_rate": 1.6941544885177454e-05, + "loss": 0.864, + "step": 544 + }, + { + "epoch": 0.09026798307475317, + "grad_norm": 0.8993583917617798, + "learning_rate": 1.7943632567849688e-05, + "loss": 0.5081, + "step": 576 + }, + { + "epoch": 0.09528287102335058, + "grad_norm": 1.4990565776824951, + "learning_rate": 1.894572025052192e-05, + "loss": 0.654, + "step": 608 + }, + { + "epoch": 0.10029775897194797, + "grad_norm": 15.647976875305176, + "learning_rate": 1.9947807933194157e-05, + "loss": 0.6372, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_nli-pairs_loss": 1.0652996301651, + "eval_nli-pairs_runtime": 3.6326, + "eval_nli-pairs_samples_per_second": 27.528, + "eval_nli-pairs_steps_per_second": 1.101, + "eval_sts-test_pearson_cosine": 0.785263018402905, + "eval_sts-test_pearson_dot": 0.5290450141477089, + "eval_sts-test_pearson_euclidean": 0.7433756286425983, + "eval_sts-test_pearson_manhattan": 0.7411097274300102, + "eval_sts-test_pearson_max": 0.785263018402905, + "eval_sts-test_spearman_cosine": 0.7996928912411947, + "eval_sts-test_spearman_dot": 0.5102571497667188, + "eval_sts-test_spearman_euclidean": 0.7338969723324641, + "eval_sts-test_spearman_manhattan": 0.7343494860194358, + "eval_sts-test_spearman_max": 0.7996928912411947, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_vitaminc-pairs_loss": 4.719416618347168, + "eval_vitaminc-pairs_runtime": 1.1268, + "eval_vitaminc-pairs_samples_per_second": 75.437, + "eval_vitaminc-pairs_steps_per_second": 2.662, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_sts-label_loss": 3.612347364425659, + "eval_sts-label_runtime": 0.2683, + "eval_sts-label_samples_per_second": 372.651, + "eval_sts-label_steps_per_second": 14.906, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_qnli-contrastive_loss": 0.15202775597572327, + "eval_qnli-contrastive_runtime": 0.3528, + "eval_qnli-contrastive_samples_per_second": 283.457, + "eval_qnli-contrastive_steps_per_second": 11.338, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_scitail-pairs-qa_loss": 0.07544919103384018, + "eval_scitail-pairs-qa_runtime": 0.8732, + "eval_scitail-pairs-qa_samples_per_second": 114.517, + "eval_scitail-pairs-qa_steps_per_second": 4.581, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_scitail-pairs-pos_loss": 0.5404170751571655, + "eval_scitail-pairs-pos_runtime": 1.3146, + "eval_scitail-pairs-pos_samples_per_second": 76.067, + "eval_scitail-pairs-pos_steps_per_second": 3.043, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_xsum-pairs_loss": 0.25958582758903503, + "eval_xsum-pairs_runtime": 0.9287, + "eval_xsum-pairs_samples_per_second": 107.679, + "eval_xsum-pairs_steps_per_second": 4.307, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_compression-pairs_loss": 0.10066353529691696, + "eval_compression-pairs_runtime": 0.2732, + "eval_compression-pairs_samples_per_second": 366.076, + "eval_compression-pairs_steps_per_second": 14.643, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_sciq_pairs_loss": 0.2645374834537506, + "eval_sciq_pairs_runtime": 4.0725, + "eval_sciq_pairs_samples_per_second": 24.555, + "eval_sciq_pairs_steps_per_second": 0.982, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_qasc_pairs_loss": 0.21021947264671326, + "eval_qasc_pairs_runtime": 1.0743, + "eval_qasc_pairs_samples_per_second": 93.084, + "eval_qasc_pairs_steps_per_second": 3.723, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_openbookqa_pairs_loss": 1.7905032634735107, + "eval_openbookqa_pairs_runtime": 0.8886, + "eval_openbookqa_pairs_samples_per_second": 112.532, + "eval_openbookqa_pairs_steps_per_second": 4.501, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_msmarco_pairs_loss": 0.5102832913398743, + "eval_msmarco_pairs_runtime": 2.0529, + "eval_msmarco_pairs_samples_per_second": 48.712, + "eval_msmarco_pairs_steps_per_second": 1.948, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_nq_pairs_loss": 0.24466972053050995, + "eval_nq_pairs_runtime": 4.4973, + "eval_nq_pairs_samples_per_second": 22.235, + "eval_nq_pairs_steps_per_second": 0.889, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_trivia_pairs_loss": 0.8748095631599426, + "eval_trivia_pairs_runtime": 6.4825, + "eval_trivia_pairs_samples_per_second": 15.426, + "eval_trivia_pairs_steps_per_second": 0.617, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_quora_pairs_loss": 0.07820220291614532, + "eval_quora_pairs_runtime": 0.6944, + "eval_quora_pairs_samples_per_second": 144.008, + "eval_quora_pairs_steps_per_second": 5.76, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_gooaq_pairs_loss": 0.5236212611198425, + "eval_gooaq_pairs_runtime": 1.3899, + "eval_gooaq_pairs_samples_per_second": 71.949, + "eval_gooaq_pairs_steps_per_second": 2.878, + "step": 640 + }, + { + "epoch": 0.10029775897194797, + "eval_mrpc_pairs_loss": 0.05494727939367294, + "eval_mrpc_pairs_runtime": 0.2598, + "eval_mrpc_pairs_samples_per_second": 384.941, + "eval_mrpc_pairs_steps_per_second": 15.398, + "step": 640 + }, + { + "epoch": 0.10531264692054537, + "grad_norm": 11.01974105834961, + "learning_rate": 2.0949895615866387e-05, + "loss": 0.9292, + "step": 672 + }, + { + "epoch": 0.11032753486914276, + "grad_norm": 0.5542309284210205, + "learning_rate": 2.1951983298538625e-05, + "loss": 1.3108, + "step": 704 + }, + { + "epoch": 0.11534242281774017, + "grad_norm": 15.458569526672363, + "learning_rate": 2.2954070981210856e-05, + "loss": 0.9674, + "step": 736 + }, + { + "epoch": 0.12035731076633756, + "grad_norm": 2.7814478874206543, + "learning_rate": 2.395615866388309e-05, + "loss": 0.9226, + "step": 768 + }, + { + "epoch": 0.12537219871493496, + "grad_norm": 11.393244743347168, + "learning_rate": 2.4958246346555324e-05, + "loss": 0.789, + "step": 800 + }, + { + "epoch": 0.13038708666353235, + "grad_norm": 9.288290977478027, + "learning_rate": 2.596033402922756e-05, + "loss": 0.5186, + "step": 832 + }, + { + "epoch": 0.13540197461212977, + "grad_norm": 47.65571212768555, + "learning_rate": 2.6962421711899793e-05, + "loss": 0.6726, + "step": 864 + }, + { + "epoch": 0.14041686256072716, + "grad_norm": 12.908064842224121, + "learning_rate": 2.7964509394572024e-05, + "loss": 0.5381, + "step": 896 + }, + { + "epoch": 0.14543175050932455, + "grad_norm": 14.951742172241211, + "learning_rate": 2.896659707724426e-05, + "loss": 0.581, + "step": 928 + }, + { + "epoch": 0.15044663845792194, + "grad_norm": 20.12006187438965, + "learning_rate": 2.9968684759916492e-05, + "loss": 0.9038, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_nli-pairs_loss": 1.2173175811767578, + "eval_nli-pairs_runtime": 3.7098, + "eval_nli-pairs_samples_per_second": 26.955, + "eval_nli-pairs_steps_per_second": 1.078, + "eval_sts-test_pearson_cosine": 0.7840992835675669, + "eval_sts-test_pearson_dot": 0.5220462136106129, + "eval_sts-test_pearson_euclidean": 0.7457350047351855, + "eval_sts-test_pearson_manhattan": 0.7425970830541657, + "eval_sts-test_pearson_max": 0.7840992835675669, + "eval_sts-test_spearman_cosine": 0.8006376809572144, + "eval_sts-test_spearman_dot": 0.5020544543992158, + "eval_sts-test_spearman_euclidean": 0.7369257710408655, + "eval_sts-test_spearman_manhattan": 0.7362649758012406, + "eval_sts-test_spearman_max": 0.8006376809572144, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_vitaminc-pairs_loss": 4.774902820587158, + "eval_vitaminc-pairs_runtime": 1.1212, + "eval_vitaminc-pairs_samples_per_second": 75.809, + "eval_vitaminc-pairs_steps_per_second": 2.676, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_sts-label_loss": 3.198556900024414, + "eval_sts-label_runtime": 0.2678, + "eval_sts-label_samples_per_second": 373.382, + "eval_sts-label_steps_per_second": 14.935, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_qnli-contrastive_loss": 0.1943340301513672, + "eval_qnli-contrastive_runtime": 0.3511, + "eval_qnli-contrastive_samples_per_second": 284.789, + "eval_qnli-contrastive_steps_per_second": 11.392, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_scitail-pairs-qa_loss": 0.08060617744922638, + "eval_scitail-pairs-qa_runtime": 0.8778, + "eval_scitail-pairs-qa_samples_per_second": 113.92, + "eval_scitail-pairs-qa_steps_per_second": 4.557, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_scitail-pairs-pos_loss": 0.4759831428527832, + "eval_scitail-pairs-pos_runtime": 1.3609, + "eval_scitail-pairs-pos_samples_per_second": 73.48, + "eval_scitail-pairs-pos_steps_per_second": 2.939, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_xsum-pairs_loss": 0.27583304047584534, + "eval_xsum-pairs_runtime": 0.9343, + "eval_xsum-pairs_samples_per_second": 107.035, + "eval_xsum-pairs_steps_per_second": 4.281, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_compression-pairs_loss": 0.10094660520553589, + "eval_compression-pairs_runtime": 0.2739, + "eval_compression-pairs_samples_per_second": 365.047, + "eval_compression-pairs_steps_per_second": 14.602, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_sciq_pairs_loss": 0.2688131630420685, + "eval_sciq_pairs_runtime": 4.0582, + "eval_sciq_pairs_samples_per_second": 24.641, + "eval_sciq_pairs_steps_per_second": 0.986, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_qasc_pairs_loss": 0.23267821967601776, + "eval_qasc_pairs_runtime": 1.0554, + "eval_qasc_pairs_samples_per_second": 94.75, + "eval_qasc_pairs_steps_per_second": 3.79, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_openbookqa_pairs_loss": 1.8053069114685059, + "eval_openbookqa_pairs_runtime": 0.8871, + "eval_openbookqa_pairs_samples_per_second": 112.727, + "eval_openbookqa_pairs_steps_per_second": 4.509, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_msmarco_pairs_loss": 0.5809260606765747, + "eval_msmarco_pairs_runtime": 2.0498, + "eval_msmarco_pairs_samples_per_second": 48.786, + "eval_msmarco_pairs_steps_per_second": 1.951, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_nq_pairs_loss": 0.2808491885662079, + "eval_nq_pairs_runtime": 4.4982, + "eval_nq_pairs_samples_per_second": 22.231, + "eval_nq_pairs_steps_per_second": 0.889, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_trivia_pairs_loss": 0.9379808902740479, + "eval_trivia_pairs_runtime": 6.4578, + "eval_trivia_pairs_samples_per_second": 15.485, + "eval_trivia_pairs_steps_per_second": 0.619, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_quora_pairs_loss": 0.0913279801607132, + "eval_quora_pairs_runtime": 0.6721, + "eval_quora_pairs_samples_per_second": 148.79, + "eval_quora_pairs_steps_per_second": 5.952, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_gooaq_pairs_loss": 0.5807955265045166, + "eval_gooaq_pairs_runtime": 1.3915, + "eval_gooaq_pairs_samples_per_second": 71.865, + "eval_gooaq_pairs_steps_per_second": 2.875, + "step": 960 + }, + { + "epoch": 0.15044663845792194, + "eval_mrpc_pairs_loss": 0.05799216777086258, + "eval_mrpc_pairs_runtime": 0.2571, + "eval_mrpc_pairs_samples_per_second": 388.998, + "eval_mrpc_pairs_steps_per_second": 15.56, + "step": 960 + }, + { + "epoch": 0.15546152640651936, + "grad_norm": 9.773286819458008, + "learning_rate": 2.9997957904107625e-05, + "loss": 0.7964, + "step": 992 + }, + { + "epoch": 0.16047641435511675, + "grad_norm": 19.411075592041016, + "learning_rate": 2.9991566594209126e-05, + "loss": 0.8213, + "step": 1024 + }, + { + "epoch": 0.16549130230371414, + "grad_norm": 3.5282175540924072, + "learning_rate": 2.9980825799589488e-05, + "loss": 0.5396, + "step": 1056 + }, + { + "epoch": 0.17050619025231156, + "grad_norm": 62.66339874267578, + "learning_rate": 2.996573863646219e-05, + "loss": 0.9297, + "step": 1088 + }, + { + "epoch": 0.17552107820090895, + "grad_norm": 8.785274505615234, + "learning_rate": 2.994630948204727e-05, + "loss": 1.169, + "step": 1120 + }, + { + "epoch": 0.18053596614950634, + "grad_norm": 24.10859489440918, + "learning_rate": 2.992254397330132e-05, + "loss": 0.7486, + "step": 1152 + }, + { + "epoch": 0.18555085409810373, + "grad_norm": 25.545284271240234, + "learning_rate": 2.9894449005282077e-05, + "loss": 0.6821, + "step": 1184 + }, + { + "epoch": 0.19056574204670115, + "grad_norm": 0.8675521016120911, + "learning_rate": 2.9862032729147954e-05, + "loss": 0.6125, + "step": 1216 + }, + { + "epoch": 0.19558062999529854, + "grad_norm": 16.122114181518555, + "learning_rate": 2.9825304549793153e-05, + "loss": 0.8061, + "step": 1248 + }, + { + "epoch": 0.20059551794389593, + "grad_norm": 1.0314382314682007, + "learning_rate": 2.978427512311904e-05, + "loss": 0.6918, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_nli-pairs_loss": 1.1552109718322754, + "eval_nli-pairs_runtime": 3.8751, + "eval_nli-pairs_samples_per_second": 25.806, + "eval_nli-pairs_steps_per_second": 1.032, + "eval_sts-test_pearson_cosine": 0.786106976104726, + "eval_sts-test_pearson_dot": 0.5116758767219935, + "eval_sts-test_pearson_euclidean": 0.7432891018313416, + "eval_sts-test_pearson_manhattan": 0.7400929158927781, + "eval_sts-test_pearson_max": 0.786106976104726, + "eval_sts-test_spearman_cosine": 0.801377272203007, + "eval_sts-test_spearman_dot": 0.4921454166952506, + "eval_sts-test_spearman_euclidean": 0.7343686249967402, + "eval_sts-test_spearman_manhattan": 0.7331946050808561, + "eval_sts-test_spearman_max": 0.801377272203007, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_vitaminc-pairs_loss": 4.6789751052856445, + "eval_vitaminc-pairs_runtime": 1.1504, + "eval_vitaminc-pairs_samples_per_second": 73.889, + "eval_vitaminc-pairs_steps_per_second": 2.608, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_sts-label_loss": 3.5580556392669678, + "eval_sts-label_runtime": 0.2834, + "eval_sts-label_samples_per_second": 352.858, + "eval_sts-label_steps_per_second": 14.114, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_qnli-contrastive_loss": 0.20369713008403778, + "eval_qnli-contrastive_runtime": 0.358, + "eval_qnli-contrastive_samples_per_second": 279.331, + "eval_qnli-contrastive_steps_per_second": 11.173, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_scitail-pairs-qa_loss": 0.07465875148773193, + "eval_scitail-pairs-qa_runtime": 0.9504, + "eval_scitail-pairs-qa_samples_per_second": 105.214, + "eval_scitail-pairs-qa_steps_per_second": 4.209, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_scitail-pairs-pos_loss": 0.49434563517570496, + "eval_scitail-pairs-pos_runtime": 1.6041, + "eval_scitail-pairs-pos_samples_per_second": 62.339, + "eval_scitail-pairs-pos_steps_per_second": 2.494, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_xsum-pairs_loss": 0.28282061219215393, + "eval_xsum-pairs_runtime": 0.9316, + "eval_xsum-pairs_samples_per_second": 107.346, + "eval_xsum-pairs_steps_per_second": 4.294, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_compression-pairs_loss": 0.097385473549366, + "eval_compression-pairs_runtime": 0.2754, + "eval_compression-pairs_samples_per_second": 363.1, + "eval_compression-pairs_steps_per_second": 14.524, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_sciq_pairs_loss": 0.2762215733528137, + "eval_sciq_pairs_runtime": 4.2307, + "eval_sciq_pairs_samples_per_second": 23.637, + "eval_sciq_pairs_steps_per_second": 0.945, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_qasc_pairs_loss": 0.19347424805164337, + "eval_qasc_pairs_runtime": 1.2282, + "eval_qasc_pairs_samples_per_second": 81.421, + "eval_qasc_pairs_steps_per_second": 3.257, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_openbookqa_pairs_loss": 1.6875064373016357, + "eval_openbookqa_pairs_runtime": 1.1661, + "eval_openbookqa_pairs_samples_per_second": 85.754, + "eval_openbookqa_pairs_steps_per_second": 3.43, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_msmarco_pairs_loss": 0.5743877291679382, + "eval_msmarco_pairs_runtime": 2.1428, + "eval_msmarco_pairs_samples_per_second": 46.669, + "eval_msmarco_pairs_steps_per_second": 1.867, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_nq_pairs_loss": 0.30348217487335205, + "eval_nq_pairs_runtime": 4.5543, + "eval_nq_pairs_samples_per_second": 21.957, + "eval_nq_pairs_steps_per_second": 0.878, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_trivia_pairs_loss": 0.9221765995025635, + "eval_trivia_pairs_runtime": 6.6513, + "eval_trivia_pairs_samples_per_second": 15.035, + "eval_trivia_pairs_steps_per_second": 0.601, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_quora_pairs_loss": 0.03854631260037422, + "eval_quora_pairs_runtime": 0.7822, + "eval_quora_pairs_samples_per_second": 127.852, + "eval_quora_pairs_steps_per_second": 5.114, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_gooaq_pairs_loss": 0.528398334980011, + "eval_gooaq_pairs_runtime": 1.4882, + "eval_gooaq_pairs_samples_per_second": 67.194, + "eval_gooaq_pairs_steps_per_second": 2.688, + "step": 1280 + }, + { + "epoch": 0.20059551794389593, + "eval_mrpc_pairs_loss": 0.05623970925807953, + "eval_mrpc_pairs_runtime": 0.2698, + "eval_mrpc_pairs_samples_per_second": 370.713, + "eval_mrpc_pairs_steps_per_second": 14.829, + "step": 1280 + }, + { + "epoch": 0.20561040589249335, + "grad_norm": 0.6042119860649109, + "learning_rate": 2.9738956352942557e-05, + "loss": 0.9421, + "step": 1312 + }, + { + "epoch": 0.21062529384109074, + "grad_norm": 13.87867546081543, + "learning_rate": 2.968936138754259e-05, + "loss": 0.8641, + "step": 1344 + }, + { + "epoch": 0.21564018178968813, + "grad_norm": 44.48640441894531, + "learning_rate": 2.9635504615845257e-05, + "loss": 1.157, + "step": 1376 + }, + { + "epoch": 0.22065506973828553, + "grad_norm": 15.554729461669922, + "learning_rate": 2.957928148945977e-05, + "loss": 0.8772, + "step": 1408 + }, + { + "epoch": 0.22566995768688294, + "grad_norm": 16.644670486450195, + "learning_rate": 2.9517081112297707e-05, + "loss": 1.0496, + "step": 1440 + }, + { + "epoch": 0.23068484563548033, + "grad_norm": 13.053145408630371, + "learning_rate": 2.9450668912302004e-05, + "loss": 0.589, + "step": 1472 + }, + { + "epoch": 0.23569973358407773, + "grad_norm": 7.827791213989258, + "learning_rate": 2.9380064157562306e-05, + "loss": 0.8234, + "step": 1504 + }, + { + "epoch": 0.24071462153267512, + "grad_norm": 15.598438262939453, + "learning_rate": 2.930528733254901e-05, + "loss": 0.7365, + "step": 1536 + }, + { + "epoch": 0.24572950948127253, + "grad_norm": 13.723180770874023, + "learning_rate": 2.9226360132170112e-05, + "loss": 0.5076, + "step": 1568 + }, + { + "epoch": 0.2507443974298699, + "grad_norm": 10.20022964477539, + "learning_rate": 2.9143305455476866e-05, + "loss": 1.0329, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_nli-pairs_loss": 1.0577216148376465, + "eval_nli-pairs_runtime": 3.6476, + "eval_nli-pairs_samples_per_second": 27.415, + "eval_nli-pairs_steps_per_second": 1.097, + "eval_sts-test_pearson_cosine": 0.7876359552191669, + "eval_sts-test_pearson_dot": 0.5220803655074544, + "eval_sts-test_pearson_euclidean": 0.7444632413869628, + "eval_sts-test_pearson_manhattan": 0.7418744760088763, + "eval_sts-test_pearson_max": 0.7876359552191669, + "eval_sts-test_spearman_cosine": 0.8018874000525117, + "eval_sts-test_spearman_dot": 0.5034518981121652, + "eval_sts-test_spearman_euclidean": 0.7344750702387959, + "eval_sts-test_spearman_manhattan": 0.7332804063416474, + "eval_sts-test_spearman_max": 0.8018874000525117, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_vitaminc-pairs_loss": 4.784573554992676, + "eval_vitaminc-pairs_runtime": 1.145, + "eval_vitaminc-pairs_samples_per_second": 74.235, + "eval_vitaminc-pairs_steps_per_second": 2.62, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_sts-label_loss": 3.6113080978393555, + "eval_sts-label_runtime": 0.2746, + "eval_sts-label_samples_per_second": 364.172, + "eval_sts-label_steps_per_second": 14.567, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_qnli-contrastive_loss": 0.18593625724315643, + "eval_qnli-contrastive_runtime": 0.3541, + "eval_qnli-contrastive_samples_per_second": 282.413, + "eval_qnli-contrastive_steps_per_second": 11.297, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_scitail-pairs-qa_loss": 0.07545661181211472, + "eval_scitail-pairs-qa_runtime": 0.8854, + "eval_scitail-pairs-qa_samples_per_second": 112.941, + "eval_scitail-pairs-qa_steps_per_second": 4.518, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_scitail-pairs-pos_loss": 0.5018333792686462, + "eval_scitail-pairs-pos_runtime": 1.3443, + "eval_scitail-pairs-pos_samples_per_second": 74.386, + "eval_scitail-pairs-pos_steps_per_second": 2.975, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_xsum-pairs_loss": 0.2749001085758209, + "eval_xsum-pairs_runtime": 0.9439, + "eval_xsum-pairs_samples_per_second": 105.939, + "eval_xsum-pairs_steps_per_second": 4.238, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_compression-pairs_loss": 0.09735233336687088, + "eval_compression-pairs_runtime": 0.2764, + "eval_compression-pairs_samples_per_second": 361.753, + "eval_compression-pairs_steps_per_second": 14.47, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_sciq_pairs_loss": 0.2648228108882904, + "eval_sciq_pairs_runtime": 4.1207, + "eval_sciq_pairs_samples_per_second": 24.268, + "eval_sciq_pairs_steps_per_second": 0.971, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_qasc_pairs_loss": 0.21318012475967407, + "eval_qasc_pairs_runtime": 1.0917, + "eval_qasc_pairs_samples_per_second": 91.604, + "eval_qasc_pairs_steps_per_second": 3.664, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_openbookqa_pairs_loss": 1.790009617805481, + "eval_openbookqa_pairs_runtime": 0.8969, + "eval_openbookqa_pairs_samples_per_second": 111.496, + "eval_openbookqa_pairs_steps_per_second": 4.46, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_msmarco_pairs_loss": 0.57186359167099, + "eval_msmarco_pairs_runtime": 2.0592, + "eval_msmarco_pairs_samples_per_second": 48.563, + "eval_msmarco_pairs_steps_per_second": 1.943, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_nq_pairs_loss": 0.2738310396671295, + "eval_nq_pairs_runtime": 4.5092, + "eval_nq_pairs_samples_per_second": 22.177, + "eval_nq_pairs_steps_per_second": 0.887, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_trivia_pairs_loss": 0.8291679620742798, + "eval_trivia_pairs_runtime": 6.526, + "eval_trivia_pairs_samples_per_second": 15.323, + "eval_trivia_pairs_steps_per_second": 0.613, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_quora_pairs_loss": 0.08000540733337402, + "eval_quora_pairs_runtime": 0.6761, + "eval_quora_pairs_samples_per_second": 147.909, + "eval_quora_pairs_steps_per_second": 5.916, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_gooaq_pairs_loss": 0.5998037457466125, + "eval_gooaq_pairs_runtime": 1.3978, + "eval_gooaq_pairs_samples_per_second": 71.541, + "eval_gooaq_pairs_steps_per_second": 2.862, + "step": 1600 + }, + { + "epoch": 0.2507443974298699, + "eval_mrpc_pairs_loss": 0.05507182702422142, + "eval_mrpc_pairs_runtime": 0.2617, + "eval_mrpc_pairs_samples_per_second": 382.156, + "eval_mrpc_pairs_steps_per_second": 15.286, + "step": 1600 + }, + { + "epoch": 0.2557592853784673, + "grad_norm": 8.05022144317627, + "learning_rate": 2.9056147399020182e-05, + "loss": 1.4006, + "step": 1632 + }, + { + "epoch": 0.2607741733270647, + "grad_norm": 0.38224154710769653, + "learning_rate": 2.8964911249859437e-05, + "loss": 0.5963, + "step": 1664 + }, + { + "epoch": 0.2657890612756621, + "grad_norm": 0.46655791997909546, + "learning_rate": 2.886962347822604e-05, + "loss": 0.7488, + "step": 1696 + }, + { + "epoch": 0.27080394922425954, + "grad_norm": 8.102537155151367, + "learning_rate": 2.8770311729843616e-05, + "loss": 0.8548, + "step": 1728 + }, + { + "epoch": 0.27581883717285693, + "grad_norm": 11.803775787353516, + "learning_rate": 2.86670048179072e-05, + "loss": 1.3324, + "step": 1760 + }, + { + "epoch": 0.2808337251214543, + "grad_norm": 16.266756057739258, + "learning_rate": 2.8559732714723715e-05, + "loss": 0.5804, + "step": 1792 + }, + { + "epoch": 0.2858486130700517, + "grad_norm": 2.8448822498321533, + "learning_rate": 2.8448526543016114e-05, + "loss": 0.7827, + "step": 1824 + }, + { + "epoch": 0.2908635010186491, + "grad_norm": 21.346328735351562, + "learning_rate": 2.8333418566893796e-05, + "loss": 0.5448, + "step": 1856 + }, + { + "epoch": 0.2958783889672465, + "grad_norm": 3.4379029273986816, + "learning_rate": 2.8214442182491866e-05, + "loss": 0.7368, + "step": 1888 + }, + { + "epoch": 0.3008932769158439, + "grad_norm": 17.05881690979004, + "learning_rate": 2.8091631908281963e-05, + "loss": 0.5657, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_nli-pairs_loss": 1.0244356393814087, + "eval_nli-pairs_runtime": 3.6217, + "eval_nli-pairs_samples_per_second": 27.612, + "eval_nli-pairs_steps_per_second": 1.104, + "eval_sts-test_pearson_cosine": 0.781915957368962, + "eval_sts-test_pearson_dot": 0.49821032356844613, + "eval_sts-test_pearson_euclidean": 0.7329308897504494, + "eval_sts-test_pearson_manhattan": 0.7292186092506918, + "eval_sts-test_pearson_max": 0.781915957368962, + "eval_sts-test_spearman_cosine": 0.7983596570250642, + "eval_sts-test_spearman_dot": 0.4812350313638781, + "eval_sts-test_spearman_euclidean": 0.7265758267352669, + "eval_sts-test_spearman_manhattan": 0.7259264140902829, + "eval_sts-test_spearman_max": 0.7983596570250642, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_vitaminc-pairs_loss": 4.698296070098877, + "eval_vitaminc-pairs_runtime": 1.1338, + "eval_vitaminc-pairs_samples_per_second": 74.97, + "eval_vitaminc-pairs_steps_per_second": 2.646, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_sts-label_loss": 3.1822261810302734, + "eval_sts-label_runtime": 0.2702, + "eval_sts-label_samples_per_second": 370.09, + "eval_sts-label_steps_per_second": 14.804, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_qnli-contrastive_loss": 0.11326340585947037, + "eval_qnli-contrastive_runtime": 0.3581, + "eval_qnli-contrastive_samples_per_second": 279.28, + "eval_qnli-contrastive_steps_per_second": 11.171, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_scitail-pairs-qa_loss": 0.07009608298540115, + "eval_scitail-pairs-qa_runtime": 0.8816, + "eval_scitail-pairs-qa_samples_per_second": 113.424, + "eval_scitail-pairs-qa_steps_per_second": 4.537, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_scitail-pairs-pos_loss": 0.49156129360198975, + "eval_scitail-pairs-pos_runtime": 1.3759, + "eval_scitail-pairs-pos_samples_per_second": 72.678, + "eval_scitail-pairs-pos_steps_per_second": 2.907, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_xsum-pairs_loss": 0.25940877199172974, + "eval_xsum-pairs_runtime": 0.9373, + "eval_xsum-pairs_samples_per_second": 106.695, + "eval_xsum-pairs_steps_per_second": 4.268, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_compression-pairs_loss": 0.0919649675488472, + "eval_compression-pairs_runtime": 0.2738, + "eval_compression-pairs_samples_per_second": 365.291, + "eval_compression-pairs_steps_per_second": 14.612, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_sciq_pairs_loss": 0.29138606786727905, + "eval_sciq_pairs_runtime": 4.1059, + "eval_sciq_pairs_samples_per_second": 24.355, + "eval_sciq_pairs_steps_per_second": 0.974, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_qasc_pairs_loss": 0.19625085592269897, + "eval_qasc_pairs_runtime": 1.0611, + "eval_qasc_pairs_samples_per_second": 94.24, + "eval_qasc_pairs_steps_per_second": 3.77, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_openbookqa_pairs_loss": 1.7960456609725952, + "eval_openbookqa_pairs_runtime": 0.9042, + "eval_openbookqa_pairs_samples_per_second": 110.601, + "eval_openbookqa_pairs_steps_per_second": 4.424, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_msmarco_pairs_loss": 0.5171416997909546, + "eval_msmarco_pairs_runtime": 2.0637, + "eval_msmarco_pairs_samples_per_second": 48.457, + "eval_msmarco_pairs_steps_per_second": 1.938, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_nq_pairs_loss": 0.24809740483760834, + "eval_nq_pairs_runtime": 4.529, + "eval_nq_pairs_samples_per_second": 22.08, + "eval_nq_pairs_steps_per_second": 0.883, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_trivia_pairs_loss": 0.9041999578475952, + "eval_trivia_pairs_runtime": 6.5257, + "eval_trivia_pairs_samples_per_second": 15.324, + "eval_trivia_pairs_steps_per_second": 0.613, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_quora_pairs_loss": 0.03601976856589317, + "eval_quora_pairs_runtime": 0.6811, + "eval_quora_pairs_samples_per_second": 146.827, + "eval_quora_pairs_steps_per_second": 5.873, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_gooaq_pairs_loss": 0.5626399517059326, + "eval_gooaq_pairs_runtime": 1.3943, + "eval_gooaq_pairs_samples_per_second": 71.72, + "eval_gooaq_pairs_steps_per_second": 2.869, + "step": 1920 + }, + { + "epoch": 0.3008932769158439, + "eval_mrpc_pairs_loss": 0.04984402656555176, + "eval_mrpc_pairs_runtime": 0.2579, + "eval_mrpc_pairs_samples_per_second": 387.725, + "eval_mrpc_pairs_steps_per_second": 15.509, + "step": 1920 + }, + { + "epoch": 0.30590816486444133, + "grad_norm": 22.65591812133789, + "learning_rate": 2.796502337505742e-05, + "loss": 0.7425, + "step": 1952 + }, + { + "epoch": 0.3109230528130387, + "grad_norm": 10.119640350341797, + "learning_rate": 2.78346533155958e-05, + "loss": 0.7819, + "step": 1984 + }, + { + "epoch": 0.3159379407616361, + "grad_norm": 8.690531730651855, + "learning_rate": 2.770055955400161e-05, + "loss": 0.5937, + "step": 2016 + }, + { + "epoch": 0.3209528287102335, + "grad_norm": 0.8992699384689331, + "learning_rate": 2.7562780994732476e-05, + "loss": 0.8133, + "step": 2048 + }, + { + "epoch": 0.3259677166588309, + "grad_norm": 10.619684219360352, + "learning_rate": 2.7421357611311824e-05, + "loss": 1.0674, + "step": 2080 + }, + { + "epoch": 0.3309826046074283, + "grad_norm": 7.222084045410156, + "learning_rate": 2.727633043473141e-05, + "loss": 0.6288, + "step": 2112 + }, + { + "epoch": 0.3359974925560257, + "grad_norm": 10.166888236999512, + "learning_rate": 2.712774154154707e-05, + "loss": 0.5866, + "step": 2144 + }, + { + "epoch": 0.3410123805046231, + "grad_norm": 0.36360761523246765, + "learning_rate": 2.6975634041671052e-05, + "loss": 0.6962, + "step": 2176 + }, + { + "epoch": 0.3460272684532205, + "grad_norm": 9.586665153503418, + "learning_rate": 2.6820052065864665e-05, + "loss": 0.5562, + "step": 2208 + }, + { + "epoch": 0.3510421564018179, + "grad_norm": 1.1307642459869385, + "learning_rate": 2.6661040752934594e-05, + "loss": 0.8871, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_nli-pairs_loss": 1.0147591829299927, + "eval_nli-pairs_runtime": 3.7201, + "eval_nli-pairs_samples_per_second": 26.881, + "eval_nli-pairs_steps_per_second": 1.075, + "eval_sts-test_pearson_cosine": 0.7872126529181761, + "eval_sts-test_pearson_dot": 0.5062045289861089, + "eval_sts-test_pearson_euclidean": 0.7351473988633473, + "eval_sts-test_pearson_manhattan": 0.7310226402088944, + "eval_sts-test_pearson_max": 0.7872126529181761, + "eval_sts-test_spearman_cosine": 0.801487068999052, + "eval_sts-test_spearman_dot": 0.4912205722904683, + "eval_sts-test_spearman_euclidean": 0.7267262355024484, + "eval_sts-test_spearman_manhattan": 0.72510169253649, + "eval_sts-test_spearman_max": 0.801487068999052, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_vitaminc-pairs_loss": 4.644638538360596, + "eval_vitaminc-pairs_runtime": 1.1453, + "eval_vitaminc-pairs_samples_per_second": 74.215, + "eval_vitaminc-pairs_steps_per_second": 2.619, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_sts-label_loss": 3.915343999862671, + "eval_sts-label_runtime": 0.2807, + "eval_sts-label_samples_per_second": 356.217, + "eval_sts-label_steps_per_second": 14.249, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_qnli-contrastive_loss": 0.11220741271972656, + "eval_qnli-contrastive_runtime": 0.3614, + "eval_qnli-contrastive_samples_per_second": 276.705, + "eval_qnli-contrastive_steps_per_second": 11.068, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_scitail-pairs-qa_loss": 0.06635177880525589, + "eval_scitail-pairs-qa_runtime": 0.8881, + "eval_scitail-pairs-qa_samples_per_second": 112.594, + "eval_scitail-pairs-qa_steps_per_second": 4.504, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_scitail-pairs-pos_loss": 0.5765587687492371, + "eval_scitail-pairs-pos_runtime": 1.3496, + "eval_scitail-pairs-pos_samples_per_second": 74.097, + "eval_scitail-pairs-pos_steps_per_second": 2.964, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_xsum-pairs_loss": 0.2595808804035187, + "eval_xsum-pairs_runtime": 0.9377, + "eval_xsum-pairs_samples_per_second": 106.641, + "eval_xsum-pairs_steps_per_second": 4.266, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_compression-pairs_loss": 0.0918564721941948, + "eval_compression-pairs_runtime": 0.2755, + "eval_compression-pairs_samples_per_second": 363.032, + "eval_compression-pairs_steps_per_second": 14.521, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_sciq_pairs_loss": 0.284303218126297, + "eval_sciq_pairs_runtime": 4.1289, + "eval_sciq_pairs_samples_per_second": 24.22, + "eval_sciq_pairs_steps_per_second": 0.969, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_qasc_pairs_loss": 0.19232892990112305, + "eval_qasc_pairs_runtime": 1.0709, + "eval_qasc_pairs_samples_per_second": 93.384, + "eval_qasc_pairs_steps_per_second": 3.735, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_openbookqa_pairs_loss": 1.6234371662139893, + "eval_openbookqa_pairs_runtime": 0.9558, + "eval_openbookqa_pairs_samples_per_second": 104.62, + "eval_openbookqa_pairs_steps_per_second": 4.185, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_msmarco_pairs_loss": 0.5325217247009277, + "eval_msmarco_pairs_runtime": 2.0971, + "eval_msmarco_pairs_samples_per_second": 47.685, + "eval_msmarco_pairs_steps_per_second": 1.907, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_nq_pairs_loss": 0.2721095681190491, + "eval_nq_pairs_runtime": 4.5393, + "eval_nq_pairs_samples_per_second": 22.03, + "eval_nq_pairs_steps_per_second": 0.881, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_trivia_pairs_loss": 0.8544899821281433, + "eval_trivia_pairs_runtime": 6.4668, + "eval_trivia_pairs_samples_per_second": 15.464, + "eval_trivia_pairs_steps_per_second": 0.619, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_quora_pairs_loss": 0.08441996574401855, + "eval_quora_pairs_runtime": 0.6933, + "eval_quora_pairs_samples_per_second": 144.233, + "eval_quora_pairs_steps_per_second": 5.769, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_gooaq_pairs_loss": 0.5711588859558105, + "eval_gooaq_pairs_runtime": 1.3941, + "eval_gooaq_pairs_samples_per_second": 71.733, + "eval_gooaq_pairs_steps_per_second": 2.869, + "step": 2240 + }, + { + "epoch": 0.3510421564018179, + "eval_mrpc_pairs_loss": 0.05093960464000702, + "eval_mrpc_pairs_runtime": 0.2633, + "eval_mrpc_pairs_samples_per_second": 379.777, + "eval_mrpc_pairs_steps_per_second": 15.191, + "step": 2240 + }, + { + "epoch": 0.3560570443504153, + "grad_norm": 0.39178094267845154, + "learning_rate": 2.6498646236636892e-05, + "loss": 0.6805, + "step": 2272 + }, + { + "epoch": 0.3610719322990127, + "grad_norm": 7.91475248336792, + "learning_rate": 2.6332915632292237e-05, + "loss": 1.0451, + "step": 2304 + }, + { + "epoch": 0.3660868202476101, + "grad_norm": 31.54157066345215, + "learning_rate": 2.616389702311641e-05, + "loss": 1.0603, + "step": 2336 + }, + { + "epoch": 0.37110170819620747, + "grad_norm": 8.400779724121094, + "learning_rate": 2.5991639446269964e-05, + "loss": 0.8142, + "step": 2368 + }, + { + "epoch": 0.3761165961448049, + "grad_norm": 20.99441146850586, + "learning_rate": 2.5816192878631166e-05, + "loss": 1.7211, + "step": 2400 + }, + { + "epoch": 0.3811314840934023, + "grad_norm": 10.574430465698242, + "learning_rate": 2.5637608222296237e-05, + "loss": 0.7523, + "step": 2432 + }, + { + "epoch": 0.3861463720419997, + "grad_norm": 0.8941424489021301, + "learning_rate": 2.5455937289811207e-05, + "loss": 0.8053, + "step": 2464 + }, + { + "epoch": 0.3911612599905971, + "grad_norm": 1.9402281045913696, + "learning_rate": 2.5271232789139587e-05, + "loss": 0.8427, + "step": 2496 + }, + { + "epoch": 0.3961761479391945, + "grad_norm": 23.42873764038086, + "learning_rate": 2.5083548308370296e-05, + "loss": 0.8204, + "step": 2528 + }, + { + "epoch": 0.40119103588779187, + "grad_norm": 4.5422234535217285, + "learning_rate": 2.4892938300170198e-05, + "loss": 0.5343, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_nli-pairs_loss": 1.002213478088379, + "eval_nli-pairs_runtime": 3.8843, + "eval_nli-pairs_samples_per_second": 25.745, + "eval_nli-pairs_steps_per_second": 1.03, + "eval_sts-test_pearson_cosine": 0.7872537557423719, + "eval_sts-test_pearson_dot": 0.5372668921721468, + "eval_sts-test_pearson_euclidean": 0.7383744840101544, + "eval_sts-test_pearson_manhattan": 0.7333039162515002, + "eval_sts-test_pearson_max": 0.7872537557423719, + "eval_sts-test_spearman_cosine": 0.8038647026605977, + "eval_sts-test_spearman_dot": 0.5191465873751544, + "eval_sts-test_spearman_euclidean": 0.730034619048548, + "eval_sts-test_spearman_manhattan": 0.7277569753761504, + "eval_sts-test_spearman_max": 0.8038647026605977, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_vitaminc-pairs_loss": 4.723379135131836, + "eval_vitaminc-pairs_runtime": 1.3031, + "eval_vitaminc-pairs_samples_per_second": 65.23, + "eval_vitaminc-pairs_steps_per_second": 2.302, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_sts-label_loss": 3.8185579776763916, + "eval_sts-label_runtime": 0.4182, + "eval_sts-label_samples_per_second": 239.094, + "eval_sts-label_steps_per_second": 9.564, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_qnli-contrastive_loss": 0.15084019303321838, + "eval_qnli-contrastive_runtime": 0.3638, + "eval_qnli-contrastive_samples_per_second": 274.906, + "eval_qnli-contrastive_steps_per_second": 10.996, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_scitail-pairs-qa_loss": 0.06741151213645935, + "eval_scitail-pairs-qa_runtime": 0.9458, + "eval_scitail-pairs-qa_samples_per_second": 105.735, + "eval_scitail-pairs-qa_steps_per_second": 4.229, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_scitail-pairs-pos_loss": 0.47680819034576416, + "eval_scitail-pairs-pos_runtime": 1.4736, + "eval_scitail-pairs-pos_samples_per_second": 67.859, + "eval_scitail-pairs-pos_steps_per_second": 2.714, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_xsum-pairs_loss": 0.2572269141674042, + "eval_xsum-pairs_runtime": 0.9448, + "eval_xsum-pairs_samples_per_second": 105.847, + "eval_xsum-pairs_steps_per_second": 4.234, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_compression-pairs_loss": 0.09604756534099579, + "eval_compression-pairs_runtime": 0.2774, + "eval_compression-pairs_samples_per_second": 360.554, + "eval_compression-pairs_steps_per_second": 14.422, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_sciq_pairs_loss": 0.2735004425048828, + "eval_sciq_pairs_runtime": 4.2103, + "eval_sciq_pairs_samples_per_second": 23.751, + "eval_sciq_pairs_steps_per_second": 0.95, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_qasc_pairs_loss": 0.1924300342798233, + "eval_qasc_pairs_runtime": 1.1352, + "eval_qasc_pairs_samples_per_second": 88.089, + "eval_qasc_pairs_steps_per_second": 3.524, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_openbookqa_pairs_loss": 1.6290359497070312, + "eval_openbookqa_pairs_runtime": 0.9392, + "eval_openbookqa_pairs_samples_per_second": 106.476, + "eval_openbookqa_pairs_steps_per_second": 4.259, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_msmarco_pairs_loss": 0.518312931060791, + "eval_msmarco_pairs_runtime": 2.121, + "eval_msmarco_pairs_samples_per_second": 47.147, + "eval_msmarco_pairs_steps_per_second": 1.886, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_nq_pairs_loss": 0.3077375292778015, + "eval_nq_pairs_runtime": 4.6617, + "eval_nq_pairs_samples_per_second": 21.451, + "eval_nq_pairs_steps_per_second": 0.858, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_trivia_pairs_loss": 0.8588294386863708, + "eval_trivia_pairs_runtime": 6.6293, + "eval_trivia_pairs_samples_per_second": 15.085, + "eval_trivia_pairs_steps_per_second": 0.603, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_quora_pairs_loss": 0.07980062067508698, + "eval_quora_pairs_runtime": 0.7261, + "eval_quora_pairs_samples_per_second": 137.72, + "eval_quora_pairs_steps_per_second": 5.509, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_gooaq_pairs_loss": 0.6570906043052673, + "eval_gooaq_pairs_runtime": 1.5071, + "eval_gooaq_pairs_samples_per_second": 66.352, + "eval_gooaq_pairs_steps_per_second": 2.654, + "step": 2560 + }, + { + "epoch": 0.40119103588779187, + "eval_mrpc_pairs_loss": 0.051231566816568375, + "eval_mrpc_pairs_runtime": 0.2799, + "eval_mrpc_pairs_samples_per_second": 357.322, + "eval_mrpc_pairs_steps_per_second": 14.293, + "step": 2560 + }, + { + "epoch": 0.40620592383638926, + "grad_norm": 37.2639045715332, + "learning_rate": 2.4699458065985813e-05, + "loss": 0.9709, + "step": 2592 + }, + { + "epoch": 0.4112208117849867, + "grad_norm": 15.363207817077637, + "learning_rate": 2.45031637399988e-05, + "loss": 0.708, + "step": 2624 + }, + { + "epoch": 0.4162356997335841, + "grad_norm": 1.8831324577331543, + "learning_rate": 2.430411227283978e-05, + "loss": 0.4083, + "step": 2656 + }, + { + "epoch": 0.4212505876821815, + "grad_norm": 5.664551734924316, + "learning_rate": 2.4102361415065367e-05, + "loss": 0.8732, + "step": 2688 + }, + { + "epoch": 0.4262654756307789, + "grad_norm": 0.615675151348114, + "learning_rate": 2.3897969700403022e-05, + "loss": 1.2616, + "step": 2720 + }, + { + "epoch": 0.43128036357937627, + "grad_norm": 19.81829261779785, + "learning_rate": 2.3690996428768772e-05, + "loss": 1.3324, + "step": 2752 + }, + { + "epoch": 0.43629525152797366, + "grad_norm": 6.3363118171691895, + "learning_rate": 2.348150164906257e-05, + "loss": 0.6244, + "step": 2784 + }, + { + "epoch": 0.44131013947657105, + "grad_norm": 1.103615641593933, + "learning_rate": 2.3269546141746407e-05, + "loss": 0.6176, + "step": 2816 + }, + { + "epoch": 0.44632502742516844, + "grad_norm": 11.468894004821777, + "learning_rate": 2.3055191401210126e-05, + "loss": 0.6926, + "step": 2848 + }, + { + "epoch": 0.4513399153737659, + "grad_norm": 4.0951619148254395, + "learning_rate": 2.283849961793017e-05, + "loss": 0.8158, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_nli-pairs_loss": 1.2103344202041626, + "eval_nli-pairs_runtime": 3.656, + "eval_nli-pairs_samples_per_second": 27.353, + "eval_nli-pairs_steps_per_second": 1.094, + "eval_sts-test_pearson_cosine": 0.7884135608823999, + "eval_sts-test_pearson_dot": 0.5043809957478502, + "eval_sts-test_pearson_euclidean": 0.73325296875941, + "eval_sts-test_pearson_manhattan": 0.7274442771815695, + "eval_sts-test_pearson_max": 0.7884135608823999, + "eval_sts-test_spearman_cosine": 0.8024151272859597, + "eval_sts-test_spearman_dot": 0.4849613226687463, + "eval_sts-test_spearman_euclidean": 0.7267107319000072, + "eval_sts-test_spearman_manhattan": 0.7238097600272174, + "eval_sts-test_spearman_max": 0.8024151272859597, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_vitaminc-pairs_loss": 4.7560882568359375, + "eval_vitaminc-pairs_runtime": 1.1898, + "eval_vitaminc-pairs_samples_per_second": 71.438, + "eval_vitaminc-pairs_steps_per_second": 2.521, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_sts-label_loss": 3.4280478954315186, + "eval_sts-label_runtime": 0.2879, + "eval_sts-label_samples_per_second": 347.303, + "eval_sts-label_steps_per_second": 13.892, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_qnli-contrastive_loss": 0.1333482712507248, + "eval_qnli-contrastive_runtime": 0.3658, + "eval_qnli-contrastive_samples_per_second": 273.37, + "eval_qnli-contrastive_steps_per_second": 10.935, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_scitail-pairs-qa_loss": 0.0703386664390564, + "eval_scitail-pairs-qa_runtime": 0.8879, + "eval_scitail-pairs-qa_samples_per_second": 112.63, + "eval_scitail-pairs-qa_steps_per_second": 4.505, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_scitail-pairs-pos_loss": 0.4763020873069763, + "eval_scitail-pairs-pos_runtime": 1.3239, + "eval_scitail-pairs-pos_samples_per_second": 75.532, + "eval_scitail-pairs-pos_steps_per_second": 3.021, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_xsum-pairs_loss": 0.25743284821510315, + "eval_xsum-pairs_runtime": 0.9333, + "eval_xsum-pairs_samples_per_second": 107.15, + "eval_xsum-pairs_steps_per_second": 4.286, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_compression-pairs_loss": 0.09842805564403534, + "eval_compression-pairs_runtime": 0.2944, + "eval_compression-pairs_samples_per_second": 339.674, + "eval_compression-pairs_steps_per_second": 13.587, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_sciq_pairs_loss": 0.28244778513908386, + "eval_sciq_pairs_runtime": 4.0785, + "eval_sciq_pairs_samples_per_second": 24.519, + "eval_sciq_pairs_steps_per_second": 0.981, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_qasc_pairs_loss": 0.18051397800445557, + "eval_qasc_pairs_runtime": 1.0561, + "eval_qasc_pairs_samples_per_second": 94.69, + "eval_qasc_pairs_steps_per_second": 3.788, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_openbookqa_pairs_loss": 1.5708725452423096, + "eval_openbookqa_pairs_runtime": 0.9072, + "eval_openbookqa_pairs_samples_per_second": 110.229, + "eval_openbookqa_pairs_steps_per_second": 4.409, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_msmarco_pairs_loss": 0.5720314979553223, + "eval_msmarco_pairs_runtime": 2.0694, + "eval_msmarco_pairs_samples_per_second": 48.322, + "eval_msmarco_pairs_steps_per_second": 1.933, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_nq_pairs_loss": 0.2748319208621979, + "eval_nq_pairs_runtime": 4.5496, + "eval_nq_pairs_samples_per_second": 21.98, + "eval_nq_pairs_steps_per_second": 0.879, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_trivia_pairs_loss": 0.8936847448348999, + "eval_trivia_pairs_runtime": 6.4784, + "eval_trivia_pairs_samples_per_second": 15.436, + "eval_trivia_pairs_steps_per_second": 0.617, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_quora_pairs_loss": 0.07990340888500214, + "eval_quora_pairs_runtime": 0.6852, + "eval_quora_pairs_samples_per_second": 145.945, + "eval_quora_pairs_steps_per_second": 5.838, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_gooaq_pairs_loss": 0.6210995316505432, + "eval_gooaq_pairs_runtime": 1.4234, + "eval_gooaq_pairs_samples_per_second": 70.255, + "eval_gooaq_pairs_steps_per_second": 2.81, + "step": 2880 + }, + { + "epoch": 0.4513399153737659, + "eval_mrpc_pairs_loss": 0.053870730102062225, + "eval_mrpc_pairs_runtime": 0.2678, + "eval_mrpc_pairs_samples_per_second": 373.436, + "eval_mrpc_pairs_steps_per_second": 14.937, + "step": 2880 + }, + { + "epoch": 0.4563548033223633, + "grad_norm": 0.5031663775444031, + "learning_rate": 2.261953366042628e-05, + "loss": 1.4753, + "step": 2912 + }, + { + "epoch": 0.46136969127096067, + "grad_norm": 3.3404605388641357, + "learning_rate": 2.239835705702158e-05, + "loss": 0.5735, + "step": 2944 + }, + { + "epoch": 0.46638457921955806, + "grad_norm": 14.60761547088623, + "learning_rate": 2.217503397741115e-05, + "loss": 1.2261, + "step": 2976 + }, + { + "epoch": 0.47139946716815545, + "grad_norm": 0.7826951146125793, + "learning_rate": 2.194962921404456e-05, + "loss": 0.6085, + "step": 3008 + }, + { + "epoch": 0.47641435511675284, + "grad_norm": 5.523419380187988, + "learning_rate": 2.1722208163327738e-05, + "loss": 0.8766, + "step": 3040 + }, + { + "epoch": 0.48142924306535023, + "grad_norm": 1.2507153749465942, + "learning_rate": 2.1492836806649564e-05, + "loss": 1.1824, + "step": 3072 + }, + { + "epoch": 0.4864441310139477, + "grad_norm": 10.76526165008545, + "learning_rate": 2.1261581691238775e-05, + "loss": 0.7192, + "step": 3104 + }, + { + "epoch": 0.49145901896254507, + "grad_norm": 2.5375277996063232, + "learning_rate": 2.1028509910856705e-05, + "loss": 0.6131, + "step": 3136 + }, + { + "epoch": 0.49647390691114246, + "grad_norm": 6.569655418395996, + "learning_rate": 2.0793689086331472e-05, + "loss": 0.7407, + "step": 3168 + }, + { + "epoch": 0.5014887948597399, + "grad_norm": 0.42745527625083923, + "learning_rate": 2.055718734593919e-05, + "loss": 0.5857, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_nli-pairs_loss": 1.1431602239608765, + "eval_nli-pairs_runtime": 3.6407, + "eval_nli-pairs_samples_per_second": 27.467, + "eval_nli-pairs_steps_per_second": 1.099, + "eval_sts-test_pearson_cosine": 0.7838341260331343, + "eval_sts-test_pearson_dot": 0.5274891201747137, + "eval_sts-test_pearson_euclidean": 0.734987175544037, + "eval_sts-test_pearson_manhattan": 0.7296263541205231, + "eval_sts-test_pearson_max": 0.7838341260331343, + "eval_sts-test_spearman_cosine": 0.8013224760849562, + "eval_sts-test_spearman_dot": 0.5061225327907017, + "eval_sts-test_spearman_euclidean": 0.7282525362996873, + "eval_sts-test_spearman_manhattan": 0.7265322068183514, + "eval_sts-test_spearman_max": 0.8013224760849562, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_vitaminc-pairs_loss": 4.748112201690674, + "eval_vitaminc-pairs_runtime": 1.1378, + "eval_vitaminc-pairs_samples_per_second": 74.706, + "eval_vitaminc-pairs_steps_per_second": 2.637, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_sts-label_loss": 3.9402565956115723, + "eval_sts-label_runtime": 0.2789, + "eval_sts-label_samples_per_second": 358.596, + "eval_sts-label_steps_per_second": 14.344, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_qnli-contrastive_loss": 0.10341227799654007, + "eval_qnli-contrastive_runtime": 0.3605, + "eval_qnli-contrastive_samples_per_second": 277.417, + "eval_qnli-contrastive_steps_per_second": 11.097, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_scitail-pairs-qa_loss": 0.06673895567655563, + "eval_scitail-pairs-qa_runtime": 0.8765, + "eval_scitail-pairs-qa_samples_per_second": 114.092, + "eval_scitail-pairs-qa_steps_per_second": 4.564, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_scitail-pairs-pos_loss": 0.510690450668335, + "eval_scitail-pairs-pos_runtime": 1.3274, + "eval_scitail-pairs-pos_samples_per_second": 75.334, + "eval_scitail-pairs-pos_steps_per_second": 3.013, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_xsum-pairs_loss": 0.26573723554611206, + "eval_xsum-pairs_runtime": 0.9342, + "eval_xsum-pairs_samples_per_second": 107.047, + "eval_xsum-pairs_steps_per_second": 4.282, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_compression-pairs_loss": 0.09096826612949371, + "eval_compression-pairs_runtime": 0.2779, + "eval_compression-pairs_samples_per_second": 359.804, + "eval_compression-pairs_steps_per_second": 14.392, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_sciq_pairs_loss": 0.30787500739097595, + "eval_sciq_pairs_runtime": 4.1007, + "eval_sciq_pairs_samples_per_second": 24.386, + "eval_sciq_pairs_steps_per_second": 0.975, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_qasc_pairs_loss": 0.1825849413871765, + "eval_qasc_pairs_runtime": 1.0526, + "eval_qasc_pairs_samples_per_second": 94.998, + "eval_qasc_pairs_steps_per_second": 3.8, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_openbookqa_pairs_loss": 1.5945305824279785, + "eval_openbookqa_pairs_runtime": 0.8948, + "eval_openbookqa_pairs_samples_per_second": 111.759, + "eval_openbookqa_pairs_steps_per_second": 4.47, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_msmarco_pairs_loss": 0.5864604711532593, + "eval_msmarco_pairs_runtime": 2.0556, + "eval_msmarco_pairs_samples_per_second": 48.646, + "eval_msmarco_pairs_steps_per_second": 1.946, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_nq_pairs_loss": 0.2538978159427643, + "eval_nq_pairs_runtime": 4.5409, + "eval_nq_pairs_samples_per_second": 22.022, + "eval_nq_pairs_steps_per_second": 0.881, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_trivia_pairs_loss": 0.8825237154960632, + "eval_trivia_pairs_runtime": 6.4701, + "eval_trivia_pairs_samples_per_second": 15.456, + "eval_trivia_pairs_steps_per_second": 0.618, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_quora_pairs_loss": 0.06264814734458923, + "eval_quora_pairs_runtime": 0.6792, + "eval_quora_pairs_samples_per_second": 147.238, + "eval_quora_pairs_steps_per_second": 5.89, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_gooaq_pairs_loss": 0.5953384041786194, + "eval_gooaq_pairs_runtime": 1.4186, + "eval_gooaq_pairs_samples_per_second": 70.49, + "eval_gooaq_pairs_steps_per_second": 2.82, + "step": 3200 + }, + { + "epoch": 0.5014887948597399, + "eval_mrpc_pairs_loss": 0.05028616264462471, + "eval_mrpc_pairs_runtime": 0.2664, + "eval_mrpc_pairs_samples_per_second": 375.444, + "eval_mrpc_pairs_steps_per_second": 15.018, + "step": 3200 + }, + { + "epoch": 0.5065036828083372, + "grad_norm": 17.477581024169922, + "learning_rate": 2.0319073305638035e-05, + "loss": 0.6212, + "step": 3232 + }, + { + "epoch": 0.5115185707569346, + "grad_norm": 15.705268859863281, + "learning_rate": 2.0079416049160762e-05, + "loss": 1.1408, + "step": 3264 + }, + { + "epoch": 0.516533458705532, + "grad_norm": 15.518088340759277, + "learning_rate": 1.983828510797154e-05, + "loss": 0.6898, + "step": 3296 + }, + { + "epoch": 0.5215483466541294, + "grad_norm": 18.28449058532715, + "learning_rate": 1.9595750441092844e-05, + "loss": 0.9827, + "step": 3328 + }, + { + "epoch": 0.5265632346027268, + "grad_norm": 11.187614440917969, + "learning_rate": 1.935188241480837e-05, + "loss": 0.9518, + "step": 3360 + }, + { + "epoch": 0.5315781225513242, + "grad_norm": 24.515199661254883, + "learning_rate": 1.910675178224773e-05, + "loss": 0.5584, + "step": 3392 + }, + { + "epoch": 0.5365930104999217, + "grad_norm": 21.595224380493164, + "learning_rate": 1.886042966285894e-05, + "loss": 1.3362, + "step": 3424 + }, + { + "epoch": 0.5416078984485191, + "grad_norm": 14.934494972229004, + "learning_rate": 1.8612987521774603e-05, + "loss": 0.4418, + "step": 3456 + }, + { + "epoch": 0.5466227863971165, + "grad_norm": 1.0222537517547607, + "learning_rate": 1.836449714907785e-05, + "loss": 0.5896, + "step": 3488 + }, + { + "epoch": 0.5516376743457139, + "grad_norm": 13.705151557922363, + "learning_rate": 1.811503063897396e-05, + "loss": 0.7951, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_nli-pairs_loss": 1.0016616582870483, + "eval_nli-pairs_runtime": 3.6365, + "eval_nli-pairs_samples_per_second": 27.499, + "eval_nli-pairs_steps_per_second": 1.1, + "eval_sts-test_pearson_cosine": 0.783269156461013, + "eval_sts-test_pearson_dot": 0.5146760761775918, + "eval_sts-test_pearson_euclidean": 0.7293244171224789, + "eval_sts-test_pearson_manhattan": 0.722566066058283, + "eval_sts-test_pearson_max": 0.783269156461013, + "eval_sts-test_spearman_cosine": 0.800346163751739, + "eval_sts-test_spearman_dot": 0.49134463318009686, + "eval_sts-test_spearman_euclidean": 0.7220780456605193, + "eval_sts-test_spearman_manhattan": 0.7185570530657137, + "eval_sts-test_spearman_max": 0.800346163751739, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_vitaminc-pairs_loss": 4.628457546234131, + "eval_vitaminc-pairs_runtime": 1.1358, + "eval_vitaminc-pairs_samples_per_second": 74.837, + "eval_vitaminc-pairs_steps_per_second": 2.641, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_sts-label_loss": 3.698469877243042, + "eval_sts-label_runtime": 0.2763, + "eval_sts-label_samples_per_second": 361.871, + "eval_sts-label_steps_per_second": 14.475, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_qnli-contrastive_loss": 0.11857427656650543, + "eval_qnli-contrastive_runtime": 0.3599, + "eval_qnli-contrastive_samples_per_second": 277.865, + "eval_qnli-contrastive_steps_per_second": 11.115, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_scitail-pairs-qa_loss": 0.06011494621634483, + "eval_scitail-pairs-qa_runtime": 0.8855, + "eval_scitail-pairs-qa_samples_per_second": 112.93, + "eval_scitail-pairs-qa_steps_per_second": 4.517, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_scitail-pairs-pos_loss": 0.5179685950279236, + "eval_scitail-pairs-pos_runtime": 1.3428, + "eval_scitail-pairs-pos_samples_per_second": 74.469, + "eval_scitail-pairs-pos_steps_per_second": 2.979, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_xsum-pairs_loss": 0.2575337886810303, + "eval_xsum-pairs_runtime": 0.9362, + "eval_xsum-pairs_samples_per_second": 106.81, + "eval_xsum-pairs_steps_per_second": 4.272, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_compression-pairs_loss": 0.08986295014619827, + "eval_compression-pairs_runtime": 0.2735, + "eval_compression-pairs_samples_per_second": 365.659, + "eval_compression-pairs_steps_per_second": 14.626, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_sciq_pairs_loss": 0.2898155748844147, + "eval_sciq_pairs_runtime": 4.1009, + "eval_sciq_pairs_samples_per_second": 24.385, + "eval_sciq_pairs_steps_per_second": 0.975, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_qasc_pairs_loss": 0.1790761798620224, + "eval_qasc_pairs_runtime": 1.0559, + "eval_qasc_pairs_samples_per_second": 94.702, + "eval_qasc_pairs_steps_per_second": 3.788, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_openbookqa_pairs_loss": 1.6558103561401367, + "eval_openbookqa_pairs_runtime": 0.8846, + "eval_openbookqa_pairs_samples_per_second": 113.048, + "eval_openbookqa_pairs_steps_per_second": 4.522, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_msmarco_pairs_loss": 0.5547183156013489, + "eval_msmarco_pairs_runtime": 2.0592, + "eval_msmarco_pairs_samples_per_second": 48.563, + "eval_msmarco_pairs_steps_per_second": 1.943, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_nq_pairs_loss": 0.24799224734306335, + "eval_nq_pairs_runtime": 4.5115, + "eval_nq_pairs_samples_per_second": 22.166, + "eval_nq_pairs_steps_per_second": 0.887, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_trivia_pairs_loss": 0.9036693572998047, + "eval_trivia_pairs_runtime": 6.5286, + "eval_trivia_pairs_samples_per_second": 15.317, + "eval_trivia_pairs_steps_per_second": 0.613, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_quora_pairs_loss": 0.05727443844079971, + "eval_quora_pairs_runtime": 0.6763, + "eval_quora_pairs_samples_per_second": 147.873, + "eval_quora_pairs_steps_per_second": 5.915, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_gooaq_pairs_loss": 0.5602415800094604, + "eval_gooaq_pairs_runtime": 1.4132, + "eval_gooaq_pairs_samples_per_second": 70.759, + "eval_gooaq_pairs_steps_per_second": 2.83, + "step": 3520 + }, + { + "epoch": 0.5516376743457139, + "eval_mrpc_pairs_loss": 0.04762456938624382, + "eval_mrpc_pairs_runtime": 0.2648, + "eval_mrpc_pairs_samples_per_second": 377.632, + "eval_mrpc_pairs_steps_per_second": 15.105, + "step": 3520 + }, + { + "epoch": 0.5566525622943113, + "grad_norm": 0.39285340905189514, + "learning_rate": 1.7864660368873747e-05, + "loss": 0.5201, + "step": 3552 + }, + { + "epoch": 0.5616674502429087, + "grad_norm": 16.01999855041504, + "learning_rate": 1.7613458978394786e-05, + "loss": 0.6351, + "step": 3584 + }, + { + "epoch": 0.566682338191506, + "grad_norm": 0.5487422347068787, + "learning_rate": 1.7361499348286606e-05, + "loss": 0.8652, + "step": 3616 + }, + { + "epoch": 0.5716972261401034, + "grad_norm": 0.9249119758605957, + "learning_rate": 1.710885457928585e-05, + "loss": 0.6407, + "step": 3648 + }, + { + "epoch": 0.5767121140887008, + "grad_norm": 6.578505992889404, + "learning_rate": 1.6855597970907664e-05, + "loss": 0.9435, + "step": 3680 + }, + { + "epoch": 0.5817270020372982, + "grad_norm": 14.307022094726562, + "learning_rate": 1.6601803000179394e-05, + "loss": 0.9295, + "step": 3712 + }, + { + "epoch": 0.5867418899858956, + "grad_norm": 16.091779708862305, + "learning_rate": 1.6347543300322795e-05, + "loss": 0.6829, + "step": 3744 + }, + { + "epoch": 0.591756777934493, + "grad_norm": 29.058805465698242, + "learning_rate": 1.6092892639390916e-05, + "loss": 0.8683, + "step": 3776 + }, + { + "epoch": 0.5967716658830904, + "grad_norm": 13.12238597869873, + "learning_rate": 1.583792489886586e-05, + "loss": 1.115, + "step": 3808 + }, + { + "epoch": 0.6017865538316878, + "grad_norm": 11.606388092041016, + "learning_rate": 1.558271405222362e-05, + "loss": 1.0936, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_nli-pairs_loss": 0.9106074571609497, + "eval_nli-pairs_runtime": 3.9467, + "eval_nli-pairs_samples_per_second": 25.337, + "eval_nli-pairs_steps_per_second": 1.013, + "eval_sts-test_pearson_cosine": 0.7831915073063493, + "eval_sts-test_pearson_dot": 0.51712727721244, + "eval_sts-test_pearson_euclidean": 0.7355201142492419, + "eval_sts-test_pearson_manhattan": 0.7299910115321456, + "eval_sts-test_pearson_max": 0.7831915073063493, + "eval_sts-test_spearman_cosine": 0.8005432620025132, + "eval_sts-test_spearman_dot": 0.49466719400094655, + "eval_sts-test_spearman_euclidean": 0.7273424991180402, + "eval_sts-test_spearman_manhattan": 0.7249394934262583, + "eval_sts-test_spearman_max": 0.8005432620025132, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_vitaminc-pairs_loss": 4.7559494972229, + "eval_vitaminc-pairs_runtime": 1.1844, + "eval_vitaminc-pairs_samples_per_second": 71.768, + "eval_vitaminc-pairs_steps_per_second": 2.533, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_sts-label_loss": 3.46917724609375, + "eval_sts-label_runtime": 0.3003, + "eval_sts-label_samples_per_second": 333.048, + "eval_sts-label_steps_per_second": 13.322, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_qnli-contrastive_loss": 0.13890141248703003, + "eval_qnli-contrastive_runtime": 0.3729, + "eval_qnli-contrastive_samples_per_second": 268.18, + "eval_qnli-contrastive_steps_per_second": 10.727, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_scitail-pairs-qa_loss": 0.0611240416765213, + "eval_scitail-pairs-qa_runtime": 0.9367, + "eval_scitail-pairs-qa_samples_per_second": 106.755, + "eval_scitail-pairs-qa_steps_per_second": 4.27, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_scitail-pairs-pos_loss": 0.46203696727752686, + "eval_scitail-pairs-pos_runtime": 1.4874, + "eval_scitail-pairs-pos_samples_per_second": 67.232, + "eval_scitail-pairs-pos_steps_per_second": 2.689, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_xsum-pairs_loss": 0.24919259548187256, + "eval_xsum-pairs_runtime": 0.9576, + "eval_xsum-pairs_samples_per_second": 104.427, + "eval_xsum-pairs_steps_per_second": 4.177, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_compression-pairs_loss": 0.08809012174606323, + "eval_compression-pairs_runtime": 0.298, + "eval_compression-pairs_samples_per_second": 335.567, + "eval_compression-pairs_steps_per_second": 13.423, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_sciq_pairs_loss": 0.28287386894226074, + "eval_sciq_pairs_runtime": 4.2668, + "eval_sciq_pairs_samples_per_second": 23.437, + "eval_sciq_pairs_steps_per_second": 0.937, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_qasc_pairs_loss": 0.1861308217048645, + "eval_qasc_pairs_runtime": 1.0488, + "eval_qasc_pairs_samples_per_second": 95.351, + "eval_qasc_pairs_steps_per_second": 3.814, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_openbookqa_pairs_loss": 1.600982666015625, + "eval_openbookqa_pairs_runtime": 0.9077, + "eval_openbookqa_pairs_samples_per_second": 110.17, + "eval_openbookqa_pairs_steps_per_second": 4.407, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_msmarco_pairs_loss": 0.5555463433265686, + "eval_msmarco_pairs_runtime": 2.1064, + "eval_msmarco_pairs_samples_per_second": 47.474, + "eval_msmarco_pairs_steps_per_second": 1.899, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_nq_pairs_loss": 0.23241031169891357, + "eval_nq_pairs_runtime": 4.6119, + "eval_nq_pairs_samples_per_second": 21.683, + "eval_nq_pairs_steps_per_second": 0.867, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_trivia_pairs_loss": 0.7936394214630127, + "eval_trivia_pairs_runtime": 6.6242, + "eval_trivia_pairs_samples_per_second": 15.096, + "eval_trivia_pairs_steps_per_second": 0.604, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_quora_pairs_loss": 0.05936668440699577, + "eval_quora_pairs_runtime": 0.7463, + "eval_quora_pairs_samples_per_second": 133.994, + "eval_quora_pairs_steps_per_second": 5.36, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_gooaq_pairs_loss": 0.5735708475112915, + "eval_gooaq_pairs_runtime": 1.4747, + "eval_gooaq_pairs_samples_per_second": 67.809, + "eval_gooaq_pairs_steps_per_second": 2.712, + "step": 3840 + }, + { + "epoch": 0.6017865538316878, + "eval_mrpc_pairs_loss": 0.046309370547533035, + "eval_mrpc_pairs_runtime": 0.2694, + "eval_mrpc_pairs_samples_per_second": 371.218, + "eval_mrpc_pairs_steps_per_second": 14.849, + "step": 3840 + }, + { + "epoch": 0.6068014417802852, + "grad_norm": 6.513147830963135, + "learning_rate": 1.53273341434723e-05, + "loss": 0.8689, + "step": 3872 + }, + { + "epoch": 0.6118163297288827, + "grad_norm": 0.2349071353673935, + "learning_rate": 1.5071859265669756e-05, + "loss": 0.8692, + "step": 3904 + }, + { + "epoch": 0.6168312176774801, + "grad_norm": 18.028608322143555, + "learning_rate": 1.4816363539427118e-05, + "loss": 0.9083, + "step": 3936 + }, + { + "epoch": 0.6218461056260774, + "grad_norm": 17.381690979003906, + "learning_rate": 1.456092109140423e-05, + "loss": 1.0782, + "step": 3968 + }, + { + "epoch": 0.6268609935746748, + "grad_norm": 20.72548484802246, + "learning_rate": 1.4305606032803418e-05, + "loss": 0.7711, + "step": 4000 + }, + { + "epoch": 0.6318758815232722, + "grad_norm": 28.311264038085938, + "learning_rate": 1.4050492437867641e-05, + "loss": 1.0005, + "step": 4032 + }, + { + "epoch": 0.6368907694718696, + "grad_norm": 14.892809867858887, + "learning_rate": 1.3795654322389481e-05, + "loss": 0.7229, + "step": 4064 + }, + { + "epoch": 0.641905657420467, + "grad_norm": 18.567630767822266, + "learning_rate": 1.3541165622236977e-05, + "loss": 0.4871, + "step": 4096 + }, + { + "epoch": 0.6469205453690644, + "grad_norm": 8.814851760864258, + "learning_rate": 1.3287100171902759e-05, + "loss": 0.7853, + "step": 4128 + }, + { + "epoch": 0.6519354333176618, + "grad_norm": 19.43486785888672, + "learning_rate": 1.3033531683082495e-05, + "loss": 0.9271, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_nli-pairs_loss": 0.8979966640472412, + "eval_nli-pairs_runtime": 3.6341, + "eval_nli-pairs_samples_per_second": 27.517, + "eval_nli-pairs_steps_per_second": 1.101, + "eval_sts-test_pearson_cosine": 0.786081877366483, + "eval_sts-test_pearson_dot": 0.5354100918466089, + "eval_sts-test_pearson_euclidean": 0.7368659505908834, + "eval_sts-test_pearson_manhattan": 0.7310042183211231, + "eval_sts-test_pearson_max": 0.786081877366483, + "eval_sts-test_spearman_cosine": 0.8043456052578905, + "eval_sts-test_spearman_dot": 0.5150264179790126, + "eval_sts-test_spearman_euclidean": 0.7297811553069841, + "eval_sts-test_spearman_manhattan": 0.7264172194761916, + "eval_sts-test_spearman_max": 0.8043456052578905, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_vitaminc-pairs_loss": 4.720225811004639, + "eval_vitaminc-pairs_runtime": 1.1487, + "eval_vitaminc-pairs_samples_per_second": 73.995, + "eval_vitaminc-pairs_steps_per_second": 2.612, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_sts-label_loss": 3.9553511142730713, + "eval_sts-label_runtime": 0.2732, + "eval_sts-label_samples_per_second": 366.049, + "eval_sts-label_steps_per_second": 14.642, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_qnli-contrastive_loss": 0.14256399869918823, + "eval_qnli-contrastive_runtime": 0.3558, + "eval_qnli-contrastive_samples_per_second": 281.03, + "eval_qnli-contrastive_steps_per_second": 11.241, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_scitail-pairs-qa_loss": 0.06135182082653046, + "eval_scitail-pairs-qa_runtime": 0.8797, + "eval_scitail-pairs-qa_samples_per_second": 113.67, + "eval_scitail-pairs-qa_steps_per_second": 4.547, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_scitail-pairs-pos_loss": 0.42590686678886414, + "eval_scitail-pairs-pos_runtime": 1.3288, + "eval_scitail-pairs-pos_samples_per_second": 75.254, + "eval_scitail-pairs-pos_steps_per_second": 3.01, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_xsum-pairs_loss": 0.2564789056777954, + "eval_xsum-pairs_runtime": 0.9345, + "eval_xsum-pairs_samples_per_second": 107.011, + "eval_xsum-pairs_steps_per_second": 4.28, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_compression-pairs_loss": 0.08838170021772385, + "eval_compression-pairs_runtime": 0.2761, + "eval_compression-pairs_samples_per_second": 362.144, + "eval_compression-pairs_steps_per_second": 14.486, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_sciq_pairs_loss": 0.2946786880493164, + "eval_sciq_pairs_runtime": 4.076, + "eval_sciq_pairs_samples_per_second": 24.534, + "eval_sciq_pairs_steps_per_second": 0.981, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_qasc_pairs_loss": 0.17502914369106293, + "eval_qasc_pairs_runtime": 1.0723, + "eval_qasc_pairs_samples_per_second": 93.259, + "eval_qasc_pairs_steps_per_second": 3.73, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_openbookqa_pairs_loss": 1.5555152893066406, + "eval_openbookqa_pairs_runtime": 0.8973, + "eval_openbookqa_pairs_samples_per_second": 111.451, + "eval_openbookqa_pairs_steps_per_second": 4.458, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_msmarco_pairs_loss": 0.5041812062263489, + "eval_msmarco_pairs_runtime": 2.0593, + "eval_msmarco_pairs_samples_per_second": 48.56, + "eval_msmarco_pairs_steps_per_second": 1.942, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_nq_pairs_loss": 0.24564537405967712, + "eval_nq_pairs_runtime": 4.527, + "eval_nq_pairs_samples_per_second": 22.09, + "eval_nq_pairs_steps_per_second": 0.884, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_trivia_pairs_loss": 0.8565467000007629, + "eval_trivia_pairs_runtime": 6.4751, + "eval_trivia_pairs_samples_per_second": 15.444, + "eval_trivia_pairs_steps_per_second": 0.618, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_quora_pairs_loss": 0.052645713090896606, + "eval_quora_pairs_runtime": 0.6803, + "eval_quora_pairs_samples_per_second": 146.985, + "eval_quora_pairs_steps_per_second": 5.879, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_gooaq_pairs_loss": 0.5815556645393372, + "eval_gooaq_pairs_runtime": 1.3985, + "eval_gooaq_pairs_samples_per_second": 71.504, + "eval_gooaq_pairs_steps_per_second": 2.86, + "step": 4160 + }, + { + "epoch": 0.6519354333176618, + "eval_mrpc_pairs_loss": 0.047052089124917984, + "eval_mrpc_pairs_runtime": 0.2602, + "eval_mrpc_pairs_samples_per_second": 384.349, + "eval_mrpc_pairs_steps_per_second": 15.374, + "step": 4160 + }, + { + "epoch": 0.6569503212662592, + "grad_norm": 21.91355323791504, + "learning_rate": 1.2780533723289014e-05, + "loss": 0.5223, + "step": 4192 + }, + { + "epoch": 0.6619652092148566, + "grad_norm": 9.792081832885742, + "learning_rate": 1.2528179694508286e-05, + "loss": 1.0498, + "step": 4224 + }, + { + "epoch": 0.666980097163454, + "grad_norm": 6.606201648712158, + "learning_rate": 1.2276542811903345e-05, + "loss": 0.6791, + "step": 4256 + }, + { + "epoch": 0.6719949851120514, + "grad_norm": 16.744705200195312, + "learning_rate": 1.2025696082572509e-05, + "loss": 0.8836, + "step": 4288 + }, + { + "epoch": 0.6770098730606487, + "grad_norm": 8.791626930236816, + "learning_rate": 1.1775712284367882e-05, + "loss": 0.6035, + "step": 4320 + }, + { + "epoch": 0.6820247610092462, + "grad_norm": 1.067271113395691, + "learning_rate": 1.152666394478045e-05, + "loss": 0.5167, + "step": 4352 + }, + { + "epoch": 0.6870396489578436, + "grad_norm": 7.685211181640625, + "learning_rate": 1.1286358620301126e-05, + "loss": 0.981, + "step": 4384 + }, + { + "epoch": 0.692054536906441, + "grad_norm": 19.07784652709961, + "learning_rate": 1.10393628476565e-05, + "loss": 0.4873, + "step": 4416 + }, + { + "epoch": 0.6970694248550384, + "grad_norm": 1.4715958833694458, + "learning_rate": 1.0793516169782712e-05, + "loss": 0.4762, + "step": 4448 + }, + { + "epoch": 0.7020843128036358, + "grad_norm": 14.572600364685059, + "learning_rate": 1.0548889913873123e-05, + "loss": 0.8201, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_nli-pairs_loss": 0.8704043626785278, + "eval_nli-pairs_runtime": 3.6418, + "eval_nli-pairs_samples_per_second": 27.459, + "eval_nli-pairs_steps_per_second": 1.098, + "eval_sts-test_pearson_cosine": 0.7871366351762351, + "eval_sts-test_pearson_dot": 0.520292802271069, + "eval_sts-test_pearson_euclidean": 0.7358991589918665, + "eval_sts-test_pearson_manhattan": 0.7306487678482384, + "eval_sts-test_pearson_max": 0.7871366351762351, + "eval_sts-test_spearman_cosine": 0.8043053229220561, + "eval_sts-test_spearman_dot": 0.500924984433136, + "eval_sts-test_spearman_euclidean": 0.7279966902078664, + "eval_sts-test_spearman_manhattan": 0.7254635738312362, + "eval_sts-test_spearman_max": 0.8043053229220561, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_vitaminc-pairs_loss": 4.733531475067139, + "eval_vitaminc-pairs_runtime": 1.1524, + "eval_vitaminc-pairs_samples_per_second": 73.759, + "eval_vitaminc-pairs_steps_per_second": 2.603, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_sts-label_loss": 3.589179515838623, + "eval_sts-label_runtime": 0.2802, + "eval_sts-label_samples_per_second": 356.831, + "eval_sts-label_steps_per_second": 14.273, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_qnli-contrastive_loss": 0.11559023708105087, + "eval_qnli-contrastive_runtime": 0.3803, + "eval_qnli-contrastive_samples_per_second": 262.956, + "eval_qnli-contrastive_steps_per_second": 10.518, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_scitail-pairs-qa_loss": 0.05958002060651779, + "eval_scitail-pairs-qa_runtime": 0.9171, + "eval_scitail-pairs-qa_samples_per_second": 109.042, + "eval_scitail-pairs-qa_steps_per_second": 4.362, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_scitail-pairs-pos_loss": 0.43254122138023376, + "eval_scitail-pairs-pos_runtime": 1.3676, + "eval_scitail-pairs-pos_samples_per_second": 73.118, + "eval_scitail-pairs-pos_steps_per_second": 2.925, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_xsum-pairs_loss": 0.248906210064888, + "eval_xsum-pairs_runtime": 0.9364, + "eval_xsum-pairs_samples_per_second": 106.797, + "eval_xsum-pairs_steps_per_second": 4.272, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_compression-pairs_loss": 0.08712127059698105, + "eval_compression-pairs_runtime": 0.2771, + "eval_compression-pairs_samples_per_second": 360.923, + "eval_compression-pairs_steps_per_second": 14.437, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_sciq_pairs_loss": 0.2863478362560272, + "eval_sciq_pairs_runtime": 4.1006, + "eval_sciq_pairs_samples_per_second": 24.386, + "eval_sciq_pairs_steps_per_second": 0.975, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_qasc_pairs_loss": 0.17710347473621368, + "eval_qasc_pairs_runtime": 1.0521, + "eval_qasc_pairs_samples_per_second": 95.051, + "eval_qasc_pairs_steps_per_second": 3.802, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_openbookqa_pairs_loss": 1.5271464586257935, + "eval_openbookqa_pairs_runtime": 0.8986, + "eval_openbookqa_pairs_samples_per_second": 111.286, + "eval_openbookqa_pairs_steps_per_second": 4.451, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_msmarco_pairs_loss": 0.5346755385398865, + "eval_msmarco_pairs_runtime": 2.0827, + "eval_msmarco_pairs_samples_per_second": 48.014, + "eval_msmarco_pairs_steps_per_second": 1.921, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_nq_pairs_loss": 0.24830152094364166, + "eval_nq_pairs_runtime": 4.5025, + "eval_nq_pairs_samples_per_second": 22.21, + "eval_nq_pairs_steps_per_second": 0.888, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_trivia_pairs_loss": 0.799673318862915, + "eval_trivia_pairs_runtime": 6.4664, + "eval_trivia_pairs_samples_per_second": 15.465, + "eval_trivia_pairs_steps_per_second": 0.619, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_quora_pairs_loss": 0.030656050890684128, + "eval_quora_pairs_runtime": 0.6818, + "eval_quora_pairs_samples_per_second": 146.669, + "eval_quora_pairs_steps_per_second": 5.867, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_gooaq_pairs_loss": 0.5036634802818298, + "eval_gooaq_pairs_runtime": 1.4051, + "eval_gooaq_pairs_samples_per_second": 71.169, + "eval_gooaq_pairs_steps_per_second": 2.847, + "step": 4480 + }, + { + "epoch": 0.7020843128036358, + "eval_mrpc_pairs_loss": 0.04455450549721718, + "eval_mrpc_pairs_runtime": 0.2642, + "eval_mrpc_pairs_samples_per_second": 378.478, + "eval_mrpc_pairs_steps_per_second": 15.139, + "step": 4480 + }, + { + "epoch": 0.7070992007522332, + "grad_norm": 15.19054889678955, + "learning_rate": 1.030555505304156e-05, + "loss": 0.7799, + "step": 4512 + }, + { + "epoch": 0.7121140887008306, + "grad_norm": 16.065160751342773, + "learning_rate": 1.0063582185731009e-05, + "loss": 0.8006, + "step": 4544 + }, + { + "epoch": 0.717128976649428, + "grad_norm": 3.2584469318389893, + "learning_rate": 9.823041515230937e-06, + "loss": 0.5123, + "step": 4576 + }, + { + "epoch": 0.7221438645980254, + "grad_norm": 2.2951438426971436, + "learning_rate": 9.584002829309324e-06, + "loss": 0.7421, + "step": 4608 + }, + { + "epoch": 0.7271587525466228, + "grad_norm": 21.291872024536133, + "learning_rate": 9.346535479965231e-06, + "loss": 0.9477, + "step": 4640 + }, + { + "epoch": 0.7321736404952202, + "grad_norm": 4.785529613494873, + "learning_rate": 9.11070836330775e-06, + "loss": 0.5021, + "step": 4672 + }, + { + "epoch": 0.7371885284438175, + "grad_norm": 1.7058138847351074, + "learning_rate": 8.876589899567312e-06, + "loss": 0.931, + "step": 4704 + }, + { + "epoch": 0.7422034163924149, + "grad_norm": 9.1055326461792, + "learning_rate": 8.644248013244963e-06, + "loss": 0.7777, + "step": 4736 + }, + { + "epoch": 0.7472183043410123, + "grad_norm": 3.6529128551483154, + "learning_rate": 8.413750113405556e-06, + "loss": 0.9462, + "step": 4768 + }, + { + "epoch": 0.7522331922896098, + "grad_norm": 0.5643049478530884, + "learning_rate": 8.185163074120399e-06, + "loss": 0.5846, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_nli-pairs_loss": 0.9291799664497375, + "eval_nli-pairs_runtime": 3.7498, + "eval_nli-pairs_samples_per_second": 26.668, + "eval_nli-pairs_steps_per_second": 1.067, + "eval_sts-test_pearson_cosine": 0.7855324842750789, + "eval_sts-test_pearson_dot": 0.5242204261314407, + "eval_sts-test_pearson_euclidean": 0.7349702751512333, + "eval_sts-test_pearson_manhattan": 0.7293454465410049, + "eval_sts-test_pearson_max": 0.7855324842750789, + "eval_sts-test_spearman_cosine": 0.8044211074352633, + "eval_sts-test_spearman_dot": 0.5021807579050959, + "eval_sts-test_spearman_euclidean": 0.7270456124616013, + "eval_sts-test_spearman_manhattan": 0.7246691951731193, + "eval_sts-test_spearman_max": 0.8044211074352633, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_vitaminc-pairs_loss": 4.687094688415527, + "eval_vitaminc-pairs_runtime": 1.1386, + "eval_vitaminc-pairs_samples_per_second": 74.654, + "eval_vitaminc-pairs_steps_per_second": 2.635, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_sts-label_loss": 3.8013510704040527, + "eval_sts-label_runtime": 0.2716, + "eval_sts-label_samples_per_second": 368.125, + "eval_sts-label_steps_per_second": 14.725, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_qnli-contrastive_loss": 0.1414812207221985, + "eval_qnli-contrastive_runtime": 0.3601, + "eval_qnli-contrastive_samples_per_second": 277.73, + "eval_qnli-contrastive_steps_per_second": 11.109, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_scitail-pairs-qa_loss": 0.05851547792553902, + "eval_scitail-pairs-qa_runtime": 0.8864, + "eval_scitail-pairs-qa_samples_per_second": 112.817, + "eval_scitail-pairs-qa_steps_per_second": 4.513, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_scitail-pairs-pos_loss": 0.4562886357307434, + "eval_scitail-pairs-pos_runtime": 1.3535, + "eval_scitail-pairs-pos_samples_per_second": 73.88, + "eval_scitail-pairs-pos_steps_per_second": 2.955, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_xsum-pairs_loss": 0.23483119904994965, + "eval_xsum-pairs_runtime": 0.9336, + "eval_xsum-pairs_samples_per_second": 107.109, + "eval_xsum-pairs_steps_per_second": 4.284, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_compression-pairs_loss": 0.08680214732885361, + "eval_compression-pairs_runtime": 0.2716, + "eval_compression-pairs_samples_per_second": 368.254, + "eval_compression-pairs_steps_per_second": 14.73, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_sciq_pairs_loss": 0.2816057801246643, + "eval_sciq_pairs_runtime": 4.0742, + "eval_sciq_pairs_samples_per_second": 24.545, + "eval_sciq_pairs_steps_per_second": 0.982, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_qasc_pairs_loss": 0.17035560309886932, + "eval_qasc_pairs_runtime": 1.0717, + "eval_qasc_pairs_samples_per_second": 93.311, + "eval_qasc_pairs_steps_per_second": 3.732, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_openbookqa_pairs_loss": 1.5671054124832153, + "eval_openbookqa_pairs_runtime": 0.8973, + "eval_openbookqa_pairs_samples_per_second": 111.441, + "eval_openbookqa_pairs_steps_per_second": 4.458, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_msmarco_pairs_loss": 0.5062486529350281, + "eval_msmarco_pairs_runtime": 2.0609, + "eval_msmarco_pairs_samples_per_second": 48.524, + "eval_msmarco_pairs_steps_per_second": 1.941, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_nq_pairs_loss": 0.22875532507896423, + "eval_nq_pairs_runtime": 4.5041, + "eval_nq_pairs_samples_per_second": 22.202, + "eval_nq_pairs_steps_per_second": 0.888, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_trivia_pairs_loss": 0.8119627237319946, + "eval_trivia_pairs_runtime": 6.4609, + "eval_trivia_pairs_samples_per_second": 15.478, + "eval_trivia_pairs_steps_per_second": 0.619, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_quora_pairs_loss": 0.06211049482226372, + "eval_quora_pairs_runtime": 0.6765, + "eval_quora_pairs_samples_per_second": 147.827, + "eval_quora_pairs_steps_per_second": 5.913, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_gooaq_pairs_loss": 0.4847571551799774, + "eval_gooaq_pairs_runtime": 1.3911, + "eval_gooaq_pairs_samples_per_second": 71.886, + "eval_gooaq_pairs_steps_per_second": 2.875, + "step": 4800 + }, + { + "epoch": 0.7522331922896098, + "eval_mrpc_pairs_loss": 0.04384278133511543, + "eval_mrpc_pairs_runtime": 0.2617, + "eval_mrpc_pairs_samples_per_second": 382.146, + "eval_mrpc_pairs_steps_per_second": 15.286, + "step": 4800 + }, + { + "epoch": 0.7572480802382072, + "grad_norm": 14.555929183959961, + "learning_rate": 7.958553215065208e-06, + "loss": 0.6735, + "step": 4832 + }, + { + "epoch": 0.7622629681868046, + "grad_norm": 10.30207347869873, + "learning_rate": 7.733986282278816e-06, + "loss": 1.1569, + "step": 4864 + }, + { + "epoch": 0.767277856135402, + "grad_norm": 17.255786895751953, + "learning_rate": 7.511527429088396e-06, + "loss": 0.9749, + "step": 4896 + }, + { + "epoch": 0.7722927440839994, + "grad_norm": 14.730864524841309, + "learning_rate": 7.291241197206574e-06, + "loss": 0.6581, + "step": 4928 + }, + { + "epoch": 0.7773076320325968, + "grad_norm": 8.807291984558105, + "learning_rate": 7.07319149800605e-06, + "loss": 0.6979, + "step": 4960 + }, + { + "epoch": 0.7823225199811942, + "grad_norm": 0.6080070734024048, + "learning_rate": 6.857441593977046e-06, + "loss": 0.7582, + "step": 4992 + }, + { + "epoch": 0.7873374079297916, + "grad_norm": 2.2002525329589844, + "learning_rate": 6.6440540803730425e-06, + "loss": 1.0082, + "step": 5024 + }, + { + "epoch": 0.792352295878389, + "grad_norm": 8.624346733093262, + "learning_rate": 6.433090867050122e-06, + "loss": 0.6206, + "step": 5056 + }, + { + "epoch": 0.7973671838269863, + "grad_norm": 0.9821205139160156, + "learning_rate": 6.224613160505094e-06, + "loss": 0.5165, + "step": 5088 + }, + { + "epoch": 0.8023820717755837, + "grad_norm": 4.104696750640869, + "learning_rate": 6.018681446117773e-06, + "loss": 0.4914, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_nli-pairs_loss": 0.8841198682785034, + "eval_nli-pairs_runtime": 4.1793, + "eval_nli-pairs_samples_per_second": 23.928, + "eval_nli-pairs_steps_per_second": 0.957, + "eval_sts-test_pearson_cosine": 0.7866468635321827, + "eval_sts-test_pearson_dot": 0.5124924570863083, + "eval_sts-test_pearson_euclidean": 0.7320768163626257, + "eval_sts-test_pearson_manhattan": 0.7266238528084388, + "eval_sts-test_pearson_max": 0.7866468635321827, + "eval_sts-test_spearman_cosine": 0.8041619306345255, + "eval_sts-test_spearman_dot": 0.4913316974763461, + "eval_sts-test_spearman_euclidean": 0.7232005770314757, + "eval_sts-test_spearman_manhattan": 0.7207683852583252, + "eval_sts-test_spearman_max": 0.8041619306345255, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_vitaminc-pairs_loss": 4.725103855133057, + "eval_vitaminc-pairs_runtime": 1.2146, + "eval_vitaminc-pairs_samples_per_second": 69.982, + "eval_vitaminc-pairs_steps_per_second": 2.47, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_sts-label_loss": 3.6535470485687256, + "eval_sts-label_runtime": 0.3164, + "eval_sts-label_samples_per_second": 316.056, + "eval_sts-label_steps_per_second": 12.642, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_qnli-contrastive_loss": 0.10529302805662155, + "eval_qnli-contrastive_runtime": 0.368, + "eval_qnli-contrastive_samples_per_second": 271.711, + "eval_qnli-contrastive_steps_per_second": 10.868, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_scitail-pairs-qa_loss": 0.05555274337530136, + "eval_scitail-pairs-qa_runtime": 0.9542, + "eval_scitail-pairs-qa_samples_per_second": 104.795, + "eval_scitail-pairs-qa_steps_per_second": 4.192, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_scitail-pairs-pos_loss": 0.4785614013671875, + "eval_scitail-pairs-pos_runtime": 1.4937, + "eval_scitail-pairs-pos_samples_per_second": 66.949, + "eval_scitail-pairs-pos_steps_per_second": 2.678, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_xsum-pairs_loss": 0.2355932593345642, + "eval_xsum-pairs_runtime": 0.9396, + "eval_xsum-pairs_samples_per_second": 106.432, + "eval_xsum-pairs_steps_per_second": 4.257, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_compression-pairs_loss": 0.083825021982193, + "eval_compression-pairs_runtime": 0.2789, + "eval_compression-pairs_samples_per_second": 358.564, + "eval_compression-pairs_steps_per_second": 14.343, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_sciq_pairs_loss": 0.28157705068588257, + "eval_sciq_pairs_runtime": 4.1947, + "eval_sciq_pairs_samples_per_second": 23.84, + "eval_sciq_pairs_steps_per_second": 0.954, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_qasc_pairs_loss": 0.1739024668931961, + "eval_qasc_pairs_runtime": 1.1277, + "eval_qasc_pairs_samples_per_second": 88.676, + "eval_qasc_pairs_steps_per_second": 3.547, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_openbookqa_pairs_loss": 1.591935396194458, + "eval_openbookqa_pairs_runtime": 1.0022, + "eval_openbookqa_pairs_samples_per_second": 99.782, + "eval_openbookqa_pairs_steps_per_second": 3.991, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_msmarco_pairs_loss": 0.5132349133491516, + "eval_msmarco_pairs_runtime": 2.1322, + "eval_msmarco_pairs_samples_per_second": 46.901, + "eval_msmarco_pairs_steps_per_second": 1.876, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_nq_pairs_loss": 0.2343132346868515, + "eval_nq_pairs_runtime": 4.5529, + "eval_nq_pairs_samples_per_second": 21.964, + "eval_nq_pairs_steps_per_second": 0.879, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_trivia_pairs_loss": 0.7988561987876892, + "eval_trivia_pairs_runtime": 6.5661, + "eval_trivia_pairs_samples_per_second": 15.23, + "eval_trivia_pairs_steps_per_second": 0.609, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_quora_pairs_loss": 0.05578049644827843, + "eval_quora_pairs_runtime": 0.8028, + "eval_quora_pairs_samples_per_second": 124.564, + "eval_quora_pairs_steps_per_second": 4.983, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_gooaq_pairs_loss": 0.48901888728141785, + "eval_gooaq_pairs_runtime": 1.5605, + "eval_gooaq_pairs_samples_per_second": 64.082, + "eval_gooaq_pairs_steps_per_second": 2.563, + "step": 5120 + }, + { + "epoch": 0.8023820717755837, + "eval_mrpc_pairs_loss": 0.04172317683696747, + "eval_mrpc_pairs_runtime": 0.2628, + "eval_mrpc_pairs_samples_per_second": 380.505, + "eval_mrpc_pairs_steps_per_second": 15.22, + "step": 5120 + }, + { + "epoch": 0.8073969597241811, + "grad_norm": 9.413043022155762, + "learning_rate": 5.815355470602388e-06, + "loss": 1.098, + "step": 5152 + }, + { + "epoch": 0.8124118476727785, + "grad_norm": 0.25412222743034363, + "learning_rate": 5.614694224673387e-06, + "loss": 0.821, + "step": 5184 + }, + { + "epoch": 0.8174267356213759, + "grad_norm": 18.76092529296875, + "learning_rate": 5.416755925930494e-06, + "loss": 0.9351, + "step": 5216 + }, + { + "epoch": 0.8224416235699734, + "grad_norm": 19.607337951660156, + "learning_rate": 5.221598001968132e-06, + "loss": 0.8784, + "step": 5248 + }, + { + "epoch": 0.8274565115185708, + "grad_norm": 3.2164149284362793, + "learning_rate": 5.029277073714009e-06, + "loss": 0.8326, + "step": 5280 + }, + { + "epoch": 0.8324713994671682, + "grad_norm": 11.156713485717773, + "learning_rate": 4.839848939001789e-06, + "loss": 0.7551, + "step": 5312 + }, + { + "epoch": 0.8374862874157656, + "grad_norm": 8.80623722076416, + "learning_rate": 4.653368556382492e-06, + "loss": 0.8234, + "step": 5344 + }, + { + "epoch": 0.842501175364363, + "grad_norm": 16.081491470336914, + "learning_rate": 4.469890029179472e-06, + "loss": 1.0922, + "step": 5376 + }, + { + "epoch": 0.8475160633129604, + "grad_norm": 0.8583326935768127, + "learning_rate": 4.2894665897914794e-06, + "loss": 1.0925, + "step": 5408 + }, + { + "epoch": 0.8525309512615578, + "grad_norm": 7.903942108154297, + "learning_rate": 4.112150584248388e-06, + "loss": 1.099, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_nli-pairs_loss": 0.8909263014793396, + "eval_nli-pairs_runtime": 3.6329, + "eval_nli-pairs_samples_per_second": 27.526, + "eval_nli-pairs_steps_per_second": 1.101, + "eval_sts-test_pearson_cosine": 0.7892673589571536, + "eval_sts-test_pearson_dot": 0.5308666684424199, + "eval_sts-test_pearson_euclidean": 0.7372214599353599, + "eval_sts-test_pearson_manhattan": 0.73149442324126, + "eval_sts-test_pearson_max": 0.7892673589571536, + "eval_sts-test_spearman_cosine": 0.8088174691107087, + "eval_sts-test_spearman_dot": 0.5097841799376374, + "eval_sts-test_spearman_euclidean": 0.7291099552995026, + "eval_sts-test_spearman_manhattan": 0.7255023946868168, + "eval_sts-test_spearman_max": 0.8088174691107087, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_vitaminc-pairs_loss": 4.696901798248291, + "eval_vitaminc-pairs_runtime": 1.13, + "eval_vitaminc-pairs_samples_per_second": 75.219, + "eval_vitaminc-pairs_steps_per_second": 2.655, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_sts-label_loss": 3.794584274291992, + "eval_sts-label_runtime": 0.2757, + "eval_sts-label_samples_per_second": 362.777, + "eval_sts-label_steps_per_second": 14.511, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_qnli-contrastive_loss": 0.1291896551847458, + "eval_qnli-contrastive_runtime": 0.3577, + "eval_qnli-contrastive_samples_per_second": 279.536, + "eval_qnli-contrastive_steps_per_second": 11.181, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_scitail-pairs-qa_loss": 0.05729294940829277, + "eval_scitail-pairs-qa_runtime": 0.8757, + "eval_scitail-pairs-qa_samples_per_second": 114.199, + "eval_scitail-pairs-qa_steps_per_second": 4.568, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_scitail-pairs-pos_loss": 0.47140783071517944, + "eval_scitail-pairs-pos_runtime": 1.3328, + "eval_scitail-pairs-pos_samples_per_second": 75.031, + "eval_scitail-pairs-pos_steps_per_second": 3.001, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_xsum-pairs_loss": 0.2317724972963333, + "eval_xsum-pairs_runtime": 0.934, + "eval_xsum-pairs_samples_per_second": 107.065, + "eval_xsum-pairs_steps_per_second": 4.283, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_compression-pairs_loss": 0.0849599540233612, + "eval_compression-pairs_runtime": 0.2772, + "eval_compression-pairs_samples_per_second": 360.752, + "eval_compression-pairs_steps_per_second": 14.43, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_sciq_pairs_loss": 0.2746911346912384, + "eval_sciq_pairs_runtime": 4.0398, + "eval_sciq_pairs_samples_per_second": 24.754, + "eval_sciq_pairs_steps_per_second": 0.99, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_qasc_pairs_loss": 0.16956950724124908, + "eval_qasc_pairs_runtime": 1.0682, + "eval_qasc_pairs_samples_per_second": 93.615, + "eval_qasc_pairs_steps_per_second": 3.745, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_openbookqa_pairs_loss": 1.5424996614456177, + "eval_openbookqa_pairs_runtime": 0.8928, + "eval_openbookqa_pairs_samples_per_second": 112.006, + "eval_openbookqa_pairs_steps_per_second": 4.48, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_msmarco_pairs_loss": 0.5047981142997742, + "eval_msmarco_pairs_runtime": 2.0436, + "eval_msmarco_pairs_samples_per_second": 48.932, + "eval_msmarco_pairs_steps_per_second": 1.957, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_nq_pairs_loss": 0.230237677693367, + "eval_nq_pairs_runtime": 4.5251, + "eval_nq_pairs_samples_per_second": 22.099, + "eval_nq_pairs_steps_per_second": 0.884, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_trivia_pairs_loss": 0.7567735314369202, + "eval_trivia_pairs_runtime": 6.4545, + "eval_trivia_pairs_samples_per_second": 15.493, + "eval_trivia_pairs_steps_per_second": 0.62, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_quora_pairs_loss": 0.05753583088517189, + "eval_quora_pairs_runtime": 0.6769, + "eval_quora_pairs_samples_per_second": 147.736, + "eval_quora_pairs_steps_per_second": 5.909, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_gooaq_pairs_loss": 0.49448758363723755, + "eval_gooaq_pairs_runtime": 1.3984, + "eval_gooaq_pairs_samples_per_second": 71.51, + "eval_gooaq_pairs_steps_per_second": 2.86, + "step": 5440 + }, + { + "epoch": 0.8525309512615578, + "eval_mrpc_pairs_loss": 0.04384453222155571, + "eval_mrpc_pairs_runtime": 0.2653, + "eval_mrpc_pairs_samples_per_second": 376.996, + "eval_mrpc_pairs_steps_per_second": 15.08, + "step": 5440 + }, + { + "epoch": 0.8575458392101551, + "grad_norm": 0.8697513341903687, + "learning_rate": 3.93799345702415e-06, + "loss": 0.5396, + "step": 5472 + }, + { + "epoch": 0.8625607271587525, + "grad_norm": 8.337197303771973, + "learning_rate": 3.7670457361112116e-06, + "loss": 0.6636, + "step": 5504 + }, + { + "epoch": 0.8675756151073499, + "grad_norm": 0.3655373156070709, + "learning_rate": 3.5993570183609596e-06, + "loss": 1.0095, + "step": 5536 + }, + { + "epoch": 0.8725905030559473, + "grad_norm": 13.748374938964844, + "learning_rate": 3.4349759550941933e-06, + "loss": 0.631, + "step": 5568 + }, + { + "epoch": 0.8776053910045447, + "grad_norm": 15.683762550354004, + "learning_rate": 3.273950237986013e-06, + "loss": 0.5415, + "step": 5600 + }, + { + "epoch": 0.8826202789531421, + "grad_norm": 10.004467964172363, + "learning_rate": 3.11632658522906e-06, + "loss": 0.9227, + "step": 5632 + }, + { + "epoch": 0.8876351669017395, + "grad_norm": 12.990907669067383, + "learning_rate": 2.9621507279792564e-06, + "loss": 0.8991, + "step": 5664 + }, + { + "epoch": 0.8926500548503369, + "grad_norm": 0.4619373679161072, + "learning_rate": 2.8114673970878584e-06, + "loss": 0.5068, + "step": 5696 + }, + { + "epoch": 0.8976649427989344, + "grad_norm": 8.317788124084473, + "learning_rate": 2.664320310123768e-06, + "loss": 1.2134, + "step": 5728 + }, + { + "epoch": 0.9026798307475318, + "grad_norm": 0.38993319869041443, + "learning_rate": 2.5207521586897876e-06, + "loss": 0.4651, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_nli-pairs_loss": 0.8765493631362915, + "eval_nli-pairs_runtime": 3.6164, + "eval_nli-pairs_samples_per_second": 27.652, + "eval_nli-pairs_steps_per_second": 1.106, + "eval_sts-test_pearson_cosine": 0.7880147168961996, + "eval_sts-test_pearson_dot": 0.5198107156003906, + "eval_sts-test_pearson_euclidean": 0.7362840264051249, + "eval_sts-test_pearson_manhattan": 0.7307716823389564, + "eval_sts-test_pearson_max": 0.7880147168961996, + "eval_sts-test_spearman_cosine": 0.8071394355093185, + "eval_sts-test_spearman_dot": 0.49865317522814645, + "eval_sts-test_spearman_euclidean": 0.7278395467197664, + "eval_sts-test_spearman_manhattan": 0.7246934378777047, + "eval_sts-test_spearman_max": 0.8071394355093185, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_vitaminc-pairs_loss": 4.717629432678223, + "eval_vitaminc-pairs_runtime": 1.1248, + "eval_vitaminc-pairs_samples_per_second": 75.571, + "eval_vitaminc-pairs_steps_per_second": 2.667, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_sts-label_loss": 3.7598328590393066, + "eval_sts-label_runtime": 0.2743, + "eval_sts-label_samples_per_second": 364.548, + "eval_sts-label_steps_per_second": 14.582, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_qnli-contrastive_loss": 0.11829647421836853, + "eval_qnli-contrastive_runtime": 0.3606, + "eval_qnli-contrastive_samples_per_second": 277.334, + "eval_qnli-contrastive_steps_per_second": 11.093, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_scitail-pairs-qa_loss": 0.05503571406006813, + "eval_scitail-pairs-qa_runtime": 0.874, + "eval_scitail-pairs-qa_samples_per_second": 114.411, + "eval_scitail-pairs-qa_steps_per_second": 4.576, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_scitail-pairs-pos_loss": 0.47530597448349, + "eval_scitail-pairs-pos_runtime": 1.3429, + "eval_scitail-pairs-pos_samples_per_second": 74.463, + "eval_scitail-pairs-pos_steps_per_second": 2.979, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_xsum-pairs_loss": 0.22936196625232697, + "eval_xsum-pairs_runtime": 0.9431, + "eval_xsum-pairs_samples_per_second": 106.028, + "eval_xsum-pairs_steps_per_second": 4.241, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_compression-pairs_loss": 0.08313465863466263, + "eval_compression-pairs_runtime": 0.2781, + "eval_compression-pairs_samples_per_second": 359.542, + "eval_compression-pairs_steps_per_second": 14.382, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_sciq_pairs_loss": 0.27646955847740173, + "eval_sciq_pairs_runtime": 4.0554, + "eval_sciq_pairs_samples_per_second": 24.658, + "eval_sciq_pairs_steps_per_second": 0.986, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_qasc_pairs_loss": 0.17006540298461914, + "eval_qasc_pairs_runtime": 1.0538, + "eval_qasc_pairs_samples_per_second": 94.898, + "eval_qasc_pairs_steps_per_second": 3.796, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_openbookqa_pairs_loss": 1.5487664937973022, + "eval_openbookqa_pairs_runtime": 0.8956, + "eval_openbookqa_pairs_samples_per_second": 111.653, + "eval_openbookqa_pairs_steps_per_second": 4.466, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_msmarco_pairs_loss": 0.4861982464790344, + "eval_msmarco_pairs_runtime": 2.0548, + "eval_msmarco_pairs_samples_per_second": 48.666, + "eval_msmarco_pairs_steps_per_second": 1.947, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_nq_pairs_loss": 0.22520922124385834, + "eval_nq_pairs_runtime": 4.4973, + "eval_nq_pairs_samples_per_second": 22.236, + "eval_nq_pairs_steps_per_second": 0.889, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_trivia_pairs_loss": 0.7480303049087524, + "eval_trivia_pairs_runtime": 6.498, + "eval_trivia_pairs_samples_per_second": 15.389, + "eval_trivia_pairs_steps_per_second": 0.616, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_quora_pairs_loss": 0.06060533598065376, + "eval_quora_pairs_runtime": 0.6722, + "eval_quora_pairs_samples_per_second": 148.76, + "eval_quora_pairs_steps_per_second": 5.95, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_gooaq_pairs_loss": 0.4696855843067169, + "eval_gooaq_pairs_runtime": 1.3985, + "eval_gooaq_pairs_samples_per_second": 71.503, + "eval_gooaq_pairs_steps_per_second": 2.86, + "step": 5760 + }, + { + "epoch": 0.9026798307475318, + "eval_mrpc_pairs_loss": 0.04175671190023422, + "eval_mrpc_pairs_runtime": 0.2618, + "eval_mrpc_pairs_samples_per_second": 381.956, + "eval_mrpc_pairs_steps_per_second": 15.278, + "step": 5760 + }, + { + "epoch": 0.9076947186961292, + "grad_norm": 19.970914840698242, + "learning_rate": 2.3808045960365743e-06, + "loss": 0.6346, + "step": 5792 + }, + { + "epoch": 0.9127096066447266, + "grad_norm": 7.2970075607299805, + "learning_rate": 2.2445182249778363e-06, + "loss": 1.1103, + "step": 5824 + }, + { + "epoch": 0.917724494593324, + "grad_norm": 14.34080982208252, + "learning_rate": 2.1119325861102666e-06, + "loss": 0.7667, + "step": 5856 + }, + { + "epoch": 0.9227393825419213, + "grad_norm": 16.219850540161133, + "learning_rate": 1.98308614634171e-06, + "loss": 0.9174, + "step": 5888 + }, + { + "epoch": 0.9277542704905187, + "grad_norm": 17.201740264892578, + "learning_rate": 1.8580162877307744e-06, + "loss": 0.7609, + "step": 5920 + }, + { + "epoch": 0.9327691584391161, + "grad_norm": 12.591241836547852, + "learning_rate": 1.7367592966412454e-06, + "loss": 0.8993, + "step": 5952 + }, + { + "epoch": 0.9377840463877135, + "grad_norm": 17.12389373779297, + "learning_rate": 1.619350353214355e-06, + "loss": 0.7587, + "step": 5984 + }, + { + "epoch": 0.9427989343363109, + "grad_norm": 44.237342834472656, + "learning_rate": 1.5058235211620126e-06, + "loss": 0.935, + "step": 6016 + }, + { + "epoch": 0.9478138222849083, + "grad_norm": 4.658092975616455, + "learning_rate": 1.3962117378839439e-06, + "loss": 0.8551, + "step": 6048 + }, + { + "epoch": 0.9528287102335057, + "grad_norm": 0.4202437698841095, + "learning_rate": 1.2905468049116077e-06, + "loss": 1.4247, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_nli-pairs_loss": 0.880797266960144, + "eval_nli-pairs_runtime": 3.65, + "eval_nli-pairs_samples_per_second": 27.397, + "eval_nli-pairs_steps_per_second": 1.096, + "eval_sts-test_pearson_cosine": 0.7886384880168056, + "eval_sts-test_pearson_dot": 0.5209320238457065, + "eval_sts-test_pearson_euclidean": 0.7365619856047663, + "eval_sts-test_pearson_manhattan": 0.7309874377904119, + "eval_sts-test_pearson_max": 0.7886384880168056, + "eval_sts-test_spearman_cosine": 0.8078306606920327, + "eval_sts-test_spearman_dot": 0.4995671547413244, + "eval_sts-test_spearman_euclidean": 0.7281379887760366, + "eval_sts-test_spearman_manhattan": 0.7249545388844193, + "eval_sts-test_spearman_max": 0.8078306606920327, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_vitaminc-pairs_loss": 4.70750617980957, + "eval_vitaminc-pairs_runtime": 1.1372, + "eval_vitaminc-pairs_samples_per_second": 74.747, + "eval_vitaminc-pairs_steps_per_second": 2.638, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_sts-label_loss": 3.7686922550201416, + "eval_sts-label_runtime": 0.2807, + "eval_sts-label_samples_per_second": 356.243, + "eval_sts-label_steps_per_second": 14.25, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_qnli-contrastive_loss": 0.12000326067209244, + "eval_qnli-contrastive_runtime": 0.3651, + "eval_qnli-contrastive_samples_per_second": 273.878, + "eval_qnli-contrastive_steps_per_second": 10.955, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_scitail-pairs-qa_loss": 0.055266913026571274, + "eval_scitail-pairs-qa_runtime": 0.8813, + "eval_scitail-pairs-qa_samples_per_second": 113.472, + "eval_scitail-pairs-qa_steps_per_second": 4.539, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_scitail-pairs-pos_loss": 0.46404972672462463, + "eval_scitail-pairs-pos_runtime": 1.3468, + "eval_scitail-pairs-pos_samples_per_second": 74.248, + "eval_scitail-pairs-pos_steps_per_second": 2.97, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_xsum-pairs_loss": 0.22768865525722504, + "eval_xsum-pairs_runtime": 0.9385, + "eval_xsum-pairs_samples_per_second": 106.553, + "eval_xsum-pairs_steps_per_second": 4.262, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_compression-pairs_loss": 0.08245458453893661, + "eval_compression-pairs_runtime": 0.2783, + "eval_compression-pairs_samples_per_second": 359.331, + "eval_compression-pairs_steps_per_second": 14.373, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_sciq_pairs_loss": 0.24696679413318634, + "eval_sciq_pairs_runtime": 4.072, + "eval_sciq_pairs_samples_per_second": 24.558, + "eval_sciq_pairs_steps_per_second": 0.982, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_qasc_pairs_loss": 0.16628116369247437, + "eval_qasc_pairs_runtime": 1.066, + "eval_qasc_pairs_samples_per_second": 93.809, + "eval_qasc_pairs_steps_per_second": 3.752, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_openbookqa_pairs_loss": 1.5343760251998901, + "eval_openbookqa_pairs_runtime": 0.9064, + "eval_openbookqa_pairs_samples_per_second": 110.324, + "eval_openbookqa_pairs_steps_per_second": 4.413, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_msmarco_pairs_loss": 0.48861968517303467, + "eval_msmarco_pairs_runtime": 2.0777, + "eval_msmarco_pairs_samples_per_second": 48.131, + "eval_msmarco_pairs_steps_per_second": 1.925, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_nq_pairs_loss": 0.2192871868610382, + "eval_nq_pairs_runtime": 4.5629, + "eval_nq_pairs_samples_per_second": 21.916, + "eval_nq_pairs_steps_per_second": 0.877, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_trivia_pairs_loss": 0.7455114126205444, + "eval_trivia_pairs_runtime": 6.4434, + "eval_trivia_pairs_samples_per_second": 15.52, + "eval_trivia_pairs_steps_per_second": 0.621, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_quora_pairs_loss": 0.0536942183971405, + "eval_quora_pairs_runtime": 0.6874, + "eval_quora_pairs_samples_per_second": 145.481, + "eval_quora_pairs_steps_per_second": 5.819, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_gooaq_pairs_loss": 0.4775075614452362, + "eval_gooaq_pairs_runtime": 1.3946, + "eval_gooaq_pairs_samples_per_second": 71.707, + "eval_gooaq_pairs_steps_per_second": 2.868, + "step": 6080 + }, + { + "epoch": 0.9528287102335057, + "eval_mrpc_pairs_loss": 0.041804660111665726, + "eval_mrpc_pairs_runtime": 0.2631, + "eval_mrpc_pairs_samples_per_second": 380.035, + "eval_mrpc_pairs_steps_per_second": 15.201, + "step": 6080 + }, + { + "epoch": 0.9578435981821031, + "grad_norm": 15.8797607421875, + "learning_rate": 1.1888593786816527e-06, + "loss": 0.3377, + "step": 6112 + }, + { + "epoch": 0.9628584861307005, + "grad_norm": 54.2625732421875, + "learning_rate": 1.0911789616415957e-06, + "loss": 1.163, + "step": 6144 + }, + { + "epoch": 0.967873374079298, + "grad_norm": 27.014169692993164, + "learning_rate": 9.975338936903327e-07, + "loss": 1.1638, + "step": 6176 + }, + { + "epoch": 0.9728882620278954, + "grad_norm": 12.264323234558105, + "learning_rate": 9.079513439558945e-07, + "loss": 0.7428, + "step": 6208 + }, + { + "epoch": 0.9779031499764927, + "grad_norm": 0.2486962229013443, + "learning_rate": 8.224573029129201e-07, + "loss": 0.3827, + "step": 6240 + }, + { + "epoch": 0.9829180379250901, + "grad_norm": 0.19951488077640533, + "learning_rate": 7.41076574842064e-07, + "loss": 1.0739, + "step": 6272 + }, + { + "epoch": 0.9879329258736875, + "grad_norm": 1.6168636083602905, + "learning_rate": 6.638327706335673e-07, + "loss": 0.7049, + "step": 6304 + }, + { + "epoch": 0.9929478138222849, + "grad_norm": 1.4084432125091553, + "learning_rate": 5.907483009370463e-07, + "loss": 0.9298, + "step": 6336 + }, + { + "epoch": 0.9979627017708823, + "grad_norm": 0.7779116630554199, + "learning_rate": 5.218443696595343e-07, + "loss": 0.6243, + "step": 6368 + }, + { + "epoch": 1.0029775897194797, + "grad_norm": 10.389066696166992, + "learning_rate": 4.5714096781360346e-07, + "loss": 0.8693, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_nli-pairs_loss": 0.8764966726303101, + "eval_nli-pairs_runtime": 3.9731, + "eval_nli-pairs_samples_per_second": 25.169, + "eval_nli-pairs_steps_per_second": 1.007, + "eval_sts-test_pearson_cosine": 0.7883389668315285, + "eval_sts-test_pearson_dot": 0.517346671859764, + "eval_sts-test_pearson_euclidean": 0.7353164199200737, + "eval_sts-test_pearson_manhattan": 0.7297049415657237, + "eval_sts-test_pearson_max": 0.7883389668315285, + "eval_sts-test_spearman_cosine": 0.8072800949662179, + "eval_sts-test_spearman_dot": 0.4963365732568842, + "eval_sts-test_spearman_euclidean": 0.7268218204343426, + "eval_sts-test_spearman_manhattan": 0.7238000634035274, + "eval_sts-test_spearman_max": 0.8072800949662179, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_vitaminc-pairs_loss": 4.692606449127197, + "eval_vitaminc-pairs_runtime": 1.1964, + "eval_vitaminc-pairs_samples_per_second": 71.046, + "eval_vitaminc-pairs_steps_per_second": 2.508, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_sts-label_loss": 3.7494537830352783, + "eval_sts-label_runtime": 0.2884, + "eval_sts-label_samples_per_second": 346.773, + "eval_sts-label_steps_per_second": 13.871, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_qnli-contrastive_loss": 0.11221926659345627, + "eval_qnli-contrastive_runtime": 0.366, + "eval_qnli-contrastive_samples_per_second": 273.23, + "eval_qnli-contrastive_steps_per_second": 10.929, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_scitail-pairs-qa_loss": 0.05439920350909233, + "eval_scitail-pairs-qa_runtime": 1.0826, + "eval_scitail-pairs-qa_samples_per_second": 92.37, + "eval_scitail-pairs-qa_steps_per_second": 3.695, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_scitail-pairs-pos_loss": 0.47426754236221313, + "eval_scitail-pairs-pos_runtime": 1.4478, + "eval_scitail-pairs-pos_samples_per_second": 69.07, + "eval_scitail-pairs-pos_steps_per_second": 2.763, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_xsum-pairs_loss": 0.22696803510189056, + "eval_xsum-pairs_runtime": 0.9498, + "eval_xsum-pairs_samples_per_second": 105.287, + "eval_xsum-pairs_steps_per_second": 4.211, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_compression-pairs_loss": 0.08134880661964417, + "eval_compression-pairs_runtime": 0.2978, + "eval_compression-pairs_samples_per_second": 335.83, + "eval_compression-pairs_steps_per_second": 13.433, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_sciq_pairs_loss": 0.22929410636425018, + "eval_sciq_pairs_runtime": 4.3229, + "eval_sciq_pairs_samples_per_second": 23.132, + "eval_sciq_pairs_steps_per_second": 0.925, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_qasc_pairs_loss": 0.16514292359352112, + "eval_qasc_pairs_runtime": 1.1535, + "eval_qasc_pairs_samples_per_second": 86.694, + "eval_qasc_pairs_steps_per_second": 3.468, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_openbookqa_pairs_loss": 1.5505836009979248, + "eval_openbookqa_pairs_runtime": 0.9784, + "eval_openbookqa_pairs_samples_per_second": 102.21, + "eval_openbookqa_pairs_steps_per_second": 4.088, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_msmarco_pairs_loss": 0.48988625407218933, + "eval_msmarco_pairs_runtime": 2.1515, + "eval_msmarco_pairs_samples_per_second": 46.48, + "eval_msmarco_pairs_steps_per_second": 1.859, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_nq_pairs_loss": 0.21817754209041595, + "eval_nq_pairs_runtime": 4.6579, + "eval_nq_pairs_samples_per_second": 21.469, + "eval_nq_pairs_steps_per_second": 0.859, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_trivia_pairs_loss": 0.7522485852241516, + "eval_trivia_pairs_runtime": 6.6903, + "eval_trivia_pairs_samples_per_second": 14.947, + "eval_trivia_pairs_steps_per_second": 0.598, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_quora_pairs_loss": 0.026629021391272545, + "eval_quora_pairs_runtime": 0.7757, + "eval_quora_pairs_samples_per_second": 128.912, + "eval_quora_pairs_steps_per_second": 5.156, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_gooaq_pairs_loss": 0.47202804684638977, + "eval_gooaq_pairs_runtime": 1.5337, + "eval_gooaq_pairs_samples_per_second": 65.202, + "eval_gooaq_pairs_steps_per_second": 2.608, + "step": 6400 + }, + { + "epoch": 1.0029775897194797, + "eval_mrpc_pairs_loss": 0.041211605072021484, + "eval_mrpc_pairs_runtime": 0.2796, + "eval_mrpc_pairs_samples_per_second": 357.626, + "eval_mrpc_pairs_steps_per_second": 14.305, + "step": 6400 + }, + { + "epoch": 1.007992477668077, + "grad_norm": 0.12192127108573914, + "learning_rate": 3.9665686771741374e-07, + "loss": 0.731, + "step": 6432 + }, + { + "epoch": 1.0130073656166745, + "grad_norm": 4.465780258178711, + "learning_rate": 3.404096175483029e-07, + "loss": 0.7662, + "step": 6464 + }, + { + "epoch": 1.0180222535652719, + "grad_norm": 19.539562225341797, + "learning_rate": 2.8841553625157116e-07, + "loss": 0.5362, + "step": 6496 + }, + { + "epoch": 1.0230371415138693, + "grad_norm": 1.8675719499588013, + "learning_rate": 2.406897088058863e-07, + "loss": 0.9786, + "step": 6528 + }, + { + "epoch": 1.0280520294624667, + "grad_norm": 1.5663179159164429, + "learning_rate": 1.9724598184667987e-07, + "loss": 0.9213, + "step": 6560 + }, + { + "epoch": 1.033066917411064, + "grad_norm": 1.0503817796707153, + "learning_rate": 1.580969596488624e-07, + "loss": 0.7601, + "step": 6592 + }, + { + "epoch": 1.0380818053596614, + "grad_norm": 1.7467032670974731, + "learning_rate": 1.2325400046994672e-07, + "loss": 0.4821, + "step": 6624 + }, + { + "epoch": 1.0430966933082588, + "grad_norm": 0.5685003399848938, + "learning_rate": 9.272721325469414e-08, + "loss": 0.73, + "step": 6656 + }, + { + "epoch": 1.0481115812568562, + "grad_norm": 0.16832184791564941, + "learning_rate": 6.652545470221705e-08, + "loss": 0.4139, + "step": 6688 + }, + { + "epoch": 1.0531264692054536, + "grad_norm": 17.248783111572266, + "learning_rate": 4.465632669640285e-08, + "loss": 0.5152, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_nli-pairs_loss": 0.8771082162857056, + "eval_nli-pairs_runtime": 3.6864, + "eval_nli-pairs_samples_per_second": 27.127, + "eval_nli-pairs_steps_per_second": 1.085, + "eval_sts-test_pearson_cosine": 0.7895199953969396, + "eval_sts-test_pearson_dot": 0.5189310649741209, + "eval_sts-test_pearson_euclidean": 0.7358975444358454, + "eval_sts-test_pearson_manhattan": 0.7303294470043906, + "eval_sts-test_pearson_max": 0.7895199953969396, + "eval_sts-test_spearman_cosine": 0.8080710925195471, + "eval_sts-test_spearman_dot": 0.49813617315229736, + "eval_sts-test_spearman_euclidean": 0.727349183443088, + "eval_sts-test_spearman_manhattan": 0.7243520585394965, + "eval_sts-test_spearman_max": 0.8080710925195471, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_vitaminc-pairs_loss": 4.680215358734131, + "eval_vitaminc-pairs_runtime": 1.1767, + "eval_vitaminc-pairs_samples_per_second": 72.234, + "eval_vitaminc-pairs_steps_per_second": 2.549, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_sts-label_loss": 3.747551202774048, + "eval_sts-label_runtime": 0.2756, + "eval_sts-label_samples_per_second": 362.89, + "eval_sts-label_steps_per_second": 14.516, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_qnli-contrastive_loss": 0.11317223310470581, + "eval_qnli-contrastive_runtime": 0.362, + "eval_qnli-contrastive_samples_per_second": 276.263, + "eval_qnli-contrastive_steps_per_second": 11.051, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_scitail-pairs-qa_loss": 0.05494887754321098, + "eval_scitail-pairs-qa_runtime": 0.8771, + "eval_scitail-pairs-qa_samples_per_second": 114.01, + "eval_scitail-pairs-qa_steps_per_second": 4.56, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_scitail-pairs-pos_loss": 0.46942538022994995, + "eval_scitail-pairs-pos_runtime": 1.3418, + "eval_scitail-pairs-pos_samples_per_second": 74.527, + "eval_scitail-pairs-pos_steps_per_second": 2.981, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_xsum-pairs_loss": 0.22760838270187378, + "eval_xsum-pairs_runtime": 0.9366, + "eval_xsum-pairs_samples_per_second": 106.764, + "eval_xsum-pairs_steps_per_second": 4.271, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_compression-pairs_loss": 0.081705242395401, + "eval_compression-pairs_runtime": 0.2786, + "eval_compression-pairs_samples_per_second": 358.908, + "eval_compression-pairs_steps_per_second": 14.356, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_sciq_pairs_loss": 0.22932235896587372, + "eval_sciq_pairs_runtime": 4.0839, + "eval_sciq_pairs_samples_per_second": 24.486, + "eval_sciq_pairs_steps_per_second": 0.979, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_qasc_pairs_loss": 0.1658654361963272, + "eval_qasc_pairs_runtime": 1.0521, + "eval_qasc_pairs_samples_per_second": 95.048, + "eval_qasc_pairs_steps_per_second": 3.802, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_openbookqa_pairs_loss": 1.5459561347961426, + "eval_openbookqa_pairs_runtime": 0.8996, + "eval_openbookqa_pairs_samples_per_second": 111.162, + "eval_openbookqa_pairs_steps_per_second": 4.446, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_msmarco_pairs_loss": 0.49013325572013855, + "eval_msmarco_pairs_runtime": 2.0531, + "eval_msmarco_pairs_samples_per_second": 48.707, + "eval_msmarco_pairs_steps_per_second": 1.948, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_nq_pairs_loss": 0.22049441933631897, + "eval_nq_pairs_runtime": 4.5149, + "eval_nq_pairs_samples_per_second": 22.149, + "eval_nq_pairs_steps_per_second": 0.886, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_trivia_pairs_loss": 0.7513056397438049, + "eval_trivia_pairs_runtime": 6.4705, + "eval_trivia_pairs_samples_per_second": 15.455, + "eval_trivia_pairs_steps_per_second": 0.618, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_quora_pairs_loss": 0.024981992319226265, + "eval_quora_pairs_runtime": 0.6855, + "eval_quora_pairs_samples_per_second": 145.879, + "eval_quora_pairs_steps_per_second": 5.835, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_gooaq_pairs_loss": 0.47234511375427246, + "eval_gooaq_pairs_runtime": 1.4025, + "eval_gooaq_pairs_samples_per_second": 71.304, + "eval_gooaq_pairs_steps_per_second": 2.852, + "step": 6720 + }, + { + "epoch": 1.0531264692054536, + "eval_mrpc_pairs_loss": 0.04154253005981445, + "eval_mrpc_pairs_runtime": 0.2618, + "eval_mrpc_pairs_samples_per_second": 382.036, + "eval_mrpc_pairs_steps_per_second": 15.281, + "step": 6720 + }, + { + "epoch": 1.058141357154051, + "grad_norm": 14.043108940124512, + "learning_rate": 2.7126174100376432e-08, + "loss": 0.4684, + "step": 6752 + }, + { + "epoch": 1.0631562451026484, + "grad_norm": 0.5513893365859985, + "learning_rate": 1.3940082915687713e-08, + "loss": 0.445, + "step": 6784 + }, + { + "epoch": 1.068171133051246, + "grad_norm": 7.036909580230713, + "learning_rate": 5.101878806703652e-09, + "loss": 0.4288, + "step": 6816 + }, + { + "epoch": 1.0731860209998434, + "grad_norm": 0.2966393828392029, + "learning_rate": 6.141259906761176e-10, + "loss": 0.3797, + "step": 6848 + }, + { + "epoch": 1.0782009089484408, + "grad_norm": 9.721883773803711, + "learning_rate": 2.9999521873506204e-05, + "loss": 0.4304, + "step": 6880 + }, + { + "epoch": 1.0832157968970382, + "grad_norm": 2.1523923873901367, + "learning_rate": 2.9995306080226573e-05, + "loss": 0.8562, + "step": 6912 + }, + { + "epoch": 1.0882306848456356, + "grad_norm": 12.939388275146484, + "learning_rate": 2.9986739717293326e-05, + "loss": 0.4902, + "step": 6944 + }, + { + "epoch": 1.093245572794233, + "grad_norm": 0.37949275970458984, + "learning_rate": 2.9973825270054784e-05, + "loss": 0.4285, + "step": 6976 + }, + { + "epoch": 1.0982604607428303, + "grad_norm": 2.427003860473633, + "learning_rate": 2.995656648536359e-05, + "loss": 0.4782, + "step": 7008 + }, + { + "epoch": 1.1032753486914277, + "grad_norm": 10.36500072479248, + "learning_rate": 2.9934968370489646e-05, + "loss": 0.7503, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_nli-pairs_loss": 0.9263110160827637, + "eval_nli-pairs_runtime": 3.6445, + "eval_nli-pairs_samples_per_second": 27.439, + "eval_nli-pairs_steps_per_second": 1.098, + "eval_sts-test_pearson_cosine": 0.7937369016852821, + "eval_sts-test_pearson_dot": 0.5273705048333348, + "eval_sts-test_pearson_euclidean": 0.7373368406202081, + "eval_sts-test_pearson_manhattan": 0.7318756816157863, + "eval_sts-test_pearson_max": 0.7937369016852821, + "eval_sts-test_spearman_cosine": 0.810858247608813, + "eval_sts-test_spearman_dot": 0.508640420451459, + "eval_sts-test_spearman_euclidean": 0.73158962258494, + "eval_sts-test_spearman_manhattan": 0.7284434977078286, + "eval_sts-test_spearman_max": 0.810858247608813, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_vitaminc-pairs_loss": 4.521730422973633, + "eval_vitaminc-pairs_runtime": 1.1248, + "eval_vitaminc-pairs_samples_per_second": 75.569, + "eval_vitaminc-pairs_steps_per_second": 2.667, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_sts-label_loss": 3.8786072731018066, + "eval_sts-label_runtime": 0.2698, + "eval_sts-label_samples_per_second": 370.602, + "eval_sts-label_steps_per_second": 14.824, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_qnli-contrastive_loss": 0.1796300858259201, + "eval_qnli-contrastive_runtime": 0.3573, + "eval_qnli-contrastive_samples_per_second": 279.916, + "eval_qnli-contrastive_steps_per_second": 11.197, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_scitail-pairs-qa_loss": 0.06360480934381485, + "eval_scitail-pairs-qa_runtime": 0.8855, + "eval_scitail-pairs-qa_samples_per_second": 112.93, + "eval_scitail-pairs-qa_steps_per_second": 4.517, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_scitail-pairs-pos_loss": 0.5473235249519348, + "eval_scitail-pairs-pos_runtime": 1.3255, + "eval_scitail-pairs-pos_samples_per_second": 75.446, + "eval_scitail-pairs-pos_steps_per_second": 3.018, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_xsum-pairs_loss": 0.24051249027252197, + "eval_xsum-pairs_runtime": 0.9384, + "eval_xsum-pairs_samples_per_second": 106.567, + "eval_xsum-pairs_steps_per_second": 4.263, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_compression-pairs_loss": 0.0928964912891388, + "eval_compression-pairs_runtime": 0.2778, + "eval_compression-pairs_samples_per_second": 359.983, + "eval_compression-pairs_steps_per_second": 14.399, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_sciq_pairs_loss": 0.28897982835769653, + "eval_sciq_pairs_runtime": 4.1339, + "eval_sciq_pairs_samples_per_second": 24.19, + "eval_sciq_pairs_steps_per_second": 0.968, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_qasc_pairs_loss": 0.1793307065963745, + "eval_qasc_pairs_runtime": 1.0598, + "eval_qasc_pairs_samples_per_second": 94.357, + "eval_qasc_pairs_steps_per_second": 3.774, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_openbookqa_pairs_loss": 1.7123816013336182, + "eval_openbookqa_pairs_runtime": 0.8946, + "eval_openbookqa_pairs_samples_per_second": 111.784, + "eval_openbookqa_pairs_steps_per_second": 4.471, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_msmarco_pairs_loss": 0.4797554016113281, + "eval_msmarco_pairs_runtime": 2.0659, + "eval_msmarco_pairs_samples_per_second": 48.405, + "eval_msmarco_pairs_steps_per_second": 1.936, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_nq_pairs_loss": 0.2459176480770111, + "eval_nq_pairs_runtime": 4.5081, + "eval_nq_pairs_samples_per_second": 22.182, + "eval_nq_pairs_steps_per_second": 0.887, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_trivia_pairs_loss": 0.9698570966720581, + "eval_trivia_pairs_runtime": 6.4733, + "eval_trivia_pairs_samples_per_second": 15.448, + "eval_trivia_pairs_steps_per_second": 0.618, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_quora_pairs_loss": 0.03161533921957016, + "eval_quora_pairs_runtime": 0.6866, + "eval_quora_pairs_samples_per_second": 145.647, + "eval_quora_pairs_steps_per_second": 5.826, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_gooaq_pairs_loss": 0.5500377416610718, + "eval_gooaq_pairs_runtime": 1.4051, + "eval_gooaq_pairs_samples_per_second": 71.17, + "eval_gooaq_pairs_steps_per_second": 2.847, + "step": 7040 + }, + { + "epoch": 1.1032753486914277, + "eval_mrpc_pairs_loss": 0.04846707731485367, + "eval_mrpc_pairs_runtime": 0.2627, + "eval_mrpc_pairs_samples_per_second": 380.671, + "eval_mrpc_pairs_steps_per_second": 15.227, + "step": 7040 + }, + { + "epoch": 1.1082902366400251, + "grad_norm": 10.539325714111328, + "learning_rate": 2.9909037191667383e-05, + "loss": 1.0828, + "step": 7072 + }, + { + "epoch": 1.1133051245886225, + "grad_norm": 14.641651153564453, + "learning_rate": 2.987878047227772e-05, + "loss": 0.6206, + "step": 7104 + }, + { + "epoch": 1.11832001253722, + "grad_norm": 12.57785415649414, + "learning_rate": 2.9844206990665325e-05, + "loss": 0.8111, + "step": 7136 + }, + { + "epoch": 1.1233349004858173, + "grad_norm": 6.1240129470825195, + "learning_rate": 2.980532677759177e-05, + "loss": 0.49, + "step": 7168 + }, + { + "epoch": 1.1283497884344147, + "grad_norm": 8.179468154907227, + "learning_rate": 2.97621511133253e-05, + "loss": 0.5289, + "step": 7200 + }, + { + "epoch": 1.133364676383012, + "grad_norm": 13.069085121154785, + "learning_rate": 2.971469252436813e-05, + "loss": 0.2983, + "step": 7232 + }, + { + "epoch": 1.1383795643316095, + "grad_norm": 11.689116477966309, + "learning_rate": 2.9662964779822125e-05, + "loss": 0.5183, + "step": 7264 + }, + { + "epoch": 1.1433944522802069, + "grad_norm": 6.402202606201172, + "learning_rate": 2.9606982887393993e-05, + "loss": 0.3254, + "step": 7296 + }, + { + "epoch": 1.1484093402288043, + "grad_norm": 17.79107093811035, + "learning_rate": 2.9546763089041115e-05, + "loss": 0.5142, + "step": 7328 + }, + { + "epoch": 1.1534242281774016, + "grad_norm": 3.3558926582336426, + "learning_rate": 2.9482322856259305e-05, + "loss": 0.5605, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_nli-pairs_loss": 1.034800410270691, + "eval_nli-pairs_runtime": 3.6881, + "eval_nli-pairs_samples_per_second": 27.114, + "eval_nli-pairs_steps_per_second": 1.085, + "eval_sts-test_pearson_cosine": 0.7910968553972442, + "eval_sts-test_pearson_dot": 0.5191989002837457, + "eval_sts-test_pearson_euclidean": 0.7346238729069505, + "eval_sts-test_pearson_manhattan": 0.7286075410186882, + "eval_sts-test_pearson_max": 0.7910968553972442, + "eval_sts-test_spearman_cosine": 0.8066961580110351, + "eval_sts-test_spearman_dot": 0.5084443140830514, + "eval_sts-test_spearman_euclidean": 0.72712818838666, + "eval_sts-test_spearman_manhattan": 0.7230020447891047, + "eval_sts-test_spearman_max": 0.8066961580110351, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_vitaminc-pairs_loss": 4.861147403717041, + "eval_vitaminc-pairs_runtime": 1.2006, + "eval_vitaminc-pairs_samples_per_second": 70.796, + "eval_vitaminc-pairs_steps_per_second": 2.499, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_sts-label_loss": 3.832930326461792, + "eval_sts-label_runtime": 0.2878, + "eval_sts-label_samples_per_second": 347.487, + "eval_sts-label_steps_per_second": 13.899, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_qnli-contrastive_loss": 0.20628628134727478, + "eval_qnli-contrastive_runtime": 0.3622, + "eval_qnli-contrastive_samples_per_second": 276.06, + "eval_qnli-contrastive_steps_per_second": 11.042, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_scitail-pairs-qa_loss": 0.06246212124824524, + "eval_scitail-pairs-qa_runtime": 0.9341, + "eval_scitail-pairs-qa_samples_per_second": 107.06, + "eval_scitail-pairs-qa_steps_per_second": 4.282, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_scitail-pairs-pos_loss": 0.4741693437099457, + "eval_scitail-pairs-pos_runtime": 1.6197, + "eval_scitail-pairs-pos_samples_per_second": 61.738, + "eval_scitail-pairs-pos_steps_per_second": 2.47, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_xsum-pairs_loss": 0.23739749193191528, + "eval_xsum-pairs_runtime": 0.9463, + "eval_xsum-pairs_samples_per_second": 105.68, + "eval_xsum-pairs_steps_per_second": 4.227, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_compression-pairs_loss": 0.09255027025938034, + "eval_compression-pairs_runtime": 0.2828, + "eval_compression-pairs_samples_per_second": 353.649, + "eval_compression-pairs_steps_per_second": 14.146, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_sciq_pairs_loss": 0.2770608365535736, + "eval_sciq_pairs_runtime": 4.1267, + "eval_sciq_pairs_samples_per_second": 24.232, + "eval_sciq_pairs_steps_per_second": 0.969, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_qasc_pairs_loss": 0.18835808336734772, + "eval_qasc_pairs_runtime": 1.0608, + "eval_qasc_pairs_samples_per_second": 94.272, + "eval_qasc_pairs_steps_per_second": 3.771, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_openbookqa_pairs_loss": 1.8109256029129028, + "eval_openbookqa_pairs_runtime": 0.9025, + "eval_openbookqa_pairs_samples_per_second": 110.805, + "eval_openbookqa_pairs_steps_per_second": 4.432, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_msmarco_pairs_loss": 0.5193920731544495, + "eval_msmarco_pairs_runtime": 2.1117, + "eval_msmarco_pairs_samples_per_second": 47.354, + "eval_msmarco_pairs_steps_per_second": 1.894, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_nq_pairs_loss": 0.2714031934738159, + "eval_nq_pairs_runtime": 4.5373, + "eval_nq_pairs_samples_per_second": 22.04, + "eval_nq_pairs_steps_per_second": 0.882, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_trivia_pairs_loss": 0.939833402633667, + "eval_trivia_pairs_runtime": 6.4956, + "eval_trivia_pairs_samples_per_second": 15.395, + "eval_trivia_pairs_steps_per_second": 0.616, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_quora_pairs_loss": 0.058685559779405594, + "eval_quora_pairs_runtime": 0.6769, + "eval_quora_pairs_samples_per_second": 147.738, + "eval_quora_pairs_steps_per_second": 5.91, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_gooaq_pairs_loss": 0.6361711621284485, + "eval_gooaq_pairs_runtime": 1.435, + "eval_gooaq_pairs_samples_per_second": 69.685, + "eval_gooaq_pairs_steps_per_second": 2.787, + "step": 7360 + }, + { + "epoch": 1.1534242281774016, + "eval_mrpc_pairs_loss": 0.047355230897665024, + "eval_mrpc_pairs_runtime": 0.2779, + "eval_mrpc_pairs_samples_per_second": 359.791, + "eval_mrpc_pairs_steps_per_second": 14.392, + "step": 7360 + }, + { + "epoch": 1.158439116125999, + "grad_norm": 0.946281909942627, + "learning_rate": 2.9413680885013797e-05, + "loss": 0.6993, + "step": 7392 + }, + { + "epoch": 1.1634540040745964, + "grad_norm": 7.1736626625061035, + "learning_rate": 2.9340857090315025e-05, + "loss": 0.3437, + "step": 7424 + }, + { + "epoch": 1.1684688920231938, + "grad_norm": 0.19313736259937286, + "learning_rate": 2.9263872600440707e-05, + "loss": 0.3281, + "step": 7456 + }, + { + "epoch": 1.1734837799717912, + "grad_norm": 12.984513282775879, + "learning_rate": 2.9182749750805903e-05, + "loss": 1.0286, + "step": 7488 + }, + { + "epoch": 1.1784986679203886, + "grad_norm": 0.5984382033348083, + "learning_rate": 2.9097512077482918e-05, + "loss": 0.6668, + "step": 7520 + }, + { + "epoch": 1.183513555868986, + "grad_norm": 4.237669944763184, + "learning_rate": 2.9008184310372744e-05, + "loss": 0.3861, + "step": 7552 + }, + { + "epoch": 1.1885284438175834, + "grad_norm": 0.4000037610530853, + "learning_rate": 2.891479236603025e-05, + "loss": 0.4096, + "step": 7584 + }, + { + "epoch": 1.1935433317661808, + "grad_norm": 13.399718284606934, + "learning_rate": 2.8817363340145038e-05, + "loss": 0.5836, + "step": 7616 + }, + { + "epoch": 1.1985582197147782, + "grad_norm": 1.461013913154602, + "learning_rate": 2.8715925499680188e-05, + "loss": 0.2649, + "step": 7648 + } + ], + "logging_steps": 32, + "max_steps": 12762, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1277, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}