{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 320, "global_step": 12762, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0050148879485973985, "grad_norm": 14.771158218383789, "learning_rate": 9.707724425887265e-07, "loss": 0.6329, "step": 32 }, { "epoch": 0.010029775897194797, "grad_norm": 11.052021980285645, "learning_rate": 1.9728601252609606e-06, "loss": 0.9693, "step": 64 }, { "epoch": 0.015044663845792195, "grad_norm": 20.26296615600586, "learning_rate": 2.9749478079331944e-06, "loss": 0.6548, "step": 96 }, { "epoch": 0.020059551794389594, "grad_norm": 12.62913703918457, "learning_rate": 3.945720250521921e-06, "loss": 1.1279, "step": 128 }, { "epoch": 0.025074439742986992, "grad_norm": 12.316486358642578, "learning_rate": 4.916492693110647e-06, "loss": 1.0017, "step": 160 }, { "epoch": 0.03008932769158439, "grad_norm": 64.25923919677734, "learning_rate": 5.918580375782881e-06, "loss": 0.7571, "step": 192 }, { "epoch": 0.03510421564018179, "grad_norm": 0.8205029368400574, "learning_rate": 6.920668058455115e-06, "loss": 0.7304, "step": 224 }, { "epoch": 0.04011910358877919, "grad_norm": 6.598870754241943, "learning_rate": 7.922755741127349e-06, "loss": 0.7636, "step": 256 }, { "epoch": 0.045133991537376586, "grad_norm": 8.728073120117188, "learning_rate": 8.924843423799583e-06, "loss": 0.482, "step": 288 }, { "epoch": 0.050148879485973984, "grad_norm": 7.645521640777588, "learning_rate": 9.926931106471817e-06, "loss": 0.6312, "step": 320 }, { "epoch": 0.050148879485973984, "eval_nli-pairs_loss": 1.0158467292785645, "eval_nli-pairs_runtime": 3.7267, "eval_nli-pairs_samples_per_second": 26.833, "eval_nli-pairs_steps_per_second": 1.073, "eval_sts-test_pearson_cosine": 0.7848265412179125, "eval_sts-test_pearson_dot": 0.5437080705284749, "eval_sts-test_pearson_euclidean": 0.7445845076364892, "eval_sts-test_pearson_manhattan": 0.7429239204432232, "eval_sts-test_pearson_max": 0.7848265412179125, "eval_sts-test_spearman_cosine": 0.7989504707258924, "eval_sts-test_spearman_dot": 0.5206855421174118, "eval_sts-test_spearman_euclidean": 0.733568982260844, "eval_sts-test_spearman_manhattan": 0.7349407257944446, "eval_sts-test_spearman_max": 0.7989504707258924, "step": 320 }, { "epoch": 0.050148879485973984, "eval_vitaminc-pairs_loss": 4.692601680755615, "eval_vitaminc-pairs_runtime": 1.1397, "eval_vitaminc-pairs_samples_per_second": 74.578, "eval_vitaminc-pairs_steps_per_second": 2.632, "step": 320 }, { "epoch": 0.050148879485973984, "eval_sts-label_loss": 3.5502490997314453, "eval_sts-label_runtime": 0.28, "eval_sts-label_samples_per_second": 357.117, "eval_sts-label_steps_per_second": 14.285, "step": 320 }, { "epoch": 0.050148879485973984, "eval_qnli-contrastive_loss": 0.16079513728618622, "eval_qnli-contrastive_runtime": 0.3646, "eval_qnli-contrastive_samples_per_second": 274.299, "eval_qnli-contrastive_steps_per_second": 10.972, "step": 320 }, { "epoch": 0.050148879485973984, "eval_scitail-pairs-qa_loss": 0.07610582560300827, "eval_scitail-pairs-qa_runtime": 0.8885, "eval_scitail-pairs-qa_samples_per_second": 112.548, "eval_scitail-pairs-qa_steps_per_second": 4.502, "step": 320 }, { "epoch": 0.050148879485973984, "eval_scitail-pairs-pos_loss": 0.5141278505325317, "eval_scitail-pairs-pos_runtime": 1.3498, "eval_scitail-pairs-pos_samples_per_second": 74.085, "eval_scitail-pairs-pos_steps_per_second": 2.963, "step": 320 }, { "epoch": 0.050148879485973984, "eval_xsum-pairs_loss": 0.25581496953964233, "eval_xsum-pairs_runtime": 0.9407, "eval_xsum-pairs_samples_per_second": 106.304, "eval_xsum-pairs_steps_per_second": 4.252, "step": 320 }, { "epoch": 0.050148879485973984, "eval_compression-pairs_loss": 0.09814296662807465, "eval_compression-pairs_runtime": 0.2758, "eval_compression-pairs_samples_per_second": 362.517, "eval_compression-pairs_steps_per_second": 14.501, "step": 320 }, { "epoch": 0.050148879485973984, "eval_sciq_pairs_loss": 0.25620242953300476, "eval_sciq_pairs_runtime": 4.1155, "eval_sciq_pairs_samples_per_second": 24.298, "eval_sciq_pairs_steps_per_second": 0.972, "step": 320 }, { "epoch": 0.050148879485973984, "eval_qasc_pairs_loss": 0.2044612169265747, "eval_qasc_pairs_runtime": 1.1029, "eval_qasc_pairs_samples_per_second": 90.672, "eval_qasc_pairs_steps_per_second": 3.627, "step": 320 }, { "epoch": 0.050148879485973984, "eval_openbookqa_pairs_loss": 1.7537646293640137, "eval_openbookqa_pairs_runtime": 0.9037, "eval_openbookqa_pairs_samples_per_second": 110.653, "eval_openbookqa_pairs_steps_per_second": 4.426, "step": 320 }, { "epoch": 0.050148879485973984, "eval_msmarco_pairs_loss": 0.5138561725616455, "eval_msmarco_pairs_runtime": 2.0511, "eval_msmarco_pairs_samples_per_second": 48.754, "eval_msmarco_pairs_steps_per_second": 1.95, "step": 320 }, { "epoch": 0.050148879485973984, "eval_nq_pairs_loss": 0.23510317504405975, "eval_nq_pairs_runtime": 4.5293, "eval_nq_pairs_samples_per_second": 22.078, "eval_nq_pairs_steps_per_second": 0.883, "step": 320 }, { "epoch": 0.050148879485973984, "eval_trivia_pairs_loss": 0.7808571457862854, "eval_trivia_pairs_runtime": 6.5065, "eval_trivia_pairs_samples_per_second": 15.369, "eval_trivia_pairs_steps_per_second": 0.615, "step": 320 }, { "epoch": 0.050148879485973984, "eval_quora_pairs_loss": 0.0392119362950325, "eval_quora_pairs_runtime": 0.675, "eval_quora_pairs_samples_per_second": 148.153, "eval_quora_pairs_steps_per_second": 5.926, "step": 320 }, { "epoch": 0.050148879485973984, "eval_gooaq_pairs_loss": 0.4712902009487152, "eval_gooaq_pairs_runtime": 1.4079, "eval_gooaq_pairs_samples_per_second": 71.028, "eval_gooaq_pairs_steps_per_second": 2.841, "step": 320 }, { "epoch": 0.050148879485973984, "eval_mrpc_pairs_loss": 0.05498996376991272, "eval_mrpc_pairs_runtime": 0.2623, "eval_mrpc_pairs_samples_per_second": 381.172, "eval_mrpc_pairs_steps_per_second": 15.247, "step": 320 }, { "epoch": 0.05516376743457138, "grad_norm": 0.34924012422561646, "learning_rate": 1.092901878914405e-05, "loss": 0.5791, "step": 352 }, { "epoch": 0.06017865538316878, "grad_norm": 0.36700841784477234, "learning_rate": 1.1931106471816284e-05, "loss": 0.6413, "step": 384 }, { "epoch": 0.06519354333176618, "grad_norm": 7.559622764587402, "learning_rate": 1.2933194154488518e-05, "loss": 0.4319, "step": 416 }, { "epoch": 0.07020843128036358, "grad_norm": 7.982416152954102, "learning_rate": 1.3935281837160753e-05, "loss": 0.6672, "step": 448 }, { "epoch": 0.07522331922896097, "grad_norm": 0.6726166009902954, "learning_rate": 1.4937369519832987e-05, "loss": 0.459, "step": 480 }, { "epoch": 0.08023820717755838, "grad_norm": 14.846123695373535, "learning_rate": 1.593945720250522e-05, "loss": 0.7621, "step": 512 }, { "epoch": 0.08525309512615578, "grad_norm": 0.7846627831459045, "learning_rate": 1.6941544885177454e-05, "loss": 0.864, "step": 544 }, { "epoch": 0.09026798307475317, "grad_norm": 0.8993583917617798, "learning_rate": 1.7943632567849688e-05, "loss": 0.5081, "step": 576 }, { "epoch": 0.09528287102335058, "grad_norm": 1.4990565776824951, "learning_rate": 1.894572025052192e-05, "loss": 0.654, "step": 608 }, { "epoch": 0.10029775897194797, "grad_norm": 15.647976875305176, "learning_rate": 1.9947807933194157e-05, "loss": 0.6372, "step": 640 }, { "epoch": 0.10029775897194797, "eval_nli-pairs_loss": 1.0652996301651, "eval_nli-pairs_runtime": 3.6326, "eval_nli-pairs_samples_per_second": 27.528, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.785263018402905, "eval_sts-test_pearson_dot": 0.5290450141477089, "eval_sts-test_pearson_euclidean": 0.7433756286425983, "eval_sts-test_pearson_manhattan": 0.7411097274300102, "eval_sts-test_pearson_max": 0.785263018402905, "eval_sts-test_spearman_cosine": 0.7996928912411947, "eval_sts-test_spearman_dot": 0.5102571497667188, "eval_sts-test_spearman_euclidean": 0.7338969723324641, "eval_sts-test_spearman_manhattan": 0.7343494860194358, "eval_sts-test_spearman_max": 0.7996928912411947, "step": 640 }, { "epoch": 0.10029775897194797, "eval_vitaminc-pairs_loss": 4.719416618347168, "eval_vitaminc-pairs_runtime": 1.1268, "eval_vitaminc-pairs_samples_per_second": 75.437, "eval_vitaminc-pairs_steps_per_second": 2.662, "step": 640 }, { "epoch": 0.10029775897194797, "eval_sts-label_loss": 3.612347364425659, "eval_sts-label_runtime": 0.2683, "eval_sts-label_samples_per_second": 372.651, "eval_sts-label_steps_per_second": 14.906, "step": 640 }, { "epoch": 0.10029775897194797, "eval_qnli-contrastive_loss": 0.15202775597572327, "eval_qnli-contrastive_runtime": 0.3528, "eval_qnli-contrastive_samples_per_second": 283.457, "eval_qnli-contrastive_steps_per_second": 11.338, "step": 640 }, { "epoch": 0.10029775897194797, "eval_scitail-pairs-qa_loss": 0.07544919103384018, "eval_scitail-pairs-qa_runtime": 0.8732, "eval_scitail-pairs-qa_samples_per_second": 114.517, "eval_scitail-pairs-qa_steps_per_second": 4.581, "step": 640 }, { "epoch": 0.10029775897194797, "eval_scitail-pairs-pos_loss": 0.5404170751571655, "eval_scitail-pairs-pos_runtime": 1.3146, "eval_scitail-pairs-pos_samples_per_second": 76.067, "eval_scitail-pairs-pos_steps_per_second": 3.043, "step": 640 }, { "epoch": 0.10029775897194797, "eval_xsum-pairs_loss": 0.25958582758903503, "eval_xsum-pairs_runtime": 0.9287, "eval_xsum-pairs_samples_per_second": 107.679, "eval_xsum-pairs_steps_per_second": 4.307, "step": 640 }, { "epoch": 0.10029775897194797, "eval_compression-pairs_loss": 0.10066353529691696, "eval_compression-pairs_runtime": 0.2732, "eval_compression-pairs_samples_per_second": 366.076, "eval_compression-pairs_steps_per_second": 14.643, "step": 640 }, { "epoch": 0.10029775897194797, "eval_sciq_pairs_loss": 0.2645374834537506, "eval_sciq_pairs_runtime": 4.0725, "eval_sciq_pairs_samples_per_second": 24.555, "eval_sciq_pairs_steps_per_second": 0.982, "step": 640 }, { "epoch": 0.10029775897194797, "eval_qasc_pairs_loss": 0.21021947264671326, "eval_qasc_pairs_runtime": 1.0743, "eval_qasc_pairs_samples_per_second": 93.084, "eval_qasc_pairs_steps_per_second": 3.723, "step": 640 }, { "epoch": 0.10029775897194797, "eval_openbookqa_pairs_loss": 1.7905032634735107, "eval_openbookqa_pairs_runtime": 0.8886, "eval_openbookqa_pairs_samples_per_second": 112.532, "eval_openbookqa_pairs_steps_per_second": 4.501, "step": 640 }, { "epoch": 0.10029775897194797, "eval_msmarco_pairs_loss": 0.5102832913398743, "eval_msmarco_pairs_runtime": 2.0529, "eval_msmarco_pairs_samples_per_second": 48.712, "eval_msmarco_pairs_steps_per_second": 1.948, "step": 640 }, { "epoch": 0.10029775897194797, "eval_nq_pairs_loss": 0.24466972053050995, "eval_nq_pairs_runtime": 4.4973, "eval_nq_pairs_samples_per_second": 22.235, "eval_nq_pairs_steps_per_second": 0.889, "step": 640 }, { "epoch": 0.10029775897194797, "eval_trivia_pairs_loss": 0.8748095631599426, "eval_trivia_pairs_runtime": 6.4825, "eval_trivia_pairs_samples_per_second": 15.426, "eval_trivia_pairs_steps_per_second": 0.617, "step": 640 }, { "epoch": 0.10029775897194797, "eval_quora_pairs_loss": 0.07820220291614532, "eval_quora_pairs_runtime": 0.6944, "eval_quora_pairs_samples_per_second": 144.008, "eval_quora_pairs_steps_per_second": 5.76, "step": 640 }, { "epoch": 0.10029775897194797, "eval_gooaq_pairs_loss": 0.5236212611198425, "eval_gooaq_pairs_runtime": 1.3899, "eval_gooaq_pairs_samples_per_second": 71.949, "eval_gooaq_pairs_steps_per_second": 2.878, "step": 640 }, { "epoch": 0.10029775897194797, "eval_mrpc_pairs_loss": 0.05494727939367294, "eval_mrpc_pairs_runtime": 0.2598, "eval_mrpc_pairs_samples_per_second": 384.941, "eval_mrpc_pairs_steps_per_second": 15.398, "step": 640 }, { "epoch": 0.10531264692054537, "grad_norm": 11.01974105834961, "learning_rate": 2.0949895615866387e-05, "loss": 0.9292, "step": 672 }, { "epoch": 0.11032753486914276, "grad_norm": 0.5542309284210205, "learning_rate": 2.1951983298538625e-05, "loss": 1.3108, "step": 704 }, { "epoch": 0.11534242281774017, "grad_norm": 15.458569526672363, "learning_rate": 2.2954070981210856e-05, "loss": 0.9674, "step": 736 }, { "epoch": 0.12035731076633756, "grad_norm": 2.7814478874206543, "learning_rate": 2.395615866388309e-05, "loss": 0.9226, "step": 768 }, { "epoch": 0.12537219871493496, "grad_norm": 11.393244743347168, "learning_rate": 2.4958246346555324e-05, "loss": 0.789, "step": 800 }, { "epoch": 0.13038708666353235, "grad_norm": 9.288290977478027, "learning_rate": 2.596033402922756e-05, "loss": 0.5186, "step": 832 }, { "epoch": 0.13540197461212977, "grad_norm": 47.65571212768555, "learning_rate": 2.6962421711899793e-05, "loss": 0.6726, "step": 864 }, { "epoch": 0.14041686256072716, "grad_norm": 12.908064842224121, "learning_rate": 2.7964509394572024e-05, "loss": 0.5381, "step": 896 }, { "epoch": 0.14543175050932455, "grad_norm": 14.951742172241211, "learning_rate": 2.896659707724426e-05, "loss": 0.581, "step": 928 }, { "epoch": 0.15044663845792194, "grad_norm": 20.12006187438965, "learning_rate": 2.9968684759916492e-05, "loss": 0.9038, "step": 960 }, { "epoch": 0.15044663845792194, "eval_nli-pairs_loss": 1.2173175811767578, "eval_nli-pairs_runtime": 3.7098, "eval_nli-pairs_samples_per_second": 26.955, "eval_nli-pairs_steps_per_second": 1.078, "eval_sts-test_pearson_cosine": 0.7840992835675669, "eval_sts-test_pearson_dot": 0.5220462136106129, "eval_sts-test_pearson_euclidean": 0.7457350047351855, "eval_sts-test_pearson_manhattan": 0.7425970830541657, "eval_sts-test_pearson_max": 0.7840992835675669, "eval_sts-test_spearman_cosine": 0.8006376809572144, "eval_sts-test_spearman_dot": 0.5020544543992158, "eval_sts-test_spearman_euclidean": 0.7369257710408655, "eval_sts-test_spearman_manhattan": 0.7362649758012406, "eval_sts-test_spearman_max": 0.8006376809572144, "step": 960 }, { "epoch": 0.15044663845792194, "eval_vitaminc-pairs_loss": 4.774902820587158, "eval_vitaminc-pairs_runtime": 1.1212, "eval_vitaminc-pairs_samples_per_second": 75.809, "eval_vitaminc-pairs_steps_per_second": 2.676, "step": 960 }, { "epoch": 0.15044663845792194, "eval_sts-label_loss": 3.198556900024414, "eval_sts-label_runtime": 0.2678, "eval_sts-label_samples_per_second": 373.382, "eval_sts-label_steps_per_second": 14.935, "step": 960 }, { "epoch": 0.15044663845792194, "eval_qnli-contrastive_loss": 0.1943340301513672, "eval_qnli-contrastive_runtime": 0.3511, "eval_qnli-contrastive_samples_per_second": 284.789, "eval_qnli-contrastive_steps_per_second": 11.392, "step": 960 }, { "epoch": 0.15044663845792194, "eval_scitail-pairs-qa_loss": 0.08060617744922638, "eval_scitail-pairs-qa_runtime": 0.8778, "eval_scitail-pairs-qa_samples_per_second": 113.92, "eval_scitail-pairs-qa_steps_per_second": 4.557, "step": 960 }, { "epoch": 0.15044663845792194, "eval_scitail-pairs-pos_loss": 0.4759831428527832, "eval_scitail-pairs-pos_runtime": 1.3609, "eval_scitail-pairs-pos_samples_per_second": 73.48, "eval_scitail-pairs-pos_steps_per_second": 2.939, "step": 960 }, { "epoch": 0.15044663845792194, "eval_xsum-pairs_loss": 0.27583304047584534, "eval_xsum-pairs_runtime": 0.9343, "eval_xsum-pairs_samples_per_second": 107.035, "eval_xsum-pairs_steps_per_second": 4.281, "step": 960 }, { "epoch": 0.15044663845792194, "eval_compression-pairs_loss": 0.10094660520553589, "eval_compression-pairs_runtime": 0.2739, "eval_compression-pairs_samples_per_second": 365.047, "eval_compression-pairs_steps_per_second": 14.602, "step": 960 }, { "epoch": 0.15044663845792194, "eval_sciq_pairs_loss": 0.2688131630420685, "eval_sciq_pairs_runtime": 4.0582, "eval_sciq_pairs_samples_per_second": 24.641, "eval_sciq_pairs_steps_per_second": 0.986, "step": 960 }, { "epoch": 0.15044663845792194, "eval_qasc_pairs_loss": 0.23267821967601776, "eval_qasc_pairs_runtime": 1.0554, "eval_qasc_pairs_samples_per_second": 94.75, "eval_qasc_pairs_steps_per_second": 3.79, "step": 960 }, { "epoch": 0.15044663845792194, "eval_openbookqa_pairs_loss": 1.8053069114685059, "eval_openbookqa_pairs_runtime": 0.8871, "eval_openbookqa_pairs_samples_per_second": 112.727, "eval_openbookqa_pairs_steps_per_second": 4.509, "step": 960 }, { "epoch": 0.15044663845792194, "eval_msmarco_pairs_loss": 0.5809260606765747, "eval_msmarco_pairs_runtime": 2.0498, "eval_msmarco_pairs_samples_per_second": 48.786, "eval_msmarco_pairs_steps_per_second": 1.951, "step": 960 }, { "epoch": 0.15044663845792194, "eval_nq_pairs_loss": 0.2808491885662079, "eval_nq_pairs_runtime": 4.4982, "eval_nq_pairs_samples_per_second": 22.231, "eval_nq_pairs_steps_per_second": 0.889, "step": 960 }, { "epoch": 0.15044663845792194, "eval_trivia_pairs_loss": 0.9379808902740479, "eval_trivia_pairs_runtime": 6.4578, "eval_trivia_pairs_samples_per_second": 15.485, "eval_trivia_pairs_steps_per_second": 0.619, "step": 960 }, { "epoch": 0.15044663845792194, "eval_quora_pairs_loss": 0.0913279801607132, "eval_quora_pairs_runtime": 0.6721, "eval_quora_pairs_samples_per_second": 148.79, "eval_quora_pairs_steps_per_second": 5.952, "step": 960 }, { "epoch": 0.15044663845792194, "eval_gooaq_pairs_loss": 0.5807955265045166, "eval_gooaq_pairs_runtime": 1.3915, "eval_gooaq_pairs_samples_per_second": 71.865, "eval_gooaq_pairs_steps_per_second": 2.875, "step": 960 }, { "epoch": 0.15044663845792194, "eval_mrpc_pairs_loss": 0.05799216777086258, "eval_mrpc_pairs_runtime": 0.2571, "eval_mrpc_pairs_samples_per_second": 388.998, "eval_mrpc_pairs_steps_per_second": 15.56, "step": 960 }, { "epoch": 0.15546152640651936, "grad_norm": 9.773286819458008, "learning_rate": 2.9997957904107625e-05, "loss": 0.7964, "step": 992 }, { "epoch": 0.16047641435511675, "grad_norm": 19.411075592041016, "learning_rate": 2.9991566594209126e-05, "loss": 0.8213, "step": 1024 }, { "epoch": 0.16549130230371414, "grad_norm": 3.5282175540924072, "learning_rate": 2.9980825799589488e-05, "loss": 0.5396, "step": 1056 }, { "epoch": 0.17050619025231156, "grad_norm": 62.66339874267578, "learning_rate": 2.996573863646219e-05, "loss": 0.9297, "step": 1088 }, { "epoch": 0.17552107820090895, "grad_norm": 8.785274505615234, "learning_rate": 2.994630948204727e-05, "loss": 1.169, "step": 1120 }, { "epoch": 0.18053596614950634, "grad_norm": 24.10859489440918, "learning_rate": 2.992254397330132e-05, "loss": 0.7486, "step": 1152 }, { "epoch": 0.18555085409810373, "grad_norm": 25.545284271240234, "learning_rate": 2.9894449005282077e-05, "loss": 0.6821, "step": 1184 }, { "epoch": 0.19056574204670115, "grad_norm": 0.8675521016120911, "learning_rate": 2.9862032729147954e-05, "loss": 0.6125, "step": 1216 }, { "epoch": 0.19558062999529854, "grad_norm": 16.122114181518555, "learning_rate": 2.9825304549793153e-05, "loss": 0.8061, "step": 1248 }, { "epoch": 0.20059551794389593, "grad_norm": 1.0314382314682007, "learning_rate": 2.978427512311904e-05, "loss": 0.6918, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_nli-pairs_loss": 1.1552109718322754, "eval_nli-pairs_runtime": 3.8751, "eval_nli-pairs_samples_per_second": 25.806, "eval_nli-pairs_steps_per_second": 1.032, "eval_sts-test_pearson_cosine": 0.786106976104726, "eval_sts-test_pearson_dot": 0.5116758767219935, "eval_sts-test_pearson_euclidean": 0.7432891018313416, "eval_sts-test_pearson_manhattan": 0.7400929158927781, "eval_sts-test_pearson_max": 0.786106976104726, "eval_sts-test_spearman_cosine": 0.801377272203007, "eval_sts-test_spearman_dot": 0.4921454166952506, "eval_sts-test_spearman_euclidean": 0.7343686249967402, "eval_sts-test_spearman_manhattan": 0.7331946050808561, "eval_sts-test_spearman_max": 0.801377272203007, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_vitaminc-pairs_loss": 4.6789751052856445, "eval_vitaminc-pairs_runtime": 1.1504, "eval_vitaminc-pairs_samples_per_second": 73.889, "eval_vitaminc-pairs_steps_per_second": 2.608, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_sts-label_loss": 3.5580556392669678, "eval_sts-label_runtime": 0.2834, "eval_sts-label_samples_per_second": 352.858, "eval_sts-label_steps_per_second": 14.114, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_qnli-contrastive_loss": 0.20369713008403778, "eval_qnli-contrastive_runtime": 0.358, "eval_qnli-contrastive_samples_per_second": 279.331, "eval_qnli-contrastive_steps_per_second": 11.173, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_scitail-pairs-qa_loss": 0.07465875148773193, "eval_scitail-pairs-qa_runtime": 0.9504, "eval_scitail-pairs-qa_samples_per_second": 105.214, "eval_scitail-pairs-qa_steps_per_second": 4.209, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_scitail-pairs-pos_loss": 0.49434563517570496, "eval_scitail-pairs-pos_runtime": 1.6041, "eval_scitail-pairs-pos_samples_per_second": 62.339, "eval_scitail-pairs-pos_steps_per_second": 2.494, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_xsum-pairs_loss": 0.28282061219215393, "eval_xsum-pairs_runtime": 0.9316, "eval_xsum-pairs_samples_per_second": 107.346, "eval_xsum-pairs_steps_per_second": 4.294, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_compression-pairs_loss": 0.097385473549366, "eval_compression-pairs_runtime": 0.2754, "eval_compression-pairs_samples_per_second": 363.1, "eval_compression-pairs_steps_per_second": 14.524, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_sciq_pairs_loss": 0.2762215733528137, "eval_sciq_pairs_runtime": 4.2307, "eval_sciq_pairs_samples_per_second": 23.637, "eval_sciq_pairs_steps_per_second": 0.945, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_qasc_pairs_loss": 0.19347424805164337, "eval_qasc_pairs_runtime": 1.2282, "eval_qasc_pairs_samples_per_second": 81.421, "eval_qasc_pairs_steps_per_second": 3.257, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_openbookqa_pairs_loss": 1.6875064373016357, "eval_openbookqa_pairs_runtime": 1.1661, "eval_openbookqa_pairs_samples_per_second": 85.754, "eval_openbookqa_pairs_steps_per_second": 3.43, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_msmarco_pairs_loss": 0.5743877291679382, "eval_msmarco_pairs_runtime": 2.1428, "eval_msmarco_pairs_samples_per_second": 46.669, "eval_msmarco_pairs_steps_per_second": 1.867, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_nq_pairs_loss": 0.30348217487335205, "eval_nq_pairs_runtime": 4.5543, "eval_nq_pairs_samples_per_second": 21.957, "eval_nq_pairs_steps_per_second": 0.878, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_trivia_pairs_loss": 0.9221765995025635, "eval_trivia_pairs_runtime": 6.6513, "eval_trivia_pairs_samples_per_second": 15.035, "eval_trivia_pairs_steps_per_second": 0.601, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_quora_pairs_loss": 0.03854631260037422, "eval_quora_pairs_runtime": 0.7822, "eval_quora_pairs_samples_per_second": 127.852, "eval_quora_pairs_steps_per_second": 5.114, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_gooaq_pairs_loss": 0.528398334980011, "eval_gooaq_pairs_runtime": 1.4882, "eval_gooaq_pairs_samples_per_second": 67.194, "eval_gooaq_pairs_steps_per_second": 2.688, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_mrpc_pairs_loss": 0.05623970925807953, "eval_mrpc_pairs_runtime": 0.2698, "eval_mrpc_pairs_samples_per_second": 370.713, "eval_mrpc_pairs_steps_per_second": 14.829, "step": 1280 }, { "epoch": 0.20561040589249335, "grad_norm": 0.6042119860649109, "learning_rate": 2.9738956352942557e-05, "loss": 0.9421, "step": 1312 }, { "epoch": 0.21062529384109074, "grad_norm": 13.87867546081543, "learning_rate": 2.968936138754259e-05, "loss": 0.8641, "step": 1344 }, { "epoch": 0.21564018178968813, "grad_norm": 44.48640441894531, "learning_rate": 2.9635504615845257e-05, "loss": 1.157, "step": 1376 }, { "epoch": 0.22065506973828553, "grad_norm": 15.554729461669922, "learning_rate": 2.957928148945977e-05, "loss": 0.8772, "step": 1408 }, { "epoch": 0.22566995768688294, "grad_norm": 16.644670486450195, "learning_rate": 2.9517081112297707e-05, "loss": 1.0496, "step": 1440 }, { "epoch": 0.23068484563548033, "grad_norm": 13.053145408630371, "learning_rate": 2.9450668912302004e-05, "loss": 0.589, "step": 1472 }, { "epoch": 0.23569973358407773, "grad_norm": 7.827791213989258, "learning_rate": 2.9380064157562306e-05, "loss": 0.8234, "step": 1504 }, { "epoch": 0.24071462153267512, "grad_norm": 15.598438262939453, "learning_rate": 2.930528733254901e-05, "loss": 0.7365, "step": 1536 }, { "epoch": 0.24572950948127253, "grad_norm": 13.723180770874023, "learning_rate": 2.9226360132170112e-05, "loss": 0.5076, "step": 1568 }, { "epoch": 0.2507443974298699, "grad_norm": 10.20022964477539, "learning_rate": 2.9143305455476866e-05, "loss": 1.0329, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_nli-pairs_loss": 1.0577216148376465, "eval_nli-pairs_runtime": 3.6476, "eval_nli-pairs_samples_per_second": 27.415, "eval_nli-pairs_steps_per_second": 1.097, "eval_sts-test_pearson_cosine": 0.7876359552191669, "eval_sts-test_pearson_dot": 0.5220803655074544, "eval_sts-test_pearson_euclidean": 0.7444632413869628, "eval_sts-test_pearson_manhattan": 0.7418744760088763, "eval_sts-test_pearson_max": 0.7876359552191669, "eval_sts-test_spearman_cosine": 0.8018874000525117, "eval_sts-test_spearman_dot": 0.5034518981121652, "eval_sts-test_spearman_euclidean": 0.7344750702387959, "eval_sts-test_spearman_manhattan": 0.7332804063416474, "eval_sts-test_spearman_max": 0.8018874000525117, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_vitaminc-pairs_loss": 4.784573554992676, "eval_vitaminc-pairs_runtime": 1.145, "eval_vitaminc-pairs_samples_per_second": 74.235, "eval_vitaminc-pairs_steps_per_second": 2.62, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_sts-label_loss": 3.6113080978393555, "eval_sts-label_runtime": 0.2746, "eval_sts-label_samples_per_second": 364.172, "eval_sts-label_steps_per_second": 14.567, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_qnli-contrastive_loss": 0.18593625724315643, "eval_qnli-contrastive_runtime": 0.3541, "eval_qnli-contrastive_samples_per_second": 282.413, "eval_qnli-contrastive_steps_per_second": 11.297, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_scitail-pairs-qa_loss": 0.07545661181211472, "eval_scitail-pairs-qa_runtime": 0.8854, "eval_scitail-pairs-qa_samples_per_second": 112.941, "eval_scitail-pairs-qa_steps_per_second": 4.518, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_scitail-pairs-pos_loss": 0.5018333792686462, "eval_scitail-pairs-pos_runtime": 1.3443, "eval_scitail-pairs-pos_samples_per_second": 74.386, "eval_scitail-pairs-pos_steps_per_second": 2.975, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_xsum-pairs_loss": 0.2749001085758209, "eval_xsum-pairs_runtime": 0.9439, "eval_xsum-pairs_samples_per_second": 105.939, "eval_xsum-pairs_steps_per_second": 4.238, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_compression-pairs_loss": 0.09735233336687088, "eval_compression-pairs_runtime": 0.2764, "eval_compression-pairs_samples_per_second": 361.753, "eval_compression-pairs_steps_per_second": 14.47, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_sciq_pairs_loss": 0.2648228108882904, "eval_sciq_pairs_runtime": 4.1207, "eval_sciq_pairs_samples_per_second": 24.268, "eval_sciq_pairs_steps_per_second": 0.971, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_qasc_pairs_loss": 0.21318012475967407, "eval_qasc_pairs_runtime": 1.0917, "eval_qasc_pairs_samples_per_second": 91.604, "eval_qasc_pairs_steps_per_second": 3.664, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_openbookqa_pairs_loss": 1.790009617805481, "eval_openbookqa_pairs_runtime": 0.8969, "eval_openbookqa_pairs_samples_per_second": 111.496, "eval_openbookqa_pairs_steps_per_second": 4.46, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_msmarco_pairs_loss": 0.57186359167099, "eval_msmarco_pairs_runtime": 2.0592, "eval_msmarco_pairs_samples_per_second": 48.563, "eval_msmarco_pairs_steps_per_second": 1.943, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_nq_pairs_loss": 0.2738310396671295, "eval_nq_pairs_runtime": 4.5092, "eval_nq_pairs_samples_per_second": 22.177, "eval_nq_pairs_steps_per_second": 0.887, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_trivia_pairs_loss": 0.8291679620742798, "eval_trivia_pairs_runtime": 6.526, "eval_trivia_pairs_samples_per_second": 15.323, "eval_trivia_pairs_steps_per_second": 0.613, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_quora_pairs_loss": 0.08000540733337402, "eval_quora_pairs_runtime": 0.6761, "eval_quora_pairs_samples_per_second": 147.909, "eval_quora_pairs_steps_per_second": 5.916, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_gooaq_pairs_loss": 0.5998037457466125, "eval_gooaq_pairs_runtime": 1.3978, "eval_gooaq_pairs_samples_per_second": 71.541, "eval_gooaq_pairs_steps_per_second": 2.862, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_mrpc_pairs_loss": 0.05507182702422142, "eval_mrpc_pairs_runtime": 0.2617, "eval_mrpc_pairs_samples_per_second": 382.156, "eval_mrpc_pairs_steps_per_second": 15.286, "step": 1600 }, { "epoch": 0.2557592853784673, "grad_norm": 8.05022144317627, "learning_rate": 2.9056147399020182e-05, "loss": 1.4006, "step": 1632 }, { "epoch": 0.2607741733270647, "grad_norm": 0.38224154710769653, "learning_rate": 2.8964911249859437e-05, "loss": 0.5963, "step": 1664 }, { "epoch": 0.2657890612756621, "grad_norm": 0.46655791997909546, "learning_rate": 2.886962347822604e-05, "loss": 0.7488, "step": 1696 }, { "epoch": 0.27080394922425954, "grad_norm": 8.102537155151367, "learning_rate": 2.8770311729843616e-05, "loss": 0.8548, "step": 1728 }, { "epoch": 0.27581883717285693, "grad_norm": 11.803775787353516, "learning_rate": 2.86670048179072e-05, "loss": 1.3324, "step": 1760 }, { "epoch": 0.2808337251214543, "grad_norm": 16.266756057739258, "learning_rate": 2.8559732714723715e-05, "loss": 0.5804, "step": 1792 }, { "epoch": 0.2858486130700517, "grad_norm": 2.8448822498321533, "learning_rate": 2.8448526543016114e-05, "loss": 0.7827, "step": 1824 }, { "epoch": 0.2908635010186491, "grad_norm": 21.346328735351562, "learning_rate": 2.8333418566893796e-05, "loss": 0.5448, "step": 1856 }, { "epoch": 0.2958783889672465, "grad_norm": 3.4379029273986816, "learning_rate": 2.8214442182491866e-05, "loss": 0.7368, "step": 1888 }, { "epoch": 0.3008932769158439, "grad_norm": 17.05881690979004, "learning_rate": 2.8091631908281963e-05, "loss": 0.5657, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_nli-pairs_loss": 1.0244356393814087, "eval_nli-pairs_runtime": 3.6217, "eval_nli-pairs_samples_per_second": 27.612, "eval_nli-pairs_steps_per_second": 1.104, "eval_sts-test_pearson_cosine": 0.781915957368962, "eval_sts-test_pearson_dot": 0.49821032356844613, "eval_sts-test_pearson_euclidean": 0.7329308897504494, "eval_sts-test_pearson_manhattan": 0.7292186092506918, "eval_sts-test_pearson_max": 0.781915957368962, "eval_sts-test_spearman_cosine": 0.7983596570250642, "eval_sts-test_spearman_dot": 0.4812350313638781, "eval_sts-test_spearman_euclidean": 0.7265758267352669, "eval_sts-test_spearman_manhattan": 0.7259264140902829, "eval_sts-test_spearman_max": 0.7983596570250642, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_vitaminc-pairs_loss": 4.698296070098877, "eval_vitaminc-pairs_runtime": 1.1338, "eval_vitaminc-pairs_samples_per_second": 74.97, "eval_vitaminc-pairs_steps_per_second": 2.646, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_sts-label_loss": 3.1822261810302734, "eval_sts-label_runtime": 0.2702, "eval_sts-label_samples_per_second": 370.09, "eval_sts-label_steps_per_second": 14.804, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_qnli-contrastive_loss": 0.11326340585947037, "eval_qnli-contrastive_runtime": 0.3581, "eval_qnli-contrastive_samples_per_second": 279.28, "eval_qnli-contrastive_steps_per_second": 11.171, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_scitail-pairs-qa_loss": 0.07009608298540115, "eval_scitail-pairs-qa_runtime": 0.8816, "eval_scitail-pairs-qa_samples_per_second": 113.424, "eval_scitail-pairs-qa_steps_per_second": 4.537, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_scitail-pairs-pos_loss": 0.49156129360198975, "eval_scitail-pairs-pos_runtime": 1.3759, "eval_scitail-pairs-pos_samples_per_second": 72.678, "eval_scitail-pairs-pos_steps_per_second": 2.907, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_xsum-pairs_loss": 0.25940877199172974, "eval_xsum-pairs_runtime": 0.9373, "eval_xsum-pairs_samples_per_second": 106.695, "eval_xsum-pairs_steps_per_second": 4.268, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_compression-pairs_loss": 0.0919649675488472, "eval_compression-pairs_runtime": 0.2738, "eval_compression-pairs_samples_per_second": 365.291, "eval_compression-pairs_steps_per_second": 14.612, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_sciq_pairs_loss": 0.29138606786727905, "eval_sciq_pairs_runtime": 4.1059, "eval_sciq_pairs_samples_per_second": 24.355, "eval_sciq_pairs_steps_per_second": 0.974, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_qasc_pairs_loss": 0.19625085592269897, "eval_qasc_pairs_runtime": 1.0611, "eval_qasc_pairs_samples_per_second": 94.24, "eval_qasc_pairs_steps_per_second": 3.77, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_openbookqa_pairs_loss": 1.7960456609725952, "eval_openbookqa_pairs_runtime": 0.9042, "eval_openbookqa_pairs_samples_per_second": 110.601, "eval_openbookqa_pairs_steps_per_second": 4.424, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_msmarco_pairs_loss": 0.5171416997909546, "eval_msmarco_pairs_runtime": 2.0637, "eval_msmarco_pairs_samples_per_second": 48.457, "eval_msmarco_pairs_steps_per_second": 1.938, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_nq_pairs_loss": 0.24809740483760834, "eval_nq_pairs_runtime": 4.529, "eval_nq_pairs_samples_per_second": 22.08, "eval_nq_pairs_steps_per_second": 0.883, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_trivia_pairs_loss": 0.9041999578475952, "eval_trivia_pairs_runtime": 6.5257, "eval_trivia_pairs_samples_per_second": 15.324, "eval_trivia_pairs_steps_per_second": 0.613, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_quora_pairs_loss": 0.03601976856589317, "eval_quora_pairs_runtime": 0.6811, "eval_quora_pairs_samples_per_second": 146.827, "eval_quora_pairs_steps_per_second": 5.873, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_gooaq_pairs_loss": 0.5626399517059326, "eval_gooaq_pairs_runtime": 1.3943, "eval_gooaq_pairs_samples_per_second": 71.72, "eval_gooaq_pairs_steps_per_second": 2.869, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_mrpc_pairs_loss": 0.04984402656555176, "eval_mrpc_pairs_runtime": 0.2579, "eval_mrpc_pairs_samples_per_second": 387.725, "eval_mrpc_pairs_steps_per_second": 15.509, "step": 1920 }, { "epoch": 0.30590816486444133, "grad_norm": 22.65591812133789, "learning_rate": 2.796502337505742e-05, "loss": 0.7425, "step": 1952 }, { "epoch": 0.3109230528130387, "grad_norm": 10.119640350341797, "learning_rate": 2.78346533155958e-05, "loss": 0.7819, "step": 1984 }, { "epoch": 0.3159379407616361, "grad_norm": 8.690531730651855, "learning_rate": 2.770055955400161e-05, "loss": 0.5937, "step": 2016 }, { "epoch": 0.3209528287102335, "grad_norm": 0.8992699384689331, "learning_rate": 2.7562780994732476e-05, "loss": 0.8133, "step": 2048 }, { "epoch": 0.3259677166588309, "grad_norm": 10.619684219360352, "learning_rate": 2.7421357611311824e-05, "loss": 1.0674, "step": 2080 }, { "epoch": 0.3309826046074283, "grad_norm": 7.222084045410156, "learning_rate": 2.727633043473141e-05, "loss": 0.6288, "step": 2112 }, { "epoch": 0.3359974925560257, "grad_norm": 10.166888236999512, "learning_rate": 2.712774154154707e-05, "loss": 0.5866, "step": 2144 }, { "epoch": 0.3410123805046231, "grad_norm": 0.36360761523246765, "learning_rate": 2.6975634041671052e-05, "loss": 0.6962, "step": 2176 }, { "epoch": 0.3460272684532205, "grad_norm": 9.586665153503418, "learning_rate": 2.6820052065864665e-05, "loss": 0.5562, "step": 2208 }, { "epoch": 0.3510421564018179, "grad_norm": 1.1307642459869385, "learning_rate": 2.6661040752934594e-05, "loss": 0.8871, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_nli-pairs_loss": 1.0147591829299927, "eval_nli-pairs_runtime": 3.7201, "eval_nli-pairs_samples_per_second": 26.881, "eval_nli-pairs_steps_per_second": 1.075, "eval_sts-test_pearson_cosine": 0.7872126529181761, "eval_sts-test_pearson_dot": 0.5062045289861089, "eval_sts-test_pearson_euclidean": 0.7351473988633473, "eval_sts-test_pearson_manhattan": 0.7310226402088944, "eval_sts-test_pearson_max": 0.7872126529181761, "eval_sts-test_spearman_cosine": 0.801487068999052, "eval_sts-test_spearman_dot": 0.4912205722904683, "eval_sts-test_spearman_euclidean": 0.7267262355024484, "eval_sts-test_spearman_manhattan": 0.72510169253649, "eval_sts-test_spearman_max": 0.801487068999052, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_vitaminc-pairs_loss": 4.644638538360596, "eval_vitaminc-pairs_runtime": 1.1453, "eval_vitaminc-pairs_samples_per_second": 74.215, "eval_vitaminc-pairs_steps_per_second": 2.619, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_sts-label_loss": 3.915343999862671, "eval_sts-label_runtime": 0.2807, "eval_sts-label_samples_per_second": 356.217, "eval_sts-label_steps_per_second": 14.249, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_qnli-contrastive_loss": 0.11220741271972656, "eval_qnli-contrastive_runtime": 0.3614, "eval_qnli-contrastive_samples_per_second": 276.705, "eval_qnli-contrastive_steps_per_second": 11.068, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_scitail-pairs-qa_loss": 0.06635177880525589, "eval_scitail-pairs-qa_runtime": 0.8881, "eval_scitail-pairs-qa_samples_per_second": 112.594, "eval_scitail-pairs-qa_steps_per_second": 4.504, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_scitail-pairs-pos_loss": 0.5765587687492371, "eval_scitail-pairs-pos_runtime": 1.3496, "eval_scitail-pairs-pos_samples_per_second": 74.097, "eval_scitail-pairs-pos_steps_per_second": 2.964, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_xsum-pairs_loss": 0.2595808804035187, "eval_xsum-pairs_runtime": 0.9377, "eval_xsum-pairs_samples_per_second": 106.641, "eval_xsum-pairs_steps_per_second": 4.266, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_compression-pairs_loss": 0.0918564721941948, "eval_compression-pairs_runtime": 0.2755, "eval_compression-pairs_samples_per_second": 363.032, "eval_compression-pairs_steps_per_second": 14.521, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_sciq_pairs_loss": 0.284303218126297, "eval_sciq_pairs_runtime": 4.1289, "eval_sciq_pairs_samples_per_second": 24.22, "eval_sciq_pairs_steps_per_second": 0.969, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_qasc_pairs_loss": 0.19232892990112305, "eval_qasc_pairs_runtime": 1.0709, "eval_qasc_pairs_samples_per_second": 93.384, "eval_qasc_pairs_steps_per_second": 3.735, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_openbookqa_pairs_loss": 1.6234371662139893, "eval_openbookqa_pairs_runtime": 0.9558, "eval_openbookqa_pairs_samples_per_second": 104.62, "eval_openbookqa_pairs_steps_per_second": 4.185, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_msmarco_pairs_loss": 0.5325217247009277, "eval_msmarco_pairs_runtime": 2.0971, "eval_msmarco_pairs_samples_per_second": 47.685, "eval_msmarco_pairs_steps_per_second": 1.907, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_nq_pairs_loss": 0.2721095681190491, "eval_nq_pairs_runtime": 4.5393, "eval_nq_pairs_samples_per_second": 22.03, "eval_nq_pairs_steps_per_second": 0.881, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_trivia_pairs_loss": 0.8544899821281433, "eval_trivia_pairs_runtime": 6.4668, "eval_trivia_pairs_samples_per_second": 15.464, "eval_trivia_pairs_steps_per_second": 0.619, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_quora_pairs_loss": 0.08441996574401855, "eval_quora_pairs_runtime": 0.6933, "eval_quora_pairs_samples_per_second": 144.233, "eval_quora_pairs_steps_per_second": 5.769, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_gooaq_pairs_loss": 0.5711588859558105, "eval_gooaq_pairs_runtime": 1.3941, "eval_gooaq_pairs_samples_per_second": 71.733, "eval_gooaq_pairs_steps_per_second": 2.869, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_mrpc_pairs_loss": 0.05093960464000702, "eval_mrpc_pairs_runtime": 0.2633, "eval_mrpc_pairs_samples_per_second": 379.777, "eval_mrpc_pairs_steps_per_second": 15.191, "step": 2240 }, { "epoch": 0.3560570443504153, "grad_norm": 0.39178094267845154, "learning_rate": 2.6498646236636892e-05, "loss": 0.6805, "step": 2272 }, { "epoch": 0.3610719322990127, "grad_norm": 7.91475248336792, "learning_rate": 2.6332915632292237e-05, "loss": 1.0451, "step": 2304 }, { "epoch": 0.3660868202476101, "grad_norm": 31.54157066345215, "learning_rate": 2.616389702311641e-05, "loss": 1.0603, "step": 2336 }, { "epoch": 0.37110170819620747, "grad_norm": 8.400779724121094, "learning_rate": 2.5991639446269964e-05, "loss": 0.8142, "step": 2368 }, { "epoch": 0.3761165961448049, "grad_norm": 20.99441146850586, "learning_rate": 2.5816192878631166e-05, "loss": 1.7211, "step": 2400 }, { "epoch": 0.3811314840934023, "grad_norm": 10.574430465698242, "learning_rate": 2.5637608222296237e-05, "loss": 0.7523, "step": 2432 }, { "epoch": 0.3861463720419997, "grad_norm": 0.8941424489021301, "learning_rate": 2.5455937289811207e-05, "loss": 0.8053, "step": 2464 }, { "epoch": 0.3911612599905971, "grad_norm": 1.9402281045913696, "learning_rate": 2.5271232789139587e-05, "loss": 0.8427, "step": 2496 }, { "epoch": 0.3961761479391945, "grad_norm": 23.42873764038086, "learning_rate": 2.5083548308370296e-05, "loss": 0.8204, "step": 2528 }, { "epoch": 0.40119103588779187, "grad_norm": 4.5422234535217285, "learning_rate": 2.4892938300170198e-05, "loss": 0.5343, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_nli-pairs_loss": 1.002213478088379, "eval_nli-pairs_runtime": 3.8843, "eval_nli-pairs_samples_per_second": 25.745, "eval_nli-pairs_steps_per_second": 1.03, "eval_sts-test_pearson_cosine": 0.7872537557423719, "eval_sts-test_pearson_dot": 0.5372668921721468, "eval_sts-test_pearson_euclidean": 0.7383744840101544, "eval_sts-test_pearson_manhattan": 0.7333039162515002, "eval_sts-test_pearson_max": 0.7872537557423719, "eval_sts-test_spearman_cosine": 0.8038647026605977, "eval_sts-test_spearman_dot": 0.5191465873751544, "eval_sts-test_spearman_euclidean": 0.730034619048548, "eval_sts-test_spearman_manhattan": 0.7277569753761504, "eval_sts-test_spearman_max": 0.8038647026605977, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_vitaminc-pairs_loss": 4.723379135131836, "eval_vitaminc-pairs_runtime": 1.3031, "eval_vitaminc-pairs_samples_per_second": 65.23, "eval_vitaminc-pairs_steps_per_second": 2.302, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_sts-label_loss": 3.8185579776763916, "eval_sts-label_runtime": 0.4182, "eval_sts-label_samples_per_second": 239.094, "eval_sts-label_steps_per_second": 9.564, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_qnli-contrastive_loss": 0.15084019303321838, "eval_qnli-contrastive_runtime": 0.3638, "eval_qnli-contrastive_samples_per_second": 274.906, "eval_qnli-contrastive_steps_per_second": 10.996, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_scitail-pairs-qa_loss": 0.06741151213645935, "eval_scitail-pairs-qa_runtime": 0.9458, "eval_scitail-pairs-qa_samples_per_second": 105.735, "eval_scitail-pairs-qa_steps_per_second": 4.229, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_scitail-pairs-pos_loss": 0.47680819034576416, "eval_scitail-pairs-pos_runtime": 1.4736, "eval_scitail-pairs-pos_samples_per_second": 67.859, "eval_scitail-pairs-pos_steps_per_second": 2.714, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_xsum-pairs_loss": 0.2572269141674042, "eval_xsum-pairs_runtime": 0.9448, "eval_xsum-pairs_samples_per_second": 105.847, "eval_xsum-pairs_steps_per_second": 4.234, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_compression-pairs_loss": 0.09604756534099579, "eval_compression-pairs_runtime": 0.2774, "eval_compression-pairs_samples_per_second": 360.554, "eval_compression-pairs_steps_per_second": 14.422, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_sciq_pairs_loss": 0.2735004425048828, "eval_sciq_pairs_runtime": 4.2103, "eval_sciq_pairs_samples_per_second": 23.751, "eval_sciq_pairs_steps_per_second": 0.95, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_qasc_pairs_loss": 0.1924300342798233, "eval_qasc_pairs_runtime": 1.1352, "eval_qasc_pairs_samples_per_second": 88.089, "eval_qasc_pairs_steps_per_second": 3.524, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_openbookqa_pairs_loss": 1.6290359497070312, "eval_openbookqa_pairs_runtime": 0.9392, "eval_openbookqa_pairs_samples_per_second": 106.476, "eval_openbookqa_pairs_steps_per_second": 4.259, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_msmarco_pairs_loss": 0.518312931060791, "eval_msmarco_pairs_runtime": 2.121, "eval_msmarco_pairs_samples_per_second": 47.147, "eval_msmarco_pairs_steps_per_second": 1.886, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_nq_pairs_loss": 0.3077375292778015, "eval_nq_pairs_runtime": 4.6617, "eval_nq_pairs_samples_per_second": 21.451, "eval_nq_pairs_steps_per_second": 0.858, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_trivia_pairs_loss": 0.8588294386863708, "eval_trivia_pairs_runtime": 6.6293, "eval_trivia_pairs_samples_per_second": 15.085, "eval_trivia_pairs_steps_per_second": 0.603, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_quora_pairs_loss": 0.07980062067508698, "eval_quora_pairs_runtime": 0.7261, "eval_quora_pairs_samples_per_second": 137.72, "eval_quora_pairs_steps_per_second": 5.509, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_gooaq_pairs_loss": 0.6570906043052673, "eval_gooaq_pairs_runtime": 1.5071, "eval_gooaq_pairs_samples_per_second": 66.352, "eval_gooaq_pairs_steps_per_second": 2.654, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_mrpc_pairs_loss": 0.051231566816568375, "eval_mrpc_pairs_runtime": 0.2799, "eval_mrpc_pairs_samples_per_second": 357.322, "eval_mrpc_pairs_steps_per_second": 14.293, "step": 2560 }, { "epoch": 0.40620592383638926, "grad_norm": 37.2639045715332, "learning_rate": 2.4699458065985813e-05, "loss": 0.9709, "step": 2592 }, { "epoch": 0.4112208117849867, "grad_norm": 15.363207817077637, "learning_rate": 2.45031637399988e-05, "loss": 0.708, "step": 2624 }, { "epoch": 0.4162356997335841, "grad_norm": 1.8831324577331543, "learning_rate": 2.430411227283978e-05, "loss": 0.4083, "step": 2656 }, { "epoch": 0.4212505876821815, "grad_norm": 5.664551734924316, "learning_rate": 2.4102361415065367e-05, "loss": 0.8732, "step": 2688 }, { "epoch": 0.4262654756307789, "grad_norm": 0.615675151348114, "learning_rate": 2.3897969700403022e-05, "loss": 1.2616, "step": 2720 }, { "epoch": 0.43128036357937627, "grad_norm": 19.81829261779785, "learning_rate": 2.3690996428768772e-05, "loss": 1.3324, "step": 2752 }, { "epoch": 0.43629525152797366, "grad_norm": 6.3363118171691895, "learning_rate": 2.348150164906257e-05, "loss": 0.6244, "step": 2784 }, { "epoch": 0.44131013947657105, "grad_norm": 1.103615641593933, "learning_rate": 2.3269546141746407e-05, "loss": 0.6176, "step": 2816 }, { "epoch": 0.44632502742516844, "grad_norm": 11.468894004821777, "learning_rate": 2.3055191401210126e-05, "loss": 0.6926, "step": 2848 }, { "epoch": 0.4513399153737659, "grad_norm": 4.0951619148254395, "learning_rate": 2.283849961793017e-05, "loss": 0.8158, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_nli-pairs_loss": 1.2103344202041626, "eval_nli-pairs_runtime": 3.656, "eval_nli-pairs_samples_per_second": 27.353, "eval_nli-pairs_steps_per_second": 1.094, "eval_sts-test_pearson_cosine": 0.7884135608823999, "eval_sts-test_pearson_dot": 0.5043809957478502, "eval_sts-test_pearson_euclidean": 0.73325296875941, "eval_sts-test_pearson_manhattan": 0.7274442771815695, "eval_sts-test_pearson_max": 0.7884135608823999, "eval_sts-test_spearman_cosine": 0.8024151272859597, "eval_sts-test_spearman_dot": 0.4849613226687463, "eval_sts-test_spearman_euclidean": 0.7267107319000072, "eval_sts-test_spearman_manhattan": 0.7238097600272174, "eval_sts-test_spearman_max": 0.8024151272859597, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_vitaminc-pairs_loss": 4.7560882568359375, "eval_vitaminc-pairs_runtime": 1.1898, "eval_vitaminc-pairs_samples_per_second": 71.438, "eval_vitaminc-pairs_steps_per_second": 2.521, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_sts-label_loss": 3.4280478954315186, "eval_sts-label_runtime": 0.2879, "eval_sts-label_samples_per_second": 347.303, "eval_sts-label_steps_per_second": 13.892, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_qnli-contrastive_loss": 0.1333482712507248, "eval_qnli-contrastive_runtime": 0.3658, "eval_qnli-contrastive_samples_per_second": 273.37, "eval_qnli-contrastive_steps_per_second": 10.935, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_scitail-pairs-qa_loss": 0.0703386664390564, "eval_scitail-pairs-qa_runtime": 0.8879, "eval_scitail-pairs-qa_samples_per_second": 112.63, "eval_scitail-pairs-qa_steps_per_second": 4.505, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_scitail-pairs-pos_loss": 0.4763020873069763, "eval_scitail-pairs-pos_runtime": 1.3239, "eval_scitail-pairs-pos_samples_per_second": 75.532, "eval_scitail-pairs-pos_steps_per_second": 3.021, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_xsum-pairs_loss": 0.25743284821510315, "eval_xsum-pairs_runtime": 0.9333, "eval_xsum-pairs_samples_per_second": 107.15, "eval_xsum-pairs_steps_per_second": 4.286, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_compression-pairs_loss": 0.09842805564403534, "eval_compression-pairs_runtime": 0.2944, "eval_compression-pairs_samples_per_second": 339.674, "eval_compression-pairs_steps_per_second": 13.587, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_sciq_pairs_loss": 0.28244778513908386, "eval_sciq_pairs_runtime": 4.0785, "eval_sciq_pairs_samples_per_second": 24.519, "eval_sciq_pairs_steps_per_second": 0.981, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_qasc_pairs_loss": 0.18051397800445557, "eval_qasc_pairs_runtime": 1.0561, "eval_qasc_pairs_samples_per_second": 94.69, "eval_qasc_pairs_steps_per_second": 3.788, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_openbookqa_pairs_loss": 1.5708725452423096, "eval_openbookqa_pairs_runtime": 0.9072, "eval_openbookqa_pairs_samples_per_second": 110.229, "eval_openbookqa_pairs_steps_per_second": 4.409, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_msmarco_pairs_loss": 0.5720314979553223, "eval_msmarco_pairs_runtime": 2.0694, "eval_msmarco_pairs_samples_per_second": 48.322, "eval_msmarco_pairs_steps_per_second": 1.933, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_nq_pairs_loss": 0.2748319208621979, "eval_nq_pairs_runtime": 4.5496, "eval_nq_pairs_samples_per_second": 21.98, "eval_nq_pairs_steps_per_second": 0.879, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_trivia_pairs_loss": 0.8936847448348999, "eval_trivia_pairs_runtime": 6.4784, "eval_trivia_pairs_samples_per_second": 15.436, "eval_trivia_pairs_steps_per_second": 0.617, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_quora_pairs_loss": 0.07990340888500214, "eval_quora_pairs_runtime": 0.6852, "eval_quora_pairs_samples_per_second": 145.945, "eval_quora_pairs_steps_per_second": 5.838, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_gooaq_pairs_loss": 0.6210995316505432, "eval_gooaq_pairs_runtime": 1.4234, "eval_gooaq_pairs_samples_per_second": 70.255, "eval_gooaq_pairs_steps_per_second": 2.81, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_mrpc_pairs_loss": 0.053870730102062225, "eval_mrpc_pairs_runtime": 0.2678, "eval_mrpc_pairs_samples_per_second": 373.436, "eval_mrpc_pairs_steps_per_second": 14.937, "step": 2880 }, { "epoch": 0.4563548033223633, "grad_norm": 0.5031663775444031, "learning_rate": 2.261953366042628e-05, "loss": 1.4753, "step": 2912 }, { "epoch": 0.46136969127096067, "grad_norm": 3.3404605388641357, "learning_rate": 2.239835705702158e-05, "loss": 0.5735, "step": 2944 }, { "epoch": 0.46638457921955806, "grad_norm": 14.60761547088623, "learning_rate": 2.217503397741115e-05, "loss": 1.2261, "step": 2976 }, { "epoch": 0.47139946716815545, "grad_norm": 0.7826951146125793, "learning_rate": 2.194962921404456e-05, "loss": 0.6085, "step": 3008 }, { "epoch": 0.47641435511675284, "grad_norm": 5.523419380187988, "learning_rate": 2.1722208163327738e-05, "loss": 0.8766, "step": 3040 }, { "epoch": 0.48142924306535023, "grad_norm": 1.2507153749465942, "learning_rate": 2.1492836806649564e-05, "loss": 1.1824, "step": 3072 }, { "epoch": 0.4864441310139477, "grad_norm": 10.76526165008545, "learning_rate": 2.1261581691238775e-05, "loss": 0.7192, "step": 3104 }, { "epoch": 0.49145901896254507, "grad_norm": 2.5375277996063232, "learning_rate": 2.1028509910856705e-05, "loss": 0.6131, "step": 3136 }, { "epoch": 0.49647390691114246, "grad_norm": 6.569655418395996, "learning_rate": 2.0793689086331472e-05, "loss": 0.7407, "step": 3168 }, { "epoch": 0.5014887948597399, "grad_norm": 0.42745527625083923, "learning_rate": 2.055718734593919e-05, "loss": 0.5857, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_nli-pairs_loss": 1.1431602239608765, "eval_nli-pairs_runtime": 3.6407, "eval_nli-pairs_samples_per_second": 27.467, "eval_nli-pairs_steps_per_second": 1.099, "eval_sts-test_pearson_cosine": 0.7838341260331343, "eval_sts-test_pearson_dot": 0.5274891201747137, "eval_sts-test_pearson_euclidean": 0.734987175544037, "eval_sts-test_pearson_manhattan": 0.7296263541205231, "eval_sts-test_pearson_max": 0.7838341260331343, "eval_sts-test_spearman_cosine": 0.8013224760849562, "eval_sts-test_spearman_dot": 0.5061225327907017, "eval_sts-test_spearman_euclidean": 0.7282525362996873, "eval_sts-test_spearman_manhattan": 0.7265322068183514, "eval_sts-test_spearman_max": 0.8013224760849562, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_vitaminc-pairs_loss": 4.748112201690674, "eval_vitaminc-pairs_runtime": 1.1378, "eval_vitaminc-pairs_samples_per_second": 74.706, "eval_vitaminc-pairs_steps_per_second": 2.637, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_sts-label_loss": 3.9402565956115723, "eval_sts-label_runtime": 0.2789, "eval_sts-label_samples_per_second": 358.596, "eval_sts-label_steps_per_second": 14.344, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_qnli-contrastive_loss": 0.10341227799654007, "eval_qnli-contrastive_runtime": 0.3605, "eval_qnli-contrastive_samples_per_second": 277.417, "eval_qnli-contrastive_steps_per_second": 11.097, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_scitail-pairs-qa_loss": 0.06673895567655563, "eval_scitail-pairs-qa_runtime": 0.8765, "eval_scitail-pairs-qa_samples_per_second": 114.092, "eval_scitail-pairs-qa_steps_per_second": 4.564, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_scitail-pairs-pos_loss": 0.510690450668335, "eval_scitail-pairs-pos_runtime": 1.3274, "eval_scitail-pairs-pos_samples_per_second": 75.334, "eval_scitail-pairs-pos_steps_per_second": 3.013, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_xsum-pairs_loss": 0.26573723554611206, "eval_xsum-pairs_runtime": 0.9342, "eval_xsum-pairs_samples_per_second": 107.047, "eval_xsum-pairs_steps_per_second": 4.282, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_compression-pairs_loss": 0.09096826612949371, "eval_compression-pairs_runtime": 0.2779, "eval_compression-pairs_samples_per_second": 359.804, "eval_compression-pairs_steps_per_second": 14.392, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_sciq_pairs_loss": 0.30787500739097595, "eval_sciq_pairs_runtime": 4.1007, "eval_sciq_pairs_samples_per_second": 24.386, "eval_sciq_pairs_steps_per_second": 0.975, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_qasc_pairs_loss": 0.1825849413871765, "eval_qasc_pairs_runtime": 1.0526, "eval_qasc_pairs_samples_per_second": 94.998, "eval_qasc_pairs_steps_per_second": 3.8, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_openbookqa_pairs_loss": 1.5945305824279785, "eval_openbookqa_pairs_runtime": 0.8948, "eval_openbookqa_pairs_samples_per_second": 111.759, "eval_openbookqa_pairs_steps_per_second": 4.47, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_msmarco_pairs_loss": 0.5864604711532593, "eval_msmarco_pairs_runtime": 2.0556, "eval_msmarco_pairs_samples_per_second": 48.646, "eval_msmarco_pairs_steps_per_second": 1.946, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_nq_pairs_loss": 0.2538978159427643, "eval_nq_pairs_runtime": 4.5409, "eval_nq_pairs_samples_per_second": 22.022, "eval_nq_pairs_steps_per_second": 0.881, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_trivia_pairs_loss": 0.8825237154960632, "eval_trivia_pairs_runtime": 6.4701, "eval_trivia_pairs_samples_per_second": 15.456, "eval_trivia_pairs_steps_per_second": 0.618, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_quora_pairs_loss": 0.06264814734458923, "eval_quora_pairs_runtime": 0.6792, "eval_quora_pairs_samples_per_second": 147.238, "eval_quora_pairs_steps_per_second": 5.89, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_gooaq_pairs_loss": 0.5953384041786194, "eval_gooaq_pairs_runtime": 1.4186, "eval_gooaq_pairs_samples_per_second": 70.49, "eval_gooaq_pairs_steps_per_second": 2.82, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_mrpc_pairs_loss": 0.05028616264462471, "eval_mrpc_pairs_runtime": 0.2664, "eval_mrpc_pairs_samples_per_second": 375.444, "eval_mrpc_pairs_steps_per_second": 15.018, "step": 3200 }, { "epoch": 0.5065036828083372, "grad_norm": 17.477581024169922, "learning_rate": 2.0319073305638035e-05, "loss": 0.6212, "step": 3232 }, { "epoch": 0.5115185707569346, "grad_norm": 15.705268859863281, "learning_rate": 2.0079416049160762e-05, "loss": 1.1408, "step": 3264 }, { "epoch": 0.516533458705532, "grad_norm": 15.518088340759277, "learning_rate": 1.983828510797154e-05, "loss": 0.6898, "step": 3296 }, { "epoch": 0.5215483466541294, "grad_norm": 18.28449058532715, "learning_rate": 1.9595750441092844e-05, "loss": 0.9827, "step": 3328 }, { "epoch": 0.5265632346027268, "grad_norm": 11.187614440917969, "learning_rate": 1.935188241480837e-05, "loss": 0.9518, "step": 3360 }, { "epoch": 0.5315781225513242, "grad_norm": 24.515199661254883, "learning_rate": 1.910675178224773e-05, "loss": 0.5584, "step": 3392 }, { "epoch": 0.5365930104999217, "grad_norm": 21.595224380493164, "learning_rate": 1.886042966285894e-05, "loss": 1.3362, "step": 3424 }, { "epoch": 0.5416078984485191, "grad_norm": 14.934494972229004, "learning_rate": 1.8612987521774603e-05, "loss": 0.4418, "step": 3456 }, { "epoch": 0.5466227863971165, "grad_norm": 1.0222537517547607, "learning_rate": 1.836449714907785e-05, "loss": 0.5896, "step": 3488 }, { "epoch": 0.5516376743457139, "grad_norm": 13.705151557922363, "learning_rate": 1.811503063897396e-05, "loss": 0.7951, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_nli-pairs_loss": 1.0016616582870483, "eval_nli-pairs_runtime": 3.6365, "eval_nli-pairs_samples_per_second": 27.499, "eval_nli-pairs_steps_per_second": 1.1, "eval_sts-test_pearson_cosine": 0.783269156461013, "eval_sts-test_pearson_dot": 0.5146760761775918, "eval_sts-test_pearson_euclidean": 0.7293244171224789, "eval_sts-test_pearson_manhattan": 0.722566066058283, "eval_sts-test_pearson_max": 0.783269156461013, "eval_sts-test_spearman_cosine": 0.800346163751739, "eval_sts-test_spearman_dot": 0.49134463318009686, "eval_sts-test_spearman_euclidean": 0.7220780456605193, "eval_sts-test_spearman_manhattan": 0.7185570530657137, "eval_sts-test_spearman_max": 0.800346163751739, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_vitaminc-pairs_loss": 4.628457546234131, "eval_vitaminc-pairs_runtime": 1.1358, "eval_vitaminc-pairs_samples_per_second": 74.837, "eval_vitaminc-pairs_steps_per_second": 2.641, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_sts-label_loss": 3.698469877243042, "eval_sts-label_runtime": 0.2763, "eval_sts-label_samples_per_second": 361.871, "eval_sts-label_steps_per_second": 14.475, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_qnli-contrastive_loss": 0.11857427656650543, "eval_qnli-contrastive_runtime": 0.3599, "eval_qnli-contrastive_samples_per_second": 277.865, "eval_qnli-contrastive_steps_per_second": 11.115, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_scitail-pairs-qa_loss": 0.06011494621634483, "eval_scitail-pairs-qa_runtime": 0.8855, "eval_scitail-pairs-qa_samples_per_second": 112.93, "eval_scitail-pairs-qa_steps_per_second": 4.517, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_scitail-pairs-pos_loss": 0.5179685950279236, "eval_scitail-pairs-pos_runtime": 1.3428, "eval_scitail-pairs-pos_samples_per_second": 74.469, "eval_scitail-pairs-pos_steps_per_second": 2.979, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_xsum-pairs_loss": 0.2575337886810303, "eval_xsum-pairs_runtime": 0.9362, "eval_xsum-pairs_samples_per_second": 106.81, "eval_xsum-pairs_steps_per_second": 4.272, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_compression-pairs_loss": 0.08986295014619827, "eval_compression-pairs_runtime": 0.2735, "eval_compression-pairs_samples_per_second": 365.659, "eval_compression-pairs_steps_per_second": 14.626, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_sciq_pairs_loss": 0.2898155748844147, "eval_sciq_pairs_runtime": 4.1009, "eval_sciq_pairs_samples_per_second": 24.385, "eval_sciq_pairs_steps_per_second": 0.975, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_qasc_pairs_loss": 0.1790761798620224, "eval_qasc_pairs_runtime": 1.0559, "eval_qasc_pairs_samples_per_second": 94.702, "eval_qasc_pairs_steps_per_second": 3.788, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_openbookqa_pairs_loss": 1.6558103561401367, "eval_openbookqa_pairs_runtime": 0.8846, "eval_openbookqa_pairs_samples_per_second": 113.048, "eval_openbookqa_pairs_steps_per_second": 4.522, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_msmarco_pairs_loss": 0.5547183156013489, "eval_msmarco_pairs_runtime": 2.0592, "eval_msmarco_pairs_samples_per_second": 48.563, "eval_msmarco_pairs_steps_per_second": 1.943, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_nq_pairs_loss": 0.24799224734306335, "eval_nq_pairs_runtime": 4.5115, "eval_nq_pairs_samples_per_second": 22.166, "eval_nq_pairs_steps_per_second": 0.887, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_trivia_pairs_loss": 0.9036693572998047, "eval_trivia_pairs_runtime": 6.5286, "eval_trivia_pairs_samples_per_second": 15.317, "eval_trivia_pairs_steps_per_second": 0.613, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_quora_pairs_loss": 0.05727443844079971, "eval_quora_pairs_runtime": 0.6763, "eval_quora_pairs_samples_per_second": 147.873, "eval_quora_pairs_steps_per_second": 5.915, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_gooaq_pairs_loss": 0.5602415800094604, "eval_gooaq_pairs_runtime": 1.4132, "eval_gooaq_pairs_samples_per_second": 70.759, "eval_gooaq_pairs_steps_per_second": 2.83, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_mrpc_pairs_loss": 0.04762456938624382, "eval_mrpc_pairs_runtime": 0.2648, "eval_mrpc_pairs_samples_per_second": 377.632, "eval_mrpc_pairs_steps_per_second": 15.105, "step": 3520 }, { "epoch": 0.5566525622943113, "grad_norm": 0.39285340905189514, "learning_rate": 1.7864660368873747e-05, "loss": 0.5201, "step": 3552 }, { "epoch": 0.5616674502429087, "grad_norm": 16.01999855041504, "learning_rate": 1.7613458978394786e-05, "loss": 0.6351, "step": 3584 }, { "epoch": 0.566682338191506, "grad_norm": 0.5487422347068787, "learning_rate": 1.7361499348286606e-05, "loss": 0.8652, "step": 3616 }, { "epoch": 0.5716972261401034, "grad_norm": 0.9249119758605957, "learning_rate": 1.710885457928585e-05, "loss": 0.6407, "step": 3648 }, { "epoch": 0.5767121140887008, "grad_norm": 6.578505992889404, "learning_rate": 1.6855597970907664e-05, "loss": 0.9435, "step": 3680 }, { "epoch": 0.5817270020372982, "grad_norm": 14.307022094726562, "learning_rate": 1.6601803000179394e-05, "loss": 0.9295, "step": 3712 }, { "epoch": 0.5867418899858956, "grad_norm": 16.091779708862305, "learning_rate": 1.6347543300322795e-05, "loss": 0.6829, "step": 3744 }, { "epoch": 0.591756777934493, "grad_norm": 29.058805465698242, "learning_rate": 1.6092892639390916e-05, "loss": 0.8683, "step": 3776 }, { "epoch": 0.5967716658830904, "grad_norm": 13.12238597869873, "learning_rate": 1.583792489886586e-05, "loss": 1.115, "step": 3808 }, { "epoch": 0.6017865538316878, "grad_norm": 11.606388092041016, "learning_rate": 1.558271405222362e-05, "loss": 1.0936, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_nli-pairs_loss": 0.9106074571609497, "eval_nli-pairs_runtime": 3.9467, "eval_nli-pairs_samples_per_second": 25.337, "eval_nli-pairs_steps_per_second": 1.013, "eval_sts-test_pearson_cosine": 0.7831915073063493, "eval_sts-test_pearson_dot": 0.51712727721244, "eval_sts-test_pearson_euclidean": 0.7355201142492419, "eval_sts-test_pearson_manhattan": 0.7299910115321456, "eval_sts-test_pearson_max": 0.7831915073063493, "eval_sts-test_spearman_cosine": 0.8005432620025132, "eval_sts-test_spearman_dot": 0.49466719400094655, "eval_sts-test_spearman_euclidean": 0.7273424991180402, "eval_sts-test_spearman_manhattan": 0.7249394934262583, "eval_sts-test_spearman_max": 0.8005432620025132, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_vitaminc-pairs_loss": 4.7559494972229, "eval_vitaminc-pairs_runtime": 1.1844, "eval_vitaminc-pairs_samples_per_second": 71.768, "eval_vitaminc-pairs_steps_per_second": 2.533, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_sts-label_loss": 3.46917724609375, "eval_sts-label_runtime": 0.3003, "eval_sts-label_samples_per_second": 333.048, "eval_sts-label_steps_per_second": 13.322, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_qnli-contrastive_loss": 0.13890141248703003, "eval_qnli-contrastive_runtime": 0.3729, "eval_qnli-contrastive_samples_per_second": 268.18, "eval_qnli-contrastive_steps_per_second": 10.727, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_scitail-pairs-qa_loss": 0.0611240416765213, "eval_scitail-pairs-qa_runtime": 0.9367, "eval_scitail-pairs-qa_samples_per_second": 106.755, "eval_scitail-pairs-qa_steps_per_second": 4.27, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_scitail-pairs-pos_loss": 0.46203696727752686, "eval_scitail-pairs-pos_runtime": 1.4874, "eval_scitail-pairs-pos_samples_per_second": 67.232, "eval_scitail-pairs-pos_steps_per_second": 2.689, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_xsum-pairs_loss": 0.24919259548187256, "eval_xsum-pairs_runtime": 0.9576, "eval_xsum-pairs_samples_per_second": 104.427, "eval_xsum-pairs_steps_per_second": 4.177, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_compression-pairs_loss": 0.08809012174606323, "eval_compression-pairs_runtime": 0.298, "eval_compression-pairs_samples_per_second": 335.567, "eval_compression-pairs_steps_per_second": 13.423, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_sciq_pairs_loss": 0.28287386894226074, "eval_sciq_pairs_runtime": 4.2668, "eval_sciq_pairs_samples_per_second": 23.437, "eval_sciq_pairs_steps_per_second": 0.937, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_qasc_pairs_loss": 0.1861308217048645, "eval_qasc_pairs_runtime": 1.0488, "eval_qasc_pairs_samples_per_second": 95.351, "eval_qasc_pairs_steps_per_second": 3.814, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_openbookqa_pairs_loss": 1.600982666015625, "eval_openbookqa_pairs_runtime": 0.9077, "eval_openbookqa_pairs_samples_per_second": 110.17, "eval_openbookqa_pairs_steps_per_second": 4.407, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_msmarco_pairs_loss": 0.5555463433265686, "eval_msmarco_pairs_runtime": 2.1064, "eval_msmarco_pairs_samples_per_second": 47.474, "eval_msmarco_pairs_steps_per_second": 1.899, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_nq_pairs_loss": 0.23241031169891357, "eval_nq_pairs_runtime": 4.6119, "eval_nq_pairs_samples_per_second": 21.683, "eval_nq_pairs_steps_per_second": 0.867, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_trivia_pairs_loss": 0.7936394214630127, "eval_trivia_pairs_runtime": 6.6242, "eval_trivia_pairs_samples_per_second": 15.096, "eval_trivia_pairs_steps_per_second": 0.604, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_quora_pairs_loss": 0.05936668440699577, "eval_quora_pairs_runtime": 0.7463, "eval_quora_pairs_samples_per_second": 133.994, "eval_quora_pairs_steps_per_second": 5.36, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_gooaq_pairs_loss": 0.5735708475112915, "eval_gooaq_pairs_runtime": 1.4747, "eval_gooaq_pairs_samples_per_second": 67.809, "eval_gooaq_pairs_steps_per_second": 2.712, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_mrpc_pairs_loss": 0.046309370547533035, "eval_mrpc_pairs_runtime": 0.2694, "eval_mrpc_pairs_samples_per_second": 371.218, "eval_mrpc_pairs_steps_per_second": 14.849, "step": 3840 }, { "epoch": 0.6068014417802852, "grad_norm": 6.513147830963135, "learning_rate": 1.53273341434723e-05, "loss": 0.8689, "step": 3872 }, { "epoch": 0.6118163297288827, "grad_norm": 0.2349071353673935, "learning_rate": 1.5071859265669756e-05, "loss": 0.8692, "step": 3904 }, { "epoch": 0.6168312176774801, "grad_norm": 18.028608322143555, "learning_rate": 1.4816363539427118e-05, "loss": 0.9083, "step": 3936 }, { "epoch": 0.6218461056260774, "grad_norm": 17.381690979003906, "learning_rate": 1.456092109140423e-05, "loss": 1.0782, "step": 3968 }, { "epoch": 0.6268609935746748, "grad_norm": 20.72548484802246, "learning_rate": 1.4305606032803418e-05, "loss": 0.7711, "step": 4000 }, { "epoch": 0.6318758815232722, "grad_norm": 28.311264038085938, "learning_rate": 1.4050492437867641e-05, "loss": 1.0005, "step": 4032 }, { "epoch": 0.6368907694718696, "grad_norm": 14.892809867858887, "learning_rate": 1.3795654322389481e-05, "loss": 0.7229, "step": 4064 }, { "epoch": 0.641905657420467, "grad_norm": 18.567630767822266, "learning_rate": 1.3541165622236977e-05, "loss": 0.4871, "step": 4096 }, { "epoch": 0.6469205453690644, "grad_norm": 8.814851760864258, "learning_rate": 1.3287100171902759e-05, "loss": 0.7853, "step": 4128 }, { "epoch": 0.6519354333176618, "grad_norm": 19.43486785888672, "learning_rate": 1.3033531683082495e-05, "loss": 0.9271, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_nli-pairs_loss": 0.8979966640472412, "eval_nli-pairs_runtime": 3.6341, "eval_nli-pairs_samples_per_second": 27.517, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.786081877366483, "eval_sts-test_pearson_dot": 0.5354100918466089, "eval_sts-test_pearson_euclidean": 0.7368659505908834, "eval_sts-test_pearson_manhattan": 0.7310042183211231, "eval_sts-test_pearson_max": 0.786081877366483, "eval_sts-test_spearman_cosine": 0.8043456052578905, "eval_sts-test_spearman_dot": 0.5150264179790126, "eval_sts-test_spearman_euclidean": 0.7297811553069841, "eval_sts-test_spearman_manhattan": 0.7264172194761916, "eval_sts-test_spearman_max": 0.8043456052578905, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_vitaminc-pairs_loss": 4.720225811004639, "eval_vitaminc-pairs_runtime": 1.1487, "eval_vitaminc-pairs_samples_per_second": 73.995, "eval_vitaminc-pairs_steps_per_second": 2.612, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_sts-label_loss": 3.9553511142730713, "eval_sts-label_runtime": 0.2732, "eval_sts-label_samples_per_second": 366.049, "eval_sts-label_steps_per_second": 14.642, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_qnli-contrastive_loss": 0.14256399869918823, "eval_qnli-contrastive_runtime": 0.3558, "eval_qnli-contrastive_samples_per_second": 281.03, "eval_qnli-contrastive_steps_per_second": 11.241, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_scitail-pairs-qa_loss": 0.06135182082653046, "eval_scitail-pairs-qa_runtime": 0.8797, "eval_scitail-pairs-qa_samples_per_second": 113.67, "eval_scitail-pairs-qa_steps_per_second": 4.547, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_scitail-pairs-pos_loss": 0.42590686678886414, "eval_scitail-pairs-pos_runtime": 1.3288, "eval_scitail-pairs-pos_samples_per_second": 75.254, "eval_scitail-pairs-pos_steps_per_second": 3.01, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_xsum-pairs_loss": 0.2564789056777954, "eval_xsum-pairs_runtime": 0.9345, "eval_xsum-pairs_samples_per_second": 107.011, "eval_xsum-pairs_steps_per_second": 4.28, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_compression-pairs_loss": 0.08838170021772385, "eval_compression-pairs_runtime": 0.2761, "eval_compression-pairs_samples_per_second": 362.144, "eval_compression-pairs_steps_per_second": 14.486, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_sciq_pairs_loss": 0.2946786880493164, "eval_sciq_pairs_runtime": 4.076, "eval_sciq_pairs_samples_per_second": 24.534, "eval_sciq_pairs_steps_per_second": 0.981, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_qasc_pairs_loss": 0.17502914369106293, "eval_qasc_pairs_runtime": 1.0723, "eval_qasc_pairs_samples_per_second": 93.259, "eval_qasc_pairs_steps_per_second": 3.73, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_openbookqa_pairs_loss": 1.5555152893066406, "eval_openbookqa_pairs_runtime": 0.8973, "eval_openbookqa_pairs_samples_per_second": 111.451, "eval_openbookqa_pairs_steps_per_second": 4.458, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_msmarco_pairs_loss": 0.5041812062263489, "eval_msmarco_pairs_runtime": 2.0593, "eval_msmarco_pairs_samples_per_second": 48.56, "eval_msmarco_pairs_steps_per_second": 1.942, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_nq_pairs_loss": 0.24564537405967712, "eval_nq_pairs_runtime": 4.527, "eval_nq_pairs_samples_per_second": 22.09, "eval_nq_pairs_steps_per_second": 0.884, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_trivia_pairs_loss": 0.8565467000007629, "eval_trivia_pairs_runtime": 6.4751, "eval_trivia_pairs_samples_per_second": 15.444, "eval_trivia_pairs_steps_per_second": 0.618, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_quora_pairs_loss": 0.052645713090896606, "eval_quora_pairs_runtime": 0.6803, "eval_quora_pairs_samples_per_second": 146.985, "eval_quora_pairs_steps_per_second": 5.879, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_gooaq_pairs_loss": 0.5815556645393372, "eval_gooaq_pairs_runtime": 1.3985, "eval_gooaq_pairs_samples_per_second": 71.504, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_mrpc_pairs_loss": 0.047052089124917984, "eval_mrpc_pairs_runtime": 0.2602, "eval_mrpc_pairs_samples_per_second": 384.349, "eval_mrpc_pairs_steps_per_second": 15.374, "step": 4160 }, { "epoch": 0.6569503212662592, "grad_norm": 21.91355323791504, "learning_rate": 1.2780533723289014e-05, "loss": 0.5223, "step": 4192 }, { "epoch": 0.6619652092148566, "grad_norm": 9.792081832885742, "learning_rate": 1.2528179694508286e-05, "loss": 1.0498, "step": 4224 }, { "epoch": 0.666980097163454, "grad_norm": 6.606201648712158, "learning_rate": 1.2276542811903345e-05, "loss": 0.6791, "step": 4256 }, { "epoch": 0.6719949851120514, "grad_norm": 16.744705200195312, "learning_rate": 1.2025696082572509e-05, "loss": 0.8836, "step": 4288 }, { "epoch": 0.6770098730606487, "grad_norm": 8.791626930236816, "learning_rate": 1.1775712284367882e-05, "loss": 0.6035, "step": 4320 }, { "epoch": 0.6820247610092462, "grad_norm": 1.067271113395691, "learning_rate": 1.152666394478045e-05, "loss": 0.5167, "step": 4352 }, { "epoch": 0.6870396489578436, "grad_norm": 7.685211181640625, "learning_rate": 1.1286358620301126e-05, "loss": 0.981, "step": 4384 }, { "epoch": 0.692054536906441, "grad_norm": 19.07784652709961, "learning_rate": 1.10393628476565e-05, "loss": 0.4873, "step": 4416 }, { "epoch": 0.6970694248550384, "grad_norm": 1.4715958833694458, "learning_rate": 1.0793516169782712e-05, "loss": 0.4762, "step": 4448 }, { "epoch": 0.7020843128036358, "grad_norm": 14.572600364685059, "learning_rate": 1.0548889913873123e-05, "loss": 0.8201, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_nli-pairs_loss": 0.8704043626785278, "eval_nli-pairs_runtime": 3.6418, "eval_nli-pairs_samples_per_second": 27.459, "eval_nli-pairs_steps_per_second": 1.098, "eval_sts-test_pearson_cosine": 0.7871366351762351, "eval_sts-test_pearson_dot": 0.520292802271069, "eval_sts-test_pearson_euclidean": 0.7358991589918665, "eval_sts-test_pearson_manhattan": 0.7306487678482384, "eval_sts-test_pearson_max": 0.7871366351762351, "eval_sts-test_spearman_cosine": 0.8043053229220561, "eval_sts-test_spearman_dot": 0.500924984433136, "eval_sts-test_spearman_euclidean": 0.7279966902078664, "eval_sts-test_spearman_manhattan": 0.7254635738312362, "eval_sts-test_spearman_max": 0.8043053229220561, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_vitaminc-pairs_loss": 4.733531475067139, "eval_vitaminc-pairs_runtime": 1.1524, "eval_vitaminc-pairs_samples_per_second": 73.759, "eval_vitaminc-pairs_steps_per_second": 2.603, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_sts-label_loss": 3.589179515838623, "eval_sts-label_runtime": 0.2802, "eval_sts-label_samples_per_second": 356.831, "eval_sts-label_steps_per_second": 14.273, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_qnli-contrastive_loss": 0.11559023708105087, "eval_qnli-contrastive_runtime": 0.3803, "eval_qnli-contrastive_samples_per_second": 262.956, "eval_qnli-contrastive_steps_per_second": 10.518, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_scitail-pairs-qa_loss": 0.05958002060651779, "eval_scitail-pairs-qa_runtime": 0.9171, "eval_scitail-pairs-qa_samples_per_second": 109.042, "eval_scitail-pairs-qa_steps_per_second": 4.362, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_scitail-pairs-pos_loss": 0.43254122138023376, "eval_scitail-pairs-pos_runtime": 1.3676, "eval_scitail-pairs-pos_samples_per_second": 73.118, "eval_scitail-pairs-pos_steps_per_second": 2.925, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_xsum-pairs_loss": 0.248906210064888, "eval_xsum-pairs_runtime": 0.9364, "eval_xsum-pairs_samples_per_second": 106.797, "eval_xsum-pairs_steps_per_second": 4.272, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_compression-pairs_loss": 0.08712127059698105, "eval_compression-pairs_runtime": 0.2771, "eval_compression-pairs_samples_per_second": 360.923, "eval_compression-pairs_steps_per_second": 14.437, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_sciq_pairs_loss": 0.2863478362560272, "eval_sciq_pairs_runtime": 4.1006, "eval_sciq_pairs_samples_per_second": 24.386, "eval_sciq_pairs_steps_per_second": 0.975, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_qasc_pairs_loss": 0.17710347473621368, "eval_qasc_pairs_runtime": 1.0521, "eval_qasc_pairs_samples_per_second": 95.051, "eval_qasc_pairs_steps_per_second": 3.802, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_openbookqa_pairs_loss": 1.5271464586257935, "eval_openbookqa_pairs_runtime": 0.8986, "eval_openbookqa_pairs_samples_per_second": 111.286, "eval_openbookqa_pairs_steps_per_second": 4.451, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_msmarco_pairs_loss": 0.5346755385398865, "eval_msmarco_pairs_runtime": 2.0827, "eval_msmarco_pairs_samples_per_second": 48.014, "eval_msmarco_pairs_steps_per_second": 1.921, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_nq_pairs_loss": 0.24830152094364166, "eval_nq_pairs_runtime": 4.5025, "eval_nq_pairs_samples_per_second": 22.21, "eval_nq_pairs_steps_per_second": 0.888, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_trivia_pairs_loss": 0.799673318862915, "eval_trivia_pairs_runtime": 6.4664, "eval_trivia_pairs_samples_per_second": 15.465, "eval_trivia_pairs_steps_per_second": 0.619, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_quora_pairs_loss": 0.030656050890684128, "eval_quora_pairs_runtime": 0.6818, "eval_quora_pairs_samples_per_second": 146.669, "eval_quora_pairs_steps_per_second": 5.867, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_gooaq_pairs_loss": 0.5036634802818298, "eval_gooaq_pairs_runtime": 1.4051, "eval_gooaq_pairs_samples_per_second": 71.169, "eval_gooaq_pairs_steps_per_second": 2.847, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_mrpc_pairs_loss": 0.04455450549721718, "eval_mrpc_pairs_runtime": 0.2642, "eval_mrpc_pairs_samples_per_second": 378.478, "eval_mrpc_pairs_steps_per_second": 15.139, "step": 4480 }, { "epoch": 0.7070992007522332, "grad_norm": 15.19054889678955, "learning_rate": 1.030555505304156e-05, "loss": 0.7799, "step": 4512 }, { "epoch": 0.7121140887008306, "grad_norm": 16.065160751342773, "learning_rate": 1.0063582185731009e-05, "loss": 0.8006, "step": 4544 }, { "epoch": 0.717128976649428, "grad_norm": 3.2584469318389893, "learning_rate": 9.823041515230937e-06, "loss": 0.5123, "step": 4576 }, { "epoch": 0.7221438645980254, "grad_norm": 2.2951438426971436, "learning_rate": 9.584002829309324e-06, "loss": 0.7421, "step": 4608 }, { "epoch": 0.7271587525466228, "grad_norm": 21.291872024536133, "learning_rate": 9.346535479965231e-06, "loss": 0.9477, "step": 4640 }, { "epoch": 0.7321736404952202, "grad_norm": 4.785529613494873, "learning_rate": 9.11070836330775e-06, "loss": 0.5021, "step": 4672 }, { "epoch": 0.7371885284438175, "grad_norm": 1.7058138847351074, "learning_rate": 8.876589899567312e-06, "loss": 0.931, "step": 4704 }, { "epoch": 0.7422034163924149, "grad_norm": 9.1055326461792, "learning_rate": 8.644248013244963e-06, "loss": 0.7777, "step": 4736 }, { "epoch": 0.7472183043410123, "grad_norm": 3.6529128551483154, "learning_rate": 8.413750113405556e-06, "loss": 0.9462, "step": 4768 }, { "epoch": 0.7522331922896098, "grad_norm": 0.5643049478530884, "learning_rate": 8.185163074120399e-06, "loss": 0.5846, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_nli-pairs_loss": 0.9291799664497375, "eval_nli-pairs_runtime": 3.7498, "eval_nli-pairs_samples_per_second": 26.668, "eval_nli-pairs_steps_per_second": 1.067, "eval_sts-test_pearson_cosine": 0.7855324842750789, "eval_sts-test_pearson_dot": 0.5242204261314407, "eval_sts-test_pearson_euclidean": 0.7349702751512333, "eval_sts-test_pearson_manhattan": 0.7293454465410049, "eval_sts-test_pearson_max": 0.7855324842750789, "eval_sts-test_spearman_cosine": 0.8044211074352633, "eval_sts-test_spearman_dot": 0.5021807579050959, "eval_sts-test_spearman_euclidean": 0.7270456124616013, "eval_sts-test_spearman_manhattan": 0.7246691951731193, "eval_sts-test_spearman_max": 0.8044211074352633, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_vitaminc-pairs_loss": 4.687094688415527, "eval_vitaminc-pairs_runtime": 1.1386, "eval_vitaminc-pairs_samples_per_second": 74.654, "eval_vitaminc-pairs_steps_per_second": 2.635, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_sts-label_loss": 3.8013510704040527, "eval_sts-label_runtime": 0.2716, "eval_sts-label_samples_per_second": 368.125, "eval_sts-label_steps_per_second": 14.725, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_qnli-contrastive_loss": 0.1414812207221985, "eval_qnli-contrastive_runtime": 0.3601, "eval_qnli-contrastive_samples_per_second": 277.73, "eval_qnli-contrastive_steps_per_second": 11.109, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_scitail-pairs-qa_loss": 0.05851547792553902, "eval_scitail-pairs-qa_runtime": 0.8864, "eval_scitail-pairs-qa_samples_per_second": 112.817, "eval_scitail-pairs-qa_steps_per_second": 4.513, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_scitail-pairs-pos_loss": 0.4562886357307434, "eval_scitail-pairs-pos_runtime": 1.3535, "eval_scitail-pairs-pos_samples_per_second": 73.88, "eval_scitail-pairs-pos_steps_per_second": 2.955, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_xsum-pairs_loss": 0.23483119904994965, "eval_xsum-pairs_runtime": 0.9336, "eval_xsum-pairs_samples_per_second": 107.109, "eval_xsum-pairs_steps_per_second": 4.284, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_compression-pairs_loss": 0.08680214732885361, "eval_compression-pairs_runtime": 0.2716, "eval_compression-pairs_samples_per_second": 368.254, "eval_compression-pairs_steps_per_second": 14.73, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_sciq_pairs_loss": 0.2816057801246643, "eval_sciq_pairs_runtime": 4.0742, "eval_sciq_pairs_samples_per_second": 24.545, "eval_sciq_pairs_steps_per_second": 0.982, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_qasc_pairs_loss": 0.17035560309886932, "eval_qasc_pairs_runtime": 1.0717, "eval_qasc_pairs_samples_per_second": 93.311, "eval_qasc_pairs_steps_per_second": 3.732, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_openbookqa_pairs_loss": 1.5671054124832153, "eval_openbookqa_pairs_runtime": 0.8973, "eval_openbookqa_pairs_samples_per_second": 111.441, "eval_openbookqa_pairs_steps_per_second": 4.458, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_msmarco_pairs_loss": 0.5062486529350281, "eval_msmarco_pairs_runtime": 2.0609, "eval_msmarco_pairs_samples_per_second": 48.524, "eval_msmarco_pairs_steps_per_second": 1.941, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_nq_pairs_loss": 0.22875532507896423, "eval_nq_pairs_runtime": 4.5041, "eval_nq_pairs_samples_per_second": 22.202, "eval_nq_pairs_steps_per_second": 0.888, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_trivia_pairs_loss": 0.8119627237319946, "eval_trivia_pairs_runtime": 6.4609, "eval_trivia_pairs_samples_per_second": 15.478, "eval_trivia_pairs_steps_per_second": 0.619, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_quora_pairs_loss": 0.06211049482226372, "eval_quora_pairs_runtime": 0.6765, "eval_quora_pairs_samples_per_second": 147.827, "eval_quora_pairs_steps_per_second": 5.913, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_gooaq_pairs_loss": 0.4847571551799774, "eval_gooaq_pairs_runtime": 1.3911, "eval_gooaq_pairs_samples_per_second": 71.886, "eval_gooaq_pairs_steps_per_second": 2.875, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_mrpc_pairs_loss": 0.04384278133511543, "eval_mrpc_pairs_runtime": 0.2617, "eval_mrpc_pairs_samples_per_second": 382.146, "eval_mrpc_pairs_steps_per_second": 15.286, "step": 4800 }, { "epoch": 0.7572480802382072, "grad_norm": 14.555929183959961, "learning_rate": 7.958553215065208e-06, "loss": 0.6735, "step": 4832 }, { "epoch": 0.7622629681868046, "grad_norm": 10.30207347869873, "learning_rate": 7.733986282278816e-06, "loss": 1.1569, "step": 4864 }, { "epoch": 0.767277856135402, "grad_norm": 17.255786895751953, "learning_rate": 7.511527429088396e-06, "loss": 0.9749, "step": 4896 }, { "epoch": 0.7722927440839994, "grad_norm": 14.730864524841309, "learning_rate": 7.291241197206574e-06, "loss": 0.6581, "step": 4928 }, { "epoch": 0.7773076320325968, "grad_norm": 8.807291984558105, "learning_rate": 7.07319149800605e-06, "loss": 0.6979, "step": 4960 }, { "epoch": 0.7823225199811942, "grad_norm": 0.6080070734024048, "learning_rate": 6.857441593977046e-06, "loss": 0.7582, "step": 4992 }, { "epoch": 0.7873374079297916, "grad_norm": 2.2002525329589844, "learning_rate": 6.6440540803730425e-06, "loss": 1.0082, "step": 5024 }, { "epoch": 0.792352295878389, "grad_norm": 8.624346733093262, "learning_rate": 6.433090867050122e-06, "loss": 0.6206, "step": 5056 }, { "epoch": 0.7973671838269863, "grad_norm": 0.9821205139160156, "learning_rate": 6.224613160505094e-06, "loss": 0.5165, "step": 5088 }, { "epoch": 0.8023820717755837, "grad_norm": 4.104696750640869, "learning_rate": 6.018681446117773e-06, "loss": 0.4914, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_nli-pairs_loss": 0.8841198682785034, "eval_nli-pairs_runtime": 4.1793, "eval_nli-pairs_samples_per_second": 23.928, "eval_nli-pairs_steps_per_second": 0.957, "eval_sts-test_pearson_cosine": 0.7866468635321827, "eval_sts-test_pearson_dot": 0.5124924570863083, "eval_sts-test_pearson_euclidean": 0.7320768163626257, "eval_sts-test_pearson_manhattan": 0.7266238528084388, "eval_sts-test_pearson_max": 0.7866468635321827, "eval_sts-test_spearman_cosine": 0.8041619306345255, "eval_sts-test_spearman_dot": 0.4913316974763461, "eval_sts-test_spearman_euclidean": 0.7232005770314757, "eval_sts-test_spearman_manhattan": 0.7207683852583252, "eval_sts-test_spearman_max": 0.8041619306345255, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_vitaminc-pairs_loss": 4.725103855133057, "eval_vitaminc-pairs_runtime": 1.2146, "eval_vitaminc-pairs_samples_per_second": 69.982, "eval_vitaminc-pairs_steps_per_second": 2.47, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_sts-label_loss": 3.6535470485687256, "eval_sts-label_runtime": 0.3164, "eval_sts-label_samples_per_second": 316.056, "eval_sts-label_steps_per_second": 12.642, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_qnli-contrastive_loss": 0.10529302805662155, "eval_qnli-contrastive_runtime": 0.368, "eval_qnli-contrastive_samples_per_second": 271.711, "eval_qnli-contrastive_steps_per_second": 10.868, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_scitail-pairs-qa_loss": 0.05555274337530136, "eval_scitail-pairs-qa_runtime": 0.9542, "eval_scitail-pairs-qa_samples_per_second": 104.795, "eval_scitail-pairs-qa_steps_per_second": 4.192, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_scitail-pairs-pos_loss": 0.4785614013671875, "eval_scitail-pairs-pos_runtime": 1.4937, "eval_scitail-pairs-pos_samples_per_second": 66.949, "eval_scitail-pairs-pos_steps_per_second": 2.678, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_xsum-pairs_loss": 0.2355932593345642, "eval_xsum-pairs_runtime": 0.9396, "eval_xsum-pairs_samples_per_second": 106.432, "eval_xsum-pairs_steps_per_second": 4.257, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_compression-pairs_loss": 0.083825021982193, "eval_compression-pairs_runtime": 0.2789, "eval_compression-pairs_samples_per_second": 358.564, "eval_compression-pairs_steps_per_second": 14.343, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_sciq_pairs_loss": 0.28157705068588257, "eval_sciq_pairs_runtime": 4.1947, "eval_sciq_pairs_samples_per_second": 23.84, "eval_sciq_pairs_steps_per_second": 0.954, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_qasc_pairs_loss": 0.1739024668931961, "eval_qasc_pairs_runtime": 1.1277, "eval_qasc_pairs_samples_per_second": 88.676, "eval_qasc_pairs_steps_per_second": 3.547, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_openbookqa_pairs_loss": 1.591935396194458, "eval_openbookqa_pairs_runtime": 1.0022, "eval_openbookqa_pairs_samples_per_second": 99.782, "eval_openbookqa_pairs_steps_per_second": 3.991, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_msmarco_pairs_loss": 0.5132349133491516, "eval_msmarco_pairs_runtime": 2.1322, "eval_msmarco_pairs_samples_per_second": 46.901, "eval_msmarco_pairs_steps_per_second": 1.876, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_nq_pairs_loss": 0.2343132346868515, "eval_nq_pairs_runtime": 4.5529, "eval_nq_pairs_samples_per_second": 21.964, "eval_nq_pairs_steps_per_second": 0.879, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_trivia_pairs_loss": 0.7988561987876892, "eval_trivia_pairs_runtime": 6.5661, "eval_trivia_pairs_samples_per_second": 15.23, "eval_trivia_pairs_steps_per_second": 0.609, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_quora_pairs_loss": 0.05578049644827843, "eval_quora_pairs_runtime": 0.8028, "eval_quora_pairs_samples_per_second": 124.564, "eval_quora_pairs_steps_per_second": 4.983, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_gooaq_pairs_loss": 0.48901888728141785, "eval_gooaq_pairs_runtime": 1.5605, "eval_gooaq_pairs_samples_per_second": 64.082, "eval_gooaq_pairs_steps_per_second": 2.563, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_mrpc_pairs_loss": 0.04172317683696747, "eval_mrpc_pairs_runtime": 0.2628, "eval_mrpc_pairs_samples_per_second": 380.505, "eval_mrpc_pairs_steps_per_second": 15.22, "step": 5120 }, { "epoch": 0.8073969597241811, "grad_norm": 9.413043022155762, "learning_rate": 5.815355470602388e-06, "loss": 1.098, "step": 5152 }, { "epoch": 0.8124118476727785, "grad_norm": 0.25412222743034363, "learning_rate": 5.614694224673387e-06, "loss": 0.821, "step": 5184 }, { "epoch": 0.8174267356213759, "grad_norm": 18.76092529296875, "learning_rate": 5.416755925930494e-06, "loss": 0.9351, "step": 5216 }, { "epoch": 0.8224416235699734, "grad_norm": 19.607337951660156, "learning_rate": 5.221598001968132e-06, "loss": 0.8784, "step": 5248 }, { "epoch": 0.8274565115185708, "grad_norm": 3.2164149284362793, "learning_rate": 5.029277073714009e-06, "loss": 0.8326, "step": 5280 }, { "epoch": 0.8324713994671682, "grad_norm": 11.156713485717773, "learning_rate": 4.839848939001789e-06, "loss": 0.7551, "step": 5312 }, { "epoch": 0.8374862874157656, "grad_norm": 8.80623722076416, "learning_rate": 4.653368556382492e-06, "loss": 0.8234, "step": 5344 }, { "epoch": 0.842501175364363, "grad_norm": 16.081491470336914, "learning_rate": 4.469890029179472e-06, "loss": 1.0922, "step": 5376 }, { "epoch": 0.8475160633129604, "grad_norm": 0.8583326935768127, "learning_rate": 4.2894665897914794e-06, "loss": 1.0925, "step": 5408 }, { "epoch": 0.8525309512615578, "grad_norm": 7.903942108154297, "learning_rate": 4.112150584248388e-06, "loss": 1.099, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_nli-pairs_loss": 0.8909263014793396, "eval_nli-pairs_runtime": 3.6329, "eval_nli-pairs_samples_per_second": 27.526, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.7892673589571536, "eval_sts-test_pearson_dot": 0.5308666684424199, "eval_sts-test_pearson_euclidean": 0.7372214599353599, "eval_sts-test_pearson_manhattan": 0.73149442324126, "eval_sts-test_pearson_max": 0.7892673589571536, "eval_sts-test_spearman_cosine": 0.8088174691107087, "eval_sts-test_spearman_dot": 0.5097841799376374, "eval_sts-test_spearman_euclidean": 0.7291099552995026, "eval_sts-test_spearman_manhattan": 0.7255023946868168, "eval_sts-test_spearman_max": 0.8088174691107087, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_vitaminc-pairs_loss": 4.696901798248291, "eval_vitaminc-pairs_runtime": 1.13, "eval_vitaminc-pairs_samples_per_second": 75.219, "eval_vitaminc-pairs_steps_per_second": 2.655, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_sts-label_loss": 3.794584274291992, "eval_sts-label_runtime": 0.2757, "eval_sts-label_samples_per_second": 362.777, "eval_sts-label_steps_per_second": 14.511, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_qnli-contrastive_loss": 0.1291896551847458, "eval_qnli-contrastive_runtime": 0.3577, "eval_qnli-contrastive_samples_per_second": 279.536, "eval_qnli-contrastive_steps_per_second": 11.181, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_scitail-pairs-qa_loss": 0.05729294940829277, "eval_scitail-pairs-qa_runtime": 0.8757, "eval_scitail-pairs-qa_samples_per_second": 114.199, "eval_scitail-pairs-qa_steps_per_second": 4.568, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_scitail-pairs-pos_loss": 0.47140783071517944, "eval_scitail-pairs-pos_runtime": 1.3328, "eval_scitail-pairs-pos_samples_per_second": 75.031, "eval_scitail-pairs-pos_steps_per_second": 3.001, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_xsum-pairs_loss": 0.2317724972963333, "eval_xsum-pairs_runtime": 0.934, "eval_xsum-pairs_samples_per_second": 107.065, "eval_xsum-pairs_steps_per_second": 4.283, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_compression-pairs_loss": 0.0849599540233612, "eval_compression-pairs_runtime": 0.2772, "eval_compression-pairs_samples_per_second": 360.752, "eval_compression-pairs_steps_per_second": 14.43, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_sciq_pairs_loss": 0.2746911346912384, "eval_sciq_pairs_runtime": 4.0398, "eval_sciq_pairs_samples_per_second": 24.754, "eval_sciq_pairs_steps_per_second": 0.99, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_qasc_pairs_loss": 0.16956950724124908, "eval_qasc_pairs_runtime": 1.0682, "eval_qasc_pairs_samples_per_second": 93.615, "eval_qasc_pairs_steps_per_second": 3.745, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_openbookqa_pairs_loss": 1.5424996614456177, "eval_openbookqa_pairs_runtime": 0.8928, "eval_openbookqa_pairs_samples_per_second": 112.006, "eval_openbookqa_pairs_steps_per_second": 4.48, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_msmarco_pairs_loss": 0.5047981142997742, "eval_msmarco_pairs_runtime": 2.0436, "eval_msmarco_pairs_samples_per_second": 48.932, "eval_msmarco_pairs_steps_per_second": 1.957, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_nq_pairs_loss": 0.230237677693367, "eval_nq_pairs_runtime": 4.5251, "eval_nq_pairs_samples_per_second": 22.099, "eval_nq_pairs_steps_per_second": 0.884, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_trivia_pairs_loss": 0.7567735314369202, "eval_trivia_pairs_runtime": 6.4545, "eval_trivia_pairs_samples_per_second": 15.493, "eval_trivia_pairs_steps_per_second": 0.62, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_quora_pairs_loss": 0.05753583088517189, "eval_quora_pairs_runtime": 0.6769, "eval_quora_pairs_samples_per_second": 147.736, "eval_quora_pairs_steps_per_second": 5.909, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_gooaq_pairs_loss": 0.49448758363723755, "eval_gooaq_pairs_runtime": 1.3984, "eval_gooaq_pairs_samples_per_second": 71.51, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_mrpc_pairs_loss": 0.04384453222155571, "eval_mrpc_pairs_runtime": 0.2653, "eval_mrpc_pairs_samples_per_second": 376.996, "eval_mrpc_pairs_steps_per_second": 15.08, "step": 5440 }, { "epoch": 0.8575458392101551, "grad_norm": 0.8697513341903687, "learning_rate": 3.93799345702415e-06, "loss": 0.5396, "step": 5472 }, { "epoch": 0.8625607271587525, "grad_norm": 8.337197303771973, "learning_rate": 3.7670457361112116e-06, "loss": 0.6636, "step": 5504 }, { "epoch": 0.8675756151073499, "grad_norm": 0.3655373156070709, "learning_rate": 3.5993570183609596e-06, "loss": 1.0095, "step": 5536 }, { "epoch": 0.8725905030559473, "grad_norm": 13.748374938964844, "learning_rate": 3.4349759550941933e-06, "loss": 0.631, "step": 5568 }, { "epoch": 0.8776053910045447, "grad_norm": 15.683762550354004, "learning_rate": 3.273950237986013e-06, "loss": 0.5415, "step": 5600 }, { "epoch": 0.8826202789531421, "grad_norm": 10.004467964172363, "learning_rate": 3.11632658522906e-06, "loss": 0.9227, "step": 5632 }, { "epoch": 0.8876351669017395, "grad_norm": 12.990907669067383, "learning_rate": 2.9621507279792564e-06, "loss": 0.8991, "step": 5664 }, { "epoch": 0.8926500548503369, "grad_norm": 0.4619373679161072, "learning_rate": 2.8114673970878584e-06, "loss": 0.5068, "step": 5696 }, { "epoch": 0.8976649427989344, "grad_norm": 8.317788124084473, "learning_rate": 2.664320310123768e-06, "loss": 1.2134, "step": 5728 }, { "epoch": 0.9026798307475318, "grad_norm": 0.38993319869041443, "learning_rate": 2.5207521586897876e-06, "loss": 0.4651, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_nli-pairs_loss": 0.8765493631362915, "eval_nli-pairs_runtime": 3.6164, "eval_nli-pairs_samples_per_second": 27.652, "eval_nli-pairs_steps_per_second": 1.106, "eval_sts-test_pearson_cosine": 0.7880147168961996, "eval_sts-test_pearson_dot": 0.5198107156003906, "eval_sts-test_pearson_euclidean": 0.7362840264051249, "eval_sts-test_pearson_manhattan": 0.7307716823389564, "eval_sts-test_pearson_max": 0.7880147168961996, "eval_sts-test_spearman_cosine": 0.8071394355093185, "eval_sts-test_spearman_dot": 0.49865317522814645, "eval_sts-test_spearman_euclidean": 0.7278395467197664, "eval_sts-test_spearman_manhattan": 0.7246934378777047, "eval_sts-test_spearman_max": 0.8071394355093185, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_vitaminc-pairs_loss": 4.717629432678223, "eval_vitaminc-pairs_runtime": 1.1248, "eval_vitaminc-pairs_samples_per_second": 75.571, "eval_vitaminc-pairs_steps_per_second": 2.667, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_sts-label_loss": 3.7598328590393066, "eval_sts-label_runtime": 0.2743, "eval_sts-label_samples_per_second": 364.548, "eval_sts-label_steps_per_second": 14.582, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_qnli-contrastive_loss": 0.11829647421836853, "eval_qnli-contrastive_runtime": 0.3606, "eval_qnli-contrastive_samples_per_second": 277.334, "eval_qnli-contrastive_steps_per_second": 11.093, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_scitail-pairs-qa_loss": 0.05503571406006813, "eval_scitail-pairs-qa_runtime": 0.874, "eval_scitail-pairs-qa_samples_per_second": 114.411, "eval_scitail-pairs-qa_steps_per_second": 4.576, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_scitail-pairs-pos_loss": 0.47530597448349, "eval_scitail-pairs-pos_runtime": 1.3429, "eval_scitail-pairs-pos_samples_per_second": 74.463, "eval_scitail-pairs-pos_steps_per_second": 2.979, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_xsum-pairs_loss": 0.22936196625232697, "eval_xsum-pairs_runtime": 0.9431, "eval_xsum-pairs_samples_per_second": 106.028, "eval_xsum-pairs_steps_per_second": 4.241, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_compression-pairs_loss": 0.08313465863466263, "eval_compression-pairs_runtime": 0.2781, "eval_compression-pairs_samples_per_second": 359.542, "eval_compression-pairs_steps_per_second": 14.382, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_sciq_pairs_loss": 0.27646955847740173, "eval_sciq_pairs_runtime": 4.0554, "eval_sciq_pairs_samples_per_second": 24.658, "eval_sciq_pairs_steps_per_second": 0.986, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_qasc_pairs_loss": 0.17006540298461914, "eval_qasc_pairs_runtime": 1.0538, "eval_qasc_pairs_samples_per_second": 94.898, "eval_qasc_pairs_steps_per_second": 3.796, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_openbookqa_pairs_loss": 1.5487664937973022, "eval_openbookqa_pairs_runtime": 0.8956, "eval_openbookqa_pairs_samples_per_second": 111.653, "eval_openbookqa_pairs_steps_per_second": 4.466, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_msmarco_pairs_loss": 0.4861982464790344, "eval_msmarco_pairs_runtime": 2.0548, "eval_msmarco_pairs_samples_per_second": 48.666, "eval_msmarco_pairs_steps_per_second": 1.947, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_nq_pairs_loss": 0.22520922124385834, "eval_nq_pairs_runtime": 4.4973, "eval_nq_pairs_samples_per_second": 22.236, "eval_nq_pairs_steps_per_second": 0.889, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_trivia_pairs_loss": 0.7480303049087524, "eval_trivia_pairs_runtime": 6.498, "eval_trivia_pairs_samples_per_second": 15.389, "eval_trivia_pairs_steps_per_second": 0.616, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_quora_pairs_loss": 0.06060533598065376, "eval_quora_pairs_runtime": 0.6722, "eval_quora_pairs_samples_per_second": 148.76, "eval_quora_pairs_steps_per_second": 5.95, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_gooaq_pairs_loss": 0.4696855843067169, "eval_gooaq_pairs_runtime": 1.3985, "eval_gooaq_pairs_samples_per_second": 71.503, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_mrpc_pairs_loss": 0.04175671190023422, "eval_mrpc_pairs_runtime": 0.2618, "eval_mrpc_pairs_samples_per_second": 381.956, "eval_mrpc_pairs_steps_per_second": 15.278, "step": 5760 }, { "epoch": 0.9076947186961292, "grad_norm": 19.970914840698242, "learning_rate": 2.3808045960365743e-06, "loss": 0.6346, "step": 5792 }, { "epoch": 0.9127096066447266, "grad_norm": 7.2970075607299805, "learning_rate": 2.2445182249778363e-06, "loss": 1.1103, "step": 5824 }, { "epoch": 0.917724494593324, "grad_norm": 14.34080982208252, "learning_rate": 2.1119325861102666e-06, "loss": 0.7667, "step": 5856 }, { "epoch": 0.9227393825419213, "grad_norm": 16.219850540161133, "learning_rate": 1.98308614634171e-06, "loss": 0.9174, "step": 5888 }, { "epoch": 0.9277542704905187, "grad_norm": 17.201740264892578, "learning_rate": 1.8580162877307744e-06, "loss": 0.7609, "step": 5920 }, { "epoch": 0.9327691584391161, "grad_norm": 12.591241836547852, "learning_rate": 1.7367592966412454e-06, "loss": 0.8993, "step": 5952 }, { "epoch": 0.9377840463877135, "grad_norm": 17.12389373779297, "learning_rate": 1.619350353214355e-06, "loss": 0.7587, "step": 5984 }, { "epoch": 0.9427989343363109, "grad_norm": 44.237342834472656, "learning_rate": 1.5058235211620126e-06, "loss": 0.935, "step": 6016 }, { "epoch": 0.9478138222849083, "grad_norm": 4.658092975616455, "learning_rate": 1.3962117378839439e-06, "loss": 0.8551, "step": 6048 }, { "epoch": 0.9528287102335057, "grad_norm": 0.4202437698841095, "learning_rate": 1.2905468049116077e-06, "loss": 1.4247, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_nli-pairs_loss": 0.880797266960144, "eval_nli-pairs_runtime": 3.65, "eval_nli-pairs_samples_per_second": 27.397, "eval_nli-pairs_steps_per_second": 1.096, "eval_sts-test_pearson_cosine": 0.7886384880168056, "eval_sts-test_pearson_dot": 0.5209320238457065, "eval_sts-test_pearson_euclidean": 0.7365619856047663, "eval_sts-test_pearson_manhattan": 0.7309874377904119, "eval_sts-test_pearson_max": 0.7886384880168056, "eval_sts-test_spearman_cosine": 0.8078306606920327, "eval_sts-test_spearman_dot": 0.4995671547413244, "eval_sts-test_spearman_euclidean": 0.7281379887760366, "eval_sts-test_spearman_manhattan": 0.7249545388844193, "eval_sts-test_spearman_max": 0.8078306606920327, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_vitaminc-pairs_loss": 4.70750617980957, "eval_vitaminc-pairs_runtime": 1.1372, "eval_vitaminc-pairs_samples_per_second": 74.747, "eval_vitaminc-pairs_steps_per_second": 2.638, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_sts-label_loss": 3.7686922550201416, "eval_sts-label_runtime": 0.2807, "eval_sts-label_samples_per_second": 356.243, "eval_sts-label_steps_per_second": 14.25, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_qnli-contrastive_loss": 0.12000326067209244, "eval_qnli-contrastive_runtime": 0.3651, "eval_qnli-contrastive_samples_per_second": 273.878, "eval_qnli-contrastive_steps_per_second": 10.955, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_scitail-pairs-qa_loss": 0.055266913026571274, "eval_scitail-pairs-qa_runtime": 0.8813, "eval_scitail-pairs-qa_samples_per_second": 113.472, "eval_scitail-pairs-qa_steps_per_second": 4.539, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_scitail-pairs-pos_loss": 0.46404972672462463, "eval_scitail-pairs-pos_runtime": 1.3468, "eval_scitail-pairs-pos_samples_per_second": 74.248, "eval_scitail-pairs-pos_steps_per_second": 2.97, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_xsum-pairs_loss": 0.22768865525722504, "eval_xsum-pairs_runtime": 0.9385, "eval_xsum-pairs_samples_per_second": 106.553, "eval_xsum-pairs_steps_per_second": 4.262, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_compression-pairs_loss": 0.08245458453893661, "eval_compression-pairs_runtime": 0.2783, "eval_compression-pairs_samples_per_second": 359.331, "eval_compression-pairs_steps_per_second": 14.373, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_sciq_pairs_loss": 0.24696679413318634, "eval_sciq_pairs_runtime": 4.072, "eval_sciq_pairs_samples_per_second": 24.558, "eval_sciq_pairs_steps_per_second": 0.982, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_qasc_pairs_loss": 0.16628116369247437, "eval_qasc_pairs_runtime": 1.066, "eval_qasc_pairs_samples_per_second": 93.809, "eval_qasc_pairs_steps_per_second": 3.752, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_openbookqa_pairs_loss": 1.5343760251998901, "eval_openbookqa_pairs_runtime": 0.9064, "eval_openbookqa_pairs_samples_per_second": 110.324, "eval_openbookqa_pairs_steps_per_second": 4.413, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_msmarco_pairs_loss": 0.48861968517303467, "eval_msmarco_pairs_runtime": 2.0777, "eval_msmarco_pairs_samples_per_second": 48.131, "eval_msmarco_pairs_steps_per_second": 1.925, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_nq_pairs_loss": 0.2192871868610382, "eval_nq_pairs_runtime": 4.5629, "eval_nq_pairs_samples_per_second": 21.916, "eval_nq_pairs_steps_per_second": 0.877, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_trivia_pairs_loss": 0.7455114126205444, "eval_trivia_pairs_runtime": 6.4434, "eval_trivia_pairs_samples_per_second": 15.52, "eval_trivia_pairs_steps_per_second": 0.621, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_quora_pairs_loss": 0.0536942183971405, "eval_quora_pairs_runtime": 0.6874, "eval_quora_pairs_samples_per_second": 145.481, "eval_quora_pairs_steps_per_second": 5.819, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_gooaq_pairs_loss": 0.4775075614452362, "eval_gooaq_pairs_runtime": 1.3946, "eval_gooaq_pairs_samples_per_second": 71.707, "eval_gooaq_pairs_steps_per_second": 2.868, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_mrpc_pairs_loss": 0.041804660111665726, "eval_mrpc_pairs_runtime": 0.2631, "eval_mrpc_pairs_samples_per_second": 380.035, "eval_mrpc_pairs_steps_per_second": 15.201, "step": 6080 }, { "epoch": 0.9578435981821031, "grad_norm": 15.8797607421875, "learning_rate": 1.1888593786816527e-06, "loss": 0.3377, "step": 6112 }, { "epoch": 0.9628584861307005, "grad_norm": 54.2625732421875, "learning_rate": 1.0911789616415957e-06, "loss": 1.163, "step": 6144 }, { "epoch": 0.967873374079298, "grad_norm": 27.014169692993164, "learning_rate": 9.975338936903327e-07, "loss": 1.1638, "step": 6176 }, { "epoch": 0.9728882620278954, "grad_norm": 12.264323234558105, "learning_rate": 9.079513439558945e-07, "loss": 0.7428, "step": 6208 }, { "epoch": 0.9779031499764927, "grad_norm": 0.2486962229013443, "learning_rate": 8.224573029129201e-07, "loss": 0.3827, "step": 6240 }, { "epoch": 0.9829180379250901, "grad_norm": 0.19951488077640533, "learning_rate": 7.41076574842064e-07, "loss": 1.0739, "step": 6272 }, { "epoch": 0.9879329258736875, "grad_norm": 1.6168636083602905, "learning_rate": 6.638327706335673e-07, "loss": 0.7049, "step": 6304 }, { "epoch": 0.9929478138222849, "grad_norm": 1.4084432125091553, "learning_rate": 5.907483009370463e-07, "loss": 0.9298, "step": 6336 }, { "epoch": 0.9979627017708823, "grad_norm": 0.7779116630554199, "learning_rate": 5.218443696595343e-07, "loss": 0.6243, "step": 6368 }, { "epoch": 1.0029775897194797, "grad_norm": 10.389066696166992, "learning_rate": 4.5714096781360346e-07, "loss": 0.8693, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_nli-pairs_loss": 0.8764966726303101, "eval_nli-pairs_runtime": 3.9731, "eval_nli-pairs_samples_per_second": 25.169, "eval_nli-pairs_steps_per_second": 1.007, "eval_sts-test_pearson_cosine": 0.7883389668315285, "eval_sts-test_pearson_dot": 0.517346671859764, "eval_sts-test_pearson_euclidean": 0.7353164199200737, "eval_sts-test_pearson_manhattan": 0.7297049415657237, "eval_sts-test_pearson_max": 0.7883389668315285, "eval_sts-test_spearman_cosine": 0.8072800949662179, "eval_sts-test_spearman_dot": 0.4963365732568842, "eval_sts-test_spearman_euclidean": 0.7268218204343426, "eval_sts-test_spearman_manhattan": 0.7238000634035274, "eval_sts-test_spearman_max": 0.8072800949662179, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_vitaminc-pairs_loss": 4.692606449127197, "eval_vitaminc-pairs_runtime": 1.1964, "eval_vitaminc-pairs_samples_per_second": 71.046, "eval_vitaminc-pairs_steps_per_second": 2.508, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_sts-label_loss": 3.7494537830352783, "eval_sts-label_runtime": 0.2884, "eval_sts-label_samples_per_second": 346.773, "eval_sts-label_steps_per_second": 13.871, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_qnli-contrastive_loss": 0.11221926659345627, "eval_qnli-contrastive_runtime": 0.366, "eval_qnli-contrastive_samples_per_second": 273.23, "eval_qnli-contrastive_steps_per_second": 10.929, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_scitail-pairs-qa_loss": 0.05439920350909233, "eval_scitail-pairs-qa_runtime": 1.0826, "eval_scitail-pairs-qa_samples_per_second": 92.37, "eval_scitail-pairs-qa_steps_per_second": 3.695, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_scitail-pairs-pos_loss": 0.47426754236221313, "eval_scitail-pairs-pos_runtime": 1.4478, "eval_scitail-pairs-pos_samples_per_second": 69.07, "eval_scitail-pairs-pos_steps_per_second": 2.763, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_xsum-pairs_loss": 0.22696803510189056, "eval_xsum-pairs_runtime": 0.9498, "eval_xsum-pairs_samples_per_second": 105.287, "eval_xsum-pairs_steps_per_second": 4.211, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_compression-pairs_loss": 0.08134880661964417, "eval_compression-pairs_runtime": 0.2978, "eval_compression-pairs_samples_per_second": 335.83, "eval_compression-pairs_steps_per_second": 13.433, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_sciq_pairs_loss": 0.22929410636425018, "eval_sciq_pairs_runtime": 4.3229, "eval_sciq_pairs_samples_per_second": 23.132, "eval_sciq_pairs_steps_per_second": 0.925, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_qasc_pairs_loss": 0.16514292359352112, "eval_qasc_pairs_runtime": 1.1535, "eval_qasc_pairs_samples_per_second": 86.694, "eval_qasc_pairs_steps_per_second": 3.468, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_openbookqa_pairs_loss": 1.5505836009979248, "eval_openbookqa_pairs_runtime": 0.9784, "eval_openbookqa_pairs_samples_per_second": 102.21, "eval_openbookqa_pairs_steps_per_second": 4.088, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_msmarco_pairs_loss": 0.48988625407218933, "eval_msmarco_pairs_runtime": 2.1515, "eval_msmarco_pairs_samples_per_second": 46.48, "eval_msmarco_pairs_steps_per_second": 1.859, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_nq_pairs_loss": 0.21817754209041595, "eval_nq_pairs_runtime": 4.6579, "eval_nq_pairs_samples_per_second": 21.469, "eval_nq_pairs_steps_per_second": 0.859, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_trivia_pairs_loss": 0.7522485852241516, "eval_trivia_pairs_runtime": 6.6903, "eval_trivia_pairs_samples_per_second": 14.947, "eval_trivia_pairs_steps_per_second": 0.598, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_quora_pairs_loss": 0.026629021391272545, "eval_quora_pairs_runtime": 0.7757, "eval_quora_pairs_samples_per_second": 128.912, "eval_quora_pairs_steps_per_second": 5.156, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_gooaq_pairs_loss": 0.47202804684638977, "eval_gooaq_pairs_runtime": 1.5337, "eval_gooaq_pairs_samples_per_second": 65.202, "eval_gooaq_pairs_steps_per_second": 2.608, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_mrpc_pairs_loss": 0.041211605072021484, "eval_mrpc_pairs_runtime": 0.2796, "eval_mrpc_pairs_samples_per_second": 357.626, "eval_mrpc_pairs_steps_per_second": 14.305, "step": 6400 }, { "epoch": 1.007992477668077, "grad_norm": 0.12192127108573914, "learning_rate": 3.9665686771741374e-07, "loss": 0.731, "step": 6432 }, { "epoch": 1.0130073656166745, "grad_norm": 4.465780258178711, "learning_rate": 3.404096175483029e-07, "loss": 0.7662, "step": 6464 }, { "epoch": 1.0180222535652719, "grad_norm": 19.539562225341797, "learning_rate": 2.8841553625157116e-07, "loss": 0.5362, "step": 6496 }, { "epoch": 1.0230371415138693, "grad_norm": 1.8675719499588013, "learning_rate": 2.406897088058863e-07, "loss": 0.9786, "step": 6528 }, { "epoch": 1.0280520294624667, "grad_norm": 1.5663179159164429, "learning_rate": 1.9724598184667987e-07, "loss": 0.9213, "step": 6560 }, { "epoch": 1.033066917411064, "grad_norm": 1.0503817796707153, "learning_rate": 1.580969596488624e-07, "loss": 0.7601, "step": 6592 }, { "epoch": 1.0380818053596614, "grad_norm": 1.7467032670974731, "learning_rate": 1.2325400046994672e-07, "loss": 0.4821, "step": 6624 }, { "epoch": 1.0430966933082588, "grad_norm": 0.5685003399848938, "learning_rate": 9.272721325469414e-08, "loss": 0.73, "step": 6656 }, { "epoch": 1.0481115812568562, "grad_norm": 0.16832184791564941, "learning_rate": 6.652545470221705e-08, "loss": 0.4139, "step": 6688 }, { "epoch": 1.0531264692054536, "grad_norm": 17.248783111572266, "learning_rate": 4.465632669640285e-08, "loss": 0.5152, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_nli-pairs_loss": 0.8771082162857056, "eval_nli-pairs_runtime": 3.6864, "eval_nli-pairs_samples_per_second": 27.127, "eval_nli-pairs_steps_per_second": 1.085, "eval_sts-test_pearson_cosine": 0.7895199953969396, "eval_sts-test_pearson_dot": 0.5189310649741209, "eval_sts-test_pearson_euclidean": 0.7358975444358454, "eval_sts-test_pearson_manhattan": 0.7303294470043906, "eval_sts-test_pearson_max": 0.7895199953969396, "eval_sts-test_spearman_cosine": 0.8080710925195471, "eval_sts-test_spearman_dot": 0.49813617315229736, "eval_sts-test_spearman_euclidean": 0.727349183443088, "eval_sts-test_spearman_manhattan": 0.7243520585394965, "eval_sts-test_spearman_max": 0.8080710925195471, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_vitaminc-pairs_loss": 4.680215358734131, "eval_vitaminc-pairs_runtime": 1.1767, "eval_vitaminc-pairs_samples_per_second": 72.234, "eval_vitaminc-pairs_steps_per_second": 2.549, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_sts-label_loss": 3.747551202774048, "eval_sts-label_runtime": 0.2756, "eval_sts-label_samples_per_second": 362.89, "eval_sts-label_steps_per_second": 14.516, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_qnli-contrastive_loss": 0.11317223310470581, "eval_qnli-contrastive_runtime": 0.362, "eval_qnli-contrastive_samples_per_second": 276.263, "eval_qnli-contrastive_steps_per_second": 11.051, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_scitail-pairs-qa_loss": 0.05494887754321098, "eval_scitail-pairs-qa_runtime": 0.8771, "eval_scitail-pairs-qa_samples_per_second": 114.01, "eval_scitail-pairs-qa_steps_per_second": 4.56, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_scitail-pairs-pos_loss": 0.46942538022994995, "eval_scitail-pairs-pos_runtime": 1.3418, "eval_scitail-pairs-pos_samples_per_second": 74.527, "eval_scitail-pairs-pos_steps_per_second": 2.981, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_xsum-pairs_loss": 0.22760838270187378, "eval_xsum-pairs_runtime": 0.9366, "eval_xsum-pairs_samples_per_second": 106.764, "eval_xsum-pairs_steps_per_second": 4.271, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_compression-pairs_loss": 0.081705242395401, "eval_compression-pairs_runtime": 0.2786, "eval_compression-pairs_samples_per_second": 358.908, "eval_compression-pairs_steps_per_second": 14.356, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_sciq_pairs_loss": 0.22932235896587372, "eval_sciq_pairs_runtime": 4.0839, "eval_sciq_pairs_samples_per_second": 24.486, "eval_sciq_pairs_steps_per_second": 0.979, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_qasc_pairs_loss": 0.1658654361963272, "eval_qasc_pairs_runtime": 1.0521, "eval_qasc_pairs_samples_per_second": 95.048, "eval_qasc_pairs_steps_per_second": 3.802, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_openbookqa_pairs_loss": 1.5459561347961426, "eval_openbookqa_pairs_runtime": 0.8996, "eval_openbookqa_pairs_samples_per_second": 111.162, "eval_openbookqa_pairs_steps_per_second": 4.446, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_msmarco_pairs_loss": 0.49013325572013855, "eval_msmarco_pairs_runtime": 2.0531, "eval_msmarco_pairs_samples_per_second": 48.707, "eval_msmarco_pairs_steps_per_second": 1.948, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_nq_pairs_loss": 0.22049441933631897, "eval_nq_pairs_runtime": 4.5149, "eval_nq_pairs_samples_per_second": 22.149, "eval_nq_pairs_steps_per_second": 0.886, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_trivia_pairs_loss": 0.7513056397438049, "eval_trivia_pairs_runtime": 6.4705, "eval_trivia_pairs_samples_per_second": 15.455, "eval_trivia_pairs_steps_per_second": 0.618, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_quora_pairs_loss": 0.024981992319226265, "eval_quora_pairs_runtime": 0.6855, "eval_quora_pairs_samples_per_second": 145.879, "eval_quora_pairs_steps_per_second": 5.835, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_gooaq_pairs_loss": 0.47234511375427246, "eval_gooaq_pairs_runtime": 1.4025, "eval_gooaq_pairs_samples_per_second": 71.304, "eval_gooaq_pairs_steps_per_second": 2.852, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_mrpc_pairs_loss": 0.04154253005981445, "eval_mrpc_pairs_runtime": 0.2618, "eval_mrpc_pairs_samples_per_second": 382.036, "eval_mrpc_pairs_steps_per_second": 15.281, "step": 6720 }, { "epoch": 1.058141357154051, "grad_norm": 14.043108940124512, "learning_rate": 2.7126174100376432e-08, "loss": 0.4684, "step": 6752 }, { "epoch": 1.0631562451026484, "grad_norm": 0.5513893365859985, "learning_rate": 1.3940082915687713e-08, "loss": 0.445, "step": 6784 }, { "epoch": 1.068171133051246, "grad_norm": 7.036909580230713, "learning_rate": 5.101878806703652e-09, "loss": 0.4288, "step": 6816 }, { "epoch": 1.0731860209998434, "grad_norm": 0.2966393828392029, "learning_rate": 6.141259906761176e-10, "loss": 0.3797, "step": 6848 }, { "epoch": 1.0782009089484408, "grad_norm": 9.721883773803711, "learning_rate": 2.9999521873506204e-05, "loss": 0.4304, "step": 6880 }, { "epoch": 1.0832157968970382, "grad_norm": 2.1523923873901367, "learning_rate": 2.9995306080226573e-05, "loss": 0.8562, "step": 6912 }, { "epoch": 1.0882306848456356, "grad_norm": 12.939388275146484, "learning_rate": 2.9986739717293326e-05, "loss": 0.4902, "step": 6944 }, { "epoch": 1.093245572794233, "grad_norm": 0.37949275970458984, "learning_rate": 2.9973825270054784e-05, "loss": 0.4285, "step": 6976 }, { "epoch": 1.0982604607428303, "grad_norm": 2.427003860473633, "learning_rate": 2.995656648536359e-05, "loss": 0.4782, "step": 7008 }, { "epoch": 1.1032753486914277, "grad_norm": 10.36500072479248, "learning_rate": 2.9934968370489646e-05, "loss": 0.7503, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_nli-pairs_loss": 0.9263110160827637, "eval_nli-pairs_runtime": 3.6445, "eval_nli-pairs_samples_per_second": 27.439, "eval_nli-pairs_steps_per_second": 1.098, "eval_sts-test_pearson_cosine": 0.7937369016852821, "eval_sts-test_pearson_dot": 0.5273705048333348, "eval_sts-test_pearson_euclidean": 0.7373368406202081, "eval_sts-test_pearson_manhattan": 0.7318756816157863, "eval_sts-test_pearson_max": 0.7937369016852821, "eval_sts-test_spearman_cosine": 0.810858247608813, "eval_sts-test_spearman_dot": 0.508640420451459, "eval_sts-test_spearman_euclidean": 0.73158962258494, "eval_sts-test_spearman_manhattan": 0.7284434977078286, "eval_sts-test_spearman_max": 0.810858247608813, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_vitaminc-pairs_loss": 4.521730422973633, "eval_vitaminc-pairs_runtime": 1.1248, "eval_vitaminc-pairs_samples_per_second": 75.569, "eval_vitaminc-pairs_steps_per_second": 2.667, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_sts-label_loss": 3.8786072731018066, "eval_sts-label_runtime": 0.2698, "eval_sts-label_samples_per_second": 370.602, "eval_sts-label_steps_per_second": 14.824, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_qnli-contrastive_loss": 0.1796300858259201, "eval_qnli-contrastive_runtime": 0.3573, "eval_qnli-contrastive_samples_per_second": 279.916, "eval_qnli-contrastive_steps_per_second": 11.197, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_scitail-pairs-qa_loss": 0.06360480934381485, "eval_scitail-pairs-qa_runtime": 0.8855, "eval_scitail-pairs-qa_samples_per_second": 112.93, "eval_scitail-pairs-qa_steps_per_second": 4.517, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_scitail-pairs-pos_loss": 0.5473235249519348, "eval_scitail-pairs-pos_runtime": 1.3255, "eval_scitail-pairs-pos_samples_per_second": 75.446, "eval_scitail-pairs-pos_steps_per_second": 3.018, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_xsum-pairs_loss": 0.24051249027252197, "eval_xsum-pairs_runtime": 0.9384, "eval_xsum-pairs_samples_per_second": 106.567, "eval_xsum-pairs_steps_per_second": 4.263, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_compression-pairs_loss": 0.0928964912891388, "eval_compression-pairs_runtime": 0.2778, "eval_compression-pairs_samples_per_second": 359.983, "eval_compression-pairs_steps_per_second": 14.399, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_sciq_pairs_loss": 0.28897982835769653, "eval_sciq_pairs_runtime": 4.1339, "eval_sciq_pairs_samples_per_second": 24.19, "eval_sciq_pairs_steps_per_second": 0.968, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_qasc_pairs_loss": 0.1793307065963745, "eval_qasc_pairs_runtime": 1.0598, "eval_qasc_pairs_samples_per_second": 94.357, "eval_qasc_pairs_steps_per_second": 3.774, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_openbookqa_pairs_loss": 1.7123816013336182, "eval_openbookqa_pairs_runtime": 0.8946, "eval_openbookqa_pairs_samples_per_second": 111.784, "eval_openbookqa_pairs_steps_per_second": 4.471, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_msmarco_pairs_loss": 0.4797554016113281, "eval_msmarco_pairs_runtime": 2.0659, "eval_msmarco_pairs_samples_per_second": 48.405, "eval_msmarco_pairs_steps_per_second": 1.936, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_nq_pairs_loss": 0.2459176480770111, "eval_nq_pairs_runtime": 4.5081, "eval_nq_pairs_samples_per_second": 22.182, "eval_nq_pairs_steps_per_second": 0.887, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_trivia_pairs_loss": 0.9698570966720581, "eval_trivia_pairs_runtime": 6.4733, "eval_trivia_pairs_samples_per_second": 15.448, "eval_trivia_pairs_steps_per_second": 0.618, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_quora_pairs_loss": 0.03161533921957016, "eval_quora_pairs_runtime": 0.6866, "eval_quora_pairs_samples_per_second": 145.647, "eval_quora_pairs_steps_per_second": 5.826, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_gooaq_pairs_loss": 0.5500377416610718, "eval_gooaq_pairs_runtime": 1.4051, "eval_gooaq_pairs_samples_per_second": 71.17, "eval_gooaq_pairs_steps_per_second": 2.847, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_mrpc_pairs_loss": 0.04846707731485367, "eval_mrpc_pairs_runtime": 0.2627, "eval_mrpc_pairs_samples_per_second": 380.671, "eval_mrpc_pairs_steps_per_second": 15.227, "step": 7040 }, { "epoch": 1.1082902366400251, "grad_norm": 10.539325714111328, "learning_rate": 2.9909037191667383e-05, "loss": 1.0828, "step": 7072 }, { "epoch": 1.1133051245886225, "grad_norm": 14.641651153564453, "learning_rate": 2.987878047227772e-05, "loss": 0.6206, "step": 7104 }, { "epoch": 1.11832001253722, "grad_norm": 12.57785415649414, "learning_rate": 2.9844206990665325e-05, "loss": 0.8111, "step": 7136 }, { "epoch": 1.1233349004858173, "grad_norm": 6.1240129470825195, "learning_rate": 2.980532677759177e-05, "loss": 0.49, "step": 7168 }, { "epoch": 1.1283497884344147, "grad_norm": 8.179468154907227, "learning_rate": 2.97621511133253e-05, "loss": 0.5289, "step": 7200 }, { "epoch": 1.133364676383012, "grad_norm": 13.069085121154785, "learning_rate": 2.971469252436813e-05, "loss": 0.2983, "step": 7232 }, { "epoch": 1.1383795643316095, "grad_norm": 11.689116477966309, "learning_rate": 2.9662964779822125e-05, "loss": 0.5183, "step": 7264 }, { "epoch": 1.1433944522802069, "grad_norm": 6.402202606201172, "learning_rate": 2.9606982887393993e-05, "loss": 0.3254, "step": 7296 }, { "epoch": 1.1484093402288043, "grad_norm": 17.79107093811035, "learning_rate": 2.9546763089041115e-05, "loss": 0.5142, "step": 7328 }, { "epoch": 1.1534242281774016, "grad_norm": 3.3558926582336426, "learning_rate": 2.9482322856259305e-05, "loss": 0.5605, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_nli-pairs_loss": 1.034800410270691, "eval_nli-pairs_runtime": 3.6881, "eval_nli-pairs_samples_per_second": 27.114, "eval_nli-pairs_steps_per_second": 1.085, "eval_sts-test_pearson_cosine": 0.7910968553972442, "eval_sts-test_pearson_dot": 0.5191989002837457, "eval_sts-test_pearson_euclidean": 0.7346238729069505, "eval_sts-test_pearson_manhattan": 0.7286075410186882, "eval_sts-test_pearson_max": 0.7910968553972442, "eval_sts-test_spearman_cosine": 0.8066961580110351, "eval_sts-test_spearman_dot": 0.5084443140830514, "eval_sts-test_spearman_euclidean": 0.72712818838666, "eval_sts-test_spearman_manhattan": 0.7230020447891047, "eval_sts-test_spearman_max": 0.8066961580110351, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_vitaminc-pairs_loss": 4.861147403717041, "eval_vitaminc-pairs_runtime": 1.2006, "eval_vitaminc-pairs_samples_per_second": 70.796, "eval_vitaminc-pairs_steps_per_second": 2.499, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_sts-label_loss": 3.832930326461792, "eval_sts-label_runtime": 0.2878, "eval_sts-label_samples_per_second": 347.487, "eval_sts-label_steps_per_second": 13.899, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_qnli-contrastive_loss": 0.20628628134727478, "eval_qnli-contrastive_runtime": 0.3622, "eval_qnli-contrastive_samples_per_second": 276.06, "eval_qnli-contrastive_steps_per_second": 11.042, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_scitail-pairs-qa_loss": 0.06246212124824524, "eval_scitail-pairs-qa_runtime": 0.9341, "eval_scitail-pairs-qa_samples_per_second": 107.06, "eval_scitail-pairs-qa_steps_per_second": 4.282, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_scitail-pairs-pos_loss": 0.4741693437099457, "eval_scitail-pairs-pos_runtime": 1.6197, "eval_scitail-pairs-pos_samples_per_second": 61.738, "eval_scitail-pairs-pos_steps_per_second": 2.47, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_xsum-pairs_loss": 0.23739749193191528, "eval_xsum-pairs_runtime": 0.9463, "eval_xsum-pairs_samples_per_second": 105.68, "eval_xsum-pairs_steps_per_second": 4.227, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_compression-pairs_loss": 0.09255027025938034, "eval_compression-pairs_runtime": 0.2828, "eval_compression-pairs_samples_per_second": 353.649, "eval_compression-pairs_steps_per_second": 14.146, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_sciq_pairs_loss": 0.2770608365535736, "eval_sciq_pairs_runtime": 4.1267, "eval_sciq_pairs_samples_per_second": 24.232, "eval_sciq_pairs_steps_per_second": 0.969, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_qasc_pairs_loss": 0.18835808336734772, "eval_qasc_pairs_runtime": 1.0608, "eval_qasc_pairs_samples_per_second": 94.272, "eval_qasc_pairs_steps_per_second": 3.771, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_openbookqa_pairs_loss": 1.8109256029129028, "eval_openbookqa_pairs_runtime": 0.9025, "eval_openbookqa_pairs_samples_per_second": 110.805, "eval_openbookqa_pairs_steps_per_second": 4.432, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_msmarco_pairs_loss": 0.5193920731544495, "eval_msmarco_pairs_runtime": 2.1117, "eval_msmarco_pairs_samples_per_second": 47.354, "eval_msmarco_pairs_steps_per_second": 1.894, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_nq_pairs_loss": 0.2714031934738159, "eval_nq_pairs_runtime": 4.5373, "eval_nq_pairs_samples_per_second": 22.04, "eval_nq_pairs_steps_per_second": 0.882, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_trivia_pairs_loss": 0.939833402633667, "eval_trivia_pairs_runtime": 6.4956, "eval_trivia_pairs_samples_per_second": 15.395, "eval_trivia_pairs_steps_per_second": 0.616, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_quora_pairs_loss": 0.058685559779405594, "eval_quora_pairs_runtime": 0.6769, "eval_quora_pairs_samples_per_second": 147.738, "eval_quora_pairs_steps_per_second": 5.91, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_gooaq_pairs_loss": 0.6361711621284485, "eval_gooaq_pairs_runtime": 1.435, "eval_gooaq_pairs_samples_per_second": 69.685, "eval_gooaq_pairs_steps_per_second": 2.787, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_mrpc_pairs_loss": 0.047355230897665024, "eval_mrpc_pairs_runtime": 0.2779, "eval_mrpc_pairs_samples_per_second": 359.791, "eval_mrpc_pairs_steps_per_second": 14.392, "step": 7360 }, { "epoch": 1.158439116125999, "grad_norm": 0.946281909942627, "learning_rate": 2.9413680885013797e-05, "loss": 0.6993, "step": 7392 }, { "epoch": 1.1634540040745964, "grad_norm": 7.1736626625061035, "learning_rate": 2.9340857090315025e-05, "loss": 0.3437, "step": 7424 }, { "epoch": 1.1684688920231938, "grad_norm": 0.19313736259937286, "learning_rate": 2.9263872600440707e-05, "loss": 0.3281, "step": 7456 }, { "epoch": 1.1734837799717912, "grad_norm": 12.984513282775879, "learning_rate": 2.9182749750805903e-05, "loss": 1.0286, "step": 7488 }, { "epoch": 1.1784986679203886, "grad_norm": 0.5984382033348083, "learning_rate": 2.9097512077482918e-05, "loss": 0.6668, "step": 7520 }, { "epoch": 1.183513555868986, "grad_norm": 4.237669944763184, "learning_rate": 2.9008184310372744e-05, "loss": 0.3861, "step": 7552 }, { "epoch": 1.1885284438175834, "grad_norm": 0.4000037610530853, "learning_rate": 2.891479236603025e-05, "loss": 0.4096, "step": 7584 }, { "epoch": 1.1935433317661808, "grad_norm": 13.399718284606934, "learning_rate": 2.8817363340145038e-05, "loss": 0.5836, "step": 7616 }, { "epoch": 1.1985582197147782, "grad_norm": 1.461013913154602, "learning_rate": 2.8715925499680188e-05, "loss": 0.2649, "step": 7648 }, { "epoch": 1.2035731076633756, "grad_norm": 6.206007957458496, "learning_rate": 2.8610508274671218e-05, "loss": 0.5884, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_nli-pairs_loss": 1.0354279279708862, "eval_nli-pairs_runtime": 3.7382, "eval_nli-pairs_samples_per_second": 26.751, "eval_nli-pairs_steps_per_second": 1.07, "eval_sts-test_pearson_cosine": 0.7841729020272651, "eval_sts-test_pearson_dot": 0.5058693889598734, "eval_sts-test_pearson_euclidean": 0.7294148871338325, "eval_sts-test_pearson_manhattan": 0.7246093271358469, "eval_sts-test_pearson_max": 0.7841729020272651, "eval_sts-test_spearman_cosine": 0.8000443657886165, "eval_sts-test_spearman_dot": 0.49286718177568123, "eval_sts-test_spearman_euclidean": 0.7196647955405734, "eval_sts-test_spearman_manhattan": 0.7181182061459461, "eval_sts-test_spearman_max": 0.8000443657886165, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_vitaminc-pairs_loss": 4.786523342132568, "eval_vitaminc-pairs_runtime": 1.1677, "eval_vitaminc-pairs_samples_per_second": 72.794, "eval_vitaminc-pairs_steps_per_second": 2.569, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_sts-label_loss": 3.80216383934021, "eval_sts-label_runtime": 0.2869, "eval_sts-label_samples_per_second": 348.532, "eval_sts-label_steps_per_second": 13.941, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_qnli-contrastive_loss": 0.15080063045024872, "eval_qnli-contrastive_runtime": 0.3765, "eval_qnli-contrastive_samples_per_second": 265.584, "eval_qnli-contrastive_steps_per_second": 10.623, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_scitail-pairs-qa_loss": 0.05477406457066536, "eval_scitail-pairs-qa_runtime": 0.9695, "eval_scitail-pairs-qa_samples_per_second": 103.142, "eval_scitail-pairs-qa_steps_per_second": 4.126, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_scitail-pairs-pos_loss": 0.49995747208595276, "eval_scitail-pairs-pos_runtime": 1.4259, "eval_scitail-pairs-pos_samples_per_second": 70.132, "eval_scitail-pairs-pos_steps_per_second": 2.805, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_xsum-pairs_loss": 0.24929432570934296, "eval_xsum-pairs_runtime": 0.9657, "eval_xsum-pairs_samples_per_second": 103.554, "eval_xsum-pairs_steps_per_second": 4.142, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_compression-pairs_loss": 0.08594885468482971, "eval_compression-pairs_runtime": 0.2846, "eval_compression-pairs_samples_per_second": 351.315, "eval_compression-pairs_steps_per_second": 14.053, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_sciq_pairs_loss": 0.28326743841171265, "eval_sciq_pairs_runtime": 4.1832, "eval_sciq_pairs_samples_per_second": 23.905, "eval_sciq_pairs_steps_per_second": 0.956, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_qasc_pairs_loss": 0.1851280927658081, "eval_qasc_pairs_runtime": 1.1629, "eval_qasc_pairs_samples_per_second": 85.993, "eval_qasc_pairs_steps_per_second": 3.44, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_openbookqa_pairs_loss": 1.686630368232727, "eval_openbookqa_pairs_runtime": 0.9518, "eval_openbookqa_pairs_samples_per_second": 105.066, "eval_openbookqa_pairs_steps_per_second": 4.203, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_msmarco_pairs_loss": 0.5506166219711304, "eval_msmarco_pairs_runtime": 2.1738, "eval_msmarco_pairs_samples_per_second": 46.002, "eval_msmarco_pairs_steps_per_second": 1.84, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_nq_pairs_loss": 0.24249011278152466, "eval_nq_pairs_runtime": 4.6491, "eval_nq_pairs_samples_per_second": 21.51, "eval_nq_pairs_steps_per_second": 0.86, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_trivia_pairs_loss": 0.9296412467956543, "eval_trivia_pairs_runtime": 6.6163, "eval_trivia_pairs_samples_per_second": 15.114, "eval_trivia_pairs_steps_per_second": 0.605, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_quora_pairs_loss": 0.0314582884311676, "eval_quora_pairs_runtime": 0.7294, "eval_quora_pairs_samples_per_second": 137.107, "eval_quora_pairs_steps_per_second": 5.484, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_gooaq_pairs_loss": 0.5459653735160828, "eval_gooaq_pairs_runtime": 1.4937, "eval_gooaq_pairs_samples_per_second": 66.948, "eval_gooaq_pairs_steps_per_second": 2.678, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_mrpc_pairs_loss": 0.04276818782091141, "eval_mrpc_pairs_runtime": 0.2699, "eval_mrpc_pairs_samples_per_second": 370.467, "eval_mrpc_pairs_steps_per_second": 14.819, "step": 7680 }, { "epoch": 1.208587995611973, "grad_norm": 2.091714859008789, "learning_rate": 2.8501142249687554e-05, "loss": 0.7018, "step": 7712 }, { "epoch": 1.2136028835605703, "grad_norm": 0.21109235286712646, "learning_rate": 2.838785915495912e-05, "loss": 0.7082, "step": 7744 }, { "epoch": 1.2186177715091677, "grad_norm": 0.1267768293619156, "learning_rate": 2.827069185717042e-05, "loss": 0.7527, "step": 7776 }, { "epoch": 1.2236326594577653, "grad_norm": 1.6667953729629517, "learning_rate": 2.8149674349925023e-05, "loss": 0.4255, "step": 7808 }, { "epoch": 1.2286475474063627, "grad_norm": 12.699274063110352, "learning_rate": 2.8024841743882998e-05, "loss": 0.7488, "step": 7840 }, { "epoch": 1.2336624353549601, "grad_norm": 8.052750587463379, "learning_rate": 2.7896230256574348e-05, "loss": 0.3364, "step": 7872 }, { "epoch": 1.2386773233035575, "grad_norm": 7.821995258331299, "learning_rate": 2.7763877201891205e-05, "loss": 0.6963, "step": 7904 }, { "epoch": 1.243692211252155, "grad_norm": 5.756433486938477, "learning_rate": 2.762782097926205e-05, "loss": 0.2829, "step": 7936 }, { "epoch": 1.2487070992007523, "grad_norm": 18.80353355407715, "learning_rate": 2.7488101062510904e-05, "loss": 0.7504, "step": 7968 }, { "epoch": 1.2537219871493497, "grad_norm": 3.668611526489258, "learning_rate": 2.734475798840485e-05, "loss": 0.7759, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_nli-pairs_loss": 0.9781379699707031, "eval_nli-pairs_runtime": 3.6268, "eval_nli-pairs_samples_per_second": 27.573, "eval_nli-pairs_steps_per_second": 1.103, "eval_sts-test_pearson_cosine": 0.7896747038559737, "eval_sts-test_pearson_dot": 0.5160875833412549, "eval_sts-test_pearson_euclidean": 0.7398944244671477, "eval_sts-test_pearson_manhattan": 0.7345204191784053, "eval_sts-test_pearson_max": 0.7896747038559737, "eval_sts-test_spearman_cosine": 0.81067276102482, "eval_sts-test_spearman_dot": 0.5010127030277397, "eval_sts-test_spearman_euclidean": 0.7318872170742919, "eval_sts-test_spearman_manhattan": 0.7283578865769135, "eval_sts-test_spearman_max": 0.81067276102482, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_vitaminc-pairs_loss": 4.802966594696045, "eval_vitaminc-pairs_runtime": 1.1396, "eval_vitaminc-pairs_samples_per_second": 74.59, "eval_vitaminc-pairs_steps_per_second": 2.633, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_sts-label_loss": 4.1984076499938965, "eval_sts-label_runtime": 0.2755, "eval_sts-label_samples_per_second": 362.988, "eval_sts-label_steps_per_second": 14.52, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_qnli-contrastive_loss": 0.23863555490970612, "eval_qnli-contrastive_runtime": 0.3602, "eval_qnli-contrastive_samples_per_second": 277.617, "eval_qnli-contrastive_steps_per_second": 11.105, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_scitail-pairs-qa_loss": 0.06188047304749489, "eval_scitail-pairs-qa_runtime": 0.8935, "eval_scitail-pairs-qa_samples_per_second": 111.921, "eval_scitail-pairs-qa_steps_per_second": 4.477, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_scitail-pairs-pos_loss": 0.44846847653388977, "eval_scitail-pairs-pos_runtime": 1.3467, "eval_scitail-pairs-pos_samples_per_second": 74.254, "eval_scitail-pairs-pos_steps_per_second": 2.97, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_xsum-pairs_loss": 0.2367183268070221, "eval_xsum-pairs_runtime": 0.9443, "eval_xsum-pairs_samples_per_second": 105.898, "eval_xsum-pairs_steps_per_second": 4.236, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_compression-pairs_loss": 0.08503348380327225, "eval_compression-pairs_runtime": 0.2921, "eval_compression-pairs_samples_per_second": 342.302, "eval_compression-pairs_steps_per_second": 13.692, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_sciq_pairs_loss": 0.270333856344223, "eval_sciq_pairs_runtime": 4.0839, "eval_sciq_pairs_samples_per_second": 24.486, "eval_sciq_pairs_steps_per_second": 0.979, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_qasc_pairs_loss": 0.18802641332149506, "eval_qasc_pairs_runtime": 1.0724, "eval_qasc_pairs_samples_per_second": 93.25, "eval_qasc_pairs_steps_per_second": 3.73, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_openbookqa_pairs_loss": 1.7418819665908813, "eval_openbookqa_pairs_runtime": 0.8925, "eval_openbookqa_pairs_samples_per_second": 112.041, "eval_openbookqa_pairs_steps_per_second": 4.482, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_msmarco_pairs_loss": 0.4858554005622864, "eval_msmarco_pairs_runtime": 2.0565, "eval_msmarco_pairs_samples_per_second": 48.627, "eval_msmarco_pairs_steps_per_second": 1.945, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_nq_pairs_loss": 0.23637117445468903, "eval_nq_pairs_runtime": 4.5088, "eval_nq_pairs_samples_per_second": 22.179, "eval_nq_pairs_steps_per_second": 0.887, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_trivia_pairs_loss": 0.7162200808525085, "eval_trivia_pairs_runtime": 6.4981, "eval_trivia_pairs_samples_per_second": 15.389, "eval_trivia_pairs_steps_per_second": 0.616, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_quora_pairs_loss": 0.06219913437962532, "eval_quora_pairs_runtime": 0.6795, "eval_quora_pairs_samples_per_second": 147.17, "eval_quora_pairs_steps_per_second": 5.887, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_gooaq_pairs_loss": 0.609254777431488, "eval_gooaq_pairs_runtime": 1.4106, "eval_gooaq_pairs_samples_per_second": 70.891, "eval_gooaq_pairs_steps_per_second": 2.836, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_mrpc_pairs_loss": 0.04724707454442978, "eval_mrpc_pairs_runtime": 0.2638, "eval_mrpc_pairs_samples_per_second": 379.073, "eval_mrpc_pairs_steps_per_second": 15.163, "step": 8000 }, { "epoch": 1.258736875097947, "grad_norm": 0.6591500043869019, "learning_rate": 2.7197833344893126e-05, "loss": 0.5297, "step": 8032 }, { "epoch": 1.2637517630465445, "grad_norm": 4.714929103851318, "learning_rate": 2.7047369759041298e-05, "loss": 0.4933, "step": 8064 }, { "epoch": 1.2687666509951419, "grad_norm": 0.15916971862316132, "learning_rate": 2.6893410884663914e-05, "loss": 0.3868, "step": 8096 }, { "epoch": 1.2737815389437392, "grad_norm": 0.6014376282691956, "learning_rate": 2.6736001389659254e-05, "loss": 0.9955, "step": 8128 }, { "epoch": 1.2787964268923366, "grad_norm": 2.986762762069702, "learning_rate": 2.6575186943049913e-05, "loss": 0.5548, "step": 8160 }, { "epoch": 1.283811314840934, "grad_norm": 0.3188874125480652, "learning_rate": 2.6411014201732884e-05, "loss": 0.4924, "step": 8192 }, { "epoch": 1.2888262027895314, "grad_norm": 0.7150152921676636, "learning_rate": 2.624353079694308e-05, "loss": 0.3422, "step": 8224 }, { "epoch": 1.2938410907381288, "grad_norm": 0.8286885619163513, "learning_rate": 2.6072785320434107e-05, "loss": 0.4707, "step": 8256 }, { "epoch": 1.2988559786867262, "grad_norm": 27.87748146057129, "learning_rate": 2.5898827310380408e-05, "loss": 0.3956, "step": 8288 }, { "epoch": 1.3038708666353236, "grad_norm": 0.3072638213634491, "learning_rate": 2.5721707237004854e-05, "loss": 0.547, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_nli-pairs_loss": 0.9301618933677673, "eval_nli-pairs_runtime": 3.7138, "eval_nli-pairs_samples_per_second": 26.926, "eval_nli-pairs_steps_per_second": 1.077, "eval_sts-test_pearson_cosine": 0.7849967022727309, "eval_sts-test_pearson_dot": 0.4795538577643521, "eval_sts-test_pearson_euclidean": 0.7253853385122256, "eval_sts-test_pearson_manhattan": 0.7194021088193217, "eval_sts-test_pearson_max": 0.7849967022727309, "eval_sts-test_spearman_cosine": 0.8020224630491872, "eval_sts-test_spearman_dot": 0.46441948467132393, "eval_sts-test_spearman_euclidean": 0.7190775648500753, "eval_sts-test_spearman_manhattan": 0.7154699878910861, "eval_sts-test_spearman_max": 0.8020224630491872, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_vitaminc-pairs_loss": 4.762923240661621, "eval_vitaminc-pairs_runtime": 1.1277, "eval_vitaminc-pairs_samples_per_second": 75.372, "eval_vitaminc-pairs_steps_per_second": 2.66, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_sts-label_loss": 3.531181573867798, "eval_sts-label_runtime": 0.2802, "eval_sts-label_samples_per_second": 356.848, "eval_sts-label_steps_per_second": 14.274, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_qnli-contrastive_loss": 0.13507510721683502, "eval_qnli-contrastive_runtime": 0.3622, "eval_qnli-contrastive_samples_per_second": 276.104, "eval_qnli-contrastive_steps_per_second": 11.044, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_scitail-pairs-qa_loss": 0.052693866193294525, "eval_scitail-pairs-qa_runtime": 0.8696, "eval_scitail-pairs-qa_samples_per_second": 115.0, "eval_scitail-pairs-qa_steps_per_second": 4.6, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_scitail-pairs-pos_loss": 0.47491660714149475, "eval_scitail-pairs-pos_runtime": 1.3447, "eval_scitail-pairs-pos_samples_per_second": 74.365, "eval_scitail-pairs-pos_steps_per_second": 2.975, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_xsum-pairs_loss": 0.23617514967918396, "eval_xsum-pairs_runtime": 0.9378, "eval_xsum-pairs_samples_per_second": 106.627, "eval_xsum-pairs_steps_per_second": 4.265, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_compression-pairs_loss": 0.07913873344659805, "eval_compression-pairs_runtime": 0.2742, "eval_compression-pairs_samples_per_second": 364.643, "eval_compression-pairs_steps_per_second": 14.586, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_sciq_pairs_loss": 0.298448383808136, "eval_sciq_pairs_runtime": 4.0839, "eval_sciq_pairs_samples_per_second": 24.486, "eval_sciq_pairs_steps_per_second": 0.979, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_qasc_pairs_loss": 0.1738889515399933, "eval_qasc_pairs_runtime": 1.0525, "eval_qasc_pairs_samples_per_second": 95.013, "eval_qasc_pairs_steps_per_second": 3.801, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_openbookqa_pairs_loss": 1.8043091297149658, "eval_openbookqa_pairs_runtime": 0.893, "eval_openbookqa_pairs_samples_per_second": 111.985, "eval_openbookqa_pairs_steps_per_second": 4.479, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_msmarco_pairs_loss": 0.5003547668457031, "eval_msmarco_pairs_runtime": 2.0613, "eval_msmarco_pairs_samples_per_second": 48.513, "eval_msmarco_pairs_steps_per_second": 1.941, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_nq_pairs_loss": 0.21183601021766663, "eval_nq_pairs_runtime": 4.5233, "eval_nq_pairs_samples_per_second": 22.108, "eval_nq_pairs_steps_per_second": 0.884, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_trivia_pairs_loss": 0.8857311010360718, "eval_trivia_pairs_runtime": 6.4553, "eval_trivia_pairs_samples_per_second": 15.491, "eval_trivia_pairs_steps_per_second": 0.62, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_quora_pairs_loss": 0.029251573607325554, "eval_quora_pairs_runtime": 0.6755, "eval_quora_pairs_samples_per_second": 148.047, "eval_quora_pairs_steps_per_second": 5.922, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_gooaq_pairs_loss": 0.5669267773628235, "eval_gooaq_pairs_runtime": 1.4109, "eval_gooaq_pairs_samples_per_second": 70.878, "eval_gooaq_pairs_steps_per_second": 2.835, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_mrpc_pairs_loss": 0.04050436615943909, "eval_mrpc_pairs_runtime": 0.2684, "eval_mrpc_pairs_samples_per_second": 372.59, "eval_mrpc_pairs_steps_per_second": 14.904, "step": 8320 }, { "epoch": 1.308885754583921, "grad_norm": 0.1075374037027359, "learning_rate": 2.5541476487935806e-05, "loss": 0.5412, "step": 8352 }, { "epoch": 1.3139006425325184, "grad_norm": 7.75120735168457, "learning_rate": 2.535818735329815e-05, "loss": 0.3885, "step": 8384 }, { "epoch": 1.3189155304811158, "grad_norm": 0.5364068150520325, "learning_rate": 2.5171893010542385e-05, "loss": 0.4274, "step": 8416 }, { "epoch": 1.3239304184297132, "grad_norm": 0.6744114756584167, "learning_rate": 2.4988605558565137e-05, "loss": 0.893, "step": 8448 }, { "epoch": 1.3289453063783105, "grad_norm": 1.6057082414627075, "learning_rate": 2.4796553472267232e-05, "loss": 0.3456, "step": 8480 }, { "epoch": 1.333960194326908, "grad_norm": 0.8727301955223083, "learning_rate": 2.460165912399626e-05, "loss": 0.4292, "step": 8512 }, { "epoch": 1.3389750822755053, "grad_norm": 0.23973700404167175, "learning_rate": 2.440397905820904e-05, "loss": 0.4275, "step": 8544 }, { "epoch": 1.343989970224103, "grad_norm": 16.09794807434082, "learning_rate": 2.4203570627579187e-05, "loss": 0.3236, "step": 8576 }, { "epoch": 1.3490048581727003, "grad_norm": 0.1335248500108719, "learning_rate": 2.4000491976357433e-05, "loss": 0.3961, "step": 8608 }, { "epoch": 1.3540197461212977, "grad_norm": 4.587371349334717, "learning_rate": 2.3794802023502332e-05, "loss": 0.5146, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_nli-pairs_loss": 0.996806800365448, "eval_nli-pairs_runtime": 3.6888, "eval_nli-pairs_samples_per_second": 27.109, "eval_nli-pairs_steps_per_second": 1.084, "eval_sts-test_pearson_cosine": 0.7869180410057008, "eval_sts-test_pearson_dot": 0.4938689019771704, "eval_sts-test_pearson_euclidean": 0.7205117910572312, "eval_sts-test_pearson_manhattan": 0.7128032248904813, "eval_sts-test_pearson_max": 0.7869180410057008, "eval_sts-test_spearman_cosine": 0.8042081001243602, "eval_sts-test_spearman_dot": 0.48091332474106047, "eval_sts-test_spearman_euclidean": 0.7130418025896658, "eval_sts-test_spearman_manhattan": 0.7066951779815502, "eval_sts-test_spearman_max": 0.8042081001243602, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_vitaminc-pairs_loss": 4.757246971130371, "eval_vitaminc-pairs_runtime": 1.13, "eval_vitaminc-pairs_samples_per_second": 75.223, "eval_vitaminc-pairs_steps_per_second": 2.655, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_sts-label_loss": 3.562749147415161, "eval_sts-label_runtime": 0.2852, "eval_sts-label_samples_per_second": 350.667, "eval_sts-label_steps_per_second": 14.027, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_qnli-contrastive_loss": 0.10447724163532257, "eval_qnli-contrastive_runtime": 0.3616, "eval_qnli-contrastive_samples_per_second": 276.535, "eval_qnli-contrastive_steps_per_second": 11.061, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_scitail-pairs-qa_loss": 0.050035107880830765, "eval_scitail-pairs-qa_runtime": 0.8786, "eval_scitail-pairs-qa_samples_per_second": 113.822, "eval_scitail-pairs-qa_steps_per_second": 4.553, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_scitail-pairs-pos_loss": 0.4792901873588562, "eval_scitail-pairs-pos_runtime": 1.3333, "eval_scitail-pairs-pos_samples_per_second": 75.004, "eval_scitail-pairs-pos_steps_per_second": 3.0, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_xsum-pairs_loss": 0.23096245527267456, "eval_xsum-pairs_runtime": 0.9402, "eval_xsum-pairs_samples_per_second": 106.362, "eval_xsum-pairs_steps_per_second": 4.254, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_compression-pairs_loss": 0.0777381882071495, "eval_compression-pairs_runtime": 0.2739, "eval_compression-pairs_samples_per_second": 365.114, "eval_compression-pairs_steps_per_second": 14.605, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_sciq_pairs_loss": 0.2707681953907013, "eval_sciq_pairs_runtime": 4.1199, "eval_sciq_pairs_samples_per_second": 24.272, "eval_sciq_pairs_steps_per_second": 0.971, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_qasc_pairs_loss": 0.1706008017063141, "eval_qasc_pairs_runtime": 1.065, "eval_qasc_pairs_samples_per_second": 93.9, "eval_qasc_pairs_steps_per_second": 3.756, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_openbookqa_pairs_loss": 1.7370460033416748, "eval_openbookqa_pairs_runtime": 0.8951, "eval_openbookqa_pairs_samples_per_second": 111.72, "eval_openbookqa_pairs_steps_per_second": 4.469, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_msmarco_pairs_loss": 0.4633770287036896, "eval_msmarco_pairs_runtime": 2.064, "eval_msmarco_pairs_samples_per_second": 48.449, "eval_msmarco_pairs_steps_per_second": 1.938, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_nq_pairs_loss": 0.21504688262939453, "eval_nq_pairs_runtime": 4.523, "eval_nq_pairs_samples_per_second": 22.109, "eval_nq_pairs_steps_per_second": 0.884, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_trivia_pairs_loss": 0.8408924341201782, "eval_trivia_pairs_runtime": 6.4614, "eval_trivia_pairs_samples_per_second": 15.476, "eval_trivia_pairs_steps_per_second": 0.619, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_quora_pairs_loss": 0.02466999925673008, "eval_quora_pairs_runtime": 0.681, "eval_quora_pairs_samples_per_second": 146.836, "eval_quora_pairs_steps_per_second": 5.873, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_gooaq_pairs_loss": 0.5489644408226013, "eval_gooaq_pairs_runtime": 1.4284, "eval_gooaq_pairs_samples_per_second": 70.007, "eval_gooaq_pairs_steps_per_second": 2.8, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_mrpc_pairs_loss": 0.039087630808353424, "eval_mrpc_pairs_runtime": 0.2676, "eval_mrpc_pairs_samples_per_second": 373.632, "eval_mrpc_pairs_steps_per_second": 14.945, "step": 8640 }, { "epoch": 1.359034634069895, "grad_norm": 0.20070208609104156, "learning_rate": 2.3586560445586147e-05, "loss": 0.7562, "step": 8672 }, { "epoch": 1.3640495220184925, "grad_norm": 14.552980422973633, "learning_rate": 2.3375827659480975e-05, "loss": 0.7881, "step": 8704 }, { "epoch": 1.36906440996709, "grad_norm": 0.728196382522583, "learning_rate": 2.3162664804830062e-05, "loss": 0.6117, "step": 8736 }, { "epoch": 1.3740792979156873, "grad_norm": 104.08293151855469, "learning_rate": 2.2947133726309464e-05, "loss": 1.3083, "step": 8768 }, { "epoch": 1.3790941858642847, "grad_norm": 9.243626594543457, "learning_rate": 2.2729296955685097e-05, "loss": 0.5359, "step": 8800 }, { "epoch": 1.384109073812882, "grad_norm": 1.2041038274765015, "learning_rate": 2.2509217693670464e-05, "loss": 0.45, "step": 8832 }, { "epoch": 1.3891239617614795, "grad_norm": 3.953394889831543, "learning_rate": 2.2286959791590365e-05, "loss": 0.6022, "step": 8864 }, { "epoch": 1.3941388497100768, "grad_norm": 18.004009246826172, "learning_rate": 2.2062587732855727e-05, "loss": 0.6664, "step": 8896 }, { "epoch": 1.3991537376586742, "grad_norm": 4.0190887451171875, "learning_rate": 2.1836166614255147e-05, "loss": 0.3255, "step": 8928 }, { "epoch": 1.4041686256072716, "grad_norm": 0.366133451461792, "learning_rate": 2.1607762127068423e-05, "loss": 0.6036, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_nli-pairs_loss": 0.8724198341369629, "eval_nli-pairs_runtime": 3.933, "eval_nli-pairs_samples_per_second": 25.426, "eval_nli-pairs_steps_per_second": 1.017, "eval_sts-test_pearson_cosine": 0.7927428344689331, "eval_sts-test_pearson_dot": 0.49937373977135646, "eval_sts-test_pearson_euclidean": 0.7199047693834656, "eval_sts-test_pearson_manhattan": 0.7130503772521911, "eval_sts-test_pearson_max": 0.7927428344689331, "eval_sts-test_spearman_cosine": 0.8082435105379481, "eval_sts-test_spearman_dot": 0.48718344612655096, "eval_sts-test_spearman_euclidean": 0.7134853526837959, "eval_sts-test_spearman_manhattan": 0.7086123376992564, "eval_sts-test_spearman_max": 0.8082435105379481, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_vitaminc-pairs_loss": 4.699924945831299, "eval_vitaminc-pairs_runtime": 1.2166, "eval_vitaminc-pairs_samples_per_second": 69.868, "eval_vitaminc-pairs_steps_per_second": 2.466, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_sts-label_loss": 3.5729637145996094, "eval_sts-label_runtime": 0.2826, "eval_sts-label_samples_per_second": 353.863, "eval_sts-label_steps_per_second": 14.155, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_qnli-contrastive_loss": 0.14608506858348846, "eval_qnli-contrastive_runtime": 0.3638, "eval_qnli-contrastive_samples_per_second": 274.882, "eval_qnli-contrastive_steps_per_second": 10.995, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_scitail-pairs-qa_loss": 0.05385418236255646, "eval_scitail-pairs-qa_runtime": 0.875, "eval_scitail-pairs-qa_samples_per_second": 114.282, "eval_scitail-pairs-qa_steps_per_second": 4.571, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_scitail-pairs-pos_loss": 0.38266777992248535, "eval_scitail-pairs-pos_runtime": 1.3771, "eval_scitail-pairs-pos_samples_per_second": 72.617, "eval_scitail-pairs-pos_steps_per_second": 2.905, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_xsum-pairs_loss": 0.22780302166938782, "eval_xsum-pairs_runtime": 0.9641, "eval_xsum-pairs_samples_per_second": 103.722, "eval_xsum-pairs_steps_per_second": 4.149, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_compression-pairs_loss": 0.0785873681306839, "eval_compression-pairs_runtime": 0.2787, "eval_compression-pairs_samples_per_second": 358.789, "eval_compression-pairs_steps_per_second": 14.352, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_sciq_pairs_loss": 0.26403677463531494, "eval_sciq_pairs_runtime": 4.2105, "eval_sciq_pairs_samples_per_second": 23.75, "eval_sciq_pairs_steps_per_second": 0.95, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_qasc_pairs_loss": 0.17071855068206787, "eval_qasc_pairs_runtime": 1.1266, "eval_qasc_pairs_samples_per_second": 88.764, "eval_qasc_pairs_steps_per_second": 3.551, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_openbookqa_pairs_loss": 1.721885085105896, "eval_openbookqa_pairs_runtime": 0.9651, "eval_openbookqa_pairs_samples_per_second": 103.621, "eval_openbookqa_pairs_steps_per_second": 4.145, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_msmarco_pairs_loss": 0.49493569135665894, "eval_msmarco_pairs_runtime": 2.1343, "eval_msmarco_pairs_samples_per_second": 46.854, "eval_msmarco_pairs_steps_per_second": 1.874, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_nq_pairs_loss": 0.24152007699012756, "eval_nq_pairs_runtime": 4.5902, "eval_nq_pairs_samples_per_second": 21.786, "eval_nq_pairs_steps_per_second": 0.871, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_trivia_pairs_loss": 0.8256454467773438, "eval_trivia_pairs_runtime": 6.5794, "eval_trivia_pairs_samples_per_second": 15.199, "eval_trivia_pairs_steps_per_second": 0.608, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_quora_pairs_loss": 0.02668851427733898, "eval_quora_pairs_runtime": 0.7332, "eval_quora_pairs_samples_per_second": 136.391, "eval_quora_pairs_steps_per_second": 5.456, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_gooaq_pairs_loss": 0.5385005474090576, "eval_gooaq_pairs_runtime": 1.4718, "eval_gooaq_pairs_samples_per_second": 67.942, "eval_gooaq_pairs_steps_per_second": 2.718, "step": 8960 }, { "epoch": 1.4041686256072716, "eval_mrpc_pairs_loss": 0.041219133883714676, "eval_mrpc_pairs_runtime": 0.2647, "eval_mrpc_pairs_samples_per_second": 377.746, "eval_mrpc_pairs_steps_per_second": 15.11, "step": 8960 }, { "epoch": 1.409183513555869, "grad_norm": 9.40150260925293, "learning_rate": 2.1377440538007663e-05, "loss": 0.4723, "step": 8992 }, { "epoch": 1.4141984015044664, "grad_norm": 0.7629052996635437, "learning_rate": 2.1145268669991343e-05, "loss": 0.2569, "step": 9024 }, { "epoch": 1.4192132894530638, "grad_norm": 4.149620056152344, "learning_rate": 2.0911313882757114e-05, "loss": 0.5794, "step": 9056 }, { "epoch": 1.4242281774016612, "grad_norm": 6.7548322677612305, "learning_rate": 2.0675644053318755e-05, "loss": 1.022, "step": 9088 }, { "epoch": 1.4292430653502586, "grad_norm": 6.15263032913208, "learning_rate": 2.043832755627316e-05, "loss": 1.0539, "step": 9120 }, { "epoch": 1.434257953298856, "grad_norm": 0.640146791934967, "learning_rate": 2.0199433243962828e-05, "loss": 0.4634, "step": 9152 }, { "epoch": 1.4392728412474534, "grad_norm": 10.551207542419434, "learning_rate": 1.995903042649987e-05, "loss": 0.3755, "step": 9184 }, { "epoch": 1.4442877291960508, "grad_norm": 6.792413711547852, "learning_rate": 1.9717188851657146e-05, "loss": 0.4033, "step": 9216 }, { "epoch": 1.4493026171446481, "grad_norm": 21.39606475830078, "learning_rate": 1.947397868463241e-05, "loss": 0.522, "step": 9248 }, { "epoch": 1.4543175050932455, "grad_norm": 0.42344239354133606, "learning_rate": 1.9229470487691367e-05, "loss": 1.1067, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_nli-pairs_loss": 1.0502961874008179, "eval_nli-pairs_runtime": 3.6372, "eval_nli-pairs_samples_per_second": 27.493, "eval_nli-pairs_steps_per_second": 1.1, "eval_sts-test_pearson_cosine": 0.7912276888525616, "eval_sts-test_pearson_dot": 0.4995225131725172, "eval_sts-test_pearson_euclidean": 0.7189719436162519, "eval_sts-test_pearson_manhattan": 0.7107873830173421, "eval_sts-test_pearson_max": 0.7912276888525616, "eval_sts-test_spearman_cosine": 0.8089166756731377, "eval_sts-test_spearman_dot": 0.4901845431410516, "eval_sts-test_spearman_euclidean": 0.714581893529738, "eval_sts-test_spearman_manhattan": 0.7088218604884453, "eval_sts-test_spearman_max": 0.8089166756731377, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_vitaminc-pairs_loss": 4.6338725090026855, "eval_vitaminc-pairs_runtime": 1.1311, "eval_vitaminc-pairs_samples_per_second": 75.15, "eval_vitaminc-pairs_steps_per_second": 2.652, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_sts-label_loss": 3.7515180110931396, "eval_sts-label_runtime": 0.274, "eval_sts-label_samples_per_second": 364.984, "eval_sts-label_steps_per_second": 14.599, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_qnli-contrastive_loss": 0.1773652583360672, "eval_qnli-contrastive_runtime": 0.3822, "eval_qnli-contrastive_samples_per_second": 261.676, "eval_qnli-contrastive_steps_per_second": 10.467, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_scitail-pairs-qa_loss": 0.0540962740778923, "eval_scitail-pairs-qa_runtime": 0.8755, "eval_scitail-pairs-qa_samples_per_second": 114.221, "eval_scitail-pairs-qa_steps_per_second": 4.569, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_scitail-pairs-pos_loss": 0.43677064776420593, "eval_scitail-pairs-pos_runtime": 1.3268, "eval_scitail-pairs-pos_samples_per_second": 75.368, "eval_scitail-pairs-pos_steps_per_second": 3.015, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_xsum-pairs_loss": 0.21872493624687195, "eval_xsum-pairs_runtime": 0.9376, "eval_xsum-pairs_samples_per_second": 106.652, "eval_xsum-pairs_steps_per_second": 4.266, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_compression-pairs_loss": 0.08013663440942764, "eval_compression-pairs_runtime": 0.2747, "eval_compression-pairs_samples_per_second": 364.042, "eval_compression-pairs_steps_per_second": 14.562, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_sciq_pairs_loss": 0.23196449875831604, "eval_sciq_pairs_runtime": 4.0506, "eval_sciq_pairs_samples_per_second": 24.687, "eval_sciq_pairs_steps_per_second": 0.987, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_qasc_pairs_loss": 0.17310062050819397, "eval_qasc_pairs_runtime": 1.0578, "eval_qasc_pairs_samples_per_second": 94.538, "eval_qasc_pairs_steps_per_second": 3.782, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_openbookqa_pairs_loss": 1.6407041549682617, "eval_openbookqa_pairs_runtime": 0.9055, "eval_openbookqa_pairs_samples_per_second": 110.441, "eval_openbookqa_pairs_steps_per_second": 4.418, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_msmarco_pairs_loss": 0.4894391596317291, "eval_msmarco_pairs_runtime": 2.0749, "eval_msmarco_pairs_samples_per_second": 48.195, "eval_msmarco_pairs_steps_per_second": 1.928, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_nq_pairs_loss": 0.24873030185699463, "eval_nq_pairs_runtime": 4.5502, "eval_nq_pairs_samples_per_second": 21.977, "eval_nq_pairs_steps_per_second": 0.879, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_trivia_pairs_loss": 0.7743425965309143, "eval_trivia_pairs_runtime": 6.4497, "eval_trivia_pairs_samples_per_second": 15.505, "eval_trivia_pairs_steps_per_second": 0.62, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_quora_pairs_loss": 0.023751694709062576, "eval_quora_pairs_runtime": 0.6809, "eval_quora_pairs_samples_per_second": 146.865, "eval_quora_pairs_steps_per_second": 5.875, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_gooaq_pairs_loss": 0.5431591272354126, "eval_gooaq_pairs_runtime": 1.3978, "eval_gooaq_pairs_samples_per_second": 71.539, "eval_gooaq_pairs_steps_per_second": 2.862, "step": 9280 }, { "epoch": 1.4543175050932455, "eval_mrpc_pairs_loss": 0.044174738228321075, "eval_mrpc_pairs_runtime": 0.2611, "eval_mrpc_pairs_samples_per_second": 383.037, "eval_mrpc_pairs_steps_per_second": 15.321, "step": 9280 }, { "epoch": 1.459332393041843, "grad_norm": 9.472426414489746, "learning_rate": 1.8983735199695544e-05, "loss": 0.6612, "step": 9312 }, { "epoch": 1.4643472809904403, "grad_norm": 14.92159652709961, "learning_rate": 1.8736844115520908e-05, "loss": 0.5152, "step": 9344 }, { "epoch": 1.4693621689390377, "grad_norm": 0.5201769471168518, "learning_rate": 1.8488868865373112e-05, "loss": 0.7975, "step": 9376 }, { "epoch": 1.474377056887635, "grad_norm": 11.845800399780273, "learning_rate": 1.8239881394005564e-05, "loss": 0.574, "step": 9408 }, { "epoch": 1.4793919448362325, "grad_norm": 23.641178131103516, "learning_rate": 1.798995393984615e-05, "loss": 0.8784, "step": 9440 }, { "epoch": 1.4844068327848299, "grad_norm": 0.519938588142395, "learning_rate": 1.773915901403883e-05, "loss": 0.807, "step": 9472 }, { "epoch": 1.4894217207334273, "grad_norm": 9.97410774230957, "learning_rate": 1.748756937940602e-05, "loss": 0.4858, "step": 9504 }, { "epoch": 1.4944366086820247, "grad_norm": 0.5798318386077881, "learning_rate": 1.7235258029338022e-05, "loss": 0.542, "step": 9536 }, { "epoch": 1.499451496630622, "grad_norm": 13.993447303771973, "learning_rate": 1.6982298166615585e-05, "loss": 0.4288, "step": 9568 }, { "epoch": 1.5044663845792194, "grad_norm": 5.321017265319824, "learning_rate": 1.6728763182171593e-05, "loss": 0.3218, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_nli-pairs_loss": 0.9721713066101074, "eval_nli-pairs_runtime": 3.6151, "eval_nli-pairs_samples_per_second": 27.662, "eval_nli-pairs_steps_per_second": 1.106, "eval_sts-test_pearson_cosine": 0.7869040096278236, "eval_sts-test_pearson_dot": 0.4954695412903865, "eval_sts-test_pearson_euclidean": 0.7150262347360125, "eval_sts-test_pearson_manhattan": 0.707955640033785, "eval_sts-test_pearson_max": 0.7869040096278236, "eval_sts-test_spearman_cosine": 0.8057646132461553, "eval_sts-test_spearman_dot": 0.48003986808946403, "eval_sts-test_spearman_euclidean": 0.7090196050571179, "eval_sts-test_spearman_manhattan": 0.7048136710517273, "eval_sts-test_spearman_max": 0.8057646132461553, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_vitaminc-pairs_loss": 4.704538822174072, "eval_vitaminc-pairs_runtime": 1.1827, "eval_vitaminc-pairs_samples_per_second": 71.867, "eval_vitaminc-pairs_steps_per_second": 2.536, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_sts-label_loss": 3.9333627223968506, "eval_sts-label_runtime": 0.2833, "eval_sts-label_samples_per_second": 352.95, "eval_sts-label_steps_per_second": 14.118, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_qnli-contrastive_loss": 0.08793709427118301, "eval_qnli-contrastive_runtime": 0.3563, "eval_qnli-contrastive_samples_per_second": 280.63, "eval_qnli-contrastive_steps_per_second": 11.225, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_scitail-pairs-qa_loss": 0.048883963376283646, "eval_scitail-pairs-qa_runtime": 0.887, "eval_scitail-pairs-qa_samples_per_second": 112.737, "eval_scitail-pairs-qa_steps_per_second": 4.509, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_scitail-pairs-pos_loss": 0.4018934369087219, "eval_scitail-pairs-pos_runtime": 1.3205, "eval_scitail-pairs-pos_samples_per_second": 75.73, "eval_scitail-pairs-pos_steps_per_second": 3.029, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_xsum-pairs_loss": 0.2280152142047882, "eval_xsum-pairs_runtime": 0.9518, "eval_xsum-pairs_samples_per_second": 105.068, "eval_xsum-pairs_steps_per_second": 4.203, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_compression-pairs_loss": 0.07252852618694305, "eval_compression-pairs_runtime": 0.2764, "eval_compression-pairs_samples_per_second": 361.792, "eval_compression-pairs_steps_per_second": 14.472, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_sciq_pairs_loss": 0.26032450795173645, "eval_sciq_pairs_runtime": 4.0901, "eval_sciq_pairs_samples_per_second": 24.449, "eval_sciq_pairs_steps_per_second": 0.978, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_qasc_pairs_loss": 0.15575185418128967, "eval_qasc_pairs_runtime": 1.0582, "eval_qasc_pairs_samples_per_second": 94.5, "eval_qasc_pairs_steps_per_second": 3.78, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_openbookqa_pairs_loss": 1.7224982976913452, "eval_openbookqa_pairs_runtime": 0.8953, "eval_openbookqa_pairs_samples_per_second": 111.701, "eval_openbookqa_pairs_steps_per_second": 4.468, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_msmarco_pairs_loss": 0.48336413502693176, "eval_msmarco_pairs_runtime": 2.0593, "eval_msmarco_pairs_samples_per_second": 48.561, "eval_msmarco_pairs_steps_per_second": 1.942, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_nq_pairs_loss": 0.22721004486083984, "eval_nq_pairs_runtime": 4.5043, "eval_nq_pairs_samples_per_second": 22.201, "eval_nq_pairs_steps_per_second": 0.888, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_trivia_pairs_loss": 0.8712177276611328, "eval_trivia_pairs_runtime": 6.4437, "eval_trivia_pairs_samples_per_second": 15.519, "eval_trivia_pairs_steps_per_second": 0.621, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_quora_pairs_loss": 0.0451449453830719, "eval_quora_pairs_runtime": 0.6862, "eval_quora_pairs_samples_per_second": 145.734, "eval_quora_pairs_steps_per_second": 5.829, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_gooaq_pairs_loss": 0.5695764422416687, "eval_gooaq_pairs_runtime": 1.4019, "eval_gooaq_pairs_samples_per_second": 71.332, "eval_gooaq_pairs_steps_per_second": 2.853, "step": 9600 }, { "epoch": 1.5044663845792194, "eval_mrpc_pairs_loss": 0.03778570890426636, "eval_mrpc_pairs_runtime": 0.2732, "eval_mrpc_pairs_samples_per_second": 366.013, "eval_mrpc_pairs_steps_per_second": 14.641, "step": 9600 }, { "epoch": 1.5094812725278168, "grad_norm": 57.80153274536133, "learning_rate": 1.6474726633798303e-05, "loss": 0.7936, "step": 9632 }, { "epoch": 1.5144961604764142, "grad_norm": 14.445369720458984, "learning_rate": 1.6220262224806082e-05, "loss": 0.5664, "step": 9664 }, { "epoch": 1.5195110484250116, "grad_norm": 0.9594554901123047, "learning_rate": 1.5965443782640004e-05, "loss": 0.7019, "step": 9696 }, { "epoch": 1.524525936373609, "grad_norm": 16.985197067260742, "learning_rate": 1.5710345237460317e-05, "loss": 0.6887, "step": 9728 }, { "epoch": 1.5295408243222064, "grad_norm": 13.977591514587402, "learning_rate": 1.545504060069323e-05, "loss": 0.5558, "step": 9760 }, { "epoch": 1.5345557122708038, "grad_norm": 10.073654174804688, "learning_rate": 1.519960394355803e-05, "loss": 0.7874, "step": 9792 }, { "epoch": 1.5395706002194014, "grad_norm": 13.922711372375488, "learning_rate": 1.4944109375576943e-05, "loss": 0.6661, "step": 9824 }, { "epoch": 1.5445854881679988, "grad_norm": 1.1066734790802002, "learning_rate": 1.4688631023073767e-05, "loss": 0.314, "step": 9856 }, { "epoch": 1.5496003761165962, "grad_norm": 11.345468521118164, "learning_rate": 1.4433243007667727e-05, "loss": 0.6541, "step": 9888 }, { "epoch": 1.5546152640651936, "grad_norm": 0.1960260272026062, "learning_rate": 1.4178019424768682e-05, "loss": 0.3876, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_nli-pairs_loss": 0.9812193512916565, "eval_nli-pairs_runtime": 3.6762, "eval_nli-pairs_samples_per_second": 27.202, "eval_nli-pairs_steps_per_second": 1.088, "eval_sts-test_pearson_cosine": 0.7883698302733059, "eval_sts-test_pearson_dot": 0.49774498696293734, "eval_sts-test_pearson_euclidean": 0.7160645671058068, "eval_sts-test_pearson_manhattan": 0.708704079258377, "eval_sts-test_pearson_max": 0.7883698302733059, "eval_sts-test_spearman_cosine": 0.8062965737327521, "eval_sts-test_spearman_dot": 0.48077830504025015, "eval_sts-test_spearman_euclidean": 0.7115357288623965, "eval_sts-test_spearman_manhattan": 0.7067091055676675, "eval_sts-test_spearman_max": 0.8062965737327521, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_vitaminc-pairs_loss": 4.601877689361572, "eval_vitaminc-pairs_runtime": 1.1563, "eval_vitaminc-pairs_samples_per_second": 73.511, "eval_vitaminc-pairs_steps_per_second": 2.594, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_sts-label_loss": 3.759185791015625, "eval_sts-label_runtime": 0.2803, "eval_sts-label_samples_per_second": 356.755, "eval_sts-label_steps_per_second": 14.27, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_qnli-contrastive_loss": 0.12763477861881256, "eval_qnli-contrastive_runtime": 0.3644, "eval_qnli-contrastive_samples_per_second": 274.409, "eval_qnli-contrastive_steps_per_second": 10.976, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_scitail-pairs-qa_loss": 0.047839775681495667, "eval_scitail-pairs-qa_runtime": 0.8866, "eval_scitail-pairs-qa_samples_per_second": 112.788, "eval_scitail-pairs-qa_steps_per_second": 4.512, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_scitail-pairs-pos_loss": 0.42824697494506836, "eval_scitail-pairs-pos_runtime": 1.3646, "eval_scitail-pairs-pos_samples_per_second": 73.28, "eval_scitail-pairs-pos_steps_per_second": 2.931, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_xsum-pairs_loss": 0.21476294100284576, "eval_xsum-pairs_runtime": 0.9397, "eval_xsum-pairs_samples_per_second": 106.418, "eval_xsum-pairs_steps_per_second": 4.257, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_compression-pairs_loss": 0.0711471289396286, "eval_compression-pairs_runtime": 0.2823, "eval_compression-pairs_samples_per_second": 354.238, "eval_compression-pairs_steps_per_second": 14.17, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_sciq_pairs_loss": 0.2557770311832428, "eval_sciq_pairs_runtime": 4.086, "eval_sciq_pairs_samples_per_second": 24.474, "eval_sciq_pairs_steps_per_second": 0.979, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_qasc_pairs_loss": 0.14738526940345764, "eval_qasc_pairs_runtime": 1.0758, "eval_qasc_pairs_samples_per_second": 92.954, "eval_qasc_pairs_steps_per_second": 3.718, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_openbookqa_pairs_loss": 1.7022367715835571, "eval_openbookqa_pairs_runtime": 0.8963, "eval_openbookqa_pairs_samples_per_second": 111.564, "eval_openbookqa_pairs_steps_per_second": 4.463, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_msmarco_pairs_loss": 0.5194939374923706, "eval_msmarco_pairs_runtime": 2.0452, "eval_msmarco_pairs_samples_per_second": 48.895, "eval_msmarco_pairs_steps_per_second": 1.956, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_nq_pairs_loss": 0.24882511794567108, "eval_nq_pairs_runtime": 4.4873, "eval_nq_pairs_samples_per_second": 22.285, "eval_nq_pairs_steps_per_second": 0.891, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_trivia_pairs_loss": 0.8160566687583923, "eval_trivia_pairs_runtime": 6.4416, "eval_trivia_pairs_samples_per_second": 15.524, "eval_trivia_pairs_steps_per_second": 0.621, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_quora_pairs_loss": 0.019316570833325386, "eval_quora_pairs_runtime": 0.6835, "eval_quora_pairs_samples_per_second": 146.3, "eval_quora_pairs_steps_per_second": 5.852, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_gooaq_pairs_loss": 0.5341556072235107, "eval_gooaq_pairs_runtime": 1.4064, "eval_gooaq_pairs_samples_per_second": 71.103, "eval_gooaq_pairs_steps_per_second": 2.844, "step": 9920 }, { "epoch": 1.5546152640651936, "eval_mrpc_pairs_loss": 0.037278927862644196, "eval_mrpc_pairs_runtime": 0.2652, "eval_mrpc_pairs_samples_per_second": 377.087, "eval_mrpc_pairs_steps_per_second": 15.083, "step": 9920 }, { "epoch": 1.559630152013791, "grad_norm": 9.619163513183594, "learning_rate": 1.3923034322079869e-05, "loss": 0.4225, "step": 9952 }, { "epoch": 1.5646450399623884, "grad_norm": 13.126330375671387, "learning_rate": 1.3668361678114555e-05, "loss": 0.5979, "step": 9984 }, { "epoch": 1.5696599279109857, "grad_norm": 0.24413801729679108, "learning_rate": 1.3414075380732742e-05, "loss": 0.4349, "step": 10016 }, { "epoch": 1.5746748158595831, "grad_norm": 10.528765678405762, "learning_rate": 1.31602492057042e-05, "loss": 0.8265, "step": 10048 }, { "epoch": 1.5796897038081805, "grad_norm": 2.4941253662109375, "learning_rate": 1.2906956795303937e-05, "loss": 0.4669, "step": 10080 }, { "epoch": 1.584704591756778, "grad_norm": 13.037269592285156, "learning_rate": 1.2654271636946504e-05, "loss": 0.6543, "step": 10112 }, { "epoch": 1.5897194797053753, "grad_norm": 17.997892379760742, "learning_rate": 1.2402267041865147e-05, "loss": 0.5953, "step": 10144 }, { "epoch": 1.5947343676539727, "grad_norm": 2.191682815551758, "learning_rate": 1.215101612384217e-05, "loss": 0.7695, "step": 10176 }, { "epoch": 1.59974925560257, "grad_norm": 0.29988715052604675, "learning_rate": 1.1900591777996438e-05, "loss": 1.0416, "step": 10208 }, { "epoch": 1.6047641435511675, "grad_norm": 17.13808250427246, "learning_rate": 1.165106665963446e-05, "loss": 0.582, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_nli-pairs_loss": 0.8930012583732605, "eval_nli-pairs_runtime": 3.7196, "eval_nli-pairs_samples_per_second": 26.885, "eval_nli-pairs_steps_per_second": 1.075, "eval_sts-test_pearson_cosine": 0.7903083827158762, "eval_sts-test_pearson_dot": 0.506941058703846, "eval_sts-test_pearson_euclidean": 0.7203941922684222, "eval_sts-test_pearson_manhattan": 0.7131998484149887, "eval_sts-test_pearson_max": 0.7903083827158762, "eval_sts-test_spearman_cosine": 0.808222937569047, "eval_sts-test_spearman_dot": 0.4889752392434299, "eval_sts-test_spearman_euclidean": 0.7136289076125458, "eval_sts-test_spearman_manhattan": 0.7085035353048789, "eval_sts-test_spearman_max": 0.808222937569047, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_vitaminc-pairs_loss": 4.64041805267334, "eval_vitaminc-pairs_runtime": 1.171, "eval_vitaminc-pairs_samples_per_second": 72.59, "eval_vitaminc-pairs_steps_per_second": 2.562, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_sts-label_loss": 3.797877073287964, "eval_sts-label_runtime": 0.2902, "eval_sts-label_samples_per_second": 344.541, "eval_sts-label_steps_per_second": 13.782, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_qnli-contrastive_loss": 0.12474731355905533, "eval_qnli-contrastive_runtime": 0.3643, "eval_qnli-contrastive_samples_per_second": 274.483, "eval_qnli-contrastive_steps_per_second": 10.979, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_scitail-pairs-qa_loss": 0.049197420477867126, "eval_scitail-pairs-qa_runtime": 1.0129, "eval_scitail-pairs-qa_samples_per_second": 98.725, "eval_scitail-pairs-qa_steps_per_second": 3.949, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_scitail-pairs-pos_loss": 0.39805683493614197, "eval_scitail-pairs-pos_runtime": 1.5068, "eval_scitail-pairs-pos_samples_per_second": 66.368, "eval_scitail-pairs-pos_steps_per_second": 2.655, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_xsum-pairs_loss": 0.219086155295372, "eval_xsum-pairs_runtime": 0.9561, "eval_xsum-pairs_samples_per_second": 104.596, "eval_xsum-pairs_steps_per_second": 4.184, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_compression-pairs_loss": 0.07605351507663727, "eval_compression-pairs_runtime": 0.3253, "eval_compression-pairs_samples_per_second": 307.395, "eval_compression-pairs_steps_per_second": 12.296, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_sciq_pairs_loss": 0.2541166841983795, "eval_sciq_pairs_runtime": 4.2719, "eval_sciq_pairs_samples_per_second": 23.409, "eval_sciq_pairs_steps_per_second": 0.936, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_qasc_pairs_loss": 0.152567058801651, "eval_qasc_pairs_runtime": 1.1758, "eval_qasc_pairs_samples_per_second": 85.047, "eval_qasc_pairs_steps_per_second": 3.402, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_openbookqa_pairs_loss": 1.6163227558135986, "eval_openbookqa_pairs_runtime": 0.9704, "eval_openbookqa_pairs_samples_per_second": 103.054, "eval_openbookqa_pairs_steps_per_second": 4.122, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_msmarco_pairs_loss": 0.48798614740371704, "eval_msmarco_pairs_runtime": 2.1358, "eval_msmarco_pairs_samples_per_second": 46.82, "eval_msmarco_pairs_steps_per_second": 1.873, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_nq_pairs_loss": 0.22564272582530975, "eval_nq_pairs_runtime": 4.6501, "eval_nq_pairs_samples_per_second": 21.505, "eval_nq_pairs_steps_per_second": 0.86, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_trivia_pairs_loss": 0.8167528510093689, "eval_trivia_pairs_runtime": 6.5192, "eval_trivia_pairs_samples_per_second": 15.339, "eval_trivia_pairs_steps_per_second": 0.614, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_quora_pairs_loss": 0.050022438168525696, "eval_quora_pairs_runtime": 0.7208, "eval_quora_pairs_samples_per_second": 138.734, "eval_quora_pairs_steps_per_second": 5.549, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_gooaq_pairs_loss": 0.5044955611228943, "eval_gooaq_pairs_runtime": 1.5342, "eval_gooaq_pairs_samples_per_second": 65.182, "eval_gooaq_pairs_steps_per_second": 2.607, "step": 10240 }, { "epoch": 1.6047641435511675, "eval_mrpc_pairs_loss": 0.03784802556037903, "eval_mrpc_pairs_runtime": 0.2652, "eval_mrpc_pairs_samples_per_second": 377.103, "eval_mrpc_pairs_steps_per_second": 15.084, "step": 10240 }, { "epoch": 1.609779031499765, "grad_norm": 3.4870223999023438, "learning_rate": 1.1402513163171013e-05, "loss": 0.4853, "step": 10272 }, { "epoch": 1.6147939194483625, "grad_norm": 0.3523337244987488, "learning_rate": 1.1155003401125379e-05, "loss": 0.7606, "step": 10304 }, { "epoch": 1.6198088073969599, "grad_norm": 0.3042531907558441, "learning_rate": 1.090860918319947e-05, "loss": 0.7573, "step": 10336 }, { "epoch": 1.6248236953455573, "grad_norm": 5.512173652648926, "learning_rate": 1.0663401995443753e-05, "loss": 0.8745, "step": 10368 }, { "epoch": 1.6298385832941547, "grad_norm": 8.629206657409668, "learning_rate": 1.0419452979517123e-05, "loss": 0.5335, "step": 10400 }, { "epoch": 1.634853471242752, "grad_norm": 5.364308834075928, "learning_rate": 1.0176832912046605e-05, "loss": 0.8592, "step": 10432 }, { "epoch": 1.6398683591913494, "grad_norm": 6.823199272155762, "learning_rate": 9.935612184093056e-06, "loss": 0.5884, "step": 10464 }, { "epoch": 1.6448832471399468, "grad_norm": 0.22770029306411743, "learning_rate": 9.69586078072868e-06, "loss": 0.5912, "step": 10496 }, { "epoch": 1.6498981350885442, "grad_norm": 9.537901878356934, "learning_rate": 9.457648260732383e-06, "loss": 0.4696, "step": 10528 }, { "epoch": 1.6549130230371416, "grad_norm": 10.702925682067871, "learning_rate": 9.221043736408692e-06, "loss": 0.6711, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_nli-pairs_loss": 0.8484927415847778, "eval_nli-pairs_runtime": 3.6438, "eval_nli-pairs_samples_per_second": 27.444, "eval_nli-pairs_steps_per_second": 1.098, "eval_sts-test_pearson_cosine": 0.7886383018112382, "eval_sts-test_pearson_dot": 0.509657353420987, "eval_sts-test_pearson_euclidean": 0.7203835760202054, "eval_sts-test_pearson_manhattan": 0.7132829596532094, "eval_sts-test_pearson_max": 0.7886383018112382, "eval_sts-test_spearman_cosine": 0.8068820363083877, "eval_sts-test_spearman_dot": 0.4905223352095461, "eval_sts-test_spearman_euclidean": 0.7139385283892524, "eval_sts-test_spearman_manhattan": 0.7089123420275218, "eval_sts-test_spearman_max": 0.8068820363083877, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_vitaminc-pairs_loss": 4.674776554107666, "eval_vitaminc-pairs_runtime": 1.1704, "eval_vitaminc-pairs_samples_per_second": 72.625, "eval_vitaminc-pairs_steps_per_second": 2.563, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_sts-label_loss": 4.107458114624023, "eval_sts-label_runtime": 0.2808, "eval_sts-label_samples_per_second": 356.107, "eval_sts-label_steps_per_second": 14.244, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_qnli-contrastive_loss": 0.1500800997018814, "eval_qnli-contrastive_runtime": 0.3698, "eval_qnli-contrastive_samples_per_second": 270.398, "eval_qnli-contrastive_steps_per_second": 10.816, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_scitail-pairs-qa_loss": 0.04740756377577782, "eval_scitail-pairs-qa_runtime": 0.8928, "eval_scitail-pairs-qa_samples_per_second": 112.003, "eval_scitail-pairs-qa_steps_per_second": 4.48, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_scitail-pairs-pos_loss": 0.3631865978240967, "eval_scitail-pairs-pos_runtime": 1.3483, "eval_scitail-pairs-pos_samples_per_second": 74.166, "eval_scitail-pairs-pos_steps_per_second": 2.967, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_xsum-pairs_loss": 0.22463081777095795, "eval_xsum-pairs_runtime": 0.9451, "eval_xsum-pairs_samples_per_second": 105.814, "eval_xsum-pairs_steps_per_second": 4.233, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_compression-pairs_loss": 0.07229481637477875, "eval_compression-pairs_runtime": 0.2835, "eval_compression-pairs_samples_per_second": 352.791, "eval_compression-pairs_steps_per_second": 14.112, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_sciq_pairs_loss": 0.24752530455589294, "eval_sciq_pairs_runtime": 4.0901, "eval_sciq_pairs_samples_per_second": 24.45, "eval_sciq_pairs_steps_per_second": 0.978, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_qasc_pairs_loss": 0.1482870876789093, "eval_qasc_pairs_runtime": 1.0637, "eval_qasc_pairs_samples_per_second": 94.012, "eval_qasc_pairs_steps_per_second": 3.76, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_openbookqa_pairs_loss": 1.5899702310562134, "eval_openbookqa_pairs_runtime": 0.9187, "eval_openbookqa_pairs_samples_per_second": 108.851, "eval_openbookqa_pairs_steps_per_second": 4.354, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_msmarco_pairs_loss": 0.4562755525112152, "eval_msmarco_pairs_runtime": 2.0663, "eval_msmarco_pairs_samples_per_second": 48.395, "eval_msmarco_pairs_steps_per_second": 1.936, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_nq_pairs_loss": 0.2125861942768097, "eval_nq_pairs_runtime": 4.5117, "eval_nq_pairs_samples_per_second": 22.165, "eval_nq_pairs_steps_per_second": 0.887, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_trivia_pairs_loss": 0.7469798922538757, "eval_trivia_pairs_runtime": 6.5359, "eval_trivia_pairs_samples_per_second": 15.3, "eval_trivia_pairs_steps_per_second": 0.612, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_quora_pairs_loss": 0.022414864972233772, "eval_quora_pairs_runtime": 0.6868, "eval_quora_pairs_samples_per_second": 145.612, "eval_quora_pairs_steps_per_second": 5.824, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_gooaq_pairs_loss": 0.5061071515083313, "eval_gooaq_pairs_runtime": 1.4011, "eval_gooaq_pairs_samples_per_second": 71.371, "eval_gooaq_pairs_steps_per_second": 2.855, "step": 10560 }, { "epoch": 1.6549130230371416, "eval_mrpc_pairs_loss": 0.03582798317074776, "eval_mrpc_pairs_runtime": 0.2689, "eval_mrpc_pairs_samples_per_second": 371.847, "eval_mrpc_pairs_steps_per_second": 14.874, "step": 10560 }, { "epoch": 1.659927910985739, "grad_norm": 0.17413297295570374, "learning_rate": 8.98611585353635e-06, "loss": 0.6604, "step": 10592 }, { "epoch": 1.6649427989343364, "grad_norm": 15.481103897094727, "learning_rate": 8.760192658851857e-06, "loss": 0.7325, "step": 10624 }, { "epoch": 1.6699576868829338, "grad_norm": 8.540956497192383, "learning_rate": 8.528764372618384e-06, "loss": 0.5003, "step": 10656 }, { "epoch": 1.6749725748315312, "grad_norm": 3.3334126472473145, "learning_rate": 8.299213577994032e-06, "loss": 0.7602, "step": 10688 }, { "epoch": 1.6799874627801286, "grad_norm": 5.868366241455078, "learning_rate": 8.071606874267657e-06, "loss": 0.3509, "step": 10720 }, { "epoch": 1.685002350728726, "grad_norm": 9.065354347229004, "learning_rate": 7.846010296691414e-06, "loss": 0.5256, "step": 10752 }, { "epoch": 1.6900172386773233, "grad_norm": 1.3587162494659424, "learning_rate": 7.622489297322031e-06, "loss": 0.72, "step": 10784 }, { "epoch": 1.6950321266259207, "grad_norm": 12.665712356567383, "learning_rate": 7.401108726031321e-06, "loss": 0.3566, "step": 10816 }, { "epoch": 1.7000470145745181, "grad_norm": 0.25204044580459595, "learning_rate": 7.181932811691307e-06, "loss": 0.4914, "step": 10848 }, { "epoch": 1.7050619025231155, "grad_norm": 16.243755340576172, "learning_rate": 6.9650251435396265e-06, "loss": 0.803, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_nli-pairs_loss": 0.8494153618812561, "eval_nli-pairs_runtime": 3.6201, "eval_nli-pairs_samples_per_second": 27.623, "eval_nli-pairs_steps_per_second": 1.105, "eval_sts-test_pearson_cosine": 0.7892466217647861, "eval_sts-test_pearson_dot": 0.5132730908314299, "eval_sts-test_pearson_euclidean": 0.7182104368148383, "eval_sts-test_pearson_manhattan": 0.7115314021169153, "eval_sts-test_pearson_max": 0.7892466217647861, "eval_sts-test_spearman_cosine": 0.8066653682021561, "eval_sts-test_spearman_dot": 0.49605915086018837, "eval_sts-test_spearman_euclidean": 0.7106506513811333, "eval_sts-test_spearman_manhattan": 0.7061885414200401, "eval_sts-test_spearman_max": 0.8066653682021561, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_vitaminc-pairs_loss": 4.736045837402344, "eval_vitaminc-pairs_runtime": 1.1691, "eval_vitaminc-pairs_samples_per_second": 72.706, "eval_vitaminc-pairs_steps_per_second": 2.566, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_sts-label_loss": 3.8989040851593018, "eval_sts-label_runtime": 0.2863, "eval_sts-label_samples_per_second": 349.313, "eval_sts-label_steps_per_second": 13.973, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_qnli-contrastive_loss": 0.11956857889890671, "eval_qnli-contrastive_runtime": 0.364, "eval_qnli-contrastive_samples_per_second": 274.753, "eval_qnli-contrastive_steps_per_second": 10.99, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_scitail-pairs-qa_loss": 0.04798151180148125, "eval_scitail-pairs-qa_runtime": 0.8856, "eval_scitail-pairs-qa_samples_per_second": 112.919, "eval_scitail-pairs-qa_steps_per_second": 4.517, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_scitail-pairs-pos_loss": 0.3735659718513489, "eval_scitail-pairs-pos_runtime": 1.3392, "eval_scitail-pairs-pos_samples_per_second": 74.673, "eval_scitail-pairs-pos_steps_per_second": 2.987, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_xsum-pairs_loss": 0.22131063044071198, "eval_xsum-pairs_runtime": 0.9431, "eval_xsum-pairs_samples_per_second": 106.038, "eval_xsum-pairs_steps_per_second": 4.242, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_compression-pairs_loss": 0.07452361285686493, "eval_compression-pairs_runtime": 0.2739, "eval_compression-pairs_samples_per_second": 365.156, "eval_compression-pairs_steps_per_second": 14.606, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_sciq_pairs_loss": 0.242247074842453, "eval_sciq_pairs_runtime": 4.1029, "eval_sciq_pairs_samples_per_second": 24.373, "eval_sciq_pairs_steps_per_second": 0.975, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_qasc_pairs_loss": 0.1498023420572281, "eval_qasc_pairs_runtime": 1.0532, "eval_qasc_pairs_samples_per_second": 94.948, "eval_qasc_pairs_steps_per_second": 3.798, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_openbookqa_pairs_loss": 1.5622608661651611, "eval_openbookqa_pairs_runtime": 0.9163, "eval_openbookqa_pairs_samples_per_second": 109.131, "eval_openbookqa_pairs_steps_per_second": 4.365, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_msmarco_pairs_loss": 0.46780094504356384, "eval_msmarco_pairs_runtime": 2.0521, "eval_msmarco_pairs_samples_per_second": 48.731, "eval_msmarco_pairs_steps_per_second": 1.949, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_nq_pairs_loss": 0.23504668474197388, "eval_nq_pairs_runtime": 4.5565, "eval_nq_pairs_samples_per_second": 21.947, "eval_nq_pairs_steps_per_second": 0.878, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_trivia_pairs_loss": 0.7335551381111145, "eval_trivia_pairs_runtime": 6.4465, "eval_trivia_pairs_samples_per_second": 15.512, "eval_trivia_pairs_steps_per_second": 0.62, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_quora_pairs_loss": 0.024753902107477188, "eval_quora_pairs_runtime": 0.6836, "eval_quora_pairs_samples_per_second": 146.276, "eval_quora_pairs_steps_per_second": 5.851, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_gooaq_pairs_loss": 0.46595844626426697, "eval_gooaq_pairs_runtime": 1.4005, "eval_gooaq_pairs_samples_per_second": 71.401, "eval_gooaq_pairs_steps_per_second": 2.856, "step": 10880 }, { "epoch": 1.7050619025231155, "eval_mrpc_pairs_loss": 0.03661191835999489, "eval_mrpc_pairs_runtime": 0.2636, "eval_mrpc_pairs_samples_per_second": 379.305, "eval_mrpc_pairs_steps_per_second": 15.172, "step": 10880 }, { "epoch": 1.710076790471713, "grad_norm": 0.9613147974014282, "learning_rate": 6.750448652730435e-06, "loss": 0.631, "step": 10912 }, { "epoch": 1.7150916784203103, "grad_norm": 16.191865921020508, "learning_rate": 6.538265594076307e-06, "loss": 0.4674, "step": 10944 }, { "epoch": 1.7201065663689077, "grad_norm": 0.30773648619651794, "learning_rate": 6.328537527986275e-06, "loss": 0.59, "step": 10976 }, { "epoch": 1.725121454317505, "grad_norm": 18.124229431152344, "learning_rate": 6.121325302605417e-06, "loss": 0.6661, "step": 11008 }, { "epoch": 1.7301363422661025, "grad_norm": 12.083048820495605, "learning_rate": 5.916689036161038e-06, "loss": 0.5495, "step": 11040 }, { "epoch": 1.7351512302146999, "grad_norm": 0.35161450505256653, "learning_rate": 5.714688099520686e-06, "loss": 0.4449, "step": 11072 }, { "epoch": 1.7401661181632972, "grad_norm": 0.209833562374115, "learning_rate": 5.5153810989668945e-06, "loss": 0.9734, "step": 11104 }, { "epoch": 1.7451810061118946, "grad_norm": 5.53667688369751, "learning_rate": 5.318825859193847e-06, "loss": 0.8756, "step": 11136 }, { "epoch": 1.750195894060492, "grad_norm": 2.0180232524871826, "learning_rate": 5.125079406530779e-06, "loss": 0.5044, "step": 11168 }, { "epoch": 1.7552107820090894, "grad_norm": 0.2467559278011322, "learning_rate": 4.93419795239698e-06, "loss": 0.4335, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_nli-pairs_loss": 0.877187192440033, "eval_nli-pairs_runtime": 3.6535, "eval_nli-pairs_samples_per_second": 27.371, "eval_nli-pairs_steps_per_second": 1.095, "eval_sts-test_pearson_cosine": 0.7865247222227171, "eval_sts-test_pearson_dot": 0.514323257383645, "eval_sts-test_pearson_euclidean": 0.7211034420770008, "eval_sts-test_pearson_manhattan": 0.7144614802492367, "eval_sts-test_pearson_max": 0.7865247222227171, "eval_sts-test_spearman_cosine": 0.8063165740213032, "eval_sts-test_spearman_dot": 0.4949834120552875, "eval_sts-test_spearman_euclidean": 0.7142341047530502, "eval_sts-test_spearman_manhattan": 0.7096420582019349, "eval_sts-test_spearman_max": 0.8063165740213032, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_vitaminc-pairs_loss": 4.702724456787109, "eval_vitaminc-pairs_runtime": 1.1334, "eval_vitaminc-pairs_samples_per_second": 74.993, "eval_vitaminc-pairs_steps_per_second": 2.647, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_sts-label_loss": 4.041957855224609, "eval_sts-label_runtime": 0.2856, "eval_sts-label_samples_per_second": 350.099, "eval_sts-label_steps_per_second": 14.004, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_qnli-contrastive_loss": 0.13319608569145203, "eval_qnli-contrastive_runtime": 0.364, "eval_qnli-contrastive_samples_per_second": 274.734, "eval_qnli-contrastive_steps_per_second": 10.989, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_scitail-pairs-qa_loss": 0.04602590203285217, "eval_scitail-pairs-qa_runtime": 0.8964, "eval_scitail-pairs-qa_samples_per_second": 111.553, "eval_scitail-pairs-qa_steps_per_second": 4.462, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_scitail-pairs-pos_loss": 0.3770352602005005, "eval_scitail-pairs-pos_runtime": 1.3478, "eval_scitail-pairs-pos_samples_per_second": 74.195, "eval_scitail-pairs-pos_steps_per_second": 2.968, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_xsum-pairs_loss": 0.21323198080062866, "eval_xsum-pairs_runtime": 0.9434, "eval_xsum-pairs_samples_per_second": 106.0, "eval_xsum-pairs_steps_per_second": 4.24, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_compression-pairs_loss": 0.07242386043071747, "eval_compression-pairs_runtime": 0.2827, "eval_compression-pairs_samples_per_second": 353.737, "eval_compression-pairs_steps_per_second": 14.149, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_sciq_pairs_loss": 0.23106925189495087, "eval_sciq_pairs_runtime": 4.1328, "eval_sciq_pairs_samples_per_second": 24.197, "eval_sciq_pairs_steps_per_second": 0.968, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_qasc_pairs_loss": 0.14957545697689056, "eval_qasc_pairs_runtime": 1.063, "eval_qasc_pairs_samples_per_second": 94.07, "eval_qasc_pairs_steps_per_second": 3.763, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_openbookqa_pairs_loss": 1.5883697271347046, "eval_openbookqa_pairs_runtime": 0.8979, "eval_openbookqa_pairs_samples_per_second": 111.375, "eval_openbookqa_pairs_steps_per_second": 4.455, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_msmarco_pairs_loss": 0.46254363656044006, "eval_msmarco_pairs_runtime": 2.1183, "eval_msmarco_pairs_samples_per_second": 47.207, "eval_msmarco_pairs_steps_per_second": 1.888, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_nq_pairs_loss": 0.20571322739124298, "eval_nq_pairs_runtime": 4.5262, "eval_nq_pairs_samples_per_second": 22.094, "eval_nq_pairs_steps_per_second": 0.884, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_trivia_pairs_loss": 0.7241536974906921, "eval_trivia_pairs_runtime": 6.4444, "eval_trivia_pairs_samples_per_second": 15.517, "eval_trivia_pairs_steps_per_second": 0.621, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_quora_pairs_loss": 0.04489434137940407, "eval_quora_pairs_runtime": 0.6836, "eval_quora_pairs_samples_per_second": 146.291, "eval_quora_pairs_steps_per_second": 5.852, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_gooaq_pairs_loss": 0.4569108188152313, "eval_gooaq_pairs_runtime": 1.4175, "eval_gooaq_pairs_samples_per_second": 70.546, "eval_gooaq_pairs_steps_per_second": 2.822, "step": 11200 }, { "epoch": 1.7552107820090894, "eval_mrpc_pairs_loss": 0.035774633288383484, "eval_mrpc_pairs_runtime": 0.2662, "eval_mrpc_pairs_samples_per_second": 375.609, "eval_mrpc_pairs_steps_per_second": 15.024, "step": 11200 }, { "epoch": 1.7602256699576868, "grad_norm": 0.9150563478469849, "learning_rate": 4.746236876993248e-06, "loss": 0.9002, "step": 11232 }, { "epoch": 1.7652405579062842, "grad_norm": 12.53256893157959, "learning_rate": 4.561250713234495e-06, "loss": 0.7993, "step": 11264 }, { "epoch": 1.7702554458548816, "grad_norm": 15.736297607421875, "learning_rate": 4.379293130928219e-06, "loss": 0.7534, "step": 11296 }, { "epoch": 1.775270333803479, "grad_norm": 2.4506030082702637, "learning_rate": 4.200416921203288e-06, "loss": 0.505, "step": 11328 }, { "epoch": 1.7802852217520764, "grad_norm": 2.577625036239624, "learning_rate": 4.024673981193751e-06, "loss": 0.5255, "step": 11360 }, { "epoch": 1.7853001097006738, "grad_norm": 3.458557367324829, "learning_rate": 3.852115298981977e-06, "loss": 1.1055, "step": 11392 }, { "epoch": 1.7903149976492712, "grad_norm": 3.930065631866455, "learning_rate": 3.682790938805565e-06, "loss": 0.4554, "step": 11424 }, { "epoch": 1.7953298855978685, "grad_norm": 10.131789207458496, "learning_rate": 3.5167500265322354e-06, "loss": 0.4593, "step": 11456 }, { "epoch": 1.800344773546466, "grad_norm": 1.7020587921142578, "learning_rate": 3.354040735407013e-06, "loss": 0.3412, "step": 11488 }, { "epoch": 1.8053596614950633, "grad_norm": 17.23381805419922, "learning_rate": 3.194710272075821e-06, "loss": 0.5286, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_nli-pairs_loss": 0.8453327417373657, "eval_nli-pairs_runtime": 3.767, "eval_nli-pairs_samples_per_second": 26.546, "eval_nli-pairs_steps_per_second": 1.062, "eval_sts-test_pearson_cosine": 0.7887577875023472, "eval_sts-test_pearson_dot": 0.5046260670313512, "eval_sts-test_pearson_euclidean": 0.7163532724972116, "eval_sts-test_pearson_manhattan": 0.7095926728673246, "eval_sts-test_pearson_max": 0.7887577875023472, "eval_sts-test_spearman_cosine": 0.8069614258399779, "eval_sts-test_spearman_dot": 0.48691887696763547, "eval_sts-test_spearman_euclidean": 0.7087924044364149, "eval_sts-test_spearman_manhattan": 0.7038474172761815, "eval_sts-test_spearman_max": 0.8069614258399779, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_vitaminc-pairs_loss": 4.721137046813965, "eval_vitaminc-pairs_runtime": 1.183, "eval_vitaminc-pairs_samples_per_second": 71.853, "eval_vitaminc-pairs_steps_per_second": 2.536, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_sts-label_loss": 3.9839565753936768, "eval_sts-label_runtime": 0.3489, "eval_sts-label_samples_per_second": 286.602, "eval_sts-label_steps_per_second": 11.464, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_qnli-contrastive_loss": 0.11494701355695724, "eval_qnli-contrastive_runtime": 0.3826, "eval_qnli-contrastive_samples_per_second": 261.354, "eval_qnli-contrastive_steps_per_second": 10.454, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_scitail-pairs-qa_loss": 0.044962480664253235, "eval_scitail-pairs-qa_runtime": 1.0148, "eval_scitail-pairs-qa_samples_per_second": 98.546, "eval_scitail-pairs-qa_steps_per_second": 3.942, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_scitail-pairs-pos_loss": 0.3905903995037079, "eval_scitail-pairs-pos_runtime": 1.4797, "eval_scitail-pairs-pos_samples_per_second": 67.581, "eval_scitail-pairs-pos_steps_per_second": 2.703, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_xsum-pairs_loss": 0.2134229689836502, "eval_xsum-pairs_runtime": 0.9474, "eval_xsum-pairs_samples_per_second": 105.555, "eval_xsum-pairs_steps_per_second": 4.222, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_compression-pairs_loss": 0.07042723149061203, "eval_compression-pairs_runtime": 0.3012, "eval_compression-pairs_samples_per_second": 332.038, "eval_compression-pairs_steps_per_second": 13.282, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_sciq_pairs_loss": 0.23385030031204224, "eval_sciq_pairs_runtime": 4.232, "eval_sciq_pairs_samples_per_second": 23.629, "eval_sciq_pairs_steps_per_second": 0.945, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_qasc_pairs_loss": 0.1464882493019104, "eval_qasc_pairs_runtime": 1.1995, "eval_qasc_pairs_samples_per_second": 83.367, "eval_qasc_pairs_steps_per_second": 3.335, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_openbookqa_pairs_loss": 1.5863837003707886, "eval_openbookqa_pairs_runtime": 1.0359, "eval_openbookqa_pairs_samples_per_second": 96.538, "eval_openbookqa_pairs_steps_per_second": 3.862, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_msmarco_pairs_loss": 0.46735861897468567, "eval_msmarco_pairs_runtime": 2.1176, "eval_msmarco_pairs_samples_per_second": 47.223, "eval_msmarco_pairs_steps_per_second": 1.889, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_nq_pairs_loss": 0.20451465249061584, "eval_nq_pairs_runtime": 4.576, "eval_nq_pairs_samples_per_second": 21.853, "eval_nq_pairs_steps_per_second": 0.874, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_trivia_pairs_loss": 0.7080298066139221, "eval_trivia_pairs_runtime": 6.4996, "eval_trivia_pairs_samples_per_second": 15.386, "eval_trivia_pairs_steps_per_second": 0.615, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_quora_pairs_loss": 0.05269511789083481, "eval_quora_pairs_runtime": 0.6762, "eval_quora_pairs_samples_per_second": 147.881, "eval_quora_pairs_steps_per_second": 5.915, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_gooaq_pairs_loss": 0.46917012333869934, "eval_gooaq_pairs_runtime": 1.4121, "eval_gooaq_pairs_samples_per_second": 70.816, "eval_gooaq_pairs_steps_per_second": 2.833, "step": 11520 }, { "epoch": 1.8053596614950633, "eval_mrpc_pairs_loss": 0.034264735877513885, "eval_mrpc_pairs_runtime": 0.2695, "eval_mrpc_pairs_samples_per_second": 371.085, "eval_mrpc_pairs_steps_per_second": 14.843, "step": 11520 }, { "epoch": 1.8103745494436607, "grad_norm": 11.72193717956543, "learning_rate": 3.0388048628894118e-06, "loss": 1.1054, "step": 11552 }, { "epoch": 1.815389437392258, "grad_norm": 0.5831508636474609, "learning_rate": 2.8863697404918156e-06, "loss": 0.8731, "step": 11584 }, { "epoch": 1.8204043253408555, "grad_norm": 3.6861729621887207, "learning_rate": 2.7374491306970335e-06, "loss": 0.7774, "step": 11616 }, { "epoch": 1.8254192132894531, "grad_norm": 6.950013637542725, "learning_rate": 2.592086239657871e-06, "loss": 0.7425, "step": 11648 }, { "epoch": 1.8304341012380505, "grad_norm": 0.5719025135040283, "learning_rate": 2.4503232413305703e-06, "loss": 0.4233, "step": 11680 }, { "epoch": 1.835448989186648, "grad_norm": 17.151565551757812, "learning_rate": 2.3122012652389466e-06, "loss": 1.0839, "step": 11712 }, { "epoch": 1.8404638771352453, "grad_norm": 13.834997177124023, "learning_rate": 2.1777603845415166e-06, "loss": 1.0086, "step": 11744 }, { "epoch": 1.8454787650838427, "grad_norm": 67.06939697265625, "learning_rate": 2.0470396044051546e-06, "loss": 0.9838, "step": 11776 }, { "epoch": 1.85049365303244, "grad_norm": 16.5714168548584, "learning_rate": 1.9200768506885265e-06, "loss": 1.0228, "step": 11808 }, { "epoch": 1.8555085409810375, "grad_norm": 0.7045705914497375, "learning_rate": 1.7969089589387306e-06, "loss": 0.5337, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_nli-pairs_loss": 0.8482785224914551, "eval_nli-pairs_runtime": 3.7468, "eval_nli-pairs_samples_per_second": 26.69, "eval_nli-pairs_steps_per_second": 1.068, "eval_sts-test_pearson_cosine": 0.7895263195100214, "eval_sts-test_pearson_dot": 0.5158381393309118, "eval_sts-test_pearson_euclidean": 0.7193159029139317, "eval_sts-test_pearson_manhattan": 0.7125586811431698, "eval_sts-test_pearson_max": 0.7895263195100214, "eval_sts-test_spearman_cosine": 0.8091171985524063, "eval_sts-test_spearman_dot": 0.49747649807543753, "eval_sts-test_spearman_euclidean": 0.7116692618850177, "eval_sts-test_spearman_manhattan": 0.706644667574661, "eval_sts-test_spearman_max": 0.8091171985524063, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_vitaminc-pairs_loss": 4.710992336273193, "eval_vitaminc-pairs_runtime": 1.1414, "eval_vitaminc-pairs_samples_per_second": 74.47, "eval_vitaminc-pairs_steps_per_second": 2.628, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_sts-label_loss": 4.067962169647217, "eval_sts-label_runtime": 0.2727, "eval_sts-label_samples_per_second": 366.723, "eval_sts-label_steps_per_second": 14.669, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_qnli-contrastive_loss": 0.1216823160648346, "eval_qnli-contrastive_runtime": 0.3609, "eval_qnli-contrastive_samples_per_second": 277.082, "eval_qnli-contrastive_steps_per_second": 11.083, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_scitail-pairs-qa_loss": 0.04541168361902237, "eval_scitail-pairs-qa_runtime": 0.8784, "eval_scitail-pairs-qa_samples_per_second": 113.848, "eval_scitail-pairs-qa_steps_per_second": 4.554, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_scitail-pairs-pos_loss": 0.3859683573246002, "eval_scitail-pairs-pos_runtime": 1.334, "eval_scitail-pairs-pos_samples_per_second": 74.962, "eval_scitail-pairs-pos_steps_per_second": 2.998, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_xsum-pairs_loss": 0.21197709441184998, "eval_xsum-pairs_runtime": 0.9422, "eval_xsum-pairs_samples_per_second": 106.13, "eval_xsum-pairs_steps_per_second": 4.245, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_compression-pairs_loss": 0.0720294639468193, "eval_compression-pairs_runtime": 0.2847, "eval_compression-pairs_samples_per_second": 351.213, "eval_compression-pairs_steps_per_second": 14.049, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_sciq_pairs_loss": 0.22902096807956696, "eval_sciq_pairs_runtime": 4.1139, "eval_sciq_pairs_samples_per_second": 24.308, "eval_sciq_pairs_steps_per_second": 0.972, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_qasc_pairs_loss": 0.14490041136741638, "eval_qasc_pairs_runtime": 1.0553, "eval_qasc_pairs_samples_per_second": 94.755, "eval_qasc_pairs_steps_per_second": 3.79, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_openbookqa_pairs_loss": 1.5618047714233398, "eval_openbookqa_pairs_runtime": 0.9003, "eval_openbookqa_pairs_samples_per_second": 111.072, "eval_openbookqa_pairs_steps_per_second": 4.443, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_msmarco_pairs_loss": 0.4609709680080414, "eval_msmarco_pairs_runtime": 2.0531, "eval_msmarco_pairs_samples_per_second": 48.706, "eval_msmarco_pairs_steps_per_second": 1.948, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_nq_pairs_loss": 0.20117321610450745, "eval_nq_pairs_runtime": 4.502, "eval_nq_pairs_samples_per_second": 22.213, "eval_nq_pairs_steps_per_second": 0.889, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_trivia_pairs_loss": 0.6966190934181213, "eval_trivia_pairs_runtime": 6.445, "eval_trivia_pairs_samples_per_second": 15.516, "eval_trivia_pairs_steps_per_second": 0.621, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_quora_pairs_loss": 0.04505549371242523, "eval_quora_pairs_runtime": 0.6816, "eval_quora_pairs_samples_per_second": 146.704, "eval_quora_pairs_steps_per_second": 5.868, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_gooaq_pairs_loss": 0.45639243721961975, "eval_gooaq_pairs_runtime": 1.4058, "eval_gooaq_pairs_samples_per_second": 71.134, "eval_gooaq_pairs_steps_per_second": 2.845, "step": 11840 }, { "epoch": 1.8555085409810375, "eval_mrpc_pairs_loss": 0.035240672528743744, "eval_mrpc_pairs_runtime": 0.2659, "eval_mrpc_pairs_samples_per_second": 376.125, "eval_mrpc_pairs_steps_per_second": 15.045, "step": 11840 }, { "epoch": 1.8605234289296348, "grad_norm": 23.763080596923828, "learning_rate": 1.677571663704261e-06, "loss": 0.4719, "step": 11872 }, { "epoch": 1.8655383168782322, "grad_norm": 0.3808485269546509, "learning_rate": 1.5620995881673521e-06, "loss": 0.9254, "step": 11904 }, { "epoch": 1.8705532048268296, "grad_norm": 5.26698112487793, "learning_rate": 1.4505262340988284e-06, "loss": 0.4605, "step": 11936 }, { "epoch": 1.875568092775427, "grad_norm": 16.77387809753418, "learning_rate": 1.3428839721382502e-06, "loss": 0.5605, "step": 11968 }, { "epoch": 1.8805829807240244, "grad_norm": 21.53814125061035, "learning_rate": 1.2392040324022953e-06, "loss": 0.804, "step": 12000 }, { "epoch": 1.8855978686726218, "grad_norm": 4.215524196624756, "learning_rate": 1.1395164954239678e-06, "loss": 0.8148, "step": 12032 }, { "epoch": 1.8906127566212192, "grad_norm": 0.18259356915950775, "learning_rate": 1.0438502834253855e-06, "loss": 0.6428, "step": 12064 }, { "epoch": 1.8956276445698168, "grad_norm": 12.928991317749023, "learning_rate": 9.522331519266136e-07, "loss": 0.764, "step": 12096 }, { "epoch": 1.9006425325184142, "grad_norm": 13.300838470458984, "learning_rate": 8.646916816929929e-07, "loss": 0.8099, "step": 12128 }, { "epoch": 1.9056574204670116, "grad_norm": 11.422415733337402, "learning_rate": 7.812512710232789e-07, "loss": 0.3568, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_nli-pairs_loss": 0.8413404226303101, "eval_nli-pairs_runtime": 3.6923, "eval_nli-pairs_samples_per_second": 27.083, "eval_nli-pairs_steps_per_second": 1.083, "eval_sts-test_pearson_cosine": 0.7890091154078223, "eval_sts-test_pearson_dot": 0.5105139397287081, "eval_sts-test_pearson_euclidean": 0.7195910583360985, "eval_sts-test_pearson_manhattan": 0.7128914490088293, "eval_sts-test_pearson_max": 0.7890091154078223, "eval_sts-test_spearman_cosine": 0.8083731882764517, "eval_sts-test_spearman_dot": 0.491674771948391, "eval_sts-test_spearman_euclidean": 0.711658681042867, "eval_sts-test_spearman_manhattan": 0.7067794421682814, "eval_sts-test_spearman_max": 0.8083731882764517, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_vitaminc-pairs_loss": 4.713425159454346, "eval_vitaminc-pairs_runtime": 1.1469, "eval_vitaminc-pairs_samples_per_second": 74.11, "eval_vitaminc-pairs_steps_per_second": 2.616, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_sts-label_loss": 4.026952266693115, "eval_sts-label_runtime": 0.291, "eval_sts-label_samples_per_second": 343.61, "eval_sts-label_steps_per_second": 13.744, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_qnli-contrastive_loss": 0.12160658091306686, "eval_qnli-contrastive_runtime": 0.3659, "eval_qnli-contrastive_samples_per_second": 273.295, "eval_qnli-contrastive_steps_per_second": 10.932, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_scitail-pairs-qa_loss": 0.04484746977686882, "eval_scitail-pairs-qa_runtime": 0.8896, "eval_scitail-pairs-qa_samples_per_second": 112.405, "eval_scitail-pairs-qa_steps_per_second": 4.496, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_scitail-pairs-pos_loss": 0.39468416571617126, "eval_scitail-pairs-pos_runtime": 1.3313, "eval_scitail-pairs-pos_samples_per_second": 75.117, "eval_scitail-pairs-pos_steps_per_second": 3.005, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_xsum-pairs_loss": 0.21074718236923218, "eval_xsum-pairs_runtime": 0.9508, "eval_xsum-pairs_samples_per_second": 105.175, "eval_xsum-pairs_steps_per_second": 4.207, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_compression-pairs_loss": 0.0713261142373085, "eval_compression-pairs_runtime": 0.2806, "eval_compression-pairs_samples_per_second": 356.382, "eval_compression-pairs_steps_per_second": 14.255, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_sciq_pairs_loss": 0.2296866774559021, "eval_sciq_pairs_runtime": 4.1325, "eval_sciq_pairs_samples_per_second": 24.199, "eval_sciq_pairs_steps_per_second": 0.968, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_qasc_pairs_loss": 0.14393150806427002, "eval_qasc_pairs_runtime": 1.0884, "eval_qasc_pairs_samples_per_second": 91.879, "eval_qasc_pairs_steps_per_second": 3.675, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_openbookqa_pairs_loss": 1.5734162330627441, "eval_openbookqa_pairs_runtime": 0.9138, "eval_openbookqa_pairs_samples_per_second": 109.432, "eval_openbookqa_pairs_steps_per_second": 4.377, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_msmarco_pairs_loss": 0.45633164048194885, "eval_msmarco_pairs_runtime": 2.0653, "eval_msmarco_pairs_samples_per_second": 48.42, "eval_msmarco_pairs_steps_per_second": 1.937, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_nq_pairs_loss": 0.19797872006893158, "eval_nq_pairs_runtime": 4.5059, "eval_nq_pairs_samples_per_second": 22.193, "eval_nq_pairs_steps_per_second": 0.888, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_trivia_pairs_loss": 0.6929593682289124, "eval_trivia_pairs_runtime": 6.4422, "eval_trivia_pairs_samples_per_second": 15.523, "eval_trivia_pairs_steps_per_second": 0.621, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_quora_pairs_loss": 0.022142255678772926, "eval_quora_pairs_runtime": 0.6787, "eval_quora_pairs_samples_per_second": 147.335, "eval_quora_pairs_steps_per_second": 5.893, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_gooaq_pairs_loss": 0.4508766233921051, "eval_gooaq_pairs_runtime": 1.4282, "eval_gooaq_pairs_samples_per_second": 70.017, "eval_gooaq_pairs_steps_per_second": 2.801, "step": 12160 }, { "epoch": 1.9056574204670116, "eval_mrpc_pairs_loss": 0.0345807746052742, "eval_mrpc_pairs_runtime": 0.2773, "eval_mrpc_pairs_samples_per_second": 360.571, "eval_mrpc_pairs_steps_per_second": 14.423, "step": 12160 }, { "epoch": 1.910672308415609, "grad_norm": 11.768965721130371, "learning_rate": 7.019361283808789e-07, "loss": 0.6994, "step": 12192 }, { "epoch": 1.9156871963642064, "grad_norm": 0.217757910490036, "learning_rate": 6.267692653702834e-07, "loss": 1.102, "step": 12224 }, { "epoch": 1.9207020843128038, "grad_norm": 2.162067413330078, "learning_rate": 5.557724900607225e-07, "loss": 0.7589, "step": 12256 }, { "epoch": 1.9257169722614011, "grad_norm": 4.040590763092041, "learning_rate": 4.889664006590316e-07, "loss": 0.8421, "step": 12288 }, { "epoch": 1.9307318602099985, "grad_norm": 26.240829467773438, "learning_rate": 4.263703795335061e-07, "loss": 0.6796, "step": 12320 }, { "epoch": 1.935746748158596, "grad_norm": 2.219181776046753, "learning_rate": 3.6800258759053795e-07, "loss": 0.8515, "step": 12352 }, { "epoch": 1.9407616361071933, "grad_norm": 16.353717803955078, "learning_rate": 3.1387995900559075e-07, "loss": 0.6122, "step": 12384 }, { "epoch": 1.9457765240557907, "grad_norm": 1.4311314821243286, "learning_rate": 2.640181963101296e-07, "loss": 1.1603, "step": 12416 }, { "epoch": 1.950791412004388, "grad_norm": 0.4592527747154236, "learning_rate": 2.1843176583584867e-07, "loss": 1.2334, "step": 12448 }, { "epoch": 1.9558062999529855, "grad_norm": 5.437335968017578, "learning_rate": 1.7713389351758534e-07, "loss": 0.6642, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_nli-pairs_loss": 0.8451821804046631, "eval_nli-pairs_runtime": 3.6371, "eval_nli-pairs_samples_per_second": 27.495, "eval_nli-pairs_steps_per_second": 1.1, "eval_sts-test_pearson_cosine": 0.7888540920861717, "eval_sts-test_pearson_dot": 0.5102956102994395, "eval_sts-test_pearson_euclidean": 0.7193257196131406, "eval_sts-test_pearson_manhattan": 0.7126501427242363, "eval_sts-test_pearson_max": 0.7888540920861717, "eval_sts-test_spearman_cosine": 0.8083968903707864, "eval_sts-test_spearman_dot": 0.49164605546490964, "eval_sts-test_spearman_euclidean": 0.7113233675349284, "eval_sts-test_spearman_manhattan": 0.7065647557585915, "eval_sts-test_spearman_max": 0.8083968903707864, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_vitaminc-pairs_loss": 4.710177898406982, "eval_vitaminc-pairs_runtime": 1.1385, "eval_vitaminc-pairs_samples_per_second": 74.659, "eval_vitaminc-pairs_steps_per_second": 2.635, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_sts-label_loss": 4.044436454772949, "eval_sts-label_runtime": 0.2998, "eval_sts-label_samples_per_second": 333.515, "eval_sts-label_steps_per_second": 13.341, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_qnli-contrastive_loss": 0.11889366060495377, "eval_qnli-contrastive_runtime": 0.3676, "eval_qnli-contrastive_samples_per_second": 271.999, "eval_qnli-contrastive_steps_per_second": 10.88, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_scitail-pairs-qa_loss": 0.04474009573459625, "eval_scitail-pairs-qa_runtime": 0.8891, "eval_scitail-pairs-qa_samples_per_second": 112.471, "eval_scitail-pairs-qa_steps_per_second": 4.499, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_scitail-pairs-pos_loss": 0.3929445147514343, "eval_scitail-pairs-pos_runtime": 1.3685, "eval_scitail-pairs-pos_samples_per_second": 73.074, "eval_scitail-pairs-pos_steps_per_second": 2.923, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_xsum-pairs_loss": 0.2108439952135086, "eval_xsum-pairs_runtime": 0.9561, "eval_xsum-pairs_samples_per_second": 104.591, "eval_xsum-pairs_steps_per_second": 4.184, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_compression-pairs_loss": 0.07098440825939178, "eval_compression-pairs_runtime": 0.3082, "eval_compression-pairs_samples_per_second": 324.46, "eval_compression-pairs_steps_per_second": 12.978, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_sciq_pairs_loss": 0.2221365123987198, "eval_sciq_pairs_runtime": 4.094, "eval_sciq_pairs_samples_per_second": 24.426, "eval_sciq_pairs_steps_per_second": 0.977, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_qasc_pairs_loss": 0.14360681176185608, "eval_qasc_pairs_runtime": 1.0634, "eval_qasc_pairs_samples_per_second": 94.04, "eval_qasc_pairs_steps_per_second": 3.762, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_openbookqa_pairs_loss": 1.562986135482788, "eval_openbookqa_pairs_runtime": 0.8887, "eval_openbookqa_pairs_samples_per_second": 112.526, "eval_openbookqa_pairs_steps_per_second": 4.501, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_msmarco_pairs_loss": 0.45447924733161926, "eval_msmarco_pairs_runtime": 2.0848, "eval_msmarco_pairs_samples_per_second": 47.965, "eval_msmarco_pairs_steps_per_second": 1.919, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_nq_pairs_loss": 0.19736650586128235, "eval_nq_pairs_runtime": 4.5063, "eval_nq_pairs_samples_per_second": 22.191, "eval_nq_pairs_steps_per_second": 0.888, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_trivia_pairs_loss": 0.6915084719657898, "eval_trivia_pairs_runtime": 6.4628, "eval_trivia_pairs_samples_per_second": 15.473, "eval_trivia_pairs_steps_per_second": 0.619, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_quora_pairs_loss": 0.049287740141153336, "eval_quora_pairs_runtime": 0.6827, "eval_quora_pairs_samples_per_second": 146.476, "eval_quora_pairs_steps_per_second": 5.859, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_gooaq_pairs_loss": 0.4529716968536377, "eval_gooaq_pairs_runtime": 1.4011, "eval_gooaq_pairs_samples_per_second": 71.372, "eval_gooaq_pairs_steps_per_second": 2.855, "step": 12480 }, { "epoch": 1.9558062999529855, "eval_mrpc_pairs_loss": 0.03451301530003548, "eval_mrpc_pairs_runtime": 0.2694, "eval_mrpc_pairs_samples_per_second": 371.193, "eval_mrpc_pairs_steps_per_second": 14.848, "step": 12480 }, { "epoch": 1.9608211879015829, "grad_norm": 1.1752641201019287, "learning_rate": 1.4013656105608451e-07, "loss": 0.747, "step": 12512 }, { "epoch": 1.9658360758501803, "grad_norm": 1.9333534240722656, "learning_rate": 1.0745050244177102e-07, "loss": 0.9231, "step": 12544 }, { "epoch": 1.9708509637987777, "grad_norm": 17.17812156677246, "learning_rate": 7.990612946926268e-08, "loss": 1.1242, "step": 12576 }, { "epoch": 1.975865851747375, "grad_norm": 0.1146775558590889, "learning_rate": 5.573442154119257e-08, "loss": 0.5239, "step": 12608 }, { "epoch": 1.9808807396959724, "grad_norm": 3.578068971633911, "learning_rate": 3.589847494892162e-08, "loss": 0.697, "step": 12640 }, { "epoch": 1.9858956276445698, "grad_norm": 2.2802326679229736, "learning_rate": 2.0404044671163413e-08, "loss": 0.9842, "step": 12672 }, { "epoch": 1.9909105155931672, "grad_norm": 14.248183250427246, "learning_rate": 9.255626087861236e-09, "loss": 0.8476, "step": 12704 }, { "epoch": 1.9959254035417646, "grad_norm": 18.7357177734375, "learning_rate": 2.4564536759413837e-09, "loss": 0.6754, "step": 12736 } ], "logging_steps": 32, "max_steps": 12762, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1277, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }