{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0006268609935747, "eval_steps": 320, "global_step": 6385, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0050148879485973985, "grad_norm": 14.771158218383789, "learning_rate": 9.707724425887265e-07, "loss": 0.6329, "step": 32 }, { "epoch": 0.010029775897194797, "grad_norm": 11.052021980285645, "learning_rate": 1.9728601252609606e-06, "loss": 0.9693, "step": 64 }, { "epoch": 0.015044663845792195, "grad_norm": 20.26296615600586, "learning_rate": 2.9749478079331944e-06, "loss": 0.6548, "step": 96 }, { "epoch": 0.020059551794389594, "grad_norm": 12.62913703918457, "learning_rate": 3.945720250521921e-06, "loss": 1.1279, "step": 128 }, { "epoch": 0.025074439742986992, "grad_norm": 12.316486358642578, "learning_rate": 4.916492693110647e-06, "loss": 1.0017, "step": 160 }, { "epoch": 0.03008932769158439, "grad_norm": 64.25923919677734, "learning_rate": 5.918580375782881e-06, "loss": 0.7571, "step": 192 }, { "epoch": 0.03510421564018179, "grad_norm": 0.8205029368400574, "learning_rate": 6.920668058455115e-06, "loss": 0.7304, "step": 224 }, { "epoch": 0.04011910358877919, "grad_norm": 6.598870754241943, "learning_rate": 7.922755741127349e-06, "loss": 0.7636, "step": 256 }, { "epoch": 0.045133991537376586, "grad_norm": 8.728073120117188, "learning_rate": 8.924843423799583e-06, "loss": 0.482, "step": 288 }, { "epoch": 0.050148879485973984, "grad_norm": 7.645521640777588, "learning_rate": 9.926931106471817e-06, "loss": 0.6312, "step": 320 }, { "epoch": 0.050148879485973984, "eval_nli-pairs_loss": 1.0158467292785645, "eval_nli-pairs_runtime": 3.7267, "eval_nli-pairs_samples_per_second": 26.833, "eval_nli-pairs_steps_per_second": 1.073, "eval_sts-test_pearson_cosine": 0.7848265412179125, "eval_sts-test_pearson_dot": 0.5437080705284749, "eval_sts-test_pearson_euclidean": 0.7445845076364892, "eval_sts-test_pearson_manhattan": 0.7429239204432232, "eval_sts-test_pearson_max": 0.7848265412179125, "eval_sts-test_spearman_cosine": 0.7989504707258924, "eval_sts-test_spearman_dot": 0.5206855421174118, "eval_sts-test_spearman_euclidean": 0.733568982260844, "eval_sts-test_spearman_manhattan": 0.7349407257944446, "eval_sts-test_spearman_max": 0.7989504707258924, "step": 320 }, { "epoch": 0.050148879485973984, "eval_vitaminc-pairs_loss": 4.692601680755615, "eval_vitaminc-pairs_runtime": 1.1397, "eval_vitaminc-pairs_samples_per_second": 74.578, "eval_vitaminc-pairs_steps_per_second": 2.632, "step": 320 }, { "epoch": 0.050148879485973984, "eval_sts-label_loss": 3.5502490997314453, "eval_sts-label_runtime": 0.28, "eval_sts-label_samples_per_second": 357.117, "eval_sts-label_steps_per_second": 14.285, "step": 320 }, { "epoch": 0.050148879485973984, "eval_qnli-contrastive_loss": 0.16079513728618622, "eval_qnli-contrastive_runtime": 0.3646, "eval_qnli-contrastive_samples_per_second": 274.299, "eval_qnli-contrastive_steps_per_second": 10.972, "step": 320 }, { "epoch": 0.050148879485973984, "eval_scitail-pairs-qa_loss": 0.07610582560300827, "eval_scitail-pairs-qa_runtime": 0.8885, "eval_scitail-pairs-qa_samples_per_second": 112.548, "eval_scitail-pairs-qa_steps_per_second": 4.502, "step": 320 }, { "epoch": 0.050148879485973984, "eval_scitail-pairs-pos_loss": 0.5141278505325317, "eval_scitail-pairs-pos_runtime": 1.3498, "eval_scitail-pairs-pos_samples_per_second": 74.085, "eval_scitail-pairs-pos_steps_per_second": 2.963, "step": 320 }, { "epoch": 0.050148879485973984, "eval_xsum-pairs_loss": 0.25581496953964233, "eval_xsum-pairs_runtime": 0.9407, "eval_xsum-pairs_samples_per_second": 106.304, "eval_xsum-pairs_steps_per_second": 4.252, "step": 320 }, { "epoch": 0.050148879485973984, "eval_compression-pairs_loss": 0.09814296662807465, "eval_compression-pairs_runtime": 0.2758, "eval_compression-pairs_samples_per_second": 362.517, "eval_compression-pairs_steps_per_second": 14.501, "step": 320 }, { "epoch": 0.050148879485973984, "eval_sciq_pairs_loss": 0.25620242953300476, "eval_sciq_pairs_runtime": 4.1155, "eval_sciq_pairs_samples_per_second": 24.298, "eval_sciq_pairs_steps_per_second": 0.972, "step": 320 }, { "epoch": 0.050148879485973984, "eval_qasc_pairs_loss": 0.2044612169265747, "eval_qasc_pairs_runtime": 1.1029, "eval_qasc_pairs_samples_per_second": 90.672, "eval_qasc_pairs_steps_per_second": 3.627, "step": 320 }, { "epoch": 0.050148879485973984, "eval_openbookqa_pairs_loss": 1.7537646293640137, "eval_openbookqa_pairs_runtime": 0.9037, "eval_openbookqa_pairs_samples_per_second": 110.653, "eval_openbookqa_pairs_steps_per_second": 4.426, "step": 320 }, { "epoch": 0.050148879485973984, "eval_msmarco_pairs_loss": 0.5138561725616455, "eval_msmarco_pairs_runtime": 2.0511, "eval_msmarco_pairs_samples_per_second": 48.754, "eval_msmarco_pairs_steps_per_second": 1.95, "step": 320 }, { "epoch": 0.050148879485973984, "eval_nq_pairs_loss": 0.23510317504405975, "eval_nq_pairs_runtime": 4.5293, "eval_nq_pairs_samples_per_second": 22.078, "eval_nq_pairs_steps_per_second": 0.883, "step": 320 }, { "epoch": 0.050148879485973984, "eval_trivia_pairs_loss": 0.7808571457862854, "eval_trivia_pairs_runtime": 6.5065, "eval_trivia_pairs_samples_per_second": 15.369, "eval_trivia_pairs_steps_per_second": 0.615, "step": 320 }, { "epoch": 0.050148879485973984, "eval_quora_pairs_loss": 0.0392119362950325, "eval_quora_pairs_runtime": 0.675, "eval_quora_pairs_samples_per_second": 148.153, "eval_quora_pairs_steps_per_second": 5.926, "step": 320 }, { "epoch": 0.050148879485973984, "eval_gooaq_pairs_loss": 0.4712902009487152, "eval_gooaq_pairs_runtime": 1.4079, "eval_gooaq_pairs_samples_per_second": 71.028, "eval_gooaq_pairs_steps_per_second": 2.841, "step": 320 }, { "epoch": 0.050148879485973984, "eval_mrpc_pairs_loss": 0.05498996376991272, "eval_mrpc_pairs_runtime": 0.2623, "eval_mrpc_pairs_samples_per_second": 381.172, "eval_mrpc_pairs_steps_per_second": 15.247, "step": 320 }, { "epoch": 0.05516376743457138, "grad_norm": 0.34924012422561646, "learning_rate": 1.092901878914405e-05, "loss": 0.5791, "step": 352 }, { "epoch": 0.06017865538316878, "grad_norm": 0.36700841784477234, "learning_rate": 1.1931106471816284e-05, "loss": 0.6413, "step": 384 }, { "epoch": 0.06519354333176618, "grad_norm": 7.559622764587402, "learning_rate": 1.2933194154488518e-05, "loss": 0.4319, "step": 416 }, { "epoch": 0.07020843128036358, "grad_norm": 7.982416152954102, "learning_rate": 1.3935281837160753e-05, "loss": 0.6672, "step": 448 }, { "epoch": 0.07522331922896097, "grad_norm": 0.6726166009902954, "learning_rate": 1.4937369519832987e-05, "loss": 0.459, "step": 480 }, { "epoch": 0.08023820717755838, "grad_norm": 14.846123695373535, "learning_rate": 1.593945720250522e-05, "loss": 0.7621, "step": 512 }, { "epoch": 0.08525309512615578, "grad_norm": 0.7846627831459045, "learning_rate": 1.6941544885177454e-05, "loss": 0.864, "step": 544 }, { "epoch": 0.09026798307475317, "grad_norm": 0.8993583917617798, "learning_rate": 1.7943632567849688e-05, "loss": 0.5081, "step": 576 }, { "epoch": 0.09528287102335058, "grad_norm": 1.4990565776824951, "learning_rate": 1.894572025052192e-05, "loss": 0.654, "step": 608 }, { "epoch": 0.10029775897194797, "grad_norm": 15.647976875305176, "learning_rate": 1.9947807933194157e-05, "loss": 0.6372, "step": 640 }, { "epoch": 0.10029775897194797, "eval_nli-pairs_loss": 1.0652996301651, "eval_nli-pairs_runtime": 3.6326, "eval_nli-pairs_samples_per_second": 27.528, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.785263018402905, "eval_sts-test_pearson_dot": 0.5290450141477089, "eval_sts-test_pearson_euclidean": 0.7433756286425983, "eval_sts-test_pearson_manhattan": 0.7411097274300102, "eval_sts-test_pearson_max": 0.785263018402905, "eval_sts-test_spearman_cosine": 0.7996928912411947, "eval_sts-test_spearman_dot": 0.5102571497667188, "eval_sts-test_spearman_euclidean": 0.7338969723324641, "eval_sts-test_spearman_manhattan": 0.7343494860194358, "eval_sts-test_spearman_max": 0.7996928912411947, "step": 640 }, { "epoch": 0.10029775897194797, "eval_vitaminc-pairs_loss": 4.719416618347168, "eval_vitaminc-pairs_runtime": 1.1268, "eval_vitaminc-pairs_samples_per_second": 75.437, "eval_vitaminc-pairs_steps_per_second": 2.662, "step": 640 }, { "epoch": 0.10029775897194797, "eval_sts-label_loss": 3.612347364425659, "eval_sts-label_runtime": 0.2683, "eval_sts-label_samples_per_second": 372.651, "eval_sts-label_steps_per_second": 14.906, "step": 640 }, { "epoch": 0.10029775897194797, "eval_qnli-contrastive_loss": 0.15202775597572327, "eval_qnli-contrastive_runtime": 0.3528, "eval_qnli-contrastive_samples_per_second": 283.457, "eval_qnli-contrastive_steps_per_second": 11.338, "step": 640 }, { "epoch": 0.10029775897194797, "eval_scitail-pairs-qa_loss": 0.07544919103384018, "eval_scitail-pairs-qa_runtime": 0.8732, "eval_scitail-pairs-qa_samples_per_second": 114.517, "eval_scitail-pairs-qa_steps_per_second": 4.581, "step": 640 }, { "epoch": 0.10029775897194797, "eval_scitail-pairs-pos_loss": 0.5404170751571655, "eval_scitail-pairs-pos_runtime": 1.3146, "eval_scitail-pairs-pos_samples_per_second": 76.067, "eval_scitail-pairs-pos_steps_per_second": 3.043, "step": 640 }, { "epoch": 0.10029775897194797, "eval_xsum-pairs_loss": 0.25958582758903503, "eval_xsum-pairs_runtime": 0.9287, "eval_xsum-pairs_samples_per_second": 107.679, "eval_xsum-pairs_steps_per_second": 4.307, "step": 640 }, { "epoch": 0.10029775897194797, "eval_compression-pairs_loss": 0.10066353529691696, "eval_compression-pairs_runtime": 0.2732, "eval_compression-pairs_samples_per_second": 366.076, "eval_compression-pairs_steps_per_second": 14.643, "step": 640 }, { "epoch": 0.10029775897194797, "eval_sciq_pairs_loss": 0.2645374834537506, "eval_sciq_pairs_runtime": 4.0725, "eval_sciq_pairs_samples_per_second": 24.555, "eval_sciq_pairs_steps_per_second": 0.982, "step": 640 }, { "epoch": 0.10029775897194797, "eval_qasc_pairs_loss": 0.21021947264671326, "eval_qasc_pairs_runtime": 1.0743, "eval_qasc_pairs_samples_per_second": 93.084, "eval_qasc_pairs_steps_per_second": 3.723, "step": 640 }, { "epoch": 0.10029775897194797, "eval_openbookqa_pairs_loss": 1.7905032634735107, "eval_openbookqa_pairs_runtime": 0.8886, "eval_openbookqa_pairs_samples_per_second": 112.532, "eval_openbookqa_pairs_steps_per_second": 4.501, "step": 640 }, { "epoch": 0.10029775897194797, "eval_msmarco_pairs_loss": 0.5102832913398743, "eval_msmarco_pairs_runtime": 2.0529, "eval_msmarco_pairs_samples_per_second": 48.712, "eval_msmarco_pairs_steps_per_second": 1.948, "step": 640 }, { "epoch": 0.10029775897194797, "eval_nq_pairs_loss": 0.24466972053050995, "eval_nq_pairs_runtime": 4.4973, "eval_nq_pairs_samples_per_second": 22.235, "eval_nq_pairs_steps_per_second": 0.889, "step": 640 }, { "epoch": 0.10029775897194797, "eval_trivia_pairs_loss": 0.8748095631599426, "eval_trivia_pairs_runtime": 6.4825, "eval_trivia_pairs_samples_per_second": 15.426, "eval_trivia_pairs_steps_per_second": 0.617, "step": 640 }, { "epoch": 0.10029775897194797, "eval_quora_pairs_loss": 0.07820220291614532, "eval_quora_pairs_runtime": 0.6944, "eval_quora_pairs_samples_per_second": 144.008, "eval_quora_pairs_steps_per_second": 5.76, "step": 640 }, { "epoch": 0.10029775897194797, "eval_gooaq_pairs_loss": 0.5236212611198425, "eval_gooaq_pairs_runtime": 1.3899, "eval_gooaq_pairs_samples_per_second": 71.949, "eval_gooaq_pairs_steps_per_second": 2.878, "step": 640 }, { "epoch": 0.10029775897194797, "eval_mrpc_pairs_loss": 0.05494727939367294, "eval_mrpc_pairs_runtime": 0.2598, "eval_mrpc_pairs_samples_per_second": 384.941, "eval_mrpc_pairs_steps_per_second": 15.398, "step": 640 }, { "epoch": 0.10531264692054537, "grad_norm": 11.01974105834961, "learning_rate": 2.0949895615866387e-05, "loss": 0.9292, "step": 672 }, { "epoch": 0.11032753486914276, "grad_norm": 0.5542309284210205, "learning_rate": 2.1951983298538625e-05, "loss": 1.3108, "step": 704 }, { "epoch": 0.11534242281774017, "grad_norm": 15.458569526672363, "learning_rate": 2.2954070981210856e-05, "loss": 0.9674, "step": 736 }, { "epoch": 0.12035731076633756, "grad_norm": 2.7814478874206543, "learning_rate": 2.395615866388309e-05, "loss": 0.9226, "step": 768 }, { "epoch": 0.12537219871493496, "grad_norm": 11.393244743347168, "learning_rate": 2.4958246346555324e-05, "loss": 0.789, "step": 800 }, { "epoch": 0.13038708666353235, "grad_norm": 9.288290977478027, "learning_rate": 2.596033402922756e-05, "loss": 0.5186, "step": 832 }, { "epoch": 0.13540197461212977, "grad_norm": 47.65571212768555, "learning_rate": 2.6962421711899793e-05, "loss": 0.6726, "step": 864 }, { "epoch": 0.14041686256072716, "grad_norm": 12.908064842224121, "learning_rate": 2.7964509394572024e-05, "loss": 0.5381, "step": 896 }, { "epoch": 0.14543175050932455, "grad_norm": 14.951742172241211, "learning_rate": 2.896659707724426e-05, "loss": 0.581, "step": 928 }, { "epoch": 0.15044663845792194, "grad_norm": 20.12006187438965, "learning_rate": 2.9968684759916492e-05, "loss": 0.9038, "step": 960 }, { "epoch": 0.15044663845792194, "eval_nli-pairs_loss": 1.2173175811767578, "eval_nli-pairs_runtime": 3.7098, "eval_nli-pairs_samples_per_second": 26.955, "eval_nli-pairs_steps_per_second": 1.078, "eval_sts-test_pearson_cosine": 0.7840992835675669, "eval_sts-test_pearson_dot": 0.5220462136106129, "eval_sts-test_pearson_euclidean": 0.7457350047351855, "eval_sts-test_pearson_manhattan": 0.7425970830541657, "eval_sts-test_pearson_max": 0.7840992835675669, "eval_sts-test_spearman_cosine": 0.8006376809572144, "eval_sts-test_spearman_dot": 0.5020544543992158, "eval_sts-test_spearman_euclidean": 0.7369257710408655, "eval_sts-test_spearman_manhattan": 0.7362649758012406, "eval_sts-test_spearman_max": 0.8006376809572144, "step": 960 }, { "epoch": 0.15044663845792194, "eval_vitaminc-pairs_loss": 4.774902820587158, "eval_vitaminc-pairs_runtime": 1.1212, "eval_vitaminc-pairs_samples_per_second": 75.809, "eval_vitaminc-pairs_steps_per_second": 2.676, "step": 960 }, { "epoch": 0.15044663845792194, "eval_sts-label_loss": 3.198556900024414, "eval_sts-label_runtime": 0.2678, "eval_sts-label_samples_per_second": 373.382, "eval_sts-label_steps_per_second": 14.935, "step": 960 }, { "epoch": 0.15044663845792194, "eval_qnli-contrastive_loss": 0.1943340301513672, "eval_qnli-contrastive_runtime": 0.3511, "eval_qnli-contrastive_samples_per_second": 284.789, "eval_qnli-contrastive_steps_per_second": 11.392, "step": 960 }, { "epoch": 0.15044663845792194, "eval_scitail-pairs-qa_loss": 0.08060617744922638, "eval_scitail-pairs-qa_runtime": 0.8778, "eval_scitail-pairs-qa_samples_per_second": 113.92, "eval_scitail-pairs-qa_steps_per_second": 4.557, "step": 960 }, { "epoch": 0.15044663845792194, "eval_scitail-pairs-pos_loss": 0.4759831428527832, "eval_scitail-pairs-pos_runtime": 1.3609, "eval_scitail-pairs-pos_samples_per_second": 73.48, "eval_scitail-pairs-pos_steps_per_second": 2.939, "step": 960 }, { "epoch": 0.15044663845792194, "eval_xsum-pairs_loss": 0.27583304047584534, "eval_xsum-pairs_runtime": 0.9343, "eval_xsum-pairs_samples_per_second": 107.035, "eval_xsum-pairs_steps_per_second": 4.281, "step": 960 }, { "epoch": 0.15044663845792194, "eval_compression-pairs_loss": 0.10094660520553589, "eval_compression-pairs_runtime": 0.2739, "eval_compression-pairs_samples_per_second": 365.047, "eval_compression-pairs_steps_per_second": 14.602, "step": 960 }, { "epoch": 0.15044663845792194, "eval_sciq_pairs_loss": 0.2688131630420685, "eval_sciq_pairs_runtime": 4.0582, "eval_sciq_pairs_samples_per_second": 24.641, "eval_sciq_pairs_steps_per_second": 0.986, "step": 960 }, { "epoch": 0.15044663845792194, "eval_qasc_pairs_loss": 0.23267821967601776, "eval_qasc_pairs_runtime": 1.0554, "eval_qasc_pairs_samples_per_second": 94.75, "eval_qasc_pairs_steps_per_second": 3.79, "step": 960 }, { "epoch": 0.15044663845792194, "eval_openbookqa_pairs_loss": 1.8053069114685059, "eval_openbookqa_pairs_runtime": 0.8871, "eval_openbookqa_pairs_samples_per_second": 112.727, "eval_openbookqa_pairs_steps_per_second": 4.509, "step": 960 }, { "epoch": 0.15044663845792194, "eval_msmarco_pairs_loss": 0.5809260606765747, "eval_msmarco_pairs_runtime": 2.0498, "eval_msmarco_pairs_samples_per_second": 48.786, "eval_msmarco_pairs_steps_per_second": 1.951, "step": 960 }, { "epoch": 0.15044663845792194, "eval_nq_pairs_loss": 0.2808491885662079, "eval_nq_pairs_runtime": 4.4982, "eval_nq_pairs_samples_per_second": 22.231, "eval_nq_pairs_steps_per_second": 0.889, "step": 960 }, { "epoch": 0.15044663845792194, "eval_trivia_pairs_loss": 0.9379808902740479, "eval_trivia_pairs_runtime": 6.4578, "eval_trivia_pairs_samples_per_second": 15.485, "eval_trivia_pairs_steps_per_second": 0.619, "step": 960 }, { "epoch": 0.15044663845792194, "eval_quora_pairs_loss": 0.0913279801607132, "eval_quora_pairs_runtime": 0.6721, "eval_quora_pairs_samples_per_second": 148.79, "eval_quora_pairs_steps_per_second": 5.952, "step": 960 }, { "epoch": 0.15044663845792194, "eval_gooaq_pairs_loss": 0.5807955265045166, "eval_gooaq_pairs_runtime": 1.3915, "eval_gooaq_pairs_samples_per_second": 71.865, "eval_gooaq_pairs_steps_per_second": 2.875, "step": 960 }, { "epoch": 0.15044663845792194, "eval_mrpc_pairs_loss": 0.05799216777086258, "eval_mrpc_pairs_runtime": 0.2571, "eval_mrpc_pairs_samples_per_second": 388.998, "eval_mrpc_pairs_steps_per_second": 15.56, "step": 960 }, { "epoch": 0.15546152640651936, "grad_norm": 9.773286819458008, "learning_rate": 2.9997957904107625e-05, "loss": 0.7964, "step": 992 }, { "epoch": 0.16047641435511675, "grad_norm": 19.411075592041016, "learning_rate": 2.9991566594209126e-05, "loss": 0.8213, "step": 1024 }, { "epoch": 0.16549130230371414, "grad_norm": 3.5282175540924072, "learning_rate": 2.9980825799589488e-05, "loss": 0.5396, "step": 1056 }, { "epoch": 0.17050619025231156, "grad_norm": 62.66339874267578, "learning_rate": 2.996573863646219e-05, "loss": 0.9297, "step": 1088 }, { "epoch": 0.17552107820090895, "grad_norm": 8.785274505615234, "learning_rate": 2.994630948204727e-05, "loss": 1.169, "step": 1120 }, { "epoch": 0.18053596614950634, "grad_norm": 24.10859489440918, "learning_rate": 2.992254397330132e-05, "loss": 0.7486, "step": 1152 }, { "epoch": 0.18555085409810373, "grad_norm": 25.545284271240234, "learning_rate": 2.9894449005282077e-05, "loss": 0.6821, "step": 1184 }, { "epoch": 0.19056574204670115, "grad_norm": 0.8675521016120911, "learning_rate": 2.9862032729147954e-05, "loss": 0.6125, "step": 1216 }, { "epoch": 0.19558062999529854, "grad_norm": 16.122114181518555, "learning_rate": 2.9825304549793153e-05, "loss": 0.8061, "step": 1248 }, { "epoch": 0.20059551794389593, "grad_norm": 1.0314382314682007, "learning_rate": 2.978427512311904e-05, "loss": 0.6918, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_nli-pairs_loss": 1.1552109718322754, "eval_nli-pairs_runtime": 3.8751, "eval_nli-pairs_samples_per_second": 25.806, "eval_nli-pairs_steps_per_second": 1.032, "eval_sts-test_pearson_cosine": 0.786106976104726, "eval_sts-test_pearson_dot": 0.5116758767219935, "eval_sts-test_pearson_euclidean": 0.7432891018313416, "eval_sts-test_pearson_manhattan": 0.7400929158927781, "eval_sts-test_pearson_max": 0.786106976104726, "eval_sts-test_spearman_cosine": 0.801377272203007, "eval_sts-test_spearman_dot": 0.4921454166952506, "eval_sts-test_spearman_euclidean": 0.7343686249967402, "eval_sts-test_spearman_manhattan": 0.7331946050808561, "eval_sts-test_spearman_max": 0.801377272203007, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_vitaminc-pairs_loss": 4.6789751052856445, "eval_vitaminc-pairs_runtime": 1.1504, "eval_vitaminc-pairs_samples_per_second": 73.889, "eval_vitaminc-pairs_steps_per_second": 2.608, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_sts-label_loss": 3.5580556392669678, "eval_sts-label_runtime": 0.2834, "eval_sts-label_samples_per_second": 352.858, "eval_sts-label_steps_per_second": 14.114, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_qnli-contrastive_loss": 0.20369713008403778, "eval_qnli-contrastive_runtime": 0.358, "eval_qnli-contrastive_samples_per_second": 279.331, "eval_qnli-contrastive_steps_per_second": 11.173, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_scitail-pairs-qa_loss": 0.07465875148773193, "eval_scitail-pairs-qa_runtime": 0.9504, "eval_scitail-pairs-qa_samples_per_second": 105.214, "eval_scitail-pairs-qa_steps_per_second": 4.209, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_scitail-pairs-pos_loss": 0.49434563517570496, "eval_scitail-pairs-pos_runtime": 1.6041, "eval_scitail-pairs-pos_samples_per_second": 62.339, "eval_scitail-pairs-pos_steps_per_second": 2.494, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_xsum-pairs_loss": 0.28282061219215393, "eval_xsum-pairs_runtime": 0.9316, "eval_xsum-pairs_samples_per_second": 107.346, "eval_xsum-pairs_steps_per_second": 4.294, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_compression-pairs_loss": 0.097385473549366, "eval_compression-pairs_runtime": 0.2754, "eval_compression-pairs_samples_per_second": 363.1, "eval_compression-pairs_steps_per_second": 14.524, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_sciq_pairs_loss": 0.2762215733528137, "eval_sciq_pairs_runtime": 4.2307, "eval_sciq_pairs_samples_per_second": 23.637, "eval_sciq_pairs_steps_per_second": 0.945, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_qasc_pairs_loss": 0.19347424805164337, "eval_qasc_pairs_runtime": 1.2282, "eval_qasc_pairs_samples_per_second": 81.421, "eval_qasc_pairs_steps_per_second": 3.257, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_openbookqa_pairs_loss": 1.6875064373016357, "eval_openbookqa_pairs_runtime": 1.1661, "eval_openbookqa_pairs_samples_per_second": 85.754, "eval_openbookqa_pairs_steps_per_second": 3.43, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_msmarco_pairs_loss": 0.5743877291679382, "eval_msmarco_pairs_runtime": 2.1428, "eval_msmarco_pairs_samples_per_second": 46.669, "eval_msmarco_pairs_steps_per_second": 1.867, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_nq_pairs_loss": 0.30348217487335205, "eval_nq_pairs_runtime": 4.5543, "eval_nq_pairs_samples_per_second": 21.957, "eval_nq_pairs_steps_per_second": 0.878, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_trivia_pairs_loss": 0.9221765995025635, "eval_trivia_pairs_runtime": 6.6513, "eval_trivia_pairs_samples_per_second": 15.035, "eval_trivia_pairs_steps_per_second": 0.601, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_quora_pairs_loss": 0.03854631260037422, "eval_quora_pairs_runtime": 0.7822, "eval_quora_pairs_samples_per_second": 127.852, "eval_quora_pairs_steps_per_second": 5.114, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_gooaq_pairs_loss": 0.528398334980011, "eval_gooaq_pairs_runtime": 1.4882, "eval_gooaq_pairs_samples_per_second": 67.194, "eval_gooaq_pairs_steps_per_second": 2.688, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_mrpc_pairs_loss": 0.05623970925807953, "eval_mrpc_pairs_runtime": 0.2698, "eval_mrpc_pairs_samples_per_second": 370.713, "eval_mrpc_pairs_steps_per_second": 14.829, "step": 1280 }, { "epoch": 0.20561040589249335, "grad_norm": 0.6042119860649109, "learning_rate": 2.9738956352942557e-05, "loss": 0.9421, "step": 1312 }, { "epoch": 0.21062529384109074, "grad_norm": 13.87867546081543, "learning_rate": 2.968936138754259e-05, "loss": 0.8641, "step": 1344 }, { "epoch": 0.21564018178968813, "grad_norm": 44.48640441894531, "learning_rate": 2.9635504615845257e-05, "loss": 1.157, "step": 1376 }, { "epoch": 0.22065506973828553, "grad_norm": 15.554729461669922, "learning_rate": 2.957928148945977e-05, "loss": 0.8772, "step": 1408 }, { "epoch": 0.22566995768688294, "grad_norm": 16.644670486450195, "learning_rate": 2.9517081112297707e-05, "loss": 1.0496, "step": 1440 }, { "epoch": 0.23068484563548033, "grad_norm": 13.053145408630371, "learning_rate": 2.9450668912302004e-05, "loss": 0.589, "step": 1472 }, { "epoch": 0.23569973358407773, "grad_norm": 7.827791213989258, "learning_rate": 2.9380064157562306e-05, "loss": 0.8234, "step": 1504 }, { "epoch": 0.24071462153267512, "grad_norm": 15.598438262939453, "learning_rate": 2.930528733254901e-05, "loss": 0.7365, "step": 1536 }, { "epoch": 0.24572950948127253, "grad_norm": 13.723180770874023, "learning_rate": 2.9226360132170112e-05, "loss": 0.5076, "step": 1568 }, { "epoch": 0.2507443974298699, "grad_norm": 10.20022964477539, "learning_rate": 2.9143305455476866e-05, "loss": 1.0329, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_nli-pairs_loss": 1.0577216148376465, "eval_nli-pairs_runtime": 3.6476, "eval_nli-pairs_samples_per_second": 27.415, "eval_nli-pairs_steps_per_second": 1.097, "eval_sts-test_pearson_cosine": 0.7876359552191669, "eval_sts-test_pearson_dot": 0.5220803655074544, "eval_sts-test_pearson_euclidean": 0.7444632413869628, "eval_sts-test_pearson_manhattan": 0.7418744760088763, "eval_sts-test_pearson_max": 0.7876359552191669, "eval_sts-test_spearman_cosine": 0.8018874000525117, "eval_sts-test_spearman_dot": 0.5034518981121652, "eval_sts-test_spearman_euclidean": 0.7344750702387959, "eval_sts-test_spearman_manhattan": 0.7332804063416474, "eval_sts-test_spearman_max": 0.8018874000525117, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_vitaminc-pairs_loss": 4.784573554992676, "eval_vitaminc-pairs_runtime": 1.145, "eval_vitaminc-pairs_samples_per_second": 74.235, "eval_vitaminc-pairs_steps_per_second": 2.62, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_sts-label_loss": 3.6113080978393555, "eval_sts-label_runtime": 0.2746, "eval_sts-label_samples_per_second": 364.172, "eval_sts-label_steps_per_second": 14.567, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_qnli-contrastive_loss": 0.18593625724315643, "eval_qnli-contrastive_runtime": 0.3541, "eval_qnli-contrastive_samples_per_second": 282.413, "eval_qnli-contrastive_steps_per_second": 11.297, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_scitail-pairs-qa_loss": 0.07545661181211472, "eval_scitail-pairs-qa_runtime": 0.8854, "eval_scitail-pairs-qa_samples_per_second": 112.941, "eval_scitail-pairs-qa_steps_per_second": 4.518, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_scitail-pairs-pos_loss": 0.5018333792686462, "eval_scitail-pairs-pos_runtime": 1.3443, "eval_scitail-pairs-pos_samples_per_second": 74.386, "eval_scitail-pairs-pos_steps_per_second": 2.975, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_xsum-pairs_loss": 0.2749001085758209, "eval_xsum-pairs_runtime": 0.9439, "eval_xsum-pairs_samples_per_second": 105.939, "eval_xsum-pairs_steps_per_second": 4.238, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_compression-pairs_loss": 0.09735233336687088, "eval_compression-pairs_runtime": 0.2764, "eval_compression-pairs_samples_per_second": 361.753, "eval_compression-pairs_steps_per_second": 14.47, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_sciq_pairs_loss": 0.2648228108882904, "eval_sciq_pairs_runtime": 4.1207, "eval_sciq_pairs_samples_per_second": 24.268, "eval_sciq_pairs_steps_per_second": 0.971, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_qasc_pairs_loss": 0.21318012475967407, "eval_qasc_pairs_runtime": 1.0917, "eval_qasc_pairs_samples_per_second": 91.604, "eval_qasc_pairs_steps_per_second": 3.664, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_openbookqa_pairs_loss": 1.790009617805481, "eval_openbookqa_pairs_runtime": 0.8969, "eval_openbookqa_pairs_samples_per_second": 111.496, "eval_openbookqa_pairs_steps_per_second": 4.46, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_msmarco_pairs_loss": 0.57186359167099, "eval_msmarco_pairs_runtime": 2.0592, "eval_msmarco_pairs_samples_per_second": 48.563, "eval_msmarco_pairs_steps_per_second": 1.943, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_nq_pairs_loss": 0.2738310396671295, "eval_nq_pairs_runtime": 4.5092, "eval_nq_pairs_samples_per_second": 22.177, "eval_nq_pairs_steps_per_second": 0.887, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_trivia_pairs_loss": 0.8291679620742798, "eval_trivia_pairs_runtime": 6.526, "eval_trivia_pairs_samples_per_second": 15.323, "eval_trivia_pairs_steps_per_second": 0.613, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_quora_pairs_loss": 0.08000540733337402, "eval_quora_pairs_runtime": 0.6761, "eval_quora_pairs_samples_per_second": 147.909, "eval_quora_pairs_steps_per_second": 5.916, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_gooaq_pairs_loss": 0.5998037457466125, "eval_gooaq_pairs_runtime": 1.3978, "eval_gooaq_pairs_samples_per_second": 71.541, "eval_gooaq_pairs_steps_per_second": 2.862, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_mrpc_pairs_loss": 0.05507182702422142, "eval_mrpc_pairs_runtime": 0.2617, "eval_mrpc_pairs_samples_per_second": 382.156, "eval_mrpc_pairs_steps_per_second": 15.286, "step": 1600 }, { "epoch": 0.2557592853784673, "grad_norm": 8.05022144317627, "learning_rate": 2.9056147399020182e-05, "loss": 1.4006, "step": 1632 }, { "epoch": 0.2607741733270647, "grad_norm": 0.38224154710769653, "learning_rate": 2.8964911249859437e-05, "loss": 0.5963, "step": 1664 }, { "epoch": 0.2657890612756621, "grad_norm": 0.46655791997909546, "learning_rate": 2.886962347822604e-05, "loss": 0.7488, "step": 1696 }, { "epoch": 0.27080394922425954, "grad_norm": 8.102537155151367, "learning_rate": 2.8770311729843616e-05, "loss": 0.8548, "step": 1728 }, { "epoch": 0.27581883717285693, "grad_norm": 11.803775787353516, "learning_rate": 2.86670048179072e-05, "loss": 1.3324, "step": 1760 }, { "epoch": 0.2808337251214543, "grad_norm": 16.266756057739258, "learning_rate": 2.8559732714723715e-05, "loss": 0.5804, "step": 1792 }, { "epoch": 0.2858486130700517, "grad_norm": 2.8448822498321533, "learning_rate": 2.8448526543016114e-05, "loss": 0.7827, "step": 1824 }, { "epoch": 0.2908635010186491, "grad_norm": 21.346328735351562, "learning_rate": 2.8333418566893796e-05, "loss": 0.5448, "step": 1856 }, { "epoch": 0.2958783889672465, "grad_norm": 3.4379029273986816, "learning_rate": 2.8214442182491866e-05, "loss": 0.7368, "step": 1888 }, { "epoch": 0.3008932769158439, "grad_norm": 17.05881690979004, "learning_rate": 2.8091631908281963e-05, "loss": 0.5657, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_nli-pairs_loss": 1.0244356393814087, "eval_nli-pairs_runtime": 3.6217, "eval_nli-pairs_samples_per_second": 27.612, "eval_nli-pairs_steps_per_second": 1.104, "eval_sts-test_pearson_cosine": 0.781915957368962, "eval_sts-test_pearson_dot": 0.49821032356844613, "eval_sts-test_pearson_euclidean": 0.7329308897504494, "eval_sts-test_pearson_manhattan": 0.7292186092506918, "eval_sts-test_pearson_max": 0.781915957368962, "eval_sts-test_spearman_cosine": 0.7983596570250642, "eval_sts-test_spearman_dot": 0.4812350313638781, "eval_sts-test_spearman_euclidean": 0.7265758267352669, "eval_sts-test_spearman_manhattan": 0.7259264140902829, "eval_sts-test_spearman_max": 0.7983596570250642, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_vitaminc-pairs_loss": 4.698296070098877, "eval_vitaminc-pairs_runtime": 1.1338, "eval_vitaminc-pairs_samples_per_second": 74.97, "eval_vitaminc-pairs_steps_per_second": 2.646, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_sts-label_loss": 3.1822261810302734, "eval_sts-label_runtime": 0.2702, "eval_sts-label_samples_per_second": 370.09, "eval_sts-label_steps_per_second": 14.804, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_qnli-contrastive_loss": 0.11326340585947037, "eval_qnli-contrastive_runtime": 0.3581, "eval_qnli-contrastive_samples_per_second": 279.28, "eval_qnli-contrastive_steps_per_second": 11.171, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_scitail-pairs-qa_loss": 0.07009608298540115, "eval_scitail-pairs-qa_runtime": 0.8816, "eval_scitail-pairs-qa_samples_per_second": 113.424, "eval_scitail-pairs-qa_steps_per_second": 4.537, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_scitail-pairs-pos_loss": 0.49156129360198975, "eval_scitail-pairs-pos_runtime": 1.3759, "eval_scitail-pairs-pos_samples_per_second": 72.678, "eval_scitail-pairs-pos_steps_per_second": 2.907, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_xsum-pairs_loss": 0.25940877199172974, "eval_xsum-pairs_runtime": 0.9373, "eval_xsum-pairs_samples_per_second": 106.695, "eval_xsum-pairs_steps_per_second": 4.268, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_compression-pairs_loss": 0.0919649675488472, "eval_compression-pairs_runtime": 0.2738, "eval_compression-pairs_samples_per_second": 365.291, "eval_compression-pairs_steps_per_second": 14.612, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_sciq_pairs_loss": 0.29138606786727905, "eval_sciq_pairs_runtime": 4.1059, "eval_sciq_pairs_samples_per_second": 24.355, "eval_sciq_pairs_steps_per_second": 0.974, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_qasc_pairs_loss": 0.19625085592269897, "eval_qasc_pairs_runtime": 1.0611, "eval_qasc_pairs_samples_per_second": 94.24, "eval_qasc_pairs_steps_per_second": 3.77, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_openbookqa_pairs_loss": 1.7960456609725952, "eval_openbookqa_pairs_runtime": 0.9042, "eval_openbookqa_pairs_samples_per_second": 110.601, "eval_openbookqa_pairs_steps_per_second": 4.424, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_msmarco_pairs_loss": 0.5171416997909546, "eval_msmarco_pairs_runtime": 2.0637, "eval_msmarco_pairs_samples_per_second": 48.457, "eval_msmarco_pairs_steps_per_second": 1.938, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_nq_pairs_loss": 0.24809740483760834, "eval_nq_pairs_runtime": 4.529, "eval_nq_pairs_samples_per_second": 22.08, "eval_nq_pairs_steps_per_second": 0.883, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_trivia_pairs_loss": 0.9041999578475952, "eval_trivia_pairs_runtime": 6.5257, "eval_trivia_pairs_samples_per_second": 15.324, "eval_trivia_pairs_steps_per_second": 0.613, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_quora_pairs_loss": 0.03601976856589317, "eval_quora_pairs_runtime": 0.6811, "eval_quora_pairs_samples_per_second": 146.827, "eval_quora_pairs_steps_per_second": 5.873, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_gooaq_pairs_loss": 0.5626399517059326, "eval_gooaq_pairs_runtime": 1.3943, "eval_gooaq_pairs_samples_per_second": 71.72, "eval_gooaq_pairs_steps_per_second": 2.869, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_mrpc_pairs_loss": 0.04984402656555176, "eval_mrpc_pairs_runtime": 0.2579, "eval_mrpc_pairs_samples_per_second": 387.725, "eval_mrpc_pairs_steps_per_second": 15.509, "step": 1920 }, { "epoch": 0.30590816486444133, "grad_norm": 22.65591812133789, "learning_rate": 2.796502337505742e-05, "loss": 0.7425, "step": 1952 }, { "epoch": 0.3109230528130387, "grad_norm": 10.119640350341797, "learning_rate": 2.78346533155958e-05, "loss": 0.7819, "step": 1984 }, { "epoch": 0.3159379407616361, "grad_norm": 8.690531730651855, "learning_rate": 2.770055955400161e-05, "loss": 0.5937, "step": 2016 }, { "epoch": 0.3209528287102335, "grad_norm": 0.8992699384689331, "learning_rate": 2.7562780994732476e-05, "loss": 0.8133, "step": 2048 }, { "epoch": 0.3259677166588309, "grad_norm": 10.619684219360352, "learning_rate": 2.7421357611311824e-05, "loss": 1.0674, "step": 2080 }, { "epoch": 0.3309826046074283, "grad_norm": 7.222084045410156, "learning_rate": 2.727633043473141e-05, "loss": 0.6288, "step": 2112 }, { "epoch": 0.3359974925560257, "grad_norm": 10.166888236999512, "learning_rate": 2.712774154154707e-05, "loss": 0.5866, "step": 2144 }, { "epoch": 0.3410123805046231, "grad_norm": 0.36360761523246765, "learning_rate": 2.6975634041671052e-05, "loss": 0.6962, "step": 2176 }, { "epoch": 0.3460272684532205, "grad_norm": 9.586665153503418, "learning_rate": 2.6820052065864665e-05, "loss": 0.5562, "step": 2208 }, { "epoch": 0.3510421564018179, "grad_norm": 1.1307642459869385, "learning_rate": 2.6661040752934594e-05, "loss": 0.8871, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_nli-pairs_loss": 1.0147591829299927, "eval_nli-pairs_runtime": 3.7201, "eval_nli-pairs_samples_per_second": 26.881, "eval_nli-pairs_steps_per_second": 1.075, "eval_sts-test_pearson_cosine": 0.7872126529181761, "eval_sts-test_pearson_dot": 0.5062045289861089, "eval_sts-test_pearson_euclidean": 0.7351473988633473, "eval_sts-test_pearson_manhattan": 0.7310226402088944, "eval_sts-test_pearson_max": 0.7872126529181761, "eval_sts-test_spearman_cosine": 0.801487068999052, "eval_sts-test_spearman_dot": 0.4912205722904683, "eval_sts-test_spearman_euclidean": 0.7267262355024484, "eval_sts-test_spearman_manhattan": 0.72510169253649, "eval_sts-test_spearman_max": 0.801487068999052, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_vitaminc-pairs_loss": 4.644638538360596, "eval_vitaminc-pairs_runtime": 1.1453, "eval_vitaminc-pairs_samples_per_second": 74.215, "eval_vitaminc-pairs_steps_per_second": 2.619, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_sts-label_loss": 3.915343999862671, "eval_sts-label_runtime": 0.2807, "eval_sts-label_samples_per_second": 356.217, "eval_sts-label_steps_per_second": 14.249, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_qnli-contrastive_loss": 0.11220741271972656, "eval_qnli-contrastive_runtime": 0.3614, "eval_qnli-contrastive_samples_per_second": 276.705, "eval_qnli-contrastive_steps_per_second": 11.068, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_scitail-pairs-qa_loss": 0.06635177880525589, "eval_scitail-pairs-qa_runtime": 0.8881, "eval_scitail-pairs-qa_samples_per_second": 112.594, "eval_scitail-pairs-qa_steps_per_second": 4.504, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_scitail-pairs-pos_loss": 0.5765587687492371, "eval_scitail-pairs-pos_runtime": 1.3496, "eval_scitail-pairs-pos_samples_per_second": 74.097, "eval_scitail-pairs-pos_steps_per_second": 2.964, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_xsum-pairs_loss": 0.2595808804035187, "eval_xsum-pairs_runtime": 0.9377, "eval_xsum-pairs_samples_per_second": 106.641, "eval_xsum-pairs_steps_per_second": 4.266, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_compression-pairs_loss": 0.0918564721941948, "eval_compression-pairs_runtime": 0.2755, "eval_compression-pairs_samples_per_second": 363.032, "eval_compression-pairs_steps_per_second": 14.521, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_sciq_pairs_loss": 0.284303218126297, "eval_sciq_pairs_runtime": 4.1289, "eval_sciq_pairs_samples_per_second": 24.22, "eval_sciq_pairs_steps_per_second": 0.969, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_qasc_pairs_loss": 0.19232892990112305, "eval_qasc_pairs_runtime": 1.0709, "eval_qasc_pairs_samples_per_second": 93.384, "eval_qasc_pairs_steps_per_second": 3.735, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_openbookqa_pairs_loss": 1.6234371662139893, "eval_openbookqa_pairs_runtime": 0.9558, "eval_openbookqa_pairs_samples_per_second": 104.62, "eval_openbookqa_pairs_steps_per_second": 4.185, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_msmarco_pairs_loss": 0.5325217247009277, "eval_msmarco_pairs_runtime": 2.0971, "eval_msmarco_pairs_samples_per_second": 47.685, "eval_msmarco_pairs_steps_per_second": 1.907, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_nq_pairs_loss": 0.2721095681190491, "eval_nq_pairs_runtime": 4.5393, "eval_nq_pairs_samples_per_second": 22.03, "eval_nq_pairs_steps_per_second": 0.881, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_trivia_pairs_loss": 0.8544899821281433, "eval_trivia_pairs_runtime": 6.4668, "eval_trivia_pairs_samples_per_second": 15.464, "eval_trivia_pairs_steps_per_second": 0.619, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_quora_pairs_loss": 0.08441996574401855, "eval_quora_pairs_runtime": 0.6933, "eval_quora_pairs_samples_per_second": 144.233, "eval_quora_pairs_steps_per_second": 5.769, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_gooaq_pairs_loss": 0.5711588859558105, "eval_gooaq_pairs_runtime": 1.3941, "eval_gooaq_pairs_samples_per_second": 71.733, "eval_gooaq_pairs_steps_per_second": 2.869, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_mrpc_pairs_loss": 0.05093960464000702, "eval_mrpc_pairs_runtime": 0.2633, "eval_mrpc_pairs_samples_per_second": 379.777, "eval_mrpc_pairs_steps_per_second": 15.191, "step": 2240 }, { "epoch": 0.3560570443504153, "grad_norm": 0.39178094267845154, "learning_rate": 2.6498646236636892e-05, "loss": 0.6805, "step": 2272 }, { "epoch": 0.3610719322990127, "grad_norm": 7.91475248336792, "learning_rate": 2.6332915632292237e-05, "loss": 1.0451, "step": 2304 }, { "epoch": 0.3660868202476101, "grad_norm": 31.54157066345215, "learning_rate": 2.616389702311641e-05, "loss": 1.0603, "step": 2336 }, { "epoch": 0.37110170819620747, "grad_norm": 8.400779724121094, "learning_rate": 2.5991639446269964e-05, "loss": 0.8142, "step": 2368 }, { "epoch": 0.3761165961448049, "grad_norm": 20.99441146850586, "learning_rate": 2.5816192878631166e-05, "loss": 1.7211, "step": 2400 }, { "epoch": 0.3811314840934023, "grad_norm": 10.574430465698242, "learning_rate": 2.5637608222296237e-05, "loss": 0.7523, "step": 2432 }, { "epoch": 0.3861463720419997, "grad_norm": 0.8941424489021301, "learning_rate": 2.5455937289811207e-05, "loss": 0.8053, "step": 2464 }, { "epoch": 0.3911612599905971, "grad_norm": 1.9402281045913696, "learning_rate": 2.5271232789139587e-05, "loss": 0.8427, "step": 2496 }, { "epoch": 0.3961761479391945, "grad_norm": 23.42873764038086, "learning_rate": 2.5083548308370296e-05, "loss": 0.8204, "step": 2528 }, { "epoch": 0.40119103588779187, "grad_norm": 4.5422234535217285, "learning_rate": 2.4892938300170198e-05, "loss": 0.5343, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_nli-pairs_loss": 1.002213478088379, "eval_nli-pairs_runtime": 3.8843, "eval_nli-pairs_samples_per_second": 25.745, "eval_nli-pairs_steps_per_second": 1.03, "eval_sts-test_pearson_cosine": 0.7872537557423719, "eval_sts-test_pearson_dot": 0.5372668921721468, "eval_sts-test_pearson_euclidean": 0.7383744840101544, "eval_sts-test_pearson_manhattan": 0.7333039162515002, "eval_sts-test_pearson_max": 0.7872537557423719, "eval_sts-test_spearman_cosine": 0.8038647026605977, "eval_sts-test_spearman_dot": 0.5191465873751544, "eval_sts-test_spearman_euclidean": 0.730034619048548, "eval_sts-test_spearman_manhattan": 0.7277569753761504, "eval_sts-test_spearman_max": 0.8038647026605977, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_vitaminc-pairs_loss": 4.723379135131836, "eval_vitaminc-pairs_runtime": 1.3031, "eval_vitaminc-pairs_samples_per_second": 65.23, "eval_vitaminc-pairs_steps_per_second": 2.302, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_sts-label_loss": 3.8185579776763916, "eval_sts-label_runtime": 0.4182, "eval_sts-label_samples_per_second": 239.094, "eval_sts-label_steps_per_second": 9.564, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_qnli-contrastive_loss": 0.15084019303321838, "eval_qnli-contrastive_runtime": 0.3638, "eval_qnli-contrastive_samples_per_second": 274.906, "eval_qnli-contrastive_steps_per_second": 10.996, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_scitail-pairs-qa_loss": 0.06741151213645935, "eval_scitail-pairs-qa_runtime": 0.9458, "eval_scitail-pairs-qa_samples_per_second": 105.735, "eval_scitail-pairs-qa_steps_per_second": 4.229, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_scitail-pairs-pos_loss": 0.47680819034576416, "eval_scitail-pairs-pos_runtime": 1.4736, "eval_scitail-pairs-pos_samples_per_second": 67.859, "eval_scitail-pairs-pos_steps_per_second": 2.714, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_xsum-pairs_loss": 0.2572269141674042, "eval_xsum-pairs_runtime": 0.9448, "eval_xsum-pairs_samples_per_second": 105.847, "eval_xsum-pairs_steps_per_second": 4.234, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_compression-pairs_loss": 0.09604756534099579, "eval_compression-pairs_runtime": 0.2774, "eval_compression-pairs_samples_per_second": 360.554, "eval_compression-pairs_steps_per_second": 14.422, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_sciq_pairs_loss": 0.2735004425048828, "eval_sciq_pairs_runtime": 4.2103, "eval_sciq_pairs_samples_per_second": 23.751, "eval_sciq_pairs_steps_per_second": 0.95, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_qasc_pairs_loss": 0.1924300342798233, "eval_qasc_pairs_runtime": 1.1352, "eval_qasc_pairs_samples_per_second": 88.089, "eval_qasc_pairs_steps_per_second": 3.524, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_openbookqa_pairs_loss": 1.6290359497070312, "eval_openbookqa_pairs_runtime": 0.9392, "eval_openbookqa_pairs_samples_per_second": 106.476, "eval_openbookqa_pairs_steps_per_second": 4.259, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_msmarco_pairs_loss": 0.518312931060791, "eval_msmarco_pairs_runtime": 2.121, "eval_msmarco_pairs_samples_per_second": 47.147, "eval_msmarco_pairs_steps_per_second": 1.886, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_nq_pairs_loss": 0.3077375292778015, "eval_nq_pairs_runtime": 4.6617, "eval_nq_pairs_samples_per_second": 21.451, "eval_nq_pairs_steps_per_second": 0.858, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_trivia_pairs_loss": 0.8588294386863708, "eval_trivia_pairs_runtime": 6.6293, "eval_trivia_pairs_samples_per_second": 15.085, "eval_trivia_pairs_steps_per_second": 0.603, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_quora_pairs_loss": 0.07980062067508698, "eval_quora_pairs_runtime": 0.7261, "eval_quora_pairs_samples_per_second": 137.72, "eval_quora_pairs_steps_per_second": 5.509, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_gooaq_pairs_loss": 0.6570906043052673, "eval_gooaq_pairs_runtime": 1.5071, "eval_gooaq_pairs_samples_per_second": 66.352, "eval_gooaq_pairs_steps_per_second": 2.654, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_mrpc_pairs_loss": 0.051231566816568375, "eval_mrpc_pairs_runtime": 0.2799, "eval_mrpc_pairs_samples_per_second": 357.322, "eval_mrpc_pairs_steps_per_second": 14.293, "step": 2560 }, { "epoch": 0.40620592383638926, "grad_norm": 37.2639045715332, "learning_rate": 2.4699458065985813e-05, "loss": 0.9709, "step": 2592 }, { "epoch": 0.4112208117849867, "grad_norm": 15.363207817077637, "learning_rate": 2.45031637399988e-05, "loss": 0.708, "step": 2624 }, { "epoch": 0.4162356997335841, "grad_norm": 1.8831324577331543, "learning_rate": 2.430411227283978e-05, "loss": 0.4083, "step": 2656 }, { "epoch": 0.4212505876821815, "grad_norm": 5.664551734924316, "learning_rate": 2.4102361415065367e-05, "loss": 0.8732, "step": 2688 }, { "epoch": 0.4262654756307789, "grad_norm": 0.615675151348114, "learning_rate": 2.3897969700403022e-05, "loss": 1.2616, "step": 2720 }, { "epoch": 0.43128036357937627, "grad_norm": 19.81829261779785, "learning_rate": 2.3690996428768772e-05, "loss": 1.3324, "step": 2752 }, { "epoch": 0.43629525152797366, "grad_norm": 6.3363118171691895, "learning_rate": 2.348150164906257e-05, "loss": 0.6244, "step": 2784 }, { "epoch": 0.44131013947657105, "grad_norm": 1.103615641593933, "learning_rate": 2.3269546141746407e-05, "loss": 0.6176, "step": 2816 }, { "epoch": 0.44632502742516844, "grad_norm": 11.468894004821777, "learning_rate": 2.3055191401210126e-05, "loss": 0.6926, "step": 2848 }, { "epoch": 0.4513399153737659, "grad_norm": 4.0951619148254395, "learning_rate": 2.283849961793017e-05, "loss": 0.8158, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_nli-pairs_loss": 1.2103344202041626, "eval_nli-pairs_runtime": 3.656, "eval_nli-pairs_samples_per_second": 27.353, "eval_nli-pairs_steps_per_second": 1.094, "eval_sts-test_pearson_cosine": 0.7884135608823999, "eval_sts-test_pearson_dot": 0.5043809957478502, "eval_sts-test_pearson_euclidean": 0.73325296875941, "eval_sts-test_pearson_manhattan": 0.7274442771815695, "eval_sts-test_pearson_max": 0.7884135608823999, "eval_sts-test_spearman_cosine": 0.8024151272859597, "eval_sts-test_spearman_dot": 0.4849613226687463, "eval_sts-test_spearman_euclidean": 0.7267107319000072, "eval_sts-test_spearman_manhattan": 0.7238097600272174, "eval_sts-test_spearman_max": 0.8024151272859597, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_vitaminc-pairs_loss": 4.7560882568359375, "eval_vitaminc-pairs_runtime": 1.1898, "eval_vitaminc-pairs_samples_per_second": 71.438, "eval_vitaminc-pairs_steps_per_second": 2.521, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_sts-label_loss": 3.4280478954315186, "eval_sts-label_runtime": 0.2879, "eval_sts-label_samples_per_second": 347.303, "eval_sts-label_steps_per_second": 13.892, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_qnli-contrastive_loss": 0.1333482712507248, "eval_qnli-contrastive_runtime": 0.3658, "eval_qnli-contrastive_samples_per_second": 273.37, "eval_qnli-contrastive_steps_per_second": 10.935, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_scitail-pairs-qa_loss": 0.0703386664390564, "eval_scitail-pairs-qa_runtime": 0.8879, "eval_scitail-pairs-qa_samples_per_second": 112.63, "eval_scitail-pairs-qa_steps_per_second": 4.505, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_scitail-pairs-pos_loss": 0.4763020873069763, "eval_scitail-pairs-pos_runtime": 1.3239, "eval_scitail-pairs-pos_samples_per_second": 75.532, "eval_scitail-pairs-pos_steps_per_second": 3.021, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_xsum-pairs_loss": 0.25743284821510315, "eval_xsum-pairs_runtime": 0.9333, "eval_xsum-pairs_samples_per_second": 107.15, "eval_xsum-pairs_steps_per_second": 4.286, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_compression-pairs_loss": 0.09842805564403534, "eval_compression-pairs_runtime": 0.2944, "eval_compression-pairs_samples_per_second": 339.674, "eval_compression-pairs_steps_per_second": 13.587, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_sciq_pairs_loss": 0.28244778513908386, "eval_sciq_pairs_runtime": 4.0785, "eval_sciq_pairs_samples_per_second": 24.519, "eval_sciq_pairs_steps_per_second": 0.981, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_qasc_pairs_loss": 0.18051397800445557, "eval_qasc_pairs_runtime": 1.0561, "eval_qasc_pairs_samples_per_second": 94.69, "eval_qasc_pairs_steps_per_second": 3.788, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_openbookqa_pairs_loss": 1.5708725452423096, "eval_openbookqa_pairs_runtime": 0.9072, "eval_openbookqa_pairs_samples_per_second": 110.229, "eval_openbookqa_pairs_steps_per_second": 4.409, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_msmarco_pairs_loss": 0.5720314979553223, "eval_msmarco_pairs_runtime": 2.0694, "eval_msmarco_pairs_samples_per_second": 48.322, "eval_msmarco_pairs_steps_per_second": 1.933, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_nq_pairs_loss": 0.2748319208621979, "eval_nq_pairs_runtime": 4.5496, "eval_nq_pairs_samples_per_second": 21.98, "eval_nq_pairs_steps_per_second": 0.879, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_trivia_pairs_loss": 0.8936847448348999, "eval_trivia_pairs_runtime": 6.4784, "eval_trivia_pairs_samples_per_second": 15.436, "eval_trivia_pairs_steps_per_second": 0.617, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_quora_pairs_loss": 0.07990340888500214, "eval_quora_pairs_runtime": 0.6852, "eval_quora_pairs_samples_per_second": 145.945, "eval_quora_pairs_steps_per_second": 5.838, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_gooaq_pairs_loss": 0.6210995316505432, "eval_gooaq_pairs_runtime": 1.4234, "eval_gooaq_pairs_samples_per_second": 70.255, "eval_gooaq_pairs_steps_per_second": 2.81, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_mrpc_pairs_loss": 0.053870730102062225, "eval_mrpc_pairs_runtime": 0.2678, "eval_mrpc_pairs_samples_per_second": 373.436, "eval_mrpc_pairs_steps_per_second": 14.937, "step": 2880 }, { "epoch": 0.4563548033223633, "grad_norm": 0.5031663775444031, "learning_rate": 2.261953366042628e-05, "loss": 1.4753, "step": 2912 }, { "epoch": 0.46136969127096067, "grad_norm": 3.3404605388641357, "learning_rate": 2.239835705702158e-05, "loss": 0.5735, "step": 2944 }, { "epoch": 0.46638457921955806, "grad_norm": 14.60761547088623, "learning_rate": 2.217503397741115e-05, "loss": 1.2261, "step": 2976 }, { "epoch": 0.47139946716815545, "grad_norm": 0.7826951146125793, "learning_rate": 2.194962921404456e-05, "loss": 0.6085, "step": 3008 }, { "epoch": 0.47641435511675284, "grad_norm": 5.523419380187988, "learning_rate": 2.1722208163327738e-05, "loss": 0.8766, "step": 3040 }, { "epoch": 0.48142924306535023, "grad_norm": 1.2507153749465942, "learning_rate": 2.1492836806649564e-05, "loss": 1.1824, "step": 3072 }, { "epoch": 0.4864441310139477, "grad_norm": 10.76526165008545, "learning_rate": 2.1261581691238775e-05, "loss": 0.7192, "step": 3104 }, { "epoch": 0.49145901896254507, "grad_norm": 2.5375277996063232, "learning_rate": 2.1028509910856705e-05, "loss": 0.6131, "step": 3136 }, { "epoch": 0.49647390691114246, "grad_norm": 6.569655418395996, "learning_rate": 2.0793689086331472e-05, "loss": 0.7407, "step": 3168 }, { "epoch": 0.5014887948597399, "grad_norm": 0.42745527625083923, "learning_rate": 2.055718734593919e-05, "loss": 0.5857, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_nli-pairs_loss": 1.1431602239608765, "eval_nli-pairs_runtime": 3.6407, "eval_nli-pairs_samples_per_second": 27.467, "eval_nli-pairs_steps_per_second": 1.099, "eval_sts-test_pearson_cosine": 0.7838341260331343, "eval_sts-test_pearson_dot": 0.5274891201747137, "eval_sts-test_pearson_euclidean": 0.734987175544037, "eval_sts-test_pearson_manhattan": 0.7296263541205231, "eval_sts-test_pearson_max": 0.7838341260331343, "eval_sts-test_spearman_cosine": 0.8013224760849562, "eval_sts-test_spearman_dot": 0.5061225327907017, "eval_sts-test_spearman_euclidean": 0.7282525362996873, "eval_sts-test_spearman_manhattan": 0.7265322068183514, "eval_sts-test_spearman_max": 0.8013224760849562, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_vitaminc-pairs_loss": 4.748112201690674, "eval_vitaminc-pairs_runtime": 1.1378, "eval_vitaminc-pairs_samples_per_second": 74.706, "eval_vitaminc-pairs_steps_per_second": 2.637, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_sts-label_loss": 3.9402565956115723, "eval_sts-label_runtime": 0.2789, "eval_sts-label_samples_per_second": 358.596, "eval_sts-label_steps_per_second": 14.344, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_qnli-contrastive_loss": 0.10341227799654007, "eval_qnli-contrastive_runtime": 0.3605, "eval_qnli-contrastive_samples_per_second": 277.417, "eval_qnli-contrastive_steps_per_second": 11.097, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_scitail-pairs-qa_loss": 0.06673895567655563, "eval_scitail-pairs-qa_runtime": 0.8765, "eval_scitail-pairs-qa_samples_per_second": 114.092, "eval_scitail-pairs-qa_steps_per_second": 4.564, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_scitail-pairs-pos_loss": 0.510690450668335, "eval_scitail-pairs-pos_runtime": 1.3274, "eval_scitail-pairs-pos_samples_per_second": 75.334, "eval_scitail-pairs-pos_steps_per_second": 3.013, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_xsum-pairs_loss": 0.26573723554611206, "eval_xsum-pairs_runtime": 0.9342, "eval_xsum-pairs_samples_per_second": 107.047, "eval_xsum-pairs_steps_per_second": 4.282, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_compression-pairs_loss": 0.09096826612949371, "eval_compression-pairs_runtime": 0.2779, "eval_compression-pairs_samples_per_second": 359.804, "eval_compression-pairs_steps_per_second": 14.392, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_sciq_pairs_loss": 0.30787500739097595, "eval_sciq_pairs_runtime": 4.1007, "eval_sciq_pairs_samples_per_second": 24.386, "eval_sciq_pairs_steps_per_second": 0.975, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_qasc_pairs_loss": 0.1825849413871765, "eval_qasc_pairs_runtime": 1.0526, "eval_qasc_pairs_samples_per_second": 94.998, "eval_qasc_pairs_steps_per_second": 3.8, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_openbookqa_pairs_loss": 1.5945305824279785, "eval_openbookqa_pairs_runtime": 0.8948, "eval_openbookqa_pairs_samples_per_second": 111.759, "eval_openbookqa_pairs_steps_per_second": 4.47, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_msmarco_pairs_loss": 0.5864604711532593, "eval_msmarco_pairs_runtime": 2.0556, "eval_msmarco_pairs_samples_per_second": 48.646, "eval_msmarco_pairs_steps_per_second": 1.946, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_nq_pairs_loss": 0.2538978159427643, "eval_nq_pairs_runtime": 4.5409, "eval_nq_pairs_samples_per_second": 22.022, "eval_nq_pairs_steps_per_second": 0.881, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_trivia_pairs_loss": 0.8825237154960632, "eval_trivia_pairs_runtime": 6.4701, "eval_trivia_pairs_samples_per_second": 15.456, "eval_trivia_pairs_steps_per_second": 0.618, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_quora_pairs_loss": 0.06264814734458923, "eval_quora_pairs_runtime": 0.6792, "eval_quora_pairs_samples_per_second": 147.238, "eval_quora_pairs_steps_per_second": 5.89, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_gooaq_pairs_loss": 0.5953384041786194, "eval_gooaq_pairs_runtime": 1.4186, "eval_gooaq_pairs_samples_per_second": 70.49, "eval_gooaq_pairs_steps_per_second": 2.82, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_mrpc_pairs_loss": 0.05028616264462471, "eval_mrpc_pairs_runtime": 0.2664, "eval_mrpc_pairs_samples_per_second": 375.444, "eval_mrpc_pairs_steps_per_second": 15.018, "step": 3200 }, { "epoch": 0.5065036828083372, "grad_norm": 17.477581024169922, "learning_rate": 2.0319073305638035e-05, "loss": 0.6212, "step": 3232 }, { "epoch": 0.5115185707569346, "grad_norm": 15.705268859863281, "learning_rate": 2.0079416049160762e-05, "loss": 1.1408, "step": 3264 }, { "epoch": 0.516533458705532, "grad_norm": 15.518088340759277, "learning_rate": 1.983828510797154e-05, "loss": 0.6898, "step": 3296 }, { "epoch": 0.5215483466541294, "grad_norm": 18.28449058532715, "learning_rate": 1.9595750441092844e-05, "loss": 0.9827, "step": 3328 }, { "epoch": 0.5265632346027268, "grad_norm": 11.187614440917969, "learning_rate": 1.935188241480837e-05, "loss": 0.9518, "step": 3360 }, { "epoch": 0.5315781225513242, "grad_norm": 24.515199661254883, "learning_rate": 1.910675178224773e-05, "loss": 0.5584, "step": 3392 }, { "epoch": 0.5365930104999217, "grad_norm": 21.595224380493164, "learning_rate": 1.886042966285894e-05, "loss": 1.3362, "step": 3424 }, { "epoch": 0.5416078984485191, "grad_norm": 14.934494972229004, "learning_rate": 1.8612987521774603e-05, "loss": 0.4418, "step": 3456 }, { "epoch": 0.5466227863971165, "grad_norm": 1.0222537517547607, "learning_rate": 1.836449714907785e-05, "loss": 0.5896, "step": 3488 }, { "epoch": 0.5516376743457139, "grad_norm": 13.705151557922363, "learning_rate": 1.811503063897396e-05, "loss": 0.7951, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_nli-pairs_loss": 1.0016616582870483, "eval_nli-pairs_runtime": 3.6365, "eval_nli-pairs_samples_per_second": 27.499, "eval_nli-pairs_steps_per_second": 1.1, "eval_sts-test_pearson_cosine": 0.783269156461013, "eval_sts-test_pearson_dot": 0.5146760761775918, "eval_sts-test_pearson_euclidean": 0.7293244171224789, "eval_sts-test_pearson_manhattan": 0.722566066058283, "eval_sts-test_pearson_max": 0.783269156461013, "eval_sts-test_spearman_cosine": 0.800346163751739, "eval_sts-test_spearman_dot": 0.49134463318009686, "eval_sts-test_spearman_euclidean": 0.7220780456605193, "eval_sts-test_spearman_manhattan": 0.7185570530657137, "eval_sts-test_spearman_max": 0.800346163751739, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_vitaminc-pairs_loss": 4.628457546234131, "eval_vitaminc-pairs_runtime": 1.1358, "eval_vitaminc-pairs_samples_per_second": 74.837, "eval_vitaminc-pairs_steps_per_second": 2.641, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_sts-label_loss": 3.698469877243042, "eval_sts-label_runtime": 0.2763, "eval_sts-label_samples_per_second": 361.871, "eval_sts-label_steps_per_second": 14.475, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_qnli-contrastive_loss": 0.11857427656650543, "eval_qnli-contrastive_runtime": 0.3599, "eval_qnli-contrastive_samples_per_second": 277.865, "eval_qnli-contrastive_steps_per_second": 11.115, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_scitail-pairs-qa_loss": 0.06011494621634483, "eval_scitail-pairs-qa_runtime": 0.8855, "eval_scitail-pairs-qa_samples_per_second": 112.93, "eval_scitail-pairs-qa_steps_per_second": 4.517, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_scitail-pairs-pos_loss": 0.5179685950279236, "eval_scitail-pairs-pos_runtime": 1.3428, "eval_scitail-pairs-pos_samples_per_second": 74.469, "eval_scitail-pairs-pos_steps_per_second": 2.979, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_xsum-pairs_loss": 0.2575337886810303, "eval_xsum-pairs_runtime": 0.9362, "eval_xsum-pairs_samples_per_second": 106.81, "eval_xsum-pairs_steps_per_second": 4.272, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_compression-pairs_loss": 0.08986295014619827, "eval_compression-pairs_runtime": 0.2735, "eval_compression-pairs_samples_per_second": 365.659, "eval_compression-pairs_steps_per_second": 14.626, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_sciq_pairs_loss": 0.2898155748844147, "eval_sciq_pairs_runtime": 4.1009, "eval_sciq_pairs_samples_per_second": 24.385, "eval_sciq_pairs_steps_per_second": 0.975, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_qasc_pairs_loss": 0.1790761798620224, "eval_qasc_pairs_runtime": 1.0559, "eval_qasc_pairs_samples_per_second": 94.702, "eval_qasc_pairs_steps_per_second": 3.788, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_openbookqa_pairs_loss": 1.6558103561401367, "eval_openbookqa_pairs_runtime": 0.8846, "eval_openbookqa_pairs_samples_per_second": 113.048, "eval_openbookqa_pairs_steps_per_second": 4.522, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_msmarco_pairs_loss": 0.5547183156013489, "eval_msmarco_pairs_runtime": 2.0592, "eval_msmarco_pairs_samples_per_second": 48.563, "eval_msmarco_pairs_steps_per_second": 1.943, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_nq_pairs_loss": 0.24799224734306335, "eval_nq_pairs_runtime": 4.5115, "eval_nq_pairs_samples_per_second": 22.166, "eval_nq_pairs_steps_per_second": 0.887, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_trivia_pairs_loss": 0.9036693572998047, "eval_trivia_pairs_runtime": 6.5286, "eval_trivia_pairs_samples_per_second": 15.317, "eval_trivia_pairs_steps_per_second": 0.613, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_quora_pairs_loss": 0.05727443844079971, "eval_quora_pairs_runtime": 0.6763, "eval_quora_pairs_samples_per_second": 147.873, "eval_quora_pairs_steps_per_second": 5.915, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_gooaq_pairs_loss": 0.5602415800094604, "eval_gooaq_pairs_runtime": 1.4132, "eval_gooaq_pairs_samples_per_second": 70.759, "eval_gooaq_pairs_steps_per_second": 2.83, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_mrpc_pairs_loss": 0.04762456938624382, "eval_mrpc_pairs_runtime": 0.2648, "eval_mrpc_pairs_samples_per_second": 377.632, "eval_mrpc_pairs_steps_per_second": 15.105, "step": 3520 }, { "epoch": 0.5566525622943113, "grad_norm": 0.39285340905189514, "learning_rate": 1.7864660368873747e-05, "loss": 0.5201, "step": 3552 }, { "epoch": 0.5616674502429087, "grad_norm": 16.01999855041504, "learning_rate": 1.7613458978394786e-05, "loss": 0.6351, "step": 3584 }, { "epoch": 0.566682338191506, "grad_norm": 0.5487422347068787, "learning_rate": 1.7361499348286606e-05, "loss": 0.8652, "step": 3616 }, { "epoch": 0.5716972261401034, "grad_norm": 0.9249119758605957, "learning_rate": 1.710885457928585e-05, "loss": 0.6407, "step": 3648 }, { "epoch": 0.5767121140887008, "grad_norm": 6.578505992889404, "learning_rate": 1.6855597970907664e-05, "loss": 0.9435, "step": 3680 }, { "epoch": 0.5817270020372982, "grad_norm": 14.307022094726562, "learning_rate": 1.6601803000179394e-05, "loss": 0.9295, "step": 3712 }, { "epoch": 0.5867418899858956, "grad_norm": 16.091779708862305, "learning_rate": 1.6347543300322795e-05, "loss": 0.6829, "step": 3744 }, { "epoch": 0.591756777934493, "grad_norm": 29.058805465698242, "learning_rate": 1.6092892639390916e-05, "loss": 0.8683, "step": 3776 }, { "epoch": 0.5967716658830904, "grad_norm": 13.12238597869873, "learning_rate": 1.583792489886586e-05, "loss": 1.115, "step": 3808 }, { "epoch": 0.6017865538316878, "grad_norm": 11.606388092041016, "learning_rate": 1.558271405222362e-05, "loss": 1.0936, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_nli-pairs_loss": 0.9106074571609497, "eval_nli-pairs_runtime": 3.9467, "eval_nli-pairs_samples_per_second": 25.337, "eval_nli-pairs_steps_per_second": 1.013, "eval_sts-test_pearson_cosine": 0.7831915073063493, "eval_sts-test_pearson_dot": 0.51712727721244, "eval_sts-test_pearson_euclidean": 0.7355201142492419, "eval_sts-test_pearson_manhattan": 0.7299910115321456, "eval_sts-test_pearson_max": 0.7831915073063493, "eval_sts-test_spearman_cosine": 0.8005432620025132, "eval_sts-test_spearman_dot": 0.49466719400094655, "eval_sts-test_spearman_euclidean": 0.7273424991180402, "eval_sts-test_spearman_manhattan": 0.7249394934262583, "eval_sts-test_spearman_max": 0.8005432620025132, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_vitaminc-pairs_loss": 4.7559494972229, "eval_vitaminc-pairs_runtime": 1.1844, "eval_vitaminc-pairs_samples_per_second": 71.768, "eval_vitaminc-pairs_steps_per_second": 2.533, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_sts-label_loss": 3.46917724609375, "eval_sts-label_runtime": 0.3003, "eval_sts-label_samples_per_second": 333.048, "eval_sts-label_steps_per_second": 13.322, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_qnli-contrastive_loss": 0.13890141248703003, "eval_qnli-contrastive_runtime": 0.3729, "eval_qnli-contrastive_samples_per_second": 268.18, "eval_qnli-contrastive_steps_per_second": 10.727, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_scitail-pairs-qa_loss": 0.0611240416765213, "eval_scitail-pairs-qa_runtime": 0.9367, "eval_scitail-pairs-qa_samples_per_second": 106.755, "eval_scitail-pairs-qa_steps_per_second": 4.27, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_scitail-pairs-pos_loss": 0.46203696727752686, "eval_scitail-pairs-pos_runtime": 1.4874, "eval_scitail-pairs-pos_samples_per_second": 67.232, "eval_scitail-pairs-pos_steps_per_second": 2.689, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_xsum-pairs_loss": 0.24919259548187256, "eval_xsum-pairs_runtime": 0.9576, "eval_xsum-pairs_samples_per_second": 104.427, "eval_xsum-pairs_steps_per_second": 4.177, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_compression-pairs_loss": 0.08809012174606323, "eval_compression-pairs_runtime": 0.298, "eval_compression-pairs_samples_per_second": 335.567, "eval_compression-pairs_steps_per_second": 13.423, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_sciq_pairs_loss": 0.28287386894226074, "eval_sciq_pairs_runtime": 4.2668, "eval_sciq_pairs_samples_per_second": 23.437, "eval_sciq_pairs_steps_per_second": 0.937, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_qasc_pairs_loss": 0.1861308217048645, "eval_qasc_pairs_runtime": 1.0488, "eval_qasc_pairs_samples_per_second": 95.351, "eval_qasc_pairs_steps_per_second": 3.814, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_openbookqa_pairs_loss": 1.600982666015625, "eval_openbookqa_pairs_runtime": 0.9077, "eval_openbookqa_pairs_samples_per_second": 110.17, "eval_openbookqa_pairs_steps_per_second": 4.407, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_msmarco_pairs_loss": 0.5555463433265686, "eval_msmarco_pairs_runtime": 2.1064, "eval_msmarco_pairs_samples_per_second": 47.474, "eval_msmarco_pairs_steps_per_second": 1.899, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_nq_pairs_loss": 0.23241031169891357, "eval_nq_pairs_runtime": 4.6119, "eval_nq_pairs_samples_per_second": 21.683, "eval_nq_pairs_steps_per_second": 0.867, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_trivia_pairs_loss": 0.7936394214630127, "eval_trivia_pairs_runtime": 6.6242, "eval_trivia_pairs_samples_per_second": 15.096, "eval_trivia_pairs_steps_per_second": 0.604, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_quora_pairs_loss": 0.05936668440699577, "eval_quora_pairs_runtime": 0.7463, "eval_quora_pairs_samples_per_second": 133.994, "eval_quora_pairs_steps_per_second": 5.36, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_gooaq_pairs_loss": 0.5735708475112915, "eval_gooaq_pairs_runtime": 1.4747, "eval_gooaq_pairs_samples_per_second": 67.809, "eval_gooaq_pairs_steps_per_second": 2.712, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_mrpc_pairs_loss": 0.046309370547533035, "eval_mrpc_pairs_runtime": 0.2694, "eval_mrpc_pairs_samples_per_second": 371.218, "eval_mrpc_pairs_steps_per_second": 14.849, "step": 3840 }, { "epoch": 0.6068014417802852, "grad_norm": 6.513147830963135, "learning_rate": 1.53273341434723e-05, "loss": 0.8689, "step": 3872 }, { "epoch": 0.6118163297288827, "grad_norm": 0.2349071353673935, "learning_rate": 1.5071859265669756e-05, "loss": 0.8692, "step": 3904 }, { "epoch": 0.6168312176774801, "grad_norm": 18.028608322143555, "learning_rate": 1.4816363539427118e-05, "loss": 0.9083, "step": 3936 }, { "epoch": 0.6218461056260774, "grad_norm": 17.381690979003906, "learning_rate": 1.456092109140423e-05, "loss": 1.0782, "step": 3968 }, { "epoch": 0.6268609935746748, "grad_norm": 20.72548484802246, "learning_rate": 1.4305606032803418e-05, "loss": 0.7711, "step": 4000 }, { "epoch": 0.6318758815232722, "grad_norm": 28.311264038085938, "learning_rate": 1.4050492437867641e-05, "loss": 1.0005, "step": 4032 }, { "epoch": 0.6368907694718696, "grad_norm": 14.892809867858887, "learning_rate": 1.3795654322389481e-05, "loss": 0.7229, "step": 4064 }, { "epoch": 0.641905657420467, "grad_norm": 18.567630767822266, "learning_rate": 1.3541165622236977e-05, "loss": 0.4871, "step": 4096 }, { "epoch": 0.6469205453690644, "grad_norm": 8.814851760864258, "learning_rate": 1.3287100171902759e-05, "loss": 0.7853, "step": 4128 }, { "epoch": 0.6519354333176618, "grad_norm": 19.43486785888672, "learning_rate": 1.3033531683082495e-05, "loss": 0.9271, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_nli-pairs_loss": 0.8979966640472412, "eval_nli-pairs_runtime": 3.6341, "eval_nli-pairs_samples_per_second": 27.517, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.786081877366483, "eval_sts-test_pearson_dot": 0.5354100918466089, "eval_sts-test_pearson_euclidean": 0.7368659505908834, "eval_sts-test_pearson_manhattan": 0.7310042183211231, "eval_sts-test_pearson_max": 0.786081877366483, "eval_sts-test_spearman_cosine": 0.8043456052578905, "eval_sts-test_spearman_dot": 0.5150264179790126, "eval_sts-test_spearman_euclidean": 0.7297811553069841, "eval_sts-test_spearman_manhattan": 0.7264172194761916, "eval_sts-test_spearman_max": 0.8043456052578905, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_vitaminc-pairs_loss": 4.720225811004639, "eval_vitaminc-pairs_runtime": 1.1487, "eval_vitaminc-pairs_samples_per_second": 73.995, "eval_vitaminc-pairs_steps_per_second": 2.612, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_sts-label_loss": 3.9553511142730713, "eval_sts-label_runtime": 0.2732, "eval_sts-label_samples_per_second": 366.049, "eval_sts-label_steps_per_second": 14.642, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_qnli-contrastive_loss": 0.14256399869918823, "eval_qnli-contrastive_runtime": 0.3558, "eval_qnli-contrastive_samples_per_second": 281.03, "eval_qnli-contrastive_steps_per_second": 11.241, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_scitail-pairs-qa_loss": 0.06135182082653046, "eval_scitail-pairs-qa_runtime": 0.8797, "eval_scitail-pairs-qa_samples_per_second": 113.67, "eval_scitail-pairs-qa_steps_per_second": 4.547, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_scitail-pairs-pos_loss": 0.42590686678886414, "eval_scitail-pairs-pos_runtime": 1.3288, "eval_scitail-pairs-pos_samples_per_second": 75.254, "eval_scitail-pairs-pos_steps_per_second": 3.01, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_xsum-pairs_loss": 0.2564789056777954, "eval_xsum-pairs_runtime": 0.9345, "eval_xsum-pairs_samples_per_second": 107.011, "eval_xsum-pairs_steps_per_second": 4.28, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_compression-pairs_loss": 0.08838170021772385, "eval_compression-pairs_runtime": 0.2761, "eval_compression-pairs_samples_per_second": 362.144, "eval_compression-pairs_steps_per_second": 14.486, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_sciq_pairs_loss": 0.2946786880493164, "eval_sciq_pairs_runtime": 4.076, "eval_sciq_pairs_samples_per_second": 24.534, "eval_sciq_pairs_steps_per_second": 0.981, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_qasc_pairs_loss": 0.17502914369106293, "eval_qasc_pairs_runtime": 1.0723, "eval_qasc_pairs_samples_per_second": 93.259, "eval_qasc_pairs_steps_per_second": 3.73, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_openbookqa_pairs_loss": 1.5555152893066406, "eval_openbookqa_pairs_runtime": 0.8973, "eval_openbookqa_pairs_samples_per_second": 111.451, "eval_openbookqa_pairs_steps_per_second": 4.458, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_msmarco_pairs_loss": 0.5041812062263489, "eval_msmarco_pairs_runtime": 2.0593, "eval_msmarco_pairs_samples_per_second": 48.56, "eval_msmarco_pairs_steps_per_second": 1.942, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_nq_pairs_loss": 0.24564537405967712, "eval_nq_pairs_runtime": 4.527, "eval_nq_pairs_samples_per_second": 22.09, "eval_nq_pairs_steps_per_second": 0.884, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_trivia_pairs_loss": 0.8565467000007629, "eval_trivia_pairs_runtime": 6.4751, "eval_trivia_pairs_samples_per_second": 15.444, "eval_trivia_pairs_steps_per_second": 0.618, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_quora_pairs_loss": 0.052645713090896606, "eval_quora_pairs_runtime": 0.6803, "eval_quora_pairs_samples_per_second": 146.985, "eval_quora_pairs_steps_per_second": 5.879, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_gooaq_pairs_loss": 0.5815556645393372, "eval_gooaq_pairs_runtime": 1.3985, "eval_gooaq_pairs_samples_per_second": 71.504, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_mrpc_pairs_loss": 0.047052089124917984, "eval_mrpc_pairs_runtime": 0.2602, "eval_mrpc_pairs_samples_per_second": 384.349, "eval_mrpc_pairs_steps_per_second": 15.374, "step": 4160 }, { "epoch": 0.6569503212662592, "grad_norm": 21.91355323791504, "learning_rate": 1.2780533723289014e-05, "loss": 0.5223, "step": 4192 }, { "epoch": 0.6619652092148566, "grad_norm": 9.792081832885742, "learning_rate": 1.2528179694508286e-05, "loss": 1.0498, "step": 4224 }, { "epoch": 0.666980097163454, "grad_norm": 6.606201648712158, "learning_rate": 1.2276542811903345e-05, "loss": 0.6791, "step": 4256 }, { "epoch": 0.6719949851120514, "grad_norm": 16.744705200195312, "learning_rate": 1.2025696082572509e-05, "loss": 0.8836, "step": 4288 }, { "epoch": 0.6770098730606487, "grad_norm": 8.791626930236816, "learning_rate": 1.1775712284367882e-05, "loss": 0.6035, "step": 4320 }, { "epoch": 0.6820247610092462, "grad_norm": 1.067271113395691, "learning_rate": 1.152666394478045e-05, "loss": 0.5167, "step": 4352 }, { "epoch": 0.6870396489578436, "grad_norm": 7.685211181640625, "learning_rate": 1.1286358620301126e-05, "loss": 0.981, "step": 4384 }, { "epoch": 0.692054536906441, "grad_norm": 19.07784652709961, "learning_rate": 1.10393628476565e-05, "loss": 0.4873, "step": 4416 }, { "epoch": 0.6970694248550384, "grad_norm": 1.4715958833694458, "learning_rate": 1.0793516169782712e-05, "loss": 0.4762, "step": 4448 }, { "epoch": 0.7020843128036358, "grad_norm": 14.572600364685059, "learning_rate": 1.0548889913873123e-05, "loss": 0.8201, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_nli-pairs_loss": 0.8704043626785278, "eval_nli-pairs_runtime": 3.6418, "eval_nli-pairs_samples_per_second": 27.459, "eval_nli-pairs_steps_per_second": 1.098, "eval_sts-test_pearson_cosine": 0.7871366351762351, "eval_sts-test_pearson_dot": 0.520292802271069, "eval_sts-test_pearson_euclidean": 0.7358991589918665, "eval_sts-test_pearson_manhattan": 0.7306487678482384, "eval_sts-test_pearson_max": 0.7871366351762351, "eval_sts-test_spearman_cosine": 0.8043053229220561, "eval_sts-test_spearman_dot": 0.500924984433136, "eval_sts-test_spearman_euclidean": 0.7279966902078664, "eval_sts-test_spearman_manhattan": 0.7254635738312362, "eval_sts-test_spearman_max": 0.8043053229220561, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_vitaminc-pairs_loss": 4.733531475067139, "eval_vitaminc-pairs_runtime": 1.1524, "eval_vitaminc-pairs_samples_per_second": 73.759, "eval_vitaminc-pairs_steps_per_second": 2.603, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_sts-label_loss": 3.589179515838623, "eval_sts-label_runtime": 0.2802, "eval_sts-label_samples_per_second": 356.831, "eval_sts-label_steps_per_second": 14.273, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_qnli-contrastive_loss": 0.11559023708105087, "eval_qnli-contrastive_runtime": 0.3803, "eval_qnli-contrastive_samples_per_second": 262.956, "eval_qnli-contrastive_steps_per_second": 10.518, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_scitail-pairs-qa_loss": 0.05958002060651779, "eval_scitail-pairs-qa_runtime": 0.9171, "eval_scitail-pairs-qa_samples_per_second": 109.042, "eval_scitail-pairs-qa_steps_per_second": 4.362, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_scitail-pairs-pos_loss": 0.43254122138023376, "eval_scitail-pairs-pos_runtime": 1.3676, "eval_scitail-pairs-pos_samples_per_second": 73.118, "eval_scitail-pairs-pos_steps_per_second": 2.925, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_xsum-pairs_loss": 0.248906210064888, "eval_xsum-pairs_runtime": 0.9364, "eval_xsum-pairs_samples_per_second": 106.797, "eval_xsum-pairs_steps_per_second": 4.272, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_compression-pairs_loss": 0.08712127059698105, "eval_compression-pairs_runtime": 0.2771, "eval_compression-pairs_samples_per_second": 360.923, "eval_compression-pairs_steps_per_second": 14.437, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_sciq_pairs_loss": 0.2863478362560272, "eval_sciq_pairs_runtime": 4.1006, "eval_sciq_pairs_samples_per_second": 24.386, "eval_sciq_pairs_steps_per_second": 0.975, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_qasc_pairs_loss": 0.17710347473621368, "eval_qasc_pairs_runtime": 1.0521, "eval_qasc_pairs_samples_per_second": 95.051, "eval_qasc_pairs_steps_per_second": 3.802, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_openbookqa_pairs_loss": 1.5271464586257935, "eval_openbookqa_pairs_runtime": 0.8986, "eval_openbookqa_pairs_samples_per_second": 111.286, "eval_openbookqa_pairs_steps_per_second": 4.451, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_msmarco_pairs_loss": 0.5346755385398865, "eval_msmarco_pairs_runtime": 2.0827, "eval_msmarco_pairs_samples_per_second": 48.014, "eval_msmarco_pairs_steps_per_second": 1.921, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_nq_pairs_loss": 0.24830152094364166, "eval_nq_pairs_runtime": 4.5025, "eval_nq_pairs_samples_per_second": 22.21, "eval_nq_pairs_steps_per_second": 0.888, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_trivia_pairs_loss": 0.799673318862915, "eval_trivia_pairs_runtime": 6.4664, "eval_trivia_pairs_samples_per_second": 15.465, "eval_trivia_pairs_steps_per_second": 0.619, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_quora_pairs_loss": 0.030656050890684128, "eval_quora_pairs_runtime": 0.6818, "eval_quora_pairs_samples_per_second": 146.669, "eval_quora_pairs_steps_per_second": 5.867, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_gooaq_pairs_loss": 0.5036634802818298, "eval_gooaq_pairs_runtime": 1.4051, "eval_gooaq_pairs_samples_per_second": 71.169, "eval_gooaq_pairs_steps_per_second": 2.847, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_mrpc_pairs_loss": 0.04455450549721718, "eval_mrpc_pairs_runtime": 0.2642, "eval_mrpc_pairs_samples_per_second": 378.478, "eval_mrpc_pairs_steps_per_second": 15.139, "step": 4480 }, { "epoch": 0.7070992007522332, "grad_norm": 15.19054889678955, "learning_rate": 1.030555505304156e-05, "loss": 0.7799, "step": 4512 }, { "epoch": 0.7121140887008306, "grad_norm": 16.065160751342773, "learning_rate": 1.0063582185731009e-05, "loss": 0.8006, "step": 4544 }, { "epoch": 0.717128976649428, "grad_norm": 3.2584469318389893, "learning_rate": 9.823041515230937e-06, "loss": 0.5123, "step": 4576 }, { "epoch": 0.7221438645980254, "grad_norm": 2.2951438426971436, "learning_rate": 9.584002829309324e-06, "loss": 0.7421, "step": 4608 }, { "epoch": 0.7271587525466228, "grad_norm": 21.291872024536133, "learning_rate": 9.346535479965231e-06, "loss": 0.9477, "step": 4640 }, { "epoch": 0.7321736404952202, "grad_norm": 4.785529613494873, "learning_rate": 9.11070836330775e-06, "loss": 0.5021, "step": 4672 }, { "epoch": 0.7371885284438175, "grad_norm": 1.7058138847351074, "learning_rate": 8.876589899567312e-06, "loss": 0.931, "step": 4704 }, { "epoch": 0.7422034163924149, "grad_norm": 9.1055326461792, "learning_rate": 8.644248013244963e-06, "loss": 0.7777, "step": 4736 }, { "epoch": 0.7472183043410123, "grad_norm": 3.6529128551483154, "learning_rate": 8.413750113405556e-06, "loss": 0.9462, "step": 4768 }, { "epoch": 0.7522331922896098, "grad_norm": 0.5643049478530884, "learning_rate": 8.185163074120399e-06, "loss": 0.5846, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_nli-pairs_loss": 0.9291799664497375, "eval_nli-pairs_runtime": 3.7498, "eval_nli-pairs_samples_per_second": 26.668, "eval_nli-pairs_steps_per_second": 1.067, "eval_sts-test_pearson_cosine": 0.7855324842750789, "eval_sts-test_pearson_dot": 0.5242204261314407, "eval_sts-test_pearson_euclidean": 0.7349702751512333, "eval_sts-test_pearson_manhattan": 0.7293454465410049, "eval_sts-test_pearson_max": 0.7855324842750789, "eval_sts-test_spearman_cosine": 0.8044211074352633, "eval_sts-test_spearman_dot": 0.5021807579050959, "eval_sts-test_spearman_euclidean": 0.7270456124616013, "eval_sts-test_spearman_manhattan": 0.7246691951731193, "eval_sts-test_spearman_max": 0.8044211074352633, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_vitaminc-pairs_loss": 4.687094688415527, "eval_vitaminc-pairs_runtime": 1.1386, "eval_vitaminc-pairs_samples_per_second": 74.654, "eval_vitaminc-pairs_steps_per_second": 2.635, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_sts-label_loss": 3.8013510704040527, "eval_sts-label_runtime": 0.2716, "eval_sts-label_samples_per_second": 368.125, "eval_sts-label_steps_per_second": 14.725, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_qnli-contrastive_loss": 0.1414812207221985, "eval_qnli-contrastive_runtime": 0.3601, "eval_qnli-contrastive_samples_per_second": 277.73, "eval_qnli-contrastive_steps_per_second": 11.109, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_scitail-pairs-qa_loss": 0.05851547792553902, "eval_scitail-pairs-qa_runtime": 0.8864, "eval_scitail-pairs-qa_samples_per_second": 112.817, "eval_scitail-pairs-qa_steps_per_second": 4.513, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_scitail-pairs-pos_loss": 0.4562886357307434, "eval_scitail-pairs-pos_runtime": 1.3535, "eval_scitail-pairs-pos_samples_per_second": 73.88, "eval_scitail-pairs-pos_steps_per_second": 2.955, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_xsum-pairs_loss": 0.23483119904994965, "eval_xsum-pairs_runtime": 0.9336, "eval_xsum-pairs_samples_per_second": 107.109, "eval_xsum-pairs_steps_per_second": 4.284, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_compression-pairs_loss": 0.08680214732885361, "eval_compression-pairs_runtime": 0.2716, "eval_compression-pairs_samples_per_second": 368.254, "eval_compression-pairs_steps_per_second": 14.73, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_sciq_pairs_loss": 0.2816057801246643, "eval_sciq_pairs_runtime": 4.0742, "eval_sciq_pairs_samples_per_second": 24.545, "eval_sciq_pairs_steps_per_second": 0.982, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_qasc_pairs_loss": 0.17035560309886932, "eval_qasc_pairs_runtime": 1.0717, "eval_qasc_pairs_samples_per_second": 93.311, "eval_qasc_pairs_steps_per_second": 3.732, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_openbookqa_pairs_loss": 1.5671054124832153, "eval_openbookqa_pairs_runtime": 0.8973, "eval_openbookqa_pairs_samples_per_second": 111.441, "eval_openbookqa_pairs_steps_per_second": 4.458, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_msmarco_pairs_loss": 0.5062486529350281, "eval_msmarco_pairs_runtime": 2.0609, "eval_msmarco_pairs_samples_per_second": 48.524, "eval_msmarco_pairs_steps_per_second": 1.941, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_nq_pairs_loss": 0.22875532507896423, "eval_nq_pairs_runtime": 4.5041, "eval_nq_pairs_samples_per_second": 22.202, "eval_nq_pairs_steps_per_second": 0.888, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_trivia_pairs_loss": 0.8119627237319946, "eval_trivia_pairs_runtime": 6.4609, "eval_trivia_pairs_samples_per_second": 15.478, "eval_trivia_pairs_steps_per_second": 0.619, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_quora_pairs_loss": 0.06211049482226372, "eval_quora_pairs_runtime": 0.6765, "eval_quora_pairs_samples_per_second": 147.827, "eval_quora_pairs_steps_per_second": 5.913, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_gooaq_pairs_loss": 0.4847571551799774, "eval_gooaq_pairs_runtime": 1.3911, "eval_gooaq_pairs_samples_per_second": 71.886, "eval_gooaq_pairs_steps_per_second": 2.875, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_mrpc_pairs_loss": 0.04384278133511543, "eval_mrpc_pairs_runtime": 0.2617, "eval_mrpc_pairs_samples_per_second": 382.146, "eval_mrpc_pairs_steps_per_second": 15.286, "step": 4800 }, { "epoch": 0.7572480802382072, "grad_norm": 14.555929183959961, "learning_rate": 7.958553215065208e-06, "loss": 0.6735, "step": 4832 }, { "epoch": 0.7622629681868046, "grad_norm": 10.30207347869873, "learning_rate": 7.733986282278816e-06, "loss": 1.1569, "step": 4864 }, { "epoch": 0.767277856135402, "grad_norm": 17.255786895751953, "learning_rate": 7.511527429088396e-06, "loss": 0.9749, "step": 4896 }, { "epoch": 0.7722927440839994, "grad_norm": 14.730864524841309, "learning_rate": 7.291241197206574e-06, "loss": 0.6581, "step": 4928 }, { "epoch": 0.7773076320325968, "grad_norm": 8.807291984558105, "learning_rate": 7.07319149800605e-06, "loss": 0.6979, "step": 4960 }, { "epoch": 0.7823225199811942, "grad_norm": 0.6080070734024048, "learning_rate": 6.857441593977046e-06, "loss": 0.7582, "step": 4992 }, { "epoch": 0.7873374079297916, "grad_norm": 2.2002525329589844, "learning_rate": 6.6440540803730425e-06, "loss": 1.0082, "step": 5024 }, { "epoch": 0.792352295878389, "grad_norm": 8.624346733093262, "learning_rate": 6.433090867050122e-06, "loss": 0.6206, "step": 5056 }, { "epoch": 0.7973671838269863, "grad_norm": 0.9821205139160156, "learning_rate": 6.224613160505094e-06, "loss": 0.5165, "step": 5088 }, { "epoch": 0.8023820717755837, "grad_norm": 4.104696750640869, "learning_rate": 6.018681446117773e-06, "loss": 0.4914, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_nli-pairs_loss": 0.8841198682785034, "eval_nli-pairs_runtime": 4.1793, "eval_nli-pairs_samples_per_second": 23.928, "eval_nli-pairs_steps_per_second": 0.957, "eval_sts-test_pearson_cosine": 0.7866468635321827, "eval_sts-test_pearson_dot": 0.5124924570863083, "eval_sts-test_pearson_euclidean": 0.7320768163626257, "eval_sts-test_pearson_manhattan": 0.7266238528084388, "eval_sts-test_pearson_max": 0.7866468635321827, "eval_sts-test_spearman_cosine": 0.8041619306345255, "eval_sts-test_spearman_dot": 0.4913316974763461, "eval_sts-test_spearman_euclidean": 0.7232005770314757, "eval_sts-test_spearman_manhattan": 0.7207683852583252, "eval_sts-test_spearman_max": 0.8041619306345255, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_vitaminc-pairs_loss": 4.725103855133057, "eval_vitaminc-pairs_runtime": 1.2146, "eval_vitaminc-pairs_samples_per_second": 69.982, "eval_vitaminc-pairs_steps_per_second": 2.47, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_sts-label_loss": 3.6535470485687256, "eval_sts-label_runtime": 0.3164, "eval_sts-label_samples_per_second": 316.056, "eval_sts-label_steps_per_second": 12.642, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_qnli-contrastive_loss": 0.10529302805662155, "eval_qnli-contrastive_runtime": 0.368, "eval_qnli-contrastive_samples_per_second": 271.711, "eval_qnli-contrastive_steps_per_second": 10.868, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_scitail-pairs-qa_loss": 0.05555274337530136, "eval_scitail-pairs-qa_runtime": 0.9542, "eval_scitail-pairs-qa_samples_per_second": 104.795, "eval_scitail-pairs-qa_steps_per_second": 4.192, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_scitail-pairs-pos_loss": 0.4785614013671875, "eval_scitail-pairs-pos_runtime": 1.4937, "eval_scitail-pairs-pos_samples_per_second": 66.949, "eval_scitail-pairs-pos_steps_per_second": 2.678, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_xsum-pairs_loss": 0.2355932593345642, "eval_xsum-pairs_runtime": 0.9396, "eval_xsum-pairs_samples_per_second": 106.432, "eval_xsum-pairs_steps_per_second": 4.257, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_compression-pairs_loss": 0.083825021982193, "eval_compression-pairs_runtime": 0.2789, "eval_compression-pairs_samples_per_second": 358.564, "eval_compression-pairs_steps_per_second": 14.343, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_sciq_pairs_loss": 0.28157705068588257, "eval_sciq_pairs_runtime": 4.1947, "eval_sciq_pairs_samples_per_second": 23.84, "eval_sciq_pairs_steps_per_second": 0.954, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_qasc_pairs_loss": 0.1739024668931961, "eval_qasc_pairs_runtime": 1.1277, "eval_qasc_pairs_samples_per_second": 88.676, "eval_qasc_pairs_steps_per_second": 3.547, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_openbookqa_pairs_loss": 1.591935396194458, "eval_openbookqa_pairs_runtime": 1.0022, "eval_openbookqa_pairs_samples_per_second": 99.782, "eval_openbookqa_pairs_steps_per_second": 3.991, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_msmarco_pairs_loss": 0.5132349133491516, "eval_msmarco_pairs_runtime": 2.1322, "eval_msmarco_pairs_samples_per_second": 46.901, "eval_msmarco_pairs_steps_per_second": 1.876, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_nq_pairs_loss": 0.2343132346868515, "eval_nq_pairs_runtime": 4.5529, "eval_nq_pairs_samples_per_second": 21.964, "eval_nq_pairs_steps_per_second": 0.879, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_trivia_pairs_loss": 0.7988561987876892, "eval_trivia_pairs_runtime": 6.5661, "eval_trivia_pairs_samples_per_second": 15.23, "eval_trivia_pairs_steps_per_second": 0.609, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_quora_pairs_loss": 0.05578049644827843, "eval_quora_pairs_runtime": 0.8028, "eval_quora_pairs_samples_per_second": 124.564, "eval_quora_pairs_steps_per_second": 4.983, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_gooaq_pairs_loss": 0.48901888728141785, "eval_gooaq_pairs_runtime": 1.5605, "eval_gooaq_pairs_samples_per_second": 64.082, "eval_gooaq_pairs_steps_per_second": 2.563, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_mrpc_pairs_loss": 0.04172317683696747, "eval_mrpc_pairs_runtime": 0.2628, "eval_mrpc_pairs_samples_per_second": 380.505, "eval_mrpc_pairs_steps_per_second": 15.22, "step": 5120 }, { "epoch": 0.8073969597241811, "grad_norm": 9.413043022155762, "learning_rate": 5.815355470602388e-06, "loss": 1.098, "step": 5152 }, { "epoch": 0.8124118476727785, "grad_norm": 0.25412222743034363, "learning_rate": 5.614694224673387e-06, "loss": 0.821, "step": 5184 }, { "epoch": 0.8174267356213759, "grad_norm": 18.76092529296875, "learning_rate": 5.416755925930494e-06, "loss": 0.9351, "step": 5216 }, { "epoch": 0.8224416235699734, "grad_norm": 19.607337951660156, "learning_rate": 5.221598001968132e-06, "loss": 0.8784, "step": 5248 }, { "epoch": 0.8274565115185708, "grad_norm": 3.2164149284362793, "learning_rate": 5.029277073714009e-06, "loss": 0.8326, "step": 5280 }, { "epoch": 0.8324713994671682, "grad_norm": 11.156713485717773, "learning_rate": 4.839848939001789e-06, "loss": 0.7551, "step": 5312 }, { "epoch": 0.8374862874157656, "grad_norm": 8.80623722076416, "learning_rate": 4.653368556382492e-06, "loss": 0.8234, "step": 5344 }, { "epoch": 0.842501175364363, "grad_norm": 16.081491470336914, "learning_rate": 4.469890029179472e-06, "loss": 1.0922, "step": 5376 }, { "epoch": 0.8475160633129604, "grad_norm": 0.8583326935768127, "learning_rate": 4.2894665897914794e-06, "loss": 1.0925, "step": 5408 }, { "epoch": 0.8525309512615578, "grad_norm": 7.903942108154297, "learning_rate": 4.112150584248388e-06, "loss": 1.099, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_nli-pairs_loss": 0.8909263014793396, "eval_nli-pairs_runtime": 3.6329, "eval_nli-pairs_samples_per_second": 27.526, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.7892673589571536, "eval_sts-test_pearson_dot": 0.5308666684424199, "eval_sts-test_pearson_euclidean": 0.7372214599353599, "eval_sts-test_pearson_manhattan": 0.73149442324126, "eval_sts-test_pearson_max": 0.7892673589571536, "eval_sts-test_spearman_cosine": 0.8088174691107087, "eval_sts-test_spearman_dot": 0.5097841799376374, "eval_sts-test_spearman_euclidean": 0.7291099552995026, "eval_sts-test_spearman_manhattan": 0.7255023946868168, "eval_sts-test_spearman_max": 0.8088174691107087, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_vitaminc-pairs_loss": 4.696901798248291, "eval_vitaminc-pairs_runtime": 1.13, "eval_vitaminc-pairs_samples_per_second": 75.219, "eval_vitaminc-pairs_steps_per_second": 2.655, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_sts-label_loss": 3.794584274291992, "eval_sts-label_runtime": 0.2757, "eval_sts-label_samples_per_second": 362.777, "eval_sts-label_steps_per_second": 14.511, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_qnli-contrastive_loss": 0.1291896551847458, "eval_qnli-contrastive_runtime": 0.3577, "eval_qnli-contrastive_samples_per_second": 279.536, "eval_qnli-contrastive_steps_per_second": 11.181, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_scitail-pairs-qa_loss": 0.05729294940829277, "eval_scitail-pairs-qa_runtime": 0.8757, "eval_scitail-pairs-qa_samples_per_second": 114.199, "eval_scitail-pairs-qa_steps_per_second": 4.568, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_scitail-pairs-pos_loss": 0.47140783071517944, "eval_scitail-pairs-pos_runtime": 1.3328, "eval_scitail-pairs-pos_samples_per_second": 75.031, "eval_scitail-pairs-pos_steps_per_second": 3.001, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_xsum-pairs_loss": 0.2317724972963333, "eval_xsum-pairs_runtime": 0.934, "eval_xsum-pairs_samples_per_second": 107.065, "eval_xsum-pairs_steps_per_second": 4.283, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_compression-pairs_loss": 0.0849599540233612, "eval_compression-pairs_runtime": 0.2772, "eval_compression-pairs_samples_per_second": 360.752, "eval_compression-pairs_steps_per_second": 14.43, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_sciq_pairs_loss": 0.2746911346912384, "eval_sciq_pairs_runtime": 4.0398, "eval_sciq_pairs_samples_per_second": 24.754, "eval_sciq_pairs_steps_per_second": 0.99, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_qasc_pairs_loss": 0.16956950724124908, "eval_qasc_pairs_runtime": 1.0682, "eval_qasc_pairs_samples_per_second": 93.615, "eval_qasc_pairs_steps_per_second": 3.745, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_openbookqa_pairs_loss": 1.5424996614456177, "eval_openbookqa_pairs_runtime": 0.8928, "eval_openbookqa_pairs_samples_per_second": 112.006, "eval_openbookqa_pairs_steps_per_second": 4.48, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_msmarco_pairs_loss": 0.5047981142997742, "eval_msmarco_pairs_runtime": 2.0436, "eval_msmarco_pairs_samples_per_second": 48.932, "eval_msmarco_pairs_steps_per_second": 1.957, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_nq_pairs_loss": 0.230237677693367, "eval_nq_pairs_runtime": 4.5251, "eval_nq_pairs_samples_per_second": 22.099, "eval_nq_pairs_steps_per_second": 0.884, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_trivia_pairs_loss": 0.7567735314369202, "eval_trivia_pairs_runtime": 6.4545, "eval_trivia_pairs_samples_per_second": 15.493, "eval_trivia_pairs_steps_per_second": 0.62, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_quora_pairs_loss": 0.05753583088517189, "eval_quora_pairs_runtime": 0.6769, "eval_quora_pairs_samples_per_second": 147.736, "eval_quora_pairs_steps_per_second": 5.909, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_gooaq_pairs_loss": 0.49448758363723755, "eval_gooaq_pairs_runtime": 1.3984, "eval_gooaq_pairs_samples_per_second": 71.51, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_mrpc_pairs_loss": 0.04384453222155571, "eval_mrpc_pairs_runtime": 0.2653, "eval_mrpc_pairs_samples_per_second": 376.996, "eval_mrpc_pairs_steps_per_second": 15.08, "step": 5440 }, { "epoch": 0.8575458392101551, "grad_norm": 0.8697513341903687, "learning_rate": 3.93799345702415e-06, "loss": 0.5396, "step": 5472 }, { "epoch": 0.8625607271587525, "grad_norm": 8.337197303771973, "learning_rate": 3.7670457361112116e-06, "loss": 0.6636, "step": 5504 }, { "epoch": 0.8675756151073499, "grad_norm": 0.3655373156070709, "learning_rate": 3.5993570183609596e-06, "loss": 1.0095, "step": 5536 }, { "epoch": 0.8725905030559473, "grad_norm": 13.748374938964844, "learning_rate": 3.4349759550941933e-06, "loss": 0.631, "step": 5568 }, { "epoch": 0.8776053910045447, "grad_norm": 15.683762550354004, "learning_rate": 3.273950237986013e-06, "loss": 0.5415, "step": 5600 }, { "epoch": 0.8826202789531421, "grad_norm": 10.004467964172363, "learning_rate": 3.11632658522906e-06, "loss": 0.9227, "step": 5632 }, { "epoch": 0.8876351669017395, "grad_norm": 12.990907669067383, "learning_rate": 2.9621507279792564e-06, "loss": 0.8991, "step": 5664 }, { "epoch": 0.8926500548503369, "grad_norm": 0.4619373679161072, "learning_rate": 2.8114673970878584e-06, "loss": 0.5068, "step": 5696 }, { "epoch": 0.8976649427989344, "grad_norm": 8.317788124084473, "learning_rate": 2.664320310123768e-06, "loss": 1.2134, "step": 5728 }, { "epoch": 0.9026798307475318, "grad_norm": 0.38993319869041443, "learning_rate": 2.5207521586897876e-06, "loss": 0.4651, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_nli-pairs_loss": 0.8765493631362915, "eval_nli-pairs_runtime": 3.6164, "eval_nli-pairs_samples_per_second": 27.652, "eval_nli-pairs_steps_per_second": 1.106, "eval_sts-test_pearson_cosine": 0.7880147168961996, "eval_sts-test_pearson_dot": 0.5198107156003906, "eval_sts-test_pearson_euclidean": 0.7362840264051249, "eval_sts-test_pearson_manhattan": 0.7307716823389564, "eval_sts-test_pearson_max": 0.7880147168961996, "eval_sts-test_spearman_cosine": 0.8071394355093185, "eval_sts-test_spearman_dot": 0.49865317522814645, "eval_sts-test_spearman_euclidean": 0.7278395467197664, "eval_sts-test_spearman_manhattan": 0.7246934378777047, "eval_sts-test_spearman_max": 0.8071394355093185, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_vitaminc-pairs_loss": 4.717629432678223, "eval_vitaminc-pairs_runtime": 1.1248, "eval_vitaminc-pairs_samples_per_second": 75.571, "eval_vitaminc-pairs_steps_per_second": 2.667, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_sts-label_loss": 3.7598328590393066, "eval_sts-label_runtime": 0.2743, "eval_sts-label_samples_per_second": 364.548, "eval_sts-label_steps_per_second": 14.582, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_qnli-contrastive_loss": 0.11829647421836853, "eval_qnli-contrastive_runtime": 0.3606, "eval_qnli-contrastive_samples_per_second": 277.334, "eval_qnli-contrastive_steps_per_second": 11.093, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_scitail-pairs-qa_loss": 0.05503571406006813, "eval_scitail-pairs-qa_runtime": 0.874, "eval_scitail-pairs-qa_samples_per_second": 114.411, "eval_scitail-pairs-qa_steps_per_second": 4.576, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_scitail-pairs-pos_loss": 0.47530597448349, "eval_scitail-pairs-pos_runtime": 1.3429, "eval_scitail-pairs-pos_samples_per_second": 74.463, "eval_scitail-pairs-pos_steps_per_second": 2.979, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_xsum-pairs_loss": 0.22936196625232697, "eval_xsum-pairs_runtime": 0.9431, "eval_xsum-pairs_samples_per_second": 106.028, "eval_xsum-pairs_steps_per_second": 4.241, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_compression-pairs_loss": 0.08313465863466263, "eval_compression-pairs_runtime": 0.2781, "eval_compression-pairs_samples_per_second": 359.542, "eval_compression-pairs_steps_per_second": 14.382, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_sciq_pairs_loss": 0.27646955847740173, "eval_sciq_pairs_runtime": 4.0554, "eval_sciq_pairs_samples_per_second": 24.658, "eval_sciq_pairs_steps_per_second": 0.986, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_qasc_pairs_loss": 0.17006540298461914, "eval_qasc_pairs_runtime": 1.0538, "eval_qasc_pairs_samples_per_second": 94.898, "eval_qasc_pairs_steps_per_second": 3.796, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_openbookqa_pairs_loss": 1.5487664937973022, "eval_openbookqa_pairs_runtime": 0.8956, "eval_openbookqa_pairs_samples_per_second": 111.653, "eval_openbookqa_pairs_steps_per_second": 4.466, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_msmarco_pairs_loss": 0.4861982464790344, "eval_msmarco_pairs_runtime": 2.0548, "eval_msmarco_pairs_samples_per_second": 48.666, "eval_msmarco_pairs_steps_per_second": 1.947, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_nq_pairs_loss": 0.22520922124385834, "eval_nq_pairs_runtime": 4.4973, "eval_nq_pairs_samples_per_second": 22.236, "eval_nq_pairs_steps_per_second": 0.889, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_trivia_pairs_loss": 0.7480303049087524, "eval_trivia_pairs_runtime": 6.498, "eval_trivia_pairs_samples_per_second": 15.389, "eval_trivia_pairs_steps_per_second": 0.616, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_quora_pairs_loss": 0.06060533598065376, "eval_quora_pairs_runtime": 0.6722, "eval_quora_pairs_samples_per_second": 148.76, "eval_quora_pairs_steps_per_second": 5.95, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_gooaq_pairs_loss": 0.4696855843067169, "eval_gooaq_pairs_runtime": 1.3985, "eval_gooaq_pairs_samples_per_second": 71.503, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_mrpc_pairs_loss": 0.04175671190023422, "eval_mrpc_pairs_runtime": 0.2618, "eval_mrpc_pairs_samples_per_second": 381.956, "eval_mrpc_pairs_steps_per_second": 15.278, "step": 5760 }, { "epoch": 0.9076947186961292, "grad_norm": 19.970914840698242, "learning_rate": 2.3808045960365743e-06, "loss": 0.6346, "step": 5792 }, { "epoch": 0.9127096066447266, "grad_norm": 7.2970075607299805, "learning_rate": 2.2445182249778363e-06, "loss": 1.1103, "step": 5824 }, { "epoch": 0.917724494593324, "grad_norm": 14.34080982208252, "learning_rate": 2.1119325861102666e-06, "loss": 0.7667, "step": 5856 }, { "epoch": 0.9227393825419213, "grad_norm": 16.219850540161133, "learning_rate": 1.98308614634171e-06, "loss": 0.9174, "step": 5888 }, { "epoch": 0.9277542704905187, "grad_norm": 17.201740264892578, "learning_rate": 1.8580162877307744e-06, "loss": 0.7609, "step": 5920 }, { "epoch": 0.9327691584391161, "grad_norm": 12.591241836547852, "learning_rate": 1.7367592966412454e-06, "loss": 0.8993, "step": 5952 }, { "epoch": 0.9377840463877135, "grad_norm": 17.12389373779297, "learning_rate": 1.619350353214355e-06, "loss": 0.7587, "step": 5984 }, { "epoch": 0.9427989343363109, "grad_norm": 44.237342834472656, "learning_rate": 1.5058235211620126e-06, "loss": 0.935, "step": 6016 }, { "epoch": 0.9478138222849083, "grad_norm": 4.658092975616455, "learning_rate": 1.3962117378839439e-06, "loss": 0.8551, "step": 6048 }, { "epoch": 0.9528287102335057, "grad_norm": 0.4202437698841095, "learning_rate": 1.2905468049116077e-06, "loss": 1.4247, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_nli-pairs_loss": 0.880797266960144, "eval_nli-pairs_runtime": 3.65, "eval_nli-pairs_samples_per_second": 27.397, "eval_nli-pairs_steps_per_second": 1.096, "eval_sts-test_pearson_cosine": 0.7886384880168056, "eval_sts-test_pearson_dot": 0.5209320238457065, "eval_sts-test_pearson_euclidean": 0.7365619856047663, "eval_sts-test_pearson_manhattan": 0.7309874377904119, "eval_sts-test_pearson_max": 0.7886384880168056, "eval_sts-test_spearman_cosine": 0.8078306606920327, "eval_sts-test_spearman_dot": 0.4995671547413244, "eval_sts-test_spearman_euclidean": 0.7281379887760366, "eval_sts-test_spearman_manhattan": 0.7249545388844193, "eval_sts-test_spearman_max": 0.8078306606920327, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_vitaminc-pairs_loss": 4.70750617980957, "eval_vitaminc-pairs_runtime": 1.1372, "eval_vitaminc-pairs_samples_per_second": 74.747, "eval_vitaminc-pairs_steps_per_second": 2.638, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_sts-label_loss": 3.7686922550201416, "eval_sts-label_runtime": 0.2807, "eval_sts-label_samples_per_second": 356.243, "eval_sts-label_steps_per_second": 14.25, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_qnli-contrastive_loss": 0.12000326067209244, "eval_qnli-contrastive_runtime": 0.3651, "eval_qnli-contrastive_samples_per_second": 273.878, "eval_qnli-contrastive_steps_per_second": 10.955, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_scitail-pairs-qa_loss": 0.055266913026571274, "eval_scitail-pairs-qa_runtime": 0.8813, "eval_scitail-pairs-qa_samples_per_second": 113.472, "eval_scitail-pairs-qa_steps_per_second": 4.539, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_scitail-pairs-pos_loss": 0.46404972672462463, "eval_scitail-pairs-pos_runtime": 1.3468, "eval_scitail-pairs-pos_samples_per_second": 74.248, "eval_scitail-pairs-pos_steps_per_second": 2.97, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_xsum-pairs_loss": 0.22768865525722504, "eval_xsum-pairs_runtime": 0.9385, "eval_xsum-pairs_samples_per_second": 106.553, "eval_xsum-pairs_steps_per_second": 4.262, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_compression-pairs_loss": 0.08245458453893661, "eval_compression-pairs_runtime": 0.2783, "eval_compression-pairs_samples_per_second": 359.331, "eval_compression-pairs_steps_per_second": 14.373, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_sciq_pairs_loss": 0.24696679413318634, "eval_sciq_pairs_runtime": 4.072, "eval_sciq_pairs_samples_per_second": 24.558, "eval_sciq_pairs_steps_per_second": 0.982, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_qasc_pairs_loss": 0.16628116369247437, "eval_qasc_pairs_runtime": 1.066, "eval_qasc_pairs_samples_per_second": 93.809, "eval_qasc_pairs_steps_per_second": 3.752, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_openbookqa_pairs_loss": 1.5343760251998901, "eval_openbookqa_pairs_runtime": 0.9064, "eval_openbookqa_pairs_samples_per_second": 110.324, "eval_openbookqa_pairs_steps_per_second": 4.413, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_msmarco_pairs_loss": 0.48861968517303467, "eval_msmarco_pairs_runtime": 2.0777, "eval_msmarco_pairs_samples_per_second": 48.131, "eval_msmarco_pairs_steps_per_second": 1.925, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_nq_pairs_loss": 0.2192871868610382, "eval_nq_pairs_runtime": 4.5629, "eval_nq_pairs_samples_per_second": 21.916, "eval_nq_pairs_steps_per_second": 0.877, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_trivia_pairs_loss": 0.7455114126205444, "eval_trivia_pairs_runtime": 6.4434, "eval_trivia_pairs_samples_per_second": 15.52, "eval_trivia_pairs_steps_per_second": 0.621, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_quora_pairs_loss": 0.0536942183971405, "eval_quora_pairs_runtime": 0.6874, "eval_quora_pairs_samples_per_second": 145.481, "eval_quora_pairs_steps_per_second": 5.819, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_gooaq_pairs_loss": 0.4775075614452362, "eval_gooaq_pairs_runtime": 1.3946, "eval_gooaq_pairs_samples_per_second": 71.707, "eval_gooaq_pairs_steps_per_second": 2.868, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_mrpc_pairs_loss": 0.041804660111665726, "eval_mrpc_pairs_runtime": 0.2631, "eval_mrpc_pairs_samples_per_second": 380.035, "eval_mrpc_pairs_steps_per_second": 15.201, "step": 6080 }, { "epoch": 0.9578435981821031, "grad_norm": 15.8797607421875, "learning_rate": 1.1888593786816527e-06, "loss": 0.3377, "step": 6112 }, { "epoch": 0.9628584861307005, "grad_norm": 54.2625732421875, "learning_rate": 1.0911789616415957e-06, "loss": 1.163, "step": 6144 }, { "epoch": 0.967873374079298, "grad_norm": 27.014169692993164, "learning_rate": 9.975338936903327e-07, "loss": 1.1638, "step": 6176 }, { "epoch": 0.9728882620278954, "grad_norm": 12.264323234558105, "learning_rate": 9.079513439558945e-07, "loss": 0.7428, "step": 6208 }, { "epoch": 0.9779031499764927, "grad_norm": 0.2486962229013443, "learning_rate": 8.224573029129201e-07, "loss": 0.3827, "step": 6240 }, { "epoch": 0.9829180379250901, "grad_norm": 0.19951488077640533, "learning_rate": 7.41076574842064e-07, "loss": 1.0739, "step": 6272 }, { "epoch": 0.9879329258736875, "grad_norm": 1.6168636083602905, "learning_rate": 6.638327706335673e-07, "loss": 0.7049, "step": 6304 }, { "epoch": 0.9929478138222849, "grad_norm": 1.4084432125091553, "learning_rate": 5.907483009370463e-07, "loss": 0.9298, "step": 6336 }, { "epoch": 0.9979627017708823, "grad_norm": 0.7779116630554199, "learning_rate": 5.218443696595343e-07, "loss": 0.6243, "step": 6368 } ], "logging_steps": 32, "max_steps": 12762, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1277, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }