{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4008776053910046, "eval_steps": 320, "global_step": 8939, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0050148879485973985, "grad_norm": 14.771158218383789, "learning_rate": 9.707724425887265e-07, "loss": 0.6329, "step": 32 }, { "epoch": 0.010029775897194797, "grad_norm": 11.052021980285645, "learning_rate": 1.9728601252609606e-06, "loss": 0.9693, "step": 64 }, { "epoch": 0.015044663845792195, "grad_norm": 20.26296615600586, "learning_rate": 2.9749478079331944e-06, "loss": 0.6548, "step": 96 }, { "epoch": 0.020059551794389594, "grad_norm": 12.62913703918457, "learning_rate": 3.945720250521921e-06, "loss": 1.1279, "step": 128 }, { "epoch": 0.025074439742986992, "grad_norm": 12.316486358642578, "learning_rate": 4.916492693110647e-06, "loss": 1.0017, "step": 160 }, { "epoch": 0.03008932769158439, "grad_norm": 64.25923919677734, "learning_rate": 5.918580375782881e-06, "loss": 0.7571, "step": 192 }, { "epoch": 0.03510421564018179, "grad_norm": 0.8205029368400574, "learning_rate": 6.920668058455115e-06, "loss": 0.7304, "step": 224 }, { "epoch": 0.04011910358877919, "grad_norm": 6.598870754241943, "learning_rate": 7.922755741127349e-06, "loss": 0.7636, "step": 256 }, { "epoch": 0.045133991537376586, "grad_norm": 8.728073120117188, "learning_rate": 8.924843423799583e-06, "loss": 0.482, "step": 288 }, { "epoch": 0.050148879485973984, "grad_norm": 7.645521640777588, "learning_rate": 9.926931106471817e-06, "loss": 0.6312, "step": 320 }, { "epoch": 0.050148879485973984, "eval_nli-pairs_loss": 1.0158467292785645, "eval_nli-pairs_runtime": 3.7267, "eval_nli-pairs_samples_per_second": 26.833, "eval_nli-pairs_steps_per_second": 1.073, "eval_sts-test_pearson_cosine": 0.7848265412179125, "eval_sts-test_pearson_dot": 0.5437080705284749, "eval_sts-test_pearson_euclidean": 0.7445845076364892, "eval_sts-test_pearson_manhattan": 0.7429239204432232, "eval_sts-test_pearson_max": 0.7848265412179125, "eval_sts-test_spearman_cosine": 0.7989504707258924, "eval_sts-test_spearman_dot": 0.5206855421174118, "eval_sts-test_spearman_euclidean": 0.733568982260844, "eval_sts-test_spearman_manhattan": 0.7349407257944446, "eval_sts-test_spearman_max": 0.7989504707258924, "step": 320 }, { "epoch": 0.050148879485973984, "eval_vitaminc-pairs_loss": 4.692601680755615, "eval_vitaminc-pairs_runtime": 1.1397, "eval_vitaminc-pairs_samples_per_second": 74.578, "eval_vitaminc-pairs_steps_per_second": 2.632, "step": 320 }, { "epoch": 0.050148879485973984, "eval_sts-label_loss": 3.5502490997314453, "eval_sts-label_runtime": 0.28, "eval_sts-label_samples_per_second": 357.117, "eval_sts-label_steps_per_second": 14.285, "step": 320 }, { "epoch": 0.050148879485973984, "eval_qnli-contrastive_loss": 0.16079513728618622, "eval_qnli-contrastive_runtime": 0.3646, "eval_qnli-contrastive_samples_per_second": 274.299, "eval_qnli-contrastive_steps_per_second": 10.972, "step": 320 }, { "epoch": 0.050148879485973984, "eval_scitail-pairs-qa_loss": 0.07610582560300827, "eval_scitail-pairs-qa_runtime": 0.8885, "eval_scitail-pairs-qa_samples_per_second": 112.548, "eval_scitail-pairs-qa_steps_per_second": 4.502, "step": 320 }, { "epoch": 0.050148879485973984, "eval_scitail-pairs-pos_loss": 0.5141278505325317, "eval_scitail-pairs-pos_runtime": 1.3498, "eval_scitail-pairs-pos_samples_per_second": 74.085, "eval_scitail-pairs-pos_steps_per_second": 2.963, "step": 320 }, { "epoch": 0.050148879485973984, "eval_xsum-pairs_loss": 0.25581496953964233, "eval_xsum-pairs_runtime": 0.9407, "eval_xsum-pairs_samples_per_second": 106.304, "eval_xsum-pairs_steps_per_second": 4.252, "step": 320 }, { "epoch": 0.050148879485973984, "eval_compression-pairs_loss": 0.09814296662807465, "eval_compression-pairs_runtime": 0.2758, "eval_compression-pairs_samples_per_second": 362.517, "eval_compression-pairs_steps_per_second": 14.501, "step": 320 }, { "epoch": 0.050148879485973984, "eval_sciq_pairs_loss": 0.25620242953300476, "eval_sciq_pairs_runtime": 4.1155, "eval_sciq_pairs_samples_per_second": 24.298, "eval_sciq_pairs_steps_per_second": 0.972, "step": 320 }, { "epoch": 0.050148879485973984, "eval_qasc_pairs_loss": 0.2044612169265747, "eval_qasc_pairs_runtime": 1.1029, "eval_qasc_pairs_samples_per_second": 90.672, "eval_qasc_pairs_steps_per_second": 3.627, "step": 320 }, { "epoch": 0.050148879485973984, "eval_openbookqa_pairs_loss": 1.7537646293640137, "eval_openbookqa_pairs_runtime": 0.9037, "eval_openbookqa_pairs_samples_per_second": 110.653, "eval_openbookqa_pairs_steps_per_second": 4.426, "step": 320 }, { "epoch": 0.050148879485973984, "eval_msmarco_pairs_loss": 0.5138561725616455, "eval_msmarco_pairs_runtime": 2.0511, "eval_msmarco_pairs_samples_per_second": 48.754, "eval_msmarco_pairs_steps_per_second": 1.95, "step": 320 }, { "epoch": 0.050148879485973984, "eval_nq_pairs_loss": 0.23510317504405975, "eval_nq_pairs_runtime": 4.5293, "eval_nq_pairs_samples_per_second": 22.078, "eval_nq_pairs_steps_per_second": 0.883, "step": 320 }, { "epoch": 0.050148879485973984, "eval_trivia_pairs_loss": 0.7808571457862854, "eval_trivia_pairs_runtime": 6.5065, "eval_trivia_pairs_samples_per_second": 15.369, "eval_trivia_pairs_steps_per_second": 0.615, "step": 320 }, { "epoch": 0.050148879485973984, "eval_quora_pairs_loss": 0.0392119362950325, "eval_quora_pairs_runtime": 0.675, "eval_quora_pairs_samples_per_second": 148.153, "eval_quora_pairs_steps_per_second": 5.926, "step": 320 }, { "epoch": 0.050148879485973984, "eval_gooaq_pairs_loss": 0.4712902009487152, "eval_gooaq_pairs_runtime": 1.4079, "eval_gooaq_pairs_samples_per_second": 71.028, "eval_gooaq_pairs_steps_per_second": 2.841, "step": 320 }, { "epoch": 0.050148879485973984, "eval_mrpc_pairs_loss": 0.05498996376991272, "eval_mrpc_pairs_runtime": 0.2623, "eval_mrpc_pairs_samples_per_second": 381.172, "eval_mrpc_pairs_steps_per_second": 15.247, "step": 320 }, { "epoch": 0.05516376743457138, "grad_norm": 0.34924012422561646, "learning_rate": 1.092901878914405e-05, "loss": 0.5791, "step": 352 }, { "epoch": 0.06017865538316878, "grad_norm": 0.36700841784477234, "learning_rate": 1.1931106471816284e-05, "loss": 0.6413, "step": 384 }, { "epoch": 0.06519354333176618, "grad_norm": 7.559622764587402, "learning_rate": 1.2933194154488518e-05, "loss": 0.4319, "step": 416 }, { "epoch": 0.07020843128036358, "grad_norm": 7.982416152954102, "learning_rate": 1.3935281837160753e-05, "loss": 0.6672, "step": 448 }, { "epoch": 0.07522331922896097, "grad_norm": 0.6726166009902954, "learning_rate": 1.4937369519832987e-05, "loss": 0.459, "step": 480 }, { "epoch": 0.08023820717755838, "grad_norm": 14.846123695373535, "learning_rate": 1.593945720250522e-05, "loss": 0.7621, "step": 512 }, { "epoch": 0.08525309512615578, "grad_norm": 0.7846627831459045, "learning_rate": 1.6941544885177454e-05, "loss": 0.864, "step": 544 }, { "epoch": 0.09026798307475317, "grad_norm": 0.8993583917617798, "learning_rate": 1.7943632567849688e-05, "loss": 0.5081, "step": 576 }, { "epoch": 0.09528287102335058, "grad_norm": 1.4990565776824951, "learning_rate": 1.894572025052192e-05, "loss": 0.654, "step": 608 }, { "epoch": 0.10029775897194797, "grad_norm": 15.647976875305176, "learning_rate": 1.9947807933194157e-05, "loss": 0.6372, "step": 640 }, { "epoch": 0.10029775897194797, "eval_nli-pairs_loss": 1.0652996301651, "eval_nli-pairs_runtime": 3.6326, "eval_nli-pairs_samples_per_second": 27.528, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.785263018402905, "eval_sts-test_pearson_dot": 0.5290450141477089, "eval_sts-test_pearson_euclidean": 0.7433756286425983, "eval_sts-test_pearson_manhattan": 0.7411097274300102, "eval_sts-test_pearson_max": 0.785263018402905, "eval_sts-test_spearman_cosine": 0.7996928912411947, "eval_sts-test_spearman_dot": 0.5102571497667188, "eval_sts-test_spearman_euclidean": 0.7338969723324641, "eval_sts-test_spearman_manhattan": 0.7343494860194358, "eval_sts-test_spearman_max": 0.7996928912411947, "step": 640 }, { "epoch": 0.10029775897194797, "eval_vitaminc-pairs_loss": 4.719416618347168, "eval_vitaminc-pairs_runtime": 1.1268, "eval_vitaminc-pairs_samples_per_second": 75.437, "eval_vitaminc-pairs_steps_per_second": 2.662, "step": 640 }, { "epoch": 0.10029775897194797, "eval_sts-label_loss": 3.612347364425659, "eval_sts-label_runtime": 0.2683, "eval_sts-label_samples_per_second": 372.651, "eval_sts-label_steps_per_second": 14.906, "step": 640 }, { "epoch": 0.10029775897194797, "eval_qnli-contrastive_loss": 0.15202775597572327, "eval_qnli-contrastive_runtime": 0.3528, "eval_qnli-contrastive_samples_per_second": 283.457, "eval_qnli-contrastive_steps_per_second": 11.338, "step": 640 }, { "epoch": 0.10029775897194797, "eval_scitail-pairs-qa_loss": 0.07544919103384018, "eval_scitail-pairs-qa_runtime": 0.8732, "eval_scitail-pairs-qa_samples_per_second": 114.517, "eval_scitail-pairs-qa_steps_per_second": 4.581, "step": 640 }, { "epoch": 0.10029775897194797, "eval_scitail-pairs-pos_loss": 0.5404170751571655, "eval_scitail-pairs-pos_runtime": 1.3146, "eval_scitail-pairs-pos_samples_per_second": 76.067, "eval_scitail-pairs-pos_steps_per_second": 3.043, "step": 640 }, { "epoch": 0.10029775897194797, "eval_xsum-pairs_loss": 0.25958582758903503, "eval_xsum-pairs_runtime": 0.9287, "eval_xsum-pairs_samples_per_second": 107.679, "eval_xsum-pairs_steps_per_second": 4.307, "step": 640 }, { "epoch": 0.10029775897194797, "eval_compression-pairs_loss": 0.10066353529691696, "eval_compression-pairs_runtime": 0.2732, "eval_compression-pairs_samples_per_second": 366.076, "eval_compression-pairs_steps_per_second": 14.643, "step": 640 }, { "epoch": 0.10029775897194797, "eval_sciq_pairs_loss": 0.2645374834537506, "eval_sciq_pairs_runtime": 4.0725, "eval_sciq_pairs_samples_per_second": 24.555, "eval_sciq_pairs_steps_per_second": 0.982, "step": 640 }, { "epoch": 0.10029775897194797, "eval_qasc_pairs_loss": 0.21021947264671326, "eval_qasc_pairs_runtime": 1.0743, "eval_qasc_pairs_samples_per_second": 93.084, "eval_qasc_pairs_steps_per_second": 3.723, "step": 640 }, { "epoch": 0.10029775897194797, "eval_openbookqa_pairs_loss": 1.7905032634735107, "eval_openbookqa_pairs_runtime": 0.8886, "eval_openbookqa_pairs_samples_per_second": 112.532, "eval_openbookqa_pairs_steps_per_second": 4.501, "step": 640 }, { "epoch": 0.10029775897194797, "eval_msmarco_pairs_loss": 0.5102832913398743, "eval_msmarco_pairs_runtime": 2.0529, "eval_msmarco_pairs_samples_per_second": 48.712, "eval_msmarco_pairs_steps_per_second": 1.948, "step": 640 }, { "epoch": 0.10029775897194797, "eval_nq_pairs_loss": 0.24466972053050995, "eval_nq_pairs_runtime": 4.4973, "eval_nq_pairs_samples_per_second": 22.235, "eval_nq_pairs_steps_per_second": 0.889, "step": 640 }, { "epoch": 0.10029775897194797, "eval_trivia_pairs_loss": 0.8748095631599426, "eval_trivia_pairs_runtime": 6.4825, "eval_trivia_pairs_samples_per_second": 15.426, "eval_trivia_pairs_steps_per_second": 0.617, "step": 640 }, { "epoch": 0.10029775897194797, "eval_quora_pairs_loss": 0.07820220291614532, "eval_quora_pairs_runtime": 0.6944, "eval_quora_pairs_samples_per_second": 144.008, "eval_quora_pairs_steps_per_second": 5.76, "step": 640 }, { "epoch": 0.10029775897194797, "eval_gooaq_pairs_loss": 0.5236212611198425, "eval_gooaq_pairs_runtime": 1.3899, "eval_gooaq_pairs_samples_per_second": 71.949, "eval_gooaq_pairs_steps_per_second": 2.878, "step": 640 }, { "epoch": 0.10029775897194797, "eval_mrpc_pairs_loss": 0.05494727939367294, "eval_mrpc_pairs_runtime": 0.2598, "eval_mrpc_pairs_samples_per_second": 384.941, "eval_mrpc_pairs_steps_per_second": 15.398, "step": 640 }, { "epoch": 0.10531264692054537, "grad_norm": 11.01974105834961, "learning_rate": 2.0949895615866387e-05, "loss": 0.9292, "step": 672 }, { "epoch": 0.11032753486914276, "grad_norm": 0.5542309284210205, "learning_rate": 2.1951983298538625e-05, "loss": 1.3108, "step": 704 }, { "epoch": 0.11534242281774017, "grad_norm": 15.458569526672363, "learning_rate": 2.2954070981210856e-05, "loss": 0.9674, "step": 736 }, { "epoch": 0.12035731076633756, "grad_norm": 2.7814478874206543, "learning_rate": 2.395615866388309e-05, "loss": 0.9226, "step": 768 }, { "epoch": 0.12537219871493496, "grad_norm": 11.393244743347168, "learning_rate": 2.4958246346555324e-05, "loss": 0.789, "step": 800 }, { "epoch": 0.13038708666353235, "grad_norm": 9.288290977478027, "learning_rate": 2.596033402922756e-05, "loss": 0.5186, "step": 832 }, { "epoch": 0.13540197461212977, "grad_norm": 47.65571212768555, "learning_rate": 2.6962421711899793e-05, "loss": 0.6726, "step": 864 }, { "epoch": 0.14041686256072716, "grad_norm": 12.908064842224121, "learning_rate": 2.7964509394572024e-05, "loss": 0.5381, "step": 896 }, { "epoch": 0.14543175050932455, "grad_norm": 14.951742172241211, "learning_rate": 2.896659707724426e-05, "loss": 0.581, "step": 928 }, { "epoch": 0.15044663845792194, "grad_norm": 20.12006187438965, "learning_rate": 2.9968684759916492e-05, "loss": 0.9038, "step": 960 }, { "epoch": 0.15044663845792194, "eval_nli-pairs_loss": 1.2173175811767578, "eval_nli-pairs_runtime": 3.7098, "eval_nli-pairs_samples_per_second": 26.955, "eval_nli-pairs_steps_per_second": 1.078, "eval_sts-test_pearson_cosine": 0.7840992835675669, "eval_sts-test_pearson_dot": 0.5220462136106129, "eval_sts-test_pearson_euclidean": 0.7457350047351855, "eval_sts-test_pearson_manhattan": 0.7425970830541657, "eval_sts-test_pearson_max": 0.7840992835675669, "eval_sts-test_spearman_cosine": 0.8006376809572144, "eval_sts-test_spearman_dot": 0.5020544543992158, "eval_sts-test_spearman_euclidean": 0.7369257710408655, "eval_sts-test_spearman_manhattan": 0.7362649758012406, "eval_sts-test_spearman_max": 0.8006376809572144, "step": 960 }, { "epoch": 0.15044663845792194, "eval_vitaminc-pairs_loss": 4.774902820587158, "eval_vitaminc-pairs_runtime": 1.1212, "eval_vitaminc-pairs_samples_per_second": 75.809, "eval_vitaminc-pairs_steps_per_second": 2.676, "step": 960 }, { "epoch": 0.15044663845792194, "eval_sts-label_loss": 3.198556900024414, "eval_sts-label_runtime": 0.2678, "eval_sts-label_samples_per_second": 373.382, "eval_sts-label_steps_per_second": 14.935, "step": 960 }, { "epoch": 0.15044663845792194, "eval_qnli-contrastive_loss": 0.1943340301513672, "eval_qnli-contrastive_runtime": 0.3511, "eval_qnli-contrastive_samples_per_second": 284.789, "eval_qnli-contrastive_steps_per_second": 11.392, "step": 960 }, { "epoch": 0.15044663845792194, "eval_scitail-pairs-qa_loss": 0.08060617744922638, "eval_scitail-pairs-qa_runtime": 0.8778, "eval_scitail-pairs-qa_samples_per_second": 113.92, "eval_scitail-pairs-qa_steps_per_second": 4.557, "step": 960 }, { "epoch": 0.15044663845792194, "eval_scitail-pairs-pos_loss": 0.4759831428527832, "eval_scitail-pairs-pos_runtime": 1.3609, "eval_scitail-pairs-pos_samples_per_second": 73.48, "eval_scitail-pairs-pos_steps_per_second": 2.939, "step": 960 }, { "epoch": 0.15044663845792194, "eval_xsum-pairs_loss": 0.27583304047584534, "eval_xsum-pairs_runtime": 0.9343, "eval_xsum-pairs_samples_per_second": 107.035, "eval_xsum-pairs_steps_per_second": 4.281, "step": 960 }, { "epoch": 0.15044663845792194, "eval_compression-pairs_loss": 0.10094660520553589, "eval_compression-pairs_runtime": 0.2739, "eval_compression-pairs_samples_per_second": 365.047, "eval_compression-pairs_steps_per_second": 14.602, "step": 960 }, { "epoch": 0.15044663845792194, "eval_sciq_pairs_loss": 0.2688131630420685, "eval_sciq_pairs_runtime": 4.0582, "eval_sciq_pairs_samples_per_second": 24.641, "eval_sciq_pairs_steps_per_second": 0.986, "step": 960 }, { "epoch": 0.15044663845792194, "eval_qasc_pairs_loss": 0.23267821967601776, "eval_qasc_pairs_runtime": 1.0554, "eval_qasc_pairs_samples_per_second": 94.75, "eval_qasc_pairs_steps_per_second": 3.79, "step": 960 }, { "epoch": 0.15044663845792194, "eval_openbookqa_pairs_loss": 1.8053069114685059, "eval_openbookqa_pairs_runtime": 0.8871, "eval_openbookqa_pairs_samples_per_second": 112.727, "eval_openbookqa_pairs_steps_per_second": 4.509, "step": 960 }, { "epoch": 0.15044663845792194, "eval_msmarco_pairs_loss": 0.5809260606765747, "eval_msmarco_pairs_runtime": 2.0498, "eval_msmarco_pairs_samples_per_second": 48.786, "eval_msmarco_pairs_steps_per_second": 1.951, "step": 960 }, { "epoch": 0.15044663845792194, "eval_nq_pairs_loss": 0.2808491885662079, "eval_nq_pairs_runtime": 4.4982, "eval_nq_pairs_samples_per_second": 22.231, "eval_nq_pairs_steps_per_second": 0.889, "step": 960 }, { "epoch": 0.15044663845792194, "eval_trivia_pairs_loss": 0.9379808902740479, "eval_trivia_pairs_runtime": 6.4578, "eval_trivia_pairs_samples_per_second": 15.485, "eval_trivia_pairs_steps_per_second": 0.619, "step": 960 }, { "epoch": 0.15044663845792194, "eval_quora_pairs_loss": 0.0913279801607132, "eval_quora_pairs_runtime": 0.6721, "eval_quora_pairs_samples_per_second": 148.79, "eval_quora_pairs_steps_per_second": 5.952, "step": 960 }, { "epoch": 0.15044663845792194, "eval_gooaq_pairs_loss": 0.5807955265045166, "eval_gooaq_pairs_runtime": 1.3915, "eval_gooaq_pairs_samples_per_second": 71.865, "eval_gooaq_pairs_steps_per_second": 2.875, "step": 960 }, { "epoch": 0.15044663845792194, "eval_mrpc_pairs_loss": 0.05799216777086258, "eval_mrpc_pairs_runtime": 0.2571, "eval_mrpc_pairs_samples_per_second": 388.998, "eval_mrpc_pairs_steps_per_second": 15.56, "step": 960 }, { "epoch": 0.15546152640651936, "grad_norm": 9.773286819458008, "learning_rate": 2.9997957904107625e-05, "loss": 0.7964, "step": 992 }, { "epoch": 0.16047641435511675, "grad_norm": 19.411075592041016, "learning_rate": 2.9991566594209126e-05, "loss": 0.8213, "step": 1024 }, { "epoch": 0.16549130230371414, "grad_norm": 3.5282175540924072, "learning_rate": 2.9980825799589488e-05, "loss": 0.5396, "step": 1056 }, { "epoch": 0.17050619025231156, "grad_norm": 62.66339874267578, "learning_rate": 2.996573863646219e-05, "loss": 0.9297, "step": 1088 }, { "epoch": 0.17552107820090895, "grad_norm": 8.785274505615234, "learning_rate": 2.994630948204727e-05, "loss": 1.169, "step": 1120 }, { "epoch": 0.18053596614950634, "grad_norm": 24.10859489440918, "learning_rate": 2.992254397330132e-05, "loss": 0.7486, "step": 1152 }, { "epoch": 0.18555085409810373, "grad_norm": 25.545284271240234, "learning_rate": 2.9894449005282077e-05, "loss": 0.6821, "step": 1184 }, { "epoch": 0.19056574204670115, "grad_norm": 0.8675521016120911, "learning_rate": 2.9862032729147954e-05, "loss": 0.6125, "step": 1216 }, { "epoch": 0.19558062999529854, "grad_norm": 16.122114181518555, "learning_rate": 2.9825304549793153e-05, "loss": 0.8061, "step": 1248 }, { "epoch": 0.20059551794389593, "grad_norm": 1.0314382314682007, "learning_rate": 2.978427512311904e-05, "loss": 0.6918, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_nli-pairs_loss": 1.1552109718322754, "eval_nli-pairs_runtime": 3.8751, "eval_nli-pairs_samples_per_second": 25.806, "eval_nli-pairs_steps_per_second": 1.032, "eval_sts-test_pearson_cosine": 0.786106976104726, "eval_sts-test_pearson_dot": 0.5116758767219935, "eval_sts-test_pearson_euclidean": 0.7432891018313416, "eval_sts-test_pearson_manhattan": 0.7400929158927781, "eval_sts-test_pearson_max": 0.786106976104726, "eval_sts-test_spearman_cosine": 0.801377272203007, "eval_sts-test_spearman_dot": 0.4921454166952506, "eval_sts-test_spearman_euclidean": 0.7343686249967402, "eval_sts-test_spearman_manhattan": 0.7331946050808561, "eval_sts-test_spearman_max": 0.801377272203007, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_vitaminc-pairs_loss": 4.6789751052856445, "eval_vitaminc-pairs_runtime": 1.1504, "eval_vitaminc-pairs_samples_per_second": 73.889, "eval_vitaminc-pairs_steps_per_second": 2.608, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_sts-label_loss": 3.5580556392669678, "eval_sts-label_runtime": 0.2834, "eval_sts-label_samples_per_second": 352.858, "eval_sts-label_steps_per_second": 14.114, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_qnli-contrastive_loss": 0.20369713008403778, "eval_qnli-contrastive_runtime": 0.358, "eval_qnli-contrastive_samples_per_second": 279.331, "eval_qnli-contrastive_steps_per_second": 11.173, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_scitail-pairs-qa_loss": 0.07465875148773193, "eval_scitail-pairs-qa_runtime": 0.9504, "eval_scitail-pairs-qa_samples_per_second": 105.214, "eval_scitail-pairs-qa_steps_per_second": 4.209, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_scitail-pairs-pos_loss": 0.49434563517570496, "eval_scitail-pairs-pos_runtime": 1.6041, "eval_scitail-pairs-pos_samples_per_second": 62.339, "eval_scitail-pairs-pos_steps_per_second": 2.494, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_xsum-pairs_loss": 0.28282061219215393, "eval_xsum-pairs_runtime": 0.9316, "eval_xsum-pairs_samples_per_second": 107.346, "eval_xsum-pairs_steps_per_second": 4.294, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_compression-pairs_loss": 0.097385473549366, "eval_compression-pairs_runtime": 0.2754, "eval_compression-pairs_samples_per_second": 363.1, "eval_compression-pairs_steps_per_second": 14.524, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_sciq_pairs_loss": 0.2762215733528137, "eval_sciq_pairs_runtime": 4.2307, "eval_sciq_pairs_samples_per_second": 23.637, "eval_sciq_pairs_steps_per_second": 0.945, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_qasc_pairs_loss": 0.19347424805164337, "eval_qasc_pairs_runtime": 1.2282, "eval_qasc_pairs_samples_per_second": 81.421, "eval_qasc_pairs_steps_per_second": 3.257, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_openbookqa_pairs_loss": 1.6875064373016357, "eval_openbookqa_pairs_runtime": 1.1661, "eval_openbookqa_pairs_samples_per_second": 85.754, "eval_openbookqa_pairs_steps_per_second": 3.43, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_msmarco_pairs_loss": 0.5743877291679382, "eval_msmarco_pairs_runtime": 2.1428, "eval_msmarco_pairs_samples_per_second": 46.669, "eval_msmarco_pairs_steps_per_second": 1.867, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_nq_pairs_loss": 0.30348217487335205, "eval_nq_pairs_runtime": 4.5543, "eval_nq_pairs_samples_per_second": 21.957, "eval_nq_pairs_steps_per_second": 0.878, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_trivia_pairs_loss": 0.9221765995025635, "eval_trivia_pairs_runtime": 6.6513, "eval_trivia_pairs_samples_per_second": 15.035, "eval_trivia_pairs_steps_per_second": 0.601, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_quora_pairs_loss": 0.03854631260037422, "eval_quora_pairs_runtime": 0.7822, "eval_quora_pairs_samples_per_second": 127.852, "eval_quora_pairs_steps_per_second": 5.114, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_gooaq_pairs_loss": 0.528398334980011, "eval_gooaq_pairs_runtime": 1.4882, "eval_gooaq_pairs_samples_per_second": 67.194, "eval_gooaq_pairs_steps_per_second": 2.688, "step": 1280 }, { "epoch": 0.20059551794389593, "eval_mrpc_pairs_loss": 0.05623970925807953, "eval_mrpc_pairs_runtime": 0.2698, "eval_mrpc_pairs_samples_per_second": 370.713, "eval_mrpc_pairs_steps_per_second": 14.829, "step": 1280 }, { "epoch": 0.20561040589249335, "grad_norm": 0.6042119860649109, "learning_rate": 2.9738956352942557e-05, "loss": 0.9421, "step": 1312 }, { "epoch": 0.21062529384109074, "grad_norm": 13.87867546081543, "learning_rate": 2.968936138754259e-05, "loss": 0.8641, "step": 1344 }, { "epoch": 0.21564018178968813, "grad_norm": 44.48640441894531, "learning_rate": 2.9635504615845257e-05, "loss": 1.157, "step": 1376 }, { "epoch": 0.22065506973828553, "grad_norm": 15.554729461669922, "learning_rate": 2.957928148945977e-05, "loss": 0.8772, "step": 1408 }, { "epoch": 0.22566995768688294, "grad_norm": 16.644670486450195, "learning_rate": 2.9517081112297707e-05, "loss": 1.0496, "step": 1440 }, { "epoch": 0.23068484563548033, "grad_norm": 13.053145408630371, "learning_rate": 2.9450668912302004e-05, "loss": 0.589, "step": 1472 }, { "epoch": 0.23569973358407773, "grad_norm": 7.827791213989258, "learning_rate": 2.9380064157562306e-05, "loss": 0.8234, "step": 1504 }, { "epoch": 0.24071462153267512, "grad_norm": 15.598438262939453, "learning_rate": 2.930528733254901e-05, "loss": 0.7365, "step": 1536 }, { "epoch": 0.24572950948127253, "grad_norm": 13.723180770874023, "learning_rate": 2.9226360132170112e-05, "loss": 0.5076, "step": 1568 }, { "epoch": 0.2507443974298699, "grad_norm": 10.20022964477539, "learning_rate": 2.9143305455476866e-05, "loss": 1.0329, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_nli-pairs_loss": 1.0577216148376465, "eval_nli-pairs_runtime": 3.6476, "eval_nli-pairs_samples_per_second": 27.415, "eval_nli-pairs_steps_per_second": 1.097, "eval_sts-test_pearson_cosine": 0.7876359552191669, "eval_sts-test_pearson_dot": 0.5220803655074544, "eval_sts-test_pearson_euclidean": 0.7444632413869628, "eval_sts-test_pearson_manhattan": 0.7418744760088763, "eval_sts-test_pearson_max": 0.7876359552191669, "eval_sts-test_spearman_cosine": 0.8018874000525117, "eval_sts-test_spearman_dot": 0.5034518981121652, "eval_sts-test_spearman_euclidean": 0.7344750702387959, "eval_sts-test_spearman_manhattan": 0.7332804063416474, "eval_sts-test_spearman_max": 0.8018874000525117, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_vitaminc-pairs_loss": 4.784573554992676, "eval_vitaminc-pairs_runtime": 1.145, "eval_vitaminc-pairs_samples_per_second": 74.235, "eval_vitaminc-pairs_steps_per_second": 2.62, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_sts-label_loss": 3.6113080978393555, "eval_sts-label_runtime": 0.2746, "eval_sts-label_samples_per_second": 364.172, "eval_sts-label_steps_per_second": 14.567, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_qnli-contrastive_loss": 0.18593625724315643, "eval_qnli-contrastive_runtime": 0.3541, "eval_qnli-contrastive_samples_per_second": 282.413, "eval_qnli-contrastive_steps_per_second": 11.297, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_scitail-pairs-qa_loss": 0.07545661181211472, "eval_scitail-pairs-qa_runtime": 0.8854, "eval_scitail-pairs-qa_samples_per_second": 112.941, "eval_scitail-pairs-qa_steps_per_second": 4.518, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_scitail-pairs-pos_loss": 0.5018333792686462, "eval_scitail-pairs-pos_runtime": 1.3443, "eval_scitail-pairs-pos_samples_per_second": 74.386, "eval_scitail-pairs-pos_steps_per_second": 2.975, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_xsum-pairs_loss": 0.2749001085758209, "eval_xsum-pairs_runtime": 0.9439, "eval_xsum-pairs_samples_per_second": 105.939, "eval_xsum-pairs_steps_per_second": 4.238, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_compression-pairs_loss": 0.09735233336687088, "eval_compression-pairs_runtime": 0.2764, "eval_compression-pairs_samples_per_second": 361.753, "eval_compression-pairs_steps_per_second": 14.47, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_sciq_pairs_loss": 0.2648228108882904, "eval_sciq_pairs_runtime": 4.1207, "eval_sciq_pairs_samples_per_second": 24.268, "eval_sciq_pairs_steps_per_second": 0.971, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_qasc_pairs_loss": 0.21318012475967407, "eval_qasc_pairs_runtime": 1.0917, "eval_qasc_pairs_samples_per_second": 91.604, "eval_qasc_pairs_steps_per_second": 3.664, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_openbookqa_pairs_loss": 1.790009617805481, "eval_openbookqa_pairs_runtime": 0.8969, "eval_openbookqa_pairs_samples_per_second": 111.496, "eval_openbookqa_pairs_steps_per_second": 4.46, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_msmarco_pairs_loss": 0.57186359167099, "eval_msmarco_pairs_runtime": 2.0592, "eval_msmarco_pairs_samples_per_second": 48.563, "eval_msmarco_pairs_steps_per_second": 1.943, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_nq_pairs_loss": 0.2738310396671295, "eval_nq_pairs_runtime": 4.5092, "eval_nq_pairs_samples_per_second": 22.177, "eval_nq_pairs_steps_per_second": 0.887, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_trivia_pairs_loss": 0.8291679620742798, "eval_trivia_pairs_runtime": 6.526, "eval_trivia_pairs_samples_per_second": 15.323, "eval_trivia_pairs_steps_per_second": 0.613, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_quora_pairs_loss": 0.08000540733337402, "eval_quora_pairs_runtime": 0.6761, "eval_quora_pairs_samples_per_second": 147.909, "eval_quora_pairs_steps_per_second": 5.916, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_gooaq_pairs_loss": 0.5998037457466125, "eval_gooaq_pairs_runtime": 1.3978, "eval_gooaq_pairs_samples_per_second": 71.541, "eval_gooaq_pairs_steps_per_second": 2.862, "step": 1600 }, { "epoch": 0.2507443974298699, "eval_mrpc_pairs_loss": 0.05507182702422142, "eval_mrpc_pairs_runtime": 0.2617, "eval_mrpc_pairs_samples_per_second": 382.156, "eval_mrpc_pairs_steps_per_second": 15.286, "step": 1600 }, { "epoch": 0.2557592853784673, "grad_norm": 8.05022144317627, "learning_rate": 2.9056147399020182e-05, "loss": 1.4006, "step": 1632 }, { "epoch": 0.2607741733270647, "grad_norm": 0.38224154710769653, "learning_rate": 2.8964911249859437e-05, "loss": 0.5963, "step": 1664 }, { "epoch": 0.2657890612756621, "grad_norm": 0.46655791997909546, "learning_rate": 2.886962347822604e-05, "loss": 0.7488, "step": 1696 }, { "epoch": 0.27080394922425954, "grad_norm": 8.102537155151367, "learning_rate": 2.8770311729843616e-05, "loss": 0.8548, "step": 1728 }, { "epoch": 0.27581883717285693, "grad_norm": 11.803775787353516, "learning_rate": 2.86670048179072e-05, "loss": 1.3324, "step": 1760 }, { "epoch": 0.2808337251214543, "grad_norm": 16.266756057739258, "learning_rate": 2.8559732714723715e-05, "loss": 0.5804, "step": 1792 }, { "epoch": 0.2858486130700517, "grad_norm": 2.8448822498321533, "learning_rate": 2.8448526543016114e-05, "loss": 0.7827, "step": 1824 }, { "epoch": 0.2908635010186491, "grad_norm": 21.346328735351562, "learning_rate": 2.8333418566893796e-05, "loss": 0.5448, "step": 1856 }, { "epoch": 0.2958783889672465, "grad_norm": 3.4379029273986816, "learning_rate": 2.8214442182491866e-05, "loss": 0.7368, "step": 1888 }, { "epoch": 0.3008932769158439, "grad_norm": 17.05881690979004, "learning_rate": 2.8091631908281963e-05, "loss": 0.5657, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_nli-pairs_loss": 1.0244356393814087, "eval_nli-pairs_runtime": 3.6217, "eval_nli-pairs_samples_per_second": 27.612, "eval_nli-pairs_steps_per_second": 1.104, "eval_sts-test_pearson_cosine": 0.781915957368962, "eval_sts-test_pearson_dot": 0.49821032356844613, "eval_sts-test_pearson_euclidean": 0.7329308897504494, "eval_sts-test_pearson_manhattan": 0.7292186092506918, "eval_sts-test_pearson_max": 0.781915957368962, "eval_sts-test_spearman_cosine": 0.7983596570250642, "eval_sts-test_spearman_dot": 0.4812350313638781, "eval_sts-test_spearman_euclidean": 0.7265758267352669, "eval_sts-test_spearman_manhattan": 0.7259264140902829, "eval_sts-test_spearman_max": 0.7983596570250642, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_vitaminc-pairs_loss": 4.698296070098877, "eval_vitaminc-pairs_runtime": 1.1338, "eval_vitaminc-pairs_samples_per_second": 74.97, "eval_vitaminc-pairs_steps_per_second": 2.646, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_sts-label_loss": 3.1822261810302734, "eval_sts-label_runtime": 0.2702, "eval_sts-label_samples_per_second": 370.09, "eval_sts-label_steps_per_second": 14.804, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_qnli-contrastive_loss": 0.11326340585947037, "eval_qnli-contrastive_runtime": 0.3581, "eval_qnli-contrastive_samples_per_second": 279.28, "eval_qnli-contrastive_steps_per_second": 11.171, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_scitail-pairs-qa_loss": 0.07009608298540115, "eval_scitail-pairs-qa_runtime": 0.8816, "eval_scitail-pairs-qa_samples_per_second": 113.424, "eval_scitail-pairs-qa_steps_per_second": 4.537, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_scitail-pairs-pos_loss": 0.49156129360198975, "eval_scitail-pairs-pos_runtime": 1.3759, "eval_scitail-pairs-pos_samples_per_second": 72.678, "eval_scitail-pairs-pos_steps_per_second": 2.907, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_xsum-pairs_loss": 0.25940877199172974, "eval_xsum-pairs_runtime": 0.9373, "eval_xsum-pairs_samples_per_second": 106.695, "eval_xsum-pairs_steps_per_second": 4.268, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_compression-pairs_loss": 0.0919649675488472, "eval_compression-pairs_runtime": 0.2738, "eval_compression-pairs_samples_per_second": 365.291, "eval_compression-pairs_steps_per_second": 14.612, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_sciq_pairs_loss": 0.29138606786727905, "eval_sciq_pairs_runtime": 4.1059, "eval_sciq_pairs_samples_per_second": 24.355, "eval_sciq_pairs_steps_per_second": 0.974, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_qasc_pairs_loss": 0.19625085592269897, "eval_qasc_pairs_runtime": 1.0611, "eval_qasc_pairs_samples_per_second": 94.24, "eval_qasc_pairs_steps_per_second": 3.77, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_openbookqa_pairs_loss": 1.7960456609725952, "eval_openbookqa_pairs_runtime": 0.9042, "eval_openbookqa_pairs_samples_per_second": 110.601, "eval_openbookqa_pairs_steps_per_second": 4.424, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_msmarco_pairs_loss": 0.5171416997909546, "eval_msmarco_pairs_runtime": 2.0637, "eval_msmarco_pairs_samples_per_second": 48.457, "eval_msmarco_pairs_steps_per_second": 1.938, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_nq_pairs_loss": 0.24809740483760834, "eval_nq_pairs_runtime": 4.529, "eval_nq_pairs_samples_per_second": 22.08, "eval_nq_pairs_steps_per_second": 0.883, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_trivia_pairs_loss": 0.9041999578475952, "eval_trivia_pairs_runtime": 6.5257, "eval_trivia_pairs_samples_per_second": 15.324, "eval_trivia_pairs_steps_per_second": 0.613, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_quora_pairs_loss": 0.03601976856589317, "eval_quora_pairs_runtime": 0.6811, "eval_quora_pairs_samples_per_second": 146.827, "eval_quora_pairs_steps_per_second": 5.873, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_gooaq_pairs_loss": 0.5626399517059326, "eval_gooaq_pairs_runtime": 1.3943, "eval_gooaq_pairs_samples_per_second": 71.72, "eval_gooaq_pairs_steps_per_second": 2.869, "step": 1920 }, { "epoch": 0.3008932769158439, "eval_mrpc_pairs_loss": 0.04984402656555176, "eval_mrpc_pairs_runtime": 0.2579, "eval_mrpc_pairs_samples_per_second": 387.725, "eval_mrpc_pairs_steps_per_second": 15.509, "step": 1920 }, { "epoch": 0.30590816486444133, "grad_norm": 22.65591812133789, "learning_rate": 2.796502337505742e-05, "loss": 0.7425, "step": 1952 }, { "epoch": 0.3109230528130387, "grad_norm": 10.119640350341797, "learning_rate": 2.78346533155958e-05, "loss": 0.7819, "step": 1984 }, { "epoch": 0.3159379407616361, "grad_norm": 8.690531730651855, "learning_rate": 2.770055955400161e-05, "loss": 0.5937, "step": 2016 }, { "epoch": 0.3209528287102335, "grad_norm": 0.8992699384689331, "learning_rate": 2.7562780994732476e-05, "loss": 0.8133, "step": 2048 }, { "epoch": 0.3259677166588309, "grad_norm": 10.619684219360352, "learning_rate": 2.7421357611311824e-05, "loss": 1.0674, "step": 2080 }, { "epoch": 0.3309826046074283, "grad_norm": 7.222084045410156, "learning_rate": 2.727633043473141e-05, "loss": 0.6288, "step": 2112 }, { "epoch": 0.3359974925560257, "grad_norm": 10.166888236999512, "learning_rate": 2.712774154154707e-05, "loss": 0.5866, "step": 2144 }, { "epoch": 0.3410123805046231, "grad_norm": 0.36360761523246765, "learning_rate": 2.6975634041671052e-05, "loss": 0.6962, "step": 2176 }, { "epoch": 0.3460272684532205, "grad_norm": 9.586665153503418, "learning_rate": 2.6820052065864665e-05, "loss": 0.5562, "step": 2208 }, { "epoch": 0.3510421564018179, "grad_norm": 1.1307642459869385, "learning_rate": 2.6661040752934594e-05, "loss": 0.8871, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_nli-pairs_loss": 1.0147591829299927, "eval_nli-pairs_runtime": 3.7201, "eval_nli-pairs_samples_per_second": 26.881, "eval_nli-pairs_steps_per_second": 1.075, "eval_sts-test_pearson_cosine": 0.7872126529181761, "eval_sts-test_pearson_dot": 0.5062045289861089, "eval_sts-test_pearson_euclidean": 0.7351473988633473, "eval_sts-test_pearson_manhattan": 0.7310226402088944, "eval_sts-test_pearson_max": 0.7872126529181761, "eval_sts-test_spearman_cosine": 0.801487068999052, "eval_sts-test_spearman_dot": 0.4912205722904683, "eval_sts-test_spearman_euclidean": 0.7267262355024484, "eval_sts-test_spearman_manhattan": 0.72510169253649, "eval_sts-test_spearman_max": 0.801487068999052, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_vitaminc-pairs_loss": 4.644638538360596, "eval_vitaminc-pairs_runtime": 1.1453, "eval_vitaminc-pairs_samples_per_second": 74.215, "eval_vitaminc-pairs_steps_per_second": 2.619, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_sts-label_loss": 3.915343999862671, "eval_sts-label_runtime": 0.2807, "eval_sts-label_samples_per_second": 356.217, "eval_sts-label_steps_per_second": 14.249, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_qnli-contrastive_loss": 0.11220741271972656, "eval_qnli-contrastive_runtime": 0.3614, "eval_qnli-contrastive_samples_per_second": 276.705, "eval_qnli-contrastive_steps_per_second": 11.068, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_scitail-pairs-qa_loss": 0.06635177880525589, "eval_scitail-pairs-qa_runtime": 0.8881, "eval_scitail-pairs-qa_samples_per_second": 112.594, "eval_scitail-pairs-qa_steps_per_second": 4.504, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_scitail-pairs-pos_loss": 0.5765587687492371, "eval_scitail-pairs-pos_runtime": 1.3496, "eval_scitail-pairs-pos_samples_per_second": 74.097, "eval_scitail-pairs-pos_steps_per_second": 2.964, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_xsum-pairs_loss": 0.2595808804035187, "eval_xsum-pairs_runtime": 0.9377, "eval_xsum-pairs_samples_per_second": 106.641, "eval_xsum-pairs_steps_per_second": 4.266, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_compression-pairs_loss": 0.0918564721941948, "eval_compression-pairs_runtime": 0.2755, "eval_compression-pairs_samples_per_second": 363.032, "eval_compression-pairs_steps_per_second": 14.521, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_sciq_pairs_loss": 0.284303218126297, "eval_sciq_pairs_runtime": 4.1289, "eval_sciq_pairs_samples_per_second": 24.22, "eval_sciq_pairs_steps_per_second": 0.969, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_qasc_pairs_loss": 0.19232892990112305, "eval_qasc_pairs_runtime": 1.0709, "eval_qasc_pairs_samples_per_second": 93.384, "eval_qasc_pairs_steps_per_second": 3.735, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_openbookqa_pairs_loss": 1.6234371662139893, "eval_openbookqa_pairs_runtime": 0.9558, "eval_openbookqa_pairs_samples_per_second": 104.62, "eval_openbookqa_pairs_steps_per_second": 4.185, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_msmarco_pairs_loss": 0.5325217247009277, "eval_msmarco_pairs_runtime": 2.0971, "eval_msmarco_pairs_samples_per_second": 47.685, "eval_msmarco_pairs_steps_per_second": 1.907, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_nq_pairs_loss": 0.2721095681190491, "eval_nq_pairs_runtime": 4.5393, "eval_nq_pairs_samples_per_second": 22.03, "eval_nq_pairs_steps_per_second": 0.881, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_trivia_pairs_loss": 0.8544899821281433, "eval_trivia_pairs_runtime": 6.4668, "eval_trivia_pairs_samples_per_second": 15.464, "eval_trivia_pairs_steps_per_second": 0.619, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_quora_pairs_loss": 0.08441996574401855, "eval_quora_pairs_runtime": 0.6933, "eval_quora_pairs_samples_per_second": 144.233, "eval_quora_pairs_steps_per_second": 5.769, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_gooaq_pairs_loss": 0.5711588859558105, "eval_gooaq_pairs_runtime": 1.3941, "eval_gooaq_pairs_samples_per_second": 71.733, "eval_gooaq_pairs_steps_per_second": 2.869, "step": 2240 }, { "epoch": 0.3510421564018179, "eval_mrpc_pairs_loss": 0.05093960464000702, "eval_mrpc_pairs_runtime": 0.2633, "eval_mrpc_pairs_samples_per_second": 379.777, "eval_mrpc_pairs_steps_per_second": 15.191, "step": 2240 }, { "epoch": 0.3560570443504153, "grad_norm": 0.39178094267845154, "learning_rate": 2.6498646236636892e-05, "loss": 0.6805, "step": 2272 }, { "epoch": 0.3610719322990127, "grad_norm": 7.91475248336792, "learning_rate": 2.6332915632292237e-05, "loss": 1.0451, "step": 2304 }, { "epoch": 0.3660868202476101, "grad_norm": 31.54157066345215, "learning_rate": 2.616389702311641e-05, "loss": 1.0603, "step": 2336 }, { "epoch": 0.37110170819620747, "grad_norm": 8.400779724121094, "learning_rate": 2.5991639446269964e-05, "loss": 0.8142, "step": 2368 }, { "epoch": 0.3761165961448049, "grad_norm": 20.99441146850586, "learning_rate": 2.5816192878631166e-05, "loss": 1.7211, "step": 2400 }, { "epoch": 0.3811314840934023, "grad_norm": 10.574430465698242, "learning_rate": 2.5637608222296237e-05, "loss": 0.7523, "step": 2432 }, { "epoch": 0.3861463720419997, "grad_norm": 0.8941424489021301, "learning_rate": 2.5455937289811207e-05, "loss": 0.8053, "step": 2464 }, { "epoch": 0.3911612599905971, "grad_norm": 1.9402281045913696, "learning_rate": 2.5271232789139587e-05, "loss": 0.8427, "step": 2496 }, { "epoch": 0.3961761479391945, "grad_norm": 23.42873764038086, "learning_rate": 2.5083548308370296e-05, "loss": 0.8204, "step": 2528 }, { "epoch": 0.40119103588779187, "grad_norm": 4.5422234535217285, "learning_rate": 2.4892938300170198e-05, "loss": 0.5343, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_nli-pairs_loss": 1.002213478088379, "eval_nli-pairs_runtime": 3.8843, "eval_nli-pairs_samples_per_second": 25.745, "eval_nli-pairs_steps_per_second": 1.03, "eval_sts-test_pearson_cosine": 0.7872537557423719, "eval_sts-test_pearson_dot": 0.5372668921721468, "eval_sts-test_pearson_euclidean": 0.7383744840101544, "eval_sts-test_pearson_manhattan": 0.7333039162515002, "eval_sts-test_pearson_max": 0.7872537557423719, "eval_sts-test_spearman_cosine": 0.8038647026605977, "eval_sts-test_spearman_dot": 0.5191465873751544, "eval_sts-test_spearman_euclidean": 0.730034619048548, "eval_sts-test_spearman_manhattan": 0.7277569753761504, "eval_sts-test_spearman_max": 0.8038647026605977, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_vitaminc-pairs_loss": 4.723379135131836, "eval_vitaminc-pairs_runtime": 1.3031, "eval_vitaminc-pairs_samples_per_second": 65.23, "eval_vitaminc-pairs_steps_per_second": 2.302, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_sts-label_loss": 3.8185579776763916, "eval_sts-label_runtime": 0.4182, "eval_sts-label_samples_per_second": 239.094, "eval_sts-label_steps_per_second": 9.564, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_qnli-contrastive_loss": 0.15084019303321838, "eval_qnli-contrastive_runtime": 0.3638, "eval_qnli-contrastive_samples_per_second": 274.906, "eval_qnli-contrastive_steps_per_second": 10.996, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_scitail-pairs-qa_loss": 0.06741151213645935, "eval_scitail-pairs-qa_runtime": 0.9458, "eval_scitail-pairs-qa_samples_per_second": 105.735, "eval_scitail-pairs-qa_steps_per_second": 4.229, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_scitail-pairs-pos_loss": 0.47680819034576416, "eval_scitail-pairs-pos_runtime": 1.4736, "eval_scitail-pairs-pos_samples_per_second": 67.859, "eval_scitail-pairs-pos_steps_per_second": 2.714, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_xsum-pairs_loss": 0.2572269141674042, "eval_xsum-pairs_runtime": 0.9448, "eval_xsum-pairs_samples_per_second": 105.847, "eval_xsum-pairs_steps_per_second": 4.234, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_compression-pairs_loss": 0.09604756534099579, "eval_compression-pairs_runtime": 0.2774, "eval_compression-pairs_samples_per_second": 360.554, "eval_compression-pairs_steps_per_second": 14.422, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_sciq_pairs_loss": 0.2735004425048828, "eval_sciq_pairs_runtime": 4.2103, "eval_sciq_pairs_samples_per_second": 23.751, "eval_sciq_pairs_steps_per_second": 0.95, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_qasc_pairs_loss": 0.1924300342798233, "eval_qasc_pairs_runtime": 1.1352, "eval_qasc_pairs_samples_per_second": 88.089, "eval_qasc_pairs_steps_per_second": 3.524, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_openbookqa_pairs_loss": 1.6290359497070312, "eval_openbookqa_pairs_runtime": 0.9392, "eval_openbookqa_pairs_samples_per_second": 106.476, "eval_openbookqa_pairs_steps_per_second": 4.259, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_msmarco_pairs_loss": 0.518312931060791, "eval_msmarco_pairs_runtime": 2.121, "eval_msmarco_pairs_samples_per_second": 47.147, "eval_msmarco_pairs_steps_per_second": 1.886, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_nq_pairs_loss": 0.3077375292778015, "eval_nq_pairs_runtime": 4.6617, "eval_nq_pairs_samples_per_second": 21.451, "eval_nq_pairs_steps_per_second": 0.858, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_trivia_pairs_loss": 0.8588294386863708, "eval_trivia_pairs_runtime": 6.6293, "eval_trivia_pairs_samples_per_second": 15.085, "eval_trivia_pairs_steps_per_second": 0.603, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_quora_pairs_loss": 0.07980062067508698, "eval_quora_pairs_runtime": 0.7261, "eval_quora_pairs_samples_per_second": 137.72, "eval_quora_pairs_steps_per_second": 5.509, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_gooaq_pairs_loss": 0.6570906043052673, "eval_gooaq_pairs_runtime": 1.5071, "eval_gooaq_pairs_samples_per_second": 66.352, "eval_gooaq_pairs_steps_per_second": 2.654, "step": 2560 }, { "epoch": 0.40119103588779187, "eval_mrpc_pairs_loss": 0.051231566816568375, "eval_mrpc_pairs_runtime": 0.2799, "eval_mrpc_pairs_samples_per_second": 357.322, "eval_mrpc_pairs_steps_per_second": 14.293, "step": 2560 }, { "epoch": 0.40620592383638926, "grad_norm": 37.2639045715332, "learning_rate": 2.4699458065985813e-05, "loss": 0.9709, "step": 2592 }, { "epoch": 0.4112208117849867, "grad_norm": 15.363207817077637, "learning_rate": 2.45031637399988e-05, "loss": 0.708, "step": 2624 }, { "epoch": 0.4162356997335841, "grad_norm": 1.8831324577331543, "learning_rate": 2.430411227283978e-05, "loss": 0.4083, "step": 2656 }, { "epoch": 0.4212505876821815, "grad_norm": 5.664551734924316, "learning_rate": 2.4102361415065367e-05, "loss": 0.8732, "step": 2688 }, { "epoch": 0.4262654756307789, "grad_norm": 0.615675151348114, "learning_rate": 2.3897969700403022e-05, "loss": 1.2616, "step": 2720 }, { "epoch": 0.43128036357937627, "grad_norm": 19.81829261779785, "learning_rate": 2.3690996428768772e-05, "loss": 1.3324, "step": 2752 }, { "epoch": 0.43629525152797366, "grad_norm": 6.3363118171691895, "learning_rate": 2.348150164906257e-05, "loss": 0.6244, "step": 2784 }, { "epoch": 0.44131013947657105, "grad_norm": 1.103615641593933, "learning_rate": 2.3269546141746407e-05, "loss": 0.6176, "step": 2816 }, { "epoch": 0.44632502742516844, "grad_norm": 11.468894004821777, "learning_rate": 2.3055191401210126e-05, "loss": 0.6926, "step": 2848 }, { "epoch": 0.4513399153737659, "grad_norm": 4.0951619148254395, "learning_rate": 2.283849961793017e-05, "loss": 0.8158, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_nli-pairs_loss": 1.2103344202041626, "eval_nli-pairs_runtime": 3.656, "eval_nli-pairs_samples_per_second": 27.353, "eval_nli-pairs_steps_per_second": 1.094, "eval_sts-test_pearson_cosine": 0.7884135608823999, "eval_sts-test_pearson_dot": 0.5043809957478502, "eval_sts-test_pearson_euclidean": 0.73325296875941, "eval_sts-test_pearson_manhattan": 0.7274442771815695, "eval_sts-test_pearson_max": 0.7884135608823999, "eval_sts-test_spearman_cosine": 0.8024151272859597, "eval_sts-test_spearman_dot": 0.4849613226687463, "eval_sts-test_spearman_euclidean": 0.7267107319000072, "eval_sts-test_spearman_manhattan": 0.7238097600272174, "eval_sts-test_spearman_max": 0.8024151272859597, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_vitaminc-pairs_loss": 4.7560882568359375, "eval_vitaminc-pairs_runtime": 1.1898, "eval_vitaminc-pairs_samples_per_second": 71.438, "eval_vitaminc-pairs_steps_per_second": 2.521, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_sts-label_loss": 3.4280478954315186, "eval_sts-label_runtime": 0.2879, "eval_sts-label_samples_per_second": 347.303, "eval_sts-label_steps_per_second": 13.892, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_qnli-contrastive_loss": 0.1333482712507248, "eval_qnli-contrastive_runtime": 0.3658, "eval_qnli-contrastive_samples_per_second": 273.37, "eval_qnli-contrastive_steps_per_second": 10.935, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_scitail-pairs-qa_loss": 0.0703386664390564, "eval_scitail-pairs-qa_runtime": 0.8879, "eval_scitail-pairs-qa_samples_per_second": 112.63, "eval_scitail-pairs-qa_steps_per_second": 4.505, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_scitail-pairs-pos_loss": 0.4763020873069763, "eval_scitail-pairs-pos_runtime": 1.3239, "eval_scitail-pairs-pos_samples_per_second": 75.532, "eval_scitail-pairs-pos_steps_per_second": 3.021, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_xsum-pairs_loss": 0.25743284821510315, "eval_xsum-pairs_runtime": 0.9333, "eval_xsum-pairs_samples_per_second": 107.15, "eval_xsum-pairs_steps_per_second": 4.286, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_compression-pairs_loss": 0.09842805564403534, "eval_compression-pairs_runtime": 0.2944, "eval_compression-pairs_samples_per_second": 339.674, "eval_compression-pairs_steps_per_second": 13.587, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_sciq_pairs_loss": 0.28244778513908386, "eval_sciq_pairs_runtime": 4.0785, "eval_sciq_pairs_samples_per_second": 24.519, "eval_sciq_pairs_steps_per_second": 0.981, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_qasc_pairs_loss": 0.18051397800445557, "eval_qasc_pairs_runtime": 1.0561, "eval_qasc_pairs_samples_per_second": 94.69, "eval_qasc_pairs_steps_per_second": 3.788, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_openbookqa_pairs_loss": 1.5708725452423096, "eval_openbookqa_pairs_runtime": 0.9072, "eval_openbookqa_pairs_samples_per_second": 110.229, "eval_openbookqa_pairs_steps_per_second": 4.409, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_msmarco_pairs_loss": 0.5720314979553223, "eval_msmarco_pairs_runtime": 2.0694, "eval_msmarco_pairs_samples_per_second": 48.322, "eval_msmarco_pairs_steps_per_second": 1.933, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_nq_pairs_loss": 0.2748319208621979, "eval_nq_pairs_runtime": 4.5496, "eval_nq_pairs_samples_per_second": 21.98, "eval_nq_pairs_steps_per_second": 0.879, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_trivia_pairs_loss": 0.8936847448348999, "eval_trivia_pairs_runtime": 6.4784, "eval_trivia_pairs_samples_per_second": 15.436, "eval_trivia_pairs_steps_per_second": 0.617, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_quora_pairs_loss": 0.07990340888500214, "eval_quora_pairs_runtime": 0.6852, "eval_quora_pairs_samples_per_second": 145.945, "eval_quora_pairs_steps_per_second": 5.838, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_gooaq_pairs_loss": 0.6210995316505432, "eval_gooaq_pairs_runtime": 1.4234, "eval_gooaq_pairs_samples_per_second": 70.255, "eval_gooaq_pairs_steps_per_second": 2.81, "step": 2880 }, { "epoch": 0.4513399153737659, "eval_mrpc_pairs_loss": 0.053870730102062225, "eval_mrpc_pairs_runtime": 0.2678, "eval_mrpc_pairs_samples_per_second": 373.436, "eval_mrpc_pairs_steps_per_second": 14.937, "step": 2880 }, { "epoch": 0.4563548033223633, "grad_norm": 0.5031663775444031, "learning_rate": 2.261953366042628e-05, "loss": 1.4753, "step": 2912 }, { "epoch": 0.46136969127096067, "grad_norm": 3.3404605388641357, "learning_rate": 2.239835705702158e-05, "loss": 0.5735, "step": 2944 }, { "epoch": 0.46638457921955806, "grad_norm": 14.60761547088623, "learning_rate": 2.217503397741115e-05, "loss": 1.2261, "step": 2976 }, { "epoch": 0.47139946716815545, "grad_norm": 0.7826951146125793, "learning_rate": 2.194962921404456e-05, "loss": 0.6085, "step": 3008 }, { "epoch": 0.47641435511675284, "grad_norm": 5.523419380187988, "learning_rate": 2.1722208163327738e-05, "loss": 0.8766, "step": 3040 }, { "epoch": 0.48142924306535023, "grad_norm": 1.2507153749465942, "learning_rate": 2.1492836806649564e-05, "loss": 1.1824, "step": 3072 }, { "epoch": 0.4864441310139477, "grad_norm": 10.76526165008545, "learning_rate": 2.1261581691238775e-05, "loss": 0.7192, "step": 3104 }, { "epoch": 0.49145901896254507, "grad_norm": 2.5375277996063232, "learning_rate": 2.1028509910856705e-05, "loss": 0.6131, "step": 3136 }, { "epoch": 0.49647390691114246, "grad_norm": 6.569655418395996, "learning_rate": 2.0793689086331472e-05, "loss": 0.7407, "step": 3168 }, { "epoch": 0.5014887948597399, "grad_norm": 0.42745527625083923, "learning_rate": 2.055718734593919e-05, "loss": 0.5857, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_nli-pairs_loss": 1.1431602239608765, "eval_nli-pairs_runtime": 3.6407, "eval_nli-pairs_samples_per_second": 27.467, "eval_nli-pairs_steps_per_second": 1.099, "eval_sts-test_pearson_cosine": 0.7838341260331343, "eval_sts-test_pearson_dot": 0.5274891201747137, "eval_sts-test_pearson_euclidean": 0.734987175544037, "eval_sts-test_pearson_manhattan": 0.7296263541205231, "eval_sts-test_pearson_max": 0.7838341260331343, "eval_sts-test_spearman_cosine": 0.8013224760849562, "eval_sts-test_spearman_dot": 0.5061225327907017, "eval_sts-test_spearman_euclidean": 0.7282525362996873, "eval_sts-test_spearman_manhattan": 0.7265322068183514, "eval_sts-test_spearman_max": 0.8013224760849562, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_vitaminc-pairs_loss": 4.748112201690674, "eval_vitaminc-pairs_runtime": 1.1378, "eval_vitaminc-pairs_samples_per_second": 74.706, "eval_vitaminc-pairs_steps_per_second": 2.637, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_sts-label_loss": 3.9402565956115723, "eval_sts-label_runtime": 0.2789, "eval_sts-label_samples_per_second": 358.596, "eval_sts-label_steps_per_second": 14.344, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_qnli-contrastive_loss": 0.10341227799654007, "eval_qnli-contrastive_runtime": 0.3605, "eval_qnli-contrastive_samples_per_second": 277.417, "eval_qnli-contrastive_steps_per_second": 11.097, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_scitail-pairs-qa_loss": 0.06673895567655563, "eval_scitail-pairs-qa_runtime": 0.8765, "eval_scitail-pairs-qa_samples_per_second": 114.092, "eval_scitail-pairs-qa_steps_per_second": 4.564, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_scitail-pairs-pos_loss": 0.510690450668335, "eval_scitail-pairs-pos_runtime": 1.3274, "eval_scitail-pairs-pos_samples_per_second": 75.334, "eval_scitail-pairs-pos_steps_per_second": 3.013, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_xsum-pairs_loss": 0.26573723554611206, "eval_xsum-pairs_runtime": 0.9342, "eval_xsum-pairs_samples_per_second": 107.047, "eval_xsum-pairs_steps_per_second": 4.282, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_compression-pairs_loss": 0.09096826612949371, "eval_compression-pairs_runtime": 0.2779, "eval_compression-pairs_samples_per_second": 359.804, "eval_compression-pairs_steps_per_second": 14.392, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_sciq_pairs_loss": 0.30787500739097595, "eval_sciq_pairs_runtime": 4.1007, "eval_sciq_pairs_samples_per_second": 24.386, "eval_sciq_pairs_steps_per_second": 0.975, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_qasc_pairs_loss": 0.1825849413871765, "eval_qasc_pairs_runtime": 1.0526, "eval_qasc_pairs_samples_per_second": 94.998, "eval_qasc_pairs_steps_per_second": 3.8, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_openbookqa_pairs_loss": 1.5945305824279785, "eval_openbookqa_pairs_runtime": 0.8948, "eval_openbookqa_pairs_samples_per_second": 111.759, "eval_openbookqa_pairs_steps_per_second": 4.47, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_msmarco_pairs_loss": 0.5864604711532593, "eval_msmarco_pairs_runtime": 2.0556, "eval_msmarco_pairs_samples_per_second": 48.646, "eval_msmarco_pairs_steps_per_second": 1.946, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_nq_pairs_loss": 0.2538978159427643, "eval_nq_pairs_runtime": 4.5409, "eval_nq_pairs_samples_per_second": 22.022, "eval_nq_pairs_steps_per_second": 0.881, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_trivia_pairs_loss": 0.8825237154960632, "eval_trivia_pairs_runtime": 6.4701, "eval_trivia_pairs_samples_per_second": 15.456, "eval_trivia_pairs_steps_per_second": 0.618, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_quora_pairs_loss": 0.06264814734458923, "eval_quora_pairs_runtime": 0.6792, "eval_quora_pairs_samples_per_second": 147.238, "eval_quora_pairs_steps_per_second": 5.89, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_gooaq_pairs_loss": 0.5953384041786194, "eval_gooaq_pairs_runtime": 1.4186, "eval_gooaq_pairs_samples_per_second": 70.49, "eval_gooaq_pairs_steps_per_second": 2.82, "step": 3200 }, { "epoch": 0.5014887948597399, "eval_mrpc_pairs_loss": 0.05028616264462471, "eval_mrpc_pairs_runtime": 0.2664, "eval_mrpc_pairs_samples_per_second": 375.444, "eval_mrpc_pairs_steps_per_second": 15.018, "step": 3200 }, { "epoch": 0.5065036828083372, "grad_norm": 17.477581024169922, "learning_rate": 2.0319073305638035e-05, "loss": 0.6212, "step": 3232 }, { "epoch": 0.5115185707569346, "grad_norm": 15.705268859863281, "learning_rate": 2.0079416049160762e-05, "loss": 1.1408, "step": 3264 }, { "epoch": 0.516533458705532, "grad_norm": 15.518088340759277, "learning_rate": 1.983828510797154e-05, "loss": 0.6898, "step": 3296 }, { "epoch": 0.5215483466541294, "grad_norm": 18.28449058532715, "learning_rate": 1.9595750441092844e-05, "loss": 0.9827, "step": 3328 }, { "epoch": 0.5265632346027268, "grad_norm": 11.187614440917969, "learning_rate": 1.935188241480837e-05, "loss": 0.9518, "step": 3360 }, { "epoch": 0.5315781225513242, "grad_norm": 24.515199661254883, "learning_rate": 1.910675178224773e-05, "loss": 0.5584, "step": 3392 }, { "epoch": 0.5365930104999217, "grad_norm": 21.595224380493164, "learning_rate": 1.886042966285894e-05, "loss": 1.3362, "step": 3424 }, { "epoch": 0.5416078984485191, "grad_norm": 14.934494972229004, "learning_rate": 1.8612987521774603e-05, "loss": 0.4418, "step": 3456 }, { "epoch": 0.5466227863971165, "grad_norm": 1.0222537517547607, "learning_rate": 1.836449714907785e-05, "loss": 0.5896, "step": 3488 }, { "epoch": 0.5516376743457139, "grad_norm": 13.705151557922363, "learning_rate": 1.811503063897396e-05, "loss": 0.7951, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_nli-pairs_loss": 1.0016616582870483, "eval_nli-pairs_runtime": 3.6365, "eval_nli-pairs_samples_per_second": 27.499, "eval_nli-pairs_steps_per_second": 1.1, "eval_sts-test_pearson_cosine": 0.783269156461013, "eval_sts-test_pearson_dot": 0.5146760761775918, "eval_sts-test_pearson_euclidean": 0.7293244171224789, "eval_sts-test_pearson_manhattan": 0.722566066058283, "eval_sts-test_pearson_max": 0.783269156461013, "eval_sts-test_spearman_cosine": 0.800346163751739, "eval_sts-test_spearman_dot": 0.49134463318009686, "eval_sts-test_spearman_euclidean": 0.7220780456605193, "eval_sts-test_spearman_manhattan": 0.7185570530657137, "eval_sts-test_spearman_max": 0.800346163751739, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_vitaminc-pairs_loss": 4.628457546234131, "eval_vitaminc-pairs_runtime": 1.1358, "eval_vitaminc-pairs_samples_per_second": 74.837, "eval_vitaminc-pairs_steps_per_second": 2.641, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_sts-label_loss": 3.698469877243042, "eval_sts-label_runtime": 0.2763, "eval_sts-label_samples_per_second": 361.871, "eval_sts-label_steps_per_second": 14.475, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_qnli-contrastive_loss": 0.11857427656650543, "eval_qnli-contrastive_runtime": 0.3599, "eval_qnli-contrastive_samples_per_second": 277.865, "eval_qnli-contrastive_steps_per_second": 11.115, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_scitail-pairs-qa_loss": 0.06011494621634483, "eval_scitail-pairs-qa_runtime": 0.8855, "eval_scitail-pairs-qa_samples_per_second": 112.93, "eval_scitail-pairs-qa_steps_per_second": 4.517, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_scitail-pairs-pos_loss": 0.5179685950279236, "eval_scitail-pairs-pos_runtime": 1.3428, "eval_scitail-pairs-pos_samples_per_second": 74.469, "eval_scitail-pairs-pos_steps_per_second": 2.979, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_xsum-pairs_loss": 0.2575337886810303, "eval_xsum-pairs_runtime": 0.9362, "eval_xsum-pairs_samples_per_second": 106.81, "eval_xsum-pairs_steps_per_second": 4.272, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_compression-pairs_loss": 0.08986295014619827, "eval_compression-pairs_runtime": 0.2735, "eval_compression-pairs_samples_per_second": 365.659, "eval_compression-pairs_steps_per_second": 14.626, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_sciq_pairs_loss": 0.2898155748844147, "eval_sciq_pairs_runtime": 4.1009, "eval_sciq_pairs_samples_per_second": 24.385, "eval_sciq_pairs_steps_per_second": 0.975, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_qasc_pairs_loss": 0.1790761798620224, "eval_qasc_pairs_runtime": 1.0559, "eval_qasc_pairs_samples_per_second": 94.702, "eval_qasc_pairs_steps_per_second": 3.788, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_openbookqa_pairs_loss": 1.6558103561401367, "eval_openbookqa_pairs_runtime": 0.8846, "eval_openbookqa_pairs_samples_per_second": 113.048, "eval_openbookqa_pairs_steps_per_second": 4.522, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_msmarco_pairs_loss": 0.5547183156013489, "eval_msmarco_pairs_runtime": 2.0592, "eval_msmarco_pairs_samples_per_second": 48.563, "eval_msmarco_pairs_steps_per_second": 1.943, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_nq_pairs_loss": 0.24799224734306335, "eval_nq_pairs_runtime": 4.5115, "eval_nq_pairs_samples_per_second": 22.166, "eval_nq_pairs_steps_per_second": 0.887, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_trivia_pairs_loss": 0.9036693572998047, "eval_trivia_pairs_runtime": 6.5286, "eval_trivia_pairs_samples_per_second": 15.317, "eval_trivia_pairs_steps_per_second": 0.613, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_quora_pairs_loss": 0.05727443844079971, "eval_quora_pairs_runtime": 0.6763, "eval_quora_pairs_samples_per_second": 147.873, "eval_quora_pairs_steps_per_second": 5.915, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_gooaq_pairs_loss": 0.5602415800094604, "eval_gooaq_pairs_runtime": 1.4132, "eval_gooaq_pairs_samples_per_second": 70.759, "eval_gooaq_pairs_steps_per_second": 2.83, "step": 3520 }, { "epoch": 0.5516376743457139, "eval_mrpc_pairs_loss": 0.04762456938624382, "eval_mrpc_pairs_runtime": 0.2648, "eval_mrpc_pairs_samples_per_second": 377.632, "eval_mrpc_pairs_steps_per_second": 15.105, "step": 3520 }, { "epoch": 0.5566525622943113, "grad_norm": 0.39285340905189514, "learning_rate": 1.7864660368873747e-05, "loss": 0.5201, "step": 3552 }, { "epoch": 0.5616674502429087, "grad_norm": 16.01999855041504, "learning_rate": 1.7613458978394786e-05, "loss": 0.6351, "step": 3584 }, { "epoch": 0.566682338191506, "grad_norm": 0.5487422347068787, "learning_rate": 1.7361499348286606e-05, "loss": 0.8652, "step": 3616 }, { "epoch": 0.5716972261401034, "grad_norm": 0.9249119758605957, "learning_rate": 1.710885457928585e-05, "loss": 0.6407, "step": 3648 }, { "epoch": 0.5767121140887008, "grad_norm": 6.578505992889404, "learning_rate": 1.6855597970907664e-05, "loss": 0.9435, "step": 3680 }, { "epoch": 0.5817270020372982, "grad_norm": 14.307022094726562, "learning_rate": 1.6601803000179394e-05, "loss": 0.9295, "step": 3712 }, { "epoch": 0.5867418899858956, "grad_norm": 16.091779708862305, "learning_rate": 1.6347543300322795e-05, "loss": 0.6829, "step": 3744 }, { "epoch": 0.591756777934493, "grad_norm": 29.058805465698242, "learning_rate": 1.6092892639390916e-05, "loss": 0.8683, "step": 3776 }, { "epoch": 0.5967716658830904, "grad_norm": 13.12238597869873, "learning_rate": 1.583792489886586e-05, "loss": 1.115, "step": 3808 }, { "epoch": 0.6017865538316878, "grad_norm": 11.606388092041016, "learning_rate": 1.558271405222362e-05, "loss": 1.0936, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_nli-pairs_loss": 0.9106074571609497, "eval_nli-pairs_runtime": 3.9467, "eval_nli-pairs_samples_per_second": 25.337, "eval_nli-pairs_steps_per_second": 1.013, "eval_sts-test_pearson_cosine": 0.7831915073063493, "eval_sts-test_pearson_dot": 0.51712727721244, "eval_sts-test_pearson_euclidean": 0.7355201142492419, "eval_sts-test_pearson_manhattan": 0.7299910115321456, "eval_sts-test_pearson_max": 0.7831915073063493, "eval_sts-test_spearman_cosine": 0.8005432620025132, "eval_sts-test_spearman_dot": 0.49466719400094655, "eval_sts-test_spearman_euclidean": 0.7273424991180402, "eval_sts-test_spearman_manhattan": 0.7249394934262583, "eval_sts-test_spearman_max": 0.8005432620025132, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_vitaminc-pairs_loss": 4.7559494972229, "eval_vitaminc-pairs_runtime": 1.1844, "eval_vitaminc-pairs_samples_per_second": 71.768, "eval_vitaminc-pairs_steps_per_second": 2.533, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_sts-label_loss": 3.46917724609375, "eval_sts-label_runtime": 0.3003, "eval_sts-label_samples_per_second": 333.048, "eval_sts-label_steps_per_second": 13.322, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_qnli-contrastive_loss": 0.13890141248703003, "eval_qnli-contrastive_runtime": 0.3729, "eval_qnli-contrastive_samples_per_second": 268.18, "eval_qnli-contrastive_steps_per_second": 10.727, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_scitail-pairs-qa_loss": 0.0611240416765213, "eval_scitail-pairs-qa_runtime": 0.9367, "eval_scitail-pairs-qa_samples_per_second": 106.755, "eval_scitail-pairs-qa_steps_per_second": 4.27, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_scitail-pairs-pos_loss": 0.46203696727752686, "eval_scitail-pairs-pos_runtime": 1.4874, "eval_scitail-pairs-pos_samples_per_second": 67.232, "eval_scitail-pairs-pos_steps_per_second": 2.689, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_xsum-pairs_loss": 0.24919259548187256, "eval_xsum-pairs_runtime": 0.9576, "eval_xsum-pairs_samples_per_second": 104.427, "eval_xsum-pairs_steps_per_second": 4.177, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_compression-pairs_loss": 0.08809012174606323, "eval_compression-pairs_runtime": 0.298, "eval_compression-pairs_samples_per_second": 335.567, "eval_compression-pairs_steps_per_second": 13.423, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_sciq_pairs_loss": 0.28287386894226074, "eval_sciq_pairs_runtime": 4.2668, "eval_sciq_pairs_samples_per_second": 23.437, "eval_sciq_pairs_steps_per_second": 0.937, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_qasc_pairs_loss": 0.1861308217048645, "eval_qasc_pairs_runtime": 1.0488, "eval_qasc_pairs_samples_per_second": 95.351, "eval_qasc_pairs_steps_per_second": 3.814, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_openbookqa_pairs_loss": 1.600982666015625, "eval_openbookqa_pairs_runtime": 0.9077, "eval_openbookqa_pairs_samples_per_second": 110.17, "eval_openbookqa_pairs_steps_per_second": 4.407, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_msmarco_pairs_loss": 0.5555463433265686, "eval_msmarco_pairs_runtime": 2.1064, "eval_msmarco_pairs_samples_per_second": 47.474, "eval_msmarco_pairs_steps_per_second": 1.899, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_nq_pairs_loss": 0.23241031169891357, "eval_nq_pairs_runtime": 4.6119, "eval_nq_pairs_samples_per_second": 21.683, "eval_nq_pairs_steps_per_second": 0.867, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_trivia_pairs_loss": 0.7936394214630127, "eval_trivia_pairs_runtime": 6.6242, "eval_trivia_pairs_samples_per_second": 15.096, "eval_trivia_pairs_steps_per_second": 0.604, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_quora_pairs_loss": 0.05936668440699577, "eval_quora_pairs_runtime": 0.7463, "eval_quora_pairs_samples_per_second": 133.994, "eval_quora_pairs_steps_per_second": 5.36, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_gooaq_pairs_loss": 0.5735708475112915, "eval_gooaq_pairs_runtime": 1.4747, "eval_gooaq_pairs_samples_per_second": 67.809, "eval_gooaq_pairs_steps_per_second": 2.712, "step": 3840 }, { "epoch": 0.6017865538316878, "eval_mrpc_pairs_loss": 0.046309370547533035, "eval_mrpc_pairs_runtime": 0.2694, "eval_mrpc_pairs_samples_per_second": 371.218, "eval_mrpc_pairs_steps_per_second": 14.849, "step": 3840 }, { "epoch": 0.6068014417802852, "grad_norm": 6.513147830963135, "learning_rate": 1.53273341434723e-05, "loss": 0.8689, "step": 3872 }, { "epoch": 0.6118163297288827, "grad_norm": 0.2349071353673935, "learning_rate": 1.5071859265669756e-05, "loss": 0.8692, "step": 3904 }, { "epoch": 0.6168312176774801, "grad_norm": 18.028608322143555, "learning_rate": 1.4816363539427118e-05, "loss": 0.9083, "step": 3936 }, { "epoch": 0.6218461056260774, "grad_norm": 17.381690979003906, "learning_rate": 1.456092109140423e-05, "loss": 1.0782, "step": 3968 }, { "epoch": 0.6268609935746748, "grad_norm": 20.72548484802246, "learning_rate": 1.4305606032803418e-05, "loss": 0.7711, "step": 4000 }, { "epoch": 0.6318758815232722, "grad_norm": 28.311264038085938, "learning_rate": 1.4050492437867641e-05, "loss": 1.0005, "step": 4032 }, { "epoch": 0.6368907694718696, "grad_norm": 14.892809867858887, "learning_rate": 1.3795654322389481e-05, "loss": 0.7229, "step": 4064 }, { "epoch": 0.641905657420467, "grad_norm": 18.567630767822266, "learning_rate": 1.3541165622236977e-05, "loss": 0.4871, "step": 4096 }, { "epoch": 0.6469205453690644, "grad_norm": 8.814851760864258, "learning_rate": 1.3287100171902759e-05, "loss": 0.7853, "step": 4128 }, { "epoch": 0.6519354333176618, "grad_norm": 19.43486785888672, "learning_rate": 1.3033531683082495e-05, "loss": 0.9271, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_nli-pairs_loss": 0.8979966640472412, "eval_nli-pairs_runtime": 3.6341, "eval_nli-pairs_samples_per_second": 27.517, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.786081877366483, "eval_sts-test_pearson_dot": 0.5354100918466089, "eval_sts-test_pearson_euclidean": 0.7368659505908834, "eval_sts-test_pearson_manhattan": 0.7310042183211231, "eval_sts-test_pearson_max": 0.786081877366483, "eval_sts-test_spearman_cosine": 0.8043456052578905, "eval_sts-test_spearman_dot": 0.5150264179790126, "eval_sts-test_spearman_euclidean": 0.7297811553069841, "eval_sts-test_spearman_manhattan": 0.7264172194761916, "eval_sts-test_spearman_max": 0.8043456052578905, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_vitaminc-pairs_loss": 4.720225811004639, "eval_vitaminc-pairs_runtime": 1.1487, "eval_vitaminc-pairs_samples_per_second": 73.995, "eval_vitaminc-pairs_steps_per_second": 2.612, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_sts-label_loss": 3.9553511142730713, "eval_sts-label_runtime": 0.2732, "eval_sts-label_samples_per_second": 366.049, "eval_sts-label_steps_per_second": 14.642, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_qnli-contrastive_loss": 0.14256399869918823, "eval_qnli-contrastive_runtime": 0.3558, "eval_qnli-contrastive_samples_per_second": 281.03, "eval_qnli-contrastive_steps_per_second": 11.241, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_scitail-pairs-qa_loss": 0.06135182082653046, "eval_scitail-pairs-qa_runtime": 0.8797, "eval_scitail-pairs-qa_samples_per_second": 113.67, "eval_scitail-pairs-qa_steps_per_second": 4.547, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_scitail-pairs-pos_loss": 0.42590686678886414, "eval_scitail-pairs-pos_runtime": 1.3288, "eval_scitail-pairs-pos_samples_per_second": 75.254, "eval_scitail-pairs-pos_steps_per_second": 3.01, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_xsum-pairs_loss": 0.2564789056777954, "eval_xsum-pairs_runtime": 0.9345, "eval_xsum-pairs_samples_per_second": 107.011, "eval_xsum-pairs_steps_per_second": 4.28, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_compression-pairs_loss": 0.08838170021772385, "eval_compression-pairs_runtime": 0.2761, "eval_compression-pairs_samples_per_second": 362.144, "eval_compression-pairs_steps_per_second": 14.486, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_sciq_pairs_loss": 0.2946786880493164, "eval_sciq_pairs_runtime": 4.076, "eval_sciq_pairs_samples_per_second": 24.534, "eval_sciq_pairs_steps_per_second": 0.981, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_qasc_pairs_loss": 0.17502914369106293, "eval_qasc_pairs_runtime": 1.0723, "eval_qasc_pairs_samples_per_second": 93.259, "eval_qasc_pairs_steps_per_second": 3.73, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_openbookqa_pairs_loss": 1.5555152893066406, "eval_openbookqa_pairs_runtime": 0.8973, "eval_openbookqa_pairs_samples_per_second": 111.451, "eval_openbookqa_pairs_steps_per_second": 4.458, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_msmarco_pairs_loss": 0.5041812062263489, "eval_msmarco_pairs_runtime": 2.0593, "eval_msmarco_pairs_samples_per_second": 48.56, "eval_msmarco_pairs_steps_per_second": 1.942, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_nq_pairs_loss": 0.24564537405967712, "eval_nq_pairs_runtime": 4.527, "eval_nq_pairs_samples_per_second": 22.09, "eval_nq_pairs_steps_per_second": 0.884, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_trivia_pairs_loss": 0.8565467000007629, "eval_trivia_pairs_runtime": 6.4751, "eval_trivia_pairs_samples_per_second": 15.444, "eval_trivia_pairs_steps_per_second": 0.618, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_quora_pairs_loss": 0.052645713090896606, "eval_quora_pairs_runtime": 0.6803, "eval_quora_pairs_samples_per_second": 146.985, "eval_quora_pairs_steps_per_second": 5.879, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_gooaq_pairs_loss": 0.5815556645393372, "eval_gooaq_pairs_runtime": 1.3985, "eval_gooaq_pairs_samples_per_second": 71.504, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 4160 }, { "epoch": 0.6519354333176618, "eval_mrpc_pairs_loss": 0.047052089124917984, "eval_mrpc_pairs_runtime": 0.2602, "eval_mrpc_pairs_samples_per_second": 384.349, "eval_mrpc_pairs_steps_per_second": 15.374, "step": 4160 }, { "epoch": 0.6569503212662592, "grad_norm": 21.91355323791504, "learning_rate": 1.2780533723289014e-05, "loss": 0.5223, "step": 4192 }, { "epoch": 0.6619652092148566, "grad_norm": 9.792081832885742, "learning_rate": 1.2528179694508286e-05, "loss": 1.0498, "step": 4224 }, { "epoch": 0.666980097163454, "grad_norm": 6.606201648712158, "learning_rate": 1.2276542811903345e-05, "loss": 0.6791, "step": 4256 }, { "epoch": 0.6719949851120514, "grad_norm": 16.744705200195312, "learning_rate": 1.2025696082572509e-05, "loss": 0.8836, "step": 4288 }, { "epoch": 0.6770098730606487, "grad_norm": 8.791626930236816, "learning_rate": 1.1775712284367882e-05, "loss": 0.6035, "step": 4320 }, { "epoch": 0.6820247610092462, "grad_norm": 1.067271113395691, "learning_rate": 1.152666394478045e-05, "loss": 0.5167, "step": 4352 }, { "epoch": 0.6870396489578436, "grad_norm": 7.685211181640625, "learning_rate": 1.1286358620301126e-05, "loss": 0.981, "step": 4384 }, { "epoch": 0.692054536906441, "grad_norm": 19.07784652709961, "learning_rate": 1.10393628476565e-05, "loss": 0.4873, "step": 4416 }, { "epoch": 0.6970694248550384, "grad_norm": 1.4715958833694458, "learning_rate": 1.0793516169782712e-05, "loss": 0.4762, "step": 4448 }, { "epoch": 0.7020843128036358, "grad_norm": 14.572600364685059, "learning_rate": 1.0548889913873123e-05, "loss": 0.8201, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_nli-pairs_loss": 0.8704043626785278, "eval_nli-pairs_runtime": 3.6418, "eval_nli-pairs_samples_per_second": 27.459, "eval_nli-pairs_steps_per_second": 1.098, "eval_sts-test_pearson_cosine": 0.7871366351762351, "eval_sts-test_pearson_dot": 0.520292802271069, "eval_sts-test_pearson_euclidean": 0.7358991589918665, "eval_sts-test_pearson_manhattan": 0.7306487678482384, "eval_sts-test_pearson_max": 0.7871366351762351, "eval_sts-test_spearman_cosine": 0.8043053229220561, "eval_sts-test_spearman_dot": 0.500924984433136, "eval_sts-test_spearman_euclidean": 0.7279966902078664, "eval_sts-test_spearman_manhattan": 0.7254635738312362, "eval_sts-test_spearman_max": 0.8043053229220561, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_vitaminc-pairs_loss": 4.733531475067139, "eval_vitaminc-pairs_runtime": 1.1524, "eval_vitaminc-pairs_samples_per_second": 73.759, "eval_vitaminc-pairs_steps_per_second": 2.603, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_sts-label_loss": 3.589179515838623, "eval_sts-label_runtime": 0.2802, "eval_sts-label_samples_per_second": 356.831, "eval_sts-label_steps_per_second": 14.273, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_qnli-contrastive_loss": 0.11559023708105087, "eval_qnli-contrastive_runtime": 0.3803, "eval_qnli-contrastive_samples_per_second": 262.956, "eval_qnli-contrastive_steps_per_second": 10.518, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_scitail-pairs-qa_loss": 0.05958002060651779, "eval_scitail-pairs-qa_runtime": 0.9171, "eval_scitail-pairs-qa_samples_per_second": 109.042, "eval_scitail-pairs-qa_steps_per_second": 4.362, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_scitail-pairs-pos_loss": 0.43254122138023376, "eval_scitail-pairs-pos_runtime": 1.3676, "eval_scitail-pairs-pos_samples_per_second": 73.118, "eval_scitail-pairs-pos_steps_per_second": 2.925, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_xsum-pairs_loss": 0.248906210064888, "eval_xsum-pairs_runtime": 0.9364, "eval_xsum-pairs_samples_per_second": 106.797, "eval_xsum-pairs_steps_per_second": 4.272, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_compression-pairs_loss": 0.08712127059698105, "eval_compression-pairs_runtime": 0.2771, "eval_compression-pairs_samples_per_second": 360.923, "eval_compression-pairs_steps_per_second": 14.437, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_sciq_pairs_loss": 0.2863478362560272, "eval_sciq_pairs_runtime": 4.1006, "eval_sciq_pairs_samples_per_second": 24.386, "eval_sciq_pairs_steps_per_second": 0.975, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_qasc_pairs_loss": 0.17710347473621368, "eval_qasc_pairs_runtime": 1.0521, "eval_qasc_pairs_samples_per_second": 95.051, "eval_qasc_pairs_steps_per_second": 3.802, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_openbookqa_pairs_loss": 1.5271464586257935, "eval_openbookqa_pairs_runtime": 0.8986, "eval_openbookqa_pairs_samples_per_second": 111.286, "eval_openbookqa_pairs_steps_per_second": 4.451, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_msmarco_pairs_loss": 0.5346755385398865, "eval_msmarco_pairs_runtime": 2.0827, "eval_msmarco_pairs_samples_per_second": 48.014, "eval_msmarco_pairs_steps_per_second": 1.921, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_nq_pairs_loss": 0.24830152094364166, "eval_nq_pairs_runtime": 4.5025, "eval_nq_pairs_samples_per_second": 22.21, "eval_nq_pairs_steps_per_second": 0.888, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_trivia_pairs_loss": 0.799673318862915, "eval_trivia_pairs_runtime": 6.4664, "eval_trivia_pairs_samples_per_second": 15.465, "eval_trivia_pairs_steps_per_second": 0.619, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_quora_pairs_loss": 0.030656050890684128, "eval_quora_pairs_runtime": 0.6818, "eval_quora_pairs_samples_per_second": 146.669, "eval_quora_pairs_steps_per_second": 5.867, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_gooaq_pairs_loss": 0.5036634802818298, "eval_gooaq_pairs_runtime": 1.4051, "eval_gooaq_pairs_samples_per_second": 71.169, "eval_gooaq_pairs_steps_per_second": 2.847, "step": 4480 }, { "epoch": 0.7020843128036358, "eval_mrpc_pairs_loss": 0.04455450549721718, "eval_mrpc_pairs_runtime": 0.2642, "eval_mrpc_pairs_samples_per_second": 378.478, "eval_mrpc_pairs_steps_per_second": 15.139, "step": 4480 }, { "epoch": 0.7070992007522332, "grad_norm": 15.19054889678955, "learning_rate": 1.030555505304156e-05, "loss": 0.7799, "step": 4512 }, { "epoch": 0.7121140887008306, "grad_norm": 16.065160751342773, "learning_rate": 1.0063582185731009e-05, "loss": 0.8006, "step": 4544 }, { "epoch": 0.717128976649428, "grad_norm": 3.2584469318389893, "learning_rate": 9.823041515230937e-06, "loss": 0.5123, "step": 4576 }, { "epoch": 0.7221438645980254, "grad_norm": 2.2951438426971436, "learning_rate": 9.584002829309324e-06, "loss": 0.7421, "step": 4608 }, { "epoch": 0.7271587525466228, "grad_norm": 21.291872024536133, "learning_rate": 9.346535479965231e-06, "loss": 0.9477, "step": 4640 }, { "epoch": 0.7321736404952202, "grad_norm": 4.785529613494873, "learning_rate": 9.11070836330775e-06, "loss": 0.5021, "step": 4672 }, { "epoch": 0.7371885284438175, "grad_norm": 1.7058138847351074, "learning_rate": 8.876589899567312e-06, "loss": 0.931, "step": 4704 }, { "epoch": 0.7422034163924149, "grad_norm": 9.1055326461792, "learning_rate": 8.644248013244963e-06, "loss": 0.7777, "step": 4736 }, { "epoch": 0.7472183043410123, "grad_norm": 3.6529128551483154, "learning_rate": 8.413750113405556e-06, "loss": 0.9462, "step": 4768 }, { "epoch": 0.7522331922896098, "grad_norm": 0.5643049478530884, "learning_rate": 8.185163074120399e-06, "loss": 0.5846, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_nli-pairs_loss": 0.9291799664497375, "eval_nli-pairs_runtime": 3.7498, "eval_nli-pairs_samples_per_second": 26.668, "eval_nli-pairs_steps_per_second": 1.067, "eval_sts-test_pearson_cosine": 0.7855324842750789, "eval_sts-test_pearson_dot": 0.5242204261314407, "eval_sts-test_pearson_euclidean": 0.7349702751512333, "eval_sts-test_pearson_manhattan": 0.7293454465410049, "eval_sts-test_pearson_max": 0.7855324842750789, "eval_sts-test_spearman_cosine": 0.8044211074352633, "eval_sts-test_spearman_dot": 0.5021807579050959, "eval_sts-test_spearman_euclidean": 0.7270456124616013, "eval_sts-test_spearman_manhattan": 0.7246691951731193, "eval_sts-test_spearman_max": 0.8044211074352633, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_vitaminc-pairs_loss": 4.687094688415527, "eval_vitaminc-pairs_runtime": 1.1386, "eval_vitaminc-pairs_samples_per_second": 74.654, "eval_vitaminc-pairs_steps_per_second": 2.635, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_sts-label_loss": 3.8013510704040527, "eval_sts-label_runtime": 0.2716, "eval_sts-label_samples_per_second": 368.125, "eval_sts-label_steps_per_second": 14.725, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_qnli-contrastive_loss": 0.1414812207221985, "eval_qnli-contrastive_runtime": 0.3601, "eval_qnli-contrastive_samples_per_second": 277.73, "eval_qnli-contrastive_steps_per_second": 11.109, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_scitail-pairs-qa_loss": 0.05851547792553902, "eval_scitail-pairs-qa_runtime": 0.8864, "eval_scitail-pairs-qa_samples_per_second": 112.817, "eval_scitail-pairs-qa_steps_per_second": 4.513, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_scitail-pairs-pos_loss": 0.4562886357307434, "eval_scitail-pairs-pos_runtime": 1.3535, "eval_scitail-pairs-pos_samples_per_second": 73.88, "eval_scitail-pairs-pos_steps_per_second": 2.955, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_xsum-pairs_loss": 0.23483119904994965, "eval_xsum-pairs_runtime": 0.9336, "eval_xsum-pairs_samples_per_second": 107.109, "eval_xsum-pairs_steps_per_second": 4.284, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_compression-pairs_loss": 0.08680214732885361, "eval_compression-pairs_runtime": 0.2716, "eval_compression-pairs_samples_per_second": 368.254, "eval_compression-pairs_steps_per_second": 14.73, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_sciq_pairs_loss": 0.2816057801246643, "eval_sciq_pairs_runtime": 4.0742, "eval_sciq_pairs_samples_per_second": 24.545, "eval_sciq_pairs_steps_per_second": 0.982, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_qasc_pairs_loss": 0.17035560309886932, "eval_qasc_pairs_runtime": 1.0717, "eval_qasc_pairs_samples_per_second": 93.311, "eval_qasc_pairs_steps_per_second": 3.732, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_openbookqa_pairs_loss": 1.5671054124832153, "eval_openbookqa_pairs_runtime": 0.8973, "eval_openbookqa_pairs_samples_per_second": 111.441, "eval_openbookqa_pairs_steps_per_second": 4.458, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_msmarco_pairs_loss": 0.5062486529350281, "eval_msmarco_pairs_runtime": 2.0609, "eval_msmarco_pairs_samples_per_second": 48.524, "eval_msmarco_pairs_steps_per_second": 1.941, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_nq_pairs_loss": 0.22875532507896423, "eval_nq_pairs_runtime": 4.5041, "eval_nq_pairs_samples_per_second": 22.202, "eval_nq_pairs_steps_per_second": 0.888, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_trivia_pairs_loss": 0.8119627237319946, "eval_trivia_pairs_runtime": 6.4609, "eval_trivia_pairs_samples_per_second": 15.478, "eval_trivia_pairs_steps_per_second": 0.619, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_quora_pairs_loss": 0.06211049482226372, "eval_quora_pairs_runtime": 0.6765, "eval_quora_pairs_samples_per_second": 147.827, "eval_quora_pairs_steps_per_second": 5.913, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_gooaq_pairs_loss": 0.4847571551799774, "eval_gooaq_pairs_runtime": 1.3911, "eval_gooaq_pairs_samples_per_second": 71.886, "eval_gooaq_pairs_steps_per_second": 2.875, "step": 4800 }, { "epoch": 0.7522331922896098, "eval_mrpc_pairs_loss": 0.04384278133511543, "eval_mrpc_pairs_runtime": 0.2617, "eval_mrpc_pairs_samples_per_second": 382.146, "eval_mrpc_pairs_steps_per_second": 15.286, "step": 4800 }, { "epoch": 0.7572480802382072, "grad_norm": 14.555929183959961, "learning_rate": 7.958553215065208e-06, "loss": 0.6735, "step": 4832 }, { "epoch": 0.7622629681868046, "grad_norm": 10.30207347869873, "learning_rate": 7.733986282278816e-06, "loss": 1.1569, "step": 4864 }, { "epoch": 0.767277856135402, "grad_norm": 17.255786895751953, "learning_rate": 7.511527429088396e-06, "loss": 0.9749, "step": 4896 }, { "epoch": 0.7722927440839994, "grad_norm": 14.730864524841309, "learning_rate": 7.291241197206574e-06, "loss": 0.6581, "step": 4928 }, { "epoch": 0.7773076320325968, "grad_norm": 8.807291984558105, "learning_rate": 7.07319149800605e-06, "loss": 0.6979, "step": 4960 }, { "epoch": 0.7823225199811942, "grad_norm": 0.6080070734024048, "learning_rate": 6.857441593977046e-06, "loss": 0.7582, "step": 4992 }, { "epoch": 0.7873374079297916, "grad_norm": 2.2002525329589844, "learning_rate": 6.6440540803730425e-06, "loss": 1.0082, "step": 5024 }, { "epoch": 0.792352295878389, "grad_norm": 8.624346733093262, "learning_rate": 6.433090867050122e-06, "loss": 0.6206, "step": 5056 }, { "epoch": 0.7973671838269863, "grad_norm": 0.9821205139160156, "learning_rate": 6.224613160505094e-06, "loss": 0.5165, "step": 5088 }, { "epoch": 0.8023820717755837, "grad_norm": 4.104696750640869, "learning_rate": 6.018681446117773e-06, "loss": 0.4914, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_nli-pairs_loss": 0.8841198682785034, "eval_nli-pairs_runtime": 4.1793, "eval_nli-pairs_samples_per_second": 23.928, "eval_nli-pairs_steps_per_second": 0.957, "eval_sts-test_pearson_cosine": 0.7866468635321827, "eval_sts-test_pearson_dot": 0.5124924570863083, "eval_sts-test_pearson_euclidean": 0.7320768163626257, "eval_sts-test_pearson_manhattan": 0.7266238528084388, "eval_sts-test_pearson_max": 0.7866468635321827, "eval_sts-test_spearman_cosine": 0.8041619306345255, "eval_sts-test_spearman_dot": 0.4913316974763461, "eval_sts-test_spearman_euclidean": 0.7232005770314757, "eval_sts-test_spearman_manhattan": 0.7207683852583252, "eval_sts-test_spearman_max": 0.8041619306345255, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_vitaminc-pairs_loss": 4.725103855133057, "eval_vitaminc-pairs_runtime": 1.2146, "eval_vitaminc-pairs_samples_per_second": 69.982, "eval_vitaminc-pairs_steps_per_second": 2.47, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_sts-label_loss": 3.6535470485687256, "eval_sts-label_runtime": 0.3164, "eval_sts-label_samples_per_second": 316.056, "eval_sts-label_steps_per_second": 12.642, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_qnli-contrastive_loss": 0.10529302805662155, "eval_qnli-contrastive_runtime": 0.368, "eval_qnli-contrastive_samples_per_second": 271.711, "eval_qnli-contrastive_steps_per_second": 10.868, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_scitail-pairs-qa_loss": 0.05555274337530136, "eval_scitail-pairs-qa_runtime": 0.9542, "eval_scitail-pairs-qa_samples_per_second": 104.795, "eval_scitail-pairs-qa_steps_per_second": 4.192, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_scitail-pairs-pos_loss": 0.4785614013671875, "eval_scitail-pairs-pos_runtime": 1.4937, "eval_scitail-pairs-pos_samples_per_second": 66.949, "eval_scitail-pairs-pos_steps_per_second": 2.678, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_xsum-pairs_loss": 0.2355932593345642, "eval_xsum-pairs_runtime": 0.9396, "eval_xsum-pairs_samples_per_second": 106.432, "eval_xsum-pairs_steps_per_second": 4.257, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_compression-pairs_loss": 0.083825021982193, "eval_compression-pairs_runtime": 0.2789, "eval_compression-pairs_samples_per_second": 358.564, "eval_compression-pairs_steps_per_second": 14.343, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_sciq_pairs_loss": 0.28157705068588257, "eval_sciq_pairs_runtime": 4.1947, "eval_sciq_pairs_samples_per_second": 23.84, "eval_sciq_pairs_steps_per_second": 0.954, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_qasc_pairs_loss": 0.1739024668931961, "eval_qasc_pairs_runtime": 1.1277, "eval_qasc_pairs_samples_per_second": 88.676, "eval_qasc_pairs_steps_per_second": 3.547, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_openbookqa_pairs_loss": 1.591935396194458, "eval_openbookqa_pairs_runtime": 1.0022, "eval_openbookqa_pairs_samples_per_second": 99.782, "eval_openbookqa_pairs_steps_per_second": 3.991, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_msmarco_pairs_loss": 0.5132349133491516, "eval_msmarco_pairs_runtime": 2.1322, "eval_msmarco_pairs_samples_per_second": 46.901, "eval_msmarco_pairs_steps_per_second": 1.876, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_nq_pairs_loss": 0.2343132346868515, "eval_nq_pairs_runtime": 4.5529, "eval_nq_pairs_samples_per_second": 21.964, "eval_nq_pairs_steps_per_second": 0.879, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_trivia_pairs_loss": 0.7988561987876892, "eval_trivia_pairs_runtime": 6.5661, "eval_trivia_pairs_samples_per_second": 15.23, "eval_trivia_pairs_steps_per_second": 0.609, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_quora_pairs_loss": 0.05578049644827843, "eval_quora_pairs_runtime": 0.8028, "eval_quora_pairs_samples_per_second": 124.564, "eval_quora_pairs_steps_per_second": 4.983, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_gooaq_pairs_loss": 0.48901888728141785, "eval_gooaq_pairs_runtime": 1.5605, "eval_gooaq_pairs_samples_per_second": 64.082, "eval_gooaq_pairs_steps_per_second": 2.563, "step": 5120 }, { "epoch": 0.8023820717755837, "eval_mrpc_pairs_loss": 0.04172317683696747, "eval_mrpc_pairs_runtime": 0.2628, "eval_mrpc_pairs_samples_per_second": 380.505, "eval_mrpc_pairs_steps_per_second": 15.22, "step": 5120 }, { "epoch": 0.8073969597241811, "grad_norm": 9.413043022155762, "learning_rate": 5.815355470602388e-06, "loss": 1.098, "step": 5152 }, { "epoch": 0.8124118476727785, "grad_norm": 0.25412222743034363, "learning_rate": 5.614694224673387e-06, "loss": 0.821, "step": 5184 }, { "epoch": 0.8174267356213759, "grad_norm": 18.76092529296875, "learning_rate": 5.416755925930494e-06, "loss": 0.9351, "step": 5216 }, { "epoch": 0.8224416235699734, "grad_norm": 19.607337951660156, "learning_rate": 5.221598001968132e-06, "loss": 0.8784, "step": 5248 }, { "epoch": 0.8274565115185708, "grad_norm": 3.2164149284362793, "learning_rate": 5.029277073714009e-06, "loss": 0.8326, "step": 5280 }, { "epoch": 0.8324713994671682, "grad_norm": 11.156713485717773, "learning_rate": 4.839848939001789e-06, "loss": 0.7551, "step": 5312 }, { "epoch": 0.8374862874157656, "grad_norm": 8.80623722076416, "learning_rate": 4.653368556382492e-06, "loss": 0.8234, "step": 5344 }, { "epoch": 0.842501175364363, "grad_norm": 16.081491470336914, "learning_rate": 4.469890029179472e-06, "loss": 1.0922, "step": 5376 }, { "epoch": 0.8475160633129604, "grad_norm": 0.8583326935768127, "learning_rate": 4.2894665897914794e-06, "loss": 1.0925, "step": 5408 }, { "epoch": 0.8525309512615578, "grad_norm": 7.903942108154297, "learning_rate": 4.112150584248388e-06, "loss": 1.099, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_nli-pairs_loss": 0.8909263014793396, "eval_nli-pairs_runtime": 3.6329, "eval_nli-pairs_samples_per_second": 27.526, "eval_nli-pairs_steps_per_second": 1.101, "eval_sts-test_pearson_cosine": 0.7892673589571536, "eval_sts-test_pearson_dot": 0.5308666684424199, "eval_sts-test_pearson_euclidean": 0.7372214599353599, "eval_sts-test_pearson_manhattan": 0.73149442324126, "eval_sts-test_pearson_max": 0.7892673589571536, "eval_sts-test_spearman_cosine": 0.8088174691107087, "eval_sts-test_spearman_dot": 0.5097841799376374, "eval_sts-test_spearman_euclidean": 0.7291099552995026, "eval_sts-test_spearman_manhattan": 0.7255023946868168, "eval_sts-test_spearman_max": 0.8088174691107087, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_vitaminc-pairs_loss": 4.696901798248291, "eval_vitaminc-pairs_runtime": 1.13, "eval_vitaminc-pairs_samples_per_second": 75.219, "eval_vitaminc-pairs_steps_per_second": 2.655, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_sts-label_loss": 3.794584274291992, "eval_sts-label_runtime": 0.2757, "eval_sts-label_samples_per_second": 362.777, "eval_sts-label_steps_per_second": 14.511, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_qnli-contrastive_loss": 0.1291896551847458, "eval_qnli-contrastive_runtime": 0.3577, "eval_qnli-contrastive_samples_per_second": 279.536, "eval_qnli-contrastive_steps_per_second": 11.181, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_scitail-pairs-qa_loss": 0.05729294940829277, "eval_scitail-pairs-qa_runtime": 0.8757, "eval_scitail-pairs-qa_samples_per_second": 114.199, "eval_scitail-pairs-qa_steps_per_second": 4.568, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_scitail-pairs-pos_loss": 0.47140783071517944, "eval_scitail-pairs-pos_runtime": 1.3328, "eval_scitail-pairs-pos_samples_per_second": 75.031, "eval_scitail-pairs-pos_steps_per_second": 3.001, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_xsum-pairs_loss": 0.2317724972963333, "eval_xsum-pairs_runtime": 0.934, "eval_xsum-pairs_samples_per_second": 107.065, "eval_xsum-pairs_steps_per_second": 4.283, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_compression-pairs_loss": 0.0849599540233612, "eval_compression-pairs_runtime": 0.2772, "eval_compression-pairs_samples_per_second": 360.752, "eval_compression-pairs_steps_per_second": 14.43, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_sciq_pairs_loss": 0.2746911346912384, "eval_sciq_pairs_runtime": 4.0398, "eval_sciq_pairs_samples_per_second": 24.754, "eval_sciq_pairs_steps_per_second": 0.99, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_qasc_pairs_loss": 0.16956950724124908, "eval_qasc_pairs_runtime": 1.0682, "eval_qasc_pairs_samples_per_second": 93.615, "eval_qasc_pairs_steps_per_second": 3.745, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_openbookqa_pairs_loss": 1.5424996614456177, "eval_openbookqa_pairs_runtime": 0.8928, "eval_openbookqa_pairs_samples_per_second": 112.006, "eval_openbookqa_pairs_steps_per_second": 4.48, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_msmarco_pairs_loss": 0.5047981142997742, "eval_msmarco_pairs_runtime": 2.0436, "eval_msmarco_pairs_samples_per_second": 48.932, "eval_msmarco_pairs_steps_per_second": 1.957, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_nq_pairs_loss": 0.230237677693367, "eval_nq_pairs_runtime": 4.5251, "eval_nq_pairs_samples_per_second": 22.099, "eval_nq_pairs_steps_per_second": 0.884, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_trivia_pairs_loss": 0.7567735314369202, "eval_trivia_pairs_runtime": 6.4545, "eval_trivia_pairs_samples_per_second": 15.493, "eval_trivia_pairs_steps_per_second": 0.62, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_quora_pairs_loss": 0.05753583088517189, "eval_quora_pairs_runtime": 0.6769, "eval_quora_pairs_samples_per_second": 147.736, "eval_quora_pairs_steps_per_second": 5.909, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_gooaq_pairs_loss": 0.49448758363723755, "eval_gooaq_pairs_runtime": 1.3984, "eval_gooaq_pairs_samples_per_second": 71.51, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 5440 }, { "epoch": 0.8525309512615578, "eval_mrpc_pairs_loss": 0.04384453222155571, "eval_mrpc_pairs_runtime": 0.2653, "eval_mrpc_pairs_samples_per_second": 376.996, "eval_mrpc_pairs_steps_per_second": 15.08, "step": 5440 }, { "epoch": 0.8575458392101551, "grad_norm": 0.8697513341903687, "learning_rate": 3.93799345702415e-06, "loss": 0.5396, "step": 5472 }, { "epoch": 0.8625607271587525, "grad_norm": 8.337197303771973, "learning_rate": 3.7670457361112116e-06, "loss": 0.6636, "step": 5504 }, { "epoch": 0.8675756151073499, "grad_norm": 0.3655373156070709, "learning_rate": 3.5993570183609596e-06, "loss": 1.0095, "step": 5536 }, { "epoch": 0.8725905030559473, "grad_norm": 13.748374938964844, "learning_rate": 3.4349759550941933e-06, "loss": 0.631, "step": 5568 }, { "epoch": 0.8776053910045447, "grad_norm": 15.683762550354004, "learning_rate": 3.273950237986013e-06, "loss": 0.5415, "step": 5600 }, { "epoch": 0.8826202789531421, "grad_norm": 10.004467964172363, "learning_rate": 3.11632658522906e-06, "loss": 0.9227, "step": 5632 }, { "epoch": 0.8876351669017395, "grad_norm": 12.990907669067383, "learning_rate": 2.9621507279792564e-06, "loss": 0.8991, "step": 5664 }, { "epoch": 0.8926500548503369, "grad_norm": 0.4619373679161072, "learning_rate": 2.8114673970878584e-06, "loss": 0.5068, "step": 5696 }, { "epoch": 0.8976649427989344, "grad_norm": 8.317788124084473, "learning_rate": 2.664320310123768e-06, "loss": 1.2134, "step": 5728 }, { "epoch": 0.9026798307475318, "grad_norm": 0.38993319869041443, "learning_rate": 2.5207521586897876e-06, "loss": 0.4651, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_nli-pairs_loss": 0.8765493631362915, "eval_nli-pairs_runtime": 3.6164, "eval_nli-pairs_samples_per_second": 27.652, "eval_nli-pairs_steps_per_second": 1.106, "eval_sts-test_pearson_cosine": 0.7880147168961996, "eval_sts-test_pearson_dot": 0.5198107156003906, "eval_sts-test_pearson_euclidean": 0.7362840264051249, "eval_sts-test_pearson_manhattan": 0.7307716823389564, "eval_sts-test_pearson_max": 0.7880147168961996, "eval_sts-test_spearman_cosine": 0.8071394355093185, "eval_sts-test_spearman_dot": 0.49865317522814645, "eval_sts-test_spearman_euclidean": 0.7278395467197664, "eval_sts-test_spearman_manhattan": 0.7246934378777047, "eval_sts-test_spearman_max": 0.8071394355093185, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_vitaminc-pairs_loss": 4.717629432678223, "eval_vitaminc-pairs_runtime": 1.1248, "eval_vitaminc-pairs_samples_per_second": 75.571, "eval_vitaminc-pairs_steps_per_second": 2.667, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_sts-label_loss": 3.7598328590393066, "eval_sts-label_runtime": 0.2743, "eval_sts-label_samples_per_second": 364.548, "eval_sts-label_steps_per_second": 14.582, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_qnli-contrastive_loss": 0.11829647421836853, "eval_qnli-contrastive_runtime": 0.3606, "eval_qnli-contrastive_samples_per_second": 277.334, "eval_qnli-contrastive_steps_per_second": 11.093, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_scitail-pairs-qa_loss": 0.05503571406006813, "eval_scitail-pairs-qa_runtime": 0.874, "eval_scitail-pairs-qa_samples_per_second": 114.411, "eval_scitail-pairs-qa_steps_per_second": 4.576, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_scitail-pairs-pos_loss": 0.47530597448349, "eval_scitail-pairs-pos_runtime": 1.3429, "eval_scitail-pairs-pos_samples_per_second": 74.463, "eval_scitail-pairs-pos_steps_per_second": 2.979, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_xsum-pairs_loss": 0.22936196625232697, "eval_xsum-pairs_runtime": 0.9431, "eval_xsum-pairs_samples_per_second": 106.028, "eval_xsum-pairs_steps_per_second": 4.241, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_compression-pairs_loss": 0.08313465863466263, "eval_compression-pairs_runtime": 0.2781, "eval_compression-pairs_samples_per_second": 359.542, "eval_compression-pairs_steps_per_second": 14.382, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_sciq_pairs_loss": 0.27646955847740173, "eval_sciq_pairs_runtime": 4.0554, "eval_sciq_pairs_samples_per_second": 24.658, "eval_sciq_pairs_steps_per_second": 0.986, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_qasc_pairs_loss": 0.17006540298461914, "eval_qasc_pairs_runtime": 1.0538, "eval_qasc_pairs_samples_per_second": 94.898, "eval_qasc_pairs_steps_per_second": 3.796, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_openbookqa_pairs_loss": 1.5487664937973022, "eval_openbookqa_pairs_runtime": 0.8956, "eval_openbookqa_pairs_samples_per_second": 111.653, "eval_openbookqa_pairs_steps_per_second": 4.466, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_msmarco_pairs_loss": 0.4861982464790344, "eval_msmarco_pairs_runtime": 2.0548, "eval_msmarco_pairs_samples_per_second": 48.666, "eval_msmarco_pairs_steps_per_second": 1.947, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_nq_pairs_loss": 0.22520922124385834, "eval_nq_pairs_runtime": 4.4973, "eval_nq_pairs_samples_per_second": 22.236, "eval_nq_pairs_steps_per_second": 0.889, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_trivia_pairs_loss": 0.7480303049087524, "eval_trivia_pairs_runtime": 6.498, "eval_trivia_pairs_samples_per_second": 15.389, "eval_trivia_pairs_steps_per_second": 0.616, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_quora_pairs_loss": 0.06060533598065376, "eval_quora_pairs_runtime": 0.6722, "eval_quora_pairs_samples_per_second": 148.76, "eval_quora_pairs_steps_per_second": 5.95, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_gooaq_pairs_loss": 0.4696855843067169, "eval_gooaq_pairs_runtime": 1.3985, "eval_gooaq_pairs_samples_per_second": 71.503, "eval_gooaq_pairs_steps_per_second": 2.86, "step": 5760 }, { "epoch": 0.9026798307475318, "eval_mrpc_pairs_loss": 0.04175671190023422, "eval_mrpc_pairs_runtime": 0.2618, "eval_mrpc_pairs_samples_per_second": 381.956, "eval_mrpc_pairs_steps_per_second": 15.278, "step": 5760 }, { "epoch": 0.9076947186961292, "grad_norm": 19.970914840698242, "learning_rate": 2.3808045960365743e-06, "loss": 0.6346, "step": 5792 }, { "epoch": 0.9127096066447266, "grad_norm": 7.2970075607299805, "learning_rate": 2.2445182249778363e-06, "loss": 1.1103, "step": 5824 }, { "epoch": 0.917724494593324, "grad_norm": 14.34080982208252, "learning_rate": 2.1119325861102666e-06, "loss": 0.7667, "step": 5856 }, { "epoch": 0.9227393825419213, "grad_norm": 16.219850540161133, "learning_rate": 1.98308614634171e-06, "loss": 0.9174, "step": 5888 }, { "epoch": 0.9277542704905187, "grad_norm": 17.201740264892578, "learning_rate": 1.8580162877307744e-06, "loss": 0.7609, "step": 5920 }, { "epoch": 0.9327691584391161, "grad_norm": 12.591241836547852, "learning_rate": 1.7367592966412454e-06, "loss": 0.8993, "step": 5952 }, { "epoch": 0.9377840463877135, "grad_norm": 17.12389373779297, "learning_rate": 1.619350353214355e-06, "loss": 0.7587, "step": 5984 }, { "epoch": 0.9427989343363109, "grad_norm": 44.237342834472656, "learning_rate": 1.5058235211620126e-06, "loss": 0.935, "step": 6016 }, { "epoch": 0.9478138222849083, "grad_norm": 4.658092975616455, "learning_rate": 1.3962117378839439e-06, "loss": 0.8551, "step": 6048 }, { "epoch": 0.9528287102335057, "grad_norm": 0.4202437698841095, "learning_rate": 1.2905468049116077e-06, "loss": 1.4247, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_nli-pairs_loss": 0.880797266960144, "eval_nli-pairs_runtime": 3.65, "eval_nli-pairs_samples_per_second": 27.397, "eval_nli-pairs_steps_per_second": 1.096, "eval_sts-test_pearson_cosine": 0.7886384880168056, "eval_sts-test_pearson_dot": 0.5209320238457065, "eval_sts-test_pearson_euclidean": 0.7365619856047663, "eval_sts-test_pearson_manhattan": 0.7309874377904119, "eval_sts-test_pearson_max": 0.7886384880168056, "eval_sts-test_spearman_cosine": 0.8078306606920327, "eval_sts-test_spearman_dot": 0.4995671547413244, "eval_sts-test_spearman_euclidean": 0.7281379887760366, "eval_sts-test_spearman_manhattan": 0.7249545388844193, "eval_sts-test_spearman_max": 0.8078306606920327, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_vitaminc-pairs_loss": 4.70750617980957, "eval_vitaminc-pairs_runtime": 1.1372, "eval_vitaminc-pairs_samples_per_second": 74.747, "eval_vitaminc-pairs_steps_per_second": 2.638, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_sts-label_loss": 3.7686922550201416, "eval_sts-label_runtime": 0.2807, "eval_sts-label_samples_per_second": 356.243, "eval_sts-label_steps_per_second": 14.25, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_qnli-contrastive_loss": 0.12000326067209244, "eval_qnli-contrastive_runtime": 0.3651, "eval_qnli-contrastive_samples_per_second": 273.878, "eval_qnli-contrastive_steps_per_second": 10.955, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_scitail-pairs-qa_loss": 0.055266913026571274, "eval_scitail-pairs-qa_runtime": 0.8813, "eval_scitail-pairs-qa_samples_per_second": 113.472, "eval_scitail-pairs-qa_steps_per_second": 4.539, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_scitail-pairs-pos_loss": 0.46404972672462463, "eval_scitail-pairs-pos_runtime": 1.3468, "eval_scitail-pairs-pos_samples_per_second": 74.248, "eval_scitail-pairs-pos_steps_per_second": 2.97, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_xsum-pairs_loss": 0.22768865525722504, "eval_xsum-pairs_runtime": 0.9385, "eval_xsum-pairs_samples_per_second": 106.553, "eval_xsum-pairs_steps_per_second": 4.262, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_compression-pairs_loss": 0.08245458453893661, "eval_compression-pairs_runtime": 0.2783, "eval_compression-pairs_samples_per_second": 359.331, "eval_compression-pairs_steps_per_second": 14.373, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_sciq_pairs_loss": 0.24696679413318634, "eval_sciq_pairs_runtime": 4.072, "eval_sciq_pairs_samples_per_second": 24.558, "eval_sciq_pairs_steps_per_second": 0.982, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_qasc_pairs_loss": 0.16628116369247437, "eval_qasc_pairs_runtime": 1.066, "eval_qasc_pairs_samples_per_second": 93.809, "eval_qasc_pairs_steps_per_second": 3.752, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_openbookqa_pairs_loss": 1.5343760251998901, "eval_openbookqa_pairs_runtime": 0.9064, "eval_openbookqa_pairs_samples_per_second": 110.324, "eval_openbookqa_pairs_steps_per_second": 4.413, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_msmarco_pairs_loss": 0.48861968517303467, "eval_msmarco_pairs_runtime": 2.0777, "eval_msmarco_pairs_samples_per_second": 48.131, "eval_msmarco_pairs_steps_per_second": 1.925, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_nq_pairs_loss": 0.2192871868610382, "eval_nq_pairs_runtime": 4.5629, "eval_nq_pairs_samples_per_second": 21.916, "eval_nq_pairs_steps_per_second": 0.877, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_trivia_pairs_loss": 0.7455114126205444, "eval_trivia_pairs_runtime": 6.4434, "eval_trivia_pairs_samples_per_second": 15.52, "eval_trivia_pairs_steps_per_second": 0.621, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_quora_pairs_loss": 0.0536942183971405, "eval_quora_pairs_runtime": 0.6874, "eval_quora_pairs_samples_per_second": 145.481, "eval_quora_pairs_steps_per_second": 5.819, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_gooaq_pairs_loss": 0.4775075614452362, "eval_gooaq_pairs_runtime": 1.3946, "eval_gooaq_pairs_samples_per_second": 71.707, "eval_gooaq_pairs_steps_per_second": 2.868, "step": 6080 }, { "epoch": 0.9528287102335057, "eval_mrpc_pairs_loss": 0.041804660111665726, "eval_mrpc_pairs_runtime": 0.2631, "eval_mrpc_pairs_samples_per_second": 380.035, "eval_mrpc_pairs_steps_per_second": 15.201, "step": 6080 }, { "epoch": 0.9578435981821031, "grad_norm": 15.8797607421875, "learning_rate": 1.1888593786816527e-06, "loss": 0.3377, "step": 6112 }, { "epoch": 0.9628584861307005, "grad_norm": 54.2625732421875, "learning_rate": 1.0911789616415957e-06, "loss": 1.163, "step": 6144 }, { "epoch": 0.967873374079298, "grad_norm": 27.014169692993164, "learning_rate": 9.975338936903327e-07, "loss": 1.1638, "step": 6176 }, { "epoch": 0.9728882620278954, "grad_norm": 12.264323234558105, "learning_rate": 9.079513439558945e-07, "loss": 0.7428, "step": 6208 }, { "epoch": 0.9779031499764927, "grad_norm": 0.2486962229013443, "learning_rate": 8.224573029129201e-07, "loss": 0.3827, "step": 6240 }, { "epoch": 0.9829180379250901, "grad_norm": 0.19951488077640533, "learning_rate": 7.41076574842064e-07, "loss": 1.0739, "step": 6272 }, { "epoch": 0.9879329258736875, "grad_norm": 1.6168636083602905, "learning_rate": 6.638327706335673e-07, "loss": 0.7049, "step": 6304 }, { "epoch": 0.9929478138222849, "grad_norm": 1.4084432125091553, "learning_rate": 5.907483009370463e-07, "loss": 0.9298, "step": 6336 }, { "epoch": 0.9979627017708823, "grad_norm": 0.7779116630554199, "learning_rate": 5.218443696595343e-07, "loss": 0.6243, "step": 6368 }, { "epoch": 1.0029775897194797, "grad_norm": 10.389066696166992, "learning_rate": 4.5714096781360346e-07, "loss": 0.8693, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_nli-pairs_loss": 0.8764966726303101, "eval_nli-pairs_runtime": 3.9731, "eval_nli-pairs_samples_per_second": 25.169, "eval_nli-pairs_steps_per_second": 1.007, "eval_sts-test_pearson_cosine": 0.7883389668315285, "eval_sts-test_pearson_dot": 0.517346671859764, "eval_sts-test_pearson_euclidean": 0.7353164199200737, "eval_sts-test_pearson_manhattan": 0.7297049415657237, "eval_sts-test_pearson_max": 0.7883389668315285, "eval_sts-test_spearman_cosine": 0.8072800949662179, "eval_sts-test_spearman_dot": 0.4963365732568842, "eval_sts-test_spearman_euclidean": 0.7268218204343426, "eval_sts-test_spearman_manhattan": 0.7238000634035274, "eval_sts-test_spearman_max": 0.8072800949662179, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_vitaminc-pairs_loss": 4.692606449127197, "eval_vitaminc-pairs_runtime": 1.1964, "eval_vitaminc-pairs_samples_per_second": 71.046, "eval_vitaminc-pairs_steps_per_second": 2.508, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_sts-label_loss": 3.7494537830352783, "eval_sts-label_runtime": 0.2884, "eval_sts-label_samples_per_second": 346.773, "eval_sts-label_steps_per_second": 13.871, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_qnli-contrastive_loss": 0.11221926659345627, "eval_qnli-contrastive_runtime": 0.366, "eval_qnli-contrastive_samples_per_second": 273.23, "eval_qnli-contrastive_steps_per_second": 10.929, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_scitail-pairs-qa_loss": 0.05439920350909233, "eval_scitail-pairs-qa_runtime": 1.0826, "eval_scitail-pairs-qa_samples_per_second": 92.37, "eval_scitail-pairs-qa_steps_per_second": 3.695, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_scitail-pairs-pos_loss": 0.47426754236221313, "eval_scitail-pairs-pos_runtime": 1.4478, "eval_scitail-pairs-pos_samples_per_second": 69.07, "eval_scitail-pairs-pos_steps_per_second": 2.763, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_xsum-pairs_loss": 0.22696803510189056, "eval_xsum-pairs_runtime": 0.9498, "eval_xsum-pairs_samples_per_second": 105.287, "eval_xsum-pairs_steps_per_second": 4.211, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_compression-pairs_loss": 0.08134880661964417, "eval_compression-pairs_runtime": 0.2978, "eval_compression-pairs_samples_per_second": 335.83, "eval_compression-pairs_steps_per_second": 13.433, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_sciq_pairs_loss": 0.22929410636425018, "eval_sciq_pairs_runtime": 4.3229, "eval_sciq_pairs_samples_per_second": 23.132, "eval_sciq_pairs_steps_per_second": 0.925, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_qasc_pairs_loss": 0.16514292359352112, "eval_qasc_pairs_runtime": 1.1535, "eval_qasc_pairs_samples_per_second": 86.694, "eval_qasc_pairs_steps_per_second": 3.468, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_openbookqa_pairs_loss": 1.5505836009979248, "eval_openbookqa_pairs_runtime": 0.9784, "eval_openbookqa_pairs_samples_per_second": 102.21, "eval_openbookqa_pairs_steps_per_second": 4.088, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_msmarco_pairs_loss": 0.48988625407218933, "eval_msmarco_pairs_runtime": 2.1515, "eval_msmarco_pairs_samples_per_second": 46.48, "eval_msmarco_pairs_steps_per_second": 1.859, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_nq_pairs_loss": 0.21817754209041595, "eval_nq_pairs_runtime": 4.6579, "eval_nq_pairs_samples_per_second": 21.469, "eval_nq_pairs_steps_per_second": 0.859, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_trivia_pairs_loss": 0.7522485852241516, "eval_trivia_pairs_runtime": 6.6903, "eval_trivia_pairs_samples_per_second": 14.947, "eval_trivia_pairs_steps_per_second": 0.598, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_quora_pairs_loss": 0.026629021391272545, "eval_quora_pairs_runtime": 0.7757, "eval_quora_pairs_samples_per_second": 128.912, "eval_quora_pairs_steps_per_second": 5.156, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_gooaq_pairs_loss": 0.47202804684638977, "eval_gooaq_pairs_runtime": 1.5337, "eval_gooaq_pairs_samples_per_second": 65.202, "eval_gooaq_pairs_steps_per_second": 2.608, "step": 6400 }, { "epoch": 1.0029775897194797, "eval_mrpc_pairs_loss": 0.041211605072021484, "eval_mrpc_pairs_runtime": 0.2796, "eval_mrpc_pairs_samples_per_second": 357.626, "eval_mrpc_pairs_steps_per_second": 14.305, "step": 6400 }, { "epoch": 1.007992477668077, "grad_norm": 0.12192127108573914, "learning_rate": 3.9665686771741374e-07, "loss": 0.731, "step": 6432 }, { "epoch": 1.0130073656166745, "grad_norm": 4.465780258178711, "learning_rate": 3.404096175483029e-07, "loss": 0.7662, "step": 6464 }, { "epoch": 1.0180222535652719, "grad_norm": 19.539562225341797, "learning_rate": 2.8841553625157116e-07, "loss": 0.5362, "step": 6496 }, { "epoch": 1.0230371415138693, "grad_norm": 1.8675719499588013, "learning_rate": 2.406897088058863e-07, "loss": 0.9786, "step": 6528 }, { "epoch": 1.0280520294624667, "grad_norm": 1.5663179159164429, "learning_rate": 1.9724598184667987e-07, "loss": 0.9213, "step": 6560 }, { "epoch": 1.033066917411064, "grad_norm": 1.0503817796707153, "learning_rate": 1.580969596488624e-07, "loss": 0.7601, "step": 6592 }, { "epoch": 1.0380818053596614, "grad_norm": 1.7467032670974731, "learning_rate": 1.2325400046994672e-07, "loss": 0.4821, "step": 6624 }, { "epoch": 1.0430966933082588, "grad_norm": 0.5685003399848938, "learning_rate": 9.272721325469414e-08, "loss": 0.73, "step": 6656 }, { "epoch": 1.0481115812568562, "grad_norm": 0.16832184791564941, "learning_rate": 6.652545470221705e-08, "loss": 0.4139, "step": 6688 }, { "epoch": 1.0531264692054536, "grad_norm": 17.248783111572266, "learning_rate": 4.465632669640285e-08, "loss": 0.5152, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_nli-pairs_loss": 0.8771082162857056, "eval_nli-pairs_runtime": 3.6864, "eval_nli-pairs_samples_per_second": 27.127, "eval_nli-pairs_steps_per_second": 1.085, "eval_sts-test_pearson_cosine": 0.7895199953969396, "eval_sts-test_pearson_dot": 0.5189310649741209, "eval_sts-test_pearson_euclidean": 0.7358975444358454, "eval_sts-test_pearson_manhattan": 0.7303294470043906, "eval_sts-test_pearson_max": 0.7895199953969396, "eval_sts-test_spearman_cosine": 0.8080710925195471, "eval_sts-test_spearman_dot": 0.49813617315229736, "eval_sts-test_spearman_euclidean": 0.727349183443088, "eval_sts-test_spearman_manhattan": 0.7243520585394965, "eval_sts-test_spearman_max": 0.8080710925195471, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_vitaminc-pairs_loss": 4.680215358734131, "eval_vitaminc-pairs_runtime": 1.1767, "eval_vitaminc-pairs_samples_per_second": 72.234, "eval_vitaminc-pairs_steps_per_second": 2.549, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_sts-label_loss": 3.747551202774048, "eval_sts-label_runtime": 0.2756, "eval_sts-label_samples_per_second": 362.89, "eval_sts-label_steps_per_second": 14.516, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_qnli-contrastive_loss": 0.11317223310470581, "eval_qnli-contrastive_runtime": 0.362, "eval_qnli-contrastive_samples_per_second": 276.263, "eval_qnli-contrastive_steps_per_second": 11.051, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_scitail-pairs-qa_loss": 0.05494887754321098, "eval_scitail-pairs-qa_runtime": 0.8771, "eval_scitail-pairs-qa_samples_per_second": 114.01, "eval_scitail-pairs-qa_steps_per_second": 4.56, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_scitail-pairs-pos_loss": 0.46942538022994995, "eval_scitail-pairs-pos_runtime": 1.3418, "eval_scitail-pairs-pos_samples_per_second": 74.527, "eval_scitail-pairs-pos_steps_per_second": 2.981, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_xsum-pairs_loss": 0.22760838270187378, "eval_xsum-pairs_runtime": 0.9366, "eval_xsum-pairs_samples_per_second": 106.764, "eval_xsum-pairs_steps_per_second": 4.271, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_compression-pairs_loss": 0.081705242395401, "eval_compression-pairs_runtime": 0.2786, "eval_compression-pairs_samples_per_second": 358.908, "eval_compression-pairs_steps_per_second": 14.356, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_sciq_pairs_loss": 0.22932235896587372, "eval_sciq_pairs_runtime": 4.0839, "eval_sciq_pairs_samples_per_second": 24.486, "eval_sciq_pairs_steps_per_second": 0.979, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_qasc_pairs_loss": 0.1658654361963272, "eval_qasc_pairs_runtime": 1.0521, "eval_qasc_pairs_samples_per_second": 95.048, "eval_qasc_pairs_steps_per_second": 3.802, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_openbookqa_pairs_loss": 1.5459561347961426, "eval_openbookqa_pairs_runtime": 0.8996, "eval_openbookqa_pairs_samples_per_second": 111.162, "eval_openbookqa_pairs_steps_per_second": 4.446, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_msmarco_pairs_loss": 0.49013325572013855, "eval_msmarco_pairs_runtime": 2.0531, "eval_msmarco_pairs_samples_per_second": 48.707, "eval_msmarco_pairs_steps_per_second": 1.948, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_nq_pairs_loss": 0.22049441933631897, "eval_nq_pairs_runtime": 4.5149, "eval_nq_pairs_samples_per_second": 22.149, "eval_nq_pairs_steps_per_second": 0.886, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_trivia_pairs_loss": 0.7513056397438049, "eval_trivia_pairs_runtime": 6.4705, "eval_trivia_pairs_samples_per_second": 15.455, "eval_trivia_pairs_steps_per_second": 0.618, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_quora_pairs_loss": 0.024981992319226265, "eval_quora_pairs_runtime": 0.6855, "eval_quora_pairs_samples_per_second": 145.879, "eval_quora_pairs_steps_per_second": 5.835, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_gooaq_pairs_loss": 0.47234511375427246, "eval_gooaq_pairs_runtime": 1.4025, "eval_gooaq_pairs_samples_per_second": 71.304, "eval_gooaq_pairs_steps_per_second": 2.852, "step": 6720 }, { "epoch": 1.0531264692054536, "eval_mrpc_pairs_loss": 0.04154253005981445, "eval_mrpc_pairs_runtime": 0.2618, "eval_mrpc_pairs_samples_per_second": 382.036, "eval_mrpc_pairs_steps_per_second": 15.281, "step": 6720 }, { "epoch": 1.058141357154051, "grad_norm": 14.043108940124512, "learning_rate": 2.7126174100376432e-08, "loss": 0.4684, "step": 6752 }, { "epoch": 1.0631562451026484, "grad_norm": 0.5513893365859985, "learning_rate": 1.3940082915687713e-08, "loss": 0.445, "step": 6784 }, { "epoch": 1.068171133051246, "grad_norm": 7.036909580230713, "learning_rate": 5.101878806703652e-09, "loss": 0.4288, "step": 6816 }, { "epoch": 1.0731860209998434, "grad_norm": 0.2966393828392029, "learning_rate": 6.141259906761176e-10, "loss": 0.3797, "step": 6848 }, { "epoch": 1.0782009089484408, "grad_norm": 9.721883773803711, "learning_rate": 2.9999521873506204e-05, "loss": 0.4304, "step": 6880 }, { "epoch": 1.0832157968970382, "grad_norm": 2.1523923873901367, "learning_rate": 2.9995306080226573e-05, "loss": 0.8562, "step": 6912 }, { "epoch": 1.0882306848456356, "grad_norm": 12.939388275146484, "learning_rate": 2.9986739717293326e-05, "loss": 0.4902, "step": 6944 }, { "epoch": 1.093245572794233, "grad_norm": 0.37949275970458984, "learning_rate": 2.9973825270054784e-05, "loss": 0.4285, "step": 6976 }, { "epoch": 1.0982604607428303, "grad_norm": 2.427003860473633, "learning_rate": 2.995656648536359e-05, "loss": 0.4782, "step": 7008 }, { "epoch": 1.1032753486914277, "grad_norm": 10.36500072479248, "learning_rate": 2.9934968370489646e-05, "loss": 0.7503, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_nli-pairs_loss": 0.9263110160827637, "eval_nli-pairs_runtime": 3.6445, "eval_nli-pairs_samples_per_second": 27.439, "eval_nli-pairs_steps_per_second": 1.098, "eval_sts-test_pearson_cosine": 0.7937369016852821, "eval_sts-test_pearson_dot": 0.5273705048333348, "eval_sts-test_pearson_euclidean": 0.7373368406202081, "eval_sts-test_pearson_manhattan": 0.7318756816157863, "eval_sts-test_pearson_max": 0.7937369016852821, "eval_sts-test_spearman_cosine": 0.810858247608813, "eval_sts-test_spearman_dot": 0.508640420451459, "eval_sts-test_spearman_euclidean": 0.73158962258494, "eval_sts-test_spearman_manhattan": 0.7284434977078286, "eval_sts-test_spearman_max": 0.810858247608813, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_vitaminc-pairs_loss": 4.521730422973633, "eval_vitaminc-pairs_runtime": 1.1248, "eval_vitaminc-pairs_samples_per_second": 75.569, "eval_vitaminc-pairs_steps_per_second": 2.667, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_sts-label_loss": 3.8786072731018066, "eval_sts-label_runtime": 0.2698, "eval_sts-label_samples_per_second": 370.602, "eval_sts-label_steps_per_second": 14.824, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_qnli-contrastive_loss": 0.1796300858259201, "eval_qnli-contrastive_runtime": 0.3573, "eval_qnli-contrastive_samples_per_second": 279.916, "eval_qnli-contrastive_steps_per_second": 11.197, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_scitail-pairs-qa_loss": 0.06360480934381485, "eval_scitail-pairs-qa_runtime": 0.8855, "eval_scitail-pairs-qa_samples_per_second": 112.93, "eval_scitail-pairs-qa_steps_per_second": 4.517, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_scitail-pairs-pos_loss": 0.5473235249519348, "eval_scitail-pairs-pos_runtime": 1.3255, "eval_scitail-pairs-pos_samples_per_second": 75.446, "eval_scitail-pairs-pos_steps_per_second": 3.018, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_xsum-pairs_loss": 0.24051249027252197, "eval_xsum-pairs_runtime": 0.9384, "eval_xsum-pairs_samples_per_second": 106.567, "eval_xsum-pairs_steps_per_second": 4.263, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_compression-pairs_loss": 0.0928964912891388, "eval_compression-pairs_runtime": 0.2778, "eval_compression-pairs_samples_per_second": 359.983, "eval_compression-pairs_steps_per_second": 14.399, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_sciq_pairs_loss": 0.28897982835769653, "eval_sciq_pairs_runtime": 4.1339, "eval_sciq_pairs_samples_per_second": 24.19, "eval_sciq_pairs_steps_per_second": 0.968, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_qasc_pairs_loss": 0.1793307065963745, "eval_qasc_pairs_runtime": 1.0598, "eval_qasc_pairs_samples_per_second": 94.357, "eval_qasc_pairs_steps_per_second": 3.774, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_openbookqa_pairs_loss": 1.7123816013336182, "eval_openbookqa_pairs_runtime": 0.8946, "eval_openbookqa_pairs_samples_per_second": 111.784, "eval_openbookqa_pairs_steps_per_second": 4.471, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_msmarco_pairs_loss": 0.4797554016113281, "eval_msmarco_pairs_runtime": 2.0659, "eval_msmarco_pairs_samples_per_second": 48.405, "eval_msmarco_pairs_steps_per_second": 1.936, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_nq_pairs_loss": 0.2459176480770111, "eval_nq_pairs_runtime": 4.5081, "eval_nq_pairs_samples_per_second": 22.182, "eval_nq_pairs_steps_per_second": 0.887, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_trivia_pairs_loss": 0.9698570966720581, "eval_trivia_pairs_runtime": 6.4733, "eval_trivia_pairs_samples_per_second": 15.448, "eval_trivia_pairs_steps_per_second": 0.618, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_quora_pairs_loss": 0.03161533921957016, "eval_quora_pairs_runtime": 0.6866, "eval_quora_pairs_samples_per_second": 145.647, "eval_quora_pairs_steps_per_second": 5.826, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_gooaq_pairs_loss": 0.5500377416610718, "eval_gooaq_pairs_runtime": 1.4051, "eval_gooaq_pairs_samples_per_second": 71.17, "eval_gooaq_pairs_steps_per_second": 2.847, "step": 7040 }, { "epoch": 1.1032753486914277, "eval_mrpc_pairs_loss": 0.04846707731485367, "eval_mrpc_pairs_runtime": 0.2627, "eval_mrpc_pairs_samples_per_second": 380.671, "eval_mrpc_pairs_steps_per_second": 15.227, "step": 7040 }, { "epoch": 1.1082902366400251, "grad_norm": 10.539325714111328, "learning_rate": 2.9909037191667383e-05, "loss": 1.0828, "step": 7072 }, { "epoch": 1.1133051245886225, "grad_norm": 14.641651153564453, "learning_rate": 2.987878047227772e-05, "loss": 0.6206, "step": 7104 }, { "epoch": 1.11832001253722, "grad_norm": 12.57785415649414, "learning_rate": 2.9844206990665325e-05, "loss": 0.8111, "step": 7136 }, { "epoch": 1.1233349004858173, "grad_norm": 6.1240129470825195, "learning_rate": 2.980532677759177e-05, "loss": 0.49, "step": 7168 }, { "epoch": 1.1283497884344147, "grad_norm": 8.179468154907227, "learning_rate": 2.97621511133253e-05, "loss": 0.5289, "step": 7200 }, { "epoch": 1.133364676383012, "grad_norm": 13.069085121154785, "learning_rate": 2.971469252436813e-05, "loss": 0.2983, "step": 7232 }, { "epoch": 1.1383795643316095, "grad_norm": 11.689116477966309, "learning_rate": 2.9662964779822125e-05, "loss": 0.5183, "step": 7264 }, { "epoch": 1.1433944522802069, "grad_norm": 6.402202606201172, "learning_rate": 2.9606982887393993e-05, "loss": 0.3254, "step": 7296 }, { "epoch": 1.1484093402288043, "grad_norm": 17.79107093811035, "learning_rate": 2.9546763089041115e-05, "loss": 0.5142, "step": 7328 }, { "epoch": 1.1534242281774016, "grad_norm": 3.3558926582336426, "learning_rate": 2.9482322856259305e-05, "loss": 0.5605, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_nli-pairs_loss": 1.034800410270691, "eval_nli-pairs_runtime": 3.6881, "eval_nli-pairs_samples_per_second": 27.114, "eval_nli-pairs_steps_per_second": 1.085, "eval_sts-test_pearson_cosine": 0.7910968553972442, "eval_sts-test_pearson_dot": 0.5191989002837457, "eval_sts-test_pearson_euclidean": 0.7346238729069505, "eval_sts-test_pearson_manhattan": 0.7286075410186882, "eval_sts-test_pearson_max": 0.7910968553972442, "eval_sts-test_spearman_cosine": 0.8066961580110351, "eval_sts-test_spearman_dot": 0.5084443140830514, "eval_sts-test_spearman_euclidean": 0.72712818838666, "eval_sts-test_spearman_manhattan": 0.7230020447891047, "eval_sts-test_spearman_max": 0.8066961580110351, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_vitaminc-pairs_loss": 4.861147403717041, "eval_vitaminc-pairs_runtime": 1.2006, "eval_vitaminc-pairs_samples_per_second": 70.796, "eval_vitaminc-pairs_steps_per_second": 2.499, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_sts-label_loss": 3.832930326461792, "eval_sts-label_runtime": 0.2878, "eval_sts-label_samples_per_second": 347.487, "eval_sts-label_steps_per_second": 13.899, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_qnli-contrastive_loss": 0.20628628134727478, "eval_qnli-contrastive_runtime": 0.3622, "eval_qnli-contrastive_samples_per_second": 276.06, "eval_qnli-contrastive_steps_per_second": 11.042, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_scitail-pairs-qa_loss": 0.06246212124824524, "eval_scitail-pairs-qa_runtime": 0.9341, "eval_scitail-pairs-qa_samples_per_second": 107.06, "eval_scitail-pairs-qa_steps_per_second": 4.282, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_scitail-pairs-pos_loss": 0.4741693437099457, "eval_scitail-pairs-pos_runtime": 1.6197, "eval_scitail-pairs-pos_samples_per_second": 61.738, "eval_scitail-pairs-pos_steps_per_second": 2.47, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_xsum-pairs_loss": 0.23739749193191528, "eval_xsum-pairs_runtime": 0.9463, "eval_xsum-pairs_samples_per_second": 105.68, "eval_xsum-pairs_steps_per_second": 4.227, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_compression-pairs_loss": 0.09255027025938034, "eval_compression-pairs_runtime": 0.2828, "eval_compression-pairs_samples_per_second": 353.649, "eval_compression-pairs_steps_per_second": 14.146, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_sciq_pairs_loss": 0.2770608365535736, "eval_sciq_pairs_runtime": 4.1267, "eval_sciq_pairs_samples_per_second": 24.232, "eval_sciq_pairs_steps_per_second": 0.969, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_qasc_pairs_loss": 0.18835808336734772, "eval_qasc_pairs_runtime": 1.0608, "eval_qasc_pairs_samples_per_second": 94.272, "eval_qasc_pairs_steps_per_second": 3.771, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_openbookqa_pairs_loss": 1.8109256029129028, "eval_openbookqa_pairs_runtime": 0.9025, "eval_openbookqa_pairs_samples_per_second": 110.805, "eval_openbookqa_pairs_steps_per_second": 4.432, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_msmarco_pairs_loss": 0.5193920731544495, "eval_msmarco_pairs_runtime": 2.1117, "eval_msmarco_pairs_samples_per_second": 47.354, "eval_msmarco_pairs_steps_per_second": 1.894, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_nq_pairs_loss": 0.2714031934738159, "eval_nq_pairs_runtime": 4.5373, "eval_nq_pairs_samples_per_second": 22.04, "eval_nq_pairs_steps_per_second": 0.882, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_trivia_pairs_loss": 0.939833402633667, "eval_trivia_pairs_runtime": 6.4956, "eval_trivia_pairs_samples_per_second": 15.395, "eval_trivia_pairs_steps_per_second": 0.616, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_quora_pairs_loss": 0.058685559779405594, "eval_quora_pairs_runtime": 0.6769, "eval_quora_pairs_samples_per_second": 147.738, "eval_quora_pairs_steps_per_second": 5.91, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_gooaq_pairs_loss": 0.6361711621284485, "eval_gooaq_pairs_runtime": 1.435, "eval_gooaq_pairs_samples_per_second": 69.685, "eval_gooaq_pairs_steps_per_second": 2.787, "step": 7360 }, { "epoch": 1.1534242281774016, "eval_mrpc_pairs_loss": 0.047355230897665024, "eval_mrpc_pairs_runtime": 0.2779, "eval_mrpc_pairs_samples_per_second": 359.791, "eval_mrpc_pairs_steps_per_second": 14.392, "step": 7360 }, { "epoch": 1.158439116125999, "grad_norm": 0.946281909942627, "learning_rate": 2.9413680885013797e-05, "loss": 0.6993, "step": 7392 }, { "epoch": 1.1634540040745964, "grad_norm": 7.1736626625061035, "learning_rate": 2.9340857090315025e-05, "loss": 0.3437, "step": 7424 }, { "epoch": 1.1684688920231938, "grad_norm": 0.19313736259937286, "learning_rate": 2.9263872600440707e-05, "loss": 0.3281, "step": 7456 }, { "epoch": 1.1734837799717912, "grad_norm": 12.984513282775879, "learning_rate": 2.9182749750805903e-05, "loss": 1.0286, "step": 7488 }, { "epoch": 1.1784986679203886, "grad_norm": 0.5984382033348083, "learning_rate": 2.9097512077482918e-05, "loss": 0.6668, "step": 7520 }, { "epoch": 1.183513555868986, "grad_norm": 4.237669944763184, "learning_rate": 2.9008184310372744e-05, "loss": 0.3861, "step": 7552 }, { "epoch": 1.1885284438175834, "grad_norm": 0.4000037610530853, "learning_rate": 2.891479236603025e-05, "loss": 0.4096, "step": 7584 }, { "epoch": 1.1935433317661808, "grad_norm": 13.399718284606934, "learning_rate": 2.8817363340145038e-05, "loss": 0.5836, "step": 7616 }, { "epoch": 1.1985582197147782, "grad_norm": 1.461013913154602, "learning_rate": 2.8715925499680188e-05, "loss": 0.2649, "step": 7648 }, { "epoch": 1.2035731076633756, "grad_norm": 6.206007957458496, "learning_rate": 2.8610508274671218e-05, "loss": 0.5884, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_nli-pairs_loss": 1.0354279279708862, "eval_nli-pairs_runtime": 3.7382, "eval_nli-pairs_samples_per_second": 26.751, "eval_nli-pairs_steps_per_second": 1.07, "eval_sts-test_pearson_cosine": 0.7841729020272651, "eval_sts-test_pearson_dot": 0.5058693889598734, "eval_sts-test_pearson_euclidean": 0.7294148871338325, "eval_sts-test_pearson_manhattan": 0.7246093271358469, "eval_sts-test_pearson_max": 0.7841729020272651, "eval_sts-test_spearman_cosine": 0.8000443657886165, "eval_sts-test_spearman_dot": 0.49286718177568123, "eval_sts-test_spearman_euclidean": 0.7196647955405734, "eval_sts-test_spearman_manhattan": 0.7181182061459461, "eval_sts-test_spearman_max": 0.8000443657886165, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_vitaminc-pairs_loss": 4.786523342132568, "eval_vitaminc-pairs_runtime": 1.1677, "eval_vitaminc-pairs_samples_per_second": 72.794, "eval_vitaminc-pairs_steps_per_second": 2.569, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_sts-label_loss": 3.80216383934021, "eval_sts-label_runtime": 0.2869, "eval_sts-label_samples_per_second": 348.532, "eval_sts-label_steps_per_second": 13.941, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_qnli-contrastive_loss": 0.15080063045024872, "eval_qnli-contrastive_runtime": 0.3765, "eval_qnli-contrastive_samples_per_second": 265.584, "eval_qnli-contrastive_steps_per_second": 10.623, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_scitail-pairs-qa_loss": 0.05477406457066536, "eval_scitail-pairs-qa_runtime": 0.9695, "eval_scitail-pairs-qa_samples_per_second": 103.142, "eval_scitail-pairs-qa_steps_per_second": 4.126, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_scitail-pairs-pos_loss": 0.49995747208595276, "eval_scitail-pairs-pos_runtime": 1.4259, "eval_scitail-pairs-pos_samples_per_second": 70.132, "eval_scitail-pairs-pos_steps_per_second": 2.805, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_xsum-pairs_loss": 0.24929432570934296, "eval_xsum-pairs_runtime": 0.9657, "eval_xsum-pairs_samples_per_second": 103.554, "eval_xsum-pairs_steps_per_second": 4.142, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_compression-pairs_loss": 0.08594885468482971, "eval_compression-pairs_runtime": 0.2846, "eval_compression-pairs_samples_per_second": 351.315, "eval_compression-pairs_steps_per_second": 14.053, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_sciq_pairs_loss": 0.28326743841171265, "eval_sciq_pairs_runtime": 4.1832, "eval_sciq_pairs_samples_per_second": 23.905, "eval_sciq_pairs_steps_per_second": 0.956, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_qasc_pairs_loss": 0.1851280927658081, "eval_qasc_pairs_runtime": 1.1629, "eval_qasc_pairs_samples_per_second": 85.993, "eval_qasc_pairs_steps_per_second": 3.44, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_openbookqa_pairs_loss": 1.686630368232727, "eval_openbookqa_pairs_runtime": 0.9518, "eval_openbookqa_pairs_samples_per_second": 105.066, "eval_openbookqa_pairs_steps_per_second": 4.203, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_msmarco_pairs_loss": 0.5506166219711304, "eval_msmarco_pairs_runtime": 2.1738, "eval_msmarco_pairs_samples_per_second": 46.002, "eval_msmarco_pairs_steps_per_second": 1.84, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_nq_pairs_loss": 0.24249011278152466, "eval_nq_pairs_runtime": 4.6491, "eval_nq_pairs_samples_per_second": 21.51, "eval_nq_pairs_steps_per_second": 0.86, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_trivia_pairs_loss": 0.9296412467956543, "eval_trivia_pairs_runtime": 6.6163, "eval_trivia_pairs_samples_per_second": 15.114, "eval_trivia_pairs_steps_per_second": 0.605, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_quora_pairs_loss": 0.0314582884311676, "eval_quora_pairs_runtime": 0.7294, "eval_quora_pairs_samples_per_second": 137.107, "eval_quora_pairs_steps_per_second": 5.484, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_gooaq_pairs_loss": 0.5459653735160828, "eval_gooaq_pairs_runtime": 1.4937, "eval_gooaq_pairs_samples_per_second": 66.948, "eval_gooaq_pairs_steps_per_second": 2.678, "step": 7680 }, { "epoch": 1.2035731076633756, "eval_mrpc_pairs_loss": 0.04276818782091141, "eval_mrpc_pairs_runtime": 0.2699, "eval_mrpc_pairs_samples_per_second": 370.467, "eval_mrpc_pairs_steps_per_second": 14.819, "step": 7680 }, { "epoch": 1.208587995611973, "grad_norm": 2.091714859008789, "learning_rate": 2.8501142249687554e-05, "loss": 0.7018, "step": 7712 }, { "epoch": 1.2136028835605703, "grad_norm": 0.21109235286712646, "learning_rate": 2.838785915495912e-05, "loss": 0.7082, "step": 7744 }, { "epoch": 1.2186177715091677, "grad_norm": 0.1267768293619156, "learning_rate": 2.827069185717042e-05, "loss": 0.7527, "step": 7776 }, { "epoch": 1.2236326594577653, "grad_norm": 1.6667953729629517, "learning_rate": 2.8149674349925023e-05, "loss": 0.4255, "step": 7808 }, { "epoch": 1.2286475474063627, "grad_norm": 12.699274063110352, "learning_rate": 2.8024841743882998e-05, "loss": 0.7488, "step": 7840 }, { "epoch": 1.2336624353549601, "grad_norm": 8.052750587463379, "learning_rate": 2.7896230256574348e-05, "loss": 0.3364, "step": 7872 }, { "epoch": 1.2386773233035575, "grad_norm": 7.821995258331299, "learning_rate": 2.7763877201891205e-05, "loss": 0.6963, "step": 7904 }, { "epoch": 1.243692211252155, "grad_norm": 5.756433486938477, "learning_rate": 2.762782097926205e-05, "loss": 0.2829, "step": 7936 }, { "epoch": 1.2487070992007523, "grad_norm": 18.80353355407715, "learning_rate": 2.7488101062510904e-05, "loss": 0.7504, "step": 7968 }, { "epoch": 1.2537219871493497, "grad_norm": 3.668611526489258, "learning_rate": 2.734475798840485e-05, "loss": 0.7759, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_nli-pairs_loss": 0.9781379699707031, "eval_nli-pairs_runtime": 3.6268, "eval_nli-pairs_samples_per_second": 27.573, "eval_nli-pairs_steps_per_second": 1.103, "eval_sts-test_pearson_cosine": 0.7896747038559737, "eval_sts-test_pearson_dot": 0.5160875833412549, "eval_sts-test_pearson_euclidean": 0.7398944244671477, "eval_sts-test_pearson_manhattan": 0.7345204191784053, "eval_sts-test_pearson_max": 0.7896747038559737, "eval_sts-test_spearman_cosine": 0.81067276102482, "eval_sts-test_spearman_dot": 0.5010127030277397, "eval_sts-test_spearman_euclidean": 0.7318872170742919, "eval_sts-test_spearman_manhattan": 0.7283578865769135, "eval_sts-test_spearman_max": 0.81067276102482, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_vitaminc-pairs_loss": 4.802966594696045, "eval_vitaminc-pairs_runtime": 1.1396, "eval_vitaminc-pairs_samples_per_second": 74.59, "eval_vitaminc-pairs_steps_per_second": 2.633, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_sts-label_loss": 4.1984076499938965, "eval_sts-label_runtime": 0.2755, "eval_sts-label_samples_per_second": 362.988, "eval_sts-label_steps_per_second": 14.52, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_qnli-contrastive_loss": 0.23863555490970612, "eval_qnli-contrastive_runtime": 0.3602, "eval_qnli-contrastive_samples_per_second": 277.617, "eval_qnli-contrastive_steps_per_second": 11.105, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_scitail-pairs-qa_loss": 0.06188047304749489, "eval_scitail-pairs-qa_runtime": 0.8935, "eval_scitail-pairs-qa_samples_per_second": 111.921, "eval_scitail-pairs-qa_steps_per_second": 4.477, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_scitail-pairs-pos_loss": 0.44846847653388977, "eval_scitail-pairs-pos_runtime": 1.3467, "eval_scitail-pairs-pos_samples_per_second": 74.254, "eval_scitail-pairs-pos_steps_per_second": 2.97, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_xsum-pairs_loss": 0.2367183268070221, "eval_xsum-pairs_runtime": 0.9443, "eval_xsum-pairs_samples_per_second": 105.898, "eval_xsum-pairs_steps_per_second": 4.236, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_compression-pairs_loss": 0.08503348380327225, "eval_compression-pairs_runtime": 0.2921, "eval_compression-pairs_samples_per_second": 342.302, "eval_compression-pairs_steps_per_second": 13.692, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_sciq_pairs_loss": 0.270333856344223, "eval_sciq_pairs_runtime": 4.0839, "eval_sciq_pairs_samples_per_second": 24.486, "eval_sciq_pairs_steps_per_second": 0.979, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_qasc_pairs_loss": 0.18802641332149506, "eval_qasc_pairs_runtime": 1.0724, "eval_qasc_pairs_samples_per_second": 93.25, "eval_qasc_pairs_steps_per_second": 3.73, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_openbookqa_pairs_loss": 1.7418819665908813, "eval_openbookqa_pairs_runtime": 0.8925, "eval_openbookqa_pairs_samples_per_second": 112.041, "eval_openbookqa_pairs_steps_per_second": 4.482, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_msmarco_pairs_loss": 0.4858554005622864, "eval_msmarco_pairs_runtime": 2.0565, "eval_msmarco_pairs_samples_per_second": 48.627, "eval_msmarco_pairs_steps_per_second": 1.945, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_nq_pairs_loss": 0.23637117445468903, "eval_nq_pairs_runtime": 4.5088, "eval_nq_pairs_samples_per_second": 22.179, "eval_nq_pairs_steps_per_second": 0.887, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_trivia_pairs_loss": 0.7162200808525085, "eval_trivia_pairs_runtime": 6.4981, "eval_trivia_pairs_samples_per_second": 15.389, "eval_trivia_pairs_steps_per_second": 0.616, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_quora_pairs_loss": 0.06219913437962532, "eval_quora_pairs_runtime": 0.6795, "eval_quora_pairs_samples_per_second": 147.17, "eval_quora_pairs_steps_per_second": 5.887, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_gooaq_pairs_loss": 0.609254777431488, "eval_gooaq_pairs_runtime": 1.4106, "eval_gooaq_pairs_samples_per_second": 70.891, "eval_gooaq_pairs_steps_per_second": 2.836, "step": 8000 }, { "epoch": 1.2537219871493497, "eval_mrpc_pairs_loss": 0.04724707454442978, "eval_mrpc_pairs_runtime": 0.2638, "eval_mrpc_pairs_samples_per_second": 379.073, "eval_mrpc_pairs_steps_per_second": 15.163, "step": 8000 }, { "epoch": 1.258736875097947, "grad_norm": 0.6591500043869019, "learning_rate": 2.7197833344893126e-05, "loss": 0.5297, "step": 8032 }, { "epoch": 1.2637517630465445, "grad_norm": 4.714929103851318, "learning_rate": 2.7047369759041298e-05, "loss": 0.4933, "step": 8064 }, { "epoch": 1.2687666509951419, "grad_norm": 0.15916971862316132, "learning_rate": 2.6893410884663914e-05, "loss": 0.3868, "step": 8096 }, { "epoch": 1.2737815389437392, "grad_norm": 0.6014376282691956, "learning_rate": 2.6736001389659254e-05, "loss": 0.9955, "step": 8128 }, { "epoch": 1.2787964268923366, "grad_norm": 2.986762762069702, "learning_rate": 2.6575186943049913e-05, "loss": 0.5548, "step": 8160 }, { "epoch": 1.283811314840934, "grad_norm": 0.3188874125480652, "learning_rate": 2.6411014201732884e-05, "loss": 0.4924, "step": 8192 }, { "epoch": 1.2888262027895314, "grad_norm": 0.7150152921676636, "learning_rate": 2.624353079694308e-05, "loss": 0.3422, "step": 8224 }, { "epoch": 1.2938410907381288, "grad_norm": 0.8286885619163513, "learning_rate": 2.6072785320434107e-05, "loss": 0.4707, "step": 8256 }, { "epoch": 1.2988559786867262, "grad_norm": 27.87748146057129, "learning_rate": 2.5898827310380408e-05, "loss": 0.3956, "step": 8288 }, { "epoch": 1.3038708666353236, "grad_norm": 0.3072638213634491, "learning_rate": 2.5721707237004854e-05, "loss": 0.547, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_nli-pairs_loss": 0.9301618933677673, "eval_nli-pairs_runtime": 3.7138, "eval_nli-pairs_samples_per_second": 26.926, "eval_nli-pairs_steps_per_second": 1.077, "eval_sts-test_pearson_cosine": 0.7849967022727309, "eval_sts-test_pearson_dot": 0.4795538577643521, "eval_sts-test_pearson_euclidean": 0.7253853385122256, "eval_sts-test_pearson_manhattan": 0.7194021088193217, "eval_sts-test_pearson_max": 0.7849967022727309, "eval_sts-test_spearman_cosine": 0.8020224630491872, "eval_sts-test_spearman_dot": 0.46441948467132393, "eval_sts-test_spearman_euclidean": 0.7190775648500753, "eval_sts-test_spearman_manhattan": 0.7154699878910861, "eval_sts-test_spearman_max": 0.8020224630491872, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_vitaminc-pairs_loss": 4.762923240661621, "eval_vitaminc-pairs_runtime": 1.1277, "eval_vitaminc-pairs_samples_per_second": 75.372, "eval_vitaminc-pairs_steps_per_second": 2.66, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_sts-label_loss": 3.531181573867798, "eval_sts-label_runtime": 0.2802, "eval_sts-label_samples_per_second": 356.848, "eval_sts-label_steps_per_second": 14.274, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_qnli-contrastive_loss": 0.13507510721683502, "eval_qnli-contrastive_runtime": 0.3622, "eval_qnli-contrastive_samples_per_second": 276.104, "eval_qnli-contrastive_steps_per_second": 11.044, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_scitail-pairs-qa_loss": 0.052693866193294525, "eval_scitail-pairs-qa_runtime": 0.8696, "eval_scitail-pairs-qa_samples_per_second": 115.0, "eval_scitail-pairs-qa_steps_per_second": 4.6, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_scitail-pairs-pos_loss": 0.47491660714149475, "eval_scitail-pairs-pos_runtime": 1.3447, "eval_scitail-pairs-pos_samples_per_second": 74.365, "eval_scitail-pairs-pos_steps_per_second": 2.975, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_xsum-pairs_loss": 0.23617514967918396, "eval_xsum-pairs_runtime": 0.9378, "eval_xsum-pairs_samples_per_second": 106.627, "eval_xsum-pairs_steps_per_second": 4.265, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_compression-pairs_loss": 0.07913873344659805, "eval_compression-pairs_runtime": 0.2742, "eval_compression-pairs_samples_per_second": 364.643, "eval_compression-pairs_steps_per_second": 14.586, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_sciq_pairs_loss": 0.298448383808136, "eval_sciq_pairs_runtime": 4.0839, "eval_sciq_pairs_samples_per_second": 24.486, "eval_sciq_pairs_steps_per_second": 0.979, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_qasc_pairs_loss": 0.1738889515399933, "eval_qasc_pairs_runtime": 1.0525, "eval_qasc_pairs_samples_per_second": 95.013, "eval_qasc_pairs_steps_per_second": 3.801, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_openbookqa_pairs_loss": 1.8043091297149658, "eval_openbookqa_pairs_runtime": 0.893, "eval_openbookqa_pairs_samples_per_second": 111.985, "eval_openbookqa_pairs_steps_per_second": 4.479, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_msmarco_pairs_loss": 0.5003547668457031, "eval_msmarco_pairs_runtime": 2.0613, "eval_msmarco_pairs_samples_per_second": 48.513, "eval_msmarco_pairs_steps_per_second": 1.941, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_nq_pairs_loss": 0.21183601021766663, "eval_nq_pairs_runtime": 4.5233, "eval_nq_pairs_samples_per_second": 22.108, "eval_nq_pairs_steps_per_second": 0.884, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_trivia_pairs_loss": 0.8857311010360718, "eval_trivia_pairs_runtime": 6.4553, "eval_trivia_pairs_samples_per_second": 15.491, "eval_trivia_pairs_steps_per_second": 0.62, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_quora_pairs_loss": 0.029251573607325554, "eval_quora_pairs_runtime": 0.6755, "eval_quora_pairs_samples_per_second": 148.047, "eval_quora_pairs_steps_per_second": 5.922, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_gooaq_pairs_loss": 0.5669267773628235, "eval_gooaq_pairs_runtime": 1.4109, "eval_gooaq_pairs_samples_per_second": 70.878, "eval_gooaq_pairs_steps_per_second": 2.835, "step": 8320 }, { "epoch": 1.3038708666353236, "eval_mrpc_pairs_loss": 0.04050436615943909, "eval_mrpc_pairs_runtime": 0.2684, "eval_mrpc_pairs_samples_per_second": 372.59, "eval_mrpc_pairs_steps_per_second": 14.904, "step": 8320 }, { "epoch": 1.308885754583921, "grad_norm": 0.1075374037027359, "learning_rate": 2.5541476487935806e-05, "loss": 0.5412, "step": 8352 }, { "epoch": 1.3139006425325184, "grad_norm": 7.75120735168457, "learning_rate": 2.535818735329815e-05, "loss": 0.3885, "step": 8384 }, { "epoch": 1.3189155304811158, "grad_norm": 0.5364068150520325, "learning_rate": 2.5171893010542385e-05, "loss": 0.4274, "step": 8416 }, { "epoch": 1.3239304184297132, "grad_norm": 0.6744114756584167, "learning_rate": 2.4988605558565137e-05, "loss": 0.893, "step": 8448 }, { "epoch": 1.3289453063783105, "grad_norm": 1.6057082414627075, "learning_rate": 2.4796553472267232e-05, "loss": 0.3456, "step": 8480 }, { "epoch": 1.333960194326908, "grad_norm": 0.8727301955223083, "learning_rate": 2.460165912399626e-05, "loss": 0.4292, "step": 8512 }, { "epoch": 1.3389750822755053, "grad_norm": 0.23973700404167175, "learning_rate": 2.440397905820904e-05, "loss": 0.4275, "step": 8544 }, { "epoch": 1.343989970224103, "grad_norm": 16.09794807434082, "learning_rate": 2.4203570627579187e-05, "loss": 0.3236, "step": 8576 }, { "epoch": 1.3490048581727003, "grad_norm": 0.1335248500108719, "learning_rate": 2.4000491976357433e-05, "loss": 0.3961, "step": 8608 }, { "epoch": 1.3540197461212977, "grad_norm": 4.587371349334717, "learning_rate": 2.3794802023502332e-05, "loss": 0.5146, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_nli-pairs_loss": 0.996806800365448, "eval_nli-pairs_runtime": 3.6888, "eval_nli-pairs_samples_per_second": 27.109, "eval_nli-pairs_steps_per_second": 1.084, "eval_sts-test_pearson_cosine": 0.7869180410057008, "eval_sts-test_pearson_dot": 0.4938689019771704, "eval_sts-test_pearson_euclidean": 0.7205117910572312, "eval_sts-test_pearson_manhattan": 0.7128032248904813, "eval_sts-test_pearson_max": 0.7869180410057008, "eval_sts-test_spearman_cosine": 0.8042081001243602, "eval_sts-test_spearman_dot": 0.48091332474106047, "eval_sts-test_spearman_euclidean": 0.7130418025896658, "eval_sts-test_spearman_manhattan": 0.7066951779815502, "eval_sts-test_spearman_max": 0.8042081001243602, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_vitaminc-pairs_loss": 4.757246971130371, "eval_vitaminc-pairs_runtime": 1.13, "eval_vitaminc-pairs_samples_per_second": 75.223, "eval_vitaminc-pairs_steps_per_second": 2.655, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_sts-label_loss": 3.562749147415161, "eval_sts-label_runtime": 0.2852, "eval_sts-label_samples_per_second": 350.667, "eval_sts-label_steps_per_second": 14.027, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_qnli-contrastive_loss": 0.10447724163532257, "eval_qnli-contrastive_runtime": 0.3616, "eval_qnli-contrastive_samples_per_second": 276.535, "eval_qnli-contrastive_steps_per_second": 11.061, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_scitail-pairs-qa_loss": 0.050035107880830765, "eval_scitail-pairs-qa_runtime": 0.8786, "eval_scitail-pairs-qa_samples_per_second": 113.822, "eval_scitail-pairs-qa_steps_per_second": 4.553, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_scitail-pairs-pos_loss": 0.4792901873588562, "eval_scitail-pairs-pos_runtime": 1.3333, "eval_scitail-pairs-pos_samples_per_second": 75.004, "eval_scitail-pairs-pos_steps_per_second": 3.0, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_xsum-pairs_loss": 0.23096245527267456, "eval_xsum-pairs_runtime": 0.9402, "eval_xsum-pairs_samples_per_second": 106.362, "eval_xsum-pairs_steps_per_second": 4.254, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_compression-pairs_loss": 0.0777381882071495, "eval_compression-pairs_runtime": 0.2739, "eval_compression-pairs_samples_per_second": 365.114, "eval_compression-pairs_steps_per_second": 14.605, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_sciq_pairs_loss": 0.2707681953907013, "eval_sciq_pairs_runtime": 4.1199, "eval_sciq_pairs_samples_per_second": 24.272, "eval_sciq_pairs_steps_per_second": 0.971, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_qasc_pairs_loss": 0.1706008017063141, "eval_qasc_pairs_runtime": 1.065, "eval_qasc_pairs_samples_per_second": 93.9, "eval_qasc_pairs_steps_per_second": 3.756, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_openbookqa_pairs_loss": 1.7370460033416748, "eval_openbookqa_pairs_runtime": 0.8951, "eval_openbookqa_pairs_samples_per_second": 111.72, "eval_openbookqa_pairs_steps_per_second": 4.469, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_msmarco_pairs_loss": 0.4633770287036896, "eval_msmarco_pairs_runtime": 2.064, "eval_msmarco_pairs_samples_per_second": 48.449, "eval_msmarco_pairs_steps_per_second": 1.938, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_nq_pairs_loss": 0.21504688262939453, "eval_nq_pairs_runtime": 4.523, "eval_nq_pairs_samples_per_second": 22.109, "eval_nq_pairs_steps_per_second": 0.884, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_trivia_pairs_loss": 0.8408924341201782, "eval_trivia_pairs_runtime": 6.4614, "eval_trivia_pairs_samples_per_second": 15.476, "eval_trivia_pairs_steps_per_second": 0.619, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_quora_pairs_loss": 0.02466999925673008, "eval_quora_pairs_runtime": 0.681, "eval_quora_pairs_samples_per_second": 146.836, "eval_quora_pairs_steps_per_second": 5.873, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_gooaq_pairs_loss": 0.5489644408226013, "eval_gooaq_pairs_runtime": 1.4284, "eval_gooaq_pairs_samples_per_second": 70.007, "eval_gooaq_pairs_steps_per_second": 2.8, "step": 8640 }, { "epoch": 1.3540197461212977, "eval_mrpc_pairs_loss": 0.039087630808353424, "eval_mrpc_pairs_runtime": 0.2676, "eval_mrpc_pairs_samples_per_second": 373.632, "eval_mrpc_pairs_steps_per_second": 14.945, "step": 8640 }, { "epoch": 1.359034634069895, "grad_norm": 0.20070208609104156, "learning_rate": 2.3586560445586147e-05, "loss": 0.7562, "step": 8672 }, { "epoch": 1.3640495220184925, "grad_norm": 14.552980422973633, "learning_rate": 2.3375827659480975e-05, "loss": 0.7881, "step": 8704 }, { "epoch": 1.36906440996709, "grad_norm": 0.728196382522583, "learning_rate": 2.3162664804830062e-05, "loss": 0.6117, "step": 8736 }, { "epoch": 1.3740792979156873, "grad_norm": 104.08293151855469, "learning_rate": 2.2947133726309464e-05, "loss": 1.3083, "step": 8768 }, { "epoch": 1.3790941858642847, "grad_norm": 9.243626594543457, "learning_rate": 2.2729296955685097e-05, "loss": 0.5359, "step": 8800 }, { "epoch": 1.384109073812882, "grad_norm": 1.2041038274765015, "learning_rate": 2.2509217693670464e-05, "loss": 0.45, "step": 8832 }, { "epoch": 1.3891239617614795, "grad_norm": 3.953394889831543, "learning_rate": 2.2286959791590365e-05, "loss": 0.6022, "step": 8864 }, { "epoch": 1.3941388497100768, "grad_norm": 18.004009246826172, "learning_rate": 2.2062587732855727e-05, "loss": 0.6664, "step": 8896 }, { "epoch": 1.3991537376586742, "grad_norm": 4.0190887451171875, "learning_rate": 2.1836166614255147e-05, "loss": 0.3255, "step": 8928 } ], "logging_steps": 32, "max_steps": 12762, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1277, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }