bobox's picture
Training in progress, step 8939, checkpoint
6c4e8af verified
raw
history blame contribute delete
No virus
189 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.4008776053910046,
"eval_steps": 320,
"global_step": 8939,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0050148879485973985,
"grad_norm": 14.771158218383789,
"learning_rate": 9.707724425887265e-07,
"loss": 0.6329,
"step": 32
},
{
"epoch": 0.010029775897194797,
"grad_norm": 11.052021980285645,
"learning_rate": 1.9728601252609606e-06,
"loss": 0.9693,
"step": 64
},
{
"epoch": 0.015044663845792195,
"grad_norm": 20.26296615600586,
"learning_rate": 2.9749478079331944e-06,
"loss": 0.6548,
"step": 96
},
{
"epoch": 0.020059551794389594,
"grad_norm": 12.62913703918457,
"learning_rate": 3.945720250521921e-06,
"loss": 1.1279,
"step": 128
},
{
"epoch": 0.025074439742986992,
"grad_norm": 12.316486358642578,
"learning_rate": 4.916492693110647e-06,
"loss": 1.0017,
"step": 160
},
{
"epoch": 0.03008932769158439,
"grad_norm": 64.25923919677734,
"learning_rate": 5.918580375782881e-06,
"loss": 0.7571,
"step": 192
},
{
"epoch": 0.03510421564018179,
"grad_norm": 0.8205029368400574,
"learning_rate": 6.920668058455115e-06,
"loss": 0.7304,
"step": 224
},
{
"epoch": 0.04011910358877919,
"grad_norm": 6.598870754241943,
"learning_rate": 7.922755741127349e-06,
"loss": 0.7636,
"step": 256
},
{
"epoch": 0.045133991537376586,
"grad_norm": 8.728073120117188,
"learning_rate": 8.924843423799583e-06,
"loss": 0.482,
"step": 288
},
{
"epoch": 0.050148879485973984,
"grad_norm": 7.645521640777588,
"learning_rate": 9.926931106471817e-06,
"loss": 0.6312,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_nli-pairs_loss": 1.0158467292785645,
"eval_nli-pairs_runtime": 3.7267,
"eval_nli-pairs_samples_per_second": 26.833,
"eval_nli-pairs_steps_per_second": 1.073,
"eval_sts-test_pearson_cosine": 0.7848265412179125,
"eval_sts-test_pearson_dot": 0.5437080705284749,
"eval_sts-test_pearson_euclidean": 0.7445845076364892,
"eval_sts-test_pearson_manhattan": 0.7429239204432232,
"eval_sts-test_pearson_max": 0.7848265412179125,
"eval_sts-test_spearman_cosine": 0.7989504707258924,
"eval_sts-test_spearman_dot": 0.5206855421174118,
"eval_sts-test_spearman_euclidean": 0.733568982260844,
"eval_sts-test_spearman_manhattan": 0.7349407257944446,
"eval_sts-test_spearman_max": 0.7989504707258924,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_vitaminc-pairs_loss": 4.692601680755615,
"eval_vitaminc-pairs_runtime": 1.1397,
"eval_vitaminc-pairs_samples_per_second": 74.578,
"eval_vitaminc-pairs_steps_per_second": 2.632,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_sts-label_loss": 3.5502490997314453,
"eval_sts-label_runtime": 0.28,
"eval_sts-label_samples_per_second": 357.117,
"eval_sts-label_steps_per_second": 14.285,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_qnli-contrastive_loss": 0.16079513728618622,
"eval_qnli-contrastive_runtime": 0.3646,
"eval_qnli-contrastive_samples_per_second": 274.299,
"eval_qnli-contrastive_steps_per_second": 10.972,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_scitail-pairs-qa_loss": 0.07610582560300827,
"eval_scitail-pairs-qa_runtime": 0.8885,
"eval_scitail-pairs-qa_samples_per_second": 112.548,
"eval_scitail-pairs-qa_steps_per_second": 4.502,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_scitail-pairs-pos_loss": 0.5141278505325317,
"eval_scitail-pairs-pos_runtime": 1.3498,
"eval_scitail-pairs-pos_samples_per_second": 74.085,
"eval_scitail-pairs-pos_steps_per_second": 2.963,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_xsum-pairs_loss": 0.25581496953964233,
"eval_xsum-pairs_runtime": 0.9407,
"eval_xsum-pairs_samples_per_second": 106.304,
"eval_xsum-pairs_steps_per_second": 4.252,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_compression-pairs_loss": 0.09814296662807465,
"eval_compression-pairs_runtime": 0.2758,
"eval_compression-pairs_samples_per_second": 362.517,
"eval_compression-pairs_steps_per_second": 14.501,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_sciq_pairs_loss": 0.25620242953300476,
"eval_sciq_pairs_runtime": 4.1155,
"eval_sciq_pairs_samples_per_second": 24.298,
"eval_sciq_pairs_steps_per_second": 0.972,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_qasc_pairs_loss": 0.2044612169265747,
"eval_qasc_pairs_runtime": 1.1029,
"eval_qasc_pairs_samples_per_second": 90.672,
"eval_qasc_pairs_steps_per_second": 3.627,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_openbookqa_pairs_loss": 1.7537646293640137,
"eval_openbookqa_pairs_runtime": 0.9037,
"eval_openbookqa_pairs_samples_per_second": 110.653,
"eval_openbookqa_pairs_steps_per_second": 4.426,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_msmarco_pairs_loss": 0.5138561725616455,
"eval_msmarco_pairs_runtime": 2.0511,
"eval_msmarco_pairs_samples_per_second": 48.754,
"eval_msmarco_pairs_steps_per_second": 1.95,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_nq_pairs_loss": 0.23510317504405975,
"eval_nq_pairs_runtime": 4.5293,
"eval_nq_pairs_samples_per_second": 22.078,
"eval_nq_pairs_steps_per_second": 0.883,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_trivia_pairs_loss": 0.7808571457862854,
"eval_trivia_pairs_runtime": 6.5065,
"eval_trivia_pairs_samples_per_second": 15.369,
"eval_trivia_pairs_steps_per_second": 0.615,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_quora_pairs_loss": 0.0392119362950325,
"eval_quora_pairs_runtime": 0.675,
"eval_quora_pairs_samples_per_second": 148.153,
"eval_quora_pairs_steps_per_second": 5.926,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_gooaq_pairs_loss": 0.4712902009487152,
"eval_gooaq_pairs_runtime": 1.4079,
"eval_gooaq_pairs_samples_per_second": 71.028,
"eval_gooaq_pairs_steps_per_second": 2.841,
"step": 320
},
{
"epoch": 0.050148879485973984,
"eval_mrpc_pairs_loss": 0.05498996376991272,
"eval_mrpc_pairs_runtime": 0.2623,
"eval_mrpc_pairs_samples_per_second": 381.172,
"eval_mrpc_pairs_steps_per_second": 15.247,
"step": 320
},
{
"epoch": 0.05516376743457138,
"grad_norm": 0.34924012422561646,
"learning_rate": 1.092901878914405e-05,
"loss": 0.5791,
"step": 352
},
{
"epoch": 0.06017865538316878,
"grad_norm": 0.36700841784477234,
"learning_rate": 1.1931106471816284e-05,
"loss": 0.6413,
"step": 384
},
{
"epoch": 0.06519354333176618,
"grad_norm": 7.559622764587402,
"learning_rate": 1.2933194154488518e-05,
"loss": 0.4319,
"step": 416
},
{
"epoch": 0.07020843128036358,
"grad_norm": 7.982416152954102,
"learning_rate": 1.3935281837160753e-05,
"loss": 0.6672,
"step": 448
},
{
"epoch": 0.07522331922896097,
"grad_norm": 0.6726166009902954,
"learning_rate": 1.4937369519832987e-05,
"loss": 0.459,
"step": 480
},
{
"epoch": 0.08023820717755838,
"grad_norm": 14.846123695373535,
"learning_rate": 1.593945720250522e-05,
"loss": 0.7621,
"step": 512
},
{
"epoch": 0.08525309512615578,
"grad_norm": 0.7846627831459045,
"learning_rate": 1.6941544885177454e-05,
"loss": 0.864,
"step": 544
},
{
"epoch": 0.09026798307475317,
"grad_norm": 0.8993583917617798,
"learning_rate": 1.7943632567849688e-05,
"loss": 0.5081,
"step": 576
},
{
"epoch": 0.09528287102335058,
"grad_norm": 1.4990565776824951,
"learning_rate": 1.894572025052192e-05,
"loss": 0.654,
"step": 608
},
{
"epoch": 0.10029775897194797,
"grad_norm": 15.647976875305176,
"learning_rate": 1.9947807933194157e-05,
"loss": 0.6372,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_nli-pairs_loss": 1.0652996301651,
"eval_nli-pairs_runtime": 3.6326,
"eval_nli-pairs_samples_per_second": 27.528,
"eval_nli-pairs_steps_per_second": 1.101,
"eval_sts-test_pearson_cosine": 0.785263018402905,
"eval_sts-test_pearson_dot": 0.5290450141477089,
"eval_sts-test_pearson_euclidean": 0.7433756286425983,
"eval_sts-test_pearson_manhattan": 0.7411097274300102,
"eval_sts-test_pearson_max": 0.785263018402905,
"eval_sts-test_spearman_cosine": 0.7996928912411947,
"eval_sts-test_spearman_dot": 0.5102571497667188,
"eval_sts-test_spearman_euclidean": 0.7338969723324641,
"eval_sts-test_spearman_manhattan": 0.7343494860194358,
"eval_sts-test_spearman_max": 0.7996928912411947,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_vitaminc-pairs_loss": 4.719416618347168,
"eval_vitaminc-pairs_runtime": 1.1268,
"eval_vitaminc-pairs_samples_per_second": 75.437,
"eval_vitaminc-pairs_steps_per_second": 2.662,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_sts-label_loss": 3.612347364425659,
"eval_sts-label_runtime": 0.2683,
"eval_sts-label_samples_per_second": 372.651,
"eval_sts-label_steps_per_second": 14.906,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_qnli-contrastive_loss": 0.15202775597572327,
"eval_qnli-contrastive_runtime": 0.3528,
"eval_qnli-contrastive_samples_per_second": 283.457,
"eval_qnli-contrastive_steps_per_second": 11.338,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_scitail-pairs-qa_loss": 0.07544919103384018,
"eval_scitail-pairs-qa_runtime": 0.8732,
"eval_scitail-pairs-qa_samples_per_second": 114.517,
"eval_scitail-pairs-qa_steps_per_second": 4.581,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_scitail-pairs-pos_loss": 0.5404170751571655,
"eval_scitail-pairs-pos_runtime": 1.3146,
"eval_scitail-pairs-pos_samples_per_second": 76.067,
"eval_scitail-pairs-pos_steps_per_second": 3.043,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_xsum-pairs_loss": 0.25958582758903503,
"eval_xsum-pairs_runtime": 0.9287,
"eval_xsum-pairs_samples_per_second": 107.679,
"eval_xsum-pairs_steps_per_second": 4.307,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_compression-pairs_loss": 0.10066353529691696,
"eval_compression-pairs_runtime": 0.2732,
"eval_compression-pairs_samples_per_second": 366.076,
"eval_compression-pairs_steps_per_second": 14.643,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_sciq_pairs_loss": 0.2645374834537506,
"eval_sciq_pairs_runtime": 4.0725,
"eval_sciq_pairs_samples_per_second": 24.555,
"eval_sciq_pairs_steps_per_second": 0.982,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_qasc_pairs_loss": 0.21021947264671326,
"eval_qasc_pairs_runtime": 1.0743,
"eval_qasc_pairs_samples_per_second": 93.084,
"eval_qasc_pairs_steps_per_second": 3.723,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_openbookqa_pairs_loss": 1.7905032634735107,
"eval_openbookqa_pairs_runtime": 0.8886,
"eval_openbookqa_pairs_samples_per_second": 112.532,
"eval_openbookqa_pairs_steps_per_second": 4.501,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_msmarco_pairs_loss": 0.5102832913398743,
"eval_msmarco_pairs_runtime": 2.0529,
"eval_msmarco_pairs_samples_per_second": 48.712,
"eval_msmarco_pairs_steps_per_second": 1.948,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_nq_pairs_loss": 0.24466972053050995,
"eval_nq_pairs_runtime": 4.4973,
"eval_nq_pairs_samples_per_second": 22.235,
"eval_nq_pairs_steps_per_second": 0.889,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_trivia_pairs_loss": 0.8748095631599426,
"eval_trivia_pairs_runtime": 6.4825,
"eval_trivia_pairs_samples_per_second": 15.426,
"eval_trivia_pairs_steps_per_second": 0.617,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_quora_pairs_loss": 0.07820220291614532,
"eval_quora_pairs_runtime": 0.6944,
"eval_quora_pairs_samples_per_second": 144.008,
"eval_quora_pairs_steps_per_second": 5.76,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_gooaq_pairs_loss": 0.5236212611198425,
"eval_gooaq_pairs_runtime": 1.3899,
"eval_gooaq_pairs_samples_per_second": 71.949,
"eval_gooaq_pairs_steps_per_second": 2.878,
"step": 640
},
{
"epoch": 0.10029775897194797,
"eval_mrpc_pairs_loss": 0.05494727939367294,
"eval_mrpc_pairs_runtime": 0.2598,
"eval_mrpc_pairs_samples_per_second": 384.941,
"eval_mrpc_pairs_steps_per_second": 15.398,
"step": 640
},
{
"epoch": 0.10531264692054537,
"grad_norm": 11.01974105834961,
"learning_rate": 2.0949895615866387e-05,
"loss": 0.9292,
"step": 672
},
{
"epoch": 0.11032753486914276,
"grad_norm": 0.5542309284210205,
"learning_rate": 2.1951983298538625e-05,
"loss": 1.3108,
"step": 704
},
{
"epoch": 0.11534242281774017,
"grad_norm": 15.458569526672363,
"learning_rate": 2.2954070981210856e-05,
"loss": 0.9674,
"step": 736
},
{
"epoch": 0.12035731076633756,
"grad_norm": 2.7814478874206543,
"learning_rate": 2.395615866388309e-05,
"loss": 0.9226,
"step": 768
},
{
"epoch": 0.12537219871493496,
"grad_norm": 11.393244743347168,
"learning_rate": 2.4958246346555324e-05,
"loss": 0.789,
"step": 800
},
{
"epoch": 0.13038708666353235,
"grad_norm": 9.288290977478027,
"learning_rate": 2.596033402922756e-05,
"loss": 0.5186,
"step": 832
},
{
"epoch": 0.13540197461212977,
"grad_norm": 47.65571212768555,
"learning_rate": 2.6962421711899793e-05,
"loss": 0.6726,
"step": 864
},
{
"epoch": 0.14041686256072716,
"grad_norm": 12.908064842224121,
"learning_rate": 2.7964509394572024e-05,
"loss": 0.5381,
"step": 896
},
{
"epoch": 0.14543175050932455,
"grad_norm": 14.951742172241211,
"learning_rate": 2.896659707724426e-05,
"loss": 0.581,
"step": 928
},
{
"epoch": 0.15044663845792194,
"grad_norm": 20.12006187438965,
"learning_rate": 2.9968684759916492e-05,
"loss": 0.9038,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_nli-pairs_loss": 1.2173175811767578,
"eval_nli-pairs_runtime": 3.7098,
"eval_nli-pairs_samples_per_second": 26.955,
"eval_nli-pairs_steps_per_second": 1.078,
"eval_sts-test_pearson_cosine": 0.7840992835675669,
"eval_sts-test_pearson_dot": 0.5220462136106129,
"eval_sts-test_pearson_euclidean": 0.7457350047351855,
"eval_sts-test_pearson_manhattan": 0.7425970830541657,
"eval_sts-test_pearson_max": 0.7840992835675669,
"eval_sts-test_spearman_cosine": 0.8006376809572144,
"eval_sts-test_spearman_dot": 0.5020544543992158,
"eval_sts-test_spearman_euclidean": 0.7369257710408655,
"eval_sts-test_spearman_manhattan": 0.7362649758012406,
"eval_sts-test_spearman_max": 0.8006376809572144,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_vitaminc-pairs_loss": 4.774902820587158,
"eval_vitaminc-pairs_runtime": 1.1212,
"eval_vitaminc-pairs_samples_per_second": 75.809,
"eval_vitaminc-pairs_steps_per_second": 2.676,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_sts-label_loss": 3.198556900024414,
"eval_sts-label_runtime": 0.2678,
"eval_sts-label_samples_per_second": 373.382,
"eval_sts-label_steps_per_second": 14.935,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_qnli-contrastive_loss": 0.1943340301513672,
"eval_qnli-contrastive_runtime": 0.3511,
"eval_qnli-contrastive_samples_per_second": 284.789,
"eval_qnli-contrastive_steps_per_second": 11.392,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_scitail-pairs-qa_loss": 0.08060617744922638,
"eval_scitail-pairs-qa_runtime": 0.8778,
"eval_scitail-pairs-qa_samples_per_second": 113.92,
"eval_scitail-pairs-qa_steps_per_second": 4.557,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_scitail-pairs-pos_loss": 0.4759831428527832,
"eval_scitail-pairs-pos_runtime": 1.3609,
"eval_scitail-pairs-pos_samples_per_second": 73.48,
"eval_scitail-pairs-pos_steps_per_second": 2.939,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_xsum-pairs_loss": 0.27583304047584534,
"eval_xsum-pairs_runtime": 0.9343,
"eval_xsum-pairs_samples_per_second": 107.035,
"eval_xsum-pairs_steps_per_second": 4.281,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_compression-pairs_loss": 0.10094660520553589,
"eval_compression-pairs_runtime": 0.2739,
"eval_compression-pairs_samples_per_second": 365.047,
"eval_compression-pairs_steps_per_second": 14.602,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_sciq_pairs_loss": 0.2688131630420685,
"eval_sciq_pairs_runtime": 4.0582,
"eval_sciq_pairs_samples_per_second": 24.641,
"eval_sciq_pairs_steps_per_second": 0.986,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_qasc_pairs_loss": 0.23267821967601776,
"eval_qasc_pairs_runtime": 1.0554,
"eval_qasc_pairs_samples_per_second": 94.75,
"eval_qasc_pairs_steps_per_second": 3.79,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_openbookqa_pairs_loss": 1.8053069114685059,
"eval_openbookqa_pairs_runtime": 0.8871,
"eval_openbookqa_pairs_samples_per_second": 112.727,
"eval_openbookqa_pairs_steps_per_second": 4.509,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_msmarco_pairs_loss": 0.5809260606765747,
"eval_msmarco_pairs_runtime": 2.0498,
"eval_msmarco_pairs_samples_per_second": 48.786,
"eval_msmarco_pairs_steps_per_second": 1.951,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_nq_pairs_loss": 0.2808491885662079,
"eval_nq_pairs_runtime": 4.4982,
"eval_nq_pairs_samples_per_second": 22.231,
"eval_nq_pairs_steps_per_second": 0.889,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_trivia_pairs_loss": 0.9379808902740479,
"eval_trivia_pairs_runtime": 6.4578,
"eval_trivia_pairs_samples_per_second": 15.485,
"eval_trivia_pairs_steps_per_second": 0.619,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_quora_pairs_loss": 0.0913279801607132,
"eval_quora_pairs_runtime": 0.6721,
"eval_quora_pairs_samples_per_second": 148.79,
"eval_quora_pairs_steps_per_second": 5.952,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_gooaq_pairs_loss": 0.5807955265045166,
"eval_gooaq_pairs_runtime": 1.3915,
"eval_gooaq_pairs_samples_per_second": 71.865,
"eval_gooaq_pairs_steps_per_second": 2.875,
"step": 960
},
{
"epoch": 0.15044663845792194,
"eval_mrpc_pairs_loss": 0.05799216777086258,
"eval_mrpc_pairs_runtime": 0.2571,
"eval_mrpc_pairs_samples_per_second": 388.998,
"eval_mrpc_pairs_steps_per_second": 15.56,
"step": 960
},
{
"epoch": 0.15546152640651936,
"grad_norm": 9.773286819458008,
"learning_rate": 2.9997957904107625e-05,
"loss": 0.7964,
"step": 992
},
{
"epoch": 0.16047641435511675,
"grad_norm": 19.411075592041016,
"learning_rate": 2.9991566594209126e-05,
"loss": 0.8213,
"step": 1024
},
{
"epoch": 0.16549130230371414,
"grad_norm": 3.5282175540924072,
"learning_rate": 2.9980825799589488e-05,
"loss": 0.5396,
"step": 1056
},
{
"epoch": 0.17050619025231156,
"grad_norm": 62.66339874267578,
"learning_rate": 2.996573863646219e-05,
"loss": 0.9297,
"step": 1088
},
{
"epoch": 0.17552107820090895,
"grad_norm": 8.785274505615234,
"learning_rate": 2.994630948204727e-05,
"loss": 1.169,
"step": 1120
},
{
"epoch": 0.18053596614950634,
"grad_norm": 24.10859489440918,
"learning_rate": 2.992254397330132e-05,
"loss": 0.7486,
"step": 1152
},
{
"epoch": 0.18555085409810373,
"grad_norm": 25.545284271240234,
"learning_rate": 2.9894449005282077e-05,
"loss": 0.6821,
"step": 1184
},
{
"epoch": 0.19056574204670115,
"grad_norm": 0.8675521016120911,
"learning_rate": 2.9862032729147954e-05,
"loss": 0.6125,
"step": 1216
},
{
"epoch": 0.19558062999529854,
"grad_norm": 16.122114181518555,
"learning_rate": 2.9825304549793153e-05,
"loss": 0.8061,
"step": 1248
},
{
"epoch": 0.20059551794389593,
"grad_norm": 1.0314382314682007,
"learning_rate": 2.978427512311904e-05,
"loss": 0.6918,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_nli-pairs_loss": 1.1552109718322754,
"eval_nli-pairs_runtime": 3.8751,
"eval_nli-pairs_samples_per_second": 25.806,
"eval_nli-pairs_steps_per_second": 1.032,
"eval_sts-test_pearson_cosine": 0.786106976104726,
"eval_sts-test_pearson_dot": 0.5116758767219935,
"eval_sts-test_pearson_euclidean": 0.7432891018313416,
"eval_sts-test_pearson_manhattan": 0.7400929158927781,
"eval_sts-test_pearson_max": 0.786106976104726,
"eval_sts-test_spearman_cosine": 0.801377272203007,
"eval_sts-test_spearman_dot": 0.4921454166952506,
"eval_sts-test_spearman_euclidean": 0.7343686249967402,
"eval_sts-test_spearman_manhattan": 0.7331946050808561,
"eval_sts-test_spearman_max": 0.801377272203007,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_vitaminc-pairs_loss": 4.6789751052856445,
"eval_vitaminc-pairs_runtime": 1.1504,
"eval_vitaminc-pairs_samples_per_second": 73.889,
"eval_vitaminc-pairs_steps_per_second": 2.608,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_sts-label_loss": 3.5580556392669678,
"eval_sts-label_runtime": 0.2834,
"eval_sts-label_samples_per_second": 352.858,
"eval_sts-label_steps_per_second": 14.114,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_qnli-contrastive_loss": 0.20369713008403778,
"eval_qnli-contrastive_runtime": 0.358,
"eval_qnli-contrastive_samples_per_second": 279.331,
"eval_qnli-contrastive_steps_per_second": 11.173,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_scitail-pairs-qa_loss": 0.07465875148773193,
"eval_scitail-pairs-qa_runtime": 0.9504,
"eval_scitail-pairs-qa_samples_per_second": 105.214,
"eval_scitail-pairs-qa_steps_per_second": 4.209,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_scitail-pairs-pos_loss": 0.49434563517570496,
"eval_scitail-pairs-pos_runtime": 1.6041,
"eval_scitail-pairs-pos_samples_per_second": 62.339,
"eval_scitail-pairs-pos_steps_per_second": 2.494,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_xsum-pairs_loss": 0.28282061219215393,
"eval_xsum-pairs_runtime": 0.9316,
"eval_xsum-pairs_samples_per_second": 107.346,
"eval_xsum-pairs_steps_per_second": 4.294,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_compression-pairs_loss": 0.097385473549366,
"eval_compression-pairs_runtime": 0.2754,
"eval_compression-pairs_samples_per_second": 363.1,
"eval_compression-pairs_steps_per_second": 14.524,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_sciq_pairs_loss": 0.2762215733528137,
"eval_sciq_pairs_runtime": 4.2307,
"eval_sciq_pairs_samples_per_second": 23.637,
"eval_sciq_pairs_steps_per_second": 0.945,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_qasc_pairs_loss": 0.19347424805164337,
"eval_qasc_pairs_runtime": 1.2282,
"eval_qasc_pairs_samples_per_second": 81.421,
"eval_qasc_pairs_steps_per_second": 3.257,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_openbookqa_pairs_loss": 1.6875064373016357,
"eval_openbookqa_pairs_runtime": 1.1661,
"eval_openbookqa_pairs_samples_per_second": 85.754,
"eval_openbookqa_pairs_steps_per_second": 3.43,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_msmarco_pairs_loss": 0.5743877291679382,
"eval_msmarco_pairs_runtime": 2.1428,
"eval_msmarco_pairs_samples_per_second": 46.669,
"eval_msmarco_pairs_steps_per_second": 1.867,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_nq_pairs_loss": 0.30348217487335205,
"eval_nq_pairs_runtime": 4.5543,
"eval_nq_pairs_samples_per_second": 21.957,
"eval_nq_pairs_steps_per_second": 0.878,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_trivia_pairs_loss": 0.9221765995025635,
"eval_trivia_pairs_runtime": 6.6513,
"eval_trivia_pairs_samples_per_second": 15.035,
"eval_trivia_pairs_steps_per_second": 0.601,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_quora_pairs_loss": 0.03854631260037422,
"eval_quora_pairs_runtime": 0.7822,
"eval_quora_pairs_samples_per_second": 127.852,
"eval_quora_pairs_steps_per_second": 5.114,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_gooaq_pairs_loss": 0.528398334980011,
"eval_gooaq_pairs_runtime": 1.4882,
"eval_gooaq_pairs_samples_per_second": 67.194,
"eval_gooaq_pairs_steps_per_second": 2.688,
"step": 1280
},
{
"epoch": 0.20059551794389593,
"eval_mrpc_pairs_loss": 0.05623970925807953,
"eval_mrpc_pairs_runtime": 0.2698,
"eval_mrpc_pairs_samples_per_second": 370.713,
"eval_mrpc_pairs_steps_per_second": 14.829,
"step": 1280
},
{
"epoch": 0.20561040589249335,
"grad_norm": 0.6042119860649109,
"learning_rate": 2.9738956352942557e-05,
"loss": 0.9421,
"step": 1312
},
{
"epoch": 0.21062529384109074,
"grad_norm": 13.87867546081543,
"learning_rate": 2.968936138754259e-05,
"loss": 0.8641,
"step": 1344
},
{
"epoch": 0.21564018178968813,
"grad_norm": 44.48640441894531,
"learning_rate": 2.9635504615845257e-05,
"loss": 1.157,
"step": 1376
},
{
"epoch": 0.22065506973828553,
"grad_norm": 15.554729461669922,
"learning_rate": 2.957928148945977e-05,
"loss": 0.8772,
"step": 1408
},
{
"epoch": 0.22566995768688294,
"grad_norm": 16.644670486450195,
"learning_rate": 2.9517081112297707e-05,
"loss": 1.0496,
"step": 1440
},
{
"epoch": 0.23068484563548033,
"grad_norm": 13.053145408630371,
"learning_rate": 2.9450668912302004e-05,
"loss": 0.589,
"step": 1472
},
{
"epoch": 0.23569973358407773,
"grad_norm": 7.827791213989258,
"learning_rate": 2.9380064157562306e-05,
"loss": 0.8234,
"step": 1504
},
{
"epoch": 0.24071462153267512,
"grad_norm": 15.598438262939453,
"learning_rate": 2.930528733254901e-05,
"loss": 0.7365,
"step": 1536
},
{
"epoch": 0.24572950948127253,
"grad_norm": 13.723180770874023,
"learning_rate": 2.9226360132170112e-05,
"loss": 0.5076,
"step": 1568
},
{
"epoch": 0.2507443974298699,
"grad_norm": 10.20022964477539,
"learning_rate": 2.9143305455476866e-05,
"loss": 1.0329,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_nli-pairs_loss": 1.0577216148376465,
"eval_nli-pairs_runtime": 3.6476,
"eval_nli-pairs_samples_per_second": 27.415,
"eval_nli-pairs_steps_per_second": 1.097,
"eval_sts-test_pearson_cosine": 0.7876359552191669,
"eval_sts-test_pearson_dot": 0.5220803655074544,
"eval_sts-test_pearson_euclidean": 0.7444632413869628,
"eval_sts-test_pearson_manhattan": 0.7418744760088763,
"eval_sts-test_pearson_max": 0.7876359552191669,
"eval_sts-test_spearman_cosine": 0.8018874000525117,
"eval_sts-test_spearman_dot": 0.5034518981121652,
"eval_sts-test_spearman_euclidean": 0.7344750702387959,
"eval_sts-test_spearman_manhattan": 0.7332804063416474,
"eval_sts-test_spearman_max": 0.8018874000525117,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_vitaminc-pairs_loss": 4.784573554992676,
"eval_vitaminc-pairs_runtime": 1.145,
"eval_vitaminc-pairs_samples_per_second": 74.235,
"eval_vitaminc-pairs_steps_per_second": 2.62,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_sts-label_loss": 3.6113080978393555,
"eval_sts-label_runtime": 0.2746,
"eval_sts-label_samples_per_second": 364.172,
"eval_sts-label_steps_per_second": 14.567,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_qnli-contrastive_loss": 0.18593625724315643,
"eval_qnli-contrastive_runtime": 0.3541,
"eval_qnli-contrastive_samples_per_second": 282.413,
"eval_qnli-contrastive_steps_per_second": 11.297,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_scitail-pairs-qa_loss": 0.07545661181211472,
"eval_scitail-pairs-qa_runtime": 0.8854,
"eval_scitail-pairs-qa_samples_per_second": 112.941,
"eval_scitail-pairs-qa_steps_per_second": 4.518,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_scitail-pairs-pos_loss": 0.5018333792686462,
"eval_scitail-pairs-pos_runtime": 1.3443,
"eval_scitail-pairs-pos_samples_per_second": 74.386,
"eval_scitail-pairs-pos_steps_per_second": 2.975,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_xsum-pairs_loss": 0.2749001085758209,
"eval_xsum-pairs_runtime": 0.9439,
"eval_xsum-pairs_samples_per_second": 105.939,
"eval_xsum-pairs_steps_per_second": 4.238,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_compression-pairs_loss": 0.09735233336687088,
"eval_compression-pairs_runtime": 0.2764,
"eval_compression-pairs_samples_per_second": 361.753,
"eval_compression-pairs_steps_per_second": 14.47,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_sciq_pairs_loss": 0.2648228108882904,
"eval_sciq_pairs_runtime": 4.1207,
"eval_sciq_pairs_samples_per_second": 24.268,
"eval_sciq_pairs_steps_per_second": 0.971,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_qasc_pairs_loss": 0.21318012475967407,
"eval_qasc_pairs_runtime": 1.0917,
"eval_qasc_pairs_samples_per_second": 91.604,
"eval_qasc_pairs_steps_per_second": 3.664,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_openbookqa_pairs_loss": 1.790009617805481,
"eval_openbookqa_pairs_runtime": 0.8969,
"eval_openbookqa_pairs_samples_per_second": 111.496,
"eval_openbookqa_pairs_steps_per_second": 4.46,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_msmarco_pairs_loss": 0.57186359167099,
"eval_msmarco_pairs_runtime": 2.0592,
"eval_msmarco_pairs_samples_per_second": 48.563,
"eval_msmarco_pairs_steps_per_second": 1.943,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_nq_pairs_loss": 0.2738310396671295,
"eval_nq_pairs_runtime": 4.5092,
"eval_nq_pairs_samples_per_second": 22.177,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_trivia_pairs_loss": 0.8291679620742798,
"eval_trivia_pairs_runtime": 6.526,
"eval_trivia_pairs_samples_per_second": 15.323,
"eval_trivia_pairs_steps_per_second": 0.613,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_quora_pairs_loss": 0.08000540733337402,
"eval_quora_pairs_runtime": 0.6761,
"eval_quora_pairs_samples_per_second": 147.909,
"eval_quora_pairs_steps_per_second": 5.916,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_gooaq_pairs_loss": 0.5998037457466125,
"eval_gooaq_pairs_runtime": 1.3978,
"eval_gooaq_pairs_samples_per_second": 71.541,
"eval_gooaq_pairs_steps_per_second": 2.862,
"step": 1600
},
{
"epoch": 0.2507443974298699,
"eval_mrpc_pairs_loss": 0.05507182702422142,
"eval_mrpc_pairs_runtime": 0.2617,
"eval_mrpc_pairs_samples_per_second": 382.156,
"eval_mrpc_pairs_steps_per_second": 15.286,
"step": 1600
},
{
"epoch": 0.2557592853784673,
"grad_norm": 8.05022144317627,
"learning_rate": 2.9056147399020182e-05,
"loss": 1.4006,
"step": 1632
},
{
"epoch": 0.2607741733270647,
"grad_norm": 0.38224154710769653,
"learning_rate": 2.8964911249859437e-05,
"loss": 0.5963,
"step": 1664
},
{
"epoch": 0.2657890612756621,
"grad_norm": 0.46655791997909546,
"learning_rate": 2.886962347822604e-05,
"loss": 0.7488,
"step": 1696
},
{
"epoch": 0.27080394922425954,
"grad_norm": 8.102537155151367,
"learning_rate": 2.8770311729843616e-05,
"loss": 0.8548,
"step": 1728
},
{
"epoch": 0.27581883717285693,
"grad_norm": 11.803775787353516,
"learning_rate": 2.86670048179072e-05,
"loss": 1.3324,
"step": 1760
},
{
"epoch": 0.2808337251214543,
"grad_norm": 16.266756057739258,
"learning_rate": 2.8559732714723715e-05,
"loss": 0.5804,
"step": 1792
},
{
"epoch": 0.2858486130700517,
"grad_norm": 2.8448822498321533,
"learning_rate": 2.8448526543016114e-05,
"loss": 0.7827,
"step": 1824
},
{
"epoch": 0.2908635010186491,
"grad_norm": 21.346328735351562,
"learning_rate": 2.8333418566893796e-05,
"loss": 0.5448,
"step": 1856
},
{
"epoch": 0.2958783889672465,
"grad_norm": 3.4379029273986816,
"learning_rate": 2.8214442182491866e-05,
"loss": 0.7368,
"step": 1888
},
{
"epoch": 0.3008932769158439,
"grad_norm": 17.05881690979004,
"learning_rate": 2.8091631908281963e-05,
"loss": 0.5657,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_nli-pairs_loss": 1.0244356393814087,
"eval_nli-pairs_runtime": 3.6217,
"eval_nli-pairs_samples_per_second": 27.612,
"eval_nli-pairs_steps_per_second": 1.104,
"eval_sts-test_pearson_cosine": 0.781915957368962,
"eval_sts-test_pearson_dot": 0.49821032356844613,
"eval_sts-test_pearson_euclidean": 0.7329308897504494,
"eval_sts-test_pearson_manhattan": 0.7292186092506918,
"eval_sts-test_pearson_max": 0.781915957368962,
"eval_sts-test_spearman_cosine": 0.7983596570250642,
"eval_sts-test_spearman_dot": 0.4812350313638781,
"eval_sts-test_spearman_euclidean": 0.7265758267352669,
"eval_sts-test_spearman_manhattan": 0.7259264140902829,
"eval_sts-test_spearman_max": 0.7983596570250642,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_vitaminc-pairs_loss": 4.698296070098877,
"eval_vitaminc-pairs_runtime": 1.1338,
"eval_vitaminc-pairs_samples_per_second": 74.97,
"eval_vitaminc-pairs_steps_per_second": 2.646,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_sts-label_loss": 3.1822261810302734,
"eval_sts-label_runtime": 0.2702,
"eval_sts-label_samples_per_second": 370.09,
"eval_sts-label_steps_per_second": 14.804,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_qnli-contrastive_loss": 0.11326340585947037,
"eval_qnli-contrastive_runtime": 0.3581,
"eval_qnli-contrastive_samples_per_second": 279.28,
"eval_qnli-contrastive_steps_per_second": 11.171,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_scitail-pairs-qa_loss": 0.07009608298540115,
"eval_scitail-pairs-qa_runtime": 0.8816,
"eval_scitail-pairs-qa_samples_per_second": 113.424,
"eval_scitail-pairs-qa_steps_per_second": 4.537,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_scitail-pairs-pos_loss": 0.49156129360198975,
"eval_scitail-pairs-pos_runtime": 1.3759,
"eval_scitail-pairs-pos_samples_per_second": 72.678,
"eval_scitail-pairs-pos_steps_per_second": 2.907,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_xsum-pairs_loss": 0.25940877199172974,
"eval_xsum-pairs_runtime": 0.9373,
"eval_xsum-pairs_samples_per_second": 106.695,
"eval_xsum-pairs_steps_per_second": 4.268,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_compression-pairs_loss": 0.0919649675488472,
"eval_compression-pairs_runtime": 0.2738,
"eval_compression-pairs_samples_per_second": 365.291,
"eval_compression-pairs_steps_per_second": 14.612,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_sciq_pairs_loss": 0.29138606786727905,
"eval_sciq_pairs_runtime": 4.1059,
"eval_sciq_pairs_samples_per_second": 24.355,
"eval_sciq_pairs_steps_per_second": 0.974,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_qasc_pairs_loss": 0.19625085592269897,
"eval_qasc_pairs_runtime": 1.0611,
"eval_qasc_pairs_samples_per_second": 94.24,
"eval_qasc_pairs_steps_per_second": 3.77,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_openbookqa_pairs_loss": 1.7960456609725952,
"eval_openbookqa_pairs_runtime": 0.9042,
"eval_openbookqa_pairs_samples_per_second": 110.601,
"eval_openbookqa_pairs_steps_per_second": 4.424,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_msmarco_pairs_loss": 0.5171416997909546,
"eval_msmarco_pairs_runtime": 2.0637,
"eval_msmarco_pairs_samples_per_second": 48.457,
"eval_msmarco_pairs_steps_per_second": 1.938,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_nq_pairs_loss": 0.24809740483760834,
"eval_nq_pairs_runtime": 4.529,
"eval_nq_pairs_samples_per_second": 22.08,
"eval_nq_pairs_steps_per_second": 0.883,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_trivia_pairs_loss": 0.9041999578475952,
"eval_trivia_pairs_runtime": 6.5257,
"eval_trivia_pairs_samples_per_second": 15.324,
"eval_trivia_pairs_steps_per_second": 0.613,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_quora_pairs_loss": 0.03601976856589317,
"eval_quora_pairs_runtime": 0.6811,
"eval_quora_pairs_samples_per_second": 146.827,
"eval_quora_pairs_steps_per_second": 5.873,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_gooaq_pairs_loss": 0.5626399517059326,
"eval_gooaq_pairs_runtime": 1.3943,
"eval_gooaq_pairs_samples_per_second": 71.72,
"eval_gooaq_pairs_steps_per_second": 2.869,
"step": 1920
},
{
"epoch": 0.3008932769158439,
"eval_mrpc_pairs_loss": 0.04984402656555176,
"eval_mrpc_pairs_runtime": 0.2579,
"eval_mrpc_pairs_samples_per_second": 387.725,
"eval_mrpc_pairs_steps_per_second": 15.509,
"step": 1920
},
{
"epoch": 0.30590816486444133,
"grad_norm": 22.65591812133789,
"learning_rate": 2.796502337505742e-05,
"loss": 0.7425,
"step": 1952
},
{
"epoch": 0.3109230528130387,
"grad_norm": 10.119640350341797,
"learning_rate": 2.78346533155958e-05,
"loss": 0.7819,
"step": 1984
},
{
"epoch": 0.3159379407616361,
"grad_norm": 8.690531730651855,
"learning_rate": 2.770055955400161e-05,
"loss": 0.5937,
"step": 2016
},
{
"epoch": 0.3209528287102335,
"grad_norm": 0.8992699384689331,
"learning_rate": 2.7562780994732476e-05,
"loss": 0.8133,
"step": 2048
},
{
"epoch": 0.3259677166588309,
"grad_norm": 10.619684219360352,
"learning_rate": 2.7421357611311824e-05,
"loss": 1.0674,
"step": 2080
},
{
"epoch": 0.3309826046074283,
"grad_norm": 7.222084045410156,
"learning_rate": 2.727633043473141e-05,
"loss": 0.6288,
"step": 2112
},
{
"epoch": 0.3359974925560257,
"grad_norm": 10.166888236999512,
"learning_rate": 2.712774154154707e-05,
"loss": 0.5866,
"step": 2144
},
{
"epoch": 0.3410123805046231,
"grad_norm": 0.36360761523246765,
"learning_rate": 2.6975634041671052e-05,
"loss": 0.6962,
"step": 2176
},
{
"epoch": 0.3460272684532205,
"grad_norm": 9.586665153503418,
"learning_rate": 2.6820052065864665e-05,
"loss": 0.5562,
"step": 2208
},
{
"epoch": 0.3510421564018179,
"grad_norm": 1.1307642459869385,
"learning_rate": 2.6661040752934594e-05,
"loss": 0.8871,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_nli-pairs_loss": 1.0147591829299927,
"eval_nli-pairs_runtime": 3.7201,
"eval_nli-pairs_samples_per_second": 26.881,
"eval_nli-pairs_steps_per_second": 1.075,
"eval_sts-test_pearson_cosine": 0.7872126529181761,
"eval_sts-test_pearson_dot": 0.5062045289861089,
"eval_sts-test_pearson_euclidean": 0.7351473988633473,
"eval_sts-test_pearson_manhattan": 0.7310226402088944,
"eval_sts-test_pearson_max": 0.7872126529181761,
"eval_sts-test_spearman_cosine": 0.801487068999052,
"eval_sts-test_spearman_dot": 0.4912205722904683,
"eval_sts-test_spearman_euclidean": 0.7267262355024484,
"eval_sts-test_spearman_manhattan": 0.72510169253649,
"eval_sts-test_spearman_max": 0.801487068999052,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_vitaminc-pairs_loss": 4.644638538360596,
"eval_vitaminc-pairs_runtime": 1.1453,
"eval_vitaminc-pairs_samples_per_second": 74.215,
"eval_vitaminc-pairs_steps_per_second": 2.619,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_sts-label_loss": 3.915343999862671,
"eval_sts-label_runtime": 0.2807,
"eval_sts-label_samples_per_second": 356.217,
"eval_sts-label_steps_per_second": 14.249,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_qnli-contrastive_loss": 0.11220741271972656,
"eval_qnli-contrastive_runtime": 0.3614,
"eval_qnli-contrastive_samples_per_second": 276.705,
"eval_qnli-contrastive_steps_per_second": 11.068,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_scitail-pairs-qa_loss": 0.06635177880525589,
"eval_scitail-pairs-qa_runtime": 0.8881,
"eval_scitail-pairs-qa_samples_per_second": 112.594,
"eval_scitail-pairs-qa_steps_per_second": 4.504,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_scitail-pairs-pos_loss": 0.5765587687492371,
"eval_scitail-pairs-pos_runtime": 1.3496,
"eval_scitail-pairs-pos_samples_per_second": 74.097,
"eval_scitail-pairs-pos_steps_per_second": 2.964,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_xsum-pairs_loss": 0.2595808804035187,
"eval_xsum-pairs_runtime": 0.9377,
"eval_xsum-pairs_samples_per_second": 106.641,
"eval_xsum-pairs_steps_per_second": 4.266,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_compression-pairs_loss": 0.0918564721941948,
"eval_compression-pairs_runtime": 0.2755,
"eval_compression-pairs_samples_per_second": 363.032,
"eval_compression-pairs_steps_per_second": 14.521,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_sciq_pairs_loss": 0.284303218126297,
"eval_sciq_pairs_runtime": 4.1289,
"eval_sciq_pairs_samples_per_second": 24.22,
"eval_sciq_pairs_steps_per_second": 0.969,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_qasc_pairs_loss": 0.19232892990112305,
"eval_qasc_pairs_runtime": 1.0709,
"eval_qasc_pairs_samples_per_second": 93.384,
"eval_qasc_pairs_steps_per_second": 3.735,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_openbookqa_pairs_loss": 1.6234371662139893,
"eval_openbookqa_pairs_runtime": 0.9558,
"eval_openbookqa_pairs_samples_per_second": 104.62,
"eval_openbookqa_pairs_steps_per_second": 4.185,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_msmarco_pairs_loss": 0.5325217247009277,
"eval_msmarco_pairs_runtime": 2.0971,
"eval_msmarco_pairs_samples_per_second": 47.685,
"eval_msmarco_pairs_steps_per_second": 1.907,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_nq_pairs_loss": 0.2721095681190491,
"eval_nq_pairs_runtime": 4.5393,
"eval_nq_pairs_samples_per_second": 22.03,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_trivia_pairs_loss": 0.8544899821281433,
"eval_trivia_pairs_runtime": 6.4668,
"eval_trivia_pairs_samples_per_second": 15.464,
"eval_trivia_pairs_steps_per_second": 0.619,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_quora_pairs_loss": 0.08441996574401855,
"eval_quora_pairs_runtime": 0.6933,
"eval_quora_pairs_samples_per_second": 144.233,
"eval_quora_pairs_steps_per_second": 5.769,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_gooaq_pairs_loss": 0.5711588859558105,
"eval_gooaq_pairs_runtime": 1.3941,
"eval_gooaq_pairs_samples_per_second": 71.733,
"eval_gooaq_pairs_steps_per_second": 2.869,
"step": 2240
},
{
"epoch": 0.3510421564018179,
"eval_mrpc_pairs_loss": 0.05093960464000702,
"eval_mrpc_pairs_runtime": 0.2633,
"eval_mrpc_pairs_samples_per_second": 379.777,
"eval_mrpc_pairs_steps_per_second": 15.191,
"step": 2240
},
{
"epoch": 0.3560570443504153,
"grad_norm": 0.39178094267845154,
"learning_rate": 2.6498646236636892e-05,
"loss": 0.6805,
"step": 2272
},
{
"epoch": 0.3610719322990127,
"grad_norm": 7.91475248336792,
"learning_rate": 2.6332915632292237e-05,
"loss": 1.0451,
"step": 2304
},
{
"epoch": 0.3660868202476101,
"grad_norm": 31.54157066345215,
"learning_rate": 2.616389702311641e-05,
"loss": 1.0603,
"step": 2336
},
{
"epoch": 0.37110170819620747,
"grad_norm": 8.400779724121094,
"learning_rate": 2.5991639446269964e-05,
"loss": 0.8142,
"step": 2368
},
{
"epoch": 0.3761165961448049,
"grad_norm": 20.99441146850586,
"learning_rate": 2.5816192878631166e-05,
"loss": 1.7211,
"step": 2400
},
{
"epoch": 0.3811314840934023,
"grad_norm": 10.574430465698242,
"learning_rate": 2.5637608222296237e-05,
"loss": 0.7523,
"step": 2432
},
{
"epoch": 0.3861463720419997,
"grad_norm": 0.8941424489021301,
"learning_rate": 2.5455937289811207e-05,
"loss": 0.8053,
"step": 2464
},
{
"epoch": 0.3911612599905971,
"grad_norm": 1.9402281045913696,
"learning_rate": 2.5271232789139587e-05,
"loss": 0.8427,
"step": 2496
},
{
"epoch": 0.3961761479391945,
"grad_norm": 23.42873764038086,
"learning_rate": 2.5083548308370296e-05,
"loss": 0.8204,
"step": 2528
},
{
"epoch": 0.40119103588779187,
"grad_norm": 4.5422234535217285,
"learning_rate": 2.4892938300170198e-05,
"loss": 0.5343,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_nli-pairs_loss": 1.002213478088379,
"eval_nli-pairs_runtime": 3.8843,
"eval_nli-pairs_samples_per_second": 25.745,
"eval_nli-pairs_steps_per_second": 1.03,
"eval_sts-test_pearson_cosine": 0.7872537557423719,
"eval_sts-test_pearson_dot": 0.5372668921721468,
"eval_sts-test_pearson_euclidean": 0.7383744840101544,
"eval_sts-test_pearson_manhattan": 0.7333039162515002,
"eval_sts-test_pearson_max": 0.7872537557423719,
"eval_sts-test_spearman_cosine": 0.8038647026605977,
"eval_sts-test_spearman_dot": 0.5191465873751544,
"eval_sts-test_spearman_euclidean": 0.730034619048548,
"eval_sts-test_spearman_manhattan": 0.7277569753761504,
"eval_sts-test_spearman_max": 0.8038647026605977,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_vitaminc-pairs_loss": 4.723379135131836,
"eval_vitaminc-pairs_runtime": 1.3031,
"eval_vitaminc-pairs_samples_per_second": 65.23,
"eval_vitaminc-pairs_steps_per_second": 2.302,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_sts-label_loss": 3.8185579776763916,
"eval_sts-label_runtime": 0.4182,
"eval_sts-label_samples_per_second": 239.094,
"eval_sts-label_steps_per_second": 9.564,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_qnli-contrastive_loss": 0.15084019303321838,
"eval_qnli-contrastive_runtime": 0.3638,
"eval_qnli-contrastive_samples_per_second": 274.906,
"eval_qnli-contrastive_steps_per_second": 10.996,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_scitail-pairs-qa_loss": 0.06741151213645935,
"eval_scitail-pairs-qa_runtime": 0.9458,
"eval_scitail-pairs-qa_samples_per_second": 105.735,
"eval_scitail-pairs-qa_steps_per_second": 4.229,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_scitail-pairs-pos_loss": 0.47680819034576416,
"eval_scitail-pairs-pos_runtime": 1.4736,
"eval_scitail-pairs-pos_samples_per_second": 67.859,
"eval_scitail-pairs-pos_steps_per_second": 2.714,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_xsum-pairs_loss": 0.2572269141674042,
"eval_xsum-pairs_runtime": 0.9448,
"eval_xsum-pairs_samples_per_second": 105.847,
"eval_xsum-pairs_steps_per_second": 4.234,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_compression-pairs_loss": 0.09604756534099579,
"eval_compression-pairs_runtime": 0.2774,
"eval_compression-pairs_samples_per_second": 360.554,
"eval_compression-pairs_steps_per_second": 14.422,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_sciq_pairs_loss": 0.2735004425048828,
"eval_sciq_pairs_runtime": 4.2103,
"eval_sciq_pairs_samples_per_second": 23.751,
"eval_sciq_pairs_steps_per_second": 0.95,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_qasc_pairs_loss": 0.1924300342798233,
"eval_qasc_pairs_runtime": 1.1352,
"eval_qasc_pairs_samples_per_second": 88.089,
"eval_qasc_pairs_steps_per_second": 3.524,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_openbookqa_pairs_loss": 1.6290359497070312,
"eval_openbookqa_pairs_runtime": 0.9392,
"eval_openbookqa_pairs_samples_per_second": 106.476,
"eval_openbookqa_pairs_steps_per_second": 4.259,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_msmarco_pairs_loss": 0.518312931060791,
"eval_msmarco_pairs_runtime": 2.121,
"eval_msmarco_pairs_samples_per_second": 47.147,
"eval_msmarco_pairs_steps_per_second": 1.886,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_nq_pairs_loss": 0.3077375292778015,
"eval_nq_pairs_runtime": 4.6617,
"eval_nq_pairs_samples_per_second": 21.451,
"eval_nq_pairs_steps_per_second": 0.858,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_trivia_pairs_loss": 0.8588294386863708,
"eval_trivia_pairs_runtime": 6.6293,
"eval_trivia_pairs_samples_per_second": 15.085,
"eval_trivia_pairs_steps_per_second": 0.603,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_quora_pairs_loss": 0.07980062067508698,
"eval_quora_pairs_runtime": 0.7261,
"eval_quora_pairs_samples_per_second": 137.72,
"eval_quora_pairs_steps_per_second": 5.509,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_gooaq_pairs_loss": 0.6570906043052673,
"eval_gooaq_pairs_runtime": 1.5071,
"eval_gooaq_pairs_samples_per_second": 66.352,
"eval_gooaq_pairs_steps_per_second": 2.654,
"step": 2560
},
{
"epoch": 0.40119103588779187,
"eval_mrpc_pairs_loss": 0.051231566816568375,
"eval_mrpc_pairs_runtime": 0.2799,
"eval_mrpc_pairs_samples_per_second": 357.322,
"eval_mrpc_pairs_steps_per_second": 14.293,
"step": 2560
},
{
"epoch": 0.40620592383638926,
"grad_norm": 37.2639045715332,
"learning_rate": 2.4699458065985813e-05,
"loss": 0.9709,
"step": 2592
},
{
"epoch": 0.4112208117849867,
"grad_norm": 15.363207817077637,
"learning_rate": 2.45031637399988e-05,
"loss": 0.708,
"step": 2624
},
{
"epoch": 0.4162356997335841,
"grad_norm": 1.8831324577331543,
"learning_rate": 2.430411227283978e-05,
"loss": 0.4083,
"step": 2656
},
{
"epoch": 0.4212505876821815,
"grad_norm": 5.664551734924316,
"learning_rate": 2.4102361415065367e-05,
"loss": 0.8732,
"step": 2688
},
{
"epoch": 0.4262654756307789,
"grad_norm": 0.615675151348114,
"learning_rate": 2.3897969700403022e-05,
"loss": 1.2616,
"step": 2720
},
{
"epoch": 0.43128036357937627,
"grad_norm": 19.81829261779785,
"learning_rate": 2.3690996428768772e-05,
"loss": 1.3324,
"step": 2752
},
{
"epoch": 0.43629525152797366,
"grad_norm": 6.3363118171691895,
"learning_rate": 2.348150164906257e-05,
"loss": 0.6244,
"step": 2784
},
{
"epoch": 0.44131013947657105,
"grad_norm": 1.103615641593933,
"learning_rate": 2.3269546141746407e-05,
"loss": 0.6176,
"step": 2816
},
{
"epoch": 0.44632502742516844,
"grad_norm": 11.468894004821777,
"learning_rate": 2.3055191401210126e-05,
"loss": 0.6926,
"step": 2848
},
{
"epoch": 0.4513399153737659,
"grad_norm": 4.0951619148254395,
"learning_rate": 2.283849961793017e-05,
"loss": 0.8158,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_nli-pairs_loss": 1.2103344202041626,
"eval_nli-pairs_runtime": 3.656,
"eval_nli-pairs_samples_per_second": 27.353,
"eval_nli-pairs_steps_per_second": 1.094,
"eval_sts-test_pearson_cosine": 0.7884135608823999,
"eval_sts-test_pearson_dot": 0.5043809957478502,
"eval_sts-test_pearson_euclidean": 0.73325296875941,
"eval_sts-test_pearson_manhattan": 0.7274442771815695,
"eval_sts-test_pearson_max": 0.7884135608823999,
"eval_sts-test_spearman_cosine": 0.8024151272859597,
"eval_sts-test_spearman_dot": 0.4849613226687463,
"eval_sts-test_spearman_euclidean": 0.7267107319000072,
"eval_sts-test_spearman_manhattan": 0.7238097600272174,
"eval_sts-test_spearman_max": 0.8024151272859597,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_vitaminc-pairs_loss": 4.7560882568359375,
"eval_vitaminc-pairs_runtime": 1.1898,
"eval_vitaminc-pairs_samples_per_second": 71.438,
"eval_vitaminc-pairs_steps_per_second": 2.521,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_sts-label_loss": 3.4280478954315186,
"eval_sts-label_runtime": 0.2879,
"eval_sts-label_samples_per_second": 347.303,
"eval_sts-label_steps_per_second": 13.892,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_qnli-contrastive_loss": 0.1333482712507248,
"eval_qnli-contrastive_runtime": 0.3658,
"eval_qnli-contrastive_samples_per_second": 273.37,
"eval_qnli-contrastive_steps_per_second": 10.935,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_scitail-pairs-qa_loss": 0.0703386664390564,
"eval_scitail-pairs-qa_runtime": 0.8879,
"eval_scitail-pairs-qa_samples_per_second": 112.63,
"eval_scitail-pairs-qa_steps_per_second": 4.505,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_scitail-pairs-pos_loss": 0.4763020873069763,
"eval_scitail-pairs-pos_runtime": 1.3239,
"eval_scitail-pairs-pos_samples_per_second": 75.532,
"eval_scitail-pairs-pos_steps_per_second": 3.021,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_xsum-pairs_loss": 0.25743284821510315,
"eval_xsum-pairs_runtime": 0.9333,
"eval_xsum-pairs_samples_per_second": 107.15,
"eval_xsum-pairs_steps_per_second": 4.286,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_compression-pairs_loss": 0.09842805564403534,
"eval_compression-pairs_runtime": 0.2944,
"eval_compression-pairs_samples_per_second": 339.674,
"eval_compression-pairs_steps_per_second": 13.587,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_sciq_pairs_loss": 0.28244778513908386,
"eval_sciq_pairs_runtime": 4.0785,
"eval_sciq_pairs_samples_per_second": 24.519,
"eval_sciq_pairs_steps_per_second": 0.981,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_qasc_pairs_loss": 0.18051397800445557,
"eval_qasc_pairs_runtime": 1.0561,
"eval_qasc_pairs_samples_per_second": 94.69,
"eval_qasc_pairs_steps_per_second": 3.788,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_openbookqa_pairs_loss": 1.5708725452423096,
"eval_openbookqa_pairs_runtime": 0.9072,
"eval_openbookqa_pairs_samples_per_second": 110.229,
"eval_openbookqa_pairs_steps_per_second": 4.409,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_msmarco_pairs_loss": 0.5720314979553223,
"eval_msmarco_pairs_runtime": 2.0694,
"eval_msmarco_pairs_samples_per_second": 48.322,
"eval_msmarco_pairs_steps_per_second": 1.933,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_nq_pairs_loss": 0.2748319208621979,
"eval_nq_pairs_runtime": 4.5496,
"eval_nq_pairs_samples_per_second": 21.98,
"eval_nq_pairs_steps_per_second": 0.879,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_trivia_pairs_loss": 0.8936847448348999,
"eval_trivia_pairs_runtime": 6.4784,
"eval_trivia_pairs_samples_per_second": 15.436,
"eval_trivia_pairs_steps_per_second": 0.617,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_quora_pairs_loss": 0.07990340888500214,
"eval_quora_pairs_runtime": 0.6852,
"eval_quora_pairs_samples_per_second": 145.945,
"eval_quora_pairs_steps_per_second": 5.838,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_gooaq_pairs_loss": 0.6210995316505432,
"eval_gooaq_pairs_runtime": 1.4234,
"eval_gooaq_pairs_samples_per_second": 70.255,
"eval_gooaq_pairs_steps_per_second": 2.81,
"step": 2880
},
{
"epoch": 0.4513399153737659,
"eval_mrpc_pairs_loss": 0.053870730102062225,
"eval_mrpc_pairs_runtime": 0.2678,
"eval_mrpc_pairs_samples_per_second": 373.436,
"eval_mrpc_pairs_steps_per_second": 14.937,
"step": 2880
},
{
"epoch": 0.4563548033223633,
"grad_norm": 0.5031663775444031,
"learning_rate": 2.261953366042628e-05,
"loss": 1.4753,
"step": 2912
},
{
"epoch": 0.46136969127096067,
"grad_norm": 3.3404605388641357,
"learning_rate": 2.239835705702158e-05,
"loss": 0.5735,
"step": 2944
},
{
"epoch": 0.46638457921955806,
"grad_norm": 14.60761547088623,
"learning_rate": 2.217503397741115e-05,
"loss": 1.2261,
"step": 2976
},
{
"epoch": 0.47139946716815545,
"grad_norm": 0.7826951146125793,
"learning_rate": 2.194962921404456e-05,
"loss": 0.6085,
"step": 3008
},
{
"epoch": 0.47641435511675284,
"grad_norm": 5.523419380187988,
"learning_rate": 2.1722208163327738e-05,
"loss": 0.8766,
"step": 3040
},
{
"epoch": 0.48142924306535023,
"grad_norm": 1.2507153749465942,
"learning_rate": 2.1492836806649564e-05,
"loss": 1.1824,
"step": 3072
},
{
"epoch": 0.4864441310139477,
"grad_norm": 10.76526165008545,
"learning_rate": 2.1261581691238775e-05,
"loss": 0.7192,
"step": 3104
},
{
"epoch": 0.49145901896254507,
"grad_norm": 2.5375277996063232,
"learning_rate": 2.1028509910856705e-05,
"loss": 0.6131,
"step": 3136
},
{
"epoch": 0.49647390691114246,
"grad_norm": 6.569655418395996,
"learning_rate": 2.0793689086331472e-05,
"loss": 0.7407,
"step": 3168
},
{
"epoch": 0.5014887948597399,
"grad_norm": 0.42745527625083923,
"learning_rate": 2.055718734593919e-05,
"loss": 0.5857,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_nli-pairs_loss": 1.1431602239608765,
"eval_nli-pairs_runtime": 3.6407,
"eval_nli-pairs_samples_per_second": 27.467,
"eval_nli-pairs_steps_per_second": 1.099,
"eval_sts-test_pearson_cosine": 0.7838341260331343,
"eval_sts-test_pearson_dot": 0.5274891201747137,
"eval_sts-test_pearson_euclidean": 0.734987175544037,
"eval_sts-test_pearson_manhattan": 0.7296263541205231,
"eval_sts-test_pearson_max": 0.7838341260331343,
"eval_sts-test_spearman_cosine": 0.8013224760849562,
"eval_sts-test_spearman_dot": 0.5061225327907017,
"eval_sts-test_spearman_euclidean": 0.7282525362996873,
"eval_sts-test_spearman_manhattan": 0.7265322068183514,
"eval_sts-test_spearman_max": 0.8013224760849562,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_vitaminc-pairs_loss": 4.748112201690674,
"eval_vitaminc-pairs_runtime": 1.1378,
"eval_vitaminc-pairs_samples_per_second": 74.706,
"eval_vitaminc-pairs_steps_per_second": 2.637,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_sts-label_loss": 3.9402565956115723,
"eval_sts-label_runtime": 0.2789,
"eval_sts-label_samples_per_second": 358.596,
"eval_sts-label_steps_per_second": 14.344,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_qnli-contrastive_loss": 0.10341227799654007,
"eval_qnli-contrastive_runtime": 0.3605,
"eval_qnli-contrastive_samples_per_second": 277.417,
"eval_qnli-contrastive_steps_per_second": 11.097,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_scitail-pairs-qa_loss": 0.06673895567655563,
"eval_scitail-pairs-qa_runtime": 0.8765,
"eval_scitail-pairs-qa_samples_per_second": 114.092,
"eval_scitail-pairs-qa_steps_per_second": 4.564,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_scitail-pairs-pos_loss": 0.510690450668335,
"eval_scitail-pairs-pos_runtime": 1.3274,
"eval_scitail-pairs-pos_samples_per_second": 75.334,
"eval_scitail-pairs-pos_steps_per_second": 3.013,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_xsum-pairs_loss": 0.26573723554611206,
"eval_xsum-pairs_runtime": 0.9342,
"eval_xsum-pairs_samples_per_second": 107.047,
"eval_xsum-pairs_steps_per_second": 4.282,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_compression-pairs_loss": 0.09096826612949371,
"eval_compression-pairs_runtime": 0.2779,
"eval_compression-pairs_samples_per_second": 359.804,
"eval_compression-pairs_steps_per_second": 14.392,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_sciq_pairs_loss": 0.30787500739097595,
"eval_sciq_pairs_runtime": 4.1007,
"eval_sciq_pairs_samples_per_second": 24.386,
"eval_sciq_pairs_steps_per_second": 0.975,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_qasc_pairs_loss": 0.1825849413871765,
"eval_qasc_pairs_runtime": 1.0526,
"eval_qasc_pairs_samples_per_second": 94.998,
"eval_qasc_pairs_steps_per_second": 3.8,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_openbookqa_pairs_loss": 1.5945305824279785,
"eval_openbookqa_pairs_runtime": 0.8948,
"eval_openbookqa_pairs_samples_per_second": 111.759,
"eval_openbookqa_pairs_steps_per_second": 4.47,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_msmarco_pairs_loss": 0.5864604711532593,
"eval_msmarco_pairs_runtime": 2.0556,
"eval_msmarco_pairs_samples_per_second": 48.646,
"eval_msmarco_pairs_steps_per_second": 1.946,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_nq_pairs_loss": 0.2538978159427643,
"eval_nq_pairs_runtime": 4.5409,
"eval_nq_pairs_samples_per_second": 22.022,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_trivia_pairs_loss": 0.8825237154960632,
"eval_trivia_pairs_runtime": 6.4701,
"eval_trivia_pairs_samples_per_second": 15.456,
"eval_trivia_pairs_steps_per_second": 0.618,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_quora_pairs_loss": 0.06264814734458923,
"eval_quora_pairs_runtime": 0.6792,
"eval_quora_pairs_samples_per_second": 147.238,
"eval_quora_pairs_steps_per_second": 5.89,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_gooaq_pairs_loss": 0.5953384041786194,
"eval_gooaq_pairs_runtime": 1.4186,
"eval_gooaq_pairs_samples_per_second": 70.49,
"eval_gooaq_pairs_steps_per_second": 2.82,
"step": 3200
},
{
"epoch": 0.5014887948597399,
"eval_mrpc_pairs_loss": 0.05028616264462471,
"eval_mrpc_pairs_runtime": 0.2664,
"eval_mrpc_pairs_samples_per_second": 375.444,
"eval_mrpc_pairs_steps_per_second": 15.018,
"step": 3200
},
{
"epoch": 0.5065036828083372,
"grad_norm": 17.477581024169922,
"learning_rate": 2.0319073305638035e-05,
"loss": 0.6212,
"step": 3232
},
{
"epoch": 0.5115185707569346,
"grad_norm": 15.705268859863281,
"learning_rate": 2.0079416049160762e-05,
"loss": 1.1408,
"step": 3264
},
{
"epoch": 0.516533458705532,
"grad_norm": 15.518088340759277,
"learning_rate": 1.983828510797154e-05,
"loss": 0.6898,
"step": 3296
},
{
"epoch": 0.5215483466541294,
"grad_norm": 18.28449058532715,
"learning_rate": 1.9595750441092844e-05,
"loss": 0.9827,
"step": 3328
},
{
"epoch": 0.5265632346027268,
"grad_norm": 11.187614440917969,
"learning_rate": 1.935188241480837e-05,
"loss": 0.9518,
"step": 3360
},
{
"epoch": 0.5315781225513242,
"grad_norm": 24.515199661254883,
"learning_rate": 1.910675178224773e-05,
"loss": 0.5584,
"step": 3392
},
{
"epoch": 0.5365930104999217,
"grad_norm": 21.595224380493164,
"learning_rate": 1.886042966285894e-05,
"loss": 1.3362,
"step": 3424
},
{
"epoch": 0.5416078984485191,
"grad_norm": 14.934494972229004,
"learning_rate": 1.8612987521774603e-05,
"loss": 0.4418,
"step": 3456
},
{
"epoch": 0.5466227863971165,
"grad_norm": 1.0222537517547607,
"learning_rate": 1.836449714907785e-05,
"loss": 0.5896,
"step": 3488
},
{
"epoch": 0.5516376743457139,
"grad_norm": 13.705151557922363,
"learning_rate": 1.811503063897396e-05,
"loss": 0.7951,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_nli-pairs_loss": 1.0016616582870483,
"eval_nli-pairs_runtime": 3.6365,
"eval_nli-pairs_samples_per_second": 27.499,
"eval_nli-pairs_steps_per_second": 1.1,
"eval_sts-test_pearson_cosine": 0.783269156461013,
"eval_sts-test_pearson_dot": 0.5146760761775918,
"eval_sts-test_pearson_euclidean": 0.7293244171224789,
"eval_sts-test_pearson_manhattan": 0.722566066058283,
"eval_sts-test_pearson_max": 0.783269156461013,
"eval_sts-test_spearman_cosine": 0.800346163751739,
"eval_sts-test_spearman_dot": 0.49134463318009686,
"eval_sts-test_spearman_euclidean": 0.7220780456605193,
"eval_sts-test_spearman_manhattan": 0.7185570530657137,
"eval_sts-test_spearman_max": 0.800346163751739,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_vitaminc-pairs_loss": 4.628457546234131,
"eval_vitaminc-pairs_runtime": 1.1358,
"eval_vitaminc-pairs_samples_per_second": 74.837,
"eval_vitaminc-pairs_steps_per_second": 2.641,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_sts-label_loss": 3.698469877243042,
"eval_sts-label_runtime": 0.2763,
"eval_sts-label_samples_per_second": 361.871,
"eval_sts-label_steps_per_second": 14.475,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_qnli-contrastive_loss": 0.11857427656650543,
"eval_qnli-contrastive_runtime": 0.3599,
"eval_qnli-contrastive_samples_per_second": 277.865,
"eval_qnli-contrastive_steps_per_second": 11.115,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_scitail-pairs-qa_loss": 0.06011494621634483,
"eval_scitail-pairs-qa_runtime": 0.8855,
"eval_scitail-pairs-qa_samples_per_second": 112.93,
"eval_scitail-pairs-qa_steps_per_second": 4.517,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_scitail-pairs-pos_loss": 0.5179685950279236,
"eval_scitail-pairs-pos_runtime": 1.3428,
"eval_scitail-pairs-pos_samples_per_second": 74.469,
"eval_scitail-pairs-pos_steps_per_second": 2.979,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_xsum-pairs_loss": 0.2575337886810303,
"eval_xsum-pairs_runtime": 0.9362,
"eval_xsum-pairs_samples_per_second": 106.81,
"eval_xsum-pairs_steps_per_second": 4.272,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_compression-pairs_loss": 0.08986295014619827,
"eval_compression-pairs_runtime": 0.2735,
"eval_compression-pairs_samples_per_second": 365.659,
"eval_compression-pairs_steps_per_second": 14.626,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_sciq_pairs_loss": 0.2898155748844147,
"eval_sciq_pairs_runtime": 4.1009,
"eval_sciq_pairs_samples_per_second": 24.385,
"eval_sciq_pairs_steps_per_second": 0.975,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_qasc_pairs_loss": 0.1790761798620224,
"eval_qasc_pairs_runtime": 1.0559,
"eval_qasc_pairs_samples_per_second": 94.702,
"eval_qasc_pairs_steps_per_second": 3.788,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_openbookqa_pairs_loss": 1.6558103561401367,
"eval_openbookqa_pairs_runtime": 0.8846,
"eval_openbookqa_pairs_samples_per_second": 113.048,
"eval_openbookqa_pairs_steps_per_second": 4.522,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_msmarco_pairs_loss": 0.5547183156013489,
"eval_msmarco_pairs_runtime": 2.0592,
"eval_msmarco_pairs_samples_per_second": 48.563,
"eval_msmarco_pairs_steps_per_second": 1.943,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_nq_pairs_loss": 0.24799224734306335,
"eval_nq_pairs_runtime": 4.5115,
"eval_nq_pairs_samples_per_second": 22.166,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_trivia_pairs_loss": 0.9036693572998047,
"eval_trivia_pairs_runtime": 6.5286,
"eval_trivia_pairs_samples_per_second": 15.317,
"eval_trivia_pairs_steps_per_second": 0.613,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_quora_pairs_loss": 0.05727443844079971,
"eval_quora_pairs_runtime": 0.6763,
"eval_quora_pairs_samples_per_second": 147.873,
"eval_quora_pairs_steps_per_second": 5.915,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_gooaq_pairs_loss": 0.5602415800094604,
"eval_gooaq_pairs_runtime": 1.4132,
"eval_gooaq_pairs_samples_per_second": 70.759,
"eval_gooaq_pairs_steps_per_second": 2.83,
"step": 3520
},
{
"epoch": 0.5516376743457139,
"eval_mrpc_pairs_loss": 0.04762456938624382,
"eval_mrpc_pairs_runtime": 0.2648,
"eval_mrpc_pairs_samples_per_second": 377.632,
"eval_mrpc_pairs_steps_per_second": 15.105,
"step": 3520
},
{
"epoch": 0.5566525622943113,
"grad_norm": 0.39285340905189514,
"learning_rate": 1.7864660368873747e-05,
"loss": 0.5201,
"step": 3552
},
{
"epoch": 0.5616674502429087,
"grad_norm": 16.01999855041504,
"learning_rate": 1.7613458978394786e-05,
"loss": 0.6351,
"step": 3584
},
{
"epoch": 0.566682338191506,
"grad_norm": 0.5487422347068787,
"learning_rate": 1.7361499348286606e-05,
"loss": 0.8652,
"step": 3616
},
{
"epoch": 0.5716972261401034,
"grad_norm": 0.9249119758605957,
"learning_rate": 1.710885457928585e-05,
"loss": 0.6407,
"step": 3648
},
{
"epoch": 0.5767121140887008,
"grad_norm": 6.578505992889404,
"learning_rate": 1.6855597970907664e-05,
"loss": 0.9435,
"step": 3680
},
{
"epoch": 0.5817270020372982,
"grad_norm": 14.307022094726562,
"learning_rate": 1.6601803000179394e-05,
"loss": 0.9295,
"step": 3712
},
{
"epoch": 0.5867418899858956,
"grad_norm": 16.091779708862305,
"learning_rate": 1.6347543300322795e-05,
"loss": 0.6829,
"step": 3744
},
{
"epoch": 0.591756777934493,
"grad_norm": 29.058805465698242,
"learning_rate": 1.6092892639390916e-05,
"loss": 0.8683,
"step": 3776
},
{
"epoch": 0.5967716658830904,
"grad_norm": 13.12238597869873,
"learning_rate": 1.583792489886586e-05,
"loss": 1.115,
"step": 3808
},
{
"epoch": 0.6017865538316878,
"grad_norm": 11.606388092041016,
"learning_rate": 1.558271405222362e-05,
"loss": 1.0936,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_nli-pairs_loss": 0.9106074571609497,
"eval_nli-pairs_runtime": 3.9467,
"eval_nli-pairs_samples_per_second": 25.337,
"eval_nli-pairs_steps_per_second": 1.013,
"eval_sts-test_pearson_cosine": 0.7831915073063493,
"eval_sts-test_pearson_dot": 0.51712727721244,
"eval_sts-test_pearson_euclidean": 0.7355201142492419,
"eval_sts-test_pearson_manhattan": 0.7299910115321456,
"eval_sts-test_pearson_max": 0.7831915073063493,
"eval_sts-test_spearman_cosine": 0.8005432620025132,
"eval_sts-test_spearman_dot": 0.49466719400094655,
"eval_sts-test_spearman_euclidean": 0.7273424991180402,
"eval_sts-test_spearman_manhattan": 0.7249394934262583,
"eval_sts-test_spearman_max": 0.8005432620025132,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_vitaminc-pairs_loss": 4.7559494972229,
"eval_vitaminc-pairs_runtime": 1.1844,
"eval_vitaminc-pairs_samples_per_second": 71.768,
"eval_vitaminc-pairs_steps_per_second": 2.533,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_sts-label_loss": 3.46917724609375,
"eval_sts-label_runtime": 0.3003,
"eval_sts-label_samples_per_second": 333.048,
"eval_sts-label_steps_per_second": 13.322,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_qnli-contrastive_loss": 0.13890141248703003,
"eval_qnli-contrastive_runtime": 0.3729,
"eval_qnli-contrastive_samples_per_second": 268.18,
"eval_qnli-contrastive_steps_per_second": 10.727,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_scitail-pairs-qa_loss": 0.0611240416765213,
"eval_scitail-pairs-qa_runtime": 0.9367,
"eval_scitail-pairs-qa_samples_per_second": 106.755,
"eval_scitail-pairs-qa_steps_per_second": 4.27,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_scitail-pairs-pos_loss": 0.46203696727752686,
"eval_scitail-pairs-pos_runtime": 1.4874,
"eval_scitail-pairs-pos_samples_per_second": 67.232,
"eval_scitail-pairs-pos_steps_per_second": 2.689,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_xsum-pairs_loss": 0.24919259548187256,
"eval_xsum-pairs_runtime": 0.9576,
"eval_xsum-pairs_samples_per_second": 104.427,
"eval_xsum-pairs_steps_per_second": 4.177,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_compression-pairs_loss": 0.08809012174606323,
"eval_compression-pairs_runtime": 0.298,
"eval_compression-pairs_samples_per_second": 335.567,
"eval_compression-pairs_steps_per_second": 13.423,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_sciq_pairs_loss": 0.28287386894226074,
"eval_sciq_pairs_runtime": 4.2668,
"eval_sciq_pairs_samples_per_second": 23.437,
"eval_sciq_pairs_steps_per_second": 0.937,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_qasc_pairs_loss": 0.1861308217048645,
"eval_qasc_pairs_runtime": 1.0488,
"eval_qasc_pairs_samples_per_second": 95.351,
"eval_qasc_pairs_steps_per_second": 3.814,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_openbookqa_pairs_loss": 1.600982666015625,
"eval_openbookqa_pairs_runtime": 0.9077,
"eval_openbookqa_pairs_samples_per_second": 110.17,
"eval_openbookqa_pairs_steps_per_second": 4.407,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_msmarco_pairs_loss": 0.5555463433265686,
"eval_msmarco_pairs_runtime": 2.1064,
"eval_msmarco_pairs_samples_per_second": 47.474,
"eval_msmarco_pairs_steps_per_second": 1.899,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_nq_pairs_loss": 0.23241031169891357,
"eval_nq_pairs_runtime": 4.6119,
"eval_nq_pairs_samples_per_second": 21.683,
"eval_nq_pairs_steps_per_second": 0.867,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_trivia_pairs_loss": 0.7936394214630127,
"eval_trivia_pairs_runtime": 6.6242,
"eval_trivia_pairs_samples_per_second": 15.096,
"eval_trivia_pairs_steps_per_second": 0.604,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_quora_pairs_loss": 0.05936668440699577,
"eval_quora_pairs_runtime": 0.7463,
"eval_quora_pairs_samples_per_second": 133.994,
"eval_quora_pairs_steps_per_second": 5.36,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_gooaq_pairs_loss": 0.5735708475112915,
"eval_gooaq_pairs_runtime": 1.4747,
"eval_gooaq_pairs_samples_per_second": 67.809,
"eval_gooaq_pairs_steps_per_second": 2.712,
"step": 3840
},
{
"epoch": 0.6017865538316878,
"eval_mrpc_pairs_loss": 0.046309370547533035,
"eval_mrpc_pairs_runtime": 0.2694,
"eval_mrpc_pairs_samples_per_second": 371.218,
"eval_mrpc_pairs_steps_per_second": 14.849,
"step": 3840
},
{
"epoch": 0.6068014417802852,
"grad_norm": 6.513147830963135,
"learning_rate": 1.53273341434723e-05,
"loss": 0.8689,
"step": 3872
},
{
"epoch": 0.6118163297288827,
"grad_norm": 0.2349071353673935,
"learning_rate": 1.5071859265669756e-05,
"loss": 0.8692,
"step": 3904
},
{
"epoch": 0.6168312176774801,
"grad_norm": 18.028608322143555,
"learning_rate": 1.4816363539427118e-05,
"loss": 0.9083,
"step": 3936
},
{
"epoch": 0.6218461056260774,
"grad_norm": 17.381690979003906,
"learning_rate": 1.456092109140423e-05,
"loss": 1.0782,
"step": 3968
},
{
"epoch": 0.6268609935746748,
"grad_norm": 20.72548484802246,
"learning_rate": 1.4305606032803418e-05,
"loss": 0.7711,
"step": 4000
},
{
"epoch": 0.6318758815232722,
"grad_norm": 28.311264038085938,
"learning_rate": 1.4050492437867641e-05,
"loss": 1.0005,
"step": 4032
},
{
"epoch": 0.6368907694718696,
"grad_norm": 14.892809867858887,
"learning_rate": 1.3795654322389481e-05,
"loss": 0.7229,
"step": 4064
},
{
"epoch": 0.641905657420467,
"grad_norm": 18.567630767822266,
"learning_rate": 1.3541165622236977e-05,
"loss": 0.4871,
"step": 4096
},
{
"epoch": 0.6469205453690644,
"grad_norm": 8.814851760864258,
"learning_rate": 1.3287100171902759e-05,
"loss": 0.7853,
"step": 4128
},
{
"epoch": 0.6519354333176618,
"grad_norm": 19.43486785888672,
"learning_rate": 1.3033531683082495e-05,
"loss": 0.9271,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_nli-pairs_loss": 0.8979966640472412,
"eval_nli-pairs_runtime": 3.6341,
"eval_nli-pairs_samples_per_second": 27.517,
"eval_nli-pairs_steps_per_second": 1.101,
"eval_sts-test_pearson_cosine": 0.786081877366483,
"eval_sts-test_pearson_dot": 0.5354100918466089,
"eval_sts-test_pearson_euclidean": 0.7368659505908834,
"eval_sts-test_pearson_manhattan": 0.7310042183211231,
"eval_sts-test_pearson_max": 0.786081877366483,
"eval_sts-test_spearman_cosine": 0.8043456052578905,
"eval_sts-test_spearman_dot": 0.5150264179790126,
"eval_sts-test_spearman_euclidean": 0.7297811553069841,
"eval_sts-test_spearman_manhattan": 0.7264172194761916,
"eval_sts-test_spearman_max": 0.8043456052578905,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_vitaminc-pairs_loss": 4.720225811004639,
"eval_vitaminc-pairs_runtime": 1.1487,
"eval_vitaminc-pairs_samples_per_second": 73.995,
"eval_vitaminc-pairs_steps_per_second": 2.612,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_sts-label_loss": 3.9553511142730713,
"eval_sts-label_runtime": 0.2732,
"eval_sts-label_samples_per_second": 366.049,
"eval_sts-label_steps_per_second": 14.642,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_qnli-contrastive_loss": 0.14256399869918823,
"eval_qnli-contrastive_runtime": 0.3558,
"eval_qnli-contrastive_samples_per_second": 281.03,
"eval_qnli-contrastive_steps_per_second": 11.241,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_scitail-pairs-qa_loss": 0.06135182082653046,
"eval_scitail-pairs-qa_runtime": 0.8797,
"eval_scitail-pairs-qa_samples_per_second": 113.67,
"eval_scitail-pairs-qa_steps_per_second": 4.547,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_scitail-pairs-pos_loss": 0.42590686678886414,
"eval_scitail-pairs-pos_runtime": 1.3288,
"eval_scitail-pairs-pos_samples_per_second": 75.254,
"eval_scitail-pairs-pos_steps_per_second": 3.01,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_xsum-pairs_loss": 0.2564789056777954,
"eval_xsum-pairs_runtime": 0.9345,
"eval_xsum-pairs_samples_per_second": 107.011,
"eval_xsum-pairs_steps_per_second": 4.28,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_compression-pairs_loss": 0.08838170021772385,
"eval_compression-pairs_runtime": 0.2761,
"eval_compression-pairs_samples_per_second": 362.144,
"eval_compression-pairs_steps_per_second": 14.486,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_sciq_pairs_loss": 0.2946786880493164,
"eval_sciq_pairs_runtime": 4.076,
"eval_sciq_pairs_samples_per_second": 24.534,
"eval_sciq_pairs_steps_per_second": 0.981,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_qasc_pairs_loss": 0.17502914369106293,
"eval_qasc_pairs_runtime": 1.0723,
"eval_qasc_pairs_samples_per_second": 93.259,
"eval_qasc_pairs_steps_per_second": 3.73,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_openbookqa_pairs_loss": 1.5555152893066406,
"eval_openbookqa_pairs_runtime": 0.8973,
"eval_openbookqa_pairs_samples_per_second": 111.451,
"eval_openbookqa_pairs_steps_per_second": 4.458,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_msmarco_pairs_loss": 0.5041812062263489,
"eval_msmarco_pairs_runtime": 2.0593,
"eval_msmarco_pairs_samples_per_second": 48.56,
"eval_msmarco_pairs_steps_per_second": 1.942,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_nq_pairs_loss": 0.24564537405967712,
"eval_nq_pairs_runtime": 4.527,
"eval_nq_pairs_samples_per_second": 22.09,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_trivia_pairs_loss": 0.8565467000007629,
"eval_trivia_pairs_runtime": 6.4751,
"eval_trivia_pairs_samples_per_second": 15.444,
"eval_trivia_pairs_steps_per_second": 0.618,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_quora_pairs_loss": 0.052645713090896606,
"eval_quora_pairs_runtime": 0.6803,
"eval_quora_pairs_samples_per_second": 146.985,
"eval_quora_pairs_steps_per_second": 5.879,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_gooaq_pairs_loss": 0.5815556645393372,
"eval_gooaq_pairs_runtime": 1.3985,
"eval_gooaq_pairs_samples_per_second": 71.504,
"eval_gooaq_pairs_steps_per_second": 2.86,
"step": 4160
},
{
"epoch": 0.6519354333176618,
"eval_mrpc_pairs_loss": 0.047052089124917984,
"eval_mrpc_pairs_runtime": 0.2602,
"eval_mrpc_pairs_samples_per_second": 384.349,
"eval_mrpc_pairs_steps_per_second": 15.374,
"step": 4160
},
{
"epoch": 0.6569503212662592,
"grad_norm": 21.91355323791504,
"learning_rate": 1.2780533723289014e-05,
"loss": 0.5223,
"step": 4192
},
{
"epoch": 0.6619652092148566,
"grad_norm": 9.792081832885742,
"learning_rate": 1.2528179694508286e-05,
"loss": 1.0498,
"step": 4224
},
{
"epoch": 0.666980097163454,
"grad_norm": 6.606201648712158,
"learning_rate": 1.2276542811903345e-05,
"loss": 0.6791,
"step": 4256
},
{
"epoch": 0.6719949851120514,
"grad_norm": 16.744705200195312,
"learning_rate": 1.2025696082572509e-05,
"loss": 0.8836,
"step": 4288
},
{
"epoch": 0.6770098730606487,
"grad_norm": 8.791626930236816,
"learning_rate": 1.1775712284367882e-05,
"loss": 0.6035,
"step": 4320
},
{
"epoch": 0.6820247610092462,
"grad_norm": 1.067271113395691,
"learning_rate": 1.152666394478045e-05,
"loss": 0.5167,
"step": 4352
},
{
"epoch": 0.6870396489578436,
"grad_norm": 7.685211181640625,
"learning_rate": 1.1286358620301126e-05,
"loss": 0.981,
"step": 4384
},
{
"epoch": 0.692054536906441,
"grad_norm": 19.07784652709961,
"learning_rate": 1.10393628476565e-05,
"loss": 0.4873,
"step": 4416
},
{
"epoch": 0.6970694248550384,
"grad_norm": 1.4715958833694458,
"learning_rate": 1.0793516169782712e-05,
"loss": 0.4762,
"step": 4448
},
{
"epoch": 0.7020843128036358,
"grad_norm": 14.572600364685059,
"learning_rate": 1.0548889913873123e-05,
"loss": 0.8201,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_nli-pairs_loss": 0.8704043626785278,
"eval_nli-pairs_runtime": 3.6418,
"eval_nli-pairs_samples_per_second": 27.459,
"eval_nli-pairs_steps_per_second": 1.098,
"eval_sts-test_pearson_cosine": 0.7871366351762351,
"eval_sts-test_pearson_dot": 0.520292802271069,
"eval_sts-test_pearson_euclidean": 0.7358991589918665,
"eval_sts-test_pearson_manhattan": 0.7306487678482384,
"eval_sts-test_pearson_max": 0.7871366351762351,
"eval_sts-test_spearman_cosine": 0.8043053229220561,
"eval_sts-test_spearman_dot": 0.500924984433136,
"eval_sts-test_spearman_euclidean": 0.7279966902078664,
"eval_sts-test_spearman_manhattan": 0.7254635738312362,
"eval_sts-test_spearman_max": 0.8043053229220561,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_vitaminc-pairs_loss": 4.733531475067139,
"eval_vitaminc-pairs_runtime": 1.1524,
"eval_vitaminc-pairs_samples_per_second": 73.759,
"eval_vitaminc-pairs_steps_per_second": 2.603,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_sts-label_loss": 3.589179515838623,
"eval_sts-label_runtime": 0.2802,
"eval_sts-label_samples_per_second": 356.831,
"eval_sts-label_steps_per_second": 14.273,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_qnli-contrastive_loss": 0.11559023708105087,
"eval_qnli-contrastive_runtime": 0.3803,
"eval_qnli-contrastive_samples_per_second": 262.956,
"eval_qnli-contrastive_steps_per_second": 10.518,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_scitail-pairs-qa_loss": 0.05958002060651779,
"eval_scitail-pairs-qa_runtime": 0.9171,
"eval_scitail-pairs-qa_samples_per_second": 109.042,
"eval_scitail-pairs-qa_steps_per_second": 4.362,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_scitail-pairs-pos_loss": 0.43254122138023376,
"eval_scitail-pairs-pos_runtime": 1.3676,
"eval_scitail-pairs-pos_samples_per_second": 73.118,
"eval_scitail-pairs-pos_steps_per_second": 2.925,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_xsum-pairs_loss": 0.248906210064888,
"eval_xsum-pairs_runtime": 0.9364,
"eval_xsum-pairs_samples_per_second": 106.797,
"eval_xsum-pairs_steps_per_second": 4.272,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_compression-pairs_loss": 0.08712127059698105,
"eval_compression-pairs_runtime": 0.2771,
"eval_compression-pairs_samples_per_second": 360.923,
"eval_compression-pairs_steps_per_second": 14.437,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_sciq_pairs_loss": 0.2863478362560272,
"eval_sciq_pairs_runtime": 4.1006,
"eval_sciq_pairs_samples_per_second": 24.386,
"eval_sciq_pairs_steps_per_second": 0.975,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_qasc_pairs_loss": 0.17710347473621368,
"eval_qasc_pairs_runtime": 1.0521,
"eval_qasc_pairs_samples_per_second": 95.051,
"eval_qasc_pairs_steps_per_second": 3.802,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_openbookqa_pairs_loss": 1.5271464586257935,
"eval_openbookqa_pairs_runtime": 0.8986,
"eval_openbookqa_pairs_samples_per_second": 111.286,
"eval_openbookqa_pairs_steps_per_second": 4.451,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_msmarco_pairs_loss": 0.5346755385398865,
"eval_msmarco_pairs_runtime": 2.0827,
"eval_msmarco_pairs_samples_per_second": 48.014,
"eval_msmarco_pairs_steps_per_second": 1.921,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_nq_pairs_loss": 0.24830152094364166,
"eval_nq_pairs_runtime": 4.5025,
"eval_nq_pairs_samples_per_second": 22.21,
"eval_nq_pairs_steps_per_second": 0.888,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_trivia_pairs_loss": 0.799673318862915,
"eval_trivia_pairs_runtime": 6.4664,
"eval_trivia_pairs_samples_per_second": 15.465,
"eval_trivia_pairs_steps_per_second": 0.619,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_quora_pairs_loss": 0.030656050890684128,
"eval_quora_pairs_runtime": 0.6818,
"eval_quora_pairs_samples_per_second": 146.669,
"eval_quora_pairs_steps_per_second": 5.867,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_gooaq_pairs_loss": 0.5036634802818298,
"eval_gooaq_pairs_runtime": 1.4051,
"eval_gooaq_pairs_samples_per_second": 71.169,
"eval_gooaq_pairs_steps_per_second": 2.847,
"step": 4480
},
{
"epoch": 0.7020843128036358,
"eval_mrpc_pairs_loss": 0.04455450549721718,
"eval_mrpc_pairs_runtime": 0.2642,
"eval_mrpc_pairs_samples_per_second": 378.478,
"eval_mrpc_pairs_steps_per_second": 15.139,
"step": 4480
},
{
"epoch": 0.7070992007522332,
"grad_norm": 15.19054889678955,
"learning_rate": 1.030555505304156e-05,
"loss": 0.7799,
"step": 4512
},
{
"epoch": 0.7121140887008306,
"grad_norm": 16.065160751342773,
"learning_rate": 1.0063582185731009e-05,
"loss": 0.8006,
"step": 4544
},
{
"epoch": 0.717128976649428,
"grad_norm": 3.2584469318389893,
"learning_rate": 9.823041515230937e-06,
"loss": 0.5123,
"step": 4576
},
{
"epoch": 0.7221438645980254,
"grad_norm": 2.2951438426971436,
"learning_rate": 9.584002829309324e-06,
"loss": 0.7421,
"step": 4608
},
{
"epoch": 0.7271587525466228,
"grad_norm": 21.291872024536133,
"learning_rate": 9.346535479965231e-06,
"loss": 0.9477,
"step": 4640
},
{
"epoch": 0.7321736404952202,
"grad_norm": 4.785529613494873,
"learning_rate": 9.11070836330775e-06,
"loss": 0.5021,
"step": 4672
},
{
"epoch": 0.7371885284438175,
"grad_norm": 1.7058138847351074,
"learning_rate": 8.876589899567312e-06,
"loss": 0.931,
"step": 4704
},
{
"epoch": 0.7422034163924149,
"grad_norm": 9.1055326461792,
"learning_rate": 8.644248013244963e-06,
"loss": 0.7777,
"step": 4736
},
{
"epoch": 0.7472183043410123,
"grad_norm": 3.6529128551483154,
"learning_rate": 8.413750113405556e-06,
"loss": 0.9462,
"step": 4768
},
{
"epoch": 0.7522331922896098,
"grad_norm": 0.5643049478530884,
"learning_rate": 8.185163074120399e-06,
"loss": 0.5846,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_nli-pairs_loss": 0.9291799664497375,
"eval_nli-pairs_runtime": 3.7498,
"eval_nli-pairs_samples_per_second": 26.668,
"eval_nli-pairs_steps_per_second": 1.067,
"eval_sts-test_pearson_cosine": 0.7855324842750789,
"eval_sts-test_pearson_dot": 0.5242204261314407,
"eval_sts-test_pearson_euclidean": 0.7349702751512333,
"eval_sts-test_pearson_manhattan": 0.7293454465410049,
"eval_sts-test_pearson_max": 0.7855324842750789,
"eval_sts-test_spearman_cosine": 0.8044211074352633,
"eval_sts-test_spearman_dot": 0.5021807579050959,
"eval_sts-test_spearman_euclidean": 0.7270456124616013,
"eval_sts-test_spearman_manhattan": 0.7246691951731193,
"eval_sts-test_spearman_max": 0.8044211074352633,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_vitaminc-pairs_loss": 4.687094688415527,
"eval_vitaminc-pairs_runtime": 1.1386,
"eval_vitaminc-pairs_samples_per_second": 74.654,
"eval_vitaminc-pairs_steps_per_second": 2.635,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_sts-label_loss": 3.8013510704040527,
"eval_sts-label_runtime": 0.2716,
"eval_sts-label_samples_per_second": 368.125,
"eval_sts-label_steps_per_second": 14.725,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_qnli-contrastive_loss": 0.1414812207221985,
"eval_qnli-contrastive_runtime": 0.3601,
"eval_qnli-contrastive_samples_per_second": 277.73,
"eval_qnli-contrastive_steps_per_second": 11.109,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_scitail-pairs-qa_loss": 0.05851547792553902,
"eval_scitail-pairs-qa_runtime": 0.8864,
"eval_scitail-pairs-qa_samples_per_second": 112.817,
"eval_scitail-pairs-qa_steps_per_second": 4.513,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_scitail-pairs-pos_loss": 0.4562886357307434,
"eval_scitail-pairs-pos_runtime": 1.3535,
"eval_scitail-pairs-pos_samples_per_second": 73.88,
"eval_scitail-pairs-pos_steps_per_second": 2.955,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_xsum-pairs_loss": 0.23483119904994965,
"eval_xsum-pairs_runtime": 0.9336,
"eval_xsum-pairs_samples_per_second": 107.109,
"eval_xsum-pairs_steps_per_second": 4.284,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_compression-pairs_loss": 0.08680214732885361,
"eval_compression-pairs_runtime": 0.2716,
"eval_compression-pairs_samples_per_second": 368.254,
"eval_compression-pairs_steps_per_second": 14.73,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_sciq_pairs_loss": 0.2816057801246643,
"eval_sciq_pairs_runtime": 4.0742,
"eval_sciq_pairs_samples_per_second": 24.545,
"eval_sciq_pairs_steps_per_second": 0.982,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_qasc_pairs_loss": 0.17035560309886932,
"eval_qasc_pairs_runtime": 1.0717,
"eval_qasc_pairs_samples_per_second": 93.311,
"eval_qasc_pairs_steps_per_second": 3.732,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_openbookqa_pairs_loss": 1.5671054124832153,
"eval_openbookqa_pairs_runtime": 0.8973,
"eval_openbookqa_pairs_samples_per_second": 111.441,
"eval_openbookqa_pairs_steps_per_second": 4.458,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_msmarco_pairs_loss": 0.5062486529350281,
"eval_msmarco_pairs_runtime": 2.0609,
"eval_msmarco_pairs_samples_per_second": 48.524,
"eval_msmarco_pairs_steps_per_second": 1.941,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_nq_pairs_loss": 0.22875532507896423,
"eval_nq_pairs_runtime": 4.5041,
"eval_nq_pairs_samples_per_second": 22.202,
"eval_nq_pairs_steps_per_second": 0.888,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_trivia_pairs_loss": 0.8119627237319946,
"eval_trivia_pairs_runtime": 6.4609,
"eval_trivia_pairs_samples_per_second": 15.478,
"eval_trivia_pairs_steps_per_second": 0.619,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_quora_pairs_loss": 0.06211049482226372,
"eval_quora_pairs_runtime": 0.6765,
"eval_quora_pairs_samples_per_second": 147.827,
"eval_quora_pairs_steps_per_second": 5.913,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_gooaq_pairs_loss": 0.4847571551799774,
"eval_gooaq_pairs_runtime": 1.3911,
"eval_gooaq_pairs_samples_per_second": 71.886,
"eval_gooaq_pairs_steps_per_second": 2.875,
"step": 4800
},
{
"epoch": 0.7522331922896098,
"eval_mrpc_pairs_loss": 0.04384278133511543,
"eval_mrpc_pairs_runtime": 0.2617,
"eval_mrpc_pairs_samples_per_second": 382.146,
"eval_mrpc_pairs_steps_per_second": 15.286,
"step": 4800
},
{
"epoch": 0.7572480802382072,
"grad_norm": 14.555929183959961,
"learning_rate": 7.958553215065208e-06,
"loss": 0.6735,
"step": 4832
},
{
"epoch": 0.7622629681868046,
"grad_norm": 10.30207347869873,
"learning_rate": 7.733986282278816e-06,
"loss": 1.1569,
"step": 4864
},
{
"epoch": 0.767277856135402,
"grad_norm": 17.255786895751953,
"learning_rate": 7.511527429088396e-06,
"loss": 0.9749,
"step": 4896
},
{
"epoch": 0.7722927440839994,
"grad_norm": 14.730864524841309,
"learning_rate": 7.291241197206574e-06,
"loss": 0.6581,
"step": 4928
},
{
"epoch": 0.7773076320325968,
"grad_norm": 8.807291984558105,
"learning_rate": 7.07319149800605e-06,
"loss": 0.6979,
"step": 4960
},
{
"epoch": 0.7823225199811942,
"grad_norm": 0.6080070734024048,
"learning_rate": 6.857441593977046e-06,
"loss": 0.7582,
"step": 4992
},
{
"epoch": 0.7873374079297916,
"grad_norm": 2.2002525329589844,
"learning_rate": 6.6440540803730425e-06,
"loss": 1.0082,
"step": 5024
},
{
"epoch": 0.792352295878389,
"grad_norm": 8.624346733093262,
"learning_rate": 6.433090867050122e-06,
"loss": 0.6206,
"step": 5056
},
{
"epoch": 0.7973671838269863,
"grad_norm": 0.9821205139160156,
"learning_rate": 6.224613160505094e-06,
"loss": 0.5165,
"step": 5088
},
{
"epoch": 0.8023820717755837,
"grad_norm": 4.104696750640869,
"learning_rate": 6.018681446117773e-06,
"loss": 0.4914,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_nli-pairs_loss": 0.8841198682785034,
"eval_nli-pairs_runtime": 4.1793,
"eval_nli-pairs_samples_per_second": 23.928,
"eval_nli-pairs_steps_per_second": 0.957,
"eval_sts-test_pearson_cosine": 0.7866468635321827,
"eval_sts-test_pearson_dot": 0.5124924570863083,
"eval_sts-test_pearson_euclidean": 0.7320768163626257,
"eval_sts-test_pearson_manhattan": 0.7266238528084388,
"eval_sts-test_pearson_max": 0.7866468635321827,
"eval_sts-test_spearman_cosine": 0.8041619306345255,
"eval_sts-test_spearman_dot": 0.4913316974763461,
"eval_sts-test_spearman_euclidean": 0.7232005770314757,
"eval_sts-test_spearman_manhattan": 0.7207683852583252,
"eval_sts-test_spearman_max": 0.8041619306345255,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_vitaminc-pairs_loss": 4.725103855133057,
"eval_vitaminc-pairs_runtime": 1.2146,
"eval_vitaminc-pairs_samples_per_second": 69.982,
"eval_vitaminc-pairs_steps_per_second": 2.47,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_sts-label_loss": 3.6535470485687256,
"eval_sts-label_runtime": 0.3164,
"eval_sts-label_samples_per_second": 316.056,
"eval_sts-label_steps_per_second": 12.642,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_qnli-contrastive_loss": 0.10529302805662155,
"eval_qnli-contrastive_runtime": 0.368,
"eval_qnli-contrastive_samples_per_second": 271.711,
"eval_qnli-contrastive_steps_per_second": 10.868,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_scitail-pairs-qa_loss": 0.05555274337530136,
"eval_scitail-pairs-qa_runtime": 0.9542,
"eval_scitail-pairs-qa_samples_per_second": 104.795,
"eval_scitail-pairs-qa_steps_per_second": 4.192,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_scitail-pairs-pos_loss": 0.4785614013671875,
"eval_scitail-pairs-pos_runtime": 1.4937,
"eval_scitail-pairs-pos_samples_per_second": 66.949,
"eval_scitail-pairs-pos_steps_per_second": 2.678,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_xsum-pairs_loss": 0.2355932593345642,
"eval_xsum-pairs_runtime": 0.9396,
"eval_xsum-pairs_samples_per_second": 106.432,
"eval_xsum-pairs_steps_per_second": 4.257,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_compression-pairs_loss": 0.083825021982193,
"eval_compression-pairs_runtime": 0.2789,
"eval_compression-pairs_samples_per_second": 358.564,
"eval_compression-pairs_steps_per_second": 14.343,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_sciq_pairs_loss": 0.28157705068588257,
"eval_sciq_pairs_runtime": 4.1947,
"eval_sciq_pairs_samples_per_second": 23.84,
"eval_sciq_pairs_steps_per_second": 0.954,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_qasc_pairs_loss": 0.1739024668931961,
"eval_qasc_pairs_runtime": 1.1277,
"eval_qasc_pairs_samples_per_second": 88.676,
"eval_qasc_pairs_steps_per_second": 3.547,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_openbookqa_pairs_loss": 1.591935396194458,
"eval_openbookqa_pairs_runtime": 1.0022,
"eval_openbookqa_pairs_samples_per_second": 99.782,
"eval_openbookqa_pairs_steps_per_second": 3.991,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_msmarco_pairs_loss": 0.5132349133491516,
"eval_msmarco_pairs_runtime": 2.1322,
"eval_msmarco_pairs_samples_per_second": 46.901,
"eval_msmarco_pairs_steps_per_second": 1.876,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_nq_pairs_loss": 0.2343132346868515,
"eval_nq_pairs_runtime": 4.5529,
"eval_nq_pairs_samples_per_second": 21.964,
"eval_nq_pairs_steps_per_second": 0.879,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_trivia_pairs_loss": 0.7988561987876892,
"eval_trivia_pairs_runtime": 6.5661,
"eval_trivia_pairs_samples_per_second": 15.23,
"eval_trivia_pairs_steps_per_second": 0.609,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_quora_pairs_loss": 0.05578049644827843,
"eval_quora_pairs_runtime": 0.8028,
"eval_quora_pairs_samples_per_second": 124.564,
"eval_quora_pairs_steps_per_second": 4.983,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_gooaq_pairs_loss": 0.48901888728141785,
"eval_gooaq_pairs_runtime": 1.5605,
"eval_gooaq_pairs_samples_per_second": 64.082,
"eval_gooaq_pairs_steps_per_second": 2.563,
"step": 5120
},
{
"epoch": 0.8023820717755837,
"eval_mrpc_pairs_loss": 0.04172317683696747,
"eval_mrpc_pairs_runtime": 0.2628,
"eval_mrpc_pairs_samples_per_second": 380.505,
"eval_mrpc_pairs_steps_per_second": 15.22,
"step": 5120
},
{
"epoch": 0.8073969597241811,
"grad_norm": 9.413043022155762,
"learning_rate": 5.815355470602388e-06,
"loss": 1.098,
"step": 5152
},
{
"epoch": 0.8124118476727785,
"grad_norm": 0.25412222743034363,
"learning_rate": 5.614694224673387e-06,
"loss": 0.821,
"step": 5184
},
{
"epoch": 0.8174267356213759,
"grad_norm": 18.76092529296875,
"learning_rate": 5.416755925930494e-06,
"loss": 0.9351,
"step": 5216
},
{
"epoch": 0.8224416235699734,
"grad_norm": 19.607337951660156,
"learning_rate": 5.221598001968132e-06,
"loss": 0.8784,
"step": 5248
},
{
"epoch": 0.8274565115185708,
"grad_norm": 3.2164149284362793,
"learning_rate": 5.029277073714009e-06,
"loss": 0.8326,
"step": 5280
},
{
"epoch": 0.8324713994671682,
"grad_norm": 11.156713485717773,
"learning_rate": 4.839848939001789e-06,
"loss": 0.7551,
"step": 5312
},
{
"epoch": 0.8374862874157656,
"grad_norm": 8.80623722076416,
"learning_rate": 4.653368556382492e-06,
"loss": 0.8234,
"step": 5344
},
{
"epoch": 0.842501175364363,
"grad_norm": 16.081491470336914,
"learning_rate": 4.469890029179472e-06,
"loss": 1.0922,
"step": 5376
},
{
"epoch": 0.8475160633129604,
"grad_norm": 0.8583326935768127,
"learning_rate": 4.2894665897914794e-06,
"loss": 1.0925,
"step": 5408
},
{
"epoch": 0.8525309512615578,
"grad_norm": 7.903942108154297,
"learning_rate": 4.112150584248388e-06,
"loss": 1.099,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_nli-pairs_loss": 0.8909263014793396,
"eval_nli-pairs_runtime": 3.6329,
"eval_nli-pairs_samples_per_second": 27.526,
"eval_nli-pairs_steps_per_second": 1.101,
"eval_sts-test_pearson_cosine": 0.7892673589571536,
"eval_sts-test_pearson_dot": 0.5308666684424199,
"eval_sts-test_pearson_euclidean": 0.7372214599353599,
"eval_sts-test_pearson_manhattan": 0.73149442324126,
"eval_sts-test_pearson_max": 0.7892673589571536,
"eval_sts-test_spearman_cosine": 0.8088174691107087,
"eval_sts-test_spearman_dot": 0.5097841799376374,
"eval_sts-test_spearman_euclidean": 0.7291099552995026,
"eval_sts-test_spearman_manhattan": 0.7255023946868168,
"eval_sts-test_spearman_max": 0.8088174691107087,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_vitaminc-pairs_loss": 4.696901798248291,
"eval_vitaminc-pairs_runtime": 1.13,
"eval_vitaminc-pairs_samples_per_second": 75.219,
"eval_vitaminc-pairs_steps_per_second": 2.655,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_sts-label_loss": 3.794584274291992,
"eval_sts-label_runtime": 0.2757,
"eval_sts-label_samples_per_second": 362.777,
"eval_sts-label_steps_per_second": 14.511,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_qnli-contrastive_loss": 0.1291896551847458,
"eval_qnli-contrastive_runtime": 0.3577,
"eval_qnli-contrastive_samples_per_second": 279.536,
"eval_qnli-contrastive_steps_per_second": 11.181,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_scitail-pairs-qa_loss": 0.05729294940829277,
"eval_scitail-pairs-qa_runtime": 0.8757,
"eval_scitail-pairs-qa_samples_per_second": 114.199,
"eval_scitail-pairs-qa_steps_per_second": 4.568,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_scitail-pairs-pos_loss": 0.47140783071517944,
"eval_scitail-pairs-pos_runtime": 1.3328,
"eval_scitail-pairs-pos_samples_per_second": 75.031,
"eval_scitail-pairs-pos_steps_per_second": 3.001,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_xsum-pairs_loss": 0.2317724972963333,
"eval_xsum-pairs_runtime": 0.934,
"eval_xsum-pairs_samples_per_second": 107.065,
"eval_xsum-pairs_steps_per_second": 4.283,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_compression-pairs_loss": 0.0849599540233612,
"eval_compression-pairs_runtime": 0.2772,
"eval_compression-pairs_samples_per_second": 360.752,
"eval_compression-pairs_steps_per_second": 14.43,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_sciq_pairs_loss": 0.2746911346912384,
"eval_sciq_pairs_runtime": 4.0398,
"eval_sciq_pairs_samples_per_second": 24.754,
"eval_sciq_pairs_steps_per_second": 0.99,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_qasc_pairs_loss": 0.16956950724124908,
"eval_qasc_pairs_runtime": 1.0682,
"eval_qasc_pairs_samples_per_second": 93.615,
"eval_qasc_pairs_steps_per_second": 3.745,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_openbookqa_pairs_loss": 1.5424996614456177,
"eval_openbookqa_pairs_runtime": 0.8928,
"eval_openbookqa_pairs_samples_per_second": 112.006,
"eval_openbookqa_pairs_steps_per_second": 4.48,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_msmarco_pairs_loss": 0.5047981142997742,
"eval_msmarco_pairs_runtime": 2.0436,
"eval_msmarco_pairs_samples_per_second": 48.932,
"eval_msmarco_pairs_steps_per_second": 1.957,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_nq_pairs_loss": 0.230237677693367,
"eval_nq_pairs_runtime": 4.5251,
"eval_nq_pairs_samples_per_second": 22.099,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_trivia_pairs_loss": 0.7567735314369202,
"eval_trivia_pairs_runtime": 6.4545,
"eval_trivia_pairs_samples_per_second": 15.493,
"eval_trivia_pairs_steps_per_second": 0.62,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_quora_pairs_loss": 0.05753583088517189,
"eval_quora_pairs_runtime": 0.6769,
"eval_quora_pairs_samples_per_second": 147.736,
"eval_quora_pairs_steps_per_second": 5.909,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_gooaq_pairs_loss": 0.49448758363723755,
"eval_gooaq_pairs_runtime": 1.3984,
"eval_gooaq_pairs_samples_per_second": 71.51,
"eval_gooaq_pairs_steps_per_second": 2.86,
"step": 5440
},
{
"epoch": 0.8525309512615578,
"eval_mrpc_pairs_loss": 0.04384453222155571,
"eval_mrpc_pairs_runtime": 0.2653,
"eval_mrpc_pairs_samples_per_second": 376.996,
"eval_mrpc_pairs_steps_per_second": 15.08,
"step": 5440
},
{
"epoch": 0.8575458392101551,
"grad_norm": 0.8697513341903687,
"learning_rate": 3.93799345702415e-06,
"loss": 0.5396,
"step": 5472
},
{
"epoch": 0.8625607271587525,
"grad_norm": 8.337197303771973,
"learning_rate": 3.7670457361112116e-06,
"loss": 0.6636,
"step": 5504
},
{
"epoch": 0.8675756151073499,
"grad_norm": 0.3655373156070709,
"learning_rate": 3.5993570183609596e-06,
"loss": 1.0095,
"step": 5536
},
{
"epoch": 0.8725905030559473,
"grad_norm": 13.748374938964844,
"learning_rate": 3.4349759550941933e-06,
"loss": 0.631,
"step": 5568
},
{
"epoch": 0.8776053910045447,
"grad_norm": 15.683762550354004,
"learning_rate": 3.273950237986013e-06,
"loss": 0.5415,
"step": 5600
},
{
"epoch": 0.8826202789531421,
"grad_norm": 10.004467964172363,
"learning_rate": 3.11632658522906e-06,
"loss": 0.9227,
"step": 5632
},
{
"epoch": 0.8876351669017395,
"grad_norm": 12.990907669067383,
"learning_rate": 2.9621507279792564e-06,
"loss": 0.8991,
"step": 5664
},
{
"epoch": 0.8926500548503369,
"grad_norm": 0.4619373679161072,
"learning_rate": 2.8114673970878584e-06,
"loss": 0.5068,
"step": 5696
},
{
"epoch": 0.8976649427989344,
"grad_norm": 8.317788124084473,
"learning_rate": 2.664320310123768e-06,
"loss": 1.2134,
"step": 5728
},
{
"epoch": 0.9026798307475318,
"grad_norm": 0.38993319869041443,
"learning_rate": 2.5207521586897876e-06,
"loss": 0.4651,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_nli-pairs_loss": 0.8765493631362915,
"eval_nli-pairs_runtime": 3.6164,
"eval_nli-pairs_samples_per_second": 27.652,
"eval_nli-pairs_steps_per_second": 1.106,
"eval_sts-test_pearson_cosine": 0.7880147168961996,
"eval_sts-test_pearson_dot": 0.5198107156003906,
"eval_sts-test_pearson_euclidean": 0.7362840264051249,
"eval_sts-test_pearson_manhattan": 0.7307716823389564,
"eval_sts-test_pearson_max": 0.7880147168961996,
"eval_sts-test_spearman_cosine": 0.8071394355093185,
"eval_sts-test_spearman_dot": 0.49865317522814645,
"eval_sts-test_spearman_euclidean": 0.7278395467197664,
"eval_sts-test_spearman_manhattan": 0.7246934378777047,
"eval_sts-test_spearman_max": 0.8071394355093185,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_vitaminc-pairs_loss": 4.717629432678223,
"eval_vitaminc-pairs_runtime": 1.1248,
"eval_vitaminc-pairs_samples_per_second": 75.571,
"eval_vitaminc-pairs_steps_per_second": 2.667,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_sts-label_loss": 3.7598328590393066,
"eval_sts-label_runtime": 0.2743,
"eval_sts-label_samples_per_second": 364.548,
"eval_sts-label_steps_per_second": 14.582,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_qnli-contrastive_loss": 0.11829647421836853,
"eval_qnli-contrastive_runtime": 0.3606,
"eval_qnli-contrastive_samples_per_second": 277.334,
"eval_qnli-contrastive_steps_per_second": 11.093,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_scitail-pairs-qa_loss": 0.05503571406006813,
"eval_scitail-pairs-qa_runtime": 0.874,
"eval_scitail-pairs-qa_samples_per_second": 114.411,
"eval_scitail-pairs-qa_steps_per_second": 4.576,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_scitail-pairs-pos_loss": 0.47530597448349,
"eval_scitail-pairs-pos_runtime": 1.3429,
"eval_scitail-pairs-pos_samples_per_second": 74.463,
"eval_scitail-pairs-pos_steps_per_second": 2.979,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_xsum-pairs_loss": 0.22936196625232697,
"eval_xsum-pairs_runtime": 0.9431,
"eval_xsum-pairs_samples_per_second": 106.028,
"eval_xsum-pairs_steps_per_second": 4.241,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_compression-pairs_loss": 0.08313465863466263,
"eval_compression-pairs_runtime": 0.2781,
"eval_compression-pairs_samples_per_second": 359.542,
"eval_compression-pairs_steps_per_second": 14.382,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_sciq_pairs_loss": 0.27646955847740173,
"eval_sciq_pairs_runtime": 4.0554,
"eval_sciq_pairs_samples_per_second": 24.658,
"eval_sciq_pairs_steps_per_second": 0.986,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_qasc_pairs_loss": 0.17006540298461914,
"eval_qasc_pairs_runtime": 1.0538,
"eval_qasc_pairs_samples_per_second": 94.898,
"eval_qasc_pairs_steps_per_second": 3.796,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_openbookqa_pairs_loss": 1.5487664937973022,
"eval_openbookqa_pairs_runtime": 0.8956,
"eval_openbookqa_pairs_samples_per_second": 111.653,
"eval_openbookqa_pairs_steps_per_second": 4.466,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_msmarco_pairs_loss": 0.4861982464790344,
"eval_msmarco_pairs_runtime": 2.0548,
"eval_msmarco_pairs_samples_per_second": 48.666,
"eval_msmarco_pairs_steps_per_second": 1.947,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_nq_pairs_loss": 0.22520922124385834,
"eval_nq_pairs_runtime": 4.4973,
"eval_nq_pairs_samples_per_second": 22.236,
"eval_nq_pairs_steps_per_second": 0.889,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_trivia_pairs_loss": 0.7480303049087524,
"eval_trivia_pairs_runtime": 6.498,
"eval_trivia_pairs_samples_per_second": 15.389,
"eval_trivia_pairs_steps_per_second": 0.616,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_quora_pairs_loss": 0.06060533598065376,
"eval_quora_pairs_runtime": 0.6722,
"eval_quora_pairs_samples_per_second": 148.76,
"eval_quora_pairs_steps_per_second": 5.95,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_gooaq_pairs_loss": 0.4696855843067169,
"eval_gooaq_pairs_runtime": 1.3985,
"eval_gooaq_pairs_samples_per_second": 71.503,
"eval_gooaq_pairs_steps_per_second": 2.86,
"step": 5760
},
{
"epoch": 0.9026798307475318,
"eval_mrpc_pairs_loss": 0.04175671190023422,
"eval_mrpc_pairs_runtime": 0.2618,
"eval_mrpc_pairs_samples_per_second": 381.956,
"eval_mrpc_pairs_steps_per_second": 15.278,
"step": 5760
},
{
"epoch": 0.9076947186961292,
"grad_norm": 19.970914840698242,
"learning_rate": 2.3808045960365743e-06,
"loss": 0.6346,
"step": 5792
},
{
"epoch": 0.9127096066447266,
"grad_norm": 7.2970075607299805,
"learning_rate": 2.2445182249778363e-06,
"loss": 1.1103,
"step": 5824
},
{
"epoch": 0.917724494593324,
"grad_norm": 14.34080982208252,
"learning_rate": 2.1119325861102666e-06,
"loss": 0.7667,
"step": 5856
},
{
"epoch": 0.9227393825419213,
"grad_norm": 16.219850540161133,
"learning_rate": 1.98308614634171e-06,
"loss": 0.9174,
"step": 5888
},
{
"epoch": 0.9277542704905187,
"grad_norm": 17.201740264892578,
"learning_rate": 1.8580162877307744e-06,
"loss": 0.7609,
"step": 5920
},
{
"epoch": 0.9327691584391161,
"grad_norm": 12.591241836547852,
"learning_rate": 1.7367592966412454e-06,
"loss": 0.8993,
"step": 5952
},
{
"epoch": 0.9377840463877135,
"grad_norm": 17.12389373779297,
"learning_rate": 1.619350353214355e-06,
"loss": 0.7587,
"step": 5984
},
{
"epoch": 0.9427989343363109,
"grad_norm": 44.237342834472656,
"learning_rate": 1.5058235211620126e-06,
"loss": 0.935,
"step": 6016
},
{
"epoch": 0.9478138222849083,
"grad_norm": 4.658092975616455,
"learning_rate": 1.3962117378839439e-06,
"loss": 0.8551,
"step": 6048
},
{
"epoch": 0.9528287102335057,
"grad_norm": 0.4202437698841095,
"learning_rate": 1.2905468049116077e-06,
"loss": 1.4247,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_nli-pairs_loss": 0.880797266960144,
"eval_nli-pairs_runtime": 3.65,
"eval_nli-pairs_samples_per_second": 27.397,
"eval_nli-pairs_steps_per_second": 1.096,
"eval_sts-test_pearson_cosine": 0.7886384880168056,
"eval_sts-test_pearson_dot": 0.5209320238457065,
"eval_sts-test_pearson_euclidean": 0.7365619856047663,
"eval_sts-test_pearson_manhattan": 0.7309874377904119,
"eval_sts-test_pearson_max": 0.7886384880168056,
"eval_sts-test_spearman_cosine": 0.8078306606920327,
"eval_sts-test_spearman_dot": 0.4995671547413244,
"eval_sts-test_spearman_euclidean": 0.7281379887760366,
"eval_sts-test_spearman_manhattan": 0.7249545388844193,
"eval_sts-test_spearman_max": 0.8078306606920327,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_vitaminc-pairs_loss": 4.70750617980957,
"eval_vitaminc-pairs_runtime": 1.1372,
"eval_vitaminc-pairs_samples_per_second": 74.747,
"eval_vitaminc-pairs_steps_per_second": 2.638,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_sts-label_loss": 3.7686922550201416,
"eval_sts-label_runtime": 0.2807,
"eval_sts-label_samples_per_second": 356.243,
"eval_sts-label_steps_per_second": 14.25,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_qnli-contrastive_loss": 0.12000326067209244,
"eval_qnli-contrastive_runtime": 0.3651,
"eval_qnli-contrastive_samples_per_second": 273.878,
"eval_qnli-contrastive_steps_per_second": 10.955,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_scitail-pairs-qa_loss": 0.055266913026571274,
"eval_scitail-pairs-qa_runtime": 0.8813,
"eval_scitail-pairs-qa_samples_per_second": 113.472,
"eval_scitail-pairs-qa_steps_per_second": 4.539,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_scitail-pairs-pos_loss": 0.46404972672462463,
"eval_scitail-pairs-pos_runtime": 1.3468,
"eval_scitail-pairs-pos_samples_per_second": 74.248,
"eval_scitail-pairs-pos_steps_per_second": 2.97,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_xsum-pairs_loss": 0.22768865525722504,
"eval_xsum-pairs_runtime": 0.9385,
"eval_xsum-pairs_samples_per_second": 106.553,
"eval_xsum-pairs_steps_per_second": 4.262,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_compression-pairs_loss": 0.08245458453893661,
"eval_compression-pairs_runtime": 0.2783,
"eval_compression-pairs_samples_per_second": 359.331,
"eval_compression-pairs_steps_per_second": 14.373,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_sciq_pairs_loss": 0.24696679413318634,
"eval_sciq_pairs_runtime": 4.072,
"eval_sciq_pairs_samples_per_second": 24.558,
"eval_sciq_pairs_steps_per_second": 0.982,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_qasc_pairs_loss": 0.16628116369247437,
"eval_qasc_pairs_runtime": 1.066,
"eval_qasc_pairs_samples_per_second": 93.809,
"eval_qasc_pairs_steps_per_second": 3.752,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_openbookqa_pairs_loss": 1.5343760251998901,
"eval_openbookqa_pairs_runtime": 0.9064,
"eval_openbookqa_pairs_samples_per_second": 110.324,
"eval_openbookqa_pairs_steps_per_second": 4.413,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_msmarco_pairs_loss": 0.48861968517303467,
"eval_msmarco_pairs_runtime": 2.0777,
"eval_msmarco_pairs_samples_per_second": 48.131,
"eval_msmarco_pairs_steps_per_second": 1.925,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_nq_pairs_loss": 0.2192871868610382,
"eval_nq_pairs_runtime": 4.5629,
"eval_nq_pairs_samples_per_second": 21.916,
"eval_nq_pairs_steps_per_second": 0.877,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_trivia_pairs_loss": 0.7455114126205444,
"eval_trivia_pairs_runtime": 6.4434,
"eval_trivia_pairs_samples_per_second": 15.52,
"eval_trivia_pairs_steps_per_second": 0.621,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_quora_pairs_loss": 0.0536942183971405,
"eval_quora_pairs_runtime": 0.6874,
"eval_quora_pairs_samples_per_second": 145.481,
"eval_quora_pairs_steps_per_second": 5.819,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_gooaq_pairs_loss": 0.4775075614452362,
"eval_gooaq_pairs_runtime": 1.3946,
"eval_gooaq_pairs_samples_per_second": 71.707,
"eval_gooaq_pairs_steps_per_second": 2.868,
"step": 6080
},
{
"epoch": 0.9528287102335057,
"eval_mrpc_pairs_loss": 0.041804660111665726,
"eval_mrpc_pairs_runtime": 0.2631,
"eval_mrpc_pairs_samples_per_second": 380.035,
"eval_mrpc_pairs_steps_per_second": 15.201,
"step": 6080
},
{
"epoch": 0.9578435981821031,
"grad_norm": 15.8797607421875,
"learning_rate": 1.1888593786816527e-06,
"loss": 0.3377,
"step": 6112
},
{
"epoch": 0.9628584861307005,
"grad_norm": 54.2625732421875,
"learning_rate": 1.0911789616415957e-06,
"loss": 1.163,
"step": 6144
},
{
"epoch": 0.967873374079298,
"grad_norm": 27.014169692993164,
"learning_rate": 9.975338936903327e-07,
"loss": 1.1638,
"step": 6176
},
{
"epoch": 0.9728882620278954,
"grad_norm": 12.264323234558105,
"learning_rate": 9.079513439558945e-07,
"loss": 0.7428,
"step": 6208
},
{
"epoch": 0.9779031499764927,
"grad_norm": 0.2486962229013443,
"learning_rate": 8.224573029129201e-07,
"loss": 0.3827,
"step": 6240
},
{
"epoch": 0.9829180379250901,
"grad_norm": 0.19951488077640533,
"learning_rate": 7.41076574842064e-07,
"loss": 1.0739,
"step": 6272
},
{
"epoch": 0.9879329258736875,
"grad_norm": 1.6168636083602905,
"learning_rate": 6.638327706335673e-07,
"loss": 0.7049,
"step": 6304
},
{
"epoch": 0.9929478138222849,
"grad_norm": 1.4084432125091553,
"learning_rate": 5.907483009370463e-07,
"loss": 0.9298,
"step": 6336
},
{
"epoch": 0.9979627017708823,
"grad_norm": 0.7779116630554199,
"learning_rate": 5.218443696595343e-07,
"loss": 0.6243,
"step": 6368
},
{
"epoch": 1.0029775897194797,
"grad_norm": 10.389066696166992,
"learning_rate": 4.5714096781360346e-07,
"loss": 0.8693,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_nli-pairs_loss": 0.8764966726303101,
"eval_nli-pairs_runtime": 3.9731,
"eval_nli-pairs_samples_per_second": 25.169,
"eval_nli-pairs_steps_per_second": 1.007,
"eval_sts-test_pearson_cosine": 0.7883389668315285,
"eval_sts-test_pearson_dot": 0.517346671859764,
"eval_sts-test_pearson_euclidean": 0.7353164199200737,
"eval_sts-test_pearson_manhattan": 0.7297049415657237,
"eval_sts-test_pearson_max": 0.7883389668315285,
"eval_sts-test_spearman_cosine": 0.8072800949662179,
"eval_sts-test_spearman_dot": 0.4963365732568842,
"eval_sts-test_spearman_euclidean": 0.7268218204343426,
"eval_sts-test_spearman_manhattan": 0.7238000634035274,
"eval_sts-test_spearman_max": 0.8072800949662179,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_vitaminc-pairs_loss": 4.692606449127197,
"eval_vitaminc-pairs_runtime": 1.1964,
"eval_vitaminc-pairs_samples_per_second": 71.046,
"eval_vitaminc-pairs_steps_per_second": 2.508,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_sts-label_loss": 3.7494537830352783,
"eval_sts-label_runtime": 0.2884,
"eval_sts-label_samples_per_second": 346.773,
"eval_sts-label_steps_per_second": 13.871,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_qnli-contrastive_loss": 0.11221926659345627,
"eval_qnli-contrastive_runtime": 0.366,
"eval_qnli-contrastive_samples_per_second": 273.23,
"eval_qnli-contrastive_steps_per_second": 10.929,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_scitail-pairs-qa_loss": 0.05439920350909233,
"eval_scitail-pairs-qa_runtime": 1.0826,
"eval_scitail-pairs-qa_samples_per_second": 92.37,
"eval_scitail-pairs-qa_steps_per_second": 3.695,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_scitail-pairs-pos_loss": 0.47426754236221313,
"eval_scitail-pairs-pos_runtime": 1.4478,
"eval_scitail-pairs-pos_samples_per_second": 69.07,
"eval_scitail-pairs-pos_steps_per_second": 2.763,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_xsum-pairs_loss": 0.22696803510189056,
"eval_xsum-pairs_runtime": 0.9498,
"eval_xsum-pairs_samples_per_second": 105.287,
"eval_xsum-pairs_steps_per_second": 4.211,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_compression-pairs_loss": 0.08134880661964417,
"eval_compression-pairs_runtime": 0.2978,
"eval_compression-pairs_samples_per_second": 335.83,
"eval_compression-pairs_steps_per_second": 13.433,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_sciq_pairs_loss": 0.22929410636425018,
"eval_sciq_pairs_runtime": 4.3229,
"eval_sciq_pairs_samples_per_second": 23.132,
"eval_sciq_pairs_steps_per_second": 0.925,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_qasc_pairs_loss": 0.16514292359352112,
"eval_qasc_pairs_runtime": 1.1535,
"eval_qasc_pairs_samples_per_second": 86.694,
"eval_qasc_pairs_steps_per_second": 3.468,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_openbookqa_pairs_loss": 1.5505836009979248,
"eval_openbookqa_pairs_runtime": 0.9784,
"eval_openbookqa_pairs_samples_per_second": 102.21,
"eval_openbookqa_pairs_steps_per_second": 4.088,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_msmarco_pairs_loss": 0.48988625407218933,
"eval_msmarco_pairs_runtime": 2.1515,
"eval_msmarco_pairs_samples_per_second": 46.48,
"eval_msmarco_pairs_steps_per_second": 1.859,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_nq_pairs_loss": 0.21817754209041595,
"eval_nq_pairs_runtime": 4.6579,
"eval_nq_pairs_samples_per_second": 21.469,
"eval_nq_pairs_steps_per_second": 0.859,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_trivia_pairs_loss": 0.7522485852241516,
"eval_trivia_pairs_runtime": 6.6903,
"eval_trivia_pairs_samples_per_second": 14.947,
"eval_trivia_pairs_steps_per_second": 0.598,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_quora_pairs_loss": 0.026629021391272545,
"eval_quora_pairs_runtime": 0.7757,
"eval_quora_pairs_samples_per_second": 128.912,
"eval_quora_pairs_steps_per_second": 5.156,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_gooaq_pairs_loss": 0.47202804684638977,
"eval_gooaq_pairs_runtime": 1.5337,
"eval_gooaq_pairs_samples_per_second": 65.202,
"eval_gooaq_pairs_steps_per_second": 2.608,
"step": 6400
},
{
"epoch": 1.0029775897194797,
"eval_mrpc_pairs_loss": 0.041211605072021484,
"eval_mrpc_pairs_runtime": 0.2796,
"eval_mrpc_pairs_samples_per_second": 357.626,
"eval_mrpc_pairs_steps_per_second": 14.305,
"step": 6400
},
{
"epoch": 1.007992477668077,
"grad_norm": 0.12192127108573914,
"learning_rate": 3.9665686771741374e-07,
"loss": 0.731,
"step": 6432
},
{
"epoch": 1.0130073656166745,
"grad_norm": 4.465780258178711,
"learning_rate": 3.404096175483029e-07,
"loss": 0.7662,
"step": 6464
},
{
"epoch": 1.0180222535652719,
"grad_norm": 19.539562225341797,
"learning_rate": 2.8841553625157116e-07,
"loss": 0.5362,
"step": 6496
},
{
"epoch": 1.0230371415138693,
"grad_norm": 1.8675719499588013,
"learning_rate": 2.406897088058863e-07,
"loss": 0.9786,
"step": 6528
},
{
"epoch": 1.0280520294624667,
"grad_norm": 1.5663179159164429,
"learning_rate": 1.9724598184667987e-07,
"loss": 0.9213,
"step": 6560
},
{
"epoch": 1.033066917411064,
"grad_norm": 1.0503817796707153,
"learning_rate": 1.580969596488624e-07,
"loss": 0.7601,
"step": 6592
},
{
"epoch": 1.0380818053596614,
"grad_norm": 1.7467032670974731,
"learning_rate": 1.2325400046994672e-07,
"loss": 0.4821,
"step": 6624
},
{
"epoch": 1.0430966933082588,
"grad_norm": 0.5685003399848938,
"learning_rate": 9.272721325469414e-08,
"loss": 0.73,
"step": 6656
},
{
"epoch": 1.0481115812568562,
"grad_norm": 0.16832184791564941,
"learning_rate": 6.652545470221705e-08,
"loss": 0.4139,
"step": 6688
},
{
"epoch": 1.0531264692054536,
"grad_norm": 17.248783111572266,
"learning_rate": 4.465632669640285e-08,
"loss": 0.5152,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_nli-pairs_loss": 0.8771082162857056,
"eval_nli-pairs_runtime": 3.6864,
"eval_nli-pairs_samples_per_second": 27.127,
"eval_nli-pairs_steps_per_second": 1.085,
"eval_sts-test_pearson_cosine": 0.7895199953969396,
"eval_sts-test_pearson_dot": 0.5189310649741209,
"eval_sts-test_pearson_euclidean": 0.7358975444358454,
"eval_sts-test_pearson_manhattan": 0.7303294470043906,
"eval_sts-test_pearson_max": 0.7895199953969396,
"eval_sts-test_spearman_cosine": 0.8080710925195471,
"eval_sts-test_spearman_dot": 0.49813617315229736,
"eval_sts-test_spearman_euclidean": 0.727349183443088,
"eval_sts-test_spearman_manhattan": 0.7243520585394965,
"eval_sts-test_spearman_max": 0.8080710925195471,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_vitaminc-pairs_loss": 4.680215358734131,
"eval_vitaminc-pairs_runtime": 1.1767,
"eval_vitaminc-pairs_samples_per_second": 72.234,
"eval_vitaminc-pairs_steps_per_second": 2.549,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_sts-label_loss": 3.747551202774048,
"eval_sts-label_runtime": 0.2756,
"eval_sts-label_samples_per_second": 362.89,
"eval_sts-label_steps_per_second": 14.516,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_qnli-contrastive_loss": 0.11317223310470581,
"eval_qnli-contrastive_runtime": 0.362,
"eval_qnli-contrastive_samples_per_second": 276.263,
"eval_qnli-contrastive_steps_per_second": 11.051,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_scitail-pairs-qa_loss": 0.05494887754321098,
"eval_scitail-pairs-qa_runtime": 0.8771,
"eval_scitail-pairs-qa_samples_per_second": 114.01,
"eval_scitail-pairs-qa_steps_per_second": 4.56,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_scitail-pairs-pos_loss": 0.46942538022994995,
"eval_scitail-pairs-pos_runtime": 1.3418,
"eval_scitail-pairs-pos_samples_per_second": 74.527,
"eval_scitail-pairs-pos_steps_per_second": 2.981,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_xsum-pairs_loss": 0.22760838270187378,
"eval_xsum-pairs_runtime": 0.9366,
"eval_xsum-pairs_samples_per_second": 106.764,
"eval_xsum-pairs_steps_per_second": 4.271,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_compression-pairs_loss": 0.081705242395401,
"eval_compression-pairs_runtime": 0.2786,
"eval_compression-pairs_samples_per_second": 358.908,
"eval_compression-pairs_steps_per_second": 14.356,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_sciq_pairs_loss": 0.22932235896587372,
"eval_sciq_pairs_runtime": 4.0839,
"eval_sciq_pairs_samples_per_second": 24.486,
"eval_sciq_pairs_steps_per_second": 0.979,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_qasc_pairs_loss": 0.1658654361963272,
"eval_qasc_pairs_runtime": 1.0521,
"eval_qasc_pairs_samples_per_second": 95.048,
"eval_qasc_pairs_steps_per_second": 3.802,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_openbookqa_pairs_loss": 1.5459561347961426,
"eval_openbookqa_pairs_runtime": 0.8996,
"eval_openbookqa_pairs_samples_per_second": 111.162,
"eval_openbookqa_pairs_steps_per_second": 4.446,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_msmarco_pairs_loss": 0.49013325572013855,
"eval_msmarco_pairs_runtime": 2.0531,
"eval_msmarco_pairs_samples_per_second": 48.707,
"eval_msmarco_pairs_steps_per_second": 1.948,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_nq_pairs_loss": 0.22049441933631897,
"eval_nq_pairs_runtime": 4.5149,
"eval_nq_pairs_samples_per_second": 22.149,
"eval_nq_pairs_steps_per_second": 0.886,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_trivia_pairs_loss": 0.7513056397438049,
"eval_trivia_pairs_runtime": 6.4705,
"eval_trivia_pairs_samples_per_second": 15.455,
"eval_trivia_pairs_steps_per_second": 0.618,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_quora_pairs_loss": 0.024981992319226265,
"eval_quora_pairs_runtime": 0.6855,
"eval_quora_pairs_samples_per_second": 145.879,
"eval_quora_pairs_steps_per_second": 5.835,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_gooaq_pairs_loss": 0.47234511375427246,
"eval_gooaq_pairs_runtime": 1.4025,
"eval_gooaq_pairs_samples_per_second": 71.304,
"eval_gooaq_pairs_steps_per_second": 2.852,
"step": 6720
},
{
"epoch": 1.0531264692054536,
"eval_mrpc_pairs_loss": 0.04154253005981445,
"eval_mrpc_pairs_runtime": 0.2618,
"eval_mrpc_pairs_samples_per_second": 382.036,
"eval_mrpc_pairs_steps_per_second": 15.281,
"step": 6720
},
{
"epoch": 1.058141357154051,
"grad_norm": 14.043108940124512,
"learning_rate": 2.7126174100376432e-08,
"loss": 0.4684,
"step": 6752
},
{
"epoch": 1.0631562451026484,
"grad_norm": 0.5513893365859985,
"learning_rate": 1.3940082915687713e-08,
"loss": 0.445,
"step": 6784
},
{
"epoch": 1.068171133051246,
"grad_norm": 7.036909580230713,
"learning_rate": 5.101878806703652e-09,
"loss": 0.4288,
"step": 6816
},
{
"epoch": 1.0731860209998434,
"grad_norm": 0.2966393828392029,
"learning_rate": 6.141259906761176e-10,
"loss": 0.3797,
"step": 6848
},
{
"epoch": 1.0782009089484408,
"grad_norm": 9.721883773803711,
"learning_rate": 2.9999521873506204e-05,
"loss": 0.4304,
"step": 6880
},
{
"epoch": 1.0832157968970382,
"grad_norm": 2.1523923873901367,
"learning_rate": 2.9995306080226573e-05,
"loss": 0.8562,
"step": 6912
},
{
"epoch": 1.0882306848456356,
"grad_norm": 12.939388275146484,
"learning_rate": 2.9986739717293326e-05,
"loss": 0.4902,
"step": 6944
},
{
"epoch": 1.093245572794233,
"grad_norm": 0.37949275970458984,
"learning_rate": 2.9973825270054784e-05,
"loss": 0.4285,
"step": 6976
},
{
"epoch": 1.0982604607428303,
"grad_norm": 2.427003860473633,
"learning_rate": 2.995656648536359e-05,
"loss": 0.4782,
"step": 7008
},
{
"epoch": 1.1032753486914277,
"grad_norm": 10.36500072479248,
"learning_rate": 2.9934968370489646e-05,
"loss": 0.7503,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_nli-pairs_loss": 0.9263110160827637,
"eval_nli-pairs_runtime": 3.6445,
"eval_nli-pairs_samples_per_second": 27.439,
"eval_nli-pairs_steps_per_second": 1.098,
"eval_sts-test_pearson_cosine": 0.7937369016852821,
"eval_sts-test_pearson_dot": 0.5273705048333348,
"eval_sts-test_pearson_euclidean": 0.7373368406202081,
"eval_sts-test_pearson_manhattan": 0.7318756816157863,
"eval_sts-test_pearson_max": 0.7937369016852821,
"eval_sts-test_spearman_cosine": 0.810858247608813,
"eval_sts-test_spearman_dot": 0.508640420451459,
"eval_sts-test_spearman_euclidean": 0.73158962258494,
"eval_sts-test_spearman_manhattan": 0.7284434977078286,
"eval_sts-test_spearman_max": 0.810858247608813,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_vitaminc-pairs_loss": 4.521730422973633,
"eval_vitaminc-pairs_runtime": 1.1248,
"eval_vitaminc-pairs_samples_per_second": 75.569,
"eval_vitaminc-pairs_steps_per_second": 2.667,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_sts-label_loss": 3.8786072731018066,
"eval_sts-label_runtime": 0.2698,
"eval_sts-label_samples_per_second": 370.602,
"eval_sts-label_steps_per_second": 14.824,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_qnli-contrastive_loss": 0.1796300858259201,
"eval_qnli-contrastive_runtime": 0.3573,
"eval_qnli-contrastive_samples_per_second": 279.916,
"eval_qnli-contrastive_steps_per_second": 11.197,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_scitail-pairs-qa_loss": 0.06360480934381485,
"eval_scitail-pairs-qa_runtime": 0.8855,
"eval_scitail-pairs-qa_samples_per_second": 112.93,
"eval_scitail-pairs-qa_steps_per_second": 4.517,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_scitail-pairs-pos_loss": 0.5473235249519348,
"eval_scitail-pairs-pos_runtime": 1.3255,
"eval_scitail-pairs-pos_samples_per_second": 75.446,
"eval_scitail-pairs-pos_steps_per_second": 3.018,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_xsum-pairs_loss": 0.24051249027252197,
"eval_xsum-pairs_runtime": 0.9384,
"eval_xsum-pairs_samples_per_second": 106.567,
"eval_xsum-pairs_steps_per_second": 4.263,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_compression-pairs_loss": 0.0928964912891388,
"eval_compression-pairs_runtime": 0.2778,
"eval_compression-pairs_samples_per_second": 359.983,
"eval_compression-pairs_steps_per_second": 14.399,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_sciq_pairs_loss": 0.28897982835769653,
"eval_sciq_pairs_runtime": 4.1339,
"eval_sciq_pairs_samples_per_second": 24.19,
"eval_sciq_pairs_steps_per_second": 0.968,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_qasc_pairs_loss": 0.1793307065963745,
"eval_qasc_pairs_runtime": 1.0598,
"eval_qasc_pairs_samples_per_second": 94.357,
"eval_qasc_pairs_steps_per_second": 3.774,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_openbookqa_pairs_loss": 1.7123816013336182,
"eval_openbookqa_pairs_runtime": 0.8946,
"eval_openbookqa_pairs_samples_per_second": 111.784,
"eval_openbookqa_pairs_steps_per_second": 4.471,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_msmarco_pairs_loss": 0.4797554016113281,
"eval_msmarco_pairs_runtime": 2.0659,
"eval_msmarco_pairs_samples_per_second": 48.405,
"eval_msmarco_pairs_steps_per_second": 1.936,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_nq_pairs_loss": 0.2459176480770111,
"eval_nq_pairs_runtime": 4.5081,
"eval_nq_pairs_samples_per_second": 22.182,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_trivia_pairs_loss": 0.9698570966720581,
"eval_trivia_pairs_runtime": 6.4733,
"eval_trivia_pairs_samples_per_second": 15.448,
"eval_trivia_pairs_steps_per_second": 0.618,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_quora_pairs_loss": 0.03161533921957016,
"eval_quora_pairs_runtime": 0.6866,
"eval_quora_pairs_samples_per_second": 145.647,
"eval_quora_pairs_steps_per_second": 5.826,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_gooaq_pairs_loss": 0.5500377416610718,
"eval_gooaq_pairs_runtime": 1.4051,
"eval_gooaq_pairs_samples_per_second": 71.17,
"eval_gooaq_pairs_steps_per_second": 2.847,
"step": 7040
},
{
"epoch": 1.1032753486914277,
"eval_mrpc_pairs_loss": 0.04846707731485367,
"eval_mrpc_pairs_runtime": 0.2627,
"eval_mrpc_pairs_samples_per_second": 380.671,
"eval_mrpc_pairs_steps_per_second": 15.227,
"step": 7040
},
{
"epoch": 1.1082902366400251,
"grad_norm": 10.539325714111328,
"learning_rate": 2.9909037191667383e-05,
"loss": 1.0828,
"step": 7072
},
{
"epoch": 1.1133051245886225,
"grad_norm": 14.641651153564453,
"learning_rate": 2.987878047227772e-05,
"loss": 0.6206,
"step": 7104
},
{
"epoch": 1.11832001253722,
"grad_norm": 12.57785415649414,
"learning_rate": 2.9844206990665325e-05,
"loss": 0.8111,
"step": 7136
},
{
"epoch": 1.1233349004858173,
"grad_norm": 6.1240129470825195,
"learning_rate": 2.980532677759177e-05,
"loss": 0.49,
"step": 7168
},
{
"epoch": 1.1283497884344147,
"grad_norm": 8.179468154907227,
"learning_rate": 2.97621511133253e-05,
"loss": 0.5289,
"step": 7200
},
{
"epoch": 1.133364676383012,
"grad_norm": 13.069085121154785,
"learning_rate": 2.971469252436813e-05,
"loss": 0.2983,
"step": 7232
},
{
"epoch": 1.1383795643316095,
"grad_norm": 11.689116477966309,
"learning_rate": 2.9662964779822125e-05,
"loss": 0.5183,
"step": 7264
},
{
"epoch": 1.1433944522802069,
"grad_norm": 6.402202606201172,
"learning_rate": 2.9606982887393993e-05,
"loss": 0.3254,
"step": 7296
},
{
"epoch": 1.1484093402288043,
"grad_norm": 17.79107093811035,
"learning_rate": 2.9546763089041115e-05,
"loss": 0.5142,
"step": 7328
},
{
"epoch": 1.1534242281774016,
"grad_norm": 3.3558926582336426,
"learning_rate": 2.9482322856259305e-05,
"loss": 0.5605,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_nli-pairs_loss": 1.034800410270691,
"eval_nli-pairs_runtime": 3.6881,
"eval_nli-pairs_samples_per_second": 27.114,
"eval_nli-pairs_steps_per_second": 1.085,
"eval_sts-test_pearson_cosine": 0.7910968553972442,
"eval_sts-test_pearson_dot": 0.5191989002837457,
"eval_sts-test_pearson_euclidean": 0.7346238729069505,
"eval_sts-test_pearson_manhattan": 0.7286075410186882,
"eval_sts-test_pearson_max": 0.7910968553972442,
"eval_sts-test_spearman_cosine": 0.8066961580110351,
"eval_sts-test_spearman_dot": 0.5084443140830514,
"eval_sts-test_spearman_euclidean": 0.72712818838666,
"eval_sts-test_spearman_manhattan": 0.7230020447891047,
"eval_sts-test_spearman_max": 0.8066961580110351,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_vitaminc-pairs_loss": 4.861147403717041,
"eval_vitaminc-pairs_runtime": 1.2006,
"eval_vitaminc-pairs_samples_per_second": 70.796,
"eval_vitaminc-pairs_steps_per_second": 2.499,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_sts-label_loss": 3.832930326461792,
"eval_sts-label_runtime": 0.2878,
"eval_sts-label_samples_per_second": 347.487,
"eval_sts-label_steps_per_second": 13.899,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_qnli-contrastive_loss": 0.20628628134727478,
"eval_qnli-contrastive_runtime": 0.3622,
"eval_qnli-contrastive_samples_per_second": 276.06,
"eval_qnli-contrastive_steps_per_second": 11.042,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_scitail-pairs-qa_loss": 0.06246212124824524,
"eval_scitail-pairs-qa_runtime": 0.9341,
"eval_scitail-pairs-qa_samples_per_second": 107.06,
"eval_scitail-pairs-qa_steps_per_second": 4.282,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_scitail-pairs-pos_loss": 0.4741693437099457,
"eval_scitail-pairs-pos_runtime": 1.6197,
"eval_scitail-pairs-pos_samples_per_second": 61.738,
"eval_scitail-pairs-pos_steps_per_second": 2.47,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_xsum-pairs_loss": 0.23739749193191528,
"eval_xsum-pairs_runtime": 0.9463,
"eval_xsum-pairs_samples_per_second": 105.68,
"eval_xsum-pairs_steps_per_second": 4.227,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_compression-pairs_loss": 0.09255027025938034,
"eval_compression-pairs_runtime": 0.2828,
"eval_compression-pairs_samples_per_second": 353.649,
"eval_compression-pairs_steps_per_second": 14.146,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_sciq_pairs_loss": 0.2770608365535736,
"eval_sciq_pairs_runtime": 4.1267,
"eval_sciq_pairs_samples_per_second": 24.232,
"eval_sciq_pairs_steps_per_second": 0.969,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_qasc_pairs_loss": 0.18835808336734772,
"eval_qasc_pairs_runtime": 1.0608,
"eval_qasc_pairs_samples_per_second": 94.272,
"eval_qasc_pairs_steps_per_second": 3.771,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_openbookqa_pairs_loss": 1.8109256029129028,
"eval_openbookqa_pairs_runtime": 0.9025,
"eval_openbookqa_pairs_samples_per_second": 110.805,
"eval_openbookqa_pairs_steps_per_second": 4.432,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_msmarco_pairs_loss": 0.5193920731544495,
"eval_msmarco_pairs_runtime": 2.1117,
"eval_msmarco_pairs_samples_per_second": 47.354,
"eval_msmarco_pairs_steps_per_second": 1.894,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_nq_pairs_loss": 0.2714031934738159,
"eval_nq_pairs_runtime": 4.5373,
"eval_nq_pairs_samples_per_second": 22.04,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_trivia_pairs_loss": 0.939833402633667,
"eval_trivia_pairs_runtime": 6.4956,
"eval_trivia_pairs_samples_per_second": 15.395,
"eval_trivia_pairs_steps_per_second": 0.616,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_quora_pairs_loss": 0.058685559779405594,
"eval_quora_pairs_runtime": 0.6769,
"eval_quora_pairs_samples_per_second": 147.738,
"eval_quora_pairs_steps_per_second": 5.91,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_gooaq_pairs_loss": 0.6361711621284485,
"eval_gooaq_pairs_runtime": 1.435,
"eval_gooaq_pairs_samples_per_second": 69.685,
"eval_gooaq_pairs_steps_per_second": 2.787,
"step": 7360
},
{
"epoch": 1.1534242281774016,
"eval_mrpc_pairs_loss": 0.047355230897665024,
"eval_mrpc_pairs_runtime": 0.2779,
"eval_mrpc_pairs_samples_per_second": 359.791,
"eval_mrpc_pairs_steps_per_second": 14.392,
"step": 7360
},
{
"epoch": 1.158439116125999,
"grad_norm": 0.946281909942627,
"learning_rate": 2.9413680885013797e-05,
"loss": 0.6993,
"step": 7392
},
{
"epoch": 1.1634540040745964,
"grad_norm": 7.1736626625061035,
"learning_rate": 2.9340857090315025e-05,
"loss": 0.3437,
"step": 7424
},
{
"epoch": 1.1684688920231938,
"grad_norm": 0.19313736259937286,
"learning_rate": 2.9263872600440707e-05,
"loss": 0.3281,
"step": 7456
},
{
"epoch": 1.1734837799717912,
"grad_norm": 12.984513282775879,
"learning_rate": 2.9182749750805903e-05,
"loss": 1.0286,
"step": 7488
},
{
"epoch": 1.1784986679203886,
"grad_norm": 0.5984382033348083,
"learning_rate": 2.9097512077482918e-05,
"loss": 0.6668,
"step": 7520
},
{
"epoch": 1.183513555868986,
"grad_norm": 4.237669944763184,
"learning_rate": 2.9008184310372744e-05,
"loss": 0.3861,
"step": 7552
},
{
"epoch": 1.1885284438175834,
"grad_norm": 0.4000037610530853,
"learning_rate": 2.891479236603025e-05,
"loss": 0.4096,
"step": 7584
},
{
"epoch": 1.1935433317661808,
"grad_norm": 13.399718284606934,
"learning_rate": 2.8817363340145038e-05,
"loss": 0.5836,
"step": 7616
},
{
"epoch": 1.1985582197147782,
"grad_norm": 1.461013913154602,
"learning_rate": 2.8715925499680188e-05,
"loss": 0.2649,
"step": 7648
},
{
"epoch": 1.2035731076633756,
"grad_norm": 6.206007957458496,
"learning_rate": 2.8610508274671218e-05,
"loss": 0.5884,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_nli-pairs_loss": 1.0354279279708862,
"eval_nli-pairs_runtime": 3.7382,
"eval_nli-pairs_samples_per_second": 26.751,
"eval_nli-pairs_steps_per_second": 1.07,
"eval_sts-test_pearson_cosine": 0.7841729020272651,
"eval_sts-test_pearson_dot": 0.5058693889598734,
"eval_sts-test_pearson_euclidean": 0.7294148871338325,
"eval_sts-test_pearson_manhattan": 0.7246093271358469,
"eval_sts-test_pearson_max": 0.7841729020272651,
"eval_sts-test_spearman_cosine": 0.8000443657886165,
"eval_sts-test_spearman_dot": 0.49286718177568123,
"eval_sts-test_spearman_euclidean": 0.7196647955405734,
"eval_sts-test_spearman_manhattan": 0.7181182061459461,
"eval_sts-test_spearman_max": 0.8000443657886165,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_vitaminc-pairs_loss": 4.786523342132568,
"eval_vitaminc-pairs_runtime": 1.1677,
"eval_vitaminc-pairs_samples_per_second": 72.794,
"eval_vitaminc-pairs_steps_per_second": 2.569,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_sts-label_loss": 3.80216383934021,
"eval_sts-label_runtime": 0.2869,
"eval_sts-label_samples_per_second": 348.532,
"eval_sts-label_steps_per_second": 13.941,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_qnli-contrastive_loss": 0.15080063045024872,
"eval_qnli-contrastive_runtime": 0.3765,
"eval_qnli-contrastive_samples_per_second": 265.584,
"eval_qnli-contrastive_steps_per_second": 10.623,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_scitail-pairs-qa_loss": 0.05477406457066536,
"eval_scitail-pairs-qa_runtime": 0.9695,
"eval_scitail-pairs-qa_samples_per_second": 103.142,
"eval_scitail-pairs-qa_steps_per_second": 4.126,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_scitail-pairs-pos_loss": 0.49995747208595276,
"eval_scitail-pairs-pos_runtime": 1.4259,
"eval_scitail-pairs-pos_samples_per_second": 70.132,
"eval_scitail-pairs-pos_steps_per_second": 2.805,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_xsum-pairs_loss": 0.24929432570934296,
"eval_xsum-pairs_runtime": 0.9657,
"eval_xsum-pairs_samples_per_second": 103.554,
"eval_xsum-pairs_steps_per_second": 4.142,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_compression-pairs_loss": 0.08594885468482971,
"eval_compression-pairs_runtime": 0.2846,
"eval_compression-pairs_samples_per_second": 351.315,
"eval_compression-pairs_steps_per_second": 14.053,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_sciq_pairs_loss": 0.28326743841171265,
"eval_sciq_pairs_runtime": 4.1832,
"eval_sciq_pairs_samples_per_second": 23.905,
"eval_sciq_pairs_steps_per_second": 0.956,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_qasc_pairs_loss": 0.1851280927658081,
"eval_qasc_pairs_runtime": 1.1629,
"eval_qasc_pairs_samples_per_second": 85.993,
"eval_qasc_pairs_steps_per_second": 3.44,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_openbookqa_pairs_loss": 1.686630368232727,
"eval_openbookqa_pairs_runtime": 0.9518,
"eval_openbookqa_pairs_samples_per_second": 105.066,
"eval_openbookqa_pairs_steps_per_second": 4.203,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_msmarco_pairs_loss": 0.5506166219711304,
"eval_msmarco_pairs_runtime": 2.1738,
"eval_msmarco_pairs_samples_per_second": 46.002,
"eval_msmarco_pairs_steps_per_second": 1.84,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_nq_pairs_loss": 0.24249011278152466,
"eval_nq_pairs_runtime": 4.6491,
"eval_nq_pairs_samples_per_second": 21.51,
"eval_nq_pairs_steps_per_second": 0.86,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_trivia_pairs_loss": 0.9296412467956543,
"eval_trivia_pairs_runtime": 6.6163,
"eval_trivia_pairs_samples_per_second": 15.114,
"eval_trivia_pairs_steps_per_second": 0.605,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_quora_pairs_loss": 0.0314582884311676,
"eval_quora_pairs_runtime": 0.7294,
"eval_quora_pairs_samples_per_second": 137.107,
"eval_quora_pairs_steps_per_second": 5.484,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_gooaq_pairs_loss": 0.5459653735160828,
"eval_gooaq_pairs_runtime": 1.4937,
"eval_gooaq_pairs_samples_per_second": 66.948,
"eval_gooaq_pairs_steps_per_second": 2.678,
"step": 7680
},
{
"epoch": 1.2035731076633756,
"eval_mrpc_pairs_loss": 0.04276818782091141,
"eval_mrpc_pairs_runtime": 0.2699,
"eval_mrpc_pairs_samples_per_second": 370.467,
"eval_mrpc_pairs_steps_per_second": 14.819,
"step": 7680
},
{
"epoch": 1.208587995611973,
"grad_norm": 2.091714859008789,
"learning_rate": 2.8501142249687554e-05,
"loss": 0.7018,
"step": 7712
},
{
"epoch": 1.2136028835605703,
"grad_norm": 0.21109235286712646,
"learning_rate": 2.838785915495912e-05,
"loss": 0.7082,
"step": 7744
},
{
"epoch": 1.2186177715091677,
"grad_norm": 0.1267768293619156,
"learning_rate": 2.827069185717042e-05,
"loss": 0.7527,
"step": 7776
},
{
"epoch": 1.2236326594577653,
"grad_norm": 1.6667953729629517,
"learning_rate": 2.8149674349925023e-05,
"loss": 0.4255,
"step": 7808
},
{
"epoch": 1.2286475474063627,
"grad_norm": 12.699274063110352,
"learning_rate": 2.8024841743882998e-05,
"loss": 0.7488,
"step": 7840
},
{
"epoch": 1.2336624353549601,
"grad_norm": 8.052750587463379,
"learning_rate": 2.7896230256574348e-05,
"loss": 0.3364,
"step": 7872
},
{
"epoch": 1.2386773233035575,
"grad_norm": 7.821995258331299,
"learning_rate": 2.7763877201891205e-05,
"loss": 0.6963,
"step": 7904
},
{
"epoch": 1.243692211252155,
"grad_norm": 5.756433486938477,
"learning_rate": 2.762782097926205e-05,
"loss": 0.2829,
"step": 7936
},
{
"epoch": 1.2487070992007523,
"grad_norm": 18.80353355407715,
"learning_rate": 2.7488101062510904e-05,
"loss": 0.7504,
"step": 7968
},
{
"epoch": 1.2537219871493497,
"grad_norm": 3.668611526489258,
"learning_rate": 2.734475798840485e-05,
"loss": 0.7759,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_nli-pairs_loss": 0.9781379699707031,
"eval_nli-pairs_runtime": 3.6268,
"eval_nli-pairs_samples_per_second": 27.573,
"eval_nli-pairs_steps_per_second": 1.103,
"eval_sts-test_pearson_cosine": 0.7896747038559737,
"eval_sts-test_pearson_dot": 0.5160875833412549,
"eval_sts-test_pearson_euclidean": 0.7398944244671477,
"eval_sts-test_pearson_manhattan": 0.7345204191784053,
"eval_sts-test_pearson_max": 0.7896747038559737,
"eval_sts-test_spearman_cosine": 0.81067276102482,
"eval_sts-test_spearman_dot": 0.5010127030277397,
"eval_sts-test_spearman_euclidean": 0.7318872170742919,
"eval_sts-test_spearman_manhattan": 0.7283578865769135,
"eval_sts-test_spearman_max": 0.81067276102482,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_vitaminc-pairs_loss": 4.802966594696045,
"eval_vitaminc-pairs_runtime": 1.1396,
"eval_vitaminc-pairs_samples_per_second": 74.59,
"eval_vitaminc-pairs_steps_per_second": 2.633,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_sts-label_loss": 4.1984076499938965,
"eval_sts-label_runtime": 0.2755,
"eval_sts-label_samples_per_second": 362.988,
"eval_sts-label_steps_per_second": 14.52,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_qnli-contrastive_loss": 0.23863555490970612,
"eval_qnli-contrastive_runtime": 0.3602,
"eval_qnli-contrastive_samples_per_second": 277.617,
"eval_qnli-contrastive_steps_per_second": 11.105,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_scitail-pairs-qa_loss": 0.06188047304749489,
"eval_scitail-pairs-qa_runtime": 0.8935,
"eval_scitail-pairs-qa_samples_per_second": 111.921,
"eval_scitail-pairs-qa_steps_per_second": 4.477,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_scitail-pairs-pos_loss": 0.44846847653388977,
"eval_scitail-pairs-pos_runtime": 1.3467,
"eval_scitail-pairs-pos_samples_per_second": 74.254,
"eval_scitail-pairs-pos_steps_per_second": 2.97,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_xsum-pairs_loss": 0.2367183268070221,
"eval_xsum-pairs_runtime": 0.9443,
"eval_xsum-pairs_samples_per_second": 105.898,
"eval_xsum-pairs_steps_per_second": 4.236,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_compression-pairs_loss": 0.08503348380327225,
"eval_compression-pairs_runtime": 0.2921,
"eval_compression-pairs_samples_per_second": 342.302,
"eval_compression-pairs_steps_per_second": 13.692,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_sciq_pairs_loss": 0.270333856344223,
"eval_sciq_pairs_runtime": 4.0839,
"eval_sciq_pairs_samples_per_second": 24.486,
"eval_sciq_pairs_steps_per_second": 0.979,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_qasc_pairs_loss": 0.18802641332149506,
"eval_qasc_pairs_runtime": 1.0724,
"eval_qasc_pairs_samples_per_second": 93.25,
"eval_qasc_pairs_steps_per_second": 3.73,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_openbookqa_pairs_loss": 1.7418819665908813,
"eval_openbookqa_pairs_runtime": 0.8925,
"eval_openbookqa_pairs_samples_per_second": 112.041,
"eval_openbookqa_pairs_steps_per_second": 4.482,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_msmarco_pairs_loss": 0.4858554005622864,
"eval_msmarco_pairs_runtime": 2.0565,
"eval_msmarco_pairs_samples_per_second": 48.627,
"eval_msmarco_pairs_steps_per_second": 1.945,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_nq_pairs_loss": 0.23637117445468903,
"eval_nq_pairs_runtime": 4.5088,
"eval_nq_pairs_samples_per_second": 22.179,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_trivia_pairs_loss": 0.7162200808525085,
"eval_trivia_pairs_runtime": 6.4981,
"eval_trivia_pairs_samples_per_second": 15.389,
"eval_trivia_pairs_steps_per_second": 0.616,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_quora_pairs_loss": 0.06219913437962532,
"eval_quora_pairs_runtime": 0.6795,
"eval_quora_pairs_samples_per_second": 147.17,
"eval_quora_pairs_steps_per_second": 5.887,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_gooaq_pairs_loss": 0.609254777431488,
"eval_gooaq_pairs_runtime": 1.4106,
"eval_gooaq_pairs_samples_per_second": 70.891,
"eval_gooaq_pairs_steps_per_second": 2.836,
"step": 8000
},
{
"epoch": 1.2537219871493497,
"eval_mrpc_pairs_loss": 0.04724707454442978,
"eval_mrpc_pairs_runtime": 0.2638,
"eval_mrpc_pairs_samples_per_second": 379.073,
"eval_mrpc_pairs_steps_per_second": 15.163,
"step": 8000
},
{
"epoch": 1.258736875097947,
"grad_norm": 0.6591500043869019,
"learning_rate": 2.7197833344893126e-05,
"loss": 0.5297,
"step": 8032
},
{
"epoch": 1.2637517630465445,
"grad_norm": 4.714929103851318,
"learning_rate": 2.7047369759041298e-05,
"loss": 0.4933,
"step": 8064
},
{
"epoch": 1.2687666509951419,
"grad_norm": 0.15916971862316132,
"learning_rate": 2.6893410884663914e-05,
"loss": 0.3868,
"step": 8096
},
{
"epoch": 1.2737815389437392,
"grad_norm": 0.6014376282691956,
"learning_rate": 2.6736001389659254e-05,
"loss": 0.9955,
"step": 8128
},
{
"epoch": 1.2787964268923366,
"grad_norm": 2.986762762069702,
"learning_rate": 2.6575186943049913e-05,
"loss": 0.5548,
"step": 8160
},
{
"epoch": 1.283811314840934,
"grad_norm": 0.3188874125480652,
"learning_rate": 2.6411014201732884e-05,
"loss": 0.4924,
"step": 8192
},
{
"epoch": 1.2888262027895314,
"grad_norm": 0.7150152921676636,
"learning_rate": 2.624353079694308e-05,
"loss": 0.3422,
"step": 8224
},
{
"epoch": 1.2938410907381288,
"grad_norm": 0.8286885619163513,
"learning_rate": 2.6072785320434107e-05,
"loss": 0.4707,
"step": 8256
},
{
"epoch": 1.2988559786867262,
"grad_norm": 27.87748146057129,
"learning_rate": 2.5898827310380408e-05,
"loss": 0.3956,
"step": 8288
},
{
"epoch": 1.3038708666353236,
"grad_norm": 0.3072638213634491,
"learning_rate": 2.5721707237004854e-05,
"loss": 0.547,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_nli-pairs_loss": 0.9301618933677673,
"eval_nli-pairs_runtime": 3.7138,
"eval_nli-pairs_samples_per_second": 26.926,
"eval_nli-pairs_steps_per_second": 1.077,
"eval_sts-test_pearson_cosine": 0.7849967022727309,
"eval_sts-test_pearson_dot": 0.4795538577643521,
"eval_sts-test_pearson_euclidean": 0.7253853385122256,
"eval_sts-test_pearson_manhattan": 0.7194021088193217,
"eval_sts-test_pearson_max": 0.7849967022727309,
"eval_sts-test_spearman_cosine": 0.8020224630491872,
"eval_sts-test_spearman_dot": 0.46441948467132393,
"eval_sts-test_spearman_euclidean": 0.7190775648500753,
"eval_sts-test_spearman_manhattan": 0.7154699878910861,
"eval_sts-test_spearman_max": 0.8020224630491872,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_vitaminc-pairs_loss": 4.762923240661621,
"eval_vitaminc-pairs_runtime": 1.1277,
"eval_vitaminc-pairs_samples_per_second": 75.372,
"eval_vitaminc-pairs_steps_per_second": 2.66,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_sts-label_loss": 3.531181573867798,
"eval_sts-label_runtime": 0.2802,
"eval_sts-label_samples_per_second": 356.848,
"eval_sts-label_steps_per_second": 14.274,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_qnli-contrastive_loss": 0.13507510721683502,
"eval_qnli-contrastive_runtime": 0.3622,
"eval_qnli-contrastive_samples_per_second": 276.104,
"eval_qnli-contrastive_steps_per_second": 11.044,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_scitail-pairs-qa_loss": 0.052693866193294525,
"eval_scitail-pairs-qa_runtime": 0.8696,
"eval_scitail-pairs-qa_samples_per_second": 115.0,
"eval_scitail-pairs-qa_steps_per_second": 4.6,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_scitail-pairs-pos_loss": 0.47491660714149475,
"eval_scitail-pairs-pos_runtime": 1.3447,
"eval_scitail-pairs-pos_samples_per_second": 74.365,
"eval_scitail-pairs-pos_steps_per_second": 2.975,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_xsum-pairs_loss": 0.23617514967918396,
"eval_xsum-pairs_runtime": 0.9378,
"eval_xsum-pairs_samples_per_second": 106.627,
"eval_xsum-pairs_steps_per_second": 4.265,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_compression-pairs_loss": 0.07913873344659805,
"eval_compression-pairs_runtime": 0.2742,
"eval_compression-pairs_samples_per_second": 364.643,
"eval_compression-pairs_steps_per_second": 14.586,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_sciq_pairs_loss": 0.298448383808136,
"eval_sciq_pairs_runtime": 4.0839,
"eval_sciq_pairs_samples_per_second": 24.486,
"eval_sciq_pairs_steps_per_second": 0.979,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_qasc_pairs_loss": 0.1738889515399933,
"eval_qasc_pairs_runtime": 1.0525,
"eval_qasc_pairs_samples_per_second": 95.013,
"eval_qasc_pairs_steps_per_second": 3.801,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_openbookqa_pairs_loss": 1.8043091297149658,
"eval_openbookqa_pairs_runtime": 0.893,
"eval_openbookqa_pairs_samples_per_second": 111.985,
"eval_openbookqa_pairs_steps_per_second": 4.479,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_msmarco_pairs_loss": 0.5003547668457031,
"eval_msmarco_pairs_runtime": 2.0613,
"eval_msmarco_pairs_samples_per_second": 48.513,
"eval_msmarco_pairs_steps_per_second": 1.941,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_nq_pairs_loss": 0.21183601021766663,
"eval_nq_pairs_runtime": 4.5233,
"eval_nq_pairs_samples_per_second": 22.108,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_trivia_pairs_loss": 0.8857311010360718,
"eval_trivia_pairs_runtime": 6.4553,
"eval_trivia_pairs_samples_per_second": 15.491,
"eval_trivia_pairs_steps_per_second": 0.62,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_quora_pairs_loss": 0.029251573607325554,
"eval_quora_pairs_runtime": 0.6755,
"eval_quora_pairs_samples_per_second": 148.047,
"eval_quora_pairs_steps_per_second": 5.922,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_gooaq_pairs_loss": 0.5669267773628235,
"eval_gooaq_pairs_runtime": 1.4109,
"eval_gooaq_pairs_samples_per_second": 70.878,
"eval_gooaq_pairs_steps_per_second": 2.835,
"step": 8320
},
{
"epoch": 1.3038708666353236,
"eval_mrpc_pairs_loss": 0.04050436615943909,
"eval_mrpc_pairs_runtime": 0.2684,
"eval_mrpc_pairs_samples_per_second": 372.59,
"eval_mrpc_pairs_steps_per_second": 14.904,
"step": 8320
},
{
"epoch": 1.308885754583921,
"grad_norm": 0.1075374037027359,
"learning_rate": 2.5541476487935806e-05,
"loss": 0.5412,
"step": 8352
},
{
"epoch": 1.3139006425325184,
"grad_norm": 7.75120735168457,
"learning_rate": 2.535818735329815e-05,
"loss": 0.3885,
"step": 8384
},
{
"epoch": 1.3189155304811158,
"grad_norm": 0.5364068150520325,
"learning_rate": 2.5171893010542385e-05,
"loss": 0.4274,
"step": 8416
},
{
"epoch": 1.3239304184297132,
"grad_norm": 0.6744114756584167,
"learning_rate": 2.4988605558565137e-05,
"loss": 0.893,
"step": 8448
},
{
"epoch": 1.3289453063783105,
"grad_norm": 1.6057082414627075,
"learning_rate": 2.4796553472267232e-05,
"loss": 0.3456,
"step": 8480
},
{
"epoch": 1.333960194326908,
"grad_norm": 0.8727301955223083,
"learning_rate": 2.460165912399626e-05,
"loss": 0.4292,
"step": 8512
},
{
"epoch": 1.3389750822755053,
"grad_norm": 0.23973700404167175,
"learning_rate": 2.440397905820904e-05,
"loss": 0.4275,
"step": 8544
},
{
"epoch": 1.343989970224103,
"grad_norm": 16.09794807434082,
"learning_rate": 2.4203570627579187e-05,
"loss": 0.3236,
"step": 8576
},
{
"epoch": 1.3490048581727003,
"grad_norm": 0.1335248500108719,
"learning_rate": 2.4000491976357433e-05,
"loss": 0.3961,
"step": 8608
},
{
"epoch": 1.3540197461212977,
"grad_norm": 4.587371349334717,
"learning_rate": 2.3794802023502332e-05,
"loss": 0.5146,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_nli-pairs_loss": 0.996806800365448,
"eval_nli-pairs_runtime": 3.6888,
"eval_nli-pairs_samples_per_second": 27.109,
"eval_nli-pairs_steps_per_second": 1.084,
"eval_sts-test_pearson_cosine": 0.7869180410057008,
"eval_sts-test_pearson_dot": 0.4938689019771704,
"eval_sts-test_pearson_euclidean": 0.7205117910572312,
"eval_sts-test_pearson_manhattan": 0.7128032248904813,
"eval_sts-test_pearson_max": 0.7869180410057008,
"eval_sts-test_spearman_cosine": 0.8042081001243602,
"eval_sts-test_spearman_dot": 0.48091332474106047,
"eval_sts-test_spearman_euclidean": 0.7130418025896658,
"eval_sts-test_spearman_manhattan": 0.7066951779815502,
"eval_sts-test_spearman_max": 0.8042081001243602,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_vitaminc-pairs_loss": 4.757246971130371,
"eval_vitaminc-pairs_runtime": 1.13,
"eval_vitaminc-pairs_samples_per_second": 75.223,
"eval_vitaminc-pairs_steps_per_second": 2.655,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_sts-label_loss": 3.562749147415161,
"eval_sts-label_runtime": 0.2852,
"eval_sts-label_samples_per_second": 350.667,
"eval_sts-label_steps_per_second": 14.027,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_qnli-contrastive_loss": 0.10447724163532257,
"eval_qnli-contrastive_runtime": 0.3616,
"eval_qnli-contrastive_samples_per_second": 276.535,
"eval_qnli-contrastive_steps_per_second": 11.061,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_scitail-pairs-qa_loss": 0.050035107880830765,
"eval_scitail-pairs-qa_runtime": 0.8786,
"eval_scitail-pairs-qa_samples_per_second": 113.822,
"eval_scitail-pairs-qa_steps_per_second": 4.553,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_scitail-pairs-pos_loss": 0.4792901873588562,
"eval_scitail-pairs-pos_runtime": 1.3333,
"eval_scitail-pairs-pos_samples_per_second": 75.004,
"eval_scitail-pairs-pos_steps_per_second": 3.0,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_xsum-pairs_loss": 0.23096245527267456,
"eval_xsum-pairs_runtime": 0.9402,
"eval_xsum-pairs_samples_per_second": 106.362,
"eval_xsum-pairs_steps_per_second": 4.254,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_compression-pairs_loss": 0.0777381882071495,
"eval_compression-pairs_runtime": 0.2739,
"eval_compression-pairs_samples_per_second": 365.114,
"eval_compression-pairs_steps_per_second": 14.605,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_sciq_pairs_loss": 0.2707681953907013,
"eval_sciq_pairs_runtime": 4.1199,
"eval_sciq_pairs_samples_per_second": 24.272,
"eval_sciq_pairs_steps_per_second": 0.971,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_qasc_pairs_loss": 0.1706008017063141,
"eval_qasc_pairs_runtime": 1.065,
"eval_qasc_pairs_samples_per_second": 93.9,
"eval_qasc_pairs_steps_per_second": 3.756,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_openbookqa_pairs_loss": 1.7370460033416748,
"eval_openbookqa_pairs_runtime": 0.8951,
"eval_openbookqa_pairs_samples_per_second": 111.72,
"eval_openbookqa_pairs_steps_per_second": 4.469,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_msmarco_pairs_loss": 0.4633770287036896,
"eval_msmarco_pairs_runtime": 2.064,
"eval_msmarco_pairs_samples_per_second": 48.449,
"eval_msmarco_pairs_steps_per_second": 1.938,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_nq_pairs_loss": 0.21504688262939453,
"eval_nq_pairs_runtime": 4.523,
"eval_nq_pairs_samples_per_second": 22.109,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_trivia_pairs_loss": 0.8408924341201782,
"eval_trivia_pairs_runtime": 6.4614,
"eval_trivia_pairs_samples_per_second": 15.476,
"eval_trivia_pairs_steps_per_second": 0.619,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_quora_pairs_loss": 0.02466999925673008,
"eval_quora_pairs_runtime": 0.681,
"eval_quora_pairs_samples_per_second": 146.836,
"eval_quora_pairs_steps_per_second": 5.873,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_gooaq_pairs_loss": 0.5489644408226013,
"eval_gooaq_pairs_runtime": 1.4284,
"eval_gooaq_pairs_samples_per_second": 70.007,
"eval_gooaq_pairs_steps_per_second": 2.8,
"step": 8640
},
{
"epoch": 1.3540197461212977,
"eval_mrpc_pairs_loss": 0.039087630808353424,
"eval_mrpc_pairs_runtime": 0.2676,
"eval_mrpc_pairs_samples_per_second": 373.632,
"eval_mrpc_pairs_steps_per_second": 14.945,
"step": 8640
},
{
"epoch": 1.359034634069895,
"grad_norm": 0.20070208609104156,
"learning_rate": 2.3586560445586147e-05,
"loss": 0.7562,
"step": 8672
},
{
"epoch": 1.3640495220184925,
"grad_norm": 14.552980422973633,
"learning_rate": 2.3375827659480975e-05,
"loss": 0.7881,
"step": 8704
},
{
"epoch": 1.36906440996709,
"grad_norm": 0.728196382522583,
"learning_rate": 2.3162664804830062e-05,
"loss": 0.6117,
"step": 8736
},
{
"epoch": 1.3740792979156873,
"grad_norm": 104.08293151855469,
"learning_rate": 2.2947133726309464e-05,
"loss": 1.3083,
"step": 8768
},
{
"epoch": 1.3790941858642847,
"grad_norm": 9.243626594543457,
"learning_rate": 2.2729296955685097e-05,
"loss": 0.5359,
"step": 8800
},
{
"epoch": 1.384109073812882,
"grad_norm": 1.2041038274765015,
"learning_rate": 2.2509217693670464e-05,
"loss": 0.45,
"step": 8832
},
{
"epoch": 1.3891239617614795,
"grad_norm": 3.953394889831543,
"learning_rate": 2.2286959791590365e-05,
"loss": 0.6022,
"step": 8864
},
{
"epoch": 1.3941388497100768,
"grad_norm": 18.004009246826172,
"learning_rate": 2.2062587732855727e-05,
"loss": 0.6664,
"step": 8896
},
{
"epoch": 1.3991537376586742,
"grad_norm": 4.0190887451171875,
"learning_rate": 2.1836166614255147e-05,
"loss": 0.3255,
"step": 8928
}
],
"logging_steps": 32,
"max_steps": 12762,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1277,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}