|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0006268609935747, |
|
"eval_steps": 320, |
|
"global_step": 6385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0050148879485973985, |
|
"grad_norm": 14.771158218383789, |
|
"learning_rate": 9.707724425887265e-07, |
|
"loss": 0.6329, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.010029775897194797, |
|
"grad_norm": 11.052021980285645, |
|
"learning_rate": 1.9728601252609606e-06, |
|
"loss": 0.9693, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.015044663845792195, |
|
"grad_norm": 20.26296615600586, |
|
"learning_rate": 2.9749478079331944e-06, |
|
"loss": 0.6548, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.020059551794389594, |
|
"grad_norm": 12.62913703918457, |
|
"learning_rate": 3.945720250521921e-06, |
|
"loss": 1.1279, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.025074439742986992, |
|
"grad_norm": 12.316486358642578, |
|
"learning_rate": 4.916492693110647e-06, |
|
"loss": 1.0017, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03008932769158439, |
|
"grad_norm": 64.25923919677734, |
|
"learning_rate": 5.918580375782881e-06, |
|
"loss": 0.7571, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03510421564018179, |
|
"grad_norm": 0.8205029368400574, |
|
"learning_rate": 6.920668058455115e-06, |
|
"loss": 0.7304, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.04011910358877919, |
|
"grad_norm": 6.598870754241943, |
|
"learning_rate": 7.922755741127349e-06, |
|
"loss": 0.7636, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.045133991537376586, |
|
"grad_norm": 8.728073120117188, |
|
"learning_rate": 8.924843423799583e-06, |
|
"loss": 0.482, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"grad_norm": 7.645521640777588, |
|
"learning_rate": 9.926931106471817e-06, |
|
"loss": 0.6312, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_nli-pairs_loss": 1.0158467292785645, |
|
"eval_nli-pairs_runtime": 3.7267, |
|
"eval_nli-pairs_samples_per_second": 26.833, |
|
"eval_nli-pairs_steps_per_second": 1.073, |
|
"eval_sts-test_pearson_cosine": 0.7848265412179125, |
|
"eval_sts-test_pearson_dot": 0.5437080705284749, |
|
"eval_sts-test_pearson_euclidean": 0.7445845076364892, |
|
"eval_sts-test_pearson_manhattan": 0.7429239204432232, |
|
"eval_sts-test_pearson_max": 0.7848265412179125, |
|
"eval_sts-test_spearman_cosine": 0.7989504707258924, |
|
"eval_sts-test_spearman_dot": 0.5206855421174118, |
|
"eval_sts-test_spearman_euclidean": 0.733568982260844, |
|
"eval_sts-test_spearman_manhattan": 0.7349407257944446, |
|
"eval_sts-test_spearman_max": 0.7989504707258924, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_vitaminc-pairs_loss": 4.692601680755615, |
|
"eval_vitaminc-pairs_runtime": 1.1397, |
|
"eval_vitaminc-pairs_samples_per_second": 74.578, |
|
"eval_vitaminc-pairs_steps_per_second": 2.632, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_sts-label_loss": 3.5502490997314453, |
|
"eval_sts-label_runtime": 0.28, |
|
"eval_sts-label_samples_per_second": 357.117, |
|
"eval_sts-label_steps_per_second": 14.285, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_qnli-contrastive_loss": 0.16079513728618622, |
|
"eval_qnli-contrastive_runtime": 0.3646, |
|
"eval_qnli-contrastive_samples_per_second": 274.299, |
|
"eval_qnli-contrastive_steps_per_second": 10.972, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_scitail-pairs-qa_loss": 0.07610582560300827, |
|
"eval_scitail-pairs-qa_runtime": 0.8885, |
|
"eval_scitail-pairs-qa_samples_per_second": 112.548, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.502, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_scitail-pairs-pos_loss": 0.5141278505325317, |
|
"eval_scitail-pairs-pos_runtime": 1.3498, |
|
"eval_scitail-pairs-pos_samples_per_second": 74.085, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.963, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_xsum-pairs_loss": 0.25581496953964233, |
|
"eval_xsum-pairs_runtime": 0.9407, |
|
"eval_xsum-pairs_samples_per_second": 106.304, |
|
"eval_xsum-pairs_steps_per_second": 4.252, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_compression-pairs_loss": 0.09814296662807465, |
|
"eval_compression-pairs_runtime": 0.2758, |
|
"eval_compression-pairs_samples_per_second": 362.517, |
|
"eval_compression-pairs_steps_per_second": 14.501, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_sciq_pairs_loss": 0.25620242953300476, |
|
"eval_sciq_pairs_runtime": 4.1155, |
|
"eval_sciq_pairs_samples_per_second": 24.298, |
|
"eval_sciq_pairs_steps_per_second": 0.972, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_qasc_pairs_loss": 0.2044612169265747, |
|
"eval_qasc_pairs_runtime": 1.1029, |
|
"eval_qasc_pairs_samples_per_second": 90.672, |
|
"eval_qasc_pairs_steps_per_second": 3.627, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_openbookqa_pairs_loss": 1.7537646293640137, |
|
"eval_openbookqa_pairs_runtime": 0.9037, |
|
"eval_openbookqa_pairs_samples_per_second": 110.653, |
|
"eval_openbookqa_pairs_steps_per_second": 4.426, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_msmarco_pairs_loss": 0.5138561725616455, |
|
"eval_msmarco_pairs_runtime": 2.0511, |
|
"eval_msmarco_pairs_samples_per_second": 48.754, |
|
"eval_msmarco_pairs_steps_per_second": 1.95, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_nq_pairs_loss": 0.23510317504405975, |
|
"eval_nq_pairs_runtime": 4.5293, |
|
"eval_nq_pairs_samples_per_second": 22.078, |
|
"eval_nq_pairs_steps_per_second": 0.883, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_trivia_pairs_loss": 0.7808571457862854, |
|
"eval_trivia_pairs_runtime": 6.5065, |
|
"eval_trivia_pairs_samples_per_second": 15.369, |
|
"eval_trivia_pairs_steps_per_second": 0.615, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_quora_pairs_loss": 0.0392119362950325, |
|
"eval_quora_pairs_runtime": 0.675, |
|
"eval_quora_pairs_samples_per_second": 148.153, |
|
"eval_quora_pairs_steps_per_second": 5.926, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_gooaq_pairs_loss": 0.4712902009487152, |
|
"eval_gooaq_pairs_runtime": 1.4079, |
|
"eval_gooaq_pairs_samples_per_second": 71.028, |
|
"eval_gooaq_pairs_steps_per_second": 2.841, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.050148879485973984, |
|
"eval_mrpc_pairs_loss": 0.05498996376991272, |
|
"eval_mrpc_pairs_runtime": 0.2623, |
|
"eval_mrpc_pairs_samples_per_second": 381.172, |
|
"eval_mrpc_pairs_steps_per_second": 15.247, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05516376743457138, |
|
"grad_norm": 0.34924012422561646, |
|
"learning_rate": 1.092901878914405e-05, |
|
"loss": 0.5791, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.06017865538316878, |
|
"grad_norm": 0.36700841784477234, |
|
"learning_rate": 1.1931106471816284e-05, |
|
"loss": 0.6413, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.06519354333176618, |
|
"grad_norm": 7.559622764587402, |
|
"learning_rate": 1.2933194154488518e-05, |
|
"loss": 0.4319, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.07020843128036358, |
|
"grad_norm": 7.982416152954102, |
|
"learning_rate": 1.3935281837160753e-05, |
|
"loss": 0.6672, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07522331922896097, |
|
"grad_norm": 0.6726166009902954, |
|
"learning_rate": 1.4937369519832987e-05, |
|
"loss": 0.459, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08023820717755838, |
|
"grad_norm": 14.846123695373535, |
|
"learning_rate": 1.593945720250522e-05, |
|
"loss": 0.7621, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.08525309512615578, |
|
"grad_norm": 0.7846627831459045, |
|
"learning_rate": 1.6941544885177454e-05, |
|
"loss": 0.864, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.09026798307475317, |
|
"grad_norm": 0.8993583917617798, |
|
"learning_rate": 1.7943632567849688e-05, |
|
"loss": 0.5081, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.09528287102335058, |
|
"grad_norm": 1.4990565776824951, |
|
"learning_rate": 1.894572025052192e-05, |
|
"loss": 0.654, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"grad_norm": 15.647976875305176, |
|
"learning_rate": 1.9947807933194157e-05, |
|
"loss": 0.6372, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_nli-pairs_loss": 1.0652996301651, |
|
"eval_nli-pairs_runtime": 3.6326, |
|
"eval_nli-pairs_samples_per_second": 27.528, |
|
"eval_nli-pairs_steps_per_second": 1.101, |
|
"eval_sts-test_pearson_cosine": 0.785263018402905, |
|
"eval_sts-test_pearson_dot": 0.5290450141477089, |
|
"eval_sts-test_pearson_euclidean": 0.7433756286425983, |
|
"eval_sts-test_pearson_manhattan": 0.7411097274300102, |
|
"eval_sts-test_pearson_max": 0.785263018402905, |
|
"eval_sts-test_spearman_cosine": 0.7996928912411947, |
|
"eval_sts-test_spearman_dot": 0.5102571497667188, |
|
"eval_sts-test_spearman_euclidean": 0.7338969723324641, |
|
"eval_sts-test_spearman_manhattan": 0.7343494860194358, |
|
"eval_sts-test_spearman_max": 0.7996928912411947, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_vitaminc-pairs_loss": 4.719416618347168, |
|
"eval_vitaminc-pairs_runtime": 1.1268, |
|
"eval_vitaminc-pairs_samples_per_second": 75.437, |
|
"eval_vitaminc-pairs_steps_per_second": 2.662, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_sts-label_loss": 3.612347364425659, |
|
"eval_sts-label_runtime": 0.2683, |
|
"eval_sts-label_samples_per_second": 372.651, |
|
"eval_sts-label_steps_per_second": 14.906, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_qnli-contrastive_loss": 0.15202775597572327, |
|
"eval_qnli-contrastive_runtime": 0.3528, |
|
"eval_qnli-contrastive_samples_per_second": 283.457, |
|
"eval_qnli-contrastive_steps_per_second": 11.338, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_scitail-pairs-qa_loss": 0.07544919103384018, |
|
"eval_scitail-pairs-qa_runtime": 0.8732, |
|
"eval_scitail-pairs-qa_samples_per_second": 114.517, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.581, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_scitail-pairs-pos_loss": 0.5404170751571655, |
|
"eval_scitail-pairs-pos_runtime": 1.3146, |
|
"eval_scitail-pairs-pos_samples_per_second": 76.067, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.043, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_xsum-pairs_loss": 0.25958582758903503, |
|
"eval_xsum-pairs_runtime": 0.9287, |
|
"eval_xsum-pairs_samples_per_second": 107.679, |
|
"eval_xsum-pairs_steps_per_second": 4.307, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_compression-pairs_loss": 0.10066353529691696, |
|
"eval_compression-pairs_runtime": 0.2732, |
|
"eval_compression-pairs_samples_per_second": 366.076, |
|
"eval_compression-pairs_steps_per_second": 14.643, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_sciq_pairs_loss": 0.2645374834537506, |
|
"eval_sciq_pairs_runtime": 4.0725, |
|
"eval_sciq_pairs_samples_per_second": 24.555, |
|
"eval_sciq_pairs_steps_per_second": 0.982, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_qasc_pairs_loss": 0.21021947264671326, |
|
"eval_qasc_pairs_runtime": 1.0743, |
|
"eval_qasc_pairs_samples_per_second": 93.084, |
|
"eval_qasc_pairs_steps_per_second": 3.723, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_openbookqa_pairs_loss": 1.7905032634735107, |
|
"eval_openbookqa_pairs_runtime": 0.8886, |
|
"eval_openbookqa_pairs_samples_per_second": 112.532, |
|
"eval_openbookqa_pairs_steps_per_second": 4.501, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_msmarco_pairs_loss": 0.5102832913398743, |
|
"eval_msmarco_pairs_runtime": 2.0529, |
|
"eval_msmarco_pairs_samples_per_second": 48.712, |
|
"eval_msmarco_pairs_steps_per_second": 1.948, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_nq_pairs_loss": 0.24466972053050995, |
|
"eval_nq_pairs_runtime": 4.4973, |
|
"eval_nq_pairs_samples_per_second": 22.235, |
|
"eval_nq_pairs_steps_per_second": 0.889, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_trivia_pairs_loss": 0.8748095631599426, |
|
"eval_trivia_pairs_runtime": 6.4825, |
|
"eval_trivia_pairs_samples_per_second": 15.426, |
|
"eval_trivia_pairs_steps_per_second": 0.617, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_quora_pairs_loss": 0.07820220291614532, |
|
"eval_quora_pairs_runtime": 0.6944, |
|
"eval_quora_pairs_samples_per_second": 144.008, |
|
"eval_quora_pairs_steps_per_second": 5.76, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_gooaq_pairs_loss": 0.5236212611198425, |
|
"eval_gooaq_pairs_runtime": 1.3899, |
|
"eval_gooaq_pairs_samples_per_second": 71.949, |
|
"eval_gooaq_pairs_steps_per_second": 2.878, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10029775897194797, |
|
"eval_mrpc_pairs_loss": 0.05494727939367294, |
|
"eval_mrpc_pairs_runtime": 0.2598, |
|
"eval_mrpc_pairs_samples_per_second": 384.941, |
|
"eval_mrpc_pairs_steps_per_second": 15.398, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10531264692054537, |
|
"grad_norm": 11.01974105834961, |
|
"learning_rate": 2.0949895615866387e-05, |
|
"loss": 0.9292, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.11032753486914276, |
|
"grad_norm": 0.5542309284210205, |
|
"learning_rate": 2.1951983298538625e-05, |
|
"loss": 1.3108, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.11534242281774017, |
|
"grad_norm": 15.458569526672363, |
|
"learning_rate": 2.2954070981210856e-05, |
|
"loss": 0.9674, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.12035731076633756, |
|
"grad_norm": 2.7814478874206543, |
|
"learning_rate": 2.395615866388309e-05, |
|
"loss": 0.9226, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.12537219871493496, |
|
"grad_norm": 11.393244743347168, |
|
"learning_rate": 2.4958246346555324e-05, |
|
"loss": 0.789, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13038708666353235, |
|
"grad_norm": 9.288290977478027, |
|
"learning_rate": 2.596033402922756e-05, |
|
"loss": 0.5186, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.13540197461212977, |
|
"grad_norm": 47.65571212768555, |
|
"learning_rate": 2.6962421711899793e-05, |
|
"loss": 0.6726, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.14041686256072716, |
|
"grad_norm": 12.908064842224121, |
|
"learning_rate": 2.7964509394572024e-05, |
|
"loss": 0.5381, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.14543175050932455, |
|
"grad_norm": 14.951742172241211, |
|
"learning_rate": 2.896659707724426e-05, |
|
"loss": 0.581, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"grad_norm": 20.12006187438965, |
|
"learning_rate": 2.9968684759916492e-05, |
|
"loss": 0.9038, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_nli-pairs_loss": 1.2173175811767578, |
|
"eval_nli-pairs_runtime": 3.7098, |
|
"eval_nli-pairs_samples_per_second": 26.955, |
|
"eval_nli-pairs_steps_per_second": 1.078, |
|
"eval_sts-test_pearson_cosine": 0.7840992835675669, |
|
"eval_sts-test_pearson_dot": 0.5220462136106129, |
|
"eval_sts-test_pearson_euclidean": 0.7457350047351855, |
|
"eval_sts-test_pearson_manhattan": 0.7425970830541657, |
|
"eval_sts-test_pearson_max": 0.7840992835675669, |
|
"eval_sts-test_spearman_cosine": 0.8006376809572144, |
|
"eval_sts-test_spearman_dot": 0.5020544543992158, |
|
"eval_sts-test_spearman_euclidean": 0.7369257710408655, |
|
"eval_sts-test_spearman_manhattan": 0.7362649758012406, |
|
"eval_sts-test_spearman_max": 0.8006376809572144, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_vitaminc-pairs_loss": 4.774902820587158, |
|
"eval_vitaminc-pairs_runtime": 1.1212, |
|
"eval_vitaminc-pairs_samples_per_second": 75.809, |
|
"eval_vitaminc-pairs_steps_per_second": 2.676, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_sts-label_loss": 3.198556900024414, |
|
"eval_sts-label_runtime": 0.2678, |
|
"eval_sts-label_samples_per_second": 373.382, |
|
"eval_sts-label_steps_per_second": 14.935, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_qnli-contrastive_loss": 0.1943340301513672, |
|
"eval_qnli-contrastive_runtime": 0.3511, |
|
"eval_qnli-contrastive_samples_per_second": 284.789, |
|
"eval_qnli-contrastive_steps_per_second": 11.392, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_scitail-pairs-qa_loss": 0.08060617744922638, |
|
"eval_scitail-pairs-qa_runtime": 0.8778, |
|
"eval_scitail-pairs-qa_samples_per_second": 113.92, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.557, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_scitail-pairs-pos_loss": 0.4759831428527832, |
|
"eval_scitail-pairs-pos_runtime": 1.3609, |
|
"eval_scitail-pairs-pos_samples_per_second": 73.48, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.939, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_xsum-pairs_loss": 0.27583304047584534, |
|
"eval_xsum-pairs_runtime": 0.9343, |
|
"eval_xsum-pairs_samples_per_second": 107.035, |
|
"eval_xsum-pairs_steps_per_second": 4.281, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_compression-pairs_loss": 0.10094660520553589, |
|
"eval_compression-pairs_runtime": 0.2739, |
|
"eval_compression-pairs_samples_per_second": 365.047, |
|
"eval_compression-pairs_steps_per_second": 14.602, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_sciq_pairs_loss": 0.2688131630420685, |
|
"eval_sciq_pairs_runtime": 4.0582, |
|
"eval_sciq_pairs_samples_per_second": 24.641, |
|
"eval_sciq_pairs_steps_per_second": 0.986, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_qasc_pairs_loss": 0.23267821967601776, |
|
"eval_qasc_pairs_runtime": 1.0554, |
|
"eval_qasc_pairs_samples_per_second": 94.75, |
|
"eval_qasc_pairs_steps_per_second": 3.79, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_openbookqa_pairs_loss": 1.8053069114685059, |
|
"eval_openbookqa_pairs_runtime": 0.8871, |
|
"eval_openbookqa_pairs_samples_per_second": 112.727, |
|
"eval_openbookqa_pairs_steps_per_second": 4.509, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_msmarco_pairs_loss": 0.5809260606765747, |
|
"eval_msmarco_pairs_runtime": 2.0498, |
|
"eval_msmarco_pairs_samples_per_second": 48.786, |
|
"eval_msmarco_pairs_steps_per_second": 1.951, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_nq_pairs_loss": 0.2808491885662079, |
|
"eval_nq_pairs_runtime": 4.4982, |
|
"eval_nq_pairs_samples_per_second": 22.231, |
|
"eval_nq_pairs_steps_per_second": 0.889, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_trivia_pairs_loss": 0.9379808902740479, |
|
"eval_trivia_pairs_runtime": 6.4578, |
|
"eval_trivia_pairs_samples_per_second": 15.485, |
|
"eval_trivia_pairs_steps_per_second": 0.619, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_quora_pairs_loss": 0.0913279801607132, |
|
"eval_quora_pairs_runtime": 0.6721, |
|
"eval_quora_pairs_samples_per_second": 148.79, |
|
"eval_quora_pairs_steps_per_second": 5.952, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_gooaq_pairs_loss": 0.5807955265045166, |
|
"eval_gooaq_pairs_runtime": 1.3915, |
|
"eval_gooaq_pairs_samples_per_second": 71.865, |
|
"eval_gooaq_pairs_steps_per_second": 2.875, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15044663845792194, |
|
"eval_mrpc_pairs_loss": 0.05799216777086258, |
|
"eval_mrpc_pairs_runtime": 0.2571, |
|
"eval_mrpc_pairs_samples_per_second": 388.998, |
|
"eval_mrpc_pairs_steps_per_second": 15.56, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15546152640651936, |
|
"grad_norm": 9.773286819458008, |
|
"learning_rate": 2.9997957904107625e-05, |
|
"loss": 0.7964, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.16047641435511675, |
|
"grad_norm": 19.411075592041016, |
|
"learning_rate": 2.9991566594209126e-05, |
|
"loss": 0.8213, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.16549130230371414, |
|
"grad_norm": 3.5282175540924072, |
|
"learning_rate": 2.9980825799589488e-05, |
|
"loss": 0.5396, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.17050619025231156, |
|
"grad_norm": 62.66339874267578, |
|
"learning_rate": 2.996573863646219e-05, |
|
"loss": 0.9297, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.17552107820090895, |
|
"grad_norm": 8.785274505615234, |
|
"learning_rate": 2.994630948204727e-05, |
|
"loss": 1.169, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18053596614950634, |
|
"grad_norm": 24.10859489440918, |
|
"learning_rate": 2.992254397330132e-05, |
|
"loss": 0.7486, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.18555085409810373, |
|
"grad_norm": 25.545284271240234, |
|
"learning_rate": 2.9894449005282077e-05, |
|
"loss": 0.6821, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.19056574204670115, |
|
"grad_norm": 0.8675521016120911, |
|
"learning_rate": 2.9862032729147954e-05, |
|
"loss": 0.6125, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.19558062999529854, |
|
"grad_norm": 16.122114181518555, |
|
"learning_rate": 2.9825304549793153e-05, |
|
"loss": 0.8061, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"grad_norm": 1.0314382314682007, |
|
"learning_rate": 2.978427512311904e-05, |
|
"loss": 0.6918, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_nli-pairs_loss": 1.1552109718322754, |
|
"eval_nli-pairs_runtime": 3.8751, |
|
"eval_nli-pairs_samples_per_second": 25.806, |
|
"eval_nli-pairs_steps_per_second": 1.032, |
|
"eval_sts-test_pearson_cosine": 0.786106976104726, |
|
"eval_sts-test_pearson_dot": 0.5116758767219935, |
|
"eval_sts-test_pearson_euclidean": 0.7432891018313416, |
|
"eval_sts-test_pearson_manhattan": 0.7400929158927781, |
|
"eval_sts-test_pearson_max": 0.786106976104726, |
|
"eval_sts-test_spearman_cosine": 0.801377272203007, |
|
"eval_sts-test_spearman_dot": 0.4921454166952506, |
|
"eval_sts-test_spearman_euclidean": 0.7343686249967402, |
|
"eval_sts-test_spearman_manhattan": 0.7331946050808561, |
|
"eval_sts-test_spearman_max": 0.801377272203007, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_vitaminc-pairs_loss": 4.6789751052856445, |
|
"eval_vitaminc-pairs_runtime": 1.1504, |
|
"eval_vitaminc-pairs_samples_per_second": 73.889, |
|
"eval_vitaminc-pairs_steps_per_second": 2.608, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_sts-label_loss": 3.5580556392669678, |
|
"eval_sts-label_runtime": 0.2834, |
|
"eval_sts-label_samples_per_second": 352.858, |
|
"eval_sts-label_steps_per_second": 14.114, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_qnli-contrastive_loss": 0.20369713008403778, |
|
"eval_qnli-contrastive_runtime": 0.358, |
|
"eval_qnli-contrastive_samples_per_second": 279.331, |
|
"eval_qnli-contrastive_steps_per_second": 11.173, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_scitail-pairs-qa_loss": 0.07465875148773193, |
|
"eval_scitail-pairs-qa_runtime": 0.9504, |
|
"eval_scitail-pairs-qa_samples_per_second": 105.214, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.209, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_scitail-pairs-pos_loss": 0.49434563517570496, |
|
"eval_scitail-pairs-pos_runtime": 1.6041, |
|
"eval_scitail-pairs-pos_samples_per_second": 62.339, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.494, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_xsum-pairs_loss": 0.28282061219215393, |
|
"eval_xsum-pairs_runtime": 0.9316, |
|
"eval_xsum-pairs_samples_per_second": 107.346, |
|
"eval_xsum-pairs_steps_per_second": 4.294, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_compression-pairs_loss": 0.097385473549366, |
|
"eval_compression-pairs_runtime": 0.2754, |
|
"eval_compression-pairs_samples_per_second": 363.1, |
|
"eval_compression-pairs_steps_per_second": 14.524, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_sciq_pairs_loss": 0.2762215733528137, |
|
"eval_sciq_pairs_runtime": 4.2307, |
|
"eval_sciq_pairs_samples_per_second": 23.637, |
|
"eval_sciq_pairs_steps_per_second": 0.945, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_qasc_pairs_loss": 0.19347424805164337, |
|
"eval_qasc_pairs_runtime": 1.2282, |
|
"eval_qasc_pairs_samples_per_second": 81.421, |
|
"eval_qasc_pairs_steps_per_second": 3.257, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_openbookqa_pairs_loss": 1.6875064373016357, |
|
"eval_openbookqa_pairs_runtime": 1.1661, |
|
"eval_openbookqa_pairs_samples_per_second": 85.754, |
|
"eval_openbookqa_pairs_steps_per_second": 3.43, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_msmarco_pairs_loss": 0.5743877291679382, |
|
"eval_msmarco_pairs_runtime": 2.1428, |
|
"eval_msmarco_pairs_samples_per_second": 46.669, |
|
"eval_msmarco_pairs_steps_per_second": 1.867, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_nq_pairs_loss": 0.30348217487335205, |
|
"eval_nq_pairs_runtime": 4.5543, |
|
"eval_nq_pairs_samples_per_second": 21.957, |
|
"eval_nq_pairs_steps_per_second": 0.878, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_trivia_pairs_loss": 0.9221765995025635, |
|
"eval_trivia_pairs_runtime": 6.6513, |
|
"eval_trivia_pairs_samples_per_second": 15.035, |
|
"eval_trivia_pairs_steps_per_second": 0.601, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_quora_pairs_loss": 0.03854631260037422, |
|
"eval_quora_pairs_runtime": 0.7822, |
|
"eval_quora_pairs_samples_per_second": 127.852, |
|
"eval_quora_pairs_steps_per_second": 5.114, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_gooaq_pairs_loss": 0.528398334980011, |
|
"eval_gooaq_pairs_runtime": 1.4882, |
|
"eval_gooaq_pairs_samples_per_second": 67.194, |
|
"eval_gooaq_pairs_steps_per_second": 2.688, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20059551794389593, |
|
"eval_mrpc_pairs_loss": 0.05623970925807953, |
|
"eval_mrpc_pairs_runtime": 0.2698, |
|
"eval_mrpc_pairs_samples_per_second": 370.713, |
|
"eval_mrpc_pairs_steps_per_second": 14.829, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20561040589249335, |
|
"grad_norm": 0.6042119860649109, |
|
"learning_rate": 2.9738956352942557e-05, |
|
"loss": 0.9421, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.21062529384109074, |
|
"grad_norm": 13.87867546081543, |
|
"learning_rate": 2.968936138754259e-05, |
|
"loss": 0.8641, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.21564018178968813, |
|
"grad_norm": 44.48640441894531, |
|
"learning_rate": 2.9635504615845257e-05, |
|
"loss": 1.157, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.22065506973828553, |
|
"grad_norm": 15.554729461669922, |
|
"learning_rate": 2.957928148945977e-05, |
|
"loss": 0.8772, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.22566995768688294, |
|
"grad_norm": 16.644670486450195, |
|
"learning_rate": 2.9517081112297707e-05, |
|
"loss": 1.0496, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23068484563548033, |
|
"grad_norm": 13.053145408630371, |
|
"learning_rate": 2.9450668912302004e-05, |
|
"loss": 0.589, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.23569973358407773, |
|
"grad_norm": 7.827791213989258, |
|
"learning_rate": 2.9380064157562306e-05, |
|
"loss": 0.8234, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.24071462153267512, |
|
"grad_norm": 15.598438262939453, |
|
"learning_rate": 2.930528733254901e-05, |
|
"loss": 0.7365, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.24572950948127253, |
|
"grad_norm": 13.723180770874023, |
|
"learning_rate": 2.9226360132170112e-05, |
|
"loss": 0.5076, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"grad_norm": 10.20022964477539, |
|
"learning_rate": 2.9143305455476866e-05, |
|
"loss": 1.0329, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_nli-pairs_loss": 1.0577216148376465, |
|
"eval_nli-pairs_runtime": 3.6476, |
|
"eval_nli-pairs_samples_per_second": 27.415, |
|
"eval_nli-pairs_steps_per_second": 1.097, |
|
"eval_sts-test_pearson_cosine": 0.7876359552191669, |
|
"eval_sts-test_pearson_dot": 0.5220803655074544, |
|
"eval_sts-test_pearson_euclidean": 0.7444632413869628, |
|
"eval_sts-test_pearson_manhattan": 0.7418744760088763, |
|
"eval_sts-test_pearson_max": 0.7876359552191669, |
|
"eval_sts-test_spearman_cosine": 0.8018874000525117, |
|
"eval_sts-test_spearman_dot": 0.5034518981121652, |
|
"eval_sts-test_spearman_euclidean": 0.7344750702387959, |
|
"eval_sts-test_spearman_manhattan": 0.7332804063416474, |
|
"eval_sts-test_spearman_max": 0.8018874000525117, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_vitaminc-pairs_loss": 4.784573554992676, |
|
"eval_vitaminc-pairs_runtime": 1.145, |
|
"eval_vitaminc-pairs_samples_per_second": 74.235, |
|
"eval_vitaminc-pairs_steps_per_second": 2.62, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_sts-label_loss": 3.6113080978393555, |
|
"eval_sts-label_runtime": 0.2746, |
|
"eval_sts-label_samples_per_second": 364.172, |
|
"eval_sts-label_steps_per_second": 14.567, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_qnli-contrastive_loss": 0.18593625724315643, |
|
"eval_qnli-contrastive_runtime": 0.3541, |
|
"eval_qnli-contrastive_samples_per_second": 282.413, |
|
"eval_qnli-contrastive_steps_per_second": 11.297, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_scitail-pairs-qa_loss": 0.07545661181211472, |
|
"eval_scitail-pairs-qa_runtime": 0.8854, |
|
"eval_scitail-pairs-qa_samples_per_second": 112.941, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.518, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_scitail-pairs-pos_loss": 0.5018333792686462, |
|
"eval_scitail-pairs-pos_runtime": 1.3443, |
|
"eval_scitail-pairs-pos_samples_per_second": 74.386, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.975, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_xsum-pairs_loss": 0.2749001085758209, |
|
"eval_xsum-pairs_runtime": 0.9439, |
|
"eval_xsum-pairs_samples_per_second": 105.939, |
|
"eval_xsum-pairs_steps_per_second": 4.238, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_compression-pairs_loss": 0.09735233336687088, |
|
"eval_compression-pairs_runtime": 0.2764, |
|
"eval_compression-pairs_samples_per_second": 361.753, |
|
"eval_compression-pairs_steps_per_second": 14.47, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_sciq_pairs_loss": 0.2648228108882904, |
|
"eval_sciq_pairs_runtime": 4.1207, |
|
"eval_sciq_pairs_samples_per_second": 24.268, |
|
"eval_sciq_pairs_steps_per_second": 0.971, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_qasc_pairs_loss": 0.21318012475967407, |
|
"eval_qasc_pairs_runtime": 1.0917, |
|
"eval_qasc_pairs_samples_per_second": 91.604, |
|
"eval_qasc_pairs_steps_per_second": 3.664, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_openbookqa_pairs_loss": 1.790009617805481, |
|
"eval_openbookqa_pairs_runtime": 0.8969, |
|
"eval_openbookqa_pairs_samples_per_second": 111.496, |
|
"eval_openbookqa_pairs_steps_per_second": 4.46, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_msmarco_pairs_loss": 0.57186359167099, |
|
"eval_msmarco_pairs_runtime": 2.0592, |
|
"eval_msmarco_pairs_samples_per_second": 48.563, |
|
"eval_msmarco_pairs_steps_per_second": 1.943, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_nq_pairs_loss": 0.2738310396671295, |
|
"eval_nq_pairs_runtime": 4.5092, |
|
"eval_nq_pairs_samples_per_second": 22.177, |
|
"eval_nq_pairs_steps_per_second": 0.887, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_trivia_pairs_loss": 0.8291679620742798, |
|
"eval_trivia_pairs_runtime": 6.526, |
|
"eval_trivia_pairs_samples_per_second": 15.323, |
|
"eval_trivia_pairs_steps_per_second": 0.613, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_quora_pairs_loss": 0.08000540733337402, |
|
"eval_quora_pairs_runtime": 0.6761, |
|
"eval_quora_pairs_samples_per_second": 147.909, |
|
"eval_quora_pairs_steps_per_second": 5.916, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_gooaq_pairs_loss": 0.5998037457466125, |
|
"eval_gooaq_pairs_runtime": 1.3978, |
|
"eval_gooaq_pairs_samples_per_second": 71.541, |
|
"eval_gooaq_pairs_steps_per_second": 2.862, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2507443974298699, |
|
"eval_mrpc_pairs_loss": 0.05507182702422142, |
|
"eval_mrpc_pairs_runtime": 0.2617, |
|
"eval_mrpc_pairs_samples_per_second": 382.156, |
|
"eval_mrpc_pairs_steps_per_second": 15.286, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2557592853784673, |
|
"grad_norm": 8.05022144317627, |
|
"learning_rate": 2.9056147399020182e-05, |
|
"loss": 1.4006, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.2607741733270647, |
|
"grad_norm": 0.38224154710769653, |
|
"learning_rate": 2.8964911249859437e-05, |
|
"loss": 0.5963, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.2657890612756621, |
|
"grad_norm": 0.46655791997909546, |
|
"learning_rate": 2.886962347822604e-05, |
|
"loss": 0.7488, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.27080394922425954, |
|
"grad_norm": 8.102537155151367, |
|
"learning_rate": 2.8770311729843616e-05, |
|
"loss": 0.8548, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.27581883717285693, |
|
"grad_norm": 11.803775787353516, |
|
"learning_rate": 2.86670048179072e-05, |
|
"loss": 1.3324, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2808337251214543, |
|
"grad_norm": 16.266756057739258, |
|
"learning_rate": 2.8559732714723715e-05, |
|
"loss": 0.5804, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.2858486130700517, |
|
"grad_norm": 2.8448822498321533, |
|
"learning_rate": 2.8448526543016114e-05, |
|
"loss": 0.7827, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.2908635010186491, |
|
"grad_norm": 21.346328735351562, |
|
"learning_rate": 2.8333418566893796e-05, |
|
"loss": 0.5448, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.2958783889672465, |
|
"grad_norm": 3.4379029273986816, |
|
"learning_rate": 2.8214442182491866e-05, |
|
"loss": 0.7368, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"grad_norm": 17.05881690979004, |
|
"learning_rate": 2.8091631908281963e-05, |
|
"loss": 0.5657, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_nli-pairs_loss": 1.0244356393814087, |
|
"eval_nli-pairs_runtime": 3.6217, |
|
"eval_nli-pairs_samples_per_second": 27.612, |
|
"eval_nli-pairs_steps_per_second": 1.104, |
|
"eval_sts-test_pearson_cosine": 0.781915957368962, |
|
"eval_sts-test_pearson_dot": 0.49821032356844613, |
|
"eval_sts-test_pearson_euclidean": 0.7329308897504494, |
|
"eval_sts-test_pearson_manhattan": 0.7292186092506918, |
|
"eval_sts-test_pearson_max": 0.781915957368962, |
|
"eval_sts-test_spearman_cosine": 0.7983596570250642, |
|
"eval_sts-test_spearman_dot": 0.4812350313638781, |
|
"eval_sts-test_spearman_euclidean": 0.7265758267352669, |
|
"eval_sts-test_spearman_manhattan": 0.7259264140902829, |
|
"eval_sts-test_spearman_max": 0.7983596570250642, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_vitaminc-pairs_loss": 4.698296070098877, |
|
"eval_vitaminc-pairs_runtime": 1.1338, |
|
"eval_vitaminc-pairs_samples_per_second": 74.97, |
|
"eval_vitaminc-pairs_steps_per_second": 2.646, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_sts-label_loss": 3.1822261810302734, |
|
"eval_sts-label_runtime": 0.2702, |
|
"eval_sts-label_samples_per_second": 370.09, |
|
"eval_sts-label_steps_per_second": 14.804, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_qnli-contrastive_loss": 0.11326340585947037, |
|
"eval_qnli-contrastive_runtime": 0.3581, |
|
"eval_qnli-contrastive_samples_per_second": 279.28, |
|
"eval_qnli-contrastive_steps_per_second": 11.171, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_scitail-pairs-qa_loss": 0.07009608298540115, |
|
"eval_scitail-pairs-qa_runtime": 0.8816, |
|
"eval_scitail-pairs-qa_samples_per_second": 113.424, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.537, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_scitail-pairs-pos_loss": 0.49156129360198975, |
|
"eval_scitail-pairs-pos_runtime": 1.3759, |
|
"eval_scitail-pairs-pos_samples_per_second": 72.678, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.907, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_xsum-pairs_loss": 0.25940877199172974, |
|
"eval_xsum-pairs_runtime": 0.9373, |
|
"eval_xsum-pairs_samples_per_second": 106.695, |
|
"eval_xsum-pairs_steps_per_second": 4.268, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_compression-pairs_loss": 0.0919649675488472, |
|
"eval_compression-pairs_runtime": 0.2738, |
|
"eval_compression-pairs_samples_per_second": 365.291, |
|
"eval_compression-pairs_steps_per_second": 14.612, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_sciq_pairs_loss": 0.29138606786727905, |
|
"eval_sciq_pairs_runtime": 4.1059, |
|
"eval_sciq_pairs_samples_per_second": 24.355, |
|
"eval_sciq_pairs_steps_per_second": 0.974, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_qasc_pairs_loss": 0.19625085592269897, |
|
"eval_qasc_pairs_runtime": 1.0611, |
|
"eval_qasc_pairs_samples_per_second": 94.24, |
|
"eval_qasc_pairs_steps_per_second": 3.77, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_openbookqa_pairs_loss": 1.7960456609725952, |
|
"eval_openbookqa_pairs_runtime": 0.9042, |
|
"eval_openbookqa_pairs_samples_per_second": 110.601, |
|
"eval_openbookqa_pairs_steps_per_second": 4.424, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_msmarco_pairs_loss": 0.5171416997909546, |
|
"eval_msmarco_pairs_runtime": 2.0637, |
|
"eval_msmarco_pairs_samples_per_second": 48.457, |
|
"eval_msmarco_pairs_steps_per_second": 1.938, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_nq_pairs_loss": 0.24809740483760834, |
|
"eval_nq_pairs_runtime": 4.529, |
|
"eval_nq_pairs_samples_per_second": 22.08, |
|
"eval_nq_pairs_steps_per_second": 0.883, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_trivia_pairs_loss": 0.9041999578475952, |
|
"eval_trivia_pairs_runtime": 6.5257, |
|
"eval_trivia_pairs_samples_per_second": 15.324, |
|
"eval_trivia_pairs_steps_per_second": 0.613, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_quora_pairs_loss": 0.03601976856589317, |
|
"eval_quora_pairs_runtime": 0.6811, |
|
"eval_quora_pairs_samples_per_second": 146.827, |
|
"eval_quora_pairs_steps_per_second": 5.873, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_gooaq_pairs_loss": 0.5626399517059326, |
|
"eval_gooaq_pairs_runtime": 1.3943, |
|
"eval_gooaq_pairs_samples_per_second": 71.72, |
|
"eval_gooaq_pairs_steps_per_second": 2.869, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3008932769158439, |
|
"eval_mrpc_pairs_loss": 0.04984402656555176, |
|
"eval_mrpc_pairs_runtime": 0.2579, |
|
"eval_mrpc_pairs_samples_per_second": 387.725, |
|
"eval_mrpc_pairs_steps_per_second": 15.509, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.30590816486444133, |
|
"grad_norm": 22.65591812133789, |
|
"learning_rate": 2.796502337505742e-05, |
|
"loss": 0.7425, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.3109230528130387, |
|
"grad_norm": 10.119640350341797, |
|
"learning_rate": 2.78346533155958e-05, |
|
"loss": 0.7819, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.3159379407616361, |
|
"grad_norm": 8.690531730651855, |
|
"learning_rate": 2.770055955400161e-05, |
|
"loss": 0.5937, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.3209528287102335, |
|
"grad_norm": 0.8992699384689331, |
|
"learning_rate": 2.7562780994732476e-05, |
|
"loss": 0.8133, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.3259677166588309, |
|
"grad_norm": 10.619684219360352, |
|
"learning_rate": 2.7421357611311824e-05, |
|
"loss": 1.0674, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.3309826046074283, |
|
"grad_norm": 7.222084045410156, |
|
"learning_rate": 2.727633043473141e-05, |
|
"loss": 0.6288, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.3359974925560257, |
|
"grad_norm": 10.166888236999512, |
|
"learning_rate": 2.712774154154707e-05, |
|
"loss": 0.5866, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 0.3410123805046231, |
|
"grad_norm": 0.36360761523246765, |
|
"learning_rate": 2.6975634041671052e-05, |
|
"loss": 0.6962, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.3460272684532205, |
|
"grad_norm": 9.586665153503418, |
|
"learning_rate": 2.6820052065864665e-05, |
|
"loss": 0.5562, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"grad_norm": 1.1307642459869385, |
|
"learning_rate": 2.6661040752934594e-05, |
|
"loss": 0.8871, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_nli-pairs_loss": 1.0147591829299927, |
|
"eval_nli-pairs_runtime": 3.7201, |
|
"eval_nli-pairs_samples_per_second": 26.881, |
|
"eval_nli-pairs_steps_per_second": 1.075, |
|
"eval_sts-test_pearson_cosine": 0.7872126529181761, |
|
"eval_sts-test_pearson_dot": 0.5062045289861089, |
|
"eval_sts-test_pearson_euclidean": 0.7351473988633473, |
|
"eval_sts-test_pearson_manhattan": 0.7310226402088944, |
|
"eval_sts-test_pearson_max": 0.7872126529181761, |
|
"eval_sts-test_spearman_cosine": 0.801487068999052, |
|
"eval_sts-test_spearman_dot": 0.4912205722904683, |
|
"eval_sts-test_spearman_euclidean": 0.7267262355024484, |
|
"eval_sts-test_spearman_manhattan": 0.72510169253649, |
|
"eval_sts-test_spearman_max": 0.801487068999052, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_vitaminc-pairs_loss": 4.644638538360596, |
|
"eval_vitaminc-pairs_runtime": 1.1453, |
|
"eval_vitaminc-pairs_samples_per_second": 74.215, |
|
"eval_vitaminc-pairs_steps_per_second": 2.619, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_sts-label_loss": 3.915343999862671, |
|
"eval_sts-label_runtime": 0.2807, |
|
"eval_sts-label_samples_per_second": 356.217, |
|
"eval_sts-label_steps_per_second": 14.249, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_qnli-contrastive_loss": 0.11220741271972656, |
|
"eval_qnli-contrastive_runtime": 0.3614, |
|
"eval_qnli-contrastive_samples_per_second": 276.705, |
|
"eval_qnli-contrastive_steps_per_second": 11.068, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_scitail-pairs-qa_loss": 0.06635177880525589, |
|
"eval_scitail-pairs-qa_runtime": 0.8881, |
|
"eval_scitail-pairs-qa_samples_per_second": 112.594, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.504, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_scitail-pairs-pos_loss": 0.5765587687492371, |
|
"eval_scitail-pairs-pos_runtime": 1.3496, |
|
"eval_scitail-pairs-pos_samples_per_second": 74.097, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.964, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_xsum-pairs_loss": 0.2595808804035187, |
|
"eval_xsum-pairs_runtime": 0.9377, |
|
"eval_xsum-pairs_samples_per_second": 106.641, |
|
"eval_xsum-pairs_steps_per_second": 4.266, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_compression-pairs_loss": 0.0918564721941948, |
|
"eval_compression-pairs_runtime": 0.2755, |
|
"eval_compression-pairs_samples_per_second": 363.032, |
|
"eval_compression-pairs_steps_per_second": 14.521, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_sciq_pairs_loss": 0.284303218126297, |
|
"eval_sciq_pairs_runtime": 4.1289, |
|
"eval_sciq_pairs_samples_per_second": 24.22, |
|
"eval_sciq_pairs_steps_per_second": 0.969, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_qasc_pairs_loss": 0.19232892990112305, |
|
"eval_qasc_pairs_runtime": 1.0709, |
|
"eval_qasc_pairs_samples_per_second": 93.384, |
|
"eval_qasc_pairs_steps_per_second": 3.735, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_openbookqa_pairs_loss": 1.6234371662139893, |
|
"eval_openbookqa_pairs_runtime": 0.9558, |
|
"eval_openbookqa_pairs_samples_per_second": 104.62, |
|
"eval_openbookqa_pairs_steps_per_second": 4.185, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_msmarco_pairs_loss": 0.5325217247009277, |
|
"eval_msmarco_pairs_runtime": 2.0971, |
|
"eval_msmarco_pairs_samples_per_second": 47.685, |
|
"eval_msmarco_pairs_steps_per_second": 1.907, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_nq_pairs_loss": 0.2721095681190491, |
|
"eval_nq_pairs_runtime": 4.5393, |
|
"eval_nq_pairs_samples_per_second": 22.03, |
|
"eval_nq_pairs_steps_per_second": 0.881, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_trivia_pairs_loss": 0.8544899821281433, |
|
"eval_trivia_pairs_runtime": 6.4668, |
|
"eval_trivia_pairs_samples_per_second": 15.464, |
|
"eval_trivia_pairs_steps_per_second": 0.619, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_quora_pairs_loss": 0.08441996574401855, |
|
"eval_quora_pairs_runtime": 0.6933, |
|
"eval_quora_pairs_samples_per_second": 144.233, |
|
"eval_quora_pairs_steps_per_second": 5.769, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_gooaq_pairs_loss": 0.5711588859558105, |
|
"eval_gooaq_pairs_runtime": 1.3941, |
|
"eval_gooaq_pairs_samples_per_second": 71.733, |
|
"eval_gooaq_pairs_steps_per_second": 2.869, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3510421564018179, |
|
"eval_mrpc_pairs_loss": 0.05093960464000702, |
|
"eval_mrpc_pairs_runtime": 0.2633, |
|
"eval_mrpc_pairs_samples_per_second": 379.777, |
|
"eval_mrpc_pairs_steps_per_second": 15.191, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3560570443504153, |
|
"grad_norm": 0.39178094267845154, |
|
"learning_rate": 2.6498646236636892e-05, |
|
"loss": 0.6805, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 0.3610719322990127, |
|
"grad_norm": 7.91475248336792, |
|
"learning_rate": 2.6332915632292237e-05, |
|
"loss": 1.0451, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.3660868202476101, |
|
"grad_norm": 31.54157066345215, |
|
"learning_rate": 2.616389702311641e-05, |
|
"loss": 1.0603, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 0.37110170819620747, |
|
"grad_norm": 8.400779724121094, |
|
"learning_rate": 2.5991639446269964e-05, |
|
"loss": 0.8142, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.3761165961448049, |
|
"grad_norm": 20.99441146850586, |
|
"learning_rate": 2.5816192878631166e-05, |
|
"loss": 1.7211, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3811314840934023, |
|
"grad_norm": 10.574430465698242, |
|
"learning_rate": 2.5637608222296237e-05, |
|
"loss": 0.7523, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.3861463720419997, |
|
"grad_norm": 0.8941424489021301, |
|
"learning_rate": 2.5455937289811207e-05, |
|
"loss": 0.8053, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 0.3911612599905971, |
|
"grad_norm": 1.9402281045913696, |
|
"learning_rate": 2.5271232789139587e-05, |
|
"loss": 0.8427, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.3961761479391945, |
|
"grad_norm": 23.42873764038086, |
|
"learning_rate": 2.5083548308370296e-05, |
|
"loss": 0.8204, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"grad_norm": 4.5422234535217285, |
|
"learning_rate": 2.4892938300170198e-05, |
|
"loss": 0.5343, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_nli-pairs_loss": 1.002213478088379, |
|
"eval_nli-pairs_runtime": 3.8843, |
|
"eval_nli-pairs_samples_per_second": 25.745, |
|
"eval_nli-pairs_steps_per_second": 1.03, |
|
"eval_sts-test_pearson_cosine": 0.7872537557423719, |
|
"eval_sts-test_pearson_dot": 0.5372668921721468, |
|
"eval_sts-test_pearson_euclidean": 0.7383744840101544, |
|
"eval_sts-test_pearson_manhattan": 0.7333039162515002, |
|
"eval_sts-test_pearson_max": 0.7872537557423719, |
|
"eval_sts-test_spearman_cosine": 0.8038647026605977, |
|
"eval_sts-test_spearman_dot": 0.5191465873751544, |
|
"eval_sts-test_spearman_euclidean": 0.730034619048548, |
|
"eval_sts-test_spearman_manhattan": 0.7277569753761504, |
|
"eval_sts-test_spearman_max": 0.8038647026605977, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_vitaminc-pairs_loss": 4.723379135131836, |
|
"eval_vitaminc-pairs_runtime": 1.3031, |
|
"eval_vitaminc-pairs_samples_per_second": 65.23, |
|
"eval_vitaminc-pairs_steps_per_second": 2.302, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_sts-label_loss": 3.8185579776763916, |
|
"eval_sts-label_runtime": 0.4182, |
|
"eval_sts-label_samples_per_second": 239.094, |
|
"eval_sts-label_steps_per_second": 9.564, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_qnli-contrastive_loss": 0.15084019303321838, |
|
"eval_qnli-contrastive_runtime": 0.3638, |
|
"eval_qnli-contrastive_samples_per_second": 274.906, |
|
"eval_qnli-contrastive_steps_per_second": 10.996, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_scitail-pairs-qa_loss": 0.06741151213645935, |
|
"eval_scitail-pairs-qa_runtime": 0.9458, |
|
"eval_scitail-pairs-qa_samples_per_second": 105.735, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.229, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_scitail-pairs-pos_loss": 0.47680819034576416, |
|
"eval_scitail-pairs-pos_runtime": 1.4736, |
|
"eval_scitail-pairs-pos_samples_per_second": 67.859, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.714, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_xsum-pairs_loss": 0.2572269141674042, |
|
"eval_xsum-pairs_runtime": 0.9448, |
|
"eval_xsum-pairs_samples_per_second": 105.847, |
|
"eval_xsum-pairs_steps_per_second": 4.234, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_compression-pairs_loss": 0.09604756534099579, |
|
"eval_compression-pairs_runtime": 0.2774, |
|
"eval_compression-pairs_samples_per_second": 360.554, |
|
"eval_compression-pairs_steps_per_second": 14.422, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_sciq_pairs_loss": 0.2735004425048828, |
|
"eval_sciq_pairs_runtime": 4.2103, |
|
"eval_sciq_pairs_samples_per_second": 23.751, |
|
"eval_sciq_pairs_steps_per_second": 0.95, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_qasc_pairs_loss": 0.1924300342798233, |
|
"eval_qasc_pairs_runtime": 1.1352, |
|
"eval_qasc_pairs_samples_per_second": 88.089, |
|
"eval_qasc_pairs_steps_per_second": 3.524, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_openbookqa_pairs_loss": 1.6290359497070312, |
|
"eval_openbookqa_pairs_runtime": 0.9392, |
|
"eval_openbookqa_pairs_samples_per_second": 106.476, |
|
"eval_openbookqa_pairs_steps_per_second": 4.259, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_msmarco_pairs_loss": 0.518312931060791, |
|
"eval_msmarco_pairs_runtime": 2.121, |
|
"eval_msmarco_pairs_samples_per_second": 47.147, |
|
"eval_msmarco_pairs_steps_per_second": 1.886, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_nq_pairs_loss": 0.3077375292778015, |
|
"eval_nq_pairs_runtime": 4.6617, |
|
"eval_nq_pairs_samples_per_second": 21.451, |
|
"eval_nq_pairs_steps_per_second": 0.858, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_trivia_pairs_loss": 0.8588294386863708, |
|
"eval_trivia_pairs_runtime": 6.6293, |
|
"eval_trivia_pairs_samples_per_second": 15.085, |
|
"eval_trivia_pairs_steps_per_second": 0.603, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_quora_pairs_loss": 0.07980062067508698, |
|
"eval_quora_pairs_runtime": 0.7261, |
|
"eval_quora_pairs_samples_per_second": 137.72, |
|
"eval_quora_pairs_steps_per_second": 5.509, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_gooaq_pairs_loss": 0.6570906043052673, |
|
"eval_gooaq_pairs_runtime": 1.5071, |
|
"eval_gooaq_pairs_samples_per_second": 66.352, |
|
"eval_gooaq_pairs_steps_per_second": 2.654, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40119103588779187, |
|
"eval_mrpc_pairs_loss": 0.051231566816568375, |
|
"eval_mrpc_pairs_runtime": 0.2799, |
|
"eval_mrpc_pairs_samples_per_second": 357.322, |
|
"eval_mrpc_pairs_steps_per_second": 14.293, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.40620592383638926, |
|
"grad_norm": 37.2639045715332, |
|
"learning_rate": 2.4699458065985813e-05, |
|
"loss": 0.9709, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 0.4112208117849867, |
|
"grad_norm": 15.363207817077637, |
|
"learning_rate": 2.45031637399988e-05, |
|
"loss": 0.708, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.4162356997335841, |
|
"grad_norm": 1.8831324577331543, |
|
"learning_rate": 2.430411227283978e-05, |
|
"loss": 0.4083, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 0.4212505876821815, |
|
"grad_norm": 5.664551734924316, |
|
"learning_rate": 2.4102361415065367e-05, |
|
"loss": 0.8732, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.4262654756307789, |
|
"grad_norm": 0.615675151348114, |
|
"learning_rate": 2.3897969700403022e-05, |
|
"loss": 1.2616, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.43128036357937627, |
|
"grad_norm": 19.81829261779785, |
|
"learning_rate": 2.3690996428768772e-05, |
|
"loss": 1.3324, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.43629525152797366, |
|
"grad_norm": 6.3363118171691895, |
|
"learning_rate": 2.348150164906257e-05, |
|
"loss": 0.6244, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 0.44131013947657105, |
|
"grad_norm": 1.103615641593933, |
|
"learning_rate": 2.3269546141746407e-05, |
|
"loss": 0.6176, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.44632502742516844, |
|
"grad_norm": 11.468894004821777, |
|
"learning_rate": 2.3055191401210126e-05, |
|
"loss": 0.6926, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"grad_norm": 4.0951619148254395, |
|
"learning_rate": 2.283849961793017e-05, |
|
"loss": 0.8158, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_nli-pairs_loss": 1.2103344202041626, |
|
"eval_nli-pairs_runtime": 3.656, |
|
"eval_nli-pairs_samples_per_second": 27.353, |
|
"eval_nli-pairs_steps_per_second": 1.094, |
|
"eval_sts-test_pearson_cosine": 0.7884135608823999, |
|
"eval_sts-test_pearson_dot": 0.5043809957478502, |
|
"eval_sts-test_pearson_euclidean": 0.73325296875941, |
|
"eval_sts-test_pearson_manhattan": 0.7274442771815695, |
|
"eval_sts-test_pearson_max": 0.7884135608823999, |
|
"eval_sts-test_spearman_cosine": 0.8024151272859597, |
|
"eval_sts-test_spearman_dot": 0.4849613226687463, |
|
"eval_sts-test_spearman_euclidean": 0.7267107319000072, |
|
"eval_sts-test_spearman_manhattan": 0.7238097600272174, |
|
"eval_sts-test_spearman_max": 0.8024151272859597, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_vitaminc-pairs_loss": 4.7560882568359375, |
|
"eval_vitaminc-pairs_runtime": 1.1898, |
|
"eval_vitaminc-pairs_samples_per_second": 71.438, |
|
"eval_vitaminc-pairs_steps_per_second": 2.521, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_sts-label_loss": 3.4280478954315186, |
|
"eval_sts-label_runtime": 0.2879, |
|
"eval_sts-label_samples_per_second": 347.303, |
|
"eval_sts-label_steps_per_second": 13.892, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_qnli-contrastive_loss": 0.1333482712507248, |
|
"eval_qnli-contrastive_runtime": 0.3658, |
|
"eval_qnli-contrastive_samples_per_second": 273.37, |
|
"eval_qnli-contrastive_steps_per_second": 10.935, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_scitail-pairs-qa_loss": 0.0703386664390564, |
|
"eval_scitail-pairs-qa_runtime": 0.8879, |
|
"eval_scitail-pairs-qa_samples_per_second": 112.63, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.505, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_scitail-pairs-pos_loss": 0.4763020873069763, |
|
"eval_scitail-pairs-pos_runtime": 1.3239, |
|
"eval_scitail-pairs-pos_samples_per_second": 75.532, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.021, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_xsum-pairs_loss": 0.25743284821510315, |
|
"eval_xsum-pairs_runtime": 0.9333, |
|
"eval_xsum-pairs_samples_per_second": 107.15, |
|
"eval_xsum-pairs_steps_per_second": 4.286, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_compression-pairs_loss": 0.09842805564403534, |
|
"eval_compression-pairs_runtime": 0.2944, |
|
"eval_compression-pairs_samples_per_second": 339.674, |
|
"eval_compression-pairs_steps_per_second": 13.587, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_sciq_pairs_loss": 0.28244778513908386, |
|
"eval_sciq_pairs_runtime": 4.0785, |
|
"eval_sciq_pairs_samples_per_second": 24.519, |
|
"eval_sciq_pairs_steps_per_second": 0.981, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_qasc_pairs_loss": 0.18051397800445557, |
|
"eval_qasc_pairs_runtime": 1.0561, |
|
"eval_qasc_pairs_samples_per_second": 94.69, |
|
"eval_qasc_pairs_steps_per_second": 3.788, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_openbookqa_pairs_loss": 1.5708725452423096, |
|
"eval_openbookqa_pairs_runtime": 0.9072, |
|
"eval_openbookqa_pairs_samples_per_second": 110.229, |
|
"eval_openbookqa_pairs_steps_per_second": 4.409, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_msmarco_pairs_loss": 0.5720314979553223, |
|
"eval_msmarco_pairs_runtime": 2.0694, |
|
"eval_msmarco_pairs_samples_per_second": 48.322, |
|
"eval_msmarco_pairs_steps_per_second": 1.933, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_nq_pairs_loss": 0.2748319208621979, |
|
"eval_nq_pairs_runtime": 4.5496, |
|
"eval_nq_pairs_samples_per_second": 21.98, |
|
"eval_nq_pairs_steps_per_second": 0.879, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_trivia_pairs_loss": 0.8936847448348999, |
|
"eval_trivia_pairs_runtime": 6.4784, |
|
"eval_trivia_pairs_samples_per_second": 15.436, |
|
"eval_trivia_pairs_steps_per_second": 0.617, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_quora_pairs_loss": 0.07990340888500214, |
|
"eval_quora_pairs_runtime": 0.6852, |
|
"eval_quora_pairs_samples_per_second": 145.945, |
|
"eval_quora_pairs_steps_per_second": 5.838, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_gooaq_pairs_loss": 0.6210995316505432, |
|
"eval_gooaq_pairs_runtime": 1.4234, |
|
"eval_gooaq_pairs_samples_per_second": 70.255, |
|
"eval_gooaq_pairs_steps_per_second": 2.81, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4513399153737659, |
|
"eval_mrpc_pairs_loss": 0.053870730102062225, |
|
"eval_mrpc_pairs_runtime": 0.2678, |
|
"eval_mrpc_pairs_samples_per_second": 373.436, |
|
"eval_mrpc_pairs_steps_per_second": 14.937, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4563548033223633, |
|
"grad_norm": 0.5031663775444031, |
|
"learning_rate": 2.261953366042628e-05, |
|
"loss": 1.4753, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 0.46136969127096067, |
|
"grad_norm": 3.3404605388641357, |
|
"learning_rate": 2.239835705702158e-05, |
|
"loss": 0.5735, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.46638457921955806, |
|
"grad_norm": 14.60761547088623, |
|
"learning_rate": 2.217503397741115e-05, |
|
"loss": 1.2261, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.47139946716815545, |
|
"grad_norm": 0.7826951146125793, |
|
"learning_rate": 2.194962921404456e-05, |
|
"loss": 0.6085, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.47641435511675284, |
|
"grad_norm": 5.523419380187988, |
|
"learning_rate": 2.1722208163327738e-05, |
|
"loss": 0.8766, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.48142924306535023, |
|
"grad_norm": 1.2507153749465942, |
|
"learning_rate": 2.1492836806649564e-05, |
|
"loss": 1.1824, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.4864441310139477, |
|
"grad_norm": 10.76526165008545, |
|
"learning_rate": 2.1261581691238775e-05, |
|
"loss": 0.7192, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 0.49145901896254507, |
|
"grad_norm": 2.5375277996063232, |
|
"learning_rate": 2.1028509910856705e-05, |
|
"loss": 0.6131, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.49647390691114246, |
|
"grad_norm": 6.569655418395996, |
|
"learning_rate": 2.0793689086331472e-05, |
|
"loss": 0.7407, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"grad_norm": 0.42745527625083923, |
|
"learning_rate": 2.055718734593919e-05, |
|
"loss": 0.5857, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_nli-pairs_loss": 1.1431602239608765, |
|
"eval_nli-pairs_runtime": 3.6407, |
|
"eval_nli-pairs_samples_per_second": 27.467, |
|
"eval_nli-pairs_steps_per_second": 1.099, |
|
"eval_sts-test_pearson_cosine": 0.7838341260331343, |
|
"eval_sts-test_pearson_dot": 0.5274891201747137, |
|
"eval_sts-test_pearson_euclidean": 0.734987175544037, |
|
"eval_sts-test_pearson_manhattan": 0.7296263541205231, |
|
"eval_sts-test_pearson_max": 0.7838341260331343, |
|
"eval_sts-test_spearman_cosine": 0.8013224760849562, |
|
"eval_sts-test_spearman_dot": 0.5061225327907017, |
|
"eval_sts-test_spearman_euclidean": 0.7282525362996873, |
|
"eval_sts-test_spearman_manhattan": 0.7265322068183514, |
|
"eval_sts-test_spearman_max": 0.8013224760849562, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_vitaminc-pairs_loss": 4.748112201690674, |
|
"eval_vitaminc-pairs_runtime": 1.1378, |
|
"eval_vitaminc-pairs_samples_per_second": 74.706, |
|
"eval_vitaminc-pairs_steps_per_second": 2.637, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_sts-label_loss": 3.9402565956115723, |
|
"eval_sts-label_runtime": 0.2789, |
|
"eval_sts-label_samples_per_second": 358.596, |
|
"eval_sts-label_steps_per_second": 14.344, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_qnli-contrastive_loss": 0.10341227799654007, |
|
"eval_qnli-contrastive_runtime": 0.3605, |
|
"eval_qnli-contrastive_samples_per_second": 277.417, |
|
"eval_qnli-contrastive_steps_per_second": 11.097, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_scitail-pairs-qa_loss": 0.06673895567655563, |
|
"eval_scitail-pairs-qa_runtime": 0.8765, |
|
"eval_scitail-pairs-qa_samples_per_second": 114.092, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.564, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_scitail-pairs-pos_loss": 0.510690450668335, |
|
"eval_scitail-pairs-pos_runtime": 1.3274, |
|
"eval_scitail-pairs-pos_samples_per_second": 75.334, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.013, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_xsum-pairs_loss": 0.26573723554611206, |
|
"eval_xsum-pairs_runtime": 0.9342, |
|
"eval_xsum-pairs_samples_per_second": 107.047, |
|
"eval_xsum-pairs_steps_per_second": 4.282, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_compression-pairs_loss": 0.09096826612949371, |
|
"eval_compression-pairs_runtime": 0.2779, |
|
"eval_compression-pairs_samples_per_second": 359.804, |
|
"eval_compression-pairs_steps_per_second": 14.392, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_sciq_pairs_loss": 0.30787500739097595, |
|
"eval_sciq_pairs_runtime": 4.1007, |
|
"eval_sciq_pairs_samples_per_second": 24.386, |
|
"eval_sciq_pairs_steps_per_second": 0.975, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_qasc_pairs_loss": 0.1825849413871765, |
|
"eval_qasc_pairs_runtime": 1.0526, |
|
"eval_qasc_pairs_samples_per_second": 94.998, |
|
"eval_qasc_pairs_steps_per_second": 3.8, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_openbookqa_pairs_loss": 1.5945305824279785, |
|
"eval_openbookqa_pairs_runtime": 0.8948, |
|
"eval_openbookqa_pairs_samples_per_second": 111.759, |
|
"eval_openbookqa_pairs_steps_per_second": 4.47, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_msmarco_pairs_loss": 0.5864604711532593, |
|
"eval_msmarco_pairs_runtime": 2.0556, |
|
"eval_msmarco_pairs_samples_per_second": 48.646, |
|
"eval_msmarco_pairs_steps_per_second": 1.946, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_nq_pairs_loss": 0.2538978159427643, |
|
"eval_nq_pairs_runtime": 4.5409, |
|
"eval_nq_pairs_samples_per_second": 22.022, |
|
"eval_nq_pairs_steps_per_second": 0.881, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_trivia_pairs_loss": 0.8825237154960632, |
|
"eval_trivia_pairs_runtime": 6.4701, |
|
"eval_trivia_pairs_samples_per_second": 15.456, |
|
"eval_trivia_pairs_steps_per_second": 0.618, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_quora_pairs_loss": 0.06264814734458923, |
|
"eval_quora_pairs_runtime": 0.6792, |
|
"eval_quora_pairs_samples_per_second": 147.238, |
|
"eval_quora_pairs_steps_per_second": 5.89, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_gooaq_pairs_loss": 0.5953384041786194, |
|
"eval_gooaq_pairs_runtime": 1.4186, |
|
"eval_gooaq_pairs_samples_per_second": 70.49, |
|
"eval_gooaq_pairs_steps_per_second": 2.82, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5014887948597399, |
|
"eval_mrpc_pairs_loss": 0.05028616264462471, |
|
"eval_mrpc_pairs_runtime": 0.2664, |
|
"eval_mrpc_pairs_samples_per_second": 375.444, |
|
"eval_mrpc_pairs_steps_per_second": 15.018, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5065036828083372, |
|
"grad_norm": 17.477581024169922, |
|
"learning_rate": 2.0319073305638035e-05, |
|
"loss": 0.6212, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 0.5115185707569346, |
|
"grad_norm": 15.705268859863281, |
|
"learning_rate": 2.0079416049160762e-05, |
|
"loss": 1.1408, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.516533458705532, |
|
"grad_norm": 15.518088340759277, |
|
"learning_rate": 1.983828510797154e-05, |
|
"loss": 0.6898, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 0.5215483466541294, |
|
"grad_norm": 18.28449058532715, |
|
"learning_rate": 1.9595750441092844e-05, |
|
"loss": 0.9827, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.5265632346027268, |
|
"grad_norm": 11.187614440917969, |
|
"learning_rate": 1.935188241480837e-05, |
|
"loss": 0.9518, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.5315781225513242, |
|
"grad_norm": 24.515199661254883, |
|
"learning_rate": 1.910675178224773e-05, |
|
"loss": 0.5584, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.5365930104999217, |
|
"grad_norm": 21.595224380493164, |
|
"learning_rate": 1.886042966285894e-05, |
|
"loss": 1.3362, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 0.5416078984485191, |
|
"grad_norm": 14.934494972229004, |
|
"learning_rate": 1.8612987521774603e-05, |
|
"loss": 0.4418, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.5466227863971165, |
|
"grad_norm": 1.0222537517547607, |
|
"learning_rate": 1.836449714907785e-05, |
|
"loss": 0.5896, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"grad_norm": 13.705151557922363, |
|
"learning_rate": 1.811503063897396e-05, |
|
"loss": 0.7951, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_nli-pairs_loss": 1.0016616582870483, |
|
"eval_nli-pairs_runtime": 3.6365, |
|
"eval_nli-pairs_samples_per_second": 27.499, |
|
"eval_nli-pairs_steps_per_second": 1.1, |
|
"eval_sts-test_pearson_cosine": 0.783269156461013, |
|
"eval_sts-test_pearson_dot": 0.5146760761775918, |
|
"eval_sts-test_pearson_euclidean": 0.7293244171224789, |
|
"eval_sts-test_pearson_manhattan": 0.722566066058283, |
|
"eval_sts-test_pearson_max": 0.783269156461013, |
|
"eval_sts-test_spearman_cosine": 0.800346163751739, |
|
"eval_sts-test_spearman_dot": 0.49134463318009686, |
|
"eval_sts-test_spearman_euclidean": 0.7220780456605193, |
|
"eval_sts-test_spearman_manhattan": 0.7185570530657137, |
|
"eval_sts-test_spearman_max": 0.800346163751739, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_vitaminc-pairs_loss": 4.628457546234131, |
|
"eval_vitaminc-pairs_runtime": 1.1358, |
|
"eval_vitaminc-pairs_samples_per_second": 74.837, |
|
"eval_vitaminc-pairs_steps_per_second": 2.641, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_sts-label_loss": 3.698469877243042, |
|
"eval_sts-label_runtime": 0.2763, |
|
"eval_sts-label_samples_per_second": 361.871, |
|
"eval_sts-label_steps_per_second": 14.475, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_qnli-contrastive_loss": 0.11857427656650543, |
|
"eval_qnli-contrastive_runtime": 0.3599, |
|
"eval_qnli-contrastive_samples_per_second": 277.865, |
|
"eval_qnli-contrastive_steps_per_second": 11.115, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_scitail-pairs-qa_loss": 0.06011494621634483, |
|
"eval_scitail-pairs-qa_runtime": 0.8855, |
|
"eval_scitail-pairs-qa_samples_per_second": 112.93, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.517, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_scitail-pairs-pos_loss": 0.5179685950279236, |
|
"eval_scitail-pairs-pos_runtime": 1.3428, |
|
"eval_scitail-pairs-pos_samples_per_second": 74.469, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.979, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_xsum-pairs_loss": 0.2575337886810303, |
|
"eval_xsum-pairs_runtime": 0.9362, |
|
"eval_xsum-pairs_samples_per_second": 106.81, |
|
"eval_xsum-pairs_steps_per_second": 4.272, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_compression-pairs_loss": 0.08986295014619827, |
|
"eval_compression-pairs_runtime": 0.2735, |
|
"eval_compression-pairs_samples_per_second": 365.659, |
|
"eval_compression-pairs_steps_per_second": 14.626, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_sciq_pairs_loss": 0.2898155748844147, |
|
"eval_sciq_pairs_runtime": 4.1009, |
|
"eval_sciq_pairs_samples_per_second": 24.385, |
|
"eval_sciq_pairs_steps_per_second": 0.975, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_qasc_pairs_loss": 0.1790761798620224, |
|
"eval_qasc_pairs_runtime": 1.0559, |
|
"eval_qasc_pairs_samples_per_second": 94.702, |
|
"eval_qasc_pairs_steps_per_second": 3.788, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_openbookqa_pairs_loss": 1.6558103561401367, |
|
"eval_openbookqa_pairs_runtime": 0.8846, |
|
"eval_openbookqa_pairs_samples_per_second": 113.048, |
|
"eval_openbookqa_pairs_steps_per_second": 4.522, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_msmarco_pairs_loss": 0.5547183156013489, |
|
"eval_msmarco_pairs_runtime": 2.0592, |
|
"eval_msmarco_pairs_samples_per_second": 48.563, |
|
"eval_msmarco_pairs_steps_per_second": 1.943, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_nq_pairs_loss": 0.24799224734306335, |
|
"eval_nq_pairs_runtime": 4.5115, |
|
"eval_nq_pairs_samples_per_second": 22.166, |
|
"eval_nq_pairs_steps_per_second": 0.887, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_trivia_pairs_loss": 0.9036693572998047, |
|
"eval_trivia_pairs_runtime": 6.5286, |
|
"eval_trivia_pairs_samples_per_second": 15.317, |
|
"eval_trivia_pairs_steps_per_second": 0.613, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_quora_pairs_loss": 0.05727443844079971, |
|
"eval_quora_pairs_runtime": 0.6763, |
|
"eval_quora_pairs_samples_per_second": 147.873, |
|
"eval_quora_pairs_steps_per_second": 5.915, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_gooaq_pairs_loss": 0.5602415800094604, |
|
"eval_gooaq_pairs_runtime": 1.4132, |
|
"eval_gooaq_pairs_samples_per_second": 70.759, |
|
"eval_gooaq_pairs_steps_per_second": 2.83, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5516376743457139, |
|
"eval_mrpc_pairs_loss": 0.04762456938624382, |
|
"eval_mrpc_pairs_runtime": 0.2648, |
|
"eval_mrpc_pairs_samples_per_second": 377.632, |
|
"eval_mrpc_pairs_steps_per_second": 15.105, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5566525622943113, |
|
"grad_norm": 0.39285340905189514, |
|
"learning_rate": 1.7864660368873747e-05, |
|
"loss": 0.5201, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 0.5616674502429087, |
|
"grad_norm": 16.01999855041504, |
|
"learning_rate": 1.7613458978394786e-05, |
|
"loss": 0.6351, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.566682338191506, |
|
"grad_norm": 0.5487422347068787, |
|
"learning_rate": 1.7361499348286606e-05, |
|
"loss": 0.8652, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 0.5716972261401034, |
|
"grad_norm": 0.9249119758605957, |
|
"learning_rate": 1.710885457928585e-05, |
|
"loss": 0.6407, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.5767121140887008, |
|
"grad_norm": 6.578505992889404, |
|
"learning_rate": 1.6855597970907664e-05, |
|
"loss": 0.9435, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.5817270020372982, |
|
"grad_norm": 14.307022094726562, |
|
"learning_rate": 1.6601803000179394e-05, |
|
"loss": 0.9295, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.5867418899858956, |
|
"grad_norm": 16.091779708862305, |
|
"learning_rate": 1.6347543300322795e-05, |
|
"loss": 0.6829, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 0.591756777934493, |
|
"grad_norm": 29.058805465698242, |
|
"learning_rate": 1.6092892639390916e-05, |
|
"loss": 0.8683, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.5967716658830904, |
|
"grad_norm": 13.12238597869873, |
|
"learning_rate": 1.583792489886586e-05, |
|
"loss": 1.115, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"grad_norm": 11.606388092041016, |
|
"learning_rate": 1.558271405222362e-05, |
|
"loss": 1.0936, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_nli-pairs_loss": 0.9106074571609497, |
|
"eval_nli-pairs_runtime": 3.9467, |
|
"eval_nli-pairs_samples_per_second": 25.337, |
|
"eval_nli-pairs_steps_per_second": 1.013, |
|
"eval_sts-test_pearson_cosine": 0.7831915073063493, |
|
"eval_sts-test_pearson_dot": 0.51712727721244, |
|
"eval_sts-test_pearson_euclidean": 0.7355201142492419, |
|
"eval_sts-test_pearson_manhattan": 0.7299910115321456, |
|
"eval_sts-test_pearson_max": 0.7831915073063493, |
|
"eval_sts-test_spearman_cosine": 0.8005432620025132, |
|
"eval_sts-test_spearman_dot": 0.49466719400094655, |
|
"eval_sts-test_spearman_euclidean": 0.7273424991180402, |
|
"eval_sts-test_spearman_manhattan": 0.7249394934262583, |
|
"eval_sts-test_spearman_max": 0.8005432620025132, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_vitaminc-pairs_loss": 4.7559494972229, |
|
"eval_vitaminc-pairs_runtime": 1.1844, |
|
"eval_vitaminc-pairs_samples_per_second": 71.768, |
|
"eval_vitaminc-pairs_steps_per_second": 2.533, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_sts-label_loss": 3.46917724609375, |
|
"eval_sts-label_runtime": 0.3003, |
|
"eval_sts-label_samples_per_second": 333.048, |
|
"eval_sts-label_steps_per_second": 13.322, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_qnli-contrastive_loss": 0.13890141248703003, |
|
"eval_qnli-contrastive_runtime": 0.3729, |
|
"eval_qnli-contrastive_samples_per_second": 268.18, |
|
"eval_qnli-contrastive_steps_per_second": 10.727, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_scitail-pairs-qa_loss": 0.0611240416765213, |
|
"eval_scitail-pairs-qa_runtime": 0.9367, |
|
"eval_scitail-pairs-qa_samples_per_second": 106.755, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.27, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_scitail-pairs-pos_loss": 0.46203696727752686, |
|
"eval_scitail-pairs-pos_runtime": 1.4874, |
|
"eval_scitail-pairs-pos_samples_per_second": 67.232, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.689, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_xsum-pairs_loss": 0.24919259548187256, |
|
"eval_xsum-pairs_runtime": 0.9576, |
|
"eval_xsum-pairs_samples_per_second": 104.427, |
|
"eval_xsum-pairs_steps_per_second": 4.177, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_compression-pairs_loss": 0.08809012174606323, |
|
"eval_compression-pairs_runtime": 0.298, |
|
"eval_compression-pairs_samples_per_second": 335.567, |
|
"eval_compression-pairs_steps_per_second": 13.423, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_sciq_pairs_loss": 0.28287386894226074, |
|
"eval_sciq_pairs_runtime": 4.2668, |
|
"eval_sciq_pairs_samples_per_second": 23.437, |
|
"eval_sciq_pairs_steps_per_second": 0.937, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_qasc_pairs_loss": 0.1861308217048645, |
|
"eval_qasc_pairs_runtime": 1.0488, |
|
"eval_qasc_pairs_samples_per_second": 95.351, |
|
"eval_qasc_pairs_steps_per_second": 3.814, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_openbookqa_pairs_loss": 1.600982666015625, |
|
"eval_openbookqa_pairs_runtime": 0.9077, |
|
"eval_openbookqa_pairs_samples_per_second": 110.17, |
|
"eval_openbookqa_pairs_steps_per_second": 4.407, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_msmarco_pairs_loss": 0.5555463433265686, |
|
"eval_msmarco_pairs_runtime": 2.1064, |
|
"eval_msmarco_pairs_samples_per_second": 47.474, |
|
"eval_msmarco_pairs_steps_per_second": 1.899, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_nq_pairs_loss": 0.23241031169891357, |
|
"eval_nq_pairs_runtime": 4.6119, |
|
"eval_nq_pairs_samples_per_second": 21.683, |
|
"eval_nq_pairs_steps_per_second": 0.867, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_trivia_pairs_loss": 0.7936394214630127, |
|
"eval_trivia_pairs_runtime": 6.6242, |
|
"eval_trivia_pairs_samples_per_second": 15.096, |
|
"eval_trivia_pairs_steps_per_second": 0.604, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_quora_pairs_loss": 0.05936668440699577, |
|
"eval_quora_pairs_runtime": 0.7463, |
|
"eval_quora_pairs_samples_per_second": 133.994, |
|
"eval_quora_pairs_steps_per_second": 5.36, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_gooaq_pairs_loss": 0.5735708475112915, |
|
"eval_gooaq_pairs_runtime": 1.4747, |
|
"eval_gooaq_pairs_samples_per_second": 67.809, |
|
"eval_gooaq_pairs_steps_per_second": 2.712, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6017865538316878, |
|
"eval_mrpc_pairs_loss": 0.046309370547533035, |
|
"eval_mrpc_pairs_runtime": 0.2694, |
|
"eval_mrpc_pairs_samples_per_second": 371.218, |
|
"eval_mrpc_pairs_steps_per_second": 14.849, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6068014417802852, |
|
"grad_norm": 6.513147830963135, |
|
"learning_rate": 1.53273341434723e-05, |
|
"loss": 0.8689, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 0.6118163297288827, |
|
"grad_norm": 0.2349071353673935, |
|
"learning_rate": 1.5071859265669756e-05, |
|
"loss": 0.8692, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.6168312176774801, |
|
"grad_norm": 18.028608322143555, |
|
"learning_rate": 1.4816363539427118e-05, |
|
"loss": 0.9083, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 0.6218461056260774, |
|
"grad_norm": 17.381690979003906, |
|
"learning_rate": 1.456092109140423e-05, |
|
"loss": 1.0782, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.6268609935746748, |
|
"grad_norm": 20.72548484802246, |
|
"learning_rate": 1.4305606032803418e-05, |
|
"loss": 0.7711, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6318758815232722, |
|
"grad_norm": 28.311264038085938, |
|
"learning_rate": 1.4050492437867641e-05, |
|
"loss": 1.0005, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.6368907694718696, |
|
"grad_norm": 14.892809867858887, |
|
"learning_rate": 1.3795654322389481e-05, |
|
"loss": 0.7229, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 0.641905657420467, |
|
"grad_norm": 18.567630767822266, |
|
"learning_rate": 1.3541165622236977e-05, |
|
"loss": 0.4871, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.6469205453690644, |
|
"grad_norm": 8.814851760864258, |
|
"learning_rate": 1.3287100171902759e-05, |
|
"loss": 0.7853, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"grad_norm": 19.43486785888672, |
|
"learning_rate": 1.3033531683082495e-05, |
|
"loss": 0.9271, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_nli-pairs_loss": 0.8979966640472412, |
|
"eval_nli-pairs_runtime": 3.6341, |
|
"eval_nli-pairs_samples_per_second": 27.517, |
|
"eval_nli-pairs_steps_per_second": 1.101, |
|
"eval_sts-test_pearson_cosine": 0.786081877366483, |
|
"eval_sts-test_pearson_dot": 0.5354100918466089, |
|
"eval_sts-test_pearson_euclidean": 0.7368659505908834, |
|
"eval_sts-test_pearson_manhattan": 0.7310042183211231, |
|
"eval_sts-test_pearson_max": 0.786081877366483, |
|
"eval_sts-test_spearman_cosine": 0.8043456052578905, |
|
"eval_sts-test_spearman_dot": 0.5150264179790126, |
|
"eval_sts-test_spearman_euclidean": 0.7297811553069841, |
|
"eval_sts-test_spearman_manhattan": 0.7264172194761916, |
|
"eval_sts-test_spearman_max": 0.8043456052578905, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_vitaminc-pairs_loss": 4.720225811004639, |
|
"eval_vitaminc-pairs_runtime": 1.1487, |
|
"eval_vitaminc-pairs_samples_per_second": 73.995, |
|
"eval_vitaminc-pairs_steps_per_second": 2.612, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_sts-label_loss": 3.9553511142730713, |
|
"eval_sts-label_runtime": 0.2732, |
|
"eval_sts-label_samples_per_second": 366.049, |
|
"eval_sts-label_steps_per_second": 14.642, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_qnli-contrastive_loss": 0.14256399869918823, |
|
"eval_qnli-contrastive_runtime": 0.3558, |
|
"eval_qnli-contrastive_samples_per_second": 281.03, |
|
"eval_qnli-contrastive_steps_per_second": 11.241, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_scitail-pairs-qa_loss": 0.06135182082653046, |
|
"eval_scitail-pairs-qa_runtime": 0.8797, |
|
"eval_scitail-pairs-qa_samples_per_second": 113.67, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.547, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_scitail-pairs-pos_loss": 0.42590686678886414, |
|
"eval_scitail-pairs-pos_runtime": 1.3288, |
|
"eval_scitail-pairs-pos_samples_per_second": 75.254, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.01, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_xsum-pairs_loss": 0.2564789056777954, |
|
"eval_xsum-pairs_runtime": 0.9345, |
|
"eval_xsum-pairs_samples_per_second": 107.011, |
|
"eval_xsum-pairs_steps_per_second": 4.28, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_compression-pairs_loss": 0.08838170021772385, |
|
"eval_compression-pairs_runtime": 0.2761, |
|
"eval_compression-pairs_samples_per_second": 362.144, |
|
"eval_compression-pairs_steps_per_second": 14.486, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_sciq_pairs_loss": 0.2946786880493164, |
|
"eval_sciq_pairs_runtime": 4.076, |
|
"eval_sciq_pairs_samples_per_second": 24.534, |
|
"eval_sciq_pairs_steps_per_second": 0.981, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_qasc_pairs_loss": 0.17502914369106293, |
|
"eval_qasc_pairs_runtime": 1.0723, |
|
"eval_qasc_pairs_samples_per_second": 93.259, |
|
"eval_qasc_pairs_steps_per_second": 3.73, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_openbookqa_pairs_loss": 1.5555152893066406, |
|
"eval_openbookqa_pairs_runtime": 0.8973, |
|
"eval_openbookqa_pairs_samples_per_second": 111.451, |
|
"eval_openbookqa_pairs_steps_per_second": 4.458, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_msmarco_pairs_loss": 0.5041812062263489, |
|
"eval_msmarco_pairs_runtime": 2.0593, |
|
"eval_msmarco_pairs_samples_per_second": 48.56, |
|
"eval_msmarco_pairs_steps_per_second": 1.942, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_nq_pairs_loss": 0.24564537405967712, |
|
"eval_nq_pairs_runtime": 4.527, |
|
"eval_nq_pairs_samples_per_second": 22.09, |
|
"eval_nq_pairs_steps_per_second": 0.884, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_trivia_pairs_loss": 0.8565467000007629, |
|
"eval_trivia_pairs_runtime": 6.4751, |
|
"eval_trivia_pairs_samples_per_second": 15.444, |
|
"eval_trivia_pairs_steps_per_second": 0.618, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_quora_pairs_loss": 0.052645713090896606, |
|
"eval_quora_pairs_runtime": 0.6803, |
|
"eval_quora_pairs_samples_per_second": 146.985, |
|
"eval_quora_pairs_steps_per_second": 5.879, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_gooaq_pairs_loss": 0.5815556645393372, |
|
"eval_gooaq_pairs_runtime": 1.3985, |
|
"eval_gooaq_pairs_samples_per_second": 71.504, |
|
"eval_gooaq_pairs_steps_per_second": 2.86, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6519354333176618, |
|
"eval_mrpc_pairs_loss": 0.047052089124917984, |
|
"eval_mrpc_pairs_runtime": 0.2602, |
|
"eval_mrpc_pairs_samples_per_second": 384.349, |
|
"eval_mrpc_pairs_steps_per_second": 15.374, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6569503212662592, |
|
"grad_norm": 21.91355323791504, |
|
"learning_rate": 1.2780533723289014e-05, |
|
"loss": 0.5223, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 0.6619652092148566, |
|
"grad_norm": 9.792081832885742, |
|
"learning_rate": 1.2528179694508286e-05, |
|
"loss": 1.0498, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.666980097163454, |
|
"grad_norm": 6.606201648712158, |
|
"learning_rate": 1.2276542811903345e-05, |
|
"loss": 0.6791, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 0.6719949851120514, |
|
"grad_norm": 16.744705200195312, |
|
"learning_rate": 1.2025696082572509e-05, |
|
"loss": 0.8836, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.6770098730606487, |
|
"grad_norm": 8.791626930236816, |
|
"learning_rate": 1.1775712284367882e-05, |
|
"loss": 0.6035, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.6820247610092462, |
|
"grad_norm": 1.067271113395691, |
|
"learning_rate": 1.152666394478045e-05, |
|
"loss": 0.5167, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.6870396489578436, |
|
"grad_norm": 7.685211181640625, |
|
"learning_rate": 1.1286358620301126e-05, |
|
"loss": 0.981, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 0.692054536906441, |
|
"grad_norm": 19.07784652709961, |
|
"learning_rate": 1.10393628476565e-05, |
|
"loss": 0.4873, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.6970694248550384, |
|
"grad_norm": 1.4715958833694458, |
|
"learning_rate": 1.0793516169782712e-05, |
|
"loss": 0.4762, |
|
"step": 4448 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"grad_norm": 14.572600364685059, |
|
"learning_rate": 1.0548889913873123e-05, |
|
"loss": 0.8201, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_nli-pairs_loss": 0.8704043626785278, |
|
"eval_nli-pairs_runtime": 3.6418, |
|
"eval_nli-pairs_samples_per_second": 27.459, |
|
"eval_nli-pairs_steps_per_second": 1.098, |
|
"eval_sts-test_pearson_cosine": 0.7871366351762351, |
|
"eval_sts-test_pearson_dot": 0.520292802271069, |
|
"eval_sts-test_pearson_euclidean": 0.7358991589918665, |
|
"eval_sts-test_pearson_manhattan": 0.7306487678482384, |
|
"eval_sts-test_pearson_max": 0.7871366351762351, |
|
"eval_sts-test_spearman_cosine": 0.8043053229220561, |
|
"eval_sts-test_spearman_dot": 0.500924984433136, |
|
"eval_sts-test_spearman_euclidean": 0.7279966902078664, |
|
"eval_sts-test_spearman_manhattan": 0.7254635738312362, |
|
"eval_sts-test_spearman_max": 0.8043053229220561, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_vitaminc-pairs_loss": 4.733531475067139, |
|
"eval_vitaminc-pairs_runtime": 1.1524, |
|
"eval_vitaminc-pairs_samples_per_second": 73.759, |
|
"eval_vitaminc-pairs_steps_per_second": 2.603, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_sts-label_loss": 3.589179515838623, |
|
"eval_sts-label_runtime": 0.2802, |
|
"eval_sts-label_samples_per_second": 356.831, |
|
"eval_sts-label_steps_per_second": 14.273, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_qnli-contrastive_loss": 0.11559023708105087, |
|
"eval_qnli-contrastive_runtime": 0.3803, |
|
"eval_qnli-contrastive_samples_per_second": 262.956, |
|
"eval_qnli-contrastive_steps_per_second": 10.518, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_scitail-pairs-qa_loss": 0.05958002060651779, |
|
"eval_scitail-pairs-qa_runtime": 0.9171, |
|
"eval_scitail-pairs-qa_samples_per_second": 109.042, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.362, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_scitail-pairs-pos_loss": 0.43254122138023376, |
|
"eval_scitail-pairs-pos_runtime": 1.3676, |
|
"eval_scitail-pairs-pos_samples_per_second": 73.118, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.925, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_xsum-pairs_loss": 0.248906210064888, |
|
"eval_xsum-pairs_runtime": 0.9364, |
|
"eval_xsum-pairs_samples_per_second": 106.797, |
|
"eval_xsum-pairs_steps_per_second": 4.272, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_compression-pairs_loss": 0.08712127059698105, |
|
"eval_compression-pairs_runtime": 0.2771, |
|
"eval_compression-pairs_samples_per_second": 360.923, |
|
"eval_compression-pairs_steps_per_second": 14.437, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_sciq_pairs_loss": 0.2863478362560272, |
|
"eval_sciq_pairs_runtime": 4.1006, |
|
"eval_sciq_pairs_samples_per_second": 24.386, |
|
"eval_sciq_pairs_steps_per_second": 0.975, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_qasc_pairs_loss": 0.17710347473621368, |
|
"eval_qasc_pairs_runtime": 1.0521, |
|
"eval_qasc_pairs_samples_per_second": 95.051, |
|
"eval_qasc_pairs_steps_per_second": 3.802, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_openbookqa_pairs_loss": 1.5271464586257935, |
|
"eval_openbookqa_pairs_runtime": 0.8986, |
|
"eval_openbookqa_pairs_samples_per_second": 111.286, |
|
"eval_openbookqa_pairs_steps_per_second": 4.451, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_msmarco_pairs_loss": 0.5346755385398865, |
|
"eval_msmarco_pairs_runtime": 2.0827, |
|
"eval_msmarco_pairs_samples_per_second": 48.014, |
|
"eval_msmarco_pairs_steps_per_second": 1.921, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_nq_pairs_loss": 0.24830152094364166, |
|
"eval_nq_pairs_runtime": 4.5025, |
|
"eval_nq_pairs_samples_per_second": 22.21, |
|
"eval_nq_pairs_steps_per_second": 0.888, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_trivia_pairs_loss": 0.799673318862915, |
|
"eval_trivia_pairs_runtime": 6.4664, |
|
"eval_trivia_pairs_samples_per_second": 15.465, |
|
"eval_trivia_pairs_steps_per_second": 0.619, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_quora_pairs_loss": 0.030656050890684128, |
|
"eval_quora_pairs_runtime": 0.6818, |
|
"eval_quora_pairs_samples_per_second": 146.669, |
|
"eval_quora_pairs_steps_per_second": 5.867, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_gooaq_pairs_loss": 0.5036634802818298, |
|
"eval_gooaq_pairs_runtime": 1.4051, |
|
"eval_gooaq_pairs_samples_per_second": 71.169, |
|
"eval_gooaq_pairs_steps_per_second": 2.847, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7020843128036358, |
|
"eval_mrpc_pairs_loss": 0.04455450549721718, |
|
"eval_mrpc_pairs_runtime": 0.2642, |
|
"eval_mrpc_pairs_samples_per_second": 378.478, |
|
"eval_mrpc_pairs_steps_per_second": 15.139, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7070992007522332, |
|
"grad_norm": 15.19054889678955, |
|
"learning_rate": 1.030555505304156e-05, |
|
"loss": 0.7799, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 0.7121140887008306, |
|
"grad_norm": 16.065160751342773, |
|
"learning_rate": 1.0063582185731009e-05, |
|
"loss": 0.8006, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.717128976649428, |
|
"grad_norm": 3.2584469318389893, |
|
"learning_rate": 9.823041515230937e-06, |
|
"loss": 0.5123, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 0.7221438645980254, |
|
"grad_norm": 2.2951438426971436, |
|
"learning_rate": 9.584002829309324e-06, |
|
"loss": 0.7421, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.7271587525466228, |
|
"grad_norm": 21.291872024536133, |
|
"learning_rate": 9.346535479965231e-06, |
|
"loss": 0.9477, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.7321736404952202, |
|
"grad_norm": 4.785529613494873, |
|
"learning_rate": 9.11070836330775e-06, |
|
"loss": 0.5021, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.7371885284438175, |
|
"grad_norm": 1.7058138847351074, |
|
"learning_rate": 8.876589899567312e-06, |
|
"loss": 0.931, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.7422034163924149, |
|
"grad_norm": 9.1055326461792, |
|
"learning_rate": 8.644248013244963e-06, |
|
"loss": 0.7777, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.7472183043410123, |
|
"grad_norm": 3.6529128551483154, |
|
"learning_rate": 8.413750113405556e-06, |
|
"loss": 0.9462, |
|
"step": 4768 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"grad_norm": 0.5643049478530884, |
|
"learning_rate": 8.185163074120399e-06, |
|
"loss": 0.5846, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_nli-pairs_loss": 0.9291799664497375, |
|
"eval_nli-pairs_runtime": 3.7498, |
|
"eval_nli-pairs_samples_per_second": 26.668, |
|
"eval_nli-pairs_steps_per_second": 1.067, |
|
"eval_sts-test_pearson_cosine": 0.7855324842750789, |
|
"eval_sts-test_pearson_dot": 0.5242204261314407, |
|
"eval_sts-test_pearson_euclidean": 0.7349702751512333, |
|
"eval_sts-test_pearson_manhattan": 0.7293454465410049, |
|
"eval_sts-test_pearson_max": 0.7855324842750789, |
|
"eval_sts-test_spearman_cosine": 0.8044211074352633, |
|
"eval_sts-test_spearman_dot": 0.5021807579050959, |
|
"eval_sts-test_spearman_euclidean": 0.7270456124616013, |
|
"eval_sts-test_spearman_manhattan": 0.7246691951731193, |
|
"eval_sts-test_spearman_max": 0.8044211074352633, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_vitaminc-pairs_loss": 4.687094688415527, |
|
"eval_vitaminc-pairs_runtime": 1.1386, |
|
"eval_vitaminc-pairs_samples_per_second": 74.654, |
|
"eval_vitaminc-pairs_steps_per_second": 2.635, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_sts-label_loss": 3.8013510704040527, |
|
"eval_sts-label_runtime": 0.2716, |
|
"eval_sts-label_samples_per_second": 368.125, |
|
"eval_sts-label_steps_per_second": 14.725, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_qnli-contrastive_loss": 0.1414812207221985, |
|
"eval_qnli-contrastive_runtime": 0.3601, |
|
"eval_qnli-contrastive_samples_per_second": 277.73, |
|
"eval_qnli-contrastive_steps_per_second": 11.109, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_scitail-pairs-qa_loss": 0.05851547792553902, |
|
"eval_scitail-pairs-qa_runtime": 0.8864, |
|
"eval_scitail-pairs-qa_samples_per_second": 112.817, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.513, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_scitail-pairs-pos_loss": 0.4562886357307434, |
|
"eval_scitail-pairs-pos_runtime": 1.3535, |
|
"eval_scitail-pairs-pos_samples_per_second": 73.88, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.955, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_xsum-pairs_loss": 0.23483119904994965, |
|
"eval_xsum-pairs_runtime": 0.9336, |
|
"eval_xsum-pairs_samples_per_second": 107.109, |
|
"eval_xsum-pairs_steps_per_second": 4.284, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_compression-pairs_loss": 0.08680214732885361, |
|
"eval_compression-pairs_runtime": 0.2716, |
|
"eval_compression-pairs_samples_per_second": 368.254, |
|
"eval_compression-pairs_steps_per_second": 14.73, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_sciq_pairs_loss": 0.2816057801246643, |
|
"eval_sciq_pairs_runtime": 4.0742, |
|
"eval_sciq_pairs_samples_per_second": 24.545, |
|
"eval_sciq_pairs_steps_per_second": 0.982, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_qasc_pairs_loss": 0.17035560309886932, |
|
"eval_qasc_pairs_runtime": 1.0717, |
|
"eval_qasc_pairs_samples_per_second": 93.311, |
|
"eval_qasc_pairs_steps_per_second": 3.732, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_openbookqa_pairs_loss": 1.5671054124832153, |
|
"eval_openbookqa_pairs_runtime": 0.8973, |
|
"eval_openbookqa_pairs_samples_per_second": 111.441, |
|
"eval_openbookqa_pairs_steps_per_second": 4.458, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_msmarco_pairs_loss": 0.5062486529350281, |
|
"eval_msmarco_pairs_runtime": 2.0609, |
|
"eval_msmarco_pairs_samples_per_second": 48.524, |
|
"eval_msmarco_pairs_steps_per_second": 1.941, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_nq_pairs_loss": 0.22875532507896423, |
|
"eval_nq_pairs_runtime": 4.5041, |
|
"eval_nq_pairs_samples_per_second": 22.202, |
|
"eval_nq_pairs_steps_per_second": 0.888, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_trivia_pairs_loss": 0.8119627237319946, |
|
"eval_trivia_pairs_runtime": 6.4609, |
|
"eval_trivia_pairs_samples_per_second": 15.478, |
|
"eval_trivia_pairs_steps_per_second": 0.619, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_quora_pairs_loss": 0.06211049482226372, |
|
"eval_quora_pairs_runtime": 0.6765, |
|
"eval_quora_pairs_samples_per_second": 147.827, |
|
"eval_quora_pairs_steps_per_second": 5.913, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_gooaq_pairs_loss": 0.4847571551799774, |
|
"eval_gooaq_pairs_runtime": 1.3911, |
|
"eval_gooaq_pairs_samples_per_second": 71.886, |
|
"eval_gooaq_pairs_steps_per_second": 2.875, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7522331922896098, |
|
"eval_mrpc_pairs_loss": 0.04384278133511543, |
|
"eval_mrpc_pairs_runtime": 0.2617, |
|
"eval_mrpc_pairs_samples_per_second": 382.146, |
|
"eval_mrpc_pairs_steps_per_second": 15.286, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7572480802382072, |
|
"grad_norm": 14.555929183959961, |
|
"learning_rate": 7.958553215065208e-06, |
|
"loss": 0.6735, |
|
"step": 4832 |
|
}, |
|
{ |
|
"epoch": 0.7622629681868046, |
|
"grad_norm": 10.30207347869873, |
|
"learning_rate": 7.733986282278816e-06, |
|
"loss": 1.1569, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.767277856135402, |
|
"grad_norm": 17.255786895751953, |
|
"learning_rate": 7.511527429088396e-06, |
|
"loss": 0.9749, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 0.7722927440839994, |
|
"grad_norm": 14.730864524841309, |
|
"learning_rate": 7.291241197206574e-06, |
|
"loss": 0.6581, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.7773076320325968, |
|
"grad_norm": 8.807291984558105, |
|
"learning_rate": 7.07319149800605e-06, |
|
"loss": 0.6979, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.7823225199811942, |
|
"grad_norm": 0.6080070734024048, |
|
"learning_rate": 6.857441593977046e-06, |
|
"loss": 0.7582, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 0.7873374079297916, |
|
"grad_norm": 2.2002525329589844, |
|
"learning_rate": 6.6440540803730425e-06, |
|
"loss": 1.0082, |
|
"step": 5024 |
|
}, |
|
{ |
|
"epoch": 0.792352295878389, |
|
"grad_norm": 8.624346733093262, |
|
"learning_rate": 6.433090867050122e-06, |
|
"loss": 0.6206, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.7973671838269863, |
|
"grad_norm": 0.9821205139160156, |
|
"learning_rate": 6.224613160505094e-06, |
|
"loss": 0.5165, |
|
"step": 5088 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"grad_norm": 4.104696750640869, |
|
"learning_rate": 6.018681446117773e-06, |
|
"loss": 0.4914, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_nli-pairs_loss": 0.8841198682785034, |
|
"eval_nli-pairs_runtime": 4.1793, |
|
"eval_nli-pairs_samples_per_second": 23.928, |
|
"eval_nli-pairs_steps_per_second": 0.957, |
|
"eval_sts-test_pearson_cosine": 0.7866468635321827, |
|
"eval_sts-test_pearson_dot": 0.5124924570863083, |
|
"eval_sts-test_pearson_euclidean": 0.7320768163626257, |
|
"eval_sts-test_pearson_manhattan": 0.7266238528084388, |
|
"eval_sts-test_pearson_max": 0.7866468635321827, |
|
"eval_sts-test_spearman_cosine": 0.8041619306345255, |
|
"eval_sts-test_spearman_dot": 0.4913316974763461, |
|
"eval_sts-test_spearman_euclidean": 0.7232005770314757, |
|
"eval_sts-test_spearman_manhattan": 0.7207683852583252, |
|
"eval_sts-test_spearman_max": 0.8041619306345255, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_vitaminc-pairs_loss": 4.725103855133057, |
|
"eval_vitaminc-pairs_runtime": 1.2146, |
|
"eval_vitaminc-pairs_samples_per_second": 69.982, |
|
"eval_vitaminc-pairs_steps_per_second": 2.47, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_sts-label_loss": 3.6535470485687256, |
|
"eval_sts-label_runtime": 0.3164, |
|
"eval_sts-label_samples_per_second": 316.056, |
|
"eval_sts-label_steps_per_second": 12.642, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_qnli-contrastive_loss": 0.10529302805662155, |
|
"eval_qnli-contrastive_runtime": 0.368, |
|
"eval_qnli-contrastive_samples_per_second": 271.711, |
|
"eval_qnli-contrastive_steps_per_second": 10.868, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_scitail-pairs-qa_loss": 0.05555274337530136, |
|
"eval_scitail-pairs-qa_runtime": 0.9542, |
|
"eval_scitail-pairs-qa_samples_per_second": 104.795, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.192, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_scitail-pairs-pos_loss": 0.4785614013671875, |
|
"eval_scitail-pairs-pos_runtime": 1.4937, |
|
"eval_scitail-pairs-pos_samples_per_second": 66.949, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.678, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_xsum-pairs_loss": 0.2355932593345642, |
|
"eval_xsum-pairs_runtime": 0.9396, |
|
"eval_xsum-pairs_samples_per_second": 106.432, |
|
"eval_xsum-pairs_steps_per_second": 4.257, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_compression-pairs_loss": 0.083825021982193, |
|
"eval_compression-pairs_runtime": 0.2789, |
|
"eval_compression-pairs_samples_per_second": 358.564, |
|
"eval_compression-pairs_steps_per_second": 14.343, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_sciq_pairs_loss": 0.28157705068588257, |
|
"eval_sciq_pairs_runtime": 4.1947, |
|
"eval_sciq_pairs_samples_per_second": 23.84, |
|
"eval_sciq_pairs_steps_per_second": 0.954, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_qasc_pairs_loss": 0.1739024668931961, |
|
"eval_qasc_pairs_runtime": 1.1277, |
|
"eval_qasc_pairs_samples_per_second": 88.676, |
|
"eval_qasc_pairs_steps_per_second": 3.547, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_openbookqa_pairs_loss": 1.591935396194458, |
|
"eval_openbookqa_pairs_runtime": 1.0022, |
|
"eval_openbookqa_pairs_samples_per_second": 99.782, |
|
"eval_openbookqa_pairs_steps_per_second": 3.991, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_msmarco_pairs_loss": 0.5132349133491516, |
|
"eval_msmarco_pairs_runtime": 2.1322, |
|
"eval_msmarco_pairs_samples_per_second": 46.901, |
|
"eval_msmarco_pairs_steps_per_second": 1.876, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_nq_pairs_loss": 0.2343132346868515, |
|
"eval_nq_pairs_runtime": 4.5529, |
|
"eval_nq_pairs_samples_per_second": 21.964, |
|
"eval_nq_pairs_steps_per_second": 0.879, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_trivia_pairs_loss": 0.7988561987876892, |
|
"eval_trivia_pairs_runtime": 6.5661, |
|
"eval_trivia_pairs_samples_per_second": 15.23, |
|
"eval_trivia_pairs_steps_per_second": 0.609, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_quora_pairs_loss": 0.05578049644827843, |
|
"eval_quora_pairs_runtime": 0.8028, |
|
"eval_quora_pairs_samples_per_second": 124.564, |
|
"eval_quora_pairs_steps_per_second": 4.983, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_gooaq_pairs_loss": 0.48901888728141785, |
|
"eval_gooaq_pairs_runtime": 1.5605, |
|
"eval_gooaq_pairs_samples_per_second": 64.082, |
|
"eval_gooaq_pairs_steps_per_second": 2.563, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8023820717755837, |
|
"eval_mrpc_pairs_loss": 0.04172317683696747, |
|
"eval_mrpc_pairs_runtime": 0.2628, |
|
"eval_mrpc_pairs_samples_per_second": 380.505, |
|
"eval_mrpc_pairs_steps_per_second": 15.22, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8073969597241811, |
|
"grad_norm": 9.413043022155762, |
|
"learning_rate": 5.815355470602388e-06, |
|
"loss": 1.098, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 0.8124118476727785, |
|
"grad_norm": 0.25412222743034363, |
|
"learning_rate": 5.614694224673387e-06, |
|
"loss": 0.821, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 0.8174267356213759, |
|
"grad_norm": 18.76092529296875, |
|
"learning_rate": 5.416755925930494e-06, |
|
"loss": 0.9351, |
|
"step": 5216 |
|
}, |
|
{ |
|
"epoch": 0.8224416235699734, |
|
"grad_norm": 19.607337951660156, |
|
"learning_rate": 5.221598001968132e-06, |
|
"loss": 0.8784, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 0.8274565115185708, |
|
"grad_norm": 3.2164149284362793, |
|
"learning_rate": 5.029277073714009e-06, |
|
"loss": 0.8326, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.8324713994671682, |
|
"grad_norm": 11.156713485717773, |
|
"learning_rate": 4.839848939001789e-06, |
|
"loss": 0.7551, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 0.8374862874157656, |
|
"grad_norm": 8.80623722076416, |
|
"learning_rate": 4.653368556382492e-06, |
|
"loss": 0.8234, |
|
"step": 5344 |
|
}, |
|
{ |
|
"epoch": 0.842501175364363, |
|
"grad_norm": 16.081491470336914, |
|
"learning_rate": 4.469890029179472e-06, |
|
"loss": 1.0922, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.8475160633129604, |
|
"grad_norm": 0.8583326935768127, |
|
"learning_rate": 4.2894665897914794e-06, |
|
"loss": 1.0925, |
|
"step": 5408 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"grad_norm": 7.903942108154297, |
|
"learning_rate": 4.112150584248388e-06, |
|
"loss": 1.099, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_nli-pairs_loss": 0.8909263014793396, |
|
"eval_nli-pairs_runtime": 3.6329, |
|
"eval_nli-pairs_samples_per_second": 27.526, |
|
"eval_nli-pairs_steps_per_second": 1.101, |
|
"eval_sts-test_pearson_cosine": 0.7892673589571536, |
|
"eval_sts-test_pearson_dot": 0.5308666684424199, |
|
"eval_sts-test_pearson_euclidean": 0.7372214599353599, |
|
"eval_sts-test_pearson_manhattan": 0.73149442324126, |
|
"eval_sts-test_pearson_max": 0.7892673589571536, |
|
"eval_sts-test_spearman_cosine": 0.8088174691107087, |
|
"eval_sts-test_spearman_dot": 0.5097841799376374, |
|
"eval_sts-test_spearman_euclidean": 0.7291099552995026, |
|
"eval_sts-test_spearman_manhattan": 0.7255023946868168, |
|
"eval_sts-test_spearman_max": 0.8088174691107087, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_vitaminc-pairs_loss": 4.696901798248291, |
|
"eval_vitaminc-pairs_runtime": 1.13, |
|
"eval_vitaminc-pairs_samples_per_second": 75.219, |
|
"eval_vitaminc-pairs_steps_per_second": 2.655, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_sts-label_loss": 3.794584274291992, |
|
"eval_sts-label_runtime": 0.2757, |
|
"eval_sts-label_samples_per_second": 362.777, |
|
"eval_sts-label_steps_per_second": 14.511, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_qnli-contrastive_loss": 0.1291896551847458, |
|
"eval_qnli-contrastive_runtime": 0.3577, |
|
"eval_qnli-contrastive_samples_per_second": 279.536, |
|
"eval_qnli-contrastive_steps_per_second": 11.181, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_scitail-pairs-qa_loss": 0.05729294940829277, |
|
"eval_scitail-pairs-qa_runtime": 0.8757, |
|
"eval_scitail-pairs-qa_samples_per_second": 114.199, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.568, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_scitail-pairs-pos_loss": 0.47140783071517944, |
|
"eval_scitail-pairs-pos_runtime": 1.3328, |
|
"eval_scitail-pairs-pos_samples_per_second": 75.031, |
|
"eval_scitail-pairs-pos_steps_per_second": 3.001, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_xsum-pairs_loss": 0.2317724972963333, |
|
"eval_xsum-pairs_runtime": 0.934, |
|
"eval_xsum-pairs_samples_per_second": 107.065, |
|
"eval_xsum-pairs_steps_per_second": 4.283, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_compression-pairs_loss": 0.0849599540233612, |
|
"eval_compression-pairs_runtime": 0.2772, |
|
"eval_compression-pairs_samples_per_second": 360.752, |
|
"eval_compression-pairs_steps_per_second": 14.43, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_sciq_pairs_loss": 0.2746911346912384, |
|
"eval_sciq_pairs_runtime": 4.0398, |
|
"eval_sciq_pairs_samples_per_second": 24.754, |
|
"eval_sciq_pairs_steps_per_second": 0.99, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_qasc_pairs_loss": 0.16956950724124908, |
|
"eval_qasc_pairs_runtime": 1.0682, |
|
"eval_qasc_pairs_samples_per_second": 93.615, |
|
"eval_qasc_pairs_steps_per_second": 3.745, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_openbookqa_pairs_loss": 1.5424996614456177, |
|
"eval_openbookqa_pairs_runtime": 0.8928, |
|
"eval_openbookqa_pairs_samples_per_second": 112.006, |
|
"eval_openbookqa_pairs_steps_per_second": 4.48, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_msmarco_pairs_loss": 0.5047981142997742, |
|
"eval_msmarco_pairs_runtime": 2.0436, |
|
"eval_msmarco_pairs_samples_per_second": 48.932, |
|
"eval_msmarco_pairs_steps_per_second": 1.957, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_nq_pairs_loss": 0.230237677693367, |
|
"eval_nq_pairs_runtime": 4.5251, |
|
"eval_nq_pairs_samples_per_second": 22.099, |
|
"eval_nq_pairs_steps_per_second": 0.884, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_trivia_pairs_loss": 0.7567735314369202, |
|
"eval_trivia_pairs_runtime": 6.4545, |
|
"eval_trivia_pairs_samples_per_second": 15.493, |
|
"eval_trivia_pairs_steps_per_second": 0.62, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_quora_pairs_loss": 0.05753583088517189, |
|
"eval_quora_pairs_runtime": 0.6769, |
|
"eval_quora_pairs_samples_per_second": 147.736, |
|
"eval_quora_pairs_steps_per_second": 5.909, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_gooaq_pairs_loss": 0.49448758363723755, |
|
"eval_gooaq_pairs_runtime": 1.3984, |
|
"eval_gooaq_pairs_samples_per_second": 71.51, |
|
"eval_gooaq_pairs_steps_per_second": 2.86, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8525309512615578, |
|
"eval_mrpc_pairs_loss": 0.04384453222155571, |
|
"eval_mrpc_pairs_runtime": 0.2653, |
|
"eval_mrpc_pairs_samples_per_second": 376.996, |
|
"eval_mrpc_pairs_steps_per_second": 15.08, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8575458392101551, |
|
"grad_norm": 0.8697513341903687, |
|
"learning_rate": 3.93799345702415e-06, |
|
"loss": 0.5396, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 0.8625607271587525, |
|
"grad_norm": 8.337197303771973, |
|
"learning_rate": 3.7670457361112116e-06, |
|
"loss": 0.6636, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 0.8675756151073499, |
|
"grad_norm": 0.3655373156070709, |
|
"learning_rate": 3.5993570183609596e-06, |
|
"loss": 1.0095, |
|
"step": 5536 |
|
}, |
|
{ |
|
"epoch": 0.8725905030559473, |
|
"grad_norm": 13.748374938964844, |
|
"learning_rate": 3.4349759550941933e-06, |
|
"loss": 0.631, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 0.8776053910045447, |
|
"grad_norm": 15.683762550354004, |
|
"learning_rate": 3.273950237986013e-06, |
|
"loss": 0.5415, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.8826202789531421, |
|
"grad_norm": 10.004467964172363, |
|
"learning_rate": 3.11632658522906e-06, |
|
"loss": 0.9227, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.8876351669017395, |
|
"grad_norm": 12.990907669067383, |
|
"learning_rate": 2.9621507279792564e-06, |
|
"loss": 0.8991, |
|
"step": 5664 |
|
}, |
|
{ |
|
"epoch": 0.8926500548503369, |
|
"grad_norm": 0.4619373679161072, |
|
"learning_rate": 2.8114673970878584e-06, |
|
"loss": 0.5068, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 0.8976649427989344, |
|
"grad_norm": 8.317788124084473, |
|
"learning_rate": 2.664320310123768e-06, |
|
"loss": 1.2134, |
|
"step": 5728 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"grad_norm": 0.38993319869041443, |
|
"learning_rate": 2.5207521586897876e-06, |
|
"loss": 0.4651, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_nli-pairs_loss": 0.8765493631362915, |
|
"eval_nli-pairs_runtime": 3.6164, |
|
"eval_nli-pairs_samples_per_second": 27.652, |
|
"eval_nli-pairs_steps_per_second": 1.106, |
|
"eval_sts-test_pearson_cosine": 0.7880147168961996, |
|
"eval_sts-test_pearson_dot": 0.5198107156003906, |
|
"eval_sts-test_pearson_euclidean": 0.7362840264051249, |
|
"eval_sts-test_pearson_manhattan": 0.7307716823389564, |
|
"eval_sts-test_pearson_max": 0.7880147168961996, |
|
"eval_sts-test_spearman_cosine": 0.8071394355093185, |
|
"eval_sts-test_spearman_dot": 0.49865317522814645, |
|
"eval_sts-test_spearman_euclidean": 0.7278395467197664, |
|
"eval_sts-test_spearman_manhattan": 0.7246934378777047, |
|
"eval_sts-test_spearman_max": 0.8071394355093185, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_vitaminc-pairs_loss": 4.717629432678223, |
|
"eval_vitaminc-pairs_runtime": 1.1248, |
|
"eval_vitaminc-pairs_samples_per_second": 75.571, |
|
"eval_vitaminc-pairs_steps_per_second": 2.667, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_sts-label_loss": 3.7598328590393066, |
|
"eval_sts-label_runtime": 0.2743, |
|
"eval_sts-label_samples_per_second": 364.548, |
|
"eval_sts-label_steps_per_second": 14.582, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_qnli-contrastive_loss": 0.11829647421836853, |
|
"eval_qnli-contrastive_runtime": 0.3606, |
|
"eval_qnli-contrastive_samples_per_second": 277.334, |
|
"eval_qnli-contrastive_steps_per_second": 11.093, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_scitail-pairs-qa_loss": 0.05503571406006813, |
|
"eval_scitail-pairs-qa_runtime": 0.874, |
|
"eval_scitail-pairs-qa_samples_per_second": 114.411, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.576, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_scitail-pairs-pos_loss": 0.47530597448349, |
|
"eval_scitail-pairs-pos_runtime": 1.3429, |
|
"eval_scitail-pairs-pos_samples_per_second": 74.463, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.979, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_xsum-pairs_loss": 0.22936196625232697, |
|
"eval_xsum-pairs_runtime": 0.9431, |
|
"eval_xsum-pairs_samples_per_second": 106.028, |
|
"eval_xsum-pairs_steps_per_second": 4.241, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_compression-pairs_loss": 0.08313465863466263, |
|
"eval_compression-pairs_runtime": 0.2781, |
|
"eval_compression-pairs_samples_per_second": 359.542, |
|
"eval_compression-pairs_steps_per_second": 14.382, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_sciq_pairs_loss": 0.27646955847740173, |
|
"eval_sciq_pairs_runtime": 4.0554, |
|
"eval_sciq_pairs_samples_per_second": 24.658, |
|
"eval_sciq_pairs_steps_per_second": 0.986, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_qasc_pairs_loss": 0.17006540298461914, |
|
"eval_qasc_pairs_runtime": 1.0538, |
|
"eval_qasc_pairs_samples_per_second": 94.898, |
|
"eval_qasc_pairs_steps_per_second": 3.796, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_openbookqa_pairs_loss": 1.5487664937973022, |
|
"eval_openbookqa_pairs_runtime": 0.8956, |
|
"eval_openbookqa_pairs_samples_per_second": 111.653, |
|
"eval_openbookqa_pairs_steps_per_second": 4.466, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_msmarco_pairs_loss": 0.4861982464790344, |
|
"eval_msmarco_pairs_runtime": 2.0548, |
|
"eval_msmarco_pairs_samples_per_second": 48.666, |
|
"eval_msmarco_pairs_steps_per_second": 1.947, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_nq_pairs_loss": 0.22520922124385834, |
|
"eval_nq_pairs_runtime": 4.4973, |
|
"eval_nq_pairs_samples_per_second": 22.236, |
|
"eval_nq_pairs_steps_per_second": 0.889, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_trivia_pairs_loss": 0.7480303049087524, |
|
"eval_trivia_pairs_runtime": 6.498, |
|
"eval_trivia_pairs_samples_per_second": 15.389, |
|
"eval_trivia_pairs_steps_per_second": 0.616, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_quora_pairs_loss": 0.06060533598065376, |
|
"eval_quora_pairs_runtime": 0.6722, |
|
"eval_quora_pairs_samples_per_second": 148.76, |
|
"eval_quora_pairs_steps_per_second": 5.95, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_gooaq_pairs_loss": 0.4696855843067169, |
|
"eval_gooaq_pairs_runtime": 1.3985, |
|
"eval_gooaq_pairs_samples_per_second": 71.503, |
|
"eval_gooaq_pairs_steps_per_second": 2.86, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9026798307475318, |
|
"eval_mrpc_pairs_loss": 0.04175671190023422, |
|
"eval_mrpc_pairs_runtime": 0.2618, |
|
"eval_mrpc_pairs_samples_per_second": 381.956, |
|
"eval_mrpc_pairs_steps_per_second": 15.278, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9076947186961292, |
|
"grad_norm": 19.970914840698242, |
|
"learning_rate": 2.3808045960365743e-06, |
|
"loss": 0.6346, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 0.9127096066447266, |
|
"grad_norm": 7.2970075607299805, |
|
"learning_rate": 2.2445182249778363e-06, |
|
"loss": 1.1103, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 0.917724494593324, |
|
"grad_norm": 14.34080982208252, |
|
"learning_rate": 2.1119325861102666e-06, |
|
"loss": 0.7667, |
|
"step": 5856 |
|
}, |
|
{ |
|
"epoch": 0.9227393825419213, |
|
"grad_norm": 16.219850540161133, |
|
"learning_rate": 1.98308614634171e-06, |
|
"loss": 0.9174, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.9277542704905187, |
|
"grad_norm": 17.201740264892578, |
|
"learning_rate": 1.8580162877307744e-06, |
|
"loss": 0.7609, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.9327691584391161, |
|
"grad_norm": 12.591241836547852, |
|
"learning_rate": 1.7367592966412454e-06, |
|
"loss": 0.8993, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.9377840463877135, |
|
"grad_norm": 17.12389373779297, |
|
"learning_rate": 1.619350353214355e-06, |
|
"loss": 0.7587, |
|
"step": 5984 |
|
}, |
|
{ |
|
"epoch": 0.9427989343363109, |
|
"grad_norm": 44.237342834472656, |
|
"learning_rate": 1.5058235211620126e-06, |
|
"loss": 0.935, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 0.9478138222849083, |
|
"grad_norm": 4.658092975616455, |
|
"learning_rate": 1.3962117378839439e-06, |
|
"loss": 0.8551, |
|
"step": 6048 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"grad_norm": 0.4202437698841095, |
|
"learning_rate": 1.2905468049116077e-06, |
|
"loss": 1.4247, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_nli-pairs_loss": 0.880797266960144, |
|
"eval_nli-pairs_runtime": 3.65, |
|
"eval_nli-pairs_samples_per_second": 27.397, |
|
"eval_nli-pairs_steps_per_second": 1.096, |
|
"eval_sts-test_pearson_cosine": 0.7886384880168056, |
|
"eval_sts-test_pearson_dot": 0.5209320238457065, |
|
"eval_sts-test_pearson_euclidean": 0.7365619856047663, |
|
"eval_sts-test_pearson_manhattan": 0.7309874377904119, |
|
"eval_sts-test_pearson_max": 0.7886384880168056, |
|
"eval_sts-test_spearman_cosine": 0.8078306606920327, |
|
"eval_sts-test_spearman_dot": 0.4995671547413244, |
|
"eval_sts-test_spearman_euclidean": 0.7281379887760366, |
|
"eval_sts-test_spearman_manhattan": 0.7249545388844193, |
|
"eval_sts-test_spearman_max": 0.8078306606920327, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_vitaminc-pairs_loss": 4.70750617980957, |
|
"eval_vitaminc-pairs_runtime": 1.1372, |
|
"eval_vitaminc-pairs_samples_per_second": 74.747, |
|
"eval_vitaminc-pairs_steps_per_second": 2.638, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_sts-label_loss": 3.7686922550201416, |
|
"eval_sts-label_runtime": 0.2807, |
|
"eval_sts-label_samples_per_second": 356.243, |
|
"eval_sts-label_steps_per_second": 14.25, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_qnli-contrastive_loss": 0.12000326067209244, |
|
"eval_qnli-contrastive_runtime": 0.3651, |
|
"eval_qnli-contrastive_samples_per_second": 273.878, |
|
"eval_qnli-contrastive_steps_per_second": 10.955, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_scitail-pairs-qa_loss": 0.055266913026571274, |
|
"eval_scitail-pairs-qa_runtime": 0.8813, |
|
"eval_scitail-pairs-qa_samples_per_second": 113.472, |
|
"eval_scitail-pairs-qa_steps_per_second": 4.539, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_scitail-pairs-pos_loss": 0.46404972672462463, |
|
"eval_scitail-pairs-pos_runtime": 1.3468, |
|
"eval_scitail-pairs-pos_samples_per_second": 74.248, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.97, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_xsum-pairs_loss": 0.22768865525722504, |
|
"eval_xsum-pairs_runtime": 0.9385, |
|
"eval_xsum-pairs_samples_per_second": 106.553, |
|
"eval_xsum-pairs_steps_per_second": 4.262, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_compression-pairs_loss": 0.08245458453893661, |
|
"eval_compression-pairs_runtime": 0.2783, |
|
"eval_compression-pairs_samples_per_second": 359.331, |
|
"eval_compression-pairs_steps_per_second": 14.373, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_sciq_pairs_loss": 0.24696679413318634, |
|
"eval_sciq_pairs_runtime": 4.072, |
|
"eval_sciq_pairs_samples_per_second": 24.558, |
|
"eval_sciq_pairs_steps_per_second": 0.982, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_qasc_pairs_loss": 0.16628116369247437, |
|
"eval_qasc_pairs_runtime": 1.066, |
|
"eval_qasc_pairs_samples_per_second": 93.809, |
|
"eval_qasc_pairs_steps_per_second": 3.752, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_openbookqa_pairs_loss": 1.5343760251998901, |
|
"eval_openbookqa_pairs_runtime": 0.9064, |
|
"eval_openbookqa_pairs_samples_per_second": 110.324, |
|
"eval_openbookqa_pairs_steps_per_second": 4.413, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_msmarco_pairs_loss": 0.48861968517303467, |
|
"eval_msmarco_pairs_runtime": 2.0777, |
|
"eval_msmarco_pairs_samples_per_second": 48.131, |
|
"eval_msmarco_pairs_steps_per_second": 1.925, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_nq_pairs_loss": 0.2192871868610382, |
|
"eval_nq_pairs_runtime": 4.5629, |
|
"eval_nq_pairs_samples_per_second": 21.916, |
|
"eval_nq_pairs_steps_per_second": 0.877, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_trivia_pairs_loss": 0.7455114126205444, |
|
"eval_trivia_pairs_runtime": 6.4434, |
|
"eval_trivia_pairs_samples_per_second": 15.52, |
|
"eval_trivia_pairs_steps_per_second": 0.621, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_quora_pairs_loss": 0.0536942183971405, |
|
"eval_quora_pairs_runtime": 0.6874, |
|
"eval_quora_pairs_samples_per_second": 145.481, |
|
"eval_quora_pairs_steps_per_second": 5.819, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_gooaq_pairs_loss": 0.4775075614452362, |
|
"eval_gooaq_pairs_runtime": 1.3946, |
|
"eval_gooaq_pairs_samples_per_second": 71.707, |
|
"eval_gooaq_pairs_steps_per_second": 2.868, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9528287102335057, |
|
"eval_mrpc_pairs_loss": 0.041804660111665726, |
|
"eval_mrpc_pairs_runtime": 0.2631, |
|
"eval_mrpc_pairs_samples_per_second": 380.035, |
|
"eval_mrpc_pairs_steps_per_second": 15.201, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9578435981821031, |
|
"grad_norm": 15.8797607421875, |
|
"learning_rate": 1.1888593786816527e-06, |
|
"loss": 0.3377, |
|
"step": 6112 |
|
}, |
|
{ |
|
"epoch": 0.9628584861307005, |
|
"grad_norm": 54.2625732421875, |
|
"learning_rate": 1.0911789616415957e-06, |
|
"loss": 1.163, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.967873374079298, |
|
"grad_norm": 27.014169692993164, |
|
"learning_rate": 9.975338936903327e-07, |
|
"loss": 1.1638, |
|
"step": 6176 |
|
}, |
|
{ |
|
"epoch": 0.9728882620278954, |
|
"grad_norm": 12.264323234558105, |
|
"learning_rate": 9.079513439558945e-07, |
|
"loss": 0.7428, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 0.9779031499764927, |
|
"grad_norm": 0.2486962229013443, |
|
"learning_rate": 8.224573029129201e-07, |
|
"loss": 0.3827, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.9829180379250901, |
|
"grad_norm": 0.19951488077640533, |
|
"learning_rate": 7.41076574842064e-07, |
|
"loss": 1.0739, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 0.9879329258736875, |
|
"grad_norm": 1.6168636083602905, |
|
"learning_rate": 6.638327706335673e-07, |
|
"loss": 0.7049, |
|
"step": 6304 |
|
}, |
|
{ |
|
"epoch": 0.9929478138222849, |
|
"grad_norm": 1.4084432125091553, |
|
"learning_rate": 5.907483009370463e-07, |
|
"loss": 0.9298, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.9979627017708823, |
|
"grad_norm": 0.7779116630554199, |
|
"learning_rate": 5.218443696595343e-07, |
|
"loss": 0.6243, |
|
"step": 6368 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 12762, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1277, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|