|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.202247191011236, |
|
"eval_steps": 27, |
|
"global_step": 214, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016853932584269662, |
|
"grad_norm": 2.9885776042938232, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.6012, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.033707865168539325, |
|
"grad_norm": 3.184929132461548, |
|
"learning_rate": 2.962962962962963e-06, |
|
"loss": 0.7573, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05056179775280899, |
|
"grad_norm": 3.256159782409668, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.9212, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06741573033707865, |
|
"grad_norm": 2.833339214324951, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.6117, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08426966292134831, |
|
"grad_norm": 3.08292818069458, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.8545, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"grad_norm": 2.317431688308716, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.6515, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11797752808988764, |
|
"grad_norm": 2.9611644744873047, |
|
"learning_rate": 1.037037037037037e-05, |
|
"loss": 0.7159, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.1348314606741573, |
|
"grad_norm": 2.698537826538086, |
|
"learning_rate": 1.1851851851851852e-05, |
|
"loss": 0.7019, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"grad_norm": 2.222154378890991, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.4411, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7817381620407104, |
|
"eval_VitaminC_cosine_ap": 0.5507972943944112, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.28573715686798096, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5703125, |
|
"eval_VitaminC_dot_accuracy_threshold": 316.7283020019531, |
|
"eval_VitaminC_dot_ap": 0.5511866185449577, |
|
"eval_VitaminC_dot_f1": 0.6577540106951871, |
|
"eval_VitaminC_dot_f1_threshold": 106.75863647460938, |
|
"eval_VitaminC_dot_precision": 0.4900398406374502, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.298419952392578, |
|
"eval_VitaminC_euclidean_ap": 0.5476323986807207, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.83933448791504, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.578125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 279.69085693359375, |
|
"eval_VitaminC_manhattan_ap": 0.5412538781107805, |
|
"eval_VitaminC_manhattan_f1": 0.6577540106951871, |
|
"eval_VitaminC_manhattan_f1_threshold": 499.8836364746094, |
|
"eval_VitaminC_manhattan_precision": 0.4900398406374502, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 316.7283020019531, |
|
"eval_VitaminC_max_ap": 0.5511866185449577, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 499.8836364746094, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5511866185449577, |
|
"eval_sts-test_pearson_cosine": 0.8488243436029344, |
|
"eval_sts-test_pearson_dot": 0.8480167969551653, |
|
"eval_sts-test_pearson_euclidean": 0.8800283985117625, |
|
"eval_sts-test_pearson_manhattan": 0.880588311422627, |
|
"eval_sts-test_pearson_max": 0.880588311422627, |
|
"eval_sts-test_spearman_cosine": 0.8905659331642088, |
|
"eval_sts-test_spearman_dot": 0.8692084657204004, |
|
"eval_sts-test_spearman_euclidean": 0.8809566840232712, |
|
"eval_sts-test_spearman_manhattan": 0.883434007028195, |
|
"eval_sts-test_spearman_max": 0.8905659331642088, |
|
"eval_vitaminc-pairs_loss": 2.465860366821289, |
|
"eval_vitaminc-pairs_runtime": 1.4615, |
|
"eval_vitaminc-pairs_samples_per_second": 73.899, |
|
"eval_vitaminc-pairs_steps_per_second": 1.368, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_negation-triplets_loss": 1.7310789823532104, |
|
"eval_negation-triplets_runtime": 0.3009, |
|
"eval_negation-triplets_samples_per_second": 212.692, |
|
"eval_negation-triplets_steps_per_second": 3.323, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_scitail-pairs-pos_loss": 0.1150394082069397, |
|
"eval_scitail-pairs-pos_runtime": 0.3739, |
|
"eval_scitail-pairs-pos_samples_per_second": 144.431, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.675, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_xsum-pairs_loss": 0.11168850213289261, |
|
"eval_xsum-pairs_runtime": 3.1697, |
|
"eval_xsum-pairs_samples_per_second": 40.382, |
|
"eval_xsum-pairs_steps_per_second": 0.631, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_sciq_pairs_loss": 0.03450964391231537, |
|
"eval_sciq_pairs_runtime": 3.3283, |
|
"eval_sciq_pairs_samples_per_second": 38.459, |
|
"eval_sciq_pairs_steps_per_second": 0.601, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_qasc_pairs_loss": 0.11095743626356125, |
|
"eval_qasc_pairs_runtime": 0.6261, |
|
"eval_qasc_pairs_samples_per_second": 204.45, |
|
"eval_qasc_pairs_steps_per_second": 3.195, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_openbookqa_pairs_loss": 0.7092063426971436, |
|
"eval_openbookqa_pairs_runtime": 0.5866, |
|
"eval_openbookqa_pairs_samples_per_second": 218.19, |
|
"eval_openbookqa_pairs_steps_per_second": 3.409, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_msmarco_pairs_loss": 0.3955218493938446, |
|
"eval_msmarco_pairs_runtime": 1.2942, |
|
"eval_msmarco_pairs_samples_per_second": 98.902, |
|
"eval_msmarco_pairs_steps_per_second": 1.545, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_nq_pairs_loss": 0.42051073908805847, |
|
"eval_nq_pairs_runtime": 2.3875, |
|
"eval_nq_pairs_samples_per_second": 53.612, |
|
"eval_nq_pairs_steps_per_second": 0.838, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_trivia_pairs_loss": 0.93178790807724, |
|
"eval_trivia_pairs_runtime": 4.4363, |
|
"eval_trivia_pairs_samples_per_second": 28.853, |
|
"eval_trivia_pairs_steps_per_second": 0.451, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_gooaq_pairs_loss": 0.6505913138389587, |
|
"eval_gooaq_pairs_runtime": 0.8826, |
|
"eval_gooaq_pairs_samples_per_second": 145.027, |
|
"eval_gooaq_pairs_steps_per_second": 2.266, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15168539325842698, |
|
"eval_paws-pos_loss": 0.024931101128458977, |
|
"eval_paws-pos_runtime": 0.6852, |
|
"eval_paws-pos_samples_per_second": 186.805, |
|
"eval_paws-pos_steps_per_second": 2.919, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16853932584269662, |
|
"grad_norm": 2.826900005340576, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.5125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1853932584269663, |
|
"grad_norm": 2.9938910007476807, |
|
"learning_rate": 1.6296296296296297e-05, |
|
"loss": 0.6885, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"grad_norm": 3.3046395778656006, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.6435, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.21910112359550563, |
|
"grad_norm": 2.4184651374816895, |
|
"learning_rate": 1.925925925925926e-05, |
|
"loss": 0.753, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.23595505617977527, |
|
"grad_norm": 2.9905433654785156, |
|
"learning_rate": 2.074074074074074e-05, |
|
"loss": 0.7427, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.25280898876404495, |
|
"grad_norm": 2.745820999145508, |
|
"learning_rate": 2.2222222222222227e-05, |
|
"loss": 0.5083, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2696629213483146, |
|
"grad_norm": 2.6370577812194824, |
|
"learning_rate": 2.3703703703703703e-05, |
|
"loss": 0.7454, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.28651685393258425, |
|
"grad_norm": 3.044011116027832, |
|
"learning_rate": 2.5185185185185187e-05, |
|
"loss": 0.8356, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"grad_norm": 3.718804121017456, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.8864, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7991844415664673, |
|
"eval_VitaminC_cosine_ap": 0.5485498837322925, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3160865008831024, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.578125, |
|
"eval_VitaminC_dot_accuracy_threshold": 327.0416564941406, |
|
"eval_VitaminC_dot_ap": 0.54993134882601, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 117.44181060791016, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.57421875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.019258499145508, |
|
"eval_VitaminC_euclidean_ap": 0.5435066540334542, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.688644409179688, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 283.876220703125, |
|
"eval_VitaminC_manhattan_ap": 0.5416615397828658, |
|
"eval_VitaminC_manhattan_f1": 0.6559999999999999, |
|
"eval_VitaminC_manhattan_f1_threshold": 514.0216064453125, |
|
"eval_VitaminC_manhattan_precision": 0.4880952380952381, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 327.0416564941406, |
|
"eval_VitaminC_max_ap": 0.54993134882601, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 514.0216064453125, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.54993134882601, |
|
"eval_sts-test_pearson_cosine": 0.8452615878553369, |
|
"eval_sts-test_pearson_dot": 0.8404858620687519, |
|
"eval_sts-test_pearson_euclidean": 0.8780527810910925, |
|
"eval_sts-test_pearson_manhattan": 0.878916157345712, |
|
"eval_sts-test_pearson_max": 0.878916157345712, |
|
"eval_sts-test_spearman_cosine": 0.8876915367075635, |
|
"eval_sts-test_spearman_dot": 0.8608104875327304, |
|
"eval_sts-test_spearman_euclidean": 0.8804138856889071, |
|
"eval_sts-test_spearman_manhattan": 0.8822803815444743, |
|
"eval_sts-test_spearman_max": 0.8876915367075635, |
|
"eval_vitaminc-pairs_loss": 2.454524040222168, |
|
"eval_vitaminc-pairs_runtime": 1.4583, |
|
"eval_vitaminc-pairs_samples_per_second": 74.057, |
|
"eval_vitaminc-pairs_steps_per_second": 1.371, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_negation-triplets_loss": 1.7277792692184448, |
|
"eval_negation-triplets_runtime": 0.3027, |
|
"eval_negation-triplets_samples_per_second": 211.436, |
|
"eval_negation-triplets_steps_per_second": 3.304, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_scitail-pairs-pos_loss": 0.11168555170297623, |
|
"eval_scitail-pairs-pos_runtime": 0.3726, |
|
"eval_scitail-pairs-pos_samples_per_second": 144.911, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.684, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_xsum-pairs_loss": 0.10087604075670242, |
|
"eval_xsum-pairs_runtime": 3.1701, |
|
"eval_xsum-pairs_samples_per_second": 40.377, |
|
"eval_xsum-pairs_steps_per_second": 0.631, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_sciq_pairs_loss": 0.03466618433594704, |
|
"eval_sciq_pairs_runtime": 3.3778, |
|
"eval_sciq_pairs_samples_per_second": 37.895, |
|
"eval_sciq_pairs_steps_per_second": 0.592, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_qasc_pairs_loss": 0.10551701486110687, |
|
"eval_qasc_pairs_runtime": 0.6271, |
|
"eval_qasc_pairs_samples_per_second": 204.125, |
|
"eval_qasc_pairs_steps_per_second": 3.189, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_openbookqa_pairs_loss": 0.7239958643913269, |
|
"eval_openbookqa_pairs_runtime": 0.5811, |
|
"eval_openbookqa_pairs_samples_per_second": 220.255, |
|
"eval_openbookqa_pairs_steps_per_second": 3.441, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_msmarco_pairs_loss": 0.3808779716491699, |
|
"eval_msmarco_pairs_runtime": 1.2919, |
|
"eval_msmarco_pairs_samples_per_second": 99.082, |
|
"eval_msmarco_pairs_steps_per_second": 1.548, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_nq_pairs_loss": 0.44170400500297546, |
|
"eval_nq_pairs_runtime": 2.3835, |
|
"eval_nq_pairs_samples_per_second": 53.703, |
|
"eval_nq_pairs_steps_per_second": 0.839, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_trivia_pairs_loss": 0.9158428907394409, |
|
"eval_trivia_pairs_runtime": 4.4326, |
|
"eval_trivia_pairs_samples_per_second": 28.877, |
|
"eval_trivia_pairs_steps_per_second": 0.451, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_gooaq_pairs_loss": 0.6208247542381287, |
|
"eval_gooaq_pairs_runtime": 0.8797, |
|
"eval_gooaq_pairs_samples_per_second": 145.497, |
|
"eval_gooaq_pairs_steps_per_second": 2.273, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_paws-pos_loss": 0.02517784759402275, |
|
"eval_paws-pos_runtime": 0.694, |
|
"eval_paws-pos_samples_per_second": 184.442, |
|
"eval_paws-pos_steps_per_second": 2.882, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3202247191011236, |
|
"grad_norm": 2.173736572265625, |
|
"learning_rate": 2.814814814814815e-05, |
|
"loss": 0.6015, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.33707865168539325, |
|
"grad_norm": 3.8964712619781494, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.9482, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3539325842696629, |
|
"grad_norm": 2.659498691558838, |
|
"learning_rate": 3.111111111111112e-05, |
|
"loss": 0.5404, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3707865168539326, |
|
"grad_norm": 3.3499844074249268, |
|
"learning_rate": 3.259259259259259e-05, |
|
"loss": 0.805, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.38764044943820225, |
|
"grad_norm": 3.770142078399658, |
|
"learning_rate": 3.4074074074074077e-05, |
|
"loss": 0.7184, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"grad_norm": 3.740880012512207, |
|
"learning_rate": 3.555555555555555e-05, |
|
"loss": 0.8708, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.42134831460674155, |
|
"grad_norm": 2.981106996536255, |
|
"learning_rate": 3.703703703703704e-05, |
|
"loss": 0.8327, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.43820224719101125, |
|
"grad_norm": 2.3469011783599854, |
|
"learning_rate": 3.851851851851852e-05, |
|
"loss": 0.5025, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"grad_norm": 3.296035051345825, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6517, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7859437465667725, |
|
"eval_VitaminC_cosine_ap": 0.5557444337961499, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3211573362350464, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.578125, |
|
"eval_VitaminC_dot_accuracy_threshold": 315.9444580078125, |
|
"eval_VitaminC_dot_ap": 0.5539524528858992, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 129.88558959960938, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.58203125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.113249778747559, |
|
"eval_VitaminC_euclidean_ap": 0.5510190217865811, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.90462303161621, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.578125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 276.40142822265625, |
|
"eval_VitaminC_manhattan_ap": 0.5429240708188645, |
|
"eval_VitaminC_manhattan_f1": 0.6576819407008085, |
|
"eval_VitaminC_manhattan_f1_threshold": 469.7353515625, |
|
"eval_VitaminC_manhattan_precision": 0.49193548387096775, |
|
"eval_VitaminC_manhattan_recall": 0.991869918699187, |
|
"eval_VitaminC_max_accuracy": 0.58203125, |
|
"eval_VitaminC_max_accuracy_threshold": 315.9444580078125, |
|
"eval_VitaminC_max_ap": 0.5557444337961499, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 469.7353515625, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5557444337961499, |
|
"eval_sts-test_pearson_cosine": 0.8483316632682467, |
|
"eval_sts-test_pearson_dot": 0.8392403098680445, |
|
"eval_sts-test_pearson_euclidean": 0.8814283057813619, |
|
"eval_sts-test_pearson_manhattan": 0.8815226866327923, |
|
"eval_sts-test_pearson_max": 0.8815226866327923, |
|
"eval_sts-test_spearman_cosine": 0.8903503892346, |
|
"eval_sts-test_spearman_dot": 0.857844431199042, |
|
"eval_sts-test_spearman_euclidean": 0.8851830636663006, |
|
"eval_sts-test_spearman_manhattan": 0.8865568876827619, |
|
"eval_sts-test_spearman_max": 0.8903503892346, |
|
"eval_vitaminc-pairs_loss": 2.3538782596588135, |
|
"eval_vitaminc-pairs_runtime": 1.4618, |
|
"eval_vitaminc-pairs_samples_per_second": 73.88, |
|
"eval_vitaminc-pairs_steps_per_second": 1.368, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_negation-triplets_loss": 1.649215579032898, |
|
"eval_negation-triplets_runtime": 0.3081, |
|
"eval_negation-triplets_samples_per_second": 207.723, |
|
"eval_negation-triplets_steps_per_second": 3.246, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_scitail-pairs-pos_loss": 0.11823470890522003, |
|
"eval_scitail-pairs-pos_runtime": 0.376, |
|
"eval_scitail-pairs-pos_samples_per_second": 143.616, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.66, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_xsum-pairs_loss": 0.08420603722333908, |
|
"eval_xsum-pairs_runtime": 3.1576, |
|
"eval_xsum-pairs_samples_per_second": 40.538, |
|
"eval_xsum-pairs_steps_per_second": 0.633, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_sciq_pairs_loss": 0.034781794995069504, |
|
"eval_sciq_pairs_runtime": 3.2597, |
|
"eval_sciq_pairs_samples_per_second": 39.267, |
|
"eval_sciq_pairs_steps_per_second": 0.614, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_qasc_pairs_loss": 0.10597346723079681, |
|
"eval_qasc_pairs_runtime": 0.6245, |
|
"eval_qasc_pairs_samples_per_second": 204.979, |
|
"eval_qasc_pairs_steps_per_second": 3.203, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_openbookqa_pairs_loss": 0.7160983681678772, |
|
"eval_openbookqa_pairs_runtime": 0.5767, |
|
"eval_openbookqa_pairs_samples_per_second": 221.961, |
|
"eval_openbookqa_pairs_steps_per_second": 3.468, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_msmarco_pairs_loss": 0.3454173803329468, |
|
"eval_msmarco_pairs_runtime": 1.2912, |
|
"eval_msmarco_pairs_samples_per_second": 99.134, |
|
"eval_msmarco_pairs_steps_per_second": 1.549, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_nq_pairs_loss": 0.4442503750324249, |
|
"eval_nq_pairs_runtime": 2.3854, |
|
"eval_nq_pairs_samples_per_second": 53.659, |
|
"eval_nq_pairs_steps_per_second": 0.838, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_trivia_pairs_loss": 0.9324482679367065, |
|
"eval_trivia_pairs_runtime": 4.4251, |
|
"eval_trivia_pairs_samples_per_second": 28.926, |
|
"eval_trivia_pairs_steps_per_second": 0.452, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_gooaq_pairs_loss": 0.6094165444374084, |
|
"eval_gooaq_pairs_runtime": 0.8751, |
|
"eval_gooaq_pairs_samples_per_second": 146.261, |
|
"eval_gooaq_pairs_steps_per_second": 2.285, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4550561797752809, |
|
"eval_paws-pos_loss": 0.024421451613307, |
|
"eval_paws-pos_runtime": 0.6865, |
|
"eval_paws-pos_samples_per_second": 186.444, |
|
"eval_paws-pos_steps_per_second": 2.913, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.47191011235955055, |
|
"grad_norm": 3.1395561695098877, |
|
"learning_rate": 3.999675367909485e-05, |
|
"loss": 0.5801, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4887640449438202, |
|
"grad_norm": 2.7977917194366455, |
|
"learning_rate": 3.998701612152597e-05, |
|
"loss": 0.791, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"grad_norm": 2.3682048320770264, |
|
"learning_rate": 3.997079154212493e-05, |
|
"loss": 0.6042, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5224719101123596, |
|
"grad_norm": 2.843482255935669, |
|
"learning_rate": 3.99480869635839e-05, |
|
"loss": 0.7559, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5393258426966292, |
|
"grad_norm": 2.7346785068511963, |
|
"learning_rate": 3.9918912213415936e-05, |
|
"loss": 0.6258, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5561797752808989, |
|
"grad_norm": 3.149007558822632, |
|
"learning_rate": 3.9883279919701226e-05, |
|
"loss": 0.8853, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5730337078651685, |
|
"grad_norm": 3.3424761295318604, |
|
"learning_rate": 3.9841205505621106e-05, |
|
"loss": 0.5947, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5898876404494382, |
|
"grad_norm": 2.6377146244049072, |
|
"learning_rate": 3.979270718278224e-05, |
|
"loss": 0.644, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"grad_norm": 1.3963145017623901, |
|
"learning_rate": 3.973780594333386e-05, |
|
"loss": 0.5682, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.58984375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7784540057182312, |
|
"eval_VitaminC_cosine_ap": 0.556890553952148, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3015836775302887, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.57421875, |
|
"eval_VitaminC_dot_accuracy_threshold": 310.07818603515625, |
|
"eval_VitaminC_dot_ap": 0.5486679382699982, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 110.96945190429688, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.410951614379883, |
|
"eval_VitaminC_euclidean_ap": 0.5551150763619972, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.933565139770508, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.58203125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 236.87246704101562, |
|
"eval_VitaminC_manhattan_ap": 0.5465417933692003, |
|
"eval_VitaminC_manhattan_f1": 0.6576086956521738, |
|
"eval_VitaminC_manhattan_f1_threshold": 479.8819580078125, |
|
"eval_VitaminC_manhattan_precision": 0.49387755102040815, |
|
"eval_VitaminC_manhattan_recall": 0.983739837398374, |
|
"eval_VitaminC_max_accuracy": 0.58984375, |
|
"eval_VitaminC_max_accuracy_threshold": 310.07818603515625, |
|
"eval_VitaminC_max_ap": 0.556890553952148, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 479.8819580078125, |
|
"eval_VitaminC_max_precision": 0.49387755102040815, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.556890553952148, |
|
"eval_sts-test_pearson_cosine": 0.8554011700695058, |
|
"eval_sts-test_pearson_dot": 0.8545651085608208, |
|
"eval_sts-test_pearson_euclidean": 0.8842988585732054, |
|
"eval_sts-test_pearson_manhattan": 0.8850809337540164, |
|
"eval_sts-test_pearson_max": 0.8850809337540164, |
|
"eval_sts-test_spearman_cosine": 0.8959079853731212, |
|
"eval_sts-test_spearman_dot": 0.876135947365041, |
|
"eval_sts-test_spearman_euclidean": 0.8856381406339634, |
|
"eval_sts-test_spearman_manhattan": 0.8868948834793577, |
|
"eval_sts-test_spearman_max": 0.8959079853731212, |
|
"eval_vitaminc-pairs_loss": 2.4271271228790283, |
|
"eval_vitaminc-pairs_runtime": 1.4672, |
|
"eval_vitaminc-pairs_samples_per_second": 73.61, |
|
"eval_vitaminc-pairs_steps_per_second": 1.363, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_negation-triplets_loss": 1.6174229383468628, |
|
"eval_negation-triplets_runtime": 0.3094, |
|
"eval_negation-triplets_samples_per_second": 206.869, |
|
"eval_negation-triplets_steps_per_second": 3.232, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_scitail-pairs-pos_loss": 0.10586681962013245, |
|
"eval_scitail-pairs-pos_runtime": 0.4307, |
|
"eval_scitail-pairs-pos_samples_per_second": 125.374, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.322, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_xsum-pairs_loss": 0.1041470319032669, |
|
"eval_xsum-pairs_runtime": 3.2912, |
|
"eval_xsum-pairs_samples_per_second": 38.891, |
|
"eval_xsum-pairs_steps_per_second": 0.608, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_sciq_pairs_loss": 0.03364330902695656, |
|
"eval_sciq_pairs_runtime": 3.3617, |
|
"eval_sciq_pairs_samples_per_second": 38.076, |
|
"eval_sciq_pairs_steps_per_second": 0.595, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_qasc_pairs_loss": 0.10827122628688812, |
|
"eval_qasc_pairs_runtime": 0.6238, |
|
"eval_qasc_pairs_samples_per_second": 205.206, |
|
"eval_qasc_pairs_steps_per_second": 3.206, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_openbookqa_pairs_loss": 0.7513518929481506, |
|
"eval_openbookqa_pairs_runtime": 0.5882, |
|
"eval_openbookqa_pairs_samples_per_second": 217.628, |
|
"eval_openbookqa_pairs_steps_per_second": 3.4, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_msmarco_pairs_loss": 0.3447520434856415, |
|
"eval_msmarco_pairs_runtime": 1.2968, |
|
"eval_msmarco_pairs_samples_per_second": 98.705, |
|
"eval_msmarco_pairs_steps_per_second": 1.542, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_nq_pairs_loss": 0.4307234287261963, |
|
"eval_nq_pairs_runtime": 2.4164, |
|
"eval_nq_pairs_samples_per_second": 52.971, |
|
"eval_nq_pairs_steps_per_second": 0.828, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_trivia_pairs_loss": 0.9260168075561523, |
|
"eval_trivia_pairs_runtime": 4.4644, |
|
"eval_trivia_pairs_samples_per_second": 28.671, |
|
"eval_trivia_pairs_steps_per_second": 0.448, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_gooaq_pairs_loss": 0.6301646828651428, |
|
"eval_gooaq_pairs_runtime": 0.9092, |
|
"eval_gooaq_pairs_samples_per_second": 140.777, |
|
"eval_gooaq_pairs_steps_per_second": 2.2, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_paws-pos_loss": 0.024637963622808456, |
|
"eval_paws-pos_runtime": 0.696, |
|
"eval_paws-pos_samples_per_second": 183.9, |
|
"eval_paws-pos_steps_per_second": 2.873, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6235955056179775, |
|
"grad_norm": 3.0150091648101807, |
|
"learning_rate": 3.9676525550881484e-05, |
|
"loss": 0.5974, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6404494382022472, |
|
"grad_norm": 2.7985854148864746, |
|
"learning_rate": 3.9608892530200996e-05, |
|
"loss": 0.649, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6573033707865169, |
|
"grad_norm": 3.00435471534729, |
|
"learning_rate": 3.953493615575757e-05, |
|
"loss": 0.6966, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6741573033707865, |
|
"grad_norm": 3.276264190673828, |
|
"learning_rate": 3.945468843903448e-05, |
|
"loss": 0.542, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6910112359550562, |
|
"grad_norm": 3.144037961959839, |
|
"learning_rate": 3.936818411467709e-05, |
|
"loss": 0.8583, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"grad_norm": 2.498800754547119, |
|
"learning_rate": 3.9275460625458294e-05, |
|
"loss": 0.6416, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7247191011235955, |
|
"grad_norm": 3.01488995552063, |
|
"learning_rate": 3.917655810607162e-05, |
|
"loss": 0.6273, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7415730337078652, |
|
"grad_norm": 3.439838171005249, |
|
"learning_rate": 3.907151936575922e-05, |
|
"loss": 0.8621, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"grad_norm": 3.0246922969818115, |
|
"learning_rate": 3.896038986978224e-05, |
|
"loss": 0.7221, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8462294936180115, |
|
"eval_VitaminC_cosine_ap": 0.5610960529859609, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3176865577697754, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5859375, |
|
"eval_VitaminC_dot_accuracy_threshold": 319.20416259765625, |
|
"eval_VitaminC_dot_ap": 0.5540963620104548, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 121.18156433105469, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.58203125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 15.303094863891602, |
|
"eval_VitaminC_euclidean_ap": 0.5590109366458975, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.605255126953125, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.578125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 240.15660095214844, |
|
"eval_VitaminC_manhattan_ap": 0.5523438512618317, |
|
"eval_VitaminC_manhattan_f1": 0.6577540106951871, |
|
"eval_VitaminC_manhattan_f1_threshold": 503.4666748046875, |
|
"eval_VitaminC_manhattan_precision": 0.4900398406374502, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.5859375, |
|
"eval_VitaminC_max_accuracy_threshold": 319.20416259765625, |
|
"eval_VitaminC_max_ap": 0.5610960529859609, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 503.4666748046875, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5610960529859609, |
|
"eval_sts-test_pearson_cosine": 0.8558158452024208, |
|
"eval_sts-test_pearson_dot": 0.8576514422982167, |
|
"eval_sts-test_pearson_euclidean": 0.8865043101846597, |
|
"eval_sts-test_pearson_manhattan": 0.8866527906896583, |
|
"eval_sts-test_pearson_max": 0.8866527906896583, |
|
"eval_sts-test_spearman_cosine": 0.8964919590428757, |
|
"eval_sts-test_spearman_dot": 0.8785377225806833, |
|
"eval_sts-test_spearman_euclidean": 0.8874999818863996, |
|
"eval_sts-test_spearman_manhattan": 0.8881272791699125, |
|
"eval_sts-test_spearman_max": 0.8964919590428757, |
|
"eval_vitaminc-pairs_loss": 2.336690902709961, |
|
"eval_vitaminc-pairs_runtime": 1.4561, |
|
"eval_vitaminc-pairs_samples_per_second": 74.172, |
|
"eval_vitaminc-pairs_steps_per_second": 1.374, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_negation-triplets_loss": 1.5712968111038208, |
|
"eval_negation-triplets_runtime": 0.2973, |
|
"eval_negation-triplets_samples_per_second": 215.304, |
|
"eval_negation-triplets_steps_per_second": 3.364, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_scitail-pairs-pos_loss": 0.1093834936618805, |
|
"eval_scitail-pairs-pos_runtime": 0.3709, |
|
"eval_scitail-pairs-pos_samples_per_second": 145.598, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.696, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_xsum-pairs_loss": 0.09297582507133484, |
|
"eval_xsum-pairs_runtime": 3.1496, |
|
"eval_xsum-pairs_samples_per_second": 40.64, |
|
"eval_xsum-pairs_steps_per_second": 0.635, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_sciq_pairs_loss": 0.031587302684783936, |
|
"eval_sciq_pairs_runtime": 3.2666, |
|
"eval_sciq_pairs_samples_per_second": 39.185, |
|
"eval_sciq_pairs_steps_per_second": 0.612, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_qasc_pairs_loss": 0.10210572183132172, |
|
"eval_qasc_pairs_runtime": 0.6258, |
|
"eval_qasc_pairs_samples_per_second": 204.538, |
|
"eval_qasc_pairs_steps_per_second": 3.196, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_openbookqa_pairs_loss": 0.7485109567642212, |
|
"eval_openbookqa_pairs_runtime": 0.5836, |
|
"eval_openbookqa_pairs_samples_per_second": 219.317, |
|
"eval_openbookqa_pairs_steps_per_second": 3.427, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_msmarco_pairs_loss": 0.34248754382133484, |
|
"eval_msmarco_pairs_runtime": 1.286, |
|
"eval_msmarco_pairs_samples_per_second": 99.53, |
|
"eval_msmarco_pairs_steps_per_second": 1.555, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_nq_pairs_loss": 0.41257673501968384, |
|
"eval_nq_pairs_runtime": 2.3755, |
|
"eval_nq_pairs_samples_per_second": 53.883, |
|
"eval_nq_pairs_steps_per_second": 0.842, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_trivia_pairs_loss": 0.9274640083312988, |
|
"eval_trivia_pairs_runtime": 4.4185, |
|
"eval_trivia_pairs_samples_per_second": 28.969, |
|
"eval_trivia_pairs_steps_per_second": 0.453, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_gooaq_pairs_loss": 0.5840359330177307, |
|
"eval_gooaq_pairs_runtime": 0.8784, |
|
"eval_gooaq_pairs_samples_per_second": 145.726, |
|
"eval_gooaq_pairs_steps_per_second": 2.277, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7584269662921348, |
|
"eval_paws-pos_loss": 0.024575484916567802, |
|
"eval_paws-pos_runtime": 0.6869, |
|
"eval_paws-pos_samples_per_second": 186.355, |
|
"eval_paws-pos_steps_per_second": 2.912, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7752808988764045, |
|
"grad_norm": 3.189115285873413, |
|
"learning_rate": 3.884321771974146e-05, |
|
"loss": 0.9421, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.7921348314606742, |
|
"grad_norm": 1.8421012163162231, |
|
"learning_rate": 3.872005363275693e-05, |
|
"loss": 0.6845, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"grad_norm": 2.2710273265838623, |
|
"learning_rate": 3.859095091951534e-05, |
|
"loss": 0.5464, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.8258426966292135, |
|
"grad_norm": 2.98201060295105, |
|
"learning_rate": 3.845596546119496e-05, |
|
"loss": 0.6338, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8426966292134831, |
|
"grad_norm": 2.9222068786621094, |
|
"learning_rate": 3.831515568527782e-05, |
|
"loss": 0.4993, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8595505617977528, |
|
"grad_norm": 3.6596696376800537, |
|
"learning_rate": 3.81685825402598e-05, |
|
"loss": 0.6939, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8764044943820225, |
|
"grad_norm": 3.5363073348999023, |
|
"learning_rate": 3.801630946926956e-05, |
|
"loss": 0.5791, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.8932584269662921, |
|
"grad_norm": 3.6599326133728027, |
|
"learning_rate": 3.785840238260758e-05, |
|
"loss": 0.9226, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"grad_norm": 2.6967382431030273, |
|
"learning_rate": 3.7694929629217385e-05, |
|
"loss": 0.6336, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8077100515365601, |
|
"eval_VitaminC_cosine_ap": 0.560345569715395, |
|
"eval_VitaminC_cosine_f1": 0.6577540106951871, |
|
"eval_VitaminC_cosine_f1_threshold": 0.25425243377685547, |
|
"eval_VitaminC_cosine_precision": 0.4900398406374502, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.578125, |
|
"eval_VitaminC_dot_accuracy_threshold": 331.0595703125, |
|
"eval_VitaminC_dot_ap": 0.5499174718252662, |
|
"eval_VitaminC_dot_f1": 0.6594594594594595, |
|
"eval_VitaminC_dot_f1_threshold": 121.5512924194336, |
|
"eval_VitaminC_dot_precision": 0.4939271255060729, |
|
"eval_VitaminC_dot_recall": 0.991869918699187, |
|
"eval_VitaminC_euclidean_accuracy": 0.58203125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.087348937988281, |
|
"eval_VitaminC_euclidean_ap": 0.5563988051869968, |
|
"eval_VitaminC_euclidean_f1": 0.6595174262734584, |
|
"eval_VitaminC_euclidean_f1_threshold": 24.085674285888672, |
|
"eval_VitaminC_euclidean_precision": 0.492, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.58203125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 339.80157470703125, |
|
"eval_VitaminC_manhattan_ap": 0.5563552623581395, |
|
"eval_VitaminC_manhattan_f1": 0.6595174262734584, |
|
"eval_VitaminC_manhattan_f1_threshold": 513.6148681640625, |
|
"eval_VitaminC_manhattan_precision": 0.492, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.58203125, |
|
"eval_VitaminC_max_accuracy_threshold": 339.80157470703125, |
|
"eval_VitaminC_max_ap": 0.560345569715395, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 513.6148681640625, |
|
"eval_VitaminC_max_precision": 0.4939271255060729, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.560345569715395, |
|
"eval_sts-test_pearson_cosine": 0.8572374144718338, |
|
"eval_sts-test_pearson_dot": 0.8574922304214377, |
|
"eval_sts-test_pearson_euclidean": 0.8855365860444931, |
|
"eval_sts-test_pearson_manhattan": 0.886788554614621, |
|
"eval_sts-test_pearson_max": 0.886788554614621, |
|
"eval_sts-test_spearman_cosine": 0.8965259734503722, |
|
"eval_sts-test_spearman_dot": 0.8784832995286892, |
|
"eval_sts-test_spearman_euclidean": 0.8855371715506584, |
|
"eval_sts-test_spearman_manhattan": 0.8887295132057965, |
|
"eval_sts-test_spearman_max": 0.8965259734503722, |
|
"eval_vitaminc-pairs_loss": 2.376066207885742, |
|
"eval_vitaminc-pairs_runtime": 1.4378, |
|
"eval_vitaminc-pairs_samples_per_second": 75.117, |
|
"eval_vitaminc-pairs_steps_per_second": 1.391, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_negation-triplets_loss": 1.63050377368927, |
|
"eval_negation-triplets_runtime": 0.3004, |
|
"eval_negation-triplets_samples_per_second": 213.075, |
|
"eval_negation-triplets_steps_per_second": 3.329, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_scitail-pairs-pos_loss": 0.10006655752658844, |
|
"eval_scitail-pairs-pos_runtime": 0.365, |
|
"eval_scitail-pairs-pos_samples_per_second": 147.952, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.74, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_xsum-pairs_loss": 0.0762382224202156, |
|
"eval_xsum-pairs_runtime": 3.1586, |
|
"eval_xsum-pairs_samples_per_second": 40.525, |
|
"eval_xsum-pairs_steps_per_second": 0.633, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_sciq_pairs_loss": 0.032084282487630844, |
|
"eval_sciq_pairs_runtime": 3.2586, |
|
"eval_sciq_pairs_samples_per_second": 39.28, |
|
"eval_sciq_pairs_steps_per_second": 0.614, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_qasc_pairs_loss": 0.10310036689043045, |
|
"eval_qasc_pairs_runtime": 0.6197, |
|
"eval_qasc_pairs_samples_per_second": 206.545, |
|
"eval_qasc_pairs_steps_per_second": 3.227, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_openbookqa_pairs_loss": 0.6995278000831604, |
|
"eval_openbookqa_pairs_runtime": 0.575, |
|
"eval_openbookqa_pairs_samples_per_second": 222.624, |
|
"eval_openbookqa_pairs_steps_per_second": 3.478, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_msmarco_pairs_loss": 0.37089064717292786, |
|
"eval_msmarco_pairs_runtime": 1.2866, |
|
"eval_msmarco_pairs_samples_per_second": 99.49, |
|
"eval_msmarco_pairs_steps_per_second": 1.555, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_nq_pairs_loss": 0.39078566431999207, |
|
"eval_nq_pairs_runtime": 2.3928, |
|
"eval_nq_pairs_samples_per_second": 53.493, |
|
"eval_nq_pairs_steps_per_second": 0.836, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_trivia_pairs_loss": 0.9003691077232361, |
|
"eval_trivia_pairs_runtime": 4.479, |
|
"eval_trivia_pairs_samples_per_second": 28.578, |
|
"eval_trivia_pairs_steps_per_second": 0.447, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_gooaq_pairs_loss": 0.5496390461921692, |
|
"eval_gooaq_pairs_runtime": 0.8735, |
|
"eval_gooaq_pairs_samples_per_second": 146.532, |
|
"eval_gooaq_pairs_steps_per_second": 2.29, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_paws-pos_loss": 0.024540428072214127, |
|
"eval_paws-pos_runtime": 0.6836, |
|
"eval_paws-pos_samples_per_second": 187.246, |
|
"eval_paws-pos_steps_per_second": 2.926, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9269662921348315, |
|
"grad_norm": 3.270443916320801, |
|
"learning_rate": 3.7525961967101216e-05, |
|
"loss": 0.5395, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9438202247191011, |
|
"grad_norm": 2.608531951904297, |
|
"learning_rate": 3.7351572532692915e-05, |
|
"loss": 0.6874, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.9606741573033708, |
|
"grad_norm": 2.9995415210723877, |
|
"learning_rate": 3.717183680920136e-05, |
|
"loss": 0.5614, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9775280898876404, |
|
"grad_norm": 3.42486572265625, |
|
"learning_rate": 3.698683259393809e-05, |
|
"loss": 0.5812, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.9943820224719101, |
|
"grad_norm": 1.8073548078536987, |
|
"learning_rate": 3.679663996464331e-05, |
|
"loss": 0.427, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0112359550561798, |
|
"grad_norm": 3.2385106086730957, |
|
"learning_rate": 3.660134124482482e-05, |
|
"loss": 0.4603, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0280898876404494, |
|
"grad_norm": 2.8062069416046143, |
|
"learning_rate": 3.640102096812488e-05, |
|
"loss": 0.6493, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.0449438202247192, |
|
"grad_norm": 2.6892104148864746, |
|
"learning_rate": 3.619576584173041e-05, |
|
"loss": 0.6646, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"grad_norm": 2.6583099365234375, |
|
"learning_rate": 3.598566470884244e-05, |
|
"loss": 0.7239, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.58984375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8360881209373474, |
|
"eval_VitaminC_cosine_ap": 0.5601848253252508, |
|
"eval_VitaminC_cosine_f1": 0.6559999999999999, |
|
"eval_VitaminC_cosine_f1_threshold": 0.26484909653663635, |
|
"eval_VitaminC_cosine_precision": 0.4880952380952381, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.58203125, |
|
"eval_VitaminC_dot_accuracy_threshold": 314.279052734375, |
|
"eval_VitaminC_dot_ap": 0.5513292673695236, |
|
"eval_VitaminC_dot_f1": 0.6558265582655827, |
|
"eval_VitaminC_dot_f1_threshold": 126.1304931640625, |
|
"eval_VitaminC_dot_precision": 0.491869918699187, |
|
"eval_VitaminC_dot_recall": 0.983739837398374, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 15.01893424987793, |
|
"eval_VitaminC_euclidean_ap": 0.5549132214851141, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.76571273803711, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 244.02972412109375, |
|
"eval_VitaminC_manhattan_ap": 0.5562338006363409, |
|
"eval_VitaminC_manhattan_f1": 0.6577540106951871, |
|
"eval_VitaminC_manhattan_f1_threshold": 498.5762634277344, |
|
"eval_VitaminC_manhattan_precision": 0.4900398406374502, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.58984375, |
|
"eval_VitaminC_max_accuracy_threshold": 314.279052734375, |
|
"eval_VitaminC_max_ap": 0.5601848253252508, |
|
"eval_VitaminC_max_f1": 0.6577540106951871, |
|
"eval_VitaminC_max_f1_threshold": 498.5762634277344, |
|
"eval_VitaminC_max_precision": 0.491869918699187, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5601848253252508, |
|
"eval_sts-test_pearson_cosine": 0.854968805652805, |
|
"eval_sts-test_pearson_dot": 0.8534110565503882, |
|
"eval_sts-test_pearson_euclidean": 0.8853384519331917, |
|
"eval_sts-test_pearson_manhattan": 0.8864271118397893, |
|
"eval_sts-test_pearson_max": 0.8864271118397893, |
|
"eval_sts-test_spearman_cosine": 0.8956917253507228, |
|
"eval_sts-test_spearman_dot": 0.877726389450295, |
|
"eval_sts-test_spearman_euclidean": 0.8875533307992096, |
|
"eval_sts-test_spearman_manhattan": 0.890112288382125, |
|
"eval_sts-test_spearman_max": 0.8956917253507228, |
|
"eval_vitaminc-pairs_loss": 2.3751792907714844, |
|
"eval_vitaminc-pairs_runtime": 1.4524, |
|
"eval_vitaminc-pairs_samples_per_second": 74.358, |
|
"eval_vitaminc-pairs_steps_per_second": 1.377, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_negation-triplets_loss": 1.580323338508606, |
|
"eval_negation-triplets_runtime": 0.301, |
|
"eval_negation-triplets_samples_per_second": 212.649, |
|
"eval_negation-triplets_steps_per_second": 3.323, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_scitail-pairs-pos_loss": 0.10438331216573715, |
|
"eval_scitail-pairs-pos_runtime": 0.363, |
|
"eval_scitail-pairs-pos_samples_per_second": 148.774, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.755, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_xsum-pairs_loss": 0.06599828600883484, |
|
"eval_xsum-pairs_runtime": 3.1538, |
|
"eval_xsum-pairs_samples_per_second": 40.586, |
|
"eval_xsum-pairs_steps_per_second": 0.634, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_sciq_pairs_loss": 0.033071305602788925, |
|
"eval_sciq_pairs_runtime": 3.2631, |
|
"eval_sciq_pairs_samples_per_second": 39.227, |
|
"eval_sciq_pairs_steps_per_second": 0.613, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_qasc_pairs_loss": 0.10076003521680832, |
|
"eval_qasc_pairs_runtime": 0.6385, |
|
"eval_qasc_pairs_samples_per_second": 200.46, |
|
"eval_qasc_pairs_steps_per_second": 3.132, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_openbookqa_pairs_loss": 0.6888580918312073, |
|
"eval_openbookqa_pairs_runtime": 0.6067, |
|
"eval_openbookqa_pairs_samples_per_second": 210.99, |
|
"eval_openbookqa_pairs_steps_per_second": 3.297, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_msmarco_pairs_loss": 0.335863322019577, |
|
"eval_msmarco_pairs_runtime": 1.3206, |
|
"eval_msmarco_pairs_samples_per_second": 96.922, |
|
"eval_msmarco_pairs_steps_per_second": 1.514, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_nq_pairs_loss": 0.3690747916698456, |
|
"eval_nq_pairs_runtime": 2.3983, |
|
"eval_nq_pairs_samples_per_second": 53.371, |
|
"eval_nq_pairs_steps_per_second": 0.834, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_trivia_pairs_loss": 0.881881594657898, |
|
"eval_trivia_pairs_runtime": 4.4261, |
|
"eval_trivia_pairs_samples_per_second": 28.919, |
|
"eval_trivia_pairs_steps_per_second": 0.452, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_gooaq_pairs_loss": 0.5453925728797913, |
|
"eval_gooaq_pairs_runtime": 0.8775, |
|
"eval_gooaq_pairs_samples_per_second": 145.871, |
|
"eval_gooaq_pairs_steps_per_second": 2.279, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0617977528089888, |
|
"eval_paws-pos_loss": 0.024841103702783585, |
|
"eval_paws-pos_runtime": 0.6851, |
|
"eval_paws-pos_samples_per_second": 186.83, |
|
"eval_paws-pos_steps_per_second": 2.919, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0786516853932584, |
|
"grad_norm": 2.942894220352173, |
|
"learning_rate": 3.5770808510220957e-05, |
|
"loss": 0.7593, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.095505617977528, |
|
"grad_norm": 2.636993169784546, |
|
"learning_rate": 3.5551290244821855e-05, |
|
"loss": 0.6877, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1123595505617978, |
|
"grad_norm": 2.7987968921661377, |
|
"learning_rate": 3.5327204929543e-05, |
|
"loss": 0.5482, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.1292134831460674, |
|
"grad_norm": 2.3775548934936523, |
|
"learning_rate": 3.509864955809687e-05, |
|
"loss": 0.6047, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.146067415730337, |
|
"grad_norm": 1.982017993927002, |
|
"learning_rate": 3.4865723059027493e-05, |
|
"loss": 0.4358, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.1629213483146068, |
|
"grad_norm": 1.5380574464797974, |
|
"learning_rate": 3.462852625288999e-05, |
|
"loss": 0.3343, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.1797752808988764, |
|
"grad_norm": 3.152486562728882, |
|
"learning_rate": 3.438716180861106e-05, |
|
"loss": 0.5624, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.196629213483146, |
|
"grad_norm": 2.410505533218384, |
|
"learning_rate": 3.414173419904956e-05, |
|
"loss": 0.4578, |
|
"step": 213 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 534, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 107, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 320, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|