{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.601123595505618, "eval_steps": 27, "global_step": 107, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016853932584269662, "grad_norm": 2.9885776042938232, "learning_rate": 1.4814814814814815e-06, "loss": 0.6012, "step": 3 }, { "epoch": 0.033707865168539325, "grad_norm": 3.184929132461548, "learning_rate": 2.962962962962963e-06, "loss": 0.7573, "step": 6 }, { "epoch": 0.05056179775280899, "grad_norm": 3.256159782409668, "learning_rate": 4.444444444444444e-06, "loss": 0.9212, "step": 9 }, { "epoch": 0.06741573033707865, "grad_norm": 2.833339214324951, "learning_rate": 5.925925925925926e-06, "loss": 0.6117, "step": 12 }, { "epoch": 0.08426966292134831, "grad_norm": 3.08292818069458, "learning_rate": 7.4074074074074075e-06, "loss": 0.8545, "step": 15 }, { "epoch": 0.10112359550561797, "grad_norm": 2.317431688308716, "learning_rate": 8.888888888888888e-06, "loss": 0.6515, "step": 18 }, { "epoch": 0.11797752808988764, "grad_norm": 2.9611644744873047, "learning_rate": 1.037037037037037e-05, "loss": 0.7159, "step": 21 }, { "epoch": 0.1348314606741573, "grad_norm": 2.698537826538086, "learning_rate": 1.1851851851851852e-05, "loss": 0.7019, "step": 24 }, { "epoch": 0.15168539325842698, "grad_norm": 2.222154378890991, "learning_rate": 1.3333333333333333e-05, "loss": 0.4411, "step": 27 }, { "epoch": 0.15168539325842698, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.578125, "eval_VitaminC_cosine_accuracy_threshold": 0.7817381620407104, "eval_VitaminC_cosine_ap": 0.5507972943944112, "eval_VitaminC_cosine_f1": 0.6595174262734584, "eval_VitaminC_cosine_f1_threshold": 0.28573715686798096, "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5703125, "eval_VitaminC_dot_accuracy_threshold": 316.7283020019531, "eval_VitaminC_dot_ap": 0.5511866185449577, "eval_VitaminC_dot_f1": 0.6577540106951871, "eval_VitaminC_dot_f1_threshold": 106.75863647460938, "eval_VitaminC_dot_precision": 0.4900398406374502, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.578125, "eval_VitaminC_euclidean_accuracy_threshold": 13.298419952392578, "eval_VitaminC_euclidean_ap": 0.5476323986807207, "eval_VitaminC_euclidean_f1": 0.6577540106951871, "eval_VitaminC_euclidean_f1_threshold": 23.83933448791504, "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.578125, "eval_VitaminC_manhattan_accuracy_threshold": 279.69085693359375, "eval_VitaminC_manhattan_ap": 0.5412538781107805, "eval_VitaminC_manhattan_f1": 0.6577540106951871, "eval_VitaminC_manhattan_f1_threshold": 499.8836364746094, "eval_VitaminC_manhattan_precision": 0.4900398406374502, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.578125, "eval_VitaminC_max_accuracy_threshold": 316.7283020019531, "eval_VitaminC_max_ap": 0.5511866185449577, "eval_VitaminC_max_f1": 0.6595174262734584, "eval_VitaminC_max_f1_threshold": 499.8836364746094, "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5511866185449577, "eval_sts-test_pearson_cosine": 0.8488243436029344, "eval_sts-test_pearson_dot": 0.8480167969551653, "eval_sts-test_pearson_euclidean": 0.8800283985117625, "eval_sts-test_pearson_manhattan": 0.880588311422627, "eval_sts-test_pearson_max": 0.880588311422627, "eval_sts-test_spearman_cosine": 0.8905659331642088, "eval_sts-test_spearman_dot": 0.8692084657204004, "eval_sts-test_spearman_euclidean": 0.8809566840232712, "eval_sts-test_spearman_manhattan": 0.883434007028195, "eval_sts-test_spearman_max": 0.8905659331642088, "eval_vitaminc-pairs_loss": 2.465860366821289, "eval_vitaminc-pairs_runtime": 1.4615, "eval_vitaminc-pairs_samples_per_second": 73.899, "eval_vitaminc-pairs_steps_per_second": 1.368, "step": 27 }, { "epoch": 0.15168539325842698, "eval_negation-triplets_loss": 1.7310789823532104, "eval_negation-triplets_runtime": 0.3009, "eval_negation-triplets_samples_per_second": 212.692, "eval_negation-triplets_steps_per_second": 3.323, "step": 27 }, { "epoch": 0.15168539325842698, "eval_scitail-pairs-pos_loss": 0.1150394082069397, "eval_scitail-pairs-pos_runtime": 0.3739, "eval_scitail-pairs-pos_samples_per_second": 144.431, "eval_scitail-pairs-pos_steps_per_second": 2.675, "step": 27 }, { "epoch": 0.15168539325842698, "eval_xsum-pairs_loss": 0.11168850213289261, "eval_xsum-pairs_runtime": 3.1697, "eval_xsum-pairs_samples_per_second": 40.382, "eval_xsum-pairs_steps_per_second": 0.631, "step": 27 }, { "epoch": 0.15168539325842698, "eval_sciq_pairs_loss": 0.03450964391231537, "eval_sciq_pairs_runtime": 3.3283, "eval_sciq_pairs_samples_per_second": 38.459, "eval_sciq_pairs_steps_per_second": 0.601, "step": 27 }, { "epoch": 0.15168539325842698, "eval_qasc_pairs_loss": 0.11095743626356125, "eval_qasc_pairs_runtime": 0.6261, "eval_qasc_pairs_samples_per_second": 204.45, "eval_qasc_pairs_steps_per_second": 3.195, "step": 27 }, { "epoch": 0.15168539325842698, "eval_openbookqa_pairs_loss": 0.7092063426971436, "eval_openbookqa_pairs_runtime": 0.5866, "eval_openbookqa_pairs_samples_per_second": 218.19, "eval_openbookqa_pairs_steps_per_second": 3.409, "step": 27 }, { "epoch": 0.15168539325842698, "eval_msmarco_pairs_loss": 0.3955218493938446, "eval_msmarco_pairs_runtime": 1.2942, "eval_msmarco_pairs_samples_per_second": 98.902, "eval_msmarco_pairs_steps_per_second": 1.545, "step": 27 }, { "epoch": 0.15168539325842698, "eval_nq_pairs_loss": 0.42051073908805847, "eval_nq_pairs_runtime": 2.3875, "eval_nq_pairs_samples_per_second": 53.612, "eval_nq_pairs_steps_per_second": 0.838, "step": 27 }, { "epoch": 0.15168539325842698, "eval_trivia_pairs_loss": 0.93178790807724, "eval_trivia_pairs_runtime": 4.4363, "eval_trivia_pairs_samples_per_second": 28.853, "eval_trivia_pairs_steps_per_second": 0.451, "step": 27 }, { "epoch": 0.15168539325842698, "eval_gooaq_pairs_loss": 0.6505913138389587, "eval_gooaq_pairs_runtime": 0.8826, "eval_gooaq_pairs_samples_per_second": 145.027, "eval_gooaq_pairs_steps_per_second": 2.266, "step": 27 }, { "epoch": 0.15168539325842698, "eval_paws-pos_loss": 0.024931101128458977, "eval_paws-pos_runtime": 0.6852, "eval_paws-pos_samples_per_second": 186.805, "eval_paws-pos_steps_per_second": 2.919, "step": 27 }, { "epoch": 0.16853932584269662, "grad_norm": 2.826900005340576, "learning_rate": 1.4814814814814815e-05, "loss": 0.5125, "step": 30 }, { "epoch": 0.1853932584269663, "grad_norm": 2.9938910007476807, "learning_rate": 1.6296296296296297e-05, "loss": 0.6885, "step": 33 }, { "epoch": 0.20224719101123595, "grad_norm": 3.3046395778656006, "learning_rate": 1.7777777777777777e-05, "loss": 0.6435, "step": 36 }, { "epoch": 0.21910112359550563, "grad_norm": 2.4184651374816895, "learning_rate": 1.925925925925926e-05, "loss": 0.753, "step": 39 }, { "epoch": 0.23595505617977527, "grad_norm": 2.9905433654785156, "learning_rate": 2.074074074074074e-05, "loss": 0.7427, "step": 42 }, { "epoch": 0.25280898876404495, "grad_norm": 2.745820999145508, "learning_rate": 2.2222222222222227e-05, "loss": 0.5083, "step": 45 }, { "epoch": 0.2696629213483146, "grad_norm": 2.6370577812194824, "learning_rate": 2.3703703703703703e-05, "loss": 0.7454, "step": 48 }, { "epoch": 0.28651685393258425, "grad_norm": 3.044011116027832, "learning_rate": 2.5185185185185187e-05, "loss": 0.8356, "step": 51 }, { "epoch": 0.30337078651685395, "grad_norm": 3.718804121017456, "learning_rate": 2.6666666666666667e-05, "loss": 0.8864, "step": 54 }, { "epoch": 0.30337078651685395, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.57421875, "eval_VitaminC_cosine_accuracy_threshold": 0.7991844415664673, "eval_VitaminC_cosine_ap": 0.5485498837322925, "eval_VitaminC_cosine_f1": 0.6595174262734584, "eval_VitaminC_cosine_f1_threshold": 0.3160865008831024, "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.578125, "eval_VitaminC_dot_accuracy_threshold": 327.0416564941406, "eval_VitaminC_dot_ap": 0.54993134882601, "eval_VitaminC_dot_f1": 0.6595174262734584, "eval_VitaminC_dot_f1_threshold": 117.44181060791016, "eval_VitaminC_dot_precision": 0.492, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.57421875, "eval_VitaminC_euclidean_accuracy_threshold": 13.019258499145508, "eval_VitaminC_euclidean_ap": 0.5435066540334542, "eval_VitaminC_euclidean_f1": 0.6577540106951871, "eval_VitaminC_euclidean_f1_threshold": 23.688644409179688, "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.57421875, "eval_VitaminC_manhattan_accuracy_threshold": 283.876220703125, "eval_VitaminC_manhattan_ap": 0.5416615397828658, "eval_VitaminC_manhattan_f1": 0.6559999999999999, "eval_VitaminC_manhattan_f1_threshold": 514.0216064453125, "eval_VitaminC_manhattan_precision": 0.4880952380952381, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.578125, "eval_VitaminC_max_accuracy_threshold": 327.0416564941406, "eval_VitaminC_max_ap": 0.54993134882601, "eval_VitaminC_max_f1": 0.6595174262734584, "eval_VitaminC_max_f1_threshold": 514.0216064453125, "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.54993134882601, "eval_sts-test_pearson_cosine": 0.8452615878553369, "eval_sts-test_pearson_dot": 0.8404858620687519, "eval_sts-test_pearson_euclidean": 0.8780527810910925, "eval_sts-test_pearson_manhattan": 0.878916157345712, "eval_sts-test_pearson_max": 0.878916157345712, "eval_sts-test_spearman_cosine": 0.8876915367075635, "eval_sts-test_spearman_dot": 0.8608104875327304, "eval_sts-test_spearman_euclidean": 0.8804138856889071, "eval_sts-test_spearman_manhattan": 0.8822803815444743, "eval_sts-test_spearman_max": 0.8876915367075635, "eval_vitaminc-pairs_loss": 2.454524040222168, "eval_vitaminc-pairs_runtime": 1.4583, "eval_vitaminc-pairs_samples_per_second": 74.057, "eval_vitaminc-pairs_steps_per_second": 1.371, "step": 54 }, { "epoch": 0.30337078651685395, "eval_negation-triplets_loss": 1.7277792692184448, "eval_negation-triplets_runtime": 0.3027, "eval_negation-triplets_samples_per_second": 211.436, "eval_negation-triplets_steps_per_second": 3.304, "step": 54 }, { "epoch": 0.30337078651685395, "eval_scitail-pairs-pos_loss": 0.11168555170297623, "eval_scitail-pairs-pos_runtime": 0.3726, "eval_scitail-pairs-pos_samples_per_second": 144.911, "eval_scitail-pairs-pos_steps_per_second": 2.684, "step": 54 }, { "epoch": 0.30337078651685395, "eval_xsum-pairs_loss": 0.10087604075670242, "eval_xsum-pairs_runtime": 3.1701, "eval_xsum-pairs_samples_per_second": 40.377, "eval_xsum-pairs_steps_per_second": 0.631, "step": 54 }, { "epoch": 0.30337078651685395, "eval_sciq_pairs_loss": 0.03466618433594704, "eval_sciq_pairs_runtime": 3.3778, "eval_sciq_pairs_samples_per_second": 37.895, "eval_sciq_pairs_steps_per_second": 0.592, "step": 54 }, { "epoch": 0.30337078651685395, "eval_qasc_pairs_loss": 0.10551701486110687, "eval_qasc_pairs_runtime": 0.6271, "eval_qasc_pairs_samples_per_second": 204.125, "eval_qasc_pairs_steps_per_second": 3.189, "step": 54 }, { "epoch": 0.30337078651685395, "eval_openbookqa_pairs_loss": 0.7239958643913269, "eval_openbookqa_pairs_runtime": 0.5811, "eval_openbookqa_pairs_samples_per_second": 220.255, "eval_openbookqa_pairs_steps_per_second": 3.441, "step": 54 }, { "epoch": 0.30337078651685395, "eval_msmarco_pairs_loss": 0.3808779716491699, "eval_msmarco_pairs_runtime": 1.2919, "eval_msmarco_pairs_samples_per_second": 99.082, "eval_msmarco_pairs_steps_per_second": 1.548, "step": 54 }, { "epoch": 0.30337078651685395, "eval_nq_pairs_loss": 0.44170400500297546, "eval_nq_pairs_runtime": 2.3835, "eval_nq_pairs_samples_per_second": 53.703, "eval_nq_pairs_steps_per_second": 0.839, "step": 54 }, { "epoch": 0.30337078651685395, "eval_trivia_pairs_loss": 0.9158428907394409, "eval_trivia_pairs_runtime": 4.4326, "eval_trivia_pairs_samples_per_second": 28.877, "eval_trivia_pairs_steps_per_second": 0.451, "step": 54 }, { "epoch": 0.30337078651685395, "eval_gooaq_pairs_loss": 0.6208247542381287, "eval_gooaq_pairs_runtime": 0.8797, "eval_gooaq_pairs_samples_per_second": 145.497, "eval_gooaq_pairs_steps_per_second": 2.273, "step": 54 }, { "epoch": 0.30337078651685395, "eval_paws-pos_loss": 0.02517784759402275, "eval_paws-pos_runtime": 0.694, "eval_paws-pos_samples_per_second": 184.442, "eval_paws-pos_steps_per_second": 2.882, "step": 54 }, { "epoch": 0.3202247191011236, "grad_norm": 2.173736572265625, "learning_rate": 2.814814814814815e-05, "loss": 0.6015, "step": 57 }, { "epoch": 0.33707865168539325, "grad_norm": 3.8964712619781494, "learning_rate": 2.962962962962963e-05, "loss": 0.9482, "step": 60 }, { "epoch": 0.3539325842696629, "grad_norm": 2.659498691558838, "learning_rate": 3.111111111111112e-05, "loss": 0.5404, "step": 63 }, { "epoch": 0.3707865168539326, "grad_norm": 3.3499844074249268, "learning_rate": 3.259259259259259e-05, "loss": 0.805, "step": 66 }, { "epoch": 0.38764044943820225, "grad_norm": 3.770142078399658, "learning_rate": 3.4074074074074077e-05, "loss": 0.7184, "step": 69 }, { "epoch": 0.4044943820224719, "grad_norm": 3.740880012512207, "learning_rate": 3.555555555555555e-05, "loss": 0.8708, "step": 72 }, { "epoch": 0.42134831460674155, "grad_norm": 2.981106996536255, "learning_rate": 3.703703703703704e-05, "loss": 0.8327, "step": 75 }, { "epoch": 0.43820224719101125, "grad_norm": 2.3469011783599854, "learning_rate": 3.851851851851852e-05, "loss": 0.5025, "step": 78 }, { "epoch": 0.4550561797752809, "grad_norm": 3.296035051345825, "learning_rate": 4e-05, "loss": 0.6517, "step": 81 }, { "epoch": 0.4550561797752809, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.578125, "eval_VitaminC_cosine_accuracy_threshold": 0.7859437465667725, "eval_VitaminC_cosine_ap": 0.5557444337961499, "eval_VitaminC_cosine_f1": 0.6595174262734584, "eval_VitaminC_cosine_f1_threshold": 0.3211573362350464, "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.578125, "eval_VitaminC_dot_accuracy_threshold": 315.9444580078125, "eval_VitaminC_dot_ap": 0.5539524528858992, "eval_VitaminC_dot_f1": 0.6595174262734584, "eval_VitaminC_dot_f1_threshold": 129.88558959960938, "eval_VitaminC_dot_precision": 0.492, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.58203125, "eval_VitaminC_euclidean_accuracy_threshold": 13.113249778747559, "eval_VitaminC_euclidean_ap": 0.5510190217865811, "eval_VitaminC_euclidean_f1": 0.6577540106951871, "eval_VitaminC_euclidean_f1_threshold": 23.90462303161621, "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.578125, "eval_VitaminC_manhattan_accuracy_threshold": 276.40142822265625, "eval_VitaminC_manhattan_ap": 0.5429240708188645, "eval_VitaminC_manhattan_f1": 0.6576819407008085, "eval_VitaminC_manhattan_f1_threshold": 469.7353515625, "eval_VitaminC_manhattan_precision": 0.49193548387096775, "eval_VitaminC_manhattan_recall": 0.991869918699187, "eval_VitaminC_max_accuracy": 0.58203125, "eval_VitaminC_max_accuracy_threshold": 315.9444580078125, "eval_VitaminC_max_ap": 0.5557444337961499, "eval_VitaminC_max_f1": 0.6595174262734584, "eval_VitaminC_max_f1_threshold": 469.7353515625, "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5557444337961499, "eval_sts-test_pearson_cosine": 0.8483316632682467, "eval_sts-test_pearson_dot": 0.8392403098680445, "eval_sts-test_pearson_euclidean": 0.8814283057813619, "eval_sts-test_pearson_manhattan": 0.8815226866327923, "eval_sts-test_pearson_max": 0.8815226866327923, "eval_sts-test_spearman_cosine": 0.8903503892346, "eval_sts-test_spearman_dot": 0.857844431199042, "eval_sts-test_spearman_euclidean": 0.8851830636663006, "eval_sts-test_spearman_manhattan": 0.8865568876827619, "eval_sts-test_spearman_max": 0.8903503892346, "eval_vitaminc-pairs_loss": 2.3538782596588135, "eval_vitaminc-pairs_runtime": 1.4618, "eval_vitaminc-pairs_samples_per_second": 73.88, "eval_vitaminc-pairs_steps_per_second": 1.368, "step": 81 }, { "epoch": 0.4550561797752809, "eval_negation-triplets_loss": 1.649215579032898, "eval_negation-triplets_runtime": 0.3081, "eval_negation-triplets_samples_per_second": 207.723, "eval_negation-triplets_steps_per_second": 3.246, "step": 81 }, { "epoch": 0.4550561797752809, "eval_scitail-pairs-pos_loss": 0.11823470890522003, "eval_scitail-pairs-pos_runtime": 0.376, "eval_scitail-pairs-pos_samples_per_second": 143.616, "eval_scitail-pairs-pos_steps_per_second": 2.66, "step": 81 }, { "epoch": 0.4550561797752809, "eval_xsum-pairs_loss": 0.08420603722333908, "eval_xsum-pairs_runtime": 3.1576, "eval_xsum-pairs_samples_per_second": 40.538, "eval_xsum-pairs_steps_per_second": 0.633, "step": 81 }, { "epoch": 0.4550561797752809, "eval_sciq_pairs_loss": 0.034781794995069504, "eval_sciq_pairs_runtime": 3.2597, "eval_sciq_pairs_samples_per_second": 39.267, "eval_sciq_pairs_steps_per_second": 0.614, "step": 81 }, { "epoch": 0.4550561797752809, "eval_qasc_pairs_loss": 0.10597346723079681, "eval_qasc_pairs_runtime": 0.6245, "eval_qasc_pairs_samples_per_second": 204.979, "eval_qasc_pairs_steps_per_second": 3.203, "step": 81 }, { "epoch": 0.4550561797752809, "eval_openbookqa_pairs_loss": 0.7160983681678772, "eval_openbookqa_pairs_runtime": 0.5767, "eval_openbookqa_pairs_samples_per_second": 221.961, "eval_openbookqa_pairs_steps_per_second": 3.468, "step": 81 }, { "epoch": 0.4550561797752809, "eval_msmarco_pairs_loss": 0.3454173803329468, "eval_msmarco_pairs_runtime": 1.2912, "eval_msmarco_pairs_samples_per_second": 99.134, "eval_msmarco_pairs_steps_per_second": 1.549, "step": 81 }, { "epoch": 0.4550561797752809, "eval_nq_pairs_loss": 0.4442503750324249, "eval_nq_pairs_runtime": 2.3854, "eval_nq_pairs_samples_per_second": 53.659, "eval_nq_pairs_steps_per_second": 0.838, "step": 81 }, { "epoch": 0.4550561797752809, "eval_trivia_pairs_loss": 0.9324482679367065, "eval_trivia_pairs_runtime": 4.4251, "eval_trivia_pairs_samples_per_second": 28.926, "eval_trivia_pairs_steps_per_second": 0.452, "step": 81 }, { "epoch": 0.4550561797752809, "eval_gooaq_pairs_loss": 0.6094165444374084, "eval_gooaq_pairs_runtime": 0.8751, "eval_gooaq_pairs_samples_per_second": 146.261, "eval_gooaq_pairs_steps_per_second": 2.285, "step": 81 }, { "epoch": 0.4550561797752809, "eval_paws-pos_loss": 0.024421451613307, "eval_paws-pos_runtime": 0.6865, "eval_paws-pos_samples_per_second": 186.444, "eval_paws-pos_steps_per_second": 2.913, "step": 81 }, { "epoch": 0.47191011235955055, "grad_norm": 3.1395561695098877, "learning_rate": 3.999675367909485e-05, "loss": 0.5801, "step": 84 }, { "epoch": 0.4887640449438202, "grad_norm": 2.7977917194366455, "learning_rate": 3.998701612152597e-05, "loss": 0.791, "step": 87 }, { "epoch": 0.5056179775280899, "grad_norm": 2.3682048320770264, "learning_rate": 3.997079154212493e-05, "loss": 0.6042, "step": 90 }, { "epoch": 0.5224719101123596, "grad_norm": 2.843482255935669, "learning_rate": 3.99480869635839e-05, "loss": 0.7559, "step": 93 }, { "epoch": 0.5393258426966292, "grad_norm": 2.7346785068511963, "learning_rate": 3.9918912213415936e-05, "loss": 0.6258, "step": 96 }, { "epoch": 0.5561797752808989, "grad_norm": 3.149007558822632, "learning_rate": 3.9883279919701226e-05, "loss": 0.8853, "step": 99 }, { "epoch": 0.5730337078651685, "grad_norm": 3.3424761295318604, "learning_rate": 3.9841205505621106e-05, "loss": 0.5947, "step": 102 }, { "epoch": 0.5898876404494382, "grad_norm": 2.6377146244049072, "learning_rate": 3.979270718278224e-05, "loss": 0.644, "step": 105 } ], "logging_steps": 3, "max_steps": 534, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 107, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 320, "trial_name": null, "trial_params": null }