{ "best_metric": 0.7978723404255319, "best_model_checkpoint": "xtreme_s_w2v2_t5lephone-small_minds14.en-US/checkpoint-560", "epoch": 149.95238095238096, "global_step": 750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 2.9999999999999997e-06, "loss": 2.6446, "step": 1 }, { "epoch": 0.38, "learning_rate": 5.999999999999999e-06, "loss": 2.638, "step": 2 }, { "epoch": 0.57, "learning_rate": 8.999999999999999e-06, "loss": 2.6331, "step": 3 }, { "epoch": 0.76, "learning_rate": 1.1999999999999999e-05, "loss": 2.6434, "step": 4 }, { "epoch": 0.95, "learning_rate": 1.4999999999999999e-05, "loss": 2.6546, "step": 5 }, { "epoch": 1.19, "learning_rate": 1.7999999999999997e-05, "loss": 3.3013, "step": 6 }, { "epoch": 1.38, "learning_rate": 2.1e-05, "loss": 2.6376, "step": 7 }, { "epoch": 1.57, "learning_rate": 2.3999999999999997e-05, "loss": 2.6285, "step": 8 }, { "epoch": 1.76, "learning_rate": 2.6999999999999996e-05, "loss": 2.6585, "step": 9 }, { "epoch": 1.95, "learning_rate": 2.9999999999999997e-05, "loss": 2.6296, "step": 10 }, { "epoch": 2.19, "learning_rate": 3.2999999999999996e-05, "loss": 3.2724, "step": 11 }, { "epoch": 2.38, "learning_rate": 3.5999999999999994e-05, "loss": 2.6163, "step": 12 }, { "epoch": 2.57, "learning_rate": 3.9e-05, "loss": 2.6382, "step": 13 }, { "epoch": 2.76, "learning_rate": 4.2e-05, "loss": 2.6799, "step": 14 }, { "epoch": 2.95, "learning_rate": 4.4999999999999996e-05, "loss": 2.6185, "step": 15 }, { "epoch": 3.19, "learning_rate": 4.7999999999999994e-05, "loss": 3.3198, "step": 16 }, { "epoch": 3.38, "learning_rate": 5.1e-05, "loss": 2.6003, "step": 17 }, { "epoch": 3.57, "learning_rate": 5.399999999999999e-05, "loss": 2.6428, "step": 18 }, { "epoch": 3.76, "learning_rate": 5.6999999999999996e-05, "loss": 2.6455, "step": 19 }, { "epoch": 3.95, "learning_rate": 5.6999999999999996e-05, "loss": 2.589, "step": 20 }, { "epoch": 3.95, "eval_accuracy": 0.08156028368794327, "eval_f1": 0.010772833723653397, "eval_loss": 2.6400604248046875, "eval_runtime": 20.6852, "eval_samples_per_second": 13.633, "eval_steps_per_second": 0.87, "step": 20 }, { "epoch": 4.19, "learning_rate": 5.9999999999999995e-05, "loss": 3.2552, "step": 21 }, { "epoch": 4.38, "learning_rate": 6.299999999999999e-05, "loss": 2.623, "step": 22 }, { "epoch": 4.57, "learning_rate": 6.599999999999999e-05, "loss": 2.6142, "step": 23 }, { "epoch": 4.76, "learning_rate": 6.9e-05, "loss": 2.6147, "step": 24 }, { "epoch": 4.95, "learning_rate": 7.199999999999999e-05, "loss": 2.6758, "step": 25 }, { "epoch": 5.19, "learning_rate": 7.5e-05, "loss": 3.2524, "step": 26 }, { "epoch": 5.38, "learning_rate": 7.8e-05, "loss": 2.6057, "step": 27 }, { "epoch": 5.57, "learning_rate": 8.1e-05, "loss": 2.6022, "step": 28 }, { "epoch": 5.76, "learning_rate": 8.4e-05, "loss": 2.5923, "step": 29 }, { "epoch": 5.95, "learning_rate": 8.4e-05, "loss": 2.585, "step": 30 }, { "epoch": 6.19, "learning_rate": 8.699999999999999e-05, "loss": 3.2676, "step": 31 }, { "epoch": 6.38, "learning_rate": 8.699999999999999e-05, "loss": 2.5804, "step": 32 }, { "epoch": 6.57, "learning_rate": 8.999999999999999e-05, "loss": 2.5847, "step": 33 }, { "epoch": 6.76, "learning_rate": 9.3e-05, "loss": 2.5798, "step": 34 }, { "epoch": 6.95, "learning_rate": 9.599999999999999e-05, "loss": 2.5483, "step": 35 }, { "epoch": 7.19, "learning_rate": 9.9e-05, "loss": 3.1732, "step": 36 }, { "epoch": 7.38, "learning_rate": 0.000102, "loss": 2.5273, "step": 37 }, { "epoch": 7.57, "learning_rate": 0.00010499999999999999, "loss": 2.6066, "step": 38 }, { "epoch": 7.76, "learning_rate": 0.00010799999999999998, "loss": 2.5901, "step": 39 }, { "epoch": 7.95, "learning_rate": 0.00010799999999999998, "loss": 2.5223, "step": 40 }, { "epoch": 7.95, "eval_accuracy": 0.08156028368794327, "eval_f1": 0.03386127045143884, "eval_loss": 2.6493048667907715, "eval_runtime": 19.979, "eval_samples_per_second": 14.115, "eval_steps_per_second": 0.901, "step": 40 }, { "epoch": 8.19, "learning_rate": 0.00011099999999999999, "loss": 3.1703, "step": 41 }, { "epoch": 8.38, "learning_rate": 0.00011399999999999999, "loss": 2.5396, "step": 42 }, { "epoch": 8.57, "learning_rate": 0.000117, "loss": 2.5271, "step": 43 }, { "epoch": 8.76, "learning_rate": 0.00011999999999999999, "loss": 2.491, "step": 44 }, { "epoch": 8.95, "learning_rate": 0.00012299999999999998, "loss": 2.5793, "step": 45 }, { "epoch": 9.19, "learning_rate": 0.00012599999999999997, "loss": 3.1367, "step": 46 }, { "epoch": 9.38, "learning_rate": 0.000129, "loss": 2.4544, "step": 47 }, { "epoch": 9.57, "learning_rate": 0.00013199999999999998, "loss": 2.5115, "step": 48 }, { "epoch": 9.76, "learning_rate": 0.000135, "loss": 2.5844, "step": 49 }, { "epoch": 9.95, "learning_rate": 0.000138, "loss": 2.4266, "step": 50 }, { "epoch": 10.19, "learning_rate": 0.00014099999999999998, "loss": 3.1072, "step": 51 }, { "epoch": 10.38, "learning_rate": 0.00014399999999999998, "loss": 2.5217, "step": 52 }, { "epoch": 10.57, "learning_rate": 0.000147, "loss": 2.4175, "step": 53 }, { "epoch": 10.76, "learning_rate": 0.00015, "loss": 2.5184, "step": 54 }, { "epoch": 10.95, "learning_rate": 0.00015299999999999998, "loss": 2.4507, "step": 55 }, { "epoch": 11.19, "learning_rate": 0.000156, "loss": 3.0183, "step": 56 }, { "epoch": 11.38, "learning_rate": 0.000159, "loss": 2.3708, "step": 57 }, { "epoch": 11.57, "learning_rate": 0.000162, "loss": 2.4474, "step": 58 }, { "epoch": 11.76, "learning_rate": 0.000165, "loss": 2.434, "step": 59 }, { "epoch": 11.95, "learning_rate": 0.000168, "loss": 2.5085, "step": 60 }, { "epoch": 11.95, "eval_accuracy": 0.10283687943262411, "eval_f1": 0.053938617485886006, "eval_loss": 2.6236460208892822, "eval_runtime": 19.9705, "eval_samples_per_second": 14.121, "eval_steps_per_second": 0.901, "step": 60 }, { "epoch": 12.19, "learning_rate": 0.00017099999999999998, "loss": 3.0425, "step": 61 }, { "epoch": 12.38, "learning_rate": 0.00017399999999999997, "loss": 2.4193, "step": 62 }, { "epoch": 12.57, "learning_rate": 0.00017699999999999997, "loss": 2.4185, "step": 63 }, { "epoch": 12.76, "learning_rate": 0.00017999999999999998, "loss": 2.3635, "step": 64 }, { "epoch": 12.95, "learning_rate": 0.00018299999999999998, "loss": 2.4296, "step": 65 }, { "epoch": 13.19, "learning_rate": 0.000186, "loss": 3.1505, "step": 66 }, { "epoch": 13.38, "learning_rate": 0.00018899999999999999, "loss": 2.4116, "step": 67 }, { "epoch": 13.57, "learning_rate": 0.00019199999999999998, "loss": 2.3003, "step": 68 }, { "epoch": 13.76, "learning_rate": 0.000195, "loss": 2.2922, "step": 69 }, { "epoch": 13.95, "learning_rate": 0.000198, "loss": 2.3293, "step": 70 }, { "epoch": 14.19, "learning_rate": 0.000201, "loss": 2.6521, "step": 71 }, { "epoch": 14.38, "learning_rate": 0.000204, "loss": 2.239, "step": 72 }, { "epoch": 14.57, "learning_rate": 0.00020699999999999996, "loss": 2.155, "step": 73 }, { "epoch": 14.76, "learning_rate": 0.00020999999999999998, "loss": 2.1629, "step": 74 }, { "epoch": 14.95, "learning_rate": 0.00021299999999999997, "loss": 2.206, "step": 75 }, { "epoch": 15.19, "learning_rate": 0.00021599999999999996, "loss": 2.773, "step": 76 }, { "epoch": 15.38, "learning_rate": 0.00021899999999999998, "loss": 1.9279, "step": 77 }, { "epoch": 15.57, "learning_rate": 0.00022199999999999998, "loss": 2.0716, "step": 78 }, { "epoch": 15.76, "learning_rate": 0.000225, "loss": 2.0931, "step": 79 }, { "epoch": 15.95, "learning_rate": 0.00022799999999999999, "loss": 2.1252, "step": 80 }, { "epoch": 15.95, "eval_accuracy": 0.16666666666666666, "eval_f1": 0.1457801226478514, "eval_loss": 2.500581741333008, "eval_runtime": 19.8541, "eval_samples_per_second": 14.204, "eval_steps_per_second": 0.907, "step": 80 }, { "epoch": 16.19, "learning_rate": 0.00023099999999999998, "loss": 2.3222, "step": 81 }, { "epoch": 16.38, "learning_rate": 0.000234, "loss": 1.8863, "step": 82 }, { "epoch": 16.57, "learning_rate": 0.000237, "loss": 1.9207, "step": 83 }, { "epoch": 16.76, "learning_rate": 0.00023999999999999998, "loss": 1.7601, "step": 84 }, { "epoch": 16.95, "learning_rate": 0.000243, "loss": 1.848, "step": 85 }, { "epoch": 17.19, "learning_rate": 0.00024599999999999996, "loss": 2.0474, "step": 86 }, { "epoch": 17.38, "learning_rate": 0.000249, "loss": 1.6883, "step": 87 }, { "epoch": 17.57, "learning_rate": 0.00025199999999999995, "loss": 1.7101, "step": 88 }, { "epoch": 17.76, "learning_rate": 0.00025499999999999996, "loss": 1.7042, "step": 89 }, { "epoch": 17.95, "learning_rate": 0.000258, "loss": 1.459, "step": 90 }, { "epoch": 18.19, "learning_rate": 0.000261, "loss": 1.7655, "step": 91 }, { "epoch": 18.38, "learning_rate": 0.00026399999999999997, "loss": 1.6881, "step": 92 }, { "epoch": 18.57, "learning_rate": 0.000267, "loss": 1.4786, "step": 93 }, { "epoch": 18.76, "learning_rate": 0.00027, "loss": 1.5071, "step": 94 }, { "epoch": 18.95, "learning_rate": 0.00027299999999999997, "loss": 1.5058, "step": 95 }, { "epoch": 19.19, "learning_rate": 0.000276, "loss": 1.5096, "step": 96 }, { "epoch": 19.38, "learning_rate": 0.000279, "loss": 1.4937, "step": 97 }, { "epoch": 19.57, "learning_rate": 0.00028199999999999997, "loss": 1.4655, "step": 98 }, { "epoch": 19.76, "learning_rate": 0.000285, "loss": 1.4129, "step": 99 }, { "epoch": 19.95, "learning_rate": 0.00028799999999999995, "loss": 1.3711, "step": 100 }, { "epoch": 19.95, "eval_accuracy": 0.28368794326241137, "eval_f1": 0.2343690351119023, "eval_loss": 2.2712228298187256, "eval_runtime": 20.0273, "eval_samples_per_second": 14.081, "eval_steps_per_second": 0.899, "step": 100 }, { "epoch": 20.19, "learning_rate": 0.00029099999999999997, "loss": 1.8385, "step": 101 }, { "epoch": 20.38, "learning_rate": 0.000294, "loss": 1.4425, "step": 102 }, { "epoch": 20.57, "learning_rate": 0.00029699999999999996, "loss": 1.2965, "step": 103 }, { "epoch": 20.76, "learning_rate": 0.0003, "loss": 1.1531, "step": 104 }, { "epoch": 20.95, "learning_rate": 0.00029953846153846155, "loss": 0.9991, "step": 105 }, { "epoch": 21.19, "learning_rate": 0.00029907692307692307, "loss": 1.7072, "step": 106 }, { "epoch": 21.38, "learning_rate": 0.0002986153846153846, "loss": 1.057, "step": 107 }, { "epoch": 21.57, "learning_rate": 0.0002981538461538461, "loss": 1.1977, "step": 108 }, { "epoch": 21.76, "learning_rate": 0.0002976923076923077, "loss": 1.3346, "step": 109 }, { "epoch": 21.95, "learning_rate": 0.0002972307692307692, "loss": 1.0087, "step": 110 }, { "epoch": 22.19, "learning_rate": 0.00029676923076923077, "loss": 1.0643, "step": 111 }, { "epoch": 22.38, "learning_rate": 0.0002963076923076923, "loss": 0.976, "step": 112 }, { "epoch": 22.57, "learning_rate": 0.0002958461538461538, "loss": 1.0169, "step": 113 }, { "epoch": 22.76, "learning_rate": 0.0002953846153846154, "loss": 0.9714, "step": 114 }, { "epoch": 22.95, "learning_rate": 0.0002949230769230769, "loss": 0.9259, "step": 115 }, { "epoch": 23.19, "learning_rate": 0.00029446153846153847, "loss": 1.1425, "step": 116 }, { "epoch": 23.38, "learning_rate": 0.000294, "loss": 0.7541, "step": 117 }, { "epoch": 23.57, "learning_rate": 0.0002935384615384615, "loss": 0.9185, "step": 118 }, { "epoch": 23.76, "learning_rate": 0.00029307692307692303, "loss": 0.9087, "step": 119 }, { "epoch": 23.95, "learning_rate": 0.0002926153846153846, "loss": 1.5092, "step": 120 }, { "epoch": 23.95, "eval_accuracy": 0.39361702127659576, "eval_f1": 0.3630891177172831, "eval_loss": 2.059882164001465, "eval_runtime": 20.0966, "eval_samples_per_second": 14.032, "eval_steps_per_second": 0.896, "step": 120 }, { "epoch": 24.19, "learning_rate": 0.0002921538461538461, "loss": 1.1087, "step": 121 }, { "epoch": 24.38, "learning_rate": 0.0002916923076923077, "loss": 0.7721, "step": 122 }, { "epoch": 24.57, "learning_rate": 0.0002912307692307692, "loss": 0.7697, "step": 123 }, { "epoch": 24.76, "learning_rate": 0.00029076923076923073, "loss": 0.7947, "step": 124 }, { "epoch": 24.95, "learning_rate": 0.0002903076923076923, "loss": 0.6434, "step": 125 }, { "epoch": 25.19, "learning_rate": 0.0002898461538461538, "loss": 1.0976, "step": 126 }, { "epoch": 25.38, "learning_rate": 0.0002893846153846154, "loss": 1.1569, "step": 127 }, { "epoch": 25.57, "learning_rate": 0.0002889230769230769, "loss": 0.9648, "step": 128 }, { "epoch": 25.76, "learning_rate": 0.00028846153846153843, "loss": 0.5878, "step": 129 }, { "epoch": 25.95, "learning_rate": 0.00028799999999999995, "loss": 0.6146, "step": 130 }, { "epoch": 26.19, "learning_rate": 0.0002875384615384615, "loss": 0.6901, "step": 131 }, { "epoch": 26.38, "learning_rate": 0.00028707692307692305, "loss": 0.6625, "step": 132 }, { "epoch": 26.57, "learning_rate": 0.0002866153846153846, "loss": 0.6783, "step": 133 }, { "epoch": 26.76, "learning_rate": 0.00028615384615384614, "loss": 0.6966, "step": 134 }, { "epoch": 26.95, "learning_rate": 0.00028569230769230766, "loss": 0.7059, "step": 135 }, { "epoch": 27.19, "learning_rate": 0.00028523076923076923, "loss": 0.6308, "step": 136 }, { "epoch": 27.38, "learning_rate": 0.00028476923076923075, "loss": 0.5333, "step": 137 }, { "epoch": 27.57, "learning_rate": 0.00028430769230769227, "loss": 0.9039, "step": 138 }, { "epoch": 27.76, "learning_rate": 0.0002838461538461538, "loss": 0.4599, "step": 139 }, { "epoch": 27.95, "learning_rate": 0.00028338461538461536, "loss": 0.4962, "step": 140 }, { "epoch": 27.95, "eval_accuracy": 0.48936170212765956, "eval_f1": 0.488144790245034, "eval_loss": 1.8474787473678589, "eval_runtime": 19.8169, "eval_samples_per_second": 14.23, "eval_steps_per_second": 0.908, "step": 140 }, { "epoch": 28.19, "learning_rate": 0.0002829230769230769, "loss": 0.6536, "step": 141 }, { "epoch": 28.38, "learning_rate": 0.00028246153846153845, "loss": 0.4908, "step": 142 }, { "epoch": 28.57, "learning_rate": 0.00028199999999999997, "loss": 0.4469, "step": 143 }, { "epoch": 28.76, "learning_rate": 0.0002815384615384615, "loss": 0.7811, "step": 144 }, { "epoch": 28.95, "learning_rate": 0.00028107692307692306, "loss": 0.3092, "step": 145 }, { "epoch": 29.19, "learning_rate": 0.0002806153846153846, "loss": 0.479, "step": 146 }, { "epoch": 29.38, "learning_rate": 0.00028015384615384615, "loss": 0.6959, "step": 147 }, { "epoch": 29.57, "learning_rate": 0.00027969230769230767, "loss": 0.4072, "step": 148 }, { "epoch": 29.76, "learning_rate": 0.0002792307692307692, "loss": 0.5718, "step": 149 }, { "epoch": 29.95, "learning_rate": 0.0002787692307692307, "loss": 0.4537, "step": 150 }, { "epoch": 30.19, "learning_rate": 0.0002783076923076923, "loss": 0.7326, "step": 151 }, { "epoch": 30.38, "learning_rate": 0.0002778461538461538, "loss": 0.3294, "step": 152 }, { "epoch": 30.57, "learning_rate": 0.0002773846153846154, "loss": 0.6189, "step": 153 }, { "epoch": 30.76, "learning_rate": 0.0002769230769230769, "loss": 0.4525, "step": 154 }, { "epoch": 30.95, "learning_rate": 0.0002764615384615384, "loss": 0.3617, "step": 155 }, { "epoch": 31.19, "learning_rate": 0.000276, "loss": 0.4524, "step": 156 }, { "epoch": 31.38, "learning_rate": 0.0002755384615384615, "loss": 0.3273, "step": 157 }, { "epoch": 31.57, "learning_rate": 0.0002750769230769231, "loss": 0.3527, "step": 158 }, { "epoch": 31.76, "learning_rate": 0.0002746153846153846, "loss": 0.2702, "step": 159 }, { "epoch": 31.95, "learning_rate": 0.0002741538461538461, "loss": 0.4169, "step": 160 }, { "epoch": 31.95, "eval_accuracy": 0.5141843971631206, "eval_f1": 0.5357717839214351, "eval_loss": 1.8262206315994263, "eval_runtime": 19.8388, "eval_samples_per_second": 14.215, "eval_steps_per_second": 0.907, "step": 160 }, { "epoch": 32.19, "learning_rate": 0.00027369230769230764, "loss": 0.6815, "step": 161 }, { "epoch": 32.38, "learning_rate": 0.0002732307692307692, "loss": 0.3709, "step": 162 }, { "epoch": 32.57, "learning_rate": 0.00027276923076923073, "loss": 0.3857, "step": 163 }, { "epoch": 32.76, "learning_rate": 0.0002723076923076923, "loss": 0.2424, "step": 164 }, { "epoch": 32.95, "learning_rate": 0.0002718461538461538, "loss": 0.3207, "step": 165 }, { "epoch": 33.19, "learning_rate": 0.00027138461538461534, "loss": 0.3938, "step": 166 }, { "epoch": 33.38, "learning_rate": 0.0002709230769230769, "loss": 0.4451, "step": 167 }, { "epoch": 33.57, "learning_rate": 0.00027046153846153843, "loss": 0.3841, "step": 168 }, { "epoch": 33.76, "learning_rate": 0.00027, "loss": 0.3128, "step": 169 }, { "epoch": 33.95, "learning_rate": 0.0002695384615384615, "loss": 0.261, "step": 170 }, { "epoch": 34.19, "learning_rate": 0.00026907692307692304, "loss": 0.296, "step": 171 }, { "epoch": 34.38, "learning_rate": 0.00026861538461538456, "loss": 0.3021, "step": 172 }, { "epoch": 34.57, "learning_rate": 0.00026815384615384613, "loss": 0.138, "step": 173 }, { "epoch": 34.76, "learning_rate": 0.0002676923076923077, "loss": 0.1447, "step": 174 }, { "epoch": 34.95, "learning_rate": 0.0002672307692307692, "loss": 0.3438, "step": 175 }, { "epoch": 35.19, "learning_rate": 0.00026676923076923074, "loss": 0.1804, "step": 176 }, { "epoch": 35.38, "learning_rate": 0.00026630769230769226, "loss": 0.2828, "step": 177 }, { "epoch": 35.57, "learning_rate": 0.00026584615384615384, "loss": 0.1647, "step": 178 }, { "epoch": 35.76, "learning_rate": 0.00026538461538461536, "loss": 0.1497, "step": 179 }, { "epoch": 35.95, "learning_rate": 0.00026492307692307693, "loss": 0.1579, "step": 180 }, { "epoch": 35.95, "eval_accuracy": 0.6028368794326241, "eval_f1": 0.5966633522864526, "eval_loss": 1.6481057405471802, "eval_runtime": 19.8395, "eval_samples_per_second": 14.214, "eval_steps_per_second": 0.907, "step": 180 }, { "epoch": 36.19, "learning_rate": 0.00026446153846153845, "loss": 0.4555, "step": 181 }, { "epoch": 36.38, "learning_rate": 0.00026399999999999997, "loss": 0.1293, "step": 182 }, { "epoch": 36.57, "learning_rate": 0.00026353846153846154, "loss": 0.1339, "step": 183 }, { "epoch": 36.76, "learning_rate": 0.00026307692307692306, "loss": 0.129, "step": 184 }, { "epoch": 36.95, "learning_rate": 0.00026261538461538463, "loss": 0.3687, "step": 185 }, { "epoch": 37.19, "learning_rate": 0.00026215384615384615, "loss": 0.2163, "step": 186 }, { "epoch": 37.38, "learning_rate": 0.00026169230769230767, "loss": 0.24, "step": 187 }, { "epoch": 37.57, "learning_rate": 0.0002612307692307692, "loss": 0.1009, "step": 188 }, { "epoch": 37.76, "learning_rate": 0.00026076923076923076, "loss": 0.2447, "step": 189 }, { "epoch": 37.95, "learning_rate": 0.0002603076923076923, "loss": 0.3268, "step": 190 }, { "epoch": 38.19, "learning_rate": 0.00025984615384615385, "loss": 0.2535, "step": 191 }, { "epoch": 38.38, "learning_rate": 0.00025938461538461537, "loss": 0.0901, "step": 192 }, { "epoch": 38.57, "learning_rate": 0.0002589230769230769, "loss": 0.1204, "step": 193 }, { "epoch": 38.76, "learning_rate": 0.00025846153846153846, "loss": 0.4441, "step": 194 }, { "epoch": 38.95, "learning_rate": 0.000258, "loss": 0.5799, "step": 195 }, { "epoch": 39.19, "learning_rate": 0.0002575384615384615, "loss": 0.3594, "step": 196 }, { "epoch": 39.38, "learning_rate": 0.000257076923076923, "loss": 0.3127, "step": 197 }, { "epoch": 39.57, "learning_rate": 0.0002566153846153846, "loss": 0.1884, "step": 198 }, { "epoch": 39.76, "learning_rate": 0.0002561538461538461, "loss": 0.3896, "step": 199 }, { "epoch": 39.95, "learning_rate": 0.0002556923076923077, "loss": 0.0927, "step": 200 }, { "epoch": 39.95, "eval_accuracy": 0.6560283687943262, "eval_f1": 0.6748379562199183, "eval_loss": 1.4469894170761108, "eval_runtime": 19.808, "eval_samples_per_second": 14.237, "eval_steps_per_second": 0.909, "step": 200 }, { "epoch": 40.19, "learning_rate": 0.0002552307692307692, "loss": 0.2098, "step": 201 }, { "epoch": 40.38, "learning_rate": 0.0002547692307692307, "loss": 0.2339, "step": 202 }, { "epoch": 40.57, "learning_rate": 0.0002543076923076923, "loss": 0.3805, "step": 203 }, { "epoch": 40.76, "learning_rate": 0.0002538461538461538, "loss": 0.3653, "step": 204 }, { "epoch": 40.95, "learning_rate": 0.0002533846153846154, "loss": 0.3083, "step": 205 }, { "epoch": 41.19, "learning_rate": 0.0002529230769230769, "loss": 0.2298, "step": 206 }, { "epoch": 41.38, "learning_rate": 0.0002524615384615384, "loss": 0.2759, "step": 207 }, { "epoch": 41.57, "learning_rate": 0.00025199999999999995, "loss": 0.2716, "step": 208 }, { "epoch": 41.76, "learning_rate": 0.0002515384615384615, "loss": 0.0739, "step": 209 }, { "epoch": 41.95, "learning_rate": 0.00025107692307692304, "loss": 0.389, "step": 210 }, { "epoch": 42.19, "learning_rate": 0.0002506153846153846, "loss": 0.3143, "step": 211 }, { "epoch": 42.38, "learning_rate": 0.00025015384615384613, "loss": 0.6487, "step": 212 }, { "epoch": 42.57, "learning_rate": 0.00024969230769230765, "loss": 0.3041, "step": 213 }, { "epoch": 42.76, "learning_rate": 0.0002492307692307692, "loss": 0.0614, "step": 214 }, { "epoch": 42.95, "learning_rate": 0.00024876923076923074, "loss": 0.3855, "step": 215 }, { "epoch": 43.19, "learning_rate": 0.0002483076923076923, "loss": 0.2741, "step": 216 }, { "epoch": 43.38, "learning_rate": 0.00024784615384615383, "loss": 0.3597, "step": 217 }, { "epoch": 43.57, "learning_rate": 0.00024738461538461535, "loss": 0.084, "step": 218 }, { "epoch": 43.76, "learning_rate": 0.00024692307692307687, "loss": 0.064, "step": 219 }, { "epoch": 43.95, "learning_rate": 0.00024646153846153844, "loss": 0.1363, "step": 220 }, { "epoch": 43.95, "eval_accuracy": 0.6879432624113475, "eval_f1": 0.6835880422352972, "eval_loss": 1.272509217262268, "eval_runtime": 19.828, "eval_samples_per_second": 14.222, "eval_steps_per_second": 0.908, "step": 220 }, { "epoch": 44.19, "learning_rate": 0.00024599999999999996, "loss": 0.1101, "step": 221 }, { "epoch": 44.38, "learning_rate": 0.00024553846153846154, "loss": 0.2001, "step": 222 }, { "epoch": 44.57, "learning_rate": 0.00024507692307692305, "loss": 0.0606, "step": 223 }, { "epoch": 44.76, "learning_rate": 0.0002446153846153846, "loss": 0.0578, "step": 224 }, { "epoch": 44.95, "learning_rate": 0.00024415384615384615, "loss": 0.0547, "step": 225 }, { "epoch": 45.19, "learning_rate": 0.00024369230769230767, "loss": 0.37, "step": 226 }, { "epoch": 45.38, "learning_rate": 0.0002432307692307692, "loss": 0.2059, "step": 227 }, { "epoch": 45.57, "learning_rate": 0.00024276923076923073, "loss": 0.0543, "step": 228 }, { "epoch": 45.76, "learning_rate": 0.0002423076923076923, "loss": 0.0749, "step": 229 }, { "epoch": 45.95, "learning_rate": 0.00024184615384615382, "loss": 0.1723, "step": 230 }, { "epoch": 46.19, "learning_rate": 0.00024138461538461537, "loss": 0.062, "step": 231 }, { "epoch": 46.38, "learning_rate": 0.0002409230769230769, "loss": 0.0449, "step": 232 }, { "epoch": 46.57, "learning_rate": 0.00024046153846153843, "loss": 0.0614, "step": 233 }, { "epoch": 46.76, "learning_rate": 0.00023999999999999998, "loss": 0.0559, "step": 234 }, { "epoch": 46.95, "learning_rate": 0.00023953846153846152, "loss": 0.0845, "step": 235 }, { "epoch": 47.19, "learning_rate": 0.00023907692307692307, "loss": 0.0529, "step": 236 }, { "epoch": 47.38, "learning_rate": 0.0002386153846153846, "loss": 0.3006, "step": 237 }, { "epoch": 47.57, "learning_rate": 0.00023815384615384614, "loss": 0.0423, "step": 238 }, { "epoch": 47.76, "learning_rate": 0.00023769230769230765, "loss": 0.2572, "step": 239 }, { "epoch": 47.95, "learning_rate": 0.0002372307692307692, "loss": 0.1324, "step": 240 }, { "epoch": 47.95, "eval_accuracy": 0.6702127659574468, "eval_f1": 0.6652893100108913, "eval_loss": 1.433002233505249, "eval_runtime": 19.9069, "eval_samples_per_second": 14.166, "eval_steps_per_second": 0.904, "step": 240 }, { "epoch": 48.19, "learning_rate": 0.00023676923076923075, "loss": 0.3214, "step": 241 }, { "epoch": 48.38, "learning_rate": 0.0002363076923076923, "loss": 0.0891, "step": 242 }, { "epoch": 48.57, "learning_rate": 0.00023584615384615384, "loss": 0.036, "step": 243 }, { "epoch": 48.76, "learning_rate": 0.00023538461538461536, "loss": 0.0393, "step": 244 }, { "epoch": 48.95, "learning_rate": 0.0002349230769230769, "loss": 0.1088, "step": 245 }, { "epoch": 49.19, "learning_rate": 0.00023446153846153842, "loss": 0.1833, "step": 246 }, { "epoch": 49.38, "learning_rate": 0.000234, "loss": 0.1241, "step": 247 }, { "epoch": 49.57, "learning_rate": 0.00023353846153846151, "loss": 0.0358, "step": 248 }, { "epoch": 49.76, "learning_rate": 0.00023307692307692306, "loss": 0.1539, "step": 249 }, { "epoch": 49.95, "learning_rate": 0.00023261538461538458, "loss": 0.0356, "step": 250 }, { "epoch": 50.19, "learning_rate": 0.00023215384615384613, "loss": 0.0602, "step": 251 }, { "epoch": 50.38, "learning_rate": 0.0002316923076923077, "loss": 0.1208, "step": 252 }, { "epoch": 50.57, "learning_rate": 0.00023123076923076922, "loss": 0.3355, "step": 253 }, { "epoch": 50.76, "learning_rate": 0.00023076923076923076, "loss": 0.0316, "step": 254 }, { "epoch": 50.95, "learning_rate": 0.00023030769230769228, "loss": 0.0541, "step": 255 }, { "epoch": 51.19, "learning_rate": 0.00022984615384615383, "loss": 0.0597, "step": 256 }, { "epoch": 51.38, "learning_rate": 0.00022938461538461535, "loss": 0.0593, "step": 257 }, { "epoch": 51.57, "learning_rate": 0.00022892307692307692, "loss": 0.0337, "step": 258 }, { "epoch": 51.76, "learning_rate": 0.00022846153846153844, "loss": 0.0286, "step": 259 }, { "epoch": 51.95, "learning_rate": 0.00022799999999999999, "loss": 0.0294, "step": 260 }, { "epoch": 51.95, "eval_accuracy": 0.7163120567375887, "eval_f1": 0.7079282129386951, "eval_loss": 1.297837734222412, "eval_runtime": 19.8116, "eval_samples_per_second": 14.234, "eval_steps_per_second": 0.909, "step": 260 }, { "epoch": 52.19, "learning_rate": 0.0002275384615384615, "loss": 0.0523, "step": 261 }, { "epoch": 52.38, "learning_rate": 0.00022707692307692305, "loss": 0.221, "step": 262 }, { "epoch": 52.57, "learning_rate": 0.0002266153846153846, "loss": 0.1214, "step": 263 }, { "epoch": 52.76, "learning_rate": 0.00022615384615384614, "loss": 0.3424, "step": 264 }, { "epoch": 52.95, "learning_rate": 0.0002256923076923077, "loss": 0.0862, "step": 265 }, { "epoch": 53.19, "learning_rate": 0.0002252307692307692, "loss": 0.0643, "step": 266 }, { "epoch": 53.38, "learning_rate": 0.00022476923076923075, "loss": 0.0275, "step": 267 }, { "epoch": 53.57, "learning_rate": 0.00022430769230769227, "loss": 0.2242, "step": 268 }, { "epoch": 53.76, "learning_rate": 0.00022384615384615382, "loss": 0.0681, "step": 269 }, { "epoch": 53.95, "learning_rate": 0.00022338461538461536, "loss": 0.3556, "step": 270 }, { "epoch": 54.19, "learning_rate": 0.0002229230769230769, "loss": 0.2421, "step": 271 }, { "epoch": 54.38, "learning_rate": 0.00022246153846153846, "loss": 0.1901, "step": 272 }, { "epoch": 54.57, "learning_rate": 0.00022199999999999998, "loss": 0.031, "step": 273 }, { "epoch": 54.76, "learning_rate": 0.00022153846153846152, "loss": 0.0291, "step": 274 }, { "epoch": 54.95, "learning_rate": 0.00022107692307692304, "loss": 0.279, "step": 275 }, { "epoch": 55.19, "learning_rate": 0.0002206153846153846, "loss": 0.0335, "step": 276 }, { "epoch": 55.38, "learning_rate": 0.00022015384615384613, "loss": 0.3136, "step": 277 }, { "epoch": 55.57, "learning_rate": 0.00021969230769230768, "loss": 0.1364, "step": 278 }, { "epoch": 55.76, "learning_rate": 0.0002192307692307692, "loss": 0.0385, "step": 279 }, { "epoch": 55.95, "learning_rate": 0.00021876923076923074, "loss": 0.0326, "step": 280 }, { "epoch": 55.95, "eval_accuracy": 0.6879432624113475, "eval_f1": 0.6823408219987953, "eval_loss": 1.386937141418457, "eval_runtime": 19.8275, "eval_samples_per_second": 14.223, "eval_steps_per_second": 0.908, "step": 280 }, { "epoch": 56.19, "learning_rate": 0.00021830769230769226, "loss": 0.068, "step": 281 }, { "epoch": 56.38, "learning_rate": 0.00021784615384615383, "loss": 0.0258, "step": 282 }, { "epoch": 56.57, "learning_rate": 0.00021738461538461538, "loss": 0.0254, "step": 283 }, { "epoch": 56.76, "learning_rate": 0.0002169230769230769, "loss": 0.0365, "step": 284 }, { "epoch": 56.95, "learning_rate": 0.00021646153846153845, "loss": 0.0257, "step": 285 }, { "epoch": 57.19, "learning_rate": 0.00021599999999999996, "loss": 0.2682, "step": 286 }, { "epoch": 57.38, "learning_rate": 0.00021553846153846154, "loss": 0.0247, "step": 287 }, { "epoch": 57.57, "learning_rate": 0.00021507692307692306, "loss": 0.026, "step": 288 }, { "epoch": 57.76, "learning_rate": 0.0002146153846153846, "loss": 0.0264, "step": 289 }, { "epoch": 57.95, "learning_rate": 0.00021415384615384612, "loss": 0.0299, "step": 290 }, { "epoch": 58.19, "learning_rate": 0.00021369230769230767, "loss": 0.1304, "step": 291 }, { "epoch": 58.38, "learning_rate": 0.00021323076923076921, "loss": 0.0216, "step": 292 }, { "epoch": 58.57, "learning_rate": 0.00021276923076923076, "loss": 0.0417, "step": 293 }, { "epoch": 58.76, "learning_rate": 0.0002123076923076923, "loss": 0.034, "step": 294 }, { "epoch": 58.95, "learning_rate": 0.00021184615384615382, "loss": 0.0213, "step": 295 }, { "epoch": 59.19, "learning_rate": 0.00021138461538461537, "loss": 0.026, "step": 296 }, { "epoch": 59.38, "learning_rate": 0.0002109230769230769, "loss": 0.223, "step": 297 }, { "epoch": 59.57, "learning_rate": 0.00021046153846153844, "loss": 0.0356, "step": 298 }, { "epoch": 59.76, "learning_rate": 0.00020999999999999998, "loss": 0.0222, "step": 299 }, { "epoch": 59.95, "learning_rate": 0.00020953846153846153, "loss": 0.0444, "step": 300 }, { "epoch": 59.95, "eval_accuracy": 0.6985815602836879, "eval_f1": 0.7051247082537795, "eval_loss": 1.5764325857162476, "eval_runtime": 19.864, "eval_samples_per_second": 14.197, "eval_steps_per_second": 0.906, "step": 300 }, { "epoch": 60.19, "learning_rate": 0.00020907692307692307, "loss": 0.1866, "step": 301 }, { "epoch": 60.38, "learning_rate": 0.0002086153846153846, "loss": 0.1206, "step": 302 }, { "epoch": 60.57, "learning_rate": 0.00020815384615384614, "loss": 0.0799, "step": 303 }, { "epoch": 60.76, "learning_rate": 0.00020769230769230766, "loss": 0.0215, "step": 304 }, { "epoch": 60.95, "learning_rate": 0.00020723076923076923, "loss": 0.0208, "step": 305 }, { "epoch": 61.19, "learning_rate": 0.00020676923076923075, "loss": 0.2188, "step": 306 }, { "epoch": 61.38, "learning_rate": 0.0002063076923076923, "loss": 0.3451, "step": 307 }, { "epoch": 61.57, "learning_rate": 0.00020584615384615381, "loss": 0.0308, "step": 308 }, { "epoch": 61.76, "learning_rate": 0.00020538461538461536, "loss": 0.034, "step": 309 }, { "epoch": 61.95, "learning_rate": 0.00020492307692307688, "loss": 0.1951, "step": 310 }, { "epoch": 62.19, "learning_rate": 0.00020446153846153845, "loss": 0.0278, "step": 311 }, { "epoch": 62.38, "learning_rate": 0.000204, "loss": 0.5601, "step": 312 }, { "epoch": 62.57, "learning_rate": 0.00020353846153846152, "loss": 0.4956, "step": 313 }, { "epoch": 62.76, "learning_rate": 0.00020307692307692306, "loss": 0.0908, "step": 314 }, { "epoch": 62.95, "learning_rate": 0.00020261538461538458, "loss": 0.6746, "step": 315 }, { "epoch": 63.19, "learning_rate": 0.00020215384615384616, "loss": 0.0353, "step": 316 }, { "epoch": 63.38, "learning_rate": 0.00020169230769230767, "loss": 0.0237, "step": 317 }, { "epoch": 63.57, "learning_rate": 0.00020123076923076922, "loss": 0.0209, "step": 318 }, { "epoch": 63.76, "learning_rate": 0.00020076923076923074, "loss": 0.1865, "step": 319 }, { "epoch": 63.95, "learning_rate": 0.00020030769230769229, "loss": 0.0527, "step": 320 }, { "epoch": 63.95, "eval_accuracy": 0.5851063829787234, "eval_f1": 0.5899177646212178, "eval_loss": 2.2013132572174072, "eval_runtime": 19.8591, "eval_samples_per_second": 14.2, "eval_steps_per_second": 0.906, "step": 320 }, { "epoch": 64.19, "learning_rate": 0.00019984615384615386, "loss": 0.3311, "step": 321 }, { "epoch": 64.38, "learning_rate": 0.00019938461538461538, "loss": 0.2145, "step": 322 }, { "epoch": 64.57, "learning_rate": 0.00019892307692307692, "loss": 0.0205, "step": 323 }, { "epoch": 64.76, "learning_rate": 0.00019846153846153844, "loss": 0.0362, "step": 324 }, { "epoch": 64.95, "learning_rate": 0.000198, "loss": 0.2112, "step": 325 }, { "epoch": 65.19, "learning_rate": 0.0001975384615384615, "loss": 0.1292, "step": 326 }, { "epoch": 65.38, "learning_rate": 0.00019707692307692305, "loss": 0.0185, "step": 327 }, { "epoch": 65.57, "learning_rate": 0.0001966153846153846, "loss": 0.1552, "step": 328 }, { "epoch": 65.76, "learning_rate": 0.00019615384615384615, "loss": 0.2192, "step": 329 }, { "epoch": 65.95, "learning_rate": 0.00019569230769230766, "loss": 0.3125, "step": 330 }, { "epoch": 66.19, "learning_rate": 0.0001952307692307692, "loss": 0.0252, "step": 331 }, { "epoch": 66.38, "learning_rate": 0.00019476923076923076, "loss": 0.0311, "step": 332 }, { "epoch": 66.57, "learning_rate": 0.00019430769230769227, "loss": 0.0281, "step": 333 }, { "epoch": 66.76, "learning_rate": 0.00019384615384615385, "loss": 0.0186, "step": 334 }, { "epoch": 66.95, "learning_rate": 0.00019338461538461537, "loss": 0.0242, "step": 335 }, { "epoch": 67.19, "learning_rate": 0.0001929230769230769, "loss": 0.2067, "step": 336 }, { "epoch": 67.38, "learning_rate": 0.00019246153846153843, "loss": 0.024, "step": 337 }, { "epoch": 67.57, "learning_rate": 0.00019199999999999998, "loss": 0.021, "step": 338 }, { "epoch": 67.76, "learning_rate": 0.0001915384615384615, "loss": 0.0176, "step": 339 }, { "epoch": 67.95, "learning_rate": 0.00019107692307692307, "loss": 0.1542, "step": 340 }, { "epoch": 67.95, "eval_accuracy": 0.6985815602836879, "eval_f1": 0.7053323496041893, "eval_loss": 1.5203052759170532, "eval_runtime": 19.8405, "eval_samples_per_second": 14.213, "eval_steps_per_second": 0.907, "step": 340 }, { "epoch": 68.19, "learning_rate": 0.00019061538461538462, "loss": 0.1157, "step": 341 }, { "epoch": 68.38, "learning_rate": 0.00019015384615384613, "loss": 0.023, "step": 342 }, { "epoch": 68.57, "learning_rate": 0.00018969230769230768, "loss": 0.0181, "step": 343 }, { "epoch": 68.76, "learning_rate": 0.0001892307692307692, "loss": 0.0293, "step": 344 }, { "epoch": 68.95, "learning_rate": 0.00018876923076923077, "loss": 0.0218, "step": 345 }, { "epoch": 69.19, "learning_rate": 0.0001883076923076923, "loss": 0.0235, "step": 346 }, { "epoch": 69.38, "learning_rate": 0.00018784615384615384, "loss": 0.0157, "step": 347 }, { "epoch": 69.57, "learning_rate": 0.00018738461538461536, "loss": 0.0627, "step": 348 }, { "epoch": 69.76, "learning_rate": 0.0001869230769230769, "loss": 0.016, "step": 349 }, { "epoch": 69.95, "learning_rate": 0.00018646153846153842, "loss": 0.046, "step": 350 }, { "epoch": 70.19, "learning_rate": 0.000186, "loss": 0.0234, "step": 351 }, { "epoch": 70.38, "learning_rate": 0.00018553846153846154, "loss": 0.0146, "step": 352 }, { "epoch": 70.57, "learning_rate": 0.00018507692307692306, "loss": 0.0146, "step": 353 }, { "epoch": 70.76, "learning_rate": 0.0001846153846153846, "loss": 0.014, "step": 354 }, { "epoch": 70.95, "learning_rate": 0.00018415384615384612, "loss": 0.0135, "step": 355 }, { "epoch": 71.19, "learning_rate": 0.00018369230769230767, "loss": 0.041, "step": 356 }, { "epoch": 71.38, "learning_rate": 0.00018323076923076922, "loss": 0.0155, "step": 357 }, { "epoch": 71.57, "learning_rate": 0.00018276923076923076, "loss": 0.0132, "step": 358 }, { "epoch": 71.76, "learning_rate": 0.00018230769230769228, "loss": 0.0148, "step": 359 }, { "epoch": 71.95, "learning_rate": 0.00018184615384615383, "loss": 0.0127, "step": 360 }, { "epoch": 71.95, "eval_accuracy": 0.7127659574468085, "eval_f1": 0.7104673960244846, "eval_loss": 1.714851975440979, "eval_runtime": 19.8331, "eval_samples_per_second": 14.219, "eval_steps_per_second": 0.908, "step": 360 }, { "epoch": 72.19, "learning_rate": 0.00018138461538461537, "loss": 0.0166, "step": 361 }, { "epoch": 72.38, "learning_rate": 0.0001809230769230769, "loss": 0.0125, "step": 362 }, { "epoch": 72.57, "learning_rate": 0.00018046153846153847, "loss": 0.0122, "step": 363 }, { "epoch": 72.76, "learning_rate": 0.00017999999999999998, "loss": 0.0118, "step": 364 }, { "epoch": 72.95, "learning_rate": 0.00017953846153846153, "loss": 0.0121, "step": 365 }, { "epoch": 73.19, "learning_rate": 0.00017907692307692305, "loss": 0.0154, "step": 366 }, { "epoch": 73.38, "learning_rate": 0.0001786153846153846, "loss": 0.082, "step": 367 }, { "epoch": 73.57, "learning_rate": 0.00017815384615384611, "loss": 0.0119, "step": 368 }, { "epoch": 73.76, "learning_rate": 0.0001776923076923077, "loss": 0.0131, "step": 369 }, { "epoch": 73.95, "learning_rate": 0.00017723076923076923, "loss": 0.0162, "step": 370 }, { "epoch": 74.19, "learning_rate": 0.00017676923076923075, "loss": 0.015, "step": 371 }, { "epoch": 74.38, "learning_rate": 0.0001763076923076923, "loss": 0.0139, "step": 372 }, { "epoch": 74.57, "learning_rate": 0.00017584615384615382, "loss": 0.2208, "step": 373 }, { "epoch": 74.76, "learning_rate": 0.0001753846153846154, "loss": 0.011, "step": 374 }, { "epoch": 74.95, "learning_rate": 0.0001749230769230769, "loss": 0.0109, "step": 375 }, { "epoch": 75.19, "learning_rate": 0.00017446153846153846, "loss": 0.0334, "step": 376 }, { "epoch": 75.38, "learning_rate": 0.00017399999999999997, "loss": 0.0109, "step": 377 }, { "epoch": 75.57, "learning_rate": 0.00017353846153846152, "loss": 0.0105, "step": 378 }, { "epoch": 75.76, "learning_rate": 0.00017307692307692304, "loss": 0.0113, "step": 379 }, { "epoch": 75.95, "learning_rate": 0.0001726153846153846, "loss": 0.0105, "step": 380 }, { "epoch": 75.95, "eval_accuracy": 0.7836879432624113, "eval_f1": 0.7853347417585911, "eval_loss": 1.2471365928649902, "eval_runtime": 19.8303, "eval_samples_per_second": 14.221, "eval_steps_per_second": 0.908, "step": 380 }, { "epoch": 76.19, "learning_rate": 0.00017215384615384616, "loss": 0.013, "step": 381 }, { "epoch": 76.38, "learning_rate": 0.00017169230769230768, "loss": 0.0103, "step": 382 }, { "epoch": 76.57, "learning_rate": 0.00017123076923076922, "loss": 0.0103, "step": 383 }, { "epoch": 76.76, "learning_rate": 0.00017076923076923074, "loss": 0.012, "step": 384 }, { "epoch": 76.95, "learning_rate": 0.0001703076923076923, "loss": 0.0111, "step": 385 }, { "epoch": 77.19, "learning_rate": 0.00016984615384615383, "loss": 0.0129, "step": 386 }, { "epoch": 77.38, "learning_rate": 0.00016938461538461538, "loss": 0.0099, "step": 387 }, { "epoch": 77.57, "learning_rate": 0.0001689230769230769, "loss": 0.0103, "step": 388 }, { "epoch": 77.76, "learning_rate": 0.00016846153846153844, "loss": 0.0099, "step": 389 }, { "epoch": 77.95, "learning_rate": 0.000168, "loss": 0.0792, "step": 390 }, { "epoch": 78.19, "learning_rate": 0.0001675384615384615, "loss": 0.0194, "step": 391 }, { "epoch": 78.38, "learning_rate": 0.00016707692307692308, "loss": 0.01, "step": 392 }, { "epoch": 78.57, "learning_rate": 0.0001666153846153846, "loss": 0.0101, "step": 393 }, { "epoch": 78.76, "learning_rate": 0.00016615384615384615, "loss": 0.0144, "step": 394 }, { "epoch": 78.95, "learning_rate": 0.00016569230769230767, "loss": 0.0175, "step": 395 }, { "epoch": 79.19, "learning_rate": 0.0001652307692307692, "loss": 0.0138, "step": 396 }, { "epoch": 79.38, "learning_rate": 0.00016476923076923073, "loss": 0.0102, "step": 397 }, { "epoch": 79.57, "learning_rate": 0.0001643076923076923, "loss": 0.0132, "step": 398 }, { "epoch": 79.76, "learning_rate": 0.00016384615384615382, "loss": 0.009, "step": 399 }, { "epoch": 79.95, "learning_rate": 0.00016338461538461537, "loss": 0.009, "step": 400 }, { "epoch": 79.95, "eval_accuracy": 0.7056737588652482, "eval_f1": 0.7064894781985094, "eval_loss": 1.571977972984314, "eval_runtime": 19.9376, "eval_samples_per_second": 14.144, "eval_steps_per_second": 0.903, "step": 400 }, { "epoch": 80.19, "learning_rate": 0.00016292307692307692, "loss": 0.0109, "step": 401 }, { "epoch": 80.38, "learning_rate": 0.00016246153846153843, "loss": 0.0088, "step": 402 }, { "epoch": 80.57, "learning_rate": 0.000162, "loss": 0.0519, "step": 403 }, { "epoch": 80.76, "learning_rate": 0.00016153846153846153, "loss": 0.0086, "step": 404 }, { "epoch": 80.95, "learning_rate": 0.00016107692307692307, "loss": 0.009, "step": 405 }, { "epoch": 81.19, "learning_rate": 0.0001606153846153846, "loss": 0.0104, "step": 406 }, { "epoch": 81.38, "learning_rate": 0.00016015384615384614, "loss": 0.229, "step": 407 }, { "epoch": 81.57, "learning_rate": 0.00015969230769230766, "loss": 0.0084, "step": 408 }, { "epoch": 81.76, "learning_rate": 0.00015923076923076923, "loss": 0.0085, "step": 409 }, { "epoch": 81.95, "learning_rate": 0.00015876923076923078, "loss": 0.0088, "step": 410 }, { "epoch": 82.19, "learning_rate": 0.0001583076923076923, "loss": 0.0105, "step": 411 }, { "epoch": 82.38, "learning_rate": 0.00015784615384615384, "loss": 0.0085, "step": 412 }, { "epoch": 82.57, "learning_rate": 0.00015738461538461536, "loss": 0.0085, "step": 413 }, { "epoch": 82.76, "learning_rate": 0.0001569230769230769, "loss": 0.0085, "step": 414 }, { "epoch": 82.95, "learning_rate": 0.00015646153846153845, "loss": 0.0084, "step": 415 }, { "epoch": 83.19, "learning_rate": 0.000156, "loss": 0.0103, "step": 416 }, { "epoch": 83.38, "learning_rate": 0.00015553846153846152, "loss": 0.009, "step": 417 }, { "epoch": 83.57, "learning_rate": 0.00015507692307692306, "loss": 0.0086, "step": 418 }, { "epoch": 83.76, "learning_rate": 0.00015461538461538458, "loss": 0.0239, "step": 419 }, { "epoch": 83.95, "learning_rate": 0.00015415384615384613, "loss": 0.0081, "step": 420 }, { "epoch": 83.95, "eval_accuracy": 0.6702127659574468, "eval_f1": 0.6656039778264036, "eval_loss": 1.939513087272644, "eval_runtime": 19.9021, "eval_samples_per_second": 14.169, "eval_steps_per_second": 0.904, "step": 420 }, { "epoch": 84.19, "learning_rate": 0.0001536923076923077, "loss": 0.0107, "step": 421 }, { "epoch": 84.38, "learning_rate": 0.00015323076923076922, "loss": 0.0077, "step": 422 }, { "epoch": 84.57, "learning_rate": 0.00015276923076923077, "loss": 0.008, "step": 423 }, { "epoch": 84.76, "learning_rate": 0.00015230769230769228, "loss": 0.0126, "step": 424 }, { "epoch": 84.95, "learning_rate": 0.00015184615384615383, "loss": 0.2308, "step": 425 }, { "epoch": 85.19, "learning_rate": 0.00015138461538461535, "loss": 0.0853, "step": 426 }, { "epoch": 85.38, "learning_rate": 0.00015092307692307692, "loss": 0.0081, "step": 427 }, { "epoch": 85.57, "learning_rate": 0.00015046153846153844, "loss": 0.0077, "step": 428 }, { "epoch": 85.76, "learning_rate": 0.00015, "loss": 0.0082, "step": 429 }, { "epoch": 85.95, "learning_rate": 0.00014953846153846153, "loss": 0.0087, "step": 430 }, { "epoch": 86.19, "learning_rate": 0.00014907692307692305, "loss": 0.0097, "step": 431 }, { "epoch": 86.38, "learning_rate": 0.0001486153846153846, "loss": 0.0089, "step": 432 }, { "epoch": 86.57, "learning_rate": 0.00014815384615384614, "loss": 0.0079, "step": 433 }, { "epoch": 86.76, "learning_rate": 0.0001476923076923077, "loss": 0.0079, "step": 434 }, { "epoch": 86.95, "learning_rate": 0.00014723076923076924, "loss": 0.0097, "step": 435 }, { "epoch": 87.19, "learning_rate": 0.00014676923076923075, "loss": 0.0102, "step": 436 }, { "epoch": 87.38, "learning_rate": 0.0001463076923076923, "loss": 0.0078, "step": 437 }, { "epoch": 87.57, "learning_rate": 0.00014584615384615385, "loss": 0.0078, "step": 438 }, { "epoch": 87.76, "learning_rate": 0.00014538461538461537, "loss": 0.0082, "step": 439 }, { "epoch": 87.95, "learning_rate": 0.0001449230769230769, "loss": 0.2345, "step": 440 }, { "epoch": 87.95, "eval_accuracy": 0.7411347517730497, "eval_f1": 0.7408030934503061, "eval_loss": 1.5703579187393188, "eval_runtime": 19.8366, "eval_samples_per_second": 14.216, "eval_steps_per_second": 0.907, "step": 440 }, { "epoch": 88.19, "learning_rate": 0.00014446153846153846, "loss": 0.0099, "step": 441 }, { "epoch": 88.38, "learning_rate": 0.00014399999999999998, "loss": 0.0156, "step": 442 }, { "epoch": 88.57, "learning_rate": 0.00014353846153846152, "loss": 0.0076, "step": 443 }, { "epoch": 88.76, "learning_rate": 0.00014307692307692307, "loss": 0.0286, "step": 444 }, { "epoch": 88.95, "learning_rate": 0.00014261538461538461, "loss": 0.0079, "step": 445 }, { "epoch": 89.19, "learning_rate": 0.00014215384615384613, "loss": 0.01, "step": 446 }, { "epoch": 89.38, "learning_rate": 0.00014169230769230768, "loss": 0.0081, "step": 447 }, { "epoch": 89.57, "learning_rate": 0.00014123076923076923, "loss": 0.0072, "step": 448 }, { "epoch": 89.76, "learning_rate": 0.00014076923076923074, "loss": 0.0076, "step": 449 }, { "epoch": 89.95, "learning_rate": 0.0001403076923076923, "loss": 0.0073, "step": 450 }, { "epoch": 90.19, "learning_rate": 0.00013984615384615384, "loss": 0.0095, "step": 451 }, { "epoch": 90.38, "learning_rate": 0.00013938461538461536, "loss": 0.0076, "step": 452 }, { "epoch": 90.57, "learning_rate": 0.0001389230769230769, "loss": 0.0078, "step": 453 }, { "epoch": 90.76, "learning_rate": 0.00013846153846153845, "loss": 0.0076, "step": 454 }, { "epoch": 90.95, "learning_rate": 0.000138, "loss": 0.0074, "step": 455 }, { "epoch": 91.19, "learning_rate": 0.00013753846153846154, "loss": 0.0152, "step": 456 }, { "epoch": 91.38, "learning_rate": 0.00013707692307692306, "loss": 0.0075, "step": 457 }, { "epoch": 91.57, "learning_rate": 0.0001366153846153846, "loss": 0.0077, "step": 458 }, { "epoch": 91.76, "learning_rate": 0.00013615384615384615, "loss": 0.007, "step": 459 }, { "epoch": 91.95, "learning_rate": 0.00013569230769230767, "loss": 0.0076, "step": 460 }, { "epoch": 91.95, "eval_accuracy": 0.7588652482269503, "eval_f1": 0.7553565470142631, "eval_loss": 1.4705615043640137, "eval_runtime": 19.84, "eval_samples_per_second": 14.214, "eval_steps_per_second": 0.907, "step": 460 }, { "epoch": 92.19, "learning_rate": 0.00013523076923076922, "loss": 0.0092, "step": 461 }, { "epoch": 92.38, "learning_rate": 0.00013476923076923076, "loss": 0.0069, "step": 462 }, { "epoch": 92.57, "learning_rate": 0.00013430769230769228, "loss": 0.0088, "step": 463 }, { "epoch": 92.76, "learning_rate": 0.00013384615384615385, "loss": 0.0077, "step": 464 }, { "epoch": 92.95, "learning_rate": 0.00013338461538461537, "loss": 0.0073, "step": 465 }, { "epoch": 93.19, "learning_rate": 0.00013292307692307692, "loss": 0.0088, "step": 466 }, { "epoch": 93.38, "learning_rate": 0.00013246153846153846, "loss": 0.0068, "step": 467 }, { "epoch": 93.57, "learning_rate": 0.00013199999999999998, "loss": 0.0069, "step": 468 }, { "epoch": 93.76, "learning_rate": 0.00013153846153846153, "loss": 0.0067, "step": 469 }, { "epoch": 93.95, "learning_rate": 0.00013107692307692308, "loss": 0.0069, "step": 470 }, { "epoch": 94.19, "learning_rate": 0.0001306153846153846, "loss": 0.0086, "step": 471 }, { "epoch": 94.38, "learning_rate": 0.00013015384615384614, "loss": 0.0065, "step": 472 }, { "epoch": 94.57, "learning_rate": 0.00012969230769230769, "loss": 0.0072, "step": 473 }, { "epoch": 94.76, "learning_rate": 0.00012923076923076923, "loss": 0.2418, "step": 474 }, { "epoch": 94.95, "learning_rate": 0.00012876923076923075, "loss": 0.0063, "step": 475 }, { "epoch": 95.19, "learning_rate": 0.0001283076923076923, "loss": 0.0765, "step": 476 }, { "epoch": 95.38, "learning_rate": 0.00012784615384615384, "loss": 0.007, "step": 477 }, { "epoch": 95.57, "learning_rate": 0.00012738461538461536, "loss": 0.0066, "step": 478 }, { "epoch": 95.76, "learning_rate": 0.0001269230769230769, "loss": 0.0065, "step": 479 }, { "epoch": 95.95, "learning_rate": 0.00012646153846153845, "loss": 0.0064, "step": 480 }, { "epoch": 95.95, "eval_accuracy": 0.75177304964539, "eval_f1": 0.7490816090582566, "eval_loss": 1.5745741128921509, "eval_runtime": 19.8499, "eval_samples_per_second": 14.207, "eval_steps_per_second": 0.907, "step": 480 }, { "epoch": 96.19, "learning_rate": 0.00012599999999999997, "loss": 0.0082, "step": 481 }, { "epoch": 96.38, "learning_rate": 0.00012553846153846152, "loss": 0.2158, "step": 482 }, { "epoch": 96.57, "learning_rate": 0.00012507692307692306, "loss": 0.0063, "step": 483 }, { "epoch": 96.76, "learning_rate": 0.0001246153846153846, "loss": 0.0065, "step": 484 }, { "epoch": 96.95, "learning_rate": 0.00012415384615384616, "loss": 0.0064, "step": 485 }, { "epoch": 97.19, "learning_rate": 0.00012369230769230768, "loss": 0.0081, "step": 486 }, { "epoch": 97.38, "learning_rate": 0.00012323076923076922, "loss": 0.0063, "step": 487 }, { "epoch": 97.57, "learning_rate": 0.00012276923076923077, "loss": 0.0063, "step": 488 }, { "epoch": 97.76, "learning_rate": 0.0001223076923076923, "loss": 0.0065, "step": 489 }, { "epoch": 97.95, "learning_rate": 0.00012184615384615383, "loss": 0.0064, "step": 490 }, { "epoch": 98.19, "learning_rate": 0.00012138461538461537, "loss": 0.008, "step": 491 }, { "epoch": 98.38, "learning_rate": 0.00012092307692307691, "loss": 0.0062, "step": 492 }, { "epoch": 98.57, "learning_rate": 0.00012046153846153844, "loss": 0.0063, "step": 493 }, { "epoch": 98.76, "learning_rate": 0.00011999999999999999, "loss": 0.0065, "step": 494 }, { "epoch": 98.95, "learning_rate": 0.00011953846153846154, "loss": 0.0063, "step": 495 }, { "epoch": 99.19, "learning_rate": 0.00011907692307692307, "loss": 0.1624, "step": 496 }, { "epoch": 99.38, "learning_rate": 0.0001186153846153846, "loss": 0.0064, "step": 497 }, { "epoch": 99.57, "learning_rate": 0.00011815384615384615, "loss": 0.0063, "step": 498 }, { "epoch": 99.76, "learning_rate": 0.00011769230769230768, "loss": 0.006, "step": 499 }, { "epoch": 99.95, "learning_rate": 0.00011723076923076921, "loss": 0.3105, "step": 500 }, { "epoch": 99.95, "eval_accuracy": 0.7375886524822695, "eval_f1": 0.7273234491658735, "eval_loss": 1.6824193000793457, "eval_runtime": 19.974, "eval_samples_per_second": 14.118, "eval_steps_per_second": 0.901, "step": 500 }, { "epoch": 100.19, "learning_rate": 0.00011676923076923076, "loss": 0.0082, "step": 501 }, { "epoch": 100.38, "learning_rate": 0.00011630769230769229, "loss": 0.042, "step": 502 }, { "epoch": 100.57, "learning_rate": 0.00011584615384615385, "loss": 0.0059, "step": 503 }, { "epoch": 100.76, "learning_rate": 0.00011538461538461538, "loss": 0.0062, "step": 504 }, { "epoch": 100.95, "learning_rate": 0.00011492307692307691, "loss": 0.0059, "step": 505 }, { "epoch": 101.19, "learning_rate": 0.00011446153846153846, "loss": 0.0076, "step": 506 }, { "epoch": 101.38, "learning_rate": 0.00011399999999999999, "loss": 0.0058, "step": 507 }, { "epoch": 101.57, "learning_rate": 0.00011353846153846153, "loss": 0.006, "step": 508 }, { "epoch": 101.76, "learning_rate": 0.00011307692307692307, "loss": 0.0063, "step": 509 }, { "epoch": 101.95, "learning_rate": 0.0001126153846153846, "loss": 0.0071, "step": 510 }, { "epoch": 102.19, "learning_rate": 0.00011215384615384614, "loss": 0.0074, "step": 511 }, { "epoch": 102.38, "learning_rate": 0.00011169230769230768, "loss": 0.0097, "step": 512 }, { "epoch": 102.57, "learning_rate": 0.00011123076923076923, "loss": 0.0058, "step": 513 }, { "epoch": 102.76, "learning_rate": 0.00011076923076923076, "loss": 0.0056, "step": 514 }, { "epoch": 102.95, "learning_rate": 0.0001103076923076923, "loss": 0.0057, "step": 515 }, { "epoch": 103.19, "learning_rate": 0.00010984615384615384, "loss": 0.0075, "step": 516 }, { "epoch": 103.38, "learning_rate": 0.00010938461538461537, "loss": 0.0055, "step": 517 }, { "epoch": 103.57, "learning_rate": 0.00010892307692307692, "loss": 0.2099, "step": 518 }, { "epoch": 103.76, "learning_rate": 0.00010846153846153845, "loss": 0.0056, "step": 519 }, { "epoch": 103.95, "learning_rate": 0.00010799999999999998, "loss": 0.0058, "step": 520 }, { "epoch": 103.95, "eval_accuracy": 0.7624113475177305, "eval_f1": 0.7474444107317791, "eval_loss": 1.3798716068267822, "eval_runtime": 19.813, "eval_samples_per_second": 14.233, "eval_steps_per_second": 0.908, "step": 520 }, { "epoch": 104.19, "learning_rate": 0.00010753846153846153, "loss": 0.0071, "step": 521 }, { "epoch": 104.38, "learning_rate": 0.00010707692307692306, "loss": 0.0054, "step": 522 }, { "epoch": 104.57, "learning_rate": 0.00010661538461538461, "loss": 0.0055, "step": 523 }, { "epoch": 104.76, "learning_rate": 0.00010615384615384615, "loss": 0.0174, "step": 524 }, { "epoch": 104.95, "learning_rate": 0.00010569230769230769, "loss": 0.0271, "step": 525 }, { "epoch": 105.19, "learning_rate": 0.00010523076923076922, "loss": 0.0067, "step": 526 }, { "epoch": 105.38, "learning_rate": 0.00010476923076923076, "loss": 0.0054, "step": 527 }, { "epoch": 105.57, "learning_rate": 0.0001043076923076923, "loss": 0.0055, "step": 528 }, { "epoch": 105.76, "learning_rate": 0.00010384615384615383, "loss": 0.0055, "step": 529 }, { "epoch": 105.95, "learning_rate": 0.00010338461538461537, "loss": 0.0061, "step": 530 }, { "epoch": 106.19, "learning_rate": 0.00010292307692307691, "loss": 0.0068, "step": 531 }, { "epoch": 106.38, "learning_rate": 0.00010246153846153844, "loss": 0.0055, "step": 532 }, { "epoch": 106.57, "learning_rate": 0.000102, "loss": 0.0058, "step": 533 }, { "epoch": 106.76, "learning_rate": 0.00010153846153846153, "loss": 0.0052, "step": 534 }, { "epoch": 106.95, "learning_rate": 0.00010107692307692308, "loss": 0.0054, "step": 535 }, { "epoch": 107.19, "learning_rate": 0.00010061538461538461, "loss": 0.0065, "step": 536 }, { "epoch": 107.38, "learning_rate": 0.00010015384615384614, "loss": 0.2378, "step": 537 }, { "epoch": 107.57, "learning_rate": 9.969230769230769e-05, "loss": 0.0054, "step": 538 }, { "epoch": 107.76, "learning_rate": 9.923076923076922e-05, "loss": 0.0053, "step": 539 }, { "epoch": 107.95, "learning_rate": 9.876923076923075e-05, "loss": 0.0055, "step": 540 }, { "epoch": 107.95, "eval_accuracy": 0.75177304964539, "eval_f1": 0.7350291455322384, "eval_loss": 1.4086250066757202, "eval_runtime": 19.8925, "eval_samples_per_second": 14.176, "eval_steps_per_second": 0.905, "step": 540 }, { "epoch": 108.19, "learning_rate": 9.83076923076923e-05, "loss": 0.0065, "step": 541 }, { "epoch": 108.38, "learning_rate": 9.784615384615383e-05, "loss": 0.0051, "step": 542 }, { "epoch": 108.57, "learning_rate": 9.738461538461538e-05, "loss": 0.0052, "step": 543 }, { "epoch": 108.76, "learning_rate": 9.692307692307692e-05, "loss": 0.0052, "step": 544 }, { "epoch": 108.95, "learning_rate": 9.646153846153846e-05, "loss": 0.0056, "step": 545 }, { "epoch": 109.19, "learning_rate": 9.599999999999999e-05, "loss": 0.0064, "step": 546 }, { "epoch": 109.38, "learning_rate": 9.553846153846153e-05, "loss": 0.0052, "step": 547 }, { "epoch": 109.57, "learning_rate": 9.507692307692307e-05, "loss": 0.0058, "step": 548 }, { "epoch": 109.76, "learning_rate": 9.46153846153846e-05, "loss": 0.0051, "step": 549 }, { "epoch": 109.95, "learning_rate": 9.415384615384615e-05, "loss": 0.0224, "step": 550 }, { "epoch": 110.19, "learning_rate": 9.369230769230768e-05, "loss": 0.0064, "step": 551 }, { "epoch": 110.38, "learning_rate": 9.323076923076921e-05, "loss": 0.0575, "step": 552 }, { "epoch": 110.57, "learning_rate": 9.276923076923077e-05, "loss": 0.005, "step": 553 }, { "epoch": 110.76, "learning_rate": 9.23076923076923e-05, "loss": 0.2271, "step": 554 }, { "epoch": 110.95, "learning_rate": 9.184615384615384e-05, "loss": 0.0052, "step": 555 }, { "epoch": 111.19, "learning_rate": 9.138461538461538e-05, "loss": 0.0064, "step": 556 }, { "epoch": 111.38, "learning_rate": 9.092307692307691e-05, "loss": 0.0052, "step": 557 }, { "epoch": 111.57, "learning_rate": 9.046153846153845e-05, "loss": 0.1925, "step": 558 }, { "epoch": 111.76, "learning_rate": 8.999999999999999e-05, "loss": 0.0049, "step": 559 }, { "epoch": 111.95, "learning_rate": 8.953846153846152e-05, "loss": 0.0051, "step": 560 }, { "epoch": 111.95, "eval_accuracy": 0.7978723404255319, "eval_f1": 0.7874453236488933, "eval_loss": 1.2832341194152832, "eval_runtime": 19.6808, "eval_samples_per_second": 14.329, "eval_steps_per_second": 0.915, "step": 560 }, { "epoch": 112.19, "learning_rate": 8.907692307692306e-05, "loss": 0.0066, "step": 561 }, { "epoch": 112.38, "learning_rate": 8.861538461538462e-05, "loss": 0.0051, "step": 562 }, { "epoch": 112.57, "learning_rate": 8.815384615384615e-05, "loss": 0.0051, "step": 563 }, { "epoch": 112.76, "learning_rate": 8.76923076923077e-05, "loss": 0.0143, "step": 564 }, { "epoch": 112.95, "learning_rate": 8.723076923076923e-05, "loss": 0.0051, "step": 565 }, { "epoch": 113.19, "learning_rate": 8.676923076923076e-05, "loss": 0.0064, "step": 566 }, { "epoch": 113.38, "learning_rate": 8.63076923076923e-05, "loss": 0.0053, "step": 567 }, { "epoch": 113.57, "learning_rate": 8.584615384615384e-05, "loss": 0.0051, "step": 568 }, { "epoch": 113.76, "learning_rate": 8.538461538461537e-05, "loss": 0.0053, "step": 569 }, { "epoch": 113.95, "learning_rate": 8.492307692307692e-05, "loss": 0.0052, "step": 570 }, { "epoch": 114.19, "learning_rate": 8.446153846153845e-05, "loss": 0.0071, "step": 571 }, { "epoch": 114.38, "learning_rate": 8.4e-05, "loss": 0.0053, "step": 572 }, { "epoch": 114.57, "learning_rate": 8.353846153846154e-05, "loss": 0.0088, "step": 573 }, { "epoch": 114.76, "learning_rate": 8.307692307692307e-05, "loss": 0.1024, "step": 574 }, { "epoch": 114.95, "learning_rate": 8.26153846153846e-05, "loss": 0.0053, "step": 575 }, { "epoch": 115.19, "learning_rate": 8.215384615384615e-05, "loss": 0.0062, "step": 576 }, { "epoch": 115.38, "learning_rate": 8.169230769230768e-05, "loss": 0.0051, "step": 577 }, { "epoch": 115.57, "learning_rate": 8.123076923076922e-05, "loss": 0.0053, "step": 578 }, { "epoch": 115.76, "learning_rate": 8.076923076923076e-05, "loss": 0.0052, "step": 579 }, { "epoch": 115.95, "learning_rate": 8.03076923076923e-05, "loss": 0.0052, "step": 580 }, { "epoch": 115.95, "eval_accuracy": 0.7801418439716312, "eval_f1": 0.77522776654755, "eval_loss": 1.3473820686340332, "eval_runtime": 19.7098, "eval_samples_per_second": 14.308, "eval_steps_per_second": 0.913, "step": 580 }, { "epoch": 116.19, "learning_rate": 7.984615384615383e-05, "loss": 0.0065, "step": 581 }, { "epoch": 116.38, "learning_rate": 7.938461538461539e-05, "loss": 0.0051, "step": 582 }, { "epoch": 116.57, "learning_rate": 7.892307692307692e-05, "loss": 0.0049, "step": 583 }, { "epoch": 116.76, "learning_rate": 7.846153846153845e-05, "loss": 0.0061, "step": 584 }, { "epoch": 116.95, "learning_rate": 7.8e-05, "loss": 0.005, "step": 585 }, { "epoch": 117.19, "learning_rate": 7.753846153846153e-05, "loss": 0.0061, "step": 586 }, { "epoch": 117.38, "learning_rate": 7.707692307692306e-05, "loss": 0.0051, "step": 587 }, { "epoch": 117.57, "learning_rate": 7.661538461538461e-05, "loss": 0.0051, "step": 588 }, { "epoch": 117.76, "learning_rate": 7.615384615384614e-05, "loss": 0.0053, "step": 589 }, { "epoch": 117.95, "learning_rate": 7.569230769230767e-05, "loss": 0.005, "step": 590 }, { "epoch": 118.19, "learning_rate": 7.523076923076922e-05, "loss": 0.006, "step": 591 }, { "epoch": 118.38, "learning_rate": 7.476923076923077e-05, "loss": 0.0051, "step": 592 }, { "epoch": 118.57, "learning_rate": 7.43076923076923e-05, "loss": 0.0052, "step": 593 }, { "epoch": 118.76, "learning_rate": 7.384615384615384e-05, "loss": 0.0052, "step": 594 }, { "epoch": 118.95, "learning_rate": 7.338461538461538e-05, "loss": 0.0047, "step": 595 }, { "epoch": 119.19, "learning_rate": 7.292307692307692e-05, "loss": 0.0059, "step": 596 }, { "epoch": 119.38, "learning_rate": 7.246153846153846e-05, "loss": 0.012, "step": 597 }, { "epoch": 119.57, "learning_rate": 7.199999999999999e-05, "loss": 0.005, "step": 598 }, { "epoch": 119.76, "learning_rate": 7.153846153846153e-05, "loss": 0.0046, "step": 599 }, { "epoch": 119.95, "learning_rate": 7.107692307692307e-05, "loss": 0.0046, "step": 600 }, { "epoch": 119.95, "eval_accuracy": 0.74822695035461, "eval_f1": 0.7451347261702654, "eval_loss": 1.612459421157837, "eval_runtime": 19.7993, "eval_samples_per_second": 14.243, "eval_steps_per_second": 0.909, "step": 600 }, { "epoch": 120.19, "learning_rate": 7.061538461538461e-05, "loss": 0.0117, "step": 601 }, { "epoch": 120.38, "learning_rate": 7.015384615384615e-05, "loss": 0.0048, "step": 602 }, { "epoch": 120.57, "learning_rate": 6.969230769230768e-05, "loss": 0.0047, "step": 603 }, { "epoch": 120.76, "learning_rate": 6.923076923076922e-05, "loss": 0.0048, "step": 604 }, { "epoch": 120.95, "learning_rate": 6.876923076923077e-05, "loss": 0.0046, "step": 605 }, { "epoch": 121.19, "learning_rate": 6.83076923076923e-05, "loss": 0.0057, "step": 606 }, { "epoch": 121.38, "learning_rate": 6.784615384615383e-05, "loss": 0.0047, "step": 607 }, { "epoch": 121.57, "learning_rate": 6.738461538461538e-05, "loss": 0.0046, "step": 608 }, { "epoch": 121.76, "learning_rate": 6.692307692307693e-05, "loss": 0.0047, "step": 609 }, { "epoch": 121.95, "learning_rate": 6.646153846153846e-05, "loss": 0.0045, "step": 610 }, { "epoch": 122.19, "learning_rate": 6.599999999999999e-05, "loss": 0.0056, "step": 611 }, { "epoch": 122.38, "learning_rate": 6.553846153846154e-05, "loss": 0.1306, "step": 612 }, { "epoch": 122.57, "learning_rate": 6.507692307692307e-05, "loss": 0.0047, "step": 613 }, { "epoch": 122.76, "learning_rate": 6.461538461538462e-05, "loss": 0.0044, "step": 614 }, { "epoch": 122.95, "learning_rate": 6.415384615384615e-05, "loss": 0.0077, "step": 615 }, { "epoch": 123.19, "learning_rate": 6.369230769230768e-05, "loss": 0.0056, "step": 616 }, { "epoch": 123.38, "learning_rate": 6.323076923076923e-05, "loss": 0.0046, "step": 617 }, { "epoch": 123.57, "learning_rate": 6.276923076923076e-05, "loss": 0.0051, "step": 618 }, { "epoch": 123.76, "learning_rate": 6.23076923076923e-05, "loss": 0.0046, "step": 619 }, { "epoch": 123.95, "learning_rate": 6.184615384615384e-05, "loss": 0.0044, "step": 620 }, { "epoch": 123.95, "eval_accuracy": 0.75177304964539, "eval_f1": 0.7485959515964336, "eval_loss": 1.592715859413147, "eval_runtime": 19.7766, "eval_samples_per_second": 14.259, "eval_steps_per_second": 0.91, "step": 620 }, { "epoch": 124.19, "learning_rate": 6.138461538461538e-05, "loss": 0.0054, "step": 621 }, { "epoch": 124.38, "learning_rate": 6.0923076923076916e-05, "loss": 0.0046, "step": 622 }, { "epoch": 124.57, "learning_rate": 6.0461538461538456e-05, "loss": 0.0046, "step": 623 }, { "epoch": 124.76, "learning_rate": 5.9999999999999995e-05, "loss": 0.0045, "step": 624 }, { "epoch": 124.95, "learning_rate": 5.9538461538461534e-05, "loss": 0.0043, "step": 625 }, { "epoch": 125.19, "learning_rate": 5.907692307692307e-05, "loss": 0.0056, "step": 626 }, { "epoch": 125.38, "learning_rate": 5.8615384615384606e-05, "loss": 0.0044, "step": 627 }, { "epoch": 125.57, "learning_rate": 5.8153846153846145e-05, "loss": 0.0044, "step": 628 }, { "epoch": 125.76, "learning_rate": 5.769230769230769e-05, "loss": 0.0043, "step": 629 }, { "epoch": 125.95, "learning_rate": 5.723076923076923e-05, "loss": 0.0044, "step": 630 }, { "epoch": 126.19, "learning_rate": 5.676923076923076e-05, "loss": 0.0056, "step": 631 }, { "epoch": 126.38, "learning_rate": 5.63076923076923e-05, "loss": 0.0045, "step": 632 }, { "epoch": 126.57, "learning_rate": 5.584615384615384e-05, "loss": 0.0044, "step": 633 }, { "epoch": 126.76, "learning_rate": 5.538461538461538e-05, "loss": 0.0044, "step": 634 }, { "epoch": 126.95, "learning_rate": 5.492307692307692e-05, "loss": 0.0046, "step": 635 }, { "epoch": 127.19, "learning_rate": 5.446153846153846e-05, "loss": 0.0055, "step": 636 }, { "epoch": 127.38, "learning_rate": 5.399999999999999e-05, "loss": 0.0042, "step": 637 }, { "epoch": 127.57, "learning_rate": 5.353846153846153e-05, "loss": 0.0043, "step": 638 }, { "epoch": 127.76, "learning_rate": 5.3076923076923076e-05, "loss": 0.0042, "step": 639 }, { "epoch": 127.95, "learning_rate": 5.261538461538461e-05, "loss": 0.0044, "step": 640 }, { "epoch": 127.95, "eval_accuracy": 0.75177304964539, "eval_f1": 0.7487428211402694, "eval_loss": 1.5551108121871948, "eval_runtime": 19.8883, "eval_samples_per_second": 14.179, "eval_steps_per_second": 0.905, "step": 640 }, { "epoch": 128.19, "learning_rate": 5.215384615384615e-05, "loss": 0.0054, "step": 641 }, { "epoch": 128.38, "learning_rate": 5.169230769230769e-05, "loss": 0.0042, "step": 642 }, { "epoch": 128.57, "learning_rate": 5.123076923076922e-05, "loss": 0.0043, "step": 643 }, { "epoch": 128.76, "learning_rate": 5.0769230769230766e-05, "loss": 0.0043, "step": 644 }, { "epoch": 128.95, "learning_rate": 5.0307692307692305e-05, "loss": 0.0044, "step": 645 }, { "epoch": 129.19, "learning_rate": 4.9846153846153844e-05, "loss": 0.0054, "step": 646 }, { "epoch": 129.38, "learning_rate": 4.938461538461538e-05, "loss": 0.0041, "step": 647 }, { "epoch": 129.57, "learning_rate": 4.8923076923076916e-05, "loss": 0.0043, "step": 648 }, { "epoch": 129.76, "learning_rate": 4.846153846153846e-05, "loss": 0.0045, "step": 649 }, { "epoch": 129.95, "learning_rate": 4.7999999999999994e-05, "loss": 0.0043, "step": 650 }, { "epoch": 130.19, "learning_rate": 4.7538461538461534e-05, "loss": 0.0053, "step": 651 }, { "epoch": 130.38, "learning_rate": 4.707692307692307e-05, "loss": 0.0041, "step": 652 }, { "epoch": 130.57, "learning_rate": 4.6615384615384605e-05, "loss": 0.0043, "step": 653 }, { "epoch": 130.76, "learning_rate": 4.615384615384615e-05, "loss": 0.0043, "step": 654 }, { "epoch": 130.95, "learning_rate": 4.569230769230769e-05, "loss": 0.0043, "step": 655 }, { "epoch": 131.19, "learning_rate": 4.523076923076922e-05, "loss": 0.0053, "step": 656 }, { "epoch": 131.38, "learning_rate": 4.476923076923076e-05, "loss": 0.0042, "step": 657 }, { "epoch": 131.57, "learning_rate": 4.430769230769231e-05, "loss": 0.0043, "step": 658 }, { "epoch": 131.76, "learning_rate": 4.384615384615385e-05, "loss": 0.0043, "step": 659 }, { "epoch": 131.95, "learning_rate": 4.338461538461538e-05, "loss": 0.0041, "step": 660 }, { "epoch": 131.95, "eval_accuracy": 0.7659574468085106, "eval_f1": 0.7631228398357921, "eval_loss": 1.511696457862854, "eval_runtime": 19.89, "eval_samples_per_second": 14.178, "eval_steps_per_second": 0.905, "step": 660 }, { "epoch": 132.19, "learning_rate": 4.292307692307692e-05, "loss": 0.0052, "step": 661 }, { "epoch": 132.38, "learning_rate": 4.246153846153846e-05, "loss": 0.0041, "step": 662 }, { "epoch": 132.57, "learning_rate": 4.2e-05, "loss": 0.0043, "step": 663 }, { "epoch": 132.76, "learning_rate": 4.153846153846154e-05, "loss": 0.0041, "step": 664 }, { "epoch": 132.95, "learning_rate": 4.1076923076923076e-05, "loss": 0.004, "step": 665 }, { "epoch": 133.19, "learning_rate": 4.061538461538461e-05, "loss": 0.0053, "step": 666 }, { "epoch": 133.38, "learning_rate": 4.015384615384615e-05, "loss": 0.0042, "step": 667 }, { "epoch": 133.57, "learning_rate": 3.9692307692307694e-05, "loss": 0.004, "step": 668 }, { "epoch": 133.76, "learning_rate": 3.9230769230769226e-05, "loss": 0.0041, "step": 669 }, { "epoch": 133.95, "learning_rate": 3.8769230769230766e-05, "loss": 0.0042, "step": 670 }, { "epoch": 134.19, "learning_rate": 3.8307692307692305e-05, "loss": 0.0051, "step": 671 }, { "epoch": 134.38, "learning_rate": 3.784615384615384e-05, "loss": 0.004, "step": 672 }, { "epoch": 134.57, "learning_rate": 3.738461538461538e-05, "loss": 0.0041, "step": 673 }, { "epoch": 134.76, "learning_rate": 3.692307692307692e-05, "loss": 0.0041, "step": 674 }, { "epoch": 134.95, "learning_rate": 3.646153846153846e-05, "loss": 0.0041, "step": 675 }, { "epoch": 135.19, "learning_rate": 3.5999999999999994e-05, "loss": 0.0051, "step": 676 }, { "epoch": 135.38, "learning_rate": 3.553846153846153e-05, "loss": 0.004, "step": 677 }, { "epoch": 135.57, "learning_rate": 3.507692307692307e-05, "loss": 0.0044, "step": 678 }, { "epoch": 135.76, "learning_rate": 3.461538461538461e-05, "loss": 0.0716, "step": 679 }, { "epoch": 135.95, "learning_rate": 3.415384615384615e-05, "loss": 0.0041, "step": 680 }, { "epoch": 135.95, "eval_accuracy": 0.7624113475177305, "eval_f1": 0.757713653242459, "eval_loss": 1.5209753513336182, "eval_runtime": 19.8488, "eval_samples_per_second": 14.207, "eval_steps_per_second": 0.907, "step": 680 }, { "epoch": 136.19, "learning_rate": 3.369230769230769e-05, "loss": 0.005, "step": 681 }, { "epoch": 136.38, "learning_rate": 3.323076923076923e-05, "loss": 0.1763, "step": 682 }, { "epoch": 136.57, "learning_rate": 3.276923076923077e-05, "loss": 0.0041, "step": 683 }, { "epoch": 136.76, "learning_rate": 3.230769230769231e-05, "loss": 0.0041, "step": 684 }, { "epoch": 136.95, "learning_rate": 3.184615384615384e-05, "loss": 0.0087, "step": 685 }, { "epoch": 137.19, "learning_rate": 3.138461538461538e-05, "loss": 0.005, "step": 686 }, { "epoch": 137.38, "learning_rate": 3.092307692307692e-05, "loss": 0.0042, "step": 687 }, { "epoch": 137.57, "learning_rate": 3.0461538461538458e-05, "loss": 0.0041, "step": 688 }, { "epoch": 137.76, "learning_rate": 2.9999999999999997e-05, "loss": 0.0041, "step": 689 }, { "epoch": 137.95, "learning_rate": 2.9538461538461537e-05, "loss": 0.004, "step": 690 }, { "epoch": 138.19, "learning_rate": 2.9076923076923072e-05, "loss": 0.0066, "step": 691 }, { "epoch": 138.38, "learning_rate": 2.8615384615384615e-05, "loss": 0.0041, "step": 692 }, { "epoch": 138.57, "learning_rate": 2.815384615384615e-05, "loss": 0.004, "step": 693 }, { "epoch": 138.76, "learning_rate": 2.769230769230769e-05, "loss": 0.0041, "step": 694 }, { "epoch": 138.95, "learning_rate": 2.723076923076923e-05, "loss": 0.004, "step": 695 }, { "epoch": 139.19, "learning_rate": 2.6769230769230765e-05, "loss": 0.0051, "step": 696 }, { "epoch": 139.38, "learning_rate": 2.6307692307692304e-05, "loss": 0.0042, "step": 697 }, { "epoch": 139.57, "learning_rate": 2.5846153846153844e-05, "loss": 0.0039, "step": 698 }, { "epoch": 139.76, "learning_rate": 2.5384615384615383e-05, "loss": 0.0039, "step": 699 }, { "epoch": 139.95, "learning_rate": 2.4923076923076922e-05, "loss": 0.0041, "step": 700 }, { "epoch": 139.95, "eval_accuracy": 0.7659574468085106, "eval_f1": 0.7655299229836832, "eval_loss": 1.5145211219787598, "eval_runtime": 19.9039, "eval_samples_per_second": 14.168, "eval_steps_per_second": 0.904, "step": 700 }, { "epoch": 140.19, "learning_rate": 2.4461538461538458e-05, "loss": 0.005, "step": 701 }, { "epoch": 140.38, "learning_rate": 2.3999999999999997e-05, "loss": 0.0041, "step": 702 }, { "epoch": 140.57, "learning_rate": 2.3538461538461536e-05, "loss": 0.004, "step": 703 }, { "epoch": 140.76, "learning_rate": 2.3076923076923076e-05, "loss": 0.0039, "step": 704 }, { "epoch": 140.95, "learning_rate": 2.261538461538461e-05, "loss": 0.0041, "step": 705 }, { "epoch": 141.19, "learning_rate": 2.2153846153846154e-05, "loss": 0.005, "step": 706 }, { "epoch": 141.38, "learning_rate": 2.169230769230769e-05, "loss": 0.0041, "step": 707 }, { "epoch": 141.57, "learning_rate": 2.123076923076923e-05, "loss": 0.004, "step": 708 }, { "epoch": 141.76, "learning_rate": 2.076923076923077e-05, "loss": 0.004, "step": 709 }, { "epoch": 141.95, "learning_rate": 2.0307692307692304e-05, "loss": 0.0041, "step": 710 }, { "epoch": 142.19, "learning_rate": 1.9846153846153847e-05, "loss": 0.2044, "step": 711 }, { "epoch": 142.38, "learning_rate": 1.9384615384615383e-05, "loss": 0.004, "step": 712 }, { "epoch": 142.57, "learning_rate": 1.892307692307692e-05, "loss": 0.0039, "step": 713 }, { "epoch": 142.76, "learning_rate": 1.846153846153846e-05, "loss": 0.0041, "step": 714 }, { "epoch": 142.95, "learning_rate": 1.7999999999999997e-05, "loss": 0.0039, "step": 715 }, { "epoch": 143.19, "learning_rate": 1.7538461538461536e-05, "loss": 0.0049, "step": 716 }, { "epoch": 143.38, "learning_rate": 1.7076923076923076e-05, "loss": 0.0039, "step": 717 }, { "epoch": 143.57, "learning_rate": 1.6615384615384615e-05, "loss": 0.004, "step": 718 }, { "epoch": 143.76, "learning_rate": 1.6153846153846154e-05, "loss": 0.008, "step": 719 }, { "epoch": 143.95, "learning_rate": 1.569230769230769e-05, "loss": 0.004, "step": 720 }, { "epoch": 143.95, "eval_accuracy": 0.7659574468085106, "eval_f1": 0.7665454681972557, "eval_loss": 1.5053002834320068, "eval_runtime": 19.9465, "eval_samples_per_second": 14.138, "eval_steps_per_second": 0.902, "step": 720 }, { "epoch": 144.19, "learning_rate": 1.5230769230769229e-05, "loss": 0.0051, "step": 721 }, { "epoch": 144.38, "learning_rate": 1.4769230769230768e-05, "loss": 0.0039, "step": 722 }, { "epoch": 144.57, "learning_rate": 1.4307692307692308e-05, "loss": 0.0041, "step": 723 }, { "epoch": 144.76, "learning_rate": 1.3846153846153845e-05, "loss": 0.0039, "step": 724 }, { "epoch": 144.95, "learning_rate": 1.3384615384615383e-05, "loss": 0.0039, "step": 725 }, { "epoch": 145.19, "learning_rate": 1.2923076923076922e-05, "loss": 0.005, "step": 726 }, { "epoch": 145.38, "learning_rate": 1.2461538461538461e-05, "loss": 0.0039, "step": 727 }, { "epoch": 145.57, "learning_rate": 1.1999999999999999e-05, "loss": 0.0039, "step": 728 }, { "epoch": 145.76, "learning_rate": 1.1538461538461538e-05, "loss": 0.004, "step": 729 }, { "epoch": 145.95, "learning_rate": 1.1076923076923077e-05, "loss": 0.004, "step": 730 }, { "epoch": 146.19, "learning_rate": 1.0615384615384615e-05, "loss": 0.0049, "step": 731 }, { "epoch": 146.38, "learning_rate": 1.0153846153846152e-05, "loss": 0.004, "step": 732 }, { "epoch": 146.57, "learning_rate": 9.692307692307691e-06, "loss": 0.004, "step": 733 }, { "epoch": 146.76, "learning_rate": 9.23076923076923e-06, "loss": 0.004, "step": 734 }, { "epoch": 146.95, "learning_rate": 8.769230769230768e-06, "loss": 0.0039, "step": 735 }, { "epoch": 147.19, "learning_rate": 8.307692307692307e-06, "loss": 0.005, "step": 736 }, { "epoch": 147.38, "learning_rate": 7.846153846153845e-06, "loss": 0.004, "step": 737 }, { "epoch": 147.57, "learning_rate": 7.384615384615384e-06, "loss": 0.0039, "step": 738 }, { "epoch": 147.76, "learning_rate": 6.9230769230769225e-06, "loss": 0.004, "step": 739 }, { "epoch": 147.95, "learning_rate": 6.461538461538461e-06, "loss": 0.004, "step": 740 }, { "epoch": 147.95, "eval_accuracy": 0.75177304964539, "eval_f1": 0.7526200587699554, "eval_loss": 1.520263671875, "eval_runtime": 19.9111, "eval_samples_per_second": 14.163, "eval_steps_per_second": 0.904, "step": 740 }, { "epoch": 148.19, "learning_rate": 5.999999999999999e-06, "loss": 0.0049, "step": 741 }, { "epoch": 148.38, "learning_rate": 5.5384615384615385e-06, "loss": 0.004, "step": 742 }, { "epoch": 148.57, "learning_rate": 5.076923076923076e-06, "loss": 0.0044, "step": 743 }, { "epoch": 148.76, "learning_rate": 4.615384615384615e-06, "loss": 0.0039, "step": 744 }, { "epoch": 148.95, "learning_rate": 4.153846153846154e-06, "loss": 0.004, "step": 745 }, { "epoch": 149.19, "learning_rate": 3.692307692307692e-06, "loss": 0.0129, "step": 746 }, { "epoch": 149.38, "learning_rate": 3.2307692307692305e-06, "loss": 0.004, "step": 747 }, { "epoch": 149.57, "learning_rate": 2.7692307692307693e-06, "loss": 0.004, "step": 748 }, { "epoch": 149.76, "learning_rate": 2.3076923076923077e-06, "loss": 0.0039, "step": 749 }, { "epoch": 149.95, "learning_rate": 1.846153846153846e-06, "loss": 0.0038, "step": 750 }, { "epoch": 149.95, "step": 750, "total_flos": 1.2977555545660588e+19, "train_loss": 0.4369454041322072, "train_runtime": 3358.8945, "train_samples_per_second": 7.458, "train_steps_per_second": 0.223 } ], "max_steps": 750, "num_train_epochs": 150, "total_flos": 1.2977555545660588e+19, "trial_name": null, "trial_params": null }