{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 278, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.222222222222222e-06, "loss": 0.9528, "step": 1 }, { "epoch": 0.01, "learning_rate": 4.444444444444444e-06, "loss": 0.8803, "step": 2 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-06, "loss": 0.8748, "step": 3 }, { "epoch": 0.01, "learning_rate": 8.888888888888888e-06, "loss": 0.9064, "step": 4 }, { "epoch": 0.02, "learning_rate": 1.1111111111111113e-05, "loss": 0.8915, "step": 5 }, { "epoch": 0.02, "learning_rate": 1.3333333333333333e-05, "loss": 0.8656, "step": 6 }, { "epoch": 0.03, "learning_rate": 1.555555555555556e-05, "loss": 0.8642, "step": 7 }, { "epoch": 0.03, "learning_rate": 1.7777777777777777e-05, "loss": 0.9196, "step": 8 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.8783, "step": 9 }, { "epoch": 0.04, "learning_rate": 1.9999318037877998e-05, "loss": 0.8703, "step": 10 }, { "epoch": 0.04, "learning_rate": 1.9997272244526454e-05, "loss": 0.9208, "step": 11 }, { "epoch": 0.04, "learning_rate": 1.9993862898976092e-05, "loss": 0.912, "step": 12 }, { "epoch": 0.05, "learning_rate": 1.998909046623581e-05, "loss": 0.8988, "step": 13 }, { "epoch": 0.05, "learning_rate": 1.9982955597229275e-05, "loss": 0.8873, "step": 14 }, { "epoch": 0.05, "learning_rate": 1.9975459128706155e-05, "loss": 0.8688, "step": 15 }, { "epoch": 0.06, "learning_rate": 1.996660208312796e-05, "loss": 0.9077, "step": 16 }, { "epoch": 0.06, "learning_rate": 1.9956385668528614e-05, "loss": 0.8782, "step": 17 }, { "epoch": 0.06, "learning_rate": 1.9944811278349666e-05, "loss": 0.873, "step": 18 }, { "epoch": 0.07, "learning_rate": 1.9931880491250263e-05, "loss": 0.8998, "step": 19 }, { "epoch": 0.07, "learning_rate": 1.9917595070891796e-05, "loss": 0.9115, "step": 20 }, { "epoch": 0.08, "learning_rate": 1.9901956965697387e-05, "loss": 0.9197, "step": 21 }, { "epoch": 0.08, "learning_rate": 1.988496830858612e-05, "loss": 0.898, "step": 22 }, { "epoch": 0.08, "learning_rate": 1.986663141668212e-05, "loss": 0.8908, "step": 23 }, { "epoch": 0.09, "learning_rate": 1.9846948790998532e-05, "loss": 0.8601, "step": 24 }, { "epoch": 0.09, "learning_rate": 1.982592311609639e-05, "loss": 0.859, "step": 25 }, { "epoch": 0.09, "learning_rate": 1.9803557259718472e-05, "loss": 0.9306, "step": 26 }, { "epoch": 0.1, "learning_rate": 1.977985427239815e-05, "loss": 0.906, "step": 27 }, { "epoch": 0.1, "learning_rate": 1.975481738704333e-05, "loss": 0.9386, "step": 28 }, { "epoch": 0.1, "learning_rate": 1.9728450018495506e-05, "loss": 0.8569, "step": 29 }, { "epoch": 0.11, "learning_rate": 1.9700755763064e-05, "loss": 0.8911, "step": 30 }, { "epoch": 0.11, "learning_rate": 1.967173839803545e-05, "loss": 0.9246, "step": 31 }, { "epoch": 0.12, "learning_rate": 1.9641401881158625e-05, "loss": 0.8808, "step": 32 }, { "epoch": 0.12, "learning_rate": 1.960975035010461e-05, "loss": 0.8474, "step": 33 }, { "epoch": 0.12, "learning_rate": 1.9576788121902457e-05, "loss": 0.8672, "step": 34 }, { "epoch": 0.13, "learning_rate": 1.954251969235039e-05, "loss": 0.8729, "step": 35 }, { "epoch": 0.13, "learning_rate": 1.950694973540259e-05, "loss": 0.8927, "step": 36 }, { "epoch": 0.13, "learning_rate": 1.9470083102531724e-05, "loss": 0.9035, "step": 37 }, { "epoch": 0.14, "learning_rate": 1.943192482206723e-05, "loss": 0.9003, "step": 38 }, { "epoch": 0.14, "learning_rate": 1.9392480098509488e-05, "loss": 0.8605, "step": 39 }, { "epoch": 0.14, "learning_rate": 1.9351754311819978e-05, "loss": 0.9471, "step": 40 }, { "epoch": 0.15, "learning_rate": 1.9309753016687478e-05, "loss": 0.9558, "step": 41 }, { "epoch": 0.15, "learning_rate": 1.9266481941770463e-05, "loss": 0.9359, "step": 42 }, { "epoch": 0.15, "learning_rate": 1.9221946988915745e-05, "loss": 0.9561, "step": 43 }, { "epoch": 0.16, "learning_rate": 1.9176154232353513e-05, "loss": 0.9012, "step": 44 }, { "epoch": 0.16, "learning_rate": 1.9129109917868863e-05, "loss": 0.8935, "step": 45 }, { "epoch": 0.17, "learning_rate": 1.9080820461949886e-05, "loss": 0.8635, "step": 46 }, { "epoch": 0.17, "learning_rate": 1.9031292450912565e-05, "loss": 0.9522, "step": 47 }, { "epoch": 0.17, "learning_rate": 1.898053264000239e-05, "loss": 0.9226, "step": 48 }, { "epoch": 0.18, "learning_rate": 1.8928547952473037e-05, "loss": 0.8975, "step": 49 }, { "epoch": 0.18, "learning_rate": 1.8875345478642067e-05, "loss": 0.8776, "step": 50 }, { "epoch": 0.18, "learning_rate": 1.8820932474923874e-05, "loss": 0.8925, "step": 51 }, { "epoch": 0.19, "learning_rate": 1.8765316362839955e-05, "loss": 0.8753, "step": 52 }, { "epoch": 0.19, "learning_rate": 1.8708504728006668e-05, "loss": 0.8859, "step": 53 }, { "epoch": 0.19, "learning_rate": 1.865050531910062e-05, "loss": 0.9087, "step": 54 }, { "epoch": 0.2, "learning_rate": 1.8591326046801813e-05, "loss": 0.8739, "step": 55 }, { "epoch": 0.2, "learning_rate": 1.8530974982714667e-05, "loss": 0.9321, "step": 56 }, { "epoch": 0.21, "learning_rate": 1.8469460358267127e-05, "loss": 0.8945, "step": 57 }, { "epoch": 0.21, "learning_rate": 1.8406790563587958e-05, "loss": 0.9255, "step": 58 }, { "epoch": 0.21, "learning_rate": 1.8342974146362397e-05, "loss": 0.8816, "step": 59 }, { "epoch": 0.22, "learning_rate": 1.8278019810666295e-05, "loss": 0.8863, "step": 60 }, { "epoch": 0.22, "learning_rate": 1.8211936415778986e-05, "loss": 0.9121, "step": 61 }, { "epoch": 0.22, "learning_rate": 1.8144732974974902e-05, "loss": 0.8759, "step": 62 }, { "epoch": 0.23, "learning_rate": 1.8076418654294267e-05, "loss": 0.9008, "step": 63 }, { "epoch": 0.23, "learning_rate": 1.80070027712929e-05, "loss": 0.9176, "step": 64 }, { "epoch": 0.23, "learning_rate": 1.793649479377137e-05, "loss": 0.8681, "step": 65 }, { "epoch": 0.24, "learning_rate": 1.7864904338483676e-05, "loss": 0.915, "step": 66 }, { "epoch": 0.24, "learning_rate": 1.779224116982558e-05, "loss": 0.8937, "step": 67 }, { "epoch": 0.24, "learning_rate": 1.7718515198502816e-05, "loss": 0.8827, "step": 68 }, { "epoch": 0.25, "learning_rate": 1.7643736480179353e-05, "loss": 0.8496, "step": 69 }, { "epoch": 0.25, "learning_rate": 1.7567915214105883e-05, "loss": 0.9188, "step": 70 }, { "epoch": 0.26, "learning_rate": 1.7491061741728703e-05, "loss": 0.8845, "step": 71 }, { "epoch": 0.26, "learning_rate": 1.741318654527923e-05, "loss": 0.863, "step": 72 }, { "epoch": 0.26, "learning_rate": 1.7334300246344318e-05, "loss": 0.9035, "step": 73 }, { "epoch": 0.27, "learning_rate": 1.725441360441752e-05, "loss": 0.8462, "step": 74 }, { "epoch": 0.27, "learning_rate": 1.7173537515431612e-05, "loss": 0.8881, "step": 75 }, { "epoch": 0.27, "learning_rate": 1.7091683010272447e-05, "loss": 0.8944, "step": 76 }, { "epoch": 0.28, "learning_rate": 1.700886125327443e-05, "loss": 0.9079, "step": 77 }, { "epoch": 0.28, "learning_rate": 1.692508354069779e-05, "loss": 0.8947, "step": 78 }, { "epoch": 0.28, "learning_rate": 1.684036129918786e-05, "loss": 0.8519, "step": 79 }, { "epoch": 0.29, "learning_rate": 1.6754706084216556e-05, "loss": 0.9102, "step": 80 }, { "epoch": 0.29, "learning_rate": 1.6668129578506315e-05, "loss": 0.9016, "step": 81 }, { "epoch": 0.29, "learning_rate": 1.658064359043664e-05, "loss": 0.9281, "step": 82 }, { "epoch": 0.3, "learning_rate": 1.6492260052433554e-05, "loss": 0.9072, "step": 83 }, { "epoch": 0.3, "learning_rate": 1.6402991019342073e-05, "loss": 0.9166, "step": 84 }, { "epoch": 0.31, "learning_rate": 1.631284866678205e-05, "loss": 0.8633, "step": 85 }, { "epoch": 0.31, "learning_rate": 1.6221845289487493e-05, "loss": 0.9126, "step": 86 }, { "epoch": 0.31, "learning_rate": 1.6129993299629652e-05, "loss": 0.9278, "step": 87 }, { "epoch": 0.32, "learning_rate": 1.6037305225124122e-05, "loss": 0.895, "step": 88 }, { "epoch": 0.32, "learning_rate": 1.5943793707922086e-05, "loss": 0.909, "step": 89 }, { "epoch": 0.32, "learning_rate": 1.5849471502286088e-05, "loss": 0.8707, "step": 90 }, { "epoch": 0.33, "learning_rate": 1.5754351473050434e-05, "loss": 0.9124, "step": 91 }, { "epoch": 0.33, "learning_rate": 1.5658446593866517e-05, "loss": 0.9002, "step": 92 }, { "epoch": 0.33, "learning_rate": 1.5561769945433326e-05, "loss": 0.8842, "step": 93 }, { "epoch": 0.34, "learning_rate": 1.5464334713713312e-05, "loss": 0.8894, "step": 94 }, { "epoch": 0.34, "learning_rate": 1.5366154188133962e-05, "loss": 0.9092, "step": 95 }, { "epoch": 0.35, "learning_rate": 1.526724175977518e-05, "loss": 0.907, "step": 96 }, { "epoch": 0.35, "learning_rate": 1.5167610919542885e-05, "loss": 0.975, "step": 97 }, { "epoch": 0.35, "learning_rate": 1.5067275256328913e-05, "loss": 0.919, "step": 98 }, { "epoch": 0.36, "learning_rate": 1.4966248455157622e-05, "loss": 0.8805, "step": 99 }, { "epoch": 0.36, "learning_rate": 1.4864544295319357e-05, "loss": 0.8917, "step": 100 }, { "epoch": 0.36, "learning_rate": 1.4762176648491052e-05, "loss": 0.8866, "step": 101 }, { "epoch": 0.37, "learning_rate": 1.4659159476844231e-05, "loss": 0.8638, "step": 102 }, { "epoch": 0.37, "learning_rate": 1.4555506831140698e-05, "loss": 0.8955, "step": 103 }, { "epoch": 0.37, "learning_rate": 1.445123284881609e-05, "loss": 0.9303, "step": 104 }, { "epoch": 0.38, "learning_rate": 1.4346351752051663e-05, "loss": 0.8765, "step": 105 }, { "epoch": 0.38, "learning_rate": 1.4240877845834473e-05, "loss": 0.8824, "step": 106 }, { "epoch": 0.38, "learning_rate": 1.4134825516006307e-05, "loss": 0.8933, "step": 107 }, { "epoch": 0.39, "learning_rate": 1.4028209227301534e-05, "loss": 0.8633, "step": 108 }, { "epoch": 0.39, "learning_rate": 1.392104352137426e-05, "loss": 0.8933, "step": 109 }, { "epoch": 0.4, "learning_rate": 1.3813343014814926e-05, "loss": 0.8914, "step": 110 }, { "epoch": 0.4, "learning_rate": 1.3705122397156727e-05, "loss": 0.8869, "step": 111 }, { "epoch": 0.4, "learning_rate": 1.359639642887208e-05, "loss": 0.8688, "step": 112 }, { "epoch": 0.41, "learning_rate": 1.3487179939359394e-05, "loss": 0.9112, "step": 113 }, { "epoch": 0.41, "learning_rate": 1.3377487824920459e-05, "loss": 0.8622, "step": 114 }, { "epoch": 0.41, "learning_rate": 1.32673350467287e-05, "loss": 0.9195, "step": 115 }, { "epoch": 0.42, "learning_rate": 1.3156736628788585e-05, "loss": 0.9125, "step": 116 }, { "epoch": 0.42, "learning_rate": 1.304570765588648e-05, "loss": 0.8975, "step": 117 }, { "epoch": 0.42, "learning_rate": 1.293426327153317e-05, "loss": 0.8466, "step": 118 }, { "epoch": 0.43, "learning_rate": 1.2822418675898428e-05, "loss": 0.9109, "step": 119 }, { "epoch": 0.43, "learning_rate": 1.2710189123737804e-05, "loss": 0.8949, "step": 120 }, { "epoch": 0.44, "learning_rate": 1.2597589922312009e-05, "loss": 0.8578, "step": 121 }, { "epoch": 0.44, "learning_rate": 1.2484636429299113e-05, "loss": 0.8887, "step": 122 }, { "epoch": 0.44, "learning_rate": 1.2371344050699872e-05, "loss": 0.8935, "step": 123 }, { "epoch": 0.45, "learning_rate": 1.2257728238736468e-05, "loss": 0.8413, "step": 124 }, { "epoch": 0.45, "learning_rate": 1.2143804489744941e-05, "loss": 0.8918, "step": 125 }, { "epoch": 0.45, "learning_rate": 1.2029588342061623e-05, "loss": 0.8996, "step": 126 }, { "epoch": 0.46, "learning_rate": 1.1915095373903789e-05, "loss": 0.8716, "step": 127 }, { "epoch": 0.46, "learning_rate": 1.1800341201244954e-05, "loss": 0.8695, "step": 128 }, { "epoch": 0.46, "learning_rate": 1.1685341475684935e-05, "loss": 0.9327, "step": 129 }, { "epoch": 0.47, "learning_rate": 1.15701118823151e-05, "loss": 0.8849, "step": 130 }, { "epoch": 0.47, "learning_rate": 1.1454668137579059e-05, "loss": 0.8831, "step": 131 }, { "epoch": 0.47, "learning_rate": 1.1339025987129033e-05, "loss": 0.8848, "step": 132 }, { "epoch": 0.48, "learning_rate": 1.1223201203678289e-05, "loss": 0.8591, "step": 133 }, { "epoch": 0.48, "learning_rate": 1.1107209584849845e-05, "loss": 0.8592, "step": 134 }, { "epoch": 0.49, "learning_rate": 1.0991066951021802e-05, "loss": 0.8658, "step": 135 }, { "epoch": 0.49, "learning_rate": 1.0874789143169569e-05, "loss": 0.9167, "step": 136 }, { "epoch": 0.49, "learning_rate": 1.0758392020705258e-05, "loss": 0.8967, "step": 137 }, { "epoch": 0.5, "learning_rate": 1.0641891459314598e-05, "loss": 0.9196, "step": 138 }, { "epoch": 0.5, "learning_rate": 1.0525303348791599e-05, "loss": 0.8851, "step": 139 }, { "epoch": 0.5, "learning_rate": 1.0408643590871312e-05, "loss": 0.8484, "step": 140 }, { "epoch": 0.51, "learning_rate": 1.029192809706095e-05, "loss": 0.9097, "step": 141 }, { "epoch": 0.51, "learning_rate": 1.017517278646968e-05, "loss": 0.9015, "step": 142 }, { "epoch": 0.51, "learning_rate": 1.0058393583637376e-05, "loss": 0.8429, "step": 143 }, { "epoch": 0.52, "learning_rate": 9.94160641636263e-06, "loss": 0.8631, "step": 144 }, { "epoch": 0.52, "learning_rate": 9.824827213530323e-06, "loss": 0.8986, "step": 145 }, { "epoch": 0.53, "learning_rate": 9.708071902939053e-06, "loss": 0.897, "step": 146 }, { "epoch": 0.53, "learning_rate": 9.591356409128691e-06, "loss": 0.8773, "step": 147 }, { "epoch": 0.53, "learning_rate": 9.474696651208406e-06, "loss": 0.8805, "step": 148 }, { "epoch": 0.54, "learning_rate": 9.358108540685406e-06, "loss": 0.8887, "step": 149 }, { "epoch": 0.54, "learning_rate": 9.241607979294745e-06, "loss": 0.8689, "step": 150 }, { "epoch": 0.54, "learning_rate": 9.125210856830433e-06, "loss": 0.8726, "step": 151 }, { "epoch": 0.55, "learning_rate": 9.0089330489782e-06, "loss": 0.9044, "step": 152 }, { "epoch": 0.55, "learning_rate": 8.892790415150161e-06, "loss": 0.8903, "step": 153 }, { "epoch": 0.55, "learning_rate": 8.776798796321715e-06, "loss": 0.8569, "step": 154 }, { "epoch": 0.56, "learning_rate": 8.66097401287097e-06, "loss": 0.8636, "step": 155 }, { "epoch": 0.56, "learning_rate": 8.545331862420945e-06, "loss": 0.9027, "step": 156 }, { "epoch": 0.56, "learning_rate": 8.429888117684904e-06, "loss": 0.8581, "step": 157 }, { "epoch": 0.57, "learning_rate": 8.314658524315068e-06, "loss": 0.8882, "step": 158 }, { "epoch": 0.57, "learning_rate": 8.199658798755048e-06, "loss": 0.8915, "step": 159 }, { "epoch": 0.58, "learning_rate": 8.084904626096211e-06, "loss": 0.9091, "step": 160 }, { "epoch": 0.58, "learning_rate": 7.970411657938382e-06, "loss": 0.8708, "step": 161 }, { "epoch": 0.58, "learning_rate": 7.856195510255059e-06, "loss": 0.8687, "step": 162 }, { "epoch": 0.59, "learning_rate": 7.742271761263537e-06, "loss": 0.8653, "step": 163 }, { "epoch": 0.59, "learning_rate": 7.628655949300133e-06, "loss": 0.8938, "step": 164 }, { "epoch": 0.59, "learning_rate": 7.51536357070089e-06, "loss": 0.8943, "step": 165 }, { "epoch": 0.6, "learning_rate": 7.402410077687994e-06, "loss": 0.871, "step": 166 }, { "epoch": 0.6, "learning_rate": 7.2898108762622e-06, "loss": 0.8611, "step": 167 }, { "epoch": 0.6, "learning_rate": 7.1775813241015755e-06, "loss": 0.9223, "step": 168 }, { "epoch": 0.61, "learning_rate": 7.065736728466832e-06, "loss": 0.8372, "step": 169 }, { "epoch": 0.61, "learning_rate": 6.9542923441135226e-06, "loss": 0.8493, "step": 170 }, { "epoch": 0.62, "learning_rate": 6.843263371211415e-06, "loss": 0.8764, "step": 171 }, { "epoch": 0.62, "learning_rate": 6.732664953271305e-06, "loss": 0.8427, "step": 172 }, { "epoch": 0.62, "learning_rate": 6.622512175079543e-06, "loss": 0.9206, "step": 173 }, { "epoch": 0.63, "learning_rate": 6.512820060640608e-06, "loss": 0.9046, "step": 174 }, { "epoch": 0.63, "learning_rate": 6.403603571127921e-06, "loss": 0.9025, "step": 175 }, { "epoch": 0.63, "learning_rate": 6.294877602843276e-06, "loss": 0.8967, "step": 176 }, { "epoch": 0.64, "learning_rate": 6.186656985185078e-06, "loss": 0.8848, "step": 177 }, { "epoch": 0.64, "learning_rate": 6.078956478625743e-06, "loss": 0.906, "step": 178 }, { "epoch": 0.64, "learning_rate": 5.971790772698467e-06, "loss": 0.918, "step": 179 }, { "epoch": 0.65, "learning_rate": 5.865174483993697e-06, "loss": 0.893, "step": 180 }, { "epoch": 0.65, "learning_rate": 5.759122154165528e-06, "loss": 0.9007, "step": 181 }, { "epoch": 0.65, "learning_rate": 5.653648247948342e-06, "loss": 0.8744, "step": 182 }, { "epoch": 0.66, "learning_rate": 5.548767151183912e-06, "loss": 0.917, "step": 183 }, { "epoch": 0.66, "learning_rate": 5.444493168859304e-06, "loss": 0.8773, "step": 184 }, { "epoch": 0.67, "learning_rate": 5.340840523155769e-06, "loss": 0.9227, "step": 185 }, { "epoch": 0.67, "learning_rate": 5.237823351508953e-06, "loss": 0.8372, "step": 186 }, { "epoch": 0.67, "learning_rate": 5.135455704680646e-06, "loss": 0.9047, "step": 187 }, { "epoch": 0.68, "learning_rate": 5.03375154484238e-06, "loss": 0.8698, "step": 188 }, { "epoch": 0.68, "learning_rate": 4.932724743671089e-06, "loss": 0.8616, "step": 189 }, { "epoch": 0.68, "learning_rate": 4.832389080457118e-06, "loss": 0.9023, "step": 190 }, { "epoch": 0.69, "learning_rate": 4.732758240224819e-06, "loss": 0.8613, "step": 191 }, { "epoch": 0.69, "learning_rate": 4.633845811866044e-06, "loss": 0.8543, "step": 192 }, { "epoch": 0.69, "learning_rate": 4.535665286286691e-06, "loss": 0.9005, "step": 193 }, { "epoch": 0.7, "learning_rate": 4.438230054566678e-06, "loss": 0.9237, "step": 194 }, { "epoch": 0.7, "learning_rate": 4.34155340613348e-06, "loss": 0.908, "step": 195 }, { "epoch": 0.71, "learning_rate": 4.245648526949568e-06, "loss": 0.8667, "step": 196 }, { "epoch": 0.71, "learning_rate": 4.150528497713911e-06, "loss": 0.8766, "step": 197 }, { "epoch": 0.71, "learning_rate": 4.056206292077916e-06, "loss": 0.8879, "step": 198 }, { "epoch": 0.72, "learning_rate": 3.96269477487588e-06, "loss": 0.8712, "step": 199 }, { "epoch": 0.72, "learning_rate": 3.870006700370348e-06, "loss": 0.8465, "step": 200 }, { "epoch": 0.72, "learning_rate": 3.778154710512513e-06, "loss": 0.9037, "step": 201 }, { "epoch": 0.73, "learning_rate": 3.687151333217952e-06, "loss": 0.8617, "step": 202 }, { "epoch": 0.73, "learning_rate": 3.597008980657929e-06, "loss": 0.8778, "step": 203 }, { "epoch": 0.73, "learning_rate": 3.5077399475664474e-06, "loss": 0.8629, "step": 204 }, { "epoch": 0.74, "learning_rate": 3.419356409563361e-06, "loss": 0.8194, "step": 205 }, { "epoch": 0.74, "learning_rate": 3.331870421493688e-06, "loss": 0.8806, "step": 206 }, { "epoch": 0.74, "learning_rate": 3.245293915783444e-06, "loss": 0.8949, "step": 207 }, { "epoch": 0.75, "learning_rate": 3.1596387008121386e-06, "loss": 0.8451, "step": 208 }, { "epoch": 0.75, "learning_rate": 3.074916459302211e-06, "loss": 0.8941, "step": 209 }, { "epoch": 0.76, "learning_rate": 2.9911387467255737e-06, "loss": 0.8887, "step": 210 }, { "epoch": 0.76, "learning_rate": 2.9083169897275554e-06, "loss": 0.8624, "step": 211 }, { "epoch": 0.76, "learning_rate": 2.82646248456839e-06, "loss": 0.8769, "step": 212 }, { "epoch": 0.77, "learning_rate": 2.745586395582481e-06, "loss": 0.87, "step": 213 }, { "epoch": 0.77, "learning_rate": 2.665699753655684e-06, "loss": 0.8524, "step": 214 }, { "epoch": 0.77, "learning_rate": 2.586813454720771e-06, "loss": 0.8492, "step": 215 }, { "epoch": 0.78, "learning_rate": 2.5089382582712995e-06, "loss": 0.8835, "step": 216 }, { "epoch": 0.78, "learning_rate": 2.4320847858941167e-06, "loss": 0.8711, "step": 217 }, { "epoch": 0.78, "learning_rate": 2.3562635198206476e-06, "loss": 0.8955, "step": 218 }, { "epoch": 0.79, "learning_rate": 2.281484801497186e-06, "loss": 0.8767, "step": 219 }, { "epoch": 0.79, "learning_rate": 2.2077588301744234e-06, "loss": 0.8499, "step": 220 }, { "epoch": 0.79, "learning_rate": 2.1350956615163254e-06, "loss": 0.835, "step": 221 }, { "epoch": 0.8, "learning_rate": 2.0635052062286323e-06, "loss": 0.8427, "step": 222 }, { "epoch": 0.8, "learning_rate": 1.992997228707103e-06, "loss": 0.8295, "step": 223 }, { "epoch": 0.81, "learning_rate": 1.923581345705736e-06, "loss": 0.8669, "step": 224 }, { "epoch": 0.81, "learning_rate": 1.8552670250251003e-06, "loss": 0.8733, "step": 225 }, { "epoch": 0.81, "learning_rate": 1.788063584221017e-06, "loss": 0.862, "step": 226 }, { "epoch": 0.82, "learning_rate": 1.7219801893337073e-06, "loss": 0.8726, "step": 227 }, { "epoch": 0.82, "learning_rate": 1.6570258536376083e-06, "loss": 0.8486, "step": 228 }, { "epoch": 0.82, "learning_rate": 1.5932094364120453e-06, "loss": 0.8599, "step": 229 }, { "epoch": 0.83, "learning_rate": 1.5305396417328755e-06, "loss": 0.8423, "step": 230 }, { "epoch": 0.83, "learning_rate": 1.469025017285335e-06, "loss": 0.8835, "step": 231 }, { "epoch": 0.83, "learning_rate": 1.4086739531981886e-06, "loss": 0.9035, "step": 232 }, { "epoch": 0.84, "learning_rate": 1.3494946808993804e-06, "loss": 0.8399, "step": 233 }, { "epoch": 0.84, "learning_rate": 1.291495271993337e-06, "loss": 0.8797, "step": 234 }, { "epoch": 0.85, "learning_rate": 1.234683637160048e-06, "loss": 0.8579, "step": 235 }, { "epoch": 0.85, "learning_rate": 1.1790675250761263e-06, "loss": 0.8749, "step": 236 }, { "epoch": 0.85, "learning_rate": 1.124654521357934e-06, "loss": 0.8661, "step": 237 }, { "epoch": 0.86, "learning_rate": 1.0714520475269653e-06, "loss": 0.9152, "step": 238 }, { "epoch": 0.86, "learning_rate": 1.0194673599976134e-06, "loss": 0.8602, "step": 239 }, { "epoch": 0.86, "learning_rate": 9.687075490874376e-07, "loss": 0.8802, "step": 240 }, { "epoch": 0.87, "learning_rate": 9.191795380501133e-07, "loss": 0.8678, "step": 241 }, { "epoch": 0.87, "learning_rate": 8.708900821311405e-07, "loss": 0.8902, "step": 242 }, { "epoch": 0.87, "learning_rate": 8.238457676464873e-07, "loss": 0.8982, "step": 243 }, { "epoch": 0.88, "learning_rate": 7.780530110842566e-07, "loss": 0.8694, "step": 244 }, { "epoch": 0.88, "learning_rate": 7.335180582295387e-07, "loss": 0.8896, "step": 245 }, { "epoch": 0.88, "learning_rate": 6.902469833125236e-07, "loss": 0.8869, "step": 246 }, { "epoch": 0.89, "learning_rate": 6.482456881800248e-07, "loss": 0.9181, "step": 247 }, { "epoch": 0.89, "learning_rate": 6.075199014905153e-07, "loss": 0.814, "step": 248 }, { "epoch": 0.9, "learning_rate": 5.680751779327742e-07, "loss": 0.885, "step": 249 }, { "epoch": 0.9, "learning_rate": 5.299168974682789e-07, "loss": 0.8642, "step": 250 }, { "epoch": 0.9, "learning_rate": 4.930502645974122e-07, "loss": 0.8697, "step": 251 }, { "epoch": 0.91, "learning_rate": 4.574803076496148e-07, "loss": 0.8614, "step": 252 }, { "epoch": 0.91, "learning_rate": 4.232118780975447e-07, "loss": 0.8606, "step": 253 }, { "epoch": 0.91, "learning_rate": 3.9024964989539227e-07, "loss": 0.849, "step": 254 }, { "epoch": 0.92, "learning_rate": 3.585981188413767e-07, "loss": 0.8665, "step": 255 }, { "epoch": 0.92, "learning_rate": 3.2826160196455124e-07, "loss": 0.8916, "step": 256 }, { "epoch": 0.92, "learning_rate": 2.9924423693600157e-07, "loss": 0.846, "step": 257 }, { "epoch": 0.93, "learning_rate": 2.7154998150449643e-07, "loss": 0.8738, "step": 258 }, { "epoch": 0.93, "learning_rate": 2.4518261295667255e-07, "loss": 0.8699, "step": 259 }, { "epoch": 0.94, "learning_rate": 2.201457276018526e-07, "loss": 0.8869, "step": 260 }, { "epoch": 0.94, "learning_rate": 1.9644274028152944e-07, "loss": 0.8696, "step": 261 }, { "epoch": 0.94, "learning_rate": 1.740768839036111e-07, "loss": 0.8931, "step": 262 }, { "epoch": 0.95, "learning_rate": 1.5305120900146908e-07, "loss": 0.887, "step": 263 }, { "epoch": 0.95, "learning_rate": 1.3336858331787993e-07, "loss": 0.8705, "step": 264 }, { "epoch": 0.95, "learning_rate": 1.1503169141388049e-07, "loss": 0.8813, "step": 265 }, { "epoch": 0.96, "learning_rate": 9.804303430261175e-08, "loss": 0.8476, "step": 266 }, { "epoch": 0.96, "learning_rate": 8.240492910820407e-08, "loss": 0.8973, "step": 267 }, { "epoch": 0.96, "learning_rate": 6.811950874973994e-08, "loss": 0.8578, "step": 268 }, { "epoch": 0.97, "learning_rate": 5.518872165033329e-08, "loss": 0.8851, "step": 269 }, { "epoch": 0.97, "learning_rate": 4.361433147138772e-08, "loss": 0.8397, "step": 270 }, { "epoch": 0.97, "learning_rate": 3.339791687203997e-08, "loss": 0.8525, "step": 271 }, { "epoch": 0.98, "learning_rate": 2.4540871293845526e-08, "loss": 0.8295, "step": 272 }, { "epoch": 0.98, "learning_rate": 1.7044402770725055e-08, "loss": 0.8433, "step": 273 }, { "epoch": 0.99, "learning_rate": 1.0909533764194013e-08, "loss": 0.8829, "step": 274 }, { "epoch": 0.99, "learning_rate": 6.137101023910852e-09, "loss": 0.8685, "step": 275 }, { "epoch": 0.99, "learning_rate": 2.7277554735449797e-09, "loss": 0.8855, "step": 276 }, { "epoch": 1.0, "learning_rate": 6.819621220033323e-10, "loss": 0.8618, "step": 277 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.8748, "step": 278 }, { "epoch": 1.0, "step": 278, "total_flos": 4.5714113651172966e+17, "train_loss": 0.8846922922048638, "train_runtime": 5115.5251, "train_samples_per_second": 17.336, "train_steps_per_second": 0.054 } ], "max_steps": 278, "num_train_epochs": 1, "total_flos": 4.5714113651172966e+17, "trial_name": null, "trial_params": null }