{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 686, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9956268221574345e-05, "loss": 5.3629, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.991253644314869e-05, "loss": 3.9386, "step": 2 }, { "epoch": 0.01, "learning_rate": 2.9868804664723033e-05, "loss": 3.4456, "step": 3 }, { "epoch": 0.01, "learning_rate": 2.9825072886297377e-05, "loss": 3.3224, "step": 4 }, { "epoch": 0.01, "learning_rate": 2.978134110787172e-05, "loss": 2.9937, "step": 5 }, { "epoch": 0.02, "learning_rate": 2.9737609329446064e-05, "loss": 2.8936, "step": 6 }, { "epoch": 0.02, "learning_rate": 2.969387755102041e-05, "loss": 2.6102, "step": 7 }, { "epoch": 0.02, "learning_rate": 2.9650145772594756e-05, "loss": 2.5955, "step": 8 }, { "epoch": 0.03, "learning_rate": 2.9606413994169096e-05, "loss": 2.2454, "step": 9 }, { "epoch": 0.03, "learning_rate": 2.956268221574344e-05, "loss": 2.294, "step": 10 }, { "epoch": 0.03, "learning_rate": 2.9518950437317784e-05, "loss": 2.01, "step": 11 }, { "epoch": 0.03, "learning_rate": 2.9475218658892128e-05, "loss": 2.1138, "step": 12 }, { "epoch": 0.04, "learning_rate": 2.9431486880466475e-05, "loss": 1.8897, "step": 13 }, { "epoch": 0.04, "learning_rate": 2.9387755102040816e-05, "loss": 1.8003, "step": 14 }, { "epoch": 0.04, "learning_rate": 2.9344023323615163e-05, "loss": 1.7822, "step": 15 }, { "epoch": 0.05, "learning_rate": 2.9300291545189504e-05, "loss": 1.8079, "step": 16 }, { "epoch": 0.05, "learning_rate": 2.925655976676385e-05, "loss": 1.9259, "step": 17 }, { "epoch": 0.05, "learning_rate": 2.9212827988338192e-05, "loss": 1.5808, "step": 18 }, { "epoch": 0.06, "learning_rate": 2.9169096209912536e-05, "loss": 1.621, "step": 19 }, { "epoch": 0.06, "learning_rate": 2.9125364431486883e-05, "loss": 1.651, "step": 20 }, { "epoch": 0.06, "learning_rate": 2.9081632653061224e-05, "loss": 1.5975, "step": 21 }, { "epoch": 0.06, "learning_rate": 2.903790087463557e-05, "loss": 1.5772, "step": 22 }, { "epoch": 0.07, "learning_rate": 2.899416909620991e-05, "loss": 1.7805, "step": 23 }, { "epoch": 0.07, "learning_rate": 2.895043731778426e-05, "loss": 1.5725, "step": 24 }, { "epoch": 0.07, "learning_rate": 2.89067055393586e-05, "loss": 1.4716, "step": 25 }, { "epoch": 0.08, "learning_rate": 2.8862973760932947e-05, "loss": 1.6262, "step": 26 }, { "epoch": 0.08, "learning_rate": 2.881924198250729e-05, "loss": 1.536, "step": 27 }, { "epoch": 0.08, "learning_rate": 2.877551020408163e-05, "loss": 1.5282, "step": 28 }, { "epoch": 0.08, "learning_rate": 2.873177842565598e-05, "loss": 1.506, "step": 29 }, { "epoch": 0.09, "learning_rate": 2.868804664723032e-05, "loss": 1.4047, "step": 30 }, { "epoch": 0.09, "learning_rate": 2.8644314868804667e-05, "loss": 1.5451, "step": 31 }, { "epoch": 0.09, "learning_rate": 2.860058309037901e-05, "loss": 1.2876, "step": 32 }, { "epoch": 0.1, "learning_rate": 2.8556851311953354e-05, "loss": 1.3083, "step": 33 }, { "epoch": 0.1, "learning_rate": 2.85131195335277e-05, "loss": 1.3228, "step": 34 }, { "epoch": 0.1, "learning_rate": 2.8469387755102042e-05, "loss": 1.2497, "step": 35 }, { "epoch": 0.1, "learning_rate": 2.8425655976676386e-05, "loss": 1.2854, "step": 36 }, { "epoch": 0.11, "learning_rate": 2.8381924198250727e-05, "loss": 1.332, "step": 37 }, { "epoch": 0.11, "learning_rate": 2.8338192419825074e-05, "loss": 1.2977, "step": 38 }, { "epoch": 0.11, "learning_rate": 2.8294460641399418e-05, "loss": 1.1913, "step": 39 }, { "epoch": 0.12, "learning_rate": 2.8250728862973762e-05, "loss": 1.4206, "step": 40 }, { "epoch": 0.12, "learning_rate": 2.8206997084548106e-05, "loss": 1.3032, "step": 41 }, { "epoch": 0.12, "learning_rate": 2.816326530612245e-05, "loss": 1.1998, "step": 42 }, { "epoch": 0.13, "learning_rate": 2.8119533527696794e-05, "loss": 1.1315, "step": 43 }, { "epoch": 0.13, "learning_rate": 2.8075801749271134e-05, "loss": 1.199, "step": 44 }, { "epoch": 0.13, "learning_rate": 2.8032069970845482e-05, "loss": 1.1658, "step": 45 }, { "epoch": 0.13, "learning_rate": 2.7988338192419826e-05, "loss": 1.1449, "step": 46 }, { "epoch": 0.14, "learning_rate": 2.794460641399417e-05, "loss": 1.1687, "step": 47 }, { "epoch": 0.14, "learning_rate": 2.7900874635568514e-05, "loss": 1.2565, "step": 48 }, { "epoch": 0.14, "learning_rate": 2.7857142857142858e-05, "loss": 1.1276, "step": 49 }, { "epoch": 0.15, "learning_rate": 2.78134110787172e-05, "loss": 1.1296, "step": 50 }, { "epoch": 0.15, "learning_rate": 2.776967930029155e-05, "loss": 1.2696, "step": 51 }, { "epoch": 0.15, "learning_rate": 2.772594752186589e-05, "loss": 1.3116, "step": 52 }, { "epoch": 0.15, "learning_rate": 2.7682215743440233e-05, "loss": 1.2603, "step": 53 }, { "epoch": 0.16, "learning_rate": 2.7638483965014577e-05, "loss": 1.1573, "step": 54 }, { "epoch": 0.16, "learning_rate": 2.759475218658892e-05, "loss": 1.1059, "step": 55 }, { "epoch": 0.16, "learning_rate": 2.7551020408163265e-05, "loss": 1.0841, "step": 56 }, { "epoch": 0.17, "learning_rate": 2.750728862973761e-05, "loss": 1.1224, "step": 57 }, { "epoch": 0.17, "learning_rate": 2.7463556851311957e-05, "loss": 1.0036, "step": 58 }, { "epoch": 0.17, "learning_rate": 2.7419825072886297e-05, "loss": 1.1716, "step": 59 }, { "epoch": 0.17, "learning_rate": 2.7376093294460644e-05, "loss": 1.1219, "step": 60 }, { "epoch": 0.18, "learning_rate": 2.7332361516034985e-05, "loss": 1.3287, "step": 61 }, { "epoch": 0.18, "learning_rate": 2.728862973760933e-05, "loss": 1.1589, "step": 62 }, { "epoch": 0.18, "learning_rate": 2.7244897959183673e-05, "loss": 1.1281, "step": 63 }, { "epoch": 0.19, "learning_rate": 2.7201166180758017e-05, "loss": 1.0111, "step": 64 }, { "epoch": 0.19, "learning_rate": 2.7157434402332364e-05, "loss": 1.0364, "step": 65 }, { "epoch": 0.19, "learning_rate": 2.7113702623906705e-05, "loss": 0.9568, "step": 66 }, { "epoch": 0.2, "learning_rate": 2.7069970845481052e-05, "loss": 1.0818, "step": 67 }, { "epoch": 0.2, "learning_rate": 2.7026239067055393e-05, "loss": 1.3587, "step": 68 }, { "epoch": 0.2, "learning_rate": 2.698250728862974e-05, "loss": 1.0199, "step": 69 }, { "epoch": 0.2, "learning_rate": 2.6938775510204084e-05, "loss": 1.1711, "step": 70 }, { "epoch": 0.21, "learning_rate": 2.6895043731778424e-05, "loss": 1.0558, "step": 71 }, { "epoch": 0.21, "learning_rate": 2.6851311953352772e-05, "loss": 1.1889, "step": 72 }, { "epoch": 0.21, "learning_rate": 2.6807580174927112e-05, "loss": 0.8719, "step": 73 }, { "epoch": 0.22, "learning_rate": 2.676384839650146e-05, "loss": 1.0826, "step": 74 }, { "epoch": 0.22, "learning_rate": 2.67201166180758e-05, "loss": 0.8779, "step": 75 }, { "epoch": 0.22, "learning_rate": 2.6676384839650148e-05, "loss": 1.0484, "step": 76 }, { "epoch": 0.22, "learning_rate": 2.663265306122449e-05, "loss": 1.2392, "step": 77 }, { "epoch": 0.23, "learning_rate": 2.6588921282798835e-05, "loss": 1.0915, "step": 78 }, { "epoch": 0.23, "learning_rate": 2.654518950437318e-05, "loss": 0.9508, "step": 79 }, { "epoch": 0.23, "learning_rate": 2.650145772594752e-05, "loss": 1.0686, "step": 80 }, { "epoch": 0.24, "learning_rate": 2.6457725947521867e-05, "loss": 1.0613, "step": 81 }, { "epoch": 0.24, "learning_rate": 2.6413994169096208e-05, "loss": 0.9126, "step": 82 }, { "epoch": 0.24, "learning_rate": 2.6370262390670555e-05, "loss": 1.2267, "step": 83 }, { "epoch": 0.24, "learning_rate": 2.63265306122449e-05, "loss": 0.9892, "step": 84 }, { "epoch": 0.25, "learning_rate": 2.6282798833819243e-05, "loss": 0.9648, "step": 85 }, { "epoch": 0.25, "learning_rate": 2.6239067055393587e-05, "loss": 0.9477, "step": 86 }, { "epoch": 0.25, "learning_rate": 2.619533527696793e-05, "loss": 1.0255, "step": 87 }, { "epoch": 0.26, "learning_rate": 2.6151603498542275e-05, "loss": 0.9431, "step": 88 }, { "epoch": 0.26, "learning_rate": 2.610787172011662e-05, "loss": 0.9383, "step": 89 }, { "epoch": 0.26, "learning_rate": 2.6064139941690963e-05, "loss": 1.0558, "step": 90 }, { "epoch": 0.27, "learning_rate": 2.6020408163265307e-05, "loss": 0.9906, "step": 91 }, { "epoch": 0.27, "learning_rate": 2.597667638483965e-05, "loss": 1.0479, "step": 92 }, { "epoch": 0.27, "learning_rate": 2.5932944606413995e-05, "loss": 1.0124, "step": 93 }, { "epoch": 0.27, "learning_rate": 2.588921282798834e-05, "loss": 0.8342, "step": 94 }, { "epoch": 0.28, "learning_rate": 2.5845481049562683e-05, "loss": 1.0599, "step": 95 }, { "epoch": 0.28, "learning_rate": 2.580174927113703e-05, "loss": 1.0933, "step": 96 }, { "epoch": 0.28, "learning_rate": 2.575801749271137e-05, "loss": 1.0057, "step": 97 }, { "epoch": 0.29, "learning_rate": 2.5714285714285714e-05, "loss": 1.0297, "step": 98 }, { "epoch": 0.29, "learning_rate": 2.567055393586006e-05, "loss": 1.1918, "step": 99 }, { "epoch": 0.29, "learning_rate": 2.5626822157434402e-05, "loss": 0.9445, "step": 100 }, { "epoch": 0.29, "learning_rate": 2.5583090379008746e-05, "loss": 1.1254, "step": 101 }, { "epoch": 0.3, "learning_rate": 2.553935860058309e-05, "loss": 0.9527, "step": 102 }, { "epoch": 0.3, "learning_rate": 2.5495626822157438e-05, "loss": 0.8619, "step": 103 }, { "epoch": 0.3, "learning_rate": 2.5451895043731778e-05, "loss": 0.9988, "step": 104 }, { "epoch": 0.31, "learning_rate": 2.5408163265306125e-05, "loss": 0.7942, "step": 105 }, { "epoch": 0.31, "learning_rate": 2.5364431486880466e-05, "loss": 1.0989, "step": 106 }, { "epoch": 0.31, "learning_rate": 2.532069970845481e-05, "loss": 0.9513, "step": 107 }, { "epoch": 0.31, "learning_rate": 2.5276967930029154e-05, "loss": 1.0775, "step": 108 }, { "epoch": 0.32, "learning_rate": 2.5233236151603498e-05, "loss": 1.1696, "step": 109 }, { "epoch": 0.32, "learning_rate": 2.5189504373177845e-05, "loss": 1.0216, "step": 110 }, { "epoch": 0.32, "learning_rate": 2.5145772594752186e-05, "loss": 1.03, "step": 111 }, { "epoch": 0.33, "learning_rate": 2.5102040816326533e-05, "loss": 0.9126, "step": 112 }, { "epoch": 0.33, "learning_rate": 2.5058309037900874e-05, "loss": 0.8499, "step": 113 }, { "epoch": 0.33, "learning_rate": 2.501457725947522e-05, "loss": 0.8761, "step": 114 }, { "epoch": 0.34, "learning_rate": 2.4970845481049565e-05, "loss": 1.0023, "step": 115 }, { "epoch": 0.34, "learning_rate": 2.4927113702623906e-05, "loss": 0.9489, "step": 116 }, { "epoch": 0.34, "learning_rate": 2.4883381924198253e-05, "loss": 0.9472, "step": 117 }, { "epoch": 0.34, "learning_rate": 2.4839650145772593e-05, "loss": 0.9639, "step": 118 }, { "epoch": 0.35, "learning_rate": 2.479591836734694e-05, "loss": 1.0146, "step": 119 }, { "epoch": 0.35, "learning_rate": 2.475218658892128e-05, "loss": 1.0069, "step": 120 }, { "epoch": 0.35, "learning_rate": 2.470845481049563e-05, "loss": 1.0586, "step": 121 }, { "epoch": 0.36, "learning_rate": 2.4664723032069973e-05, "loss": 1.2354, "step": 122 }, { "epoch": 0.36, "learning_rate": 2.4620991253644317e-05, "loss": 0.9995, "step": 123 }, { "epoch": 0.36, "learning_rate": 2.457725947521866e-05, "loss": 0.9738, "step": 124 }, { "epoch": 0.36, "learning_rate": 2.4533527696793e-05, "loss": 1.0605, "step": 125 }, { "epoch": 0.37, "learning_rate": 2.448979591836735e-05, "loss": 0.9859, "step": 126 }, { "epoch": 0.37, "learning_rate": 2.4446064139941692e-05, "loss": 1.0085, "step": 127 }, { "epoch": 0.37, "learning_rate": 2.4402332361516036e-05, "loss": 0.8942, "step": 128 }, { "epoch": 0.38, "learning_rate": 2.435860058309038e-05, "loss": 0.9521, "step": 129 }, { "epoch": 0.38, "learning_rate": 2.4314868804664724e-05, "loss": 0.9921, "step": 130 }, { "epoch": 0.38, "learning_rate": 2.4271137026239068e-05, "loss": 0.985, "step": 131 }, { "epoch": 0.38, "learning_rate": 2.422740524781341e-05, "loss": 0.8275, "step": 132 }, { "epoch": 0.39, "learning_rate": 2.4183673469387756e-05, "loss": 1.0239, "step": 133 }, { "epoch": 0.39, "learning_rate": 2.41399416909621e-05, "loss": 0.9122, "step": 134 }, { "epoch": 0.39, "learning_rate": 2.4096209912536444e-05, "loss": 0.912, "step": 135 }, { "epoch": 0.4, "learning_rate": 2.4052478134110788e-05, "loss": 0.9607, "step": 136 }, { "epoch": 0.4, "learning_rate": 2.4008746355685132e-05, "loss": 1.101, "step": 137 }, { "epoch": 0.4, "learning_rate": 2.3965014577259476e-05, "loss": 0.9501, "step": 138 }, { "epoch": 0.41, "learning_rate": 2.392128279883382e-05, "loss": 0.8686, "step": 139 }, { "epoch": 0.41, "learning_rate": 2.3877551020408164e-05, "loss": 0.9697, "step": 140 }, { "epoch": 0.41, "learning_rate": 2.3833819241982508e-05, "loss": 0.9529, "step": 141 }, { "epoch": 0.41, "learning_rate": 2.379008746355685e-05, "loss": 1.0023, "step": 142 }, { "epoch": 0.42, "learning_rate": 2.3746355685131196e-05, "loss": 0.9224, "step": 143 }, { "epoch": 0.42, "learning_rate": 2.370262390670554e-05, "loss": 0.8481, "step": 144 }, { "epoch": 0.42, "learning_rate": 2.3658892128279883e-05, "loss": 0.8655, "step": 145 }, { "epoch": 0.43, "learning_rate": 2.3615160349854227e-05, "loss": 0.9538, "step": 146 }, { "epoch": 0.43, "learning_rate": 2.357142857142857e-05, "loss": 0.9949, "step": 147 }, { "epoch": 0.43, "learning_rate": 2.352769679300292e-05, "loss": 0.9171, "step": 148 }, { "epoch": 0.43, "learning_rate": 2.348396501457726e-05, "loss": 0.8127, "step": 149 }, { "epoch": 0.44, "learning_rate": 2.3440233236151603e-05, "loss": 0.9542, "step": 150 }, { "epoch": 0.44, "learning_rate": 2.3396501457725947e-05, "loss": 0.8685, "step": 151 }, { "epoch": 0.44, "learning_rate": 2.335276967930029e-05, "loss": 1.0402, "step": 152 }, { "epoch": 0.45, "learning_rate": 2.330903790087464e-05, "loss": 0.978, "step": 153 }, { "epoch": 0.45, "learning_rate": 2.326530612244898e-05, "loss": 0.7553, "step": 154 }, { "epoch": 0.45, "learning_rate": 2.3221574344023326e-05, "loss": 0.9466, "step": 155 }, { "epoch": 0.45, "learning_rate": 2.3177842565597667e-05, "loss": 0.9465, "step": 156 }, { "epoch": 0.46, "learning_rate": 2.3134110787172014e-05, "loss": 0.8261, "step": 157 }, { "epoch": 0.46, "learning_rate": 2.3090379008746355e-05, "loss": 0.957, "step": 158 }, { "epoch": 0.46, "learning_rate": 2.30466472303207e-05, "loss": 0.7844, "step": 159 }, { "epoch": 0.47, "learning_rate": 2.3002915451895046e-05, "loss": 0.8258, "step": 160 }, { "epoch": 0.47, "learning_rate": 2.2959183673469387e-05, "loss": 1.0157, "step": 161 }, { "epoch": 0.47, "learning_rate": 2.2915451895043734e-05, "loss": 0.9961, "step": 162 }, { "epoch": 0.48, "learning_rate": 2.2871720116618074e-05, "loss": 0.8449, "step": 163 }, { "epoch": 0.48, "learning_rate": 2.2827988338192422e-05, "loss": 0.8206, "step": 164 }, { "epoch": 0.48, "learning_rate": 2.2784256559766762e-05, "loss": 0.815, "step": 165 }, { "epoch": 0.48, "learning_rate": 2.274052478134111e-05, "loss": 0.9007, "step": 166 }, { "epoch": 0.49, "learning_rate": 2.2696793002915454e-05, "loss": 0.8002, "step": 167 }, { "epoch": 0.49, "learning_rate": 2.2653061224489794e-05, "loss": 0.8584, "step": 168 }, { "epoch": 0.49, "learning_rate": 2.260932944606414e-05, "loss": 0.8984, "step": 169 }, { "epoch": 0.5, "learning_rate": 2.2565597667638482e-05, "loss": 0.9296, "step": 170 }, { "epoch": 0.5, "learning_rate": 2.252186588921283e-05, "loss": 0.9471, "step": 171 }, { "epoch": 0.5, "learning_rate": 2.2478134110787173e-05, "loss": 0.9958, "step": 172 }, { "epoch": 0.5, "learning_rate": 2.2434402332361517e-05, "loss": 0.9842, "step": 173 }, { "epoch": 0.51, "learning_rate": 2.239067055393586e-05, "loss": 0.9085, "step": 174 }, { "epoch": 0.51, "learning_rate": 2.2346938775510205e-05, "loss": 0.8902, "step": 175 }, { "epoch": 0.51, "learning_rate": 2.230320699708455e-05, "loss": 1.108, "step": 176 }, { "epoch": 0.52, "learning_rate": 2.225947521865889e-05, "loss": 0.8656, "step": 177 }, { "epoch": 0.52, "learning_rate": 2.2215743440233237e-05, "loss": 0.9684, "step": 178 }, { "epoch": 0.52, "learning_rate": 2.217201166180758e-05, "loss": 1.0054, "step": 179 }, { "epoch": 0.52, "learning_rate": 2.2128279883381925e-05, "loss": 0.9552, "step": 180 }, { "epoch": 0.53, "learning_rate": 2.208454810495627e-05, "loss": 0.7809, "step": 181 }, { "epoch": 0.53, "learning_rate": 2.2040816326530613e-05, "loss": 1.0736, "step": 182 }, { "epoch": 0.53, "learning_rate": 2.1997084548104957e-05, "loss": 0.8351, "step": 183 }, { "epoch": 0.54, "learning_rate": 2.19533527696793e-05, "loss": 1.0844, "step": 184 }, { "epoch": 0.54, "learning_rate": 2.1909620991253645e-05, "loss": 0.8503, "step": 185 }, { "epoch": 0.54, "learning_rate": 2.186588921282799e-05, "loss": 0.9959, "step": 186 }, { "epoch": 0.55, "learning_rate": 2.1822157434402333e-05, "loss": 1.0989, "step": 187 }, { "epoch": 0.55, "learning_rate": 2.1778425655976677e-05, "loss": 0.9763, "step": 188 }, { "epoch": 0.55, "learning_rate": 2.173469387755102e-05, "loss": 1.0347, "step": 189 }, { "epoch": 0.55, "learning_rate": 2.1690962099125364e-05, "loss": 0.9371, "step": 190 }, { "epoch": 0.56, "learning_rate": 2.1647230320699712e-05, "loss": 0.7817, "step": 191 }, { "epoch": 0.56, "learning_rate": 2.1603498542274052e-05, "loss": 0.9467, "step": 192 }, { "epoch": 0.56, "learning_rate": 2.15597667638484e-05, "loss": 0.9691, "step": 193 }, { "epoch": 0.57, "learning_rate": 2.151603498542274e-05, "loss": 0.7149, "step": 194 }, { "epoch": 0.57, "learning_rate": 2.1472303206997084e-05, "loss": 0.7316, "step": 195 }, { "epoch": 0.57, "learning_rate": 2.1428571428571428e-05, "loss": 0.7645, "step": 196 }, { "epoch": 0.57, "learning_rate": 2.1384839650145772e-05, "loss": 0.7922, "step": 197 }, { "epoch": 0.58, "learning_rate": 2.134110787172012e-05, "loss": 0.9184, "step": 198 }, { "epoch": 0.58, "learning_rate": 2.129737609329446e-05, "loss": 0.8611, "step": 199 }, { "epoch": 0.58, "learning_rate": 2.1253644314868807e-05, "loss": 0.8158, "step": 200 }, { "epoch": 0.59, "learning_rate": 2.1209912536443148e-05, "loss": 1.0274, "step": 201 }, { "epoch": 0.59, "learning_rate": 2.1166180758017495e-05, "loss": 0.9442, "step": 202 }, { "epoch": 0.59, "learning_rate": 2.1122448979591836e-05, "loss": 1.1595, "step": 203 }, { "epoch": 0.59, "learning_rate": 2.107871720116618e-05, "loss": 0.8674, "step": 204 }, { "epoch": 0.6, "learning_rate": 2.1034985422740527e-05, "loss": 0.9162, "step": 205 }, { "epoch": 0.6, "learning_rate": 2.0991253644314868e-05, "loss": 0.8482, "step": 206 }, { "epoch": 0.6, "learning_rate": 2.0947521865889215e-05, "loss": 0.8585, "step": 207 }, { "epoch": 0.61, "learning_rate": 2.0903790087463556e-05, "loss": 0.9633, "step": 208 }, { "epoch": 0.61, "learning_rate": 2.0860058309037903e-05, "loss": 0.9125, "step": 209 }, { "epoch": 0.61, "learning_rate": 2.0816326530612247e-05, "loss": 0.7642, "step": 210 }, { "epoch": 0.62, "learning_rate": 2.077259475218659e-05, "loss": 0.8236, "step": 211 }, { "epoch": 0.62, "learning_rate": 2.0728862973760935e-05, "loss": 0.7912, "step": 212 }, { "epoch": 0.62, "learning_rate": 2.0685131195335275e-05, "loss": 0.8434, "step": 213 }, { "epoch": 0.62, "learning_rate": 2.0641399416909623e-05, "loss": 0.8584, "step": 214 }, { "epoch": 0.63, "learning_rate": 2.0597667638483963e-05, "loss": 0.8301, "step": 215 }, { "epoch": 0.63, "learning_rate": 2.055393586005831e-05, "loss": 0.8389, "step": 216 }, { "epoch": 0.63, "learning_rate": 2.0510204081632654e-05, "loss": 0.9902, "step": 217 }, { "epoch": 0.64, "learning_rate": 2.0466472303207e-05, "loss": 0.8553, "step": 218 }, { "epoch": 0.64, "learning_rate": 2.0422740524781342e-05, "loss": 0.872, "step": 219 }, { "epoch": 0.64, "learning_rate": 2.0379008746355683e-05, "loss": 0.8447, "step": 220 }, { "epoch": 0.64, "learning_rate": 2.033527696793003e-05, "loss": 0.8772, "step": 221 }, { "epoch": 0.65, "learning_rate": 2.029154518950437e-05, "loss": 0.8157, "step": 222 }, { "epoch": 0.65, "learning_rate": 2.0247813411078718e-05, "loss": 0.7805, "step": 223 }, { "epoch": 0.65, "learning_rate": 2.0204081632653062e-05, "loss": 0.708, "step": 224 }, { "epoch": 0.66, "learning_rate": 2.0160349854227406e-05, "loss": 0.7722, "step": 225 }, { "epoch": 0.66, "learning_rate": 2.011661807580175e-05, "loss": 0.9642, "step": 226 }, { "epoch": 0.66, "learning_rate": 2.0072886297376094e-05, "loss": 0.7912, "step": 227 }, { "epoch": 0.66, "learning_rate": 2.0029154518950438e-05, "loss": 1.0257, "step": 228 }, { "epoch": 0.67, "learning_rate": 1.9985422740524782e-05, "loss": 0.9954, "step": 229 }, { "epoch": 0.67, "learning_rate": 1.9941690962099126e-05, "loss": 0.9803, "step": 230 }, { "epoch": 0.67, "learning_rate": 1.989795918367347e-05, "loss": 0.7859, "step": 231 }, { "epoch": 0.68, "learning_rate": 1.9854227405247814e-05, "loss": 0.8381, "step": 232 }, { "epoch": 0.68, "learning_rate": 1.9810495626822158e-05, "loss": 0.6901, "step": 233 }, { "epoch": 0.68, "learning_rate": 1.97667638483965e-05, "loss": 1.0198, "step": 234 }, { "epoch": 0.69, "learning_rate": 1.9723032069970846e-05, "loss": 0.9632, "step": 235 }, { "epoch": 0.69, "learning_rate": 1.9679300291545193e-05, "loss": 0.7811, "step": 236 }, { "epoch": 0.69, "learning_rate": 1.9635568513119533e-05, "loss": 0.776, "step": 237 }, { "epoch": 0.69, "learning_rate": 1.9591836734693877e-05, "loss": 0.7588, "step": 238 }, { "epoch": 0.7, "learning_rate": 1.954810495626822e-05, "loss": 0.8946, "step": 239 }, { "epoch": 0.7, "learning_rate": 1.9504373177842565e-05, "loss": 0.8895, "step": 240 }, { "epoch": 0.7, "learning_rate": 1.946064139941691e-05, "loss": 0.9084, "step": 241 }, { "epoch": 0.71, "learning_rate": 1.9416909620991253e-05, "loss": 0.8147, "step": 242 }, { "epoch": 0.71, "learning_rate": 1.93731778425656e-05, "loss": 0.8603, "step": 243 }, { "epoch": 0.71, "learning_rate": 1.932944606413994e-05, "loss": 0.8547, "step": 244 }, { "epoch": 0.71, "learning_rate": 1.928571428571429e-05, "loss": 0.7791, "step": 245 }, { "epoch": 0.72, "learning_rate": 1.924198250728863e-05, "loss": 0.8709, "step": 246 }, { "epoch": 0.72, "learning_rate": 1.9198250728862973e-05, "loss": 0.7485, "step": 247 }, { "epoch": 0.72, "learning_rate": 1.915451895043732e-05, "loss": 0.8703, "step": 248 }, { "epoch": 0.73, "learning_rate": 1.911078717201166e-05, "loss": 0.9068, "step": 249 }, { "epoch": 0.73, "learning_rate": 1.9067055393586008e-05, "loss": 0.8725, "step": 250 }, { "epoch": 0.73, "learning_rate": 1.902332361516035e-05, "loss": 0.7559, "step": 251 }, { "epoch": 0.73, "learning_rate": 1.8979591836734696e-05, "loss": 0.8044, "step": 252 }, { "epoch": 0.74, "learning_rate": 1.8935860058309037e-05, "loss": 1.0526, "step": 253 }, { "epoch": 0.74, "learning_rate": 1.8892128279883384e-05, "loss": 0.8945, "step": 254 }, { "epoch": 0.74, "learning_rate": 1.8848396501457728e-05, "loss": 0.799, "step": 255 }, { "epoch": 0.75, "learning_rate": 1.880466472303207e-05, "loss": 0.7597, "step": 256 }, { "epoch": 0.75, "learning_rate": 1.8760932944606416e-05, "loss": 0.772, "step": 257 }, { "epoch": 0.75, "learning_rate": 1.8717201166180756e-05, "loss": 0.9276, "step": 258 }, { "epoch": 0.76, "learning_rate": 1.8673469387755104e-05, "loss": 0.7726, "step": 259 }, { "epoch": 0.76, "learning_rate": 1.8629737609329444e-05, "loss": 0.9847, "step": 260 }, { "epoch": 0.76, "learning_rate": 1.858600583090379e-05, "loss": 0.8455, "step": 261 }, { "epoch": 0.76, "learning_rate": 1.8542274052478135e-05, "loss": 0.7554, "step": 262 }, { "epoch": 0.77, "learning_rate": 1.849854227405248e-05, "loss": 0.7653, "step": 263 }, { "epoch": 0.77, "learning_rate": 1.8454810495626823e-05, "loss": 0.8519, "step": 264 }, { "epoch": 0.77, "learning_rate": 1.8411078717201164e-05, "loss": 0.7033, "step": 265 }, { "epoch": 0.78, "learning_rate": 1.836734693877551e-05, "loss": 0.8955, "step": 266 }, { "epoch": 0.78, "learning_rate": 1.8323615160349855e-05, "loss": 0.7778, "step": 267 }, { "epoch": 0.78, "learning_rate": 1.82798833819242e-05, "loss": 0.819, "step": 268 }, { "epoch": 0.78, "learning_rate": 1.8236151603498543e-05, "loss": 0.8427, "step": 269 }, { "epoch": 0.79, "learning_rate": 1.8192419825072887e-05, "loss": 0.8098, "step": 270 }, { "epoch": 0.79, "learning_rate": 1.814868804664723e-05, "loss": 0.8787, "step": 271 }, { "epoch": 0.79, "learning_rate": 1.8104956268221575e-05, "loss": 0.7492, "step": 272 }, { "epoch": 0.8, "learning_rate": 1.806122448979592e-05, "loss": 0.7385, "step": 273 }, { "epoch": 0.8, "learning_rate": 1.8017492711370263e-05, "loss": 0.7456, "step": 274 }, { "epoch": 0.8, "learning_rate": 1.7973760932944607e-05, "loss": 0.8629, "step": 275 }, { "epoch": 0.8, "learning_rate": 1.793002915451895e-05, "loss": 0.9145, "step": 276 }, { "epoch": 0.81, "learning_rate": 1.7886297376093295e-05, "loss": 0.8599, "step": 277 }, { "epoch": 0.81, "learning_rate": 1.784256559766764e-05, "loss": 0.8948, "step": 278 }, { "epoch": 0.81, "learning_rate": 1.7798833819241983e-05, "loss": 0.8635, "step": 279 }, { "epoch": 0.82, "learning_rate": 1.7755102040816327e-05, "loss": 0.7858, "step": 280 }, { "epoch": 0.82, "learning_rate": 1.7711370262390674e-05, "loss": 0.9611, "step": 281 }, { "epoch": 0.82, "learning_rate": 1.7667638483965014e-05, "loss": 0.814, "step": 282 }, { "epoch": 0.83, "learning_rate": 1.762390670553936e-05, "loss": 0.8337, "step": 283 }, { "epoch": 0.83, "learning_rate": 1.7580174927113702e-05, "loss": 0.7874, "step": 284 }, { "epoch": 0.83, "learning_rate": 1.7536443148688046e-05, "loss": 0.7569, "step": 285 }, { "epoch": 0.83, "learning_rate": 1.7492711370262394e-05, "loss": 0.8028, "step": 286 }, { "epoch": 0.84, "learning_rate": 1.7448979591836734e-05, "loss": 0.7952, "step": 287 }, { "epoch": 0.84, "learning_rate": 1.740524781341108e-05, "loss": 0.8058, "step": 288 }, { "epoch": 0.84, "learning_rate": 1.7361516034985422e-05, "loss": 0.7663, "step": 289 }, { "epoch": 0.85, "learning_rate": 1.731778425655977e-05, "loss": 0.8116, "step": 290 }, { "epoch": 0.85, "learning_rate": 1.727405247813411e-05, "loss": 0.8715, "step": 291 }, { "epoch": 0.85, "learning_rate": 1.7230320699708454e-05, "loss": 0.9337, "step": 292 }, { "epoch": 0.85, "learning_rate": 1.71865889212828e-05, "loss": 0.6941, "step": 293 }, { "epoch": 0.86, "learning_rate": 1.7142857142857142e-05, "loss": 0.7186, "step": 294 }, { "epoch": 0.86, "learning_rate": 1.709912536443149e-05, "loss": 0.8248, "step": 295 }, { "epoch": 0.86, "learning_rate": 1.705539358600583e-05, "loss": 0.9292, "step": 296 }, { "epoch": 0.87, "learning_rate": 1.7011661807580177e-05, "loss": 0.8749, "step": 297 }, { "epoch": 0.87, "learning_rate": 1.6967930029154518e-05, "loss": 0.9838, "step": 298 }, { "epoch": 0.87, "learning_rate": 1.6924198250728865e-05, "loss": 0.8464, "step": 299 }, { "epoch": 0.87, "learning_rate": 1.688046647230321e-05, "loss": 0.8021, "step": 300 }, { "epoch": 0.88, "learning_rate": 1.683673469387755e-05, "loss": 0.8395, "step": 301 }, { "epoch": 0.88, "learning_rate": 1.6793002915451897e-05, "loss": 0.7987, "step": 302 }, { "epoch": 0.88, "learning_rate": 1.6749271137026237e-05, "loss": 0.8711, "step": 303 }, { "epoch": 0.89, "learning_rate": 1.6705539358600585e-05, "loss": 0.752, "step": 304 }, { "epoch": 0.89, "learning_rate": 1.666180758017493e-05, "loss": 0.7489, "step": 305 }, { "epoch": 0.89, "learning_rate": 1.6618075801749273e-05, "loss": 0.8933, "step": 306 }, { "epoch": 0.9, "learning_rate": 1.6574344023323617e-05, "loss": 0.8087, "step": 307 }, { "epoch": 0.9, "learning_rate": 1.6530612244897957e-05, "loss": 0.9219, "step": 308 }, { "epoch": 0.9, "learning_rate": 1.6486880466472304e-05, "loss": 0.7434, "step": 309 }, { "epoch": 0.9, "learning_rate": 1.6443148688046645e-05, "loss": 0.6875, "step": 310 }, { "epoch": 0.91, "learning_rate": 1.6399416909620992e-05, "loss": 0.8969, "step": 311 }, { "epoch": 0.91, "learning_rate": 1.6355685131195336e-05, "loss": 0.7929, "step": 312 }, { "epoch": 0.91, "learning_rate": 1.631195335276968e-05, "loss": 0.783, "step": 313 }, { "epoch": 0.92, "learning_rate": 1.6268221574344024e-05, "loss": 0.98, "step": 314 }, { "epoch": 0.92, "learning_rate": 1.6224489795918368e-05, "loss": 0.8074, "step": 315 }, { "epoch": 0.92, "learning_rate": 1.6180758017492712e-05, "loss": 0.8186, "step": 316 }, { "epoch": 0.92, "learning_rate": 1.6137026239067053e-05, "loss": 0.6993, "step": 317 }, { "epoch": 0.93, "learning_rate": 1.60932944606414e-05, "loss": 0.8308, "step": 318 }, { "epoch": 0.93, "learning_rate": 1.6049562682215744e-05, "loss": 0.7593, "step": 319 }, { "epoch": 0.93, "learning_rate": 1.6005830903790088e-05, "loss": 0.6809, "step": 320 }, { "epoch": 0.94, "learning_rate": 1.5962099125364432e-05, "loss": 0.7695, "step": 321 }, { "epoch": 0.94, "learning_rate": 1.5918367346938776e-05, "loss": 0.917, "step": 322 }, { "epoch": 0.94, "learning_rate": 1.587463556851312e-05, "loss": 0.7101, "step": 323 }, { "epoch": 0.94, "learning_rate": 1.5830903790087464e-05, "loss": 0.9573, "step": 324 }, { "epoch": 0.95, "learning_rate": 1.5787172011661808e-05, "loss": 0.8902, "step": 325 }, { "epoch": 0.95, "learning_rate": 1.574344023323615e-05, "loss": 0.7231, "step": 326 }, { "epoch": 0.95, "learning_rate": 1.5699708454810496e-05, "loss": 0.8754, "step": 327 }, { "epoch": 0.96, "learning_rate": 1.565597667638484e-05, "loss": 0.9213, "step": 328 }, { "epoch": 0.96, "learning_rate": 1.5612244897959183e-05, "loss": 0.7397, "step": 329 }, { "epoch": 0.96, "learning_rate": 1.5568513119533527e-05, "loss": 0.7227, "step": 330 }, { "epoch": 0.97, "learning_rate": 1.5524781341107875e-05, "loss": 0.7672, "step": 331 }, { "epoch": 0.97, "learning_rate": 1.5481049562682215e-05, "loss": 0.7325, "step": 332 }, { "epoch": 0.97, "learning_rate": 1.5437317784256563e-05, "loss": 0.8339, "step": 333 }, { "epoch": 0.97, "learning_rate": 1.5393586005830903e-05, "loss": 0.793, "step": 334 }, { "epoch": 0.98, "learning_rate": 1.5349854227405247e-05, "loss": 0.9217, "step": 335 }, { "epoch": 0.98, "learning_rate": 1.530612244897959e-05, "loss": 0.9395, "step": 336 }, { "epoch": 0.98, "learning_rate": 1.5262390670553935e-05, "loss": 0.7461, "step": 337 }, { "epoch": 0.99, "learning_rate": 1.521865889212828e-05, "loss": 0.7148, "step": 338 }, { "epoch": 0.99, "learning_rate": 1.5174927113702623e-05, "loss": 0.9149, "step": 339 }, { "epoch": 0.99, "learning_rate": 1.513119533527697e-05, "loss": 0.812, "step": 340 }, { "epoch": 0.99, "learning_rate": 1.5087463556851312e-05, "loss": 0.6849, "step": 341 }, { "epoch": 1.0, "learning_rate": 1.5043731778425658e-05, "loss": 0.7184, "step": 342 }, { "epoch": 1.0, "learning_rate": 1.5e-05, "loss": 0.8343, "step": 343 }, { "epoch": 1.0, "learning_rate": 1.4956268221574344e-05, "loss": 0.6949, "step": 344 }, { "epoch": 1.01, "learning_rate": 1.4912536443148688e-05, "loss": 0.6325, "step": 345 }, { "epoch": 1.01, "learning_rate": 1.4868804664723032e-05, "loss": 0.6235, "step": 346 }, { "epoch": 1.01, "learning_rate": 1.4825072886297378e-05, "loss": 0.5982, "step": 347 }, { "epoch": 1.01, "learning_rate": 1.478134110787172e-05, "loss": 0.803, "step": 348 }, { "epoch": 1.02, "learning_rate": 1.4737609329446064e-05, "loss": 0.6774, "step": 349 }, { "epoch": 1.02, "learning_rate": 1.4693877551020408e-05, "loss": 0.6396, "step": 350 }, { "epoch": 1.02, "learning_rate": 1.4650145772594752e-05, "loss": 0.7721, "step": 351 }, { "epoch": 1.03, "learning_rate": 1.4606413994169096e-05, "loss": 0.8477, "step": 352 }, { "epoch": 1.03, "learning_rate": 1.4562682215743442e-05, "loss": 0.7166, "step": 353 }, { "epoch": 1.03, "learning_rate": 1.4518950437317786e-05, "loss": 0.6573, "step": 354 }, { "epoch": 1.03, "learning_rate": 1.447521865889213e-05, "loss": 0.7047, "step": 355 }, { "epoch": 1.04, "learning_rate": 1.4431486880466473e-05, "loss": 0.613, "step": 356 }, { "epoch": 1.04, "learning_rate": 1.4387755102040816e-05, "loss": 0.7446, "step": 357 }, { "epoch": 1.04, "learning_rate": 1.434402332361516e-05, "loss": 0.668, "step": 358 }, { "epoch": 1.05, "learning_rate": 1.4300291545189505e-05, "loss": 0.6964, "step": 359 }, { "epoch": 1.05, "learning_rate": 1.425655976676385e-05, "loss": 0.7486, "step": 360 }, { "epoch": 1.05, "learning_rate": 1.4212827988338193e-05, "loss": 0.7398, "step": 361 }, { "epoch": 1.06, "learning_rate": 1.4169096209912537e-05, "loss": 0.5944, "step": 362 }, { "epoch": 1.06, "learning_rate": 1.4125364431486881e-05, "loss": 0.6959, "step": 363 }, { "epoch": 1.06, "learning_rate": 1.4081632653061225e-05, "loss": 0.6982, "step": 364 }, { "epoch": 1.06, "learning_rate": 1.4037900874635567e-05, "loss": 0.6906, "step": 365 }, { "epoch": 1.07, "learning_rate": 1.3994169096209913e-05, "loss": 0.6168, "step": 366 }, { "epoch": 1.07, "learning_rate": 1.3950437317784257e-05, "loss": 0.8569, "step": 367 }, { "epoch": 1.07, "learning_rate": 1.39067055393586e-05, "loss": 0.696, "step": 368 }, { "epoch": 1.08, "learning_rate": 1.3862973760932945e-05, "loss": 0.7874, "step": 369 }, { "epoch": 1.08, "learning_rate": 1.3819241982507289e-05, "loss": 0.6416, "step": 370 }, { "epoch": 1.08, "learning_rate": 1.3775510204081633e-05, "loss": 0.7262, "step": 371 }, { "epoch": 1.08, "learning_rate": 1.3731778425655978e-05, "loss": 0.7177, "step": 372 }, { "epoch": 1.09, "learning_rate": 1.3688046647230322e-05, "loss": 0.7477, "step": 373 }, { "epoch": 1.09, "learning_rate": 1.3644314868804664e-05, "loss": 0.7449, "step": 374 }, { "epoch": 1.09, "learning_rate": 1.3600583090379008e-05, "loss": 0.6565, "step": 375 }, { "epoch": 1.1, "learning_rate": 1.3556851311953352e-05, "loss": 0.7586, "step": 376 }, { "epoch": 1.1, "learning_rate": 1.3513119533527696e-05, "loss": 0.519, "step": 377 }, { "epoch": 1.1, "learning_rate": 1.3469387755102042e-05, "loss": 0.711, "step": 378 }, { "epoch": 1.1, "learning_rate": 1.3425655976676386e-05, "loss": 0.7435, "step": 379 }, { "epoch": 1.11, "learning_rate": 1.338192419825073e-05, "loss": 0.822, "step": 380 }, { "epoch": 1.11, "learning_rate": 1.3338192419825074e-05, "loss": 0.7531, "step": 381 }, { "epoch": 1.11, "learning_rate": 1.3294460641399418e-05, "loss": 0.8351, "step": 382 }, { "epoch": 1.12, "learning_rate": 1.325072886297376e-05, "loss": 0.6045, "step": 383 }, { "epoch": 1.12, "learning_rate": 1.3206997084548104e-05, "loss": 0.8374, "step": 384 }, { "epoch": 1.12, "learning_rate": 1.316326530612245e-05, "loss": 0.7024, "step": 385 }, { "epoch": 1.13, "learning_rate": 1.3119533527696794e-05, "loss": 0.6995, "step": 386 }, { "epoch": 1.13, "learning_rate": 1.3075801749271137e-05, "loss": 0.7083, "step": 387 }, { "epoch": 1.13, "learning_rate": 1.3032069970845481e-05, "loss": 0.6404, "step": 388 }, { "epoch": 1.13, "learning_rate": 1.2988338192419825e-05, "loss": 0.8153, "step": 389 }, { "epoch": 1.14, "learning_rate": 1.294460641399417e-05, "loss": 0.6466, "step": 390 }, { "epoch": 1.14, "learning_rate": 1.2900874635568515e-05, "loss": 0.6655, "step": 391 }, { "epoch": 1.14, "learning_rate": 1.2857142857142857e-05, "loss": 0.4714, "step": 392 }, { "epoch": 1.15, "learning_rate": 1.2813411078717201e-05, "loss": 0.7221, "step": 393 }, { "epoch": 1.15, "learning_rate": 1.2769679300291545e-05, "loss": 0.6697, "step": 394 }, { "epoch": 1.15, "learning_rate": 1.2725947521865889e-05, "loss": 0.8205, "step": 395 }, { "epoch": 1.15, "learning_rate": 1.2682215743440233e-05, "loss": 0.6631, "step": 396 }, { "epoch": 1.16, "learning_rate": 1.2638483965014577e-05, "loss": 0.6889, "step": 397 }, { "epoch": 1.16, "learning_rate": 1.2594752186588923e-05, "loss": 0.7284, "step": 398 }, { "epoch": 1.16, "learning_rate": 1.2551020408163267e-05, "loss": 0.7834, "step": 399 }, { "epoch": 1.17, "learning_rate": 1.250728862973761e-05, "loss": 0.6491, "step": 400 }, { "epoch": 1.17, "learning_rate": 1.2463556851311953e-05, "loss": 0.796, "step": 401 }, { "epoch": 1.17, "learning_rate": 1.2419825072886297e-05, "loss": 0.7083, "step": 402 }, { "epoch": 1.17, "learning_rate": 1.237609329446064e-05, "loss": 0.6393, "step": 403 }, { "epoch": 1.18, "learning_rate": 1.2332361516034986e-05, "loss": 0.7, "step": 404 }, { "epoch": 1.18, "learning_rate": 1.228862973760933e-05, "loss": 0.7005, "step": 405 }, { "epoch": 1.18, "learning_rate": 1.2244897959183674e-05, "loss": 0.635, "step": 406 }, { "epoch": 1.19, "learning_rate": 1.2201166180758018e-05, "loss": 0.7491, "step": 407 }, { "epoch": 1.19, "learning_rate": 1.2157434402332362e-05, "loss": 0.6133, "step": 408 }, { "epoch": 1.19, "learning_rate": 1.2113702623906704e-05, "loss": 0.6128, "step": 409 }, { "epoch": 1.2, "learning_rate": 1.206997084548105e-05, "loss": 0.6358, "step": 410 }, { "epoch": 1.2, "learning_rate": 1.2026239067055394e-05, "loss": 0.8197, "step": 411 }, { "epoch": 1.2, "learning_rate": 1.1982507288629738e-05, "loss": 0.7669, "step": 412 }, { "epoch": 1.2, "learning_rate": 1.1938775510204082e-05, "loss": 0.6701, "step": 413 }, { "epoch": 1.21, "learning_rate": 1.1895043731778426e-05, "loss": 0.7437, "step": 414 }, { "epoch": 1.21, "learning_rate": 1.185131195335277e-05, "loss": 0.8168, "step": 415 }, { "epoch": 1.21, "learning_rate": 1.1807580174927114e-05, "loss": 0.6048, "step": 416 }, { "epoch": 1.22, "learning_rate": 1.176384839650146e-05, "loss": 0.681, "step": 417 }, { "epoch": 1.22, "learning_rate": 1.1720116618075802e-05, "loss": 0.779, "step": 418 }, { "epoch": 1.22, "learning_rate": 1.1676384839650146e-05, "loss": 0.7463, "step": 419 }, { "epoch": 1.22, "learning_rate": 1.163265306122449e-05, "loss": 0.7479, "step": 420 }, { "epoch": 1.23, "learning_rate": 1.1588921282798833e-05, "loss": 0.7277, "step": 421 }, { "epoch": 1.23, "learning_rate": 1.1545189504373177e-05, "loss": 0.7327, "step": 422 }, { "epoch": 1.23, "learning_rate": 1.1501457725947523e-05, "loss": 0.6682, "step": 423 }, { "epoch": 1.24, "learning_rate": 1.1457725947521867e-05, "loss": 0.6823, "step": 424 }, { "epoch": 1.24, "learning_rate": 1.1413994169096211e-05, "loss": 0.6142, "step": 425 }, { "epoch": 1.24, "learning_rate": 1.1370262390670555e-05, "loss": 0.7939, "step": 426 }, { "epoch": 1.24, "learning_rate": 1.1326530612244897e-05, "loss": 0.7539, "step": 427 }, { "epoch": 1.25, "learning_rate": 1.1282798833819241e-05, "loss": 0.6073, "step": 428 }, { "epoch": 1.25, "learning_rate": 1.1239067055393587e-05, "loss": 0.7128, "step": 429 }, { "epoch": 1.25, "learning_rate": 1.119533527696793e-05, "loss": 0.6475, "step": 430 }, { "epoch": 1.26, "learning_rate": 1.1151603498542275e-05, "loss": 0.7107, "step": 431 }, { "epoch": 1.26, "learning_rate": 1.1107871720116619e-05, "loss": 0.6458, "step": 432 }, { "epoch": 1.26, "learning_rate": 1.1064139941690962e-05, "loss": 0.8056, "step": 433 }, { "epoch": 1.27, "learning_rate": 1.1020408163265306e-05, "loss": 0.7321, "step": 434 }, { "epoch": 1.27, "learning_rate": 1.097667638483965e-05, "loss": 0.7666, "step": 435 }, { "epoch": 1.27, "learning_rate": 1.0932944606413994e-05, "loss": 0.7894, "step": 436 }, { "epoch": 1.27, "learning_rate": 1.0889212827988338e-05, "loss": 0.581, "step": 437 }, { "epoch": 1.28, "learning_rate": 1.0845481049562682e-05, "loss": 0.6994, "step": 438 }, { "epoch": 1.28, "learning_rate": 1.0801749271137026e-05, "loss": 0.7405, "step": 439 }, { "epoch": 1.28, "learning_rate": 1.075801749271137e-05, "loss": 0.7731, "step": 440 }, { "epoch": 1.29, "learning_rate": 1.0714285714285714e-05, "loss": 0.8333, "step": 441 }, { "epoch": 1.29, "learning_rate": 1.067055393586006e-05, "loss": 0.8215, "step": 442 }, { "epoch": 1.29, "learning_rate": 1.0626822157434404e-05, "loss": 0.7639, "step": 443 }, { "epoch": 1.29, "learning_rate": 1.0583090379008748e-05, "loss": 0.6595, "step": 444 }, { "epoch": 1.3, "learning_rate": 1.053935860058309e-05, "loss": 0.7345, "step": 445 }, { "epoch": 1.3, "learning_rate": 1.0495626822157434e-05, "loss": 0.6349, "step": 446 }, { "epoch": 1.3, "learning_rate": 1.0451895043731778e-05, "loss": 0.7739, "step": 447 }, { "epoch": 1.31, "learning_rate": 1.0408163265306123e-05, "loss": 0.7256, "step": 448 }, { "epoch": 1.31, "learning_rate": 1.0364431486880467e-05, "loss": 0.6909, "step": 449 }, { "epoch": 1.31, "learning_rate": 1.0320699708454811e-05, "loss": 0.663, "step": 450 }, { "epoch": 1.31, "learning_rate": 1.0276967930029155e-05, "loss": 0.6206, "step": 451 }, { "epoch": 1.32, "learning_rate": 1.02332361516035e-05, "loss": 0.6993, "step": 452 }, { "epoch": 1.32, "learning_rate": 1.0189504373177841e-05, "loss": 0.7027, "step": 453 }, { "epoch": 1.32, "learning_rate": 1.0145772594752185e-05, "loss": 0.7586, "step": 454 }, { "epoch": 1.33, "learning_rate": 1.0102040816326531e-05, "loss": 0.821, "step": 455 }, { "epoch": 1.33, "learning_rate": 1.0058309037900875e-05, "loss": 0.7111, "step": 456 }, { "epoch": 1.33, "learning_rate": 1.0014577259475219e-05, "loss": 0.6358, "step": 457 }, { "epoch": 1.34, "learning_rate": 9.970845481049563e-06, "loss": 0.635, "step": 458 }, { "epoch": 1.34, "learning_rate": 9.927113702623907e-06, "loss": 0.6775, "step": 459 }, { "epoch": 1.34, "learning_rate": 9.88338192419825e-06, "loss": 0.6923, "step": 460 }, { "epoch": 1.34, "learning_rate": 9.839650145772596e-06, "loss": 0.7741, "step": 461 }, { "epoch": 1.35, "learning_rate": 9.795918367346939e-06, "loss": 0.6225, "step": 462 }, { "epoch": 1.35, "learning_rate": 9.752186588921283e-06, "loss": 0.733, "step": 463 }, { "epoch": 1.35, "learning_rate": 9.708454810495627e-06, "loss": 0.8117, "step": 464 }, { "epoch": 1.36, "learning_rate": 9.66472303206997e-06, "loss": 0.7334, "step": 465 }, { "epoch": 1.36, "learning_rate": 9.620991253644314e-06, "loss": 0.6461, "step": 466 }, { "epoch": 1.36, "learning_rate": 9.57725947521866e-06, "loss": 0.7507, "step": 467 }, { "epoch": 1.36, "learning_rate": 9.533527696793004e-06, "loss": 0.6619, "step": 468 }, { "epoch": 1.37, "learning_rate": 9.489795918367348e-06, "loss": 0.6141, "step": 469 }, { "epoch": 1.37, "learning_rate": 9.446064139941692e-06, "loss": 0.6757, "step": 470 }, { "epoch": 1.37, "learning_rate": 9.402332361516034e-06, "loss": 0.6319, "step": 471 }, { "epoch": 1.38, "learning_rate": 9.358600583090378e-06, "loss": 0.651, "step": 472 }, { "epoch": 1.38, "learning_rate": 9.314868804664722e-06, "loss": 0.6266, "step": 473 }, { "epoch": 1.38, "learning_rate": 9.271137026239068e-06, "loss": 0.5657, "step": 474 }, { "epoch": 1.38, "learning_rate": 9.227405247813412e-06, "loss": 0.6838, "step": 475 }, { "epoch": 1.39, "learning_rate": 9.183673469387756e-06, "loss": 0.8081, "step": 476 }, { "epoch": 1.39, "learning_rate": 9.1399416909621e-06, "loss": 0.5915, "step": 477 }, { "epoch": 1.39, "learning_rate": 9.096209912536444e-06, "loss": 0.6921, "step": 478 }, { "epoch": 1.4, "learning_rate": 9.052478134110787e-06, "loss": 0.6148, "step": 479 }, { "epoch": 1.4, "learning_rate": 9.008746355685131e-06, "loss": 0.6602, "step": 480 }, { "epoch": 1.4, "learning_rate": 8.965014577259475e-06, "loss": 0.5945, "step": 481 }, { "epoch": 1.41, "learning_rate": 8.92128279883382e-06, "loss": 0.6965, "step": 482 }, { "epoch": 1.41, "learning_rate": 8.877551020408163e-06, "loss": 0.7285, "step": 483 }, { "epoch": 1.41, "learning_rate": 8.833819241982507e-06, "loss": 0.8048, "step": 484 }, { "epoch": 1.41, "learning_rate": 8.790087463556851e-06, "loss": 0.6653, "step": 485 }, { "epoch": 1.42, "learning_rate": 8.746355685131197e-06, "loss": 0.6806, "step": 486 }, { "epoch": 1.42, "learning_rate": 8.70262390670554e-06, "loss": 0.7117, "step": 487 }, { "epoch": 1.42, "learning_rate": 8.658892128279885e-06, "loss": 0.6386, "step": 488 }, { "epoch": 1.43, "learning_rate": 8.615160349854227e-06, "loss": 0.6763, "step": 489 }, { "epoch": 1.43, "learning_rate": 8.571428571428571e-06, "loss": 0.7101, "step": 490 }, { "epoch": 1.43, "learning_rate": 8.527696793002915e-06, "loss": 0.6231, "step": 491 }, { "epoch": 1.43, "learning_rate": 8.483965014577259e-06, "loss": 0.6945, "step": 492 }, { "epoch": 1.44, "learning_rate": 8.440233236151604e-06, "loss": 0.6969, "step": 493 }, { "epoch": 1.44, "learning_rate": 8.396501457725948e-06, "loss": 0.6003, "step": 494 }, { "epoch": 1.44, "learning_rate": 8.352769679300292e-06, "loss": 0.7275, "step": 495 }, { "epoch": 1.45, "learning_rate": 8.309037900874636e-06, "loss": 0.659, "step": 496 }, { "epoch": 1.45, "learning_rate": 8.265306122448979e-06, "loss": 0.6436, "step": 497 }, { "epoch": 1.45, "learning_rate": 8.221574344023323e-06, "loss": 0.7147, "step": 498 }, { "epoch": 1.45, "learning_rate": 8.177842565597668e-06, "loss": 0.6923, "step": 499 }, { "epoch": 1.46, "learning_rate": 8.134110787172012e-06, "loss": 0.6701, "step": 500 }, { "epoch": 1.46, "learning_rate": 8.090379008746356e-06, "loss": 0.7177, "step": 501 }, { "epoch": 1.46, "learning_rate": 8.0466472303207e-06, "loss": 0.6963, "step": 502 }, { "epoch": 1.47, "learning_rate": 8.002915451895044e-06, "loss": 0.7587, "step": 503 }, { "epoch": 1.47, "learning_rate": 7.959183673469388e-06, "loss": 0.5824, "step": 504 }, { "epoch": 1.47, "learning_rate": 7.915451895043732e-06, "loss": 0.7099, "step": 505 }, { "epoch": 1.48, "learning_rate": 7.871720116618076e-06, "loss": 0.6534, "step": 506 }, { "epoch": 1.48, "learning_rate": 7.82798833819242e-06, "loss": 0.6332, "step": 507 }, { "epoch": 1.48, "learning_rate": 7.784256559766764e-06, "loss": 0.655, "step": 508 }, { "epoch": 1.48, "learning_rate": 7.740524781341108e-06, "loss": 0.6822, "step": 509 }, { "epoch": 1.49, "learning_rate": 7.696793002915452e-06, "loss": 0.5835, "step": 510 }, { "epoch": 1.49, "learning_rate": 7.653061224489796e-06, "loss": 0.7233, "step": 511 }, { "epoch": 1.49, "learning_rate": 7.60932944606414e-06, "loss": 0.6536, "step": 512 }, { "epoch": 1.5, "learning_rate": 7.565597667638485e-06, "loss": 0.6018, "step": 513 }, { "epoch": 1.5, "learning_rate": 7.521865889212829e-06, "loss": 0.6019, "step": 514 }, { "epoch": 1.5, "learning_rate": 7.478134110787172e-06, "loss": 0.7141, "step": 515 }, { "epoch": 1.5, "learning_rate": 7.434402332361516e-06, "loss": 0.7671, "step": 516 }, { "epoch": 1.51, "learning_rate": 7.39067055393586e-06, "loss": 0.6633, "step": 517 }, { "epoch": 1.51, "learning_rate": 7.346938775510204e-06, "loss": 0.5977, "step": 518 }, { "epoch": 1.51, "learning_rate": 7.303206997084548e-06, "loss": 0.7577, "step": 519 }, { "epoch": 1.52, "learning_rate": 7.259475218658893e-06, "loss": 0.7126, "step": 520 }, { "epoch": 1.52, "learning_rate": 7.215743440233237e-06, "loss": 0.6572, "step": 521 }, { "epoch": 1.52, "learning_rate": 7.17201166180758e-06, "loss": 0.6463, "step": 522 }, { "epoch": 1.52, "learning_rate": 7.128279883381925e-06, "loss": 0.565, "step": 523 }, { "epoch": 1.53, "learning_rate": 7.0845481049562685e-06, "loss": 0.6441, "step": 524 }, { "epoch": 1.53, "learning_rate": 7.0408163265306125e-06, "loss": 0.8821, "step": 525 }, { "epoch": 1.53, "learning_rate": 6.9970845481049564e-06, "loss": 0.6033, "step": 526 }, { "epoch": 1.54, "learning_rate": 6.9533527696793e-06, "loss": 0.6548, "step": 527 }, { "epoch": 1.54, "learning_rate": 6.909620991253644e-06, "loss": 0.6689, "step": 528 }, { "epoch": 1.54, "learning_rate": 6.865889212827989e-06, "loss": 0.6853, "step": 529 }, { "epoch": 1.55, "learning_rate": 6.822157434402332e-06, "loss": 0.5613, "step": 530 }, { "epoch": 1.55, "learning_rate": 6.778425655976676e-06, "loss": 0.7409, "step": 531 }, { "epoch": 1.55, "learning_rate": 6.734693877551021e-06, "loss": 0.644, "step": 532 }, { "epoch": 1.55, "learning_rate": 6.690962099125365e-06, "loss": 0.5878, "step": 533 }, { "epoch": 1.56, "learning_rate": 6.647230320699709e-06, "loss": 0.6137, "step": 534 }, { "epoch": 1.56, "learning_rate": 6.603498542274052e-06, "loss": 0.7217, "step": 535 }, { "epoch": 1.56, "learning_rate": 6.559766763848397e-06, "loss": 0.7005, "step": 536 }, { "epoch": 1.57, "learning_rate": 6.516034985422741e-06, "loss": 0.7718, "step": 537 }, { "epoch": 1.57, "learning_rate": 6.472303206997085e-06, "loss": 0.6834, "step": 538 }, { "epoch": 1.57, "learning_rate": 6.428571428571429e-06, "loss": 0.6291, "step": 539 }, { "epoch": 1.57, "learning_rate": 6.3848396501457726e-06, "loss": 0.6906, "step": 540 }, { "epoch": 1.58, "learning_rate": 6.3411078717201165e-06, "loss": 0.7809, "step": 541 }, { "epoch": 1.58, "learning_rate": 6.297376093294461e-06, "loss": 0.661, "step": 542 }, { "epoch": 1.58, "learning_rate": 6.253644314868805e-06, "loss": 0.6932, "step": 543 }, { "epoch": 1.59, "learning_rate": 6.209912536443148e-06, "loss": 0.7602, "step": 544 }, { "epoch": 1.59, "learning_rate": 6.166180758017493e-06, "loss": 0.7317, "step": 545 }, { "epoch": 1.59, "learning_rate": 6.122448979591837e-06, "loss": 0.7391, "step": 546 }, { "epoch": 1.59, "learning_rate": 6.078717201166181e-06, "loss": 0.6777, "step": 547 }, { "epoch": 1.6, "learning_rate": 6.034985422740525e-06, "loss": 0.7403, "step": 548 }, { "epoch": 1.6, "learning_rate": 5.991253644314869e-06, "loss": 0.6889, "step": 549 }, { "epoch": 1.6, "learning_rate": 5.947521865889213e-06, "loss": 0.655, "step": 550 }, { "epoch": 1.61, "learning_rate": 5.903790087463557e-06, "loss": 0.5358, "step": 551 }, { "epoch": 1.61, "learning_rate": 5.860058309037901e-06, "loss": 0.6668, "step": 552 }, { "epoch": 1.61, "learning_rate": 5.816326530612245e-06, "loss": 0.5347, "step": 553 }, { "epoch": 1.62, "learning_rate": 5.772594752186589e-06, "loss": 0.5213, "step": 554 }, { "epoch": 1.62, "learning_rate": 5.7288629737609335e-06, "loss": 0.7983, "step": 555 }, { "epoch": 1.62, "learning_rate": 5.6851311953352774e-06, "loss": 0.7329, "step": 556 }, { "epoch": 1.62, "learning_rate": 5.6413994169096205e-06, "loss": 0.6845, "step": 557 }, { "epoch": 1.63, "learning_rate": 5.597667638483965e-06, "loss": 0.54, "step": 558 }, { "epoch": 1.63, "learning_rate": 5.553935860058309e-06, "loss": 0.5828, "step": 559 }, { "epoch": 1.63, "learning_rate": 5.510204081632653e-06, "loss": 0.7079, "step": 560 }, { "epoch": 1.64, "learning_rate": 5.466472303206997e-06, "loss": 0.6471, "step": 561 }, { "epoch": 1.64, "learning_rate": 5.422740524781341e-06, "loss": 0.7349, "step": 562 }, { "epoch": 1.64, "learning_rate": 5.379008746355685e-06, "loss": 0.6399, "step": 563 }, { "epoch": 1.64, "learning_rate": 5.33527696793003e-06, "loss": 0.7541, "step": 564 }, { "epoch": 1.65, "learning_rate": 5.291545189504374e-06, "loss": 0.6564, "step": 565 }, { "epoch": 1.65, "learning_rate": 5.247813411078717e-06, "loss": 0.5897, "step": 566 }, { "epoch": 1.65, "learning_rate": 5.204081632653062e-06, "loss": 0.7713, "step": 567 }, { "epoch": 1.66, "learning_rate": 5.160349854227406e-06, "loss": 0.5714, "step": 568 }, { "epoch": 1.66, "learning_rate": 5.11661807580175e-06, "loss": 0.666, "step": 569 }, { "epoch": 1.66, "learning_rate": 5.072886297376093e-06, "loss": 0.6647, "step": 570 }, { "epoch": 1.66, "learning_rate": 5.0291545189504375e-06, "loss": 0.7072, "step": 571 }, { "epoch": 1.67, "learning_rate": 4.9854227405247814e-06, "loss": 0.7326, "step": 572 }, { "epoch": 1.67, "learning_rate": 4.941690962099125e-06, "loss": 0.6275, "step": 573 }, { "epoch": 1.67, "learning_rate": 4.897959183673469e-06, "loss": 0.6175, "step": 574 }, { "epoch": 1.68, "learning_rate": 4.854227405247813e-06, "loss": 0.6715, "step": 575 }, { "epoch": 1.68, "learning_rate": 4.810495626822157e-06, "loss": 0.69, "step": 576 }, { "epoch": 1.68, "learning_rate": 4.766763848396502e-06, "loss": 0.6194, "step": 577 }, { "epoch": 1.69, "learning_rate": 4.723032069970846e-06, "loss": 0.7825, "step": 578 }, { "epoch": 1.69, "learning_rate": 4.679300291545189e-06, "loss": 0.5966, "step": 579 }, { "epoch": 1.69, "learning_rate": 4.635568513119534e-06, "loss": 0.593, "step": 580 }, { "epoch": 1.69, "learning_rate": 4.591836734693878e-06, "loss": 0.8133, "step": 581 }, { "epoch": 1.7, "learning_rate": 4.548104956268222e-06, "loss": 0.665, "step": 582 }, { "epoch": 1.7, "learning_rate": 4.504373177842566e-06, "loss": 0.6998, "step": 583 }, { "epoch": 1.7, "learning_rate": 4.46064139941691e-06, "loss": 0.6609, "step": 584 }, { "epoch": 1.71, "learning_rate": 4.416909620991254e-06, "loss": 0.5941, "step": 585 }, { "epoch": 1.71, "learning_rate": 4.373177842565598e-06, "loss": 0.6983, "step": 586 }, { "epoch": 1.71, "learning_rate": 4.329446064139942e-06, "loss": 0.5953, "step": 587 }, { "epoch": 1.71, "learning_rate": 4.2857142857142855e-06, "loss": 0.7267, "step": 588 }, { "epoch": 1.72, "learning_rate": 4.241982507288629e-06, "loss": 0.791, "step": 589 }, { "epoch": 1.72, "learning_rate": 4.198250728862974e-06, "loss": 0.6921, "step": 590 }, { "epoch": 1.72, "learning_rate": 4.154518950437318e-06, "loss": 0.8183, "step": 591 }, { "epoch": 1.73, "learning_rate": 4.110787172011661e-06, "loss": 0.6162, "step": 592 }, { "epoch": 1.73, "learning_rate": 4.067055393586006e-06, "loss": 0.7695, "step": 593 }, { "epoch": 1.73, "learning_rate": 4.02332361516035e-06, "loss": 0.6925, "step": 594 }, { "epoch": 1.73, "learning_rate": 3.979591836734694e-06, "loss": 0.6336, "step": 595 }, { "epoch": 1.74, "learning_rate": 3.935860058309038e-06, "loss": 0.7375, "step": 596 }, { "epoch": 1.74, "learning_rate": 3.892128279883382e-06, "loss": 0.8469, "step": 597 }, { "epoch": 1.74, "learning_rate": 3.848396501457726e-06, "loss": 0.635, "step": 598 }, { "epoch": 1.75, "learning_rate": 3.80466472303207e-06, "loss": 0.9703, "step": 599 }, { "epoch": 1.75, "learning_rate": 3.7609329446064145e-06, "loss": 0.653, "step": 600 }, { "epoch": 1.75, "learning_rate": 3.717201166180758e-06, "loss": 0.6354, "step": 601 }, { "epoch": 1.76, "learning_rate": 3.673469387755102e-06, "loss": 0.6025, "step": 602 }, { "epoch": 1.76, "learning_rate": 3.6297376093294464e-06, "loss": 0.6416, "step": 603 }, { "epoch": 1.76, "learning_rate": 3.58600583090379e-06, "loss": 0.6034, "step": 604 }, { "epoch": 1.76, "learning_rate": 3.5422740524781343e-06, "loss": 0.6376, "step": 605 }, { "epoch": 1.77, "learning_rate": 3.4985422740524782e-06, "loss": 0.6776, "step": 606 }, { "epoch": 1.77, "learning_rate": 3.454810495626822e-06, "loss": 0.6841, "step": 607 }, { "epoch": 1.77, "learning_rate": 3.411078717201166e-06, "loss": 0.641, "step": 608 }, { "epoch": 1.78, "learning_rate": 3.3673469387755105e-06, "loss": 0.76, "step": 609 }, { "epoch": 1.78, "learning_rate": 3.3236151603498544e-06, "loss": 0.7558, "step": 610 }, { "epoch": 1.78, "learning_rate": 3.2798833819241984e-06, "loss": 0.6344, "step": 611 }, { "epoch": 1.78, "learning_rate": 3.2361516034985423e-06, "loss": 0.6477, "step": 612 }, { "epoch": 1.79, "learning_rate": 3.1924198250728863e-06, "loss": 0.5704, "step": 613 }, { "epoch": 1.79, "learning_rate": 3.1486880466472307e-06, "loss": 0.7345, "step": 614 }, { "epoch": 1.79, "learning_rate": 3.104956268221574e-06, "loss": 0.6538, "step": 615 }, { "epoch": 1.8, "learning_rate": 3.0612244897959185e-06, "loss": 0.6457, "step": 616 }, { "epoch": 1.8, "learning_rate": 3.0174927113702625e-06, "loss": 0.6632, "step": 617 }, { "epoch": 1.8, "learning_rate": 2.9737609329446064e-06, "loss": 0.5877, "step": 618 }, { "epoch": 1.8, "learning_rate": 2.9300291545189504e-06, "loss": 0.7263, "step": 619 }, { "epoch": 1.81, "learning_rate": 2.8862973760932943e-06, "loss": 0.7151, "step": 620 }, { "epoch": 1.81, "learning_rate": 2.8425655976676387e-06, "loss": 0.7417, "step": 621 }, { "epoch": 1.81, "learning_rate": 2.7988338192419827e-06, "loss": 0.7464, "step": 622 }, { "epoch": 1.82, "learning_rate": 2.7551020408163266e-06, "loss": 0.6743, "step": 623 }, { "epoch": 1.82, "learning_rate": 2.7113702623906706e-06, "loss": 0.6379, "step": 624 }, { "epoch": 1.82, "learning_rate": 2.667638483965015e-06, "loss": 0.6686, "step": 625 }, { "epoch": 1.83, "learning_rate": 2.6239067055393585e-06, "loss": 0.6287, "step": 626 }, { "epoch": 1.83, "learning_rate": 2.580174927113703e-06, "loss": 0.8058, "step": 627 }, { "epoch": 1.83, "learning_rate": 2.5364431486880463e-06, "loss": 0.6915, "step": 628 }, { "epoch": 1.83, "learning_rate": 2.4927113702623907e-06, "loss": 0.6038, "step": 629 }, { "epoch": 1.84, "learning_rate": 2.4489795918367347e-06, "loss": 0.6473, "step": 630 }, { "epoch": 1.84, "learning_rate": 2.4052478134110786e-06, "loss": 0.6237, "step": 631 }, { "epoch": 1.84, "learning_rate": 2.361516034985423e-06, "loss": 0.8068, "step": 632 }, { "epoch": 1.85, "learning_rate": 2.317784256559767e-06, "loss": 0.6403, "step": 633 }, { "epoch": 1.85, "learning_rate": 2.274052478134111e-06, "loss": 0.6132, "step": 634 }, { "epoch": 1.85, "learning_rate": 2.230320699708455e-06, "loss": 0.8262, "step": 635 }, { "epoch": 1.85, "learning_rate": 2.186588921282799e-06, "loss": 0.7788, "step": 636 }, { "epoch": 1.86, "learning_rate": 2.1428571428571427e-06, "loss": 0.6717, "step": 637 }, { "epoch": 1.86, "learning_rate": 2.099125364431487e-06, "loss": 0.7498, "step": 638 }, { "epoch": 1.86, "learning_rate": 2.0553935860058306e-06, "loss": 0.6071, "step": 639 }, { "epoch": 1.87, "learning_rate": 2.011661807580175e-06, "loss": 0.6848, "step": 640 }, { "epoch": 1.87, "learning_rate": 1.967930029154519e-06, "loss": 0.784, "step": 641 }, { "epoch": 1.87, "learning_rate": 1.924198250728863e-06, "loss": 0.6056, "step": 642 }, { "epoch": 1.87, "learning_rate": 1.8804664723032073e-06, "loss": 0.7672, "step": 643 }, { "epoch": 1.88, "learning_rate": 1.836734693877551e-06, "loss": 0.7579, "step": 644 }, { "epoch": 1.88, "learning_rate": 1.793002915451895e-06, "loss": 0.6712, "step": 645 }, { "epoch": 1.88, "learning_rate": 1.7492711370262391e-06, "loss": 0.758, "step": 646 }, { "epoch": 1.89, "learning_rate": 1.705539358600583e-06, "loss": 0.6262, "step": 647 }, { "epoch": 1.89, "learning_rate": 1.6618075801749272e-06, "loss": 0.5719, "step": 648 }, { "epoch": 1.89, "learning_rate": 1.6180758017492712e-06, "loss": 0.6228, "step": 649 }, { "epoch": 1.9, "learning_rate": 1.5743440233236153e-06, "loss": 0.5954, "step": 650 }, { "epoch": 1.9, "learning_rate": 1.5306122448979593e-06, "loss": 0.6157, "step": 651 }, { "epoch": 1.9, "learning_rate": 1.4868804664723032e-06, "loss": 0.6907, "step": 652 }, { "epoch": 1.9, "learning_rate": 1.4431486880466472e-06, "loss": 0.5488, "step": 653 }, { "epoch": 1.91, "learning_rate": 1.3994169096209913e-06, "loss": 0.5904, "step": 654 }, { "epoch": 1.91, "learning_rate": 1.3556851311953353e-06, "loss": 0.608, "step": 655 }, { "epoch": 1.91, "learning_rate": 1.3119533527696792e-06, "loss": 0.8896, "step": 656 }, { "epoch": 1.92, "learning_rate": 1.2682215743440232e-06, "loss": 0.6197, "step": 657 }, { "epoch": 1.92, "learning_rate": 1.2244897959183673e-06, "loss": 0.6063, "step": 658 }, { "epoch": 1.92, "learning_rate": 1.1807580174927115e-06, "loss": 0.7669, "step": 659 }, { "epoch": 1.92, "learning_rate": 1.1370262390670554e-06, "loss": 0.8527, "step": 660 }, { "epoch": 1.93, "learning_rate": 1.0932944606413996e-06, "loss": 0.6512, "step": 661 }, { "epoch": 1.93, "learning_rate": 1.0495626822157436e-06, "loss": 0.6591, "step": 662 }, { "epoch": 1.93, "learning_rate": 1.0058309037900875e-06, "loss": 0.6701, "step": 663 }, { "epoch": 1.94, "learning_rate": 9.620991253644314e-07, "loss": 0.598, "step": 664 }, { "epoch": 1.94, "learning_rate": 9.183673469387755e-07, "loss": 0.7497, "step": 665 }, { "epoch": 1.94, "learning_rate": 8.746355685131196e-07, "loss": 0.6544, "step": 666 }, { "epoch": 1.94, "learning_rate": 8.309037900874636e-07, "loss": 0.7283, "step": 667 }, { "epoch": 1.95, "learning_rate": 7.871720116618077e-07, "loss": 0.7193, "step": 668 }, { "epoch": 1.95, "learning_rate": 7.434402332361516e-07, "loss": 0.6521, "step": 669 }, { "epoch": 1.95, "learning_rate": 6.997084548104957e-07, "loss": 0.5521, "step": 670 }, { "epoch": 1.96, "learning_rate": 6.559766763848396e-07, "loss": 0.581, "step": 671 }, { "epoch": 1.96, "learning_rate": 6.122448979591837e-07, "loss": 0.6203, "step": 672 }, { "epoch": 1.96, "learning_rate": 5.685131195335277e-07, "loss": 0.5413, "step": 673 }, { "epoch": 1.97, "learning_rate": 5.247813411078718e-07, "loss": 0.7687, "step": 674 }, { "epoch": 1.97, "learning_rate": 4.810495626822157e-07, "loss": 0.6807, "step": 675 }, { "epoch": 1.97, "learning_rate": 4.373177842565598e-07, "loss": 0.6888, "step": 676 }, { "epoch": 1.97, "learning_rate": 3.9358600583090383e-07, "loss": 0.59, "step": 677 }, { "epoch": 1.98, "learning_rate": 3.4985422740524783e-07, "loss": 0.617, "step": 678 }, { "epoch": 1.98, "learning_rate": 3.0612244897959183e-07, "loss": 0.6244, "step": 679 }, { "epoch": 1.98, "learning_rate": 2.623906705539359e-07, "loss": 0.6787, "step": 680 }, { "epoch": 1.99, "learning_rate": 2.186588921282799e-07, "loss": 0.7535, "step": 681 }, { "epoch": 1.99, "learning_rate": 1.7492711370262392e-07, "loss": 0.649, "step": 682 }, { "epoch": 1.99, "learning_rate": 1.3119533527696794e-07, "loss": 0.6996, "step": 683 }, { "epoch": 1.99, "learning_rate": 8.746355685131196e-08, "loss": 0.7669, "step": 684 }, { "epoch": 2.0, "learning_rate": 4.373177842565598e-08, "loss": 0.7109, "step": 685 }, { "epoch": 2.0, "learning_rate": 0.0, "loss": 0.7227, "step": 686 }, { "epoch": 2.0, "step": 686, "total_flos": 1.932278221504512e+17, "train_loss": 0.8650754356088861, "train_runtime": 3796.8514, "train_samples_per_second": 46.143, "train_steps_per_second": 0.181 } ], "max_steps": 686, "num_train_epochs": 2, "total_flos": 1.932278221504512e+17, "trial_name": null, "trial_params": null }