{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "global_step": 16416, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.113590263691684e-08, "loss": 1.2614, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.6227180527383367e-07, "loss": 0.5289, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.434077079107505e-07, "loss": 0.4145, "step": 6 }, { "epoch": 0.0, "learning_rate": 3.2454361054766735e-07, "loss": 0.4136, "step": 8 }, { "epoch": 0.0, "learning_rate": 4.056795131845842e-07, "loss": 0.414, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.86815415821501e-07, "loss": 0.4016, "step": 12 }, { "epoch": 0.0, "learning_rate": 5.679513184584178e-07, "loss": 0.4036, "step": 14 }, { "epoch": 0.0, "learning_rate": 6.490872210953347e-07, "loss": 0.3919, "step": 16 }, { "epoch": 0.0, "learning_rate": 7.302231237322515e-07, "loss": 0.3831, "step": 18 }, { "epoch": 0.0, "learning_rate": 8.113590263691684e-07, "loss": 0.3727, "step": 20 }, { "epoch": 0.01, "learning_rate": 8.924949290060852e-07, "loss": 0.3496, "step": 22 }, { "epoch": 0.01, "learning_rate": 9.73630831643002e-07, "loss": 0.3871, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.0547667342799188e-06, "loss": 0.3724, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.1359026369168357e-06, "loss": 0.3791, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.2170385395537525e-06, "loss": 0.3617, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.2981744421906694e-06, "loss": 0.3574, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.3793103448275862e-06, "loss": 0.3616, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.460446247464503e-06, "loss": 0.3585, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.54158215010142e-06, "loss": 0.3509, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.6227180527383368e-06, "loss": 0.3897, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.7038539553752536e-06, "loss": 0.3582, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.7849898580121705e-06, "loss": 0.3566, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.8661257606490873e-06, "loss": 0.3368, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.947261663286004e-06, "loss": 0.3712, "step": 48 }, { "epoch": 0.01, "learning_rate": 2.028397565922921e-06, "loss": 0.3633, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.1095334685598377e-06, "loss": 0.3518, "step": 52 }, { "epoch": 0.01, "learning_rate": 2.1906693711967548e-06, "loss": 0.3629, "step": 54 }, { "epoch": 0.01, "learning_rate": 2.2718052738336714e-06, "loss": 0.3377, "step": 56 }, { "epoch": 0.01, "learning_rate": 2.3529411764705885e-06, "loss": 0.3539, "step": 58 }, { "epoch": 0.01, "learning_rate": 2.434077079107505e-06, "loss": 0.3736, "step": 60 }, { "epoch": 0.02, "learning_rate": 2.515212981744422e-06, "loss": 0.3617, "step": 62 }, { "epoch": 0.02, "learning_rate": 2.596348884381339e-06, "loss": 0.3517, "step": 64 }, { "epoch": 0.02, "learning_rate": 2.677484787018256e-06, "loss": 0.3694, "step": 66 }, { "epoch": 0.02, "learning_rate": 2.7586206896551725e-06, "loss": 0.3483, "step": 68 }, { "epoch": 0.02, "learning_rate": 2.8397565922920896e-06, "loss": 0.3322, "step": 70 }, { "epoch": 0.02, "learning_rate": 2.920892494929006e-06, "loss": 0.3295, "step": 72 }, { "epoch": 0.02, "learning_rate": 3.0020283975659233e-06, "loss": 0.3514, "step": 74 }, { "epoch": 0.02, "learning_rate": 3.08316430020284e-06, "loss": 0.3366, "step": 76 }, { "epoch": 0.02, "learning_rate": 3.164300202839757e-06, "loss": 0.3086, "step": 78 }, { "epoch": 0.02, "learning_rate": 3.2454361054766736e-06, "loss": 0.3452, "step": 80 }, { "epoch": 0.02, "learning_rate": 3.3265720081135907e-06, "loss": 0.2971, "step": 82 }, { "epoch": 0.02, "learning_rate": 3.4077079107505073e-06, "loss": 0.3384, "step": 84 }, { "epoch": 0.02, "learning_rate": 3.4888438133874244e-06, "loss": 0.3428, "step": 86 }, { "epoch": 0.02, "learning_rate": 3.569979716024341e-06, "loss": 0.3635, "step": 88 }, { "epoch": 0.02, "learning_rate": 3.651115618661258e-06, "loss": 0.3625, "step": 90 }, { "epoch": 0.02, "learning_rate": 3.7322515212981747e-06, "loss": 0.3502, "step": 92 }, { "epoch": 0.02, "learning_rate": 3.8133874239350913e-06, "loss": 0.3678, "step": 94 }, { "epoch": 0.02, "learning_rate": 3.894523326572008e-06, "loss": 0.3365, "step": 96 }, { "epoch": 0.02, "learning_rate": 3.975659229208925e-06, "loss": 0.3364, "step": 98 }, { "epoch": 0.02, "learning_rate": 4.056795131845842e-06, "loss": 0.3526, "step": 100 }, { "epoch": 0.02, "learning_rate": 4.137931034482759e-06, "loss": 0.3411, "step": 102 }, { "epoch": 0.03, "learning_rate": 4.219066937119675e-06, "loss": 0.3537, "step": 104 }, { "epoch": 0.03, "learning_rate": 4.3002028397565924e-06, "loss": 0.3394, "step": 106 }, { "epoch": 0.03, "learning_rate": 4.3813387423935095e-06, "loss": 0.3474, "step": 108 }, { "epoch": 0.03, "learning_rate": 4.4624746450304266e-06, "loss": 0.3248, "step": 110 }, { "epoch": 0.03, "learning_rate": 4.543610547667343e-06, "loss": 0.3571, "step": 112 }, { "epoch": 0.03, "learning_rate": 4.62474645030426e-06, "loss": 0.3544, "step": 114 }, { "epoch": 0.03, "learning_rate": 4.705882352941177e-06, "loss": 0.3519, "step": 116 }, { "epoch": 0.03, "learning_rate": 4.787018255578094e-06, "loss": 0.3528, "step": 118 }, { "epoch": 0.03, "learning_rate": 4.86815415821501e-06, "loss": 0.3595, "step": 120 }, { "epoch": 0.03, "learning_rate": 4.949290060851927e-06, "loss": 0.3573, "step": 122 }, { "epoch": 0.03, "learning_rate": 5.030425963488844e-06, "loss": 0.3196, "step": 124 }, { "epoch": 0.03, "learning_rate": 5.111561866125761e-06, "loss": 0.3401, "step": 126 }, { "epoch": 0.03, "learning_rate": 5.192697768762678e-06, "loss": 0.3253, "step": 128 }, { "epoch": 0.03, "learning_rate": 5.2738336713995955e-06, "loss": 0.3246, "step": 130 }, { "epoch": 0.03, "learning_rate": 5.354969574036512e-06, "loss": 0.3478, "step": 132 }, { "epoch": 0.03, "learning_rate": 5.436105476673429e-06, "loss": 0.3498, "step": 134 }, { "epoch": 0.03, "learning_rate": 5.517241379310345e-06, "loss": 0.3404, "step": 136 }, { "epoch": 0.03, "learning_rate": 5.598377281947263e-06, "loss": 0.3578, "step": 138 }, { "epoch": 0.03, "learning_rate": 5.679513184584179e-06, "loss": 0.3445, "step": 140 }, { "epoch": 0.03, "learning_rate": 5.760649087221096e-06, "loss": 0.3386, "step": 142 }, { "epoch": 0.04, "learning_rate": 5.841784989858012e-06, "loss": 0.3562, "step": 144 }, { "epoch": 0.04, "learning_rate": 5.92292089249493e-06, "loss": 0.3339, "step": 146 }, { "epoch": 0.04, "learning_rate": 6.0040567951318465e-06, "loss": 0.3415, "step": 148 }, { "epoch": 0.04, "learning_rate": 6.0851926977687636e-06, "loss": 0.3352, "step": 150 }, { "epoch": 0.04, "learning_rate": 6.16632860040568e-06, "loss": 0.3339, "step": 152 }, { "epoch": 0.04, "learning_rate": 6.247464503042598e-06, "loss": 0.3511, "step": 154 }, { "epoch": 0.04, "learning_rate": 6.328600405679514e-06, "loss": 0.3339, "step": 156 }, { "epoch": 0.04, "learning_rate": 6.409736308316431e-06, "loss": 0.312, "step": 158 }, { "epoch": 0.04, "learning_rate": 6.490872210953347e-06, "loss": 0.3313, "step": 160 }, { "epoch": 0.04, "learning_rate": 6.572008113590265e-06, "loss": 0.3529, "step": 162 }, { "epoch": 0.04, "learning_rate": 6.653144016227181e-06, "loss": 0.3632, "step": 164 }, { "epoch": 0.04, "learning_rate": 6.734279918864098e-06, "loss": 0.3218, "step": 166 }, { "epoch": 0.04, "learning_rate": 6.815415821501015e-06, "loss": 0.3444, "step": 168 }, { "epoch": 0.04, "learning_rate": 6.896551724137932e-06, "loss": 0.3473, "step": 170 }, { "epoch": 0.04, "learning_rate": 6.977687626774849e-06, "loss": 0.3323, "step": 172 }, { "epoch": 0.04, "learning_rate": 7.058823529411766e-06, "loss": 0.3382, "step": 174 }, { "epoch": 0.04, "learning_rate": 7.139959432048682e-06, "loss": 0.324, "step": 176 }, { "epoch": 0.04, "learning_rate": 7.221095334685599e-06, "loss": 0.3314, "step": 178 }, { "epoch": 0.04, "learning_rate": 7.302231237322516e-06, "loss": 0.3148, "step": 180 }, { "epoch": 0.04, "learning_rate": 7.383367139959433e-06, "loss": 0.3384, "step": 182 }, { "epoch": 0.04, "learning_rate": 7.464503042596349e-06, "loss": 0.3375, "step": 184 }, { "epoch": 0.05, "learning_rate": 7.5456389452332665e-06, "loss": 0.3501, "step": 186 }, { "epoch": 0.05, "learning_rate": 7.626774847870183e-06, "loss": 0.3491, "step": 188 }, { "epoch": 0.05, "learning_rate": 7.7079107505071e-06, "loss": 0.353, "step": 190 }, { "epoch": 0.05, "learning_rate": 7.789046653144016e-06, "loss": 0.3278, "step": 192 }, { "epoch": 0.05, "learning_rate": 7.870182555780935e-06, "loss": 0.3406, "step": 194 }, { "epoch": 0.05, "learning_rate": 7.95131845841785e-06, "loss": 0.3584, "step": 196 }, { "epoch": 0.05, "learning_rate": 8.032454361054767e-06, "loss": 0.3401, "step": 198 }, { "epoch": 0.05, "learning_rate": 8.113590263691684e-06, "loss": 0.3356, "step": 200 }, { "epoch": 0.05, "learning_rate": 8.194726166328601e-06, "loss": 0.3439, "step": 202 }, { "epoch": 0.05, "learning_rate": 8.275862068965518e-06, "loss": 0.3372, "step": 204 }, { "epoch": 0.05, "learning_rate": 8.356997971602435e-06, "loss": 0.3373, "step": 206 }, { "epoch": 0.05, "learning_rate": 8.43813387423935e-06, "loss": 0.3266, "step": 208 }, { "epoch": 0.05, "learning_rate": 8.51926977687627e-06, "loss": 0.3466, "step": 210 }, { "epoch": 0.05, "learning_rate": 8.600405679513185e-06, "loss": 0.3455, "step": 212 }, { "epoch": 0.05, "learning_rate": 8.681541582150102e-06, "loss": 0.3269, "step": 214 }, { "epoch": 0.05, "learning_rate": 8.762677484787019e-06, "loss": 0.3052, "step": 216 }, { "epoch": 0.05, "learning_rate": 8.843813387423936e-06, "loss": 0.3405, "step": 218 }, { "epoch": 0.05, "learning_rate": 8.924949290060853e-06, "loss": 0.3277, "step": 220 }, { "epoch": 0.05, "learning_rate": 9.00608519269777e-06, "loss": 0.3151, "step": 222 }, { "epoch": 0.05, "learning_rate": 9.087221095334686e-06, "loss": 0.3159, "step": 224 }, { "epoch": 0.06, "learning_rate": 9.168356997971604e-06, "loss": 0.3493, "step": 226 }, { "epoch": 0.06, "learning_rate": 9.24949290060852e-06, "loss": 0.3403, "step": 228 }, { "epoch": 0.06, "learning_rate": 9.330628803245437e-06, "loss": 0.3386, "step": 230 }, { "epoch": 0.06, "learning_rate": 9.411764705882354e-06, "loss": 0.3393, "step": 232 }, { "epoch": 0.06, "learning_rate": 9.492900608519271e-06, "loss": 0.3426, "step": 234 }, { "epoch": 0.06, "learning_rate": 9.574036511156188e-06, "loss": 0.3531, "step": 236 }, { "epoch": 0.06, "learning_rate": 9.655172413793105e-06, "loss": 0.351, "step": 238 }, { "epoch": 0.06, "learning_rate": 9.73630831643002e-06, "loss": 0.3543, "step": 240 }, { "epoch": 0.06, "learning_rate": 9.817444219066939e-06, "loss": 0.3378, "step": 242 }, { "epoch": 0.06, "learning_rate": 9.898580121703854e-06, "loss": 0.3288, "step": 244 }, { "epoch": 0.06, "learning_rate": 9.979716024340772e-06, "loss": 0.3512, "step": 246 }, { "epoch": 0.06, "learning_rate": 1.0060851926977689e-05, "loss": 0.3391, "step": 248 }, { "epoch": 0.06, "learning_rate": 1.0141987829614606e-05, "loss": 0.3461, "step": 250 }, { "epoch": 0.06, "learning_rate": 1.0223123732251523e-05, "loss": 0.3246, "step": 252 }, { "epoch": 0.06, "learning_rate": 1.0304259634888438e-05, "loss": 0.325, "step": 254 }, { "epoch": 0.06, "learning_rate": 1.0385395537525355e-05, "loss": 0.3457, "step": 256 }, { "epoch": 0.06, "learning_rate": 1.0466531440162272e-05, "loss": 0.3072, "step": 258 }, { "epoch": 0.06, "learning_rate": 1.0547667342799191e-05, "loss": 0.3352, "step": 260 }, { "epoch": 0.06, "learning_rate": 1.0628803245436106e-05, "loss": 0.3282, "step": 262 }, { "epoch": 0.06, "learning_rate": 1.0709939148073023e-05, "loss": 0.3274, "step": 264 }, { "epoch": 0.06, "learning_rate": 1.079107505070994e-05, "loss": 0.3467, "step": 266 }, { "epoch": 0.07, "learning_rate": 1.0872210953346858e-05, "loss": 0.3319, "step": 268 }, { "epoch": 0.07, "learning_rate": 1.0953346855983773e-05, "loss": 0.3206, "step": 270 }, { "epoch": 0.07, "learning_rate": 1.103448275862069e-05, "loss": 0.3468, "step": 272 }, { "epoch": 0.07, "learning_rate": 1.1115618661257607e-05, "loss": 0.3423, "step": 274 }, { "epoch": 0.07, "learning_rate": 1.1196754563894526e-05, "loss": 0.3467, "step": 276 }, { "epoch": 0.07, "learning_rate": 1.1277890466531441e-05, "loss": 0.3367, "step": 278 }, { "epoch": 0.07, "learning_rate": 1.1359026369168358e-05, "loss": 0.3399, "step": 280 }, { "epoch": 0.07, "learning_rate": 1.1440162271805275e-05, "loss": 0.33, "step": 282 }, { "epoch": 0.07, "learning_rate": 1.1521298174442192e-05, "loss": 0.3267, "step": 284 }, { "epoch": 0.07, "learning_rate": 1.1602434077079108e-05, "loss": 0.3496, "step": 286 }, { "epoch": 0.07, "learning_rate": 1.1683569979716025e-05, "loss": 0.3487, "step": 288 }, { "epoch": 0.07, "learning_rate": 1.1764705882352942e-05, "loss": 0.3477, "step": 290 }, { "epoch": 0.07, "learning_rate": 1.184584178498986e-05, "loss": 0.3298, "step": 292 }, { "epoch": 0.07, "learning_rate": 1.1926977687626774e-05, "loss": 0.3296, "step": 294 }, { "epoch": 0.07, "learning_rate": 1.2008113590263693e-05, "loss": 0.3371, "step": 296 }, { "epoch": 0.07, "learning_rate": 1.208924949290061e-05, "loss": 0.3444, "step": 298 }, { "epoch": 0.07, "learning_rate": 1.2170385395537527e-05, "loss": 0.3382, "step": 300 }, { "epoch": 0.07, "learning_rate": 1.2251521298174443e-05, "loss": 0.3259, "step": 302 }, { "epoch": 0.07, "learning_rate": 1.233265720081136e-05, "loss": 0.3469, "step": 304 }, { "epoch": 0.07, "learning_rate": 1.2413793103448277e-05, "loss": 0.317, "step": 306 }, { "epoch": 0.08, "learning_rate": 1.2494929006085195e-05, "loss": 0.3263, "step": 308 }, { "epoch": 0.08, "learning_rate": 1.2576064908722109e-05, "loss": 0.3204, "step": 310 }, { "epoch": 0.08, "learning_rate": 1.2657200811359028e-05, "loss": 0.3197, "step": 312 }, { "epoch": 0.08, "learning_rate": 1.2738336713995945e-05, "loss": 0.3275, "step": 314 }, { "epoch": 0.08, "learning_rate": 1.2819472616632862e-05, "loss": 0.3183, "step": 316 }, { "epoch": 0.08, "learning_rate": 1.2900608519269777e-05, "loss": 0.3363, "step": 318 }, { "epoch": 0.08, "learning_rate": 1.2981744421906694e-05, "loss": 0.3328, "step": 320 }, { "epoch": 0.08, "learning_rate": 1.3062880324543611e-05, "loss": 0.3409, "step": 322 }, { "epoch": 0.08, "learning_rate": 1.314401622718053e-05, "loss": 0.3373, "step": 324 }, { "epoch": 0.08, "learning_rate": 1.3225152129817444e-05, "loss": 0.3331, "step": 326 }, { "epoch": 0.08, "learning_rate": 1.3306288032454363e-05, "loss": 0.3247, "step": 328 }, { "epoch": 0.08, "learning_rate": 1.338742393509128e-05, "loss": 0.3305, "step": 330 }, { "epoch": 0.08, "learning_rate": 1.3468559837728197e-05, "loss": 0.3329, "step": 332 }, { "epoch": 0.08, "learning_rate": 1.3549695740365112e-05, "loss": 0.332, "step": 334 }, { "epoch": 0.08, "learning_rate": 1.363083164300203e-05, "loss": 0.3168, "step": 336 }, { "epoch": 0.08, "learning_rate": 1.3711967545638946e-05, "loss": 0.3176, "step": 338 }, { "epoch": 0.08, "learning_rate": 1.3793103448275863e-05, "loss": 0.312, "step": 340 }, { "epoch": 0.08, "learning_rate": 1.3874239350912779e-05, "loss": 0.3378, "step": 342 }, { "epoch": 0.08, "learning_rate": 1.3955375253549697e-05, "loss": 0.338, "step": 344 }, { "epoch": 0.08, "learning_rate": 1.4036511156186615e-05, "loss": 0.3286, "step": 346 }, { "epoch": 0.08, "learning_rate": 1.4117647058823532e-05, "loss": 0.3103, "step": 348 }, { "epoch": 0.09, "learning_rate": 1.4198782961460447e-05, "loss": 0.3186, "step": 350 }, { "epoch": 0.09, "learning_rate": 1.4279918864097364e-05, "loss": 0.3559, "step": 352 }, { "epoch": 0.09, "learning_rate": 1.4361054766734281e-05, "loss": 0.313, "step": 354 }, { "epoch": 0.09, "learning_rate": 1.4442190669371198e-05, "loss": 0.3274, "step": 356 }, { "epoch": 0.09, "learning_rate": 1.4523326572008113e-05, "loss": 0.3427, "step": 358 }, { "epoch": 0.09, "learning_rate": 1.4604462474645032e-05, "loss": 0.3379, "step": 360 }, { "epoch": 0.09, "learning_rate": 1.468559837728195e-05, "loss": 0.3384, "step": 362 }, { "epoch": 0.09, "learning_rate": 1.4766734279918866e-05, "loss": 0.3395, "step": 364 }, { "epoch": 0.09, "learning_rate": 1.4847870182555782e-05, "loss": 0.3361, "step": 366 }, { "epoch": 0.09, "learning_rate": 1.4929006085192699e-05, "loss": 0.3459, "step": 368 }, { "epoch": 0.09, "learning_rate": 1.5010141987829616e-05, "loss": 0.3415, "step": 370 }, { "epoch": 0.09, "learning_rate": 1.5091277890466533e-05, "loss": 0.3399, "step": 372 }, { "epoch": 0.09, "learning_rate": 1.5172413793103448e-05, "loss": 0.3289, "step": 374 }, { "epoch": 0.09, "learning_rate": 1.5253549695740365e-05, "loss": 0.3346, "step": 376 }, { "epoch": 0.09, "learning_rate": 1.5334685598377284e-05, "loss": 0.3278, "step": 378 }, { "epoch": 0.09, "learning_rate": 1.54158215010142e-05, "loss": 0.3178, "step": 380 }, { "epoch": 0.09, "learning_rate": 1.5496957403651115e-05, "loss": 0.3179, "step": 382 }, { "epoch": 0.09, "learning_rate": 1.5578093306288032e-05, "loss": 0.3195, "step": 384 }, { "epoch": 0.09, "learning_rate": 1.5659229208924952e-05, "loss": 0.3376, "step": 386 }, { "epoch": 0.09, "learning_rate": 1.574036511156187e-05, "loss": 0.3371, "step": 388 }, { "epoch": 0.1, "learning_rate": 1.5821501014198783e-05, "loss": 0.3258, "step": 390 }, { "epoch": 0.1, "learning_rate": 1.59026369168357e-05, "loss": 0.3207, "step": 392 }, { "epoch": 0.1, "learning_rate": 1.5983772819472617e-05, "loss": 0.3065, "step": 394 }, { "epoch": 0.1, "learning_rate": 1.6064908722109534e-05, "loss": 0.3562, "step": 396 }, { "epoch": 0.1, "learning_rate": 1.614604462474645e-05, "loss": 0.3228, "step": 398 }, { "epoch": 0.1, "learning_rate": 1.622718052738337e-05, "loss": 0.3368, "step": 400 }, { "epoch": 0.1, "learning_rate": 1.6308316430020285e-05, "loss": 0.3478, "step": 402 }, { "epoch": 0.1, "learning_rate": 1.6389452332657203e-05, "loss": 0.3398, "step": 404 }, { "epoch": 0.1, "learning_rate": 1.647058823529412e-05, "loss": 0.3244, "step": 406 }, { "epoch": 0.1, "learning_rate": 1.6551724137931037e-05, "loss": 0.3294, "step": 408 }, { "epoch": 0.1, "learning_rate": 1.6632860040567954e-05, "loss": 0.3381, "step": 410 }, { "epoch": 0.1, "learning_rate": 1.671399594320487e-05, "loss": 0.3363, "step": 412 }, { "epoch": 0.1, "learning_rate": 1.6795131845841784e-05, "loss": 0.3429, "step": 414 }, { "epoch": 0.1, "learning_rate": 1.68762677484787e-05, "loss": 0.337, "step": 416 }, { "epoch": 0.1, "learning_rate": 1.6957403651115622e-05, "loss": 0.327, "step": 418 }, { "epoch": 0.1, "learning_rate": 1.703853955375254e-05, "loss": 0.3235, "step": 420 }, { "epoch": 0.1, "learning_rate": 1.7119675456389453e-05, "loss": 0.3387, "step": 422 }, { "epoch": 0.1, "learning_rate": 1.720081135902637e-05, "loss": 0.336, "step": 424 }, { "epoch": 0.1, "learning_rate": 1.7281947261663287e-05, "loss": 0.3452, "step": 426 }, { "epoch": 0.1, "learning_rate": 1.7363083164300204e-05, "loss": 0.3181, "step": 428 }, { "epoch": 0.1, "learning_rate": 1.744421906693712e-05, "loss": 0.3369, "step": 430 }, { "epoch": 0.11, "learning_rate": 1.7525354969574038e-05, "loss": 0.3506, "step": 432 }, { "epoch": 0.11, "learning_rate": 1.7606490872210955e-05, "loss": 0.3135, "step": 434 }, { "epoch": 0.11, "learning_rate": 1.7687626774847872e-05, "loss": 0.3127, "step": 436 }, { "epoch": 0.11, "learning_rate": 1.776876267748479e-05, "loss": 0.35, "step": 438 }, { "epoch": 0.11, "learning_rate": 1.7849898580121706e-05, "loss": 0.3423, "step": 440 }, { "epoch": 0.11, "learning_rate": 1.7931034482758623e-05, "loss": 0.3362, "step": 442 }, { "epoch": 0.11, "learning_rate": 1.801217038539554e-05, "loss": 0.3, "step": 444 }, { "epoch": 0.11, "learning_rate": 1.8093306288032454e-05, "loss": 0.3476, "step": 446 }, { "epoch": 0.11, "learning_rate": 1.817444219066937e-05, "loss": 0.3289, "step": 448 }, { "epoch": 0.11, "learning_rate": 1.8255578093306288e-05, "loss": 0.3418, "step": 450 }, { "epoch": 0.11, "learning_rate": 1.833671399594321e-05, "loss": 0.3152, "step": 452 }, { "epoch": 0.11, "learning_rate": 1.8417849898580122e-05, "loss": 0.3405, "step": 454 }, { "epoch": 0.11, "learning_rate": 1.849898580121704e-05, "loss": 0.3378, "step": 456 }, { "epoch": 0.11, "learning_rate": 1.8580121703853956e-05, "loss": 0.3447, "step": 458 }, { "epoch": 0.11, "learning_rate": 1.8661257606490873e-05, "loss": 0.3415, "step": 460 }, { "epoch": 0.11, "learning_rate": 1.874239350912779e-05, "loss": 0.2988, "step": 462 }, { "epoch": 0.11, "learning_rate": 1.8823529411764708e-05, "loss": 0.3443, "step": 464 }, { "epoch": 0.11, "learning_rate": 1.8904665314401625e-05, "loss": 0.3468, "step": 466 }, { "epoch": 0.11, "learning_rate": 1.8985801217038542e-05, "loss": 0.3389, "step": 468 }, { "epoch": 0.11, "learning_rate": 1.906693711967546e-05, "loss": 0.3202, "step": 470 }, { "epoch": 0.12, "learning_rate": 1.9148073022312376e-05, "loss": 0.3352, "step": 472 }, { "epoch": 0.12, "learning_rate": 1.9229208924949293e-05, "loss": 0.3316, "step": 474 }, { "epoch": 0.12, "learning_rate": 1.931034482758621e-05, "loss": 0.3248, "step": 476 }, { "epoch": 0.12, "learning_rate": 1.9391480730223124e-05, "loss": 0.3419, "step": 478 }, { "epoch": 0.12, "learning_rate": 1.947261663286004e-05, "loss": 0.3124, "step": 480 }, { "epoch": 0.12, "learning_rate": 1.9553752535496958e-05, "loss": 0.4068, "step": 482 }, { "epoch": 0.12, "learning_rate": 1.9634888438133878e-05, "loss": 0.3444, "step": 484 }, { "epoch": 0.12, "learning_rate": 1.9716024340770792e-05, "loss": 0.3553, "step": 486 }, { "epoch": 0.12, "learning_rate": 1.979716024340771e-05, "loss": 0.3286, "step": 488 }, { "epoch": 0.12, "learning_rate": 1.9878296146044626e-05, "loss": 0.3579, "step": 490 }, { "epoch": 0.12, "learning_rate": 1.9959432048681543e-05, "loss": 0.3307, "step": 492 }, { "epoch": 0.12, "learning_rate": 1.999999980536544e-05, "loss": 0.3232, "step": 494 }, { "epoch": 0.12, "learning_rate": 1.9999998248289005e-05, "loss": 0.3437, "step": 496 }, { "epoch": 0.12, "learning_rate": 1.9999995134136373e-05, "loss": 0.328, "step": 498 }, { "epoch": 0.12, "learning_rate": 1.9999990462908037e-05, "loss": 0.3468, "step": 500 }, { "epoch": 0.12, "learning_rate": 1.9999984234604716e-05, "loss": 0.3406, "step": 502 }, { "epoch": 0.12, "learning_rate": 1.9999976449227386e-05, "loss": 0.3182, "step": 504 }, { "epoch": 0.12, "learning_rate": 1.9999967106777253e-05, "loss": 0.3249, "step": 506 }, { "epoch": 0.12, "learning_rate": 1.9999956207255773e-05, "loss": 0.3209, "step": 508 }, { "epoch": 0.12, "learning_rate": 1.9999943750664653e-05, "loss": 0.3341, "step": 510 }, { "epoch": 0.12, "learning_rate": 1.9999929737005818e-05, "loss": 0.3335, "step": 512 }, { "epoch": 0.13, "learning_rate": 1.999991416628146e-05, "loss": 0.3344, "step": 514 }, { "epoch": 0.13, "learning_rate": 1.9999897038494e-05, "loss": 0.3374, "step": 516 }, { "epoch": 0.13, "learning_rate": 1.999987835364611e-05, "loss": 0.3262, "step": 518 }, { "epoch": 0.13, "learning_rate": 1.9999858111740688e-05, "loss": 0.3144, "step": 520 }, { "epoch": 0.13, "learning_rate": 1.9999836312780895e-05, "loss": 0.3221, "step": 522 }, { "epoch": 0.13, "learning_rate": 1.9999812956770125e-05, "loss": 0.3367, "step": 524 }, { "epoch": 0.13, "learning_rate": 1.999978804371201e-05, "loss": 0.3143, "step": 526 }, { "epoch": 0.13, "learning_rate": 1.9999761573610432e-05, "loss": 0.3397, "step": 528 }, { "epoch": 0.13, "learning_rate": 1.9999733546469514e-05, "loss": 0.3183, "step": 530 }, { "epoch": 0.13, "learning_rate": 1.9999703962293612e-05, "loss": 0.3474, "step": 532 }, { "epoch": 0.13, "learning_rate": 1.9999672821087347e-05, "loss": 0.3183, "step": 534 }, { "epoch": 0.13, "learning_rate": 1.9999640122855556e-05, "loss": 0.3377, "step": 536 }, { "epoch": 0.13, "learning_rate": 1.9999605867603333e-05, "loss": 0.3219, "step": 538 }, { "epoch": 0.13, "learning_rate": 1.9999570055336014e-05, "loss": 0.3197, "step": 540 }, { "epoch": 0.13, "learning_rate": 1.9999532686059175e-05, "loss": 0.3282, "step": 542 }, { "epoch": 0.13, "learning_rate": 1.9999493759778635e-05, "loss": 0.3137, "step": 544 }, { "epoch": 0.13, "learning_rate": 1.999945327650045e-05, "loss": 0.3431, "step": 546 }, { "epoch": 0.13, "learning_rate": 1.999941123623093e-05, "loss": 0.3354, "step": 548 }, { "epoch": 0.13, "learning_rate": 1.999936763897662e-05, "loss": 0.3173, "step": 550 }, { "epoch": 0.13, "learning_rate": 1.9999322484744305e-05, "loss": 0.3322, "step": 552 }, { "epoch": 0.13, "learning_rate": 1.9999275773541016e-05, "loss": 0.3082, "step": 554 }, { "epoch": 0.14, "learning_rate": 1.9999227505374033e-05, "loss": 0.3208, "step": 556 }, { "epoch": 0.14, "learning_rate": 1.9999177680250863e-05, "loss": 0.3308, "step": 558 }, { "epoch": 0.14, "learning_rate": 1.999912629817927e-05, "loss": 0.3151, "step": 560 }, { "epoch": 0.14, "learning_rate": 1.999907335916725e-05, "loss": 0.295, "step": 562 }, { "epoch": 0.14, "learning_rate": 1.999901886322305e-05, "loss": 0.3134, "step": 564 }, { "epoch": 0.14, "learning_rate": 1.9998962810355152e-05, "loss": 0.3051, "step": 566 }, { "epoch": 0.14, "learning_rate": 1.999890520057229e-05, "loss": 0.327, "step": 568 }, { "epoch": 0.14, "learning_rate": 1.9998846033883427e-05, "loss": 0.3553, "step": 570 }, { "epoch": 0.14, "learning_rate": 1.999878531029778e-05, "loss": 0.3285, "step": 572 }, { "epoch": 0.14, "learning_rate": 1.99987230298248e-05, "loss": 0.2928, "step": 574 }, { "epoch": 0.14, "learning_rate": 1.9998659192474193e-05, "loss": 0.3042, "step": 576 }, { "epoch": 0.14, "learning_rate": 1.999859379825589e-05, "loss": 0.3498, "step": 578 }, { "epoch": 0.14, "learning_rate": 1.999852684718008e-05, "loss": 0.3228, "step": 580 }, { "epoch": 0.14, "learning_rate": 1.999845833925718e-05, "loss": 0.3335, "step": 582 }, { "epoch": 0.14, "learning_rate": 1.9998388274497864e-05, "loss": 0.3374, "step": 584 }, { "epoch": 0.14, "learning_rate": 1.9998316652913038e-05, "loss": 0.3328, "step": 586 }, { "epoch": 0.14, "learning_rate": 1.999824347451386e-05, "loss": 0.3158, "step": 588 }, { "epoch": 0.14, "learning_rate": 1.9998168739311715e-05, "loss": 0.2963, "step": 590 }, { "epoch": 0.14, "learning_rate": 1.9998092447318247e-05, "loss": 0.3467, "step": 592 }, { "epoch": 0.14, "learning_rate": 1.9998014598545335e-05, "loss": 0.3319, "step": 594 }, { "epoch": 0.15, "learning_rate": 1.9997935193005093e-05, "loss": 0.3027, "step": 596 }, { "epoch": 0.15, "learning_rate": 1.9997854230709896e-05, "loss": 0.32, "step": 598 }, { "epoch": 0.15, "learning_rate": 1.9997771711672343e-05, "loss": 0.315, "step": 600 }, { "epoch": 0.15, "learning_rate": 1.9997687635905284e-05, "loss": 0.331, "step": 602 }, { "epoch": 0.15, "learning_rate": 1.9997602003421815e-05, "loss": 0.3158, "step": 604 }, { "epoch": 0.15, "learning_rate": 1.9997514814235264e-05, "loss": 0.2994, "step": 606 }, { "epoch": 0.15, "learning_rate": 1.999742606835921e-05, "loss": 0.3224, "step": 608 }, { "epoch": 0.15, "learning_rate": 1.9997335765807463e-05, "loss": 0.3146, "step": 610 }, { "epoch": 0.15, "learning_rate": 1.9997243906594098e-05, "loss": 0.3013, "step": 612 }, { "epoch": 0.15, "learning_rate": 1.9997150490733412e-05, "loss": 0.3083, "step": 614 }, { "epoch": 0.15, "learning_rate": 1.9997055518239947e-05, "loss": 0.3039, "step": 616 }, { "epoch": 0.15, "learning_rate": 1.9996958989128498e-05, "loss": 0.3265, "step": 618 }, { "epoch": 0.15, "learning_rate": 1.9996860903414085e-05, "loss": 0.3186, "step": 620 }, { "epoch": 0.15, "learning_rate": 1.999676126111199e-05, "loss": 0.3373, "step": 622 }, { "epoch": 0.15, "learning_rate": 1.9996660062237723e-05, "loss": 0.2947, "step": 624 }, { "epoch": 0.15, "learning_rate": 1.9996557306807046e-05, "loss": 0.3157, "step": 626 }, { "epoch": 0.15, "learning_rate": 1.9996452994835954e-05, "loss": 0.3326, "step": 628 }, { "epoch": 0.15, "learning_rate": 1.9996347126340692e-05, "loss": 0.3033, "step": 630 }, { "epoch": 0.15, "learning_rate": 1.9996239701337744e-05, "loss": 0.3376, "step": 632 }, { "epoch": 0.15, "learning_rate": 1.999613071984384e-05, "loss": 0.3279, "step": 634 }, { "epoch": 0.15, "learning_rate": 1.9996020181875942e-05, "loss": 0.3207, "step": 636 }, { "epoch": 0.16, "learning_rate": 1.9995908087451264e-05, "loss": 0.3149, "step": 638 }, { "epoch": 0.16, "learning_rate": 1.9995794436587266e-05, "loss": 0.3375, "step": 640 }, { "epoch": 0.16, "learning_rate": 1.999567922930164e-05, "loss": 0.3287, "step": 642 }, { "epoch": 0.16, "learning_rate": 1.9995562465612322e-05, "loss": 0.3353, "step": 644 }, { "epoch": 0.16, "learning_rate": 1.9995444145537494e-05, "loss": 0.3311, "step": 646 }, { "epoch": 0.16, "learning_rate": 1.9995324269095585e-05, "loss": 0.3122, "step": 648 }, { "epoch": 0.16, "learning_rate": 1.9995202836305255e-05, "loss": 0.2925, "step": 650 }, { "epoch": 0.16, "learning_rate": 1.999507984718541e-05, "loss": 0.3346, "step": 652 }, { "epoch": 0.16, "learning_rate": 1.999495530175521e-05, "loss": 0.3232, "step": 654 }, { "epoch": 0.16, "learning_rate": 1.9994829200034037e-05, "loss": 0.3189, "step": 656 }, { "epoch": 0.16, "learning_rate": 1.9994701542041533e-05, "loss": 0.3218, "step": 658 }, { "epoch": 0.16, "learning_rate": 1.999457232779757e-05, "loss": 0.3283, "step": 660 }, { "epoch": 0.16, "learning_rate": 1.9994441557322273e-05, "loss": 0.3224, "step": 662 }, { "epoch": 0.16, "learning_rate": 1.9994309230636003e-05, "loss": 0.3298, "step": 664 }, { "epoch": 0.16, "learning_rate": 1.999417534775936e-05, "loss": 0.3048, "step": 666 }, { "epoch": 0.16, "learning_rate": 1.9994039908713194e-05, "loss": 0.3109, "step": 668 }, { "epoch": 0.16, "learning_rate": 1.9993902913518593e-05, "loss": 0.3329, "step": 670 }, { "epoch": 0.16, "learning_rate": 1.999376436219689e-05, "loss": 0.3356, "step": 672 }, { "epoch": 0.16, "learning_rate": 1.9993624254769655e-05, "loss": 0.314, "step": 674 }, { "epoch": 0.16, "learning_rate": 1.9993482591258706e-05, "loss": 0.3093, "step": 676 }, { "epoch": 0.17, "learning_rate": 1.99933393716861e-05, "loss": 0.3187, "step": 678 }, { "epoch": 0.17, "learning_rate": 1.999319459607414e-05, "loss": 0.3038, "step": 680 }, { "epoch": 0.17, "learning_rate": 1.9993048264445368e-05, "loss": 0.3096, "step": 682 }, { "epoch": 0.17, "learning_rate": 1.999290037682257e-05, "loss": 0.3198, "step": 684 }, { "epoch": 0.17, "learning_rate": 1.9992750933228768e-05, "loss": 0.3216, "step": 686 }, { "epoch": 0.17, "learning_rate": 1.9992599933687233e-05, "loss": 0.3128, "step": 688 }, { "epoch": 0.17, "learning_rate": 1.999244737822148e-05, "loss": 0.3169, "step": 690 }, { "epoch": 0.17, "learning_rate": 1.999229326685526e-05, "loss": 0.3119, "step": 692 }, { "epoch": 0.17, "learning_rate": 1.9992137599612573e-05, "loss": 0.3272, "step": 694 }, { "epoch": 0.17, "learning_rate": 1.999198037651765e-05, "loss": 0.3413, "step": 696 }, { "epoch": 0.17, "learning_rate": 1.9991821597594983e-05, "loss": 0.3305, "step": 698 }, { "epoch": 0.17, "learning_rate": 1.999166126286929e-05, "loss": 0.3104, "step": 700 }, { "epoch": 0.17, "learning_rate": 1.999149937236553e-05, "loss": 0.3277, "step": 702 }, { "epoch": 0.17, "learning_rate": 1.9991335926108923e-05, "loss": 0.3228, "step": 704 }, { "epoch": 0.17, "learning_rate": 1.9991170924124906e-05, "loss": 0.3249, "step": 706 }, { "epoch": 0.17, "learning_rate": 1.999100436643918e-05, "loss": 0.3304, "step": 708 }, { "epoch": 0.17, "learning_rate": 1.9990836253077677e-05, "loss": 0.3435, "step": 710 }, { "epoch": 0.17, "learning_rate": 1.999066658406657e-05, "loss": 0.3404, "step": 712 }, { "epoch": 0.17, "learning_rate": 1.9990495359432286e-05, "loss": 0.3116, "step": 714 }, { "epoch": 0.17, "learning_rate": 1.9990322579201476e-05, "loss": 0.3237, "step": 716 }, { "epoch": 0.17, "learning_rate": 1.9990148243401048e-05, "loss": 0.3107, "step": 718 }, { "epoch": 0.18, "learning_rate": 1.998997235205815e-05, "loss": 0.3326, "step": 720 }, { "epoch": 0.18, "learning_rate": 1.9989794905200167e-05, "loss": 0.3296, "step": 722 }, { "epoch": 0.18, "learning_rate": 1.9989615902854726e-05, "loss": 0.3097, "step": 724 }, { "epoch": 0.18, "learning_rate": 1.9989435345049704e-05, "loss": 0.3424, "step": 726 }, { "epoch": 0.18, "learning_rate": 1.998925323181321e-05, "loss": 0.3295, "step": 728 }, { "epoch": 0.18, "learning_rate": 1.9989069563173606e-05, "loss": 0.3117, "step": 730 }, { "epoch": 0.18, "learning_rate": 1.9988884339159487e-05, "loss": 0.3138, "step": 732 }, { "epoch": 0.18, "learning_rate": 1.9988697559799696e-05, "loss": 0.3237, "step": 734 }, { "epoch": 0.18, "learning_rate": 1.9988509225123317e-05, "loss": 0.2955, "step": 736 }, { "epoch": 0.18, "learning_rate": 1.998831933515967e-05, "loss": 0.3375, "step": 738 }, { "epoch": 0.18, "learning_rate": 1.9988127889938324e-05, "loss": 0.3289, "step": 740 }, { "epoch": 0.18, "learning_rate": 1.998793488948909e-05, "loss": 0.3463, "step": 742 }, { "epoch": 0.18, "learning_rate": 1.998774033384202e-05, "loss": 0.3198, "step": 744 }, { "epoch": 0.18, "learning_rate": 1.9987544223027412e-05, "loss": 0.3082, "step": 746 }, { "epoch": 0.18, "learning_rate": 1.9987346557075792e-05, "loss": 0.3383, "step": 748 }, { "epoch": 0.18, "learning_rate": 1.9987147336017945e-05, "loss": 0.325, "step": 750 }, { "epoch": 0.18, "learning_rate": 1.998694655988489e-05, "loss": 0.3152, "step": 752 }, { "epoch": 0.18, "learning_rate": 1.9986744228707888e-05, "loss": 0.3224, "step": 754 }, { "epoch": 0.18, "learning_rate": 1.9986540342518445e-05, "loss": 0.3282, "step": 756 }, { "epoch": 0.18, "learning_rate": 1.9986334901348307e-05, "loss": 0.3152, "step": 758 }, { "epoch": 0.19, "learning_rate": 1.9986127905229463e-05, "loss": 0.3278, "step": 760 }, { "epoch": 0.19, "learning_rate": 1.9985919354194142e-05, "loss": 0.3435, "step": 762 }, { "epoch": 0.19, "learning_rate": 1.9985709248274822e-05, "loss": 0.3143, "step": 764 }, { "epoch": 0.19, "learning_rate": 1.9985497587504213e-05, "loss": 0.326, "step": 766 }, { "epoch": 0.19, "learning_rate": 1.9985284371915273e-05, "loss": 0.3432, "step": 768 }, { "epoch": 0.19, "learning_rate": 1.99850696015412e-05, "loss": 0.3417, "step": 770 }, { "epoch": 0.19, "learning_rate": 1.9984853276415444e-05, "loss": 0.3378, "step": 772 }, { "epoch": 0.19, "learning_rate": 1.998463539657168e-05, "loss": 0.324, "step": 774 }, { "epoch": 0.19, "learning_rate": 1.9984415962043835e-05, "loss": 0.3303, "step": 776 }, { "epoch": 0.19, "learning_rate": 1.9984194972866076e-05, "loss": 0.2976, "step": 778 }, { "epoch": 0.19, "learning_rate": 1.9983972429072814e-05, "loss": 0.3077, "step": 780 }, { "epoch": 0.19, "learning_rate": 1.9983748330698703e-05, "loss": 0.3179, "step": 782 }, { "epoch": 0.19, "learning_rate": 1.9983522677778634e-05, "loss": 0.3089, "step": 784 }, { "epoch": 0.19, "learning_rate": 1.9983295470347745e-05, "loss": 0.3315, "step": 786 }, { "epoch": 0.19, "learning_rate": 1.998306670844141e-05, "loss": 0.3128, "step": 788 }, { "epoch": 0.19, "learning_rate": 1.998283639209525e-05, "loss": 0.3377, "step": 790 }, { "epoch": 0.19, "learning_rate": 1.9982604521345132e-05, "loss": 0.3234, "step": 792 }, { "epoch": 0.19, "learning_rate": 1.9982371096227155e-05, "loss": 0.3373, "step": 794 }, { "epoch": 0.19, "learning_rate": 1.9982136116777666e-05, "loss": 0.3241, "step": 796 }, { "epoch": 0.19, "learning_rate": 1.9981899583033255e-05, "loss": 0.3161, "step": 798 }, { "epoch": 0.19, "learning_rate": 1.998166149503075e-05, "loss": 0.3227, "step": 800 }, { "epoch": 0.2, "learning_rate": 1.9981421852807227e-05, "loss": 0.3276, "step": 802 }, { "epoch": 0.2, "learning_rate": 1.9981180656399993e-05, "loss": 0.3242, "step": 804 }, { "epoch": 0.2, "learning_rate": 1.998093790584661e-05, "loss": 0.3239, "step": 806 }, { "epoch": 0.2, "learning_rate": 1.9980693601184874e-05, "loss": 0.3241, "step": 808 }, { "epoch": 0.2, "learning_rate": 1.9980447742452823e-05, "loss": 0.3365, "step": 810 }, { "epoch": 0.2, "learning_rate": 1.9980200329688742e-05, "loss": 0.3305, "step": 812 }, { "epoch": 0.2, "learning_rate": 1.997995136293116e-05, "loss": 0.3351, "step": 814 }, { "epoch": 0.2, "learning_rate": 1.9979700842218833e-05, "loss": 0.3169, "step": 816 }, { "epoch": 0.2, "learning_rate": 1.9979448767590776e-05, "loss": 0.3424, "step": 818 }, { "epoch": 0.2, "learning_rate": 1.9979195139086235e-05, "loss": 0.3177, "step": 820 }, { "epoch": 0.2, "learning_rate": 1.99789399567447e-05, "loss": 0.3276, "step": 822 }, { "epoch": 0.2, "learning_rate": 1.9978683220605912e-05, "loss": 0.335, "step": 824 }, { "epoch": 0.2, "learning_rate": 1.9978424930709842e-05, "loss": 0.3291, "step": 826 }, { "epoch": 0.2, "learning_rate": 1.997816508709671e-05, "loss": 0.3103, "step": 828 }, { "epoch": 0.2, "learning_rate": 1.9977903689806975e-05, "loss": 0.3105, "step": 830 }, { "epoch": 0.2, "learning_rate": 1.9977640738881337e-05, "loss": 0.3115, "step": 832 }, { "epoch": 0.2, "learning_rate": 1.997737623436074e-05, "loss": 0.3296, "step": 834 }, { "epoch": 0.2, "learning_rate": 1.997711017628637e-05, "loss": 0.3187, "step": 836 }, { "epoch": 0.2, "learning_rate": 1.9976842564699654e-05, "loss": 0.299, "step": 838 }, { "epoch": 0.2, "learning_rate": 1.997657339964226e-05, "loss": 0.3225, "step": 840 }, { "epoch": 0.21, "learning_rate": 1.9976302681156103e-05, "loss": 0.3118, "step": 842 }, { "epoch": 0.21, "learning_rate": 1.997603040928333e-05, "loss": 0.3034, "step": 844 }, { "epoch": 0.21, "learning_rate": 1.997575658406634e-05, "loss": 0.2916, "step": 846 }, { "epoch": 0.21, "learning_rate": 1.997548120554777e-05, "loss": 0.3229, "step": 848 }, { "epoch": 0.21, "learning_rate": 1.9975204273770497e-05, "loss": 0.3357, "step": 850 }, { "epoch": 0.21, "learning_rate": 1.9974925788777642e-05, "loss": 0.3207, "step": 852 }, { "epoch": 0.21, "learning_rate": 1.9974645750612568e-05, "loss": 0.3423, "step": 854 }, { "epoch": 0.21, "learning_rate": 1.9974364159318876e-05, "loss": 0.3072, "step": 856 }, { "epoch": 0.21, "learning_rate": 1.9974081014940415e-05, "loss": 0.3288, "step": 858 }, { "epoch": 0.21, "learning_rate": 1.997379631752127e-05, "loss": 0.3285, "step": 860 }, { "epoch": 0.21, "learning_rate": 1.9973510067105774e-05, "loss": 0.307, "step": 862 }, { "epoch": 0.21, "learning_rate": 1.9973222263738495e-05, "loss": 0.3245, "step": 864 }, { "epoch": 0.21, "learning_rate": 1.997293290746425e-05, "loss": 0.3332, "step": 866 }, { "epoch": 0.21, "learning_rate": 1.9972641998328086e-05, "loss": 0.3261, "step": 868 }, { "epoch": 0.21, "learning_rate": 1.9972349536375312e-05, "loss": 0.3223, "step": 870 }, { "epoch": 0.21, "learning_rate": 1.997205552165146e-05, "loss": 0.3134, "step": 872 }, { "epoch": 0.21, "learning_rate": 1.9971759954202306e-05, "loss": 0.3199, "step": 874 }, { "epoch": 0.21, "learning_rate": 1.9971462834073878e-05, "loss": 0.3102, "step": 876 }, { "epoch": 0.21, "learning_rate": 1.997116416131244e-05, "loss": 0.3278, "step": 878 }, { "epoch": 0.21, "learning_rate": 1.9970863935964496e-05, "loss": 0.3101, "step": 880 }, { "epoch": 0.21, "learning_rate": 1.9970562158076793e-05, "loss": 0.3196, "step": 882 }, { "epoch": 0.22, "learning_rate": 1.997025882769632e-05, "loss": 0.2994, "step": 884 }, { "epoch": 0.22, "learning_rate": 1.996995394487031e-05, "loss": 0.3033, "step": 886 }, { "epoch": 0.22, "learning_rate": 1.9969647509646234e-05, "loss": 0.3227, "step": 888 }, { "epoch": 0.22, "learning_rate": 1.99693395220718e-05, "loss": 0.3207, "step": 890 }, { "epoch": 0.22, "learning_rate": 1.9969029982194978e-05, "loss": 0.3291, "step": 892 }, { "epoch": 0.22, "learning_rate": 1.9968718890063952e-05, "loss": 0.3326, "step": 894 }, { "epoch": 0.22, "learning_rate": 1.9968406245727175e-05, "loss": 0.309, "step": 896 }, { "epoch": 0.22, "learning_rate": 1.9968092049233317e-05, "loss": 0.3268, "step": 898 }, { "epoch": 0.22, "learning_rate": 1.9967776300631302e-05, "loss": 0.3132, "step": 900 }, { "epoch": 0.22, "learning_rate": 1.9967458999970302e-05, "loss": 0.3, "step": 902 }, { "epoch": 0.22, "learning_rate": 1.9967140147299714e-05, "loss": 0.3339, "step": 904 }, { "epoch": 0.22, "learning_rate": 1.996681974266919e-05, "loss": 0.3136, "step": 906 }, { "epoch": 0.22, "learning_rate": 1.996649778612862e-05, "loss": 0.3094, "step": 908 }, { "epoch": 0.22, "learning_rate": 1.9966174277728135e-05, "loss": 0.3212, "step": 910 }, { "epoch": 0.22, "learning_rate": 1.9965849217518107e-05, "loss": 0.2945, "step": 912 }, { "epoch": 0.22, "learning_rate": 1.996552260554915e-05, "loss": 0.3019, "step": 914 }, { "epoch": 0.22, "learning_rate": 1.996519444187212e-05, "loss": 0.3295, "step": 916 }, { "epoch": 0.22, "learning_rate": 1.9964864726538117e-05, "loss": 0.3355, "step": 918 }, { "epoch": 0.22, "learning_rate": 1.9964533459598473e-05, "loss": 0.3212, "step": 920 }, { "epoch": 0.22, "learning_rate": 1.996420064110478e-05, "loss": 0.3067, "step": 922 }, { "epoch": 0.23, "learning_rate": 1.9963866271108854e-05, "loss": 0.3124, "step": 924 }, { "epoch": 0.23, "learning_rate": 1.9963530349662754e-05, "loss": 0.3017, "step": 926 }, { "epoch": 0.23, "learning_rate": 1.9963192876818797e-05, "loss": 0.3132, "step": 928 }, { "epoch": 0.23, "learning_rate": 1.996285385262952e-05, "loss": 0.31, "step": 930 }, { "epoch": 0.23, "learning_rate": 1.996251327714772e-05, "loss": 0.3187, "step": 932 }, { "epoch": 0.23, "learning_rate": 1.9962171150426418e-05, "loss": 0.323, "step": 934 }, { "epoch": 0.23, "learning_rate": 1.996182747251889e-05, "loss": 0.3116, "step": 936 }, { "epoch": 0.23, "learning_rate": 1.9961482243478654e-05, "loss": 0.3026, "step": 938 }, { "epoch": 0.23, "learning_rate": 1.996113546335946e-05, "loss": 0.3032, "step": 940 }, { "epoch": 0.23, "learning_rate": 1.9960787132215305e-05, "loss": 0.2994, "step": 942 }, { "epoch": 0.23, "learning_rate": 1.9960437250100427e-05, "loss": 0.325, "step": 944 }, { "epoch": 0.23, "learning_rate": 1.99600858170693e-05, "loss": 0.3098, "step": 946 }, { "epoch": 0.23, "learning_rate": 1.9959732833176656e-05, "loss": 0.3191, "step": 948 }, { "epoch": 0.23, "learning_rate": 1.995937829847745e-05, "loss": 0.3162, "step": 950 }, { "epoch": 0.23, "learning_rate": 1.995902221302689e-05, "loss": 0.2874, "step": 952 }, { "epoch": 0.23, "learning_rate": 1.9958664576880412e-05, "loss": 0.3069, "step": 954 }, { "epoch": 0.23, "learning_rate": 1.9958305390093714e-05, "loss": 0.3371, "step": 956 }, { "epoch": 0.23, "learning_rate": 1.9957944652722716e-05, "loss": 0.3269, "step": 958 }, { "epoch": 0.23, "learning_rate": 1.995758236482359e-05, "loss": 0.3234, "step": 960 }, { "epoch": 0.23, "learning_rate": 1.995721852645275e-05, "loss": 0.2994, "step": 962 }, { "epoch": 0.23, "learning_rate": 1.9956853137666842e-05, "loss": 0.2938, "step": 964 }, { "epoch": 0.24, "learning_rate": 1.9956486198522767e-05, "loss": 0.3126, "step": 966 }, { "epoch": 0.24, "learning_rate": 1.995611770907766e-05, "loss": 0.32, "step": 968 }, { "epoch": 0.24, "learning_rate": 1.9955747669388893e-05, "loss": 0.3068, "step": 970 }, { "epoch": 0.24, "learning_rate": 1.9955376079514083e-05, "loss": 0.3029, "step": 972 }, { "epoch": 0.24, "learning_rate": 1.9955002939511093e-05, "loss": 0.3227, "step": 974 }, { "epoch": 0.24, "learning_rate": 1.9954628249438023e-05, "loss": 0.2945, "step": 976 }, { "epoch": 0.24, "learning_rate": 1.9954252009353217e-05, "loss": 0.325, "step": 978 }, { "epoch": 0.24, "learning_rate": 1.9953874219315256e-05, "loss": 0.3219, "step": 980 }, { "epoch": 0.24, "learning_rate": 1.9953494879382963e-05, "loss": 0.315, "step": 982 }, { "epoch": 0.24, "learning_rate": 1.995311398961541e-05, "loss": 0.3171, "step": 984 }, { "epoch": 0.24, "learning_rate": 1.9952731550071894e-05, "loss": 0.3189, "step": 986 }, { "epoch": 0.24, "learning_rate": 1.9952347560811977e-05, "loss": 0.3194, "step": 988 }, { "epoch": 0.24, "learning_rate": 1.995196202189544e-05, "loss": 0.3283, "step": 990 }, { "epoch": 0.24, "learning_rate": 1.995157493338232e-05, "loss": 0.3157, "step": 992 }, { "epoch": 0.24, "learning_rate": 1.9951186295332882e-05, "loss": 0.3113, "step": 994 }, { "epoch": 0.24, "learning_rate": 1.9950796107807648e-05, "loss": 0.309, "step": 996 }, { "epoch": 0.24, "learning_rate": 1.9950404370867368e-05, "loss": 0.2968, "step": 998 }, { "epoch": 0.24, "learning_rate": 1.9950011084573042e-05, "loss": 0.3049, "step": 1000 }, { "epoch": 0.24, "learning_rate": 1.9949616248985904e-05, "loss": 0.3273, "step": 1002 }, { "epoch": 0.24, "learning_rate": 1.994921986416744e-05, "loss": 0.3085, "step": 1004 }, { "epoch": 0.25, "learning_rate": 1.9948821930179357e-05, "loss": 0.3198, "step": 1006 }, { "epoch": 0.25, "learning_rate": 1.9948422447083628e-05, "loss": 0.292, "step": 1008 }, { "epoch": 0.25, "learning_rate": 1.994802141494245e-05, "loss": 0.3178, "step": 1010 }, { "epoch": 0.25, "learning_rate": 1.994761883381827e-05, "loss": 0.3165, "step": 1012 }, { "epoch": 0.25, "learning_rate": 1.9947214703773773e-05, "loss": 0.2986, "step": 1014 }, { "epoch": 0.25, "learning_rate": 1.9946809024871884e-05, "loss": 0.3241, "step": 1016 }, { "epoch": 0.25, "learning_rate": 1.9946401797175767e-05, "loss": 0.3061, "step": 1018 }, { "epoch": 0.25, "learning_rate": 1.9945993020748834e-05, "loss": 0.3012, "step": 1020 }, { "epoch": 0.25, "learning_rate": 1.9945582695654738e-05, "loss": 0.3203, "step": 1022 }, { "epoch": 0.25, "learning_rate": 1.994517082195736e-05, "loss": 0.3154, "step": 1024 }, { "epoch": 0.25, "learning_rate": 1.9944757399720843e-05, "loss": 0.323, "step": 1026 }, { "epoch": 0.25, "learning_rate": 1.994434242900955e-05, "loss": 0.3123, "step": 1028 }, { "epoch": 0.25, "learning_rate": 1.9943925909888103e-05, "loss": 0.3145, "step": 1030 }, { "epoch": 0.25, "learning_rate": 1.9943507842421357e-05, "loss": 0.3033, "step": 1032 }, { "epoch": 0.25, "learning_rate": 1.99430882266744e-05, "loss": 0.3297, "step": 1034 }, { "epoch": 0.25, "learning_rate": 1.994266706271258e-05, "loss": 0.3268, "step": 1036 }, { "epoch": 0.25, "learning_rate": 1.9942244350601462e-05, "loss": 0.3109, "step": 1038 }, { "epoch": 0.25, "learning_rate": 1.994182009040688e-05, "loss": 0.2908, "step": 1040 }, { "epoch": 0.25, "learning_rate": 1.9941394282194887e-05, "loss": 0.3152, "step": 1042 }, { "epoch": 0.25, "learning_rate": 1.9940966926031788e-05, "loss": 0.29, "step": 1044 }, { "epoch": 0.25, "learning_rate": 1.994053802198412e-05, "loss": 0.3145, "step": 1046 }, { "epoch": 0.26, "learning_rate": 1.994010757011867e-05, "loss": 0.3104, "step": 1048 }, { "epoch": 0.26, "learning_rate": 1.9939675570502467e-05, "loss": 0.2953, "step": 1050 }, { "epoch": 0.26, "learning_rate": 1.993924202320277e-05, "loss": 0.3167, "step": 1052 }, { "epoch": 0.26, "learning_rate": 1.9938806928287086e-05, "loss": 0.3192, "step": 1054 }, { "epoch": 0.26, "learning_rate": 1.9938370285823167e-05, "loss": 0.326, "step": 1056 }, { "epoch": 0.26, "learning_rate": 1.9937932095879e-05, "loss": 0.3085, "step": 1058 }, { "epoch": 0.26, "learning_rate": 1.993749235852281e-05, "loss": 0.3399, "step": 1060 }, { "epoch": 0.26, "learning_rate": 1.9937051073823074e-05, "loss": 0.3125, "step": 1062 }, { "epoch": 0.26, "learning_rate": 1.99366082418485e-05, "loss": 0.3179, "step": 1064 }, { "epoch": 0.26, "learning_rate": 1.9936163862668043e-05, "loss": 0.3049, "step": 1066 }, { "epoch": 0.26, "learning_rate": 1.9935717936350894e-05, "loss": 0.3066, "step": 1068 }, { "epoch": 0.26, "learning_rate": 1.9935270462966484e-05, "loss": 0.2742, "step": 1070 }, { "epoch": 0.26, "learning_rate": 1.9934821442584495e-05, "loss": 0.3001, "step": 1072 }, { "epoch": 0.26, "learning_rate": 1.9934370875274836e-05, "loss": 0.3153, "step": 1074 }, { "epoch": 0.26, "learning_rate": 1.993391876110767e-05, "loss": 0.3057, "step": 1076 }, { "epoch": 0.26, "learning_rate": 1.9933465100153388e-05, "loss": 0.3336, "step": 1078 }, { "epoch": 0.26, "learning_rate": 1.9933009892482636e-05, "loss": 0.3095, "step": 1080 }, { "epoch": 0.26, "learning_rate": 1.9932553138166287e-05, "loss": 0.3327, "step": 1082 }, { "epoch": 0.26, "learning_rate": 1.9932094837275465e-05, "loss": 0.3228, "step": 1084 }, { "epoch": 0.26, "learning_rate": 1.993163498988153e-05, "loss": 0.309, "step": 1086 }, { "epoch": 0.27, "learning_rate": 1.9931173596056085e-05, "loss": 0.3207, "step": 1088 }, { "epoch": 0.27, "learning_rate": 1.9930710655870967e-05, "loss": 0.32, "step": 1090 }, { "epoch": 0.27, "learning_rate": 1.993024616939826e-05, "loss": 0.325, "step": 1092 }, { "epoch": 0.27, "learning_rate": 1.99297801367103e-05, "loss": 0.3103, "step": 1094 }, { "epoch": 0.27, "learning_rate": 1.9929312557879638e-05, "loss": 0.316, "step": 1096 }, { "epoch": 0.27, "learning_rate": 1.9928843432979084e-05, "loss": 0.3174, "step": 1098 }, { "epoch": 0.27, "learning_rate": 1.9928372762081686e-05, "loss": 0.3038, "step": 1100 }, { "epoch": 0.27, "learning_rate": 1.9927900545260735e-05, "loss": 0.287, "step": 1102 }, { "epoch": 0.27, "learning_rate": 1.992742678258975e-05, "loss": 0.3089, "step": 1104 }, { "epoch": 0.27, "learning_rate": 1.99269514741425e-05, "loss": 0.2913, "step": 1106 }, { "epoch": 0.27, "learning_rate": 1.9926474619993e-05, "loss": 0.2994, "step": 1108 }, { "epoch": 0.27, "learning_rate": 1.99259962202155e-05, "loss": 0.317, "step": 1110 }, { "epoch": 0.27, "learning_rate": 1.9925516274884487e-05, "loss": 0.3194, "step": 1112 }, { "epoch": 0.27, "learning_rate": 1.9925034784074692e-05, "loss": 0.3157, "step": 1114 }, { "epoch": 0.27, "learning_rate": 1.9924551747861088e-05, "loss": 0.3334, "step": 1116 }, { "epoch": 0.27, "learning_rate": 1.9924067166318884e-05, "loss": 0.3092, "step": 1118 }, { "epoch": 0.27, "learning_rate": 1.992358103952354e-05, "loss": 0.3108, "step": 1120 }, { "epoch": 0.27, "learning_rate": 1.9923093367550747e-05, "loss": 0.277, "step": 1122 }, { "epoch": 0.27, "learning_rate": 1.9922604150476435e-05, "loss": 0.3012, "step": 1124 }, { "epoch": 0.27, "learning_rate": 1.9922113388376786e-05, "loss": 0.276, "step": 1126 }, { "epoch": 0.27, "learning_rate": 1.9921621081328207e-05, "loss": 0.3107, "step": 1128 }, { "epoch": 0.28, "learning_rate": 1.9921127229407363e-05, "loss": 0.3169, "step": 1130 }, { "epoch": 0.28, "learning_rate": 1.9920631832691143e-05, "loss": 0.3211, "step": 1132 }, { "epoch": 0.28, "learning_rate": 1.9920134891256685e-05, "loss": 0.3012, "step": 1134 }, { "epoch": 0.28, "learning_rate": 1.9919636405181375e-05, "loss": 0.317, "step": 1136 }, { "epoch": 0.28, "learning_rate": 1.991913637454282e-05, "loss": 0.3059, "step": 1138 }, { "epoch": 0.28, "learning_rate": 1.9918634799418886e-05, "loss": 0.308, "step": 1140 }, { "epoch": 0.28, "learning_rate": 1.9918131679887668e-05, "loss": 0.3348, "step": 1142 }, { "epoch": 0.28, "learning_rate": 1.9917627016027504e-05, "loss": 0.3083, "step": 1144 }, { "epoch": 0.28, "learning_rate": 1.991712080791698e-05, "loss": 0.2949, "step": 1146 }, { "epoch": 0.28, "learning_rate": 1.9916613055634914e-05, "loss": 0.3144, "step": 1148 }, { "epoch": 0.28, "learning_rate": 1.9916103759260363e-05, "loss": 0.3472, "step": 1150 }, { "epoch": 0.28, "learning_rate": 1.9915592918872635e-05, "loss": 0.3034, "step": 1152 }, { "epoch": 0.28, "learning_rate": 1.991508053455127e-05, "loss": 0.3044, "step": 1154 }, { "epoch": 0.28, "learning_rate": 1.9914566606376045e-05, "loss": 0.3265, "step": 1156 }, { "epoch": 0.28, "learning_rate": 1.9914051134426987e-05, "loss": 0.3078, "step": 1158 }, { "epoch": 0.28, "learning_rate": 1.9913534118784358e-05, "loss": 0.2931, "step": 1160 }, { "epoch": 0.28, "learning_rate": 1.9913015559528662e-05, "loss": 0.351, "step": 1162 }, { "epoch": 0.28, "learning_rate": 1.9912495456740642e-05, "loss": 0.3184, "step": 1164 }, { "epoch": 0.28, "learning_rate": 1.991197381050128e-05, "loss": 0.3217, "step": 1166 }, { "epoch": 0.28, "learning_rate": 1.9911450620891807e-05, "loss": 0.2977, "step": 1168 }, { "epoch": 0.29, "learning_rate": 1.9910925887993676e-05, "loss": 0.3109, "step": 1170 }, { "epoch": 0.29, "learning_rate": 1.9910399611888604e-05, "loss": 0.3246, "step": 1172 }, { "epoch": 0.29, "learning_rate": 1.990987179265853e-05, "loss": 0.2781, "step": 1174 }, { "epoch": 0.29, "learning_rate": 1.990934243038564e-05, "loss": 0.2934, "step": 1176 }, { "epoch": 0.29, "learning_rate": 1.9908811525152362e-05, "loss": 0.3316, "step": 1178 }, { "epoch": 0.29, "learning_rate": 1.990827907704136e-05, "loss": 0.346, "step": 1180 }, { "epoch": 0.29, "learning_rate": 1.9907745086135538e-05, "loss": 0.3196, "step": 1182 }, { "epoch": 0.29, "learning_rate": 1.9907209552518046e-05, "loss": 0.3159, "step": 1184 }, { "epoch": 0.29, "learning_rate": 1.990667247627227e-05, "loss": 0.3162, "step": 1186 }, { "epoch": 0.29, "learning_rate": 1.9906133857481837e-05, "loss": 0.3112, "step": 1188 }, { "epoch": 0.29, "learning_rate": 1.9905593696230615e-05, "loss": 0.3185, "step": 1190 }, { "epoch": 0.29, "learning_rate": 1.9905051992602708e-05, "loss": 0.3425, "step": 1192 }, { "epoch": 0.29, "learning_rate": 1.9904508746682465e-05, "loss": 0.307, "step": 1194 }, { "epoch": 0.29, "learning_rate": 1.9903963958554474e-05, "loss": 0.3286, "step": 1196 }, { "epoch": 0.29, "learning_rate": 1.9903417628303565e-05, "loss": 0.3038, "step": 1198 }, { "epoch": 0.29, "learning_rate": 1.99028697560148e-05, "loss": 0.3197, "step": 1200 }, { "epoch": 0.29, "learning_rate": 1.990232034177349e-05, "loss": 0.3308, "step": 1202 }, { "epoch": 0.29, "learning_rate": 1.9901769385665185e-05, "loss": 0.2893, "step": 1204 }, { "epoch": 0.29, "learning_rate": 1.990121688777567e-05, "loss": 0.2965, "step": 1206 }, { "epoch": 0.29, "learning_rate": 1.9900662848190977e-05, "loss": 0.3098, "step": 1208 }, { "epoch": 0.29, "learning_rate": 1.9900107266997367e-05, "loss": 0.3128, "step": 1210 }, { "epoch": 0.3, "learning_rate": 1.9899550144281358e-05, "loss": 0.3294, "step": 1212 }, { "epoch": 0.3, "learning_rate": 1.9898991480129692e-05, "loss": 0.3076, "step": 1214 }, { "epoch": 0.3, "learning_rate": 1.9898431274629356e-05, "loss": 0.3294, "step": 1216 }, { "epoch": 0.3, "learning_rate": 1.9897869527867582e-05, "loss": 0.3314, "step": 1218 }, { "epoch": 0.3, "learning_rate": 1.9897306239931837e-05, "loss": 0.3265, "step": 1220 }, { "epoch": 0.3, "learning_rate": 1.989674141090983e-05, "loss": 0.3109, "step": 1222 }, { "epoch": 0.3, "learning_rate": 1.989617504088951e-05, "loss": 0.2841, "step": 1224 }, { "epoch": 0.3, "learning_rate": 1.9895607129959058e-05, "loss": 0.2836, "step": 1226 }, { "epoch": 0.3, "learning_rate": 1.989503767820691e-05, "loss": 0.2899, "step": 1228 }, { "epoch": 0.3, "learning_rate": 1.9894466685721734e-05, "loss": 0.3002, "step": 1230 }, { "epoch": 0.3, "learning_rate": 1.9893894152592433e-05, "loss": 0.3189, "step": 1232 }, { "epoch": 0.3, "learning_rate": 1.989332007890816e-05, "loss": 0.3337, "step": 1234 }, { "epoch": 0.3, "learning_rate": 1.9892744464758295e-05, "loss": 0.3012, "step": 1236 }, { "epoch": 0.3, "learning_rate": 1.9892167310232473e-05, "loss": 0.3007, "step": 1238 }, { "epoch": 0.3, "learning_rate": 1.9891588615420558e-05, "loss": 0.3122, "step": 1240 }, { "epoch": 0.3, "learning_rate": 1.989100838041266e-05, "loss": 0.3058, "step": 1242 }, { "epoch": 0.3, "learning_rate": 1.9890426605299125e-05, "loss": 0.3151, "step": 1244 }, { "epoch": 0.3, "learning_rate": 1.9889843290170535e-05, "loss": 0.2995, "step": 1246 }, { "epoch": 0.3, "learning_rate": 1.9889258435117723e-05, "loss": 0.3267, "step": 1248 }, { "epoch": 0.3, "learning_rate": 1.9888672040231753e-05, "loss": 0.3216, "step": 1250 }, { "epoch": 0.31, "learning_rate": 1.988808410560393e-05, "loss": 0.3212, "step": 1252 }, { "epoch": 0.31, "learning_rate": 1.9887494631325805e-05, "loss": 0.303, "step": 1254 }, { "epoch": 0.31, "learning_rate": 1.9886903617489156e-05, "loss": 0.3135, "step": 1256 }, { "epoch": 0.31, "learning_rate": 1.9886311064186012e-05, "loss": 0.3279, "step": 1258 }, { "epoch": 0.31, "learning_rate": 1.988571697150864e-05, "loss": 0.3013, "step": 1260 }, { "epoch": 0.31, "learning_rate": 1.988512133954954e-05, "loss": 0.3068, "step": 1262 }, { "epoch": 0.31, "learning_rate": 1.988452416840146e-05, "loss": 0.3294, "step": 1264 }, { "epoch": 0.31, "learning_rate": 1.9883925458157386e-05, "loss": 0.3274, "step": 1266 }, { "epoch": 0.31, "learning_rate": 1.9883325208910537e-05, "loss": 0.3023, "step": 1268 }, { "epoch": 0.31, "learning_rate": 1.988272342075438e-05, "loss": 0.327, "step": 1270 }, { "epoch": 0.31, "learning_rate": 1.9882120093782616e-05, "loss": 0.2948, "step": 1272 }, { "epoch": 0.31, "learning_rate": 1.9881515228089188e-05, "loss": 0.3254, "step": 1274 }, { "epoch": 0.31, "learning_rate": 1.988090882376828e-05, "loss": 0.2967, "step": 1276 }, { "epoch": 0.31, "learning_rate": 1.988030088091431e-05, "loss": 0.2976, "step": 1278 }, { "epoch": 0.31, "learning_rate": 1.987969139962194e-05, "loss": 0.2674, "step": 1280 }, { "epoch": 0.31, "learning_rate": 1.9879080379986074e-05, "loss": 0.3203, "step": 1282 }, { "epoch": 0.31, "learning_rate": 1.9878467822101853e-05, "loss": 0.2864, "step": 1284 }, { "epoch": 0.31, "learning_rate": 1.9877853726064655e-05, "loss": 0.327, "step": 1286 }, { "epoch": 0.31, "learning_rate": 1.98772380919701e-05, "loss": 0.3233, "step": 1288 }, { "epoch": 0.31, "learning_rate": 1.9876620919914044e-05, "loss": 0.3, "step": 1290 }, { "epoch": 0.31, "learning_rate": 1.9876002209992586e-05, "loss": 0.3088, "step": 1292 }, { "epoch": 0.32, "learning_rate": 1.987538196230207e-05, "loss": 0.2999, "step": 1294 }, { "epoch": 0.32, "learning_rate": 1.9874760176939066e-05, "loss": 0.3079, "step": 1296 }, { "epoch": 0.32, "learning_rate": 1.9874136854000398e-05, "loss": 0.3162, "step": 1298 }, { "epoch": 0.32, "learning_rate": 1.9873511993583114e-05, "loss": 0.3089, "step": 1300 }, { "epoch": 0.32, "learning_rate": 1.987288559578451e-05, "loss": 0.3147, "step": 1302 }, { "epoch": 0.32, "learning_rate": 1.9872257660702126e-05, "loss": 0.3282, "step": 1304 }, { "epoch": 0.32, "learning_rate": 1.987162818843374e-05, "loss": 0.3074, "step": 1306 }, { "epoch": 0.32, "learning_rate": 1.9870997179077353e-05, "loss": 0.3278, "step": 1308 }, { "epoch": 0.32, "learning_rate": 1.987036463273123e-05, "loss": 0.3033, "step": 1310 }, { "epoch": 0.32, "learning_rate": 1.9869730549493857e-05, "loss": 0.3363, "step": 1312 }, { "epoch": 0.32, "learning_rate": 1.9869094929463967e-05, "loss": 0.3086, "step": 1314 }, { "epoch": 0.32, "learning_rate": 1.986845777274053e-05, "loss": 0.3084, "step": 1316 }, { "epoch": 0.32, "learning_rate": 1.9867819079422758e-05, "loss": 0.3092, "step": 1318 }, { "epoch": 0.32, "learning_rate": 1.98671788496101e-05, "loss": 0.2931, "step": 1320 }, { "epoch": 0.32, "learning_rate": 1.9866537083402243e-05, "loss": 0.3154, "step": 1322 }, { "epoch": 0.32, "learning_rate": 1.986589378089912e-05, "loss": 0.2928, "step": 1324 }, { "epoch": 0.32, "learning_rate": 1.986524894220089e-05, "loss": 0.3224, "step": 1326 }, { "epoch": 0.32, "learning_rate": 1.9864602567407962e-05, "loss": 0.2886, "step": 1328 }, { "epoch": 0.32, "learning_rate": 1.9863954656620987e-05, "loss": 0.3036, "step": 1330 }, { "epoch": 0.32, "learning_rate": 1.9863305209940843e-05, "loss": 0.3011, "step": 1332 }, { "epoch": 0.33, "learning_rate": 1.986265422746866e-05, "loss": 0.34, "step": 1334 }, { "epoch": 0.33, "learning_rate": 1.9862001709305793e-05, "loss": 0.3024, "step": 1336 }, { "epoch": 0.33, "learning_rate": 1.9861347655553852e-05, "loss": 0.3307, "step": 1338 }, { "epoch": 0.33, "learning_rate": 1.9860692066314676e-05, "loss": 0.2974, "step": 1340 }, { "epoch": 0.33, "learning_rate": 1.9860034941690342e-05, "loss": 0.3023, "step": 1342 }, { "epoch": 0.33, "learning_rate": 1.9859376281783168e-05, "loss": 0.3131, "step": 1344 }, { "epoch": 0.33, "learning_rate": 1.9858716086695723e-05, "loss": 0.3038, "step": 1346 }, { "epoch": 0.33, "learning_rate": 1.985805435653079e-05, "loss": 0.3196, "step": 1348 }, { "epoch": 0.33, "learning_rate": 1.9857391091391415e-05, "loss": 0.3102, "step": 1350 }, { "epoch": 0.33, "learning_rate": 1.9856726291380872e-05, "loss": 0.307, "step": 1352 }, { "epoch": 0.33, "learning_rate": 1.9856059956602675e-05, "loss": 0.2894, "step": 1354 }, { "epoch": 0.33, "learning_rate": 1.9855392087160576e-05, "loss": 0.3243, "step": 1356 }, { "epoch": 0.33, "learning_rate": 1.985472268315857e-05, "loss": 0.3037, "step": 1358 }, { "epoch": 0.33, "learning_rate": 1.9854051744700884e-05, "loss": 0.3132, "step": 1360 }, { "epoch": 0.33, "learning_rate": 1.9853379271891994e-05, "loss": 0.2907, "step": 1362 }, { "epoch": 0.33, "learning_rate": 1.9852705264836602e-05, "loss": 0.328, "step": 1364 }, { "epoch": 0.33, "learning_rate": 1.9852029723639663e-05, "loss": 0.2926, "step": 1366 }, { "epoch": 0.33, "learning_rate": 1.985135264840636e-05, "loss": 0.2987, "step": 1368 }, { "epoch": 0.33, "learning_rate": 1.9850674039242117e-05, "loss": 0.3277, "step": 1370 }, { "epoch": 0.33, "learning_rate": 1.9849993896252604e-05, "loss": 0.2985, "step": 1372 }, { "epoch": 0.33, "learning_rate": 1.9849312219543723e-05, "loss": 0.3122, "step": 1374 }, { "epoch": 0.34, "learning_rate": 1.9848629009221615e-05, "loss": 0.3129, "step": 1376 }, { "epoch": 0.34, "learning_rate": 1.984794426539266e-05, "loss": 0.3185, "step": 1378 }, { "epoch": 0.34, "learning_rate": 1.984725798816348e-05, "loss": 0.2946, "step": 1380 }, { "epoch": 0.34, "learning_rate": 1.984657017764093e-05, "loss": 0.3036, "step": 1382 }, { "epoch": 0.34, "learning_rate": 1.9845880833932113e-05, "loss": 0.2939, "step": 1384 }, { "epoch": 0.34, "learning_rate": 1.9845189957144358e-05, "loss": 0.3104, "step": 1386 }, { "epoch": 0.34, "learning_rate": 1.984449754738525e-05, "loss": 0.3117, "step": 1388 }, { "epoch": 0.34, "learning_rate": 1.9843803604762594e-05, "loss": 0.3189, "step": 1390 }, { "epoch": 0.34, "learning_rate": 1.9843108129384444e-05, "loss": 0.316, "step": 1392 }, { "epoch": 0.34, "learning_rate": 1.984241112135909e-05, "loss": 0.3162, "step": 1394 }, { "epoch": 0.34, "learning_rate": 1.984171258079507e-05, "loss": 0.3082, "step": 1396 }, { "epoch": 0.34, "learning_rate": 1.9841012507801136e-05, "loss": 0.27, "step": 1398 }, { "epoch": 0.34, "learning_rate": 1.9840310902486308e-05, "loss": 0.292, "step": 1400 }, { "epoch": 0.34, "learning_rate": 1.9839607764959827e-05, "loss": 0.2984, "step": 1402 }, { "epoch": 0.34, "learning_rate": 1.9838903095331175e-05, "loss": 0.3186, "step": 1404 }, { "epoch": 0.34, "learning_rate": 1.983819689371008e-05, "loss": 0.3414, "step": 1406 }, { "epoch": 0.34, "learning_rate": 1.9837489160206495e-05, "loss": 0.3212, "step": 1408 }, { "epoch": 0.34, "learning_rate": 1.9836779894930623e-05, "loss": 0.28, "step": 1410 }, { "epoch": 0.34, "learning_rate": 1.9836069097992906e-05, "loss": 0.2762, "step": 1412 }, { "epoch": 0.34, "learning_rate": 1.983535676950402e-05, "loss": 0.3076, "step": 1414 }, { "epoch": 0.35, "learning_rate": 1.9834642909574875e-05, "loss": 0.324, "step": 1416 }, { "epoch": 0.35, "learning_rate": 1.9833927518316625e-05, "loss": 0.305, "step": 1418 }, { "epoch": 0.35, "learning_rate": 1.9833210595840667e-05, "loss": 0.2914, "step": 1420 }, { "epoch": 0.35, "learning_rate": 1.983249214225863e-05, "loss": 0.277, "step": 1422 }, { "epoch": 0.35, "learning_rate": 1.9831772157682375e-05, "loss": 0.302, "step": 1424 }, { "epoch": 0.35, "learning_rate": 1.9831050642224017e-05, "loss": 0.3169, "step": 1426 }, { "epoch": 0.35, "learning_rate": 1.9830327595995898e-05, "loss": 0.3141, "step": 1428 }, { "epoch": 0.35, "learning_rate": 1.9829603019110607e-05, "loss": 0.3157, "step": 1430 }, { "epoch": 0.35, "learning_rate": 1.9828876911680962e-05, "loss": 0.2737, "step": 1432 }, { "epoch": 0.35, "learning_rate": 1.9828149273820017e-05, "loss": 0.339, "step": 1434 }, { "epoch": 0.35, "learning_rate": 1.9827420105641086e-05, "loss": 0.3179, "step": 1436 }, { "epoch": 0.35, "learning_rate": 1.9826689407257694e-05, "loss": 0.3278, "step": 1438 }, { "epoch": 0.35, "learning_rate": 1.9825957178783622e-05, "loss": 0.3079, "step": 1440 }, { "epoch": 0.35, "learning_rate": 1.982522342033288e-05, "loss": 0.2928, "step": 1442 }, { "epoch": 0.35, "learning_rate": 1.9824488132019717e-05, "loss": 0.3072, "step": 1444 }, { "epoch": 0.35, "learning_rate": 1.9823751313958634e-05, "loss": 0.3153, "step": 1446 }, { "epoch": 0.35, "learning_rate": 1.9823012966264353e-05, "loss": 0.3207, "step": 1448 }, { "epoch": 0.35, "learning_rate": 1.9822273089051834e-05, "loss": 0.3214, "step": 1450 }, { "epoch": 0.35, "learning_rate": 1.9821531682436293e-05, "loss": 0.3116, "step": 1452 }, { "epoch": 0.35, "learning_rate": 1.9820788746533165e-05, "loss": 0.3103, "step": 1454 }, { "epoch": 0.35, "learning_rate": 1.9820044281458136e-05, "loss": 0.3026, "step": 1456 }, { "epoch": 0.36, "learning_rate": 1.9819298287327114e-05, "loss": 0.3033, "step": 1458 }, { "epoch": 0.36, "learning_rate": 1.9818550764256273e-05, "loss": 0.3103, "step": 1460 }, { "epoch": 0.36, "learning_rate": 1.981780171236199e-05, "loss": 0.2929, "step": 1462 }, { "epoch": 0.36, "learning_rate": 1.9817051131760915e-05, "loss": 0.3138, "step": 1464 }, { "epoch": 0.36, "learning_rate": 1.981629902256991e-05, "loss": 0.3056, "step": 1466 }, { "epoch": 0.36, "learning_rate": 1.9815545384906083e-05, "loss": 0.3235, "step": 1468 }, { "epoch": 0.36, "learning_rate": 1.9814790218886783e-05, "loss": 0.3198, "step": 1470 }, { "epoch": 0.36, "learning_rate": 1.98140335246296e-05, "loss": 0.3209, "step": 1472 }, { "epoch": 0.36, "learning_rate": 1.9813275302252347e-05, "loss": 0.3038, "step": 1474 }, { "epoch": 0.36, "learning_rate": 1.9812515551873093e-05, "loss": 0.3263, "step": 1476 }, { "epoch": 0.36, "learning_rate": 1.9811754273610138e-05, "loss": 0.3314, "step": 1478 }, { "epoch": 0.36, "learning_rate": 1.9810991467582013e-05, "loss": 0.2907, "step": 1480 }, { "epoch": 0.36, "learning_rate": 1.9810227133907492e-05, "loss": 0.3088, "step": 1482 }, { "epoch": 0.36, "learning_rate": 1.9809461272705595e-05, "loss": 0.3254, "step": 1484 }, { "epoch": 0.36, "learning_rate": 1.9808693884095568e-05, "loss": 0.3263, "step": 1486 }, { "epoch": 0.36, "learning_rate": 1.9807924968196897e-05, "loss": 0.3091, "step": 1488 }, { "epoch": 0.36, "learning_rate": 1.9807154525129314e-05, "loss": 0.3114, "step": 1490 }, { "epoch": 0.36, "learning_rate": 1.9806382555012777e-05, "loss": 0.2993, "step": 1492 }, { "epoch": 0.36, "learning_rate": 1.980560905796749e-05, "loss": 0.3116, "step": 1494 }, { "epoch": 0.36, "learning_rate": 1.9804834034113893e-05, "loss": 0.2834, "step": 1496 }, { "epoch": 0.37, "learning_rate": 1.9804057483572663e-05, "loss": 0.2976, "step": 1498 }, { "epoch": 0.37, "learning_rate": 1.9803279406464714e-05, "loss": 0.3019, "step": 1500 }, { "epoch": 0.37, "learning_rate": 1.98024998029112e-05, "loss": 0.3148, "step": 1502 }, { "epoch": 0.37, "learning_rate": 1.980171867303351e-05, "loss": 0.2935, "step": 1504 }, { "epoch": 0.37, "learning_rate": 1.9800936016953277e-05, "loss": 0.3229, "step": 1506 }, { "epoch": 0.37, "learning_rate": 1.9800151834792355e-05, "loss": 0.3158, "step": 1508 }, { "epoch": 0.37, "learning_rate": 1.9799366126672858e-05, "loss": 0.2974, "step": 1510 }, { "epoch": 0.37, "learning_rate": 1.979857889271712e-05, "loss": 0.2913, "step": 1512 }, { "epoch": 0.37, "learning_rate": 1.9797790133047722e-05, "loss": 0.2826, "step": 1514 }, { "epoch": 0.37, "learning_rate": 1.9796999847787485e-05, "loss": 0.2959, "step": 1516 }, { "epoch": 0.37, "learning_rate": 1.9796208037059454e-05, "loss": 0.327, "step": 1518 }, { "epoch": 0.37, "learning_rate": 1.9795414700986922e-05, "loss": 0.3133, "step": 1520 }, { "epoch": 0.37, "learning_rate": 1.9794619839693417e-05, "loss": 0.2951, "step": 1522 }, { "epoch": 0.37, "learning_rate": 1.9793823453302712e-05, "loss": 0.3277, "step": 1524 }, { "epoch": 0.37, "learning_rate": 1.9793025541938805e-05, "loss": 0.3191, "step": 1526 }, { "epoch": 0.37, "learning_rate": 1.9792226105725935e-05, "loss": 0.3309, "step": 1528 }, { "epoch": 0.37, "learning_rate": 1.9791425144788583e-05, "loss": 0.2971, "step": 1530 }, { "epoch": 0.37, "learning_rate": 1.9790622659251465e-05, "loss": 0.316, "step": 1532 }, { "epoch": 0.37, "learning_rate": 1.9789818649239533e-05, "loss": 0.3063, "step": 1534 }, { "epoch": 0.37, "learning_rate": 1.9789013114877978e-05, "loss": 0.3034, "step": 1536 }, { "epoch": 0.37, "learning_rate": 1.978820605629223e-05, "loss": 0.3228, "step": 1538 }, { "epoch": 0.38, "learning_rate": 1.9787397473607947e-05, "loss": 0.3186, "step": 1540 }, { "epoch": 0.38, "learning_rate": 1.978658736695104e-05, "loss": 0.3237, "step": 1542 }, { "epoch": 0.38, "learning_rate": 1.9785775736447644e-05, "loss": 0.2878, "step": 1544 }, { "epoch": 0.38, "learning_rate": 1.978496258222414e-05, "loss": 0.288, "step": 1546 }, { "epoch": 0.38, "learning_rate": 1.978414790440714e-05, "loss": 0.3085, "step": 1548 }, { "epoch": 0.38, "learning_rate": 1.978333170312349e-05, "loss": 0.3123, "step": 1550 }, { "epoch": 0.38, "learning_rate": 1.978251397850029e-05, "loss": 0.337, "step": 1552 }, { "epoch": 0.38, "learning_rate": 1.9781694730664855e-05, "loss": 0.2834, "step": 1554 }, { "epoch": 0.38, "learning_rate": 1.9780873959744754e-05, "loss": 0.3375, "step": 1556 }, { "epoch": 0.38, "learning_rate": 1.9780051665867792e-05, "loss": 0.3127, "step": 1558 }, { "epoch": 0.38, "learning_rate": 1.9779227849161998e-05, "loss": 0.3209, "step": 1560 }, { "epoch": 0.38, "learning_rate": 1.9778402509755645e-05, "loss": 0.3221, "step": 1562 }, { "epoch": 0.38, "learning_rate": 1.9777575647777256e-05, "loss": 0.3113, "step": 1564 }, { "epoch": 0.38, "learning_rate": 1.9776747263355566e-05, "loss": 0.3399, "step": 1566 }, { "epoch": 0.38, "learning_rate": 1.9775917356619575e-05, "loss": 0.3173, "step": 1568 }, { "epoch": 0.38, "learning_rate": 1.9775085927698496e-05, "loss": 0.3113, "step": 1570 }, { "epoch": 0.38, "learning_rate": 1.977425297672179e-05, "loss": 0.3062, "step": 1572 }, { "epoch": 0.38, "learning_rate": 1.9773418503819153e-05, "loss": 0.3285, "step": 1574 }, { "epoch": 0.38, "learning_rate": 1.9772582509120525e-05, "loss": 0.3211, "step": 1576 }, { "epoch": 0.38, "learning_rate": 1.977174499275607e-05, "loss": 0.3244, "step": 1578 }, { "epoch": 0.38, "learning_rate": 1.97709059548562e-05, "loss": 0.3001, "step": 1580 }, { "epoch": 0.39, "learning_rate": 1.9770065395551562e-05, "loss": 0.3094, "step": 1582 }, { "epoch": 0.39, "learning_rate": 1.9769223314973027e-05, "loss": 0.3091, "step": 1584 }, { "epoch": 0.39, "learning_rate": 1.9768379713251725e-05, "loss": 0.3053, "step": 1586 }, { "epoch": 0.39, "learning_rate": 1.9767534590519e-05, "loss": 0.3255, "step": 1588 }, { "epoch": 0.39, "learning_rate": 1.976668794690646e-05, "loss": 0.3164, "step": 1590 }, { "epoch": 0.39, "learning_rate": 1.9765839782545916e-05, "loss": 0.3025, "step": 1592 }, { "epoch": 0.39, "learning_rate": 1.9764990097569445e-05, "loss": 0.2943, "step": 1594 }, { "epoch": 0.39, "learning_rate": 1.9764138892109344e-05, "loss": 0.3292, "step": 1596 }, { "epoch": 0.39, "learning_rate": 1.9763286166298157e-05, "loss": 0.3143, "step": 1598 }, { "epoch": 0.39, "learning_rate": 1.976243192026866e-05, "loss": 0.2977, "step": 1600 }, { "epoch": 0.39, "learning_rate": 1.9761576154153855e-05, "loss": 0.3237, "step": 1602 }, { "epoch": 0.39, "learning_rate": 1.9760718868087006e-05, "loss": 0.3173, "step": 1604 }, { "epoch": 0.39, "learning_rate": 1.975986006220159e-05, "loss": 0.324, "step": 1606 }, { "epoch": 0.39, "learning_rate": 1.9758999736631336e-05, "loss": 0.3094, "step": 1608 }, { "epoch": 0.39, "learning_rate": 1.9758137891510194e-05, "loss": 0.3136, "step": 1610 }, { "epoch": 0.39, "learning_rate": 1.9757274526972367e-05, "loss": 0.2919, "step": 1612 }, { "epoch": 0.39, "learning_rate": 1.9756409643152287e-05, "loss": 0.2911, "step": 1614 }, { "epoch": 0.39, "learning_rate": 1.975554324018462e-05, "loss": 0.3079, "step": 1616 }, { "epoch": 0.39, "learning_rate": 1.9754675318204275e-05, "loss": 0.3119, "step": 1618 }, { "epoch": 0.39, "learning_rate": 1.975380587734639e-05, "loss": 0.294, "step": 1620 }, { "epoch": 0.4, "learning_rate": 1.9752934917746346e-05, "loss": 0.3157, "step": 1622 }, { "epoch": 0.4, "learning_rate": 1.975206243953976e-05, "loss": 0.3027, "step": 1624 }, { "epoch": 0.4, "learning_rate": 1.9751188442862485e-05, "loss": 0.2889, "step": 1626 }, { "epoch": 0.4, "learning_rate": 1.97503129278506e-05, "loss": 0.2995, "step": 1628 }, { "epoch": 0.4, "learning_rate": 1.9749435894640438e-05, "loss": 0.2985, "step": 1630 }, { "epoch": 0.4, "learning_rate": 1.9748557343368554e-05, "loss": 0.2941, "step": 1632 }, { "epoch": 0.4, "learning_rate": 1.9747677274171745e-05, "loss": 0.2984, "step": 1634 }, { "epoch": 0.4, "learning_rate": 1.9746795687187054e-05, "loss": 0.3193, "step": 1636 }, { "epoch": 0.4, "learning_rate": 1.974591258255174e-05, "loss": 0.3129, "step": 1638 }, { "epoch": 0.4, "learning_rate": 1.9745027960403312e-05, "loss": 0.3223, "step": 1640 }, { "epoch": 0.4, "learning_rate": 1.974414182087952e-05, "loss": 0.3171, "step": 1642 }, { "epoch": 0.4, "learning_rate": 1.974325416411833e-05, "loss": 0.3139, "step": 1644 }, { "epoch": 0.4, "learning_rate": 1.9742364990257966e-05, "loss": 0.3186, "step": 1646 }, { "epoch": 0.4, "learning_rate": 1.9741474299436877e-05, "loss": 0.3171, "step": 1648 }, { "epoch": 0.4, "learning_rate": 1.974058209179375e-05, "loss": 0.275, "step": 1650 }, { "epoch": 0.4, "learning_rate": 1.9739688367467508e-05, "loss": 0.2959, "step": 1652 }, { "epoch": 0.4, "learning_rate": 1.973879312659731e-05, "loss": 0.3034, "step": 1654 }, { "epoch": 0.4, "learning_rate": 1.9737896369322555e-05, "loss": 0.3057, "step": 1656 }, { "epoch": 0.4, "learning_rate": 1.973699809578287e-05, "loss": 0.3162, "step": 1658 }, { "epoch": 0.4, "learning_rate": 1.973609830611813e-05, "loss": 0.3393, "step": 1660 }, { "epoch": 0.4, "learning_rate": 1.9735197000468435e-05, "loss": 0.3355, "step": 1662 }, { "epoch": 0.41, "learning_rate": 1.9734294178974123e-05, "loss": 0.3039, "step": 1664 }, { "epoch": 0.41, "learning_rate": 1.9733389841775773e-05, "loss": 0.2998, "step": 1666 }, { "epoch": 0.41, "learning_rate": 1.9732483989014197e-05, "loss": 0.3127, "step": 1668 }, { "epoch": 0.41, "learning_rate": 1.9731576620830444e-05, "loss": 0.2932, "step": 1670 }, { "epoch": 0.41, "learning_rate": 1.9730667737365796e-05, "loss": 0.3171, "step": 1672 }, { "epoch": 0.41, "learning_rate": 1.9729757338761775e-05, "loss": 0.3, "step": 1674 }, { "epoch": 0.41, "learning_rate": 1.9728845425160137e-05, "loss": 0.3099, "step": 1676 }, { "epoch": 0.41, "learning_rate": 1.972793199670287e-05, "loss": 0.2798, "step": 1678 }, { "epoch": 0.41, "learning_rate": 1.9727017053532207e-05, "loss": 0.3084, "step": 1680 }, { "epoch": 0.41, "learning_rate": 1.972610059579061e-05, "loss": 0.3135, "step": 1682 }, { "epoch": 0.41, "learning_rate": 1.9725182623620777e-05, "loss": 0.3098, "step": 1684 }, { "epoch": 0.41, "learning_rate": 1.9724263137165648e-05, "loss": 0.2916, "step": 1686 }, { "epoch": 0.41, "learning_rate": 1.9723342136568385e-05, "loss": 0.319, "step": 1688 }, { "epoch": 0.41, "learning_rate": 1.9722419621972405e-05, "loss": 0.2902, "step": 1690 }, { "epoch": 0.41, "learning_rate": 1.9721495593521343e-05, "loss": 0.3347, "step": 1692 }, { "epoch": 0.41, "learning_rate": 1.9720570051359084e-05, "loss": 0.3115, "step": 1694 }, { "epoch": 0.41, "learning_rate": 1.9719642995629735e-05, "loss": 0.3027, "step": 1696 }, { "epoch": 0.41, "learning_rate": 1.9718714426477646e-05, "loss": 0.3016, "step": 1698 }, { "epoch": 0.41, "learning_rate": 1.9717784344047408e-05, "loss": 0.3083, "step": 1700 }, { "epoch": 0.41, "learning_rate": 1.971685274848384e-05, "loss": 0.3114, "step": 1702 }, { "epoch": 0.42, "learning_rate": 1.9715919639931996e-05, "loss": 0.3163, "step": 1704 }, { "epoch": 0.42, "learning_rate": 1.9714985018537173e-05, "loss": 0.3084, "step": 1706 }, { "epoch": 0.42, "learning_rate": 1.9714048884444894e-05, "loss": 0.3248, "step": 1708 }, { "epoch": 0.42, "learning_rate": 1.971311123780092e-05, "loss": 0.3078, "step": 1710 }, { "epoch": 0.42, "learning_rate": 1.971217207875126e-05, "loss": 0.3056, "step": 1712 }, { "epoch": 0.42, "learning_rate": 1.9711231407442136e-05, "loss": 0.3068, "step": 1714 }, { "epoch": 0.42, "learning_rate": 1.9710289224020028e-05, "loss": 0.3245, "step": 1716 }, { "epoch": 0.42, "learning_rate": 1.9709345528631637e-05, "loss": 0.2868, "step": 1718 }, { "epoch": 0.42, "learning_rate": 1.9708400321423897e-05, "loss": 0.3025, "step": 1720 }, { "epoch": 0.42, "learning_rate": 1.9707453602543994e-05, "loss": 0.2995, "step": 1722 }, { "epoch": 0.42, "learning_rate": 1.9706505372139337e-05, "loss": 0.3032, "step": 1724 }, { "epoch": 0.42, "learning_rate": 1.970555563035757e-05, "loss": 0.3164, "step": 1726 }, { "epoch": 0.42, "learning_rate": 1.9704604377346577e-05, "loss": 0.2949, "step": 1728 }, { "epoch": 0.42, "learning_rate": 1.9703651613254475e-05, "loss": 0.318, "step": 1730 }, { "epoch": 0.42, "learning_rate": 1.9702697338229613e-05, "loss": 0.3144, "step": 1732 }, { "epoch": 0.42, "learning_rate": 1.9701741552420587e-05, "loss": 0.2944, "step": 1734 }, { "epoch": 0.42, "learning_rate": 1.9700784255976212e-05, "loss": 0.3185, "step": 1736 }, { "epoch": 0.42, "learning_rate": 1.9699825449045548e-05, "loss": 0.3026, "step": 1738 }, { "epoch": 0.42, "learning_rate": 1.9698865131777896e-05, "loss": 0.3052, "step": 1740 }, { "epoch": 0.42, "learning_rate": 1.9697903304322775e-05, "loss": 0.314, "step": 1742 }, { "epoch": 0.42, "learning_rate": 1.9696939966829957e-05, "loss": 0.3105, "step": 1744 }, { "epoch": 0.43, "learning_rate": 1.9695975119449434e-05, "loss": 0.3298, "step": 1746 }, { "epoch": 0.43, "learning_rate": 1.9695008762331444e-05, "loss": 0.3048, "step": 1748 }, { "epoch": 0.43, "learning_rate": 1.9694040895626452e-05, "loss": 0.2939, "step": 1750 }, { "epoch": 0.43, "learning_rate": 1.969307151948517e-05, "loss": 0.3049, "step": 1752 }, { "epoch": 0.43, "learning_rate": 1.969210063405853e-05, "loss": 0.2966, "step": 1754 }, { "epoch": 0.43, "learning_rate": 1.969112823949771e-05, "loss": 0.3224, "step": 1756 }, { "epoch": 0.43, "learning_rate": 1.969015433595412e-05, "loss": 0.3018, "step": 1758 }, { "epoch": 0.43, "learning_rate": 1.9689178923579404e-05, "loss": 0.2984, "step": 1760 }, { "epoch": 0.43, "learning_rate": 1.9688202002525437e-05, "loss": 0.2696, "step": 1762 }, { "epoch": 0.43, "learning_rate": 1.9687223572944337e-05, "loss": 0.2899, "step": 1764 }, { "epoch": 0.43, "learning_rate": 1.968624363498845e-05, "loss": 0.3124, "step": 1766 }, { "epoch": 0.43, "learning_rate": 1.9685262188810365e-05, "loss": 0.3092, "step": 1768 }, { "epoch": 0.43, "learning_rate": 1.9684279234562894e-05, "loss": 0.3199, "step": 1770 }, { "epoch": 0.43, "learning_rate": 1.9683294772399094e-05, "loss": 0.3017, "step": 1772 }, { "epoch": 0.43, "learning_rate": 1.9682308802472256e-05, "loss": 0.3229, "step": 1774 }, { "epoch": 0.43, "learning_rate": 1.9681321324935897e-05, "loss": 0.3124, "step": 1776 }, { "epoch": 0.43, "learning_rate": 1.968033233994378e-05, "loss": 0.3186, "step": 1778 }, { "epoch": 0.43, "learning_rate": 1.9679341847649894e-05, "loss": 0.3194, "step": 1780 }, { "epoch": 0.43, "learning_rate": 1.967834984820847e-05, "loss": 0.3029, "step": 1782 }, { "epoch": 0.43, "learning_rate": 1.9677356341773968e-05, "loss": 0.314, "step": 1784 }, { "epoch": 0.44, "learning_rate": 1.967636132850108e-05, "loss": 0.2938, "step": 1786 }, { "epoch": 0.44, "learning_rate": 1.9675364808544745e-05, "loss": 0.3223, "step": 1788 }, { "epoch": 0.44, "learning_rate": 1.9674366782060126e-05, "loss": 0.3082, "step": 1790 }, { "epoch": 0.44, "learning_rate": 1.9673367249202623e-05, "loss": 0.309, "step": 1792 }, { "epoch": 0.44, "learning_rate": 1.9672366210127863e-05, "loss": 0.3034, "step": 1794 }, { "epoch": 0.44, "learning_rate": 1.9671363664991728e-05, "loss": 0.2849, "step": 1796 }, { "epoch": 0.44, "learning_rate": 1.967035961395032e-05, "loss": 0.2915, "step": 1798 }, { "epoch": 0.44, "learning_rate": 1.966935405715997e-05, "loss": 0.3152, "step": 1800 }, { "epoch": 0.44, "learning_rate": 1.9668346994777257e-05, "loss": 0.3072, "step": 1802 }, { "epoch": 0.44, "learning_rate": 1.9667338426958986e-05, "loss": 0.3013, "step": 1804 }, { "epoch": 0.44, "learning_rate": 1.96663283538622e-05, "loss": 0.303, "step": 1806 }, { "epoch": 0.44, "learning_rate": 1.9665316775644174e-05, "loss": 0.2886, "step": 1808 }, { "epoch": 0.44, "learning_rate": 1.9664303692462417e-05, "loss": 0.3094, "step": 1810 }, { "epoch": 0.44, "learning_rate": 1.9663289104474675e-05, "loss": 0.3039, "step": 1812 }, { "epoch": 0.44, "learning_rate": 1.966227301183893e-05, "loss": 0.3065, "step": 1814 }, { "epoch": 0.44, "learning_rate": 1.9661255414713394e-05, "loss": 0.3259, "step": 1816 }, { "epoch": 0.44, "learning_rate": 1.9660236313256508e-05, "loss": 0.3194, "step": 1818 }, { "epoch": 0.44, "learning_rate": 1.9659215707626968e-05, "loss": 0.3159, "step": 1820 }, { "epoch": 0.44, "learning_rate": 1.9658193597983673e-05, "loss": 0.3046, "step": 1822 }, { "epoch": 0.44, "learning_rate": 1.9657169984485785e-05, "loss": 0.3013, "step": 1824 }, { "epoch": 0.44, "learning_rate": 1.965614486729269e-05, "loss": 0.3137, "step": 1826 }, { "epoch": 0.45, "learning_rate": 1.9655118246563996e-05, "loss": 0.2932, "step": 1828 }, { "epoch": 0.45, "learning_rate": 1.9654090122459566e-05, "loss": 0.3086, "step": 1830 }, { "epoch": 0.45, "learning_rate": 1.965306049513948e-05, "loss": 0.2965, "step": 1832 }, { "epoch": 0.45, "learning_rate": 1.965202936476406e-05, "loss": 0.297, "step": 1834 }, { "epoch": 0.45, "learning_rate": 1.9650996731493866e-05, "loss": 0.308, "step": 1836 }, { "epoch": 0.45, "learning_rate": 1.964996259548968e-05, "loss": 0.3188, "step": 1838 }, { "epoch": 0.45, "learning_rate": 1.964892695691253e-05, "loss": 0.309, "step": 1840 }, { "epoch": 0.45, "learning_rate": 1.9647889815923672e-05, "loss": 0.3366, "step": 1842 }, { "epoch": 0.45, "learning_rate": 1.9646851172684593e-05, "loss": 0.2949, "step": 1844 }, { "epoch": 0.45, "learning_rate": 1.9645811027357017e-05, "loss": 0.3055, "step": 1846 }, { "epoch": 0.45, "learning_rate": 1.9644769380102912e-05, "loss": 0.2972, "step": 1848 }, { "epoch": 0.45, "learning_rate": 1.964372623108446e-05, "loss": 0.3134, "step": 1850 }, { "epoch": 0.45, "learning_rate": 1.9642681580464095e-05, "loss": 0.2692, "step": 1852 }, { "epoch": 0.45, "learning_rate": 1.9641635428404475e-05, "loss": 0.3146, "step": 1854 }, { "epoch": 0.45, "learning_rate": 1.964058777506849e-05, "loss": 0.3048, "step": 1856 }, { "epoch": 0.45, "learning_rate": 1.9639538620619266e-05, "loss": 0.3155, "step": 1858 }, { "epoch": 0.45, "learning_rate": 1.9638487965220176e-05, "loss": 0.3291, "step": 1860 }, { "epoch": 0.45, "learning_rate": 1.96374358090348e-05, "loss": 0.2986, "step": 1862 }, { "epoch": 0.45, "learning_rate": 1.963638215222698e-05, "loss": 0.2878, "step": 1864 }, { "epoch": 0.45, "learning_rate": 1.9635326994960773e-05, "loss": 0.3019, "step": 1866 }, { "epoch": 0.46, "learning_rate": 1.9634270337400474e-05, "loss": 0.2835, "step": 1868 }, { "epoch": 0.46, "learning_rate": 1.9633212179710613e-05, "loss": 0.2899, "step": 1870 }, { "epoch": 0.46, "learning_rate": 1.9632152522055953e-05, "loss": 0.2793, "step": 1872 }, { "epoch": 0.46, "learning_rate": 1.9631091364601495e-05, "loss": 0.29, "step": 1874 }, { "epoch": 0.46, "learning_rate": 1.9630028707512462e-05, "loss": 0.3209, "step": 1876 }, { "epoch": 0.46, "learning_rate": 1.9628964550954325e-05, "loss": 0.3049, "step": 1878 }, { "epoch": 0.46, "learning_rate": 1.9627898895092773e-05, "loss": 0.3086, "step": 1880 }, { "epoch": 0.46, "learning_rate": 1.9626831740093745e-05, "loss": 0.3065, "step": 1882 }, { "epoch": 0.46, "learning_rate": 1.9625763086123403e-05, "loss": 0.3178, "step": 1884 }, { "epoch": 0.46, "learning_rate": 1.9624692933348142e-05, "loss": 0.3102, "step": 1886 }, { "epoch": 0.46, "learning_rate": 1.96236212819346e-05, "loss": 0.2425, "step": 1888 }, { "epoch": 0.46, "learning_rate": 1.962254813204963e-05, "loss": 0.2794, "step": 1890 }, { "epoch": 0.46, "learning_rate": 1.9621473483860335e-05, "loss": 0.3106, "step": 1892 }, { "epoch": 0.46, "learning_rate": 1.962039733753405e-05, "loss": 0.3126, "step": 1894 }, { "epoch": 0.46, "learning_rate": 1.9619319693238333e-05, "loss": 0.3055, "step": 1896 }, { "epoch": 0.46, "learning_rate": 1.9618240551140985e-05, "loss": 0.2972, "step": 1898 }, { "epoch": 0.46, "learning_rate": 1.9617159911410037e-05, "loss": 0.2988, "step": 1900 }, { "epoch": 0.46, "learning_rate": 1.961607777421375e-05, "loss": 0.2913, "step": 1902 }, { "epoch": 0.46, "learning_rate": 1.9614994139720624e-05, "loss": 0.3034, "step": 1904 }, { "epoch": 0.46, "learning_rate": 1.9613909008099387e-05, "loss": 0.3074, "step": 1906 }, { "epoch": 0.46, "learning_rate": 1.9612822379519004e-05, "loss": 0.3059, "step": 1908 }, { "epoch": 0.47, "learning_rate": 1.961173425414867e-05, "loss": 0.3056, "step": 1910 }, { "epoch": 0.47, "learning_rate": 1.9610644632157814e-05, "loss": 0.2983, "step": 1912 }, { "epoch": 0.47, "learning_rate": 1.96095535137161e-05, "loss": 0.2777, "step": 1914 }, { "epoch": 0.47, "learning_rate": 1.9608460898993425e-05, "loss": 0.2874, "step": 1916 }, { "epoch": 0.47, "learning_rate": 1.9607366788159914e-05, "loss": 0.2735, "step": 1918 }, { "epoch": 0.47, "learning_rate": 1.960627118138593e-05, "loss": 0.2974, "step": 1920 }, { "epoch": 0.47, "learning_rate": 1.960517407884207e-05, "loss": 0.3067, "step": 1922 }, { "epoch": 0.47, "learning_rate": 1.9604075480699155e-05, "loss": 0.2966, "step": 1924 }, { "epoch": 0.47, "learning_rate": 1.960297538712825e-05, "loss": 0.2996, "step": 1926 }, { "epoch": 0.47, "learning_rate": 1.9601873798300647e-05, "loss": 0.2881, "step": 1928 }, { "epoch": 0.47, "learning_rate": 1.960077071438787e-05, "loss": 0.2806, "step": 1930 }, { "epoch": 0.47, "learning_rate": 1.959966613556168e-05, "loss": 0.2916, "step": 1932 }, { "epoch": 0.47, "learning_rate": 1.9598560061994067e-05, "loss": 0.3156, "step": 1934 }, { "epoch": 0.47, "learning_rate": 1.9597452493857253e-05, "loss": 0.3111, "step": 1936 }, { "epoch": 0.47, "learning_rate": 1.95963434313237e-05, "loss": 0.3016, "step": 1938 }, { "epoch": 0.47, "learning_rate": 1.9595232874566098e-05, "loss": 0.3144, "step": 1940 }, { "epoch": 0.47, "learning_rate": 1.959412082375736e-05, "loss": 0.3074, "step": 1942 }, { "epoch": 0.47, "learning_rate": 1.9593007279070654e-05, "loss": 0.3026, "step": 1944 }, { "epoch": 0.47, "learning_rate": 1.9591892240679353e-05, "loss": 0.2951, "step": 1946 }, { "epoch": 0.47, "learning_rate": 1.959077570875709e-05, "loss": 0.3124, "step": 1948 }, { "epoch": 0.48, "learning_rate": 1.9589657683477708e-05, "loss": 0.2941, "step": 1950 }, { "epoch": 0.48, "learning_rate": 1.9588538165015297e-05, "loss": 0.298, "step": 1952 }, { "epoch": 0.48, "learning_rate": 1.958741715354417e-05, "loss": 0.3097, "step": 1954 }, { "epoch": 0.48, "learning_rate": 1.9586294649238885e-05, "loss": 0.2838, "step": 1956 }, { "epoch": 0.48, "learning_rate": 1.958517065227422e-05, "loss": 0.3099, "step": 1958 }, { "epoch": 0.48, "learning_rate": 1.9584045162825188e-05, "loss": 0.3099, "step": 1960 }, { "epoch": 0.48, "learning_rate": 1.9582918181067043e-05, "loss": 0.2977, "step": 1962 }, { "epoch": 0.48, "learning_rate": 1.9581789707175254e-05, "loss": 0.3225, "step": 1964 }, { "epoch": 0.48, "learning_rate": 1.9580659741325544e-05, "loss": 0.3154, "step": 1966 }, { "epoch": 0.48, "learning_rate": 1.9579528283693846e-05, "loss": 0.303, "step": 1968 }, { "epoch": 0.48, "learning_rate": 1.9578395334456348e-05, "loss": 0.2989, "step": 1970 }, { "epoch": 0.48, "learning_rate": 1.957726089378945e-05, "loss": 0.3123, "step": 1972 }, { "epoch": 0.48, "learning_rate": 1.95761249618698e-05, "loss": 0.3198, "step": 1974 }, { "epoch": 0.48, "learning_rate": 1.957498753887427e-05, "loss": 0.2754, "step": 1976 }, { "epoch": 0.48, "learning_rate": 1.957384862497996e-05, "loss": 0.2932, "step": 1978 }, { "epoch": 0.48, "learning_rate": 1.9572708220364212e-05, "loss": 0.3162, "step": 1980 }, { "epoch": 0.48, "learning_rate": 1.9571566325204593e-05, "loss": 0.31, "step": 1982 }, { "epoch": 0.48, "learning_rate": 1.957042293967891e-05, "loss": 0.3051, "step": 1984 }, { "epoch": 0.48, "learning_rate": 1.9569278063965193e-05, "loss": 0.2821, "step": 1986 }, { "epoch": 0.48, "learning_rate": 1.9568131698241708e-05, "loss": 0.3124, "step": 1988 }, { "epoch": 0.48, "learning_rate": 1.9566983842686954e-05, "loss": 0.2982, "step": 1990 }, { "epoch": 0.49, "learning_rate": 1.956583449747966e-05, "loss": 0.3054, "step": 1992 }, { "epoch": 0.49, "learning_rate": 1.9564683662798788e-05, "loss": 0.3064, "step": 1994 }, { "epoch": 0.49, "learning_rate": 1.9563531338823528e-05, "loss": 0.2784, "step": 1996 }, { "epoch": 0.49, "learning_rate": 1.9562377525733314e-05, "loss": 0.297, "step": 1998 }, { "epoch": 0.49, "learning_rate": 1.95612222237078e-05, "loss": 0.279, "step": 2000 }, { "epoch": 0.49, "learning_rate": 1.956006543292687e-05, "loss": 0.2912, "step": 2002 }, { "epoch": 0.49, "learning_rate": 1.9558907153570654e-05, "loss": 0.324, "step": 2004 }, { "epoch": 0.49, "learning_rate": 1.95577473858195e-05, "loss": 0.2992, "step": 2006 }, { "epoch": 0.49, "learning_rate": 1.9556586129853993e-05, "loss": 0.2856, "step": 2008 }, { "epoch": 0.49, "learning_rate": 1.9555423385854948e-05, "loss": 0.2596, "step": 2010 }, { "epoch": 0.49, "learning_rate": 1.9554259154003415e-05, "loss": 0.2967, "step": 2012 }, { "epoch": 0.49, "learning_rate": 1.9553093434480673e-05, "loss": 0.3143, "step": 2014 }, { "epoch": 0.49, "learning_rate": 1.9551926227468238e-05, "loss": 0.3187, "step": 2016 }, { "epoch": 0.49, "learning_rate": 1.9550757533147845e-05, "loss": 0.3061, "step": 2018 }, { "epoch": 0.49, "learning_rate": 1.9549587351701474e-05, "loss": 0.3104, "step": 2020 }, { "epoch": 0.49, "learning_rate": 1.954841568331133e-05, "loss": 0.3155, "step": 2022 }, { "epoch": 0.49, "learning_rate": 1.9547242528159852e-05, "loss": 0.2866, "step": 2024 }, { "epoch": 0.49, "learning_rate": 1.9546067886429705e-05, "loss": 0.3206, "step": 2026 }, { "epoch": 0.49, "learning_rate": 1.9544891758303795e-05, "loss": 0.3331, "step": 2028 }, { "epoch": 0.49, "learning_rate": 1.9543714143965252e-05, "loss": 0.31, "step": 2030 }, { "epoch": 0.5, "learning_rate": 1.954253504359744e-05, "loss": 0.3137, "step": 2032 }, { "epoch": 0.5, "learning_rate": 1.954135445738395e-05, "loss": 0.3016, "step": 2034 }, { "epoch": 0.5, "learning_rate": 1.9540172385508614e-05, "loss": 0.2827, "step": 2036 }, { "epoch": 0.5, "learning_rate": 1.9538988828155484e-05, "loss": 0.32, "step": 2038 }, { "epoch": 0.5, "learning_rate": 1.9537803785508854e-05, "loss": 0.303, "step": 2040 }, { "epoch": 0.5, "learning_rate": 1.9536617257753246e-05, "loss": 0.3173, "step": 2042 }, { "epoch": 0.5, "learning_rate": 1.9535429245073407e-05, "loss": 0.3248, "step": 2044 }, { "epoch": 0.5, "learning_rate": 1.9534239747654316e-05, "loss": 0.3261, "step": 2046 }, { "epoch": 0.5, "learning_rate": 1.953304876568119e-05, "loss": 0.3067, "step": 2048 }, { "epoch": 0.5, "learning_rate": 1.9531856299339483e-05, "loss": 0.2753, "step": 2050 }, { "epoch": 0.5, "learning_rate": 1.9530662348814858e-05, "loss": 0.3137, "step": 2052 }, { "epoch": 0.5, "learning_rate": 1.952946691429323e-05, "loss": 0.3059, "step": 2054 }, { "epoch": 0.5, "learning_rate": 1.9528269995960732e-05, "loss": 0.2983, "step": 2056 }, { "epoch": 0.5, "learning_rate": 1.952707159400374e-05, "loss": 0.2921, "step": 2058 }, { "epoch": 0.5, "learning_rate": 1.9525871708608847e-05, "loss": 0.3104, "step": 2060 }, { "epoch": 0.5, "learning_rate": 1.952467033996289e-05, "loss": 0.2975, "step": 2062 }, { "epoch": 0.5, "learning_rate": 1.952346748825293e-05, "loss": 0.2984, "step": 2064 }, { "epoch": 0.5, "learning_rate": 1.9522263153666257e-05, "loss": 0.2981, "step": 2066 }, { "epoch": 0.5, "learning_rate": 1.9521057336390398e-05, "loss": 0.3075, "step": 2068 }, { "epoch": 0.5, "learning_rate": 1.951985003661311e-05, "loss": 0.308, "step": 2070 }, { "epoch": 0.5, "learning_rate": 1.9518641254522374e-05, "loss": 0.3135, "step": 2072 }, { "epoch": 0.51, "learning_rate": 1.951743099030641e-05, "loss": 0.3081, "step": 2074 }, { "epoch": 0.51, "learning_rate": 1.9516219244153666e-05, "loss": 0.2991, "step": 2076 }, { "epoch": 0.51, "learning_rate": 1.9515006016252816e-05, "loss": 0.2934, "step": 2078 }, { "epoch": 0.51, "learning_rate": 1.951379130679277e-05, "loss": 0.3083, "step": 2080 }, { "epoch": 0.51, "learning_rate": 1.951257511596267e-05, "loss": 0.3318, "step": 2082 }, { "epoch": 0.51, "learning_rate": 1.9511357443951885e-05, "loss": 0.3098, "step": 2084 }, { "epoch": 0.51, "learning_rate": 1.951013829095002e-05, "loss": 0.3099, "step": 2086 }, { "epoch": 0.51, "learning_rate": 1.95089176571469e-05, "loss": 0.3306, "step": 2088 }, { "epoch": 0.51, "learning_rate": 1.950769554273259e-05, "loss": 0.2969, "step": 2090 }, { "epoch": 0.51, "learning_rate": 1.9506471947897378e-05, "loss": 0.2976, "step": 2092 }, { "epoch": 0.51, "learning_rate": 1.9505246872831792e-05, "loss": 0.3151, "step": 2094 }, { "epoch": 0.51, "learning_rate": 1.9504020317726585e-05, "loss": 0.3159, "step": 2096 }, { "epoch": 0.51, "learning_rate": 1.950279228277274e-05, "loss": 0.3117, "step": 2098 }, { "epoch": 0.51, "learning_rate": 1.9501562768161477e-05, "loss": 0.2997, "step": 2100 }, { "epoch": 0.51, "learning_rate": 1.9500331774084232e-05, "loss": 0.2662, "step": 2102 }, { "epoch": 0.51, "learning_rate": 1.9499099300732687e-05, "loss": 0.3126, "step": 2104 }, { "epoch": 0.51, "learning_rate": 1.9497865348298738e-05, "loss": 0.3011, "step": 2106 }, { "epoch": 0.51, "learning_rate": 1.9496629916974533e-05, "loss": 0.3095, "step": 2108 }, { "epoch": 0.51, "learning_rate": 1.9495393006952433e-05, "loss": 0.3251, "step": 2110 }, { "epoch": 0.51, "learning_rate": 1.949415461842503e-05, "loss": 0.3057, "step": 2112 }, { "epoch": 0.52, "learning_rate": 1.9492914751585157e-05, "loss": 0.3205, "step": 2114 }, { "epoch": 0.52, "learning_rate": 1.949167340662587e-05, "loss": 0.2909, "step": 2116 }, { "epoch": 0.52, "learning_rate": 1.949043058374045e-05, "loss": 0.3098, "step": 2118 }, { "epoch": 0.52, "learning_rate": 1.9489186283122417e-05, "loss": 0.2958, "step": 2120 }, { "epoch": 0.52, "learning_rate": 1.9487940504965525e-05, "loss": 0.3054, "step": 2122 }, { "epoch": 0.52, "learning_rate": 1.948669324946374e-05, "loss": 0.3021, "step": 2124 }, { "epoch": 0.52, "learning_rate": 1.948544451681128e-05, "loss": 0.2918, "step": 2126 }, { "epoch": 0.52, "learning_rate": 1.9484194307202573e-05, "loss": 0.2922, "step": 2128 }, { "epoch": 0.52, "learning_rate": 1.948294262083229e-05, "loss": 0.3322, "step": 2130 }, { "epoch": 0.52, "learning_rate": 1.948168945789533e-05, "loss": 0.311, "step": 2132 }, { "epoch": 0.52, "learning_rate": 1.9480434818586817e-05, "loss": 0.3313, "step": 2134 }, { "epoch": 0.52, "learning_rate": 1.9479178703102108e-05, "loss": 0.2972, "step": 2136 }, { "epoch": 0.52, "learning_rate": 1.9477921111636794e-05, "loss": 0.3031, "step": 2138 }, { "epoch": 0.52, "learning_rate": 1.9476662044386687e-05, "loss": 0.2851, "step": 2140 }, { "epoch": 0.52, "learning_rate": 1.9475401501547835e-05, "loss": 0.2924, "step": 2142 }, { "epoch": 0.52, "learning_rate": 1.9474139483316512e-05, "loss": 0.2908, "step": 2144 }, { "epoch": 0.52, "learning_rate": 1.9472875989889227e-05, "loss": 0.2872, "step": 2146 }, { "epoch": 0.52, "learning_rate": 1.947161102146272e-05, "loss": 0.297, "step": 2148 }, { "epoch": 0.52, "learning_rate": 1.9470344578233942e-05, "loss": 0.2732, "step": 2150 }, { "epoch": 0.52, "learning_rate": 1.9469076660400104e-05, "loss": 0.3101, "step": 2152 }, { "epoch": 0.52, "learning_rate": 1.946780726815862e-05, "loss": 0.3093, "step": 2154 }, { "epoch": 0.53, "learning_rate": 1.9466536401707146e-05, "loss": 0.3066, "step": 2156 }, { "epoch": 0.53, "learning_rate": 1.946526406124357e-05, "loss": 0.3087, "step": 2158 }, { "epoch": 0.53, "learning_rate": 1.9463990246966e-05, "loss": 0.3039, "step": 2160 }, { "epoch": 0.53, "learning_rate": 1.946271495907278e-05, "loss": 0.3063, "step": 2162 }, { "epoch": 0.53, "learning_rate": 1.9461438197762485e-05, "loss": 0.2598, "step": 2164 }, { "epoch": 0.53, "learning_rate": 1.9460159963233916e-05, "loss": 0.3244, "step": 2166 }, { "epoch": 0.53, "learning_rate": 1.94588802556861e-05, "loss": 0.2994, "step": 2168 }, { "epoch": 0.53, "learning_rate": 1.9457599075318297e-05, "loss": 0.3135, "step": 2170 }, { "epoch": 0.53, "learning_rate": 1.9456316422330003e-05, "loss": 0.3108, "step": 2172 }, { "epoch": 0.53, "learning_rate": 1.9455032296920932e-05, "loss": 0.3212, "step": 2174 }, { "epoch": 0.53, "learning_rate": 1.945374669929103e-05, "loss": 0.2943, "step": 2176 }, { "epoch": 0.53, "learning_rate": 1.945245962964048e-05, "loss": 0.2851, "step": 2178 }, { "epoch": 0.53, "learning_rate": 1.9451171088169684e-05, "loss": 0.292, "step": 2180 }, { "epoch": 0.53, "learning_rate": 1.9449881075079282e-05, "loss": 0.2911, "step": 2182 }, { "epoch": 0.53, "learning_rate": 1.9448589590570136e-05, "loss": 0.3014, "step": 2184 }, { "epoch": 0.53, "learning_rate": 1.944729663484334e-05, "loss": 0.3048, "step": 2186 }, { "epoch": 0.53, "learning_rate": 1.944600220810022e-05, "loss": 0.281, "step": 2188 }, { "epoch": 0.53, "learning_rate": 1.9444706310542326e-05, "loss": 0.3162, "step": 2190 }, { "epoch": 0.53, "learning_rate": 1.9443408942371435e-05, "loss": 0.3266, "step": 2192 }, { "epoch": 0.53, "learning_rate": 1.944211010378956e-05, "loss": 0.3273, "step": 2194 }, { "epoch": 0.54, "learning_rate": 1.9440809794998947e-05, "loss": 0.2907, "step": 2196 }, { "epoch": 0.54, "learning_rate": 1.9439508016202057e-05, "loss": 0.3187, "step": 2198 }, { "epoch": 0.54, "learning_rate": 1.9438204767601586e-05, "loss": 0.2982, "step": 2200 }, { "epoch": 0.54, "learning_rate": 1.9436900049400466e-05, "loss": 0.305, "step": 2202 }, { "epoch": 0.54, "learning_rate": 1.9435593861801846e-05, "loss": 0.2946, "step": 2204 }, { "epoch": 0.54, "learning_rate": 1.943428620500911e-05, "loss": 0.3075, "step": 2206 }, { "epoch": 0.54, "learning_rate": 1.943297707922587e-05, "loss": 0.2992, "step": 2208 }, { "epoch": 0.54, "learning_rate": 1.943166648465597e-05, "loss": 0.2891, "step": 2210 }, { "epoch": 0.54, "learning_rate": 1.9430354421503476e-05, "loss": 0.2712, "step": 2212 }, { "epoch": 0.54, "learning_rate": 1.942904088997269e-05, "loss": 0.2869, "step": 2214 }, { "epoch": 0.54, "learning_rate": 1.9427725890268133e-05, "loss": 0.2955, "step": 2216 }, { "epoch": 0.54, "learning_rate": 1.942640942259457e-05, "loss": 0.2933, "step": 2218 }, { "epoch": 0.54, "learning_rate": 1.9425091487156976e-05, "loss": 0.3002, "step": 2220 }, { "epoch": 0.54, "learning_rate": 1.9423772084160565e-05, "loss": 0.3053, "step": 2222 }, { "epoch": 0.54, "learning_rate": 1.9422451213810786e-05, "loss": 0.3091, "step": 2224 }, { "epoch": 0.54, "learning_rate": 1.9421128876313297e-05, "loss": 0.3076, "step": 2226 }, { "epoch": 0.54, "learning_rate": 1.941980507187401e-05, "loss": 0.2878, "step": 2228 }, { "epoch": 0.54, "learning_rate": 1.9418479800699035e-05, "loss": 0.3039, "step": 2230 }, { "epoch": 0.54, "learning_rate": 1.9417153062994737e-05, "loss": 0.3059, "step": 2232 }, { "epoch": 0.54, "learning_rate": 1.94158248589677e-05, "loss": 0.2913, "step": 2234 }, { "epoch": 0.54, "learning_rate": 1.941449518882473e-05, "loss": 0.2824, "step": 2236 }, { "epoch": 0.55, "learning_rate": 1.941316405277287e-05, "loss": 0.3135, "step": 2238 }, { "epoch": 0.55, "learning_rate": 1.941183145101939e-05, "loss": 0.2874, "step": 2240 }, { "epoch": 0.55, "learning_rate": 1.9410497383771782e-05, "loss": 0.3155, "step": 2242 }, { "epoch": 0.55, "learning_rate": 1.9409161851237775e-05, "loss": 0.3064, "step": 2244 }, { "epoch": 0.55, "learning_rate": 1.9407824853625316e-05, "loss": 0.3088, "step": 2246 }, { "epoch": 0.55, "learning_rate": 1.9406486391142593e-05, "loss": 0.2959, "step": 2248 }, { "epoch": 0.55, "learning_rate": 1.940514646399801e-05, "loss": 0.2983, "step": 2250 }, { "epoch": 0.55, "learning_rate": 1.94038050724002e-05, "loss": 0.2917, "step": 2252 }, { "epoch": 0.55, "learning_rate": 1.940246221655804e-05, "loss": 0.3225, "step": 2254 }, { "epoch": 0.55, "learning_rate": 1.9401117896680607e-05, "loss": 0.313, "step": 2256 }, { "epoch": 0.55, "learning_rate": 1.9399772112977234e-05, "loss": 0.2962, "step": 2258 }, { "epoch": 0.55, "learning_rate": 1.939842486565747e-05, "loss": 0.3086, "step": 2260 }, { "epoch": 0.55, "learning_rate": 1.9397076154931086e-05, "loss": 0.2956, "step": 2262 }, { "epoch": 0.55, "learning_rate": 1.9395725981008084e-05, "loss": 0.3017, "step": 2264 }, { "epoch": 0.55, "learning_rate": 1.9394374344098706e-05, "loss": 0.3099, "step": 2266 }, { "epoch": 0.55, "learning_rate": 1.9393021244413406e-05, "loss": 0.3061, "step": 2268 }, { "epoch": 0.55, "learning_rate": 1.939166668216287e-05, "loss": 0.2888, "step": 2270 }, { "epoch": 0.55, "learning_rate": 1.939031065755802e-05, "loss": 0.2944, "step": 2272 }, { "epoch": 0.55, "learning_rate": 1.9388953170809996e-05, "loss": 0.3022, "step": 2274 }, { "epoch": 0.55, "learning_rate": 1.938759422213017e-05, "loss": 0.3085, "step": 2276 }, { "epoch": 0.56, "learning_rate": 1.9386233811730136e-05, "loss": 0.2693, "step": 2278 }, { "epoch": 0.56, "learning_rate": 1.938487193982173e-05, "loss": 0.3024, "step": 2280 }, { "epoch": 0.56, "learning_rate": 1.9383508606616994e-05, "loss": 0.3012, "step": 2282 }, { "epoch": 0.56, "learning_rate": 1.9382143812328218e-05, "loss": 0.3065, "step": 2284 }, { "epoch": 0.56, "learning_rate": 1.9380777557167908e-05, "loss": 0.3001, "step": 2286 }, { "epoch": 0.56, "learning_rate": 1.9379409841348805e-05, "loss": 0.2973, "step": 2288 }, { "epoch": 0.56, "learning_rate": 1.9378040665083862e-05, "loss": 0.3029, "step": 2290 }, { "epoch": 0.56, "learning_rate": 1.9376670028586283e-05, "loss": 0.3005, "step": 2292 }, { "epoch": 0.56, "learning_rate": 1.9375297932069477e-05, "loss": 0.3084, "step": 2294 }, { "epoch": 0.56, "learning_rate": 1.9373924375747095e-05, "loss": 0.289, "step": 2296 }, { "epoch": 0.56, "learning_rate": 1.9372549359833005e-05, "loss": 0.3064, "step": 2298 }, { "epoch": 0.56, "learning_rate": 1.9371172884541316e-05, "loss": 0.2915, "step": 2300 }, { "epoch": 0.56, "learning_rate": 1.9369794950086346e-05, "loss": 0.2964, "step": 2302 }, { "epoch": 0.56, "learning_rate": 1.936841555668266e-05, "loss": 0.2922, "step": 2304 }, { "epoch": 0.56, "learning_rate": 1.936703470454503e-05, "loss": 0.2912, "step": 2306 }, { "epoch": 0.56, "learning_rate": 1.9365652393888476e-05, "loss": 0.3013, "step": 2308 }, { "epoch": 0.56, "learning_rate": 1.9364268624928223e-05, "loss": 0.3095, "step": 2310 }, { "epoch": 0.56, "learning_rate": 1.9362883397879738e-05, "loss": 0.327, "step": 2312 }, { "epoch": 0.56, "learning_rate": 1.9361496712958718e-05, "loss": 0.3151, "step": 2314 }, { "epoch": 0.56, "learning_rate": 1.9360108570381073e-05, "loss": 0.2809, "step": 2316 }, { "epoch": 0.56, "learning_rate": 1.9358718970362954e-05, "loss": 0.3027, "step": 2318 }, { "epoch": 0.57, "learning_rate": 1.9357327913120723e-05, "loss": 0.31, "step": 2320 }, { "epoch": 0.57, "learning_rate": 1.9355935398870987e-05, "loss": 0.3004, "step": 2322 }, { "epoch": 0.57, "learning_rate": 1.9354541427830566e-05, "loss": 0.311, "step": 2324 }, { "epoch": 0.57, "learning_rate": 1.9353146000216517e-05, "loss": 0.2999, "step": 2326 }, { "epoch": 0.57, "learning_rate": 1.935174911624611e-05, "loss": 0.303, "step": 2328 }, { "epoch": 0.57, "learning_rate": 1.9350350776136862e-05, "loss": 0.2858, "step": 2330 }, { "epoch": 0.57, "learning_rate": 1.9348950980106496e-05, "loss": 0.3085, "step": 2332 }, { "epoch": 0.57, "learning_rate": 1.9347549728372977e-05, "loss": 0.3017, "step": 2334 }, { "epoch": 0.57, "learning_rate": 1.9346147021154485e-05, "loss": 0.3166, "step": 2336 }, { "epoch": 0.57, "learning_rate": 1.9344742858669435e-05, "loss": 0.3032, "step": 2338 }, { "epoch": 0.57, "learning_rate": 1.9343337241136466e-05, "loss": 0.2859, "step": 2340 }, { "epoch": 0.57, "learning_rate": 1.9341930168774446e-05, "loss": 0.3238, "step": 2342 }, { "epoch": 0.57, "learning_rate": 1.9340521641802464e-05, "loss": 0.2853, "step": 2344 }, { "epoch": 0.57, "learning_rate": 1.9339111660439835e-05, "loss": 0.3151, "step": 2346 }, { "epoch": 0.57, "learning_rate": 1.933770022490611e-05, "loss": 0.2785, "step": 2348 }, { "epoch": 0.57, "learning_rate": 1.9336287335421057e-05, "loss": 0.3166, "step": 2350 }, { "epoch": 0.57, "learning_rate": 1.9334872992204677e-05, "loss": 0.3239, "step": 2352 }, { "epoch": 0.57, "learning_rate": 1.933345719547719e-05, "loss": 0.3006, "step": 2354 }, { "epoch": 0.57, "learning_rate": 1.9332039945459044e-05, "loss": 0.2962, "step": 2356 }, { "epoch": 0.57, "learning_rate": 1.9330621242370924e-05, "loss": 0.3114, "step": 2358 }, { "epoch": 0.58, "learning_rate": 1.932920108643373e-05, "loss": 0.2896, "step": 2360 }, { "epoch": 0.58, "learning_rate": 1.9327779477868586e-05, "loss": 0.3102, "step": 2362 }, { "epoch": 0.58, "learning_rate": 1.932635641689685e-05, "loss": 0.2954, "step": 2364 }, { "epoch": 0.58, "learning_rate": 1.932493190374011e-05, "loss": 0.2728, "step": 2366 }, { "epoch": 0.58, "learning_rate": 1.9323505938620163e-05, "loss": 0.2819, "step": 2368 }, { "epoch": 0.58, "learning_rate": 1.932207852175905e-05, "loss": 0.2837, "step": 2370 }, { "epoch": 0.58, "learning_rate": 1.932064965337903e-05, "loss": 0.3101, "step": 2372 }, { "epoch": 0.58, "learning_rate": 1.9319219333702584e-05, "loss": 0.3016, "step": 2374 }, { "epoch": 0.58, "learning_rate": 1.931778756295243e-05, "loss": 0.2733, "step": 2376 }, { "epoch": 0.58, "learning_rate": 1.9316354341351497e-05, "loss": 0.3171, "step": 2378 }, { "epoch": 0.58, "learning_rate": 1.931491966912296e-05, "loss": 0.2806, "step": 2380 }, { "epoch": 0.58, "learning_rate": 1.93134835464902e-05, "loss": 0.3011, "step": 2382 }, { "epoch": 0.58, "learning_rate": 1.9312045973676835e-05, "loss": 0.3103, "step": 2384 }, { "epoch": 0.58, "learning_rate": 1.9310606950906706e-05, "loss": 0.2665, "step": 2386 }, { "epoch": 0.58, "learning_rate": 1.9309166478403882e-05, "loss": 0.305, "step": 2388 }, { "epoch": 0.58, "learning_rate": 1.9307724556392654e-05, "loss": 0.3093, "step": 2390 }, { "epoch": 0.58, "learning_rate": 1.9306281185097532e-05, "loss": 0.268, "step": 2392 }, { "epoch": 0.58, "learning_rate": 1.9304836364743277e-05, "loss": 0.2995, "step": 2394 }, { "epoch": 0.58, "learning_rate": 1.9303390095554842e-05, "loss": 0.2973, "step": 2396 }, { "epoch": 0.58, "learning_rate": 1.9301942377757433e-05, "loss": 0.3085, "step": 2398 }, { "epoch": 0.58, "learning_rate": 1.9300493211576467e-05, "loss": 0.287, "step": 2400 }, { "epoch": 0.59, "learning_rate": 1.929904259723759e-05, "loss": 0.2907, "step": 2402 }, { "epoch": 0.59, "learning_rate": 1.9297590534966676e-05, "loss": 0.288, "step": 2404 }, { "epoch": 0.59, "learning_rate": 1.9296137024989818e-05, "loss": 0.2706, "step": 2406 }, { "epoch": 0.59, "learning_rate": 1.9294682067533338e-05, "loss": 0.2976, "step": 2408 }, { "epoch": 0.59, "learning_rate": 1.9293225662823788e-05, "loss": 0.2883, "step": 2410 }, { "epoch": 0.59, "learning_rate": 1.9291767811087942e-05, "loss": 0.3068, "step": 2412 }, { "epoch": 0.59, "learning_rate": 1.92903085125528e-05, "loss": 0.3056, "step": 2414 }, { "epoch": 0.59, "learning_rate": 1.928884776744558e-05, "loss": 0.3094, "step": 2416 }, { "epoch": 0.59, "learning_rate": 1.928738557599373e-05, "loss": 0.2917, "step": 2418 }, { "epoch": 0.59, "learning_rate": 1.9285921938424934e-05, "loss": 0.3092, "step": 2420 }, { "epoch": 0.59, "learning_rate": 1.9284456854967086e-05, "loss": 0.3042, "step": 2422 }, { "epoch": 0.59, "learning_rate": 1.9282990325848307e-05, "loss": 0.2973, "step": 2424 }, { "epoch": 0.59, "learning_rate": 1.9281522351296952e-05, "loss": 0.3205, "step": 2426 }, { "epoch": 0.59, "learning_rate": 1.9280052931541594e-05, "loss": 0.2851, "step": 2428 }, { "epoch": 0.59, "learning_rate": 1.9278582066811034e-05, "loss": 0.3056, "step": 2430 }, { "epoch": 0.59, "learning_rate": 1.9277109757334296e-05, "loss": 0.3115, "step": 2432 }, { "epoch": 0.59, "learning_rate": 1.927563600334063e-05, "loss": 0.2996, "step": 2434 }, { "epoch": 0.59, "learning_rate": 1.9274160805059513e-05, "loss": 0.297, "step": 2436 }, { "epoch": 0.59, "learning_rate": 1.927268416272064e-05, "loss": 0.3132, "step": 2438 }, { "epoch": 0.59, "learning_rate": 1.9271206076553936e-05, "loss": 0.296, "step": 2440 }, { "epoch": 0.6, "learning_rate": 1.9269726546789553e-05, "loss": 0.3107, "step": 2442 }, { "epoch": 0.6, "learning_rate": 1.9268245573657867e-05, "loss": 0.3028, "step": 2444 }, { "epoch": 0.6, "learning_rate": 1.926676315738947e-05, "loss": 0.3078, "step": 2446 }, { "epoch": 0.6, "learning_rate": 1.9265279298215197e-05, "loss": 0.2909, "step": 2448 }, { "epoch": 0.6, "learning_rate": 1.9263793996366083e-05, "loss": 0.3159, "step": 2450 }, { "epoch": 0.6, "learning_rate": 1.926230725207341e-05, "loss": 0.2733, "step": 2452 }, { "epoch": 0.6, "learning_rate": 1.9260819065568667e-05, "loss": 0.291, "step": 2454 }, { "epoch": 0.6, "learning_rate": 1.9259329437083586e-05, "loss": 0.3063, "step": 2456 }, { "epoch": 0.6, "learning_rate": 1.9257838366850106e-05, "loss": 0.2884, "step": 2458 }, { "epoch": 0.6, "learning_rate": 1.92563458551004e-05, "loss": 0.2785, "step": 2460 }, { "epoch": 0.6, "learning_rate": 1.9254851902066867e-05, "loss": 0.3104, "step": 2462 }, { "epoch": 0.6, "learning_rate": 1.9253356507982125e-05, "loss": 0.2991, "step": 2464 }, { "epoch": 0.6, "learning_rate": 1.9251859673079016e-05, "loss": 0.2842, "step": 2466 }, { "epoch": 0.6, "learning_rate": 1.925036139759061e-05, "loss": 0.2965, "step": 2468 }, { "epoch": 0.6, "learning_rate": 1.92488616817502e-05, "loss": 0.2986, "step": 2470 }, { "epoch": 0.6, "learning_rate": 1.9247360525791303e-05, "loss": 0.2956, "step": 2472 }, { "epoch": 0.6, "learning_rate": 1.924585792994766e-05, "loss": 0.2855, "step": 2474 }, { "epoch": 0.6, "learning_rate": 1.924435389445324e-05, "loss": 0.2867, "step": 2476 }, { "epoch": 0.6, "learning_rate": 1.9242848419542232e-05, "loss": 0.3022, "step": 2478 }, { "epoch": 0.6, "learning_rate": 1.9241341505449043e-05, "loss": 0.3224, "step": 2480 }, { "epoch": 0.6, "learning_rate": 1.923983315240832e-05, "loss": 0.2938, "step": 2482 }, { "epoch": 0.61, "learning_rate": 1.923832336065492e-05, "loss": 0.3167, "step": 2484 }, { "epoch": 0.61, "learning_rate": 1.923681213042393e-05, "loss": 0.3131, "step": 2486 }, { "epoch": 0.61, "learning_rate": 1.923529946195066e-05, "loss": 0.2955, "step": 2488 }, { "epoch": 0.61, "learning_rate": 1.9233785355470645e-05, "loss": 0.3071, "step": 2490 }, { "epoch": 0.61, "learning_rate": 1.9232269811219643e-05, "loss": 0.3017, "step": 2492 }, { "epoch": 0.61, "learning_rate": 1.923075282943364e-05, "loss": 0.2925, "step": 2494 }, { "epoch": 0.61, "learning_rate": 1.922923441034883e-05, "loss": 0.3116, "step": 2496 }, { "epoch": 0.61, "learning_rate": 1.9227714554201654e-05, "loss": 0.3121, "step": 2498 }, { "epoch": 0.61, "learning_rate": 1.922619326122876e-05, "loss": 0.2915, "step": 2500 }, { "epoch": 0.61, "learning_rate": 1.9224670531667023e-05, "loss": 0.2796, "step": 2502 }, { "epoch": 0.61, "learning_rate": 1.9223146365753548e-05, "loss": 0.2623, "step": 2504 }, { "epoch": 0.61, "learning_rate": 1.9221620763725658e-05, "loss": 0.2923, "step": 2506 }, { "epoch": 0.61, "learning_rate": 1.9220093725820903e-05, "loss": 0.3046, "step": 2508 }, { "epoch": 0.61, "learning_rate": 1.9218565252277047e-05, "loss": 0.3085, "step": 2510 }, { "epoch": 0.61, "learning_rate": 1.9217035343332094e-05, "loss": 0.2772, "step": 2512 }, { "epoch": 0.61, "learning_rate": 1.921550399922426e-05, "loss": 0.2952, "step": 2514 }, { "epoch": 0.61, "learning_rate": 1.921397122019198e-05, "loss": 0.2839, "step": 2516 }, { "epoch": 0.61, "learning_rate": 1.9212437006473927e-05, "loss": 0.3098, "step": 2518 }, { "epoch": 0.61, "learning_rate": 1.921090135830899e-05, "loss": 0.2951, "step": 2520 }, { "epoch": 0.61, "learning_rate": 1.9209364275936278e-05, "loss": 0.3081, "step": 2522 }, { "epoch": 0.62, "learning_rate": 1.9207825759595126e-05, "loss": 0.2788, "step": 2524 }, { "epoch": 0.62, "learning_rate": 1.9206285809525097e-05, "loss": 0.2964, "step": 2526 }, { "epoch": 0.62, "learning_rate": 1.9204744425965967e-05, "loss": 0.2875, "step": 2528 }, { "epoch": 0.62, "learning_rate": 1.9203201609157746e-05, "loss": 0.2919, "step": 2530 }, { "epoch": 0.62, "learning_rate": 1.9201657359340662e-05, "loss": 0.3252, "step": 2532 }, { "epoch": 0.62, "learning_rate": 1.9200111676755164e-05, "loss": 0.3178, "step": 2534 }, { "epoch": 0.62, "learning_rate": 1.9198564561641927e-05, "loss": 0.3143, "step": 2536 }, { "epoch": 0.62, "learning_rate": 1.9197016014241853e-05, "loss": 0.3072, "step": 2538 }, { "epoch": 0.62, "learning_rate": 1.9195466034796056e-05, "loss": 0.3021, "step": 2540 }, { "epoch": 0.62, "learning_rate": 1.919391462354588e-05, "loss": 0.3162, "step": 2542 }, { "epoch": 0.62, "learning_rate": 1.9192361780732896e-05, "loss": 0.301, "step": 2544 }, { "epoch": 0.62, "learning_rate": 1.9190807506598895e-05, "loss": 0.2746, "step": 2546 }, { "epoch": 0.62, "learning_rate": 1.918925180138589e-05, "loss": 0.2919, "step": 2548 }, { "epoch": 0.62, "learning_rate": 1.9187694665336106e-05, "loss": 0.3085, "step": 2550 }, { "epoch": 0.62, "learning_rate": 1.9186136098692008e-05, "loss": 0.302, "step": 2552 }, { "epoch": 0.62, "learning_rate": 1.9184576101696277e-05, "loss": 0.3078, "step": 2554 }, { "epoch": 0.62, "learning_rate": 1.918301467459182e-05, "loss": 0.2958, "step": 2556 }, { "epoch": 0.62, "learning_rate": 1.9181451817621753e-05, "loss": 0.2922, "step": 2558 }, { "epoch": 0.62, "learning_rate": 1.917988753102943e-05, "loss": 0.3217, "step": 2560 }, { "epoch": 0.62, "learning_rate": 1.9178321815058426e-05, "loss": 0.285, "step": 2562 }, { "epoch": 0.62, "learning_rate": 1.9176754669952534e-05, "loss": 0.3077, "step": 2564 }, { "epoch": 0.63, "learning_rate": 1.9175186095955766e-05, "loss": 0.308, "step": 2566 }, { "epoch": 0.63, "learning_rate": 1.9173616093312364e-05, "loss": 0.2936, "step": 2568 }, { "epoch": 0.63, "learning_rate": 1.9172044662266787e-05, "loss": 0.2961, "step": 2570 }, { "epoch": 0.63, "learning_rate": 1.917047180306372e-05, "loss": 0.2918, "step": 2572 }, { "epoch": 0.63, "learning_rate": 1.9168897515948074e-05, "loss": 0.314, "step": 2574 }, { "epoch": 0.63, "learning_rate": 1.916732180116497e-05, "loss": 0.3068, "step": 2576 }, { "epoch": 0.63, "learning_rate": 1.9165744658959763e-05, "loss": 0.2924, "step": 2578 }, { "epoch": 0.63, "learning_rate": 1.9164166089578026e-05, "loss": 0.2987, "step": 2580 }, { "epoch": 0.63, "learning_rate": 1.9162586093265555e-05, "loss": 0.315, "step": 2582 }, { "epoch": 0.63, "learning_rate": 1.9161004670268365e-05, "loss": 0.2955, "step": 2584 }, { "epoch": 0.63, "learning_rate": 1.9159421820832696e-05, "loss": 0.3077, "step": 2586 }, { "epoch": 0.63, "learning_rate": 1.915783754520501e-05, "loss": 0.3106, "step": 2588 }, { "epoch": 0.63, "learning_rate": 1.9156251843631996e-05, "loss": 0.3177, "step": 2590 }, { "epoch": 0.63, "learning_rate": 1.915466471636055e-05, "loss": 0.3211, "step": 2592 }, { "epoch": 0.63, "learning_rate": 1.9153076163637806e-05, "loss": 0.2919, "step": 2594 }, { "epoch": 0.63, "learning_rate": 1.9151486185711113e-05, "loss": 0.2859, "step": 2596 }, { "epoch": 0.63, "learning_rate": 1.9149894782828044e-05, "loss": 0.3109, "step": 2598 }, { "epoch": 0.63, "learning_rate": 1.914830195523639e-05, "loss": 0.2911, "step": 2600 }, { "epoch": 0.63, "learning_rate": 1.9146707703184168e-05, "loss": 0.2569, "step": 2602 }, { "epoch": 0.63, "learning_rate": 1.9145112026919615e-05, "loss": 0.3109, "step": 2604 }, { "epoch": 0.63, "learning_rate": 1.914351492669119e-05, "loss": 0.3088, "step": 2606 }, { "epoch": 0.64, "learning_rate": 1.9141916402747576e-05, "loss": 0.2975, "step": 2608 }, { "epoch": 0.64, "learning_rate": 1.914031645533767e-05, "loss": 0.2903, "step": 2610 }, { "epoch": 0.64, "learning_rate": 1.91387150847106e-05, "loss": 0.3029, "step": 2612 }, { "epoch": 0.64, "learning_rate": 1.9137112291115713e-05, "loss": 0.2918, "step": 2614 }, { "epoch": 0.64, "learning_rate": 1.913550807480257e-05, "loss": 0.3048, "step": 2616 }, { "epoch": 0.64, "learning_rate": 1.9133902436020966e-05, "loss": 0.2966, "step": 2618 }, { "epoch": 0.64, "learning_rate": 1.913229537502091e-05, "loss": 0.2864, "step": 2620 }, { "epoch": 0.64, "learning_rate": 1.9130686892052628e-05, "loss": 0.3041, "step": 2622 }, { "epoch": 0.64, "learning_rate": 1.912907698736658e-05, "loss": 0.2901, "step": 2624 }, { "epoch": 0.64, "learning_rate": 1.912746566121344e-05, "loss": 0.3059, "step": 2626 }, { "epoch": 0.64, "learning_rate": 1.9125852913844102e-05, "loss": 0.2821, "step": 2628 }, { "epoch": 0.64, "learning_rate": 1.9124238745509682e-05, "loss": 0.3068, "step": 2630 }, { "epoch": 0.64, "learning_rate": 1.912262315646152e-05, "loss": 0.3024, "step": 2632 }, { "epoch": 0.64, "learning_rate": 1.9121006146951172e-05, "loss": 0.2663, "step": 2634 }, { "epoch": 0.64, "learning_rate": 1.9119387717230427e-05, "loss": 0.283, "step": 2636 }, { "epoch": 0.64, "learning_rate": 1.911776786755128e-05, "loss": 0.2837, "step": 2638 }, { "epoch": 0.64, "learning_rate": 1.9116146598165954e-05, "loss": 0.3077, "step": 2640 }, { "epoch": 0.64, "learning_rate": 1.9114523909326892e-05, "loss": 0.274, "step": 2642 }, { "epoch": 0.64, "learning_rate": 1.9112899801286766e-05, "loss": 0.2965, "step": 2644 }, { "epoch": 0.64, "learning_rate": 1.9111274274298457e-05, "loss": 0.3009, "step": 2646 }, { "epoch": 0.65, "learning_rate": 1.9109647328615074e-05, "loss": 0.2753, "step": 2648 }, { "epoch": 0.65, "learning_rate": 1.9108018964489942e-05, "loss": 0.2705, "step": 2650 }, { "epoch": 0.65, "learning_rate": 1.910638918217661e-05, "loss": 0.2892, "step": 2652 }, { "epoch": 0.65, "learning_rate": 1.9104757981928852e-05, "loss": 0.3046, "step": 2654 }, { "epoch": 0.65, "learning_rate": 1.910312536400065e-05, "loss": 0.2998, "step": 2656 }, { "epoch": 0.65, "learning_rate": 1.9101491328646228e-05, "loss": 0.2896, "step": 2658 }, { "epoch": 0.65, "learning_rate": 1.9099855876120005e-05, "loss": 0.2855, "step": 2660 }, { "epoch": 0.65, "learning_rate": 1.909821900667664e-05, "loss": 0.2924, "step": 2662 }, { "epoch": 0.65, "learning_rate": 1.9096580720571004e-05, "loss": 0.2856, "step": 2664 }, { "epoch": 0.65, "learning_rate": 1.909494101805819e-05, "loss": 0.2791, "step": 2666 }, { "epoch": 0.65, "learning_rate": 1.909329989939352e-05, "loss": 0.2848, "step": 2668 }, { "epoch": 0.65, "learning_rate": 1.9091657364832518e-05, "loss": 0.3059, "step": 2670 }, { "epoch": 0.65, "learning_rate": 1.9090013414630943e-05, "loss": 0.2832, "step": 2672 }, { "epoch": 0.65, "learning_rate": 1.908836804904477e-05, "loss": 0.2779, "step": 2674 }, { "epoch": 0.65, "learning_rate": 1.90867212683302e-05, "loss": 0.2676, "step": 2676 }, { "epoch": 0.65, "learning_rate": 1.9085073072743644e-05, "loss": 0.3032, "step": 2678 }, { "epoch": 0.65, "learning_rate": 1.908342346254174e-05, "loss": 0.3164, "step": 2680 }, { "epoch": 0.65, "learning_rate": 1.9081772437981344e-05, "loss": 0.2889, "step": 2682 }, { "epoch": 0.65, "learning_rate": 1.9080119999319536e-05, "loss": 0.2923, "step": 2684 }, { "epoch": 0.65, "learning_rate": 1.907846614681361e-05, "loss": 0.2842, "step": 2686 }, { "epoch": 0.65, "learning_rate": 1.9076810880721085e-05, "loss": 0.3022, "step": 2688 }, { "epoch": 0.66, "learning_rate": 1.90751542012997e-05, "loss": 0.2779, "step": 2690 }, { "epoch": 0.66, "learning_rate": 1.907349610880741e-05, "loss": 0.2785, "step": 2692 }, { "epoch": 0.66, "learning_rate": 1.9071836603502397e-05, "loss": 0.2906, "step": 2694 }, { "epoch": 0.66, "learning_rate": 1.9070175685643054e-05, "loss": 0.2717, "step": 2696 }, { "epoch": 0.66, "learning_rate": 1.9068513355487996e-05, "loss": 0.2896, "step": 2698 }, { "epoch": 0.66, "learning_rate": 1.906684961329607e-05, "loss": 0.2838, "step": 2700 }, { "epoch": 0.66, "learning_rate": 1.9065184459326328e-05, "loss": 0.3062, "step": 2702 }, { "epoch": 0.66, "learning_rate": 1.9063517893838048e-05, "loss": 0.3351, "step": 2704 }, { "epoch": 0.66, "learning_rate": 1.906184991709072e-05, "loss": 0.3178, "step": 2706 }, { "epoch": 0.66, "learning_rate": 1.906018052934407e-05, "loss": 0.313, "step": 2708 }, { "epoch": 0.66, "learning_rate": 1.9058509730858037e-05, "loss": 0.2599, "step": 2710 }, { "epoch": 0.66, "learning_rate": 1.9056837521892765e-05, "loss": 0.2795, "step": 2712 }, { "epoch": 0.66, "learning_rate": 1.9055163902708642e-05, "loss": 0.289, "step": 2714 }, { "epoch": 0.66, "learning_rate": 1.905348887356625e-05, "loss": 0.2986, "step": 2716 }, { "epoch": 0.66, "learning_rate": 1.9051812434726418e-05, "loss": 0.2873, "step": 2718 }, { "epoch": 0.66, "learning_rate": 1.9050134586450175e-05, "loss": 0.3006, "step": 2720 }, { "epoch": 0.66, "learning_rate": 1.904845532899877e-05, "loss": 0.3107, "step": 2722 }, { "epoch": 0.66, "learning_rate": 1.904677466263368e-05, "loss": 0.2941, "step": 2724 }, { "epoch": 0.66, "learning_rate": 1.9045092587616598e-05, "loss": 0.3029, "step": 2726 }, { "epoch": 0.66, "learning_rate": 1.9043409104209436e-05, "loss": 0.2769, "step": 2728 }, { "epoch": 0.67, "learning_rate": 1.9041724212674323e-05, "loss": 0.2872, "step": 2730 }, { "epoch": 0.67, "learning_rate": 1.9040037913273616e-05, "loss": 0.287, "step": 2732 }, { "epoch": 0.67, "learning_rate": 1.9038350206269874e-05, "loss": 0.2954, "step": 2734 }, { "epoch": 0.67, "learning_rate": 1.9036661091925894e-05, "loss": 0.3083, "step": 2736 }, { "epoch": 0.67, "learning_rate": 1.9034970570504684e-05, "loss": 0.2589, "step": 2738 }, { "epoch": 0.67, "learning_rate": 1.9033278642269467e-05, "loss": 0.3008, "step": 2740 }, { "epoch": 0.67, "learning_rate": 1.9031585307483688e-05, "loss": 0.2895, "step": 2742 }, { "epoch": 0.67, "learning_rate": 1.902989056641102e-05, "loss": 0.3067, "step": 2744 }, { "epoch": 0.67, "learning_rate": 1.9028194419315338e-05, "loss": 0.2846, "step": 2746 }, { "epoch": 0.67, "learning_rate": 1.9026496866460753e-05, "loss": 0.2877, "step": 2748 }, { "epoch": 0.67, "learning_rate": 1.902479790811158e-05, "loss": 0.3036, "step": 2750 }, { "epoch": 0.67, "learning_rate": 1.9023097544532368e-05, "loss": 0.3012, "step": 2752 }, { "epoch": 0.67, "learning_rate": 1.902139577598787e-05, "loss": 0.2911, "step": 2754 }, { "epoch": 0.67, "learning_rate": 1.9019692602743067e-05, "loss": 0.2929, "step": 2756 }, { "epoch": 0.67, "learning_rate": 1.9017988025063153e-05, "loss": 0.283, "step": 2758 }, { "epoch": 0.67, "learning_rate": 1.9016282043213544e-05, "loss": 0.2908, "step": 2760 }, { "epoch": 0.67, "learning_rate": 1.901457465745988e-05, "loss": 0.2935, "step": 2762 }, { "epoch": 0.67, "learning_rate": 1.901286586806801e-05, "loss": 0.3021, "step": 2764 }, { "epoch": 0.67, "learning_rate": 1.9011155675304006e-05, "loss": 0.2932, "step": 2766 }, { "epoch": 0.67, "learning_rate": 1.9009444079434162e-05, "loss": 0.3196, "step": 2768 }, { "epoch": 0.67, "learning_rate": 1.9007731080724977e-05, "loss": 0.2999, "step": 2770 }, { "epoch": 0.68, "learning_rate": 1.9006016679443187e-05, "loss": 0.2936, "step": 2772 }, { "epoch": 0.68, "learning_rate": 1.9004300875855734e-05, "loss": 0.285, "step": 2774 }, { "epoch": 0.68, "learning_rate": 1.9002583670229778e-05, "loss": 0.2927, "step": 2776 }, { "epoch": 0.68, "learning_rate": 1.9000865062832708e-05, "loss": 0.2814, "step": 2778 }, { "epoch": 0.68, "learning_rate": 1.899914505393212e-05, "loss": 0.3099, "step": 2780 }, { "epoch": 0.68, "learning_rate": 1.8997423643795836e-05, "loss": 0.3047, "step": 2782 }, { "epoch": 0.68, "learning_rate": 1.899570083269189e-05, "loss": 0.2742, "step": 2784 }, { "epoch": 0.68, "learning_rate": 1.8993976620888536e-05, "loss": 0.3233, "step": 2786 }, { "epoch": 0.68, "learning_rate": 1.8992251008654248e-05, "loss": 0.3003, "step": 2788 }, { "epoch": 0.68, "learning_rate": 1.899052399625772e-05, "loss": 0.3147, "step": 2790 }, { "epoch": 0.68, "learning_rate": 1.8988795583967855e-05, "loss": 0.2998, "step": 2792 }, { "epoch": 0.68, "learning_rate": 1.8987065772053785e-05, "loss": 0.2711, "step": 2794 }, { "epoch": 0.68, "learning_rate": 1.8985334560784853e-05, "loss": 0.3152, "step": 2796 }, { "epoch": 0.68, "learning_rate": 1.8983601950430623e-05, "loss": 0.3011, "step": 2798 }, { "epoch": 0.68, "learning_rate": 1.8981867941260875e-05, "loss": 0.2822, "step": 2800 }, { "epoch": 0.68, "learning_rate": 1.8980132533545605e-05, "loss": 0.2903, "step": 2802 }, { "epoch": 0.68, "learning_rate": 1.8978395727555034e-05, "loss": 0.3006, "step": 2804 }, { "epoch": 0.68, "learning_rate": 1.8976657523559592e-05, "loss": 0.318, "step": 2806 }, { "epoch": 0.68, "learning_rate": 1.8974917921829934e-05, "loss": 0.2895, "step": 2808 }, { "epoch": 0.68, "learning_rate": 1.8973176922636927e-05, "loss": 0.3041, "step": 2810 }, { "epoch": 0.69, "learning_rate": 1.897143452625166e-05, "loss": 0.3035, "step": 2812 }, { "epoch": 0.69, "learning_rate": 1.896969073294543e-05, "loss": 0.2651, "step": 2814 }, { "epoch": 0.69, "learning_rate": 1.896794554298977e-05, "loss": 0.2857, "step": 2816 }, { "epoch": 0.69, "learning_rate": 1.896619895665641e-05, "loss": 0.2826, "step": 2818 }, { "epoch": 0.69, "learning_rate": 1.8964450974217317e-05, "loss": 0.2946, "step": 2820 }, { "epoch": 0.69, "learning_rate": 1.8962701595944657e-05, "loss": 0.295, "step": 2822 }, { "epoch": 0.69, "learning_rate": 1.8960950822110822e-05, "loss": 0.3086, "step": 2824 }, { "epoch": 0.69, "learning_rate": 1.8959198652988423e-05, "loss": 0.2817, "step": 2826 }, { "epoch": 0.69, "learning_rate": 1.8957445088850286e-05, "loss": 0.2671, "step": 2828 }, { "epoch": 0.69, "learning_rate": 1.8955690129969455e-05, "loss": 0.2744, "step": 2830 }, { "epoch": 0.69, "learning_rate": 1.8953933776619192e-05, "loss": 0.2972, "step": 2832 }, { "epoch": 0.69, "learning_rate": 1.8952176029072968e-05, "loss": 0.3035, "step": 2834 }, { "epoch": 0.69, "learning_rate": 1.8950416887604485e-05, "loss": 0.3069, "step": 2836 }, { "epoch": 0.69, "learning_rate": 1.8948656352487652e-05, "loss": 0.2702, "step": 2838 }, { "epoch": 0.69, "learning_rate": 1.8946894423996592e-05, "loss": 0.299, "step": 2840 }, { "epoch": 0.69, "learning_rate": 1.8945131102405664e-05, "loss": 0.2958, "step": 2842 }, { "epoch": 0.69, "learning_rate": 1.894336638798942e-05, "loss": 0.2513, "step": 2844 }, { "epoch": 0.69, "learning_rate": 1.894160028102264e-05, "loss": 0.316, "step": 2846 }, { "epoch": 0.69, "learning_rate": 1.8939832781780326e-05, "loss": 0.2811, "step": 2848 }, { "epoch": 0.69, "learning_rate": 1.8938063890537687e-05, "loss": 0.2888, "step": 2850 }, { "epoch": 0.69, "learning_rate": 1.8936293607570154e-05, "loss": 0.2695, "step": 2852 }, { "epoch": 0.7, "learning_rate": 1.8934521933153376e-05, "loss": 0.299, "step": 2854 }, { "epoch": 0.7, "learning_rate": 1.893274886756321e-05, "loss": 0.2679, "step": 2856 }, { "epoch": 0.7, "learning_rate": 1.8930974411075743e-05, "loss": 0.2949, "step": 2858 }, { "epoch": 0.7, "learning_rate": 1.8929198563967265e-05, "loss": 0.2841, "step": 2860 }, { "epoch": 0.7, "learning_rate": 1.8927421326514296e-05, "loss": 0.2693, "step": 2862 }, { "epoch": 0.7, "learning_rate": 1.892564269899356e-05, "loss": 0.2988, "step": 2864 }, { "epoch": 0.7, "learning_rate": 1.8923862681682005e-05, "loss": 0.3143, "step": 2866 }, { "epoch": 0.7, "learning_rate": 1.892208127485679e-05, "loss": 0.2889, "step": 2868 }, { "epoch": 0.7, "learning_rate": 1.8920298478795298e-05, "loss": 0.2918, "step": 2870 }, { "epoch": 0.7, "learning_rate": 1.8918514293775123e-05, "loss": 0.2983, "step": 2872 }, { "epoch": 0.7, "learning_rate": 1.8916728720074077e-05, "loss": 0.2775, "step": 2874 }, { "epoch": 0.7, "learning_rate": 1.8914941757970184e-05, "loss": 0.2812, "step": 2876 }, { "epoch": 0.7, "learning_rate": 1.891315340774169e-05, "loss": 0.2889, "step": 2878 }, { "epoch": 0.7, "learning_rate": 1.8911363669667052e-05, "loss": 0.2653, "step": 2880 }, { "epoch": 0.7, "learning_rate": 1.8909572544024955e-05, "loss": 0.2936, "step": 2882 }, { "epoch": 0.7, "learning_rate": 1.8907780031094282e-05, "loss": 0.2886, "step": 2884 }, { "epoch": 0.7, "learning_rate": 1.8905986131154138e-05, "loss": 0.2668, "step": 2886 }, { "epoch": 0.7, "learning_rate": 1.890419084448386e-05, "loss": 0.3083, "step": 2888 }, { "epoch": 0.7, "learning_rate": 1.8902394171362975e-05, "loss": 0.2789, "step": 2890 }, { "epoch": 0.7, "learning_rate": 1.8900596112071247e-05, "loss": 0.2871, "step": 2892 }, { "epoch": 0.71, "learning_rate": 1.8898796666888645e-05, "loss": 0.276, "step": 2894 }, { "epoch": 0.71, "learning_rate": 1.889699583609535e-05, "loss": 0.2866, "step": 2896 }, { "epoch": 0.71, "learning_rate": 1.889519361997178e-05, "loss": 0.2888, "step": 2898 }, { "epoch": 0.71, "learning_rate": 1.8893390018798538e-05, "loss": 0.2905, "step": 2900 }, { "epoch": 0.71, "learning_rate": 1.889158503285647e-05, "loss": 0.3007, "step": 2902 }, { "epoch": 0.71, "learning_rate": 1.888977866242662e-05, "loss": 0.2915, "step": 2904 }, { "epoch": 0.71, "learning_rate": 1.8887970907790255e-05, "loss": 0.308, "step": 2906 }, { "epoch": 0.71, "learning_rate": 1.8886161769228857e-05, "loss": 0.2867, "step": 2908 }, { "epoch": 0.71, "learning_rate": 1.888435124702412e-05, "loss": 0.2805, "step": 2910 }, { "epoch": 0.71, "learning_rate": 1.888253934145796e-05, "loss": 0.2939, "step": 2912 }, { "epoch": 0.71, "learning_rate": 1.8880726052812502e-05, "loss": 0.298, "step": 2914 }, { "epoch": 0.71, "learning_rate": 1.887891138137009e-05, "loss": 0.2902, "step": 2916 }, { "epoch": 0.71, "learning_rate": 1.8877095327413283e-05, "loss": 0.2854, "step": 2918 }, { "epoch": 0.71, "learning_rate": 1.8875277891224854e-05, "loss": 0.2569, "step": 2920 }, { "epoch": 0.71, "learning_rate": 1.887345907308779e-05, "loss": 0.3057, "step": 2922 }, { "epoch": 0.71, "learning_rate": 1.8871638873285295e-05, "loss": 0.2779, "step": 2924 }, { "epoch": 0.71, "learning_rate": 1.8869817292100792e-05, "loss": 0.2838, "step": 2926 }, { "epoch": 0.71, "learning_rate": 1.886799432981791e-05, "loss": 0.3311, "step": 2928 }, { "epoch": 0.71, "learning_rate": 1.88661699867205e-05, "loss": 0.2978, "step": 2930 }, { "epoch": 0.71, "learning_rate": 1.8864344263092627e-05, "loss": 0.2899, "step": 2932 }, { "epoch": 0.71, "learning_rate": 1.886251715921857e-05, "loss": 0.2747, "step": 2934 }, { "epoch": 0.72, "learning_rate": 1.8860688675382823e-05, "loss": 0.2994, "step": 2936 }, { "epoch": 0.72, "learning_rate": 1.8858858811870094e-05, "loss": 0.2803, "step": 2938 }, { "epoch": 0.72, "learning_rate": 1.885702756896531e-05, "loss": 0.2816, "step": 2940 }, { "epoch": 0.72, "learning_rate": 1.8855194946953605e-05, "loss": 0.2603, "step": 2942 }, { "epoch": 0.72, "learning_rate": 1.885336094612033e-05, "loss": 0.3026, "step": 2944 }, { "epoch": 0.72, "learning_rate": 1.8851525566751062e-05, "loss": 0.2952, "step": 2946 }, { "epoch": 0.72, "learning_rate": 1.8849688809131578e-05, "loss": 0.2844, "step": 2948 }, { "epoch": 0.72, "learning_rate": 1.8847850673547877e-05, "loss": 0.2769, "step": 2950 }, { "epoch": 0.72, "learning_rate": 1.8846011160286168e-05, "loss": 0.3053, "step": 2952 }, { "epoch": 0.72, "learning_rate": 1.8844170269632877e-05, "loss": 0.2921, "step": 2954 }, { "epoch": 0.72, "learning_rate": 1.884232800187465e-05, "loss": 0.2964, "step": 2956 }, { "epoch": 0.72, "learning_rate": 1.8840484357298338e-05, "loss": 0.2978, "step": 2958 }, { "epoch": 0.72, "learning_rate": 1.8838639336191012e-05, "loss": 0.2862, "step": 2960 }, { "epoch": 0.72, "learning_rate": 1.8836792938839955e-05, "loss": 0.245, "step": 2962 }, { "epoch": 0.72, "learning_rate": 1.8834945165532662e-05, "loss": 0.2917, "step": 2964 }, { "epoch": 0.72, "learning_rate": 1.8833096016556855e-05, "loss": 0.2604, "step": 2966 }, { "epoch": 0.72, "learning_rate": 1.883124549220045e-05, "loss": 0.2775, "step": 2968 }, { "epoch": 0.72, "learning_rate": 1.8829393592751594e-05, "loss": 0.2993, "step": 2970 }, { "epoch": 0.72, "learning_rate": 1.8827540318498644e-05, "loss": 0.2926, "step": 2972 }, { "epoch": 0.72, "learning_rate": 1.882568566973016e-05, "loss": 0.3071, "step": 2974 }, { "epoch": 0.73, "learning_rate": 1.882382964673493e-05, "loss": 0.2846, "step": 2976 }, { "epoch": 0.73, "learning_rate": 1.8821972249801956e-05, "loss": 0.2873, "step": 2978 }, { "epoch": 0.73, "learning_rate": 1.8820113479220442e-05, "loss": 0.2839, "step": 2980 }, { "epoch": 0.73, "learning_rate": 1.8818253335279815e-05, "loss": 0.2907, "step": 2982 }, { "epoch": 0.73, "learning_rate": 1.881639181826971e-05, "loss": 0.2969, "step": 2984 }, { "epoch": 0.73, "learning_rate": 1.881452892847999e-05, "loss": 0.2998, "step": 2986 }, { "epoch": 0.73, "learning_rate": 1.881266466620071e-05, "loss": 0.3013, "step": 2988 }, { "epoch": 0.73, "learning_rate": 1.8810799031722153e-05, "loss": 0.2812, "step": 2990 }, { "epoch": 0.73, "learning_rate": 1.8808932025334816e-05, "loss": 0.3009, "step": 2992 }, { "epoch": 0.73, "learning_rate": 1.8807063647329402e-05, "loss": 0.287, "step": 2994 }, { "epoch": 0.73, "learning_rate": 1.8805193897996834e-05, "loss": 0.2658, "step": 2996 }, { "epoch": 0.73, "learning_rate": 1.8803322777628247e-05, "loss": 0.2631, "step": 2998 }, { "epoch": 0.73, "learning_rate": 1.880145028651499e-05, "loss": 0.2893, "step": 3000 }, { "epoch": 0.73, "learning_rate": 1.8799576424948616e-05, "loss": 0.3094, "step": 3002 }, { "epoch": 0.73, "learning_rate": 1.8797701193220908e-05, "loss": 0.2807, "step": 3004 }, { "epoch": 0.73, "learning_rate": 1.8795824591623853e-05, "loss": 0.3014, "step": 3006 }, { "epoch": 0.73, "learning_rate": 1.8793946620449647e-05, "loss": 0.2875, "step": 3008 }, { "epoch": 0.73, "learning_rate": 1.879206727999071e-05, "loss": 0.2879, "step": 3010 }, { "epoch": 0.73, "learning_rate": 1.879018657053967e-05, "loss": 0.2772, "step": 3012 }, { "epoch": 0.73, "learning_rate": 1.878830449238936e-05, "loss": 0.275, "step": 3014 }, { "epoch": 0.73, "learning_rate": 1.8786421045832842e-05, "loss": 0.2864, "step": 3016 }, { "epoch": 0.74, "learning_rate": 1.878453623116338e-05, "loss": 0.3137, "step": 3018 }, { "epoch": 0.74, "learning_rate": 1.8782650048674456e-05, "loss": 0.2784, "step": 3020 }, { "epoch": 0.74, "learning_rate": 1.8780762498659762e-05, "loss": 0.2983, "step": 3022 }, { "epoch": 0.74, "learning_rate": 1.87788735814132e-05, "loss": 0.2735, "step": 3024 }, { "epoch": 0.74, "learning_rate": 1.8776983297228896e-05, "loss": 0.2899, "step": 3026 }, { "epoch": 0.74, "learning_rate": 1.8775091646401177e-05, "loss": 0.3009, "step": 3028 }, { "epoch": 0.74, "learning_rate": 1.8773198629224588e-05, "loss": 0.2586, "step": 3030 }, { "epoch": 0.74, "learning_rate": 1.8771304245993888e-05, "loss": 0.2723, "step": 3032 }, { "epoch": 0.74, "learning_rate": 1.876940849700404e-05, "loss": 0.278, "step": 3034 }, { "epoch": 0.74, "learning_rate": 1.876751138255024e-05, "loss": 0.2593, "step": 3036 }, { "epoch": 0.74, "learning_rate": 1.8765612902927874e-05, "loss": 0.2761, "step": 3038 }, { "epoch": 0.74, "learning_rate": 1.876371305843255e-05, "loss": 0.3046, "step": 3040 }, { "epoch": 0.74, "learning_rate": 1.8761811849360093e-05, "loss": 0.2837, "step": 3042 }, { "epoch": 0.74, "learning_rate": 1.8759909276006527e-05, "loss": 0.2996, "step": 3044 }, { "epoch": 0.74, "learning_rate": 1.8758005338668105e-05, "loss": 0.2737, "step": 3046 }, { "epoch": 0.74, "learning_rate": 1.8756100037641282e-05, "loss": 0.267, "step": 3048 }, { "epoch": 0.74, "learning_rate": 1.8754193373222732e-05, "loss": 0.2913, "step": 3050 }, { "epoch": 0.74, "learning_rate": 1.875228534570933e-05, "loss": 0.2832, "step": 3052 }, { "epoch": 0.74, "learning_rate": 1.8750375955398176e-05, "loss": 0.3088, "step": 3054 }, { "epoch": 0.74, "learning_rate": 1.874846520258657e-05, "loss": 0.2808, "step": 3056 }, { "epoch": 0.75, "learning_rate": 1.8746553087572042e-05, "loss": 0.2693, "step": 3058 }, { "epoch": 0.75, "learning_rate": 1.874463961065231e-05, "loss": 0.2798, "step": 3060 }, { "epoch": 0.75, "learning_rate": 1.8742724772125326e-05, "loss": 0.2919, "step": 3062 }, { "epoch": 0.75, "learning_rate": 1.8740808572289246e-05, "loss": 0.2757, "step": 3064 }, { "epoch": 0.75, "learning_rate": 1.873889101144243e-05, "loss": 0.2987, "step": 3066 }, { "epoch": 0.75, "learning_rate": 1.873697208988346e-05, "loss": 0.2629, "step": 3068 }, { "epoch": 0.75, "learning_rate": 1.8735051807911127e-05, "loss": 0.2788, "step": 3070 }, { "epoch": 0.75, "learning_rate": 1.8733130165824437e-05, "loss": 0.2753, "step": 3072 }, { "epoch": 0.75, "learning_rate": 1.8731207163922597e-05, "loss": 0.3008, "step": 3074 }, { "epoch": 0.75, "learning_rate": 1.8729282802505036e-05, "loss": 0.2813, "step": 3076 }, { "epoch": 0.75, "learning_rate": 1.8727357081871398e-05, "loss": 0.2807, "step": 3078 }, { "epoch": 0.75, "learning_rate": 1.872543000232152e-05, "loss": 0.2905, "step": 3080 }, { "epoch": 0.75, "learning_rate": 1.8723501564155477e-05, "loss": 0.2691, "step": 3082 }, { "epoch": 0.75, "learning_rate": 1.8721571767673534e-05, "loss": 0.2876, "step": 3084 }, { "epoch": 0.75, "learning_rate": 1.8719640613176175e-05, "loss": 0.302, "step": 3086 }, { "epoch": 0.75, "learning_rate": 1.8717708100964095e-05, "loss": 0.2913, "step": 3088 }, { "epoch": 0.75, "learning_rate": 1.8715774231338203e-05, "loss": 0.2932, "step": 3090 }, { "epoch": 0.75, "learning_rate": 1.871383900459962e-05, "loss": 0.3084, "step": 3092 }, { "epoch": 0.75, "learning_rate": 1.8711902421049668e-05, "loss": 0.2886, "step": 3094 }, { "epoch": 0.75, "learning_rate": 1.8709964480989896e-05, "loss": 0.3087, "step": 3096 }, { "epoch": 0.75, "learning_rate": 1.8708025184722046e-05, "loss": 0.2581, "step": 3098 }, { "epoch": 0.76, "learning_rate": 1.8706084532548093e-05, "loss": 0.2853, "step": 3100 }, { "epoch": 0.76, "learning_rate": 1.8704142524770207e-05, "loss": 0.2844, "step": 3102 }, { "epoch": 0.76, "learning_rate": 1.870219916169077e-05, "loss": 0.2671, "step": 3104 }, { "epoch": 0.76, "learning_rate": 1.8700254443612376e-05, "loss": 0.2533, "step": 3106 }, { "epoch": 0.76, "learning_rate": 1.8698308370837847e-05, "loss": 0.2754, "step": 3108 }, { "epoch": 0.76, "learning_rate": 1.869636094367018e-05, "loss": 0.2697, "step": 3110 }, { "epoch": 0.76, "learning_rate": 1.8694412162412626e-05, "loss": 0.3111, "step": 3112 }, { "epoch": 0.76, "learning_rate": 1.869246202736861e-05, "loss": 0.2928, "step": 3114 }, { "epoch": 0.76, "learning_rate": 1.8690510538841792e-05, "loss": 0.2738, "step": 3116 }, { "epoch": 0.76, "learning_rate": 1.868855769713603e-05, "loss": 0.2643, "step": 3118 }, { "epoch": 0.76, "learning_rate": 1.868660350255539e-05, "loss": 0.2714, "step": 3120 }, { "epoch": 0.76, "learning_rate": 1.8684647955404168e-05, "loss": 0.2977, "step": 3122 }, { "epoch": 0.76, "learning_rate": 1.8682691055986847e-05, "loss": 0.2676, "step": 3124 }, { "epoch": 0.76, "learning_rate": 1.8680732804608136e-05, "loss": 0.27, "step": 3126 }, { "epoch": 0.76, "learning_rate": 1.8678773201572948e-05, "loss": 0.2979, "step": 3128 }, { "epoch": 0.76, "learning_rate": 1.867681224718641e-05, "loss": 0.2846, "step": 3130 }, { "epoch": 0.76, "learning_rate": 1.8674849941753857e-05, "loss": 0.2798, "step": 3132 }, { "epoch": 0.76, "learning_rate": 1.8672886285580835e-05, "loss": 0.3039, "step": 3134 }, { "epoch": 0.76, "learning_rate": 1.8670921278973098e-05, "loss": 0.2681, "step": 3136 }, { "epoch": 0.76, "learning_rate": 1.8668954922236613e-05, "loss": 0.3153, "step": 3138 }, { "epoch": 0.77, "learning_rate": 1.866698721567756e-05, "loss": 0.3031, "step": 3140 }, { "epoch": 0.77, "learning_rate": 1.8665018159602323e-05, "loss": 0.2815, "step": 3142 }, { "epoch": 0.77, "learning_rate": 1.8663047754317503e-05, "loss": 0.2812, "step": 3144 }, { "epoch": 0.77, "learning_rate": 1.8661076000129902e-05, "loss": 0.2441, "step": 3146 }, { "epoch": 0.77, "learning_rate": 1.8659102897346543e-05, "loss": 0.2887, "step": 3148 }, { "epoch": 0.77, "learning_rate": 1.8657128446274644e-05, "loss": 0.2726, "step": 3150 }, { "epoch": 0.77, "learning_rate": 1.8655152647221652e-05, "loss": 0.3039, "step": 3152 }, { "epoch": 0.77, "learning_rate": 1.865317550049521e-05, "loss": 0.2891, "step": 3154 }, { "epoch": 0.77, "learning_rate": 1.8651197006403173e-05, "loss": 0.2501, "step": 3156 }, { "epoch": 0.77, "learning_rate": 1.8649217165253608e-05, "loss": 0.3005, "step": 3158 }, { "epoch": 0.77, "learning_rate": 1.8647235977354796e-05, "loss": 0.2675, "step": 3160 }, { "epoch": 0.77, "learning_rate": 1.864525344301522e-05, "loss": 0.2938, "step": 3162 }, { "epoch": 0.77, "learning_rate": 1.8643269562543573e-05, "loss": 0.2947, "step": 3164 }, { "epoch": 0.77, "learning_rate": 1.8641284336248763e-05, "loss": 0.2873, "step": 3166 }, { "epoch": 0.77, "learning_rate": 1.8639297764439908e-05, "loss": 0.2959, "step": 3168 }, { "epoch": 0.77, "learning_rate": 1.863730984742633e-05, "loss": 0.2689, "step": 3170 }, { "epoch": 0.77, "learning_rate": 1.8635320585517558e-05, "loss": 0.2744, "step": 3172 }, { "epoch": 0.77, "learning_rate": 1.863332997902334e-05, "loss": 0.2702, "step": 3174 }, { "epoch": 0.77, "learning_rate": 1.863133802825363e-05, "loss": 0.2874, "step": 3176 }, { "epoch": 0.77, "learning_rate": 1.8629344733518587e-05, "loss": 0.2631, "step": 3178 }, { "epoch": 0.77, "learning_rate": 1.8627350095128583e-05, "loss": 0.3093, "step": 3180 }, { "epoch": 0.78, "learning_rate": 1.8625354113394202e-05, "loss": 0.2725, "step": 3182 }, { "epoch": 0.78, "learning_rate": 1.862335678862623e-05, "loss": 0.2925, "step": 3184 }, { "epoch": 0.78, "learning_rate": 1.8621358121135665e-05, "loss": 0.2886, "step": 3186 }, { "epoch": 0.78, "learning_rate": 1.8619358111233717e-05, "loss": 0.2929, "step": 3188 }, { "epoch": 0.78, "learning_rate": 1.8617356759231798e-05, "loss": 0.2846, "step": 3190 }, { "epoch": 0.78, "learning_rate": 1.861535406544154e-05, "loss": 0.2764, "step": 3192 }, { "epoch": 0.78, "learning_rate": 1.8613350030174777e-05, "loss": 0.2613, "step": 3194 }, { "epoch": 0.78, "learning_rate": 1.8611344653743552e-05, "loss": 0.2757, "step": 3196 }, { "epoch": 0.78, "learning_rate": 1.860933793646011e-05, "loss": 0.2532, "step": 3198 }, { "epoch": 0.78, "learning_rate": 1.8607329878636925e-05, "loss": 0.2976, "step": 3200 }, { "epoch": 0.78, "learning_rate": 1.8605320480586658e-05, "loss": 0.2663, "step": 3202 }, { "epoch": 0.78, "learning_rate": 1.860330974262219e-05, "loss": 0.2917, "step": 3204 }, { "epoch": 0.78, "learning_rate": 1.860129766505661e-05, "loss": 0.2856, "step": 3206 }, { "epoch": 0.78, "learning_rate": 1.8599284248203215e-05, "loss": 0.2747, "step": 3208 }, { "epoch": 0.78, "learning_rate": 1.85972694923755e-05, "loss": 0.2939, "step": 3210 }, { "epoch": 0.78, "learning_rate": 1.8595253397887187e-05, "loss": 0.2551, "step": 3212 }, { "epoch": 0.78, "learning_rate": 1.85932359650522e-05, "loss": 0.2922, "step": 3214 }, { "epoch": 0.78, "learning_rate": 1.859121719418466e-05, "loss": 0.2691, "step": 3216 }, { "epoch": 0.78, "learning_rate": 1.8589197085598907e-05, "loss": 0.2693, "step": 3218 }, { "epoch": 0.78, "learning_rate": 1.8587175639609492e-05, "loss": 0.2956, "step": 3220 }, { "epoch": 0.79, "learning_rate": 1.8585152856531166e-05, "loss": 0.2831, "step": 3222 }, { "epoch": 0.79, "learning_rate": 1.858312873667889e-05, "loss": 0.2593, "step": 3224 }, { "epoch": 0.79, "learning_rate": 1.858110328036784e-05, "loss": 0.2913, "step": 3226 }, { "epoch": 0.79, "learning_rate": 1.8579076487913394e-05, "loss": 0.2999, "step": 3228 }, { "epoch": 0.79, "learning_rate": 1.8577048359631134e-05, "loss": 0.2493, "step": 3230 }, { "epoch": 0.79, "learning_rate": 1.857501889583686e-05, "loss": 0.2561, "step": 3232 }, { "epoch": 0.79, "learning_rate": 1.857298809684658e-05, "loss": 0.2595, "step": 3234 }, { "epoch": 0.79, "learning_rate": 1.857095596297649e-05, "loss": 0.2918, "step": 3236 }, { "epoch": 0.79, "learning_rate": 1.8568922494543023e-05, "loss": 0.2856, "step": 3238 }, { "epoch": 0.79, "learning_rate": 1.8566887691862797e-05, "loss": 0.3029, "step": 3240 }, { "epoch": 0.79, "learning_rate": 1.856485155525265e-05, "loss": 0.2972, "step": 3242 }, { "epoch": 0.79, "learning_rate": 1.8562814085029622e-05, "loss": 0.3018, "step": 3244 }, { "epoch": 0.79, "learning_rate": 1.8560775281510968e-05, "loss": 0.2929, "step": 3246 }, { "epoch": 0.79, "learning_rate": 1.855873514501414e-05, "loss": 0.2733, "step": 3248 }, { "epoch": 0.79, "learning_rate": 1.85566936758568e-05, "loss": 0.3046, "step": 3250 }, { "epoch": 0.79, "learning_rate": 1.855465087435683e-05, "loss": 0.3002, "step": 3252 }, { "epoch": 0.79, "learning_rate": 1.85526067408323e-05, "loss": 0.2803, "step": 3254 }, { "epoch": 0.79, "learning_rate": 1.8550561275601504e-05, "loss": 0.266, "step": 3256 }, { "epoch": 0.79, "learning_rate": 1.8548514478982934e-05, "loss": 0.2744, "step": 3258 }, { "epoch": 0.79, "learning_rate": 1.8546466351295293e-05, "loss": 0.3024, "step": 3260 }, { "epoch": 0.79, "learning_rate": 1.854441689285749e-05, "loss": 0.2852, "step": 3262 }, { "epoch": 0.8, "learning_rate": 1.8542366103988637e-05, "loss": 0.2911, "step": 3264 }, { "epoch": 0.8, "learning_rate": 1.8540313985008063e-05, "loss": 0.3099, "step": 3266 }, { "epoch": 0.8, "learning_rate": 1.85382605362353e-05, "loss": 0.2841, "step": 3268 }, { "epoch": 0.8, "learning_rate": 1.8536205757990077e-05, "loss": 0.307, "step": 3270 }, { "epoch": 0.8, "learning_rate": 1.853414965059235e-05, "loss": 0.2702, "step": 3272 }, { "epoch": 0.8, "learning_rate": 1.853209221436226e-05, "loss": 0.2847, "step": 3274 }, { "epoch": 0.8, "learning_rate": 1.853003344962017e-05, "loss": 0.2993, "step": 3276 }, { "epoch": 0.8, "learning_rate": 1.8527973356686647e-05, "loss": 0.2662, "step": 3278 }, { "epoch": 0.8, "learning_rate": 1.8525911935882463e-05, "loss": 0.2886, "step": 3280 }, { "epoch": 0.8, "learning_rate": 1.8523849187528596e-05, "loss": 0.2778, "step": 3282 }, { "epoch": 0.8, "learning_rate": 1.8521785111946227e-05, "loss": 0.2657, "step": 3284 }, { "epoch": 0.8, "learning_rate": 1.851971970945676e-05, "loss": 0.285, "step": 3286 }, { "epoch": 0.8, "learning_rate": 1.851765298038178e-05, "loss": 0.2591, "step": 3288 }, { "epoch": 0.8, "learning_rate": 1.8515584925043105e-05, "loss": 0.2755, "step": 3290 }, { "epoch": 0.8, "learning_rate": 1.851351554376274e-05, "loss": 0.2671, "step": 3292 }, { "epoch": 0.8, "learning_rate": 1.8511444836862905e-05, "loss": 0.2477, "step": 3294 }, { "epoch": 0.8, "learning_rate": 1.8509372804666022e-05, "loss": 0.2592, "step": 3296 }, { "epoch": 0.8, "learning_rate": 1.8507299447494728e-05, "loss": 0.2712, "step": 3298 }, { "epoch": 0.8, "learning_rate": 1.8505224765671857e-05, "loss": 0.2794, "step": 3300 }, { "epoch": 0.8, "learning_rate": 1.850314875952045e-05, "loss": 0.2924, "step": 3302 }, { "epoch": 0.81, "learning_rate": 1.8501071429363768e-05, "loss": 0.2983, "step": 3304 }, { "epoch": 0.81, "learning_rate": 1.8498992775525255e-05, "loss": 0.3002, "step": 3306 }, { "epoch": 0.81, "learning_rate": 1.849691279832858e-05, "loss": 0.2934, "step": 3308 }, { "epoch": 0.81, "learning_rate": 1.849483149809761e-05, "loss": 0.2765, "step": 3310 }, { "epoch": 0.81, "learning_rate": 1.8492748875156414e-05, "loss": 0.3018, "step": 3312 }, { "epoch": 0.81, "learning_rate": 1.849066492982928e-05, "loss": 0.2974, "step": 3314 }, { "epoch": 0.81, "learning_rate": 1.848857966244069e-05, "loss": 0.2681, "step": 3316 }, { "epoch": 0.81, "learning_rate": 1.848649307331534e-05, "loss": 0.314, "step": 3318 }, { "epoch": 0.81, "learning_rate": 1.8484405162778122e-05, "loss": 0.2714, "step": 3320 }, { "epoch": 0.81, "learning_rate": 1.8482315931154146e-05, "loss": 0.2594, "step": 3322 }, { "epoch": 0.81, "learning_rate": 1.8480225378768717e-05, "loss": 0.2724, "step": 3324 }, { "epoch": 0.81, "learning_rate": 1.847813350594735e-05, "loss": 0.2667, "step": 3326 }, { "epoch": 0.81, "learning_rate": 1.8476040313015767e-05, "loss": 0.2719, "step": 3328 }, { "epoch": 0.81, "learning_rate": 1.8473945800299896e-05, "loss": 0.2878, "step": 3330 }, { "epoch": 0.81, "learning_rate": 1.8471849968125867e-05, "loss": 0.2531, "step": 3332 }, { "epoch": 0.81, "learning_rate": 1.8469752816820013e-05, "loss": 0.2869, "step": 3334 }, { "epoch": 0.81, "learning_rate": 1.846765434670888e-05, "loss": 0.2731, "step": 3336 }, { "epoch": 0.81, "learning_rate": 1.846555455811922e-05, "loss": 0.2727, "step": 3338 }, { "epoch": 0.81, "learning_rate": 1.846345345137798e-05, "loss": 0.278, "step": 3340 }, { "epoch": 0.81, "learning_rate": 1.8461351026812318e-05, "loss": 0.2972, "step": 3342 }, { "epoch": 0.81, "learning_rate": 1.8459247284749607e-05, "loss": 0.2508, "step": 3344 }, { "epoch": 0.82, "learning_rate": 1.8457142225517404e-05, "loss": 0.295, "step": 3346 }, { "epoch": 0.82, "learning_rate": 1.845503584944349e-05, "loss": 0.2679, "step": 3348 }, { "epoch": 0.82, "learning_rate": 1.845292815685584e-05, "loss": 0.2655, "step": 3350 }, { "epoch": 0.82, "learning_rate": 1.8450819148082638e-05, "loss": 0.2707, "step": 3352 }, { "epoch": 0.82, "learning_rate": 1.8448708823452275e-05, "loss": 0.2818, "step": 3354 }, { "epoch": 0.82, "learning_rate": 1.8446597183293345e-05, "loss": 0.2954, "step": 3356 }, { "epoch": 0.82, "learning_rate": 1.8444484227934644e-05, "loss": 0.2882, "step": 3358 }, { "epoch": 0.82, "learning_rate": 1.8442369957705178e-05, "loss": 0.2732, "step": 3360 }, { "epoch": 0.82, "learning_rate": 1.844025437293415e-05, "loss": 0.2784, "step": 3362 }, { "epoch": 0.82, "learning_rate": 1.8438137473950984e-05, "loss": 0.2874, "step": 3364 }, { "epoch": 0.82, "learning_rate": 1.843601926108528e-05, "loss": 0.2865, "step": 3366 }, { "epoch": 0.82, "learning_rate": 1.843389973466688e-05, "loss": 0.2693, "step": 3368 }, { "epoch": 0.82, "learning_rate": 1.8431778895025795e-05, "loss": 0.2997, "step": 3370 }, { "epoch": 0.82, "learning_rate": 1.8429656742492263e-05, "loss": 0.2787, "step": 3372 }, { "epoch": 0.82, "learning_rate": 1.8427533277396713e-05, "loss": 0.2914, "step": 3374 }, { "epoch": 0.82, "learning_rate": 1.8425408500069794e-05, "loss": 0.261, "step": 3376 }, { "epoch": 0.82, "learning_rate": 1.8423282410842344e-05, "loss": 0.2572, "step": 3378 }, { "epoch": 0.82, "learning_rate": 1.8421155010045414e-05, "loss": 0.2966, "step": 3380 }, { "epoch": 0.82, "learning_rate": 1.8419026298010256e-05, "loss": 0.2618, "step": 3382 }, { "epoch": 0.82, "learning_rate": 1.841689627506832e-05, "loss": 0.2907, "step": 3384 }, { "epoch": 0.83, "learning_rate": 1.8414764941551277e-05, "loss": 0.2892, "step": 3386 }, { "epoch": 0.83, "learning_rate": 1.8412632297790985e-05, "loss": 0.2807, "step": 3388 }, { "epoch": 0.83, "learning_rate": 1.841049834411952e-05, "loss": 0.2901, "step": 3390 }, { "epoch": 0.83, "learning_rate": 1.8408363080869153e-05, "loss": 0.2693, "step": 3392 }, { "epoch": 0.83, "learning_rate": 1.840622650837235e-05, "loss": 0.2874, "step": 3394 }, { "epoch": 0.83, "learning_rate": 1.8404088626961803e-05, "loss": 0.2841, "step": 3396 }, { "epoch": 0.83, "learning_rate": 1.8401949436970397e-05, "loss": 0.2967, "step": 3398 }, { "epoch": 0.83, "learning_rate": 1.8399808938731218e-05, "loss": 0.2746, "step": 3400 }, { "epoch": 0.83, "learning_rate": 1.839766713257755e-05, "loss": 0.2864, "step": 3402 }, { "epoch": 0.83, "learning_rate": 1.8395524018842904e-05, "loss": 0.2806, "step": 3404 }, { "epoch": 0.83, "learning_rate": 1.8393379597860965e-05, "loss": 0.2732, "step": 3406 }, { "epoch": 0.83, "learning_rate": 1.839123386996564e-05, "loss": 0.303, "step": 3408 }, { "epoch": 0.83, "learning_rate": 1.838908683549104e-05, "loss": 0.278, "step": 3410 }, { "epoch": 0.83, "learning_rate": 1.8386938494771473e-05, "loss": 0.298, "step": 3412 }, { "epoch": 0.83, "learning_rate": 1.8384788848141447e-05, "loss": 0.2732, "step": 3414 }, { "epoch": 0.83, "learning_rate": 1.838263789593568e-05, "loss": 0.2543, "step": 3416 }, { "epoch": 0.83, "learning_rate": 1.8380485638489096e-05, "loss": 0.2586, "step": 3418 }, { "epoch": 0.83, "learning_rate": 1.8378332076136818e-05, "loss": 0.2696, "step": 3420 }, { "epoch": 0.83, "learning_rate": 1.8376177209214166e-05, "loss": 0.3002, "step": 3422 }, { "epoch": 0.83, "learning_rate": 1.8374021038056673e-05, "loss": 0.2638, "step": 3424 }, { "epoch": 0.83, "learning_rate": 1.837186356300007e-05, "loss": 0.283, "step": 3426 }, { "epoch": 0.84, "learning_rate": 1.8369704784380298e-05, "loss": 0.2841, "step": 3428 }, { "epoch": 0.84, "learning_rate": 1.8367544702533487e-05, "loss": 0.3038, "step": 3430 }, { "epoch": 0.84, "learning_rate": 1.8365383317795983e-05, "loss": 0.2626, "step": 3432 }, { "epoch": 0.84, "learning_rate": 1.836322063050433e-05, "loss": 0.2508, "step": 3434 }, { "epoch": 0.84, "learning_rate": 1.8361056640995275e-05, "loss": 0.2866, "step": 3436 }, { "epoch": 0.84, "learning_rate": 1.8358891349605764e-05, "loss": 0.2871, "step": 3438 }, { "epoch": 0.84, "learning_rate": 1.8356724756672955e-05, "loss": 0.2821, "step": 3440 }, { "epoch": 0.84, "learning_rate": 1.83545568625342e-05, "loss": 0.2614, "step": 3442 }, { "epoch": 0.84, "learning_rate": 1.8352387667527057e-05, "loss": 0.2998, "step": 3444 }, { "epoch": 0.84, "learning_rate": 1.8350217171989287e-05, "loss": 0.2834, "step": 3446 }, { "epoch": 0.84, "learning_rate": 1.834804537625885e-05, "loss": 0.2654, "step": 3448 }, { "epoch": 0.84, "learning_rate": 1.834587228067391e-05, "loss": 0.2774, "step": 3450 }, { "epoch": 0.84, "learning_rate": 1.8343697885572845e-05, "loss": 0.2714, "step": 3452 }, { "epoch": 0.84, "learning_rate": 1.8341522191294216e-05, "loss": 0.273, "step": 3454 }, { "epoch": 0.84, "learning_rate": 1.8339345198176798e-05, "loss": 0.2867, "step": 3456 }, { "epoch": 0.84, "learning_rate": 1.833716690655956e-05, "loss": 0.2783, "step": 3458 }, { "epoch": 0.84, "learning_rate": 1.833498731678169e-05, "loss": 0.2828, "step": 3460 }, { "epoch": 0.84, "learning_rate": 1.8332806429182556e-05, "loss": 0.2855, "step": 3462 }, { "epoch": 0.84, "learning_rate": 1.8330624244101742e-05, "loss": 0.2501, "step": 3464 }, { "epoch": 0.84, "learning_rate": 1.8328440761879033e-05, "loss": 0.263, "step": 3466 }, { "epoch": 0.85, "learning_rate": 1.832625598285441e-05, "loss": 0.2848, "step": 3468 }, { "epoch": 0.85, "learning_rate": 1.8324069907368068e-05, "loss": 0.2734, "step": 3470 }, { "epoch": 0.85, "learning_rate": 1.8321882535760386e-05, "loss": 0.2838, "step": 3472 }, { "epoch": 0.85, "learning_rate": 1.831969386837196e-05, "loss": 0.2597, "step": 3474 }, { "epoch": 0.85, "learning_rate": 1.831750390554358e-05, "loss": 0.2614, "step": 3476 }, { "epoch": 0.85, "learning_rate": 1.8315312647616245e-05, "loss": 0.2845, "step": 3478 }, { "epoch": 0.85, "learning_rate": 1.831312009493114e-05, "loss": 0.2673, "step": 3480 }, { "epoch": 0.85, "learning_rate": 1.8310926247829672e-05, "loss": 0.28, "step": 3482 }, { "epoch": 0.85, "learning_rate": 1.8308731106653436e-05, "loss": 0.2865, "step": 3484 }, { "epoch": 0.85, "learning_rate": 1.8306534671744234e-05, "loss": 0.2754, "step": 3486 }, { "epoch": 0.85, "learning_rate": 1.8304336943444066e-05, "loss": 0.2809, "step": 3488 }, { "epoch": 0.85, "learning_rate": 1.8302137922095133e-05, "loss": 0.2749, "step": 3490 }, { "epoch": 0.85, "learning_rate": 1.8299937608039845e-05, "loss": 0.2943, "step": 3492 }, { "epoch": 0.85, "learning_rate": 1.8297736001620805e-05, "loss": 0.2843, "step": 3494 }, { "epoch": 0.85, "learning_rate": 1.829553310318082e-05, "loss": 0.2729, "step": 3496 }, { "epoch": 0.85, "learning_rate": 1.8293328913062892e-05, "loss": 0.2629, "step": 3498 }, { "epoch": 0.85, "learning_rate": 1.829112343161024e-05, "loss": 0.2886, "step": 3500 }, { "epoch": 0.85, "learning_rate": 1.828891665916627e-05, "loss": 0.2669, "step": 3502 }, { "epoch": 0.85, "learning_rate": 1.8286708596074596e-05, "loss": 0.2594, "step": 3504 }, { "epoch": 0.85, "learning_rate": 1.8284499242679027e-05, "loss": 0.2645, "step": 3506 }, { "epoch": 0.85, "learning_rate": 1.8282288599323576e-05, "loss": 0.2906, "step": 3508 }, { "epoch": 0.86, "learning_rate": 1.8280076666352457e-05, "loss": 0.2944, "step": 3510 }, { "epoch": 0.86, "learning_rate": 1.827786344411009e-05, "loss": 0.2694, "step": 3512 }, { "epoch": 0.86, "learning_rate": 1.8275648932941085e-05, "loss": 0.2701, "step": 3514 }, { "epoch": 0.86, "learning_rate": 1.827343313319026e-05, "loss": 0.2529, "step": 3516 }, { "epoch": 0.86, "learning_rate": 1.8271216045202627e-05, "loss": 0.2737, "step": 3518 }, { "epoch": 0.86, "learning_rate": 1.8268997669323418e-05, "loss": 0.2881, "step": 3520 }, { "epoch": 0.86, "learning_rate": 1.8266778005898035e-05, "loss": 0.2758, "step": 3522 }, { "epoch": 0.86, "learning_rate": 1.826455705527211e-05, "loss": 0.2675, "step": 3524 }, { "epoch": 0.86, "learning_rate": 1.8262334817791448e-05, "loss": 0.2757, "step": 3526 }, { "epoch": 0.86, "learning_rate": 1.826011129380208e-05, "loss": 0.279, "step": 3528 }, { "epoch": 0.86, "learning_rate": 1.825788648365022e-05, "loss": 0.2403, "step": 3530 }, { "epoch": 0.86, "learning_rate": 1.8255660387682287e-05, "loss": 0.263, "step": 3532 }, { "epoch": 0.86, "learning_rate": 1.8253433006244907e-05, "loss": 0.2824, "step": 3534 }, { "epoch": 0.86, "learning_rate": 1.8251204339684894e-05, "loss": 0.2693, "step": 3536 }, { "epoch": 0.86, "learning_rate": 1.8248974388349275e-05, "loss": 0.2676, "step": 3538 }, { "epoch": 0.86, "learning_rate": 1.8246743152585264e-05, "loss": 0.2564, "step": 3540 }, { "epoch": 0.86, "learning_rate": 1.8244510632740285e-05, "loss": 0.2905, "step": 3542 }, { "epoch": 0.86, "learning_rate": 1.824227682916196e-05, "loss": 0.2779, "step": 3544 }, { "epoch": 0.86, "learning_rate": 1.8240041742198102e-05, "loss": 0.2716, "step": 3546 }, { "epoch": 0.86, "learning_rate": 1.8237805372196737e-05, "loss": 0.2858, "step": 3548 }, { "epoch": 0.87, "learning_rate": 1.8235567719506084e-05, "loss": 0.2716, "step": 3550 }, { "epoch": 0.87, "learning_rate": 1.8233328784474564e-05, "loss": 0.2582, "step": 3552 }, { "epoch": 0.87, "learning_rate": 1.8231088567450793e-05, "loss": 0.2474, "step": 3554 }, { "epoch": 0.87, "learning_rate": 1.8228847068783598e-05, "loss": 0.2637, "step": 3556 }, { "epoch": 0.87, "learning_rate": 1.8226604288821982e-05, "loss": 0.2809, "step": 3558 }, { "epoch": 0.87, "learning_rate": 1.822436022791518e-05, "loss": 0.2794, "step": 3560 }, { "epoch": 0.87, "learning_rate": 1.8222114886412598e-05, "loss": 0.2815, "step": 3562 }, { "epoch": 0.87, "learning_rate": 1.8219868264663854e-05, "loss": 0.2772, "step": 3564 }, { "epoch": 0.87, "learning_rate": 1.8217620363018773e-05, "loss": 0.2892, "step": 3566 }, { "epoch": 0.87, "learning_rate": 1.8215371181827358e-05, "loss": 0.2637, "step": 3568 }, { "epoch": 0.87, "learning_rate": 1.8213120721439837e-05, "loss": 0.2556, "step": 3570 }, { "epoch": 0.87, "learning_rate": 1.821086898220661e-05, "loss": 0.246, "step": 3572 }, { "epoch": 0.87, "learning_rate": 1.82086159644783e-05, "loss": 0.2754, "step": 3574 }, { "epoch": 0.87, "learning_rate": 1.8206361668605717e-05, "loss": 0.2892, "step": 3576 }, { "epoch": 0.87, "learning_rate": 1.8204106094939868e-05, "loss": 0.2541, "step": 3578 }, { "epoch": 0.87, "learning_rate": 1.8201849243831966e-05, "loss": 0.261, "step": 3580 }, { "epoch": 0.87, "learning_rate": 1.8199591115633424e-05, "loss": 0.2728, "step": 3582 }, { "epoch": 0.87, "learning_rate": 1.8197331710695843e-05, "loss": 0.2745, "step": 3584 }, { "epoch": 0.87, "learning_rate": 1.8195071029371032e-05, "loss": 0.2728, "step": 3586 }, { "epoch": 0.87, "learning_rate": 1.8192809072010998e-05, "loss": 0.2754, "step": 3588 }, { "epoch": 0.87, "learning_rate": 1.8190545838967945e-05, "loss": 0.2568, "step": 3590 }, { "epoch": 0.88, "learning_rate": 1.818828133059427e-05, "loss": 0.2508, "step": 3592 }, { "epoch": 0.88, "learning_rate": 1.8186015547242582e-05, "loss": 0.2784, "step": 3594 }, { "epoch": 0.88, "learning_rate": 1.8183748489265683e-05, "loss": 0.2851, "step": 3596 }, { "epoch": 0.88, "learning_rate": 1.8181480157016558e-05, "loss": 0.2759, "step": 3598 }, { "epoch": 0.88, "learning_rate": 1.8179210550848413e-05, "loss": 0.2819, "step": 3600 }, { "epoch": 0.88, "learning_rate": 1.8176939671114645e-05, "loss": 0.2839, "step": 3602 }, { "epoch": 0.88, "learning_rate": 1.817466751816884e-05, "loss": 0.2627, "step": 3604 }, { "epoch": 0.88, "learning_rate": 1.81723940923648e-05, "loss": 0.2747, "step": 3606 }, { "epoch": 0.88, "learning_rate": 1.81701193940565e-05, "loss": 0.2798, "step": 3608 }, { "epoch": 0.88, "learning_rate": 1.816784342359814e-05, "loss": 0.2587, "step": 3610 }, { "epoch": 0.88, "learning_rate": 1.8165566181344104e-05, "loss": 0.2338, "step": 3612 }, { "epoch": 0.88, "learning_rate": 1.8163287667648974e-05, "loss": 0.2786, "step": 3614 }, { "epoch": 0.88, "learning_rate": 1.816100788286753e-05, "loss": 0.2696, "step": 3616 }, { "epoch": 0.88, "learning_rate": 1.8158726827354756e-05, "loss": 0.2438, "step": 3618 }, { "epoch": 0.88, "learning_rate": 1.8156444501465827e-05, "loss": 0.2606, "step": 3620 }, { "epoch": 0.88, "learning_rate": 1.815416090555612e-05, "loss": 0.2681, "step": 3622 }, { "epoch": 0.88, "learning_rate": 1.8151876039981208e-05, "loss": 0.2857, "step": 3624 }, { "epoch": 0.88, "learning_rate": 1.8149589905096866e-05, "loss": 0.2803, "step": 3626 }, { "epoch": 0.88, "learning_rate": 1.8147302501259055e-05, "loss": 0.2647, "step": 3628 }, { "epoch": 0.88, "learning_rate": 1.8145013828823943e-05, "loss": 0.278, "step": 3630 }, { "epoch": 0.88, "learning_rate": 1.8142723888147897e-05, "loss": 0.2824, "step": 3632 }, { "epoch": 0.89, "learning_rate": 1.814043267958748e-05, "loss": 0.2832, "step": 3634 }, { "epoch": 0.89, "learning_rate": 1.8138140203499443e-05, "loss": 0.2856, "step": 3636 }, { "epoch": 0.89, "learning_rate": 1.8135846460240752e-05, "loss": 0.2615, "step": 3638 }, { "epoch": 0.89, "learning_rate": 1.8133551450168553e-05, "loss": 0.2731, "step": 3640 }, { "epoch": 0.89, "learning_rate": 1.8131255173640196e-05, "loss": 0.268, "step": 3642 }, { "epoch": 0.89, "learning_rate": 1.8128957631013235e-05, "loss": 0.2652, "step": 3644 }, { "epoch": 0.89, "learning_rate": 1.812665882264541e-05, "loss": 0.2669, "step": 3646 }, { "epoch": 0.89, "learning_rate": 1.8124358748894667e-05, "loss": 0.2823, "step": 3648 }, { "epoch": 0.89, "learning_rate": 1.812205741011914e-05, "loss": 0.2815, "step": 3650 }, { "epoch": 0.89, "learning_rate": 1.8119754806677167e-05, "loss": 0.2726, "step": 3652 }, { "epoch": 0.89, "learning_rate": 1.811745093892728e-05, "loss": 0.2767, "step": 3654 }, { "epoch": 0.89, "learning_rate": 1.8115145807228215e-05, "loss": 0.2743, "step": 3656 }, { "epoch": 0.89, "learning_rate": 1.8112839411938892e-05, "loss": 0.2806, "step": 3658 }, { "epoch": 0.89, "learning_rate": 1.811053175341844e-05, "loss": 0.2809, "step": 3660 }, { "epoch": 0.89, "learning_rate": 1.810822283202617e-05, "loss": 0.2689, "step": 3662 }, { "epoch": 0.89, "learning_rate": 1.810591264812161e-05, "loss": 0.2431, "step": 3664 }, { "epoch": 0.89, "learning_rate": 1.8103601202064463e-05, "loss": 0.2743, "step": 3666 }, { "epoch": 0.89, "learning_rate": 1.8101288494214647e-05, "loss": 0.2466, "step": 3668 }, { "epoch": 0.89, "learning_rate": 1.809897452493226e-05, "loss": 0.2607, "step": 3670 }, { "epoch": 0.89, "learning_rate": 1.8096659294577612e-05, "loss": 0.2422, "step": 3672 }, { "epoch": 0.9, "learning_rate": 1.80943428035112e-05, "loss": 0.2809, "step": 3674 }, { "epoch": 0.9, "learning_rate": 1.809202505209372e-05, "loss": 0.2642, "step": 3676 }, { "epoch": 0.9, "learning_rate": 1.808970604068606e-05, "loss": 0.2734, "step": 3678 }, { "epoch": 0.9, "learning_rate": 1.808738576964931e-05, "loss": 0.2741, "step": 3680 }, { "epoch": 0.9, "learning_rate": 1.8085064239344757e-05, "loss": 0.2887, "step": 3682 }, { "epoch": 0.9, "learning_rate": 1.8082741450133875e-05, "loss": 0.2625, "step": 3684 }, { "epoch": 0.9, "learning_rate": 1.8080417402378347e-05, "loss": 0.27, "step": 3686 }, { "epoch": 0.9, "learning_rate": 1.8078092096440038e-05, "loss": 0.2485, "step": 3688 }, { "epoch": 0.9, "learning_rate": 1.807576553268102e-05, "loss": 0.2768, "step": 3690 }, { "epoch": 0.9, "learning_rate": 1.807343771146356e-05, "loss": 0.2637, "step": 3692 }, { "epoch": 0.9, "learning_rate": 1.807110863315011e-05, "loss": 0.2496, "step": 3694 }, { "epoch": 0.9, "learning_rate": 1.8068778298103327e-05, "loss": 0.2774, "step": 3696 }, { "epoch": 0.9, "learning_rate": 1.8066446706686066e-05, "loss": 0.2708, "step": 3698 }, { "epoch": 0.9, "learning_rate": 1.8064113859261373e-05, "loss": 0.2626, "step": 3700 }, { "epoch": 0.9, "learning_rate": 1.8061779756192485e-05, "loss": 0.2615, "step": 3702 }, { "epoch": 0.9, "learning_rate": 1.8059444397842846e-05, "loss": 0.2869, "step": 3704 }, { "epoch": 0.9, "learning_rate": 1.8057107784576088e-05, "loss": 0.2622, "step": 3706 }, { "epoch": 0.9, "learning_rate": 1.805476991675603e-05, "loss": 0.2866, "step": 3708 }, { "epoch": 0.9, "learning_rate": 1.8052430794746714e-05, "loss": 0.2508, "step": 3710 }, { "epoch": 0.9, "learning_rate": 1.8050090418912343e-05, "loss": 0.2642, "step": 3712 }, { "epoch": 0.9, "learning_rate": 1.8047748789617342e-05, "loss": 0.2361, "step": 3714 }, { "epoch": 0.91, "learning_rate": 1.8045405907226312e-05, "loss": 0.2506, "step": 3716 }, { "epoch": 0.91, "learning_rate": 1.8043061772104064e-05, "loss": 0.2407, "step": 3718 }, { "epoch": 0.91, "learning_rate": 1.8040716384615596e-05, "loss": 0.266, "step": 3720 }, { "epoch": 0.91, "learning_rate": 1.80383697451261e-05, "loss": 0.25, "step": 3722 }, { "epoch": 0.91, "learning_rate": 1.8036021854000968e-05, "loss": 0.2429, "step": 3724 }, { "epoch": 0.91, "learning_rate": 1.803367271160579e-05, "loss": 0.262, "step": 3726 }, { "epoch": 0.91, "learning_rate": 1.8031322318306333e-05, "loss": 0.2723, "step": 3728 }, { "epoch": 0.91, "learning_rate": 1.8028970674468583e-05, "loss": 0.2677, "step": 3730 }, { "epoch": 0.91, "learning_rate": 1.8026617780458703e-05, "loss": 0.2422, "step": 3732 }, { "epoch": 0.91, "learning_rate": 1.8024263636643057e-05, "loss": 0.2729, "step": 3734 }, { "epoch": 0.91, "learning_rate": 1.8021908243388205e-05, "loss": 0.2511, "step": 3736 }, { "epoch": 0.91, "learning_rate": 1.8019551601060897e-05, "loss": 0.2589, "step": 3738 }, { "epoch": 0.91, "learning_rate": 1.8017193710028084e-05, "loss": 0.2637, "step": 3740 }, { "epoch": 0.91, "learning_rate": 1.8014834570656903e-05, "loss": 0.2846, "step": 3742 }, { "epoch": 0.91, "learning_rate": 1.8012474183314697e-05, "loss": 0.2552, "step": 3744 }, { "epoch": 0.91, "learning_rate": 1.801011254836899e-05, "loss": 0.2708, "step": 3746 }, { "epoch": 0.91, "learning_rate": 1.800774966618751e-05, "loss": 0.2664, "step": 3748 }, { "epoch": 0.91, "learning_rate": 1.8005385537138172e-05, "loss": 0.2536, "step": 3750 }, { "epoch": 0.91, "learning_rate": 1.8003020161589094e-05, "loss": 0.252, "step": 3752 }, { "epoch": 0.91, "learning_rate": 1.8000653539908575e-05, "loss": 0.2624, "step": 3754 }, { "epoch": 0.92, "learning_rate": 1.7998285672465128e-05, "loss": 0.2758, "step": 3756 }, { "epoch": 0.92, "learning_rate": 1.7995916559627437e-05, "loss": 0.2431, "step": 3758 }, { "epoch": 0.92, "learning_rate": 1.79935462017644e-05, "loss": 0.2682, "step": 3760 }, { "epoch": 0.92, "learning_rate": 1.7991174599245094e-05, "loss": 0.273, "step": 3762 }, { "epoch": 0.92, "learning_rate": 1.7988801752438797e-05, "loss": 0.2567, "step": 3764 }, { "epoch": 0.92, "learning_rate": 1.798642766171498e-05, "loss": 0.279, "step": 3766 }, { "epoch": 0.92, "learning_rate": 1.7984052327443305e-05, "loss": 0.2389, "step": 3768 }, { "epoch": 0.92, "learning_rate": 1.7981675749993633e-05, "loss": 0.25, "step": 3770 }, { "epoch": 0.92, "learning_rate": 1.7979297929736012e-05, "loss": 0.2561, "step": 3772 }, { "epoch": 0.92, "learning_rate": 1.797691886704069e-05, "loss": 0.245, "step": 3774 }, { "epoch": 0.92, "learning_rate": 1.79745385622781e-05, "loss": 0.2615, "step": 3776 }, { "epoch": 0.92, "learning_rate": 1.7972157015818882e-05, "loss": 0.2782, "step": 3778 }, { "epoch": 0.92, "learning_rate": 1.7969774228033857e-05, "loss": 0.2681, "step": 3780 }, { "epoch": 0.92, "learning_rate": 1.7967390199294038e-05, "loss": 0.2712, "step": 3782 }, { "epoch": 0.92, "learning_rate": 1.7965004929970645e-05, "loss": 0.2597, "step": 3784 }, { "epoch": 0.92, "learning_rate": 1.7962618420435075e-05, "loss": 0.2627, "step": 3786 }, { "epoch": 0.92, "learning_rate": 1.7960230671058935e-05, "loss": 0.2557, "step": 3788 }, { "epoch": 0.92, "learning_rate": 1.7957841682214005e-05, "loss": 0.2708, "step": 3790 }, { "epoch": 0.92, "learning_rate": 1.7955451454272278e-05, "loss": 0.2446, "step": 3792 }, { "epoch": 0.92, "learning_rate": 1.7953059987605926e-05, "loss": 0.2617, "step": 3794 }, { "epoch": 0.92, "learning_rate": 1.795066728258732e-05, "loss": 0.2812, "step": 3796 }, { "epoch": 0.93, "learning_rate": 1.794827333958902e-05, "loss": 0.256, "step": 3798 }, { "epoch": 0.93, "learning_rate": 1.794587815898378e-05, "loss": 0.2581, "step": 3800 }, { "epoch": 0.93, "learning_rate": 1.794348174114456e-05, "loss": 0.2626, "step": 3802 }, { "epoch": 0.93, "learning_rate": 1.7941084086444486e-05, "loss": 0.2621, "step": 3804 }, { "epoch": 0.93, "learning_rate": 1.79386851952569e-05, "loss": 0.269, "step": 3806 }, { "epoch": 0.93, "learning_rate": 1.7936285067955324e-05, "loss": 0.2676, "step": 3808 }, { "epoch": 0.93, "learning_rate": 1.7933883704913475e-05, "loss": 0.2468, "step": 3810 }, { "epoch": 0.93, "learning_rate": 1.7931481106505266e-05, "loss": 0.2564, "step": 3812 }, { "epoch": 0.93, "learning_rate": 1.79290772731048e-05, "loss": 0.2806, "step": 3814 }, { "epoch": 0.93, "learning_rate": 1.7926672205086374e-05, "loss": 0.2671, "step": 3816 }, { "epoch": 0.93, "learning_rate": 1.7924265902824468e-05, "loss": 0.2566, "step": 3818 }, { "epoch": 0.93, "learning_rate": 1.7921858366693774e-05, "loss": 0.2603, "step": 3820 }, { "epoch": 0.93, "learning_rate": 1.791944959706915e-05, "loss": 0.2804, "step": 3822 }, { "epoch": 0.93, "learning_rate": 1.791703959432567e-05, "loss": 0.2598, "step": 3824 }, { "epoch": 0.93, "learning_rate": 1.7914628358838586e-05, "loss": 0.2566, "step": 3826 }, { "epoch": 0.93, "learning_rate": 1.7912215890983344e-05, "loss": 0.2532, "step": 3828 }, { "epoch": 0.93, "learning_rate": 1.7909802191135588e-05, "loss": 0.2795, "step": 3830 }, { "epoch": 0.93, "learning_rate": 1.790738725967115e-05, "loss": 0.2441, "step": 3832 }, { "epoch": 0.93, "learning_rate": 1.7904971096966044e-05, "loss": 0.2764, "step": 3834 }, { "epoch": 0.93, "learning_rate": 1.7902553703396493e-05, "loss": 0.2491, "step": 3836 }, { "epoch": 0.94, "learning_rate": 1.7900135079338907e-05, "loss": 0.2561, "step": 3838 }, { "epoch": 0.94, "learning_rate": 1.7897715225169876e-05, "loss": 0.2609, "step": 3840 }, { "epoch": 0.94, "learning_rate": 1.789529414126619e-05, "loss": 0.2748, "step": 3842 }, { "epoch": 0.94, "learning_rate": 1.7892871828004843e-05, "loss": 0.2354, "step": 3844 }, { "epoch": 0.94, "learning_rate": 1.7890448285762992e-05, "loss": 0.2548, "step": 3846 }, { "epoch": 0.94, "learning_rate": 1.788802351491801e-05, "loss": 0.2576, "step": 3848 }, { "epoch": 0.94, "learning_rate": 1.7885597515847448e-05, "loss": 0.2618, "step": 3850 }, { "epoch": 0.94, "learning_rate": 1.7883170288929054e-05, "loss": 0.2448, "step": 3852 }, { "epoch": 0.94, "learning_rate": 1.788074183454077e-05, "loss": 0.2654, "step": 3854 }, { "epoch": 0.94, "learning_rate": 1.7878312153060714e-05, "loss": 0.2575, "step": 3856 }, { "epoch": 0.94, "learning_rate": 1.787588124486722e-05, "loss": 0.2793, "step": 3858 }, { "epoch": 0.94, "learning_rate": 1.787344911033879e-05, "loss": 0.2445, "step": 3860 }, { "epoch": 0.94, "learning_rate": 1.7871015749854124e-05, "loss": 0.2633, "step": 3862 }, { "epoch": 0.94, "learning_rate": 1.786858116379212e-05, "loss": 0.2619, "step": 3864 }, { "epoch": 0.94, "learning_rate": 1.7866145352531865e-05, "loss": 0.2529, "step": 3866 }, { "epoch": 0.94, "learning_rate": 1.7863708316452625e-05, "loss": 0.2612, "step": 3868 }, { "epoch": 0.94, "learning_rate": 1.786127005593387e-05, "loss": 0.2554, "step": 3870 }, { "epoch": 0.94, "learning_rate": 1.7858830571355258e-05, "loss": 0.2319, "step": 3872 }, { "epoch": 0.94, "learning_rate": 1.7856389863096626e-05, "loss": 0.2341, "step": 3874 }, { "epoch": 0.94, "learning_rate": 1.7853947931538023e-05, "loss": 0.2753, "step": 3876 }, { "epoch": 0.94, "learning_rate": 1.785150477705967e-05, "loss": 0.2645, "step": 3878 }, { "epoch": 0.95, "learning_rate": 1.784906040004198e-05, "loss": 0.2741, "step": 3880 }, { "epoch": 0.95, "learning_rate": 1.784661480086557e-05, "loss": 0.235, "step": 3882 }, { "epoch": 0.95, "learning_rate": 1.7844167979911238e-05, "loss": 0.2461, "step": 3884 }, { "epoch": 0.95, "learning_rate": 1.784171993755997e-05, "loss": 0.2653, "step": 3886 }, { "epoch": 0.95, "learning_rate": 1.7839270674192942e-05, "loss": 0.2591, "step": 3888 }, { "epoch": 0.95, "learning_rate": 1.7836820190191524e-05, "loss": 0.2519, "step": 3890 }, { "epoch": 0.95, "learning_rate": 1.7834368485937278e-05, "loss": 0.2516, "step": 3892 }, { "epoch": 0.95, "learning_rate": 1.7831915561811955e-05, "loss": 0.2179, "step": 3894 }, { "epoch": 0.95, "learning_rate": 1.7829461418197492e-05, "loss": 0.2696, "step": 3896 }, { "epoch": 0.95, "learning_rate": 1.7827006055476013e-05, "loss": 0.2476, "step": 3898 }, { "epoch": 0.95, "learning_rate": 1.782454947402984e-05, "loss": 0.2823, "step": 3900 }, { "epoch": 0.95, "learning_rate": 1.7822091674241487e-05, "loss": 0.2498, "step": 3902 }, { "epoch": 0.95, "learning_rate": 1.7819632656493647e-05, "loss": 0.2632, "step": 3904 }, { "epoch": 0.95, "learning_rate": 1.7817172421169206e-05, "loss": 0.2461, "step": 3906 }, { "epoch": 0.95, "learning_rate": 1.7814710968651244e-05, "loss": 0.2687, "step": 3908 }, { "epoch": 0.95, "learning_rate": 1.7812248299323026e-05, "loss": 0.2754, "step": 3910 }, { "epoch": 0.95, "learning_rate": 1.780978441356801e-05, "loss": 0.259, "step": 3912 }, { "epoch": 0.95, "learning_rate": 1.780731931176985e-05, "loss": 0.2468, "step": 3914 }, { "epoch": 0.95, "learning_rate": 1.7804852994312365e-05, "loss": 0.2661, "step": 3916 }, { "epoch": 0.95, "learning_rate": 1.7802385461579593e-05, "loss": 0.2485, "step": 3918 }, { "epoch": 0.96, "learning_rate": 1.779991671395574e-05, "loss": 0.2621, "step": 3920 }, { "epoch": 0.96, "learning_rate": 1.779744675182521e-05, "loss": 0.2483, "step": 3922 }, { "epoch": 0.96, "learning_rate": 1.7794975575572596e-05, "loss": 0.255, "step": 3924 }, { "epoch": 0.96, "learning_rate": 1.7792503185582684e-05, "loss": 0.2348, "step": 3926 }, { "epoch": 0.96, "learning_rate": 1.7790029582240436e-05, "loss": 0.2623, "step": 3928 }, { "epoch": 0.96, "learning_rate": 1.7787554765931013e-05, "loss": 0.2423, "step": 3930 }, { "epoch": 0.96, "learning_rate": 1.7785078737039766e-05, "loss": 0.2559, "step": 3932 }, { "epoch": 0.96, "learning_rate": 1.7782601495952232e-05, "loss": 0.2484, "step": 3934 }, { "epoch": 0.96, "learning_rate": 1.7780123043054135e-05, "loss": 0.2284, "step": 3936 }, { "epoch": 0.96, "learning_rate": 1.7777643378731384e-05, "loss": 0.2216, "step": 3938 }, { "epoch": 0.96, "learning_rate": 1.777516250337009e-05, "loss": 0.2606, "step": 3940 }, { "epoch": 0.96, "learning_rate": 1.7772680417356537e-05, "loss": 0.2244, "step": 3942 }, { "epoch": 0.96, "learning_rate": 1.7770197121077207e-05, "loss": 0.279, "step": 3944 }, { "epoch": 0.96, "learning_rate": 1.7767712614918773e-05, "loss": 0.2562, "step": 3946 }, { "epoch": 0.96, "learning_rate": 1.7765226899268085e-05, "loss": 0.2466, "step": 3948 }, { "epoch": 0.96, "learning_rate": 1.7762739974512194e-05, "loss": 0.2496, "step": 3950 }, { "epoch": 0.96, "learning_rate": 1.7760251841038325e-05, "loss": 0.2598, "step": 3952 }, { "epoch": 0.96, "learning_rate": 1.775776249923391e-05, "loss": 0.2678, "step": 3954 }, { "epoch": 0.96, "learning_rate": 1.775527194948655e-05, "loss": 0.2676, "step": 3956 }, { "epoch": 0.96, "learning_rate": 1.7752780192184043e-05, "loss": 0.2677, "step": 3958 }, { "epoch": 0.96, "learning_rate": 1.7750287227714382e-05, "loss": 0.2566, "step": 3960 }, { "epoch": 0.97, "learning_rate": 1.7747793056465734e-05, "loss": 0.2087, "step": 3962 }, { "epoch": 0.97, "learning_rate": 1.7745297678826464e-05, "loss": 0.2527, "step": 3964 }, { "epoch": 0.97, "learning_rate": 1.774280109518512e-05, "loss": 0.2456, "step": 3966 }, { "epoch": 0.97, "learning_rate": 1.7740303305930437e-05, "loss": 0.2669, "step": 3968 }, { "epoch": 0.97, "learning_rate": 1.773780431145134e-05, "loss": 0.2549, "step": 3970 }, { "epoch": 0.97, "learning_rate": 1.7735304112136947e-05, "loss": 0.205, "step": 3972 }, { "epoch": 0.97, "learning_rate": 1.7732802708376554e-05, "loss": 0.2572, "step": 3974 }, { "epoch": 0.97, "learning_rate": 1.7730300100559645e-05, "loss": 0.2716, "step": 3976 }, { "epoch": 0.97, "learning_rate": 1.7727796289075905e-05, "loss": 0.2565, "step": 3978 }, { "epoch": 0.97, "learning_rate": 1.7725291274315188e-05, "loss": 0.2251, "step": 3980 }, { "epoch": 0.97, "learning_rate": 1.772278505666755e-05, "loss": 0.2422, "step": 3982 }, { "epoch": 0.97, "learning_rate": 1.772027763652322e-05, "loss": 0.2535, "step": 3984 }, { "epoch": 0.97, "learning_rate": 1.771776901427263e-05, "loss": 0.2694, "step": 3986 }, { "epoch": 0.97, "learning_rate": 1.7715259190306393e-05, "loss": 0.2421, "step": 3988 }, { "epoch": 0.97, "learning_rate": 1.77127481650153e-05, "loss": 0.2569, "step": 3990 }, { "epoch": 0.97, "learning_rate": 1.7710235938790344e-05, "loss": 0.2304, "step": 3992 }, { "epoch": 0.97, "learning_rate": 1.7707722512022692e-05, "loss": 0.2402, "step": 3994 }, { "epoch": 0.97, "learning_rate": 1.7705207885103707e-05, "loss": 0.211, "step": 3996 }, { "epoch": 0.97, "learning_rate": 1.770269205842494e-05, "loss": 0.265, "step": 3998 }, { "epoch": 0.97, "learning_rate": 1.7700175032378114e-05, "loss": 0.2543, "step": 4000 }, { "epoch": 0.98, "learning_rate": 1.7697656807355157e-05, "loss": 0.2455, "step": 4002 }, { "epoch": 0.98, "learning_rate": 1.7695137383748172e-05, "loss": 0.226, "step": 4004 }, { "epoch": 0.98, "learning_rate": 1.7692616761949458e-05, "loss": 0.2697, "step": 4006 }, { "epoch": 0.98, "learning_rate": 1.7690094942351488e-05, "loss": 0.2476, "step": 4008 }, { "epoch": 0.98, "learning_rate": 1.7687571925346934e-05, "loss": 0.2375, "step": 4010 }, { "epoch": 0.98, "learning_rate": 1.7685047711328643e-05, "loss": 0.2509, "step": 4012 }, { "epoch": 0.98, "learning_rate": 1.7682522300689665e-05, "loss": 0.264, "step": 4014 }, { "epoch": 0.98, "learning_rate": 1.7679995693823217e-05, "loss": 0.2397, "step": 4016 }, { "epoch": 0.98, "learning_rate": 1.767746789112271e-05, "loss": 0.2345, "step": 4018 }, { "epoch": 0.98, "learning_rate": 1.7674938892981747e-05, "loss": 0.256, "step": 4020 }, { "epoch": 0.98, "learning_rate": 1.767240869979411e-05, "loss": 0.2561, "step": 4022 }, { "epoch": 0.98, "learning_rate": 1.7669877311953773e-05, "loss": 0.2506, "step": 4024 }, { "epoch": 0.98, "learning_rate": 1.766734472985489e-05, "loss": 0.2642, "step": 4026 }, { "epoch": 0.98, "learning_rate": 1.76648109538918e-05, "loss": 0.235, "step": 4028 }, { "epoch": 0.98, "learning_rate": 1.7662275984459036e-05, "loss": 0.2313, "step": 4030 }, { "epoch": 0.98, "learning_rate": 1.765973982195131e-05, "loss": 0.2288, "step": 4032 }, { "epoch": 0.98, "learning_rate": 1.7657202466763523e-05, "loss": 0.2154, "step": 4034 }, { "epoch": 0.98, "learning_rate": 1.765466391929076e-05, "loss": 0.247, "step": 4036 }, { "epoch": 0.98, "learning_rate": 1.765212417992829e-05, "loss": 0.2406, "step": 4038 }, { "epoch": 0.98, "learning_rate": 1.7649583249071574e-05, "loss": 0.234, "step": 4040 }, { "epoch": 0.98, "learning_rate": 1.764704112711625e-05, "loss": 0.2357, "step": 4042 }, { "epoch": 0.99, "learning_rate": 1.7644497814458153e-05, "loss": 0.2465, "step": 4044 }, { "epoch": 0.99, "learning_rate": 1.764195331149329e-05, "loss": 0.2609, "step": 4046 }, { "epoch": 0.99, "learning_rate": 1.763940761861786e-05, "loss": 0.2645, "step": 4048 }, { "epoch": 0.99, "learning_rate": 1.763686073622825e-05, "loss": 0.2594, "step": 4050 }, { "epoch": 0.99, "learning_rate": 1.763431266472102e-05, "loss": 0.2444, "step": 4052 }, { "epoch": 0.99, "learning_rate": 1.7631763404492935e-05, "loss": 0.2453, "step": 4054 }, { "epoch": 0.99, "learning_rate": 1.7629212955940934e-05, "loss": 0.2682, "step": 4056 }, { "epoch": 0.99, "learning_rate": 1.7626661319462134e-05, "loss": 0.2622, "step": 4058 }, { "epoch": 0.99, "learning_rate": 1.7624108495453848e-05, "loss": 0.2333, "step": 4060 }, { "epoch": 0.99, "learning_rate": 1.7621554484313573e-05, "loss": 0.2378, "step": 4062 }, { "epoch": 0.99, "learning_rate": 1.7618999286438986e-05, "loss": 0.2577, "step": 4064 }, { "epoch": 0.99, "learning_rate": 1.761644290222795e-05, "loss": 0.238, "step": 4066 }, { "epoch": 0.99, "learning_rate": 1.7613885332078508e-05, "loss": 0.233, "step": 4068 }, { "epoch": 0.99, "learning_rate": 1.7611326576388905e-05, "loss": 0.2374, "step": 4070 }, { "epoch": 0.99, "learning_rate": 1.7608766635557552e-05, "loss": 0.2344, "step": 4072 }, { "epoch": 0.99, "learning_rate": 1.7606205509983048e-05, "loss": 0.2529, "step": 4074 }, { "epoch": 0.99, "learning_rate": 1.7603643200064188e-05, "loss": 0.217, "step": 4076 }, { "epoch": 0.99, "learning_rate": 1.760107970619994e-05, "loss": 0.2563, "step": 4078 }, { "epoch": 0.99, "learning_rate": 1.759851502878946e-05, "loss": 0.2288, "step": 4080 }, { "epoch": 0.99, "learning_rate": 1.7595949168232083e-05, "loss": 0.2457, "step": 4082 }, { "epoch": 1.0, "learning_rate": 1.759338212492734e-05, "loss": 0.2308, "step": 4084 }, { "epoch": 1.0, "learning_rate": 1.759081389927493e-05, "loss": 0.2734, "step": 4086 }, { "epoch": 1.0, "learning_rate": 1.758824449167476e-05, "loss": 0.2532, "step": 4088 }, { "epoch": 1.0, "learning_rate": 1.7585673902526893e-05, "loss": 0.2262, "step": 4090 }, { "epoch": 1.0, "learning_rate": 1.7583102132231594e-05, "loss": 0.2583, "step": 4092 }, { "epoch": 1.0, "learning_rate": 1.758052918118931e-05, "loss": 0.2481, "step": 4094 }, { "epoch": 1.0, "learning_rate": 1.757795504980067e-05, "loss": 0.2223, "step": 4096 }, { "epoch": 1.0, "learning_rate": 1.7575379738466475e-05, "loss": 0.2158, "step": 4098 }, { "epoch": 1.0, "learning_rate": 1.7572803247587734e-05, "loss": 0.2527, "step": 4100 }, { "epoch": 1.0, "learning_rate": 1.7570225577565622e-05, "loss": 0.2227, "step": 4102 }, { "epoch": 1.0, "learning_rate": 1.7567646728801497e-05, "loss": 0.2587, "step": 4104 }, { "epoch": 1.0, "learning_rate": 1.756506670169691e-05, "loss": 0.1924, "step": 4106 }, { "epoch": 1.0, "learning_rate": 1.756248549665359e-05, "loss": 0.191, "step": 4108 }, { "epoch": 1.0, "learning_rate": 1.755990311407345e-05, "loss": 0.2099, "step": 4110 }, { "epoch": 1.0, "learning_rate": 1.755731955435859e-05, "loss": 0.1711, "step": 4112 }, { "epoch": 1.0, "learning_rate": 1.7554734817911283e-05, "loss": 0.2189, "step": 4114 }, { "epoch": 1.0, "learning_rate": 1.7552148905133995e-05, "loss": 0.2072, "step": 4116 }, { "epoch": 1.0, "learning_rate": 1.754956181642938e-05, "loss": 0.196, "step": 4118 }, { "epoch": 1.0, "learning_rate": 1.7546973552200256e-05, "loss": 0.2036, "step": 4120 }, { "epoch": 1.0, "learning_rate": 1.7544384112849648e-05, "loss": 0.2007, "step": 4122 }, { "epoch": 1.0, "learning_rate": 1.7541793498780738e-05, "loss": 0.208, "step": 4124 }, { "epoch": 1.01, "learning_rate": 1.7539201710396913e-05, "loss": 0.2203, "step": 4126 }, { "epoch": 1.01, "learning_rate": 1.7536608748101728e-05, "loss": 0.209, "step": 4128 }, { "epoch": 1.01, "learning_rate": 1.753401461229894e-05, "loss": 0.2173, "step": 4130 }, { "epoch": 1.01, "learning_rate": 1.753141930339246e-05, "loss": 0.2103, "step": 4132 }, { "epoch": 1.01, "learning_rate": 1.7528822821786403e-05, "loss": 0.2167, "step": 4134 }, { "epoch": 1.01, "learning_rate": 1.7526225167885068e-05, "loss": 0.2148, "step": 4136 }, { "epoch": 1.01, "learning_rate": 1.752362634209292e-05, "loss": 0.1986, "step": 4138 }, { "epoch": 1.01, "learning_rate": 1.752102634481462e-05, "loss": 0.1869, "step": 4140 }, { "epoch": 1.01, "learning_rate": 1.7518425176455008e-05, "loss": 0.2136, "step": 4142 }, { "epoch": 1.01, "learning_rate": 1.751582283741911e-05, "loss": 0.1973, "step": 4144 }, { "epoch": 1.01, "learning_rate": 1.751321932811212e-05, "loss": 0.2078, "step": 4146 }, { "epoch": 1.01, "learning_rate": 1.7510614648939432e-05, "loss": 0.2088, "step": 4148 }, { "epoch": 1.01, "learning_rate": 1.750800880030661e-05, "loss": 0.1904, "step": 4150 }, { "epoch": 1.01, "learning_rate": 1.7505401782619408e-05, "loss": 0.2362, "step": 4152 }, { "epoch": 1.01, "learning_rate": 1.7502793596283756e-05, "loss": 0.2193, "step": 4154 }, { "epoch": 1.01, "learning_rate": 1.750018424170577e-05, "loss": 0.2163, "step": 4156 }, { "epoch": 1.01, "learning_rate": 1.749757371929175e-05, "loss": 0.1923, "step": 4158 }, { "epoch": 1.01, "learning_rate": 1.7494962029448165e-05, "loss": 0.226, "step": 4160 }, { "epoch": 1.01, "learning_rate": 1.749234917258168e-05, "loss": 0.1786, "step": 4162 }, { "epoch": 1.01, "learning_rate": 1.7489735149099143e-05, "loss": 0.235, "step": 4164 }, { "epoch": 1.02, "learning_rate": 1.748711995940757e-05, "loss": 0.228, "step": 4166 }, { "epoch": 1.02, "learning_rate": 1.748450360391417e-05, "loss": 0.2078, "step": 4168 }, { "epoch": 1.02, "learning_rate": 1.7481886083026323e-05, "loss": 0.2359, "step": 4170 }, { "epoch": 1.02, "learning_rate": 1.7479267397151604e-05, "loss": 0.2137, "step": 4172 }, { "epoch": 1.02, "learning_rate": 1.7476647546697758e-05, "loss": 0.2122, "step": 4174 }, { "epoch": 1.02, "learning_rate": 1.747402653207272e-05, "loss": 0.1959, "step": 4176 }, { "epoch": 1.02, "learning_rate": 1.74714043536846e-05, "loss": 0.2113, "step": 4178 }, { "epoch": 1.02, "learning_rate": 1.746878101194169e-05, "loss": 0.2128, "step": 4180 }, { "epoch": 1.02, "learning_rate": 1.7466156507252462e-05, "loss": 0.1976, "step": 4182 }, { "epoch": 1.02, "learning_rate": 1.7463530840025578e-05, "loss": 0.2239, "step": 4184 }, { "epoch": 1.02, "learning_rate": 1.746090401066987e-05, "loss": 0.219, "step": 4186 }, { "epoch": 1.02, "learning_rate": 1.745827601959436e-05, "loss": 0.2199, "step": 4188 }, { "epoch": 1.02, "learning_rate": 1.7455646867208237e-05, "loss": 0.1982, "step": 4190 }, { "epoch": 1.02, "learning_rate": 1.7453016553920893e-05, "loss": 0.1771, "step": 4192 }, { "epoch": 1.02, "learning_rate": 1.7450385080141874e-05, "loss": 0.2305, "step": 4194 }, { "epoch": 1.02, "learning_rate": 1.7447752446280933e-05, "loss": 0.2265, "step": 4196 }, { "epoch": 1.02, "learning_rate": 1.7445118652747983e-05, "loss": 0.186, "step": 4198 }, { "epoch": 1.02, "learning_rate": 1.7442483699953128e-05, "loss": 0.1697, "step": 4200 }, { "epoch": 1.02, "learning_rate": 1.7439847588306653e-05, "loss": 0.2082, "step": 4202 }, { "epoch": 1.02, "learning_rate": 1.7437210318219017e-05, "loss": 0.2284, "step": 4204 }, { "epoch": 1.02, "learning_rate": 1.7434571890100864e-05, "loss": 0.1919, "step": 4206 }, { "epoch": 1.03, "learning_rate": 1.743193230436302e-05, "loss": 0.1843, "step": 4208 }, { "epoch": 1.03, "learning_rate": 1.7429291561416485e-05, "loss": 0.1933, "step": 4210 }, { "epoch": 1.03, "learning_rate": 1.7426649661672446e-05, "loss": 0.2136, "step": 4212 }, { "epoch": 1.03, "learning_rate": 1.7424006605542266e-05, "loss": 0.2131, "step": 4214 }, { "epoch": 1.03, "learning_rate": 1.742136239343749e-05, "loss": 0.1739, "step": 4216 }, { "epoch": 1.03, "learning_rate": 1.7418717025769835e-05, "loss": 0.212, "step": 4218 }, { "epoch": 1.03, "learning_rate": 1.7416070502951215e-05, "loss": 0.2331, "step": 4220 }, { "epoch": 1.03, "learning_rate": 1.741342282539371e-05, "loss": 0.2165, "step": 4222 }, { "epoch": 1.03, "learning_rate": 1.7410773993509585e-05, "loss": 0.2241, "step": 4224 }, { "epoch": 1.03, "learning_rate": 1.740812400771128e-05, "loss": 0.2422, "step": 4226 }, { "epoch": 1.03, "learning_rate": 1.7405472868411416e-05, "loss": 0.1957, "step": 4228 }, { "epoch": 1.03, "learning_rate": 1.74028205760228e-05, "loss": 0.2168, "step": 4230 }, { "epoch": 1.03, "learning_rate": 1.740016713095842e-05, "loss": 0.1938, "step": 4232 }, { "epoch": 1.03, "learning_rate": 1.7397512533631424e-05, "loss": 0.1813, "step": 4234 }, { "epoch": 1.03, "learning_rate": 1.7394856784455164e-05, "loss": 0.1872, "step": 4236 }, { "epoch": 1.03, "learning_rate": 1.7392199883843156e-05, "loss": 0.2159, "step": 4238 }, { "epoch": 1.03, "learning_rate": 1.73895418322091e-05, "loss": 0.1855, "step": 4240 }, { "epoch": 1.03, "learning_rate": 1.7386882629966875e-05, "loss": 0.2032, "step": 4242 }, { "epoch": 1.03, "learning_rate": 1.7384222277530544e-05, "loss": 0.2146, "step": 4244 }, { "epoch": 1.03, "learning_rate": 1.7381560775314336e-05, "loss": 0.2007, "step": 4246 }, { "epoch": 1.04, "learning_rate": 1.737889812373267e-05, "loss": 0.2126, "step": 4248 }, { "epoch": 1.04, "learning_rate": 1.7376234323200144e-05, "loss": 0.1904, "step": 4250 }, { "epoch": 1.04, "learning_rate": 1.737356937413153e-05, "loss": 0.2157, "step": 4252 }, { "epoch": 1.04, "learning_rate": 1.737090327694178e-05, "loss": 0.175, "step": 4254 }, { "epoch": 1.04, "learning_rate": 1.7368236032046027e-05, "loss": 0.1881, "step": 4256 }, { "epoch": 1.04, "learning_rate": 1.7365567639859583e-05, "loss": 0.1764, "step": 4258 }, { "epoch": 1.04, "learning_rate": 1.7362898100797938e-05, "loss": 0.2059, "step": 4260 }, { "epoch": 1.04, "learning_rate": 1.7360227415276755e-05, "loss": 0.2129, "step": 4262 }, { "epoch": 1.04, "learning_rate": 1.735755558371188e-05, "loss": 0.2048, "step": 4264 }, { "epoch": 1.04, "learning_rate": 1.735488260651934e-05, "loss": 0.1988, "step": 4266 }, { "epoch": 1.04, "learning_rate": 1.735220848411534e-05, "loss": 0.2093, "step": 4268 }, { "epoch": 1.04, "learning_rate": 1.734953321691626e-05, "loss": 0.1806, "step": 4270 }, { "epoch": 1.04, "learning_rate": 1.734685680533866e-05, "loss": 0.2161, "step": 4272 }, { "epoch": 1.04, "learning_rate": 1.7344179249799274e-05, "loss": 0.1984, "step": 4274 }, { "epoch": 1.04, "learning_rate": 1.734150055071502e-05, "loss": 0.2153, "step": 4276 }, { "epoch": 1.04, "learning_rate": 1.7338820708502994e-05, "loss": 0.2043, "step": 4278 }, { "epoch": 1.04, "learning_rate": 1.733613972358046e-05, "loss": 0.1908, "step": 4280 }, { "epoch": 1.04, "learning_rate": 1.7333457596364885e-05, "loss": 0.195, "step": 4282 }, { "epoch": 1.04, "learning_rate": 1.7330774327273882e-05, "loss": 0.205, "step": 4284 }, { "epoch": 1.04, "learning_rate": 1.7328089916725257e-05, "loss": 0.2049, "step": 4286 }, { "epoch": 1.04, "learning_rate": 1.7325404365136997e-05, "loss": 0.2158, "step": 4288 }, { "epoch": 1.05, "learning_rate": 1.7322717672927266e-05, "loss": 0.1865, "step": 4290 }, { "epoch": 1.05, "learning_rate": 1.7320029840514395e-05, "loss": 0.2017, "step": 4292 }, { "epoch": 1.05, "learning_rate": 1.7317340868316907e-05, "loss": 0.1783, "step": 4294 }, { "epoch": 1.05, "learning_rate": 1.7314650756753494e-05, "loss": 0.1964, "step": 4296 }, { "epoch": 1.05, "learning_rate": 1.731195950624302e-05, "loss": 0.2124, "step": 4298 }, { "epoch": 1.05, "learning_rate": 1.7309267117204545e-05, "loss": 0.2162, "step": 4300 }, { "epoch": 1.05, "learning_rate": 1.7306573590057284e-05, "loss": 0.1979, "step": 4302 }, { "epoch": 1.05, "learning_rate": 1.7303878925220646e-05, "loss": 0.1782, "step": 4304 }, { "epoch": 1.05, "learning_rate": 1.730118312311421e-05, "loss": 0.2197, "step": 4306 }, { "epoch": 1.05, "learning_rate": 1.729848618415773e-05, "loss": 0.1779, "step": 4308 }, { "epoch": 1.05, "learning_rate": 1.7295788108771145e-05, "loss": 0.2027, "step": 4310 }, { "epoch": 1.05, "learning_rate": 1.729308889737456e-05, "loss": 0.1925, "step": 4312 }, { "epoch": 1.05, "learning_rate": 1.7290388550388267e-05, "loss": 0.2162, "step": 4314 }, { "epoch": 1.05, "learning_rate": 1.728768706823273e-05, "loss": 0.1943, "step": 4316 }, { "epoch": 1.05, "learning_rate": 1.728498445132859e-05, "loss": 0.1996, "step": 4318 }, { "epoch": 1.05, "learning_rate": 1.728228070009667e-05, "loss": 0.1945, "step": 4320 }, { "epoch": 1.05, "learning_rate": 1.7279575814957952e-05, "loss": 0.2198, "step": 4322 }, { "epoch": 1.05, "learning_rate": 1.7276869796333616e-05, "loss": 0.2125, "step": 4324 }, { "epoch": 1.05, "learning_rate": 1.7274162644645013e-05, "loss": 0.2211, "step": 4326 }, { "epoch": 1.05, "learning_rate": 1.7271454360313663e-05, "loss": 0.2254, "step": 4328 }, { "epoch": 1.06, "learning_rate": 1.7268744943761264e-05, "loss": 0.2025, "step": 4330 }, { "epoch": 1.06, "learning_rate": 1.7266034395409698e-05, "loss": 0.2073, "step": 4332 }, { "epoch": 1.06, "learning_rate": 1.7263322715681015e-05, "loss": 0.215, "step": 4334 }, { "epoch": 1.06, "learning_rate": 1.7260609904997445e-05, "loss": 0.247, "step": 4336 }, { "epoch": 1.06, "learning_rate": 1.725789596378139e-05, "loss": 0.1902, "step": 4338 }, { "epoch": 1.06, "learning_rate": 1.725518089245544e-05, "loss": 0.1993, "step": 4340 }, { "epoch": 1.06, "learning_rate": 1.725246469144235e-05, "loss": 0.1685, "step": 4342 }, { "epoch": 1.06, "learning_rate": 1.7249747361165044e-05, "loss": 0.1995, "step": 4344 }, { "epoch": 1.06, "learning_rate": 1.724702890204664e-05, "loss": 0.2143, "step": 4346 }, { "epoch": 1.06, "learning_rate": 1.724430931451042e-05, "loss": 0.2039, "step": 4348 }, { "epoch": 1.06, "learning_rate": 1.7241588598979845e-05, "loss": 0.2046, "step": 4350 }, { "epoch": 1.06, "learning_rate": 1.723886675587855e-05, "loss": 0.2055, "step": 4352 }, { "epoch": 1.06, "learning_rate": 1.7236143785630347e-05, "loss": 0.174, "step": 4354 }, { "epoch": 1.06, "learning_rate": 1.7233419688659228e-05, "loss": 0.1643, "step": 4356 }, { "epoch": 1.06, "learning_rate": 1.7230694465389352e-05, "loss": 0.2063, "step": 4358 }, { "epoch": 1.06, "learning_rate": 1.7227968116245058e-05, "loss": 0.2005, "step": 4360 }, { "epoch": 1.06, "learning_rate": 1.7225240641650854e-05, "loss": 0.2329, "step": 4362 }, { "epoch": 1.06, "learning_rate": 1.7222512042031438e-05, "loss": 0.219, "step": 4364 }, { "epoch": 1.06, "learning_rate": 1.7219782317811663e-05, "loss": 0.2296, "step": 4366 }, { "epoch": 1.06, "learning_rate": 1.721705146941658e-05, "loss": 0.1989, "step": 4368 }, { "epoch": 1.06, "learning_rate": 1.721431949727139e-05, "loss": 0.2158, "step": 4370 }, { "epoch": 1.07, "learning_rate": 1.7211586401801498e-05, "loss": 0.2015, "step": 4372 }, { "epoch": 1.07, "learning_rate": 1.7208852183432455e-05, "loss": 0.2065, "step": 4374 }, { "epoch": 1.07, "learning_rate": 1.720611684259e-05, "loss": 0.1761, "step": 4376 }, { "epoch": 1.07, "learning_rate": 1.7203380379700053e-05, "loss": 0.1877, "step": 4378 }, { "epoch": 1.07, "learning_rate": 1.7200642795188697e-05, "loss": 0.1953, "step": 4380 }, { "epoch": 1.07, "learning_rate": 1.71979040894822e-05, "loss": 0.1861, "step": 4382 }, { "epoch": 1.07, "learning_rate": 1.7195164263006995e-05, "loss": 0.2139, "step": 4384 }, { "epoch": 1.07, "learning_rate": 1.7192423316189696e-05, "loss": 0.1917, "step": 4386 }, { "epoch": 1.07, "learning_rate": 1.718968124945709e-05, "loss": 0.187, "step": 4388 }, { "epoch": 1.07, "learning_rate": 1.7186938063236133e-05, "loss": 0.2014, "step": 4390 }, { "epoch": 1.07, "learning_rate": 1.7184193757953963e-05, "loss": 0.2031, "step": 4392 }, { "epoch": 1.07, "learning_rate": 1.7181448334037894e-05, "loss": 0.1763, "step": 4394 }, { "epoch": 1.07, "learning_rate": 1.7178701791915404e-05, "loss": 0.196, "step": 4396 }, { "epoch": 1.07, "learning_rate": 1.717595413201415e-05, "loss": 0.2152, "step": 4398 }, { "epoch": 1.07, "learning_rate": 1.717320535476197e-05, "loss": 0.1932, "step": 4400 }, { "epoch": 1.07, "learning_rate": 1.7170455460586858e-05, "loss": 0.2013, "step": 4402 }, { "epoch": 1.07, "learning_rate": 1.7167704449917006e-05, "loss": 0.2072, "step": 4404 }, { "epoch": 1.07, "learning_rate": 1.716495232318076e-05, "loss": 0.1828, "step": 4406 }, { "epoch": 1.07, "learning_rate": 1.7162199080806652e-05, "loss": 0.1907, "step": 4408 }, { "epoch": 1.07, "learning_rate": 1.715944472322338e-05, "loss": 0.2076, "step": 4410 }, { "epoch": 1.08, "learning_rate": 1.7156689250859817e-05, "loss": 0.193, "step": 4412 }, { "epoch": 1.08, "learning_rate": 1.7153932664145015e-05, "loss": 0.1934, "step": 4414 }, { "epoch": 1.08, "learning_rate": 1.7151174963508192e-05, "loss": 0.1976, "step": 4416 }, { "epoch": 1.08, "learning_rate": 1.7148416149378743e-05, "loss": 0.1774, "step": 4418 }, { "epoch": 1.08, "learning_rate": 1.714565622218624e-05, "loss": 0.196, "step": 4420 }, { "epoch": 1.08, "learning_rate": 1.714289518236042e-05, "loss": 0.2089, "step": 4422 }, { "epoch": 1.08, "learning_rate": 1.7140133030331205e-05, "loss": 0.1858, "step": 4424 }, { "epoch": 1.08, "learning_rate": 1.713736976652868e-05, "loss": 0.1916, "step": 4426 }, { "epoch": 1.08, "learning_rate": 1.71346053913831e-05, "loss": 0.2183, "step": 4428 }, { "epoch": 1.08, "learning_rate": 1.713183990532491e-05, "loss": 0.2112, "step": 4430 }, { "epoch": 1.08, "learning_rate": 1.712907330878471e-05, "loss": 0.1865, "step": 4432 }, { "epoch": 1.08, "learning_rate": 1.7126305602193282e-05, "loss": 0.1848, "step": 4434 }, { "epoch": 1.08, "learning_rate": 1.7123536785981582e-05, "loss": 0.2214, "step": 4436 }, { "epoch": 1.08, "learning_rate": 1.712076686058073e-05, "loss": 0.1895, "step": 4438 }, { "epoch": 1.08, "learning_rate": 1.7117995826422028e-05, "loss": 0.2154, "step": 4440 }, { "epoch": 1.08, "learning_rate": 1.7115223683936948e-05, "loss": 0.201, "step": 4442 }, { "epoch": 1.08, "learning_rate": 1.711245043355713e-05, "loss": 0.2141, "step": 4444 }, { "epoch": 1.08, "learning_rate": 1.7109676075714396e-05, "loss": 0.1983, "step": 4446 }, { "epoch": 1.08, "learning_rate": 1.710690061084073e-05, "loss": 0.2037, "step": 4448 }, { "epoch": 1.08, "learning_rate": 1.7104124039368296e-05, "loss": 0.1779, "step": 4450 }, { "epoch": 1.08, "learning_rate": 1.7101346361729426e-05, "loss": 0.208, "step": 4452 }, { "epoch": 1.09, "learning_rate": 1.7098567578356626e-05, "loss": 0.2189, "step": 4454 }, { "epoch": 1.09, "learning_rate": 1.709578768968258e-05, "loss": 0.1942, "step": 4456 }, { "epoch": 1.09, "learning_rate": 1.7093006696140123e-05, "loss": 0.1858, "step": 4458 }, { "epoch": 1.09, "learning_rate": 1.7090224598162287e-05, "loss": 0.1859, "step": 4460 }, { "epoch": 1.09, "learning_rate": 1.7087441396182267e-05, "loss": 0.1605, "step": 4462 }, { "epoch": 1.09, "learning_rate": 1.708465709063342e-05, "loss": 0.2249, "step": 4464 }, { "epoch": 1.09, "learning_rate": 1.70818716819493e-05, "loss": 0.2052, "step": 4466 }, { "epoch": 1.09, "learning_rate": 1.7079085170563602e-05, "loss": 0.1763, "step": 4468 }, { "epoch": 1.09, "learning_rate": 1.707629755691021e-05, "loss": 0.1703, "step": 4470 }, { "epoch": 1.09, "learning_rate": 1.707350884142318e-05, "loss": 0.1929, "step": 4472 }, { "epoch": 1.09, "learning_rate": 1.7070719024536736e-05, "loss": 0.1979, "step": 4474 }, { "epoch": 1.09, "learning_rate": 1.7067928106685273e-05, "loss": 0.1613, "step": 4476 }, { "epoch": 1.09, "learning_rate": 1.7065136088303356e-05, "loss": 0.192, "step": 4478 }, { "epoch": 1.09, "learning_rate": 1.7062342969825724e-05, "loss": 0.1922, "step": 4480 }, { "epoch": 1.09, "learning_rate": 1.705954875168729e-05, "loss": 0.1291, "step": 4482 }, { "epoch": 1.09, "learning_rate": 1.7056753434323134e-05, "loss": 0.2087, "step": 4484 }, { "epoch": 1.09, "learning_rate": 1.7053957018168506e-05, "loss": 0.2033, "step": 4486 }, { "epoch": 1.09, "learning_rate": 1.7051159503658833e-05, "loss": 0.1272, "step": 4488 }, { "epoch": 1.09, "learning_rate": 1.704836089122971e-05, "loss": 0.1954, "step": 4490 }, { "epoch": 1.09, "learning_rate": 1.7045561181316894e-05, "loss": 0.1574, "step": 4492 }, { "epoch": 1.1, "learning_rate": 1.7042760374356327e-05, "loss": 0.1813, "step": 4494 }, { "epoch": 1.1, "learning_rate": 1.7039958470784125e-05, "loss": 0.1815, "step": 4496 }, { "epoch": 1.1, "learning_rate": 1.7037155471036547e-05, "loss": 0.1988, "step": 4498 }, { "epoch": 1.1, "learning_rate": 1.7034351375550055e-05, "loss": 0.1873, "step": 4500 }, { "epoch": 1.1, "learning_rate": 1.7031546184761264e-05, "loss": 0.1639, "step": 4502 }, { "epoch": 1.1, "learning_rate": 1.7028739899106966e-05, "loss": 0.1751, "step": 4504 }, { "epoch": 1.1, "learning_rate": 1.702593251902412e-05, "loss": 0.1922, "step": 4506 }, { "epoch": 1.1, "learning_rate": 1.7023124044949855e-05, "loss": 0.1884, "step": 4508 }, { "epoch": 1.1, "learning_rate": 1.7020314477321472e-05, "loss": 0.2247, "step": 4510 }, { "epoch": 1.1, "learning_rate": 1.7017503816576444e-05, "loss": 0.1862, "step": 4512 }, { "epoch": 1.1, "learning_rate": 1.7014692063152408e-05, "loss": 0.2017, "step": 4514 }, { "epoch": 1.1, "learning_rate": 1.7011879217487183e-05, "loss": 0.2115, "step": 4516 }, { "epoch": 1.1, "learning_rate": 1.7009065280018745e-05, "loss": 0.2063, "step": 4518 }, { "epoch": 1.1, "learning_rate": 1.700625025118525e-05, "loss": 0.1763, "step": 4520 }, { "epoch": 1.1, "learning_rate": 1.700343413142501e-05, "loss": 0.1738, "step": 4522 }, { "epoch": 1.1, "learning_rate": 1.7000616921176528e-05, "loss": 0.1802, "step": 4524 }, { "epoch": 1.1, "learning_rate": 1.6997798620878457e-05, "loss": 0.2135, "step": 4526 }, { "epoch": 1.1, "learning_rate": 1.6994979230969634e-05, "loss": 0.185, "step": 4528 }, { "epoch": 1.1, "learning_rate": 1.6992158751889055e-05, "loss": 0.2059, "step": 4530 }, { "epoch": 1.1, "learning_rate": 1.698933718407589e-05, "loss": 0.1831, "step": 4532 }, { "epoch": 1.1, "learning_rate": 1.6986514527969486e-05, "loss": 0.1846, "step": 4534 }, { "epoch": 1.11, "learning_rate": 1.6983690784009343e-05, "loss": 0.1925, "step": 4536 }, { "epoch": 1.11, "learning_rate": 1.6980865952635143e-05, "loss": 0.1735, "step": 4538 }, { "epoch": 1.11, "learning_rate": 1.697804003428673e-05, "loss": 0.1924, "step": 4540 }, { "epoch": 1.11, "learning_rate": 1.697521302940413e-05, "loss": 0.162, "step": 4542 }, { "epoch": 1.11, "learning_rate": 1.697238493842752e-05, "loss": 0.2009, "step": 4544 }, { "epoch": 1.11, "learning_rate": 1.6969555761797266e-05, "loss": 0.2054, "step": 4546 }, { "epoch": 1.11, "learning_rate": 1.6966725499953885e-05, "loss": 0.1751, "step": 4548 }, { "epoch": 1.11, "learning_rate": 1.696389415333807e-05, "loss": 0.1728, "step": 4550 }, { "epoch": 1.11, "learning_rate": 1.6961061722390683e-05, "loss": 0.1744, "step": 4552 }, { "epoch": 1.11, "learning_rate": 1.6958228207552755e-05, "loss": 0.2016, "step": 4554 }, { "epoch": 1.11, "learning_rate": 1.6955393609265494e-05, "loss": 0.1812, "step": 4556 }, { "epoch": 1.11, "learning_rate": 1.695255792797026e-05, "loss": 0.1804, "step": 4558 }, { "epoch": 1.11, "learning_rate": 1.6949721164108594e-05, "loss": 0.1967, "step": 4560 }, { "epoch": 1.11, "learning_rate": 1.69468833181222e-05, "loss": 0.1877, "step": 4562 }, { "epoch": 1.11, "learning_rate": 1.694404439045295e-05, "loss": 0.1795, "step": 4564 }, { "epoch": 1.11, "learning_rate": 1.694120438154289e-05, "loss": 0.232, "step": 4566 }, { "epoch": 1.11, "learning_rate": 1.6938363291834234e-05, "loss": 0.2077, "step": 4568 }, { "epoch": 1.11, "learning_rate": 1.693552112176936e-05, "loss": 0.1965, "step": 4570 }, { "epoch": 1.11, "learning_rate": 1.6932677871790807e-05, "loss": 0.1888, "step": 4572 }, { "epoch": 1.11, "learning_rate": 1.6929833542341303e-05, "loss": 0.163, "step": 4574 }, { "epoch": 1.12, "learning_rate": 1.6926988133863728e-05, "loss": 0.1926, "step": 4576 }, { "epoch": 1.12, "learning_rate": 1.6924141646801126e-05, "loss": 0.2078, "step": 4578 }, { "epoch": 1.12, "learning_rate": 1.6921294081596727e-05, "loss": 0.202, "step": 4580 }, { "epoch": 1.12, "learning_rate": 1.6918445438693912e-05, "loss": 0.1746, "step": 4582 }, { "epoch": 1.12, "learning_rate": 1.6915595718536244e-05, "loss": 0.2046, "step": 4584 }, { "epoch": 1.12, "learning_rate": 1.6912744921567437e-05, "loss": 0.162, "step": 4586 }, { "epoch": 1.12, "learning_rate": 1.690989304823139e-05, "loss": 0.2115, "step": 4588 }, { "epoch": 1.12, "learning_rate": 1.6907040098972153e-05, "loss": 0.1968, "step": 4590 }, { "epoch": 1.12, "learning_rate": 1.6904186074233962e-05, "loss": 0.1774, "step": 4592 }, { "epoch": 1.12, "learning_rate": 1.6901330974461203e-05, "loss": 0.1958, "step": 4594 }, { "epoch": 1.12, "learning_rate": 1.6898474800098438e-05, "loss": 0.2087, "step": 4596 }, { "epoch": 1.12, "learning_rate": 1.6895617551590398e-05, "loss": 0.1676, "step": 4598 }, { "epoch": 1.12, "learning_rate": 1.689275922938198e-05, "loss": 0.1793, "step": 4600 }, { "epoch": 1.12, "learning_rate": 1.6889899833918237e-05, "loss": 0.184, "step": 4602 }, { "epoch": 1.12, "learning_rate": 1.6887039365644407e-05, "loss": 0.1909, "step": 4604 }, { "epoch": 1.12, "learning_rate": 1.6884177825005886e-05, "loss": 0.2137, "step": 4606 }, { "epoch": 1.12, "learning_rate": 1.6881315212448238e-05, "loss": 0.1776, "step": 4608 }, { "epoch": 1.12, "learning_rate": 1.687845152841719e-05, "loss": 0.182, "step": 4610 }, { "epoch": 1.12, "learning_rate": 1.6875586773358645e-05, "loss": 0.1627, "step": 4612 }, { "epoch": 1.12, "learning_rate": 1.6872720947718663e-05, "loss": 0.194, "step": 4614 }, { "epoch": 1.12, "learning_rate": 1.6869854051943476e-05, "loss": 0.1551, "step": 4616 }, { "epoch": 1.13, "learning_rate": 1.686698608647948e-05, "loss": 0.1795, "step": 4618 }, { "epoch": 1.13, "learning_rate": 1.6864117051773242e-05, "loss": 0.2038, "step": 4620 }, { "epoch": 1.13, "learning_rate": 1.6861246948271494e-05, "loss": 0.1884, "step": 4622 }, { "epoch": 1.13, "learning_rate": 1.685837577642113e-05, "loss": 0.2006, "step": 4624 }, { "epoch": 1.13, "learning_rate": 1.685550353666921e-05, "loss": 0.2014, "step": 4626 }, { "epoch": 1.13, "learning_rate": 1.685263022946297e-05, "loss": 0.2005, "step": 4628 }, { "epoch": 1.13, "learning_rate": 1.6849755855249803e-05, "loss": 0.1806, "step": 4630 }, { "epoch": 1.13, "learning_rate": 1.6846880414477275e-05, "loss": 0.1985, "step": 4632 }, { "epoch": 1.13, "learning_rate": 1.6844003907593104e-05, "loss": 0.2016, "step": 4634 }, { "epoch": 1.13, "learning_rate": 1.68411263350452e-05, "loss": 0.1854, "step": 4636 }, { "epoch": 1.13, "learning_rate": 1.6838247697281604e-05, "loss": 0.2079, "step": 4638 }, { "epoch": 1.13, "learning_rate": 1.6835367994750558e-05, "loss": 0.1965, "step": 4640 }, { "epoch": 1.13, "learning_rate": 1.6832487227900448e-05, "loss": 0.1885, "step": 4642 }, { "epoch": 1.13, "learning_rate": 1.682960539717983e-05, "loss": 0.1874, "step": 4644 }, { "epoch": 1.13, "learning_rate": 1.6826722503037424e-05, "loss": 0.176, "step": 4646 }, { "epoch": 1.13, "learning_rate": 1.682383854592213e-05, "loss": 0.2045, "step": 4648 }, { "epoch": 1.13, "learning_rate": 1.682095352628299e-05, "loss": 0.1989, "step": 4650 }, { "epoch": 1.13, "learning_rate": 1.6818067444569227e-05, "loss": 0.1943, "step": 4652 }, { "epoch": 1.13, "learning_rate": 1.681518030123023e-05, "loss": 0.1746, "step": 4654 }, { "epoch": 1.13, "learning_rate": 1.6812292096715546e-05, "loss": 0.1709, "step": 4656 }, { "epoch": 1.13, "learning_rate": 1.680940283147489e-05, "loss": 0.1619, "step": 4658 }, { "epoch": 1.14, "learning_rate": 1.6806512505958148e-05, "loss": 0.2099, "step": 4660 }, { "epoch": 1.14, "learning_rate": 1.6803621120615357e-05, "loss": 0.1606, "step": 4662 }, { "epoch": 1.14, "learning_rate": 1.6800728675896737e-05, "loss": 0.1682, "step": 4664 }, { "epoch": 1.14, "learning_rate": 1.679783517225266e-05, "loss": 0.2007, "step": 4666 }, { "epoch": 1.14, "learning_rate": 1.6794940610133665e-05, "loss": 0.1717, "step": 4668 }, { "epoch": 1.14, "learning_rate": 1.6792044989990453e-05, "loss": 0.1863, "step": 4670 }, { "epoch": 1.14, "learning_rate": 1.6789148312273905e-05, "loss": 0.1652, "step": 4672 }, { "epoch": 1.14, "learning_rate": 1.678625057743505e-05, "loss": 0.1698, "step": 4674 }, { "epoch": 1.14, "learning_rate": 1.678335178592509e-05, "loss": 0.184, "step": 4676 }, { "epoch": 1.14, "learning_rate": 1.6780451938195382e-05, "loss": 0.163, "step": 4678 }, { "epoch": 1.14, "learning_rate": 1.677755103469746e-05, "loss": 0.1996, "step": 4680 }, { "epoch": 1.14, "learning_rate": 1.677464907588302e-05, "loss": 0.2059, "step": 4682 }, { "epoch": 1.14, "learning_rate": 1.6771746062203914e-05, "loss": 0.1709, "step": 4684 }, { "epoch": 1.14, "learning_rate": 1.6768841994112165e-05, "loss": 0.1587, "step": 4686 }, { "epoch": 1.14, "learning_rate": 1.676593687205996e-05, "loss": 0.1776, "step": 4688 }, { "epoch": 1.14, "learning_rate": 1.676303069649964e-05, "loss": 0.1927, "step": 4690 }, { "epoch": 1.14, "learning_rate": 1.6760123467883733e-05, "loss": 0.1881, "step": 4692 }, { "epoch": 1.14, "learning_rate": 1.6757215186664907e-05, "loss": 0.1775, "step": 4694 }, { "epoch": 1.14, "learning_rate": 1.675430585329601e-05, "loss": 0.1765, "step": 4696 }, { "epoch": 1.14, "learning_rate": 1.6751395468230042e-05, "loss": 0.2045, "step": 4698 }, { "epoch": 1.15, "learning_rate": 1.6748484031920173e-05, "loss": 0.1908, "step": 4700 }, { "epoch": 1.15, "learning_rate": 1.6745571544819735e-05, "loss": 0.1479, "step": 4702 }, { "epoch": 1.15, "learning_rate": 1.674265800738223e-05, "loss": 0.1763, "step": 4704 }, { "epoch": 1.15, "learning_rate": 1.6739743420061314e-05, "loss": 0.1842, "step": 4706 }, { "epoch": 1.15, "learning_rate": 1.673682778331081e-05, "loss": 0.1549, "step": 4708 }, { "epoch": 1.15, "learning_rate": 1.6733911097584707e-05, "loss": 0.1494, "step": 4710 }, { "epoch": 1.15, "learning_rate": 1.673099336333715e-05, "loss": 0.1831, "step": 4712 }, { "epoch": 1.15, "learning_rate": 1.6728074581022462e-05, "loss": 0.1716, "step": 4714 }, { "epoch": 1.15, "learning_rate": 1.672515475109511e-05, "loss": 0.1693, "step": 4716 }, { "epoch": 1.15, "learning_rate": 1.6722233874009745e-05, "loss": 0.1735, "step": 4718 }, { "epoch": 1.15, "learning_rate": 1.6719311950221158e-05, "loss": 0.2006, "step": 4720 }, { "epoch": 1.15, "learning_rate": 1.671638898018432e-05, "loss": 0.1862, "step": 4722 }, { "epoch": 1.15, "learning_rate": 1.6713464964354363e-05, "loss": 0.1965, "step": 4724 }, { "epoch": 1.15, "learning_rate": 1.671053990318657e-05, "loss": 0.1649, "step": 4726 }, { "epoch": 1.15, "learning_rate": 1.670761379713641e-05, "loss": 0.1991, "step": 4728 }, { "epoch": 1.15, "learning_rate": 1.670468664665948e-05, "loss": 0.1855, "step": 4730 }, { "epoch": 1.15, "learning_rate": 1.670175845221158e-05, "loss": 0.1344, "step": 4732 }, { "epoch": 1.15, "learning_rate": 1.669882921424864e-05, "loss": 0.1736, "step": 4734 }, { "epoch": 1.15, "learning_rate": 1.6695898933226768e-05, "loss": 0.163, "step": 4736 }, { "epoch": 1.15, "learning_rate": 1.669296760960223e-05, "loss": 0.2212, "step": 4738 }, { "epoch": 1.15, "learning_rate": 1.6690035243831455e-05, "loss": 0.1596, "step": 4740 }, { "epoch": 1.16, "learning_rate": 1.668710183637104e-05, "loss": 0.1738, "step": 4742 }, { "epoch": 1.16, "learning_rate": 1.668416738767773e-05, "loss": 0.1881, "step": 4744 }, { "epoch": 1.16, "learning_rate": 1.668123189820845e-05, "loss": 0.1818, "step": 4746 }, { "epoch": 1.16, "learning_rate": 1.6678295368420273e-05, "loss": 0.208, "step": 4748 }, { "epoch": 1.16, "learning_rate": 1.667535779877044e-05, "loss": 0.1328, "step": 4750 }, { "epoch": 1.16, "learning_rate": 1.6672419189716354e-05, "loss": 0.1954, "step": 4752 }, { "epoch": 1.16, "learning_rate": 1.6669479541715577e-05, "loss": 0.2125, "step": 4754 }, { "epoch": 1.16, "learning_rate": 1.666653885522584e-05, "loss": 0.1761, "step": 4756 }, { "epoch": 1.16, "learning_rate": 1.6663597130705023e-05, "loss": 0.1873, "step": 4758 }, { "epoch": 1.16, "learning_rate": 1.6660654368611183e-05, "loss": 0.1924, "step": 4760 }, { "epoch": 1.16, "learning_rate": 1.6657710569402522e-05, "loss": 0.1692, "step": 4762 }, { "epoch": 1.16, "learning_rate": 1.665476573353742e-05, "loss": 0.1395, "step": 4764 }, { "epoch": 1.16, "learning_rate": 1.6651819861474402e-05, "loss": 0.1405, "step": 4766 }, { "epoch": 1.16, "learning_rate": 1.6648872953672172e-05, "loss": 0.1608, "step": 4768 }, { "epoch": 1.16, "learning_rate": 1.6645925010589576e-05, "loss": 0.173, "step": 4770 }, { "epoch": 1.16, "learning_rate": 1.664297603268564e-05, "loss": 0.1847, "step": 4772 }, { "epoch": 1.16, "learning_rate": 1.664002602041954e-05, "loss": 0.1991, "step": 4774 }, { "epoch": 1.16, "learning_rate": 1.6637074974250614e-05, "loss": 0.1877, "step": 4776 }, { "epoch": 1.16, "learning_rate": 1.663412289463836e-05, "loss": 0.1613, "step": 4778 }, { "epoch": 1.16, "learning_rate": 1.6631169782042446e-05, "loss": 0.1652, "step": 4780 }, { "epoch": 1.17, "learning_rate": 1.662821563692269e-05, "loss": 0.2009, "step": 4782 }, { "epoch": 1.17, "learning_rate": 1.6625260459739077e-05, "loss": 0.1781, "step": 4784 }, { "epoch": 1.17, "learning_rate": 1.6622304250951745e-05, "loss": 0.1937, "step": 4786 }, { "epoch": 1.17, "learning_rate": 1.661934701102101e-05, "loss": 0.19, "step": 4788 }, { "epoch": 1.17, "learning_rate": 1.6616388740407325e-05, "loss": 0.1968, "step": 4790 }, { "epoch": 1.17, "learning_rate": 1.661342943957132e-05, "loss": 0.171, "step": 4792 }, { "epoch": 1.17, "learning_rate": 1.661046910897378e-05, "loss": 0.1429, "step": 4794 }, { "epoch": 1.17, "learning_rate": 1.660750774907565e-05, "loss": 0.1749, "step": 4796 }, { "epoch": 1.17, "learning_rate": 1.660454536033804e-05, "loss": 0.1703, "step": 4798 }, { "epoch": 1.17, "learning_rate": 1.6601581943222217e-05, "loss": 0.1988, "step": 4800 }, { "epoch": 1.17, "learning_rate": 1.6598617498189604e-05, "loss": 0.1599, "step": 4802 }, { "epoch": 1.17, "learning_rate": 1.6595652025701788e-05, "loss": 0.18, "step": 4804 }, { "epoch": 1.17, "learning_rate": 1.6592685526220516e-05, "loss": 0.1812, "step": 4806 }, { "epoch": 1.17, "learning_rate": 1.65897180002077e-05, "loss": 0.1677, "step": 4808 }, { "epoch": 1.17, "learning_rate": 1.65867494481254e-05, "loss": 0.169, "step": 4810 }, { "epoch": 1.17, "learning_rate": 1.6583779870435845e-05, "loss": 0.1723, "step": 4812 }, { "epoch": 1.17, "learning_rate": 1.658080926760142e-05, "loss": 0.1933, "step": 4814 }, { "epoch": 1.17, "learning_rate": 1.6577837640084672e-05, "loss": 0.1681, "step": 4816 }, { "epoch": 1.17, "learning_rate": 1.65748649883483e-05, "loss": 0.2008, "step": 4818 }, { "epoch": 1.17, "learning_rate": 1.657189131285518e-05, "loss": 0.1612, "step": 4820 }, { "epoch": 1.17, "learning_rate": 1.6568916614068323e-05, "loss": 0.1622, "step": 4822 }, { "epoch": 1.18, "learning_rate": 1.6565940892450925e-05, "loss": 0.1727, "step": 4824 }, { "epoch": 1.18, "learning_rate": 1.6562964148466318e-05, "loss": 0.1719, "step": 4826 }, { "epoch": 1.18, "learning_rate": 1.655998638257801e-05, "loss": 0.1945, "step": 4828 }, { "epoch": 1.18, "learning_rate": 1.6557007595249663e-05, "loss": 0.1706, "step": 4830 }, { "epoch": 1.18, "learning_rate": 1.655402778694509e-05, "loss": 0.1743, "step": 4832 }, { "epoch": 1.18, "learning_rate": 1.6551046958128275e-05, "loss": 0.19, "step": 4834 }, { "epoch": 1.18, "learning_rate": 1.654806510926336e-05, "loss": 0.1869, "step": 4836 }, { "epoch": 1.18, "learning_rate": 1.654508224081463e-05, "loss": 0.1662, "step": 4838 }, { "epoch": 1.18, "learning_rate": 1.654209835324655e-05, "loss": 0.1768, "step": 4840 }, { "epoch": 1.18, "learning_rate": 1.6539113447023733e-05, "loss": 0.1614, "step": 4842 }, { "epoch": 1.18, "learning_rate": 1.6536127522610946e-05, "loss": 0.169, "step": 4844 }, { "epoch": 1.18, "learning_rate": 1.6533140580473128e-05, "loss": 0.1682, "step": 4846 }, { "epoch": 1.18, "learning_rate": 1.6530152621075364e-05, "loss": 0.1838, "step": 4848 }, { "epoch": 1.18, "learning_rate": 1.6527163644882902e-05, "loss": 0.1891, "step": 4850 }, { "epoch": 1.18, "learning_rate": 1.652417365236115e-05, "loss": 0.1951, "step": 4852 }, { "epoch": 1.18, "learning_rate": 1.6521182643975675e-05, "loss": 0.1473, "step": 4854 }, { "epoch": 1.18, "learning_rate": 1.651819062019219e-05, "loss": 0.1976, "step": 4856 }, { "epoch": 1.18, "learning_rate": 1.651519758147659e-05, "loss": 0.1624, "step": 4858 }, { "epoch": 1.18, "learning_rate": 1.6512203528294902e-05, "loss": 0.1665, "step": 4860 }, { "epoch": 1.18, "learning_rate": 1.650920846111333e-05, "loss": 0.1799, "step": 4862 }, { "epoch": 1.19, "learning_rate": 1.650621238039823e-05, "loss": 0.181, "step": 4864 }, { "epoch": 1.19, "learning_rate": 1.650321528661611e-05, "loss": 0.1849, "step": 4866 }, { "epoch": 1.19, "learning_rate": 1.650021718023364e-05, "loss": 0.1286, "step": 4868 }, { "epoch": 1.19, "learning_rate": 1.6497218061717654e-05, "loss": 0.1518, "step": 4870 }, { "epoch": 1.19, "learning_rate": 1.649421793153513e-05, "loss": 0.1738, "step": 4872 }, { "epoch": 1.19, "learning_rate": 1.6491216790153214e-05, "loss": 0.1603, "step": 4874 }, { "epoch": 1.19, "learning_rate": 1.6488214638039213e-05, "loss": 0.1952, "step": 4876 }, { "epoch": 1.19, "learning_rate": 1.6485211475660577e-05, "loss": 0.1931, "step": 4878 }, { "epoch": 1.19, "learning_rate": 1.6482207303484924e-05, "loss": 0.1886, "step": 4880 }, { "epoch": 1.19, "learning_rate": 1.6479202121980024e-05, "loss": 0.1882, "step": 4882 }, { "epoch": 1.19, "learning_rate": 1.6476195931613815e-05, "loss": 0.165, "step": 4884 }, { "epoch": 1.19, "learning_rate": 1.6473188732854374e-05, "loss": 0.1808, "step": 4886 }, { "epoch": 1.19, "learning_rate": 1.647018052616995e-05, "loss": 0.1829, "step": 4888 }, { "epoch": 1.19, "learning_rate": 1.6467171312028945e-05, "loss": 0.1655, "step": 4890 }, { "epoch": 1.19, "learning_rate": 1.6464161090899914e-05, "loss": 0.1568, "step": 4892 }, { "epoch": 1.19, "learning_rate": 1.646114986325157e-05, "loss": 0.1608, "step": 4894 }, { "epoch": 1.19, "learning_rate": 1.6458137629552785e-05, "loss": 0.1738, "step": 4896 }, { "epoch": 1.19, "learning_rate": 1.645512439027259e-05, "loss": 0.1875, "step": 4898 }, { "epoch": 1.19, "learning_rate": 1.6452110145880167e-05, "loss": 0.1516, "step": 4900 }, { "epoch": 1.19, "learning_rate": 1.6449094896844856e-05, "loss": 0.1574, "step": 4902 }, { "epoch": 1.19, "learning_rate": 1.644607864363616e-05, "loss": 0.1701, "step": 4904 }, { "epoch": 1.2, "learning_rate": 1.6443061386723722e-05, "loss": 0.1677, "step": 4906 }, { "epoch": 1.2, "learning_rate": 1.6440043126577364e-05, "loss": 0.2029, "step": 4908 }, { "epoch": 1.2, "learning_rate": 1.6437023863667043e-05, "loss": 0.162, "step": 4910 }, { "epoch": 1.2, "learning_rate": 1.6434003598462886e-05, "loss": 0.1602, "step": 4912 }, { "epoch": 1.2, "learning_rate": 1.643098233143517e-05, "loss": 0.1662, "step": 4914 }, { "epoch": 1.2, "learning_rate": 1.642796006305433e-05, "loss": 0.163, "step": 4916 }, { "epoch": 1.2, "learning_rate": 1.6424936793790954e-05, "loss": 0.1472, "step": 4918 }, { "epoch": 1.2, "learning_rate": 1.6421912524115794e-05, "loss": 0.175, "step": 4920 }, { "epoch": 1.2, "learning_rate": 1.6418887254499744e-05, "loss": 0.1747, "step": 4922 }, { "epoch": 1.2, "learning_rate": 1.6415860985413865e-05, "loss": 0.15, "step": 4924 }, { "epoch": 1.2, "learning_rate": 1.641283371732937e-05, "loss": 0.1647, "step": 4926 }, { "epoch": 1.2, "learning_rate": 1.6409805450717633e-05, "loss": 0.1705, "step": 4928 }, { "epoch": 1.2, "learning_rate": 1.6406776186050168e-05, "loss": 0.1574, "step": 4930 }, { "epoch": 1.2, "learning_rate": 1.6403745923798666e-05, "loss": 0.1969, "step": 4932 }, { "epoch": 1.2, "learning_rate": 1.6400714664434957e-05, "loss": 0.1738, "step": 4934 }, { "epoch": 1.2, "learning_rate": 1.6397682408431025e-05, "loss": 0.1752, "step": 4936 }, { "epoch": 1.2, "learning_rate": 1.6394649156259027e-05, "loss": 0.1621, "step": 4938 }, { "epoch": 1.2, "learning_rate": 1.6391614908391254e-05, "loss": 0.1694, "step": 4940 }, { "epoch": 1.2, "learning_rate": 1.6388579665300165e-05, "loss": 0.1404, "step": 4942 }, { "epoch": 1.2, "learning_rate": 1.6385543427458374e-05, "loss": 0.1475, "step": 4944 }, { "epoch": 1.21, "learning_rate": 1.638250619533864e-05, "loss": 0.1527, "step": 4946 }, { "epoch": 1.21, "learning_rate": 1.6379467969413888e-05, "loss": 0.1312, "step": 4948 }, { "epoch": 1.21, "learning_rate": 1.637642875015719e-05, "loss": 0.1693, "step": 4950 }, { "epoch": 1.21, "learning_rate": 1.6373388538041778e-05, "loss": 0.1595, "step": 4952 }, { "epoch": 1.21, "learning_rate": 1.637034733354104e-05, "loss": 0.1671, "step": 4954 }, { "epoch": 1.21, "learning_rate": 1.6367305137128507e-05, "loss": 0.1916, "step": 4956 }, { "epoch": 1.21, "learning_rate": 1.6364261949277876e-05, "loss": 0.1269, "step": 4958 }, { "epoch": 1.21, "learning_rate": 1.636121777046299e-05, "loss": 0.211, "step": 4960 }, { "epoch": 1.21, "learning_rate": 1.635817260115786e-05, "loss": 0.178, "step": 4962 }, { "epoch": 1.21, "learning_rate": 1.6355126441836637e-05, "loss": 0.1628, "step": 4964 }, { "epoch": 1.21, "learning_rate": 1.635207929297363e-05, "loss": 0.1891, "step": 4966 }, { "epoch": 1.21, "learning_rate": 1.6349031155043303e-05, "loss": 0.1496, "step": 4968 }, { "epoch": 1.21, "learning_rate": 1.6345982028520284e-05, "loss": 0.1683, "step": 4970 }, { "epoch": 1.21, "learning_rate": 1.6342931913879328e-05, "loss": 0.1483, "step": 4972 }, { "epoch": 1.21, "learning_rate": 1.6339880811595377e-05, "loss": 0.1297, "step": 4974 }, { "epoch": 1.21, "learning_rate": 1.63368287221435e-05, "loss": 0.1338, "step": 4976 }, { "epoch": 1.21, "learning_rate": 1.6333775645998938e-05, "loss": 0.1618, "step": 4978 }, { "epoch": 1.21, "learning_rate": 1.6330721583637078e-05, "loss": 0.1563, "step": 4980 }, { "epoch": 1.21, "learning_rate": 1.632766653553346e-05, "loss": 0.168, "step": 4982 }, { "epoch": 1.21, "learning_rate": 1.632461050216377e-05, "loss": 0.1493, "step": 4984 }, { "epoch": 1.21, "learning_rate": 1.632155348400387e-05, "loss": 0.1942, "step": 4986 }, { "epoch": 1.22, "learning_rate": 1.6318495481529746e-05, "loss": 0.16, "step": 4988 }, { "epoch": 1.22, "learning_rate": 1.6315436495217567e-05, "loss": 0.1347, "step": 4990 }, { "epoch": 1.22, "learning_rate": 1.6312376525543633e-05, "loss": 0.1761, "step": 4992 }, { "epoch": 1.22, "learning_rate": 1.63093155729844e-05, "loss": 0.1847, "step": 4994 }, { "epoch": 1.22, "learning_rate": 1.630625363801649e-05, "loss": 0.1803, "step": 4996 }, { "epoch": 1.22, "learning_rate": 1.630319072111667e-05, "loss": 0.1562, "step": 4998 }, { "epoch": 1.22, "learning_rate": 1.6300126822761853e-05, "loss": 0.1568, "step": 5000 }, { "epoch": 1.22, "learning_rate": 1.629706194342912e-05, "loss": 0.1618, "step": 5002 }, { "epoch": 1.22, "learning_rate": 1.6293996083595683e-05, "loss": 0.1238, "step": 5004 }, { "epoch": 1.22, "learning_rate": 1.629092924373893e-05, "loss": 0.1588, "step": 5006 }, { "epoch": 1.22, "learning_rate": 1.6287861424336392e-05, "loss": 0.1736, "step": 5008 }, { "epoch": 1.22, "learning_rate": 1.628479262586575e-05, "loss": 0.1463, "step": 5010 }, { "epoch": 1.22, "learning_rate": 1.6281722848804836e-05, "loss": 0.1548, "step": 5012 }, { "epoch": 1.22, "learning_rate": 1.627865209363164e-05, "loss": 0.1759, "step": 5014 }, { "epoch": 1.22, "learning_rate": 1.62755803608243e-05, "loss": 0.1406, "step": 5016 }, { "epoch": 1.22, "learning_rate": 1.627250765086112e-05, "loss": 0.1887, "step": 5018 }, { "epoch": 1.22, "learning_rate": 1.6269433964220525e-05, "loss": 0.1939, "step": 5020 }, { "epoch": 1.22, "learning_rate": 1.6266359301381128e-05, "loss": 0.1574, "step": 5022 }, { "epoch": 1.22, "learning_rate": 1.6263283662821666e-05, "loss": 0.1493, "step": 5024 }, { "epoch": 1.22, "learning_rate": 1.626020704902105e-05, "loss": 0.1571, "step": 5026 }, { "epoch": 1.23, "learning_rate": 1.625712946045832e-05, "loss": 0.1496, "step": 5028 }, { "epoch": 1.23, "learning_rate": 1.6254050897612694e-05, "loss": 0.1453, "step": 5030 }, { "epoch": 1.23, "learning_rate": 1.6250971360963515e-05, "loss": 0.1798, "step": 5032 }, { "epoch": 1.23, "learning_rate": 1.62478908509903e-05, "loss": 0.1395, "step": 5034 }, { "epoch": 1.23, "learning_rate": 1.62448093681727e-05, "loss": 0.167, "step": 5036 }, { "epoch": 1.23, "learning_rate": 1.6241726912990534e-05, "loss": 0.1424, "step": 5038 }, { "epoch": 1.23, "learning_rate": 1.623864348592376e-05, "loss": 0.1638, "step": 5040 }, { "epoch": 1.23, "learning_rate": 1.6235559087452485e-05, "loss": 0.1719, "step": 5042 }, { "epoch": 1.23, "learning_rate": 1.6232473718056986e-05, "loss": 0.1934, "step": 5044 }, { "epoch": 1.23, "learning_rate": 1.6229387378217665e-05, "loss": 0.186, "step": 5046 }, { "epoch": 1.23, "learning_rate": 1.6226300068415096e-05, "loss": 0.1315, "step": 5048 }, { "epoch": 1.23, "learning_rate": 1.622321178913e-05, "loss": 0.1764, "step": 5050 }, { "epoch": 1.23, "learning_rate": 1.622012254084324e-05, "loss": 0.1671, "step": 5052 }, { "epoch": 1.23, "learning_rate": 1.6217032324035837e-05, "loss": 0.1717, "step": 5054 }, { "epoch": 1.23, "learning_rate": 1.6213941139188963e-05, "loss": 0.1749, "step": 5056 }, { "epoch": 1.23, "learning_rate": 1.6210848986783933e-05, "loss": 0.149, "step": 5058 }, { "epoch": 1.23, "learning_rate": 1.620775586730223e-05, "loss": 0.1757, "step": 5060 }, { "epoch": 1.23, "learning_rate": 1.6204661781225464e-05, "loss": 0.1637, "step": 5062 }, { "epoch": 1.23, "learning_rate": 1.6201566729035418e-05, "loss": 0.1762, "step": 5064 }, { "epoch": 1.23, "learning_rate": 1.6198470711214008e-05, "loss": 0.14, "step": 5066 }, { "epoch": 1.23, "learning_rate": 1.6195373728243312e-05, "loss": 0.146, "step": 5068 }, { "epoch": 1.24, "learning_rate": 1.6192275780605556e-05, "loss": 0.1846, "step": 5070 }, { "epoch": 1.24, "learning_rate": 1.6189176868783105e-05, "loss": 0.1927, "step": 5072 }, { "epoch": 1.24, "learning_rate": 1.6186076993258493e-05, "loss": 0.1685, "step": 5074 }, { "epoch": 1.24, "learning_rate": 1.618297615451439e-05, "loss": 0.1297, "step": 5076 }, { "epoch": 1.24, "learning_rate": 1.6179874353033618e-05, "loss": 0.1731, "step": 5078 }, { "epoch": 1.24, "learning_rate": 1.6176771589299156e-05, "loss": 0.1626, "step": 5080 }, { "epoch": 1.24, "learning_rate": 1.6173667863794124e-05, "loss": 0.2089, "step": 5082 }, { "epoch": 1.24, "learning_rate": 1.61705631770018e-05, "loss": 0.1399, "step": 5084 }, { "epoch": 1.24, "learning_rate": 1.6167457529405607e-05, "loss": 0.1658, "step": 5086 }, { "epoch": 1.24, "learning_rate": 1.6164350921489112e-05, "loss": 0.1587, "step": 5088 }, { "epoch": 1.24, "learning_rate": 1.6161243353736044e-05, "loss": 0.1445, "step": 5090 }, { "epoch": 1.24, "learning_rate": 1.615813482663027e-05, "loss": 0.1835, "step": 5092 }, { "epoch": 1.24, "learning_rate": 1.615502534065582e-05, "loss": 0.1539, "step": 5094 }, { "epoch": 1.24, "learning_rate": 1.6151914896296856e-05, "loss": 0.157, "step": 5096 }, { "epoch": 1.24, "learning_rate": 1.61488034940377e-05, "loss": 0.1584, "step": 5098 }, { "epoch": 1.24, "learning_rate": 1.6145691134362823e-05, "loss": 0.1792, "step": 5100 }, { "epoch": 1.24, "learning_rate": 1.6142577817756847e-05, "loss": 0.1999, "step": 5102 }, { "epoch": 1.24, "learning_rate": 1.613946354470453e-05, "loss": 0.1591, "step": 5104 }, { "epoch": 1.24, "learning_rate": 1.6136348315690794e-05, "loss": 0.1248, "step": 5106 }, { "epoch": 1.24, "learning_rate": 1.61332321312007e-05, "loss": 0.1505, "step": 5108 }, { "epoch": 1.25, "learning_rate": 1.6130114991719468e-05, "loss": 0.1611, "step": 5110 }, { "epoch": 1.25, "learning_rate": 1.6126996897732456e-05, "loss": 0.1611, "step": 5112 }, { "epoch": 1.25, "learning_rate": 1.6123877849725174e-05, "loss": 0.1464, "step": 5114 }, { "epoch": 1.25, "learning_rate": 1.6120757848183285e-05, "loss": 0.1447, "step": 5116 }, { "epoch": 1.25, "learning_rate": 1.6117636893592595e-05, "loss": 0.1368, "step": 5118 }, { "epoch": 1.25, "learning_rate": 1.611451498643906e-05, "loss": 0.1685, "step": 5120 }, { "epoch": 1.25, "learning_rate": 1.6111392127208788e-05, "loss": 0.1594, "step": 5122 }, { "epoch": 1.25, "learning_rate": 1.610826831638803e-05, "loss": 0.1472, "step": 5124 }, { "epoch": 1.25, "learning_rate": 1.610514355446319e-05, "loss": 0.1431, "step": 5126 }, { "epoch": 1.25, "learning_rate": 1.610201784192081e-05, "loss": 0.139, "step": 5128 }, { "epoch": 1.25, "learning_rate": 1.60988911792476e-05, "loss": 0.1428, "step": 5130 }, { "epoch": 1.25, "learning_rate": 1.6095763566930388e-05, "loss": 0.1593, "step": 5132 }, { "epoch": 1.25, "learning_rate": 1.6092635005456178e-05, "loss": 0.1303, "step": 5134 }, { "epoch": 1.25, "learning_rate": 1.608950549531211e-05, "loss": 0.1821, "step": 5136 }, { "epoch": 1.25, "learning_rate": 1.6086375036985477e-05, "loss": 0.1179, "step": 5138 }, { "epoch": 1.25, "learning_rate": 1.6083243630963705e-05, "loss": 0.1674, "step": 5140 }, { "epoch": 1.25, "learning_rate": 1.6080111277734384e-05, "loss": 0.1468, "step": 5142 }, { "epoch": 1.25, "learning_rate": 1.6076977977785245e-05, "loss": 0.1852, "step": 5144 }, { "epoch": 1.25, "learning_rate": 1.6073843731604172e-05, "loss": 0.1528, "step": 5146 }, { "epoch": 1.25, "learning_rate": 1.6070708539679175e-05, "loss": 0.1183, "step": 5148 }, { "epoch": 1.25, "learning_rate": 1.6067572402498447e-05, "loss": 0.1815, "step": 5150 }, { "epoch": 1.26, "learning_rate": 1.6064435320550292e-05, "loss": 0.1327, "step": 5152 }, { "epoch": 1.26, "learning_rate": 1.6061297294323187e-05, "loss": 0.1351, "step": 5154 }, { "epoch": 1.26, "learning_rate": 1.6058158324305747e-05, "loss": 0.1657, "step": 5156 }, { "epoch": 1.26, "learning_rate": 1.6055018410986732e-05, "loss": 0.1488, "step": 5158 }, { "epoch": 1.26, "learning_rate": 1.605187755485505e-05, "loss": 0.1529, "step": 5160 }, { "epoch": 1.26, "learning_rate": 1.6048735756399755e-05, "loss": 0.1373, "step": 5162 }, { "epoch": 1.26, "learning_rate": 1.6045593016110047e-05, "loss": 0.1778, "step": 5164 }, { "epoch": 1.26, "learning_rate": 1.604244933447528e-05, "loss": 0.1656, "step": 5166 }, { "epoch": 1.26, "learning_rate": 1.6039304711984947e-05, "loss": 0.1701, "step": 5168 }, { "epoch": 1.26, "learning_rate": 1.603615914912869e-05, "loss": 0.1554, "step": 5170 }, { "epoch": 1.26, "learning_rate": 1.6033012646396297e-05, "loss": 0.1599, "step": 5172 }, { "epoch": 1.26, "learning_rate": 1.6029865204277704e-05, "loss": 0.1491, "step": 5174 }, { "epoch": 1.26, "learning_rate": 1.6026716823262988e-05, "loss": 0.1581, "step": 5176 }, { "epoch": 1.26, "learning_rate": 1.602356750384238e-05, "loss": 0.1128, "step": 5178 }, { "epoch": 1.26, "learning_rate": 1.6020417246506252e-05, "loss": 0.1509, "step": 5180 }, { "epoch": 1.26, "learning_rate": 1.601726605174512e-05, "loss": 0.1501, "step": 5182 }, { "epoch": 1.26, "learning_rate": 1.6014113920049656e-05, "loss": 0.1542, "step": 5184 }, { "epoch": 1.26, "learning_rate": 1.6010960851910665e-05, "loss": 0.1658, "step": 5186 }, { "epoch": 1.26, "learning_rate": 1.60078068478191e-05, "loss": 0.1627, "step": 5188 }, { "epoch": 1.26, "learning_rate": 1.600465190826608e-05, "loss": 0.1456, "step": 5190 }, { "epoch": 1.27, "learning_rate": 1.6001496033742833e-05, "loss": 0.1601, "step": 5192 }, { "epoch": 1.27, "learning_rate": 1.5998339224740767e-05, "loss": 0.1657, "step": 5194 }, { "epoch": 1.27, "learning_rate": 1.5995181481751415e-05, "loss": 0.1278, "step": 5196 }, { "epoch": 1.27, "learning_rate": 1.5992022805266466e-05, "loss": 0.1563, "step": 5198 }, { "epoch": 1.27, "learning_rate": 1.5988863195777745e-05, "loss": 0.1191, "step": 5200 }, { "epoch": 1.27, "learning_rate": 1.5985702653777228e-05, "loss": 0.1368, "step": 5202 }, { "epoch": 1.27, "learning_rate": 1.5982541179757038e-05, "loss": 0.1869, "step": 5204 }, { "epoch": 1.27, "learning_rate": 1.5979378774209443e-05, "loss": 0.149, "step": 5206 }, { "epoch": 1.27, "learning_rate": 1.5976215437626845e-05, "loss": 0.166, "step": 5208 }, { "epoch": 1.27, "learning_rate": 1.5973051170501814e-05, "loss": 0.1343, "step": 5210 }, { "epoch": 1.27, "learning_rate": 1.5969885973327034e-05, "loss": 0.1548, "step": 5212 }, { "epoch": 1.27, "learning_rate": 1.596671984659536e-05, "loss": 0.1496, "step": 5214 }, { "epoch": 1.27, "learning_rate": 1.596355279079978e-05, "loss": 0.2045, "step": 5216 }, { "epoch": 1.27, "learning_rate": 1.596038480643343e-05, "loss": 0.1596, "step": 5218 }, { "epoch": 1.27, "learning_rate": 1.595721589398959e-05, "loss": 0.1504, "step": 5220 }, { "epoch": 1.27, "learning_rate": 1.5954046053961683e-05, "loss": 0.1384, "step": 5222 }, { "epoch": 1.27, "learning_rate": 1.5950875286843274e-05, "loss": 0.1435, "step": 5224 }, { "epoch": 1.27, "learning_rate": 1.5947703593128084e-05, "loss": 0.1179, "step": 5226 }, { "epoch": 1.27, "learning_rate": 1.594453097330996e-05, "loss": 0.1327, "step": 5228 }, { "epoch": 1.27, "learning_rate": 1.594135742788291e-05, "loss": 0.1396, "step": 5230 }, { "epoch": 1.27, "learning_rate": 1.5938182957341072e-05, "loss": 0.1433, "step": 5232 }, { "epoch": 1.28, "learning_rate": 1.5935007562178747e-05, "loss": 0.1705, "step": 5234 }, { "epoch": 1.28, "learning_rate": 1.5931831242890357e-05, "loss": 0.1546, "step": 5236 }, { "epoch": 1.28, "learning_rate": 1.5928653999970486e-05, "loss": 0.141, "step": 5238 }, { "epoch": 1.28, "learning_rate": 1.592547583391385e-05, "loss": 0.1632, "step": 5240 }, { "epoch": 1.28, "learning_rate": 1.592229674521532e-05, "loss": 0.1296, "step": 5242 }, { "epoch": 1.28, "learning_rate": 1.59191167343699e-05, "loss": 0.1326, "step": 5244 }, { "epoch": 1.28, "learning_rate": 1.5915935801872742e-05, "loss": 0.1522, "step": 5246 }, { "epoch": 1.28, "learning_rate": 1.5912753948219143e-05, "loss": 0.159, "step": 5248 }, { "epoch": 1.28, "learning_rate": 1.5909571173904543e-05, "loss": 0.1287, "step": 5250 }, { "epoch": 1.28, "learning_rate": 1.5906387479424523e-05, "loss": 0.1781, "step": 5252 }, { "epoch": 1.28, "learning_rate": 1.5903202865274807e-05, "loss": 0.1649, "step": 5254 }, { "epoch": 1.28, "learning_rate": 1.5900017331951267e-05, "loss": 0.129, "step": 5256 }, { "epoch": 1.28, "learning_rate": 1.589683087994991e-05, "loss": 0.1417, "step": 5258 }, { "epoch": 1.28, "learning_rate": 1.5893643509766895e-05, "loss": 0.141, "step": 5260 }, { "epoch": 1.28, "learning_rate": 1.589045522189852e-05, "loss": 0.1519, "step": 5262 }, { "epoch": 1.28, "learning_rate": 1.588726601684122e-05, "loss": 0.1618, "step": 5264 }, { "epoch": 1.28, "learning_rate": 1.588407589509159e-05, "loss": 0.168, "step": 5266 }, { "epoch": 1.28, "learning_rate": 1.5880884857146343e-05, "loss": 0.1387, "step": 5268 }, { "epoch": 1.28, "learning_rate": 1.587769290350236e-05, "loss": 0.1507, "step": 5270 }, { "epoch": 1.28, "learning_rate": 1.5874500034656646e-05, "loss": 0.1274, "step": 5272 }, { "epoch": 1.29, "learning_rate": 1.5871306251106354e-05, "loss": 0.168, "step": 5274 }, { "epoch": 1.29, "learning_rate": 1.5868111553348787e-05, "loss": 0.1014, "step": 5276 }, { "epoch": 1.29, "learning_rate": 1.5864915941881378e-05, "loss": 0.1572, "step": 5278 }, { "epoch": 1.29, "learning_rate": 1.5861719417201707e-05, "loss": 0.1601, "step": 5280 }, { "epoch": 1.29, "learning_rate": 1.5858521979807508e-05, "loss": 0.1349, "step": 5282 }, { "epoch": 1.29, "learning_rate": 1.585532363019663e-05, "loss": 0.1456, "step": 5284 }, { "epoch": 1.29, "learning_rate": 1.5852124368867093e-05, "loss": 0.1686, "step": 5286 }, { "epoch": 1.29, "learning_rate": 1.5848924196317042e-05, "loss": 0.1299, "step": 5288 }, { "epoch": 1.29, "learning_rate": 1.5845723113044773e-05, "loss": 0.1532, "step": 5290 }, { "epoch": 1.29, "learning_rate": 1.5842521119548706e-05, "loss": 0.16, "step": 5292 }, { "epoch": 1.29, "learning_rate": 1.583931821632743e-05, "loss": 0.1419, "step": 5294 }, { "epoch": 1.29, "learning_rate": 1.5836114403879655e-05, "loss": 0.1427, "step": 5296 }, { "epoch": 1.29, "learning_rate": 1.5832909682704246e-05, "loss": 0.1639, "step": 5298 }, { "epoch": 1.29, "learning_rate": 1.582970405330019e-05, "loss": 0.1237, "step": 5300 }, { "epoch": 1.29, "learning_rate": 1.5826497516166642e-05, "loss": 0.1219, "step": 5302 }, { "epoch": 1.29, "learning_rate": 1.5823290071802872e-05, "loss": 0.1622, "step": 5304 }, { "epoch": 1.29, "learning_rate": 1.5820081720708313e-05, "loss": 0.1287, "step": 5306 }, { "epoch": 1.29, "learning_rate": 1.581687246338252e-05, "loss": 0.1344, "step": 5308 }, { "epoch": 1.29, "learning_rate": 1.581366230032521e-05, "loss": 0.1493, "step": 5310 }, { "epoch": 1.29, "learning_rate": 1.5810451232036226e-05, "loss": 0.1333, "step": 5312 }, { "epoch": 1.29, "learning_rate": 1.5807239259015553e-05, "loss": 0.1301, "step": 5314 }, { "epoch": 1.3, "learning_rate": 1.5804026381763324e-05, "loss": 0.1138, "step": 5316 }, { "epoch": 1.3, "learning_rate": 1.58008126007798e-05, "loss": 0.112, "step": 5318 }, { "epoch": 1.3, "learning_rate": 1.5797597916565402e-05, "loss": 0.1443, "step": 5320 }, { "epoch": 1.3, "learning_rate": 1.5794382329620672e-05, "loss": 0.1454, "step": 5322 }, { "epoch": 1.3, "learning_rate": 1.579116584044631e-05, "loss": 0.1356, "step": 5324 }, { "epoch": 1.3, "learning_rate": 1.578794844954314e-05, "loss": 0.119, "step": 5326 }, { "epoch": 1.3, "learning_rate": 1.578473015741214e-05, "loss": 0.1512, "step": 5328 }, { "epoch": 1.3, "learning_rate": 1.578151096455442e-05, "loss": 0.132, "step": 5330 }, { "epoch": 1.3, "learning_rate": 1.5778290871471234e-05, "loss": 0.146, "step": 5332 }, { "epoch": 1.3, "learning_rate": 1.577506987866398e-05, "loss": 0.1439, "step": 5334 }, { "epoch": 1.3, "learning_rate": 1.5771847986634178e-05, "loss": 0.1358, "step": 5336 }, { "epoch": 1.3, "learning_rate": 1.576862519588351e-05, "loss": 0.1098, "step": 5338 }, { "epoch": 1.3, "learning_rate": 1.576540150691379e-05, "loss": 0.123, "step": 5340 }, { "epoch": 1.3, "learning_rate": 1.576217692022697e-05, "loss": 0.1648, "step": 5342 }, { "epoch": 1.3, "learning_rate": 1.5758951436325142e-05, "loss": 0.1709, "step": 5344 }, { "epoch": 1.3, "learning_rate": 1.5755725055710537e-05, "loss": 0.149, "step": 5346 }, { "epoch": 1.3, "learning_rate": 1.5752497778885532e-05, "loss": 0.1494, "step": 5348 }, { "epoch": 1.3, "learning_rate": 1.5749269606352632e-05, "loss": 0.1305, "step": 5350 }, { "epoch": 1.3, "learning_rate": 1.5746040538614493e-05, "loss": 0.1163, "step": 5352 }, { "epoch": 1.3, "learning_rate": 1.5742810576173907e-05, "loss": 0.1354, "step": 5354 }, { "epoch": 1.31, "learning_rate": 1.5739579719533797e-05, "loss": 0.1426, "step": 5356 }, { "epoch": 1.31, "learning_rate": 1.5736347969197235e-05, "loss": 0.1567, "step": 5358 }, { "epoch": 1.31, "learning_rate": 1.5733115325667435e-05, "loss": 0.1307, "step": 5360 }, { "epoch": 1.31, "learning_rate": 1.572988178944774e-05, "loss": 0.1419, "step": 5362 }, { "epoch": 1.31, "learning_rate": 1.5726647361041632e-05, "loss": 0.1363, "step": 5364 }, { "epoch": 1.31, "learning_rate": 1.572341204095274e-05, "loss": 0.1426, "step": 5366 }, { "epoch": 1.31, "learning_rate": 1.572017582968483e-05, "loss": 0.1329, "step": 5368 }, { "epoch": 1.31, "learning_rate": 1.5716938727741803e-05, "loss": 0.1406, "step": 5370 }, { "epoch": 1.31, "learning_rate": 1.5713700735627703e-05, "loss": 0.1342, "step": 5372 }, { "epoch": 1.31, "learning_rate": 1.5710461853846702e-05, "loss": 0.172, "step": 5374 }, { "epoch": 1.31, "learning_rate": 1.570722208290313e-05, "loss": 0.1423, "step": 5376 }, { "epoch": 1.31, "learning_rate": 1.5703981423301438e-05, "loss": 0.1524, "step": 5378 }, { "epoch": 1.31, "learning_rate": 1.570073987554622e-05, "loss": 0.1668, "step": 5380 }, { "epoch": 1.31, "learning_rate": 1.5697497440142214e-05, "loss": 0.1692, "step": 5382 }, { "epoch": 1.31, "learning_rate": 1.5694254117594286e-05, "loss": 0.1663, "step": 5384 }, { "epoch": 1.31, "learning_rate": 1.5691009908407454e-05, "loss": 0.11, "step": 5386 }, { "epoch": 1.31, "learning_rate": 1.5687764813086856e-05, "loss": 0.1256, "step": 5388 }, { "epoch": 1.31, "learning_rate": 1.5684518832137793e-05, "loss": 0.0977, "step": 5390 }, { "epoch": 1.31, "learning_rate": 1.5681271966065673e-05, "loss": 0.1296, "step": 5392 }, { "epoch": 1.31, "learning_rate": 1.5678024215376067e-05, "loss": 0.1265, "step": 5394 }, { "epoch": 1.31, "learning_rate": 1.5674775580574674e-05, "loss": 0.1173, "step": 5396 }, { "epoch": 1.32, "learning_rate": 1.567152606216733e-05, "loss": 0.1524, "step": 5398 }, { "epoch": 1.32, "learning_rate": 1.5668275660660012e-05, "loss": 0.1154, "step": 5400 }, { "epoch": 1.32, "learning_rate": 1.5665024376558826e-05, "loss": 0.1707, "step": 5402 }, { "epoch": 1.32, "learning_rate": 1.5661772210370028e-05, "loss": 0.1331, "step": 5404 }, { "epoch": 1.32, "learning_rate": 1.5658519162600003e-05, "loss": 0.1561, "step": 5406 }, { "epoch": 1.32, "learning_rate": 1.5655265233755276e-05, "loss": 0.1632, "step": 5408 }, { "epoch": 1.32, "learning_rate": 1.565201042434251e-05, "loss": 0.1741, "step": 5410 }, { "epoch": 1.32, "learning_rate": 1.5648754734868498e-05, "loss": 0.1444, "step": 5412 }, { "epoch": 1.32, "learning_rate": 1.5645498165840183e-05, "loss": 0.1654, "step": 5414 }, { "epoch": 1.32, "learning_rate": 1.5642240717764634e-05, "loss": 0.1294, "step": 5416 }, { "epoch": 1.32, "learning_rate": 1.563898239114906e-05, "loss": 0.1418, "step": 5418 }, { "epoch": 1.32, "learning_rate": 1.563572318650081e-05, "loss": 0.158, "step": 5420 }, { "epoch": 1.32, "learning_rate": 1.5632463104327363e-05, "loss": 0.1268, "step": 5422 }, { "epoch": 1.32, "learning_rate": 1.5629202145136343e-05, "loss": 0.1379, "step": 5424 }, { "epoch": 1.32, "learning_rate": 1.5625940309435503e-05, "loss": 0.1185, "step": 5426 }, { "epoch": 1.32, "learning_rate": 1.5622677597732735e-05, "loss": 0.1172, "step": 5428 }, { "epoch": 1.32, "learning_rate": 1.5619414010536077e-05, "loss": 0.1336, "step": 5430 }, { "epoch": 1.32, "learning_rate": 1.5616149548353682e-05, "loss": 0.1179, "step": 5432 }, { "epoch": 1.32, "learning_rate": 1.5612884211693863e-05, "loss": 0.1264, "step": 5434 }, { "epoch": 1.32, "learning_rate": 1.560961800106505e-05, "loss": 0.1161, "step": 5436 }, { "epoch": 1.33, "learning_rate": 1.5606350916975815e-05, "loss": 0.1395, "step": 5438 }, { "epoch": 1.33, "learning_rate": 1.5603082959934877e-05, "loss": 0.1206, "step": 5440 }, { "epoch": 1.33, "learning_rate": 1.5599814130451078e-05, "loss": 0.1248, "step": 5442 }, { "epoch": 1.33, "learning_rate": 1.5596544429033395e-05, "loss": 0.1335, "step": 5444 }, { "epoch": 1.33, "learning_rate": 1.5593273856190957e-05, "loss": 0.1212, "step": 5446 }, { "epoch": 1.33, "learning_rate": 1.5590002412433004e-05, "loss": 0.145, "step": 5448 }, { "epoch": 1.33, "learning_rate": 1.558673009826893e-05, "loss": 0.1463, "step": 5450 }, { "epoch": 1.33, "learning_rate": 1.558345691420826e-05, "loss": 0.15, "step": 5452 }, { "epoch": 1.33, "learning_rate": 1.558018286076066e-05, "loss": 0.11, "step": 5454 }, { "epoch": 1.33, "learning_rate": 1.557690793843591e-05, "loss": 0.176, "step": 5456 }, { "epoch": 1.33, "learning_rate": 1.5573632147743953e-05, "loss": 0.1572, "step": 5458 }, { "epoch": 1.33, "learning_rate": 1.5570355489194852e-05, "loss": 0.1118, "step": 5460 }, { "epoch": 1.33, "learning_rate": 1.5567077963298806e-05, "loss": 0.1183, "step": 5462 }, { "epoch": 1.33, "learning_rate": 1.5563799570566152e-05, "loss": 0.1191, "step": 5464 }, { "epoch": 1.33, "learning_rate": 1.556052031150736e-05, "loss": 0.1051, "step": 5466 }, { "epoch": 1.33, "learning_rate": 1.5557240186633037e-05, "loss": 0.1357, "step": 5468 }, { "epoch": 1.33, "learning_rate": 1.5553959196453922e-05, "loss": 0.1501, "step": 5470 }, { "epoch": 1.33, "learning_rate": 1.555067734148089e-05, "loss": 0.1298, "step": 5472 }, { "epoch": 1.33, "learning_rate": 1.554739462222495e-05, "loss": 0.1183, "step": 5474 }, { "epoch": 1.33, "learning_rate": 1.5544111039197255e-05, "loss": 0.1113, "step": 5476 }, { "epoch": 1.33, "learning_rate": 1.554082659290907e-05, "loss": 0.1249, "step": 5478 }, { "epoch": 1.34, "learning_rate": 1.553754128387182e-05, "loss": 0.1607, "step": 5480 }, { "epoch": 1.34, "learning_rate": 1.5534255112597047e-05, "loss": 0.1129, "step": 5482 }, { "epoch": 1.34, "learning_rate": 1.5530968079596438e-05, "loss": 0.1558, "step": 5484 }, { "epoch": 1.34, "learning_rate": 1.5527680185381803e-05, "loss": 0.1407, "step": 5486 }, { "epoch": 1.34, "learning_rate": 1.5524391430465094e-05, "loss": 0.1204, "step": 5488 }, { "epoch": 1.34, "learning_rate": 1.5521101815358396e-05, "loss": 0.1346, "step": 5490 }, { "epoch": 1.34, "learning_rate": 1.5517811340573926e-05, "loss": 0.1014, "step": 5492 }, { "epoch": 1.34, "learning_rate": 1.5514520006624038e-05, "loss": 0.1221, "step": 5494 }, { "epoch": 1.34, "learning_rate": 1.5511227814021223e-05, "loss": 0.1233, "step": 5496 }, { "epoch": 1.34, "learning_rate": 1.550793476327809e-05, "loss": 0.1165, "step": 5498 }, { "epoch": 1.34, "learning_rate": 1.55046408549074e-05, "loss": 0.1417, "step": 5500 }, { "epoch": 1.34, "learning_rate": 1.550134608942203e-05, "loss": 0.1238, "step": 5502 }, { "epoch": 1.34, "learning_rate": 1.5498050467335012e-05, "loss": 0.1351, "step": 5504 }, { "epoch": 1.34, "learning_rate": 1.5494753989159493e-05, "loss": 0.1548, "step": 5506 }, { "epoch": 1.34, "learning_rate": 1.5491456655408767e-05, "loss": 0.1529, "step": 5508 }, { "epoch": 1.34, "learning_rate": 1.548815846659624e-05, "loss": 0.1242, "step": 5510 }, { "epoch": 1.34, "learning_rate": 1.548485942323548e-05, "loss": 0.1164, "step": 5512 }, { "epoch": 1.34, "learning_rate": 1.5481559525840167e-05, "loss": 0.1456, "step": 5514 }, { "epoch": 1.34, "learning_rate": 1.5478258774924117e-05, "loss": 0.1564, "step": 5516 }, { "epoch": 1.34, "learning_rate": 1.5474957171001287e-05, "loss": 0.14, "step": 5518 }, { "epoch": 1.35, "learning_rate": 1.547165471458576e-05, "loss": 0.1094, "step": 5520 }, { "epoch": 1.35, "learning_rate": 1.546835140619175e-05, "loss": 0.1484, "step": 5522 }, { "epoch": 1.35, "learning_rate": 1.546504724633362e-05, "loss": 0.1461, "step": 5524 }, { "epoch": 1.35, "learning_rate": 1.546174223552584e-05, "loss": 0.1358, "step": 5526 }, { "epoch": 1.35, "learning_rate": 1.545843637428303e-05, "loss": 0.1604, "step": 5528 }, { "epoch": 1.35, "learning_rate": 1.5455129663119936e-05, "loss": 0.139, "step": 5530 }, { "epoch": 1.35, "learning_rate": 1.545182210255144e-05, "loss": 0.1456, "step": 5532 }, { "epoch": 1.35, "learning_rate": 1.5448513693092558e-05, "loss": 0.1082, "step": 5534 }, { "epoch": 1.35, "learning_rate": 1.5445204435258427e-05, "loss": 0.1419, "step": 5536 }, { "epoch": 1.35, "learning_rate": 1.544189432956433e-05, "loss": 0.1547, "step": 5538 }, { "epoch": 1.35, "learning_rate": 1.5438583376525676e-05, "loss": 0.1146, "step": 5540 }, { "epoch": 1.35, "learning_rate": 1.5435271576658e-05, "loss": 0.1392, "step": 5542 }, { "epoch": 1.35, "learning_rate": 1.543195893047698e-05, "loss": 0.1439, "step": 5544 }, { "epoch": 1.35, "learning_rate": 1.5428645438498416e-05, "loss": 0.1478, "step": 5546 }, { "epoch": 1.35, "learning_rate": 1.5425331101238246e-05, "loss": 0.1426, "step": 5548 }, { "epoch": 1.35, "learning_rate": 1.5422015919212538e-05, "loss": 0.1713, "step": 5550 }, { "epoch": 1.35, "learning_rate": 1.5418699892937494e-05, "loss": 0.1137, "step": 5552 }, { "epoch": 1.35, "learning_rate": 1.541538302292944e-05, "loss": 0.1055, "step": 5554 }, { "epoch": 1.35, "learning_rate": 1.541206530970484e-05, "loss": 0.1285, "step": 5556 }, { "epoch": 1.35, "learning_rate": 1.5408746753780288e-05, "loss": 0.1351, "step": 5558 }, { "epoch": 1.35, "learning_rate": 1.5405427355672506e-05, "loss": 0.1664, "step": 5560 }, { "epoch": 1.36, "learning_rate": 1.540210711589835e-05, "loss": 0.1539, "step": 5562 }, { "epoch": 1.36, "learning_rate": 1.539878603497481e-05, "loss": 0.1294, "step": 5564 }, { "epoch": 1.36, "learning_rate": 1.5395464113419003e-05, "loss": 0.1472, "step": 5566 }, { "epoch": 1.36, "learning_rate": 1.5392141351748175e-05, "loss": 0.1132, "step": 5568 }, { "epoch": 1.36, "learning_rate": 1.5388817750479706e-05, "loss": 0.1146, "step": 5570 }, { "epoch": 1.36, "learning_rate": 1.5385493310131106e-05, "loss": 0.1115, "step": 5572 }, { "epoch": 1.36, "learning_rate": 1.538216803122002e-05, "loss": 0.1531, "step": 5574 }, { "epoch": 1.36, "learning_rate": 1.5378841914264217e-05, "loss": 0.1595, "step": 5576 }, { "epoch": 1.36, "learning_rate": 1.537551495978159e-05, "loss": 0.0974, "step": 5578 }, { "epoch": 1.36, "learning_rate": 1.5372187168290186e-05, "loss": 0.1615, "step": 5580 }, { "epoch": 1.36, "learning_rate": 1.5368858540308158e-05, "loss": 0.1167, "step": 5582 }, { "epoch": 1.36, "learning_rate": 1.5365529076353803e-05, "loss": 0.1464, "step": 5584 }, { "epoch": 1.36, "learning_rate": 1.536219877694554e-05, "loss": 0.1424, "step": 5586 }, { "epoch": 1.36, "learning_rate": 1.5358867642601928e-05, "loss": 0.1117, "step": 5588 }, { "epoch": 1.36, "learning_rate": 1.5355535673841643e-05, "loss": 0.1558, "step": 5590 }, { "epoch": 1.36, "learning_rate": 1.5352202871183504e-05, "loss": 0.1282, "step": 5592 }, { "epoch": 1.36, "learning_rate": 1.534886923514645e-05, "loss": 0.1326, "step": 5594 }, { "epoch": 1.36, "learning_rate": 1.534553476624956e-05, "loss": 0.1486, "step": 5596 }, { "epoch": 1.36, "learning_rate": 1.5342199465012026e-05, "loss": 0.1142, "step": 5598 }, { "epoch": 1.36, "learning_rate": 1.5338863331953183e-05, "loss": 0.117, "step": 5600 }, { "epoch": 1.37, "learning_rate": 1.5335526367592503e-05, "loss": 0.1362, "step": 5602 }, { "epoch": 1.37, "learning_rate": 1.533218857244956e-05, "loss": 0.1052, "step": 5604 }, { "epoch": 1.37, "learning_rate": 1.5328849947044088e-05, "loss": 0.0941, "step": 5606 }, { "epoch": 1.37, "learning_rate": 1.5325510491895934e-05, "loss": 0.1076, "step": 5608 }, { "epoch": 1.37, "learning_rate": 1.532217020752507e-05, "loss": 0.1427, "step": 5610 }, { "epoch": 1.37, "learning_rate": 1.531882909445161e-05, "loss": 0.1112, "step": 5612 }, { "epoch": 1.37, "learning_rate": 1.531548715319579e-05, "loss": 0.1277, "step": 5614 }, { "epoch": 1.37, "learning_rate": 1.5312144384277966e-05, "loss": 0.0868, "step": 5616 }, { "epoch": 1.37, "learning_rate": 1.530880078821865e-05, "loss": 0.1269, "step": 5618 }, { "epoch": 1.37, "learning_rate": 1.5305456365538455e-05, "loss": 0.1594, "step": 5620 }, { "epoch": 1.37, "learning_rate": 1.5302111116758132e-05, "loss": 0.1352, "step": 5622 }, { "epoch": 1.37, "learning_rate": 1.5298765042398567e-05, "loss": 0.1203, "step": 5624 }, { "epoch": 1.37, "learning_rate": 1.5295418142980766e-05, "loss": 0.1415, "step": 5626 }, { "epoch": 1.37, "learning_rate": 1.5292070419025868e-05, "loss": 0.1195, "step": 5628 }, { "epoch": 1.37, "learning_rate": 1.528872187105514e-05, "loss": 0.1074, "step": 5630 }, { "epoch": 1.37, "learning_rate": 1.5285372499589978e-05, "loss": 0.1317, "step": 5632 }, { "epoch": 1.37, "learning_rate": 1.5282022305151897e-05, "loss": 0.1093, "step": 5634 }, { "epoch": 1.37, "learning_rate": 1.5278671288262558e-05, "loss": 0.1401, "step": 5636 }, { "epoch": 1.37, "learning_rate": 1.5275319449443733e-05, "loss": 0.1387, "step": 5638 }, { "epoch": 1.37, "learning_rate": 1.527196678921733e-05, "loss": 0.0884, "step": 5640 }, { "epoch": 1.37, "learning_rate": 1.526861330810539e-05, "loss": 0.1308, "step": 5642 }, { "epoch": 1.38, "learning_rate": 1.5265259006630065e-05, "loss": 0.0974, "step": 5644 }, { "epoch": 1.38, "learning_rate": 1.526190388531365e-05, "loss": 0.1652, "step": 5646 }, { "epoch": 1.38, "learning_rate": 1.525854794467857e-05, "loss": 0.1436, "step": 5648 }, { "epoch": 1.38, "learning_rate": 1.5255191185247362e-05, "loss": 0.1153, "step": 5650 }, { "epoch": 1.38, "learning_rate": 1.5251833607542703e-05, "loss": 0.1627, "step": 5652 }, { "epoch": 1.38, "learning_rate": 1.524847521208739e-05, "loss": 0.1208, "step": 5654 }, { "epoch": 1.38, "learning_rate": 1.5245115999404356e-05, "loss": 0.1362, "step": 5656 }, { "epoch": 1.38, "learning_rate": 1.5241755970016651e-05, "loss": 0.1521, "step": 5658 }, { "epoch": 1.38, "learning_rate": 1.523839512444746e-05, "loss": 0.119, "step": 5660 }, { "epoch": 1.38, "learning_rate": 1.5235033463220092e-05, "loss": 0.1285, "step": 5662 }, { "epoch": 1.38, "learning_rate": 1.5231670986857987e-05, "loss": 0.0939, "step": 5664 }, { "epoch": 1.38, "learning_rate": 1.5228307695884699e-05, "loss": 0.1077, "step": 5666 }, { "epoch": 1.38, "learning_rate": 1.5224943590823926e-05, "loss": 0.1158, "step": 5668 }, { "epoch": 1.38, "learning_rate": 1.5221578672199484e-05, "loss": 0.1278, "step": 5670 }, { "epoch": 1.38, "learning_rate": 1.5218212940535312e-05, "loss": 0.1345, "step": 5672 }, { "epoch": 1.38, "learning_rate": 1.5214846396355484e-05, "loss": 0.1321, "step": 5674 }, { "epoch": 1.38, "learning_rate": 1.5211479040184198e-05, "loss": 0.1213, "step": 5676 }, { "epoch": 1.38, "learning_rate": 1.5208110872545776e-05, "loss": 0.1339, "step": 5678 }, { "epoch": 1.38, "learning_rate": 1.5204741893964663e-05, "loss": 0.1485, "step": 5680 }, { "epoch": 1.38, "learning_rate": 1.5201372104965439e-05, "loss": 0.1245, "step": 5682 }, { "epoch": 1.38, "learning_rate": 1.5198001506072806e-05, "loss": 0.1426, "step": 5684 }, { "epoch": 1.39, "learning_rate": 1.5194630097811591e-05, "loss": 0.1319, "step": 5686 }, { "epoch": 1.39, "learning_rate": 1.519125788070675e-05, "loss": 0.1286, "step": 5688 }, { "epoch": 1.39, "learning_rate": 1.5187884855283358e-05, "loss": 0.1152, "step": 5690 }, { "epoch": 1.39, "learning_rate": 1.518451102206663e-05, "loss": 0.1348, "step": 5692 }, { "epoch": 1.39, "learning_rate": 1.5181136381581884e-05, "loss": 0.1375, "step": 5694 }, { "epoch": 1.39, "learning_rate": 1.5177760934354591e-05, "loss": 0.1217, "step": 5696 }, { "epoch": 1.39, "learning_rate": 1.5174384680910323e-05, "loss": 0.1631, "step": 5698 }, { "epoch": 1.39, "learning_rate": 1.5171007621774798e-05, "loss": 0.127, "step": 5700 }, { "epoch": 1.39, "learning_rate": 1.5167629757473843e-05, "loss": 0.1128, "step": 5702 }, { "epoch": 1.39, "learning_rate": 1.516425108853342e-05, "loss": 0.0953, "step": 5704 }, { "epoch": 1.39, "learning_rate": 1.5160871615479613e-05, "loss": 0.1366, "step": 5706 }, { "epoch": 1.39, "learning_rate": 1.5157491338838633e-05, "loss": 0.1395, "step": 5708 }, { "epoch": 1.39, "learning_rate": 1.5154110259136813e-05, "loss": 0.1358, "step": 5710 }, { "epoch": 1.39, "learning_rate": 1.5150728376900614e-05, "loss": 0.1308, "step": 5712 }, { "epoch": 1.39, "learning_rate": 1.5147345692656622e-05, "loss": 0.1258, "step": 5714 }, { "epoch": 1.39, "learning_rate": 1.5143962206931545e-05, "loss": 0.1314, "step": 5716 }, { "epoch": 1.39, "learning_rate": 1.5140577920252217e-05, "loss": 0.1048, "step": 5718 }, { "epoch": 1.39, "learning_rate": 1.5137192833145604e-05, "loss": 0.1118, "step": 5720 }, { "epoch": 1.39, "learning_rate": 1.5133806946138779e-05, "loss": 0.0972, "step": 5722 }, { "epoch": 1.39, "learning_rate": 1.5130420259758958e-05, "loss": 0.1365, "step": 5724 }, { "epoch": 1.4, "learning_rate": 1.5127032774533468e-05, "loss": 0.1101, "step": 5726 }, { "epoch": 1.4, "learning_rate": 1.5123644490989777e-05, "loss": 0.1262, "step": 5728 }, { "epoch": 1.4, "learning_rate": 1.5120255409655456e-05, "loss": 0.1442, "step": 5730 }, { "epoch": 1.4, "learning_rate": 1.5116865531058216e-05, "loss": 0.1052, "step": 5732 }, { "epoch": 1.4, "learning_rate": 1.5113474855725886e-05, "loss": 0.1113, "step": 5734 }, { "epoch": 1.4, "learning_rate": 1.511008338418642e-05, "loss": 0.1176, "step": 5736 }, { "epoch": 1.4, "learning_rate": 1.5106691116967895e-05, "loss": 0.1197, "step": 5738 }, { "epoch": 1.4, "learning_rate": 1.5103298054598515e-05, "loss": 0.1444, "step": 5740 }, { "epoch": 1.4, "learning_rate": 1.5099904197606605e-05, "loss": 0.0986, "step": 5742 }, { "epoch": 1.4, "learning_rate": 1.5096509546520614e-05, "loss": 0.1298, "step": 5744 }, { "epoch": 1.4, "learning_rate": 1.5093114101869115e-05, "loss": 0.1464, "step": 5746 }, { "epoch": 1.4, "learning_rate": 1.5089717864180804e-05, "loss": 0.0886, "step": 5748 }, { "epoch": 1.4, "learning_rate": 1.5086320833984504e-05, "loss": 0.1282, "step": 5750 }, { "epoch": 1.4, "learning_rate": 1.508292301180916e-05, "loss": 0.1222, "step": 5752 }, { "epoch": 1.4, "learning_rate": 1.5079524398183826e-05, "loss": 0.1261, "step": 5754 }, { "epoch": 1.4, "learning_rate": 1.5076124993637713e-05, "loss": 0.126, "step": 5756 }, { "epoch": 1.4, "learning_rate": 1.5072724798700119e-05, "loss": 0.078, "step": 5758 }, { "epoch": 1.4, "learning_rate": 1.506932381390048e-05, "loss": 0.1408, "step": 5760 }, { "epoch": 1.4, "learning_rate": 1.5065922039768364e-05, "loss": 0.1541, "step": 5762 }, { "epoch": 1.4, "learning_rate": 1.5062519476833451e-05, "loss": 0.1278, "step": 5764 }, { "epoch": 1.4, "learning_rate": 1.5059116125625538e-05, "loss": 0.1084, "step": 5766 }, { "epoch": 1.41, "learning_rate": 1.5055711986674564e-05, "loss": 0.0804, "step": 5768 }, { "epoch": 1.41, "learning_rate": 1.5052307060510574e-05, "loss": 0.0902, "step": 5770 }, { "epoch": 1.41, "learning_rate": 1.504890134766374e-05, "loss": 0.102, "step": 5772 }, { "epoch": 1.41, "learning_rate": 1.5045494848664359e-05, "loss": 0.1267, "step": 5774 }, { "epoch": 1.41, "learning_rate": 1.5042087564042849e-05, "loss": 0.1299, "step": 5776 }, { "epoch": 1.41, "learning_rate": 1.503867949432975e-05, "loss": 0.1404, "step": 5778 }, { "epoch": 1.41, "learning_rate": 1.5035270640055726e-05, "loss": 0.1019, "step": 5780 }, { "epoch": 1.41, "learning_rate": 1.5031861001751558e-05, "loss": 0.1227, "step": 5782 }, { "epoch": 1.41, "learning_rate": 1.502845057994816e-05, "loss": 0.1436, "step": 5784 }, { "epoch": 1.41, "learning_rate": 1.5025039375176552e-05, "loss": 0.1125, "step": 5786 }, { "epoch": 1.41, "learning_rate": 1.5021627387967889e-05, "loss": 0.1081, "step": 5788 }, { "epoch": 1.41, "learning_rate": 1.5018214618853443e-05, "loss": 0.0876, "step": 5790 }, { "epoch": 1.41, "learning_rate": 1.5014801068364609e-05, "loss": 0.1656, "step": 5792 }, { "epoch": 1.41, "learning_rate": 1.50113867370329e-05, "loss": 0.1332, "step": 5794 }, { "epoch": 1.41, "learning_rate": 1.5007971625389956e-05, "loss": 0.144, "step": 5796 }, { "epoch": 1.41, "learning_rate": 1.500455573396754e-05, "loss": 0.0871, "step": 5798 }, { "epoch": 1.41, "learning_rate": 1.5001139063297525e-05, "loss": 0.1082, "step": 5800 }, { "epoch": 1.41, "learning_rate": 1.4997721613911916e-05, "loss": 0.101, "step": 5802 }, { "epoch": 1.41, "learning_rate": 1.4994303386342837e-05, "loss": 0.1037, "step": 5804 }, { "epoch": 1.41, "learning_rate": 1.4990884381122527e-05, "loss": 0.1269, "step": 5806 }, { "epoch": 1.42, "learning_rate": 1.498746459878336e-05, "loss": 0.1443, "step": 5808 }, { "epoch": 1.42, "learning_rate": 1.4984044039857814e-05, "loss": 0.1405, "step": 5810 }, { "epoch": 1.42, "learning_rate": 1.4980622704878505e-05, "loss": 0.1277, "step": 5812 }, { "epoch": 1.42, "learning_rate": 1.4977200594378152e-05, "loss": 0.1255, "step": 5814 }, { "epoch": 1.42, "learning_rate": 1.4973777708889608e-05, "loss": 0.1199, "step": 5816 }, { "epoch": 1.42, "learning_rate": 1.497035404894584e-05, "loss": 0.1337, "step": 5818 }, { "epoch": 1.42, "learning_rate": 1.4966929615079946e-05, "loss": 0.1169, "step": 5820 }, { "epoch": 1.42, "learning_rate": 1.4963504407825125e-05, "loss": 0.1119, "step": 5822 }, { "epoch": 1.42, "learning_rate": 1.4960078427714715e-05, "loss": 0.1142, "step": 5824 }, { "epoch": 1.42, "learning_rate": 1.4956651675282165e-05, "loss": 0.0938, "step": 5826 }, { "epoch": 1.42, "learning_rate": 1.495322415106105e-05, "loss": 0.1182, "step": 5828 }, { "epoch": 1.42, "learning_rate": 1.4949795855585056e-05, "loss": 0.1009, "step": 5830 }, { "epoch": 1.42, "learning_rate": 1.4946366789387998e-05, "loss": 0.1079, "step": 5832 }, { "epoch": 1.42, "learning_rate": 1.494293695300381e-05, "loss": 0.1111, "step": 5834 }, { "epoch": 1.42, "learning_rate": 1.4939506346966541e-05, "loss": 0.1, "step": 5836 }, { "epoch": 1.42, "learning_rate": 1.4936074971810361e-05, "loss": 0.0966, "step": 5838 }, { "epoch": 1.42, "learning_rate": 1.4932642828069568e-05, "loss": 0.1247, "step": 5840 }, { "epoch": 1.42, "learning_rate": 1.4929209916278564e-05, "loss": 0.108, "step": 5842 }, { "epoch": 1.42, "learning_rate": 1.4925776236971885e-05, "loss": 0.0985, "step": 5844 }, { "epoch": 1.42, "learning_rate": 1.4922341790684182e-05, "loss": 0.1109, "step": 5846 }, { "epoch": 1.42, "learning_rate": 1.4918906577950223e-05, "loss": 0.1093, "step": 5848 }, { "epoch": 1.43, "learning_rate": 1.4915470599304893e-05, "loss": 0.1403, "step": 5850 }, { "epoch": 1.43, "learning_rate": 1.4912033855283205e-05, "loss": 0.1137, "step": 5852 }, { "epoch": 1.43, "learning_rate": 1.4908596346420288e-05, "loss": 0.1273, "step": 5854 }, { "epoch": 1.43, "learning_rate": 1.4905158073251385e-05, "loss": 0.1217, "step": 5856 }, { "epoch": 1.43, "learning_rate": 1.4901719036311857e-05, "loss": 0.1224, "step": 5858 }, { "epoch": 1.43, "learning_rate": 1.4898279236137199e-05, "loss": 0.1389, "step": 5860 }, { "epoch": 1.43, "learning_rate": 1.4894838673263006e-05, "loss": 0.1226, "step": 5862 }, { "epoch": 1.43, "learning_rate": 1.4891397348225004e-05, "loss": 0.1578, "step": 5864 }, { "epoch": 1.43, "learning_rate": 1.4887955261559028e-05, "loss": 0.1173, "step": 5866 }, { "epoch": 1.43, "learning_rate": 1.4884512413801049e-05, "loss": 0.0972, "step": 5868 }, { "epoch": 1.43, "learning_rate": 1.488106880548713e-05, "loss": 0.1213, "step": 5870 }, { "epoch": 1.43, "learning_rate": 1.4877624437153477e-05, "loss": 0.1422, "step": 5872 }, { "epoch": 1.43, "learning_rate": 1.4874179309336398e-05, "loss": 0.1236, "step": 5874 }, { "epoch": 1.43, "learning_rate": 1.4870733422572337e-05, "loss": 0.1105, "step": 5876 }, { "epoch": 1.43, "learning_rate": 1.4867286777397829e-05, "loss": 0.0917, "step": 5878 }, { "epoch": 1.43, "learning_rate": 1.4863839374349555e-05, "loss": 0.1134, "step": 5880 }, { "epoch": 1.43, "learning_rate": 1.48603912139643e-05, "loss": 0.1181, "step": 5882 }, { "epoch": 1.43, "learning_rate": 1.4856942296778967e-05, "loss": 0.1018, "step": 5884 }, { "epoch": 1.43, "learning_rate": 1.4853492623330574e-05, "loss": 0.1018, "step": 5886 }, { "epoch": 1.43, "learning_rate": 1.485004219415627e-05, "loss": 0.1067, "step": 5888 }, { "epoch": 1.44, "learning_rate": 1.4846591009793313e-05, "loss": 0.1152, "step": 5890 }, { "epoch": 1.44, "learning_rate": 1.4843139070779073e-05, "loss": 0.0953, "step": 5892 }, { "epoch": 1.44, "learning_rate": 1.4839686377651044e-05, "loss": 0.1135, "step": 5894 }, { "epoch": 1.44, "learning_rate": 1.4836232930946844e-05, "loss": 0.0802, "step": 5896 }, { "epoch": 1.44, "learning_rate": 1.483277873120419e-05, "loss": 0.1216, "step": 5898 }, { "epoch": 1.44, "learning_rate": 1.4829323778960933e-05, "loss": 0.1143, "step": 5900 }, { "epoch": 1.44, "learning_rate": 1.4825868074755037e-05, "loss": 0.1223, "step": 5902 }, { "epoch": 1.44, "learning_rate": 1.4822411619124581e-05, "loss": 0.1322, "step": 5904 }, { "epoch": 1.44, "learning_rate": 1.481895441260776e-05, "loss": 0.0934, "step": 5906 }, { "epoch": 1.44, "learning_rate": 1.4815496455742889e-05, "loss": 0.106, "step": 5908 }, { "epoch": 1.44, "learning_rate": 1.4812037749068396e-05, "loss": 0.0994, "step": 5910 }, { "epoch": 1.44, "learning_rate": 1.480857829312283e-05, "loss": 0.1217, "step": 5912 }, { "epoch": 1.44, "learning_rate": 1.4805118088444852e-05, "loss": 0.1033, "step": 5914 }, { "epoch": 1.44, "learning_rate": 1.480165713557325e-05, "loss": 0.1132, "step": 5916 }, { "epoch": 1.44, "learning_rate": 1.4798195435046911e-05, "loss": 0.1144, "step": 5918 }, { "epoch": 1.44, "learning_rate": 1.4794732987404853e-05, "loss": 0.1049, "step": 5920 }, { "epoch": 1.44, "learning_rate": 1.4791269793186204e-05, "loss": 0.0806, "step": 5922 }, { "epoch": 1.44, "learning_rate": 1.4787805852930216e-05, "loss": 0.132, "step": 5924 }, { "epoch": 1.44, "learning_rate": 1.4784341167176242e-05, "loss": 0.1218, "step": 5926 }, { "epoch": 1.44, "learning_rate": 1.4780875736463764e-05, "loss": 0.1071, "step": 5928 }, { "epoch": 1.44, "learning_rate": 1.4777409561332376e-05, "loss": 0.111, "step": 5930 }, { "epoch": 1.45, "learning_rate": 1.4773942642321792e-05, "loss": 0.1218, "step": 5932 }, { "epoch": 1.45, "learning_rate": 1.477047497997183e-05, "loss": 0.1157, "step": 5934 }, { "epoch": 1.45, "learning_rate": 1.4767006574822437e-05, "loss": 0.0967, "step": 5936 }, { "epoch": 1.45, "learning_rate": 1.4763537427413669e-05, "loss": 0.1028, "step": 5938 }, { "epoch": 1.45, "learning_rate": 1.4760067538285698e-05, "loss": 0.1493, "step": 5940 }, { "epoch": 1.45, "learning_rate": 1.475659690797881e-05, "loss": 0.1018, "step": 5942 }, { "epoch": 1.45, "learning_rate": 1.4753125537033415e-05, "loss": 0.1384, "step": 5944 }, { "epoch": 1.45, "learning_rate": 1.4749653425990027e-05, "loss": 0.0771, "step": 5946 }, { "epoch": 1.45, "learning_rate": 1.4746180575389284e-05, "loss": 0.1299, "step": 5948 }, { "epoch": 1.45, "learning_rate": 1.4742706985771928e-05, "loss": 0.1114, "step": 5950 }, { "epoch": 1.45, "learning_rate": 1.4739232657678832e-05, "loss": 0.0903, "step": 5952 }, { "epoch": 1.45, "learning_rate": 1.4735757591650972e-05, "loss": 0.1031, "step": 5954 }, { "epoch": 1.45, "learning_rate": 1.4732281788229443e-05, "loss": 0.1015, "step": 5956 }, { "epoch": 1.45, "learning_rate": 1.4728805247955448e-05, "loss": 0.1063, "step": 5958 }, { "epoch": 1.45, "learning_rate": 1.4725327971370325e-05, "loss": 0.1123, "step": 5960 }, { "epoch": 1.45, "learning_rate": 1.4721849959015498e-05, "loss": 0.0838, "step": 5962 }, { "epoch": 1.45, "learning_rate": 1.4718371211432527e-05, "loss": 0.1304, "step": 5964 }, { "epoch": 1.45, "learning_rate": 1.471489172916308e-05, "loss": 0.0669, "step": 5966 }, { "epoch": 1.45, "learning_rate": 1.4711411512748936e-05, "loss": 0.1024, "step": 5968 }, { "epoch": 1.45, "learning_rate": 1.4707930562731994e-05, "loss": 0.132, "step": 5970 }, { "epoch": 1.46, "learning_rate": 1.4704448879654264e-05, "loss": 0.1174, "step": 5972 }, { "epoch": 1.46, "learning_rate": 1.4700966464057868e-05, "loss": 0.0985, "step": 5974 }, { "epoch": 1.46, "learning_rate": 1.4697483316485048e-05, "loss": 0.1103, "step": 5976 }, { "epoch": 1.46, "learning_rate": 1.4693999437478158e-05, "loss": 0.1277, "step": 5978 }, { "epoch": 1.46, "learning_rate": 1.4690514827579658e-05, "loss": 0.1202, "step": 5980 }, { "epoch": 1.46, "learning_rate": 1.4687029487332137e-05, "loss": 0.1218, "step": 5982 }, { "epoch": 1.46, "learning_rate": 1.4683543417278281e-05, "loss": 0.1117, "step": 5984 }, { "epoch": 1.46, "learning_rate": 1.4680056617960903e-05, "loss": 0.1059, "step": 5986 }, { "epoch": 1.46, "learning_rate": 1.4676569089922923e-05, "loss": 0.1184, "step": 5988 }, { "epoch": 1.46, "learning_rate": 1.4673080833707376e-05, "loss": 0.093, "step": 5990 }, { "epoch": 1.46, "learning_rate": 1.4669591849857407e-05, "loss": 0.1105, "step": 5992 }, { "epoch": 1.46, "learning_rate": 1.4666102138916282e-05, "loss": 0.1102, "step": 5994 }, { "epoch": 1.46, "learning_rate": 1.4662611701427376e-05, "loss": 0.12, "step": 5996 }, { "epoch": 1.46, "learning_rate": 1.4659120537934173e-05, "loss": 0.0972, "step": 5998 }, { "epoch": 1.46, "learning_rate": 1.4655628648980273e-05, "loss": 0.1231, "step": 6000 }, { "epoch": 1.46, "learning_rate": 1.4652136035109394e-05, "loss": 0.1563, "step": 6002 }, { "epoch": 1.46, "learning_rate": 1.4648642696865363e-05, "loss": 0.088, "step": 6004 }, { "epoch": 1.46, "learning_rate": 1.4645148634792116e-05, "loss": 0.1007, "step": 6006 }, { "epoch": 1.46, "learning_rate": 1.4641653849433709e-05, "loss": 0.0866, "step": 6008 }, { "epoch": 1.46, "learning_rate": 1.4638158341334303e-05, "loss": 0.0987, "step": 6010 }, { "epoch": 1.46, "learning_rate": 1.4634662111038176e-05, "loss": 0.1255, "step": 6012 }, { "epoch": 1.47, "learning_rate": 1.463116515908972e-05, "loss": 0.0836, "step": 6014 }, { "epoch": 1.47, "learning_rate": 1.4627667486033435e-05, "loss": 0.1185, "step": 6016 }, { "epoch": 1.47, "learning_rate": 1.4624169092413935e-05, "loss": 0.1086, "step": 6018 }, { "epoch": 1.47, "learning_rate": 1.462066997877595e-05, "loss": 0.1047, "step": 6020 }, { "epoch": 1.47, "learning_rate": 1.4617170145664314e-05, "loss": 0.0858, "step": 6022 }, { "epoch": 1.47, "learning_rate": 1.4613669593623985e-05, "loss": 0.112, "step": 6024 }, { "epoch": 1.47, "learning_rate": 1.4610168323200017e-05, "loss": 0.1033, "step": 6026 }, { "epoch": 1.47, "learning_rate": 1.4606666334937589e-05, "loss": 0.1323, "step": 6028 }, { "epoch": 1.47, "learning_rate": 1.4603163629381988e-05, "loss": 0.1033, "step": 6030 }, { "epoch": 1.47, "learning_rate": 1.4599660207078614e-05, "loss": 0.1045, "step": 6032 }, { "epoch": 1.47, "learning_rate": 1.459615606857297e-05, "loss": 0.1361, "step": 6034 }, { "epoch": 1.47, "learning_rate": 1.4592651214410683e-05, "loss": 0.0967, "step": 6036 }, { "epoch": 1.47, "learning_rate": 1.4589145645137483e-05, "loss": 0.1178, "step": 6038 }, { "epoch": 1.47, "learning_rate": 1.4585639361299213e-05, "loss": 0.1041, "step": 6040 }, { "epoch": 1.47, "learning_rate": 1.4582132363441832e-05, "loss": 0.1004, "step": 6042 }, { "epoch": 1.47, "learning_rate": 1.4578624652111403e-05, "loss": 0.1071, "step": 6044 }, { "epoch": 1.47, "learning_rate": 1.4575116227854105e-05, "loss": 0.1068, "step": 6046 }, { "epoch": 1.47, "learning_rate": 1.4571607091216223e-05, "loss": 0.0852, "step": 6048 }, { "epoch": 1.47, "learning_rate": 1.4568097242744161e-05, "loss": 0.106, "step": 6050 }, { "epoch": 1.47, "learning_rate": 1.456458668298443e-05, "loss": 0.0901, "step": 6052 }, { "epoch": 1.48, "learning_rate": 1.4561075412483648e-05, "loss": 0.1098, "step": 6054 }, { "epoch": 1.48, "learning_rate": 1.4557563431788545e-05, "loss": 0.123, "step": 6056 }, { "epoch": 1.48, "learning_rate": 1.4554050741445967e-05, "loss": 0.0801, "step": 6058 }, { "epoch": 1.48, "learning_rate": 1.4550537342002865e-05, "loss": 0.0853, "step": 6060 }, { "epoch": 1.48, "learning_rate": 1.4547023234006304e-05, "loss": 0.1047, "step": 6062 }, { "epoch": 1.48, "learning_rate": 1.4543508418003452e-05, "loss": 0.1323, "step": 6064 }, { "epoch": 1.48, "learning_rate": 1.4539992894541602e-05, "loss": 0.0731, "step": 6066 }, { "epoch": 1.48, "learning_rate": 1.4536476664168137e-05, "loss": 0.132, "step": 6068 }, { "epoch": 1.48, "learning_rate": 1.4532959727430571e-05, "loss": 0.1445, "step": 6070 }, { "epoch": 1.48, "learning_rate": 1.4529442084876513e-05, "loss": 0.0888, "step": 6072 }, { "epoch": 1.48, "learning_rate": 1.4525923737053687e-05, "loss": 0.1083, "step": 6074 }, { "epoch": 1.48, "learning_rate": 1.4522404684509923e-05, "loss": 0.0778, "step": 6076 }, { "epoch": 1.48, "learning_rate": 1.4518884927793174e-05, "loss": 0.1409, "step": 6078 }, { "epoch": 1.48, "learning_rate": 1.4515364467451485e-05, "loss": 0.112, "step": 6080 }, { "epoch": 1.48, "learning_rate": 1.4511843304033022e-05, "loss": 0.0801, "step": 6082 }, { "epoch": 1.48, "learning_rate": 1.4508321438086052e-05, "loss": 0.0965, "step": 6084 }, { "epoch": 1.48, "learning_rate": 1.4504798870158964e-05, "loss": 0.0899, "step": 6086 }, { "epoch": 1.48, "learning_rate": 1.4501275600800244e-05, "loss": 0.0907, "step": 6088 }, { "epoch": 1.48, "learning_rate": 1.4497751630558497e-05, "loss": 0.0885, "step": 6090 }, { "epoch": 1.48, "learning_rate": 1.4494226959982423e-05, "loss": 0.0759, "step": 6092 }, { "epoch": 1.48, "learning_rate": 1.449070158962085e-05, "loss": 0.1384, "step": 6094 }, { "epoch": 1.49, "learning_rate": 1.4487175520022699e-05, "loss": 0.0863, "step": 6096 }, { "epoch": 1.49, "learning_rate": 1.4483648751737007e-05, "loss": 0.1147, "step": 6098 }, { "epoch": 1.49, "learning_rate": 1.4480121285312917e-05, "loss": 0.1112, "step": 6100 }, { "epoch": 1.49, "learning_rate": 1.447659312129969e-05, "loss": 0.1479, "step": 6102 }, { "epoch": 1.49, "learning_rate": 1.447306426024668e-05, "loss": 0.1094, "step": 6104 }, { "epoch": 1.49, "learning_rate": 1.4469534702703363e-05, "loss": 0.1542, "step": 6106 }, { "epoch": 1.49, "learning_rate": 1.4466004449219315e-05, "loss": 0.0746, "step": 6108 }, { "epoch": 1.49, "learning_rate": 1.4462473500344222e-05, "loss": 0.0985, "step": 6110 }, { "epoch": 1.49, "learning_rate": 1.4458941856627884e-05, "loss": 0.1136, "step": 6112 }, { "epoch": 1.49, "learning_rate": 1.4455409518620203e-05, "loss": 0.0918, "step": 6114 }, { "epoch": 1.49, "learning_rate": 1.445187648687119e-05, "loss": 0.1103, "step": 6116 }, { "epoch": 1.49, "learning_rate": 1.4448342761930966e-05, "loss": 0.0859, "step": 6118 }, { "epoch": 1.49, "learning_rate": 1.4444808344349758e-05, "loss": 0.0783, "step": 6120 }, { "epoch": 1.49, "learning_rate": 1.4441273234677908e-05, "loss": 0.1161, "step": 6122 }, { "epoch": 1.49, "learning_rate": 1.443773743346585e-05, "loss": 0.0838, "step": 6124 }, { "epoch": 1.49, "learning_rate": 1.4434200941264141e-05, "loss": 0.1051, "step": 6126 }, { "epoch": 1.49, "learning_rate": 1.4430663758623437e-05, "loss": 0.0745, "step": 6128 }, { "epoch": 1.49, "learning_rate": 1.4427125886094512e-05, "loss": 0.1197, "step": 6130 }, { "epoch": 1.49, "learning_rate": 1.4423587324228225e-05, "loss": 0.1153, "step": 6132 }, { "epoch": 1.49, "learning_rate": 1.4420048073575573e-05, "loss": 0.1036, "step": 6134 }, { "epoch": 1.5, "learning_rate": 1.4416508134687634e-05, "loss": 0.1269, "step": 6136 }, { "epoch": 1.5, "learning_rate": 1.441296750811561e-05, "loss": 0.0646, "step": 6138 }, { "epoch": 1.5, "learning_rate": 1.4409426194410796e-05, "loss": 0.1054, "step": 6140 }, { "epoch": 1.5, "learning_rate": 1.4405884194124608e-05, "loss": 0.0993, "step": 6142 }, { "epoch": 1.5, "learning_rate": 1.440234150780856e-05, "loss": 0.0915, "step": 6144 }, { "epoch": 1.5, "learning_rate": 1.439879813601428e-05, "loss": 0.0799, "step": 6146 }, { "epoch": 1.5, "learning_rate": 1.4395254079293488e-05, "loss": 0.0987, "step": 6148 }, { "epoch": 1.5, "learning_rate": 1.4391709338198032e-05, "loss": 0.1367, "step": 6150 }, { "epoch": 1.5, "learning_rate": 1.4388163913279849e-05, "loss": 0.0956, "step": 6152 }, { "epoch": 1.5, "learning_rate": 1.438461780509099e-05, "loss": 0.095, "step": 6154 }, { "epoch": 1.5, "learning_rate": 1.438107101418361e-05, "loss": 0.1123, "step": 6156 }, { "epoch": 1.5, "learning_rate": 1.4377523541109975e-05, "loss": 0.074, "step": 6158 }, { "epoch": 1.5, "learning_rate": 1.4373975386422448e-05, "loss": 0.1176, "step": 6160 }, { "epoch": 1.5, "learning_rate": 1.4370426550673507e-05, "loss": 0.1224, "step": 6162 }, { "epoch": 1.5, "learning_rate": 1.4366877034415736e-05, "loss": 0.1145, "step": 6164 }, { "epoch": 1.5, "learning_rate": 1.4363326838201817e-05, "loss": 0.0839, "step": 6166 }, { "epoch": 1.5, "learning_rate": 1.435977596258454e-05, "loss": 0.1347, "step": 6168 }, { "epoch": 1.5, "learning_rate": 1.4356224408116814e-05, "loss": 0.1113, "step": 6170 }, { "epoch": 1.5, "learning_rate": 1.4352672175351638e-05, "loss": 0.1176, "step": 6172 }, { "epoch": 1.5, "learning_rate": 1.4349119264842117e-05, "loss": 0.1102, "step": 6174 }, { "epoch": 1.5, "learning_rate": 1.4345565677141472e-05, "loss": 0.0892, "step": 6176 }, { "epoch": 1.51, "learning_rate": 1.434201141280302e-05, "loss": 0.1004, "step": 6178 }, { "epoch": 1.51, "learning_rate": 1.4338456472380193e-05, "loss": 0.0913, "step": 6180 }, { "epoch": 1.51, "learning_rate": 1.4334900856426516e-05, "loss": 0.0929, "step": 6182 }, { "epoch": 1.51, "learning_rate": 1.4331344565495628e-05, "loss": 0.0892, "step": 6184 }, { "epoch": 1.51, "learning_rate": 1.4327787600141274e-05, "loss": 0.0961, "step": 6186 }, { "epoch": 1.51, "learning_rate": 1.4324229960917293e-05, "loss": 0.0726, "step": 6188 }, { "epoch": 1.51, "learning_rate": 1.4320671648377645e-05, "loss": 0.086, "step": 6190 }, { "epoch": 1.51, "learning_rate": 1.4317112663076382e-05, "loss": 0.0861, "step": 6192 }, { "epoch": 1.51, "learning_rate": 1.4313553005567664e-05, "loss": 0.1012, "step": 6194 }, { "epoch": 1.51, "learning_rate": 1.430999267640576e-05, "loss": 0.0886, "step": 6196 }, { "epoch": 1.51, "learning_rate": 1.4306431676145038e-05, "loss": 0.0944, "step": 6198 }, { "epoch": 1.51, "learning_rate": 1.4302870005339975e-05, "loss": 0.1189, "step": 6200 }, { "epoch": 1.51, "learning_rate": 1.4299307664545152e-05, "loss": 0.0685, "step": 6202 }, { "epoch": 1.51, "learning_rate": 1.4295744654315247e-05, "loss": 0.0749, "step": 6204 }, { "epoch": 1.51, "learning_rate": 1.4292180975205052e-05, "loss": 0.092, "step": 6206 }, { "epoch": 1.51, "learning_rate": 1.4288616627769458e-05, "loss": 0.061, "step": 6208 }, { "epoch": 1.51, "learning_rate": 1.4285051612563462e-05, "loss": 0.0825, "step": 6210 }, { "epoch": 1.51, "learning_rate": 1.4281485930142163e-05, "loss": 0.0969, "step": 6212 }, { "epoch": 1.51, "learning_rate": 1.4277919581060771e-05, "loss": 0.0764, "step": 6214 }, { "epoch": 1.51, "learning_rate": 1.4274352565874581e-05, "loss": 0.1221, "step": 6216 }, { "epoch": 1.52, "learning_rate": 1.4270784885139015e-05, "loss": 0.1147, "step": 6218 }, { "epoch": 1.52, "learning_rate": 1.4267216539409588e-05, "loss": 0.1137, "step": 6220 }, { "epoch": 1.52, "learning_rate": 1.4263647529241914e-05, "loss": 0.1143, "step": 6222 }, { "epoch": 1.52, "learning_rate": 1.4260077855191714e-05, "loss": 0.1059, "step": 6224 }, { "epoch": 1.52, "learning_rate": 1.4256507517814818e-05, "loss": 0.0878, "step": 6226 }, { "epoch": 1.52, "learning_rate": 1.4252936517667156e-05, "loss": 0.1016, "step": 6228 }, { "epoch": 1.52, "learning_rate": 1.4249364855304755e-05, "loss": 0.1202, "step": 6230 }, { "epoch": 1.52, "learning_rate": 1.4245792531283754e-05, "loss": 0.1082, "step": 6232 }, { "epoch": 1.52, "learning_rate": 1.4242219546160388e-05, "loss": 0.1111, "step": 6234 }, { "epoch": 1.52, "learning_rate": 1.4238645900491e-05, "loss": 0.1022, "step": 6236 }, { "epoch": 1.52, "learning_rate": 1.4235071594832036e-05, "loss": 0.0812, "step": 6238 }, { "epoch": 1.52, "learning_rate": 1.4231496629740038e-05, "loss": 0.0813, "step": 6240 }, { "epoch": 1.52, "learning_rate": 1.4227921005771661e-05, "loss": 0.0885, "step": 6242 }, { "epoch": 1.52, "learning_rate": 1.4224344723483652e-05, "loss": 0.1035, "step": 6244 }, { "epoch": 1.52, "learning_rate": 1.4220767783432868e-05, "loss": 0.1093, "step": 6246 }, { "epoch": 1.52, "learning_rate": 1.4217190186176266e-05, "loss": 0.1197, "step": 6248 }, { "epoch": 1.52, "learning_rate": 1.4213611932270903e-05, "loss": 0.1098, "step": 6250 }, { "epoch": 1.52, "learning_rate": 1.421003302227394e-05, "loss": 0.1052, "step": 6252 }, { "epoch": 1.52, "learning_rate": 1.4206453456742649e-05, "loss": 0.0923, "step": 6254 }, { "epoch": 1.52, "learning_rate": 1.4202873236234384e-05, "loss": 0.116, "step": 6256 }, { "epoch": 1.52, "learning_rate": 1.4199292361306623e-05, "loss": 0.0929, "step": 6258 }, { "epoch": 1.53, "learning_rate": 1.4195710832516926e-05, "loss": 0.0794, "step": 6260 }, { "epoch": 1.53, "learning_rate": 1.4192128650422974e-05, "loss": 0.0726, "step": 6262 }, { "epoch": 1.53, "learning_rate": 1.4188545815582532e-05, "loss": 0.1018, "step": 6264 }, { "epoch": 1.53, "learning_rate": 1.418496232855348e-05, "loss": 0.088, "step": 6266 }, { "epoch": 1.53, "learning_rate": 1.4181378189893788e-05, "loss": 0.1082, "step": 6268 }, { "epoch": 1.53, "learning_rate": 1.4177793400161545e-05, "loss": 0.0993, "step": 6270 }, { "epoch": 1.53, "learning_rate": 1.4174207959914919e-05, "loss": 0.0931, "step": 6272 }, { "epoch": 1.53, "learning_rate": 1.4170621869712197e-05, "loss": 0.1065, "step": 6274 }, { "epoch": 1.53, "learning_rate": 1.4167035130111758e-05, "loss": 0.0877, "step": 6276 }, { "epoch": 1.53, "learning_rate": 1.4163447741672087e-05, "loss": 0.1107, "step": 6278 }, { "epoch": 1.53, "learning_rate": 1.4159859704951762e-05, "loss": 0.0969, "step": 6280 }, { "epoch": 1.53, "learning_rate": 1.4156271020509476e-05, "loss": 0.1076, "step": 6282 }, { "epoch": 1.53, "learning_rate": 1.4152681688904009e-05, "loss": 0.0982, "step": 6284 }, { "epoch": 1.53, "learning_rate": 1.4149091710694246e-05, "loss": 0.1082, "step": 6286 }, { "epoch": 1.53, "learning_rate": 1.414550108643918e-05, "loss": 0.0902, "step": 6288 }, { "epoch": 1.53, "learning_rate": 1.4141909816697896e-05, "loss": 0.1011, "step": 6290 }, { "epoch": 1.53, "learning_rate": 1.4138317902029582e-05, "loss": 0.0917, "step": 6292 }, { "epoch": 1.53, "learning_rate": 1.4134725342993524e-05, "loss": 0.1049, "step": 6294 }, { "epoch": 1.53, "learning_rate": 1.4131132140149114e-05, "loss": 0.1184, "step": 6296 }, { "epoch": 1.53, "learning_rate": 1.4127538294055845e-05, "loss": 0.1051, "step": 6298 }, { "epoch": 1.54, "learning_rate": 1.4123943805273298e-05, "loss": 0.1399, "step": 6300 }, { "epoch": 1.54, "learning_rate": 1.4120348674361167e-05, "loss": 0.118, "step": 6302 }, { "epoch": 1.54, "learning_rate": 1.4116752901879236e-05, "loss": 0.1346, "step": 6304 }, { "epoch": 1.54, "learning_rate": 1.4113156488387405e-05, "loss": 0.1015, "step": 6306 }, { "epoch": 1.54, "learning_rate": 1.4109559434445652e-05, "loss": 0.0904, "step": 6308 }, { "epoch": 1.54, "learning_rate": 1.4105961740614076e-05, "loss": 0.0995, "step": 6310 }, { "epoch": 1.54, "learning_rate": 1.4102363407452857e-05, "loss": 0.0938, "step": 6312 }, { "epoch": 1.54, "learning_rate": 1.4098764435522288e-05, "loss": 0.0961, "step": 6314 }, { "epoch": 1.54, "learning_rate": 1.409516482538275e-05, "loss": 0.1001, "step": 6316 }, { "epoch": 1.54, "learning_rate": 1.4091564577594739e-05, "loss": 0.0802, "step": 6318 }, { "epoch": 1.54, "learning_rate": 1.4087963692718833e-05, "loss": 0.0751, "step": 6320 }, { "epoch": 1.54, "learning_rate": 1.4084362171315723e-05, "loss": 0.0737, "step": 6322 }, { "epoch": 1.54, "learning_rate": 1.408076001394619e-05, "loss": 0.1007, "step": 6324 }, { "epoch": 1.54, "learning_rate": 1.4077157221171121e-05, "loss": 0.0879, "step": 6326 }, { "epoch": 1.54, "learning_rate": 1.4073553793551495e-05, "loss": 0.079, "step": 6328 }, { "epoch": 1.54, "learning_rate": 1.4069949731648394e-05, "loss": 0.1108, "step": 6330 }, { "epoch": 1.54, "learning_rate": 1.4066345036022998e-05, "loss": 0.066, "step": 6332 }, { "epoch": 1.54, "learning_rate": 1.4062739707236588e-05, "loss": 0.1281, "step": 6334 }, { "epoch": 1.54, "learning_rate": 1.4059133745850534e-05, "loss": 0.1125, "step": 6336 }, { "epoch": 1.54, "learning_rate": 1.4055527152426323e-05, "loss": 0.0769, "step": 6338 }, { "epoch": 1.54, "learning_rate": 1.4051919927525521e-05, "loss": 0.1046, "step": 6340 }, { "epoch": 1.55, "learning_rate": 1.4048312071709803e-05, "loss": 0.0938, "step": 6342 }, { "epoch": 1.55, "learning_rate": 1.4044703585540935e-05, "loss": 0.0938, "step": 6344 }, { "epoch": 1.55, "learning_rate": 1.4041094469580796e-05, "loss": 0.0689, "step": 6346 }, { "epoch": 1.55, "learning_rate": 1.4037484724391345e-05, "loss": 0.0901, "step": 6348 }, { "epoch": 1.55, "learning_rate": 1.4033874350534648e-05, "loss": 0.0418, "step": 6350 }, { "epoch": 1.55, "learning_rate": 1.403026334857287e-05, "loss": 0.0833, "step": 6352 }, { "epoch": 1.55, "learning_rate": 1.4026651719068271e-05, "loss": 0.0939, "step": 6354 }, { "epoch": 1.55, "learning_rate": 1.402303946258321e-05, "loss": 0.1081, "step": 6356 }, { "epoch": 1.55, "learning_rate": 1.4019426579680143e-05, "loss": 0.0756, "step": 6358 }, { "epoch": 1.55, "learning_rate": 1.4015813070921617e-05, "loss": 0.1146, "step": 6360 }, { "epoch": 1.55, "learning_rate": 1.4012198936870294e-05, "loss": 0.1008, "step": 6362 }, { "epoch": 1.55, "learning_rate": 1.4008584178088914e-05, "loss": 0.0906, "step": 6364 }, { "epoch": 1.55, "learning_rate": 1.4004968795140324e-05, "loss": 0.0532, "step": 6366 }, { "epoch": 1.55, "learning_rate": 1.400135278858747e-05, "loss": 0.0909, "step": 6368 }, { "epoch": 1.55, "learning_rate": 1.3997736158993388e-05, "loss": 0.0986, "step": 6370 }, { "epoch": 1.55, "learning_rate": 1.3994118906921219e-05, "loss": 0.1116, "step": 6372 }, { "epoch": 1.55, "learning_rate": 1.3990501032934193e-05, "loss": 0.0664, "step": 6374 }, { "epoch": 1.55, "learning_rate": 1.3986882537595646e-05, "loss": 0.0952, "step": 6376 }, { "epoch": 1.55, "learning_rate": 1.3983263421468998e-05, "loss": 0.1138, "step": 6378 }, { "epoch": 1.55, "learning_rate": 1.3979643685117775e-05, "loss": 0.0898, "step": 6380 }, { "epoch": 1.56, "learning_rate": 1.3976023329105601e-05, "loss": 0.0842, "step": 6382 }, { "epoch": 1.56, "learning_rate": 1.3972402353996193e-05, "loss": 0.0735, "step": 6384 }, { "epoch": 1.56, "learning_rate": 1.396878076035336e-05, "loss": 0.1043, "step": 6386 }, { "epoch": 1.56, "learning_rate": 1.3965158548741016e-05, "loss": 0.1062, "step": 6388 }, { "epoch": 1.56, "learning_rate": 1.3961535719723168e-05, "loss": 0.0908, "step": 6390 }, { "epoch": 1.56, "learning_rate": 1.3957912273863912e-05, "loss": 0.0975, "step": 6392 }, { "epoch": 1.56, "learning_rate": 1.3954288211727454e-05, "loss": 0.0786, "step": 6394 }, { "epoch": 1.56, "learning_rate": 1.3950663533878084e-05, "loss": 0.0687, "step": 6396 }, { "epoch": 1.56, "learning_rate": 1.3947038240880191e-05, "loss": 0.1054, "step": 6398 }, { "epoch": 1.56, "learning_rate": 1.3943412333298261e-05, "loss": 0.0816, "step": 6400 }, { "epoch": 1.56, "learning_rate": 1.3939785811696878e-05, "loss": 0.1035, "step": 6402 }, { "epoch": 1.56, "learning_rate": 1.3936158676640719e-05, "loss": 0.0964, "step": 6404 }, { "epoch": 1.56, "learning_rate": 1.3932530928694555e-05, "loss": 0.0817, "step": 6406 }, { "epoch": 1.56, "learning_rate": 1.3928902568423252e-05, "loss": 0.0969, "step": 6408 }, { "epoch": 1.56, "learning_rate": 1.392527359639178e-05, "loss": 0.1049, "step": 6410 }, { "epoch": 1.56, "learning_rate": 1.3921644013165192e-05, "loss": 0.0704, "step": 6412 }, { "epoch": 1.56, "learning_rate": 1.3918013819308644e-05, "loss": 0.0889, "step": 6414 }, { "epoch": 1.56, "learning_rate": 1.3914383015387382e-05, "loss": 0.1163, "step": 6416 }, { "epoch": 1.56, "learning_rate": 1.391075160196676e-05, "loss": 0.0687, "step": 6418 }, { "epoch": 1.56, "learning_rate": 1.3907119579612203e-05, "loss": 0.1149, "step": 6420 }, { "epoch": 1.56, "learning_rate": 1.390348694888925e-05, "loss": 0.1164, "step": 6422 }, { "epoch": 1.57, "learning_rate": 1.3899853710363536e-05, "loss": 0.091, "step": 6424 }, { "epoch": 1.57, "learning_rate": 1.3896219864600778e-05, "loss": 0.059, "step": 6426 }, { "epoch": 1.57, "learning_rate": 1.3892585412166788e-05, "loss": 0.0787, "step": 6428 }, { "epoch": 1.57, "learning_rate": 1.3888950353627489e-05, "loss": 0.1243, "step": 6430 }, { "epoch": 1.57, "learning_rate": 1.388531468954888e-05, "loss": 0.0921, "step": 6432 }, { "epoch": 1.57, "learning_rate": 1.3881678420497067e-05, "loss": 0.1013, "step": 6434 }, { "epoch": 1.57, "learning_rate": 1.3878041547038238e-05, "loss": 0.0861, "step": 6436 }, { "epoch": 1.57, "learning_rate": 1.3874404069738692e-05, "loss": 0.1073, "step": 6438 }, { "epoch": 1.57, "learning_rate": 1.38707659891648e-05, "loss": 0.0689, "step": 6440 }, { "epoch": 1.57, "learning_rate": 1.386712730588305e-05, "loss": 0.0693, "step": 6442 }, { "epoch": 1.57, "learning_rate": 1.3863488020460004e-05, "loss": 0.108, "step": 6444 }, { "epoch": 1.57, "learning_rate": 1.3859848133462335e-05, "loss": 0.0884, "step": 6446 }, { "epoch": 1.57, "learning_rate": 1.3856207645456792e-05, "loss": 0.0763, "step": 6448 }, { "epoch": 1.57, "learning_rate": 1.3852566557010234e-05, "loss": 0.0818, "step": 6450 }, { "epoch": 1.57, "learning_rate": 1.3848924868689604e-05, "loss": 0.1001, "step": 6452 }, { "epoch": 1.57, "learning_rate": 1.384528258106194e-05, "loss": 0.0767, "step": 6454 }, { "epoch": 1.57, "learning_rate": 1.3841639694694373e-05, "loss": 0.1017, "step": 6456 }, { "epoch": 1.57, "learning_rate": 1.3837996210154132e-05, "loss": 0.0771, "step": 6458 }, { "epoch": 1.57, "learning_rate": 1.3834352128008533e-05, "loss": 0.1086, "step": 6460 }, { "epoch": 1.57, "learning_rate": 1.383070744882499e-05, "loss": 0.0926, "step": 6462 }, { "epoch": 1.58, "learning_rate": 1.3827062173170998e-05, "loss": 0.0757, "step": 6464 }, { "epoch": 1.58, "learning_rate": 1.3823416301614168e-05, "loss": 0.0725, "step": 6466 }, { "epoch": 1.58, "learning_rate": 1.381976983472218e-05, "loss": 0.1125, "step": 6468 }, { "epoch": 1.58, "learning_rate": 1.3816122773062824e-05, "loss": 0.1036, "step": 6470 }, { "epoch": 1.58, "learning_rate": 1.3812475117203968e-05, "loss": 0.099, "step": 6472 }, { "epoch": 1.58, "learning_rate": 1.3808826867713587e-05, "loss": 0.1099, "step": 6474 }, { "epoch": 1.58, "learning_rate": 1.3805178025159734e-05, "loss": 0.1017, "step": 6476 }, { "epoch": 1.58, "learning_rate": 1.3801528590110566e-05, "loss": 0.0859, "step": 6478 }, { "epoch": 1.58, "learning_rate": 1.3797878563134327e-05, "loss": 0.0944, "step": 6480 }, { "epoch": 1.58, "learning_rate": 1.3794227944799362e-05, "loss": 0.1067, "step": 6482 }, { "epoch": 1.58, "learning_rate": 1.3790576735674085e-05, "loss": 0.0733, "step": 6484 }, { "epoch": 1.58, "learning_rate": 1.378692493632703e-05, "loss": 0.0918, "step": 6486 }, { "epoch": 1.58, "learning_rate": 1.37832725473268e-05, "loss": 0.0913, "step": 6488 }, { "epoch": 1.58, "learning_rate": 1.3779619569242109e-05, "loss": 0.1443, "step": 6490 }, { "epoch": 1.58, "learning_rate": 1.3775966002641747e-05, "loss": 0.0941, "step": 6492 }, { "epoch": 1.58, "learning_rate": 1.3772311848094607e-05, "loss": 0.1119, "step": 6494 }, { "epoch": 1.58, "learning_rate": 1.3768657106169668e-05, "loss": 0.0961, "step": 6496 }, { "epoch": 1.58, "learning_rate": 1.3765001777435996e-05, "loss": 0.1158, "step": 6498 }, { "epoch": 1.58, "learning_rate": 1.376134586246276e-05, "loss": 0.0841, "step": 6500 }, { "epoch": 1.58, "learning_rate": 1.3757689361819209e-05, "loss": 0.0953, "step": 6502 }, { "epoch": 1.58, "learning_rate": 1.3754032276074695e-05, "loss": 0.0758, "step": 6504 }, { "epoch": 1.59, "learning_rate": 1.3750374605798647e-05, "loss": 0.1309, "step": 6506 }, { "epoch": 1.59, "learning_rate": 1.3746716351560597e-05, "loss": 0.1379, "step": 6508 }, { "epoch": 1.59, "learning_rate": 1.3743057513930161e-05, "loss": 0.0891, "step": 6510 }, { "epoch": 1.59, "learning_rate": 1.3739398093477045e-05, "loss": 0.0682, "step": 6512 }, { "epoch": 1.59, "learning_rate": 1.3735738090771053e-05, "loss": 0.0714, "step": 6514 }, { "epoch": 1.59, "learning_rate": 1.3732077506382075e-05, "loss": 0.0855, "step": 6516 }, { "epoch": 1.59, "learning_rate": 1.3728416340880093e-05, "loss": 0.0952, "step": 6518 }, { "epoch": 1.59, "learning_rate": 1.3724754594835178e-05, "loss": 0.0899, "step": 6520 }, { "epoch": 1.59, "learning_rate": 1.3721092268817484e-05, "loss": 0.0974, "step": 6522 }, { "epoch": 1.59, "learning_rate": 1.3717429363397276e-05, "loss": 0.075, "step": 6524 }, { "epoch": 1.59, "learning_rate": 1.3713765879144886e-05, "loss": 0.103, "step": 6526 }, { "epoch": 1.59, "learning_rate": 1.3710101816630755e-05, "loss": 0.0908, "step": 6528 }, { "epoch": 1.59, "learning_rate": 1.3706437176425399e-05, "loss": 0.0942, "step": 6530 }, { "epoch": 1.59, "learning_rate": 1.3702771959099434e-05, "loss": 0.1166, "step": 6532 }, { "epoch": 1.59, "learning_rate": 1.3699106165223558e-05, "loss": 0.0825, "step": 6534 }, { "epoch": 1.59, "learning_rate": 1.369543979536857e-05, "loss": 0.0648, "step": 6536 }, { "epoch": 1.59, "learning_rate": 1.3691772850105348e-05, "loss": 0.115, "step": 6538 }, { "epoch": 1.59, "learning_rate": 1.3688105330004862e-05, "loss": 0.0745, "step": 6540 }, { "epoch": 1.59, "learning_rate": 1.3684437235638173e-05, "loss": 0.111, "step": 6542 }, { "epoch": 1.59, "learning_rate": 1.3680768567576434e-05, "loss": 0.1024, "step": 6544 }, { "epoch": 1.6, "learning_rate": 1.3677099326390886e-05, "loss": 0.1281, "step": 6546 }, { "epoch": 1.6, "learning_rate": 1.3673429512652854e-05, "loss": 0.0773, "step": 6548 }, { "epoch": 1.6, "learning_rate": 1.3669759126933754e-05, "loss": 0.0829, "step": 6550 }, { "epoch": 1.6, "learning_rate": 1.3666088169805102e-05, "loss": 0.0926, "step": 6552 }, { "epoch": 1.6, "learning_rate": 1.3662416641838486e-05, "loss": 0.077, "step": 6554 }, { "epoch": 1.6, "learning_rate": 1.3658744543605594e-05, "loss": 0.0866, "step": 6556 }, { "epoch": 1.6, "learning_rate": 1.3655071875678197e-05, "loss": 0.0599, "step": 6558 }, { "epoch": 1.6, "learning_rate": 1.3651398638628164e-05, "loss": 0.1072, "step": 6560 }, { "epoch": 1.6, "learning_rate": 1.3647724833027437e-05, "loss": 0.1026, "step": 6562 }, { "epoch": 1.6, "learning_rate": 1.3644050459448066e-05, "loss": 0.1246, "step": 6564 }, { "epoch": 1.6, "learning_rate": 1.364037551846217e-05, "loss": 0.1003, "step": 6566 }, { "epoch": 1.6, "learning_rate": 1.3636700010641972e-05, "loss": 0.0968, "step": 6568 }, { "epoch": 1.6, "learning_rate": 1.363302393655977e-05, "loss": 0.0981, "step": 6570 }, { "epoch": 1.6, "learning_rate": 1.3629347296787963e-05, "loss": 0.09, "step": 6572 }, { "epoch": 1.6, "learning_rate": 1.362567009189903e-05, "loss": 0.0695, "step": 6574 }, { "epoch": 1.6, "learning_rate": 1.362199232246554e-05, "loss": 0.1228, "step": 6576 }, { "epoch": 1.6, "learning_rate": 1.3618313989060146e-05, "loss": 0.0869, "step": 6578 }, { "epoch": 1.6, "learning_rate": 1.3614635092255597e-05, "loss": 0.0724, "step": 6580 }, { "epoch": 1.6, "learning_rate": 1.3610955632624726e-05, "loss": 0.1333, "step": 6582 }, { "epoch": 1.6, "learning_rate": 1.3607275610740451e-05, "loss": 0.0725, "step": 6584 }, { "epoch": 1.6, "learning_rate": 1.3603595027175777e-05, "loss": 0.0734, "step": 6586 }, { "epoch": 1.61, "learning_rate": 1.3599913882503808e-05, "loss": 0.0788, "step": 6588 }, { "epoch": 1.61, "learning_rate": 1.3596232177297714e-05, "loss": 0.0734, "step": 6590 }, { "epoch": 1.61, "learning_rate": 1.3592549912130775e-05, "loss": 0.0769, "step": 6592 }, { "epoch": 1.61, "learning_rate": 1.358886708757634e-05, "loss": 0.1177, "step": 6594 }, { "epoch": 1.61, "learning_rate": 1.3585183704207861e-05, "loss": 0.0876, "step": 6596 }, { "epoch": 1.61, "learning_rate": 1.358149976259886e-05, "loss": 0.0744, "step": 6598 }, { "epoch": 1.61, "learning_rate": 1.3577815263322962e-05, "loss": 0.0821, "step": 6600 }, { "epoch": 1.61, "learning_rate": 1.357413020695387e-05, "loss": 0.0667, "step": 6602 }, { "epoch": 1.61, "learning_rate": 1.3570444594065375e-05, "loss": 0.0631, "step": 6604 }, { "epoch": 1.61, "learning_rate": 1.356675842523135e-05, "loss": 0.0886, "step": 6606 }, { "epoch": 1.61, "learning_rate": 1.356307170102577e-05, "loss": 0.0796, "step": 6608 }, { "epoch": 1.61, "learning_rate": 1.3559384422022677e-05, "loss": 0.0657, "step": 6610 }, { "epoch": 1.61, "learning_rate": 1.3555696588796214e-05, "loss": 0.0681, "step": 6612 }, { "epoch": 1.61, "learning_rate": 1.3552008201920602e-05, "loss": 0.0894, "step": 6614 }, { "epoch": 1.61, "learning_rate": 1.3548319261970152e-05, "loss": 0.0864, "step": 6616 }, { "epoch": 1.61, "learning_rate": 1.354462976951926e-05, "loss": 0.0782, "step": 6618 }, { "epoch": 1.61, "learning_rate": 1.354093972514241e-05, "loss": 0.0908, "step": 6620 }, { "epoch": 1.61, "learning_rate": 1.3537249129414166e-05, "loss": 0.0792, "step": 6622 }, { "epoch": 1.61, "learning_rate": 1.353355798290919e-05, "loss": 0.0967, "step": 6624 }, { "epoch": 1.61, "learning_rate": 1.3529866286202209e-05, "loss": 0.1142, "step": 6626 }, { "epoch": 1.62, "learning_rate": 1.3526174039868059e-05, "loss": 0.0679, "step": 6628 }, { "epoch": 1.62, "learning_rate": 1.3522481244481647e-05, "loss": 0.0894, "step": 6630 }, { "epoch": 1.62, "learning_rate": 1.351878790061797e-05, "loss": 0.0971, "step": 6632 }, { "epoch": 1.62, "learning_rate": 1.351509400885211e-05, "loss": 0.0876, "step": 6634 }, { "epoch": 1.62, "learning_rate": 1.3511399569759234e-05, "loss": 0.064, "step": 6636 }, { "epoch": 1.62, "learning_rate": 1.3507704583914594e-05, "loss": 0.0719, "step": 6638 }, { "epoch": 1.62, "learning_rate": 1.3504009051893529e-05, "loss": 0.101, "step": 6640 }, { "epoch": 1.62, "learning_rate": 1.350031297427146e-05, "loss": 0.0776, "step": 6642 }, { "epoch": 1.62, "learning_rate": 1.3496616351623898e-05, "loss": 0.0993, "step": 6644 }, { "epoch": 1.62, "learning_rate": 1.349291918452643e-05, "loss": 0.0617, "step": 6646 }, { "epoch": 1.62, "learning_rate": 1.3489221473554735e-05, "loss": 0.1, "step": 6648 }, { "epoch": 1.62, "learning_rate": 1.3485523219284578e-05, "loss": 0.0901, "step": 6650 }, { "epoch": 1.62, "learning_rate": 1.34818244222918e-05, "loss": 0.088, "step": 6652 }, { "epoch": 1.62, "learning_rate": 1.3478125083152337e-05, "loss": 0.0493, "step": 6654 }, { "epoch": 1.62, "learning_rate": 1.3474425202442204e-05, "loss": 0.0586, "step": 6656 }, { "epoch": 1.62, "learning_rate": 1.3470724780737498e-05, "loss": 0.0762, "step": 6658 }, { "epoch": 1.62, "learning_rate": 1.3467023818614404e-05, "loss": 0.0733, "step": 6660 }, { "epoch": 1.62, "learning_rate": 1.346332231664919e-05, "loss": 0.0833, "step": 6662 }, { "epoch": 1.62, "learning_rate": 1.345962027541821e-05, "loss": 0.053, "step": 6664 }, { "epoch": 1.62, "learning_rate": 1.3455917695497898e-05, "loss": 0.097, "step": 6666 }, { "epoch": 1.62, "learning_rate": 1.3452214577464772e-05, "loss": 0.065, "step": 6668 }, { "epoch": 1.63, "learning_rate": 1.3448510921895441e-05, "loss": 0.0837, "step": 6670 }, { "epoch": 1.63, "learning_rate": 1.3444806729366593e-05, "loss": 0.0551, "step": 6672 }, { "epoch": 1.63, "learning_rate": 1.344110200045499e-05, "loss": 0.0591, "step": 6674 }, { "epoch": 1.63, "learning_rate": 1.3437396735737495e-05, "loss": 0.0876, "step": 6676 }, { "epoch": 1.63, "learning_rate": 1.3433690935791045e-05, "loss": 0.084, "step": 6678 }, { "epoch": 1.63, "learning_rate": 1.342998460119266e-05, "loss": 0.0704, "step": 6680 }, { "epoch": 1.63, "learning_rate": 1.3426277732519442e-05, "loss": 0.0588, "step": 6682 }, { "epoch": 1.63, "learning_rate": 1.3422570330348583e-05, "loss": 0.0687, "step": 6684 }, { "epoch": 1.63, "learning_rate": 1.3418862395257353e-05, "loss": 0.0762, "step": 6686 }, { "epoch": 1.63, "learning_rate": 1.3415153927823105e-05, "loss": 0.0834, "step": 6688 }, { "epoch": 1.63, "learning_rate": 1.3411444928623274e-05, "loss": 0.0674, "step": 6690 }, { "epoch": 1.63, "learning_rate": 1.3407735398235384e-05, "loss": 0.0903, "step": 6692 }, { "epoch": 1.63, "learning_rate": 1.3404025337237033e-05, "loss": 0.0852, "step": 6694 }, { "epoch": 1.63, "learning_rate": 1.3400314746205909e-05, "loss": 0.0731, "step": 6696 }, { "epoch": 1.63, "learning_rate": 1.3396603625719777e-05, "loss": 0.0913, "step": 6698 }, { "epoch": 1.63, "learning_rate": 1.339289197635649e-05, "loss": 0.0917, "step": 6700 }, { "epoch": 1.63, "learning_rate": 1.3389179798693974e-05, "loss": 0.0653, "step": 6702 }, { "epoch": 1.63, "learning_rate": 1.3385467093310251e-05, "loss": 0.1114, "step": 6704 }, { "epoch": 1.63, "learning_rate": 1.338175386078341e-05, "loss": 0.1016, "step": 6706 }, { "epoch": 1.63, "learning_rate": 1.337804010169164e-05, "loss": 0.0744, "step": 6708 }, { "epoch": 1.63, "learning_rate": 1.3374325816613189e-05, "loss": 0.0955, "step": 6710 }, { "epoch": 1.64, "learning_rate": 1.337061100612641e-05, "loss": 0.1176, "step": 6712 }, { "epoch": 1.64, "learning_rate": 1.3366895670809723e-05, "loss": 0.0606, "step": 6714 }, { "epoch": 1.64, "learning_rate": 1.3363179811241635e-05, "loss": 0.0663, "step": 6716 }, { "epoch": 1.64, "learning_rate": 1.3359463428000732e-05, "loss": 0.0803, "step": 6718 }, { "epoch": 1.64, "learning_rate": 1.3355746521665685e-05, "loss": 0.0724, "step": 6720 }, { "epoch": 1.64, "learning_rate": 1.3352029092815244e-05, "loss": 0.0864, "step": 6722 }, { "epoch": 1.64, "learning_rate": 1.3348311142028242e-05, "loss": 0.0666, "step": 6724 }, { "epoch": 1.64, "learning_rate": 1.3344592669883591e-05, "loss": 0.1026, "step": 6726 }, { "epoch": 1.64, "learning_rate": 1.334087367696029e-05, "loss": 0.0513, "step": 6728 }, { "epoch": 1.64, "learning_rate": 1.3337154163837406e-05, "loss": 0.0928, "step": 6730 }, { "epoch": 1.64, "learning_rate": 1.3333434131094103e-05, "loss": 0.0912, "step": 6732 }, { "epoch": 1.64, "learning_rate": 1.3329713579309615e-05, "loss": 0.0679, "step": 6734 }, { "epoch": 1.64, "learning_rate": 1.3325992509063266e-05, "loss": 0.0704, "step": 6736 }, { "epoch": 1.64, "learning_rate": 1.3322270920934443e-05, "loss": 0.0817, "step": 6738 }, { "epoch": 1.64, "learning_rate": 1.3318548815502638e-05, "loss": 0.0806, "step": 6740 }, { "epoch": 1.64, "learning_rate": 1.3314826193347408e-05, "loss": 0.0747, "step": 6742 }, { "epoch": 1.64, "learning_rate": 1.331110305504839e-05, "loss": 0.068, "step": 6744 }, { "epoch": 1.64, "learning_rate": 1.3307379401185307e-05, "loss": 0.0744, "step": 6746 }, { "epoch": 1.64, "learning_rate": 1.3303655232337962e-05, "loss": 0.0704, "step": 6748 }, { "epoch": 1.64, "learning_rate": 1.3299930549086235e-05, "loss": 0.0685, "step": 6750 }, { "epoch": 1.65, "learning_rate": 1.329620535201009e-05, "loss": 0.0694, "step": 6752 }, { "epoch": 1.65, "learning_rate": 1.3292479641689565e-05, "loss": 0.0921, "step": 6754 }, { "epoch": 1.65, "learning_rate": 1.3288753418704783e-05, "loss": 0.0766, "step": 6756 }, { "epoch": 1.65, "learning_rate": 1.3285026683635947e-05, "loss": 0.0638, "step": 6758 }, { "epoch": 1.65, "learning_rate": 1.3281299437063336e-05, "loss": 0.0723, "step": 6760 }, { "epoch": 1.65, "learning_rate": 1.3277571679567309e-05, "loss": 0.0767, "step": 6762 }, { "epoch": 1.65, "learning_rate": 1.3273843411728314e-05, "loss": 0.0923, "step": 6764 }, { "epoch": 1.65, "learning_rate": 1.3270114634126862e-05, "loss": 0.0905, "step": 6766 }, { "epoch": 1.65, "learning_rate": 1.3266385347343557e-05, "loss": 0.0921, "step": 6768 }, { "epoch": 1.65, "learning_rate": 1.3262655551959079e-05, "loss": 0.0729, "step": 6770 }, { "epoch": 1.65, "learning_rate": 1.3258925248554183e-05, "loss": 0.0848, "step": 6772 }, { "epoch": 1.65, "learning_rate": 1.3255194437709702e-05, "loss": 0.067, "step": 6774 }, { "epoch": 1.65, "learning_rate": 1.3251463120006558e-05, "loss": 0.0976, "step": 6776 }, { "epoch": 1.65, "learning_rate": 1.3247731296025743e-05, "loss": 0.0812, "step": 6778 }, { "epoch": 1.65, "learning_rate": 1.3243998966348333e-05, "loss": 0.093, "step": 6780 }, { "epoch": 1.65, "learning_rate": 1.3240266131555475e-05, "loss": 0.0781, "step": 6782 }, { "epoch": 1.65, "learning_rate": 1.3236532792228405e-05, "loss": 0.1145, "step": 6784 }, { "epoch": 1.65, "learning_rate": 1.323279894894843e-05, "loss": 0.0821, "step": 6786 }, { "epoch": 1.65, "learning_rate": 1.322906460229694e-05, "loss": 0.0943, "step": 6788 }, { "epoch": 1.65, "learning_rate": 1.3225329752855395e-05, "loss": 0.115, "step": 6790 }, { "epoch": 1.65, "learning_rate": 1.3221594401205354e-05, "loss": 0.1004, "step": 6792 }, { "epoch": 1.66, "learning_rate": 1.3217858547928422e-05, "loss": 0.0497, "step": 6794 }, { "epoch": 1.66, "learning_rate": 1.3214122193606312e-05, "loss": 0.0828, "step": 6796 }, { "epoch": 1.66, "learning_rate": 1.3210385338820798e-05, "loss": 0.0554, "step": 6798 }, { "epoch": 1.66, "learning_rate": 1.3206647984153738e-05, "loss": 0.0439, "step": 6800 }, { "epoch": 1.66, "learning_rate": 1.3202910130187066e-05, "loss": 0.0718, "step": 6802 }, { "epoch": 1.66, "learning_rate": 1.3199171777502796e-05, "loss": 0.0786, "step": 6804 }, { "epoch": 1.66, "learning_rate": 1.3195432926683016e-05, "loss": 0.0559, "step": 6806 }, { "epoch": 1.66, "learning_rate": 1.3191693578309898e-05, "loss": 0.0879, "step": 6808 }, { "epoch": 1.66, "learning_rate": 1.3187953732965681e-05, "loss": 0.0609, "step": 6810 }, { "epoch": 1.66, "learning_rate": 1.3184213391232693e-05, "loss": 0.075, "step": 6812 }, { "epoch": 1.66, "learning_rate": 1.3180472553693329e-05, "loss": 0.0882, "step": 6814 }, { "epoch": 1.66, "learning_rate": 1.317673122093007e-05, "loss": 0.1017, "step": 6816 }, { "epoch": 1.66, "learning_rate": 1.3172989393525469e-05, "loss": 0.0492, "step": 6818 }, { "epoch": 1.66, "learning_rate": 1.3169247072062158e-05, "loss": 0.0848, "step": 6820 }, { "epoch": 1.66, "learning_rate": 1.3165504257122838e-05, "loss": 0.079, "step": 6822 }, { "epoch": 1.66, "learning_rate": 1.3161760949290306e-05, "loss": 0.0513, "step": 6824 }, { "epoch": 1.66, "learning_rate": 1.3158017149147415e-05, "loss": 0.0672, "step": 6826 }, { "epoch": 1.66, "learning_rate": 1.3154272857277107e-05, "loss": 0.069, "step": 6828 }, { "epoch": 1.66, "learning_rate": 1.315052807426239e-05, "loss": 0.1033, "step": 6830 }, { "epoch": 1.66, "learning_rate": 1.3146782800686366e-05, "loss": 0.0862, "step": 6832 }, { "epoch": 1.67, "learning_rate": 1.3143037037132195e-05, "loss": 0.0774, "step": 6834 }, { "epoch": 1.67, "learning_rate": 1.3139290784183127e-05, "loss": 0.0861, "step": 6836 }, { "epoch": 1.67, "learning_rate": 1.3135544042422475e-05, "loss": 0.099, "step": 6838 }, { "epoch": 1.67, "learning_rate": 1.3131796812433641e-05, "loss": 0.0701, "step": 6840 }, { "epoch": 1.67, "learning_rate": 1.3128049094800095e-05, "loss": 0.1021, "step": 6842 }, { "epoch": 1.67, "learning_rate": 1.3124300890105388e-05, "loss": 0.0735, "step": 6844 }, { "epoch": 1.67, "learning_rate": 1.3120552198933138e-05, "loss": 0.0844, "step": 6846 }, { "epoch": 1.67, "learning_rate": 1.3116803021867054e-05, "loss": 0.0757, "step": 6848 }, { "epoch": 1.67, "learning_rate": 1.3113053359490901e-05, "loss": 0.0762, "step": 6850 }, { "epoch": 1.67, "learning_rate": 1.3109303212388538e-05, "loss": 0.0775, "step": 6852 }, { "epoch": 1.67, "learning_rate": 1.310555258114389e-05, "loss": 0.065, "step": 6854 }, { "epoch": 1.67, "learning_rate": 1.3101801466340958e-05, "loss": 0.0842, "step": 6856 }, { "epoch": 1.67, "learning_rate": 1.3098049868563818e-05, "loss": 0.0922, "step": 6858 }, { "epoch": 1.67, "learning_rate": 1.3094297788396623e-05, "loss": 0.0922, "step": 6860 }, { "epoch": 1.67, "learning_rate": 1.3090545226423604e-05, "loss": 0.0887, "step": 6862 }, { "epoch": 1.67, "learning_rate": 1.308679218322906e-05, "loss": 0.0852, "step": 6864 }, { "epoch": 1.67, "learning_rate": 1.3083038659397367e-05, "loss": 0.0745, "step": 6866 }, { "epoch": 1.67, "learning_rate": 1.307928465551298e-05, "loss": 0.0971, "step": 6868 }, { "epoch": 1.67, "learning_rate": 1.3075530172160428e-05, "loss": 0.075, "step": 6870 }, { "epoch": 1.67, "learning_rate": 1.3071775209924313e-05, "loss": 0.0622, "step": 6872 }, { "epoch": 1.67, "learning_rate": 1.3068019769389304e-05, "loss": 0.0791, "step": 6874 }, { "epoch": 1.68, "learning_rate": 1.3064263851140163e-05, "loss": 0.06, "step": 6876 }, { "epoch": 1.68, "learning_rate": 1.3060507455761702e-05, "loss": 0.0656, "step": 6878 }, { "epoch": 1.68, "learning_rate": 1.3056750583838831e-05, "loss": 0.0621, "step": 6880 }, { "epoch": 1.68, "learning_rate": 1.3052993235956519e-05, "loss": 0.0763, "step": 6882 }, { "epoch": 1.68, "learning_rate": 1.3049235412699818e-05, "loss": 0.0601, "step": 6884 }, { "epoch": 1.68, "learning_rate": 1.3045477114653844e-05, "loss": 0.0569, "step": 6886 }, { "epoch": 1.68, "learning_rate": 1.3041718342403796e-05, "loss": 0.0619, "step": 6888 }, { "epoch": 1.68, "learning_rate": 1.3037959096534943e-05, "loss": 0.0899, "step": 6890 }, { "epoch": 1.68, "learning_rate": 1.303419937763263e-05, "loss": 0.0981, "step": 6892 }, { "epoch": 1.68, "learning_rate": 1.3030439186282269e-05, "loss": 0.0686, "step": 6894 }, { "epoch": 1.68, "learning_rate": 1.3026678523069355e-05, "loss": 0.072, "step": 6896 }, { "epoch": 1.68, "learning_rate": 1.3022917388579455e-05, "loss": 0.0743, "step": 6898 }, { "epoch": 1.68, "learning_rate": 1.3019155783398199e-05, "loss": 0.0568, "step": 6900 }, { "epoch": 1.68, "learning_rate": 1.3015393708111299e-05, "loss": 0.0667, "step": 6902 }, { "epoch": 1.68, "learning_rate": 1.3011631163304548e-05, "loss": 0.0629, "step": 6904 }, { "epoch": 1.68, "learning_rate": 1.300786814956379e-05, "loss": 0.0757, "step": 6906 }, { "epoch": 1.68, "learning_rate": 1.3004104667474962e-05, "loss": 0.066, "step": 6908 }, { "epoch": 1.68, "learning_rate": 1.3000340717624064e-05, "loss": 0.0786, "step": 6910 }, { "epoch": 1.68, "learning_rate": 1.2996576300597181e-05, "loss": 0.0548, "step": 6912 }, { "epoch": 1.68, "learning_rate": 1.2992811416980446e-05, "loss": 0.0579, "step": 6914 }, { "epoch": 1.69, "learning_rate": 1.2989046067360093e-05, "loss": 0.0517, "step": 6916 }, { "epoch": 1.69, "learning_rate": 1.298528025232241e-05, "loss": 0.0682, "step": 6918 }, { "epoch": 1.69, "learning_rate": 1.2981513972453766e-05, "loss": 0.0604, "step": 6920 }, { "epoch": 1.69, "learning_rate": 1.2977747228340594e-05, "loss": 0.0646, "step": 6922 }, { "epoch": 1.69, "learning_rate": 1.2973980020569413e-05, "loss": 0.085, "step": 6924 }, { "epoch": 1.69, "learning_rate": 1.2970212349726798e-05, "loss": 0.0795, "step": 6926 }, { "epoch": 1.69, "learning_rate": 1.296644421639941e-05, "loss": 0.062, "step": 6928 }, { "epoch": 1.69, "learning_rate": 1.2962675621173972e-05, "loss": 0.0899, "step": 6930 }, { "epoch": 1.69, "learning_rate": 1.2958906564637287e-05, "loss": 0.0663, "step": 6932 }, { "epoch": 1.69, "learning_rate": 1.2955137047376227e-05, "loss": 0.0601, "step": 6934 }, { "epoch": 1.69, "learning_rate": 1.295136706997773e-05, "loss": 0.0444, "step": 6936 }, { "epoch": 1.69, "learning_rate": 1.2947596633028808e-05, "loss": 0.0382, "step": 6938 }, { "epoch": 1.69, "learning_rate": 1.2943825737116558e-05, "loss": 0.0671, "step": 6940 }, { "epoch": 1.69, "learning_rate": 1.2940054382828124e-05, "loss": 0.0548, "step": 6942 }, { "epoch": 1.69, "learning_rate": 1.2936282570750745e-05, "loss": 0.0785, "step": 6944 }, { "epoch": 1.69, "learning_rate": 1.2932510301471714e-05, "loss": 0.0671, "step": 6946 }, { "epoch": 1.69, "learning_rate": 1.2928737575578407e-05, "loss": 0.0715, "step": 6948 }, { "epoch": 1.69, "learning_rate": 1.2924964393658263e-05, "loss": 0.0892, "step": 6950 }, { "epoch": 1.69, "learning_rate": 1.2921190756298798e-05, "loss": 0.0792, "step": 6952 }, { "epoch": 1.69, "learning_rate": 1.2917416664087596e-05, "loss": 0.0751, "step": 6954 }, { "epoch": 1.69, "learning_rate": 1.2913642117612311e-05, "loss": 0.0708, "step": 6956 }, { "epoch": 1.7, "learning_rate": 1.2909867117460664e-05, "loss": 0.1013, "step": 6958 }, { "epoch": 1.7, "learning_rate": 1.2906091664220461e-05, "loss": 0.0618, "step": 6960 }, { "epoch": 1.7, "learning_rate": 1.2902315758479562e-05, "loss": 0.0884, "step": 6962 }, { "epoch": 1.7, "learning_rate": 1.289853940082591e-05, "loss": 0.0489, "step": 6964 }, { "epoch": 1.7, "learning_rate": 1.2894762591847502e-05, "loss": 0.0689, "step": 6966 }, { "epoch": 1.7, "learning_rate": 1.2890985332132432e-05, "loss": 0.0461, "step": 6968 }, { "epoch": 1.7, "learning_rate": 1.2887207622268831e-05, "loss": 0.0751, "step": 6970 }, { "epoch": 1.7, "learning_rate": 1.288342946284493e-05, "loss": 0.0822, "step": 6972 }, { "epoch": 1.7, "learning_rate": 1.2879650854449013e-05, "loss": 0.0637, "step": 6974 }, { "epoch": 1.7, "learning_rate": 1.287587179766944e-05, "loss": 0.0525, "step": 6976 }, { "epoch": 1.7, "learning_rate": 1.2872092293094631e-05, "loss": 0.0633, "step": 6978 }, { "epoch": 1.7, "learning_rate": 1.2868312341313096e-05, "loss": 0.0648, "step": 6980 }, { "epoch": 1.7, "learning_rate": 1.2864531942913394e-05, "loss": 0.0624, "step": 6982 }, { "epoch": 1.7, "learning_rate": 1.2860751098484165e-05, "loss": 0.0744, "step": 6984 }, { "epoch": 1.7, "learning_rate": 1.2856969808614115e-05, "loss": 0.0659, "step": 6986 }, { "epoch": 1.7, "learning_rate": 1.285318807389202e-05, "loss": 0.0635, "step": 6988 }, { "epoch": 1.7, "learning_rate": 1.2849405894906724e-05, "loss": 0.0604, "step": 6990 }, { "epoch": 1.7, "learning_rate": 1.2845623272247142e-05, "loss": 0.0527, "step": 6992 }, { "epoch": 1.7, "learning_rate": 1.2841840206502254e-05, "loss": 0.0505, "step": 6994 }, { "epoch": 1.7, "learning_rate": 1.2838056698261122e-05, "loss": 0.0698, "step": 6996 }, { "epoch": 1.71, "learning_rate": 1.2834272748112855e-05, "loss": 0.0681, "step": 6998 }, { "epoch": 1.71, "learning_rate": 1.283048835664665e-05, "loss": 0.073, "step": 7000 }, { "epoch": 1.71, "learning_rate": 1.2826703524451764e-05, "loss": 0.0798, "step": 7002 }, { "epoch": 1.71, "learning_rate": 1.2822918252117525e-05, "loss": 0.0707, "step": 7004 }, { "epoch": 1.71, "learning_rate": 1.2819132540233326e-05, "loss": 0.0698, "step": 7006 }, { "epoch": 1.71, "learning_rate": 1.2815346389388637e-05, "loss": 0.0702, "step": 7008 }, { "epoch": 1.71, "learning_rate": 1.2811559800172986e-05, "loss": 0.0673, "step": 7010 }, { "epoch": 1.71, "learning_rate": 1.2807772773175975e-05, "loss": 0.0444, "step": 7012 }, { "epoch": 1.71, "learning_rate": 1.280398530898727e-05, "loss": 0.0665, "step": 7014 }, { "epoch": 1.71, "learning_rate": 1.2800197408196616e-05, "loss": 0.052, "step": 7016 }, { "epoch": 1.71, "learning_rate": 1.279640907139381e-05, "loss": 0.0917, "step": 7018 }, { "epoch": 1.71, "learning_rate": 1.279262029916873e-05, "loss": 0.0769, "step": 7020 }, { "epoch": 1.71, "learning_rate": 1.2788831092111316e-05, "loss": 0.0858, "step": 7022 }, { "epoch": 1.71, "learning_rate": 1.2785041450811574e-05, "loss": 0.0873, "step": 7024 }, { "epoch": 1.71, "learning_rate": 1.2781251375859585e-05, "loss": 0.0654, "step": 7026 }, { "epoch": 1.71, "learning_rate": 1.2777460867845485e-05, "loss": 0.0692, "step": 7028 }, { "epoch": 1.71, "learning_rate": 1.2773669927359494e-05, "loss": 0.0733, "step": 7030 }, { "epoch": 1.71, "learning_rate": 1.2769878554991882e-05, "loss": 0.0602, "step": 7032 }, { "epoch": 1.71, "learning_rate": 1.2766086751333e-05, "loss": 0.0904, "step": 7034 }, { "epoch": 1.71, "learning_rate": 1.276229451697326e-05, "loss": 0.0866, "step": 7036 }, { "epoch": 1.71, "learning_rate": 1.275850185250314e-05, "loss": 0.065, "step": 7038 }, { "epoch": 1.72, "learning_rate": 1.2754708758513192e-05, "loss": 0.0648, "step": 7040 }, { "epoch": 1.72, "learning_rate": 1.2750915235594023e-05, "loss": 0.0751, "step": 7042 }, { "epoch": 1.72, "learning_rate": 1.2747121284336317e-05, "loss": 0.0602, "step": 7044 }, { "epoch": 1.72, "learning_rate": 1.2743326905330822e-05, "loss": 0.0571, "step": 7046 }, { "epoch": 1.72, "learning_rate": 1.2739532099168347e-05, "loss": 0.0627, "step": 7048 }, { "epoch": 1.72, "learning_rate": 1.273573686643978e-05, "loss": 0.0493, "step": 7050 }, { "epoch": 1.72, "learning_rate": 1.2731941207736063e-05, "loss": 0.0748, "step": 7052 }, { "epoch": 1.72, "learning_rate": 1.272814512364821e-05, "loss": 0.0552, "step": 7054 }, { "epoch": 1.72, "learning_rate": 1.2724348614767296e-05, "loss": 0.0575, "step": 7056 }, { "epoch": 1.72, "learning_rate": 1.2720551681684475e-05, "loss": 0.0828, "step": 7058 }, { "epoch": 1.72, "learning_rate": 1.2716754324990952e-05, "loss": 0.0615, "step": 7060 }, { "epoch": 1.72, "learning_rate": 1.2712956545278008e-05, "loss": 0.0633, "step": 7062 }, { "epoch": 1.72, "learning_rate": 1.2709158343136983e-05, "loss": 0.0567, "step": 7064 }, { "epoch": 1.72, "learning_rate": 1.2705359719159288e-05, "loss": 0.0764, "step": 7066 }, { "epoch": 1.72, "learning_rate": 1.2701560673936399e-05, "loss": 0.0633, "step": 7068 }, { "epoch": 1.72, "learning_rate": 1.2697761208059856e-05, "loss": 0.0733, "step": 7070 }, { "epoch": 1.72, "learning_rate": 1.2693961322121262e-05, "loss": 0.0731, "step": 7072 }, { "epoch": 1.72, "learning_rate": 1.2690161016712295e-05, "loss": 0.0748, "step": 7074 }, { "epoch": 1.72, "learning_rate": 1.268636029242468e-05, "loss": 0.0657, "step": 7076 }, { "epoch": 1.72, "learning_rate": 1.2682559149850229e-05, "loss": 0.0762, "step": 7078 }, { "epoch": 1.73, "learning_rate": 1.2678757589580805e-05, "loss": 0.0656, "step": 7080 }, { "epoch": 1.73, "learning_rate": 1.2674955612208343e-05, "loss": 0.0784, "step": 7082 }, { "epoch": 1.73, "learning_rate": 1.2671153218324834e-05, "loss": 0.0761, "step": 7084 }, { "epoch": 1.73, "learning_rate": 1.2667350408522347e-05, "loss": 0.0839, "step": 7086 }, { "epoch": 1.73, "learning_rate": 1.2663547183393006e-05, "loss": 0.0711, "step": 7088 }, { "epoch": 1.73, "learning_rate": 1.2659743543529e-05, "loss": 0.0582, "step": 7090 }, { "epoch": 1.73, "learning_rate": 1.2655939489522582e-05, "loss": 0.0774, "step": 7092 }, { "epoch": 1.73, "learning_rate": 1.2652135021966081e-05, "loss": 0.0714, "step": 7094 }, { "epoch": 1.73, "learning_rate": 1.2648330141451877e-05, "loss": 0.0388, "step": 7096 }, { "epoch": 1.73, "learning_rate": 1.2644524848572419e-05, "loss": 0.0422, "step": 7098 }, { "epoch": 1.73, "learning_rate": 1.2640719143920216e-05, "loss": 0.0827, "step": 7100 }, { "epoch": 1.73, "learning_rate": 1.2636913028087856e-05, "loss": 0.0647, "step": 7102 }, { "epoch": 1.73, "learning_rate": 1.2633106501667971e-05, "loss": 0.0638, "step": 7104 }, { "epoch": 1.73, "learning_rate": 1.2629299565253268e-05, "loss": 0.061, "step": 7106 }, { "epoch": 1.73, "learning_rate": 1.2625492219436517e-05, "loss": 0.0665, "step": 7108 }, { "epoch": 1.73, "learning_rate": 1.2621684464810556e-05, "loss": 0.0547, "step": 7110 }, { "epoch": 1.73, "learning_rate": 1.2617876301968273e-05, "loss": 0.0812, "step": 7112 }, { "epoch": 1.73, "learning_rate": 1.261406773150263e-05, "loss": 0.0571, "step": 7114 }, { "epoch": 1.73, "learning_rate": 1.2610258754006656e-05, "loss": 0.075, "step": 7116 }, { "epoch": 1.73, "learning_rate": 1.2606449370073435e-05, "loss": 0.0742, "step": 7118 }, { "epoch": 1.73, "learning_rate": 1.2602639580296113e-05, "loss": 0.0594, "step": 7120 }, { "epoch": 1.74, "learning_rate": 1.2598829385267908e-05, "loss": 0.0501, "step": 7122 }, { "epoch": 1.74, "learning_rate": 1.2595018785582096e-05, "loss": 0.0565, "step": 7124 }, { "epoch": 1.74, "learning_rate": 1.2591207781832015e-05, "loss": 0.044, "step": 7126 }, { "epoch": 1.74, "learning_rate": 1.258739637461107e-05, "loss": 0.0562, "step": 7128 }, { "epoch": 1.74, "learning_rate": 1.2583584564512723e-05, "loss": 0.0556, "step": 7130 }, { "epoch": 1.74, "learning_rate": 1.2579772352130503e-05, "loss": 0.0764, "step": 7132 }, { "epoch": 1.74, "learning_rate": 1.2575959738058004e-05, "loss": 0.0508, "step": 7134 }, { "epoch": 1.74, "learning_rate": 1.2572146722888871e-05, "loss": 0.0608, "step": 7136 }, { "epoch": 1.74, "learning_rate": 1.256833330721683e-05, "loss": 0.0632, "step": 7138 }, { "epoch": 1.74, "learning_rate": 1.2564519491635651e-05, "loss": 0.0377, "step": 7140 }, { "epoch": 1.74, "learning_rate": 1.256070527673918e-05, "loss": 0.043, "step": 7142 }, { "epoch": 1.74, "learning_rate": 1.2556890663121314e-05, "loss": 0.0714, "step": 7144 }, { "epoch": 1.74, "learning_rate": 1.2553075651376023e-05, "loss": 0.0483, "step": 7146 }, { "epoch": 1.74, "learning_rate": 1.2549260242097328e-05, "loss": 0.0759, "step": 7148 }, { "epoch": 1.74, "learning_rate": 1.254544443587932e-05, "loss": 0.0351, "step": 7150 }, { "epoch": 1.74, "learning_rate": 1.254162823331615e-05, "loss": 0.0651, "step": 7152 }, { "epoch": 1.74, "learning_rate": 1.253781163500203e-05, "loss": 0.0596, "step": 7154 }, { "epoch": 1.74, "learning_rate": 1.253399464153123e-05, "loss": 0.0418, "step": 7156 }, { "epoch": 1.74, "learning_rate": 1.253017725349809e-05, "loss": 0.0747, "step": 7158 }, { "epoch": 1.74, "learning_rate": 1.2526359471497004e-05, "loss": 0.0731, "step": 7160 }, { "epoch": 1.75, "learning_rate": 1.252254129612243e-05, "loss": 0.0658, "step": 7162 }, { "epoch": 1.75, "learning_rate": 1.2518722727968886e-05, "loss": 0.0795, "step": 7164 }, { "epoch": 1.75, "learning_rate": 1.2514903767630957e-05, "loss": 0.0771, "step": 7166 }, { "epoch": 1.75, "learning_rate": 1.2511084415703276e-05, "loss": 0.0422, "step": 7168 }, { "epoch": 1.75, "learning_rate": 1.2507264672780553e-05, "loss": 0.052, "step": 7170 }, { "epoch": 1.75, "learning_rate": 1.250344453945755e-05, "loss": 0.0968, "step": 7172 }, { "epoch": 1.75, "learning_rate": 1.2499624016329086e-05, "loss": 0.057, "step": 7174 }, { "epoch": 1.75, "learning_rate": 1.2495803103990047e-05, "loss": 0.0577, "step": 7176 }, { "epoch": 1.75, "learning_rate": 1.2491981803035384e-05, "loss": 0.0737, "step": 7178 }, { "epoch": 1.75, "learning_rate": 1.2488160114060099e-05, "loss": 0.0838, "step": 7180 }, { "epoch": 1.75, "learning_rate": 1.2484338037659258e-05, "loss": 0.0762, "step": 7182 }, { "epoch": 1.75, "learning_rate": 1.2480515574427985e-05, "loss": 0.0482, "step": 7184 }, { "epoch": 1.75, "learning_rate": 1.247669272496147e-05, "loss": 0.0824, "step": 7186 }, { "epoch": 1.75, "learning_rate": 1.2472869489854961e-05, "loss": 0.0583, "step": 7188 }, { "epoch": 1.75, "learning_rate": 1.2469045869703764e-05, "loss": 0.0522, "step": 7190 }, { "epoch": 1.75, "learning_rate": 1.2465221865103243e-05, "loss": 0.071, "step": 7192 }, { "epoch": 1.75, "learning_rate": 1.2461397476648828e-05, "loss": 0.0606, "step": 7194 }, { "epoch": 1.75, "learning_rate": 1.2457572704936004e-05, "loss": 0.0634, "step": 7196 }, { "epoch": 1.75, "learning_rate": 1.2453747550560317e-05, "loss": 0.0957, "step": 7198 }, { "epoch": 1.75, "learning_rate": 1.2449922014117376e-05, "loss": 0.0689, "step": 7200 }, { "epoch": 1.75, "learning_rate": 1.2446096096202843e-05, "loss": 0.0602, "step": 7202 }, { "epoch": 1.76, "learning_rate": 1.244226979741244e-05, "loss": 0.0644, "step": 7204 }, { "epoch": 1.76, "learning_rate": 1.2438443118341957e-05, "loss": 0.0556, "step": 7206 }, { "epoch": 1.76, "learning_rate": 1.2434616059587235e-05, "loss": 0.0592, "step": 7208 }, { "epoch": 1.76, "learning_rate": 1.2430788621744174e-05, "loss": 0.0664, "step": 7210 }, { "epoch": 1.76, "learning_rate": 1.2426960805408739e-05, "loss": 0.0707, "step": 7212 }, { "epoch": 1.76, "learning_rate": 1.2423132611176947e-05, "loss": 0.0505, "step": 7214 }, { "epoch": 1.76, "learning_rate": 1.241930403964488e-05, "loss": 0.0452, "step": 7216 }, { "epoch": 1.76, "learning_rate": 1.2415475091408675e-05, "loss": 0.057, "step": 7218 }, { "epoch": 1.76, "learning_rate": 1.2411645767064524e-05, "loss": 0.0425, "step": 7220 }, { "epoch": 1.76, "learning_rate": 1.2407816067208692e-05, "loss": 0.0651, "step": 7222 }, { "epoch": 1.76, "learning_rate": 1.2403985992437482e-05, "loss": 0.073, "step": 7224 }, { "epoch": 1.76, "learning_rate": 1.2400155543347272e-05, "loss": 0.0842, "step": 7226 }, { "epoch": 1.76, "learning_rate": 1.2396324720534491e-05, "loss": 0.0768, "step": 7228 }, { "epoch": 1.76, "learning_rate": 1.2392493524595629e-05, "loss": 0.089, "step": 7230 }, { "epoch": 1.76, "learning_rate": 1.2388661956127225e-05, "loss": 0.0947, "step": 7232 }, { "epoch": 1.76, "learning_rate": 1.2384830015725893e-05, "loss": 0.0511, "step": 7234 }, { "epoch": 1.76, "learning_rate": 1.2380997703988294e-05, "loss": 0.0574, "step": 7236 }, { "epoch": 1.76, "learning_rate": 1.2377165021511142e-05, "loss": 0.0552, "step": 7238 }, { "epoch": 1.76, "learning_rate": 1.2373331968891217e-05, "loss": 0.059, "step": 7240 }, { "epoch": 1.76, "learning_rate": 1.236949854672536e-05, "loss": 0.0623, "step": 7242 }, { "epoch": 1.77, "learning_rate": 1.2365664755610461e-05, "loss": 0.0496, "step": 7244 }, { "epoch": 1.77, "learning_rate": 1.236183059614347e-05, "loss": 0.0766, "step": 7246 }, { "epoch": 1.77, "learning_rate": 1.2357996068921392e-05, "loss": 0.0592, "step": 7248 }, { "epoch": 1.77, "learning_rate": 1.23541611745413e-05, "loss": 0.0564, "step": 7250 }, { "epoch": 1.77, "learning_rate": 1.2350325913600307e-05, "loss": 0.0574, "step": 7252 }, { "epoch": 1.77, "learning_rate": 1.2346490286695601e-05, "loss": 0.0581, "step": 7254 }, { "epoch": 1.77, "learning_rate": 1.2342654294424413e-05, "loss": 0.0536, "step": 7256 }, { "epoch": 1.77, "learning_rate": 1.2338817937384038e-05, "loss": 0.054, "step": 7258 }, { "epoch": 1.77, "learning_rate": 1.2334981216171823e-05, "loss": 0.0912, "step": 7260 }, { "epoch": 1.77, "learning_rate": 1.2331144131385183e-05, "loss": 0.043, "step": 7262 }, { "epoch": 1.77, "learning_rate": 1.2327306683621573e-05, "loss": 0.0451, "step": 7264 }, { "epoch": 1.77, "learning_rate": 1.2323468873478518e-05, "loss": 0.0478, "step": 7266 }, { "epoch": 1.77, "learning_rate": 1.2319630701553589e-05, "loss": 0.0463, "step": 7268 }, { "epoch": 1.77, "learning_rate": 1.2315792168444424e-05, "loss": 0.0596, "step": 7270 }, { "epoch": 1.77, "learning_rate": 1.2311953274748714e-05, "loss": 0.053, "step": 7272 }, { "epoch": 1.77, "learning_rate": 1.2308114021064197e-05, "loss": 0.0691, "step": 7274 }, { "epoch": 1.77, "learning_rate": 1.2304274407988676e-05, "loss": 0.0751, "step": 7276 }, { "epoch": 1.77, "learning_rate": 1.2300434436120017e-05, "loss": 0.0597, "step": 7278 }, { "epoch": 1.77, "learning_rate": 1.2296594106056118e-05, "loss": 0.0776, "step": 7280 }, { "epoch": 1.77, "learning_rate": 1.2292753418394958e-05, "loss": 0.0516, "step": 7282 }, { "epoch": 1.77, "learning_rate": 1.2288912373734558e-05, "loss": 0.0512, "step": 7284 }, { "epoch": 1.78, "learning_rate": 1.2285070972673004e-05, "loss": 0.0703, "step": 7286 }, { "epoch": 1.78, "learning_rate": 1.228122921580842e-05, "loss": 0.0541, "step": 7288 }, { "epoch": 1.78, "learning_rate": 1.2277387103739006e-05, "loss": 0.048, "step": 7290 }, { "epoch": 1.78, "learning_rate": 1.2273544637063006e-05, "loss": 0.0695, "step": 7292 }, { "epoch": 1.78, "learning_rate": 1.2269701816378721e-05, "loss": 0.0592, "step": 7294 }, { "epoch": 1.78, "learning_rate": 1.2265858642284505e-05, "loss": 0.0675, "step": 7296 }, { "epoch": 1.78, "learning_rate": 1.2262015115378774e-05, "loss": 0.0494, "step": 7298 }, { "epoch": 1.78, "learning_rate": 1.2258171236259993e-05, "loss": 0.0649, "step": 7300 }, { "epoch": 1.78, "learning_rate": 1.2254327005526683e-05, "loss": 0.0679, "step": 7302 }, { "epoch": 1.78, "learning_rate": 1.2250482423777419e-05, "loss": 0.0423, "step": 7304 }, { "epoch": 1.78, "learning_rate": 1.2246637491610835e-05, "loss": 0.0632, "step": 7306 }, { "epoch": 1.78, "learning_rate": 1.2242792209625613e-05, "loss": 0.0735, "step": 7308 }, { "epoch": 1.78, "learning_rate": 1.2238946578420493e-05, "loss": 0.0536, "step": 7310 }, { "epoch": 1.78, "learning_rate": 1.2235100598594271e-05, "loss": 0.0353, "step": 7312 }, { "epoch": 1.78, "learning_rate": 1.2231254270745798e-05, "loss": 0.0562, "step": 7314 }, { "epoch": 1.78, "learning_rate": 1.222740759547397e-05, "loss": 0.0477, "step": 7316 }, { "epoch": 1.78, "learning_rate": 1.2223560573377749e-05, "loss": 0.0357, "step": 7318 }, { "epoch": 1.78, "learning_rate": 1.2219713205056143e-05, "loss": 0.0444, "step": 7320 }, { "epoch": 1.78, "learning_rate": 1.2215865491108216e-05, "loss": 0.0584, "step": 7322 }, { "epoch": 1.78, "learning_rate": 1.221201743213309e-05, "loss": 0.0737, "step": 7324 }, { "epoch": 1.79, "learning_rate": 1.2208169028729934e-05, "loss": 0.0562, "step": 7326 }, { "epoch": 1.79, "learning_rate": 1.2204320281497977e-05, "loss": 0.0666, "step": 7328 }, { "epoch": 1.79, "learning_rate": 1.2200471191036496e-05, "loss": 0.0591, "step": 7330 }, { "epoch": 1.79, "learning_rate": 1.2196621757944822e-05, "loss": 0.0802, "step": 7332 }, { "epoch": 1.79, "learning_rate": 1.2192771982822346e-05, "loss": 0.0904, "step": 7334 }, { "epoch": 1.79, "learning_rate": 1.2188921866268503e-05, "loss": 0.0593, "step": 7336 }, { "epoch": 1.79, "learning_rate": 1.2185071408882792e-05, "loss": 0.0706, "step": 7338 }, { "epoch": 1.79, "learning_rate": 1.2181220611264748e-05, "loss": 0.0537, "step": 7340 }, { "epoch": 1.79, "learning_rate": 1.217736947401398e-05, "loss": 0.0791, "step": 7342 }, { "epoch": 1.79, "learning_rate": 1.2173517997730133e-05, "loss": 0.0722, "step": 7344 }, { "epoch": 1.79, "learning_rate": 1.2169666183012915e-05, "loss": 0.0609, "step": 7346 }, { "epoch": 1.79, "learning_rate": 1.2165814030462083e-05, "loss": 0.0408, "step": 7348 }, { "epoch": 1.79, "learning_rate": 1.2161961540677442e-05, "loss": 0.0463, "step": 7350 }, { "epoch": 1.79, "learning_rate": 1.2158108714258859e-05, "loss": 0.0581, "step": 7352 }, { "epoch": 1.79, "learning_rate": 1.2154255551806246e-05, "loss": 0.051, "step": 7354 }, { "epoch": 1.79, "learning_rate": 1.2150402053919571e-05, "loss": 0.0894, "step": 7356 }, { "epoch": 1.79, "learning_rate": 1.2146548221198855e-05, "loss": 0.0349, "step": 7358 }, { "epoch": 1.79, "learning_rate": 1.2142694054244164e-05, "loss": 0.0638, "step": 7360 }, { "epoch": 1.79, "learning_rate": 1.2138839553655625e-05, "loss": 0.0809, "step": 7362 }, { "epoch": 1.79, "learning_rate": 1.2134984720033414e-05, "loss": 0.0432, "step": 7364 }, { "epoch": 1.79, "learning_rate": 1.2131129553977756e-05, "loss": 0.0694, "step": 7366 }, { "epoch": 1.8, "learning_rate": 1.212727405608893e-05, "loss": 0.0583, "step": 7368 }, { "epoch": 1.8, "learning_rate": 1.212341822696727e-05, "loss": 0.0403, "step": 7370 }, { "epoch": 1.8, "learning_rate": 1.2119562067213148e-05, "loss": 0.0793, "step": 7372 }, { "epoch": 1.8, "learning_rate": 1.211570557742701e-05, "loss": 0.0586, "step": 7374 }, { "epoch": 1.8, "learning_rate": 1.2111848758209335e-05, "loss": 0.067, "step": 7376 }, { "epoch": 1.8, "learning_rate": 1.2107991610160662e-05, "loss": 0.0443, "step": 7378 }, { "epoch": 1.8, "learning_rate": 1.2104134133881568e-05, "loss": 0.0592, "step": 7380 }, { "epoch": 1.8, "learning_rate": 1.2100276329972707e-05, "loss": 0.0659, "step": 7382 }, { "epoch": 1.8, "learning_rate": 1.2096418199034761e-05, "loss": 0.0783, "step": 7384 }, { "epoch": 1.8, "learning_rate": 1.2092559741668469e-05, "loss": 0.0566, "step": 7386 }, { "epoch": 1.8, "learning_rate": 1.2088700958474622e-05, "loss": 0.0612, "step": 7388 }, { "epoch": 1.8, "learning_rate": 1.2084841850054067e-05, "loss": 0.0438, "step": 7390 }, { "epoch": 1.8, "learning_rate": 1.2080982417007694e-05, "loss": 0.0786, "step": 7392 }, { "epoch": 1.8, "learning_rate": 1.2077122659936446e-05, "loss": 0.0555, "step": 7394 }, { "epoch": 1.8, "learning_rate": 1.2073262579441317e-05, "loss": 0.0824, "step": 7396 }, { "epoch": 1.8, "learning_rate": 1.2069402176123351e-05, "loss": 0.0639, "step": 7398 }, { "epoch": 1.8, "learning_rate": 1.206554145058364e-05, "loss": 0.0706, "step": 7400 }, { "epoch": 1.8, "learning_rate": 1.2061680403423333e-05, "loss": 0.0571, "step": 7402 }, { "epoch": 1.8, "learning_rate": 1.2057819035243622e-05, "loss": 0.0457, "step": 7404 }, { "epoch": 1.8, "learning_rate": 1.2053957346645751e-05, "loss": 0.0595, "step": 7406 }, { "epoch": 1.81, "learning_rate": 1.2050095338231015e-05, "loss": 0.0435, "step": 7408 }, { "epoch": 1.81, "learning_rate": 1.2046233010600758e-05, "loss": 0.0591, "step": 7410 }, { "epoch": 1.81, "learning_rate": 1.2042370364356375e-05, "loss": 0.0569, "step": 7412 }, { "epoch": 1.81, "learning_rate": 1.203850740009931e-05, "loss": 0.0637, "step": 7414 }, { "epoch": 1.81, "learning_rate": 1.2034644118431054e-05, "loss": 0.0537, "step": 7416 }, { "epoch": 1.81, "learning_rate": 1.2030780519953149e-05, "loss": 0.0588, "step": 7418 }, { "epoch": 1.81, "learning_rate": 1.2026916605267191e-05, "loss": 0.0919, "step": 7420 }, { "epoch": 1.81, "learning_rate": 1.202305237497482e-05, "loss": 0.0683, "step": 7422 }, { "epoch": 1.81, "learning_rate": 1.201918782967772e-05, "loss": 0.0617, "step": 7424 }, { "epoch": 1.81, "learning_rate": 1.2015322969977638e-05, "loss": 0.0796, "step": 7426 }, { "epoch": 1.81, "learning_rate": 1.2011457796476359e-05, "loss": 0.0273, "step": 7428 }, { "epoch": 1.81, "learning_rate": 1.200759230977572e-05, "loss": 0.0583, "step": 7430 }, { "epoch": 1.81, "learning_rate": 1.2003726510477605e-05, "loss": 0.0635, "step": 7432 }, { "epoch": 1.81, "learning_rate": 1.1999860399183954e-05, "loss": 0.0606, "step": 7434 }, { "epoch": 1.81, "learning_rate": 1.1995993976496742e-05, "loss": 0.0623, "step": 7436 }, { "epoch": 1.81, "learning_rate": 1.1992127243018009e-05, "loss": 0.0387, "step": 7438 }, { "epoch": 1.81, "learning_rate": 1.198826019934983e-05, "loss": 0.0544, "step": 7440 }, { "epoch": 1.81, "learning_rate": 1.1984392846094335e-05, "loss": 0.0673, "step": 7442 }, { "epoch": 1.81, "learning_rate": 1.1980525183853697e-05, "loss": 0.056, "step": 7444 }, { "epoch": 1.81, "learning_rate": 1.1976657213230147e-05, "loss": 0.0527, "step": 7446 }, { "epoch": 1.81, "learning_rate": 1.1972788934825953e-05, "loss": 0.0665, "step": 7448 }, { "epoch": 1.82, "learning_rate": 1.1968920349243435e-05, "loss": 0.0574, "step": 7450 }, { "epoch": 1.82, "learning_rate": 1.1965051457084965e-05, "loss": 0.046, "step": 7452 }, { "epoch": 1.82, "learning_rate": 1.1961182258952958e-05, "loss": 0.082, "step": 7454 }, { "epoch": 1.82, "learning_rate": 1.1957312755449874e-05, "loss": 0.0669, "step": 7456 }, { "epoch": 1.82, "learning_rate": 1.1953442947178228e-05, "loss": 0.0371, "step": 7458 }, { "epoch": 1.82, "learning_rate": 1.194957283474058e-05, "loss": 0.0458, "step": 7460 }, { "epoch": 1.82, "learning_rate": 1.1945702418739533e-05, "loss": 0.0513, "step": 7462 }, { "epoch": 1.82, "learning_rate": 1.1941831699777738e-05, "loss": 0.0445, "step": 7464 }, { "epoch": 1.82, "learning_rate": 1.1937960678457902e-05, "loss": 0.0513, "step": 7466 }, { "epoch": 1.82, "learning_rate": 1.193408935538277e-05, "loss": 0.0386, "step": 7468 }, { "epoch": 1.82, "learning_rate": 1.1930217731155133e-05, "loss": 0.0375, "step": 7470 }, { "epoch": 1.82, "learning_rate": 1.1926345806377837e-05, "loss": 0.0555, "step": 7472 }, { "epoch": 1.82, "learning_rate": 1.192247358165377e-05, "loss": 0.0475, "step": 7474 }, { "epoch": 1.82, "learning_rate": 1.1918601057585866e-05, "loss": 0.0672, "step": 7476 }, { "epoch": 1.82, "learning_rate": 1.1914728234777104e-05, "loss": 0.0506, "step": 7478 }, { "epoch": 1.82, "learning_rate": 1.1910855113830515e-05, "loss": 0.0644, "step": 7480 }, { "epoch": 1.82, "learning_rate": 1.1906981695349178e-05, "loss": 0.0561, "step": 7482 }, { "epoch": 1.82, "learning_rate": 1.1903107979936203e-05, "loss": 0.0377, "step": 7484 }, { "epoch": 1.82, "learning_rate": 1.1899233968194766e-05, "loss": 0.0492, "step": 7486 }, { "epoch": 1.82, "learning_rate": 1.1895359660728073e-05, "loss": 0.0442, "step": 7488 }, { "epoch": 1.83, "learning_rate": 1.1891485058139396e-05, "loss": 0.082, "step": 7490 }, { "epoch": 1.83, "learning_rate": 1.1887610161032026e-05, "loss": 0.0474, "step": 7492 }, { "epoch": 1.83, "learning_rate": 1.1883734970009321e-05, "loss": 0.062, "step": 7494 }, { "epoch": 1.83, "learning_rate": 1.1879859485674678e-05, "loss": 0.0484, "step": 7496 }, { "epoch": 1.83, "learning_rate": 1.1875983708631538e-05, "loss": 0.067, "step": 7498 }, { "epoch": 1.83, "learning_rate": 1.1872107639483389e-05, "loss": 0.0289, "step": 7500 }, { "epoch": 1.83, "learning_rate": 1.1868231278833764e-05, "loss": 0.0618, "step": 7502 }, { "epoch": 1.83, "learning_rate": 1.1864354627286245e-05, "loss": 0.0663, "step": 7504 }, { "epoch": 1.83, "learning_rate": 1.1860477685444456e-05, "loss": 0.0584, "step": 7506 }, { "epoch": 1.83, "learning_rate": 1.1856600453912062e-05, "loss": 0.0609, "step": 7508 }, { "epoch": 1.83, "learning_rate": 1.1852722933292781e-05, "loss": 0.054, "step": 7510 }, { "epoch": 1.83, "learning_rate": 1.1848845124190374e-05, "loss": 0.06, "step": 7512 }, { "epoch": 1.83, "learning_rate": 1.1844967027208642e-05, "loss": 0.0598, "step": 7514 }, { "epoch": 1.83, "learning_rate": 1.1841088642951434e-05, "loss": 0.0688, "step": 7516 }, { "epoch": 1.83, "learning_rate": 1.1837209972022652e-05, "loss": 0.0468, "step": 7518 }, { "epoch": 1.83, "learning_rate": 1.1833331015026224e-05, "loss": 0.0393, "step": 7520 }, { "epoch": 1.83, "learning_rate": 1.182945177256614e-05, "loss": 0.0479, "step": 7522 }, { "epoch": 1.83, "learning_rate": 1.1825572245246426e-05, "loss": 0.0501, "step": 7524 }, { "epoch": 1.83, "learning_rate": 1.1821692433671154e-05, "loss": 0.046, "step": 7526 }, { "epoch": 1.83, "learning_rate": 1.1817812338444437e-05, "loss": 0.0462, "step": 7528 }, { "epoch": 1.83, "learning_rate": 1.1813931960170442e-05, "loss": 0.0494, "step": 7530 }, { "epoch": 1.84, "learning_rate": 1.181005129945337e-05, "loss": 0.069, "step": 7532 }, { "epoch": 1.84, "learning_rate": 1.1806170356897471e-05, "loss": 0.0546, "step": 7534 }, { "epoch": 1.84, "learning_rate": 1.1802289133107032e-05, "loss": 0.0531, "step": 7536 }, { "epoch": 1.84, "learning_rate": 1.1798407628686396e-05, "loss": 0.0584, "step": 7538 }, { "epoch": 1.84, "learning_rate": 1.1794525844239941e-05, "loss": 0.0441, "step": 7540 }, { "epoch": 1.84, "learning_rate": 1.179064378037209e-05, "loss": 0.0678, "step": 7542 }, { "epoch": 1.84, "learning_rate": 1.1786761437687311e-05, "loss": 0.0746, "step": 7544 }, { "epoch": 1.84, "learning_rate": 1.1782878816790114e-05, "loss": 0.041, "step": 7546 }, { "epoch": 1.84, "learning_rate": 1.1778995918285047e-05, "loss": 0.0991, "step": 7548 }, { "epoch": 1.84, "learning_rate": 1.1775112742776715e-05, "loss": 0.0414, "step": 7550 }, { "epoch": 1.84, "learning_rate": 1.1771229290869757e-05, "loss": 0.0695, "step": 7552 }, { "epoch": 1.84, "learning_rate": 1.1767345563168852e-05, "loss": 0.0457, "step": 7554 }, { "epoch": 1.84, "learning_rate": 1.176346156027873e-05, "loss": 0.0418, "step": 7556 }, { "epoch": 1.84, "learning_rate": 1.1759577282804157e-05, "loss": 0.0333, "step": 7558 }, { "epoch": 1.84, "learning_rate": 1.1755692731349947e-05, "loss": 0.0486, "step": 7560 }, { "epoch": 1.84, "learning_rate": 1.1751807906520956e-05, "loss": 0.0484, "step": 7562 }, { "epoch": 1.84, "learning_rate": 1.1747922808922074e-05, "loss": 0.0518, "step": 7564 }, { "epoch": 1.84, "learning_rate": 1.1744037439158247e-05, "loss": 0.0293, "step": 7566 }, { "epoch": 1.84, "learning_rate": 1.1740151797834459e-05, "loss": 0.0558, "step": 7568 }, { "epoch": 1.84, "learning_rate": 1.1736265885555722e-05, "loss": 0.0539, "step": 7570 }, { "epoch": 1.85, "learning_rate": 1.1732379702927114e-05, "loss": 0.0306, "step": 7572 }, { "epoch": 1.85, "learning_rate": 1.172849325055374e-05, "loss": 0.0461, "step": 7574 }, { "epoch": 1.85, "learning_rate": 1.1724606529040749e-05, "loss": 0.0704, "step": 7576 }, { "epoch": 1.85, "learning_rate": 1.1720719538993332e-05, "loss": 0.0465, "step": 7578 }, { "epoch": 1.85, "learning_rate": 1.1716832281016726e-05, "loss": 0.0553, "step": 7580 }, { "epoch": 1.85, "learning_rate": 1.1712944755716207e-05, "loss": 0.0388, "step": 7582 }, { "epoch": 1.85, "learning_rate": 1.1709056963697091e-05, "loss": 0.0304, "step": 7584 }, { "epoch": 1.85, "learning_rate": 1.1705168905564737e-05, "loss": 0.0426, "step": 7586 }, { "epoch": 1.85, "learning_rate": 1.1701280581924546e-05, "loss": 0.0628, "step": 7588 }, { "epoch": 1.85, "learning_rate": 1.1697391993381956e-05, "loss": 0.0755, "step": 7590 }, { "epoch": 1.85, "learning_rate": 1.169350314054246e-05, "loss": 0.0762, "step": 7592 }, { "epoch": 1.85, "learning_rate": 1.1689614024011569e-05, "loss": 0.0569, "step": 7594 }, { "epoch": 1.85, "learning_rate": 1.1685724644394858e-05, "loss": 0.0615, "step": 7596 }, { "epoch": 1.85, "learning_rate": 1.1681835002297927e-05, "loss": 0.0571, "step": 7598 }, { "epoch": 1.85, "learning_rate": 1.1677945098326429e-05, "loss": 0.0297, "step": 7600 }, { "epoch": 1.85, "learning_rate": 1.1674054933086048e-05, "loss": 0.0459, "step": 7602 }, { "epoch": 1.85, "learning_rate": 1.167016450718251e-05, "loss": 0.0535, "step": 7604 }, { "epoch": 1.85, "learning_rate": 1.1666273821221588e-05, "loss": 0.0368, "step": 7606 }, { "epoch": 1.85, "learning_rate": 1.1662382875809094e-05, "loss": 0.0658, "step": 7608 }, { "epoch": 1.85, "learning_rate": 1.165849167155087e-05, "loss": 0.0469, "step": 7610 }, { "epoch": 1.85, "learning_rate": 1.1654600209052815e-05, "loss": 0.0301, "step": 7612 }, { "epoch": 1.86, "learning_rate": 1.1650708488920851e-05, "loss": 0.0431, "step": 7614 }, { "epoch": 1.86, "learning_rate": 1.1646816511760956e-05, "loss": 0.0431, "step": 7616 }, { "epoch": 1.86, "learning_rate": 1.1642924278179137e-05, "loss": 0.0605, "step": 7618 }, { "epoch": 1.86, "learning_rate": 1.1639031788781446e-05, "loss": 0.0425, "step": 7620 }, { "epoch": 1.86, "learning_rate": 1.1635139044173969e-05, "loss": 0.0449, "step": 7622 }, { "epoch": 1.86, "learning_rate": 1.1631246044962846e-05, "loss": 0.0392, "step": 7624 }, { "epoch": 1.86, "learning_rate": 1.1627352791754232e-05, "loss": 0.0475, "step": 7626 }, { "epoch": 1.86, "learning_rate": 1.1623459285154347e-05, "loss": 0.0607, "step": 7628 }, { "epoch": 1.86, "learning_rate": 1.1619565525769441e-05, "loss": 0.0473, "step": 7630 }, { "epoch": 1.86, "learning_rate": 1.1615671514205798e-05, "loss": 0.0601, "step": 7632 }, { "epoch": 1.86, "learning_rate": 1.161177725106974e-05, "loss": 0.0459, "step": 7634 }, { "epoch": 1.86, "learning_rate": 1.1607882736967643e-05, "loss": 0.0704, "step": 7636 }, { "epoch": 1.86, "learning_rate": 1.160398797250591e-05, "loss": 0.0571, "step": 7638 }, { "epoch": 1.86, "learning_rate": 1.1600092958290985e-05, "loss": 0.0279, "step": 7640 }, { "epoch": 1.86, "learning_rate": 1.1596197694929348e-05, "loss": 0.0464, "step": 7642 }, { "epoch": 1.86, "learning_rate": 1.1592302183027526e-05, "loss": 0.0503, "step": 7644 }, { "epoch": 1.86, "learning_rate": 1.1588406423192077e-05, "loss": 0.0279, "step": 7646 }, { "epoch": 1.86, "learning_rate": 1.1584510416029607e-05, "loss": 0.0315, "step": 7648 }, { "epoch": 1.86, "learning_rate": 1.1580614162146742e-05, "loss": 0.0345, "step": 7650 }, { "epoch": 1.86, "learning_rate": 1.157671766215017e-05, "loss": 0.0339, "step": 7652 }, { "epoch": 1.87, "learning_rate": 1.15728209166466e-05, "loss": 0.0618, "step": 7654 }, { "epoch": 1.87, "learning_rate": 1.1568923926242786e-05, "loss": 0.0564, "step": 7656 }, { "epoch": 1.87, "learning_rate": 1.156502669154552e-05, "loss": 0.068, "step": 7658 }, { "epoch": 1.87, "learning_rate": 1.1561129213161633e-05, "loss": 0.0486, "step": 7660 }, { "epoch": 1.87, "learning_rate": 1.1557231491697987e-05, "loss": 0.0585, "step": 7662 }, { "epoch": 1.87, "learning_rate": 1.1553333527761493e-05, "loss": 0.0451, "step": 7664 }, { "epoch": 1.87, "learning_rate": 1.1549435321959087e-05, "loss": 0.0424, "step": 7666 }, { "epoch": 1.87, "learning_rate": 1.1545536874897758e-05, "loss": 0.0484, "step": 7668 }, { "epoch": 1.87, "learning_rate": 1.1541638187184514e-05, "loss": 0.0546, "step": 7670 }, { "epoch": 1.87, "learning_rate": 1.153773925942642e-05, "loss": 0.0494, "step": 7672 }, { "epoch": 1.87, "learning_rate": 1.1533840092230564e-05, "loss": 0.0472, "step": 7674 }, { "epoch": 1.87, "learning_rate": 1.1529940686204078e-05, "loss": 0.051, "step": 7676 }, { "epoch": 1.87, "learning_rate": 1.1526041041954126e-05, "loss": 0.0327, "step": 7678 }, { "epoch": 1.87, "learning_rate": 1.1522141160087916e-05, "loss": 0.0485, "step": 7680 }, { "epoch": 1.87, "learning_rate": 1.1518241041212686e-05, "loss": 0.0634, "step": 7682 }, { "epoch": 1.87, "learning_rate": 1.1514340685935719e-05, "loss": 0.0518, "step": 7684 }, { "epoch": 1.87, "learning_rate": 1.1510440094864328e-05, "loss": 0.0507, "step": 7686 }, { "epoch": 1.87, "learning_rate": 1.1506539268605863e-05, "loss": 0.0374, "step": 7688 }, { "epoch": 1.87, "learning_rate": 1.1502638207767712e-05, "loss": 0.0627, "step": 7690 }, { "epoch": 1.87, "learning_rate": 1.1498736912957304e-05, "loss": 0.0582, "step": 7692 }, { "epoch": 1.87, "learning_rate": 1.1494835384782099e-05, "loss": 0.0506, "step": 7694 }, { "epoch": 1.88, "learning_rate": 1.1490933623849594e-05, "loss": 0.0514, "step": 7696 }, { "epoch": 1.88, "learning_rate": 1.148703163076732e-05, "loss": 0.038, "step": 7698 }, { "epoch": 1.88, "learning_rate": 1.1483129406142855e-05, "loss": 0.0517, "step": 7700 }, { "epoch": 1.88, "learning_rate": 1.1479226950583797e-05, "loss": 0.045, "step": 7702 }, { "epoch": 1.88, "learning_rate": 1.1475324264697793e-05, "loss": 0.0536, "step": 7704 }, { "epoch": 1.88, "learning_rate": 1.1471421349092517e-05, "loss": 0.0401, "step": 7706 }, { "epoch": 1.88, "learning_rate": 1.1467518204375692e-05, "loss": 0.0395, "step": 7708 }, { "epoch": 1.88, "learning_rate": 1.1463614831155054e-05, "loss": 0.0255, "step": 7710 }, { "epoch": 1.88, "learning_rate": 1.14597112300384e-05, "loss": 0.0377, "step": 7712 }, { "epoch": 1.88, "learning_rate": 1.145580740163354e-05, "loss": 0.0363, "step": 7714 }, { "epoch": 1.88, "learning_rate": 1.1451903346548343e-05, "loss": 0.0424, "step": 7716 }, { "epoch": 1.88, "learning_rate": 1.1447999065390686e-05, "loss": 0.0533, "step": 7718 }, { "epoch": 1.88, "learning_rate": 1.1444094558768506e-05, "loss": 0.0512, "step": 7720 }, { "epoch": 1.88, "learning_rate": 1.144018982728976e-05, "loss": 0.0423, "step": 7722 }, { "epoch": 1.88, "learning_rate": 1.1436284871562446e-05, "loss": 0.0439, "step": 7724 }, { "epoch": 1.88, "learning_rate": 1.1432379692194593e-05, "loss": 0.0202, "step": 7726 }, { "epoch": 1.88, "learning_rate": 1.1428474289794269e-05, "loss": 0.0439, "step": 7728 }, { "epoch": 1.88, "learning_rate": 1.1424568664969578e-05, "loss": 0.0575, "step": 7730 }, { "epoch": 1.88, "learning_rate": 1.1420662818328649e-05, "loss": 0.0481, "step": 7732 }, { "epoch": 1.88, "learning_rate": 1.1416756750479657e-05, "loss": 0.0439, "step": 7734 }, { "epoch": 1.88, "learning_rate": 1.1412850462030806e-05, "loss": 0.0269, "step": 7736 }, { "epoch": 1.89, "learning_rate": 1.1408943953590335e-05, "loss": 0.0288, "step": 7738 }, { "epoch": 1.89, "learning_rate": 1.1405037225766518e-05, "loss": 0.0515, "step": 7740 }, { "epoch": 1.89, "learning_rate": 1.1401130279167655e-05, "loss": 0.0356, "step": 7742 }, { "epoch": 1.89, "learning_rate": 1.13972231144021e-05, "loss": 0.0463, "step": 7744 }, { "epoch": 1.89, "learning_rate": 1.1393315732078219e-05, "loss": 0.0463, "step": 7746 }, { "epoch": 1.89, "learning_rate": 1.1389408132804426e-05, "loss": 0.036, "step": 7748 }, { "epoch": 1.89, "learning_rate": 1.1385500317189163e-05, "loss": 0.0508, "step": 7750 }, { "epoch": 1.89, "learning_rate": 1.1381592285840903e-05, "loss": 0.0498, "step": 7752 }, { "epoch": 1.89, "learning_rate": 1.137768403936816e-05, "loss": 0.0297, "step": 7754 }, { "epoch": 1.89, "learning_rate": 1.137377557837948e-05, "loss": 0.0464, "step": 7756 }, { "epoch": 1.89, "learning_rate": 1.1369866903483437e-05, "loss": 0.052, "step": 7758 }, { "epoch": 1.89, "learning_rate": 1.136595801528864e-05, "loss": 0.0511, "step": 7760 }, { "epoch": 1.89, "learning_rate": 1.1362048914403736e-05, "loss": 0.0447, "step": 7762 }, { "epoch": 1.89, "learning_rate": 1.1358139601437402e-05, "loss": 0.0638, "step": 7764 }, { "epoch": 1.89, "learning_rate": 1.1354230076998347e-05, "loss": 0.0534, "step": 7766 }, { "epoch": 1.89, "learning_rate": 1.1350320341695314e-05, "loss": 0.0392, "step": 7768 }, { "epoch": 1.89, "learning_rate": 1.1346410396137075e-05, "loss": 0.0487, "step": 7770 }, { "epoch": 1.89, "learning_rate": 1.1342500240932445e-05, "loss": 0.0457, "step": 7772 }, { "epoch": 1.89, "learning_rate": 1.1338589876690261e-05, "loss": 0.0435, "step": 7774 }, { "epoch": 1.89, "learning_rate": 1.1334679304019397e-05, "loss": 0.0728, "step": 7776 }, { "epoch": 1.9, "learning_rate": 1.1330768523528761e-05, "loss": 0.0512, "step": 7778 }, { "epoch": 1.9, "learning_rate": 1.132685753582729e-05, "loss": 0.0334, "step": 7780 }, { "epoch": 1.9, "learning_rate": 1.132294634152395e-05, "loss": 0.055, "step": 7782 }, { "epoch": 1.9, "learning_rate": 1.1319034941227754e-05, "loss": 0.052, "step": 7784 }, { "epoch": 1.9, "learning_rate": 1.131512333554773e-05, "loss": 0.0687, "step": 7786 }, { "epoch": 1.9, "learning_rate": 1.1311211525092947e-05, "loss": 0.0563, "step": 7788 }, { "epoch": 1.9, "learning_rate": 1.13072995104725e-05, "loss": 0.0289, "step": 7790 }, { "epoch": 1.9, "learning_rate": 1.1303387292295524e-05, "loss": 0.0318, "step": 7792 }, { "epoch": 1.9, "learning_rate": 1.1299474871171184e-05, "loss": 0.0357, "step": 7794 }, { "epoch": 1.9, "learning_rate": 1.1295562247708665e-05, "loss": 0.0283, "step": 7796 }, { "epoch": 1.9, "learning_rate": 1.1291649422517196e-05, "loss": 0.0341, "step": 7798 }, { "epoch": 1.9, "learning_rate": 1.1287736396206043e-05, "loss": 0.0473, "step": 7800 }, { "epoch": 1.9, "learning_rate": 1.128382316938448e-05, "loss": 0.0694, "step": 7802 }, { "epoch": 1.9, "learning_rate": 1.1279909742661834e-05, "loss": 0.0444, "step": 7804 }, { "epoch": 1.9, "learning_rate": 1.1275996116647454e-05, "loss": 0.0306, "step": 7806 }, { "epoch": 1.9, "learning_rate": 1.1272082291950723e-05, "loss": 0.0405, "step": 7808 }, { "epoch": 1.9, "learning_rate": 1.1268168269181047e-05, "loss": 0.051, "step": 7810 }, { "epoch": 1.9, "learning_rate": 1.126425404894788e-05, "loss": 0.051, "step": 7812 }, { "epoch": 1.9, "learning_rate": 1.1260339631860688e-05, "loss": 0.0346, "step": 7814 }, { "epoch": 1.9, "learning_rate": 1.125642501852898e-05, "loss": 0.0586, "step": 7816 }, { "epoch": 1.9, "learning_rate": 1.1252510209562284e-05, "loss": 0.0491, "step": 7818 }, { "epoch": 1.91, "learning_rate": 1.1248595205570174e-05, "loss": 0.0461, "step": 7820 }, { "epoch": 1.91, "learning_rate": 1.1244680007162246e-05, "loss": 0.0478, "step": 7822 }, { "epoch": 1.91, "learning_rate": 1.124076461494812e-05, "loss": 0.0301, "step": 7824 }, { "epoch": 1.91, "learning_rate": 1.1236849029537453e-05, "loss": 0.0567, "step": 7826 }, { "epoch": 1.91, "learning_rate": 1.1232933251539941e-05, "loss": 0.0397, "step": 7828 }, { "epoch": 1.91, "learning_rate": 1.1229017281565288e-05, "loss": 0.0522, "step": 7830 }, { "epoch": 1.91, "learning_rate": 1.122510112022325e-05, "loss": 0.0357, "step": 7832 }, { "epoch": 1.91, "learning_rate": 1.1221184768123598e-05, "loss": 0.0452, "step": 7834 }, { "epoch": 1.91, "learning_rate": 1.121726822587614e-05, "loss": 0.0441, "step": 7836 }, { "epoch": 1.91, "learning_rate": 1.121335149409071e-05, "loss": 0.0346, "step": 7838 }, { "epoch": 1.91, "learning_rate": 1.1209434573377176e-05, "loss": 0.0482, "step": 7840 }, { "epoch": 1.91, "learning_rate": 1.120551746434543e-05, "loss": 0.0434, "step": 7842 }, { "epoch": 1.91, "learning_rate": 1.1201600167605397e-05, "loss": 0.0564, "step": 7844 }, { "epoch": 1.91, "learning_rate": 1.1197682683767028e-05, "loss": 0.0537, "step": 7846 }, { "epoch": 1.91, "learning_rate": 1.119376501344031e-05, "loss": 0.0479, "step": 7848 }, { "epoch": 1.91, "learning_rate": 1.1189847157235249e-05, "loss": 0.0449, "step": 7850 }, { "epoch": 1.91, "learning_rate": 1.1185929115761889e-05, "loss": 0.0575, "step": 7852 }, { "epoch": 1.91, "learning_rate": 1.1182010889630295e-05, "loss": 0.0479, "step": 7854 }, { "epoch": 1.91, "learning_rate": 1.117809247945057e-05, "loss": 0.0384, "step": 7856 }, { "epoch": 1.91, "learning_rate": 1.1174173885832835e-05, "loss": 0.0481, "step": 7858 }, { "epoch": 1.92, "learning_rate": 1.1170255109387248e-05, "loss": 0.0357, "step": 7860 }, { "epoch": 1.92, "learning_rate": 1.1166336150723992e-05, "loss": 0.0313, "step": 7862 }, { "epoch": 1.92, "learning_rate": 1.1162417010453281e-05, "loss": 0.0547, "step": 7864 }, { "epoch": 1.92, "learning_rate": 1.1158497689185347e-05, "loss": 0.0415, "step": 7866 }, { "epoch": 1.92, "learning_rate": 1.115457818753047e-05, "loss": 0.0375, "step": 7868 }, { "epoch": 1.92, "learning_rate": 1.1150658506098938e-05, "loss": 0.0392, "step": 7870 }, { "epoch": 1.92, "learning_rate": 1.114673864550108e-05, "loss": 0.0445, "step": 7872 }, { "epoch": 1.92, "learning_rate": 1.1142818606347243e-05, "loss": 0.0353, "step": 7874 }, { "epoch": 1.92, "learning_rate": 1.1138898389247812e-05, "loss": 0.0281, "step": 7876 }, { "epoch": 1.92, "learning_rate": 1.1134977994813191e-05, "loss": 0.0302, "step": 7878 }, { "epoch": 1.92, "learning_rate": 1.1131057423653822e-05, "loss": 0.0335, "step": 7880 }, { "epoch": 1.92, "learning_rate": 1.112713667638016e-05, "loss": 0.0299, "step": 7882 }, { "epoch": 1.92, "learning_rate": 1.11232157536027e-05, "loss": 0.0306, "step": 7884 }, { "epoch": 1.92, "learning_rate": 1.1119294655931956e-05, "loss": 0.0358, "step": 7886 }, { "epoch": 1.92, "learning_rate": 1.1115373383978478e-05, "loss": 0.0605, "step": 7888 }, { "epoch": 1.92, "learning_rate": 1.1111451938352833e-05, "loss": 0.0521, "step": 7890 }, { "epoch": 1.92, "learning_rate": 1.1107530319665625e-05, "loss": 0.0637, "step": 7892 }, { "epoch": 1.92, "learning_rate": 1.1103608528527475e-05, "loss": 0.0534, "step": 7894 }, { "epoch": 1.92, "learning_rate": 1.109968656554904e-05, "loss": 0.0549, "step": 7896 }, { "epoch": 1.92, "learning_rate": 1.1095764431340996e-05, "loss": 0.0276, "step": 7898 }, { "epoch": 1.92, "learning_rate": 1.1091842126514052e-05, "loss": 0.0486, "step": 7900 }, { "epoch": 1.93, "learning_rate": 1.1087919651678938e-05, "loss": 0.0407, "step": 7902 }, { "epoch": 1.93, "learning_rate": 1.1083997007446418e-05, "loss": 0.0588, "step": 7904 }, { "epoch": 1.93, "learning_rate": 1.1080074194427275e-05, "loss": 0.0533, "step": 7906 }, { "epoch": 1.93, "learning_rate": 1.107615121323232e-05, "loss": 0.0421, "step": 7908 }, { "epoch": 1.93, "learning_rate": 1.1072228064472391e-05, "loss": 0.0633, "step": 7910 }, { "epoch": 1.93, "learning_rate": 1.1068304748758356e-05, "loss": 0.0433, "step": 7912 }, { "epoch": 1.93, "learning_rate": 1.10643812667011e-05, "loss": 0.074, "step": 7914 }, { "epoch": 1.93, "learning_rate": 1.1060457618911544e-05, "loss": 0.0198, "step": 7916 }, { "epoch": 1.93, "learning_rate": 1.1056533806000625e-05, "loss": 0.0433, "step": 7918 }, { "epoch": 1.93, "learning_rate": 1.1052609828579318e-05, "loss": 0.0421, "step": 7920 }, { "epoch": 1.93, "learning_rate": 1.1048685687258607e-05, "loss": 0.0387, "step": 7922 }, { "epoch": 1.93, "learning_rate": 1.1044761382649519e-05, "loss": 0.0298, "step": 7924 }, { "epoch": 1.93, "learning_rate": 1.1040836915363093e-05, "loss": 0.0515, "step": 7926 }, { "epoch": 1.93, "learning_rate": 1.1036912286010402e-05, "loss": 0.0444, "step": 7928 }, { "epoch": 1.93, "learning_rate": 1.1032987495202536e-05, "loss": 0.0399, "step": 7930 }, { "epoch": 1.93, "learning_rate": 1.1029062543550619e-05, "loss": 0.055, "step": 7932 }, { "epoch": 1.93, "learning_rate": 1.1025137431665798e-05, "loss": 0.0416, "step": 7934 }, { "epoch": 1.93, "learning_rate": 1.1021212160159238e-05, "loss": 0.0374, "step": 7936 }, { "epoch": 1.93, "learning_rate": 1.1017286729642133e-05, "loss": 0.0323, "step": 7938 }, { "epoch": 1.93, "learning_rate": 1.1013361140725712e-05, "loss": 0.0488, "step": 7940 }, { "epoch": 1.94, "learning_rate": 1.1009435394021208e-05, "loss": 0.0487, "step": 7942 }, { "epoch": 1.94, "learning_rate": 1.1005509490139897e-05, "loss": 0.0384, "step": 7944 }, { "epoch": 1.94, "learning_rate": 1.1001583429693063e-05, "loss": 0.0501, "step": 7946 }, { "epoch": 1.94, "learning_rate": 1.099765721329204e-05, "loss": 0.0436, "step": 7948 }, { "epoch": 1.94, "learning_rate": 1.0993730841548153e-05, "loss": 0.0422, "step": 7950 }, { "epoch": 1.94, "learning_rate": 1.0989804315072779e-05, "loss": 0.038, "step": 7952 }, { "epoch": 1.94, "learning_rate": 1.0985877634477301e-05, "loss": 0.045, "step": 7954 }, { "epoch": 1.94, "learning_rate": 1.098195080037314e-05, "loss": 0.0301, "step": 7956 }, { "epoch": 1.94, "learning_rate": 1.0978023813371728e-05, "loss": 0.0396, "step": 7958 }, { "epoch": 1.94, "learning_rate": 1.0974096674084531e-05, "loss": 0.0443, "step": 7960 }, { "epoch": 1.94, "learning_rate": 1.0970169383123035e-05, "loss": 0.0487, "step": 7962 }, { "epoch": 1.94, "learning_rate": 1.0966241941098745e-05, "loss": 0.0412, "step": 7964 }, { "epoch": 1.94, "learning_rate": 1.0962314348623196e-05, "loss": 0.0621, "step": 7966 }, { "epoch": 1.94, "learning_rate": 1.0958386606307947e-05, "loss": 0.0469, "step": 7968 }, { "epoch": 1.94, "learning_rate": 1.0954458714764573e-05, "loss": 0.0413, "step": 7970 }, { "epoch": 1.94, "learning_rate": 1.095053067460468e-05, "loss": 0.0447, "step": 7972 }, { "epoch": 1.94, "learning_rate": 1.094660248643989e-05, "loss": 0.0226, "step": 7974 }, { "epoch": 1.94, "learning_rate": 1.0942674150881859e-05, "loss": 0.0208, "step": 7976 }, { "epoch": 1.94, "learning_rate": 1.093874566854225e-05, "loss": 0.043, "step": 7978 }, { "epoch": 1.94, "learning_rate": 1.0934817040032763e-05, "loss": 0.0375, "step": 7980 }, { "epoch": 1.94, "learning_rate": 1.0930888265965116e-05, "loss": 0.0679, "step": 7982 }, { "epoch": 1.95, "learning_rate": 1.0926959346951046e-05, "loss": 0.0555, "step": 7984 }, { "epoch": 1.95, "learning_rate": 1.0923030283602318e-05, "loss": 0.0534, "step": 7986 }, { "epoch": 1.95, "learning_rate": 1.0919101076530719e-05, "loss": 0.072, "step": 7988 }, { "epoch": 1.95, "learning_rate": 1.0915171726348053e-05, "loss": 0.034, "step": 7990 }, { "epoch": 1.95, "learning_rate": 1.0911242233666152e-05, "loss": 0.0437, "step": 7992 }, { "epoch": 1.95, "learning_rate": 1.0907312599096864e-05, "loss": 0.0326, "step": 7994 }, { "epoch": 1.95, "learning_rate": 1.0903382823252069e-05, "loss": 0.0421, "step": 7996 }, { "epoch": 1.95, "learning_rate": 1.0899452906743662e-05, "loss": 0.0288, "step": 7998 }, { "epoch": 1.95, "learning_rate": 1.0895522850183557e-05, "loss": 0.0327, "step": 8000 }, { "epoch": 1.95, "learning_rate": 1.0891592654183695e-05, "loss": 0.0469, "step": 8002 }, { "epoch": 1.95, "learning_rate": 1.0887662319356045e-05, "loss": 0.0251, "step": 8004 }, { "epoch": 1.95, "learning_rate": 1.088373184631258e-05, "loss": 0.0282, "step": 8006 }, { "epoch": 1.95, "learning_rate": 1.0879801235665311e-05, "loss": 0.0486, "step": 8008 }, { "epoch": 1.95, "learning_rate": 1.087587048802626e-05, "loss": 0.0395, "step": 8010 }, { "epoch": 1.95, "learning_rate": 1.0871939604007477e-05, "loss": 0.0366, "step": 8012 }, { "epoch": 1.95, "learning_rate": 1.086800858422103e-05, "loss": 0.0515, "step": 8014 }, { "epoch": 1.95, "learning_rate": 1.086407742927901e-05, "loss": 0.0379, "step": 8016 }, { "epoch": 1.95, "learning_rate": 1.0860146139793525e-05, "loss": 0.0424, "step": 8018 }, { "epoch": 1.95, "learning_rate": 1.0856214716376712e-05, "loss": 0.0439, "step": 8020 }, { "epoch": 1.95, "learning_rate": 1.0852283159640718e-05, "loss": 0.0494, "step": 8022 }, { "epoch": 1.96, "learning_rate": 1.084835147019772e-05, "loss": 0.0484, "step": 8024 }, { "epoch": 1.96, "learning_rate": 1.0844419648659912e-05, "loss": 0.0313, "step": 8026 }, { "epoch": 1.96, "learning_rate": 1.0840487695639506e-05, "loss": 0.052, "step": 8028 }, { "epoch": 1.96, "learning_rate": 1.0836555611748739e-05, "loss": 0.0334, "step": 8030 }, { "epoch": 1.96, "learning_rate": 1.0832623397599869e-05, "loss": 0.0577, "step": 8032 }, { "epoch": 1.96, "learning_rate": 1.0828691053805165e-05, "loss": 0.0274, "step": 8034 }, { "epoch": 1.96, "learning_rate": 1.0824758580976929e-05, "loss": 0.0532, "step": 8036 }, { "epoch": 1.96, "learning_rate": 1.0820825979727477e-05, "loss": 0.0234, "step": 8038 }, { "epoch": 1.96, "learning_rate": 1.081689325066914e-05, "loss": 0.05, "step": 8040 }, { "epoch": 1.96, "learning_rate": 1.0812960394414278e-05, "loss": 0.0279, "step": 8042 }, { "epoch": 1.96, "learning_rate": 1.0809027411575267e-05, "loss": 0.0333, "step": 8044 }, { "epoch": 1.96, "learning_rate": 1.0805094302764505e-05, "loss": 0.0405, "step": 8046 }, { "epoch": 1.96, "learning_rate": 1.0801161068594401e-05, "loss": 0.0307, "step": 8048 }, { "epoch": 1.96, "learning_rate": 1.079722770967739e-05, "loss": 0.0375, "step": 8050 }, { "epoch": 1.96, "learning_rate": 1.0793294226625932e-05, "loss": 0.0394, "step": 8052 }, { "epoch": 1.96, "learning_rate": 1.0789360620052496e-05, "loss": 0.0318, "step": 8054 }, { "epoch": 1.96, "learning_rate": 1.0785426890569575e-05, "loss": 0.0292, "step": 8056 }, { "epoch": 1.96, "learning_rate": 1.0781493038789682e-05, "loss": 0.0332, "step": 8058 }, { "epoch": 1.96, "learning_rate": 1.0777559065325347e-05, "loss": 0.0598, "step": 8060 }, { "epoch": 1.96, "learning_rate": 1.0773624970789118e-05, "loss": 0.0342, "step": 8062 }, { "epoch": 1.96, "learning_rate": 1.0769690755793567e-05, "loss": 0.0371, "step": 8064 }, { "epoch": 1.97, "learning_rate": 1.076575642095128e-05, "loss": 0.0434, "step": 8066 }, { "epoch": 1.97, "learning_rate": 1.0761821966874862e-05, "loss": 0.0435, "step": 8068 }, { "epoch": 1.97, "learning_rate": 1.075788739417694e-05, "loss": 0.0477, "step": 8070 }, { "epoch": 1.97, "learning_rate": 1.0753952703470152e-05, "loss": 0.0393, "step": 8072 }, { "epoch": 1.97, "learning_rate": 1.0750017895367165e-05, "loss": 0.0469, "step": 8074 }, { "epoch": 1.97, "learning_rate": 1.0746082970480658e-05, "loss": 0.0254, "step": 8076 }, { "epoch": 1.97, "learning_rate": 1.0742147929423326e-05, "loss": 0.0331, "step": 8078 }, { "epoch": 1.97, "learning_rate": 1.0738212772807883e-05, "loss": 0.0347, "step": 8080 }, { "epoch": 1.97, "learning_rate": 1.0734277501247073e-05, "loss": 0.0454, "step": 8082 }, { "epoch": 1.97, "learning_rate": 1.0730342115353635e-05, "loss": 0.052, "step": 8084 }, { "epoch": 1.97, "learning_rate": 1.072640661574035e-05, "loss": 0.036, "step": 8086 }, { "epoch": 1.97, "learning_rate": 1.0722471003019998e-05, "loss": 0.0569, "step": 8088 }, { "epoch": 1.97, "learning_rate": 1.0718535277805388e-05, "loss": 0.049, "step": 8090 }, { "epoch": 1.97, "learning_rate": 1.071459944070934e-05, "loss": 0.0353, "step": 8092 }, { "epoch": 1.97, "learning_rate": 1.0710663492344693e-05, "loss": 0.0394, "step": 8094 }, { "epoch": 1.97, "learning_rate": 1.0706727433324308e-05, "loss": 0.0485, "step": 8096 }, { "epoch": 1.97, "learning_rate": 1.0702791264261056e-05, "loss": 0.0335, "step": 8098 }, { "epoch": 1.97, "learning_rate": 1.0698854985767831e-05, "loss": 0.0302, "step": 8100 }, { "epoch": 1.97, "learning_rate": 1.069491859845754e-05, "loss": 0.0371, "step": 8102 }, { "epoch": 1.97, "learning_rate": 1.0690982102943113e-05, "loss": 0.0345, "step": 8104 }, { "epoch": 1.98, "learning_rate": 1.0687045499837486e-05, "loss": 0.0313, "step": 8106 }, { "epoch": 1.98, "learning_rate": 1.0683108789753618e-05, "loss": 0.0341, "step": 8108 }, { "epoch": 1.98, "learning_rate": 1.0679171973304494e-05, "loss": 0.0459, "step": 8110 }, { "epoch": 1.98, "learning_rate": 1.0675235051103097e-05, "loss": 0.0301, "step": 8112 }, { "epoch": 1.98, "learning_rate": 1.067129802376244e-05, "loss": 0.0269, "step": 8114 }, { "epoch": 1.98, "learning_rate": 1.0667360891895547e-05, "loss": 0.0452, "step": 8116 }, { "epoch": 1.98, "learning_rate": 1.0663423656115461e-05, "loss": 0.0285, "step": 8118 }, { "epoch": 1.98, "learning_rate": 1.0659486317035237e-05, "loss": 0.0586, "step": 8120 }, { "epoch": 1.98, "learning_rate": 1.0655548875267951e-05, "loss": 0.0384, "step": 8122 }, { "epoch": 1.98, "learning_rate": 1.0651611331426694e-05, "loss": 0.0415, "step": 8124 }, { "epoch": 1.98, "learning_rate": 1.064767368612457e-05, "loss": 0.0362, "step": 8126 }, { "epoch": 1.98, "learning_rate": 1.0643735939974698e-05, "loss": 0.0274, "step": 8128 }, { "epoch": 1.98, "learning_rate": 1.0639798093590221e-05, "loss": 0.0448, "step": 8130 }, { "epoch": 1.98, "learning_rate": 1.0635860147584286e-05, "loss": 0.0221, "step": 8132 }, { "epoch": 1.98, "learning_rate": 1.0631922102570066e-05, "loss": 0.0387, "step": 8134 }, { "epoch": 1.98, "learning_rate": 1.0627983959160739e-05, "loss": 0.0381, "step": 8136 }, { "epoch": 1.98, "learning_rate": 1.0624045717969514e-05, "loss": 0.0307, "step": 8138 }, { "epoch": 1.98, "learning_rate": 1.0620107379609592e-05, "loss": 0.0443, "step": 8140 }, { "epoch": 1.98, "learning_rate": 1.0616168944694212e-05, "loss": 0.0449, "step": 8142 }, { "epoch": 1.98, "learning_rate": 1.0612230413836615e-05, "loss": 0.0474, "step": 8144 }, { "epoch": 1.98, "learning_rate": 1.0608291787650064e-05, "loss": 0.0362, "step": 8146 }, { "epoch": 1.99, "learning_rate": 1.0604353066747825e-05, "loss": 0.0477, "step": 8148 }, { "epoch": 1.99, "learning_rate": 1.0600414251743195e-05, "loss": 0.032, "step": 8150 }, { "epoch": 1.99, "learning_rate": 1.0596475343249478e-05, "loss": 0.0534, "step": 8152 }, { "epoch": 1.99, "learning_rate": 1.0592536341879986e-05, "loss": 0.0288, "step": 8154 }, { "epoch": 1.99, "learning_rate": 1.0588597248248054e-05, "loss": 0.0239, "step": 8156 }, { "epoch": 1.99, "learning_rate": 1.058465806296703e-05, "loss": 0.0279, "step": 8158 }, { "epoch": 1.99, "learning_rate": 1.0580718786650275e-05, "loss": 0.0401, "step": 8160 }, { "epoch": 1.99, "learning_rate": 1.0576779419911165e-05, "loss": 0.0421, "step": 8162 }, { "epoch": 1.99, "learning_rate": 1.0572839963363088e-05, "loss": 0.0413, "step": 8164 }, { "epoch": 1.99, "learning_rate": 1.056890041761945e-05, "loss": 0.0301, "step": 8166 }, { "epoch": 1.99, "learning_rate": 1.0564960783293666e-05, "loss": 0.0297, "step": 8168 }, { "epoch": 1.99, "learning_rate": 1.0561021060999168e-05, "loss": 0.037, "step": 8170 }, { "epoch": 1.99, "learning_rate": 1.05570812513494e-05, "loss": 0.0246, "step": 8172 }, { "epoch": 1.99, "learning_rate": 1.0553141354957823e-05, "loss": 0.0507, "step": 8174 }, { "epoch": 1.99, "learning_rate": 1.0549201372437905e-05, "loss": 0.0384, "step": 8176 }, { "epoch": 1.99, "learning_rate": 1.0545261304403137e-05, "loss": 0.0504, "step": 8178 }, { "epoch": 1.99, "learning_rate": 1.0541321151467012e-05, "loss": 0.0367, "step": 8180 }, { "epoch": 1.99, "learning_rate": 1.0537380914243045e-05, "loss": 0.0399, "step": 8182 }, { "epoch": 1.99, "learning_rate": 1.053344059334476e-05, "loss": 0.0241, "step": 8184 }, { "epoch": 1.99, "learning_rate": 1.0529500189385696e-05, "loss": 0.0202, "step": 8186 }, { "epoch": 2.0, "learning_rate": 1.0525559702979402e-05, "loss": 0.0375, "step": 8188 }, { "epoch": 2.0, "learning_rate": 1.0521619134739447e-05, "loss": 0.0425, "step": 8190 }, { "epoch": 2.0, "learning_rate": 1.05176784852794e-05, "loss": 0.0536, "step": 8192 }, { "epoch": 2.0, "learning_rate": 1.0513737755212856e-05, "loss": 0.0386, "step": 8194 }, { "epoch": 2.0, "learning_rate": 1.0509796945153416e-05, "loss": 0.0288, "step": 8196 }, { "epoch": 2.0, "learning_rate": 1.0505856055714693e-05, "loss": 0.0298, "step": 8198 }, { "epoch": 2.0, "learning_rate": 1.0501915087510314e-05, "loss": 0.0213, "step": 8200 }, { "epoch": 2.0, "learning_rate": 1.049797404115392e-05, "loss": 0.0503, "step": 8202 }, { "epoch": 2.0, "learning_rate": 1.0494032917259159e-05, "loss": 0.0498, "step": 8204 }, { "epoch": 2.0, "learning_rate": 1.0490091716439693e-05, "loss": 0.0372, "step": 8206 }, { "epoch": 2.0, "learning_rate": 1.04861504393092e-05, "loss": 0.0302, "step": 8208 }, { "epoch": 2.0, "learning_rate": 1.0482209086481368e-05, "loss": 0.0288, "step": 8210 }, { "epoch": 2.0, "learning_rate": 1.047826765856989e-05, "loss": 0.0146, "step": 8212 }, { "epoch": 2.0, "learning_rate": 1.0474326156188483e-05, "loss": 0.0127, "step": 8214 }, { "epoch": 2.0, "learning_rate": 1.0470384579950867e-05, "loss": 0.0208, "step": 8216 }, { "epoch": 2.0, "learning_rate": 1.0466442930470775e-05, "loss": 0.0144, "step": 8218 }, { "epoch": 2.0, "learning_rate": 1.0462501208361948e-05, "loss": 0.0075, "step": 8220 }, { "epoch": 2.0, "learning_rate": 1.0458559414238152e-05, "loss": 0.0204, "step": 8222 }, { "epoch": 2.0, "learning_rate": 1.0454617548713147e-05, "loss": 0.0187, "step": 8224 }, { "epoch": 2.0, "learning_rate": 1.045067561240071e-05, "loss": 0.0132, "step": 8226 }, { "epoch": 2.0, "learning_rate": 1.0446733605914636e-05, "loss": 0.0206, "step": 8228 }, { "epoch": 2.01, "learning_rate": 1.0442791529868727e-05, "loss": 0.0268, "step": 8230 }, { "epoch": 2.01, "learning_rate": 1.0438849384876789e-05, "loss": 0.0134, "step": 8232 }, { "epoch": 2.01, "learning_rate": 1.0434907171552644e-05, "loss": 0.014, "step": 8234 }, { "epoch": 2.01, "learning_rate": 1.0430964890510129e-05, "loss": 0.0223, "step": 8236 }, { "epoch": 2.01, "learning_rate": 1.0427022542363082e-05, "loss": 0.0163, "step": 8238 }, { "epoch": 2.01, "learning_rate": 1.0423080127725362e-05, "loss": 0.0229, "step": 8240 }, { "epoch": 2.01, "learning_rate": 1.041913764721083e-05, "loss": 0.0164, "step": 8242 }, { "epoch": 2.01, "learning_rate": 1.0415195101433364e-05, "loss": 0.0242, "step": 8244 }, { "epoch": 2.01, "learning_rate": 1.0411252491006844e-05, "loss": 0.0133, "step": 8246 }, { "epoch": 2.01, "learning_rate": 1.0407309816545166e-05, "loss": 0.0088, "step": 8248 }, { "epoch": 2.01, "learning_rate": 1.0403367078662236e-05, "loss": 0.0196, "step": 8250 }, { "epoch": 2.01, "learning_rate": 1.0399424277971968e-05, "loss": 0.0146, "step": 8252 }, { "epoch": 2.01, "learning_rate": 1.0395481415088284e-05, "loss": 0.0186, "step": 8254 }, { "epoch": 2.01, "learning_rate": 1.039153849062512e-05, "loss": 0.0188, "step": 8256 }, { "epoch": 2.01, "learning_rate": 1.0387595505196421e-05, "loss": 0.0091, "step": 8258 }, { "epoch": 2.01, "learning_rate": 1.0383652459416134e-05, "loss": 0.0209, "step": 8260 }, { "epoch": 2.01, "learning_rate": 1.0379709353898229e-05, "loss": 0.0091, "step": 8262 }, { "epoch": 2.01, "learning_rate": 1.0375766189256673e-05, "loss": 0.014, "step": 8264 }, { "epoch": 2.01, "learning_rate": 1.0371822966105449e-05, "loss": 0.012, "step": 8266 }, { "epoch": 2.01, "learning_rate": 1.0367879685058542e-05, "loss": 0.0188, "step": 8268 }, { "epoch": 2.02, "learning_rate": 1.036393634672996e-05, "loss": 0.0137, "step": 8270 }, { "epoch": 2.02, "learning_rate": 1.0359992951733704e-05, "loss": 0.0175, "step": 8272 }, { "epoch": 2.02, "learning_rate": 1.0356049500683792e-05, "loss": 0.0181, "step": 8274 }, { "epoch": 2.02, "learning_rate": 1.0352105994194248e-05, "loss": 0.0202, "step": 8276 }, { "epoch": 2.02, "learning_rate": 1.0348162432879111e-05, "loss": 0.0104, "step": 8278 }, { "epoch": 2.02, "learning_rate": 1.0344218817352422e-05, "loss": 0.0095, "step": 8280 }, { "epoch": 2.02, "learning_rate": 1.034027514822823e-05, "loss": 0.0235, "step": 8282 }, { "epoch": 2.02, "learning_rate": 1.0336331426120595e-05, "loss": 0.0153, "step": 8284 }, { "epoch": 2.02, "learning_rate": 1.0332387651643587e-05, "loss": 0.0207, "step": 8286 }, { "epoch": 2.02, "learning_rate": 1.0328443825411276e-05, "loss": 0.015, "step": 8288 }, { "epoch": 2.02, "learning_rate": 1.0324499948037753e-05, "loss": 0.0144, "step": 8290 }, { "epoch": 2.02, "learning_rate": 1.0320556020137104e-05, "loss": 0.0096, "step": 8292 }, { "epoch": 2.02, "learning_rate": 1.0316612042323438e-05, "loss": 0.0229, "step": 8294 }, { "epoch": 2.02, "learning_rate": 1.0312668015210848e-05, "loss": 0.0082, "step": 8296 }, { "epoch": 2.02, "learning_rate": 1.030872393941346e-05, "loss": 0.0131, "step": 8298 }, { "epoch": 2.02, "learning_rate": 1.0304779815545396e-05, "loss": 0.0146, "step": 8300 }, { "epoch": 2.02, "learning_rate": 1.0300835644220783e-05, "loss": 0.016, "step": 8302 }, { "epoch": 2.02, "learning_rate": 1.0296891426053759e-05, "loss": 0.0113, "step": 8304 }, { "epoch": 2.02, "learning_rate": 1.029294716165847e-05, "loss": 0.0298, "step": 8306 }, { "epoch": 2.02, "learning_rate": 1.0289002851649068e-05, "loss": 0.0168, "step": 8308 }, { "epoch": 2.02, "learning_rate": 1.0285058496639713e-05, "loss": 0.0171, "step": 8310 }, { "epoch": 2.03, "learning_rate": 1.0281114097244568e-05, "loss": 0.0072, "step": 8312 }, { "epoch": 2.03, "learning_rate": 1.0277169654077813e-05, "loss": 0.0126, "step": 8314 }, { "epoch": 2.03, "learning_rate": 1.0273225167753618e-05, "loss": 0.0237, "step": 8316 }, { "epoch": 2.03, "learning_rate": 1.0269280638886178e-05, "loss": 0.0141, "step": 8318 }, { "epoch": 2.03, "learning_rate": 1.0265336068089682e-05, "loss": 0.015, "step": 8320 }, { "epoch": 2.03, "learning_rate": 1.0261391455978335e-05, "loss": 0.0087, "step": 8322 }, { "epoch": 2.03, "learning_rate": 1.0257446803166336e-05, "loss": 0.0226, "step": 8324 }, { "epoch": 2.03, "learning_rate": 1.02535021102679e-05, "loss": 0.0223, "step": 8326 }, { "epoch": 2.03, "learning_rate": 1.0249557377897251e-05, "loss": 0.0152, "step": 8328 }, { "epoch": 2.03, "learning_rate": 1.0245612606668609e-05, "loss": 0.0081, "step": 8330 }, { "epoch": 2.03, "learning_rate": 1.0241667797196202e-05, "loss": 0.0234, "step": 8332 }, { "epoch": 2.03, "learning_rate": 1.0237722950094275e-05, "loss": 0.0155, "step": 8334 }, { "epoch": 2.03, "learning_rate": 1.0233778065977067e-05, "loss": 0.013, "step": 8336 }, { "epoch": 2.03, "learning_rate": 1.0229833145458825e-05, "loss": 0.0254, "step": 8338 }, { "epoch": 2.03, "learning_rate": 1.0225888189153803e-05, "loss": 0.0144, "step": 8340 }, { "epoch": 2.03, "learning_rate": 1.0221943197676265e-05, "loss": 0.0224, "step": 8342 }, { "epoch": 2.03, "learning_rate": 1.0217998171640475e-05, "loss": 0.0247, "step": 8344 }, { "epoch": 2.03, "learning_rate": 1.0214053111660702e-05, "loss": 0.019, "step": 8346 }, { "epoch": 2.03, "learning_rate": 1.0210108018351222e-05, "loss": 0.0122, "step": 8348 }, { "epoch": 2.03, "learning_rate": 1.020616289232632e-05, "loss": 0.0255, "step": 8350 }, { "epoch": 2.04, "learning_rate": 1.0202217734200273e-05, "loss": 0.0143, "step": 8352 }, { "epoch": 2.04, "learning_rate": 1.0198272544587382e-05, "loss": 0.0308, "step": 8354 }, { "epoch": 2.04, "learning_rate": 1.019432732410194e-05, "loss": 0.0123, "step": 8356 }, { "epoch": 2.04, "learning_rate": 1.019038207335825e-05, "loss": 0.014, "step": 8358 }, { "epoch": 2.04, "learning_rate": 1.0186436792970608e-05, "loss": 0.018, "step": 8360 }, { "epoch": 2.04, "learning_rate": 1.0182491483553339e-05, "loss": 0.0066, "step": 8362 }, { "epoch": 2.04, "learning_rate": 1.0178546145720746e-05, "loss": 0.0208, "step": 8364 }, { "epoch": 2.04, "learning_rate": 1.0174600780087155e-05, "loss": 0.0116, "step": 8366 }, { "epoch": 2.04, "learning_rate": 1.0170655387266888e-05, "loss": 0.0172, "step": 8368 }, { "epoch": 2.04, "learning_rate": 1.016670996787427e-05, "loss": 0.0105, "step": 8370 }, { "epoch": 2.04, "learning_rate": 1.0162764522523638e-05, "loss": 0.0158, "step": 8372 }, { "epoch": 2.04, "learning_rate": 1.0158819051829325e-05, "loss": 0.0128, "step": 8374 }, { "epoch": 2.04, "learning_rate": 1.015487355640567e-05, "loss": 0.008, "step": 8376 }, { "epoch": 2.04, "learning_rate": 1.0150928036867019e-05, "loss": 0.0125, "step": 8378 }, { "epoch": 2.04, "learning_rate": 1.0146982493827717e-05, "loss": 0.014, "step": 8380 }, { "epoch": 2.04, "learning_rate": 1.0143036927902116e-05, "loss": 0.0122, "step": 8382 }, { "epoch": 2.04, "learning_rate": 1.0139091339704574e-05, "loss": 0.0234, "step": 8384 }, { "epoch": 2.04, "learning_rate": 1.0135145729849448e-05, "loss": 0.0164, "step": 8386 }, { "epoch": 2.04, "learning_rate": 1.0131200098951092e-05, "loss": 0.0187, "step": 8388 }, { "epoch": 2.04, "learning_rate": 1.0127254447623885e-05, "loss": 0.0103, "step": 8390 }, { "epoch": 2.04, "learning_rate": 1.0123308776482184e-05, "loss": 0.0194, "step": 8392 }, { "epoch": 2.05, "learning_rate": 1.0119363086140363e-05, "loss": 0.0128, "step": 8394 }, { "epoch": 2.05, "learning_rate": 1.0115417377212797e-05, "loss": 0.0118, "step": 8396 }, { "epoch": 2.05, "learning_rate": 1.0111471650313862e-05, "loss": 0.0204, "step": 8398 }, { "epoch": 2.05, "learning_rate": 1.0107525906057943e-05, "loss": 0.0154, "step": 8400 }, { "epoch": 2.05, "learning_rate": 1.0103580145059415e-05, "loss": 0.0073, "step": 8402 }, { "epoch": 2.05, "learning_rate": 1.0099634367932664e-05, "loss": 0.024, "step": 8404 }, { "epoch": 2.05, "learning_rate": 1.0095688575292087e-05, "loss": 0.0142, "step": 8406 }, { "epoch": 2.05, "learning_rate": 1.009174276775206e-05, "loss": 0.0155, "step": 8408 }, { "epoch": 2.05, "learning_rate": 1.0087796945926984e-05, "loss": 0.0136, "step": 8410 }, { "epoch": 2.05, "learning_rate": 1.0083851110431256e-05, "loss": 0.0253, "step": 8412 }, { "epoch": 2.05, "learning_rate": 1.0079905261879265e-05, "loss": 0.0132, "step": 8414 }, { "epoch": 2.05, "learning_rate": 1.0075959400885413e-05, "loss": 0.0079, "step": 8416 }, { "epoch": 2.05, "learning_rate": 1.0072013528064107e-05, "loss": 0.0107, "step": 8418 }, { "epoch": 2.05, "learning_rate": 1.006806764402974e-05, "loss": 0.0183, "step": 8420 }, { "epoch": 2.05, "learning_rate": 1.006412174939672e-05, "loss": 0.0249, "step": 8422 }, { "epoch": 2.05, "learning_rate": 1.0060175844779454e-05, "loss": 0.0171, "step": 8424 }, { "epoch": 2.05, "learning_rate": 1.0056229930792349e-05, "loss": 0.0104, "step": 8426 }, { "epoch": 2.05, "learning_rate": 1.0052284008049813e-05, "loss": 0.0132, "step": 8428 }, { "epoch": 2.05, "learning_rate": 1.0048338077166259e-05, "loss": 0.0159, "step": 8430 }, { "epoch": 2.05, "learning_rate": 1.0044392138756094e-05, "loss": 0.0111, "step": 8432 }, { "epoch": 2.06, "learning_rate": 1.0040446193433737e-05, "loss": 0.0086, "step": 8434 }, { "epoch": 2.06, "learning_rate": 1.0036500241813596e-05, "loss": 0.0108, "step": 8436 }, { "epoch": 2.06, "learning_rate": 1.003255428451009e-05, "loss": 0.0134, "step": 8438 }, { "epoch": 2.06, "learning_rate": 1.0028608322137632e-05, "loss": 0.0162, "step": 8440 }, { "epoch": 2.06, "learning_rate": 1.002466235531064e-05, "loss": 0.0192, "step": 8442 }, { "epoch": 2.06, "learning_rate": 1.0020716384643527e-05, "loss": 0.0276, "step": 8444 }, { "epoch": 2.06, "learning_rate": 1.0016770410750718e-05, "loss": 0.0091, "step": 8446 }, { "epoch": 2.06, "learning_rate": 1.0012824434246628e-05, "loss": 0.012, "step": 8448 }, { "epoch": 2.06, "learning_rate": 1.0008878455745676e-05, "loss": 0.0144, "step": 8450 }, { "epoch": 2.06, "learning_rate": 1.0004932475862277e-05, "loss": 0.0139, "step": 8452 }, { "epoch": 2.06, "learning_rate": 1.0000986495210858e-05, "loss": 0.0213, "step": 8454 }, { "epoch": 2.06, "learning_rate": 9.997040514405832e-06, "loss": 0.0126, "step": 8456 }, { "epoch": 2.06, "learning_rate": 9.99309453406162e-06, "loss": 0.0117, "step": 8458 }, { "epoch": 2.06, "learning_rate": 9.989148554792645e-06, "loss": 0.0072, "step": 8460 }, { "epoch": 2.06, "learning_rate": 9.98520257721332e-06, "loss": 0.0191, "step": 8462 }, { "epoch": 2.06, "learning_rate": 9.981256601938067e-06, "loss": 0.0095, "step": 8464 }, { "epoch": 2.06, "learning_rate": 9.977310629581303e-06, "loss": 0.0122, "step": 8466 }, { "epoch": 2.06, "learning_rate": 9.973364660757453e-06, "loss": 0.0201, "step": 8468 }, { "epoch": 2.06, "learning_rate": 9.969418696080926e-06, "loss": 0.0141, "step": 8470 }, { "epoch": 2.06, "learning_rate": 9.96547273616614e-06, "loss": 0.0156, "step": 8472 }, { "epoch": 2.06, "learning_rate": 9.961526781627512e-06, "loss": 0.0204, "step": 8474 }, { "epoch": 2.07, "learning_rate": 9.957580833079463e-06, "loss": 0.0163, "step": 8476 }, { "epoch": 2.07, "learning_rate": 9.953634891136401e-06, "loss": 0.013, "step": 8478 }, { "epoch": 2.07, "learning_rate": 9.949688956412739e-06, "loss": 0.0126, "step": 8480 }, { "epoch": 2.07, "learning_rate": 9.945743029522893e-06, "loss": 0.0136, "step": 8482 }, { "epoch": 2.07, "learning_rate": 9.941797111081273e-06, "loss": 0.0182, "step": 8484 }, { "epoch": 2.07, "learning_rate": 9.937851201702285e-06, "loss": 0.0131, "step": 8486 }, { "epoch": 2.07, "learning_rate": 9.933905302000341e-06, "loss": 0.0114, "step": 8488 }, { "epoch": 2.07, "learning_rate": 9.929959412589848e-06, "loss": 0.0122, "step": 8490 }, { "epoch": 2.07, "learning_rate": 9.926013534085212e-06, "loss": 0.0173, "step": 8492 }, { "epoch": 2.07, "learning_rate": 9.922067667100829e-06, "loss": 0.0238, "step": 8494 }, { "epoch": 2.07, "learning_rate": 9.91812181225111e-06, "loss": 0.0137, "step": 8496 }, { "epoch": 2.07, "learning_rate": 9.914175970150449e-06, "loss": 0.0096, "step": 8498 }, { "epoch": 2.07, "learning_rate": 9.910230141413249e-06, "loss": 0.011, "step": 8500 }, { "epoch": 2.07, "learning_rate": 9.9062843266539e-06, "loss": 0.0176, "step": 8502 }, { "epoch": 2.07, "learning_rate": 9.902338526486799e-06, "loss": 0.0095, "step": 8504 }, { "epoch": 2.07, "learning_rate": 9.898392741526333e-06, "loss": 0.0195, "step": 8506 }, { "epoch": 2.07, "learning_rate": 9.8944469723869e-06, "loss": 0.0152, "step": 8508 }, { "epoch": 2.07, "learning_rate": 9.890501219682874e-06, "loss": 0.0199, "step": 8510 }, { "epoch": 2.07, "learning_rate": 9.88655548402865e-06, "loss": 0.0102, "step": 8512 }, { "epoch": 2.07, "learning_rate": 9.882609766038602e-06, "loss": 0.0204, "step": 8514 }, { "epoch": 2.08, "learning_rate": 9.878664066327114e-06, "loss": 0.021, "step": 8516 }, { "epoch": 2.08, "learning_rate": 9.874718385508555e-06, "loss": 0.0066, "step": 8518 }, { "epoch": 2.08, "learning_rate": 9.870772724197304e-06, "loss": 0.0185, "step": 8520 }, { "epoch": 2.08, "learning_rate": 9.866827083007725e-06, "loss": 0.0136, "step": 8522 }, { "epoch": 2.08, "learning_rate": 9.862881462554192e-06, "loss": 0.0178, "step": 8524 }, { "epoch": 2.08, "learning_rate": 9.858935863451062e-06, "loss": 0.02, "step": 8526 }, { "epoch": 2.08, "learning_rate": 9.854990286312695e-06, "loss": 0.0274, "step": 8528 }, { "epoch": 2.08, "learning_rate": 9.851044731753448e-06, "loss": 0.0165, "step": 8530 }, { "epoch": 2.08, "learning_rate": 9.847099200387682e-06, "loss": 0.0165, "step": 8532 }, { "epoch": 2.08, "learning_rate": 9.843153692829734e-06, "loss": 0.0119, "step": 8534 }, { "epoch": 2.08, "learning_rate": 9.839208209693956e-06, "loss": 0.0188, "step": 8536 }, { "epoch": 2.08, "learning_rate": 9.83526275159469e-06, "loss": 0.0159, "step": 8538 }, { "epoch": 2.08, "learning_rate": 9.831317319146277e-06, "loss": 0.015, "step": 8540 }, { "epoch": 2.08, "learning_rate": 9.827371912963042e-06, "loss": 0.0186, "step": 8542 }, { "epoch": 2.08, "learning_rate": 9.82342653365932e-06, "loss": 0.0152, "step": 8544 }, { "epoch": 2.08, "learning_rate": 9.819481181849439e-06, "loss": 0.017, "step": 8546 }, { "epoch": 2.08, "learning_rate": 9.81553585814772e-06, "loss": 0.0277, "step": 8548 }, { "epoch": 2.08, "learning_rate": 9.811590563168475e-06, "loss": 0.0112, "step": 8550 }, { "epoch": 2.08, "learning_rate": 9.807645297526019e-06, "loss": 0.0134, "step": 8552 }, { "epoch": 2.08, "learning_rate": 9.803700061834665e-06, "loss": 0.0073, "step": 8554 }, { "epoch": 2.08, "learning_rate": 9.799754856708713e-06, "loss": 0.0141, "step": 8556 }, { "epoch": 2.09, "learning_rate": 9.795809682762457e-06, "loss": 0.0192, "step": 8558 }, { "epoch": 2.09, "learning_rate": 9.791864540610198e-06, "loss": 0.0137, "step": 8560 }, { "epoch": 2.09, "learning_rate": 9.787919430866218e-06, "loss": 0.02, "step": 8562 }, { "epoch": 2.09, "learning_rate": 9.78397435414481e-06, "loss": 0.0134, "step": 8564 }, { "epoch": 2.09, "learning_rate": 9.780029311060241e-06, "loss": 0.0227, "step": 8566 }, { "epoch": 2.09, "learning_rate": 9.776084302226794e-06, "loss": 0.0155, "step": 8568 }, { "epoch": 2.09, "learning_rate": 9.77213932825873e-06, "loss": 0.0134, "step": 8570 }, { "epoch": 2.09, "learning_rate": 9.76819438977032e-06, "loss": 0.0065, "step": 8572 }, { "epoch": 2.09, "learning_rate": 9.764249487375812e-06, "loss": 0.0187, "step": 8574 }, { "epoch": 2.09, "learning_rate": 9.760304621689462e-06, "loss": 0.0065, "step": 8576 }, { "epoch": 2.09, "learning_rate": 9.756359793325515e-06, "loss": 0.0164, "step": 8578 }, { "epoch": 2.09, "learning_rate": 9.752415002898214e-06, "loss": 0.0157, "step": 8580 }, { "epoch": 2.09, "learning_rate": 9.74847025102179e-06, "loss": 0.016, "step": 8582 }, { "epoch": 2.09, "learning_rate": 9.744525538310468e-06, "loss": 0.0155, "step": 8584 }, { "epoch": 2.09, "learning_rate": 9.740580865378478e-06, "loss": 0.0143, "step": 8586 }, { "epoch": 2.09, "learning_rate": 9.736636232840026e-06, "loss": 0.016, "step": 8588 }, { "epoch": 2.09, "learning_rate": 9.732691641309328e-06, "loss": 0.0172, "step": 8590 }, { "epoch": 2.09, "learning_rate": 9.728747091400585e-06, "loss": 0.0132, "step": 8592 }, { "epoch": 2.09, "learning_rate": 9.724802583727996e-06, "loss": 0.0129, "step": 8594 }, { "epoch": 2.09, "learning_rate": 9.720858118905747e-06, "loss": 0.0088, "step": 8596 }, { "epoch": 2.1, "learning_rate": 9.716913697548025e-06, "loss": 0.0079, "step": 8598 }, { "epoch": 2.1, "learning_rate": 9.712969320269002e-06, "loss": 0.0179, "step": 8600 }, { "epoch": 2.1, "learning_rate": 9.709024987682855e-06, "loss": 0.0148, "step": 8602 }, { "epoch": 2.1, "learning_rate": 9.705080700403741e-06, "loss": 0.0107, "step": 8604 }, { "epoch": 2.1, "learning_rate": 9.701136459045814e-06, "loss": 0.0142, "step": 8606 }, { "epoch": 2.1, "learning_rate": 9.697192264223226e-06, "loss": 0.0241, "step": 8608 }, { "epoch": 2.1, "learning_rate": 9.693248116550123e-06, "loss": 0.0194, "step": 8610 }, { "epoch": 2.1, "learning_rate": 9.689304016640631e-06, "loss": 0.0123, "step": 8612 }, { "epoch": 2.1, "learning_rate": 9.685359965108878e-06, "loss": 0.0185, "step": 8614 }, { "epoch": 2.1, "learning_rate": 9.681415962568986e-06, "loss": 0.0157, "step": 8616 }, { "epoch": 2.1, "learning_rate": 9.677472009635064e-06, "loss": 0.0108, "step": 8618 }, { "epoch": 2.1, "learning_rate": 9.673528106921218e-06, "loss": 0.0156, "step": 8620 }, { "epoch": 2.1, "learning_rate": 9.669584255041538e-06, "loss": 0.011, "step": 8622 }, { "epoch": 2.1, "learning_rate": 9.66564045461012e-06, "loss": 0.0096, "step": 8624 }, { "epoch": 2.1, "learning_rate": 9.661696706241042e-06, "loss": 0.0235, "step": 8626 }, { "epoch": 2.1, "learning_rate": 9.657753010548368e-06, "loss": 0.0199, "step": 8628 }, { "epoch": 2.1, "learning_rate": 9.653809368146169e-06, "loss": 0.0106, "step": 8630 }, { "epoch": 2.1, "learning_rate": 9.6498657796485e-06, "loss": 0.0185, "step": 8632 }, { "epoch": 2.1, "learning_rate": 9.645922245669408e-06, "loss": 0.0255, "step": 8634 }, { "epoch": 2.1, "learning_rate": 9.641978766822925e-06, "loss": 0.0211, "step": 8636 }, { "epoch": 2.1, "learning_rate": 9.63803534372309e-06, "loss": 0.0173, "step": 8638 }, { "epoch": 2.11, "learning_rate": 9.634091976983916e-06, "loss": 0.0089, "step": 8640 }, { "epoch": 2.11, "learning_rate": 9.630148667219424e-06, "loss": 0.012, "step": 8642 }, { "epoch": 2.11, "learning_rate": 9.62620541504361e-06, "loss": 0.0126, "step": 8644 }, { "epoch": 2.11, "learning_rate": 9.62226222107047e-06, "loss": 0.0112, "step": 8646 }, { "epoch": 2.11, "learning_rate": 9.61831908591399e-06, "loss": 0.0138, "step": 8648 }, { "epoch": 2.11, "learning_rate": 9.614376010188151e-06, "loss": 0.0267, "step": 8650 }, { "epoch": 2.11, "learning_rate": 9.610432994506912e-06, "loss": 0.0218, "step": 8652 }, { "epoch": 2.11, "learning_rate": 9.606490039484237e-06, "loss": 0.013, "step": 8654 }, { "epoch": 2.11, "learning_rate": 9.602547145734069e-06, "loss": 0.0159, "step": 8656 }, { "epoch": 2.11, "learning_rate": 9.598604313870355e-06, "loss": 0.0112, "step": 8658 }, { "epoch": 2.11, "learning_rate": 9.594661544507015e-06, "loss": 0.0212, "step": 8660 }, { "epoch": 2.11, "learning_rate": 9.590718838257973e-06, "loss": 0.013, "step": 8662 }, { "epoch": 2.11, "learning_rate": 9.586776195737133e-06, "loss": 0.0105, "step": 8664 }, { "epoch": 2.11, "learning_rate": 9.582833617558406e-06, "loss": 0.0126, "step": 8666 }, { "epoch": 2.11, "learning_rate": 9.578891104335673e-06, "loss": 0.0173, "step": 8668 }, { "epoch": 2.11, "learning_rate": 9.574948656682813e-06, "loss": 0.0141, "step": 8670 }, { "epoch": 2.11, "learning_rate": 9.571006275213695e-06, "loss": 0.018, "step": 8672 }, { "epoch": 2.11, "learning_rate": 9.567063960542187e-06, "loss": 0.0111, "step": 8674 }, { "epoch": 2.11, "learning_rate": 9.563121713282126e-06, "loss": 0.0166, "step": 8676 }, { "epoch": 2.11, "learning_rate": 9.559179534047355e-06, "loss": 0.012, "step": 8678 }, { "epoch": 2.12, "learning_rate": 9.555237423451704e-06, "loss": 0.0143, "step": 8680 }, { "epoch": 2.12, "learning_rate": 9.551295382108988e-06, "loss": 0.0153, "step": 8682 }, { "epoch": 2.12, "learning_rate": 9.547353410633009e-06, "loss": 0.0134, "step": 8684 }, { "epoch": 2.12, "learning_rate": 9.543411509637564e-06, "loss": 0.0213, "step": 8686 }, { "epoch": 2.12, "learning_rate": 9.53946967973644e-06, "loss": 0.012, "step": 8688 }, { "epoch": 2.12, "learning_rate": 9.535527921543411e-06, "loss": 0.0155, "step": 8690 }, { "epoch": 2.12, "learning_rate": 9.531586235672232e-06, "loss": 0.0325, "step": 8692 }, { "epoch": 2.12, "learning_rate": 9.52764462273666e-06, "loss": 0.0147, "step": 8694 }, { "epoch": 2.12, "learning_rate": 9.52370308335043e-06, "loss": 0.0074, "step": 8696 }, { "epoch": 2.12, "learning_rate": 9.519761618127277e-06, "loss": 0.0156, "step": 8698 }, { "epoch": 2.12, "learning_rate": 9.515820227680909e-06, "loss": 0.0173, "step": 8700 }, { "epoch": 2.12, "learning_rate": 9.511878912625035e-06, "loss": 0.0114, "step": 8702 }, { "epoch": 2.12, "learning_rate": 9.507937673573343e-06, "loss": 0.0156, "step": 8704 }, { "epoch": 2.12, "learning_rate": 9.503996511139523e-06, "loss": 0.017, "step": 8706 }, { "epoch": 2.12, "learning_rate": 9.500055425937235e-06, "loss": 0.0145, "step": 8708 }, { "epoch": 2.12, "learning_rate": 9.496114418580144e-06, "loss": 0.0165, "step": 8710 }, { "epoch": 2.12, "learning_rate": 9.492173489681888e-06, "loss": 0.0136, "step": 8712 }, { "epoch": 2.12, "learning_rate": 9.488232639856106e-06, "loss": 0.0187, "step": 8714 }, { "epoch": 2.12, "learning_rate": 9.484291869716414e-06, "loss": 0.018, "step": 8716 }, { "epoch": 2.12, "learning_rate": 9.480351179876417e-06, "loss": 0.0174, "step": 8718 }, { "epoch": 2.12, "learning_rate": 9.476410570949718e-06, "loss": 0.0121, "step": 8720 }, { "epoch": 2.13, "learning_rate": 9.4724700435499e-06, "loss": 0.0255, "step": 8722 }, { "epoch": 2.13, "learning_rate": 9.468529598290526e-06, "loss": 0.0123, "step": 8724 }, { "epoch": 2.13, "learning_rate": 9.464589235785157e-06, "loss": 0.0186, "step": 8726 }, { "epoch": 2.13, "learning_rate": 9.460648956647339e-06, "loss": 0.0174, "step": 8728 }, { "epoch": 2.13, "learning_rate": 9.456708761490606e-06, "loss": 0.0144, "step": 8730 }, { "epoch": 2.13, "learning_rate": 9.452768650928472e-06, "loss": 0.0173, "step": 8732 }, { "epoch": 2.13, "learning_rate": 9.44882862557444e-06, "loss": 0.0058, "step": 8734 }, { "epoch": 2.13, "learning_rate": 9.444888686042007e-06, "loss": 0.017, "step": 8736 }, { "epoch": 2.13, "learning_rate": 9.440948832944654e-06, "loss": 0.0162, "step": 8738 }, { "epoch": 2.13, "learning_rate": 9.437009066895837e-06, "loss": 0.0137, "step": 8740 }, { "epoch": 2.13, "learning_rate": 9.433069388509015e-06, "loss": 0.0128, "step": 8742 }, { "epoch": 2.13, "learning_rate": 9.429129798397625e-06, "loss": 0.0152, "step": 8744 }, { "epoch": 2.13, "learning_rate": 9.425190297175096e-06, "loss": 0.0218, "step": 8746 }, { "epoch": 2.13, "learning_rate": 9.421250885454825e-06, "loss": 0.012, "step": 8748 }, { "epoch": 2.13, "learning_rate": 9.417311563850218e-06, "loss": 0.0131, "step": 8750 }, { "epoch": 2.13, "learning_rate": 9.41337233297466e-06, "loss": 0.0191, "step": 8752 }, { "epoch": 2.13, "learning_rate": 9.409433193441516e-06, "loss": 0.0172, "step": 8754 }, { "epoch": 2.13, "learning_rate": 9.405494145864134e-06, "loss": 0.014, "step": 8756 }, { "epoch": 2.13, "learning_rate": 9.401555190855863e-06, "loss": 0.013, "step": 8758 }, { "epoch": 2.13, "learning_rate": 9.397616329030023e-06, "loss": 0.0126, "step": 8760 }, { "epoch": 2.13, "learning_rate": 9.39367756099993e-06, "loss": 0.0152, "step": 8762 }, { "epoch": 2.14, "learning_rate": 9.389738887378873e-06, "loss": 0.0269, "step": 8764 }, { "epoch": 2.14, "learning_rate": 9.385800308780141e-06, "loss": 0.0092, "step": 8766 }, { "epoch": 2.14, "learning_rate": 9.381861825816995e-06, "loss": 0.012, "step": 8768 }, { "epoch": 2.14, "learning_rate": 9.377923439102693e-06, "loss": 0.015, "step": 8770 }, { "epoch": 2.14, "learning_rate": 9.373985149250464e-06, "loss": 0.0181, "step": 8772 }, { "epoch": 2.14, "learning_rate": 9.370046956873536e-06, "loss": 0.0339, "step": 8774 }, { "epoch": 2.14, "learning_rate": 9.366108862585113e-06, "loss": 0.0125, "step": 8776 }, { "epoch": 2.14, "learning_rate": 9.362170866998391e-06, "loss": 0.0288, "step": 8778 }, { "epoch": 2.14, "learning_rate": 9.358232970726541e-06, "loss": 0.0241, "step": 8780 }, { "epoch": 2.14, "learning_rate": 9.354295174382721e-06, "loss": 0.006, "step": 8782 }, { "epoch": 2.14, "learning_rate": 9.350357478580082e-06, "loss": 0.0203, "step": 8784 }, { "epoch": 2.14, "learning_rate": 9.346419883931752e-06, "loss": 0.0129, "step": 8786 }, { "epoch": 2.14, "learning_rate": 9.342482391050844e-06, "loss": 0.0073, "step": 8788 }, { "epoch": 2.14, "learning_rate": 9.338545000550453e-06, "loss": 0.0185, "step": 8790 }, { "epoch": 2.14, "learning_rate": 9.334607713043666e-06, "loss": 0.0166, "step": 8792 }, { "epoch": 2.14, "learning_rate": 9.330670529143545e-06, "loss": 0.0145, "step": 8794 }, { "epoch": 2.14, "learning_rate": 9.326733449463143e-06, "loss": 0.0123, "step": 8796 }, { "epoch": 2.14, "learning_rate": 9.322796474615487e-06, "loss": 0.013, "step": 8798 }, { "epoch": 2.14, "learning_rate": 9.3188596052136e-06, "loss": 0.0127, "step": 8800 }, { "epoch": 2.14, "learning_rate": 9.314922841870484e-06, "loss": 0.0127, "step": 8802 }, { "epoch": 2.15, "learning_rate": 9.310986185199115e-06, "loss": 0.0096, "step": 8804 }, { "epoch": 2.15, "learning_rate": 9.307049635812466e-06, "loss": 0.0285, "step": 8806 }, { "epoch": 2.15, "learning_rate": 9.303113194323491e-06, "loss": 0.0219, "step": 8808 }, { "epoch": 2.15, "learning_rate": 9.29917686134512e-06, "loss": 0.0225, "step": 8810 }, { "epoch": 2.15, "learning_rate": 9.295240637490269e-06, "loss": 0.025, "step": 8812 }, { "epoch": 2.15, "learning_rate": 9.29130452337184e-06, "loss": 0.0174, "step": 8814 }, { "epoch": 2.15, "learning_rate": 9.287368519602715e-06, "loss": 0.0128, "step": 8816 }, { "epoch": 2.15, "learning_rate": 9.283432626795764e-06, "loss": 0.0231, "step": 8818 }, { "epoch": 2.15, "learning_rate": 9.279496845563828e-06, "loss": 0.0112, "step": 8820 }, { "epoch": 2.15, "learning_rate": 9.275561176519747e-06, "loss": 0.0259, "step": 8822 }, { "epoch": 2.15, "learning_rate": 9.271625620276326e-06, "loss": 0.019, "step": 8824 }, { "epoch": 2.15, "learning_rate": 9.26769017744637e-06, "loss": 0.0207, "step": 8826 }, { "epoch": 2.15, "learning_rate": 9.263754848642649e-06, "loss": 0.0091, "step": 8828 }, { "epoch": 2.15, "learning_rate": 9.25981963447793e-06, "loss": 0.0089, "step": 8830 }, { "epoch": 2.15, "learning_rate": 9.255884535564952e-06, "loss": 0.0198, "step": 8832 }, { "epoch": 2.15, "learning_rate": 9.251949552516447e-06, "loss": 0.0227, "step": 8834 }, { "epoch": 2.15, "learning_rate": 9.248014685945113e-06, "loss": 0.012, "step": 8836 }, { "epoch": 2.15, "learning_rate": 9.24407993646364e-06, "loss": 0.0074, "step": 8838 }, { "epoch": 2.15, "learning_rate": 9.240145304684701e-06, "loss": 0.0172, "step": 8840 }, { "epoch": 2.15, "learning_rate": 9.236210791220956e-06, "loss": 0.0239, "step": 8842 }, { "epoch": 2.15, "learning_rate": 9.232276396685025e-06, "loss": 0.0112, "step": 8844 }, { "epoch": 2.16, "learning_rate": 9.228342121689531e-06, "loss": 0.0156, "step": 8846 }, { "epoch": 2.16, "learning_rate": 9.224407966847067e-06, "loss": 0.0159, "step": 8848 }, { "epoch": 2.16, "learning_rate": 9.220473932770217e-06, "loss": 0.0163, "step": 8850 }, { "epoch": 2.16, "learning_rate": 9.216540020071537e-06, "loss": 0.0195, "step": 8852 }, { "epoch": 2.16, "learning_rate": 9.212606229363563e-06, "loss": 0.0209, "step": 8854 }, { "epoch": 2.16, "learning_rate": 9.20867256125882e-06, "loss": 0.0172, "step": 8856 }, { "epoch": 2.16, "learning_rate": 9.204739016369817e-06, "loss": 0.0261, "step": 8858 }, { "epoch": 2.16, "learning_rate": 9.200805595309023e-06, "loss": 0.0099, "step": 8860 }, { "epoch": 2.16, "learning_rate": 9.196872298688908e-06, "loss": 0.0131, "step": 8862 }, { "epoch": 2.16, "learning_rate": 9.192939127121922e-06, "loss": 0.0156, "step": 8864 }, { "epoch": 2.16, "learning_rate": 9.189006081220486e-06, "loss": 0.0161, "step": 8866 }, { "epoch": 2.16, "learning_rate": 9.185073161597e-06, "loss": 0.0178, "step": 8868 }, { "epoch": 2.16, "learning_rate": 9.181140368863857e-06, "loss": 0.0088, "step": 8870 }, { "epoch": 2.16, "learning_rate": 9.177207703633417e-06, "loss": 0.0244, "step": 8872 }, { "epoch": 2.16, "learning_rate": 9.173275166518035e-06, "loss": 0.014, "step": 8874 }, { "epoch": 2.16, "learning_rate": 9.169342758130026e-06, "loss": 0.0125, "step": 8876 }, { "epoch": 2.16, "learning_rate": 9.165410479081704e-06, "loss": 0.014, "step": 8878 }, { "epoch": 2.16, "learning_rate": 9.161478329985349e-06, "loss": 0.0164, "step": 8880 }, { "epoch": 2.16, "learning_rate": 9.157546311453235e-06, "loss": 0.0166, "step": 8882 }, { "epoch": 2.16, "learning_rate": 9.153614424097597e-06, "loss": 0.0131, "step": 8884 }, { "epoch": 2.17, "learning_rate": 9.14968266853067e-06, "loss": 0.0255, "step": 8886 }, { "epoch": 2.17, "learning_rate": 9.145751045364648e-06, "loss": 0.0166, "step": 8888 }, { "epoch": 2.17, "learning_rate": 9.141819555211728e-06, "loss": 0.0094, "step": 8890 }, { "epoch": 2.17, "learning_rate": 9.137888198684061e-06, "loss": 0.0128, "step": 8892 }, { "epoch": 2.17, "learning_rate": 9.133956976393791e-06, "loss": 0.0218, "step": 8894 }, { "epoch": 2.17, "learning_rate": 9.130025888953044e-06, "loss": 0.0141, "step": 8896 }, { "epoch": 2.17, "learning_rate": 9.126094936973922e-06, "loss": 0.0085, "step": 8898 }, { "epoch": 2.17, "learning_rate": 9.122164121068498e-06, "loss": 0.0211, "step": 8900 }, { "epoch": 2.17, "learning_rate": 9.118233441848832e-06, "loss": 0.0129, "step": 8902 }, { "epoch": 2.17, "learning_rate": 9.11430289992696e-06, "loss": 0.0174, "step": 8904 }, { "epoch": 2.17, "learning_rate": 9.110372495914906e-06, "loss": 0.0088, "step": 8906 }, { "epoch": 2.17, "learning_rate": 9.106442230424654e-06, "loss": 0.0166, "step": 8908 }, { "epoch": 2.17, "learning_rate": 9.102512104068175e-06, "loss": 0.0204, "step": 8910 }, { "epoch": 2.17, "learning_rate": 9.098582117457429e-06, "loss": 0.0136, "step": 8912 }, { "epoch": 2.17, "learning_rate": 9.09465227120434e-06, "loss": 0.0101, "step": 8914 }, { "epoch": 2.17, "learning_rate": 9.090722565920813e-06, "loss": 0.0125, "step": 8916 }, { "epoch": 2.17, "learning_rate": 9.086793002218735e-06, "loss": 0.0211, "step": 8918 }, { "epoch": 2.17, "learning_rate": 9.08286358070997e-06, "loss": 0.0104, "step": 8920 }, { "epoch": 2.17, "learning_rate": 9.078934302006363e-06, "loss": 0.0131, "step": 8922 }, { "epoch": 2.17, "learning_rate": 9.075005166719724e-06, "loss": 0.0114, "step": 8924 }, { "epoch": 2.17, "learning_rate": 9.071076175461851e-06, "loss": 0.0162, "step": 8926 }, { "epoch": 2.18, "learning_rate": 9.067147328844526e-06, "loss": 0.0186, "step": 8928 }, { "epoch": 2.18, "learning_rate": 9.063218627479494e-06, "loss": 0.0189, "step": 8930 }, { "epoch": 2.18, "learning_rate": 9.059290071978482e-06, "loss": 0.0134, "step": 8932 }, { "epoch": 2.18, "learning_rate": 9.055361662953202e-06, "loss": 0.0118, "step": 8934 }, { "epoch": 2.18, "learning_rate": 9.051433401015333e-06, "loss": 0.0183, "step": 8936 }, { "epoch": 2.18, "learning_rate": 9.04750528677654e-06, "loss": 0.0157, "step": 8938 }, { "epoch": 2.18, "learning_rate": 9.043577320848452e-06, "loss": 0.0164, "step": 8940 }, { "epoch": 2.18, "learning_rate": 9.039649503842691e-06, "loss": 0.0108, "step": 8942 }, { "epoch": 2.18, "learning_rate": 9.035721836370845e-06, "loss": 0.01, "step": 8944 }, { "epoch": 2.18, "learning_rate": 9.031794319044486e-06, "loss": 0.0088, "step": 8946 }, { "epoch": 2.18, "learning_rate": 9.02786695247515e-06, "loss": 0.015, "step": 8948 }, { "epoch": 2.18, "learning_rate": 9.023939737274366e-06, "loss": 0.0224, "step": 8950 }, { "epoch": 2.18, "learning_rate": 9.020012674053627e-06, "loss": 0.0158, "step": 8952 }, { "epoch": 2.18, "learning_rate": 9.016085763424411e-06, "loss": 0.0248, "step": 8954 }, { "epoch": 2.18, "learning_rate": 9.012159005998165e-06, "loss": 0.0136, "step": 8956 }, { "epoch": 2.18, "learning_rate": 9.00823240238631e-06, "loss": 0.0182, "step": 8958 }, { "epoch": 2.18, "learning_rate": 9.004305953200258e-06, "loss": 0.0144, "step": 8960 }, { "epoch": 2.18, "learning_rate": 9.000379659051383e-06, "loss": 0.0132, "step": 8962 }, { "epoch": 2.18, "learning_rate": 8.99645352055104e-06, "loss": 0.0215, "step": 8964 }, { "epoch": 2.18, "learning_rate": 8.992527538310554e-06, "loss": 0.0095, "step": 8966 }, { "epoch": 2.19, "learning_rate": 8.988601712941237e-06, "loss": 0.0113, "step": 8968 }, { "epoch": 2.19, "learning_rate": 8.984676045054365e-06, "loss": 0.0118, "step": 8970 }, { "epoch": 2.19, "learning_rate": 8.980750535261199e-06, "loss": 0.0148, "step": 8972 }, { "epoch": 2.19, "learning_rate": 8.976825184172966e-06, "loss": 0.0191, "step": 8974 }, { "epoch": 2.19, "learning_rate": 8.972899992400878e-06, "loss": 0.0156, "step": 8976 }, { "epoch": 2.19, "learning_rate": 8.968974960556117e-06, "loss": 0.0088, "step": 8978 }, { "epoch": 2.19, "learning_rate": 8.965050089249832e-06, "loss": 0.0155, "step": 8980 }, { "epoch": 2.19, "learning_rate": 8.961125379093165e-06, "loss": 0.0198, "step": 8982 }, { "epoch": 2.19, "learning_rate": 8.95720083069722e-06, "loss": 0.0202, "step": 8984 }, { "epoch": 2.19, "learning_rate": 8.953276444673084e-06, "loss": 0.0103, "step": 8986 }, { "epoch": 2.19, "learning_rate": 8.949352221631805e-06, "loss": 0.0183, "step": 8988 }, { "epoch": 2.19, "learning_rate": 8.94542816218442e-06, "loss": 0.0138, "step": 8990 }, { "epoch": 2.19, "learning_rate": 8.941504266941932e-06, "loss": 0.011, "step": 8992 }, { "epoch": 2.19, "learning_rate": 8.937580536515328e-06, "loss": 0.0115, "step": 8994 }, { "epoch": 2.19, "learning_rate": 8.933656971515554e-06, "loss": 0.0231, "step": 8996 }, { "epoch": 2.19, "learning_rate": 8.929733572553546e-06, "loss": 0.0164, "step": 8998 }, { "epoch": 2.19, "learning_rate": 8.925810340240203e-06, "loss": 0.0102, "step": 9000 }, { "epoch": 2.19, "learning_rate": 8.921887275186408e-06, "loss": 0.016, "step": 9002 }, { "epoch": 2.19, "learning_rate": 8.917964378003003e-06, "loss": 0.0266, "step": 9004 }, { "epoch": 2.19, "learning_rate": 8.914041649300822e-06, "loss": 0.0162, "step": 9006 }, { "epoch": 2.19, "learning_rate": 8.910119089690657e-06, "loss": 0.0125, "step": 9008 }, { "epoch": 2.2, "learning_rate": 8.906196699783286e-06, "loss": 0.0257, "step": 9010 }, { "epoch": 2.2, "learning_rate": 8.902274480189454e-06, "loss": 0.0151, "step": 9012 }, { "epoch": 2.2, "learning_rate": 8.898352431519876e-06, "loss": 0.009, "step": 9014 }, { "epoch": 2.2, "learning_rate": 8.894430554385249e-06, "loss": 0.0126, "step": 9016 }, { "epoch": 2.2, "learning_rate": 8.890508849396243e-06, "loss": 0.0139, "step": 9018 }, { "epoch": 2.2, "learning_rate": 8.88658731716349e-06, "loss": 0.0221, "step": 9020 }, { "epoch": 2.2, "learning_rate": 8.882665958297603e-06, "loss": 0.0121, "step": 9022 }, { "epoch": 2.2, "learning_rate": 8.878744773409171e-06, "loss": 0.0136, "step": 9024 }, { "epoch": 2.2, "learning_rate": 8.874823763108755e-06, "loss": 0.0203, "step": 9026 }, { "epoch": 2.2, "learning_rate": 8.870902928006882e-06, "loss": 0.0075, "step": 9028 }, { "epoch": 2.2, "learning_rate": 8.866982268714054e-06, "loss": 0.0207, "step": 9030 }, { "epoch": 2.2, "learning_rate": 8.863061785840753e-06, "loss": 0.0118, "step": 9032 }, { "epoch": 2.2, "learning_rate": 8.859141479997427e-06, "loss": 0.0199, "step": 9034 }, { "epoch": 2.2, "learning_rate": 8.855221351794493e-06, "loss": 0.0201, "step": 9036 }, { "epoch": 2.2, "learning_rate": 8.851301401842348e-06, "loss": 0.0138, "step": 9038 }, { "epoch": 2.2, "learning_rate": 8.84738163075136e-06, "loss": 0.024, "step": 9040 }, { "epoch": 2.2, "learning_rate": 8.843462039131869e-06, "loss": 0.0081, "step": 9042 }, { "epoch": 2.2, "learning_rate": 8.839542627594177e-06, "loss": 0.0243, "step": 9044 }, { "epoch": 2.2, "learning_rate": 8.835623396748574e-06, "loss": 0.0228, "step": 9046 }, { "epoch": 2.2, "learning_rate": 8.83170434720531e-06, "loss": 0.0102, "step": 9048 }, { "epoch": 2.21, "learning_rate": 8.827785479574616e-06, "loss": 0.0072, "step": 9050 }, { "epoch": 2.21, "learning_rate": 8.823866794466683e-06, "loss": 0.0144, "step": 9052 }, { "epoch": 2.21, "learning_rate": 8.819948292491686e-06, "loss": 0.0083, "step": 9054 }, { "epoch": 2.21, "learning_rate": 8.81602997425976e-06, "loss": 0.0149, "step": 9056 }, { "epoch": 2.21, "learning_rate": 8.812111840381025e-06, "loss": 0.005, "step": 9058 }, { "epoch": 2.21, "learning_rate": 8.808193891465555e-06, "loss": 0.0089, "step": 9060 }, { "epoch": 2.21, "learning_rate": 8.804276128123412e-06, "loss": 0.0104, "step": 9062 }, { "epoch": 2.21, "learning_rate": 8.800358550964618e-06, "loss": 0.0129, "step": 9064 }, { "epoch": 2.21, "learning_rate": 8.796441160599175e-06, "loss": 0.0119, "step": 9066 }, { "epoch": 2.21, "learning_rate": 8.792523957637043e-06, "loss": 0.0098, "step": 9068 }, { "epoch": 2.21, "learning_rate": 8.788606942688163e-06, "loss": 0.0156, "step": 9070 }, { "epoch": 2.21, "learning_rate": 8.784690116362444e-06, "loss": 0.0075, "step": 9072 }, { "epoch": 2.21, "learning_rate": 8.780773479269773e-06, "loss": 0.0176, "step": 9074 }, { "epoch": 2.21, "learning_rate": 8.776857032019991e-06, "loss": 0.017, "step": 9076 }, { "epoch": 2.21, "learning_rate": 8.772940775222918e-06, "loss": 0.0137, "step": 9078 }, { "epoch": 2.21, "learning_rate": 8.769024709488353e-06, "loss": 0.0114, "step": 9080 }, { "epoch": 2.21, "learning_rate": 8.765108835426057e-06, "loss": 0.0167, "step": 9082 }, { "epoch": 2.21, "learning_rate": 8.761193153645753e-06, "loss": 0.014, "step": 9084 }, { "epoch": 2.21, "learning_rate": 8.757277664757148e-06, "loss": 0.0091, "step": 9086 }, { "epoch": 2.21, "learning_rate": 8.753362369369915e-06, "loss": 0.0123, "step": 9088 }, { "epoch": 2.21, "learning_rate": 8.749447268093697e-06, "loss": 0.0146, "step": 9090 }, { "epoch": 2.22, "learning_rate": 8.745532361538094e-06, "loss": 0.0148, "step": 9092 }, { "epoch": 2.22, "learning_rate": 8.741617650312699e-06, "loss": 0.0202, "step": 9094 }, { "epoch": 2.22, "learning_rate": 8.737703135027058e-06, "loss": 0.0158, "step": 9096 }, { "epoch": 2.22, "learning_rate": 8.733788816290694e-06, "loss": 0.0101, "step": 9098 }, { "epoch": 2.22, "learning_rate": 8.729874694713089e-06, "loss": 0.0253, "step": 9100 }, { "epoch": 2.22, "learning_rate": 8.725960770903706e-06, "loss": 0.017, "step": 9102 }, { "epoch": 2.22, "learning_rate": 8.722047045471977e-06, "loss": 0.0148, "step": 9104 }, { "epoch": 2.22, "learning_rate": 8.718133519027295e-06, "loss": 0.0163, "step": 9106 }, { "epoch": 2.22, "learning_rate": 8.714220192179023e-06, "loss": 0.0191, "step": 9108 }, { "epoch": 2.22, "learning_rate": 8.7103070655365e-06, "loss": 0.009, "step": 9110 }, { "epoch": 2.22, "learning_rate": 8.706394139709032e-06, "loss": 0.0096, "step": 9112 }, { "epoch": 2.22, "learning_rate": 8.702481415305885e-06, "loss": 0.0175, "step": 9114 }, { "epoch": 2.22, "learning_rate": 8.698568892936301e-06, "loss": 0.022, "step": 9116 }, { "epoch": 2.22, "learning_rate": 8.694656573209494e-06, "loss": 0.0122, "step": 9118 }, { "epoch": 2.22, "learning_rate": 8.690744456734644e-06, "loss": 0.0156, "step": 9120 }, { "epoch": 2.22, "learning_rate": 8.68683254412089e-06, "loss": 0.0205, "step": 9122 }, { "epoch": 2.22, "learning_rate": 8.682920835977347e-06, "loss": 0.018, "step": 9124 }, { "epoch": 2.22, "learning_rate": 8.679009332913106e-06, "loss": 0.0079, "step": 9126 }, { "epoch": 2.22, "learning_rate": 8.675098035537214e-06, "loss": 0.014, "step": 9128 }, { "epoch": 2.22, "learning_rate": 8.671186944458685e-06, "loss": 0.0166, "step": 9130 }, { "epoch": 2.23, "learning_rate": 8.667276060286511e-06, "loss": 0.0133, "step": 9132 }, { "epoch": 2.23, "learning_rate": 8.663365383629643e-06, "loss": 0.0172, "step": 9134 }, { "epoch": 2.23, "learning_rate": 8.659454915097011e-06, "loss": 0.0172, "step": 9136 }, { "epoch": 2.23, "learning_rate": 8.655544655297494e-06, "loss": 0.0196, "step": 9138 }, { "epoch": 2.23, "learning_rate": 8.651634604839958e-06, "loss": 0.0117, "step": 9140 }, { "epoch": 2.23, "learning_rate": 8.647724764333223e-06, "loss": 0.012, "step": 9142 }, { "epoch": 2.23, "learning_rate": 8.643815134386085e-06, "loss": 0.012, "step": 9144 }, { "epoch": 2.23, "learning_rate": 8.6399057156073e-06, "loss": 0.0103, "step": 9146 }, { "epoch": 2.23, "learning_rate": 8.635996508605593e-06, "loss": 0.008, "step": 9148 }, { "epoch": 2.23, "learning_rate": 8.63208751398966e-06, "loss": 0.0147, "step": 9150 }, { "epoch": 2.23, "learning_rate": 8.628178732368164e-06, "loss": 0.0117, "step": 9152 }, { "epoch": 2.23, "learning_rate": 8.624270164349726e-06, "loss": 0.0149, "step": 9154 }, { "epoch": 2.23, "learning_rate": 8.620361810542944e-06, "loss": 0.0156, "step": 9156 }, { "epoch": 2.23, "learning_rate": 8.616453671556375e-06, "loss": 0.0113, "step": 9158 }, { "epoch": 2.23, "learning_rate": 8.612545747998555e-06, "loss": 0.0112, "step": 9160 }, { "epoch": 2.23, "learning_rate": 8.60863804047797e-06, "loss": 0.0132, "step": 9162 }, { "epoch": 2.23, "learning_rate": 8.604730549603077e-06, "loss": 0.0187, "step": 9164 }, { "epoch": 2.23, "learning_rate": 8.60082327598231e-06, "loss": 0.0096, "step": 9166 }, { "epoch": 2.23, "learning_rate": 8.59691622022406e-06, "loss": 0.006, "step": 9168 }, { "epoch": 2.23, "learning_rate": 8.593009382936679e-06, "loss": 0.0238, "step": 9170 }, { "epoch": 2.23, "learning_rate": 8.589102764728495e-06, "loss": 0.0159, "step": 9172 }, { "epoch": 2.24, "learning_rate": 8.585196366207803e-06, "loss": 0.026, "step": 9174 }, { "epoch": 2.24, "learning_rate": 8.581290187982855e-06, "loss": 0.0217, "step": 9176 }, { "epoch": 2.24, "learning_rate": 8.57738423066187e-06, "loss": 0.014, "step": 9178 }, { "epoch": 2.24, "learning_rate": 8.57347849485304e-06, "loss": 0.0229, "step": 9180 }, { "epoch": 2.24, "learning_rate": 8.569572981164516e-06, "loss": 0.0201, "step": 9182 }, { "epoch": 2.24, "learning_rate": 8.56566769020442e-06, "loss": 0.0182, "step": 9184 }, { "epoch": 2.24, "learning_rate": 8.56176262258083e-06, "loss": 0.0146, "step": 9186 }, { "epoch": 2.24, "learning_rate": 8.557857778901798e-06, "loss": 0.0101, "step": 9188 }, { "epoch": 2.24, "learning_rate": 8.553953159775335e-06, "loss": 0.0198, "step": 9190 }, { "epoch": 2.24, "learning_rate": 8.550048765809427e-06, "loss": 0.0132, "step": 9192 }, { "epoch": 2.24, "learning_rate": 8.54614459761201e-06, "loss": 0.0035, "step": 9194 }, { "epoch": 2.24, "learning_rate": 8.542240655790997e-06, "loss": 0.0085, "step": 9196 }, { "epoch": 2.24, "learning_rate": 8.538336940954259e-06, "loss": 0.0122, "step": 9198 }, { "epoch": 2.24, "learning_rate": 8.534433453709642e-06, "loss": 0.0296, "step": 9200 }, { "epoch": 2.24, "learning_rate": 8.530530194664937e-06, "loss": 0.0117, "step": 9202 }, { "epoch": 2.24, "learning_rate": 8.526627164427919e-06, "loss": 0.0095, "step": 9204 }, { "epoch": 2.24, "learning_rate": 8.522724363606317e-06, "loss": 0.0139, "step": 9206 }, { "epoch": 2.24, "learning_rate": 8.518821792807832e-06, "loss": 0.0198, "step": 9208 }, { "epoch": 2.24, "learning_rate": 8.514919452640117e-06, "loss": 0.0138, "step": 9210 }, { "epoch": 2.24, "learning_rate": 8.511017343710796e-06, "loss": 0.0154, "step": 9212 }, { "epoch": 2.25, "learning_rate": 8.507115466627462e-06, "loss": 0.0152, "step": 9214 }, { "epoch": 2.25, "learning_rate": 8.50321382199767e-06, "loss": 0.0068, "step": 9216 }, { "epoch": 2.25, "learning_rate": 8.499312410428928e-06, "loss": 0.0157, "step": 9218 }, { "epoch": 2.25, "learning_rate": 8.495411232528718e-06, "loss": 0.0143, "step": 9220 }, { "epoch": 2.25, "learning_rate": 8.491510288904485e-06, "loss": 0.0238, "step": 9222 }, { "epoch": 2.25, "learning_rate": 8.487609580163636e-06, "loss": 0.0176, "step": 9224 }, { "epoch": 2.25, "learning_rate": 8.48370910691354e-06, "loss": 0.0176, "step": 9226 }, { "epoch": 2.25, "learning_rate": 8.479808869761528e-06, "loss": 0.0179, "step": 9228 }, { "epoch": 2.25, "learning_rate": 8.475908869314901e-06, "loss": 0.0164, "step": 9230 }, { "epoch": 2.25, "learning_rate": 8.47200910618092e-06, "loss": 0.0071, "step": 9232 }, { "epoch": 2.25, "learning_rate": 8.4681095809668e-06, "loss": 0.0126, "step": 9234 }, { "epoch": 2.25, "learning_rate": 8.464210294279734e-06, "loss": 0.0111, "step": 9236 }, { "epoch": 2.25, "learning_rate": 8.46031124672687e-06, "loss": 0.0146, "step": 9238 }, { "epoch": 2.25, "learning_rate": 8.45641243891532e-06, "loss": 0.0155, "step": 9240 }, { "epoch": 2.25, "learning_rate": 8.452513871452153e-06, "loss": 0.0194, "step": 9242 }, { "epoch": 2.25, "learning_rate": 8.448615544944412e-06, "loss": 0.0065, "step": 9244 }, { "epoch": 2.25, "learning_rate": 8.44471745999909e-06, "loss": 0.009, "step": 9246 }, { "epoch": 2.25, "learning_rate": 8.440819617223157e-06, "loss": 0.0129, "step": 9248 }, { "epoch": 2.25, "learning_rate": 8.436922017223528e-06, "loss": 0.0103, "step": 9250 }, { "epoch": 2.25, "learning_rate": 8.433024660607094e-06, "loss": 0.0141, "step": 9252 }, { "epoch": 2.25, "learning_rate": 8.429127547980702e-06, "loss": 0.009, "step": 9254 }, { "epoch": 2.26, "learning_rate": 8.425230679951166e-06, "loss": 0.0178, "step": 9256 }, { "epoch": 2.26, "learning_rate": 8.42133405712525e-06, "loss": 0.0091, "step": 9258 }, { "epoch": 2.26, "learning_rate": 8.417437680109697e-06, "loss": 0.015, "step": 9260 }, { "epoch": 2.26, "learning_rate": 8.413541549511195e-06, "loss": 0.0134, "step": 9262 }, { "epoch": 2.26, "learning_rate": 8.40964566593641e-06, "loss": 0.0182, "step": 9264 }, { "epoch": 2.26, "learning_rate": 8.405750029991953e-06, "loss": 0.0218, "step": 9266 }, { "epoch": 2.26, "learning_rate": 8.401854642284405e-06, "loss": 0.0159, "step": 9268 }, { "epoch": 2.26, "learning_rate": 8.39795950342031e-06, "loss": 0.0115, "step": 9270 }, { "epoch": 2.26, "learning_rate": 8.394064614006174e-06, "loss": 0.0147, "step": 9272 }, { "epoch": 2.26, "learning_rate": 8.390169974648456e-06, "loss": 0.0139, "step": 9274 }, { "epoch": 2.26, "learning_rate": 8.38627558595358e-06, "loss": 0.0148, "step": 9276 }, { "epoch": 2.26, "learning_rate": 8.382381448527934e-06, "loss": 0.017, "step": 9278 }, { "epoch": 2.26, "learning_rate": 8.378487562977872e-06, "loss": 0.0147, "step": 9280 }, { "epoch": 2.26, "learning_rate": 8.374593929909694e-06, "loss": 0.0081, "step": 9282 }, { "epoch": 2.26, "learning_rate": 8.370700549929667e-06, "loss": 0.0156, "step": 9284 }, { "epoch": 2.26, "learning_rate": 8.366807423644025e-06, "loss": 0.0098, "step": 9286 }, { "epoch": 2.26, "learning_rate": 8.362914551658958e-06, "loss": 0.0168, "step": 9288 }, { "epoch": 2.26, "learning_rate": 8.359021934580607e-06, "loss": 0.0082, "step": 9290 }, { "epoch": 2.26, "learning_rate": 8.355129573015092e-06, "loss": 0.0168, "step": 9292 }, { "epoch": 2.26, "learning_rate": 8.35123746756848e-06, "loss": 0.0085, "step": 9294 }, { "epoch": 2.27, "learning_rate": 8.347345618846807e-06, "loss": 0.0146, "step": 9296 }, { "epoch": 2.27, "learning_rate": 8.343454027456051e-06, "loss": 0.0139, "step": 9298 }, { "epoch": 2.27, "learning_rate": 8.339562694002171e-06, "loss": 0.0111, "step": 9300 }, { "epoch": 2.27, "learning_rate": 8.33567161909108e-06, "loss": 0.0099, "step": 9302 }, { "epoch": 2.27, "learning_rate": 8.331780803328643e-06, "loss": 0.0134, "step": 9304 }, { "epoch": 2.27, "learning_rate": 8.32789024732069e-06, "loss": 0.0132, "step": 9306 }, { "epoch": 2.27, "learning_rate": 8.323999951673013e-06, "loss": 0.0239, "step": 9308 }, { "epoch": 2.27, "learning_rate": 8.320109916991357e-06, "loss": 0.0204, "step": 9310 }, { "epoch": 2.27, "learning_rate": 8.316220143881438e-06, "loss": 0.0128, "step": 9312 }, { "epoch": 2.27, "learning_rate": 8.312330632948911e-06, "loss": 0.0132, "step": 9314 }, { "epoch": 2.27, "learning_rate": 8.308441384799413e-06, "loss": 0.0197, "step": 9316 }, { "epoch": 2.27, "learning_rate": 8.304552400038523e-06, "loss": 0.0251, "step": 9318 }, { "epoch": 2.27, "learning_rate": 8.300663679271794e-06, "loss": 0.0129, "step": 9320 }, { "epoch": 2.27, "learning_rate": 8.296775223104722e-06, "loss": 0.0171, "step": 9322 }, { "epoch": 2.27, "learning_rate": 8.292887032142772e-06, "loss": 0.0094, "step": 9324 }, { "epoch": 2.27, "learning_rate": 8.288999106991363e-06, "loss": 0.0074, "step": 9326 }, { "epoch": 2.27, "learning_rate": 8.28511144825588e-06, "loss": 0.0112, "step": 9328 }, { "epoch": 2.27, "learning_rate": 8.281224056541655e-06, "loss": 0.0143, "step": 9330 }, { "epoch": 2.27, "learning_rate": 8.277336932453985e-06, "loss": 0.0112, "step": 9332 }, { "epoch": 2.27, "learning_rate": 8.27345007659813e-06, "loss": 0.0205, "step": 9334 }, { "epoch": 2.27, "learning_rate": 8.269563489579302e-06, "loss": 0.0104, "step": 9336 }, { "epoch": 2.28, "learning_rate": 8.265677172002669e-06, "loss": 0.0109, "step": 9338 }, { "epoch": 2.28, "learning_rate": 8.261791124473362e-06, "loss": 0.0121, "step": 9340 }, { "epoch": 2.28, "learning_rate": 8.257905347596468e-06, "loss": 0.0133, "step": 9342 }, { "epoch": 2.28, "learning_rate": 8.254019841977036e-06, "loss": 0.0299, "step": 9344 }, { "epoch": 2.28, "learning_rate": 8.25013460822006e-06, "loss": 0.0064, "step": 9346 }, { "epoch": 2.28, "learning_rate": 8.246249646930507e-06, "loss": 0.0174, "step": 9348 }, { "epoch": 2.28, "learning_rate": 8.242364958713297e-06, "loss": 0.0208, "step": 9350 }, { "epoch": 2.28, "learning_rate": 8.238480544173304e-06, "loss": 0.0113, "step": 9352 }, { "epoch": 2.28, "learning_rate": 8.234596403915357e-06, "loss": 0.0244, "step": 9354 }, { "epoch": 2.28, "learning_rate": 8.23071253854425e-06, "loss": 0.0177, "step": 9356 }, { "epoch": 2.28, "learning_rate": 8.22682894866473e-06, "loss": 0.0095, "step": 9358 }, { "epoch": 2.28, "learning_rate": 8.222945634881504e-06, "loss": 0.0213, "step": 9360 }, { "epoch": 2.28, "learning_rate": 8.219062597799227e-06, "loss": 0.027, "step": 9362 }, { "epoch": 2.28, "learning_rate": 8.215179838022524e-06, "loss": 0.0194, "step": 9364 }, { "epoch": 2.28, "learning_rate": 8.211297356155966e-06, "loss": 0.0125, "step": 9366 }, { "epoch": 2.28, "learning_rate": 8.207415152804091e-06, "loss": 0.0077, "step": 9368 }, { "epoch": 2.28, "learning_rate": 8.20353322857138e-06, "loss": 0.0225, "step": 9370 }, { "epoch": 2.28, "learning_rate": 8.199651584062285e-06, "loss": 0.0196, "step": 9372 }, { "epoch": 2.28, "learning_rate": 8.195770219881203e-06, "loss": 0.0177, "step": 9374 }, { "epoch": 2.28, "learning_rate": 8.191889136632498e-06, "loss": 0.022, "step": 9376 }, { "epoch": 2.29, "learning_rate": 8.188008334920475e-06, "loss": 0.0162, "step": 9378 }, { "epoch": 2.29, "learning_rate": 8.184127815349415e-06, "loss": 0.0109, "step": 9380 }, { "epoch": 2.29, "learning_rate": 8.180247578523535e-06, "loss": 0.0177, "step": 9382 }, { "epoch": 2.29, "learning_rate": 8.176367625047026e-06, "loss": 0.0119, "step": 9384 }, { "epoch": 2.29, "learning_rate": 8.172487955524022e-06, "loss": 0.0108, "step": 9386 }, { "epoch": 2.29, "learning_rate": 8.168608570558614e-06, "loss": 0.015, "step": 9388 }, { "epoch": 2.29, "learning_rate": 8.164729470754855e-06, "loss": 0.0097, "step": 9390 }, { "epoch": 2.29, "learning_rate": 8.160850656716757e-06, "loss": 0.0104, "step": 9392 }, { "epoch": 2.29, "learning_rate": 8.156972129048273e-06, "loss": 0.0126, "step": 9394 }, { "epoch": 2.29, "learning_rate": 8.153093888353318e-06, "loss": 0.0094, "step": 9396 }, { "epoch": 2.29, "learning_rate": 8.14921593523577e-06, "loss": 0.015, "step": 9398 }, { "epoch": 2.29, "learning_rate": 8.145338270299454e-06, "loss": 0.0091, "step": 9400 }, { "epoch": 2.29, "learning_rate": 8.141460894148149e-06, "loss": 0.0176, "step": 9402 }, { "epoch": 2.29, "learning_rate": 8.137583807385595e-06, "loss": 0.0122, "step": 9404 }, { "epoch": 2.29, "learning_rate": 8.133707010615485e-06, "loss": 0.0136, "step": 9406 }, { "epoch": 2.29, "learning_rate": 8.129830504441466e-06, "loss": 0.0128, "step": 9408 }, { "epoch": 2.29, "learning_rate": 8.125954289467132e-06, "loss": 0.0161, "step": 9410 }, { "epoch": 2.29, "learning_rate": 8.122078366296048e-06, "loss": 0.015, "step": 9412 }, { "epoch": 2.29, "learning_rate": 8.118202735531724e-06, "loss": 0.0145, "step": 9414 }, { "epoch": 2.29, "learning_rate": 8.114327397777624e-06, "loss": 0.0213, "step": 9416 }, { "epoch": 2.29, "learning_rate": 8.110452353637165e-06, "loss": 0.0215, "step": 9418 }, { "epoch": 2.3, "learning_rate": 8.106577603713724e-06, "loss": 0.0125, "step": 9420 }, { "epoch": 2.3, "learning_rate": 8.102703148610627e-06, "loss": 0.0101, "step": 9422 }, { "epoch": 2.3, "learning_rate": 8.098828988931162e-06, "loss": 0.0123, "step": 9424 }, { "epoch": 2.3, "learning_rate": 8.094955125278555e-06, "loss": 0.0113, "step": 9426 }, { "epoch": 2.3, "learning_rate": 8.091081558256006e-06, "loss": 0.0085, "step": 9428 }, { "epoch": 2.3, "learning_rate": 8.087208288466653e-06, "loss": 0.0138, "step": 9430 }, { "epoch": 2.3, "learning_rate": 8.0833353165136e-06, "loss": 0.0117, "step": 9432 }, { "epoch": 2.3, "learning_rate": 8.07946264299989e-06, "loss": 0.0185, "step": 9434 }, { "epoch": 2.3, "learning_rate": 8.075590268528535e-06, "loss": 0.0208, "step": 9436 }, { "epoch": 2.3, "learning_rate": 8.071718193702486e-06, "loss": 0.0065, "step": 9438 }, { "epoch": 2.3, "learning_rate": 8.067846419124665e-06, "loss": 0.0176, "step": 9440 }, { "epoch": 2.3, "learning_rate": 8.06397494539793e-06, "loss": 0.0116, "step": 9442 }, { "epoch": 2.3, "learning_rate": 8.060103773125097e-06, "loss": 0.0127, "step": 9444 }, { "epoch": 2.3, "learning_rate": 8.056232902908941e-06, "loss": 0.0172, "step": 9446 }, { "epoch": 2.3, "learning_rate": 8.052362335352189e-06, "loss": 0.018, "step": 9448 }, { "epoch": 2.3, "learning_rate": 8.048492071057512e-06, "loss": 0.0145, "step": 9450 }, { "epoch": 2.3, "learning_rate": 8.044622110627539e-06, "loss": 0.0108, "step": 9452 }, { "epoch": 2.3, "learning_rate": 8.040752454664855e-06, "loss": 0.0137, "step": 9454 }, { "epoch": 2.3, "learning_rate": 8.036883103772001e-06, "loss": 0.0173, "step": 9456 }, { "epoch": 2.3, "learning_rate": 8.033014058551458e-06, "loss": 0.0145, "step": 9458 }, { "epoch": 2.31, "learning_rate": 8.029145319605664e-06, "loss": 0.0145, "step": 9460 }, { "epoch": 2.31, "learning_rate": 8.025276887537016e-06, "loss": 0.0199, "step": 9462 }, { "epoch": 2.31, "learning_rate": 8.021408762947859e-06, "loss": 0.0123, "step": 9464 }, { "epoch": 2.31, "learning_rate": 8.017540946440482e-06, "loss": 0.0148, "step": 9466 }, { "epoch": 2.31, "learning_rate": 8.01367343861714e-06, "loss": 0.0122, "step": 9468 }, { "epoch": 2.31, "learning_rate": 8.009806240080034e-06, "loss": 0.0135, "step": 9470 }, { "epoch": 2.31, "learning_rate": 8.005939351431316e-06, "loss": 0.0161, "step": 9472 }, { "epoch": 2.31, "learning_rate": 8.002072773273087e-06, "loss": 0.0202, "step": 9474 }, { "epoch": 2.31, "learning_rate": 7.998206506207402e-06, "loss": 0.0211, "step": 9476 }, { "epoch": 2.31, "learning_rate": 7.994340550836275e-06, "loss": 0.0059, "step": 9478 }, { "epoch": 2.31, "learning_rate": 7.990474907761664e-06, "loss": 0.0134, "step": 9480 }, { "epoch": 2.31, "learning_rate": 7.986609577585469e-06, "loss": 0.0088, "step": 9482 }, { "epoch": 2.31, "learning_rate": 7.98274456090956e-06, "loss": 0.0142, "step": 9484 }, { "epoch": 2.31, "learning_rate": 7.978879858335748e-06, "loss": 0.0106, "step": 9486 }, { "epoch": 2.31, "learning_rate": 7.9750154704658e-06, "loss": 0.0182, "step": 9488 }, { "epoch": 2.31, "learning_rate": 7.971151397901425e-06, "loss": 0.019, "step": 9490 }, { "epoch": 2.31, "learning_rate": 7.967287641244292e-06, "loss": 0.0097, "step": 9492 }, { "epoch": 2.31, "learning_rate": 7.963424201096014e-06, "loss": 0.0113, "step": 9494 }, { "epoch": 2.31, "learning_rate": 7.959561078058168e-06, "loss": 0.0183, "step": 9496 }, { "epoch": 2.31, "learning_rate": 7.955698272732261e-06, "loss": 0.0228, "step": 9498 }, { "epoch": 2.31, "learning_rate": 7.95183578571976e-06, "loss": 0.0149, "step": 9500 }, { "epoch": 2.32, "learning_rate": 7.947973617622093e-06, "loss": 0.0146, "step": 9502 }, { "epoch": 2.32, "learning_rate": 7.944111769040628e-06, "loss": 0.006, "step": 9504 }, { "epoch": 2.32, "learning_rate": 7.940250240576679e-06, "loss": 0.0257, "step": 9506 }, { "epoch": 2.32, "learning_rate": 7.936389032831514e-06, "loss": 0.0198, "step": 9508 }, { "epoch": 2.32, "learning_rate": 7.932528146406357e-06, "loss": 0.0162, "step": 9510 }, { "epoch": 2.32, "learning_rate": 7.928667581902382e-06, "loss": 0.0113, "step": 9512 }, { "epoch": 2.32, "learning_rate": 7.924807339920701e-06, "loss": 0.0226, "step": 9514 }, { "epoch": 2.32, "learning_rate": 7.920947421062383e-06, "loss": 0.0074, "step": 9516 }, { "epoch": 2.32, "learning_rate": 7.91708782592845e-06, "loss": 0.0139, "step": 9518 }, { "epoch": 2.32, "learning_rate": 7.913228555119875e-06, "loss": 0.0107, "step": 9520 }, { "epoch": 2.32, "learning_rate": 7.909369609237564e-06, "loss": 0.0153, "step": 9522 }, { "epoch": 2.32, "learning_rate": 7.905510988882392e-06, "loss": 0.0057, "step": 9524 }, { "epoch": 2.32, "learning_rate": 7.901652694655177e-06, "loss": 0.0184, "step": 9526 }, { "epoch": 2.32, "learning_rate": 7.897794727156685e-06, "loss": 0.0128, "step": 9528 }, { "epoch": 2.32, "learning_rate": 7.893937086987626e-06, "loss": 0.0153, "step": 9530 }, { "epoch": 2.32, "learning_rate": 7.890079774748665e-06, "loss": 0.0091, "step": 9532 }, { "epoch": 2.32, "learning_rate": 7.886222791040418e-06, "loss": 0.0156, "step": 9534 }, { "epoch": 2.32, "learning_rate": 7.88236613646345e-06, "loss": 0.0139, "step": 9536 }, { "epoch": 2.32, "learning_rate": 7.878509811618263e-06, "loss": 0.0112, "step": 9538 }, { "epoch": 2.32, "learning_rate": 7.874653817105322e-06, "loss": 0.0147, "step": 9540 }, { "epoch": 2.33, "learning_rate": 7.87079815352503e-06, "loss": 0.0065, "step": 9542 }, { "epoch": 2.33, "learning_rate": 7.866942821477754e-06, "loss": 0.0136, "step": 9544 }, { "epoch": 2.33, "learning_rate": 7.863087821563785e-06, "loss": 0.0099, "step": 9546 }, { "epoch": 2.33, "learning_rate": 7.859233154383386e-06, "loss": 0.0071, "step": 9548 }, { "epoch": 2.33, "learning_rate": 7.855378820536751e-06, "loss": 0.0076, "step": 9550 }, { "epoch": 2.33, "learning_rate": 7.851524820624039e-06, "loss": 0.0191, "step": 9552 }, { "epoch": 2.33, "learning_rate": 7.847671155245334e-06, "loss": 0.0099, "step": 9554 }, { "epoch": 2.33, "learning_rate": 7.843817825000693e-06, "loss": 0.0131, "step": 9556 }, { "epoch": 2.33, "learning_rate": 7.839964830490102e-06, "loss": 0.0197, "step": 9558 }, { "epoch": 2.33, "learning_rate": 7.836112172313505e-06, "loss": 0.017, "step": 9560 }, { "epoch": 2.33, "learning_rate": 7.83225985107079e-06, "loss": 0.0148, "step": 9562 }, { "epoch": 2.33, "learning_rate": 7.828407867361789e-06, "loss": 0.0177, "step": 9564 }, { "epoch": 2.33, "learning_rate": 7.82455622178629e-06, "loss": 0.0087, "step": 9566 }, { "epoch": 2.33, "learning_rate": 7.820704914944024e-06, "loss": 0.0184, "step": 9568 }, { "epoch": 2.33, "learning_rate": 7.816853947434666e-06, "loss": 0.0102, "step": 9570 }, { "epoch": 2.33, "learning_rate": 7.81300331985784e-06, "loss": 0.0165, "step": 9572 }, { "epoch": 2.33, "learning_rate": 7.809153032813124e-06, "loss": 0.0221, "step": 9574 }, { "epoch": 2.33, "learning_rate": 7.805303086900032e-06, "loss": 0.0121, "step": 9576 }, { "epoch": 2.33, "learning_rate": 7.801453482718032e-06, "loss": 0.0091, "step": 9578 }, { "epoch": 2.33, "learning_rate": 7.797604220866532e-06, "loss": 0.0097, "step": 9580 }, { "epoch": 2.33, "learning_rate": 7.7937553019449e-06, "loss": 0.0174, "step": 9582 }, { "epoch": 2.34, "learning_rate": 7.789906726552439e-06, "loss": 0.0077, "step": 9584 }, { "epoch": 2.34, "learning_rate": 7.786058495288396e-06, "loss": 0.0095, "step": 9586 }, { "epoch": 2.34, "learning_rate": 7.782210608751975e-06, "loss": 0.0087, "step": 9588 }, { "epoch": 2.34, "learning_rate": 7.778363067542325e-06, "loss": 0.0143, "step": 9590 }, { "epoch": 2.34, "learning_rate": 7.774515872258533e-06, "loss": 0.0138, "step": 9592 }, { "epoch": 2.34, "learning_rate": 7.770669023499633e-06, "loss": 0.0096, "step": 9594 }, { "epoch": 2.34, "learning_rate": 7.766822521864617e-06, "loss": 0.0193, "step": 9596 }, { "epoch": 2.34, "learning_rate": 7.762976367952406e-06, "loss": 0.015, "step": 9598 }, { "epoch": 2.34, "learning_rate": 7.759130562361888e-06, "loss": 0.0059, "step": 9600 }, { "epoch": 2.34, "learning_rate": 7.755285105691869e-06, "loss": 0.0164, "step": 9602 }, { "epoch": 2.34, "learning_rate": 7.751439998541127e-06, "loss": 0.0144, "step": 9604 }, { "epoch": 2.34, "learning_rate": 7.747595241508368e-06, "loss": 0.0067, "step": 9606 }, { "epoch": 2.34, "learning_rate": 7.74375083519226e-06, "loss": 0.0157, "step": 9608 }, { "epoch": 2.34, "learning_rate": 7.739906780191394e-06, "loss": 0.0109, "step": 9610 }, { "epoch": 2.34, "learning_rate": 7.736063077104326e-06, "loss": 0.0132, "step": 9612 }, { "epoch": 2.34, "learning_rate": 7.732219726529546e-06, "loss": 0.0088, "step": 9614 }, { "epoch": 2.34, "learning_rate": 7.728376729065501e-06, "loss": 0.0078, "step": 9616 }, { "epoch": 2.34, "learning_rate": 7.724534085310568e-06, "loss": 0.0066, "step": 9618 }, { "epoch": 2.34, "learning_rate": 7.720691795863075e-06, "loss": 0.0221, "step": 9620 }, { "epoch": 2.34, "learning_rate": 7.716849861321296e-06, "loss": 0.0173, "step": 9622 }, { "epoch": 2.35, "learning_rate": 7.71300828228346e-06, "loss": 0.0055, "step": 9624 }, { "epoch": 2.35, "learning_rate": 7.709167059347718e-06, "loss": 0.0128, "step": 9626 }, { "epoch": 2.35, "learning_rate": 7.70532619311218e-06, "loss": 0.0121, "step": 9628 }, { "epoch": 2.35, "learning_rate": 7.701485684174905e-06, "loss": 0.0158, "step": 9630 }, { "epoch": 2.35, "learning_rate": 7.697645533133883e-06, "loss": 0.0123, "step": 9632 }, { "epoch": 2.35, "learning_rate": 7.693805740587055e-06, "loss": 0.0128, "step": 9634 }, { "epoch": 2.35, "learning_rate": 7.689966307132306e-06, "loss": 0.0195, "step": 9636 }, { "epoch": 2.35, "learning_rate": 7.686127233367473e-06, "loss": 0.0097, "step": 9638 }, { "epoch": 2.35, "learning_rate": 7.68228851989032e-06, "loss": 0.0116, "step": 9640 }, { "epoch": 2.35, "learning_rate": 7.678450167298566e-06, "loss": 0.011, "step": 9642 }, { "epoch": 2.35, "learning_rate": 7.674612176189872e-06, "loss": 0.0098, "step": 9644 }, { "epoch": 2.35, "learning_rate": 7.67077454716185e-06, "loss": 0.0153, "step": 9646 }, { "epoch": 2.35, "learning_rate": 7.666937280812038e-06, "loss": 0.0067, "step": 9648 }, { "epoch": 2.35, "learning_rate": 7.66310037773793e-06, "loss": 0.0141, "step": 9650 }, { "epoch": 2.35, "learning_rate": 7.659263838536963e-06, "loss": 0.0088, "step": 9652 }, { "epoch": 2.35, "learning_rate": 7.655427663806517e-06, "loss": 0.0082, "step": 9654 }, { "epoch": 2.35, "learning_rate": 7.651591854143911e-06, "loss": 0.0061, "step": 9656 }, { "epoch": 2.35, "learning_rate": 7.64775641014641e-06, "loss": 0.0111, "step": 9658 }, { "epoch": 2.35, "learning_rate": 7.643921332411223e-06, "loss": 0.0082, "step": 9660 }, { "epoch": 2.35, "learning_rate": 7.640086621535504e-06, "loss": 0.0145, "step": 9662 }, { "epoch": 2.35, "learning_rate": 7.636252278116338e-06, "loss": 0.0108, "step": 9664 }, { "epoch": 2.36, "learning_rate": 7.632418302750767e-06, "loss": 0.021, "step": 9666 }, { "epoch": 2.36, "learning_rate": 7.628584696035775e-06, "loss": 0.0179, "step": 9668 }, { "epoch": 2.36, "learning_rate": 7.624751458568279e-06, "loss": 0.0153, "step": 9670 }, { "epoch": 2.36, "learning_rate": 7.6209185909451414e-06, "loss": 0.0088, "step": 9672 }, { "epoch": 2.36, "learning_rate": 7.6170860937631705e-06, "loss": 0.0093, "step": 9674 }, { "epoch": 2.36, "learning_rate": 7.613253967619117e-06, "loss": 0.0111, "step": 9676 }, { "epoch": 2.36, "learning_rate": 7.6094222131096725e-06, "loss": 0.0097, "step": 9678 }, { "epoch": 2.36, "learning_rate": 7.6055908308314675e-06, "loss": 0.0202, "step": 9680 }, { "epoch": 2.36, "learning_rate": 7.601759821381081e-06, "loss": 0.0094, "step": 9682 }, { "epoch": 2.36, "learning_rate": 7.597929185355025e-06, "loss": 0.0118, "step": 9684 }, { "epoch": 2.36, "learning_rate": 7.594098923349769e-06, "loss": 0.019, "step": 9686 }, { "epoch": 2.36, "learning_rate": 7.590269035961701e-06, "loss": 0.0196, "step": 9688 }, { "epoch": 2.36, "learning_rate": 7.586439523787176e-06, "loss": 0.0096, "step": 9690 }, { "epoch": 2.36, "learning_rate": 7.582610387422468e-06, "loss": 0.0165, "step": 9692 }, { "epoch": 2.36, "learning_rate": 7.578781627463814e-06, "loss": 0.0173, "step": 9694 }, { "epoch": 2.36, "learning_rate": 7.574953244507371e-06, "loss": 0.016, "step": 9696 }, { "epoch": 2.36, "learning_rate": 7.57112523914925e-06, "loss": 0.0121, "step": 9698 }, { "epoch": 2.36, "learning_rate": 7.5672976119855e-06, "loss": 0.0125, "step": 9700 }, { "epoch": 2.36, "learning_rate": 7.563470363612121e-06, "loss": 0.0199, "step": 9702 }, { "epoch": 2.36, "learning_rate": 7.559643494625034e-06, "loss": 0.0136, "step": 9704 }, { "epoch": 2.37, "learning_rate": 7.555817005620114e-06, "loss": 0.015, "step": 9706 }, { "epoch": 2.37, "learning_rate": 7.551990897193175e-06, "loss": 0.016, "step": 9708 }, { "epoch": 2.37, "learning_rate": 7.548165169939978e-06, "loss": 0.0163, "step": 9710 }, { "epoch": 2.37, "learning_rate": 7.54433982445621e-06, "loss": 0.0153, "step": 9712 }, { "epoch": 2.37, "learning_rate": 7.540514861337506e-06, "loss": 0.0104, "step": 9714 }, { "epoch": 2.37, "learning_rate": 7.5366902811794465e-06, "loss": 0.0131, "step": 9716 }, { "epoch": 2.37, "learning_rate": 7.53286608457755e-06, "loss": 0.016, "step": 9718 }, { "epoch": 2.37, "learning_rate": 7.529042272127264e-06, "loss": 0.0126, "step": 9720 }, { "epoch": 2.37, "learning_rate": 7.525218844423991e-06, "loss": 0.0173, "step": 9722 }, { "epoch": 2.37, "learning_rate": 7.52139580206307e-06, "loss": 0.0161, "step": 9724 }, { "epoch": 2.37, "learning_rate": 7.5175731456397785e-06, "loss": 0.0143, "step": 9726 }, { "epoch": 2.37, "learning_rate": 7.513750875749326e-06, "loss": 0.0215, "step": 9728 }, { "epoch": 2.37, "learning_rate": 7.509928992986872e-06, "loss": 0.0105, "step": 9730 }, { "epoch": 2.37, "learning_rate": 7.5061074979475194e-06, "loss": 0.01, "step": 9732 }, { "epoch": 2.37, "learning_rate": 7.502286391226299e-06, "loss": 0.01, "step": 9734 }, { "epoch": 2.37, "learning_rate": 7.4984656734181825e-06, "loss": 0.0057, "step": 9736 }, { "epoch": 2.37, "learning_rate": 7.494645345118092e-06, "loss": 0.02, "step": 9738 }, { "epoch": 2.37, "learning_rate": 7.490825406920878e-06, "loss": 0.0176, "step": 9740 }, { "epoch": 2.37, "learning_rate": 7.487005859421337e-06, "loss": 0.0113, "step": 9742 }, { "epoch": 2.37, "learning_rate": 7.4831867032141955e-06, "loss": 0.0071, "step": 9744 }, { "epoch": 2.37, "learning_rate": 7.479367938894133e-06, "loss": 0.0069, "step": 9746 }, { "epoch": 2.38, "learning_rate": 7.475549567055754e-06, "loss": 0.0039, "step": 9748 }, { "epoch": 2.38, "learning_rate": 7.471731588293616e-06, "loss": 0.0118, "step": 9750 }, { "epoch": 2.38, "learning_rate": 7.467914003202197e-06, "loss": 0.0216, "step": 9752 }, { "epoch": 2.38, "learning_rate": 7.464096812375932e-06, "loss": 0.0217, "step": 9754 }, { "epoch": 2.38, "learning_rate": 7.460280016409183e-06, "loss": 0.0105, "step": 9756 }, { "epoch": 2.38, "learning_rate": 7.45646361589626e-06, "loss": 0.0128, "step": 9758 }, { "epoch": 2.38, "learning_rate": 7.4526476114313986e-06, "loss": 0.0167, "step": 9760 }, { "epoch": 2.38, "learning_rate": 7.448832003608781e-06, "loss": 0.0199, "step": 9762 }, { "epoch": 2.38, "learning_rate": 7.4450167930225295e-06, "loss": 0.0041, "step": 9764 }, { "epoch": 2.38, "learning_rate": 7.4412019802667036e-06, "loss": 0.006, "step": 9766 }, { "epoch": 2.38, "learning_rate": 7.437387565935294e-06, "loss": 0.0146, "step": 9768 }, { "epoch": 2.38, "learning_rate": 7.433573550622233e-06, "loss": 0.0118, "step": 9770 }, { "epoch": 2.38, "learning_rate": 7.429759934921397e-06, "loss": 0.0041, "step": 9772 }, { "epoch": 2.38, "learning_rate": 7.4259467194265955e-06, "loss": 0.0113, "step": 9774 }, { "epoch": 2.38, "learning_rate": 7.422133904731568e-06, "loss": 0.0215, "step": 9776 }, { "epoch": 2.38, "learning_rate": 7.418321491430003e-06, "loss": 0.0164, "step": 9778 }, { "epoch": 2.38, "learning_rate": 7.4145094801155255e-06, "loss": 0.0062, "step": 9780 }, { "epoch": 2.38, "learning_rate": 7.410697871381695e-06, "loss": 0.0106, "step": 9782 }, { "epoch": 2.38, "learning_rate": 7.406886665822001e-06, "loss": 0.0196, "step": 9784 }, { "epoch": 2.38, "learning_rate": 7.403075864029881e-06, "loss": 0.0233, "step": 9786 }, { "epoch": 2.38, "learning_rate": 7.3992654665987095e-06, "loss": 0.0073, "step": 9788 }, { "epoch": 2.39, "learning_rate": 7.395455474121793e-06, "loss": 0.0136, "step": 9790 }, { "epoch": 2.39, "learning_rate": 7.391645887192374e-06, "loss": 0.02, "step": 9792 }, { "epoch": 2.39, "learning_rate": 7.387836706403635e-06, "loss": 0.0122, "step": 9794 }, { "epoch": 2.39, "learning_rate": 7.384027932348692e-06, "loss": 0.0195, "step": 9796 }, { "epoch": 2.39, "learning_rate": 7.38021956562061e-06, "loss": 0.0178, "step": 9798 }, { "epoch": 2.39, "learning_rate": 7.376411606812368e-06, "loss": 0.0148, "step": 9800 }, { "epoch": 2.39, "learning_rate": 7.372604056516904e-06, "loss": 0.0127, "step": 9802 }, { "epoch": 2.39, "learning_rate": 7.368796915327076e-06, "loss": 0.0118, "step": 9804 }, { "epoch": 2.39, "learning_rate": 7.364990183835694e-06, "loss": 0.0137, "step": 9806 }, { "epoch": 2.39, "learning_rate": 7.361183862635484e-06, "loss": 0.0136, "step": 9808 }, { "epoch": 2.39, "learning_rate": 7.357377952319127e-06, "loss": 0.0158, "step": 9810 }, { "epoch": 2.39, "learning_rate": 7.353572453479228e-06, "loss": 0.0131, "step": 9812 }, { "epoch": 2.39, "learning_rate": 7.349767366708338e-06, "loss": 0.0174, "step": 9814 }, { "epoch": 2.39, "learning_rate": 7.345962692598934e-06, "loss": 0.0109, "step": 9816 }, { "epoch": 2.39, "learning_rate": 7.342158431743429e-06, "loss": 0.01, "step": 9818 }, { "epoch": 2.39, "learning_rate": 7.338354584734182e-06, "loss": 0.0067, "step": 9820 }, { "epoch": 2.39, "learning_rate": 7.334551152163481e-06, "loss": 0.0179, "step": 9822 }, { "epoch": 2.39, "learning_rate": 7.330748134623546e-06, "loss": 0.0075, "step": 9824 }, { "epoch": 2.39, "learning_rate": 7.326945532706535e-06, "loss": 0.0112, "step": 9826 }, { "epoch": 2.39, "learning_rate": 7.323143347004547e-06, "loss": 0.0096, "step": 9828 }, { "epoch": 2.4, "learning_rate": 7.319341578109609e-06, "loss": 0.0133, "step": 9830 }, { "epoch": 2.4, "learning_rate": 7.315540226613684e-06, "loss": 0.0258, "step": 9832 }, { "epoch": 2.4, "learning_rate": 7.3117392931086726e-06, "loss": 0.0186, "step": 9834 }, { "epoch": 2.4, "learning_rate": 7.30793877818641e-06, "loss": 0.0155, "step": 9836 }, { "epoch": 2.4, "learning_rate": 7.304138682438669e-06, "loss": 0.0241, "step": 9838 }, { "epoch": 2.4, "learning_rate": 7.3003390064571425e-06, "loss": 0.0139, "step": 9840 }, { "epoch": 2.4, "learning_rate": 7.296539750833478e-06, "loss": 0.0162, "step": 9842 }, { "epoch": 2.4, "learning_rate": 7.292740916159249e-06, "loss": 0.0088, "step": 9844 }, { "epoch": 2.4, "learning_rate": 7.288942503025962e-06, "loss": 0.0211, "step": 9846 }, { "epoch": 2.4, "learning_rate": 7.285144512025053e-06, "loss": 0.0162, "step": 9848 }, { "epoch": 2.4, "learning_rate": 7.281346943747907e-06, "loss": 0.0156, "step": 9850 }, { "epoch": 2.4, "learning_rate": 7.277549798785825e-06, "loss": 0.0235, "step": 9852 }, { "epoch": 2.4, "learning_rate": 7.273753077730065e-06, "loss": 0.0255, "step": 9854 }, { "epoch": 2.4, "learning_rate": 7.269956781171792e-06, "loss": 0.0194, "step": 9856 }, { "epoch": 2.4, "learning_rate": 7.266160909702125e-06, "loss": 0.0117, "step": 9858 }, { "epoch": 2.4, "learning_rate": 7.262365463912108e-06, "loss": 0.0144, "step": 9860 }, { "epoch": 2.4, "learning_rate": 7.258570444392725e-06, "loss": 0.0096, "step": 9862 }, { "epoch": 2.4, "learning_rate": 7.254775851734883e-06, "loss": 0.0076, "step": 9864 }, { "epoch": 2.4, "learning_rate": 7.250981686529436e-06, "loss": 0.0104, "step": 9866 }, { "epoch": 2.4, "learning_rate": 7.247187949367158e-06, "loss": 0.0096, "step": 9868 }, { "epoch": 2.4, "learning_rate": 7.24339464083877e-06, "loss": 0.0133, "step": 9870 }, { "epoch": 2.41, "learning_rate": 7.239601761534913e-06, "loss": 0.022, "step": 9872 }, { "epoch": 2.41, "learning_rate": 7.235809312046169e-06, "loss": 0.0146, "step": 9874 }, { "epoch": 2.41, "learning_rate": 7.232017292963049e-06, "loss": 0.0084, "step": 9876 }, { "epoch": 2.41, "learning_rate": 7.22822570487601e-06, "loss": 0.0192, "step": 9878 }, { "epoch": 2.41, "learning_rate": 7.224434548375419e-06, "loss": 0.017, "step": 9880 }, { "epoch": 2.41, "learning_rate": 7.220643824051592e-06, "loss": 0.0135, "step": 9882 }, { "epoch": 2.41, "learning_rate": 7.216853532494773e-06, "loss": 0.021, "step": 9884 }, { "epoch": 2.41, "learning_rate": 7.213063674295146e-06, "loss": 0.0179, "step": 9886 }, { "epoch": 2.41, "learning_rate": 7.2092742500428126e-06, "loss": 0.0178, "step": 9888 }, { "epoch": 2.41, "learning_rate": 7.205485260327817e-06, "loss": 0.0146, "step": 9890 }, { "epoch": 2.41, "learning_rate": 7.201696705740137e-06, "loss": 0.0105, "step": 9892 }, { "epoch": 2.41, "learning_rate": 7.19790858686968e-06, "loss": 0.012, "step": 9894 }, { "epoch": 2.41, "learning_rate": 7.194120904306277e-06, "loss": 0.0092, "step": 9896 }, { "epoch": 2.41, "learning_rate": 7.190333658639705e-06, "loss": 0.0071, "step": 9898 }, { "epoch": 2.41, "learning_rate": 7.1865468504596704e-06, "loss": 0.0152, "step": 9900 }, { "epoch": 2.41, "learning_rate": 7.182760480355806e-06, "loss": 0.0131, "step": 9902 }, { "epoch": 2.41, "learning_rate": 7.178974548917672e-06, "loss": 0.0161, "step": 9904 }, { "epoch": 2.41, "learning_rate": 7.175189056734774e-06, "loss": 0.0138, "step": 9906 }, { "epoch": 2.41, "learning_rate": 7.17140400439654e-06, "loss": 0.0083, "step": 9908 }, { "epoch": 2.41, "learning_rate": 7.167619392492337e-06, "loss": 0.0108, "step": 9910 }, { "epoch": 2.42, "learning_rate": 7.163835221611446e-06, "loss": 0.0103, "step": 9912 }, { "epoch": 2.42, "learning_rate": 7.160051492343101e-06, "loss": 0.009, "step": 9914 }, { "epoch": 2.42, "learning_rate": 7.156268205276453e-06, "loss": 0.013, "step": 9916 }, { "epoch": 2.42, "learning_rate": 7.152485361000595e-06, "loss": 0.0129, "step": 9918 }, { "epoch": 2.42, "learning_rate": 7.148702960104535e-06, "loss": 0.0141, "step": 9920 }, { "epoch": 2.42, "learning_rate": 7.14492100317723e-06, "loss": 0.0107, "step": 9922 }, { "epoch": 2.42, "learning_rate": 7.141139490807554e-06, "loss": 0.0219, "step": 9924 }, { "epoch": 2.42, "learning_rate": 7.137358423584324e-06, "loss": 0.0121, "step": 9926 }, { "epoch": 2.42, "learning_rate": 7.133577802096274e-06, "loss": 0.0141, "step": 9928 }, { "epoch": 2.42, "learning_rate": 7.12979762693208e-06, "loss": 0.0201, "step": 9930 }, { "epoch": 2.42, "learning_rate": 7.1260178986803415e-06, "loss": 0.0046, "step": 9932 }, { "epoch": 2.42, "learning_rate": 7.122238617929596e-06, "loss": 0.0123, "step": 9934 }, { "epoch": 2.42, "learning_rate": 7.118459785268301e-06, "loss": 0.0125, "step": 9936 }, { "epoch": 2.42, "learning_rate": 7.114681401284848e-06, "loss": 0.0191, "step": 9938 }, { "epoch": 2.42, "learning_rate": 7.110903466567567e-06, "loss": 0.0147, "step": 9940 }, { "epoch": 2.42, "learning_rate": 7.10712598170471e-06, "loss": 0.0215, "step": 9942 }, { "epoch": 2.42, "learning_rate": 7.1033489472844566e-06, "loss": 0.0156, "step": 9944 }, { "epoch": 2.42, "learning_rate": 7.0995723638949195e-06, "loss": 0.0105, "step": 9946 }, { "epoch": 2.42, "learning_rate": 7.095796232124148e-06, "loss": 0.0089, "step": 9948 }, { "epoch": 2.42, "learning_rate": 7.092020552560111e-06, "loss": 0.013, "step": 9950 }, { "epoch": 2.42, "learning_rate": 7.088245325790705e-06, "loss": 0.0085, "step": 9952 }, { "epoch": 2.43, "learning_rate": 7.084470552403769e-06, "loss": 0.0188, "step": 9954 }, { "epoch": 2.43, "learning_rate": 7.080696232987063e-06, "loss": 0.0135, "step": 9956 }, { "epoch": 2.43, "learning_rate": 7.07692236812828e-06, "loss": 0.0071, "step": 9958 }, { "epoch": 2.43, "learning_rate": 7.07314895841503e-06, "loss": 0.0109, "step": 9960 }, { "epoch": 2.43, "learning_rate": 7.0693760044348695e-06, "loss": 0.0116, "step": 9962 }, { "epoch": 2.43, "learning_rate": 7.065603506775276e-06, "loss": 0.0155, "step": 9964 }, { "epoch": 2.43, "learning_rate": 7.061831466023656e-06, "loss": 0.0096, "step": 9966 }, { "epoch": 2.43, "learning_rate": 7.058059882767341e-06, "loss": 0.0105, "step": 9968 }, { "epoch": 2.43, "learning_rate": 7.054288757593599e-06, "loss": 0.0113, "step": 9970 }, { "epoch": 2.43, "learning_rate": 7.050518091089621e-06, "loss": 0.0111, "step": 9972 }, { "epoch": 2.43, "learning_rate": 7.046747883842534e-06, "loss": 0.0068, "step": 9974 }, { "epoch": 2.43, "learning_rate": 7.04297813643938e-06, "loss": 0.0163, "step": 9976 }, { "epoch": 2.43, "learning_rate": 7.039208849467143e-06, "loss": 0.0243, "step": 9978 }, { "epoch": 2.43, "learning_rate": 7.0354400235127264e-06, "loss": 0.01, "step": 9980 }, { "epoch": 2.43, "learning_rate": 7.0316716591629706e-06, "loss": 0.0097, "step": 9982 }, { "epoch": 2.43, "learning_rate": 7.0279037570046325e-06, "loss": 0.0091, "step": 9984 }, { "epoch": 2.43, "learning_rate": 7.024136317624407e-06, "loss": 0.0141, "step": 9986 }, { "epoch": 2.43, "learning_rate": 7.02036934160891e-06, "loss": 0.0088, "step": 9988 }, { "epoch": 2.43, "learning_rate": 7.016602829544696e-06, "loss": 0.0174, "step": 9990 }, { "epoch": 2.43, "learning_rate": 7.012836782018232e-06, "loss": 0.0346, "step": 9992 }, { "epoch": 2.44, "learning_rate": 7.00907119961592e-06, "loss": 0.0124, "step": 9994 }, { "epoch": 2.44, "learning_rate": 7.005306082924094e-06, "loss": 0.0128, "step": 9996 }, { "epoch": 2.44, "learning_rate": 7.001541432529013e-06, "loss": 0.0114, "step": 9998 }, { "epoch": 2.44, "learning_rate": 6.9977772490168594e-06, "loss": 0.0196, "step": 10000 }, { "epoch": 2.44, "learning_rate": 6.9940135329737404e-06, "loss": 0.0138, "step": 10002 }, { "epoch": 2.44, "learning_rate": 6.9902502849857036e-06, "loss": 0.0127, "step": 10004 }, { "epoch": 2.44, "learning_rate": 6.98648750563871e-06, "loss": 0.0162, "step": 10006 }, { "epoch": 2.44, "learning_rate": 6.982725195518658e-06, "loss": 0.016, "step": 10008 }, { "epoch": 2.44, "learning_rate": 6.978963355211361e-06, "loss": 0.0138, "step": 10010 }, { "epoch": 2.44, "learning_rate": 6.975201985302573e-06, "loss": 0.0205, "step": 10012 }, { "epoch": 2.44, "learning_rate": 6.971441086377968e-06, "loss": 0.0087, "step": 10014 }, { "epoch": 2.44, "learning_rate": 6.96768065902314e-06, "loss": 0.0135, "step": 10016 }, { "epoch": 2.44, "learning_rate": 6.963920703823619e-06, "loss": 0.0127, "step": 10018 }, { "epoch": 2.44, "learning_rate": 6.960161221364864e-06, "loss": 0.0092, "step": 10020 }, { "epoch": 2.44, "learning_rate": 6.956402212232254e-06, "loss": 0.0209, "step": 10022 }, { "epoch": 2.44, "learning_rate": 6.952643677011086e-06, "loss": 0.0194, "step": 10024 }, { "epoch": 2.44, "learning_rate": 6.948885616286605e-06, "loss": 0.0136, "step": 10026 }, { "epoch": 2.44, "learning_rate": 6.945128030643959e-06, "loss": 0.0159, "step": 10028 }, { "epoch": 2.44, "learning_rate": 6.941370920668244e-06, "loss": 0.0192, "step": 10030 }, { "epoch": 2.44, "learning_rate": 6.937614286944461e-06, "loss": 0.019, "step": 10032 }, { "epoch": 2.44, "learning_rate": 6.933858130057553e-06, "loss": 0.0139, "step": 10034 }, { "epoch": 2.45, "learning_rate": 6.930102450592376e-06, "loss": 0.0094, "step": 10036 }, { "epoch": 2.45, "learning_rate": 6.926347249133727e-06, "loss": 0.0107, "step": 10038 }, { "epoch": 2.45, "learning_rate": 6.922592526266312e-06, "loss": 0.0154, "step": 10040 }, { "epoch": 2.45, "learning_rate": 6.9188382825747715e-06, "loss": 0.0082, "step": 10042 }, { "epoch": 2.45, "learning_rate": 6.915084518643671e-06, "loss": 0.0134, "step": 10044 }, { "epoch": 2.45, "learning_rate": 6.9113312350575035e-06, "loss": 0.0054, "step": 10046 }, { "epoch": 2.45, "learning_rate": 6.907578432400679e-06, "loss": 0.0104, "step": 10048 }, { "epoch": 2.45, "learning_rate": 6.903826111257536e-06, "loss": 0.0139, "step": 10050 }, { "epoch": 2.45, "learning_rate": 6.9000742722123445e-06, "loss": 0.0105, "step": 10052 }, { "epoch": 2.45, "learning_rate": 6.896322915849298e-06, "loss": 0.0087, "step": 10054 }, { "epoch": 2.45, "learning_rate": 6.892572042752501e-06, "loss": 0.0124, "step": 10056 }, { "epoch": 2.45, "learning_rate": 6.8888216535059985e-06, "loss": 0.0083, "step": 10058 }, { "epoch": 2.45, "learning_rate": 6.885071748693755e-06, "loss": 0.0198, "step": 10060 }, { "epoch": 2.45, "learning_rate": 6.881322328899661e-06, "loss": 0.0112, "step": 10062 }, { "epoch": 2.45, "learning_rate": 6.8775733947075265e-06, "loss": 0.0087, "step": 10064 }, { "epoch": 2.45, "learning_rate": 6.873824946701089e-06, "loss": 0.0084, "step": 10066 }, { "epoch": 2.45, "learning_rate": 6.8700769854640135e-06, "loss": 0.0107, "step": 10068 }, { "epoch": 2.45, "learning_rate": 6.866329511579888e-06, "loss": 0.0079, "step": 10070 }, { "epoch": 2.45, "learning_rate": 6.862582525632215e-06, "loss": 0.0123, "step": 10072 }, { "epoch": 2.45, "learning_rate": 6.858836028204434e-06, "loss": 0.0209, "step": 10074 }, { "epoch": 2.46, "learning_rate": 6.855090019879904e-06, "loss": 0.0169, "step": 10076 }, { "epoch": 2.46, "learning_rate": 6.851344501241908e-06, "loss": 0.0159, "step": 10078 }, { "epoch": 2.46, "learning_rate": 6.847599472873646e-06, "loss": 0.0178, "step": 10080 }, { "epoch": 2.46, "learning_rate": 6.843854935358252e-06, "loss": 0.0126, "step": 10082 }, { "epoch": 2.46, "learning_rate": 6.84011088927878e-06, "loss": 0.0099, "step": 10084 }, { "epoch": 2.46, "learning_rate": 6.836367335218206e-06, "loss": 0.0093, "step": 10086 }, { "epoch": 2.46, "learning_rate": 6.832624273759428e-06, "loss": 0.0159, "step": 10088 }, { "epoch": 2.46, "learning_rate": 6.8288817054852685e-06, "loss": 0.0127, "step": 10090 }, { "epoch": 2.46, "learning_rate": 6.8251396309784764e-06, "loss": 0.0093, "step": 10092 }, { "epoch": 2.46, "learning_rate": 6.8213980508217235e-06, "loss": 0.0197, "step": 10094 }, { "epoch": 2.46, "learning_rate": 6.817656965597597e-06, "loss": 0.0064, "step": 10096 }, { "epoch": 2.46, "learning_rate": 6.813916375888617e-06, "loss": 0.0117, "step": 10098 }, { "epoch": 2.46, "learning_rate": 6.8101762822772176e-06, "loss": 0.0255, "step": 10100 }, { "epoch": 2.46, "learning_rate": 6.806436685345768e-06, "loss": 0.0069, "step": 10102 }, { "epoch": 2.46, "learning_rate": 6.802697585676543e-06, "loss": 0.0172, "step": 10104 }, { "epoch": 2.46, "learning_rate": 6.798958983851751e-06, "loss": 0.0057, "step": 10106 }, { "epoch": 2.46, "learning_rate": 6.795220880453521e-06, "loss": 0.0053, "step": 10108 }, { "epoch": 2.46, "learning_rate": 6.791483276063913e-06, "loss": 0.0086, "step": 10110 }, { "epoch": 2.46, "learning_rate": 6.7877461712648885e-06, "loss": 0.0133, "step": 10112 }, { "epoch": 2.46, "learning_rate": 6.784009566638348e-06, "loss": 0.0176, "step": 10114 }, { "epoch": 2.46, "learning_rate": 6.780273462766107e-06, "loss": 0.0153, "step": 10116 }, { "epoch": 2.47, "learning_rate": 6.776537860229915e-06, "loss": 0.0103, "step": 10118 }, { "epoch": 2.47, "learning_rate": 6.772802759611423e-06, "loss": 0.011, "step": 10120 }, { "epoch": 2.47, "learning_rate": 6.769068161492217e-06, "loss": 0.0133, "step": 10122 }, { "epoch": 2.47, "learning_rate": 6.7653340664538055e-06, "loss": 0.0054, "step": 10124 }, { "epoch": 2.47, "learning_rate": 6.761600475077618e-06, "loss": 0.0069, "step": 10126 }, { "epoch": 2.47, "learning_rate": 6.757867387944994e-06, "loss": 0.0104, "step": 10128 }, { "epoch": 2.47, "learning_rate": 6.7541348056372095e-06, "loss": 0.0124, "step": 10130 }, { "epoch": 2.47, "learning_rate": 6.750402728735457e-06, "loss": 0.0179, "step": 10132 }, { "epoch": 2.47, "learning_rate": 6.7466711578208524e-06, "loss": 0.0147, "step": 10134 }, { "epoch": 2.47, "learning_rate": 6.74294009347442e-06, "loss": 0.0161, "step": 10136 }, { "epoch": 2.47, "learning_rate": 6.73920953627712e-06, "loss": 0.0139, "step": 10138 }, { "epoch": 2.47, "learning_rate": 6.735479486809832e-06, "loss": 0.0109, "step": 10140 }, { "epoch": 2.47, "learning_rate": 6.731749945653352e-06, "loss": 0.0101, "step": 10142 }, { "epoch": 2.47, "learning_rate": 6.728020913388393e-06, "loss": 0.0125, "step": 10144 }, { "epoch": 2.47, "learning_rate": 6.7242923905956e-06, "loss": 0.0073, "step": 10146 }, { "epoch": 2.47, "learning_rate": 6.720564377855527e-06, "loss": 0.0079, "step": 10148 }, { "epoch": 2.47, "learning_rate": 6.716836875748663e-06, "loss": 0.0185, "step": 10150 }, { "epoch": 2.47, "learning_rate": 6.713109884855397e-06, "loss": 0.0111, "step": 10152 }, { "epoch": 2.47, "learning_rate": 6.709383405756058e-06, "loss": 0.0103, "step": 10154 }, { "epoch": 2.47, "learning_rate": 6.705657439030888e-06, "loss": 0.0181, "step": 10156 }, { "epoch": 2.48, "learning_rate": 6.7019319852600396e-06, "loss": 0.0216, "step": 10158 }, { "epoch": 2.48, "learning_rate": 6.6982070450236014e-06, "loss": 0.0158, "step": 10160 }, { "epoch": 2.48, "learning_rate": 6.694482618901575e-06, "loss": 0.0095, "step": 10162 }, { "epoch": 2.48, "learning_rate": 6.690758707473883e-06, "loss": 0.0123, "step": 10164 }, { "epoch": 2.48, "learning_rate": 6.687035311320363e-06, "loss": 0.0069, "step": 10166 }, { "epoch": 2.48, "learning_rate": 6.6833124310207785e-06, "loss": 0.0223, "step": 10168 }, { "epoch": 2.48, "learning_rate": 6.679590067154807e-06, "loss": 0.008, "step": 10170 }, { "epoch": 2.48, "learning_rate": 6.675868220302057e-06, "loss": 0.029, "step": 10172 }, { "epoch": 2.48, "learning_rate": 6.672146891042041e-06, "loss": 0.006, "step": 10174 }, { "epoch": 2.48, "learning_rate": 6.668426079954201e-06, "loss": 0.0089, "step": 10176 }, { "epoch": 2.48, "learning_rate": 6.664705787617894e-06, "loss": 0.0099, "step": 10178 }, { "epoch": 2.48, "learning_rate": 6.660986014612405e-06, "loss": 0.0066, "step": 10180 }, { "epoch": 2.48, "learning_rate": 6.65726676151692e-06, "loss": 0.0062, "step": 10182 }, { "epoch": 2.48, "learning_rate": 6.653548028910565e-06, "loss": 0.0174, "step": 10184 }, { "epoch": 2.48, "learning_rate": 6.649829817372369e-06, "loss": 0.0059, "step": 10186 }, { "epoch": 2.48, "learning_rate": 6.64611212748129e-06, "loss": 0.0071, "step": 10188 }, { "epoch": 2.48, "learning_rate": 6.642394959816198e-06, "loss": 0.0125, "step": 10190 }, { "epoch": 2.48, "learning_rate": 6.638678314955882e-06, "loss": 0.0108, "step": 10192 }, { "epoch": 2.48, "learning_rate": 6.634962193479057e-06, "loss": 0.0113, "step": 10194 }, { "epoch": 2.48, "learning_rate": 6.631246595964354e-06, "loss": 0.0139, "step": 10196 }, { "epoch": 2.48, "learning_rate": 6.627531522990311e-06, "loss": 0.0217, "step": 10198 }, { "epoch": 2.49, "learning_rate": 6.623816975135398e-06, "loss": 0.0177, "step": 10200 }, { "epoch": 2.49, "learning_rate": 6.620102952978e-06, "loss": 0.0096, "step": 10202 }, { "epoch": 2.49, "learning_rate": 6.616389457096417e-06, "loss": 0.01, "step": 10204 }, { "epoch": 2.49, "learning_rate": 6.61267648806887e-06, "loss": 0.0123, "step": 10206 }, { "epoch": 2.49, "learning_rate": 6.608964046473491e-06, "loss": 0.0145, "step": 10208 }, { "epoch": 2.49, "learning_rate": 6.605252132888345e-06, "loss": 0.0123, "step": 10210 }, { "epoch": 2.49, "learning_rate": 6.6015407478914e-06, "loss": 0.0123, "step": 10212 }, { "epoch": 2.49, "learning_rate": 6.597829892060544e-06, "loss": 0.0151, "step": 10214 }, { "epoch": 2.49, "learning_rate": 6.594119565973589e-06, "loss": 0.0147, "step": 10216 }, { "epoch": 2.49, "learning_rate": 6.5904097702082635e-06, "loss": 0.0127, "step": 10218 }, { "epoch": 2.49, "learning_rate": 6.586700505342212e-06, "loss": 0.0095, "step": 10220 }, { "epoch": 2.49, "learning_rate": 6.582991771952986e-06, "loss": 0.0146, "step": 10222 }, { "epoch": 2.49, "learning_rate": 6.5792835706180725e-06, "loss": 0.0133, "step": 10224 }, { "epoch": 2.49, "learning_rate": 6.5755759019148616e-06, "loss": 0.0155, "step": 10226 }, { "epoch": 2.49, "learning_rate": 6.571868766420672e-06, "loss": 0.0127, "step": 10228 }, { "epoch": 2.49, "learning_rate": 6.568162164712727e-06, "loss": 0.0148, "step": 10230 }, { "epoch": 2.49, "learning_rate": 6.564456097368176e-06, "loss": 0.0095, "step": 10232 }, { "epoch": 2.49, "learning_rate": 6.560750564964079e-06, "loss": 0.0073, "step": 10234 }, { "epoch": 2.49, "learning_rate": 6.557045568077422e-06, "loss": 0.0126, "step": 10236 }, { "epoch": 2.49, "learning_rate": 6.553341107285092e-06, "loss": 0.0202, "step": 10238 }, { "epoch": 2.5, "learning_rate": 6.549637183163911e-06, "loss": 0.0122, "step": 10240 }, { "epoch": 2.5, "learning_rate": 6.5459337962906e-06, "loss": 0.0146, "step": 10242 }, { "epoch": 2.5, "learning_rate": 6.542230947241815e-06, "loss": 0.0085, "step": 10244 }, { "epoch": 2.5, "learning_rate": 6.538528636594108e-06, "loss": 0.0157, "step": 10246 }, { "epoch": 2.5, "learning_rate": 6.53482686492396e-06, "loss": 0.0123, "step": 10248 }, { "epoch": 2.5, "learning_rate": 6.531125632807767e-06, "loss": 0.012, "step": 10250 }, { "epoch": 2.5, "learning_rate": 6.52742494082184e-06, "loss": 0.0161, "step": 10252 }, { "epoch": 2.5, "learning_rate": 6.523724789542404e-06, "loss": 0.0084, "step": 10254 }, { "epoch": 2.5, "learning_rate": 6.520025179545597e-06, "loss": 0.0177, "step": 10256 }, { "epoch": 2.5, "learning_rate": 6.516326111407478e-06, "loss": 0.015, "step": 10258 }, { "epoch": 2.5, "learning_rate": 6.512627585704028e-06, "loss": 0.008, "step": 10260 }, { "epoch": 2.5, "learning_rate": 6.508929603011127e-06, "loss": 0.0166, "step": 10262 }, { "epoch": 2.5, "learning_rate": 6.50523216390458e-06, "loss": 0.0187, "step": 10264 }, { "epoch": 2.5, "learning_rate": 6.501535268960109e-06, "loss": 0.0083, "step": 10266 }, { "epoch": 2.5, "learning_rate": 6.497838918753352e-06, "loss": 0.0102, "step": 10268 }, { "epoch": 2.5, "learning_rate": 6.49414311385985e-06, "loss": 0.0068, "step": 10270 }, { "epoch": 2.5, "learning_rate": 6.4904478548550746e-06, "loss": 0.009, "step": 10272 }, { "epoch": 2.5, "learning_rate": 6.486753142314406e-06, "loss": 0.015, "step": 10274 }, { "epoch": 2.5, "learning_rate": 6.483058976813139e-06, "loss": 0.0126, "step": 10276 }, { "epoch": 2.5, "learning_rate": 6.479365358926477e-06, "loss": 0.0074, "step": 10278 }, { "epoch": 2.5, "learning_rate": 6.475672289229555e-06, "loss": 0.0134, "step": 10280 }, { "epoch": 2.51, "learning_rate": 6.471979768297402e-06, "loss": 0.0106, "step": 10282 }, { "epoch": 2.51, "learning_rate": 6.468287796704982e-06, "loss": 0.0104, "step": 10284 }, { "epoch": 2.51, "learning_rate": 6.4645963750271525e-06, "loss": 0.0093, "step": 10286 }, { "epoch": 2.51, "learning_rate": 6.4609055038387045e-06, "loss": 0.0043, "step": 10288 }, { "epoch": 2.51, "learning_rate": 6.4572151837143295e-06, "loss": 0.0154, "step": 10290 }, { "epoch": 2.51, "learning_rate": 6.453525415228645e-06, "loss": 0.0152, "step": 10292 }, { "epoch": 2.51, "learning_rate": 6.449836198956168e-06, "loss": 0.0079, "step": 10294 }, { "epoch": 2.51, "learning_rate": 6.4461475354713435e-06, "loss": 0.0161, "step": 10296 }, { "epoch": 2.51, "learning_rate": 6.442459425348522e-06, "loss": 0.0104, "step": 10298 }, { "epoch": 2.51, "learning_rate": 6.4387718691619735e-06, "loss": 0.0116, "step": 10300 }, { "epoch": 2.51, "learning_rate": 6.435084867485875e-06, "loss": 0.0076, "step": 10302 }, { "epoch": 2.51, "learning_rate": 6.431398420894322e-06, "loss": 0.0146, "step": 10304 }, { "epoch": 2.51, "learning_rate": 6.42771252996132e-06, "loss": 0.0126, "step": 10306 }, { "epoch": 2.51, "learning_rate": 6.424027195260798e-06, "loss": 0.0124, "step": 10308 }, { "epoch": 2.51, "learning_rate": 6.420342417366584e-06, "loss": 0.0017, "step": 10310 }, { "epoch": 2.51, "learning_rate": 6.416658196852426e-06, "loss": 0.0137, "step": 10312 }, { "epoch": 2.51, "learning_rate": 6.412974534291988e-06, "loss": 0.0175, "step": 10314 }, { "epoch": 2.51, "learning_rate": 6.409291430258847e-06, "loss": 0.0082, "step": 10316 }, { "epoch": 2.51, "learning_rate": 6.405608885326486e-06, "loss": 0.0068, "step": 10318 }, { "epoch": 2.51, "learning_rate": 6.401926900068304e-06, "loss": 0.0082, "step": 10320 }, { "epoch": 2.52, "learning_rate": 6.3982454750576185e-06, "loss": 0.0104, "step": 10322 }, { "epoch": 2.52, "learning_rate": 6.394564610867656e-06, "loss": 0.0115, "step": 10324 }, { "epoch": 2.52, "learning_rate": 6.39088430807155e-06, "loss": 0.0149, "step": 10326 }, { "epoch": 2.52, "learning_rate": 6.3872045672423545e-06, "loss": 0.0098, "step": 10328 }, { "epoch": 2.52, "learning_rate": 6.383525388953036e-06, "loss": 0.0144, "step": 10330 }, { "epoch": 2.52, "learning_rate": 6.379846773776469e-06, "loss": 0.0182, "step": 10332 }, { "epoch": 2.52, "learning_rate": 6.376168722285438e-06, "loss": 0.0096, "step": 10334 }, { "epoch": 2.52, "learning_rate": 6.3724912350526465e-06, "loss": 0.0189, "step": 10336 }, { "epoch": 2.52, "learning_rate": 6.368814312650712e-06, "loss": 0.0121, "step": 10338 }, { "epoch": 2.52, "learning_rate": 6.365137955652156e-06, "loss": 0.0111, "step": 10340 }, { "epoch": 2.52, "learning_rate": 6.36146216462941e-06, "loss": 0.0099, "step": 10342 }, { "epoch": 2.52, "learning_rate": 6.357786940154832e-06, "loss": 0.0058, "step": 10344 }, { "epoch": 2.52, "learning_rate": 6.354112282800675e-06, "loss": 0.0102, "step": 10346 }, { "epoch": 2.52, "learning_rate": 6.350438193139118e-06, "loss": 0.0128, "step": 10348 }, { "epoch": 2.52, "learning_rate": 6.346764671742238e-06, "loss": 0.016, "step": 10350 }, { "epoch": 2.52, "learning_rate": 6.3430917191820354e-06, "loss": 0.0173, "step": 10352 }, { "epoch": 2.52, "learning_rate": 6.339419336030412e-06, "loss": 0.009, "step": 10354 }, { "epoch": 2.52, "learning_rate": 6.335747522859195e-06, "loss": 0.01, "step": 10356 }, { "epoch": 2.52, "learning_rate": 6.332076280240103e-06, "loss": 0.0157, "step": 10358 }, { "epoch": 2.52, "learning_rate": 6.328405608744783e-06, "loss": 0.0083, "step": 10360 }, { "epoch": 2.52, "learning_rate": 6.324735508944783e-06, "loss": 0.0108, "step": 10362 }, { "epoch": 2.53, "learning_rate": 6.321065981411574e-06, "loss": 0.0125, "step": 10364 }, { "epoch": 2.53, "learning_rate": 6.317397026716519e-06, "loss": 0.0059, "step": 10366 }, { "epoch": 2.53, "learning_rate": 6.313728645430906e-06, "loss": 0.0078, "step": 10368 }, { "epoch": 2.53, "learning_rate": 6.310060838125929e-06, "loss": 0.0077, "step": 10370 }, { "epoch": 2.53, "learning_rate": 6.3063936053726994e-06, "loss": 0.0148, "step": 10372 }, { "epoch": 2.53, "learning_rate": 6.302726947742228e-06, "loss": 0.0064, "step": 10374 }, { "epoch": 2.53, "learning_rate": 6.29906086580544e-06, "loss": 0.0113, "step": 10376 }, { "epoch": 2.53, "learning_rate": 6.295395360133177e-06, "loss": 0.0119, "step": 10378 }, { "epoch": 2.53, "learning_rate": 6.2917304312961865e-06, "loss": 0.0064, "step": 10380 }, { "epoch": 2.53, "learning_rate": 6.288066079865121e-06, "loss": 0.0118, "step": 10382 }, { "epoch": 2.53, "learning_rate": 6.2844023064105484e-06, "loss": 0.0148, "step": 10384 }, { "epoch": 2.53, "learning_rate": 6.280739111502951e-06, "loss": 0.0096, "step": 10386 }, { "epoch": 2.53, "learning_rate": 6.277076495712718e-06, "loss": 0.0071, "step": 10388 }, { "epoch": 2.53, "learning_rate": 6.2734144596101364e-06, "loss": 0.0129, "step": 10390 }, { "epoch": 2.53, "learning_rate": 6.26975300376542e-06, "loss": 0.0076, "step": 10392 }, { "epoch": 2.53, "learning_rate": 6.266092128748687e-06, "loss": 0.0062, "step": 10394 }, { "epoch": 2.53, "learning_rate": 6.262431835129963e-06, "loss": 0.0171, "step": 10396 }, { "epoch": 2.53, "learning_rate": 6.2587721234791774e-06, "loss": 0.0168, "step": 10398 }, { "epoch": 2.53, "learning_rate": 6.2551129943661825e-06, "loss": 0.0126, "step": 10400 }, { "epoch": 2.53, "learning_rate": 6.251454448360729e-06, "loss": 0.0073, "step": 10402 }, { "epoch": 2.54, "learning_rate": 6.2477964860324844e-06, "loss": 0.0189, "step": 10404 }, { "epoch": 2.54, "learning_rate": 6.244139107951017e-06, "loss": 0.0055, "step": 10406 }, { "epoch": 2.54, "learning_rate": 6.240482314685811e-06, "loss": 0.0117, "step": 10408 }, { "epoch": 2.54, "learning_rate": 6.236826106806253e-06, "loss": 0.0074, "step": 10410 }, { "epoch": 2.54, "learning_rate": 6.233170484881652e-06, "loss": 0.0185, "step": 10412 }, { "epoch": 2.54, "learning_rate": 6.229515449481205e-06, "loss": 0.0093, "step": 10414 }, { "epoch": 2.54, "learning_rate": 6.225861001174036e-06, "loss": 0.0148, "step": 10416 }, { "epoch": 2.54, "learning_rate": 6.222207140529167e-06, "loss": 0.0112, "step": 10418 }, { "epoch": 2.54, "learning_rate": 6.218553868115538e-06, "loss": 0.0103, "step": 10420 }, { "epoch": 2.54, "learning_rate": 6.214901184501984e-06, "loss": 0.0092, "step": 10422 }, { "epoch": 2.54, "learning_rate": 6.211249090257259e-06, "loss": 0.0095, "step": 10424 }, { "epoch": 2.54, "learning_rate": 6.20759758595002e-06, "loss": 0.0123, "step": 10426 }, { "epoch": 2.54, "learning_rate": 6.2039466721488405e-06, "loss": 0.0098, "step": 10428 }, { "epoch": 2.54, "learning_rate": 6.20029634942219e-06, "loss": 0.0106, "step": 10430 }, { "epoch": 2.54, "learning_rate": 6.19664661833845e-06, "loss": 0.0074, "step": 10432 }, { "epoch": 2.54, "learning_rate": 6.192997479465914e-06, "loss": 0.015, "step": 10434 }, { "epoch": 2.54, "learning_rate": 6.189348933372787e-06, "loss": 0.0095, "step": 10436 }, { "epoch": 2.54, "learning_rate": 6.185700980627167e-06, "loss": 0.0063, "step": 10438 }, { "epoch": 2.54, "learning_rate": 6.182053621797068e-06, "loss": 0.0142, "step": 10440 }, { "epoch": 2.54, "learning_rate": 6.178406857450417e-06, "loss": 0.0157, "step": 10442 }, { "epoch": 2.54, "learning_rate": 6.174760688155044e-06, "loss": 0.0135, "step": 10444 }, { "epoch": 2.55, "learning_rate": 6.171115114478677e-06, "loss": 0.0107, "step": 10446 }, { "epoch": 2.55, "learning_rate": 6.167470136988964e-06, "loss": 0.003, "step": 10448 }, { "epoch": 2.55, "learning_rate": 6.163825756253461e-06, "loss": 0.0075, "step": 10450 }, { "epoch": 2.55, "learning_rate": 6.1601819728396216e-06, "loss": 0.0155, "step": 10452 }, { "epoch": 2.55, "learning_rate": 6.156538787314808e-06, "loss": 0.0164, "step": 10454 }, { "epoch": 2.55, "learning_rate": 6.152896200246297e-06, "loss": 0.01, "step": 10456 }, { "epoch": 2.55, "learning_rate": 6.149254212201261e-06, "loss": 0.0061, "step": 10458 }, { "epoch": 2.55, "learning_rate": 6.145612823746795e-06, "loss": 0.0114, "step": 10460 }, { "epoch": 2.55, "learning_rate": 6.141972035449881e-06, "loss": 0.0159, "step": 10462 }, { "epoch": 2.55, "learning_rate": 6.1383318478774255e-06, "loss": 0.0092, "step": 10464 }, { "epoch": 2.55, "learning_rate": 6.134692261596227e-06, "loss": 0.0128, "step": 10466 }, { "epoch": 2.55, "learning_rate": 6.131053277173003e-06, "loss": 0.0094, "step": 10468 }, { "epoch": 2.55, "learning_rate": 6.127414895174366e-06, "loss": 0.0189, "step": 10470 }, { "epoch": 2.55, "learning_rate": 6.123777116166844e-06, "loss": 0.0069, "step": 10472 }, { "epoch": 2.55, "learning_rate": 6.120139940716862e-06, "loss": 0.0061, "step": 10474 }, { "epoch": 2.55, "learning_rate": 6.116503369390764e-06, "loss": 0.017, "step": 10476 }, { "epoch": 2.55, "learning_rate": 6.112867402754785e-06, "loss": 0.0162, "step": 10478 }, { "epoch": 2.55, "learning_rate": 6.1092320413750725e-06, "loss": 0.006, "step": 10480 }, { "epoch": 2.55, "learning_rate": 6.105597285817682e-06, "loss": 0.0069, "step": 10482 }, { "epoch": 2.55, "learning_rate": 6.1019631366485785e-06, "loss": 0.0195, "step": 10484 }, { "epoch": 2.56, "learning_rate": 6.09832959443362e-06, "loss": 0.0135, "step": 10486 }, { "epoch": 2.56, "learning_rate": 6.094696659738575e-06, "loss": 0.0094, "step": 10488 }, { "epoch": 2.56, "learning_rate": 6.091064333129123e-06, "loss": 0.0122, "step": 10490 }, { "epoch": 2.56, "learning_rate": 6.087432615170849e-06, "loss": 0.0074, "step": 10492 }, { "epoch": 2.56, "learning_rate": 6.0838015064292325e-06, "loss": 0.0127, "step": 10494 }, { "epoch": 2.56, "learning_rate": 6.080171007469664e-06, "loss": 0.0098, "step": 10496 }, { "epoch": 2.56, "learning_rate": 6.076541118857448e-06, "loss": 0.0142, "step": 10498 }, { "epoch": 2.56, "learning_rate": 6.07291184115778e-06, "loss": 0.0136, "step": 10500 }, { "epoch": 2.56, "learning_rate": 6.069283174935766e-06, "loss": 0.0149, "step": 10502 }, { "epoch": 2.56, "learning_rate": 6.065655120756417e-06, "loss": 0.0119, "step": 10504 }, { "epoch": 2.56, "learning_rate": 6.062027679184653e-06, "loss": 0.0124, "step": 10506 }, { "epoch": 2.56, "learning_rate": 6.058400850785293e-06, "loss": 0.0088, "step": 10508 }, { "epoch": 2.56, "learning_rate": 6.054774636123058e-06, "loss": 0.0107, "step": 10510 }, { "epoch": 2.56, "learning_rate": 6.051149035762578e-06, "loss": 0.013, "step": 10512 }, { "epoch": 2.56, "learning_rate": 6.047524050268392e-06, "loss": 0.0104, "step": 10514 }, { "epoch": 2.56, "learning_rate": 6.043899680204937e-06, "loss": 0.0077, "step": 10516 }, { "epoch": 2.56, "learning_rate": 6.040275926136547e-06, "loss": 0.0138, "step": 10518 }, { "epoch": 2.56, "learning_rate": 6.036652788627477e-06, "loss": 0.0157, "step": 10520 }, { "epoch": 2.56, "learning_rate": 6.033030268241871e-06, "loss": 0.0064, "step": 10522 }, { "epoch": 2.56, "learning_rate": 6.029408365543792e-06, "loss": 0.0137, "step": 10524 }, { "epoch": 2.56, "learning_rate": 6.025787081097188e-06, "loss": 0.0159, "step": 10526 }, { "epoch": 2.57, "learning_rate": 6.022166415465925e-06, "loss": 0.0268, "step": 10528 }, { "epoch": 2.57, "learning_rate": 6.0185463692137666e-06, "loss": 0.0108, "step": 10530 }, { "epoch": 2.57, "learning_rate": 6.014926942904388e-06, "loss": 0.0054, "step": 10532 }, { "epoch": 2.57, "learning_rate": 6.011308137101355e-06, "loss": 0.0074, "step": 10534 }, { "epoch": 2.57, "learning_rate": 6.007689952368144e-06, "loss": 0.0118, "step": 10536 }, { "epoch": 2.57, "learning_rate": 6.004072389268134e-06, "loss": 0.0085, "step": 10538 }, { "epoch": 2.57, "learning_rate": 6.0004554483646135e-06, "loss": 0.0085, "step": 10540 }, { "epoch": 2.57, "learning_rate": 5.996839130220761e-06, "loss": 0.0138, "step": 10542 }, { "epoch": 2.57, "learning_rate": 5.993223435399663e-06, "loss": 0.0068, "step": 10544 }, { "epoch": 2.57, "learning_rate": 5.989608364464317e-06, "loss": 0.0113, "step": 10546 }, { "epoch": 2.57, "learning_rate": 5.9859939179776164e-06, "loss": 0.0078, "step": 10548 }, { "epoch": 2.57, "learning_rate": 5.982380096502355e-06, "loss": 0.0091, "step": 10550 }, { "epoch": 2.57, "learning_rate": 5.978766900601232e-06, "loss": 0.0098, "step": 10552 }, { "epoch": 2.57, "learning_rate": 5.975154330836854e-06, "loss": 0.0102, "step": 10554 }, { "epoch": 2.57, "learning_rate": 5.971542387771725e-06, "loss": 0.0094, "step": 10556 }, { "epoch": 2.57, "learning_rate": 5.967931071968246e-06, "loss": 0.0062, "step": 10558 }, { "epoch": 2.57, "learning_rate": 5.964320383988731e-06, "loss": 0.017, "step": 10560 }, { "epoch": 2.57, "learning_rate": 5.960710324395394e-06, "loss": 0.0115, "step": 10562 }, { "epoch": 2.57, "learning_rate": 5.957100893750349e-06, "loss": 0.0073, "step": 10564 }, { "epoch": 2.57, "learning_rate": 5.9534920926156044e-06, "loss": 0.0121, "step": 10566 }, { "epoch": 2.58, "learning_rate": 5.9498839215530846e-06, "loss": 0.0122, "step": 10568 }, { "epoch": 2.58, "learning_rate": 5.9462763811246095e-06, "loss": 0.0111, "step": 10570 }, { "epoch": 2.58, "learning_rate": 5.9426694718919e-06, "loss": 0.0117, "step": 10572 }, { "epoch": 2.58, "learning_rate": 5.9390631944165764e-06, "loss": 0.0084, "step": 10574 }, { "epoch": 2.58, "learning_rate": 5.935457549260167e-06, "loss": 0.0136, "step": 10576 }, { "epoch": 2.58, "learning_rate": 5.931852536984096e-06, "loss": 0.009, "step": 10578 }, { "epoch": 2.58, "learning_rate": 5.928248158149697e-06, "loss": 0.0111, "step": 10580 }, { "epoch": 2.58, "learning_rate": 5.9246444133181905e-06, "loss": 0.0069, "step": 10582 }, { "epoch": 2.58, "learning_rate": 5.921041303050713e-06, "loss": 0.0072, "step": 10584 }, { "epoch": 2.58, "learning_rate": 5.917438827908293e-06, "loss": 0.0122, "step": 10586 }, { "epoch": 2.58, "learning_rate": 5.913836988451871e-06, "loss": 0.0061, "step": 10588 }, { "epoch": 2.58, "learning_rate": 5.9102357852422695e-06, "loss": 0.0112, "step": 10590 }, { "epoch": 2.58, "learning_rate": 5.906635218840233e-06, "loss": 0.0127, "step": 10592 }, { "epoch": 2.58, "learning_rate": 5.903035289806389e-06, "loss": 0.0091, "step": 10594 }, { "epoch": 2.58, "learning_rate": 5.899435998701285e-06, "loss": 0.0106, "step": 10596 }, { "epoch": 2.58, "learning_rate": 5.895837346085349e-06, "loss": 0.0122, "step": 10598 }, { "epoch": 2.58, "learning_rate": 5.892239332518919e-06, "loss": 0.0081, "step": 10600 }, { "epoch": 2.58, "learning_rate": 5.888641958562236e-06, "loss": 0.0098, "step": 10602 }, { "epoch": 2.58, "learning_rate": 5.885045224775441e-06, "loss": 0.0131, "step": 10604 }, { "epoch": 2.58, "learning_rate": 5.881449131718568e-06, "loss": 0.0168, "step": 10606 }, { "epoch": 2.58, "learning_rate": 5.877853679951557e-06, "loss": 0.0173, "step": 10608 }, { "epoch": 2.59, "learning_rate": 5.8742588700342505e-06, "loss": 0.0098, "step": 10610 }, { "epoch": 2.59, "learning_rate": 5.870664702526387e-06, "loss": 0.0067, "step": 10612 }, { "epoch": 2.59, "learning_rate": 5.867071177987604e-06, "loss": 0.0151, "step": 10614 }, { "epoch": 2.59, "learning_rate": 5.8634782969774395e-06, "loss": 0.0109, "step": 10616 }, { "epoch": 2.59, "learning_rate": 5.859886060055338e-06, "loss": 0.0069, "step": 10618 }, { "epoch": 2.59, "learning_rate": 5.8562944677806346e-06, "loss": 0.0134, "step": 10620 }, { "epoch": 2.59, "learning_rate": 5.8527035207125656e-06, "loss": 0.0088, "step": 10622 }, { "epoch": 2.59, "learning_rate": 5.849113219410272e-06, "loss": 0.0118, "step": 10624 }, { "epoch": 2.59, "learning_rate": 5.845523564432791e-06, "loss": 0.0104, "step": 10626 }, { "epoch": 2.59, "learning_rate": 5.841934556339062e-06, "loss": 0.0151, "step": 10628 }, { "epoch": 2.59, "learning_rate": 5.838346195687915e-06, "loss": 0.0098, "step": 10630 }, { "epoch": 2.59, "learning_rate": 5.834758483038087e-06, "loss": 0.0123, "step": 10632 }, { "epoch": 2.59, "learning_rate": 5.8311714189482115e-06, "loss": 0.0137, "step": 10634 }, { "epoch": 2.59, "learning_rate": 5.82758500397683e-06, "loss": 0.0087, "step": 10636 }, { "epoch": 2.59, "learning_rate": 5.823999238682363e-06, "loss": 0.0121, "step": 10638 }, { "epoch": 2.59, "learning_rate": 5.8204141236231485e-06, "loss": 0.0104, "step": 10640 }, { "epoch": 2.59, "learning_rate": 5.816829659357417e-06, "loss": 0.0177, "step": 10642 }, { "epoch": 2.59, "learning_rate": 5.813245846443295e-06, "loss": 0.0169, "step": 10644 }, { "epoch": 2.59, "learning_rate": 5.809662685438806e-06, "loss": 0.0071, "step": 10646 }, { "epoch": 2.59, "learning_rate": 5.806080176901879e-06, "loss": 0.0133, "step": 10648 }, { "epoch": 2.6, "learning_rate": 5.8024983213903374e-06, "loss": 0.0064, "step": 10650 }, { "epoch": 2.6, "learning_rate": 5.798917119461908e-06, "loss": 0.0059, "step": 10652 }, { "epoch": 2.6, "learning_rate": 5.795336571674203e-06, "loss": 0.0094, "step": 10654 }, { "epoch": 2.6, "learning_rate": 5.791756678584746e-06, "loss": 0.0103, "step": 10656 }, { "epoch": 2.6, "learning_rate": 5.788177440750958e-06, "loss": 0.0109, "step": 10658 }, { "epoch": 2.6, "learning_rate": 5.784598858730146e-06, "loss": 0.0067, "step": 10660 }, { "epoch": 2.6, "learning_rate": 5.781020933079524e-06, "loss": 0.0089, "step": 10662 }, { "epoch": 2.6, "learning_rate": 5.777443664356203e-06, "loss": 0.0142, "step": 10664 }, { "epoch": 2.6, "learning_rate": 5.773867053117192e-06, "loss": 0.0147, "step": 10666 }, { "epoch": 2.6, "learning_rate": 5.7702910999194e-06, "loss": 0.0105, "step": 10668 }, { "epoch": 2.6, "learning_rate": 5.766715805319623e-06, "loss": 0.0058, "step": 10670 }, { "epoch": 2.6, "learning_rate": 5.76314116987457e-06, "loss": 0.0097, "step": 10672 }, { "epoch": 2.6, "learning_rate": 5.759567194140834e-06, "loss": 0.0142, "step": 10674 }, { "epoch": 2.6, "learning_rate": 5.755993878674908e-06, "loss": 0.0119, "step": 10676 }, { "epoch": 2.6, "learning_rate": 5.752421224033187e-06, "loss": 0.011, "step": 10678 }, { "epoch": 2.6, "learning_rate": 5.74884923077196e-06, "loss": 0.0136, "step": 10680 }, { "epoch": 2.6, "learning_rate": 5.745277899447421e-06, "loss": 0.0132, "step": 10682 }, { "epoch": 2.6, "learning_rate": 5.741707230615643e-06, "loss": 0.0099, "step": 10684 }, { "epoch": 2.6, "learning_rate": 5.738137224832614e-06, "loss": 0.0094, "step": 10686 }, { "epoch": 2.6, "learning_rate": 5.734567882654204e-06, "loss": 0.0151, "step": 10688 }, { "epoch": 2.6, "learning_rate": 5.730999204636195e-06, "loss": 0.0065, "step": 10690 }, { "epoch": 2.61, "learning_rate": 5.727431191334249e-06, "loss": 0.01, "step": 10692 }, { "epoch": 2.61, "learning_rate": 5.723863843303938e-06, "loss": 0.0113, "step": 10694 }, { "epoch": 2.61, "learning_rate": 5.720297161100725e-06, "loss": 0.0031, "step": 10696 }, { "epoch": 2.61, "learning_rate": 5.7167311452799745e-06, "loss": 0.0104, "step": 10698 }, { "epoch": 2.61, "learning_rate": 5.713165796396931e-06, "loss": 0.0108, "step": 10700 }, { "epoch": 2.61, "learning_rate": 5.709601115006759e-06, "loss": 0.0087, "step": 10702 }, { "epoch": 2.61, "learning_rate": 5.706037101664495e-06, "loss": 0.0117, "step": 10704 }, { "epoch": 2.61, "learning_rate": 5.702473756925093e-06, "loss": 0.0142, "step": 10706 }, { "epoch": 2.61, "learning_rate": 5.698911081343386e-06, "loss": 0.0122, "step": 10708 }, { "epoch": 2.61, "learning_rate": 5.69534907547411e-06, "loss": 0.0132, "step": 10710 }, { "epoch": 2.61, "learning_rate": 5.691787739871901e-06, "loss": 0.0085, "step": 10712 }, { "epoch": 2.61, "learning_rate": 5.688227075091288e-06, "loss": 0.0137, "step": 10714 }, { "epoch": 2.61, "learning_rate": 5.68466708168669e-06, "loss": 0.0102, "step": 10716 }, { "epoch": 2.61, "learning_rate": 5.681107760212422e-06, "loss": 0.0135, "step": 10718 }, { "epoch": 2.61, "learning_rate": 5.6775491112227e-06, "loss": 0.0211, "step": 10720 }, { "epoch": 2.61, "learning_rate": 5.673991135271637e-06, "loss": 0.0136, "step": 10722 }, { "epoch": 2.61, "learning_rate": 5.670433832913231e-06, "loss": 0.0135, "step": 10724 }, { "epoch": 2.61, "learning_rate": 5.666877204701383e-06, "loss": 0.0175, "step": 10726 }, { "epoch": 2.61, "learning_rate": 5.663321251189893e-06, "loss": 0.0111, "step": 10728 }, { "epoch": 2.61, "learning_rate": 5.659765972932445e-06, "loss": 0.0077, "step": 10730 }, { "epoch": 2.62, "learning_rate": 5.656211370482618e-06, "loss": 0.0137, "step": 10732 }, { "epoch": 2.62, "learning_rate": 5.652657444393898e-06, "loss": 0.0064, "step": 10734 }, { "epoch": 2.62, "learning_rate": 5.649104195219654e-06, "loss": 0.0127, "step": 10736 }, { "epoch": 2.62, "learning_rate": 5.645551623513163e-06, "loss": 0.0078, "step": 10738 }, { "epoch": 2.62, "learning_rate": 5.641999729827576e-06, "loss": 0.0062, "step": 10740 }, { "epoch": 2.62, "learning_rate": 5.638448514715959e-06, "loss": 0.0103, "step": 10742 }, { "epoch": 2.62, "learning_rate": 5.634897978731257e-06, "loss": 0.0067, "step": 10744 }, { "epoch": 2.62, "learning_rate": 5.6313481224263215e-06, "loss": 0.0105, "step": 10746 }, { "epoch": 2.62, "learning_rate": 5.627798946353884e-06, "loss": 0.0062, "step": 10748 }, { "epoch": 2.62, "learning_rate": 5.624250451066584e-06, "loss": 0.0146, "step": 10750 }, { "epoch": 2.62, "learning_rate": 5.6207026371169485e-06, "loss": 0.0105, "step": 10752 }, { "epoch": 2.62, "learning_rate": 5.6171555050574035e-06, "loss": 0.0085, "step": 10754 }, { "epoch": 2.62, "learning_rate": 5.613609055440256e-06, "loss": 0.015, "step": 10756 }, { "epoch": 2.62, "learning_rate": 5.610063288817723e-06, "loss": 0.0162, "step": 10758 }, { "epoch": 2.62, "learning_rate": 5.606518205741902e-06, "loss": 0.0036, "step": 10760 }, { "epoch": 2.62, "learning_rate": 5.602973806764794e-06, "loss": 0.006, "step": 10762 }, { "epoch": 2.62, "learning_rate": 5.599430092438285e-06, "loss": 0.0117, "step": 10764 }, { "epoch": 2.62, "learning_rate": 5.595887063314158e-06, "loss": 0.0151, "step": 10766 }, { "epoch": 2.62, "learning_rate": 5.5923447199440935e-06, "loss": 0.008, "step": 10768 }, { "epoch": 2.62, "learning_rate": 5.588803062879663e-06, "loss": 0.014, "step": 10770 }, { "epoch": 2.62, "learning_rate": 5.585262092672328e-06, "loss": 0.0097, "step": 10772 }, { "epoch": 2.63, "learning_rate": 5.581721809873437e-06, "loss": 0.0054, "step": 10774 }, { "epoch": 2.63, "learning_rate": 5.578182215034247e-06, "loss": 0.0086, "step": 10776 }, { "epoch": 2.63, "learning_rate": 5.5746433087059e-06, "loss": 0.0097, "step": 10778 }, { "epoch": 2.63, "learning_rate": 5.571105091439427e-06, "loss": 0.0081, "step": 10780 }, { "epoch": 2.63, "learning_rate": 5.567567563785758e-06, "loss": 0.0059, "step": 10782 }, { "epoch": 2.63, "learning_rate": 5.564030726295715e-06, "loss": 0.0051, "step": 10784 }, { "epoch": 2.63, "learning_rate": 5.560494579520008e-06, "loss": 0.0142, "step": 10786 }, { "epoch": 2.63, "learning_rate": 5.55695912400924e-06, "loss": 0.015, "step": 10788 }, { "epoch": 2.63, "learning_rate": 5.553424360313909e-06, "loss": 0.0122, "step": 10790 }, { "epoch": 2.63, "learning_rate": 5.549890288984408e-06, "loss": 0.0113, "step": 10792 }, { "epoch": 2.63, "learning_rate": 5.54635691057102e-06, "loss": 0.0076, "step": 10794 }, { "epoch": 2.63, "learning_rate": 5.542824225623914e-06, "loss": 0.0106, "step": 10796 }, { "epoch": 2.63, "learning_rate": 5.539292234693158e-06, "loss": 0.0074, "step": 10798 }, { "epoch": 2.63, "learning_rate": 5.535760938328714e-06, "loss": 0.0049, "step": 10800 }, { "epoch": 2.63, "learning_rate": 5.532230337080429e-06, "loss": 0.0107, "step": 10802 }, { "epoch": 2.63, "learning_rate": 5.52870043149804e-06, "loss": 0.0073, "step": 10804 }, { "epoch": 2.63, "learning_rate": 5.5251712221311834e-06, "loss": 0.0131, "step": 10806 }, { "epoch": 2.63, "learning_rate": 5.521642709529387e-06, "loss": 0.0067, "step": 10808 }, { "epoch": 2.63, "learning_rate": 5.518114894242067e-06, "loss": 0.0128, "step": 10810 }, { "epoch": 2.63, "learning_rate": 5.514587776818526e-06, "loss": 0.0078, "step": 10812 }, { "epoch": 2.63, "learning_rate": 5.511061357807971e-06, "loss": 0.0125, "step": 10814 }, { "epoch": 2.64, "learning_rate": 5.507535637759483e-06, "loss": 0.0063, "step": 10816 }, { "epoch": 2.64, "learning_rate": 5.504010617222053e-06, "loss": 0.0068, "step": 10818 }, { "epoch": 2.64, "learning_rate": 5.500486296744546e-06, "loss": 0.0066, "step": 10820 }, { "epoch": 2.64, "learning_rate": 5.496962676875728e-06, "loss": 0.0121, "step": 10822 }, { "epoch": 2.64, "learning_rate": 5.493439758164254e-06, "loss": 0.0057, "step": 10824 }, { "epoch": 2.64, "learning_rate": 5.489917541158674e-06, "loss": 0.0117, "step": 10826 }, { "epoch": 2.64, "learning_rate": 5.4863960264074215e-06, "loss": 0.0059, "step": 10828 }, { "epoch": 2.64, "learning_rate": 5.482875214458816e-06, "loss": 0.0098, "step": 10830 }, { "epoch": 2.64, "learning_rate": 5.479355105861081e-06, "loss": 0.0161, "step": 10832 }, { "epoch": 2.64, "learning_rate": 5.475835701162326e-06, "loss": 0.0062, "step": 10834 }, { "epoch": 2.64, "learning_rate": 5.472317000910545e-06, "loss": 0.0122, "step": 10836 }, { "epoch": 2.64, "learning_rate": 5.468799005653629e-06, "loss": 0.0156, "step": 10838 }, { "epoch": 2.64, "learning_rate": 5.465281715939359e-06, "loss": 0.0084, "step": 10840 }, { "epoch": 2.64, "learning_rate": 5.461765132315402e-06, "loss": 0.0037, "step": 10842 }, { "epoch": 2.64, "learning_rate": 5.458249255329311e-06, "loss": 0.0048, "step": 10844 }, { "epoch": 2.64, "learning_rate": 5.454734085528541e-06, "loss": 0.013, "step": 10846 }, { "epoch": 2.64, "learning_rate": 5.451219623460431e-06, "loss": 0.0088, "step": 10848 }, { "epoch": 2.64, "learning_rate": 5.447705869672211e-06, "loss": 0.0178, "step": 10850 }, { "epoch": 2.64, "learning_rate": 5.444192824710994e-06, "loss": 0.0071, "step": 10852 }, { "epoch": 2.64, "learning_rate": 5.440680489123791e-06, "loss": 0.0062, "step": 10854 }, { "epoch": 2.65, "learning_rate": 5.4371688634575024e-06, "loss": 0.0117, "step": 10856 }, { "epoch": 2.65, "learning_rate": 5.433657948258912e-06, "loss": 0.0089, "step": 10858 }, { "epoch": 2.65, "learning_rate": 5.430147744074693e-06, "loss": 0.0055, "step": 10860 }, { "epoch": 2.65, "learning_rate": 5.426638251451414e-06, "loss": 0.0092, "step": 10862 }, { "epoch": 2.65, "learning_rate": 5.423129470935531e-06, "loss": 0.0051, "step": 10864 }, { "epoch": 2.65, "learning_rate": 5.41962140307339e-06, "loss": 0.0111, "step": 10866 }, { "epoch": 2.65, "learning_rate": 5.4161140484112165e-06, "loss": 0.0163, "step": 10868 }, { "epoch": 2.65, "learning_rate": 5.412607407495143e-06, "loss": 0.0126, "step": 10870 }, { "epoch": 2.65, "learning_rate": 5.409101480871168e-06, "loss": 0.0079, "step": 10872 }, { "epoch": 2.65, "learning_rate": 5.4055962690852025e-06, "loss": 0.0057, "step": 10874 }, { "epoch": 2.65, "learning_rate": 5.402091772683027e-06, "loss": 0.0079, "step": 10876 }, { "epoch": 2.65, "learning_rate": 5.39858799221032e-06, "loss": 0.0072, "step": 10878 }, { "epoch": 2.65, "learning_rate": 5.395084928212648e-06, "loss": 0.0107, "step": 10880 }, { "epoch": 2.65, "learning_rate": 5.391582581235468e-06, "loss": 0.0132, "step": 10882 }, { "epoch": 2.65, "learning_rate": 5.388080951824121e-06, "loss": 0.0103, "step": 10884 }, { "epoch": 2.65, "learning_rate": 5.38458004052383e-06, "loss": 0.0076, "step": 10886 }, { "epoch": 2.65, "learning_rate": 5.38107984787972e-06, "loss": 0.0104, "step": 10888 }, { "epoch": 2.65, "learning_rate": 5.377580374436801e-06, "loss": 0.0071, "step": 10890 }, { "epoch": 2.65, "learning_rate": 5.374081620739959e-06, "loss": 0.0075, "step": 10892 }, { "epoch": 2.65, "learning_rate": 5.3705835873339814e-06, "loss": 0.0058, "step": 10894 }, { "epoch": 2.65, "learning_rate": 5.367086274763544e-06, "loss": 0.0094, "step": 10896 }, { "epoch": 2.66, "learning_rate": 5.3635896835731945e-06, "loss": 0.0072, "step": 10898 }, { "epoch": 2.66, "learning_rate": 5.36009381430739e-06, "loss": 0.0105, "step": 10900 }, { "epoch": 2.66, "learning_rate": 5.356598667510453e-06, "loss": 0.0052, "step": 10902 }, { "epoch": 2.66, "learning_rate": 5.35310424372661e-06, "loss": 0.0088, "step": 10904 }, { "epoch": 2.66, "learning_rate": 5.349610543499973e-06, "loss": 0.0075, "step": 10906 }, { "epoch": 2.66, "learning_rate": 5.346117567374531e-06, "loss": 0.0109, "step": 10908 }, { "epoch": 2.66, "learning_rate": 5.34262531589417e-06, "loss": 0.0062, "step": 10910 }, { "epoch": 2.66, "learning_rate": 5.339133789602666e-06, "loss": 0.008, "step": 10912 }, { "epoch": 2.66, "learning_rate": 5.33564298904367e-06, "loss": 0.0048, "step": 10914 }, { "epoch": 2.66, "learning_rate": 5.3321529147607224e-06, "loss": 0.0058, "step": 10916 }, { "epoch": 2.66, "learning_rate": 5.328663567297261e-06, "loss": 0.0063, "step": 10918 }, { "epoch": 2.66, "learning_rate": 5.325174947196601e-06, "loss": 0.0078, "step": 10920 }, { "epoch": 2.66, "learning_rate": 5.321687055001953e-06, "loss": 0.0065, "step": 10922 }, { "epoch": 2.66, "learning_rate": 5.318199891256399e-06, "loss": 0.0201, "step": 10924 }, { "epoch": 2.66, "learning_rate": 5.314713456502928e-06, "loss": 0.0052, "step": 10926 }, { "epoch": 2.66, "learning_rate": 5.3112277512843935e-06, "loss": 0.0088, "step": 10928 }, { "epoch": 2.66, "learning_rate": 5.307742776143555e-06, "loss": 0.0102, "step": 10930 }, { "epoch": 2.66, "learning_rate": 5.304258531623043e-06, "loss": 0.0042, "step": 10932 }, { "epoch": 2.66, "learning_rate": 5.300775018265385e-06, "loss": 0.0073, "step": 10934 }, { "epoch": 2.66, "learning_rate": 5.297292236612989e-06, "loss": 0.0134, "step": 10936 }, { "epoch": 2.67, "learning_rate": 5.293810187208155e-06, "loss": 0.011, "step": 10938 }, { "epoch": 2.67, "learning_rate": 5.290328870593062e-06, "loss": 0.0084, "step": 10940 }, { "epoch": 2.67, "learning_rate": 5.286848287309774e-06, "loss": 0.0066, "step": 10942 }, { "epoch": 2.67, "learning_rate": 5.283368437900247e-06, "loss": 0.013, "step": 10944 }, { "epoch": 2.67, "learning_rate": 5.2798893229063245e-06, "loss": 0.0103, "step": 10946 }, { "epoch": 2.67, "learning_rate": 5.276410942869723e-06, "loss": 0.0053, "step": 10948 }, { "epoch": 2.67, "learning_rate": 5.272933298332059e-06, "loss": 0.0124, "step": 10950 }, { "epoch": 2.67, "learning_rate": 5.269456389834825e-06, "loss": 0.0061, "step": 10952 }, { "epoch": 2.67, "learning_rate": 5.265980217919408e-06, "loss": 0.0062, "step": 10954 }, { "epoch": 2.67, "learning_rate": 5.262504783127071e-06, "loss": 0.0097, "step": 10956 }, { "epoch": 2.67, "learning_rate": 5.259030085998962e-06, "loss": 0.0066, "step": 10958 }, { "epoch": 2.67, "learning_rate": 5.25555612707612e-06, "loss": 0.0064, "step": 10960 }, { "epoch": 2.67, "learning_rate": 5.2520829068994724e-06, "loss": 0.0094, "step": 10962 }, { "epoch": 2.67, "learning_rate": 5.248610426009818e-06, "loss": 0.0116, "step": 10964 }, { "epoch": 2.67, "learning_rate": 5.245138684947853e-06, "loss": 0.006, "step": 10966 }, { "epoch": 2.67, "learning_rate": 5.241667684254157e-06, "loss": 0.0085, "step": 10968 }, { "epoch": 2.67, "learning_rate": 5.238197424469187e-06, "loss": 0.0092, "step": 10970 }, { "epoch": 2.67, "learning_rate": 5.234727906133287e-06, "loss": 0.0129, "step": 10972 }, { "epoch": 2.67, "learning_rate": 5.23125912978669e-06, "loss": 0.0088, "step": 10974 }, { "epoch": 2.67, "learning_rate": 5.227791095969512e-06, "loss": 0.0081, "step": 10976 }, { "epoch": 2.67, "learning_rate": 5.224323805221755e-06, "loss": 0.0089, "step": 10978 }, { "epoch": 2.68, "learning_rate": 5.220857258083296e-06, "loss": 0.0062, "step": 10980 }, { "epoch": 2.68, "learning_rate": 5.21739145509391e-06, "loss": 0.0169, "step": 10982 }, { "epoch": 2.68, "learning_rate": 5.213926396793241e-06, "loss": 0.0072, "step": 10984 }, { "epoch": 2.68, "learning_rate": 5.210462083720833e-06, "loss": 0.0093, "step": 10986 }, { "epoch": 2.68, "learning_rate": 5.206998516416099e-06, "loss": 0.0104, "step": 10988 }, { "epoch": 2.68, "learning_rate": 5.203535695418348e-06, "loss": 0.0099, "step": 10990 }, { "epoch": 2.68, "learning_rate": 5.200073621266765e-06, "loss": 0.0167, "step": 10992 }, { "epoch": 2.68, "learning_rate": 5.196612294500426e-06, "loss": 0.0058, "step": 10994 }, { "epoch": 2.68, "learning_rate": 5.1931517156582835e-06, "loss": 0.0083, "step": 10996 }, { "epoch": 2.68, "learning_rate": 5.189691885279171e-06, "loss": 0.0147, "step": 10998 }, { "epoch": 2.68, "learning_rate": 5.186232803901814e-06, "loss": 0.0099, "step": 11000 }, { "epoch": 2.68, "learning_rate": 5.182774472064822e-06, "loss": 0.0081, "step": 11002 }, { "epoch": 2.68, "learning_rate": 5.179316890306678e-06, "loss": 0.0085, "step": 11004 }, { "epoch": 2.68, "learning_rate": 5.175860059165756e-06, "loss": 0.0063, "step": 11006 }, { "epoch": 2.68, "learning_rate": 5.17240397918031e-06, "loss": 0.0041, "step": 11008 }, { "epoch": 2.68, "learning_rate": 5.168948650888486e-06, "loss": 0.0143, "step": 11010 }, { "epoch": 2.68, "learning_rate": 5.165494074828296e-06, "loss": 0.007, "step": 11012 }, { "epoch": 2.68, "learning_rate": 5.1620402515376435e-06, "loss": 0.0099, "step": 11014 }, { "epoch": 2.68, "learning_rate": 5.158587181554318e-06, "loss": 0.0082, "step": 11016 }, { "epoch": 2.68, "learning_rate": 5.155134865415992e-06, "loss": 0.0136, "step": 11018 }, { "epoch": 2.69, "learning_rate": 5.151683303660211e-06, "loss": 0.0093, "step": 11020 }, { "epoch": 2.69, "learning_rate": 5.148232496824412e-06, "loss": 0.0081, "step": 11022 }, { "epoch": 2.69, "learning_rate": 5.144782445445918e-06, "loss": 0.0104, "step": 11024 }, { "epoch": 2.69, "learning_rate": 5.141333150061924e-06, "loss": 0.0093, "step": 11026 }, { "epoch": 2.69, "learning_rate": 5.137884611209506e-06, "loss": 0.0069, "step": 11028 }, { "epoch": 2.69, "learning_rate": 5.134436829425633e-06, "loss": 0.0088, "step": 11030 }, { "epoch": 2.69, "learning_rate": 5.130989805247152e-06, "loss": 0.0109, "step": 11032 }, { "epoch": 2.69, "learning_rate": 5.127543539210793e-06, "loss": 0.0052, "step": 11034 }, { "epoch": 2.69, "learning_rate": 5.1240980318531595e-06, "loss": 0.0092, "step": 11036 }, { "epoch": 2.69, "learning_rate": 5.12065328371075e-06, "loss": 0.0189, "step": 11038 }, { "epoch": 2.69, "learning_rate": 5.117209295319931e-06, "loss": 0.0108, "step": 11040 }, { "epoch": 2.69, "learning_rate": 5.113766067216967e-06, "loss": 0.0086, "step": 11042 }, { "epoch": 2.69, "learning_rate": 5.110323599937985e-06, "loss": 0.0156, "step": 11044 }, { "epoch": 2.69, "learning_rate": 5.1068818940190065e-06, "loss": 0.0117, "step": 11046 }, { "epoch": 2.69, "learning_rate": 5.103440949995936e-06, "loss": 0.014, "step": 11048 }, { "epoch": 2.69, "learning_rate": 5.100000768404554e-06, "loss": 0.0044, "step": 11050 }, { "epoch": 2.69, "learning_rate": 5.096561349780518e-06, "loss": 0.0054, "step": 11052 }, { "epoch": 2.69, "learning_rate": 5.093122694659377e-06, "loss": 0.0062, "step": 11054 }, { "epoch": 2.69, "learning_rate": 5.089684803576551e-06, "loss": 0.0118, "step": 11056 }, { "epoch": 2.69, "learning_rate": 5.0862476770673516e-06, "loss": 0.0041, "step": 11058 }, { "epoch": 2.69, "learning_rate": 5.08281131566696e-06, "loss": 0.0075, "step": 11060 }, { "epoch": 2.7, "learning_rate": 5.079375719910446e-06, "loss": 0.0083, "step": 11062 }, { "epoch": 2.7, "learning_rate": 5.075940890332758e-06, "loss": 0.0038, "step": 11064 }, { "epoch": 2.7, "learning_rate": 5.072506827468731e-06, "loss": 0.0097, "step": 11066 }, { "epoch": 2.7, "learning_rate": 5.069073531853068e-06, "loss": 0.0089, "step": 11068 }, { "epoch": 2.7, "learning_rate": 5.065641004020359e-06, "loss": 0.011, "step": 11070 }, { "epoch": 2.7, "learning_rate": 5.062209244505076e-06, "loss": 0.0109, "step": 11072 }, { "epoch": 2.7, "learning_rate": 5.058778253841577e-06, "loss": 0.0087, "step": 11074 }, { "epoch": 2.7, "learning_rate": 5.055348032564081e-06, "loss": 0.0095, "step": 11076 }, { "epoch": 2.7, "learning_rate": 5.051918581206708e-06, "loss": 0.0103, "step": 11078 }, { "epoch": 2.7, "learning_rate": 5.0484899003034515e-06, "loss": 0.0099, "step": 11080 }, { "epoch": 2.7, "learning_rate": 5.0450619903881805e-06, "loss": 0.0054, "step": 11082 }, { "epoch": 2.7, "learning_rate": 5.041634851994642e-06, "loss": 0.0048, "step": 11084 }, { "epoch": 2.7, "learning_rate": 5.038208485656471e-06, "loss": 0.0101, "step": 11086 }, { "epoch": 2.7, "learning_rate": 5.034782891907182e-06, "loss": 0.0079, "step": 11088 }, { "epoch": 2.7, "learning_rate": 5.031358071280165e-06, "loss": 0.0067, "step": 11090 }, { "epoch": 2.7, "learning_rate": 5.02793402430869e-06, "loss": 0.0061, "step": 11092 }, { "epoch": 2.7, "learning_rate": 5.024510751525908e-06, "loss": 0.0131, "step": 11094 }, { "epoch": 2.7, "learning_rate": 5.0210882534648455e-06, "loss": 0.0082, "step": 11096 }, { "epoch": 2.7, "learning_rate": 5.017666530658416e-06, "loss": 0.0057, "step": 11098 }, { "epoch": 2.7, "learning_rate": 5.014245583639403e-06, "loss": 0.0064, "step": 11100 }, { "epoch": 2.71, "learning_rate": 5.010825412940477e-06, "loss": 0.0124, "step": 11102 }, { "epoch": 2.71, "learning_rate": 5.007406019094185e-06, "loss": 0.0118, "step": 11104 }, { "epoch": 2.71, "learning_rate": 5.003987402632955e-06, "loss": 0.0091, "step": 11106 }, { "epoch": 2.71, "learning_rate": 5.000569564089086e-06, "loss": 0.0134, "step": 11108 }, { "epoch": 2.71, "learning_rate": 4.997152503994769e-06, "loss": 0.0163, "step": 11110 }, { "epoch": 2.71, "learning_rate": 4.993736222882058e-06, "loss": 0.0102, "step": 11112 }, { "epoch": 2.71, "learning_rate": 4.990320721282902e-06, "loss": 0.0057, "step": 11114 }, { "epoch": 2.71, "learning_rate": 4.986905999729114e-06, "loss": 0.0087, "step": 11116 }, { "epoch": 2.71, "learning_rate": 4.9834920587523935e-06, "loss": 0.004, "step": 11118 }, { "epoch": 2.71, "learning_rate": 4.9800788988843205e-06, "loss": 0.0056, "step": 11120 }, { "epoch": 2.71, "learning_rate": 4.976666520656352e-06, "loss": 0.0083, "step": 11122 }, { "epoch": 2.71, "learning_rate": 4.973254924599818e-06, "loss": 0.0104, "step": 11124 }, { "epoch": 2.71, "learning_rate": 4.9698441112459266e-06, "loss": 0.0136, "step": 11126 }, { "epoch": 2.71, "learning_rate": 4.966434081125769e-06, "loss": 0.0049, "step": 11128 }, { "epoch": 2.71, "learning_rate": 4.96302483477032e-06, "loss": 0.0073, "step": 11130 }, { "epoch": 2.71, "learning_rate": 4.959616372710416e-06, "loss": 0.0115, "step": 11132 }, { "epoch": 2.71, "learning_rate": 4.956208695476786e-06, "loss": 0.0112, "step": 11134 }, { "epoch": 2.71, "learning_rate": 4.952801803600033e-06, "loss": 0.0104, "step": 11136 }, { "epoch": 2.71, "learning_rate": 4.949395697610634e-06, "loss": 0.0085, "step": 11138 }, { "epoch": 2.71, "learning_rate": 4.945990378038941e-06, "loss": 0.013, "step": 11140 }, { "epoch": 2.71, "learning_rate": 4.942585845415192e-06, "loss": 0.0053, "step": 11142 }, { "epoch": 2.72, "learning_rate": 4.9391821002694996e-06, "loss": 0.0064, "step": 11144 }, { "epoch": 2.72, "learning_rate": 4.935779143131856e-06, "loss": 0.0063, "step": 11146 }, { "epoch": 2.72, "learning_rate": 4.932376974532121e-06, "loss": 0.0125, "step": 11148 }, { "epoch": 2.72, "learning_rate": 4.928975595000044e-06, "loss": 0.0064, "step": 11150 }, { "epoch": 2.72, "learning_rate": 4.925575005065241e-06, "loss": 0.0174, "step": 11152 }, { "epoch": 2.72, "learning_rate": 4.922175205257215e-06, "loss": 0.0052, "step": 11154 }, { "epoch": 2.72, "learning_rate": 4.9187761961053335e-06, "loss": 0.0023, "step": 11156 }, { "epoch": 2.72, "learning_rate": 4.915377978138853e-06, "loss": 0.0089, "step": 11158 }, { "epoch": 2.72, "learning_rate": 4.911980551886902e-06, "loss": 0.0051, "step": 11160 }, { "epoch": 2.72, "learning_rate": 4.908583917878489e-06, "loss": 0.01, "step": 11162 }, { "epoch": 2.72, "learning_rate": 4.9051880766424885e-06, "loss": 0.0129, "step": 11164 }, { "epoch": 2.72, "learning_rate": 4.901793028707666e-06, "loss": 0.0093, "step": 11166 }, { "epoch": 2.72, "learning_rate": 4.898398774602651e-06, "loss": 0.0065, "step": 11168 }, { "epoch": 2.72, "learning_rate": 4.895005314855959e-06, "loss": 0.0135, "step": 11170 }, { "epoch": 2.72, "learning_rate": 4.891612649995973e-06, "loss": 0.0138, "step": 11172 }, { "epoch": 2.72, "learning_rate": 4.888220780550958e-06, "loss": 0.0057, "step": 11174 }, { "epoch": 2.72, "learning_rate": 4.884829707049057e-06, "loss": 0.0112, "step": 11176 }, { "epoch": 2.72, "learning_rate": 4.881439430018288e-06, "loss": 0.0129, "step": 11178 }, { "epoch": 2.72, "learning_rate": 4.878049949986539e-06, "loss": 0.0106, "step": 11180 }, { "epoch": 2.72, "learning_rate": 4.874661267481574e-06, "loss": 0.007, "step": 11182 }, { "epoch": 2.73, "learning_rate": 4.871273383031043e-06, "loss": 0.005, "step": 11184 }, { "epoch": 2.73, "learning_rate": 4.867886297162467e-06, "loss": 0.0087, "step": 11186 }, { "epoch": 2.73, "learning_rate": 4.864500010403234e-06, "loss": 0.0104, "step": 11188 }, { "epoch": 2.73, "learning_rate": 4.861114523280619e-06, "loss": 0.0109, "step": 11190 }, { "epoch": 2.73, "learning_rate": 4.857729836321772e-06, "loss": 0.0042, "step": 11192 }, { "epoch": 2.73, "learning_rate": 4.854345950053711e-06, "loss": 0.0053, "step": 11194 }, { "epoch": 2.73, "learning_rate": 4.850962865003327e-06, "loss": 0.012, "step": 11196 }, { "epoch": 2.73, "learning_rate": 4.8475805816974e-06, "loss": 0.0092, "step": 11198 }, { "epoch": 2.73, "learning_rate": 4.844199100662578e-06, "loss": 0.0084, "step": 11200 }, { "epoch": 2.73, "learning_rate": 4.840818422425376e-06, "loss": 0.0074, "step": 11202 }, { "epoch": 2.73, "learning_rate": 4.837438547512197e-06, "loss": 0.0122, "step": 11204 }, { "epoch": 2.73, "learning_rate": 4.834059476449312e-06, "loss": 0.0114, "step": 11206 }, { "epoch": 2.73, "learning_rate": 4.830681209762873e-06, "loss": 0.0086, "step": 11208 }, { "epoch": 2.73, "learning_rate": 4.827303747978898e-06, "loss": 0.0051, "step": 11210 }, { "epoch": 2.73, "learning_rate": 4.823927091623278e-06, "loss": 0.0142, "step": 11212 }, { "epoch": 2.73, "learning_rate": 4.820551241221791e-06, "loss": 0.0071, "step": 11214 }, { "epoch": 2.73, "learning_rate": 4.8171761973000845e-06, "loss": 0.0087, "step": 11216 }, { "epoch": 2.73, "learning_rate": 4.813801960383672e-06, "loss": 0.0064, "step": 11218 }, { "epoch": 2.73, "learning_rate": 4.81042853099795e-06, "loss": 0.0104, "step": 11220 }, { "epoch": 2.73, "learning_rate": 4.807055909668193e-06, "loss": 0.0082, "step": 11222 }, { "epoch": 2.73, "learning_rate": 4.80368409691954e-06, "loss": 0.0087, "step": 11224 }, { "epoch": 2.74, "learning_rate": 4.800313093277002e-06, "loss": 0.0078, "step": 11226 }, { "epoch": 2.74, "learning_rate": 4.7969428992654775e-06, "loss": 0.0045, "step": 11228 }, { "epoch": 2.74, "learning_rate": 4.793573515409729e-06, "loss": 0.0042, "step": 11230 }, { "epoch": 2.74, "learning_rate": 4.7902049422344e-06, "loss": 0.0071, "step": 11232 }, { "epoch": 2.74, "learning_rate": 4.786837180263994e-06, "loss": 0.0095, "step": 11234 }, { "epoch": 2.74, "learning_rate": 4.783470230022908e-06, "loss": 0.0131, "step": 11236 }, { "epoch": 2.74, "learning_rate": 4.780104092035392e-06, "loss": 0.0091, "step": 11238 }, { "epoch": 2.74, "learning_rate": 4.776738766825585e-06, "loss": 0.0087, "step": 11240 }, { "epoch": 2.74, "learning_rate": 4.773374254917491e-06, "loss": 0.012, "step": 11242 }, { "epoch": 2.74, "learning_rate": 4.7700105568349905e-06, "loss": 0.0114, "step": 11244 }, { "epoch": 2.74, "learning_rate": 4.766647673101839e-06, "loss": 0.0095, "step": 11246 }, { "epoch": 2.74, "learning_rate": 4.763285604241665e-06, "loss": 0.0039, "step": 11248 }, { "epoch": 2.74, "learning_rate": 4.759924350777968e-06, "loss": 0.0097, "step": 11250 }, { "epoch": 2.74, "learning_rate": 4.756563913234113e-06, "loss": 0.0119, "step": 11252 }, { "epoch": 2.74, "learning_rate": 4.753204292133352e-06, "loss": 0.0145, "step": 11254 }, { "epoch": 2.74, "learning_rate": 4.749845487998806e-06, "loss": 0.0067, "step": 11256 }, { "epoch": 2.74, "learning_rate": 4.7464875013534614e-06, "loss": 0.0075, "step": 11258 }, { "epoch": 2.74, "learning_rate": 4.743130332720184e-06, "loss": 0.0043, "step": 11260 }, { "epoch": 2.74, "learning_rate": 4.739773982621712e-06, "loss": 0.0077, "step": 11262 }, { "epoch": 2.74, "learning_rate": 4.736418451580656e-06, "loss": 0.0097, "step": 11264 }, { "epoch": 2.75, "learning_rate": 4.733063740119497e-06, "loss": 0.0044, "step": 11266 }, { "epoch": 2.75, "learning_rate": 4.729709848760584e-06, "loss": 0.0041, "step": 11268 }, { "epoch": 2.75, "learning_rate": 4.726356778026148e-06, "loss": 0.0147, "step": 11270 }, { "epoch": 2.75, "learning_rate": 4.723004528438291e-06, "loss": 0.0047, "step": 11272 }, { "epoch": 2.75, "learning_rate": 4.719653100518976e-06, "loss": 0.0129, "step": 11274 }, { "epoch": 2.75, "learning_rate": 4.7163024947900505e-06, "loss": 0.0111, "step": 11276 }, { "epoch": 2.75, "learning_rate": 4.712952711773233e-06, "loss": 0.0086, "step": 11278 }, { "epoch": 2.75, "learning_rate": 4.7096037519901065e-06, "loss": 0.0077, "step": 11280 }, { "epoch": 2.75, "learning_rate": 4.706255615962127e-06, "loss": 0.0088, "step": 11282 }, { "epoch": 2.75, "learning_rate": 4.702908304210625e-06, "loss": 0.0122, "step": 11284 }, { "epoch": 2.75, "learning_rate": 4.6995618172568066e-06, "loss": 0.011, "step": 11286 }, { "epoch": 2.75, "learning_rate": 4.696216155621748e-06, "loss": 0.0049, "step": 11288 }, { "epoch": 2.75, "learning_rate": 4.692871319826385e-06, "loss": 0.0045, "step": 11290 }, { "epoch": 2.75, "learning_rate": 4.689527310391544e-06, "loss": 0.0065, "step": 11292 }, { "epoch": 2.75, "learning_rate": 4.686184127837905e-06, "loss": 0.0143, "step": 11294 }, { "epoch": 2.75, "learning_rate": 4.682841772686033e-06, "loss": 0.0078, "step": 11296 }, { "epoch": 2.75, "learning_rate": 4.679500245456352e-06, "loss": 0.0071, "step": 11298 }, { "epoch": 2.75, "learning_rate": 4.676159546669167e-06, "loss": 0.0039, "step": 11300 }, { "epoch": 2.75, "learning_rate": 4.672819676844649e-06, "loss": 0.0079, "step": 11302 }, { "epoch": 2.75, "learning_rate": 4.669480636502847e-06, "loss": 0.0052, "step": 11304 }, { "epoch": 2.75, "learning_rate": 4.666142426163667e-06, "loss": 0.0088, "step": 11306 }, { "epoch": 2.76, "learning_rate": 4.662805046346901e-06, "loss": 0.0042, "step": 11308 }, { "epoch": 2.76, "learning_rate": 4.6594684975721974e-06, "loss": 0.0102, "step": 11310 }, { "epoch": 2.76, "learning_rate": 4.656132780359089e-06, "loss": 0.0039, "step": 11312 }, { "epoch": 2.76, "learning_rate": 4.652797895226966e-06, "loss": 0.0132, "step": 11314 }, { "epoch": 2.76, "learning_rate": 4.649463842695099e-06, "loss": 0.0032, "step": 11316 }, { "epoch": 2.76, "learning_rate": 4.646130623282625e-06, "loss": 0.0061, "step": 11318 }, { "epoch": 2.76, "learning_rate": 4.642798237508555e-06, "loss": 0.0055, "step": 11320 }, { "epoch": 2.76, "learning_rate": 4.639466685891766e-06, "loss": 0.0071, "step": 11322 }, { "epoch": 2.76, "learning_rate": 4.636135968951e-06, "loss": 0.0126, "step": 11324 }, { "epoch": 2.76, "learning_rate": 4.632806087204878e-06, "loss": 0.0097, "step": 11326 }, { "epoch": 2.76, "learning_rate": 4.629477041171895e-06, "loss": 0.0104, "step": 11328 }, { "epoch": 2.76, "learning_rate": 4.6261488313703985e-06, "loss": 0.0064, "step": 11330 }, { "epoch": 2.76, "learning_rate": 4.6228214583186205e-06, "loss": 0.0154, "step": 11332 }, { "epoch": 2.76, "learning_rate": 4.619494922534663e-06, "loss": 0.0102, "step": 11334 }, { "epoch": 2.76, "learning_rate": 4.616169224536489e-06, "loss": 0.013, "step": 11336 }, { "epoch": 2.76, "learning_rate": 4.612844364841931e-06, "loss": 0.0042, "step": 11338 }, { "epoch": 2.76, "learning_rate": 4.6095203439686985e-06, "loss": 0.0059, "step": 11340 }, { "epoch": 2.76, "learning_rate": 4.606197162434368e-06, "loss": 0.0044, "step": 11342 }, { "epoch": 2.76, "learning_rate": 4.602874820756388e-06, "loss": 0.007, "step": 11344 }, { "epoch": 2.76, "learning_rate": 4.599553319452065e-06, "loss": 0.0096, "step": 11346 }, { "epoch": 2.77, "learning_rate": 4.596232659038588e-06, "loss": 0.0086, "step": 11348 }, { "epoch": 2.77, "learning_rate": 4.5929128400330035e-06, "loss": 0.0071, "step": 11350 }, { "epoch": 2.77, "learning_rate": 4.5895938629522396e-06, "loss": 0.0156, "step": 11352 }, { "epoch": 2.77, "learning_rate": 4.58627572831308e-06, "loss": 0.0077, "step": 11354 }, { "epoch": 2.77, "learning_rate": 4.582958436632185e-06, "loss": 0.0052, "step": 11356 }, { "epoch": 2.77, "learning_rate": 4.579641988426084e-06, "loss": 0.008, "step": 11358 }, { "epoch": 2.77, "learning_rate": 4.576326384211177e-06, "loss": 0.006, "step": 11360 }, { "epoch": 2.77, "learning_rate": 4.573011624503721e-06, "loss": 0.0058, "step": 11362 }, { "epoch": 2.77, "learning_rate": 4.569697709819857e-06, "loss": 0.0174, "step": 11364 }, { "epoch": 2.77, "learning_rate": 4.566384640675579e-06, "loss": 0.0012, "step": 11366 }, { "epoch": 2.77, "learning_rate": 4.563072417586767e-06, "loss": 0.0041, "step": 11368 }, { "epoch": 2.77, "learning_rate": 4.559761041069149e-06, "loss": 0.0123, "step": 11370 }, { "epoch": 2.77, "learning_rate": 4.556450511638336e-06, "loss": 0.0089, "step": 11372 }, { "epoch": 2.77, "learning_rate": 4.553140829809804e-06, "loss": 0.0056, "step": 11374 }, { "epoch": 2.77, "learning_rate": 4.5498319960988975e-06, "loss": 0.0022, "step": 11376 }, { "epoch": 2.77, "learning_rate": 4.546524011020826e-06, "loss": 0.0062, "step": 11378 }, { "epoch": 2.77, "learning_rate": 4.543216875090663e-06, "loss": 0.0078, "step": 11380 }, { "epoch": 2.77, "learning_rate": 4.539910588823359e-06, "loss": 0.0044, "step": 11382 }, { "epoch": 2.77, "learning_rate": 4.53660515273373e-06, "loss": 0.0097, "step": 11384 }, { "epoch": 2.77, "learning_rate": 4.533300567336454e-06, "loss": 0.0136, "step": 11386 }, { "epoch": 2.77, "learning_rate": 4.52999683314608e-06, "loss": 0.0097, "step": 11388 }, { "epoch": 2.78, "learning_rate": 4.5266939506770305e-06, "loss": 0.0061, "step": 11390 }, { "epoch": 2.78, "learning_rate": 4.523391920443584e-06, "loss": 0.0036, "step": 11392 }, { "epoch": 2.78, "learning_rate": 4.5200907429598906e-06, "loss": 0.0095, "step": 11394 }, { "epoch": 2.78, "learning_rate": 4.516790418739972e-06, "loss": 0.0091, "step": 11396 }, { "epoch": 2.78, "learning_rate": 4.513490948297713e-06, "loss": 0.0052, "step": 11398 }, { "epoch": 2.78, "learning_rate": 4.51019233214687e-06, "loss": 0.0093, "step": 11400 }, { "epoch": 2.78, "learning_rate": 4.5068945708010556e-06, "loss": 0.0086, "step": 11402 }, { "epoch": 2.78, "learning_rate": 4.503597664773761e-06, "loss": 0.0096, "step": 11404 }, { "epoch": 2.78, "learning_rate": 4.500301614578343e-06, "loss": 0.0039, "step": 11406 }, { "epoch": 2.78, "learning_rate": 4.4970064207280175e-06, "loss": 0.0068, "step": 11408 }, { "epoch": 2.78, "learning_rate": 4.493712083735868e-06, "loss": 0.0082, "step": 11410 }, { "epoch": 2.78, "learning_rate": 4.490418604114852e-06, "loss": 0.0083, "step": 11412 }, { "epoch": 2.78, "learning_rate": 4.487125982377789e-06, "loss": 0.0079, "step": 11414 }, { "epoch": 2.78, "learning_rate": 4.483834219037369e-06, "loss": 0.0095, "step": 11416 }, { "epoch": 2.78, "learning_rate": 4.480543314606138e-06, "loss": 0.0043, "step": 11418 }, { "epoch": 2.78, "learning_rate": 4.477253269596522e-06, "loss": 0.0054, "step": 11420 }, { "epoch": 2.78, "learning_rate": 4.473964084520799e-06, "loss": 0.0028, "step": 11422 }, { "epoch": 2.78, "learning_rate": 4.470675759891126e-06, "loss": 0.006, "step": 11424 }, { "epoch": 2.78, "learning_rate": 4.467388296219516e-06, "loss": 0.0034, "step": 11426 }, { "epoch": 2.78, "learning_rate": 4.4641016940178535e-06, "loss": 0.0079, "step": 11428 }, { "epoch": 2.79, "learning_rate": 4.460815953797889e-06, "loss": 0.0072, "step": 11430 }, { "epoch": 2.79, "learning_rate": 4.4575310760712395e-06, "loss": 0.0098, "step": 11432 }, { "epoch": 2.79, "learning_rate": 4.4542470613493824e-06, "loss": 0.0097, "step": 11434 }, { "epoch": 2.79, "learning_rate": 4.450963910143662e-06, "loss": 0.0127, "step": 11436 }, { "epoch": 2.79, "learning_rate": 4.447681622965292e-06, "loss": 0.0068, "step": 11438 }, { "epoch": 2.79, "learning_rate": 4.444400200325353e-06, "loss": 0.0026, "step": 11440 }, { "epoch": 2.79, "learning_rate": 4.441119642734781e-06, "loss": 0.0093, "step": 11442 }, { "epoch": 2.79, "learning_rate": 4.437839950704388e-06, "loss": 0.0104, "step": 11444 }, { "epoch": 2.79, "learning_rate": 4.434561124744849e-06, "loss": 0.0123, "step": 11446 }, { "epoch": 2.79, "learning_rate": 4.431283165366701e-06, "loss": 0.0111, "step": 11448 }, { "epoch": 2.79, "learning_rate": 4.428006073080342e-06, "loss": 0.0109, "step": 11450 }, { "epoch": 2.79, "learning_rate": 4.424729848396045e-06, "loss": 0.0086, "step": 11452 }, { "epoch": 2.79, "learning_rate": 4.421454491823942e-06, "loss": 0.0134, "step": 11454 }, { "epoch": 2.79, "learning_rate": 4.418180003874036e-06, "loss": 0.003, "step": 11456 }, { "epoch": 2.79, "learning_rate": 4.4149063850561825e-06, "loss": 0.0056, "step": 11458 }, { "epoch": 2.79, "learning_rate": 4.411633635880112e-06, "loss": 0.0057, "step": 11460 }, { "epoch": 2.79, "learning_rate": 4.408361756855419e-06, "loss": 0.0064, "step": 11462 }, { "epoch": 2.79, "learning_rate": 4.405090748491558e-06, "loss": 0.0122, "step": 11464 }, { "epoch": 2.79, "learning_rate": 4.4018206112978475e-06, "loss": 0.0118, "step": 11466 }, { "epoch": 2.79, "learning_rate": 4.398551345783474e-06, "loss": 0.0109, "step": 11468 }, { "epoch": 2.79, "learning_rate": 4.395282952457489e-06, "loss": 0.0077, "step": 11470 }, { "epoch": 2.8, "learning_rate": 4.392015431828809e-06, "loss": 0.0095, "step": 11472 }, { "epoch": 2.8, "learning_rate": 4.388748784406205e-06, "loss": 0.004, "step": 11474 }, { "epoch": 2.8, "learning_rate": 4.385483010698326e-06, "loss": 0.006, "step": 11476 }, { "epoch": 2.8, "learning_rate": 4.382218111213671e-06, "loss": 0.0066, "step": 11478 }, { "epoch": 2.8, "learning_rate": 4.378954086460616e-06, "loss": 0.0054, "step": 11480 }, { "epoch": 2.8, "learning_rate": 4.37569093694739e-06, "loss": 0.0054, "step": 11482 }, { "epoch": 2.8, "learning_rate": 4.372428663182091e-06, "loss": 0.0091, "step": 11484 }, { "epoch": 2.8, "learning_rate": 4.369167265672681e-06, "loss": 0.0098, "step": 11486 }, { "epoch": 2.8, "learning_rate": 4.36590674492699e-06, "loss": 0.0049, "step": 11488 }, { "epoch": 2.8, "learning_rate": 4.362647101452699e-06, "loss": 0.0075, "step": 11490 }, { "epoch": 2.8, "learning_rate": 4.359388335757358e-06, "loss": 0.0094, "step": 11492 }, { "epoch": 2.8, "learning_rate": 4.3561304483483855e-06, "loss": 0.0071, "step": 11494 }, { "epoch": 2.8, "learning_rate": 4.352873439733063e-06, "loss": 0.0049, "step": 11496 }, { "epoch": 2.8, "learning_rate": 4.349617310418523e-06, "loss": 0.0124, "step": 11498 }, { "epoch": 2.8, "learning_rate": 4.346362060911774e-06, "loss": 0.0144, "step": 11500 }, { "epoch": 2.8, "learning_rate": 4.343107691719688e-06, "loss": 0.0186, "step": 11502 }, { "epoch": 2.8, "learning_rate": 4.339854203348987e-06, "loss": 0.0121, "step": 11504 }, { "epoch": 2.8, "learning_rate": 4.3366015963062714e-06, "loss": 0.0165, "step": 11506 }, { "epoch": 2.8, "learning_rate": 4.333349871097988e-06, "loss": 0.0066, "step": 11508 }, { "epoch": 2.8, "learning_rate": 4.330099028230462e-06, "loss": 0.0093, "step": 11510 }, { "epoch": 2.81, "learning_rate": 4.326849068209877e-06, "loss": 0.0078, "step": 11512 }, { "epoch": 2.81, "learning_rate": 4.323599991542269e-06, "loss": 0.0138, "step": 11514 }, { "epoch": 2.81, "learning_rate": 4.320351798733547e-06, "loss": 0.0073, "step": 11516 }, { "epoch": 2.81, "learning_rate": 4.317104490289484e-06, "loss": 0.0069, "step": 11518 }, { "epoch": 2.81, "learning_rate": 4.313858066715707e-06, "loss": 0.007, "step": 11520 }, { "epoch": 2.81, "learning_rate": 4.310612528517706e-06, "loss": 0.012, "step": 11522 }, { "epoch": 2.81, "learning_rate": 4.307367876200839e-06, "loss": 0.0055, "step": 11524 }, { "epoch": 2.81, "learning_rate": 4.3041241102703225e-06, "loss": 0.0059, "step": 11526 }, { "epoch": 2.81, "learning_rate": 4.30088123123124e-06, "loss": 0.0056, "step": 11528 }, { "epoch": 2.81, "learning_rate": 4.297639239588526e-06, "loss": 0.0045, "step": 11530 }, { "epoch": 2.81, "learning_rate": 4.2943981358469885e-06, "loss": 0.0111, "step": 11532 }, { "epoch": 2.81, "learning_rate": 4.291157920511289e-06, "loss": 0.0085, "step": 11534 }, { "epoch": 2.81, "learning_rate": 4.287918594085957e-06, "loss": 0.007, "step": 11536 }, { "epoch": 2.81, "learning_rate": 4.284680157075374e-06, "loss": 0.0066, "step": 11538 }, { "epoch": 2.81, "learning_rate": 4.281442609983793e-06, "loss": 0.0082, "step": 11540 }, { "epoch": 2.81, "learning_rate": 4.278205953315327e-06, "loss": 0.0046, "step": 11542 }, { "epoch": 2.81, "learning_rate": 4.2749701875739505e-06, "loss": 0.0043, "step": 11544 }, { "epoch": 2.81, "learning_rate": 4.271735313263493e-06, "loss": 0.0065, "step": 11546 }, { "epoch": 2.81, "learning_rate": 4.268501330887644e-06, "loss": 0.0097, "step": 11548 }, { "epoch": 2.81, "learning_rate": 4.2652682409499666e-06, "loss": 0.006, "step": 11550 }, { "epoch": 2.81, "learning_rate": 4.262036043953878e-06, "loss": 0.0099, "step": 11552 }, { "epoch": 2.82, "learning_rate": 4.25880474040265e-06, "loss": 0.0189, "step": 11554 }, { "epoch": 2.82, "learning_rate": 4.255574330799426e-06, "loss": 0.0045, "step": 11556 }, { "epoch": 2.82, "learning_rate": 4.252344815647202e-06, "loss": 0.0038, "step": 11558 }, { "epoch": 2.82, "learning_rate": 4.249116195448845e-06, "loss": 0.0036, "step": 11560 }, { "epoch": 2.82, "learning_rate": 4.245888470707074e-06, "loss": 0.0075, "step": 11562 }, { "epoch": 2.82, "learning_rate": 4.242661641924461e-06, "loss": 0.0086, "step": 11564 }, { "epoch": 2.82, "learning_rate": 4.239435709603455e-06, "loss": 0.0082, "step": 11566 }, { "epoch": 2.82, "learning_rate": 4.2362106742463635e-06, "loss": 0.0134, "step": 11568 }, { "epoch": 2.82, "learning_rate": 4.232986536355339e-06, "loss": 0.0052, "step": 11570 }, { "epoch": 2.82, "learning_rate": 4.229763296432409e-06, "loss": 0.004, "step": 11572 }, { "epoch": 2.82, "learning_rate": 4.226540954979461e-06, "loss": 0.0085, "step": 11574 }, { "epoch": 2.82, "learning_rate": 4.223319512498233e-06, "loss": 0.0068, "step": 11576 }, { "epoch": 2.82, "learning_rate": 4.220098969490326e-06, "loss": 0.0092, "step": 11578 }, { "epoch": 2.82, "learning_rate": 4.216879326457206e-06, "loss": 0.0054, "step": 11580 }, { "epoch": 2.82, "learning_rate": 4.213660583900198e-06, "loss": 0.0015, "step": 11582 }, { "epoch": 2.82, "learning_rate": 4.210442742320485e-06, "loss": 0.0137, "step": 11584 }, { "epoch": 2.82, "learning_rate": 4.207225802219105e-06, "loss": 0.0078, "step": 11586 }, { "epoch": 2.82, "learning_rate": 4.204009764096966e-06, "loss": 0.0043, "step": 11588 }, { "epoch": 2.82, "learning_rate": 4.200794628454823e-06, "loss": 0.0084, "step": 11590 }, { "epoch": 2.82, "learning_rate": 4.197580395793305e-06, "loss": 0.0045, "step": 11592 }, { "epoch": 2.83, "learning_rate": 4.194367066612884e-06, "loss": 0.0051, "step": 11594 }, { "epoch": 2.83, "learning_rate": 4.191154641413905e-06, "loss": 0.0075, "step": 11596 }, { "epoch": 2.83, "learning_rate": 4.187943120696567e-06, "loss": 0.0052, "step": 11598 }, { "epoch": 2.83, "learning_rate": 4.184732504960931e-06, "loss": 0.0052, "step": 11600 }, { "epoch": 2.83, "learning_rate": 4.18152279470691e-06, "loss": 0.0042, "step": 11602 }, { "epoch": 2.83, "learning_rate": 4.178313990434281e-06, "loss": 0.0059, "step": 11604 }, { "epoch": 2.83, "learning_rate": 4.1751060926426775e-06, "loss": 0.0061, "step": 11606 }, { "epoch": 2.83, "learning_rate": 4.1718991018316015e-06, "loss": 0.0072, "step": 11608 }, { "epoch": 2.83, "learning_rate": 4.168693018500396e-06, "loss": 0.0083, "step": 11610 }, { "epoch": 2.83, "learning_rate": 4.1654878431482784e-06, "loss": 0.009, "step": 11612 }, { "epoch": 2.83, "learning_rate": 4.162283576274317e-06, "loss": 0.0079, "step": 11614 }, { "epoch": 2.83, "learning_rate": 4.159080218377447e-06, "loss": 0.0076, "step": 11616 }, { "epoch": 2.83, "learning_rate": 4.15587776995645e-06, "loss": 0.0058, "step": 11618 }, { "epoch": 2.83, "learning_rate": 4.152676231509968e-06, "loss": 0.0074, "step": 11620 }, { "epoch": 2.83, "learning_rate": 4.149475603536509e-06, "loss": 0.0151, "step": 11622 }, { "epoch": 2.83, "learning_rate": 4.14627588653444e-06, "loss": 0.0109, "step": 11624 }, { "epoch": 2.83, "learning_rate": 4.143077081001973e-06, "loss": 0.0078, "step": 11626 }, { "epoch": 2.83, "learning_rate": 4.13987918743719e-06, "loss": 0.0077, "step": 11628 }, { "epoch": 2.83, "learning_rate": 4.136682206338031e-06, "loss": 0.0066, "step": 11630 }, { "epoch": 2.83, "learning_rate": 4.133486138202288e-06, "loss": 0.0096, "step": 11632 }, { "epoch": 2.83, "learning_rate": 4.1302909835276084e-06, "loss": 0.007, "step": 11634 }, { "epoch": 2.84, "learning_rate": 4.127096742811506e-06, "loss": 0.0073, "step": 11636 }, { "epoch": 2.84, "learning_rate": 4.12390341655135e-06, "loss": 0.0064, "step": 11638 }, { "epoch": 2.84, "learning_rate": 4.1207110052443675e-06, "loss": 0.0103, "step": 11640 }, { "epoch": 2.84, "learning_rate": 4.117519509387634e-06, "loss": 0.0043, "step": 11642 }, { "epoch": 2.84, "learning_rate": 4.114328929478098e-06, "loss": 0.0103, "step": 11644 }, { "epoch": 2.84, "learning_rate": 4.111139266012551e-06, "loss": 0.0079, "step": 11646 }, { "epoch": 2.84, "learning_rate": 4.107950519487653e-06, "loss": 0.0045, "step": 11648 }, { "epoch": 2.84, "learning_rate": 4.1047626903999106e-06, "loss": 0.0066, "step": 11650 }, { "epoch": 2.84, "learning_rate": 4.101575779245696e-06, "loss": 0.0126, "step": 11652 }, { "epoch": 2.84, "learning_rate": 4.098389786521234e-06, "loss": 0.0107, "step": 11654 }, { "epoch": 2.84, "learning_rate": 4.095204712722614e-06, "loss": 0.0064, "step": 11656 }, { "epoch": 2.84, "learning_rate": 4.092020558345771e-06, "loss": 0.0102, "step": 11658 }, { "epoch": 2.84, "learning_rate": 4.088837323886504e-06, "loss": 0.008, "step": 11660 }, { "epoch": 2.84, "learning_rate": 4.0856550098404645e-06, "loss": 0.006, "step": 11662 }, { "epoch": 2.84, "learning_rate": 4.082473616703167e-06, "loss": 0.0055, "step": 11664 }, { "epoch": 2.84, "learning_rate": 4.0792931449699745e-06, "loss": 0.0074, "step": 11666 }, { "epoch": 2.84, "learning_rate": 4.076113595136113e-06, "loss": 0.0069, "step": 11668 }, { "epoch": 2.84, "learning_rate": 4.0729349676966625e-06, "loss": 0.0075, "step": 11670 }, { "epoch": 2.84, "learning_rate": 4.069757263146562e-06, "loss": 0.0077, "step": 11672 }, { "epoch": 2.84, "learning_rate": 4.066580481980603e-06, "loss": 0.0055, "step": 11674 }, { "epoch": 2.85, "learning_rate": 4.06340462469343e-06, "loss": 0.0113, "step": 11676 }, { "epoch": 2.85, "learning_rate": 4.060229691779552e-06, "loss": 0.0084, "step": 11678 }, { "epoch": 2.85, "learning_rate": 4.057055683733334e-06, "loss": 0.007, "step": 11680 }, { "epoch": 2.85, "learning_rate": 4.053882601048987e-06, "loss": 0.0068, "step": 11682 }, { "epoch": 2.85, "learning_rate": 4.050710444220585e-06, "loss": 0.0025, "step": 11684 }, { "epoch": 2.85, "learning_rate": 4.047539213742064e-06, "loss": 0.012, "step": 11686 }, { "epoch": 2.85, "learning_rate": 4.044368910107204e-06, "loss": 0.0068, "step": 11688 }, { "epoch": 2.85, "learning_rate": 4.041199533809641e-06, "loss": 0.0075, "step": 11690 }, { "epoch": 2.85, "learning_rate": 4.038031085342875e-06, "loss": 0.009, "step": 11692 }, { "epoch": 2.85, "learning_rate": 4.034863565200259e-06, "loss": 0.0105, "step": 11694 }, { "epoch": 2.85, "learning_rate": 4.031696973875003e-06, "loss": 0.0142, "step": 11696 }, { "epoch": 2.85, "learning_rate": 4.028531311860161e-06, "loss": 0.0038, "step": 11698 }, { "epoch": 2.85, "learning_rate": 4.02536657964866e-06, "loss": 0.0039, "step": 11700 }, { "epoch": 2.85, "learning_rate": 4.022202777733264e-06, "loss": 0.0039, "step": 11702 }, { "epoch": 2.85, "learning_rate": 4.01903990660661e-06, "loss": 0.0032, "step": 11704 }, { "epoch": 2.85, "learning_rate": 4.015877966761173e-06, "loss": 0.0074, "step": 11706 }, { "epoch": 2.85, "learning_rate": 4.0127169586892955e-06, "loss": 0.0067, "step": 11708 }, { "epoch": 2.85, "learning_rate": 4.00955688288317e-06, "loss": 0.0041, "step": 11710 }, { "epoch": 2.85, "learning_rate": 4.006397739834848e-06, "loss": 0.0051, "step": 11712 }, { "epoch": 2.85, "learning_rate": 4.003239530036226e-06, "loss": 0.007, "step": 11714 }, { "epoch": 2.85, "learning_rate": 4.0000822539790675e-06, "loss": 0.0054, "step": 11716 }, { "epoch": 2.86, "learning_rate": 3.9969259121549805e-06, "loss": 0.0051, "step": 11718 }, { "epoch": 2.86, "learning_rate": 3.99377050505543e-06, "loss": 0.0128, "step": 11720 }, { "epoch": 2.86, "learning_rate": 3.990616033171738e-06, "loss": 0.0083, "step": 11722 }, { "epoch": 2.86, "learning_rate": 3.987462496995082e-06, "loss": 0.0076, "step": 11724 }, { "epoch": 2.86, "learning_rate": 3.984309897016495e-06, "loss": 0.0052, "step": 11726 }, { "epoch": 2.86, "learning_rate": 3.981158233726854e-06, "loss": 0.0057, "step": 11728 }, { "epoch": 2.86, "learning_rate": 3.978007507616903e-06, "loss": 0.0045, "step": 11730 }, { "epoch": 2.86, "learning_rate": 3.974857719177227e-06, "loss": 0.0074, "step": 11732 }, { "epoch": 2.86, "learning_rate": 3.971708868898279e-06, "loss": 0.0034, "step": 11734 }, { "epoch": 2.86, "learning_rate": 3.9685609572703544e-06, "loss": 0.004, "step": 11736 }, { "epoch": 2.86, "learning_rate": 3.965413984783609e-06, "loss": 0.0062, "step": 11738 }, { "epoch": 2.86, "learning_rate": 3.9622679519280504e-06, "loss": 0.0044, "step": 11740 }, { "epoch": 2.86, "learning_rate": 3.959122859193543e-06, "loss": 0.0083, "step": 11742 }, { "epoch": 2.86, "learning_rate": 3.9559787070698e-06, "loss": 0.006, "step": 11744 }, { "epoch": 2.86, "learning_rate": 3.952835496046383e-06, "loss": 0.0018, "step": 11746 }, { "epoch": 2.86, "learning_rate": 3.949693226612722e-06, "loss": 0.0035, "step": 11748 }, { "epoch": 2.86, "learning_rate": 3.946551899258093e-06, "loss": 0.0053, "step": 11750 }, { "epoch": 2.86, "learning_rate": 3.943411514471619e-06, "loss": 0.009, "step": 11752 }, { "epoch": 2.86, "learning_rate": 3.9402720727422835e-06, "loss": 0.006, "step": 11754 }, { "epoch": 2.86, "learning_rate": 3.937133574558925e-06, "loss": 0.0121, "step": 11756 }, { "epoch": 2.87, "learning_rate": 3.933996020410231e-06, "loss": 0.0074, "step": 11758 }, { "epoch": 2.87, "learning_rate": 3.930859410784742e-06, "loss": 0.0041, "step": 11760 }, { "epoch": 2.87, "learning_rate": 3.927723746170848e-06, "loss": 0.0058, "step": 11762 }, { "epoch": 2.87, "learning_rate": 3.9245890270568e-06, "loss": 0.0102, "step": 11764 }, { "epoch": 2.87, "learning_rate": 3.921455253930699e-06, "loss": 0.0054, "step": 11766 }, { "epoch": 2.87, "learning_rate": 3.918322427280493e-06, "loss": 0.0042, "step": 11768 }, { "epoch": 2.87, "learning_rate": 3.9151905475939886e-06, "loss": 0.0079, "step": 11770 }, { "epoch": 2.87, "learning_rate": 3.912059615358849e-06, "loss": 0.0049, "step": 11772 }, { "epoch": 2.87, "learning_rate": 3.908929631062579e-06, "loss": 0.005, "step": 11774 }, { "epoch": 2.87, "learning_rate": 3.905800595192538e-06, "loss": 0.0103, "step": 11776 }, { "epoch": 2.87, "learning_rate": 3.902672508235945e-06, "loss": 0.0124, "step": 11778 }, { "epoch": 2.87, "learning_rate": 3.899545370679867e-06, "loss": 0.0067, "step": 11780 }, { "epoch": 2.87, "learning_rate": 3.896419183011226e-06, "loss": 0.0107, "step": 11782 }, { "epoch": 2.87, "learning_rate": 3.893293945716786e-06, "loss": 0.0078, "step": 11784 }, { "epoch": 2.87, "learning_rate": 3.89016965928318e-06, "loss": 0.007, "step": 11786 }, { "epoch": 2.87, "learning_rate": 3.887046324196873e-06, "loss": 0.0039, "step": 11788 }, { "epoch": 2.87, "learning_rate": 3.8839239409442e-06, "loss": 0.0046, "step": 11790 }, { "epoch": 2.87, "learning_rate": 3.8808025100113335e-06, "loss": 0.0074, "step": 11792 }, { "epoch": 2.87, "learning_rate": 3.877682031884308e-06, "loss": 0.0121, "step": 11794 }, { "epoch": 2.87, "learning_rate": 3.874562507049005e-06, "loss": 0.0084, "step": 11796 }, { "epoch": 2.87, "learning_rate": 3.871443935991161e-06, "loss": 0.0074, "step": 11798 }, { "epoch": 2.88, "learning_rate": 3.868326319196362e-06, "loss": 0.0057, "step": 11800 }, { "epoch": 2.88, "learning_rate": 3.865209657150036e-06, "loss": 0.0042, "step": 11802 }, { "epoch": 2.88, "learning_rate": 3.8620939503374775e-06, "loss": 0.0046, "step": 11804 }, { "epoch": 2.88, "learning_rate": 3.85897919924383e-06, "loss": 0.0045, "step": 11806 }, { "epoch": 2.88, "learning_rate": 3.855865404354074e-06, "loss": 0.0045, "step": 11808 }, { "epoch": 2.88, "learning_rate": 3.852752566153059e-06, "loss": 0.0062, "step": 11810 }, { "epoch": 2.88, "learning_rate": 3.849640685125473e-06, "loss": 0.0094, "step": 11812 }, { "epoch": 2.88, "learning_rate": 3.846529761755867e-06, "loss": 0.0069, "step": 11814 }, { "epoch": 2.88, "learning_rate": 3.8434197965286304e-06, "loss": 0.0027, "step": 11816 }, { "epoch": 2.88, "learning_rate": 3.840310789928005e-06, "loss": 0.0061, "step": 11818 }, { "epoch": 2.88, "learning_rate": 3.83720274243809e-06, "loss": 0.0053, "step": 11820 }, { "epoch": 2.88, "learning_rate": 3.834095654542836e-06, "loss": 0.0128, "step": 11822 }, { "epoch": 2.88, "learning_rate": 3.830989526726036e-06, "loss": 0.0097, "step": 11824 }, { "epoch": 2.88, "learning_rate": 3.8278843594713365e-06, "loss": 0.0048, "step": 11826 }, { "epoch": 2.88, "learning_rate": 3.824780153262242e-06, "loss": 0.0069, "step": 11828 }, { "epoch": 2.88, "learning_rate": 3.821676908582098e-06, "loss": 0.0068, "step": 11830 }, { "epoch": 2.88, "learning_rate": 3.8185746259141e-06, "loss": 0.0068, "step": 11832 }, { "epoch": 2.88, "learning_rate": 3.815473305741299e-06, "loss": 0.0024, "step": 11834 }, { "epoch": 2.88, "learning_rate": 3.8123729485465953e-06, "loss": 0.0063, "step": 11836 }, { "epoch": 2.88, "learning_rate": 3.8092735548127413e-06, "loss": 0.0139, "step": 11838 }, { "epoch": 2.88, "learning_rate": 3.80617512502233e-06, "loss": 0.0045, "step": 11840 }, { "epoch": 2.89, "learning_rate": 3.8030776596578177e-06, "loss": 0.0048, "step": 11842 }, { "epoch": 2.89, "learning_rate": 3.7999811592014956e-06, "loss": 0.0071, "step": 11844 }, { "epoch": 2.89, "learning_rate": 3.79688562413552e-06, "loss": 0.0088, "step": 11846 }, { "epoch": 2.89, "learning_rate": 3.793791054941882e-06, "loss": 0.0034, "step": 11848 }, { "epoch": 2.89, "learning_rate": 3.7906974521024343e-06, "loss": 0.0032, "step": 11850 }, { "epoch": 2.89, "learning_rate": 3.7876048160988734e-06, "loss": 0.0063, "step": 11852 }, { "epoch": 2.89, "learning_rate": 3.7845131474127493e-06, "loss": 0.0059, "step": 11854 }, { "epoch": 2.89, "learning_rate": 3.7814224465254525e-06, "loss": 0.0053, "step": 11856 }, { "epoch": 2.89, "learning_rate": 3.7783327139182357e-06, "loss": 0.0126, "step": 11858 }, { "epoch": 2.89, "learning_rate": 3.775243950072187e-06, "loss": 0.0067, "step": 11860 }, { "epoch": 2.89, "learning_rate": 3.772156155468257e-06, "loss": 0.0061, "step": 11862 }, { "epoch": 2.89, "learning_rate": 3.7690693305872327e-06, "loss": 0.0047, "step": 11864 }, { "epoch": 2.89, "learning_rate": 3.7659834759097603e-06, "loss": 0.0024, "step": 11866 }, { "epoch": 2.89, "learning_rate": 3.7628985919163284e-06, "loss": 0.0093, "step": 11868 }, { "epoch": 2.89, "learning_rate": 3.7598146790872825e-06, "loss": 0.0043, "step": 11870 }, { "epoch": 2.89, "learning_rate": 3.7567317379028077e-06, "loss": 0.0055, "step": 11872 }, { "epoch": 2.89, "learning_rate": 3.7536497688429384e-06, "loss": 0.0022, "step": 11874 }, { "epoch": 2.89, "learning_rate": 3.750568772387564e-06, "loss": 0.0065, "step": 11876 }, { "epoch": 2.89, "learning_rate": 3.7474887490164213e-06, "loss": 0.0083, "step": 11878 }, { "epoch": 2.89, "learning_rate": 3.744409699209088e-06, "loss": 0.0071, "step": 11880 }, { "epoch": 2.9, "learning_rate": 3.741331623444999e-06, "loss": 0.0079, "step": 11882 }, { "epoch": 2.9, "learning_rate": 3.7382545222034385e-06, "loss": 0.0039, "step": 11884 }, { "epoch": 2.9, "learning_rate": 3.735178395963529e-06, "loss": 0.0046, "step": 11886 }, { "epoch": 2.9, "learning_rate": 3.732103245204245e-06, "loss": 0.0032, "step": 11888 }, { "epoch": 2.9, "learning_rate": 3.729029070404414e-06, "loss": 0.0051, "step": 11890 }, { "epoch": 2.9, "learning_rate": 3.725955872042709e-06, "loss": 0.0062, "step": 11892 }, { "epoch": 2.9, "learning_rate": 3.722883650597654e-06, "loss": 0.0053, "step": 11894 }, { "epoch": 2.9, "learning_rate": 3.719812406547609e-06, "loss": 0.005, "step": 11896 }, { "epoch": 2.9, "learning_rate": 3.716742140370799e-06, "loss": 0.0156, "step": 11898 }, { "epoch": 2.9, "learning_rate": 3.7136728525452803e-06, "loss": 0.007, "step": 11900 }, { "epoch": 2.9, "learning_rate": 3.710604543548971e-06, "loss": 0.0022, "step": 11902 }, { "epoch": 2.9, "learning_rate": 3.707537213859623e-06, "loss": 0.0054, "step": 11904 }, { "epoch": 2.9, "learning_rate": 3.7044708639548477e-06, "loss": 0.0023, "step": 11906 }, { "epoch": 2.9, "learning_rate": 3.701405494312099e-06, "loss": 0.0041, "step": 11908 }, { "epoch": 2.9, "learning_rate": 3.6983411054086804e-06, "loss": 0.0046, "step": 11910 }, { "epoch": 2.9, "learning_rate": 3.6952776977217355e-06, "loss": 0.0056, "step": 11912 }, { "epoch": 2.9, "learning_rate": 3.6922152717282667e-06, "loss": 0.0034, "step": 11914 }, { "epoch": 2.9, "learning_rate": 3.68915382790511e-06, "loss": 0.0014, "step": 11916 }, { "epoch": 2.9, "learning_rate": 3.686093366728962e-06, "loss": 0.0099, "step": 11918 }, { "epoch": 2.9, "learning_rate": 3.683033888676354e-06, "loss": 0.0078, "step": 11920 }, { "epoch": 2.9, "learning_rate": 3.679975394223673e-06, "loss": 0.0072, "step": 11922 }, { "epoch": 2.91, "learning_rate": 3.6769178838471508e-06, "loss": 0.006, "step": 11924 }, { "epoch": 2.91, "learning_rate": 3.6738613580228664e-06, "loss": 0.0055, "step": 11926 }, { "epoch": 2.91, "learning_rate": 3.6708058172267434e-06, "loss": 0.008, "step": 11928 }, { "epoch": 2.91, "learning_rate": 3.667751261934549e-06, "loss": 0.009, "step": 11930 }, { "epoch": 2.91, "learning_rate": 3.6646976926219025e-06, "loss": 0.0031, "step": 11932 }, { "epoch": 2.91, "learning_rate": 3.6616451097642734e-06, "loss": 0.0076, "step": 11934 }, { "epoch": 2.91, "learning_rate": 3.6585935138369644e-06, "loss": 0.0048, "step": 11936 }, { "epoch": 2.91, "learning_rate": 3.655542905315135e-06, "loss": 0.0109, "step": 11938 }, { "epoch": 2.91, "learning_rate": 3.6524932846737926e-06, "loss": 0.0058, "step": 11940 }, { "epoch": 2.91, "learning_rate": 3.6494446523877835e-06, "loss": 0.0073, "step": 11942 }, { "epoch": 2.91, "learning_rate": 3.646397008931799e-06, "loss": 0.0032, "step": 11944 }, { "epoch": 2.91, "learning_rate": 3.643350354780384e-06, "loss": 0.0053, "step": 11946 }, { "epoch": 2.91, "learning_rate": 3.6403046904079255e-06, "loss": 0.0047, "step": 11948 }, { "epoch": 2.91, "learning_rate": 3.6372600162886605e-06, "loss": 0.0059, "step": 11950 }, { "epoch": 2.91, "learning_rate": 3.6342163328966617e-06, "loss": 0.0057, "step": 11952 }, { "epoch": 2.91, "learning_rate": 3.6311736407058607e-06, "loss": 0.0032, "step": 11954 }, { "epoch": 2.91, "learning_rate": 3.6281319401900194e-06, "loss": 0.0028, "step": 11956 }, { "epoch": 2.91, "learning_rate": 3.625091231822763e-06, "loss": 0.0053, "step": 11958 }, { "epoch": 2.91, "learning_rate": 3.6220515160775436e-06, "loss": 0.0049, "step": 11960 }, { "epoch": 2.91, "learning_rate": 3.6190127934276743e-06, "loss": 0.0043, "step": 11962 }, { "epoch": 2.92, "learning_rate": 3.6159750643463064e-06, "loss": 0.0045, "step": 11964 }, { "epoch": 2.92, "learning_rate": 3.61293832930644e-06, "loss": 0.0095, "step": 11966 }, { "epoch": 2.92, "learning_rate": 3.6099025887809123e-06, "loss": 0.0041, "step": 11968 }, { "epoch": 2.92, "learning_rate": 3.6068678432424175e-06, "loss": 0.0018, "step": 11970 }, { "epoch": 2.92, "learning_rate": 3.603834093163483e-06, "loss": 0.0045, "step": 11972 }, { "epoch": 2.92, "learning_rate": 3.600801339016492e-06, "loss": 0.0076, "step": 11974 }, { "epoch": 2.92, "learning_rate": 3.5977695812736613e-06, "loss": 0.0091, "step": 11976 }, { "epoch": 2.92, "learning_rate": 3.594738820407063e-06, "loss": 0.0041, "step": 11978 }, { "epoch": 2.92, "learning_rate": 3.5917090568886092e-06, "loss": 0.0048, "step": 11980 }, { "epoch": 2.92, "learning_rate": 3.588680291190061e-06, "loss": 0.0072, "step": 11982 }, { "epoch": 2.92, "learning_rate": 3.5856525237830164e-06, "loss": 0.0093, "step": 11984 }, { "epoch": 2.92, "learning_rate": 3.582625755138918e-06, "loss": 0.0062, "step": 11986 }, { "epoch": 2.92, "learning_rate": 3.579599985729062e-06, "loss": 0.0066, "step": 11988 }, { "epoch": 2.92, "learning_rate": 3.5765752160245848e-06, "loss": 0.0067, "step": 11990 }, { "epoch": 2.92, "learning_rate": 3.5735514464964615e-06, "loss": 0.0056, "step": 11992 }, { "epoch": 2.92, "learning_rate": 3.570528677615519e-06, "loss": 0.0019, "step": 11994 }, { "epoch": 2.92, "learning_rate": 3.5675069098524282e-06, "loss": 0.0092, "step": 11996 }, { "epoch": 2.92, "learning_rate": 3.564486143677699e-06, "loss": 0.0089, "step": 11998 }, { "epoch": 2.92, "learning_rate": 3.5614663795616843e-06, "loss": 0.0064, "step": 12000 }, { "epoch": 2.92, "learning_rate": 3.5584476179745876e-06, "loss": 0.0041, "step": 12002 }, { "epoch": 2.92, "learning_rate": 3.5554298593864523e-06, "loss": 0.0055, "step": 12004 }, { "epoch": 2.93, "learning_rate": 3.5524131042671705e-06, "loss": 0.0069, "step": 12006 }, { "epoch": 2.93, "learning_rate": 3.549397353086468e-06, "loss": 0.0064, "step": 12008 }, { "epoch": 2.93, "learning_rate": 3.546382606313923e-06, "loss": 0.0086, "step": 12010 }, { "epoch": 2.93, "learning_rate": 3.5433688644189577e-06, "loss": 0.0112, "step": 12012 }, { "epoch": 2.93, "learning_rate": 3.540356127870833e-06, "loss": 0.007, "step": 12014 }, { "epoch": 2.93, "learning_rate": 3.53734439713865e-06, "loss": 0.0087, "step": 12016 }, { "epoch": 2.93, "learning_rate": 3.5343336726913614e-06, "loss": 0.0026, "step": 12018 }, { "epoch": 2.93, "learning_rate": 3.5313239549977606e-06, "loss": 0.0118, "step": 12020 }, { "epoch": 2.93, "learning_rate": 3.5283152445264877e-06, "loss": 0.0107, "step": 12022 }, { "epoch": 2.93, "learning_rate": 3.5253075417460146e-06, "loss": 0.0105, "step": 12024 }, { "epoch": 2.93, "learning_rate": 3.52230084712467e-06, "loss": 0.007, "step": 12026 }, { "epoch": 2.93, "learning_rate": 3.519295161130614e-06, "loss": 0.0048, "step": 12028 }, { "epoch": 2.93, "learning_rate": 3.516290484231859e-06, "loss": 0.0048, "step": 12030 }, { "epoch": 2.93, "learning_rate": 3.513286816896251e-06, "loss": 0.0074, "step": 12032 }, { "epoch": 2.93, "learning_rate": 3.510284159591488e-06, "loss": 0.0034, "step": 12034 }, { "epoch": 2.93, "learning_rate": 3.507282512785105e-06, "loss": 0.0056, "step": 12036 }, { "epoch": 2.93, "learning_rate": 3.5042818769444866e-06, "loss": 0.0075, "step": 12038 }, { "epoch": 2.93, "learning_rate": 3.5012822525368506e-06, "loss": 0.0042, "step": 12040 }, { "epoch": 2.93, "learning_rate": 3.4982836400292573e-06, "loss": 0.0121, "step": 12042 }, { "epoch": 2.93, "learning_rate": 3.495286039888618e-06, "loss": 0.0046, "step": 12044 }, { "epoch": 2.94, "learning_rate": 3.492289452581684e-06, "loss": 0.0029, "step": 12046 }, { "epoch": 2.94, "learning_rate": 3.489293878575042e-06, "loss": 0.0044, "step": 12048 }, { "epoch": 2.94, "learning_rate": 3.486299318335128e-06, "loss": 0.0053, "step": 12050 }, { "epoch": 2.94, "learning_rate": 3.4833057723282214e-06, "loss": 0.0048, "step": 12052 }, { "epoch": 2.94, "learning_rate": 3.4803132410204364e-06, "loss": 0.0069, "step": 12054 }, { "epoch": 2.94, "learning_rate": 3.4773217248777313e-06, "loss": 0.0027, "step": 12056 }, { "epoch": 2.94, "learning_rate": 3.47433122436591e-06, "loss": 0.0055, "step": 12058 }, { "epoch": 2.94, "learning_rate": 3.4713417399506154e-06, "loss": 0.0033, "step": 12060 }, { "epoch": 2.94, "learning_rate": 3.468353272097339e-06, "loss": 0.0087, "step": 12062 }, { "epoch": 2.94, "learning_rate": 3.4653658212714003e-06, "loss": 0.0089, "step": 12064 }, { "epoch": 2.94, "learning_rate": 3.4623793879379695e-06, "loss": 0.0032, "step": 12066 }, { "epoch": 2.94, "learning_rate": 3.459393972562064e-06, "loss": 0.0053, "step": 12068 }, { "epoch": 2.94, "learning_rate": 3.4564095756085302e-06, "loss": 0.0092, "step": 12070 }, { "epoch": 2.94, "learning_rate": 3.4534261975420578e-06, "loss": 0.0104, "step": 12072 }, { "epoch": 2.94, "learning_rate": 3.4504438388271866e-06, "loss": 0.0027, "step": 12074 }, { "epoch": 2.94, "learning_rate": 3.4474624999282923e-06, "loss": 0.0097, "step": 12076 }, { "epoch": 2.94, "learning_rate": 3.444482181309594e-06, "loss": 0.0064, "step": 12078 }, { "epoch": 2.94, "learning_rate": 3.441502883435146e-06, "loss": 0.0073, "step": 12080 }, { "epoch": 2.94, "learning_rate": 3.4385246067688517e-06, "loss": 0.0048, "step": 12082 }, { "epoch": 2.94, "learning_rate": 3.4355473517744464e-06, "loss": 0.0047, "step": 12084 }, { "epoch": 2.94, "learning_rate": 3.4325711189155177e-06, "loss": 0.0017, "step": 12086 }, { "epoch": 2.95, "learning_rate": 3.429595908655482e-06, "loss": 0.0051, "step": 12088 }, { "epoch": 2.95, "learning_rate": 3.4266217214576035e-06, "loss": 0.0096, "step": 12090 }, { "epoch": 2.95, "learning_rate": 3.4236485577849886e-06, "loss": 0.0037, "step": 12092 }, { "epoch": 2.95, "learning_rate": 3.4206764181005834e-06, "loss": 0.0064, "step": 12094 }, { "epoch": 2.95, "learning_rate": 3.4177053028671693e-06, "loss": 0.0048, "step": 12096 }, { "epoch": 2.95, "learning_rate": 3.4147352125473687e-06, "loss": 0.0041, "step": 12098 }, { "epoch": 2.95, "learning_rate": 3.4117661476036514e-06, "loss": 0.0068, "step": 12100 }, { "epoch": 2.95, "learning_rate": 3.4087981084983258e-06, "loss": 0.0042, "step": 12102 }, { "epoch": 2.95, "learning_rate": 3.4058310956935314e-06, "loss": 0.0079, "step": 12104 }, { "epoch": 2.95, "learning_rate": 3.40286510965126e-06, "loss": 0.0055, "step": 12106 }, { "epoch": 2.95, "learning_rate": 3.3999001508333396e-06, "loss": 0.0041, "step": 12108 }, { "epoch": 2.95, "learning_rate": 3.39693621970143e-06, "loss": 0.0081, "step": 12110 }, { "epoch": 2.95, "learning_rate": 3.3939733167170475e-06, "loss": 0.0016, "step": 12112 }, { "epoch": 2.95, "learning_rate": 3.3910114423415296e-06, "loss": 0.0027, "step": 12114 }, { "epoch": 2.95, "learning_rate": 3.3880505970360667e-06, "loss": 0.0033, "step": 12116 }, { "epoch": 2.95, "learning_rate": 3.3850907812616873e-06, "loss": 0.0088, "step": 12118 }, { "epoch": 2.95, "learning_rate": 3.3821319954792533e-06, "loss": 0.0082, "step": 12120 }, { "epoch": 2.95, "learning_rate": 3.379174240149472e-06, "loss": 0.0142, "step": 12122 }, { "epoch": 2.95, "learning_rate": 3.3762175157328915e-06, "loss": 0.0031, "step": 12124 }, { "epoch": 2.95, "learning_rate": 3.3732618226898937e-06, "loss": 0.0034, "step": 12126 }, { "epoch": 2.96, "learning_rate": 3.3703071614806994e-06, "loss": 0.0055, "step": 12128 }, { "epoch": 2.96, "learning_rate": 3.367353532565375e-06, "loss": 0.0066, "step": 12130 }, { "epoch": 2.96, "learning_rate": 3.364400936403822e-06, "loss": 0.0012, "step": 12132 }, { "epoch": 2.96, "learning_rate": 3.3614493734557884e-06, "loss": 0.0087, "step": 12134 }, { "epoch": 2.96, "learning_rate": 3.358498844180845e-06, "loss": 0.0184, "step": 12136 }, { "epoch": 2.96, "learning_rate": 3.3555493490384215e-06, "loss": 0.0033, "step": 12138 }, { "epoch": 2.96, "learning_rate": 3.3526008884877683e-06, "loss": 0.0092, "step": 12140 }, { "epoch": 2.96, "learning_rate": 3.3496534629879905e-06, "loss": 0.0051, "step": 12142 }, { "epoch": 2.96, "learning_rate": 3.3467070729980177e-06, "loss": 0.0097, "step": 12144 }, { "epoch": 2.96, "learning_rate": 3.34376171897663e-06, "loss": 0.0036, "step": 12146 }, { "epoch": 2.96, "learning_rate": 3.3408174013824402e-06, "loss": 0.0054, "step": 12148 }, { "epoch": 2.96, "learning_rate": 3.337874120673904e-06, "loss": 0.0057, "step": 12150 }, { "epoch": 2.96, "learning_rate": 3.334931877309311e-06, "loss": 0.0063, "step": 12152 }, { "epoch": 2.96, "learning_rate": 3.3319906717467864e-06, "loss": 0.0076, "step": 12154 }, { "epoch": 2.96, "learning_rate": 3.3290505044443023e-06, "loss": 0.0033, "step": 12156 }, { "epoch": 2.96, "learning_rate": 3.3261113758596686e-06, "loss": 0.0041, "step": 12158 }, { "epoch": 2.96, "learning_rate": 3.323173286450523e-06, "loss": 0.009, "step": 12160 }, { "epoch": 2.96, "learning_rate": 3.3202362366743523e-06, "loss": 0.001, "step": 12162 }, { "epoch": 2.96, "learning_rate": 3.317300226988477e-06, "loss": 0.0087, "step": 12164 }, { "epoch": 2.96, "learning_rate": 3.3143652578500607e-06, "loss": 0.0063, "step": 12166 }, { "epoch": 2.96, "learning_rate": 3.311431329716096e-06, "loss": 0.0048, "step": 12168 }, { "epoch": 2.97, "learning_rate": 3.3084984430434152e-06, "loss": 0.004, "step": 12170 }, { "epoch": 2.97, "learning_rate": 3.305566598288694e-06, "loss": 0.0045, "step": 12172 }, { "epoch": 2.97, "learning_rate": 3.3026357959084465e-06, "loss": 0.0094, "step": 12174 }, { "epoch": 2.97, "learning_rate": 3.299706036359015e-06, "loss": 0.0046, "step": 12176 }, { "epoch": 2.97, "learning_rate": 3.296777320096589e-06, "loss": 0.0077, "step": 12178 }, { "epoch": 2.97, "learning_rate": 3.293849647577194e-06, "loss": 0.008, "step": 12180 }, { "epoch": 2.97, "learning_rate": 3.29092301925669e-06, "loss": 0.0025, "step": 12182 }, { "epoch": 2.97, "learning_rate": 3.2879974355907684e-06, "loss": 0.003, "step": 12184 }, { "epoch": 2.97, "learning_rate": 3.2850728970349722e-06, "loss": 0.0138, "step": 12186 }, { "epoch": 2.97, "learning_rate": 3.282149404044672e-06, "loss": 0.0045, "step": 12188 }, { "epoch": 2.97, "learning_rate": 3.279226957075081e-06, "loss": 0.0034, "step": 12190 }, { "epoch": 2.97, "learning_rate": 3.276305556581243e-06, "loss": 0.006, "step": 12192 }, { "epoch": 2.97, "learning_rate": 3.2733852030180444e-06, "loss": 0.0038, "step": 12194 }, { "epoch": 2.97, "learning_rate": 3.270465896840205e-06, "loss": 0.004, "step": 12196 }, { "epoch": 2.97, "learning_rate": 3.2675476385022853e-06, "loss": 0.0033, "step": 12198 }, { "epoch": 2.97, "learning_rate": 3.264630428458676e-06, "loss": 0.0039, "step": 12200 }, { "epoch": 2.97, "learning_rate": 3.2617142671636127e-06, "loss": 0.0042, "step": 12202 }, { "epoch": 2.97, "learning_rate": 3.2587991550711627e-06, "loss": 0.0049, "step": 12204 }, { "epoch": 2.97, "learning_rate": 3.255885092635236e-06, "loss": 0.0054, "step": 12206 }, { "epoch": 2.97, "learning_rate": 3.252972080309569e-06, "loss": 0.0091, "step": 12208 }, { "epoch": 2.98, "learning_rate": 3.250060118547739e-06, "loss": 0.0052, "step": 12210 }, { "epoch": 2.98, "learning_rate": 3.247149207803163e-06, "loss": 0.0076, "step": 12212 }, { "epoch": 2.98, "learning_rate": 3.2442393485290947e-06, "loss": 0.0073, "step": 12214 }, { "epoch": 2.98, "learning_rate": 3.2413305411786155e-06, "loss": 0.0051, "step": 12216 }, { "epoch": 2.98, "learning_rate": 3.2384227862046525e-06, "loss": 0.0035, "step": 12218 }, { "epoch": 2.98, "learning_rate": 3.2355160840599654e-06, "loss": 0.0072, "step": 12220 }, { "epoch": 2.98, "learning_rate": 3.232610435197152e-06, "loss": 0.0072, "step": 12222 }, { "epoch": 2.98, "learning_rate": 3.2297058400686433e-06, "loss": 0.0031, "step": 12224 }, { "epoch": 2.98, "learning_rate": 3.2268022991267e-06, "loss": 0.005, "step": 12226 }, { "epoch": 2.98, "learning_rate": 3.2238998128234324e-06, "loss": 0.0043, "step": 12228 }, { "epoch": 2.98, "learning_rate": 3.2209983816107816e-06, "loss": 0.0055, "step": 12230 }, { "epoch": 2.98, "learning_rate": 3.2180980059405166e-06, "loss": 0.0052, "step": 12232 }, { "epoch": 2.98, "learning_rate": 3.2151986862642504e-06, "loss": 0.005, "step": 12234 }, { "epoch": 2.98, "learning_rate": 3.212300423033432e-06, "loss": 0.0046, "step": 12236 }, { "epoch": 2.98, "learning_rate": 3.2094032166993425e-06, "loss": 0.0086, "step": 12238 }, { "epoch": 2.98, "learning_rate": 3.2065070677130938e-06, "loss": 0.0026, "step": 12240 }, { "epoch": 2.98, "learning_rate": 3.203611976525641e-06, "loss": 0.0073, "step": 12242 }, { "epoch": 2.98, "learning_rate": 3.200717943587778e-06, "loss": 0.0089, "step": 12244 }, { "epoch": 2.98, "learning_rate": 3.1978249693501185e-06, "loss": 0.0077, "step": 12246 }, { "epoch": 2.98, "learning_rate": 3.194933054263126e-06, "loss": 0.0051, "step": 12248 }, { "epoch": 2.98, "learning_rate": 3.1920421987770956e-06, "loss": 0.008, "step": 12250 }, { "epoch": 2.99, "learning_rate": 3.1891524033421527e-06, "loss": 0.004, "step": 12252 }, { "epoch": 2.99, "learning_rate": 3.1862636684082582e-06, "loss": 0.0069, "step": 12254 }, { "epoch": 2.99, "learning_rate": 3.183375994425213e-06, "loss": 0.0092, "step": 12256 }, { "epoch": 2.99, "learning_rate": 3.1804893818426498e-06, "loss": 0.009, "step": 12258 }, { "epoch": 2.99, "learning_rate": 3.177603831110039e-06, "loss": 0.006, "step": 12260 }, { "epoch": 2.99, "learning_rate": 3.1747193426766763e-06, "loss": 0.0082, "step": 12262 }, { "epoch": 2.99, "learning_rate": 3.1718359169917033e-06, "loss": 0.0062, "step": 12264 }, { "epoch": 2.99, "learning_rate": 3.168953554504094e-06, "loss": 0.0022, "step": 12266 }, { "epoch": 2.99, "learning_rate": 3.16607225566265e-06, "loss": 0.0059, "step": 12268 }, { "epoch": 2.99, "learning_rate": 3.1631920209160106e-06, "loss": 0.0032, "step": 12270 }, { "epoch": 2.99, "learning_rate": 3.1603128507126515e-06, "loss": 0.005, "step": 12272 }, { "epoch": 2.99, "learning_rate": 3.1574347455008813e-06, "loss": 0.004, "step": 12274 }, { "epoch": 2.99, "learning_rate": 3.154557705728849e-06, "loss": 0.0037, "step": 12276 }, { "epoch": 2.99, "learning_rate": 3.1516817318445225e-06, "loss": 0.0105, "step": 12278 }, { "epoch": 2.99, "learning_rate": 3.148806824295719e-06, "loss": 0.005, "step": 12280 }, { "epoch": 2.99, "learning_rate": 3.14593298353008e-06, "loss": 0.0061, "step": 12282 }, { "epoch": 2.99, "learning_rate": 3.1430602099950892e-06, "loss": 0.0064, "step": 12284 }, { "epoch": 2.99, "learning_rate": 3.140188504138053e-06, "loss": 0.0035, "step": 12286 }, { "epoch": 2.99, "learning_rate": 3.137317866406121e-06, "loss": 0.0061, "step": 12288 }, { "epoch": 2.99, "learning_rate": 3.134448297246274e-06, "loss": 0.0032, "step": 12290 }, { "epoch": 3.0, "learning_rate": 3.1315797971053295e-06, "loss": 0.0025, "step": 12292 }, { "epoch": 3.0, "learning_rate": 3.12871236642993e-06, "loss": 0.0024, "step": 12294 }, { "epoch": 3.0, "learning_rate": 3.1258460056665553e-06, "loss": 0.0048, "step": 12296 }, { "epoch": 3.0, "learning_rate": 3.1229807152615198e-06, "loss": 0.0053, "step": 12298 }, { "epoch": 3.0, "learning_rate": 3.1201164956609777e-06, "loss": 0.0025, "step": 12300 }, { "epoch": 3.0, "learning_rate": 3.1172533473109022e-06, "loss": 0.0006, "step": 12302 }, { "epoch": 3.0, "learning_rate": 3.11439127065711e-06, "loss": 0.0017, "step": 12304 }, { "epoch": 3.0, "learning_rate": 3.1115302661452527e-06, "loss": 0.0041, "step": 12306 }, { "epoch": 3.0, "learning_rate": 3.1086703342208026e-06, "loss": 0.0037, "step": 12308 }, { "epoch": 3.0, "learning_rate": 3.10581147532908e-06, "loss": 0.0053, "step": 12310 }, { "epoch": 3.0, "learning_rate": 3.1029536899152257e-06, "loss": 0.0054, "step": 12312 }, { "epoch": 3.0, "learning_rate": 3.1000969784242217e-06, "loss": 0.0023, "step": 12314 }, { "epoch": 3.0, "learning_rate": 3.097241341300882e-06, "loss": 0.0053, "step": 12316 }, { "epoch": 3.0, "learning_rate": 3.0943867789898453e-06, "loss": 0.004, "step": 12318 }, { "epoch": 3.0, "learning_rate": 3.0915332919355912e-06, "loss": 0.0024, "step": 12320 }, { "epoch": 3.0, "learning_rate": 3.088680880582434e-06, "loss": 0.0017, "step": 12322 }, { "epoch": 3.0, "learning_rate": 3.0858295453745123e-06, "loss": 0.0019, "step": 12324 }, { "epoch": 3.0, "learning_rate": 3.0829792867557973e-06, "loss": 0.004, "step": 12326 }, { "epoch": 3.0, "learning_rate": 3.0801301051700992e-06, "loss": 0.0037, "step": 12328 }, { "epoch": 3.0, "learning_rate": 3.077282001061057e-06, "loss": 0.002, "step": 12330 }, { "epoch": 3.0, "learning_rate": 3.074434974872147e-06, "loss": 0.0034, "step": 12332 }, { "epoch": 3.01, "learning_rate": 3.0715890270466652e-06, "loss": 0.0009, "step": 12334 }, { "epoch": 3.01, "learning_rate": 3.0687441580277546e-06, "loss": 0.0026, "step": 12336 }, { "epoch": 3.01, "learning_rate": 3.0659003682583766e-06, "loss": 0.0038, "step": 12338 }, { "epoch": 3.01, "learning_rate": 3.0630576581813365e-06, "loss": 0.004, "step": 12340 }, { "epoch": 3.01, "learning_rate": 3.06021602823926e-06, "loss": 0.0034, "step": 12342 }, { "epoch": 3.01, "learning_rate": 3.057375478874615e-06, "loss": 0.0027, "step": 12344 }, { "epoch": 3.01, "learning_rate": 3.054536010529694e-06, "loss": 0.0017, "step": 12346 }, { "epoch": 3.01, "learning_rate": 3.05169762364663e-06, "loss": 0.0055, "step": 12348 }, { "epoch": 3.01, "learning_rate": 3.0488603186673772e-06, "loss": 0.0036, "step": 12350 }, { "epoch": 3.01, "learning_rate": 3.046024096033723e-06, "loss": 0.0037, "step": 12352 }, { "epoch": 3.01, "learning_rate": 3.0431889561872905e-06, "loss": 0.0031, "step": 12354 }, { "epoch": 3.01, "learning_rate": 3.0403548995695387e-06, "loss": 0.0063, "step": 12356 }, { "epoch": 3.01, "learning_rate": 3.0375219266217428e-06, "loss": 0.003, "step": 12358 }, { "epoch": 3.01, "learning_rate": 3.0346900377850218e-06, "loss": 0.0024, "step": 12360 }, { "epoch": 3.01, "learning_rate": 3.031859233500325e-06, "loss": 0.0021, "step": 12362 }, { "epoch": 3.01, "learning_rate": 3.02902951420843e-06, "loss": 0.0017, "step": 12364 }, { "epoch": 3.01, "learning_rate": 3.0262008803499455e-06, "loss": 0.0026, "step": 12366 }, { "epoch": 3.01, "learning_rate": 3.023373332365306e-06, "loss": 0.0063, "step": 12368 }, { "epoch": 3.01, "learning_rate": 3.020546870694787e-06, "loss": 0.0027, "step": 12370 }, { "epoch": 3.01, "learning_rate": 3.017721495778493e-06, "loss": 0.0034, "step": 12372 }, { "epoch": 3.02, "learning_rate": 3.01489720805635e-06, "loss": 0.0023, "step": 12374 }, { "epoch": 3.02, "learning_rate": 3.0120740079681233e-06, "loss": 0.004, "step": 12376 }, { "epoch": 3.02, "learning_rate": 3.0092518959534125e-06, "loss": 0.0037, "step": 12378 }, { "epoch": 3.02, "learning_rate": 3.0064308724516357e-06, "loss": 0.0056, "step": 12380 }, { "epoch": 3.02, "learning_rate": 3.0036109379020474e-06, "loss": 0.0012, "step": 12382 }, { "epoch": 3.02, "learning_rate": 3.000792092743735e-06, "loss": 0.0037, "step": 12384 }, { "epoch": 3.02, "learning_rate": 2.9979743374156136e-06, "loss": 0.0066, "step": 12386 }, { "epoch": 3.02, "learning_rate": 2.9951576723564335e-06, "loss": 0.002, "step": 12388 }, { "epoch": 3.02, "learning_rate": 2.9923420980047647e-06, "loss": 0.0041, "step": 12390 }, { "epoch": 3.02, "learning_rate": 2.9895276147990192e-06, "loss": 0.0025, "step": 12392 }, { "epoch": 3.02, "learning_rate": 2.986714223177427e-06, "loss": 0.0019, "step": 12394 }, { "epoch": 3.02, "learning_rate": 2.983901923578062e-06, "loss": 0.0038, "step": 12396 }, { "epoch": 3.02, "learning_rate": 2.981090716438814e-06, "loss": 0.0045, "step": 12398 }, { "epoch": 3.02, "learning_rate": 2.978280602197412e-06, "loss": 0.0033, "step": 12400 }, { "epoch": 3.02, "learning_rate": 2.9754715812914135e-06, "loss": 0.0026, "step": 12402 }, { "epoch": 3.02, "learning_rate": 2.9726636541582076e-06, "loss": 0.0022, "step": 12404 }, { "epoch": 3.02, "learning_rate": 2.9698568212350056e-06, "loss": 0.0018, "step": 12406 }, { "epoch": 3.02, "learning_rate": 2.9670510829588508e-06, "loss": 0.0045, "step": 12408 }, { "epoch": 3.02, "learning_rate": 2.9642464397666202e-06, "loss": 0.0013, "step": 12410 }, { "epoch": 3.02, "learning_rate": 2.9614428920950234e-06, "loss": 0.0051, "step": 12412 }, { "epoch": 3.02, "learning_rate": 2.9586404403805847e-06, "loss": 0.0036, "step": 12414 }, { "epoch": 3.03, "learning_rate": 2.9558390850596743e-06, "loss": 0.0057, "step": 12416 }, { "epoch": 3.03, "learning_rate": 2.9530388265684806e-06, "loss": 0.0029, "step": 12418 }, { "epoch": 3.03, "learning_rate": 2.950239665343032e-06, "loss": 0.0027, "step": 12420 }, { "epoch": 3.03, "learning_rate": 2.947441601819173e-06, "loss": 0.002, "step": 12422 }, { "epoch": 3.03, "learning_rate": 2.944644636432583e-06, "loss": 0.0051, "step": 12424 }, { "epoch": 3.03, "learning_rate": 2.941848769618774e-06, "loss": 0.0067, "step": 12426 }, { "epoch": 3.03, "learning_rate": 2.939054001813084e-06, "loss": 0.0042, "step": 12428 }, { "epoch": 3.03, "learning_rate": 2.9362603334506755e-06, "loss": 0.0019, "step": 12430 }, { "epoch": 3.03, "learning_rate": 2.933467764966548e-06, "loss": 0.0042, "step": 12432 }, { "epoch": 3.03, "learning_rate": 2.930676296795527e-06, "loss": 0.0038, "step": 12434 }, { "epoch": 3.03, "learning_rate": 2.927885929372264e-06, "loss": 0.003, "step": 12436 }, { "epoch": 3.03, "learning_rate": 2.925096663131237e-06, "loss": 0.0052, "step": 12438 }, { "epoch": 3.03, "learning_rate": 2.9223084985067583e-06, "loss": 0.0061, "step": 12440 }, { "epoch": 3.03, "learning_rate": 2.919521435932967e-06, "loss": 0.0026, "step": 12442 }, { "epoch": 3.03, "learning_rate": 2.9167354758438337e-06, "loss": 0.0026, "step": 12444 }, { "epoch": 3.03, "learning_rate": 2.9139506186731457e-06, "loss": 0.0017, "step": 12446 }, { "epoch": 3.03, "learning_rate": 2.9111668648545354e-06, "loss": 0.0028, "step": 12448 }, { "epoch": 3.03, "learning_rate": 2.908384214821446e-06, "loss": 0.0021, "step": 12450 }, { "epoch": 3.03, "learning_rate": 2.9056026690071646e-06, "loss": 0.0045, "step": 12452 }, { "epoch": 3.03, "learning_rate": 2.9028222278447936e-06, "loss": 0.0021, "step": 12454 }, { "epoch": 3.04, "learning_rate": 2.9000428917672717e-06, "loss": 0.0039, "step": 12456 }, { "epoch": 3.04, "learning_rate": 2.897264661207362e-06, "loss": 0.0037, "step": 12458 }, { "epoch": 3.04, "learning_rate": 2.894487536597659e-06, "loss": 0.0041, "step": 12460 }, { "epoch": 3.04, "learning_rate": 2.891711518370578e-06, "loss": 0.0013, "step": 12462 }, { "epoch": 3.04, "learning_rate": 2.8889366069583703e-06, "loss": 0.0029, "step": 12464 }, { "epoch": 3.04, "learning_rate": 2.8861628027931065e-06, "loss": 0.0063, "step": 12466 }, { "epoch": 3.04, "learning_rate": 2.883390106306694e-06, "loss": 0.0018, "step": 12468 }, { "epoch": 3.04, "learning_rate": 2.8806185179308567e-06, "loss": 0.0007, "step": 12470 }, { "epoch": 3.04, "learning_rate": 2.877848038097156e-06, "loss": 0.0053, "step": 12472 }, { "epoch": 3.04, "learning_rate": 2.8750786672369757e-06, "loss": 0.0016, "step": 12474 }, { "epoch": 3.04, "learning_rate": 2.872310405781533e-06, "loss": 0.0059, "step": 12476 }, { "epoch": 3.04, "learning_rate": 2.8695432541618627e-06, "loss": 0.0007, "step": 12478 }, { "epoch": 3.04, "learning_rate": 2.866777212808829e-06, "loss": 0.009, "step": 12480 }, { "epoch": 3.04, "learning_rate": 2.8640122821531278e-06, "loss": 0.0036, "step": 12482 }, { "epoch": 3.04, "learning_rate": 2.8612484626252836e-06, "loss": 0.0019, "step": 12484 }, { "epoch": 3.04, "learning_rate": 2.8584857546556387e-06, "loss": 0.0013, "step": 12486 }, { "epoch": 3.04, "learning_rate": 2.855724158674371e-06, "loss": 0.0007, "step": 12488 }, { "epoch": 3.04, "learning_rate": 2.852963675111484e-06, "loss": 0.0045, "step": 12490 }, { "epoch": 3.04, "learning_rate": 2.8502043043968042e-06, "loss": 0.0081, "step": 12492 }, { "epoch": 3.04, "learning_rate": 2.847446046959982e-06, "loss": 0.0025, "step": 12494 }, { "epoch": 3.04, "learning_rate": 2.844688903230505e-06, "loss": 0.0019, "step": 12496 }, { "epoch": 3.05, "learning_rate": 2.841932873637678e-06, "loss": 0.0026, "step": 12498 }, { "epoch": 3.05, "learning_rate": 2.8391779586106427e-06, "loss": 0.0037, "step": 12500 }, { "epoch": 3.05, "learning_rate": 2.8364241585783514e-06, "loss": 0.0047, "step": 12502 }, { "epoch": 3.05, "learning_rate": 2.8336714739695993e-06, "loss": 0.0047, "step": 12504 }, { "epoch": 3.05, "learning_rate": 2.8309199052129944e-06, "loss": 0.0006, "step": 12506 }, { "epoch": 3.05, "learning_rate": 2.828169452736983e-06, "loss": 0.0023, "step": 12508 }, { "epoch": 3.05, "learning_rate": 2.8254201169698246e-06, "loss": 0.0106, "step": 12510 }, { "epoch": 3.05, "learning_rate": 2.822671898339615e-06, "loss": 0.0034, "step": 12512 }, { "epoch": 3.05, "learning_rate": 2.8199247972742747e-06, "loss": 0.0014, "step": 12514 }, { "epoch": 3.05, "learning_rate": 2.817178814201549e-06, "loss": 0.004, "step": 12516 }, { "epoch": 3.05, "learning_rate": 2.8144339495490035e-06, "loss": 0.0031, "step": 12518 }, { "epoch": 3.05, "learning_rate": 2.8116902037440408e-06, "loss": 0.0031, "step": 12520 }, { "epoch": 3.05, "learning_rate": 2.8089475772138775e-06, "loss": 0.004, "step": 12522 }, { "epoch": 3.05, "learning_rate": 2.8062060703855666e-06, "loss": 0.0053, "step": 12524 }, { "epoch": 3.05, "learning_rate": 2.803465683685975e-06, "loss": 0.0024, "step": 12526 }, { "epoch": 3.05, "learning_rate": 2.800726417541807e-06, "loss": 0.0038, "step": 12528 }, { "epoch": 3.05, "learning_rate": 2.797988272379586e-06, "loss": 0.0035, "step": 12530 }, { "epoch": 3.05, "learning_rate": 2.7952512486256646e-06, "loss": 0.003, "step": 12532 }, { "epoch": 3.05, "learning_rate": 2.792515346706216e-06, "loss": 0.0062, "step": 12534 }, { "epoch": 3.05, "learning_rate": 2.789780567047239e-06, "loss": 0.0017, "step": 12536 }, { "epoch": 3.06, "learning_rate": 2.7870469100745612e-06, "loss": 0.0041, "step": 12538 }, { "epoch": 3.06, "learning_rate": 2.784314376213837e-06, "loss": 0.0019, "step": 12540 }, { "epoch": 3.06, "learning_rate": 2.781582965890538e-06, "loss": 0.0024, "step": 12542 }, { "epoch": 3.06, "learning_rate": 2.7788526795299675e-06, "loss": 0.0006, "step": 12544 }, { "epoch": 3.06, "learning_rate": 2.7761235175572554e-06, "loss": 0.0016, "step": 12546 }, { "epoch": 3.06, "learning_rate": 2.7733954803973505e-06, "loss": 0.001, "step": 12548 }, { "epoch": 3.06, "learning_rate": 2.7706685684750245e-06, "loss": 0.004, "step": 12550 }, { "epoch": 3.06, "learning_rate": 2.767942782214884e-06, "loss": 0.003, "step": 12552 }, { "epoch": 3.06, "learning_rate": 2.7652181220413523e-06, "loss": 0.0029, "step": 12554 }, { "epoch": 3.06, "learning_rate": 2.762494588378685e-06, "loss": 0.0024, "step": 12556 }, { "epoch": 3.06, "learning_rate": 2.7597721816509482e-06, "loss": 0.0023, "step": 12558 }, { "epoch": 3.06, "learning_rate": 2.757050902282051e-06, "loss": 0.0028, "step": 12560 }, { "epoch": 3.06, "learning_rate": 2.7543307506957084e-06, "loss": 0.0008, "step": 12562 }, { "epoch": 3.06, "learning_rate": 2.7516117273154765e-06, "loss": 0.0022, "step": 12564 }, { "epoch": 3.06, "learning_rate": 2.7488938325647217e-06, "loss": 0.0012, "step": 12566 }, { "epoch": 3.06, "learning_rate": 2.746177066866643e-06, "loss": 0.0027, "step": 12568 }, { "epoch": 3.06, "learning_rate": 2.743461430644264e-06, "loss": 0.0058, "step": 12570 }, { "epoch": 3.06, "learning_rate": 2.740746924320431e-06, "loss": 0.0035, "step": 12572 }, { "epoch": 3.06, "learning_rate": 2.7380335483178068e-06, "loss": 0.0027, "step": 12574 }, { "epoch": 3.06, "learning_rate": 2.735321303058893e-06, "loss": 0.0013, "step": 12576 }, { "epoch": 3.06, "learning_rate": 2.7326101889659983e-06, "loss": 0.0055, "step": 12578 }, { "epoch": 3.07, "learning_rate": 2.7299002064612734e-06, "loss": 0.0036, "step": 12580 }, { "epoch": 3.07, "learning_rate": 2.7271913559666742e-06, "loss": 0.0033, "step": 12582 }, { "epoch": 3.07, "learning_rate": 2.724483637903993e-06, "loss": 0.0017, "step": 12584 }, { "epoch": 3.07, "learning_rate": 2.721777052694844e-06, "loss": 0.0032, "step": 12586 }, { "epoch": 3.07, "learning_rate": 2.7190716007606634e-06, "loss": 0.0045, "step": 12588 }, { "epoch": 3.07, "learning_rate": 2.7163672825227104e-06, "loss": 0.0021, "step": 12590 }, { "epoch": 3.07, "learning_rate": 2.7136640984020636e-06, "loss": 0.0073, "step": 12592 }, { "epoch": 3.07, "learning_rate": 2.710962048819633e-06, "loss": 0.0043, "step": 12594 }, { "epoch": 3.07, "learning_rate": 2.7082611341961506e-06, "loss": 0.0044, "step": 12596 }, { "epoch": 3.07, "learning_rate": 2.7055613549521643e-06, "loss": 0.002, "step": 12598 }, { "epoch": 3.07, "learning_rate": 2.702862711508053e-06, "loss": 0.0052, "step": 12600 }, { "epoch": 3.07, "learning_rate": 2.70016520428402e-06, "loss": 0.0032, "step": 12602 }, { "epoch": 3.07, "learning_rate": 2.6974688337000832e-06, "loss": 0.0029, "step": 12604 }, { "epoch": 3.07, "learning_rate": 2.694773600176085e-06, "loss": 0.0054, "step": 12606 }, { "epoch": 3.07, "learning_rate": 2.6920795041316994e-06, "loss": 0.0024, "step": 12608 }, { "epoch": 3.07, "learning_rate": 2.6893865459864154e-06, "loss": 0.001, "step": 12610 }, { "epoch": 3.07, "learning_rate": 2.686694726159551e-06, "loss": 0.0049, "step": 12612 }, { "epoch": 3.07, "learning_rate": 2.6840040450702366e-06, "loss": 0.0027, "step": 12614 }, { "epoch": 3.07, "learning_rate": 2.6813145031374356e-06, "loss": 0.0036, "step": 12616 }, { "epoch": 3.07, "learning_rate": 2.678626100779933e-06, "loss": 0.0044, "step": 12618 }, { "epoch": 3.08, "learning_rate": 2.67593883841633e-06, "loss": 0.0016, "step": 12620 }, { "epoch": 3.08, "learning_rate": 2.6732527164650523e-06, "loss": 0.0041, "step": 12622 }, { "epoch": 3.08, "learning_rate": 2.6705677353443514e-06, "loss": 0.0021, "step": 12624 }, { "epoch": 3.08, "learning_rate": 2.6678838954722997e-06, "loss": 0.0043, "step": 12626 }, { "epoch": 3.08, "learning_rate": 2.6652011972667945e-06, "loss": 0.0044, "step": 12628 }, { "epoch": 3.08, "learning_rate": 2.6625196411455456e-06, "loss": 0.0021, "step": 12630 }, { "epoch": 3.08, "learning_rate": 2.6598392275261e-06, "loss": 0.0029, "step": 12632 }, { "epoch": 3.08, "learning_rate": 2.657159956825811e-06, "loss": 0.0051, "step": 12634 }, { "epoch": 3.08, "learning_rate": 2.654481829461868e-06, "loss": 0.0023, "step": 12636 }, { "epoch": 3.08, "learning_rate": 2.6518048458512692e-06, "loss": 0.0072, "step": 12638 }, { "epoch": 3.08, "learning_rate": 2.6491290064108454e-06, "loss": 0.0047, "step": 12640 }, { "epoch": 3.08, "learning_rate": 2.646454311557245e-06, "loss": 0.0029, "step": 12642 }, { "epoch": 3.08, "learning_rate": 2.643780761706941e-06, "loss": 0.0058, "step": 12644 }, { "epoch": 3.08, "learning_rate": 2.641108357276223e-06, "loss": 0.0009, "step": 12646 }, { "epoch": 3.08, "learning_rate": 2.6384370986812027e-06, "loss": 0.0008, "step": 12648 }, { "epoch": 3.08, "learning_rate": 2.6357669863378155e-06, "loss": 0.0003, "step": 12650 }, { "epoch": 3.08, "learning_rate": 2.633098020661824e-06, "loss": 0.005, "step": 12652 }, { "epoch": 3.08, "learning_rate": 2.6304302020687997e-06, "loss": 0.0035, "step": 12654 }, { "epoch": 3.08, "learning_rate": 2.627763530974147e-06, "loss": 0.0042, "step": 12656 }, { "epoch": 3.08, "learning_rate": 2.6250980077930864e-06, "loss": 0.0033, "step": 12658 }, { "epoch": 3.08, "learning_rate": 2.6224336329406607e-06, "loss": 0.0013, "step": 12660 }, { "epoch": 3.09, "learning_rate": 2.6197704068317274e-06, "loss": 0.0012, "step": 12662 }, { "epoch": 3.09, "learning_rate": 2.6171083298809773e-06, "loss": 0.0041, "step": 12664 }, { "epoch": 3.09, "learning_rate": 2.6144474025029145e-06, "loss": 0.0078, "step": 12666 }, { "epoch": 3.09, "learning_rate": 2.611787625111868e-06, "loss": 0.0024, "step": 12668 }, { "epoch": 3.09, "learning_rate": 2.6091289981219813e-06, "loss": 0.0074, "step": 12670 }, { "epoch": 3.09, "learning_rate": 2.606471521947225e-06, "loss": 0.0033, "step": 12672 }, { "epoch": 3.09, "learning_rate": 2.6038151970013903e-06, "loss": 0.0067, "step": 12674 }, { "epoch": 3.09, "learning_rate": 2.601160023698086e-06, "loss": 0.0036, "step": 12676 }, { "epoch": 3.09, "learning_rate": 2.5985060024507403e-06, "loss": 0.0034, "step": 12678 }, { "epoch": 3.09, "learning_rate": 2.5958531336726057e-06, "loss": 0.0026, "step": 12680 }, { "epoch": 3.09, "learning_rate": 2.593201417776755e-06, "loss": 0.0011, "step": 12682 }, { "epoch": 3.09, "learning_rate": 2.590550855176085e-06, "loss": 0.0017, "step": 12684 }, { "epoch": 3.09, "learning_rate": 2.5879014462832997e-06, "loss": 0.0023, "step": 12686 }, { "epoch": 3.09, "learning_rate": 2.5852531915109415e-06, "loss": 0.0032, "step": 12688 }, { "epoch": 3.09, "learning_rate": 2.5826060912713544e-06, "loss": 0.0011, "step": 12690 }, { "epoch": 3.09, "learning_rate": 2.579960145976722e-06, "loss": 0.0019, "step": 12692 }, { "epoch": 3.09, "learning_rate": 2.5773153560390297e-06, "loss": 0.0027, "step": 12694 }, { "epoch": 3.09, "learning_rate": 2.5746717218700946e-06, "loss": 0.0036, "step": 12696 }, { "epoch": 3.09, "learning_rate": 2.572029243881552e-06, "loss": 0.0065, "step": 12698 }, { "epoch": 3.09, "learning_rate": 2.5693879224848585e-06, "loss": 0.0035, "step": 12700 }, { "epoch": 3.1, "learning_rate": 2.5667477580912836e-06, "loss": 0.0034, "step": 12702 }, { "epoch": 3.1, "learning_rate": 2.56410875111192e-06, "loss": 0.0022, "step": 12704 }, { "epoch": 3.1, "learning_rate": 2.561470901957683e-06, "loss": 0.0019, "step": 12706 }, { "epoch": 3.1, "learning_rate": 2.5588342110393105e-06, "loss": 0.0012, "step": 12708 }, { "epoch": 3.1, "learning_rate": 2.5561986787673477e-06, "loss": 0.0047, "step": 12710 }, { "epoch": 3.1, "learning_rate": 2.553564305552171e-06, "loss": 0.0055, "step": 12712 }, { "epoch": 3.1, "learning_rate": 2.5509310918039755e-06, "loss": 0.0014, "step": 12714 }, { "epoch": 3.1, "learning_rate": 2.5482990379327654e-06, "loss": 0.0059, "step": 12716 }, { "epoch": 3.1, "learning_rate": 2.545668144348379e-06, "loss": 0.0036, "step": 12718 }, { "epoch": 3.1, "learning_rate": 2.54303841146046e-06, "loss": 0.0026, "step": 12720 }, { "epoch": 3.1, "learning_rate": 2.5404098396784815e-06, "loss": 0.0042, "step": 12722 }, { "epoch": 3.1, "learning_rate": 2.537782429411736e-06, "loss": 0.0037, "step": 12724 }, { "epoch": 3.1, "learning_rate": 2.5351561810693225e-06, "loss": 0.0022, "step": 12726 }, { "epoch": 3.1, "learning_rate": 2.5325310950601734e-06, "loss": 0.0012, "step": 12728 }, { "epoch": 3.1, "learning_rate": 2.5299071717930367e-06, "loss": 0.0019, "step": 12730 }, { "epoch": 3.1, "learning_rate": 2.527284411676475e-06, "loss": 0.0025, "step": 12732 }, { "epoch": 3.1, "learning_rate": 2.524662815118868e-06, "loss": 0.0027, "step": 12734 }, { "epoch": 3.1, "learning_rate": 2.5220423825284223e-06, "loss": 0.0028, "step": 12736 }, { "epoch": 3.1, "learning_rate": 2.519423114313159e-06, "loss": 0.004, "step": 12738 }, { "epoch": 3.1, "learning_rate": 2.5168050108809206e-06, "loss": 0.0011, "step": 12740 }, { "epoch": 3.1, "learning_rate": 2.5141880726393607e-06, "loss": 0.0071, "step": 12742 }, { "epoch": 3.11, "learning_rate": 2.5115722999959636e-06, "loss": 0.0039, "step": 12744 }, { "epoch": 3.11, "learning_rate": 2.508957693358016e-06, "loss": 0.003, "step": 12746 }, { "epoch": 3.11, "learning_rate": 2.506344253132641e-06, "loss": 0.0011, "step": 12748 }, { "epoch": 3.11, "learning_rate": 2.5037319797267635e-06, "loss": 0.0075, "step": 12750 }, { "epoch": 3.11, "learning_rate": 2.5011208735471383e-06, "loss": 0.0071, "step": 12752 }, { "epoch": 3.11, "learning_rate": 2.4985109350003344e-06, "loss": 0.0055, "step": 12754 }, { "epoch": 3.11, "learning_rate": 2.4959021644927427e-06, "loss": 0.0034, "step": 12756 }, { "epoch": 3.11, "learning_rate": 2.493294562430565e-06, "loss": 0.004, "step": 12758 }, { "epoch": 3.11, "learning_rate": 2.4906881292198213e-06, "loss": 0.0034, "step": 12760 }, { "epoch": 3.11, "learning_rate": 2.4880828652663613e-06, "loss": 0.0025, "step": 12762 }, { "epoch": 3.11, "learning_rate": 2.4854787709758366e-06, "loss": 0.0035, "step": 12764 }, { "epoch": 3.11, "learning_rate": 2.4828758467537274e-06, "loss": 0.0022, "step": 12766 }, { "epoch": 3.11, "learning_rate": 2.4802740930053305e-06, "loss": 0.0021, "step": 12768 }, { "epoch": 3.11, "learning_rate": 2.4776735101357606e-06, "loss": 0.0029, "step": 12770 }, { "epoch": 3.11, "learning_rate": 2.4750740985499434e-06, "loss": 0.0038, "step": 12772 }, { "epoch": 3.11, "learning_rate": 2.472475858652632e-06, "loss": 0.0028, "step": 12774 }, { "epoch": 3.11, "learning_rate": 2.4698787908483867e-06, "loss": 0.0033, "step": 12776 }, { "epoch": 3.11, "learning_rate": 2.467282895541597e-06, "loss": 0.0068, "step": 12778 }, { "epoch": 3.11, "learning_rate": 2.464688173136458e-06, "loss": 0.0032, "step": 12780 }, { "epoch": 3.11, "learning_rate": 2.4620946240369904e-06, "loss": 0.0028, "step": 12782 }, { "epoch": 3.12, "learning_rate": 2.459502248647029e-06, "loss": 0.0026, "step": 12784 }, { "epoch": 3.12, "learning_rate": 2.4569110473702306e-06, "loss": 0.0051, "step": 12786 }, { "epoch": 3.12, "learning_rate": 2.454321020610061e-06, "loss": 0.0024, "step": 12788 }, { "epoch": 3.12, "learning_rate": 2.4517321687698047e-06, "loss": 0.0022, "step": 12790 }, { "epoch": 3.12, "learning_rate": 2.4491444922525687e-06, "loss": 0.0026, "step": 12792 }, { "epoch": 3.12, "learning_rate": 2.446557991461277e-06, "loss": 0.0054, "step": 12794 }, { "epoch": 3.12, "learning_rate": 2.443972666798662e-06, "loss": 0.001, "step": 12796 }, { "epoch": 3.12, "learning_rate": 2.4413885186672804e-06, "loss": 0.0067, "step": 12798 }, { "epoch": 3.12, "learning_rate": 2.4388055474695084e-06, "loss": 0.0021, "step": 12800 }, { "epoch": 3.12, "learning_rate": 2.4362237536075295e-06, "loss": 0.0064, "step": 12802 }, { "epoch": 3.12, "learning_rate": 2.433643137483347e-06, "loss": 0.0017, "step": 12804 }, { "epoch": 3.12, "learning_rate": 2.4310636994987856e-06, "loss": 0.0065, "step": 12806 }, { "epoch": 3.12, "learning_rate": 2.4284854400554834e-06, "loss": 0.0023, "step": 12808 }, { "epoch": 3.12, "learning_rate": 2.425908359554897e-06, "loss": 0.0014, "step": 12810 }, { "epoch": 3.12, "learning_rate": 2.423332458398293e-06, "loss": 0.0024, "step": 12812 }, { "epoch": 3.12, "learning_rate": 2.420757736986762e-06, "loss": 0.0014, "step": 12814 }, { "epoch": 3.12, "learning_rate": 2.4181841957212095e-06, "loss": 0.0004, "step": 12816 }, { "epoch": 3.12, "learning_rate": 2.4156118350023527e-06, "loss": 0.0057, "step": 12818 }, { "epoch": 3.12, "learning_rate": 2.413040655230726e-06, "loss": 0.0036, "step": 12820 }, { "epoch": 3.12, "learning_rate": 2.410470656806685e-06, "loss": 0.0045, "step": 12822 }, { "epoch": 3.12, "learning_rate": 2.4079018401303967e-06, "loss": 0.0046, "step": 12824 }, { "epoch": 3.13, "learning_rate": 2.405334205601848e-06, "loss": 0.0018, "step": 12826 }, { "epoch": 3.13, "learning_rate": 2.402767753620835e-06, "loss": 0.0008, "step": 12828 }, { "epoch": 3.13, "learning_rate": 2.4002024845869786e-06, "loss": 0.0019, "step": 12830 }, { "epoch": 3.13, "learning_rate": 2.3976383988997064e-06, "loss": 0.003, "step": 12832 }, { "epoch": 3.13, "learning_rate": 2.39507549695827e-06, "loss": 0.0013, "step": 12834 }, { "epoch": 3.13, "learning_rate": 2.392513779161729e-06, "loss": 0.004, "step": 12836 }, { "epoch": 3.13, "learning_rate": 2.3899532459089634e-06, "loss": 0.003, "step": 12838 }, { "epoch": 3.13, "learning_rate": 2.3873938975986698e-06, "loss": 0.0016, "step": 12840 }, { "epoch": 3.13, "learning_rate": 2.3848357346293593e-06, "loss": 0.0038, "step": 12842 }, { "epoch": 3.13, "learning_rate": 2.3822787573993557e-06, "loss": 0.0034, "step": 12844 }, { "epoch": 3.13, "learning_rate": 2.3797229663067965e-06, "loss": 0.0011, "step": 12846 }, { "epoch": 3.13, "learning_rate": 2.377168361749642e-06, "loss": 0.0044, "step": 12848 }, { "epoch": 3.13, "learning_rate": 2.374614944125665e-06, "loss": 0.003, "step": 12850 }, { "epoch": 3.13, "learning_rate": 2.372062713832447e-06, "loss": 0.0029, "step": 12852 }, { "epoch": 3.13, "learning_rate": 2.3695116712673927e-06, "loss": 0.0018, "step": 12854 }, { "epoch": 3.13, "learning_rate": 2.366961816827721e-06, "loss": 0.0037, "step": 12856 }, { "epoch": 3.13, "learning_rate": 2.364413150910463e-06, "loss": 0.0024, "step": 12858 }, { "epoch": 3.13, "learning_rate": 2.36186567391246e-06, "loss": 0.0019, "step": 12860 }, { "epoch": 3.13, "learning_rate": 2.3593193862303775e-06, "loss": 0.0063, "step": 12862 }, { "epoch": 3.13, "learning_rate": 2.356774288260694e-06, "loss": 0.002, "step": 12864 }, { "epoch": 3.13, "learning_rate": 2.3542303803997004e-06, "loss": 0.0039, "step": 12866 }, { "epoch": 3.14, "learning_rate": 2.3516876630434982e-06, "loss": 0.0034, "step": 12868 }, { "epoch": 3.14, "learning_rate": 2.349146136588011e-06, "loss": 0.0034, "step": 12870 }, { "epoch": 3.14, "learning_rate": 2.346605801428976e-06, "loss": 0.0062, "step": 12872 }, { "epoch": 3.14, "learning_rate": 2.344066657961942e-06, "loss": 0.0018, "step": 12874 }, { "epoch": 3.14, "learning_rate": 2.341528706582268e-06, "loss": 0.0019, "step": 12876 }, { "epoch": 3.14, "learning_rate": 2.3389919476851354e-06, "loss": 0.0044, "step": 12878 }, { "epoch": 3.14, "learning_rate": 2.3364563816655384e-06, "loss": 0.004, "step": 12880 }, { "epoch": 3.14, "learning_rate": 2.333922008918286e-06, "loss": 0.0028, "step": 12882 }, { "epoch": 3.14, "learning_rate": 2.3313888298379937e-06, "loss": 0.003, "step": 12884 }, { "epoch": 3.14, "learning_rate": 2.328856844819103e-06, "loss": 0.0028, "step": 12886 }, { "epoch": 3.14, "learning_rate": 2.3263260542558564e-06, "loss": 0.0022, "step": 12888 }, { "epoch": 3.14, "learning_rate": 2.3237964585423244e-06, "loss": 0.0051, "step": 12890 }, { "epoch": 3.14, "learning_rate": 2.321268058072379e-06, "loss": 0.0009, "step": 12892 }, { "epoch": 3.14, "learning_rate": 2.3187408532397126e-06, "loss": 0.0035, "step": 12894 }, { "epoch": 3.14, "learning_rate": 2.3162148444378318e-06, "loss": 0.0022, "step": 12896 }, { "epoch": 3.14, "learning_rate": 2.3136900320600574e-06, "loss": 0.0025, "step": 12898 }, { "epoch": 3.14, "learning_rate": 2.3111664164995196e-06, "loss": 0.0041, "step": 12900 }, { "epoch": 3.14, "learning_rate": 2.3086439981491616e-06, "loss": 0.0019, "step": 12902 }, { "epoch": 3.14, "learning_rate": 2.306122777401746e-06, "loss": 0.0026, "step": 12904 }, { "epoch": 3.14, "learning_rate": 2.3036027546498495e-06, "loss": 0.0035, "step": 12906 }, { "epoch": 3.15, "learning_rate": 2.3010839302858535e-06, "loss": 0.0062, "step": 12908 }, { "epoch": 3.15, "learning_rate": 2.29856630470196e-06, "loss": 0.0046, "step": 12910 }, { "epoch": 3.15, "learning_rate": 2.296049878290185e-06, "loss": 0.0034, "step": 12912 }, { "epoch": 3.15, "learning_rate": 2.2935346514423517e-06, "loss": 0.0027, "step": 12914 }, { "epoch": 3.15, "learning_rate": 2.2910206245501043e-06, "loss": 0.0014, "step": 12916 }, { "epoch": 3.15, "learning_rate": 2.2885077980048907e-06, "loss": 0.0145, "step": 12918 }, { "epoch": 3.15, "learning_rate": 2.2859961721979807e-06, "loss": 0.0044, "step": 12920 }, { "epoch": 3.15, "learning_rate": 2.2834857475204555e-06, "loss": 0.0092, "step": 12922 }, { "epoch": 3.15, "learning_rate": 2.2809765243632008e-06, "loss": 0.0017, "step": 12924 }, { "epoch": 3.15, "learning_rate": 2.2784685031169275e-06, "loss": 0.0012, "step": 12926 }, { "epoch": 3.15, "learning_rate": 2.275961684172154e-06, "loss": 0.0012, "step": 12928 }, { "epoch": 3.15, "learning_rate": 2.27345606791921e-06, "loss": 0.0034, "step": 12930 }, { "epoch": 3.15, "learning_rate": 2.2709516547482347e-06, "loss": 0.003, "step": 12932 }, { "epoch": 3.15, "learning_rate": 2.2684484450491894e-06, "loss": 0.0038, "step": 12934 }, { "epoch": 3.15, "learning_rate": 2.26594643921184e-06, "loss": 0.004, "step": 12936 }, { "epoch": 3.15, "learning_rate": 2.263445637625774e-06, "loss": 0.0022, "step": 12938 }, { "epoch": 3.15, "learning_rate": 2.2609460406803775e-06, "loss": 0.0088, "step": 12940 }, { "epoch": 3.15, "learning_rate": 2.258447648764863e-06, "loss": 0.0027, "step": 12942 }, { "epoch": 3.15, "learning_rate": 2.2559504622682436e-06, "loss": 0.0018, "step": 12944 }, { "epoch": 3.15, "learning_rate": 2.253454481579357e-06, "loss": 0.0015, "step": 12946 }, { "epoch": 3.15, "learning_rate": 2.2509597070868393e-06, "loss": 0.0015, "step": 12948 }, { "epoch": 3.16, "learning_rate": 2.2484661391791494e-06, "loss": 0.0017, "step": 12950 }, { "epoch": 3.16, "learning_rate": 2.2459737782445546e-06, "loss": 0.004, "step": 12952 }, { "epoch": 3.16, "learning_rate": 2.2434826246711384e-06, "loss": 0.0011, "step": 12954 }, { "epoch": 3.16, "learning_rate": 2.2409926788467883e-06, "loss": 0.0048, "step": 12956 }, { "epoch": 3.16, "learning_rate": 2.238503941159206e-06, "loss": 0.0041, "step": 12958 }, { "epoch": 3.16, "learning_rate": 2.2360164119959093e-06, "loss": 0.0009, "step": 12960 }, { "epoch": 3.16, "learning_rate": 2.2335300917442293e-06, "loss": 0.0022, "step": 12962 }, { "epoch": 3.16, "learning_rate": 2.231044980791298e-06, "loss": 0.0043, "step": 12964 }, { "epoch": 3.16, "learning_rate": 2.2285610795240695e-06, "loss": 0.0025, "step": 12966 }, { "epoch": 3.16, "learning_rate": 2.226078388329307e-06, "loss": 0.0052, "step": 12968 }, { "epoch": 3.16, "learning_rate": 2.223596907593586e-06, "loss": 0.0025, "step": 12970 }, { "epoch": 3.16, "learning_rate": 2.221116637703291e-06, "loss": 0.0071, "step": 12972 }, { "epoch": 3.16, "learning_rate": 2.2186375790446148e-06, "loss": 0.0079, "step": 12974 }, { "epoch": 3.16, "learning_rate": 2.216159732003568e-06, "loss": 0.0048, "step": 12976 }, { "epoch": 3.16, "learning_rate": 2.213683096965975e-06, "loss": 0.0024, "step": 12978 }, { "epoch": 3.16, "learning_rate": 2.2112076743174593e-06, "loss": 0.0014, "step": 12980 }, { "epoch": 3.16, "learning_rate": 2.208733464443468e-06, "loss": 0.004, "step": 12982 }, { "epoch": 3.16, "learning_rate": 2.2062604677292554e-06, "loss": 0.0035, "step": 12984 }, { "epoch": 3.16, "learning_rate": 2.2037886845598845e-06, "loss": 0.0028, "step": 12986 }, { "epoch": 3.16, "learning_rate": 2.201318115320227e-06, "loss": 0.0032, "step": 12988 }, { "epoch": 3.17, "learning_rate": 2.1988487603949726e-06, "loss": 0.0038, "step": 12990 }, { "epoch": 3.17, "learning_rate": 2.19638062016862e-06, "loss": 0.0045, "step": 12992 }, { "epoch": 3.17, "learning_rate": 2.193913695025478e-06, "loss": 0.0031, "step": 12994 }, { "epoch": 3.17, "learning_rate": 2.1914479853496618e-06, "loss": 0.0019, "step": 12996 }, { "epoch": 3.17, "learning_rate": 2.1889834915251063e-06, "loss": 0.0085, "step": 12998 }, { "epoch": 3.17, "learning_rate": 2.1865202139355467e-06, "loss": 0.0017, "step": 13000 }, { "epoch": 3.17, "learning_rate": 2.18405815296454e-06, "loss": 0.0049, "step": 13002 }, { "epoch": 3.17, "learning_rate": 2.181597308995441e-06, "loss": 0.0026, "step": 13004 }, { "epoch": 3.17, "learning_rate": 2.1791376824114265e-06, "loss": 0.0048, "step": 13006 }, { "epoch": 3.17, "learning_rate": 2.176679273595477e-06, "loss": 0.0019, "step": 13008 }, { "epoch": 3.17, "learning_rate": 2.1742220829303904e-06, "loss": 0.0043, "step": 13010 }, { "epoch": 3.17, "learning_rate": 2.1717661107987663e-06, "loss": 0.001, "step": 13012 }, { "epoch": 3.17, "learning_rate": 2.169311357583016e-06, "loss": 0.0035, "step": 13014 }, { "epoch": 3.17, "learning_rate": 2.1668578236653647e-06, "loss": 0.0027, "step": 13016 }, { "epoch": 3.17, "learning_rate": 2.1644055094278515e-06, "loss": 0.0045, "step": 13018 }, { "epoch": 3.17, "learning_rate": 2.1619544152523143e-06, "loss": 0.0032, "step": 13020 }, { "epoch": 3.17, "learning_rate": 2.1595045415204087e-06, "loss": 0.0011, "step": 13022 }, { "epoch": 3.17, "learning_rate": 2.1570558886135997e-06, "loss": 0.0027, "step": 13024 }, { "epoch": 3.17, "learning_rate": 2.154608456913163e-06, "loss": 0.0017, "step": 13026 }, { "epoch": 3.17, "learning_rate": 2.1521622468001814e-06, "loss": 0.0009, "step": 13028 }, { "epoch": 3.17, "learning_rate": 2.1497172586555447e-06, "loss": 0.0038, "step": 13030 }, { "epoch": 3.18, "learning_rate": 2.1472734928599593e-06, "loss": 0.0014, "step": 13032 }, { "epoch": 3.18, "learning_rate": 2.14483094979394e-06, "loss": 0.0025, "step": 13034 }, { "epoch": 3.18, "learning_rate": 2.1423896298378066e-06, "loss": 0.0036, "step": 13036 }, { "epoch": 3.18, "learning_rate": 2.1399495333716912e-06, "loss": 0.0082, "step": 13038 }, { "epoch": 3.18, "learning_rate": 2.137510660775539e-06, "loss": 0.0008, "step": 13040 }, { "epoch": 3.18, "learning_rate": 2.1350730124290985e-06, "loss": 0.0031, "step": 13042 }, { "epoch": 3.18, "learning_rate": 2.132636588711928e-06, "loss": 0.0055, "step": 13044 }, { "epoch": 3.18, "learning_rate": 2.1302013900034e-06, "loss": 0.0034, "step": 13046 }, { "epoch": 3.18, "learning_rate": 2.1277674166826935e-06, "loss": 0.0052, "step": 13048 }, { "epoch": 3.18, "learning_rate": 2.125334669128798e-06, "loss": 0.0048, "step": 13050 }, { "epoch": 3.18, "learning_rate": 2.1229031477205075e-06, "loss": 0.0017, "step": 13052 }, { "epoch": 3.18, "learning_rate": 2.120472852836433e-06, "loss": 0.0025, "step": 13054 }, { "epoch": 3.18, "learning_rate": 2.118043784854984e-06, "loss": 0.0039, "step": 13056 }, { "epoch": 3.18, "learning_rate": 2.115615944154391e-06, "loss": 0.0025, "step": 13058 }, { "epoch": 3.18, "learning_rate": 2.1131893311126815e-06, "loss": 0.0016, "step": 13060 }, { "epoch": 3.18, "learning_rate": 2.1107639461077024e-06, "loss": 0.0043, "step": 13062 }, { "epoch": 3.18, "learning_rate": 2.108339789517102e-06, "loss": 0.0066, "step": 13064 }, { "epoch": 3.18, "learning_rate": 2.1059168617183433e-06, "loss": 0.0024, "step": 13066 }, { "epoch": 3.18, "learning_rate": 2.1034951630886913e-06, "loss": 0.0016, "step": 13068 }, { "epoch": 3.18, "learning_rate": 2.101074694005226e-06, "loss": 0.0032, "step": 13070 }, { "epoch": 3.19, "learning_rate": 2.098655454844829e-06, "loss": 0.0022, "step": 13072 }, { "epoch": 3.19, "learning_rate": 2.0962374459841982e-06, "loss": 0.001, "step": 13074 }, { "epoch": 3.19, "learning_rate": 2.0938206677998328e-06, "loss": 0.0033, "step": 13076 }, { "epoch": 3.19, "learning_rate": 2.091405120668045e-06, "loss": 0.001, "step": 13078 }, { "epoch": 3.19, "learning_rate": 2.0889908049649543e-06, "loss": 0.0053, "step": 13080 }, { "epoch": 3.19, "learning_rate": 2.0865777210664904e-06, "loss": 0.0048, "step": 13082 }, { "epoch": 3.19, "learning_rate": 2.084165869348387e-06, "loss": 0.0019, "step": 13084 }, { "epoch": 3.19, "learning_rate": 2.0817552501861836e-06, "loss": 0.003, "step": 13086 }, { "epoch": 3.19, "learning_rate": 2.0793458639552365e-06, "loss": 0.004, "step": 13088 }, { "epoch": 3.19, "learning_rate": 2.0769377110307076e-06, "loss": 0.0057, "step": 13090 }, { "epoch": 3.19, "learning_rate": 2.074530791787559e-06, "loss": 0.0038, "step": 13092 }, { "epoch": 3.19, "learning_rate": 2.07212510660057e-06, "loss": 0.0034, "step": 13094 }, { "epoch": 3.19, "learning_rate": 2.069720655844325e-06, "loss": 0.0028, "step": 13096 }, { "epoch": 3.19, "learning_rate": 2.067317439893215e-06, "loss": 0.0033, "step": 13098 }, { "epoch": 3.19, "learning_rate": 2.064915459121435e-06, "loss": 0.0045, "step": 13100 }, { "epoch": 3.19, "learning_rate": 2.0625147139029956e-06, "loss": 0.0027, "step": 13102 }, { "epoch": 3.19, "learning_rate": 2.06011520461171e-06, "loss": 0.0017, "step": 13104 }, { "epoch": 3.19, "learning_rate": 2.0577169316212033e-06, "loss": 0.0024, "step": 13106 }, { "epoch": 3.19, "learning_rate": 2.055319895304899e-06, "loss": 0.0007, "step": 13108 }, { "epoch": 3.19, "learning_rate": 2.0529240960360396e-06, "loss": 0.0081, "step": 13110 }, { "epoch": 3.19, "learning_rate": 2.050529534187665e-06, "loss": 0.0029, "step": 13112 }, { "epoch": 3.2, "learning_rate": 2.048136210132632e-06, "loss": 0.0007, "step": 13114 }, { "epoch": 3.2, "learning_rate": 2.045744124243593e-06, "loss": 0.0005, "step": 13116 }, { "epoch": 3.2, "learning_rate": 2.0433532768930176e-06, "loss": 0.0028, "step": 13118 }, { "epoch": 3.2, "learning_rate": 2.0409636684531774e-06, "loss": 0.0018, "step": 13120 }, { "epoch": 3.2, "learning_rate": 2.0385752992961584e-06, "loss": 0.0033, "step": 13122 }, { "epoch": 3.2, "learning_rate": 2.03618816979384e-06, "loss": 0.0014, "step": 13124 }, { "epoch": 3.2, "learning_rate": 2.0338022803179225e-06, "loss": 0.0038, "step": 13126 }, { "epoch": 3.2, "learning_rate": 2.0314176312399024e-06, "loss": 0.0038, "step": 13128 }, { "epoch": 3.2, "learning_rate": 2.0290342229310923e-06, "loss": 0.0029, "step": 13130 }, { "epoch": 3.2, "learning_rate": 2.0266520557626022e-06, "loss": 0.0027, "step": 13132 }, { "epoch": 3.2, "learning_rate": 2.0242711301053573e-06, "loss": 0.0038, "step": 13134 }, { "epoch": 3.2, "learning_rate": 2.0218914463300843e-06, "loss": 0.0047, "step": 13136 }, { "epoch": 3.2, "learning_rate": 2.019513004807322e-06, "loss": 0.003, "step": 13138 }, { "epoch": 3.2, "learning_rate": 2.0171358059074085e-06, "loss": 0.0044, "step": 13140 }, { "epoch": 3.2, "learning_rate": 2.0147598500004904e-06, "loss": 0.0014, "step": 13142 }, { "epoch": 3.2, "learning_rate": 2.012385137456523e-06, "loss": 0.0049, "step": 13144 }, { "epoch": 3.2, "learning_rate": 2.0100116686452708e-06, "loss": 0.0033, "step": 13146 }, { "epoch": 3.2, "learning_rate": 2.0076394439362968e-06, "loss": 0.0015, "step": 13148 }, { "epoch": 3.2, "learning_rate": 2.005268463698975e-06, "loss": 0.0034, "step": 13150 }, { "epoch": 3.2, "learning_rate": 2.0028987283024902e-06, "loss": 0.0003, "step": 13152 }, { "epoch": 3.21, "learning_rate": 2.0005302381158232e-06, "loss": 0.0015, "step": 13154 }, { "epoch": 3.21, "learning_rate": 1.9981629935077663e-06, "loss": 0.0004, "step": 13156 }, { "epoch": 3.21, "learning_rate": 1.995796994846917e-06, "loss": 0.0046, "step": 13158 }, { "epoch": 3.21, "learning_rate": 1.9934322425016816e-06, "loss": 0.0025, "step": 13160 }, { "epoch": 3.21, "learning_rate": 1.9910687368402715e-06, "loss": 0.0041, "step": 13162 }, { "epoch": 3.21, "learning_rate": 1.9887064782306984e-06, "loss": 0.0049, "step": 13164 }, { "epoch": 3.21, "learning_rate": 1.986345467040789e-06, "loss": 0.001, "step": 13166 }, { "epoch": 3.21, "learning_rate": 1.983985703638166e-06, "loss": 0.0052, "step": 13168 }, { "epoch": 3.21, "learning_rate": 1.981627188390266e-06, "loss": 0.0017, "step": 13170 }, { "epoch": 3.21, "learning_rate": 1.979269921664324e-06, "loss": 0.0027, "step": 13172 }, { "epoch": 3.21, "learning_rate": 1.9769139038273875e-06, "loss": 0.0038, "step": 13174 }, { "epoch": 3.21, "learning_rate": 1.9745591352463055e-06, "loss": 0.0023, "step": 13176 }, { "epoch": 3.21, "learning_rate": 1.9722056162877367e-06, "loss": 0.0024, "step": 13178 }, { "epoch": 3.21, "learning_rate": 1.969853347318137e-06, "loss": 0.0015, "step": 13180 }, { "epoch": 3.21, "learning_rate": 1.9675023287037766e-06, "loss": 0.0046, "step": 13182 }, { "epoch": 3.21, "learning_rate": 1.9651525608107237e-06, "loss": 0.0028, "step": 13184 }, { "epoch": 3.21, "learning_rate": 1.962804044004859e-06, "loss": 0.003, "step": 13186 }, { "epoch": 3.21, "learning_rate": 1.960456778651859e-06, "loss": 0.0027, "step": 13188 }, { "epoch": 3.21, "learning_rate": 1.958110765117215e-06, "loss": 0.0009, "step": 13190 }, { "epoch": 3.21, "learning_rate": 1.9557660037662175e-06, "loss": 0.0037, "step": 13192 }, { "epoch": 3.21, "learning_rate": 1.953422494963968e-06, "loss": 0.0019, "step": 13194 }, { "epoch": 3.22, "learning_rate": 1.951080239075366e-06, "loss": 0.0022, "step": 13196 }, { "epoch": 3.22, "learning_rate": 1.9487392364651135e-06, "loss": 0.0007, "step": 13198 }, { "epoch": 3.22, "learning_rate": 1.946399487497729e-06, "loss": 0.0047, "step": 13200 }, { "epoch": 3.22, "learning_rate": 1.9440609925375285e-06, "loss": 0.0057, "step": 13202 }, { "epoch": 3.22, "learning_rate": 1.941723751948631e-06, "loss": 0.0044, "step": 13204 }, { "epoch": 3.22, "learning_rate": 1.9393877660949646e-06, "loss": 0.0037, "step": 13206 }, { "epoch": 3.22, "learning_rate": 1.9370530353402604e-06, "loss": 0.0029, "step": 13208 }, { "epoch": 3.22, "learning_rate": 1.9347195600480552e-06, "loss": 0.0023, "step": 13210 }, { "epoch": 3.22, "learning_rate": 1.9323873405816827e-06, "loss": 0.0037, "step": 13212 }, { "epoch": 3.22, "learning_rate": 1.9300563773042924e-06, "loss": 0.0056, "step": 13214 }, { "epoch": 3.22, "learning_rate": 1.927726670578831e-06, "loss": 0.0034, "step": 13216 }, { "epoch": 3.22, "learning_rate": 1.9253982207680566e-06, "loss": 0.003, "step": 13218 }, { "epoch": 3.22, "learning_rate": 1.92307102823452e-06, "loss": 0.0011, "step": 13220 }, { "epoch": 3.22, "learning_rate": 1.9207450933405857e-06, "loss": 0.0019, "step": 13222 }, { "epoch": 3.22, "learning_rate": 1.9184204164484223e-06, "loss": 0.0046, "step": 13224 }, { "epoch": 3.22, "learning_rate": 1.9160969979199974e-06, "loss": 0.0014, "step": 13226 }, { "epoch": 3.22, "learning_rate": 1.9137748381170818e-06, "loss": 0.0004, "step": 13228 }, { "epoch": 3.22, "learning_rate": 1.9114539374012564e-06, "loss": 0.0013, "step": 13230 }, { "epoch": 3.22, "learning_rate": 1.909134296133903e-06, "loss": 0.0043, "step": 13232 }, { "epoch": 3.22, "learning_rate": 1.906815914676211e-06, "loss": 0.0046, "step": 13234 }, { "epoch": 3.23, "learning_rate": 1.9044987933891635e-06, "loss": 0.0021, "step": 13236 }, { "epoch": 3.23, "learning_rate": 1.9021829326335606e-06, "loss": 0.0011, "step": 13238 }, { "epoch": 3.23, "learning_rate": 1.8998683327699929e-06, "loss": 0.002, "step": 13240 }, { "epoch": 3.23, "learning_rate": 1.8975549941588679e-06, "loss": 0.0022, "step": 13242 }, { "epoch": 3.23, "learning_rate": 1.8952429171603849e-06, "loss": 0.0015, "step": 13244 }, { "epoch": 3.23, "learning_rate": 1.8929321021345526e-06, "loss": 0.0016, "step": 13246 }, { "epoch": 3.23, "learning_rate": 1.8906225494411844e-06, "loss": 0.0064, "step": 13248 }, { "epoch": 3.23, "learning_rate": 1.8883142594398974e-06, "loss": 0.0023, "step": 13250 }, { "epoch": 3.23, "learning_rate": 1.8860072324901079e-06, "loss": 0.0027, "step": 13252 }, { "epoch": 3.23, "learning_rate": 1.8837014689510337e-06, "loss": 0.0054, "step": 13254 }, { "epoch": 3.23, "learning_rate": 1.8813969691817047e-06, "loss": 0.0017, "step": 13256 }, { "epoch": 3.23, "learning_rate": 1.8790937335409487e-06, "loss": 0.0049, "step": 13258 }, { "epoch": 3.23, "learning_rate": 1.8767917623873943e-06, "loss": 0.0014, "step": 13260 }, { "epoch": 3.23, "learning_rate": 1.8744910560794783e-06, "loss": 0.005, "step": 13262 }, { "epoch": 3.23, "learning_rate": 1.8721916149754392e-06, "loss": 0.0017, "step": 13264 }, { "epoch": 3.23, "learning_rate": 1.8698934394333146e-06, "loss": 0.0019, "step": 13266 }, { "epoch": 3.23, "learning_rate": 1.8675965298109522e-06, "loss": 0.004, "step": 13268 }, { "epoch": 3.23, "learning_rate": 1.8653008864659926e-06, "loss": 0.0031, "step": 13270 }, { "epoch": 3.23, "learning_rate": 1.863006509755888e-06, "loss": 0.004, "step": 13272 }, { "epoch": 3.23, "learning_rate": 1.8607134000378945e-06, "loss": 0.0057, "step": 13274 }, { "epoch": 3.23, "learning_rate": 1.858421557669059e-06, "loss": 0.0022, "step": 13276 }, { "epoch": 3.24, "learning_rate": 1.8561309830062434e-06, "loss": 0.0038, "step": 13278 }, { "epoch": 3.24, "learning_rate": 1.8538416764061097e-06, "loss": 0.0013, "step": 13280 }, { "epoch": 3.24, "learning_rate": 1.8515536382251176e-06, "loss": 0.0009, "step": 13282 }, { "epoch": 3.24, "learning_rate": 1.8492668688195293e-06, "loss": 0.0024, "step": 13284 }, { "epoch": 3.24, "learning_rate": 1.8469813685454164e-06, "loss": 0.0017, "step": 13286 }, { "epoch": 3.24, "learning_rate": 1.8446971377586497e-06, "loss": 0.0015, "step": 13288 }, { "epoch": 3.24, "learning_rate": 1.8424141768148962e-06, "loss": 0.0021, "step": 13290 }, { "epoch": 3.24, "learning_rate": 1.8401324860696347e-06, "loss": 0.0043, "step": 13292 }, { "epoch": 3.24, "learning_rate": 1.8378520658781429e-06, "loss": 0.0035, "step": 13294 }, { "epoch": 3.24, "learning_rate": 1.8355729165954973e-06, "loss": 0.0052, "step": 13296 }, { "epoch": 3.24, "learning_rate": 1.833295038576577e-06, "loss": 0.0015, "step": 13298 }, { "epoch": 3.24, "learning_rate": 1.831018432176066e-06, "loss": 0.0028, "step": 13300 }, { "epoch": 3.24, "learning_rate": 1.8287430977484522e-06, "loss": 0.0016, "step": 13302 }, { "epoch": 3.24, "learning_rate": 1.826469035648022e-06, "loss": 0.0084, "step": 13304 }, { "epoch": 3.24, "learning_rate": 1.8241962462288609e-06, "loss": 0.0057, "step": 13306 }, { "epoch": 3.24, "learning_rate": 1.821924729844865e-06, "loss": 0.0027, "step": 13308 }, { "epoch": 3.24, "learning_rate": 1.8196544868497201e-06, "loss": 0.0024, "step": 13310 }, { "epoch": 3.24, "learning_rate": 1.8173855175969268e-06, "loss": 0.0045, "step": 13312 }, { "epoch": 3.24, "learning_rate": 1.8151178224397758e-06, "loss": 0.0042, "step": 13314 }, { "epoch": 3.24, "learning_rate": 1.812851401731367e-06, "loss": 0.0047, "step": 13316 }, { "epoch": 3.25, "learning_rate": 1.8105862558245979e-06, "loss": 0.0032, "step": 13318 }, { "epoch": 3.25, "learning_rate": 1.8083223850721743e-06, "loss": 0.0015, "step": 13320 }, { "epoch": 3.25, "learning_rate": 1.8060597898265907e-06, "loss": 0.0009, "step": 13322 }, { "epoch": 3.25, "learning_rate": 1.8037984704401579e-06, "loss": 0.0023, "step": 13324 }, { "epoch": 3.25, "learning_rate": 1.8015384272649726e-06, "loss": 0.0023, "step": 13326 }, { "epoch": 3.25, "learning_rate": 1.7992796606529483e-06, "loss": 0.0031, "step": 13328 }, { "epoch": 3.25, "learning_rate": 1.7970221709557868e-06, "loss": 0.0016, "step": 13330 }, { "epoch": 3.25, "learning_rate": 1.7947659585249977e-06, "loss": 0.001, "step": 13332 }, { "epoch": 3.25, "learning_rate": 1.7925110237118914e-06, "loss": 0.0024, "step": 13334 }, { "epoch": 3.25, "learning_rate": 1.7902573668675815e-06, "loss": 0.0025, "step": 13336 }, { "epoch": 3.25, "learning_rate": 1.7880049883429774e-06, "loss": 0.0026, "step": 13338 }, { "epoch": 3.25, "learning_rate": 1.785753888488787e-06, "loss": 0.0028, "step": 13340 }, { "epoch": 3.25, "learning_rate": 1.783504067655528e-06, "loss": 0.0034, "step": 13342 }, { "epoch": 3.25, "learning_rate": 1.7812555261935172e-06, "loss": 0.003, "step": 13344 }, { "epoch": 3.25, "learning_rate": 1.779008264452864e-06, "loss": 0.0033, "step": 13346 }, { "epoch": 3.25, "learning_rate": 1.7767622827834873e-06, "loss": 0.0065, "step": 13348 }, { "epoch": 3.25, "learning_rate": 1.7745175815351057e-06, "loss": 0.0006, "step": 13350 }, { "epoch": 3.25, "learning_rate": 1.7722741610572348e-06, "loss": 0.0037, "step": 13352 }, { "epoch": 3.25, "learning_rate": 1.7700320216991874e-06, "loss": 0.0033, "step": 13354 }, { "epoch": 3.25, "learning_rate": 1.7677911638100876e-06, "loss": 0.0062, "step": 13356 }, { "epoch": 3.25, "learning_rate": 1.765551587738852e-06, "loss": 0.0038, "step": 13358 }, { "epoch": 3.26, "learning_rate": 1.7633132938342023e-06, "loss": 0.003, "step": 13360 }, { "epoch": 3.26, "learning_rate": 1.7610762824446537e-06, "loss": 0.0017, "step": 13362 }, { "epoch": 3.26, "learning_rate": 1.7588405539185304e-06, "loss": 0.0036, "step": 13364 }, { "epoch": 3.26, "learning_rate": 1.7566061086039476e-06, "loss": 0.0031, "step": 13366 }, { "epoch": 3.26, "learning_rate": 1.7543729468488301e-06, "loss": 0.0061, "step": 13368 }, { "epoch": 3.26, "learning_rate": 1.7521410690008944e-06, "loss": 0.0027, "step": 13370 }, { "epoch": 3.26, "learning_rate": 1.7499104754076623e-06, "loss": 0.0007, "step": 13372 }, { "epoch": 3.26, "learning_rate": 1.747681166416455e-06, "loss": 0.0032, "step": 13374 }, { "epoch": 3.26, "learning_rate": 1.7454531423743948e-06, "loss": 0.0012, "step": 13376 }, { "epoch": 3.26, "learning_rate": 1.7432264036283986e-06, "loss": 0.0028, "step": 13378 }, { "epoch": 3.26, "learning_rate": 1.741000950525189e-06, "loss": 0.001, "step": 13380 }, { "epoch": 3.26, "learning_rate": 1.7387767834112833e-06, "loss": 0.0023, "step": 13382 }, { "epoch": 3.26, "learning_rate": 1.7365539026330058e-06, "loss": 0.0067, "step": 13384 }, { "epoch": 3.26, "learning_rate": 1.7343323085364717e-06, "loss": 0.0049, "step": 13386 }, { "epoch": 3.26, "learning_rate": 1.7321120014676006e-06, "loss": 0.0043, "step": 13388 }, { "epoch": 3.26, "learning_rate": 1.7298929817721122e-06, "loss": 0.002, "step": 13390 }, { "epoch": 3.26, "learning_rate": 1.7276752497955286e-06, "loss": 0.0031, "step": 13392 }, { "epoch": 3.26, "learning_rate": 1.7254588058831633e-06, "loss": 0.0047, "step": 13394 }, { "epoch": 3.26, "learning_rate": 1.7232436503801332e-06, "loss": 0.0022, "step": 13396 }, { "epoch": 3.26, "learning_rate": 1.721029783631355e-06, "loss": 0.0018, "step": 13398 }, { "epoch": 3.27, "learning_rate": 1.718817205981549e-06, "loss": 0.004, "step": 13400 }, { "epoch": 3.27, "learning_rate": 1.7166059177752249e-06, "loss": 0.0045, "step": 13402 }, { "epoch": 3.27, "learning_rate": 1.7143959193566995e-06, "loss": 0.0024, "step": 13404 }, { "epoch": 3.27, "learning_rate": 1.7121872110700888e-06, "loss": 0.002, "step": 13406 }, { "epoch": 3.27, "learning_rate": 1.7099797932593043e-06, "loss": 0.0031, "step": 13408 }, { "epoch": 3.27, "learning_rate": 1.7077736662680533e-06, "loss": 0.002, "step": 13410 }, { "epoch": 3.27, "learning_rate": 1.705568830439851e-06, "loss": 0.0057, "step": 13412 }, { "epoch": 3.27, "learning_rate": 1.703365286118006e-06, "loss": 0.0038, "step": 13414 }, { "epoch": 3.27, "learning_rate": 1.7011630336456296e-06, "loss": 0.0031, "step": 13416 }, { "epoch": 3.27, "learning_rate": 1.6989620733656253e-06, "loss": 0.0039, "step": 13418 }, { "epoch": 3.27, "learning_rate": 1.696762405620701e-06, "loss": 0.0052, "step": 13420 }, { "epoch": 3.27, "learning_rate": 1.694564030753365e-06, "loss": 0.0029, "step": 13422 }, { "epoch": 3.27, "learning_rate": 1.692366949105918e-06, "loss": 0.0063, "step": 13424 }, { "epoch": 3.27, "learning_rate": 1.6901711610204597e-06, "loss": 0.0019, "step": 13426 }, { "epoch": 3.27, "learning_rate": 1.6879766668388943e-06, "loss": 0.0046, "step": 13428 }, { "epoch": 3.27, "learning_rate": 1.6857834669029216e-06, "loss": 0.0026, "step": 13430 }, { "epoch": 3.27, "learning_rate": 1.683591561554041e-06, "loss": 0.0025, "step": 13432 }, { "epoch": 3.27, "learning_rate": 1.681400951133544e-06, "loss": 0.0028, "step": 13434 }, { "epoch": 3.27, "learning_rate": 1.6792116359825317e-06, "loss": 0.002, "step": 13436 }, { "epoch": 3.27, "learning_rate": 1.6770236164418906e-06, "loss": 0.0028, "step": 13438 }, { "epoch": 3.27, "learning_rate": 1.6748368928523174e-06, "loss": 0.0017, "step": 13440 }, { "epoch": 3.28, "learning_rate": 1.672651465554298e-06, "loss": 0.0086, "step": 13442 }, { "epoch": 3.28, "learning_rate": 1.6704673348881217e-06, "loss": 0.0029, "step": 13444 }, { "epoch": 3.28, "learning_rate": 1.6682845011938742e-06, "loss": 0.0024, "step": 13446 }, { "epoch": 3.28, "learning_rate": 1.6661029648114425e-06, "loss": 0.0078, "step": 13448 }, { "epoch": 3.28, "learning_rate": 1.6639227260805047e-06, "loss": 0.0035, "step": 13450 }, { "epoch": 3.28, "learning_rate": 1.6617437853405395e-06, "loss": 0.009, "step": 13452 }, { "epoch": 3.28, "learning_rate": 1.6595661429308273e-06, "loss": 0.0015, "step": 13454 }, { "epoch": 3.28, "learning_rate": 1.657389799190444e-06, "loss": 0.0021, "step": 13456 }, { "epoch": 3.28, "learning_rate": 1.655214754458261e-06, "loss": 0.0019, "step": 13458 }, { "epoch": 3.28, "learning_rate": 1.6530410090729498e-06, "loss": 0.001, "step": 13460 }, { "epoch": 3.28, "learning_rate": 1.6508685633729826e-06, "loss": 0.0029, "step": 13462 }, { "epoch": 3.28, "learning_rate": 1.648697417696622e-06, "loss": 0.0033, "step": 13464 }, { "epoch": 3.28, "learning_rate": 1.646527572381932e-06, "loss": 0.0022, "step": 13466 }, { "epoch": 3.28, "learning_rate": 1.6443590277667743e-06, "loss": 0.0009, "step": 13468 }, { "epoch": 3.28, "learning_rate": 1.642191784188808e-06, "loss": 0.0063, "step": 13470 }, { "epoch": 3.28, "learning_rate": 1.640025841985493e-06, "loss": 0.0035, "step": 13472 }, { "epoch": 3.28, "learning_rate": 1.6378612014940788e-06, "loss": 0.0015, "step": 13474 }, { "epoch": 3.28, "learning_rate": 1.6356978630516163e-06, "loss": 0.002, "step": 13476 }, { "epoch": 3.28, "learning_rate": 1.6335358269949586e-06, "loss": 0.0026, "step": 13478 }, { "epoch": 3.28, "learning_rate": 1.6313750936607487e-06, "loss": 0.0035, "step": 13480 }, { "epoch": 3.29, "learning_rate": 1.629215663385425e-06, "loss": 0.0034, "step": 13482 }, { "epoch": 3.29, "learning_rate": 1.627057536505232e-06, "loss": 0.0031, "step": 13484 }, { "epoch": 3.29, "learning_rate": 1.6249007133562056e-06, "loss": 0.0027, "step": 13486 }, { "epoch": 3.29, "learning_rate": 1.6227451942741812e-06, "loss": 0.0017, "step": 13488 }, { "epoch": 3.29, "learning_rate": 1.620590979594786e-06, "loss": 0.0009, "step": 13490 }, { "epoch": 3.29, "learning_rate": 1.6184380696534518e-06, "loss": 0.0023, "step": 13492 }, { "epoch": 3.29, "learning_rate": 1.6162864647853993e-06, "loss": 0.003, "step": 13494 }, { "epoch": 3.29, "learning_rate": 1.6141361653256538e-06, "loss": 0.0048, "step": 13496 }, { "epoch": 3.29, "learning_rate": 1.6119871716090285e-06, "loss": 0.0023, "step": 13498 }, { "epoch": 3.29, "learning_rate": 1.6098394839701403e-06, "loss": 0.0007, "step": 13500 }, { "epoch": 3.29, "learning_rate": 1.6076931027434017e-06, "loss": 0.0035, "step": 13502 }, { "epoch": 3.29, "learning_rate": 1.605548028263022e-06, "loss": 0.006, "step": 13504 }, { "epoch": 3.29, "learning_rate": 1.6034042608630041e-06, "loss": 0.0029, "step": 13506 }, { "epoch": 3.29, "learning_rate": 1.6012618008771464e-06, "loss": 0.0013, "step": 13508 }, { "epoch": 3.29, "learning_rate": 1.5991206486390487e-06, "loss": 0.0018, "step": 13510 }, { "epoch": 3.29, "learning_rate": 1.5969808044821068e-06, "loss": 0.0033, "step": 13512 }, { "epoch": 3.29, "learning_rate": 1.594842268739506e-06, "loss": 0.0023, "step": 13514 }, { "epoch": 3.29, "learning_rate": 1.5927050417442347e-06, "loss": 0.0042, "step": 13516 }, { "epoch": 3.29, "learning_rate": 1.5905691238290788e-06, "loss": 0.0019, "step": 13518 }, { "epoch": 3.29, "learning_rate": 1.5884345153266124e-06, "loss": 0.0035, "step": 13520 }, { "epoch": 3.29, "learning_rate": 1.586301216569215e-06, "loss": 0.0004, "step": 13522 }, { "epoch": 3.3, "learning_rate": 1.584169227889052e-06, "loss": 0.0015, "step": 13524 }, { "epoch": 3.3, "learning_rate": 1.5820385496180923e-06, "loss": 0.001, "step": 13526 }, { "epoch": 3.3, "learning_rate": 1.579909182088103e-06, "loss": 0.002, "step": 13528 }, { "epoch": 3.3, "learning_rate": 1.5777811256306374e-06, "loss": 0.0024, "step": 13530 }, { "epoch": 3.3, "learning_rate": 1.575654380577052e-06, "loss": 0.0031, "step": 13532 }, { "epoch": 3.3, "learning_rate": 1.5735289472584991e-06, "loss": 0.0017, "step": 13534 }, { "epoch": 3.3, "learning_rate": 1.5714048260059244e-06, "loss": 0.0048, "step": 13536 }, { "epoch": 3.3, "learning_rate": 1.5692820171500655e-06, "loss": 0.0022, "step": 13538 }, { "epoch": 3.3, "learning_rate": 1.5671605210214647e-06, "loss": 0.0041, "step": 13540 }, { "epoch": 3.3, "learning_rate": 1.565040337950453e-06, "loss": 0.0036, "step": 13542 }, { "epoch": 3.3, "learning_rate": 1.5629214682671623e-06, "loss": 0.003, "step": 13544 }, { "epoch": 3.3, "learning_rate": 1.5608039123015117e-06, "loss": 0.0065, "step": 13546 }, { "epoch": 3.3, "learning_rate": 1.5586876703832254e-06, "loss": 0.0017, "step": 13548 }, { "epoch": 3.3, "learning_rate": 1.5565727428418153e-06, "loss": 0.0058, "step": 13550 }, { "epoch": 3.3, "learning_rate": 1.5544591300065947e-06, "loss": 0.0019, "step": 13552 }, { "epoch": 3.3, "learning_rate": 1.5523468322066659e-06, "loss": 0.0014, "step": 13554 }, { "epoch": 3.3, "learning_rate": 1.5502358497709314e-06, "loss": 0.001, "step": 13556 }, { "epoch": 3.3, "learning_rate": 1.548126183028088e-06, "loss": 0.0019, "step": 13558 }, { "epoch": 3.3, "learning_rate": 1.5460178323066289e-06, "loss": 0.0019, "step": 13560 }, { "epoch": 3.3, "learning_rate": 1.5439107979348366e-06, "loss": 0.003, "step": 13562 }, { "epoch": 3.31, "learning_rate": 1.5418050802407924e-06, "loss": 0.0033, "step": 13564 }, { "epoch": 3.31, "learning_rate": 1.539700679552374e-06, "loss": 0.0066, "step": 13566 }, { "epoch": 3.31, "learning_rate": 1.5375975961972556e-06, "loss": 0.002, "step": 13568 }, { "epoch": 3.31, "learning_rate": 1.5354958305028967e-06, "loss": 0.0021, "step": 13570 }, { "epoch": 3.31, "learning_rate": 1.5333953827965631e-06, "loss": 0.0009, "step": 13572 }, { "epoch": 3.31, "learning_rate": 1.53129625340531e-06, "loss": 0.0019, "step": 13574 }, { "epoch": 3.31, "learning_rate": 1.529198442655989e-06, "loss": 0.0045, "step": 13576 }, { "epoch": 3.31, "learning_rate": 1.5271019508752438e-06, "loss": 0.0059, "step": 13578 }, { "epoch": 3.31, "learning_rate": 1.5250067783895128e-06, "loss": 0.0022, "step": 13580 }, { "epoch": 3.31, "learning_rate": 1.5229129255250309e-06, "loss": 0.0029, "step": 13582 }, { "epoch": 3.31, "learning_rate": 1.5208203926078302e-06, "loss": 0.0018, "step": 13584 }, { "epoch": 3.31, "learning_rate": 1.5187291799637293e-06, "loss": 0.0016, "step": 13586 }, { "epoch": 3.31, "learning_rate": 1.516639287918349e-06, "loss": 0.0023, "step": 13588 }, { "epoch": 3.31, "learning_rate": 1.5145507167971019e-06, "loss": 0.0016, "step": 13590 }, { "epoch": 3.31, "learning_rate": 1.5124634669251948e-06, "loss": 0.0029, "step": 13592 }, { "epoch": 3.31, "learning_rate": 1.5103775386276243e-06, "loss": 0.0035, "step": 13594 }, { "epoch": 3.31, "learning_rate": 1.5082929322291883e-06, "loss": 0.007, "step": 13596 }, { "epoch": 3.31, "learning_rate": 1.5062096480544752e-06, "loss": 0.0042, "step": 13598 }, { "epoch": 3.31, "learning_rate": 1.504127686427872e-06, "loss": 0.0012, "step": 13600 }, { "epoch": 3.31, "learning_rate": 1.5020470476735505e-06, "loss": 0.0028, "step": 13602 }, { "epoch": 3.31, "learning_rate": 1.499967732115487e-06, "loss": 0.0026, "step": 13604 }, { "epoch": 3.32, "learning_rate": 1.4978897400774416e-06, "loss": 0.0026, "step": 13606 }, { "epoch": 3.32, "learning_rate": 1.49581307188298e-06, "loss": 0.0045, "step": 13608 }, { "epoch": 3.32, "learning_rate": 1.4937377278554477e-06, "loss": 0.002, "step": 13610 }, { "epoch": 3.32, "learning_rate": 1.4916637083179964e-06, "loss": 0.0037, "step": 13612 }, { "epoch": 3.32, "learning_rate": 1.4895910135935666e-06, "loss": 0.0037, "step": 13614 }, { "epoch": 3.32, "learning_rate": 1.4875196440048934e-06, "loss": 0.0022, "step": 13616 }, { "epoch": 3.32, "learning_rate": 1.4854495998745044e-06, "loss": 0.0038, "step": 13618 }, { "epoch": 3.32, "learning_rate": 1.4833808815247175e-06, "loss": 0.0003, "step": 13620 }, { "epoch": 3.32, "learning_rate": 1.4813134892776504e-06, "loss": 0.0049, "step": 13622 }, { "epoch": 3.32, "learning_rate": 1.4792474234552156e-06, "loss": 0.0026, "step": 13624 }, { "epoch": 3.32, "learning_rate": 1.4771826843791104e-06, "loss": 0.0022, "step": 13626 }, { "epoch": 3.32, "learning_rate": 1.4751192723708318e-06, "loss": 0.0038, "step": 13628 }, { "epoch": 3.32, "learning_rate": 1.4730571877516686e-06, "loss": 0.0022, "step": 13630 }, { "epoch": 3.32, "learning_rate": 1.470996430842707e-06, "loss": 0.0023, "step": 13632 }, { "epoch": 3.32, "learning_rate": 1.4689370019648198e-06, "loss": 0.0022, "step": 13634 }, { "epoch": 3.32, "learning_rate": 1.466878901438672e-06, "loss": 0.0081, "step": 13636 }, { "epoch": 3.32, "learning_rate": 1.4648221295847298e-06, "loss": 0.0027, "step": 13638 }, { "epoch": 3.32, "learning_rate": 1.4627666867232493e-06, "loss": 0.0039, "step": 13640 }, { "epoch": 3.32, "learning_rate": 1.4607125731742756e-06, "loss": 0.004, "step": 13642 }, { "epoch": 3.32, "learning_rate": 1.4586597892576503e-06, "loss": 0.0007, "step": 13644 }, { "epoch": 3.33, "learning_rate": 1.4566083352930105e-06, "loss": 0.0021, "step": 13646 }, { "epoch": 3.33, "learning_rate": 1.4545582115997825e-06, "loss": 0.0021, "step": 13648 }, { "epoch": 3.33, "learning_rate": 1.4525094184971812e-06, "loss": 0.0016, "step": 13650 }, { "epoch": 3.33, "learning_rate": 1.4504619563042244e-06, "loss": 0.003, "step": 13652 }, { "epoch": 3.33, "learning_rate": 1.4484158253397152e-06, "loss": 0.0026, "step": 13654 }, { "epoch": 3.33, "learning_rate": 1.4463710259222563e-06, "loss": 0.0022, "step": 13656 }, { "epoch": 3.33, "learning_rate": 1.444327558370232e-06, "loss": 0.0023, "step": 13658 }, { "epoch": 3.33, "learning_rate": 1.4422854230018324e-06, "loss": 0.0014, "step": 13660 }, { "epoch": 3.33, "learning_rate": 1.4402446201350273e-06, "loss": 0.0023, "step": 13662 }, { "epoch": 3.33, "learning_rate": 1.4382051500875916e-06, "loss": 0.0005, "step": 13664 }, { "epoch": 3.33, "learning_rate": 1.43616701317708e-06, "loss": 0.0012, "step": 13666 }, { "epoch": 3.33, "learning_rate": 1.4341302097208487e-06, "loss": 0.0032, "step": 13668 }, { "epoch": 3.33, "learning_rate": 1.432094740036044e-06, "loss": 0.0024, "step": 13670 }, { "epoch": 3.33, "learning_rate": 1.430060604439607e-06, "loss": 0.0031, "step": 13672 }, { "epoch": 3.33, "learning_rate": 1.4280278032482631e-06, "loss": 0.0025, "step": 13674 }, { "epoch": 3.33, "learning_rate": 1.425996336778539e-06, "loss": 0.0042, "step": 13676 }, { "epoch": 3.33, "learning_rate": 1.4239662053467452e-06, "loss": 0.0035, "step": 13678 }, { "epoch": 3.33, "learning_rate": 1.4219374092689941e-06, "loss": 0.0018, "step": 13680 }, { "epoch": 3.33, "learning_rate": 1.4199099488611789e-06, "loss": 0.0023, "step": 13682 }, { "epoch": 3.33, "learning_rate": 1.4178838244389937e-06, "loss": 0.0024, "step": 13684 }, { "epoch": 3.33, "learning_rate": 1.4158590363179215e-06, "loss": 0.0008, "step": 13686 }, { "epoch": 3.34, "learning_rate": 1.4138355848132402e-06, "loss": 0.0013, "step": 13688 }, { "epoch": 3.34, "learning_rate": 1.4118134702400133e-06, "loss": 0.0041, "step": 13690 }, { "epoch": 3.34, "learning_rate": 1.409792692913098e-06, "loss": 0.0016, "step": 13692 }, { "epoch": 3.34, "learning_rate": 1.4077732531471455e-06, "loss": 0.0044, "step": 13694 }, { "epoch": 3.34, "learning_rate": 1.4057551512566025e-06, "loss": 0.0028, "step": 13696 }, { "epoch": 3.34, "learning_rate": 1.4037383875556977e-06, "loss": 0.0016, "step": 13698 }, { "epoch": 3.34, "learning_rate": 1.4017229623584583e-06, "loss": 0.0017, "step": 13700 }, { "epoch": 3.34, "learning_rate": 1.3997088759787036e-06, "loss": 0.0043, "step": 13702 }, { "epoch": 3.34, "learning_rate": 1.3976961287300407e-06, "loss": 0.0031, "step": 13704 }, { "epoch": 3.34, "learning_rate": 1.3956847209258672e-06, "loss": 0.001, "step": 13706 }, { "epoch": 3.34, "learning_rate": 1.3936746528793765e-06, "loss": 0.0018, "step": 13708 }, { "epoch": 3.34, "learning_rate": 1.3916659249035514e-06, "loss": 0.0016, "step": 13710 }, { "epoch": 3.34, "learning_rate": 1.3896585373111703e-06, "loss": 0.0017, "step": 13712 }, { "epoch": 3.34, "learning_rate": 1.3876524904147915e-06, "loss": 0.0041, "step": 13714 }, { "epoch": 3.34, "learning_rate": 1.3856477845267791e-06, "loss": 0.0034, "step": 13716 }, { "epoch": 3.34, "learning_rate": 1.3836444199592746e-06, "loss": 0.0021, "step": 13718 }, { "epoch": 3.34, "learning_rate": 1.3816423970242232e-06, "loss": 0.0039, "step": 13720 }, { "epoch": 3.34, "learning_rate": 1.3796417160333485e-06, "loss": 0.0036, "step": 13722 }, { "epoch": 3.34, "learning_rate": 1.3776423772981772e-06, "loss": 0.0026, "step": 13724 }, { "epoch": 3.34, "learning_rate": 1.375644381130019e-06, "loss": 0.0033, "step": 13726 }, { "epoch": 3.35, "learning_rate": 1.3736477278399806e-06, "loss": 0.0031, "step": 13728 }, { "epoch": 3.35, "learning_rate": 1.3716524177389511e-06, "loss": 0.0072, "step": 13730 }, { "epoch": 3.35, "learning_rate": 1.36965845113762e-06, "loss": 0.0002, "step": 13732 }, { "epoch": 3.35, "learning_rate": 1.3676658283464595e-06, "loss": 0.003, "step": 13734 }, { "epoch": 3.35, "learning_rate": 1.3656745496757407e-06, "loss": 0.0004, "step": 13736 }, { "epoch": 3.35, "learning_rate": 1.3636846154355155e-06, "loss": 0.0021, "step": 13738 }, { "epoch": 3.35, "learning_rate": 1.3616960259356348e-06, "loss": 0.0004, "step": 13740 }, { "epoch": 3.35, "learning_rate": 1.3597087814857368e-06, "loss": 0.0023, "step": 13742 }, { "epoch": 3.35, "learning_rate": 1.3577228823952526e-06, "loss": 0.0011, "step": 13744 }, { "epoch": 3.35, "learning_rate": 1.3557383289734004e-06, "loss": 0.0063, "step": 13746 }, { "epoch": 3.35, "learning_rate": 1.353755121529189e-06, "loss": 0.0044, "step": 13748 }, { "epoch": 3.35, "learning_rate": 1.3517732603714183e-06, "loss": 0.0016, "step": 13750 }, { "epoch": 3.35, "learning_rate": 1.3497927458086836e-06, "loss": 0.0019, "step": 13752 }, { "epoch": 3.35, "learning_rate": 1.3478135781493617e-06, "loss": 0.0035, "step": 13754 }, { "epoch": 3.35, "learning_rate": 1.3458357577016257e-06, "loss": 0.002, "step": 13756 }, { "epoch": 3.35, "learning_rate": 1.34385928477344e-06, "loss": 0.0046, "step": 13758 }, { "epoch": 3.35, "learning_rate": 1.341884159672554e-06, "loss": 0.0054, "step": 13760 }, { "epoch": 3.35, "learning_rate": 1.3399103827065075e-06, "loss": 0.0049, "step": 13762 }, { "epoch": 3.35, "learning_rate": 1.3379379541826353e-06, "loss": 0.0014, "step": 13764 }, { "epoch": 3.35, "learning_rate": 1.3359668744080601e-06, "loss": 0.0022, "step": 13766 }, { "epoch": 3.35, "learning_rate": 1.3339971436896949e-06, "loss": 0.0058, "step": 13768 }, { "epoch": 3.36, "learning_rate": 1.3320287623342376e-06, "loss": 0.004, "step": 13770 }, { "epoch": 3.36, "learning_rate": 1.3300617306481833e-06, "loss": 0.0026, "step": 13772 }, { "epoch": 3.36, "learning_rate": 1.328096048937816e-06, "loss": 0.0038, "step": 13774 }, { "epoch": 3.36, "learning_rate": 1.3261317175092047e-06, "loss": 0.0023, "step": 13776 }, { "epoch": 3.36, "learning_rate": 1.32416873666821e-06, "loss": 0.0042, "step": 13778 }, { "epoch": 3.36, "learning_rate": 1.3222071067204822e-06, "loss": 0.0028, "step": 13780 }, { "epoch": 3.36, "learning_rate": 1.3202468279714653e-06, "loss": 0.0023, "step": 13782 }, { "epoch": 3.36, "learning_rate": 1.3182879007263906e-06, "loss": 0.0017, "step": 13784 }, { "epoch": 3.36, "learning_rate": 1.3163303252902727e-06, "loss": 0.0023, "step": 13786 }, { "epoch": 3.36, "learning_rate": 1.3143741019679268e-06, "loss": 0.0073, "step": 13788 }, { "epoch": 3.36, "learning_rate": 1.312419231063946e-06, "loss": 0.0066, "step": 13790 }, { "epoch": 3.36, "learning_rate": 1.3104657128827247e-06, "loss": 0.0048, "step": 13792 }, { "epoch": 3.36, "learning_rate": 1.308513547728436e-06, "loss": 0.0046, "step": 13794 }, { "epoch": 3.36, "learning_rate": 1.3065627359050491e-06, "loss": 0.0013, "step": 13796 }, { "epoch": 3.36, "learning_rate": 1.304613277716319e-06, "loss": 0.0047, "step": 13798 }, { "epoch": 3.36, "learning_rate": 1.3026651734657947e-06, "loss": 0.0014, "step": 13800 }, { "epoch": 3.36, "learning_rate": 1.3007184234568082e-06, "loss": 0.0037, "step": 13802 }, { "epoch": 3.36, "learning_rate": 1.2987730279924815e-06, "loss": 0.002, "step": 13804 }, { "epoch": 3.36, "learning_rate": 1.2968289873757311e-06, "loss": 0.001, "step": 13806 }, { "epoch": 3.36, "learning_rate": 1.294886301909256e-06, "loss": 0.0045, "step": 13808 }, { "epoch": 3.37, "learning_rate": 1.292944971895549e-06, "loss": 0.0028, "step": 13810 }, { "epoch": 3.37, "learning_rate": 1.2910049976368888e-06, "loss": 0.0025, "step": 13812 }, { "epoch": 3.37, "learning_rate": 1.2890663794353476e-06, "loss": 0.0028, "step": 13814 }, { "epoch": 3.37, "learning_rate": 1.2871291175927814e-06, "loss": 0.0024, "step": 13816 }, { "epoch": 3.37, "learning_rate": 1.2851932124108323e-06, "loss": 0.0019, "step": 13818 }, { "epoch": 3.37, "learning_rate": 1.2832586641909405e-06, "loss": 0.0041, "step": 13820 }, { "epoch": 3.37, "learning_rate": 1.2813254732343294e-06, "loss": 0.0023, "step": 13822 }, { "epoch": 3.37, "learning_rate": 1.2793936398420093e-06, "loss": 0.0036, "step": 13824 }, { "epoch": 3.37, "learning_rate": 1.2774631643147827e-06, "loss": 0.0027, "step": 13826 }, { "epoch": 3.37, "learning_rate": 1.2755340469532407e-06, "loss": 0.0007, "step": 13828 }, { "epoch": 3.37, "learning_rate": 1.2736062880577616e-06, "loss": 0.0019, "step": 13830 }, { "epoch": 3.37, "learning_rate": 1.2716798879285108e-06, "loss": 0.004, "step": 13832 }, { "epoch": 3.37, "learning_rate": 1.269754846865443e-06, "loss": 0.0036, "step": 13834 }, { "epoch": 3.37, "learning_rate": 1.2678311651683017e-06, "loss": 0.0014, "step": 13836 }, { "epoch": 3.37, "learning_rate": 1.2659088431366218e-06, "loss": 0.0033, "step": 13838 }, { "epoch": 3.37, "learning_rate": 1.2639878810697204e-06, "loss": 0.0016, "step": 13840 }, { "epoch": 3.37, "learning_rate": 1.262068279266706e-06, "loss": 0.004, "step": 13842 }, { "epoch": 3.37, "learning_rate": 1.2601500380264798e-06, "loss": 0.0019, "step": 13844 }, { "epoch": 3.37, "learning_rate": 1.2582331576477225e-06, "loss": 0.0027, "step": 13846 }, { "epoch": 3.37, "learning_rate": 1.2563176384289055e-06, "loss": 0.0023, "step": 13848 }, { "epoch": 3.37, "learning_rate": 1.2544034806682926e-06, "loss": 0.0018, "step": 13850 }, { "epoch": 3.38, "learning_rate": 1.252490684663933e-06, "loss": 0.0033, "step": 13852 }, { "epoch": 3.38, "learning_rate": 1.2505792507136638e-06, "loss": 0.0009, "step": 13854 }, { "epoch": 3.38, "learning_rate": 1.2486691791151072e-06, "loss": 0.0027, "step": 13856 }, { "epoch": 3.38, "learning_rate": 1.24676047016568e-06, "loss": 0.0037, "step": 13858 }, { "epoch": 3.38, "learning_rate": 1.2448531241625784e-06, "loss": 0.0015, "step": 13860 }, { "epoch": 3.38, "learning_rate": 1.2429471414027949e-06, "loss": 0.0018, "step": 13862 }, { "epoch": 3.38, "learning_rate": 1.2410425221831024e-06, "loss": 0.0018, "step": 13864 }, { "epoch": 3.38, "learning_rate": 1.2391392668000646e-06, "loss": 0.003, "step": 13866 }, { "epoch": 3.38, "learning_rate": 1.2372373755500356e-06, "loss": 0.003, "step": 13868 }, { "epoch": 3.38, "learning_rate": 1.235336848729154e-06, "loss": 0.0019, "step": 13870 }, { "epoch": 3.38, "learning_rate": 1.2334376866333441e-06, "loss": 0.0044, "step": 13872 }, { "epoch": 3.38, "learning_rate": 1.231539889558323e-06, "loss": 0.0019, "step": 13874 }, { "epoch": 3.38, "learning_rate": 1.2296434577995875e-06, "loss": 0.0029, "step": 13876 }, { "epoch": 3.38, "learning_rate": 1.2277483916524324e-06, "loss": 0.0028, "step": 13878 }, { "epoch": 3.38, "learning_rate": 1.2258546914119295e-06, "loss": 0.0029, "step": 13880 }, { "epoch": 3.38, "learning_rate": 1.223962357372943e-06, "loss": 0.0021, "step": 13882 }, { "epoch": 3.38, "learning_rate": 1.222071389830125e-06, "loss": 0.0031, "step": 13884 }, { "epoch": 3.38, "learning_rate": 1.2201817890779155e-06, "loss": 0.0017, "step": 13886 }, { "epoch": 3.38, "learning_rate": 1.2182935554105379e-06, "loss": 0.002, "step": 13888 }, { "epoch": 3.38, "learning_rate": 1.2164066891220017e-06, "loss": 0.0016, "step": 13890 }, { "epoch": 3.38, "learning_rate": 1.2145211905061094e-06, "loss": 0.0038, "step": 13892 }, { "epoch": 3.39, "learning_rate": 1.2126370598564497e-06, "loss": 0.0013, "step": 13894 }, { "epoch": 3.39, "learning_rate": 1.210754297466391e-06, "loss": 0.0026, "step": 13896 }, { "epoch": 3.39, "learning_rate": 1.208872903629097e-06, "loss": 0.0044, "step": 13898 }, { "epoch": 3.39, "learning_rate": 1.2069928786375173e-06, "loss": 0.0034, "step": 13900 }, { "epoch": 3.39, "learning_rate": 1.2051142227843826e-06, "loss": 0.002, "step": 13902 }, { "epoch": 3.39, "learning_rate": 1.203236936362212e-06, "loss": 0.0052, "step": 13904 }, { "epoch": 3.39, "learning_rate": 1.2013610196633174e-06, "loss": 0.0017, "step": 13906 }, { "epoch": 3.39, "learning_rate": 1.1994864729797918e-06, "loss": 0.0027, "step": 13908 }, { "epoch": 3.39, "learning_rate": 1.1976132966035192e-06, "loss": 0.0031, "step": 13910 }, { "epoch": 3.39, "learning_rate": 1.1957414908261623e-06, "loss": 0.0024, "step": 13912 }, { "epoch": 3.39, "learning_rate": 1.1938710559391809e-06, "loss": 0.001, "step": 13914 }, { "epoch": 3.39, "learning_rate": 1.1920019922338112e-06, "loss": 0.0016, "step": 13916 }, { "epoch": 3.39, "learning_rate": 1.1901343000010856e-06, "loss": 0.0018, "step": 13918 }, { "epoch": 3.39, "learning_rate": 1.1882679795318131e-06, "loss": 0.0018, "step": 13920 }, { "epoch": 3.39, "learning_rate": 1.1864030311165965e-06, "loss": 0.0076, "step": 13922 }, { "epoch": 3.39, "learning_rate": 1.1845394550458223e-06, "loss": 0.0018, "step": 13924 }, { "epoch": 3.39, "learning_rate": 1.1826772516096652e-06, "loss": 0.0018, "step": 13926 }, { "epoch": 3.39, "learning_rate": 1.1808164210980821e-06, "loss": 0.0015, "step": 13928 }, { "epoch": 3.39, "learning_rate": 1.1789569638008202e-06, "loss": 0.0032, "step": 13930 }, { "epoch": 3.39, "learning_rate": 1.177098880007409e-06, "loss": 0.0016, "step": 13932 }, { "epoch": 3.4, "learning_rate": 1.1752421700071704e-06, "loss": 0.0026, "step": 13934 }, { "epoch": 3.4, "learning_rate": 1.173386834089203e-06, "loss": 0.0033, "step": 13936 }, { "epoch": 3.4, "learning_rate": 1.1715328725424002e-06, "loss": 0.0044, "step": 13938 }, { "epoch": 3.4, "learning_rate": 1.1696802856554368e-06, "loss": 0.0043, "step": 13940 }, { "epoch": 3.4, "learning_rate": 1.1678290737167785e-06, "loss": 0.0014, "step": 13942 }, { "epoch": 3.4, "learning_rate": 1.1659792370146694e-06, "loss": 0.0029, "step": 13944 }, { "epoch": 3.4, "learning_rate": 1.1641307758371412e-06, "loss": 0.0011, "step": 13946 }, { "epoch": 3.4, "learning_rate": 1.1622836904720159e-06, "loss": 0.0029, "step": 13948 }, { "epoch": 3.4, "learning_rate": 1.160437981206901e-06, "loss": 0.0022, "step": 13950 }, { "epoch": 3.4, "learning_rate": 1.1585936483291837e-06, "loss": 0.0029, "step": 13952 }, { "epoch": 3.4, "learning_rate": 1.156750692126043e-06, "loss": 0.0019, "step": 13954 }, { "epoch": 3.4, "learning_rate": 1.1549091128844425e-06, "loss": 0.002, "step": 13956 }, { "epoch": 3.4, "learning_rate": 1.1530689108911285e-06, "loss": 0.0017, "step": 13958 }, { "epoch": 3.4, "learning_rate": 1.151230086432632e-06, "loss": 0.0025, "step": 13960 }, { "epoch": 3.4, "learning_rate": 1.1493926397952748e-06, "loss": 0.0032, "step": 13962 }, { "epoch": 3.4, "learning_rate": 1.1475565712651615e-06, "loss": 0.001, "step": 13964 }, { "epoch": 3.4, "learning_rate": 1.1457218811281834e-06, "loss": 0.0013, "step": 13966 }, { "epoch": 3.4, "learning_rate": 1.1438885696700108e-06, "loss": 0.0009, "step": 13968 }, { "epoch": 3.4, "learning_rate": 1.142056637176111e-06, "loss": 0.0036, "step": 13970 }, { "epoch": 3.4, "learning_rate": 1.1402260839317237e-06, "loss": 0.0062, "step": 13972 }, { "epoch": 3.4, "learning_rate": 1.138396910221885e-06, "loss": 0.0024, "step": 13974 }, { "epoch": 3.41, "learning_rate": 1.1365691163314074e-06, "loss": 0.0051, "step": 13976 }, { "epoch": 3.41, "learning_rate": 1.1347427025448931e-06, "loss": 0.0015, "step": 13978 }, { "epoch": 3.41, "learning_rate": 1.1329176691467303e-06, "loss": 0.0047, "step": 13980 }, { "epoch": 3.41, "learning_rate": 1.1310940164210916e-06, "loss": 0.0014, "step": 13982 }, { "epoch": 3.41, "learning_rate": 1.1292717446519296e-06, "loss": 0.0037, "step": 13984 }, { "epoch": 3.41, "learning_rate": 1.12745085412299e-06, "loss": 0.0051, "step": 13986 }, { "epoch": 3.41, "learning_rate": 1.1256313451177968e-06, "loss": 0.0022, "step": 13988 }, { "epoch": 3.41, "learning_rate": 1.1238132179196636e-06, "loss": 0.0016, "step": 13990 }, { "epoch": 3.41, "learning_rate": 1.1219964728116839e-06, "loss": 0.0024, "step": 13992 }, { "epoch": 3.41, "learning_rate": 1.12018111007674e-06, "loss": 0.0027, "step": 13994 }, { "epoch": 3.41, "learning_rate": 1.1183671299974984e-06, "loss": 0.0006, "step": 13996 }, { "epoch": 3.41, "learning_rate": 1.116554532856412e-06, "loss": 0.0037, "step": 13998 }, { "epoch": 3.41, "learning_rate": 1.1147433189357125e-06, "loss": 0.0027, "step": 14000 }, { "epoch": 3.41, "learning_rate": 1.1129334885174193e-06, "loss": 0.0036, "step": 14002 }, { "epoch": 3.41, "learning_rate": 1.1111250418833376e-06, "loss": 0.002, "step": 14004 }, { "epoch": 3.41, "learning_rate": 1.1093179793150598e-06, "loss": 0.0013, "step": 14006 }, { "epoch": 3.41, "learning_rate": 1.1075123010939538e-06, "loss": 0.0023, "step": 14008 }, { "epoch": 3.41, "learning_rate": 1.1057080075011805e-06, "loss": 0.0025, "step": 14010 }, { "epoch": 3.41, "learning_rate": 1.1039050988176847e-06, "loss": 0.0043, "step": 14012 }, { "epoch": 3.41, "learning_rate": 1.1021035753241893e-06, "loss": 0.0058, "step": 14014 }, { "epoch": 3.42, "learning_rate": 1.1003034373012055e-06, "loss": 0.0018, "step": 14016 }, { "epoch": 3.42, "learning_rate": 1.0985046850290281e-06, "loss": 0.0012, "step": 14018 }, { "epoch": 3.42, "learning_rate": 1.0967073187877386e-06, "loss": 0.003, "step": 14020 }, { "epoch": 3.42, "learning_rate": 1.094911338857202e-06, "loss": 0.0023, "step": 14022 }, { "epoch": 3.42, "learning_rate": 1.0931167455170622e-06, "loss": 0.0026, "step": 14024 }, { "epoch": 3.42, "learning_rate": 1.0913235390467524e-06, "loss": 0.0032, "step": 14026 }, { "epoch": 3.42, "learning_rate": 1.0895317197254919e-06, "loss": 0.0038, "step": 14028 }, { "epoch": 3.42, "learning_rate": 1.087741287832277e-06, "loss": 0.0041, "step": 14030 }, { "epoch": 3.42, "learning_rate": 1.0859522436458903e-06, "loss": 0.0017, "step": 14032 }, { "epoch": 3.42, "learning_rate": 1.0841645874449025e-06, "loss": 0.0007, "step": 14034 }, { "epoch": 3.42, "learning_rate": 1.0823783195076654e-06, "loss": 0.0019, "step": 14036 }, { "epoch": 3.42, "learning_rate": 1.0805934401123153e-06, "loss": 0.0018, "step": 14038 }, { "epoch": 3.42, "learning_rate": 1.0788099495367677e-06, "loss": 0.0029, "step": 14040 }, { "epoch": 3.42, "learning_rate": 1.0770278480587303e-06, "loss": 0.0006, "step": 14042 }, { "epoch": 3.42, "learning_rate": 1.075247135955686e-06, "loss": 0.0038, "step": 14044 }, { "epoch": 3.42, "learning_rate": 1.0734678135049093e-06, "loss": 0.0039, "step": 14046 }, { "epoch": 3.42, "learning_rate": 1.0716898809834497e-06, "loss": 0.0027, "step": 14048 }, { "epoch": 3.42, "learning_rate": 1.069913338668148e-06, "loss": 0.0008, "step": 14050 }, { "epoch": 3.42, "learning_rate": 1.0681381868356245e-06, "loss": 0.0014, "step": 14052 }, { "epoch": 3.42, "learning_rate": 1.066364425762285e-06, "loss": 0.0014, "step": 14054 }, { "epoch": 3.42, "learning_rate": 1.0645920557243184e-06, "loss": 0.0031, "step": 14056 }, { "epoch": 3.43, "learning_rate": 1.0628210769976922e-06, "loss": 0.0029, "step": 14058 }, { "epoch": 3.43, "learning_rate": 1.0610514898581636e-06, "loss": 0.0011, "step": 14060 }, { "epoch": 3.43, "learning_rate": 1.0592832945812725e-06, "loss": 0.0033, "step": 14062 }, { "epoch": 3.43, "learning_rate": 1.0575164914423374e-06, "loss": 0.0018, "step": 14064 }, { "epoch": 3.43, "learning_rate": 1.055751080716465e-06, "loss": 0.0019, "step": 14066 }, { "epoch": 3.43, "learning_rate": 1.0539870626785452e-06, "loss": 0.0044, "step": 14068 }, { "epoch": 3.43, "learning_rate": 1.0522244376032464e-06, "loss": 0.0024, "step": 14070 }, { "epoch": 3.43, "learning_rate": 1.050463205765021e-06, "loss": 0.0029, "step": 14072 }, { "epoch": 3.43, "learning_rate": 1.0487033674381086e-06, "loss": 0.0025, "step": 14074 }, { "epoch": 3.43, "learning_rate": 1.0469449228965289e-06, "loss": 0.0038, "step": 14076 }, { "epoch": 3.43, "learning_rate": 1.0451878724140884e-06, "loss": 0.0011, "step": 14078 }, { "epoch": 3.43, "learning_rate": 1.0434322162643673e-06, "loss": 0.0055, "step": 14080 }, { "epoch": 3.43, "learning_rate": 1.041677954720739e-06, "loss": 0.0072, "step": 14082 }, { "epoch": 3.43, "learning_rate": 1.039925088056356e-06, "loss": 0.0028, "step": 14084 }, { "epoch": 3.43, "learning_rate": 1.0381736165441514e-06, "loss": 0.0027, "step": 14086 }, { "epoch": 3.43, "learning_rate": 1.0364235404568402e-06, "loss": 0.0046, "step": 14088 }, { "epoch": 3.43, "learning_rate": 1.0346748600669254e-06, "loss": 0.0031, "step": 14090 }, { "epoch": 3.43, "learning_rate": 1.0329275756466905e-06, "loss": 0.003, "step": 14092 }, { "epoch": 3.43, "learning_rate": 1.0311816874682012e-06, "loss": 0.0007, "step": 14094 }, { "epoch": 3.43, "learning_rate": 1.0294371958033023e-06, "loss": 0.0025, "step": 14096 }, { "epoch": 3.44, "learning_rate": 1.0276941009236296e-06, "loss": 0.0044, "step": 14098 }, { "epoch": 3.44, "learning_rate": 1.0259524031005907e-06, "loss": 0.003, "step": 14100 }, { "epoch": 3.44, "learning_rate": 1.0242121026053865e-06, "loss": 0.0026, "step": 14102 }, { "epoch": 3.44, "learning_rate": 1.022473199708991e-06, "loss": 0.0022, "step": 14104 }, { "epoch": 3.44, "learning_rate": 1.0207356946821655e-06, "loss": 0.0024, "step": 14106 }, { "epoch": 3.44, "learning_rate": 1.018999587795454e-06, "loss": 0.0017, "step": 14108 }, { "epoch": 3.44, "learning_rate": 1.0172648793191831e-06, "loss": 0.0054, "step": 14110 }, { "epoch": 3.44, "learning_rate": 1.015531569523459e-06, "loss": 0.003, "step": 14112 }, { "epoch": 3.44, "learning_rate": 1.0137996586781684e-06, "loss": 0.0035, "step": 14114 }, { "epoch": 3.44, "learning_rate": 1.0120691470529843e-06, "loss": 0.0007, "step": 14116 }, { "epoch": 3.44, "learning_rate": 1.0103400349173643e-06, "loss": 0.0027, "step": 14118 }, { "epoch": 3.44, "learning_rate": 1.0086123225405398e-06, "loss": 0.003, "step": 14120 }, { "epoch": 3.44, "learning_rate": 1.0068860101915312e-06, "loss": 0.0021, "step": 14122 }, { "epoch": 3.44, "learning_rate": 1.0051610981391392e-06, "loss": 0.0027, "step": 14124 }, { "epoch": 3.44, "learning_rate": 1.003437586651943e-06, "loss": 0.0015, "step": 14126 }, { "epoch": 3.44, "learning_rate": 1.0017154759983094e-06, "loss": 0.0037, "step": 14128 }, { "epoch": 3.44, "learning_rate": 9.999947664463815e-07, "loss": 0.0015, "step": 14130 }, { "epoch": 3.44, "learning_rate": 9.982754582640885e-07, "loss": 0.0038, "step": 14132 }, { "epoch": 3.44, "learning_rate": 9.965575517191418e-07, "loss": 0.0008, "step": 14134 }, { "epoch": 3.44, "learning_rate": 9.94841047079028e-07, "loss": 0.0028, "step": 14136 }, { "epoch": 3.44, "learning_rate": 9.93125944611023e-07, "loss": 0.0042, "step": 14138 }, { "epoch": 3.45, "learning_rate": 9.914122445821828e-07, "loss": 0.0031, "step": 14140 }, { "epoch": 3.45, "learning_rate": 9.896999472593415e-07, "loss": 0.003, "step": 14142 }, { "epoch": 3.45, "learning_rate": 9.879890529091152e-07, "loss": 0.0021, "step": 14144 }, { "epoch": 3.45, "learning_rate": 9.862795617979049e-07, "loss": 0.0065, "step": 14146 }, { "epoch": 3.45, "learning_rate": 9.84571474191891e-07, "loss": 0.0049, "step": 14148 }, { "epoch": 3.45, "learning_rate": 9.828647903570387e-07, "loss": 0.0054, "step": 14150 }, { "epoch": 3.45, "learning_rate": 9.811595105590876e-07, "loss": 0.0009, "step": 14152 }, { "epoch": 3.45, "learning_rate": 9.794556350635675e-07, "loss": 0.0033, "step": 14154 }, { "epoch": 3.45, "learning_rate": 9.777531641357784e-07, "loss": 0.0029, "step": 14156 }, { "epoch": 3.45, "learning_rate": 9.76052098040815e-07, "loss": 0.0011, "step": 14158 }, { "epoch": 3.45, "learning_rate": 9.74352437043541e-07, "loss": 0.0027, "step": 14160 }, { "epoch": 3.45, "learning_rate": 9.726541814086076e-07, "loss": 0.0015, "step": 14162 }, { "epoch": 3.45, "learning_rate": 9.709573314004473e-07, "loss": 0.0015, "step": 14164 }, { "epoch": 3.45, "learning_rate": 9.692618872832748e-07, "loss": 0.0023, "step": 14166 }, { "epoch": 3.45, "learning_rate": 9.675678493210806e-07, "loss": 0.0024, "step": 14168 }, { "epoch": 3.45, "learning_rate": 9.658752177776386e-07, "loss": 0.0018, "step": 14170 }, { "epoch": 3.45, "learning_rate": 9.641839929165063e-07, "loss": 0.0026, "step": 14172 }, { "epoch": 3.45, "learning_rate": 9.62494175001022e-07, "loss": 0.001, "step": 14174 }, { "epoch": 3.45, "learning_rate": 9.608057642942992e-07, "loss": 0.0014, "step": 14176 }, { "epoch": 3.45, "learning_rate": 9.59118761059238e-07, "loss": 0.0037, "step": 14178 }, { "epoch": 3.46, "learning_rate": 9.57433165558519e-07, "loss": 0.007, "step": 14180 }, { "epoch": 3.46, "learning_rate": 9.557489780546025e-07, "loss": 0.0017, "step": 14182 }, { "epoch": 3.46, "learning_rate": 9.540661988097289e-07, "loss": 0.0026, "step": 14184 }, { "epoch": 3.46, "learning_rate": 9.523848280859172e-07, "loss": 0.0017, "step": 14186 }, { "epoch": 3.46, "learning_rate": 9.507048661449714e-07, "loss": 0.0029, "step": 14188 }, { "epoch": 3.46, "learning_rate": 9.490263132484778e-07, "loss": 0.0018, "step": 14190 }, { "epoch": 3.46, "learning_rate": 9.473491696577941e-07, "loss": 0.0034, "step": 14192 }, { "epoch": 3.46, "learning_rate": 9.456734356340668e-07, "loss": 0.0012, "step": 14194 }, { "epoch": 3.46, "learning_rate": 9.43999111438223e-07, "loss": 0.0031, "step": 14196 }, { "epoch": 3.46, "learning_rate": 9.423261973309661e-07, "loss": 0.0031, "step": 14198 }, { "epoch": 3.46, "learning_rate": 9.40654693572779e-07, "loss": 0.0013, "step": 14200 }, { "epoch": 3.46, "learning_rate": 9.3898460042393e-07, "loss": 0.0038, "step": 14202 }, { "epoch": 3.46, "learning_rate": 9.373159181444647e-07, "loss": 0.0035, "step": 14204 }, { "epoch": 3.46, "learning_rate": 9.356486469942128e-07, "loss": 0.0025, "step": 14206 }, { "epoch": 3.46, "learning_rate": 9.339827872327756e-07, "loss": 0.0022, "step": 14208 }, { "epoch": 3.46, "learning_rate": 9.323183391195457e-07, "loss": 0.0024, "step": 14210 }, { "epoch": 3.46, "learning_rate": 9.306553029136855e-07, "loss": 0.0037, "step": 14212 }, { "epoch": 3.46, "learning_rate": 9.289936788741472e-07, "loss": 0.0031, "step": 14214 }, { "epoch": 3.46, "learning_rate": 9.273334672596535e-07, "loss": 0.004, "step": 14216 }, { "epoch": 3.46, "learning_rate": 9.256746683287143e-07, "loss": 0.0014, "step": 14218 }, { "epoch": 3.46, "learning_rate": 9.240172823396176e-07, "loss": 0.0027, "step": 14220 }, { "epoch": 3.47, "learning_rate": 9.223613095504336e-07, "loss": 0.0007, "step": 14222 }, { "epoch": 3.47, "learning_rate": 9.207067502190037e-07, "loss": 0.0015, "step": 14224 }, { "epoch": 3.47, "learning_rate": 9.190536046029618e-07, "loss": 0.0018, "step": 14226 }, { "epoch": 3.47, "learning_rate": 9.174018729597112e-07, "loss": 0.0019, "step": 14228 }, { "epoch": 3.47, "learning_rate": 9.157515555464414e-07, "loss": 0.0045, "step": 14230 }, { "epoch": 3.47, "learning_rate": 9.14102652620118e-07, "loss": 0.0008, "step": 14232 }, { "epoch": 3.47, "learning_rate": 9.124551644374868e-07, "loss": 0.0031, "step": 14234 }, { "epoch": 3.47, "learning_rate": 9.10809091255076e-07, "loss": 0.0036, "step": 14236 }, { "epoch": 3.47, "learning_rate": 9.091644333291938e-07, "loss": 0.002, "step": 14238 }, { "epoch": 3.47, "learning_rate": 9.075211909159242e-07, "loss": 0.0047, "step": 14240 }, { "epoch": 3.47, "learning_rate": 9.058793642711294e-07, "loss": 0.0032, "step": 14242 }, { "epoch": 3.47, "learning_rate": 9.042389536504581e-07, "loss": 0.0038, "step": 14244 }, { "epoch": 3.47, "learning_rate": 9.025999593093349e-07, "loss": 0.0024, "step": 14246 }, { "epoch": 3.47, "learning_rate": 9.009623815029611e-07, "loss": 0.0021, "step": 14248 }, { "epoch": 3.47, "learning_rate": 8.993262204863218e-07, "loss": 0.0045, "step": 14250 }, { "epoch": 3.47, "learning_rate": 8.976914765141809e-07, "loss": 0.0021, "step": 14252 }, { "epoch": 3.47, "learning_rate": 8.960581498410803e-07, "loss": 0.0035, "step": 14254 }, { "epoch": 3.47, "learning_rate": 8.944262407213378e-07, "loss": 0.0018, "step": 14256 }, { "epoch": 3.47, "learning_rate": 8.927957494090567e-07, "loss": 0.0007, "step": 14258 }, { "epoch": 3.47, "learning_rate": 8.911666761581173e-07, "loss": 0.0065, "step": 14260 }, { "epoch": 3.48, "learning_rate": 8.895390212221811e-07, "loss": 0.0014, "step": 14262 }, { "epoch": 3.48, "learning_rate": 8.879127848546809e-07, "loss": 0.0007, "step": 14264 }, { "epoch": 3.48, "learning_rate": 8.862879673088398e-07, "loss": 0.0065, "step": 14266 }, { "epoch": 3.48, "learning_rate": 8.846645688376488e-07, "loss": 0.0061, "step": 14268 }, { "epoch": 3.48, "learning_rate": 8.830425896938888e-07, "loss": 0.0015, "step": 14270 }, { "epoch": 3.48, "learning_rate": 8.81422030130109e-07, "loss": 0.0022, "step": 14272 }, { "epoch": 3.48, "learning_rate": 8.798028903986467e-07, "loss": 0.0023, "step": 14274 }, { "epoch": 3.48, "learning_rate": 8.781851707516131e-07, "loss": 0.0019, "step": 14276 }, { "epoch": 3.48, "learning_rate": 8.765688714409016e-07, "loss": 0.0041, "step": 14278 }, { "epoch": 3.48, "learning_rate": 8.749539927181782e-07, "loss": 0.0055, "step": 14280 }, { "epoch": 3.48, "learning_rate": 8.733405348348967e-07, "loss": 0.0025, "step": 14282 }, { "epoch": 3.48, "learning_rate": 8.717284980422791e-07, "loss": 0.0039, "step": 14284 }, { "epoch": 3.48, "learning_rate": 8.701178825913382e-07, "loss": 0.0015, "step": 14286 }, { "epoch": 3.48, "learning_rate": 8.685086887328542e-07, "loss": 0.005, "step": 14288 }, { "epoch": 3.48, "learning_rate": 8.669009167173925e-07, "loss": 0.0014, "step": 14290 }, { "epoch": 3.48, "learning_rate": 8.65294566795295e-07, "loss": 0.0042, "step": 14292 }, { "epoch": 3.48, "learning_rate": 8.63689639216686e-07, "loss": 0.0014, "step": 14294 }, { "epoch": 3.48, "learning_rate": 8.620861342314624e-07, "loss": 0.0025, "step": 14296 }, { "epoch": 3.48, "learning_rate": 8.604840520892998e-07, "loss": 0.0031, "step": 14298 }, { "epoch": 3.48, "learning_rate": 8.588833930396578e-07, "loss": 0.0035, "step": 14300 }, { "epoch": 3.48, "learning_rate": 8.572841573317714e-07, "loss": 0.0045, "step": 14302 }, { "epoch": 3.49, "learning_rate": 8.556863452146513e-07, "loss": 0.003, "step": 14304 }, { "epoch": 3.49, "learning_rate": 8.540899569370909e-07, "loss": 0.004, "step": 14306 }, { "epoch": 3.49, "learning_rate": 8.524949927476611e-07, "loss": 0.002, "step": 14308 }, { "epoch": 3.49, "learning_rate": 8.50901452894709e-07, "loss": 0.0029, "step": 14310 }, { "epoch": 3.49, "learning_rate": 8.493093376263584e-07, "loss": 0.0022, "step": 14312 }, { "epoch": 3.49, "learning_rate": 8.477186471905164e-07, "loss": 0.0008, "step": 14314 }, { "epoch": 3.49, "learning_rate": 8.46129381834866e-07, "loss": 0.0049, "step": 14316 }, { "epoch": 3.49, "learning_rate": 8.445415418068681e-07, "loss": 0.0005, "step": 14318 }, { "epoch": 3.49, "learning_rate": 8.429551273537595e-07, "loss": 0.0034, "step": 14320 }, { "epoch": 3.49, "learning_rate": 8.413701387225604e-07, "loss": 0.0063, "step": 14322 }, { "epoch": 3.49, "learning_rate": 8.39786576160061e-07, "loss": 0.0025, "step": 14324 }, { "epoch": 3.49, "learning_rate": 8.382044399128386e-07, "loss": 0.003, "step": 14326 }, { "epoch": 3.49, "learning_rate": 8.366237302272407e-07, "loss": 0.0029, "step": 14328 }, { "epoch": 3.49, "learning_rate": 8.350444473493968e-07, "loss": 0.0027, "step": 14330 }, { "epoch": 3.49, "learning_rate": 8.33466591525216e-07, "loss": 0.0032, "step": 14332 }, { "epoch": 3.49, "learning_rate": 8.318901630003773e-07, "loss": 0.0048, "step": 14334 }, { "epoch": 3.49, "learning_rate": 8.303151620203464e-07, "loss": 0.0021, "step": 14336 }, { "epoch": 3.49, "learning_rate": 8.287415888303641e-07, "loss": 0.0051, "step": 14338 }, { "epoch": 3.49, "learning_rate": 8.271694436754451e-07, "loss": 0.0035, "step": 14340 }, { "epoch": 3.49, "learning_rate": 8.25598726800384e-07, "loss": 0.0014, "step": 14342 }, { "epoch": 3.5, "learning_rate": 8.240294384497538e-07, "loss": 0.0024, "step": 14344 }, { "epoch": 3.5, "learning_rate": 8.224615788679058e-07, "loss": 0.0036, "step": 14346 }, { "epoch": 3.5, "learning_rate": 8.208951482989691e-07, "loss": 0.003, "step": 14348 }, { "epoch": 3.5, "learning_rate": 8.193301469868464e-07, "loss": 0.0035, "step": 14350 }, { "epoch": 3.5, "learning_rate": 8.177665751752217e-07, "loss": 0.0032, "step": 14352 }, { "epoch": 3.5, "learning_rate": 8.162044331075536e-07, "loss": 0.0016, "step": 14354 }, { "epoch": 3.5, "learning_rate": 8.146437210270819e-07, "loss": 0.0009, "step": 14356 }, { "epoch": 3.5, "learning_rate": 8.130844391768189e-07, "loss": 0.0027, "step": 14358 }, { "epoch": 3.5, "learning_rate": 8.11526587799557e-07, "loss": 0.0026, "step": 14360 }, { "epoch": 3.5, "learning_rate": 8.099701671378668e-07, "loss": 0.0018, "step": 14362 }, { "epoch": 3.5, "learning_rate": 8.084151774340965e-07, "loss": 0.0008, "step": 14364 }, { "epoch": 3.5, "learning_rate": 8.068616189303679e-07, "loss": 0.0014, "step": 14366 }, { "epoch": 3.5, "learning_rate": 8.053094918685799e-07, "loss": 0.003, "step": 14368 }, { "epoch": 3.5, "learning_rate": 8.037587964904136e-07, "loss": 0.0008, "step": 14370 }, { "epoch": 3.5, "learning_rate": 8.022095330373236e-07, "loss": 0.0023, "step": 14372 }, { "epoch": 3.5, "learning_rate": 8.006617017505402e-07, "loss": 0.0011, "step": 14374 }, { "epoch": 3.5, "learning_rate": 7.991153028710741e-07, "loss": 0.0031, "step": 14376 }, { "epoch": 3.5, "learning_rate": 7.975703366397114e-07, "loss": 0.0016, "step": 14378 }, { "epoch": 3.5, "learning_rate": 7.960268032970175e-07, "loss": 0.0003, "step": 14380 }, { "epoch": 3.5, "learning_rate": 7.944847030833292e-07, "loss": 0.0029, "step": 14382 }, { "epoch": 3.5, "learning_rate": 7.929440362387619e-07, "loss": 0.0015, "step": 14384 }, { "epoch": 3.51, "learning_rate": 7.914048030032117e-07, "loss": 0.001, "step": 14386 }, { "epoch": 3.51, "learning_rate": 7.898670036163503e-07, "loss": 0.0038, "step": 14388 }, { "epoch": 3.51, "learning_rate": 7.883306383176215e-07, "loss": 0.0021, "step": 14390 }, { "epoch": 3.51, "learning_rate": 7.867957073462507e-07, "loss": 0.0061, "step": 14392 }, { "epoch": 3.51, "learning_rate": 7.8526221094124e-07, "loss": 0.0035, "step": 14394 }, { "epoch": 3.51, "learning_rate": 7.83730149341364e-07, "loss": 0.0008, "step": 14396 }, { "epoch": 3.51, "learning_rate": 7.821995227851775e-07, "loss": 0.0023, "step": 14398 }, { "epoch": 3.51, "learning_rate": 7.806703315110098e-07, "loss": 0.0026, "step": 14400 }, { "epoch": 3.51, "learning_rate": 7.791425757569682e-07, "loss": 0.0031, "step": 14402 }, { "epoch": 3.51, "learning_rate": 7.776162557609379e-07, "loss": 0.002, "step": 14404 }, { "epoch": 3.51, "learning_rate": 7.760913717605756e-07, "loss": 0.001, "step": 14406 }, { "epoch": 3.51, "learning_rate": 7.745679239933202e-07, "loss": 0.0014, "step": 14408 }, { "epoch": 3.51, "learning_rate": 7.730459126963808e-07, "loss": 0.0051, "step": 14410 }, { "epoch": 3.51, "learning_rate": 7.7152533810675e-07, "loss": 0.005, "step": 14412 }, { "epoch": 3.51, "learning_rate": 7.700062004611897e-07, "loss": 0.0026, "step": 14414 }, { "epoch": 3.51, "learning_rate": 7.684884999962428e-07, "loss": 0.0006, "step": 14416 }, { "epoch": 3.51, "learning_rate": 7.669722369482258e-07, "loss": 0.0015, "step": 14418 }, { "epoch": 3.51, "learning_rate": 7.654574115532353e-07, "loss": 0.004, "step": 14420 }, { "epoch": 3.51, "learning_rate": 7.639440240471385e-07, "loss": 0.0015, "step": 14422 }, { "epoch": 3.51, "learning_rate": 7.624320746655811e-07, "loss": 0.0028, "step": 14424 }, { "epoch": 3.52, "learning_rate": 7.60921563643986e-07, "loss": 0.0024, "step": 14426 }, { "epoch": 3.52, "learning_rate": 7.594124912175527e-07, "loss": 0.0034, "step": 14428 }, { "epoch": 3.52, "learning_rate": 7.579048576212534e-07, "loss": 0.001, "step": 14430 }, { "epoch": 3.52, "learning_rate": 7.563986630898379e-07, "loss": 0.0015, "step": 14432 }, { "epoch": 3.52, "learning_rate": 7.548939078578332e-07, "loss": 0.0014, "step": 14434 }, { "epoch": 3.52, "learning_rate": 7.53390592159543e-07, "loss": 0.0023, "step": 14436 }, { "epoch": 3.52, "learning_rate": 7.518887162290433e-07, "loss": 0.0035, "step": 14438 }, { "epoch": 3.52, "learning_rate": 7.50388280300186e-07, "loss": 0.0033, "step": 14440 }, { "epoch": 3.52, "learning_rate": 7.48889284606602e-07, "loss": 0.0026, "step": 14442 }, { "epoch": 3.52, "learning_rate": 7.473917293816979e-07, "loss": 0.007, "step": 14444 }, { "epoch": 3.52, "learning_rate": 7.458956148586516e-07, "loss": 0.0022, "step": 14446 }, { "epoch": 3.52, "learning_rate": 7.444009412704211e-07, "loss": 0.003, "step": 14448 }, { "epoch": 3.52, "learning_rate": 7.429077088497393e-07, "loss": 0.0048, "step": 14450 }, { "epoch": 3.52, "learning_rate": 7.414159178291136e-07, "loss": 0.0005, "step": 14452 }, { "epoch": 3.52, "learning_rate": 7.399255684408246e-07, "loss": 0.008, "step": 14454 }, { "epoch": 3.52, "learning_rate": 7.384366609169336e-07, "loss": 0.0022, "step": 14456 }, { "epoch": 3.52, "learning_rate": 7.369491954892749e-07, "loss": 0.0035, "step": 14458 }, { "epoch": 3.52, "learning_rate": 7.35463172389459e-07, "loss": 0.0021, "step": 14460 }, { "epoch": 3.52, "learning_rate": 7.339785918488673e-07, "loss": 0.0017, "step": 14462 }, { "epoch": 3.52, "learning_rate": 7.32495454098665e-07, "loss": 0.0013, "step": 14464 }, { "epoch": 3.52, "learning_rate": 7.310137593697853e-07, "loss": 0.0038, "step": 14466 }, { "epoch": 3.53, "learning_rate": 7.2953350789294e-07, "loss": 0.0014, "step": 14468 }, { "epoch": 3.53, "learning_rate": 7.280546998986149e-07, "loss": 0.0025, "step": 14470 }, { "epoch": 3.53, "learning_rate": 7.265773356170724e-07, "loss": 0.0037, "step": 14472 }, { "epoch": 3.53, "learning_rate": 7.251014152783487e-07, "loss": 0.001, "step": 14474 }, { "epoch": 3.53, "learning_rate": 7.236269391122586e-07, "loss": 0.0025, "step": 14476 }, { "epoch": 3.53, "learning_rate": 7.221539073483863e-07, "loss": 0.0036, "step": 14478 }, { "epoch": 3.53, "learning_rate": 7.206823202160951e-07, "loss": 0.0021, "step": 14480 }, { "epoch": 3.53, "learning_rate": 7.192121779445227e-07, "loss": 0.0011, "step": 14482 }, { "epoch": 3.53, "learning_rate": 7.177434807625816e-07, "loss": 0.0038, "step": 14484 }, { "epoch": 3.53, "learning_rate": 7.162762288989567e-07, "loss": 0.0018, "step": 14486 }, { "epoch": 3.53, "learning_rate": 7.148104225821128e-07, "loss": 0.0015, "step": 14488 }, { "epoch": 3.53, "learning_rate": 7.133460620402877e-07, "loss": 0.0024, "step": 14490 }, { "epoch": 3.53, "learning_rate": 7.118831475014931e-07, "loss": 0.0032, "step": 14492 }, { "epoch": 3.53, "learning_rate": 7.104216791935148e-07, "loss": 0.0019, "step": 14494 }, { "epoch": 3.53, "learning_rate": 7.089616573439151e-07, "loss": 0.0016, "step": 14496 }, { "epoch": 3.53, "learning_rate": 7.075030821800299e-07, "loss": 0.0014, "step": 14498 }, { "epoch": 3.53, "learning_rate": 7.060459539289733e-07, "loss": 0.003, "step": 14500 }, { "epoch": 3.53, "learning_rate": 7.045902728176268e-07, "loss": 0.0021, "step": 14502 }, { "epoch": 3.53, "learning_rate": 7.03136039072655e-07, "loss": 0.0036, "step": 14504 }, { "epoch": 3.53, "learning_rate": 7.01683252920492e-07, "loss": 0.0045, "step": 14506 }, { "epoch": 3.54, "learning_rate": 7.002319145873482e-07, "loss": 0.0043, "step": 14508 }, { "epoch": 3.54, "learning_rate": 6.98782024299206e-07, "loss": 0.0058, "step": 14510 }, { "epoch": 3.54, "learning_rate": 6.97333582281825e-07, "loss": 0.0037, "step": 14512 }, { "epoch": 3.54, "learning_rate": 6.958865887607402e-07, "loss": 0.0019, "step": 14514 }, { "epoch": 3.54, "learning_rate": 6.944410439612603e-07, "loss": 0.0009, "step": 14516 }, { "epoch": 3.54, "learning_rate": 6.929969481084642e-07, "loss": 0.0019, "step": 14518 }, { "epoch": 3.54, "learning_rate": 6.91554301427213e-07, "loss": 0.0003, "step": 14520 }, { "epoch": 3.54, "learning_rate": 6.901131041421327e-07, "loss": 0.0005, "step": 14522 }, { "epoch": 3.54, "learning_rate": 6.886733564776349e-07, "loss": 0.004, "step": 14524 }, { "epoch": 3.54, "learning_rate": 6.872350586578935e-07, "loss": 0.0017, "step": 14526 }, { "epoch": 3.54, "learning_rate": 6.857982109068639e-07, "loss": 0.0039, "step": 14528 }, { "epoch": 3.54, "learning_rate": 6.843628134482771e-07, "loss": 0.0013, "step": 14530 }, { "epoch": 3.54, "learning_rate": 6.829288665056344e-07, "loss": 0.0058, "step": 14532 }, { "epoch": 3.54, "learning_rate": 6.814963703022104e-07, "loss": 0.0069, "step": 14534 }, { "epoch": 3.54, "learning_rate": 6.800653250610578e-07, "loss": 0.0012, "step": 14536 }, { "epoch": 3.54, "learning_rate": 6.786357310049984e-07, "loss": 0.0019, "step": 14538 }, { "epoch": 3.54, "learning_rate": 6.772075883566353e-07, "loss": 0.0035, "step": 14540 }, { "epoch": 3.54, "learning_rate": 6.757808973383373e-07, "loss": 0.0036, "step": 14542 }, { "epoch": 3.54, "learning_rate": 6.743556581722532e-07, "loss": 0.003, "step": 14544 }, { "epoch": 3.54, "learning_rate": 6.729318710803024e-07, "loss": 0.0032, "step": 14546 }, { "epoch": 3.54, "learning_rate": 6.715095362841817e-07, "loss": 0.0044, "step": 14548 }, { "epoch": 3.55, "learning_rate": 6.700886540053575e-07, "loss": 0.002, "step": 14550 }, { "epoch": 3.55, "learning_rate": 6.686692244650716e-07, "loss": 0.0019, "step": 14552 }, { "epoch": 3.55, "learning_rate": 6.672512478843407e-07, "loss": 0.003, "step": 14554 }, { "epoch": 3.55, "learning_rate": 6.658347244839558e-07, "loss": 0.0023, "step": 14556 }, { "epoch": 3.55, "learning_rate": 6.644196544844784e-07, "loss": 0.0022, "step": 14558 }, { "epoch": 3.55, "learning_rate": 6.630060381062464e-07, "loss": 0.0019, "step": 14560 }, { "epoch": 3.55, "learning_rate": 6.61593875569373e-07, "loss": 0.0044, "step": 14562 }, { "epoch": 3.55, "learning_rate": 6.601831670937409e-07, "loss": 0.005, "step": 14564 }, { "epoch": 3.55, "learning_rate": 6.587739128990056e-07, "loss": 0.0041, "step": 14566 }, { "epoch": 3.55, "learning_rate": 6.573661132046016e-07, "loss": 0.0021, "step": 14568 }, { "epoch": 3.55, "learning_rate": 6.559597682297337e-07, "loss": 0.0018, "step": 14570 }, { "epoch": 3.55, "learning_rate": 6.545548781933819e-07, "loss": 0.0034, "step": 14572 }, { "epoch": 3.55, "learning_rate": 6.53151443314296e-07, "loss": 0.0037, "step": 14574 }, { "epoch": 3.55, "learning_rate": 6.517494638110033e-07, "loss": 0.0028, "step": 14576 }, { "epoch": 3.55, "learning_rate": 6.503489399018004e-07, "loss": 0.0035, "step": 14578 }, { "epoch": 3.55, "learning_rate": 6.489498718047626e-07, "loss": 0.0038, "step": 14580 }, { "epoch": 3.55, "learning_rate": 6.475522597377326e-07, "loss": 0.002, "step": 14582 }, { "epoch": 3.55, "learning_rate": 6.461561039183306e-07, "loss": 0.0037, "step": 14584 }, { "epoch": 3.55, "learning_rate": 6.447614045639494e-07, "loss": 0.0023, "step": 14586 }, { "epoch": 3.55, "learning_rate": 6.433681618917542e-07, "loss": 0.003, "step": 14588 }, { "epoch": 3.56, "learning_rate": 6.419763761186826e-07, "loss": 0.001, "step": 14590 }, { "epoch": 3.56, "learning_rate": 6.405860474614478e-07, "loss": 0.0041, "step": 14592 }, { "epoch": 3.56, "learning_rate": 6.391971761365323e-07, "loss": 0.0035, "step": 14594 }, { "epoch": 3.56, "learning_rate": 6.378097623601964e-07, "loss": 0.0013, "step": 14596 }, { "epoch": 3.56, "learning_rate": 6.364238063484684e-07, "loss": 0.0037, "step": 14598 }, { "epoch": 3.56, "learning_rate": 6.350393083171535e-07, "loss": 0.0043, "step": 14600 }, { "epoch": 3.56, "learning_rate": 6.336562684818292e-07, "loss": 0.0039, "step": 14602 }, { "epoch": 3.56, "learning_rate": 6.322746870578477e-07, "loss": 0.007, "step": 14604 }, { "epoch": 3.56, "learning_rate": 6.308945642603281e-07, "loss": 0.0037, "step": 14606 }, { "epoch": 3.56, "learning_rate": 6.295159003041651e-07, "loss": 0.0021, "step": 14608 }, { "epoch": 3.56, "learning_rate": 6.281386954040303e-07, "loss": 0.0013, "step": 14610 }, { "epoch": 3.56, "learning_rate": 6.267629497743643e-07, "loss": 0.0011, "step": 14612 }, { "epoch": 3.56, "learning_rate": 6.253886636293805e-07, "loss": 0.0014, "step": 14614 }, { "epoch": 3.56, "learning_rate": 6.240158371830662e-07, "loss": 0.0032, "step": 14616 }, { "epoch": 3.56, "learning_rate": 6.226444706491819e-07, "loss": 0.0019, "step": 14618 }, { "epoch": 3.56, "learning_rate": 6.212745642412587e-07, "loss": 0.0017, "step": 14620 }, { "epoch": 3.56, "learning_rate": 6.199061181726007e-07, "loss": 0.0027, "step": 14622 }, { "epoch": 3.56, "learning_rate": 6.185391326562862e-07, "loss": 0.0028, "step": 14624 }, { "epoch": 3.56, "learning_rate": 6.171736079051661e-07, "loss": 0.0054, "step": 14626 }, { "epoch": 3.56, "learning_rate": 6.158095441318634e-07, "loss": 0.0014, "step": 14628 }, { "epoch": 3.56, "learning_rate": 6.144469415487709e-07, "loss": 0.0021, "step": 14630 }, { "epoch": 3.57, "learning_rate": 6.130858003680574e-07, "loss": 0.0024, "step": 14632 }, { "epoch": 3.57, "learning_rate": 6.117261208016645e-07, "loss": 0.0023, "step": 14634 }, { "epoch": 3.57, "learning_rate": 6.103679030613042e-07, "loss": 0.0007, "step": 14636 }, { "epoch": 3.57, "learning_rate": 6.090111473584581e-07, "loss": 0.0028, "step": 14638 }, { "epoch": 3.57, "learning_rate": 6.076558539043875e-07, "loss": 0.002, "step": 14640 }, { "epoch": 3.57, "learning_rate": 6.063020229101191e-07, "loss": 0.0008, "step": 14642 }, { "epoch": 3.57, "learning_rate": 6.049496545864586e-07, "loss": 0.004, "step": 14644 }, { "epoch": 3.57, "learning_rate": 6.035987491439754e-07, "loss": 0.0012, "step": 14646 }, { "epoch": 3.57, "learning_rate": 6.022493067930191e-07, "loss": 0.0014, "step": 14648 }, { "epoch": 3.57, "learning_rate": 6.009013277437059e-07, "loss": 0.0026, "step": 14650 }, { "epoch": 3.57, "learning_rate": 5.995548122059292e-07, "loss": 0.0056, "step": 14652 }, { "epoch": 3.57, "learning_rate": 5.982097603893488e-07, "loss": 0.0016, "step": 14654 }, { "epoch": 3.57, "learning_rate": 5.968661725034008e-07, "loss": 0.0033, "step": 14656 }, { "epoch": 3.57, "learning_rate": 5.955240487572922e-07, "loss": 0.0039, "step": 14658 }, { "epoch": 3.57, "learning_rate": 5.941833893600036e-07, "loss": 0.0022, "step": 14660 }, { "epoch": 3.57, "learning_rate": 5.928441945202846e-07, "loss": 0.0012, "step": 14662 }, { "epoch": 3.57, "learning_rate": 5.915064644466562e-07, "loss": 0.0006, "step": 14664 }, { "epoch": 3.57, "learning_rate": 5.90170199347414e-07, "loss": 0.0008, "step": 14666 }, { "epoch": 3.57, "learning_rate": 5.888353994306273e-07, "loss": 0.0003, "step": 14668 }, { "epoch": 3.57, "learning_rate": 5.875020649041318e-07, "loss": 0.0054, "step": 14670 }, { "epoch": 3.58, "learning_rate": 5.861701959755384e-07, "loss": 0.0012, "step": 14672 }, { "epoch": 3.58, "learning_rate": 5.848397928522309e-07, "loss": 0.0017, "step": 14674 }, { "epoch": 3.58, "learning_rate": 5.835108557413627e-07, "loss": 0.0016, "step": 14676 }, { "epoch": 3.58, "learning_rate": 5.82183384849857e-07, "loss": 0.0048, "step": 14678 }, { "epoch": 3.58, "learning_rate": 5.808573803844131e-07, "loss": 0.0038, "step": 14680 }, { "epoch": 3.58, "learning_rate": 5.795328425515001e-07, "loss": 0.0023, "step": 14682 }, { "epoch": 3.58, "learning_rate": 5.782097715573609e-07, "loss": 0.0016, "step": 14684 }, { "epoch": 3.58, "learning_rate": 5.768881676080029e-07, "loss": 0.0017, "step": 14686 }, { "epoch": 3.58, "learning_rate": 5.755680309092127e-07, "loss": 0.0025, "step": 14688 }, { "epoch": 3.58, "learning_rate": 5.742493616665468e-07, "loss": 0.0022, "step": 14690 }, { "epoch": 3.58, "learning_rate": 5.729321600853311e-07, "loss": 0.0026, "step": 14692 }, { "epoch": 3.58, "learning_rate": 5.716164263706614e-07, "loss": 0.0028, "step": 14694 }, { "epoch": 3.58, "learning_rate": 5.703021607274095e-07, "loss": 0.0052, "step": 14696 }, { "epoch": 3.58, "learning_rate": 5.689893633602173e-07, "loss": 0.0017, "step": 14698 }, { "epoch": 3.58, "learning_rate": 5.676780344734989e-07, "loss": 0.0023, "step": 14700 }, { "epoch": 3.58, "learning_rate": 5.663681742714344e-07, "loss": 0.0036, "step": 14702 }, { "epoch": 3.58, "learning_rate": 5.650597829579818e-07, "loss": 0.0007, "step": 14704 }, { "epoch": 3.58, "learning_rate": 5.637528607368658e-07, "loss": 0.0029, "step": 14706 }, { "epoch": 3.58, "learning_rate": 5.62447407811586e-07, "loss": 0.0027, "step": 14708 }, { "epoch": 3.58, "learning_rate": 5.611434243854097e-07, "loss": 0.0046, "step": 14710 }, { "epoch": 3.58, "learning_rate": 5.598409106613778e-07, "loss": 0.0028, "step": 14712 }, { "epoch": 3.59, "learning_rate": 5.585398668423014e-07, "loss": 0.0013, "step": 14714 }, { "epoch": 3.59, "learning_rate": 5.572402931307641e-07, "loss": 0.0014, "step": 14716 }, { "epoch": 3.59, "learning_rate": 5.559421897291195e-07, "loss": 0.0041, "step": 14718 }, { "epoch": 3.59, "learning_rate": 5.546455568394904e-07, "loss": 0.0028, "step": 14720 }, { "epoch": 3.59, "learning_rate": 5.53350394663772e-07, "loss": 0.0047, "step": 14722 }, { "epoch": 3.59, "learning_rate": 5.520567034036351e-07, "loss": 0.0031, "step": 14724 }, { "epoch": 3.59, "learning_rate": 5.50764483260513e-07, "loss": 0.0019, "step": 14726 }, { "epoch": 3.59, "learning_rate": 5.49473734435615e-07, "loss": 0.0032, "step": 14728 }, { "epoch": 3.59, "learning_rate": 5.481844571299222e-07, "loss": 0.0024, "step": 14730 }, { "epoch": 3.59, "learning_rate": 5.468966515441854e-07, "loss": 0.0008, "step": 14732 }, { "epoch": 3.59, "learning_rate": 5.456103178789252e-07, "loss": 0.0016, "step": 14734 }, { "epoch": 3.59, "learning_rate": 5.443254563344302e-07, "loss": 0.0039, "step": 14736 }, { "epoch": 3.59, "learning_rate": 5.430420671107672e-07, "loss": 0.0009, "step": 14738 }, { "epoch": 3.59, "learning_rate": 5.417601504077686e-07, "loss": 0.0018, "step": 14740 }, { "epoch": 3.59, "learning_rate": 5.404797064250378e-07, "loss": 0.0034, "step": 14742 }, { "epoch": 3.59, "learning_rate": 5.3920073536195e-07, "loss": 0.0044, "step": 14744 }, { "epoch": 3.59, "learning_rate": 5.379232374176524e-07, "loss": 0.0013, "step": 14746 }, { "epoch": 3.59, "learning_rate": 5.366472127910605e-07, "loss": 0.0047, "step": 14748 }, { "epoch": 3.59, "learning_rate": 5.353726616808596e-07, "loss": 0.0012, "step": 14750 }, { "epoch": 3.59, "learning_rate": 5.340995842855068e-07, "loss": 0.0016, "step": 14752 }, { "epoch": 3.6, "learning_rate": 5.328279808032322e-07, "loss": 0.0004, "step": 14754 }, { "epoch": 3.6, "learning_rate": 5.31557851432033e-07, "loss": 0.0044, "step": 14756 }, { "epoch": 3.6, "learning_rate": 5.302891963696788e-07, "loss": 0.0037, "step": 14758 }, { "epoch": 3.6, "learning_rate": 5.290220158137083e-07, "loss": 0.0042, "step": 14760 }, { "epoch": 3.6, "learning_rate": 5.277563099614302e-07, "loss": 0.0047, "step": 14762 }, { "epoch": 3.6, "learning_rate": 5.26492079009927e-07, "loss": 0.0037, "step": 14764 }, { "epoch": 3.6, "learning_rate": 5.252293231560468e-07, "loss": 0.0014, "step": 14766 }, { "epoch": 3.6, "learning_rate": 5.2396804259641e-07, "loss": 0.0009, "step": 14768 }, { "epoch": 3.6, "learning_rate": 5.227082375274095e-07, "loss": 0.0034, "step": 14770 }, { "epoch": 3.6, "learning_rate": 5.214499081452084e-07, "loss": 0.0015, "step": 14772 }, { "epoch": 3.6, "learning_rate": 5.201930546457345e-07, "loss": 0.0015, "step": 14774 }, { "epoch": 3.6, "learning_rate": 5.189376772246901e-07, "loss": 0.005, "step": 14776 }, { "epoch": 3.6, "learning_rate": 5.176837760775466e-07, "loss": 0.002, "step": 14778 }, { "epoch": 3.6, "learning_rate": 5.1643135139955e-07, "loss": 0.0024, "step": 14780 }, { "epoch": 3.6, "learning_rate": 5.151804033857077e-07, "loss": 0.002, "step": 14782 }, { "epoch": 3.6, "learning_rate": 5.139309322308029e-07, "loss": 0.0014, "step": 14784 }, { "epoch": 3.6, "learning_rate": 5.126829381293896e-07, "loss": 0.0009, "step": 14786 }, { "epoch": 3.6, "learning_rate": 5.114364212757894e-07, "loss": 0.0017, "step": 14788 }, { "epoch": 3.6, "learning_rate": 5.101913818640958e-07, "loss": 0.0018, "step": 14790 }, { "epoch": 3.6, "learning_rate": 5.089478200881659e-07, "loss": 0.0016, "step": 14792 }, { "epoch": 3.6, "learning_rate": 5.077057361416371e-07, "loss": 0.0064, "step": 14794 }, { "epoch": 3.61, "learning_rate": 5.064651302179091e-07, "loss": 0.0027, "step": 14796 }, { "epoch": 3.61, "learning_rate": 5.052260025101541e-07, "loss": 0.0016, "step": 14798 }, { "epoch": 3.61, "learning_rate": 5.039883532113132e-07, "loss": 0.0019, "step": 14800 }, { "epoch": 3.61, "learning_rate": 5.027521825140991e-07, "loss": 0.0047, "step": 14802 }, { "epoch": 3.61, "learning_rate": 5.015174906109932e-07, "loss": 0.0015, "step": 14804 }, { "epoch": 3.61, "learning_rate": 5.00284277694244e-07, "loss": 0.0022, "step": 14806 }, { "epoch": 3.61, "learning_rate": 4.990525439558735e-07, "loss": 0.0046, "step": 14808 }, { "epoch": 3.61, "learning_rate": 4.978222895876727e-07, "loss": 0.003, "step": 14810 }, { "epoch": 3.61, "learning_rate": 4.965935147812028e-07, "loss": 0.0026, "step": 14812 }, { "epoch": 3.61, "learning_rate": 4.953662197277898e-07, "loss": 0.0046, "step": 14814 }, { "epoch": 3.61, "learning_rate": 4.941404046185372e-07, "loss": 0.0032, "step": 14816 }, { "epoch": 3.61, "learning_rate": 4.929160696443103e-07, "loss": 0.0037, "step": 14818 }, { "epoch": 3.61, "learning_rate": 4.916932149957488e-07, "loss": 0.003, "step": 14820 }, { "epoch": 3.61, "learning_rate": 4.904718408632602e-07, "loss": 0.0062, "step": 14822 }, { "epoch": 3.61, "learning_rate": 4.892519474370217e-07, "loss": 0.002, "step": 14824 }, { "epoch": 3.61, "learning_rate": 4.880335349069809e-07, "loss": 0.0017, "step": 14826 }, { "epoch": 3.61, "learning_rate": 4.868166034628541e-07, "loss": 0.0033, "step": 14828 }, { "epoch": 3.61, "learning_rate": 4.856011532941252e-07, "loss": 0.0009, "step": 14830 }, { "epoch": 3.61, "learning_rate": 4.843871845900505e-07, "loss": 0.0022, "step": 14832 }, { "epoch": 3.61, "learning_rate": 4.831746975396534e-07, "loss": 0.0019, "step": 14834 }, { "epoch": 3.62, "learning_rate": 4.819636923317284e-07, "loss": 0.0029, "step": 14836 }, { "epoch": 3.62, "learning_rate": 4.807541691548368e-07, "loss": 0.0045, "step": 14838 }, { "epoch": 3.62, "learning_rate": 4.795461281973113e-07, "loss": 0.0037, "step": 14840 }, { "epoch": 3.62, "learning_rate": 4.783395696472526e-07, "loss": 0.0047, "step": 14842 }, { "epoch": 3.62, "learning_rate": 4.771344936925337e-07, "loss": 0.0018, "step": 14844 }, { "epoch": 3.62, "learning_rate": 4.7593090052079237e-07, "loss": 0.0023, "step": 14846 }, { "epoch": 3.62, "learning_rate": 4.747287903194353e-07, "loss": 0.0046, "step": 14848 }, { "epoch": 3.62, "learning_rate": 4.735281632756439e-07, "loss": 0.003, "step": 14850 }, { "epoch": 3.62, "learning_rate": 4.723290195763608e-07, "loss": 0.0017, "step": 14852 }, { "epoch": 3.62, "learning_rate": 4.7113135940830447e-07, "loss": 0.0025, "step": 14854 }, { "epoch": 3.62, "learning_rate": 4.6993518295796015e-07, "loss": 0.0022, "step": 14856 }, { "epoch": 3.62, "learning_rate": 4.6874049041158107e-07, "loss": 0.0011, "step": 14858 }, { "epoch": 3.62, "learning_rate": 4.675472819551907e-07, "loss": 0.0024, "step": 14860 }, { "epoch": 3.62, "learning_rate": 4.663555577745782e-07, "loss": 0.0028, "step": 14862 }, { "epoch": 3.62, "learning_rate": 4.6516531805530615e-07, "loss": 0.0019, "step": 14864 }, { "epoch": 3.62, "learning_rate": 4.639765629827042e-07, "loss": 0.0001, "step": 14866 }, { "epoch": 3.62, "learning_rate": 4.627892927418698e-07, "loss": 0.002, "step": 14868 }, { "epoch": 3.62, "learning_rate": 4.6160350751766945e-07, "loss": 0.0009, "step": 14870 }, { "epoch": 3.62, "learning_rate": 4.604192074947411e-07, "loss": 0.0038, "step": 14872 }, { "epoch": 3.62, "learning_rate": 4.592363928574883e-07, "loss": 0.003, "step": 14874 }, { "epoch": 3.62, "learning_rate": 4.580550637900827e-07, "loss": 0.0014, "step": 14876 }, { "epoch": 3.63, "learning_rate": 4.5687522047646813e-07, "loss": 0.0021, "step": 14878 }, { "epoch": 3.63, "learning_rate": 4.5569686310035444e-07, "loss": 0.0025, "step": 14880 }, { "epoch": 3.63, "learning_rate": 4.5451999184522145e-07, "loss": 0.0026, "step": 14882 }, { "epoch": 3.63, "learning_rate": 4.533446068943159e-07, "loss": 0.0007, "step": 14884 }, { "epoch": 3.63, "learning_rate": 4.5217070843065593e-07, "loss": 0.0011, "step": 14886 }, { "epoch": 3.63, "learning_rate": 4.509982966370252e-07, "loss": 0.0023, "step": 14888 }, { "epoch": 3.63, "learning_rate": 4.498273716959789e-07, "loss": 0.0034, "step": 14890 }, { "epoch": 3.63, "learning_rate": 4.486579337898356e-07, "loss": 0.0016, "step": 14892 }, { "epoch": 3.63, "learning_rate": 4.474899831006885e-07, "loss": 0.0015, "step": 14894 }, { "epoch": 3.63, "learning_rate": 4.4632351981039543e-07, "loss": 0.002, "step": 14896 }, { "epoch": 3.63, "learning_rate": 4.451585441005857e-07, "loss": 0.0034, "step": 14898 }, { "epoch": 3.63, "learning_rate": 4.439950561526507e-07, "loss": 0.0018, "step": 14900 }, { "epoch": 3.63, "learning_rate": 4.4283305614775894e-07, "loss": 0.0034, "step": 14902 }, { "epoch": 3.63, "learning_rate": 4.41672544266839e-07, "loss": 0.0057, "step": 14904 }, { "epoch": 3.63, "learning_rate": 4.4051352069059526e-07, "loss": 0.0016, "step": 14906 }, { "epoch": 3.63, "learning_rate": 4.393559855994922e-07, "loss": 0.0027, "step": 14908 }, { "epoch": 3.63, "learning_rate": 4.381999391737701e-07, "loss": 0.0024, "step": 14910 }, { "epoch": 3.63, "learning_rate": 4.370453815934328e-07, "loss": 0.0026, "step": 14912 }, { "epoch": 3.63, "learning_rate": 4.358923130382553e-07, "loss": 0.0022, "step": 14914 }, { "epoch": 3.63, "learning_rate": 4.3474073368777736e-07, "loss": 0.0045, "step": 14916 }, { "epoch": 3.63, "learning_rate": 4.3359064372130886e-07, "loss": 0.0027, "step": 14918 }, { "epoch": 3.64, "learning_rate": 4.324420433179288e-07, "loss": 0.0021, "step": 14920 }, { "epoch": 3.64, "learning_rate": 4.312949326564819e-07, "loss": 0.0023, "step": 14922 }, { "epoch": 3.64, "learning_rate": 4.3014931191558307e-07, "loss": 0.0034, "step": 14924 }, { "epoch": 3.64, "learning_rate": 4.290051812736129e-07, "loss": 0.0016, "step": 14926 }, { "epoch": 3.64, "learning_rate": 4.278625409087234e-07, "loss": 0.004, "step": 14928 }, { "epoch": 3.64, "learning_rate": 4.2672139099882995e-07, "loss": 0.004, "step": 14930 }, { "epoch": 3.64, "learning_rate": 4.255817317216204e-07, "loss": 0.0015, "step": 14932 }, { "epoch": 3.64, "learning_rate": 4.244435632545463e-07, "loss": 0.0019, "step": 14934 }, { "epoch": 3.64, "learning_rate": 4.2330688577483014e-07, "loss": 0.0006, "step": 14936 }, { "epoch": 3.64, "learning_rate": 4.221716994594627e-07, "loss": 0.0014, "step": 14938 }, { "epoch": 3.64, "learning_rate": 4.2103800448519914e-07, "loss": 0.0019, "step": 14940 }, { "epoch": 3.64, "learning_rate": 4.1990580102856504e-07, "loss": 0.0014, "step": 14942 }, { "epoch": 3.64, "learning_rate": 4.1877508926585486e-07, "loss": 0.0012, "step": 14944 }, { "epoch": 3.64, "learning_rate": 4.176458693731278e-07, "loss": 0.0028, "step": 14946 }, { "epoch": 3.64, "learning_rate": 4.1651814152620985e-07, "loss": 0.001, "step": 14948 }, { "epoch": 3.64, "learning_rate": 4.1539190590069946e-07, "loss": 0.0011, "step": 14950 }, { "epoch": 3.64, "learning_rate": 4.1426716267195853e-07, "loss": 0.002, "step": 14952 }, { "epoch": 3.64, "learning_rate": 4.131439120151215e-07, "loss": 0.0077, "step": 14954 }, { "epoch": 3.64, "learning_rate": 4.1202215410508284e-07, "loss": 0.0021, "step": 14956 }, { "epoch": 3.64, "learning_rate": 4.1090188911651174e-07, "loss": 0.0025, "step": 14958 }, { "epoch": 3.65, "learning_rate": 4.0978311722383977e-07, "loss": 0.0058, "step": 14960 }, { "epoch": 3.65, "learning_rate": 4.0866583860127095e-07, "loss": 0.003, "step": 14962 }, { "epoch": 3.65, "learning_rate": 4.0755005342277167e-07, "loss": 0.001, "step": 14964 }, { "epoch": 3.65, "learning_rate": 4.0643576186207954e-07, "loss": 0.0015, "step": 14966 }, { "epoch": 3.65, "learning_rate": 4.05322964092697e-07, "loss": 0.0005, "step": 14968 }, { "epoch": 3.65, "learning_rate": 4.042116602878976e-07, "loss": 0.0013, "step": 14970 }, { "epoch": 3.65, "learning_rate": 4.031018506207185e-07, "loss": 0.0034, "step": 14972 }, { "epoch": 3.65, "learning_rate": 4.0199353526396477e-07, "loss": 0.0017, "step": 14974 }, { "epoch": 3.65, "learning_rate": 4.0088671439020953e-07, "loss": 0.0035, "step": 14976 }, { "epoch": 3.65, "learning_rate": 3.997813881717949e-07, "loss": 0.004, "step": 14978 }, { "epoch": 3.65, "learning_rate": 3.986775567808265e-07, "loss": 0.0045, "step": 14980 }, { "epoch": 3.65, "learning_rate": 3.9757522038918137e-07, "loss": 0.0031, "step": 14982 }, { "epoch": 3.65, "learning_rate": 3.9647437916849995e-07, "loss": 0.002, "step": 14984 }, { "epoch": 3.65, "learning_rate": 3.95375033290194e-07, "loss": 0.0018, "step": 14986 }, { "epoch": 3.65, "learning_rate": 3.942771829254388e-07, "loss": 0.0041, "step": 14988 }, { "epoch": 3.65, "learning_rate": 3.9318082824517656e-07, "loss": 0.0027, "step": 14990 }, { "epoch": 3.65, "learning_rate": 3.9208596942011956e-07, "loss": 0.0009, "step": 14992 }, { "epoch": 3.65, "learning_rate": 3.909926066207459e-07, "loss": 0.0049, "step": 14994 }, { "epoch": 3.65, "learning_rate": 3.899007400172994e-07, "loss": 0.0017, "step": 14996 }, { "epoch": 3.65, "learning_rate": 3.888103697797929e-07, "loss": 0.0012, "step": 14998 }, { "epoch": 3.65, "learning_rate": 3.8772149607800624e-07, "loss": 0.0025, "step": 15000 }, { "epoch": 3.66, "learning_rate": 3.8663411908148375e-07, "loss": 0.0029, "step": 15002 }, { "epoch": 3.66, "learning_rate": 3.8554823895953885e-07, "loss": 0.0037, "step": 15004 }, { "epoch": 3.66, "learning_rate": 3.844638558812508e-07, "loss": 0.0008, "step": 15006 }, { "epoch": 3.66, "learning_rate": 3.8338097001546783e-07, "loss": 0.0028, "step": 15008 }, { "epoch": 3.66, "learning_rate": 3.822995815308028e-07, "loss": 0.0008, "step": 15010 }, { "epoch": 3.66, "learning_rate": 3.812196905956356e-07, "loss": 0.0014, "step": 15012 }, { "epoch": 3.66, "learning_rate": 3.80141297378116e-07, "loss": 0.0027, "step": 15014 }, { "epoch": 3.66, "learning_rate": 3.7906440204615423e-07, "loss": 0.0056, "step": 15016 }, { "epoch": 3.66, "learning_rate": 3.779890047674339e-07, "loss": 0.0049, "step": 15018 }, { "epoch": 3.66, "learning_rate": 3.7691510570940115e-07, "loss": 0.0026, "step": 15020 }, { "epoch": 3.66, "learning_rate": 3.758427050392699e-07, "loss": 0.0036, "step": 15022 }, { "epoch": 3.66, "learning_rate": 3.7477180292402214e-07, "loss": 0.0004, "step": 15024 }, { "epoch": 3.66, "learning_rate": 3.737023995304079e-07, "loss": 0.0018, "step": 15026 }, { "epoch": 3.66, "learning_rate": 3.726344950249372e-07, "loss": 0.0021, "step": 15028 }, { "epoch": 3.66, "learning_rate": 3.7156808957389266e-07, "loss": 0.0034, "step": 15030 }, { "epoch": 3.66, "learning_rate": 3.7050318334332145e-07, "loss": 0.0025, "step": 15032 }, { "epoch": 3.66, "learning_rate": 3.694397764990398e-07, "loss": 0.0017, "step": 15034 }, { "epoch": 3.66, "learning_rate": 3.6837786920662534e-07, "loss": 0.0035, "step": 15036 }, { "epoch": 3.66, "learning_rate": 3.6731746163142567e-07, "loss": 0.0026, "step": 15038 }, { "epoch": 3.66, "learning_rate": 3.6625855393855437e-07, "loss": 0.0004, "step": 15040 }, { "epoch": 3.67, "learning_rate": 3.6520114629289504e-07, "loss": 0.0046, "step": 15042 }, { "epoch": 3.67, "learning_rate": 3.641452388590894e-07, "loss": 0.0025, "step": 15044 }, { "epoch": 3.67, "learning_rate": 3.6309083180155247e-07, "loss": 0.0025, "step": 15046 }, { "epoch": 3.67, "learning_rate": 3.620379252844619e-07, "loss": 0.0019, "step": 15048 }, { "epoch": 3.67, "learning_rate": 3.6098651947176657e-07, "loss": 0.0007, "step": 15050 }, { "epoch": 3.67, "learning_rate": 3.5993661452717433e-07, "loss": 0.0011, "step": 15052 }, { "epoch": 3.67, "learning_rate": 3.5888821061416556e-07, "loss": 0.0026, "step": 15054 }, { "epoch": 3.67, "learning_rate": 3.578413078959864e-07, "loss": 0.0027, "step": 15056 }, { "epoch": 3.67, "learning_rate": 3.567959065356452e-07, "loss": 0.0018, "step": 15058 }, { "epoch": 3.67, "learning_rate": 3.557520066959186e-07, "loss": 0.0034, "step": 15060 }, { "epoch": 3.67, "learning_rate": 3.5470960853935086e-07, "loss": 0.001, "step": 15062 }, { "epoch": 3.67, "learning_rate": 3.536687122282512e-07, "loss": 0.0024, "step": 15064 }, { "epoch": 3.67, "learning_rate": 3.5262931792469646e-07, "loss": 0.0022, "step": 15066 }, { "epoch": 3.67, "learning_rate": 3.515914257905262e-07, "loss": 0.0023, "step": 15068 }, { "epoch": 3.67, "learning_rate": 3.5055503598734996e-07, "loss": 0.0026, "step": 15070 }, { "epoch": 3.67, "learning_rate": 3.495201486765387e-07, "loss": 0.004, "step": 15072 }, { "epoch": 3.67, "learning_rate": 3.484867640192358e-07, "loss": 0.0018, "step": 15074 }, { "epoch": 3.67, "learning_rate": 3.474548821763446e-07, "loss": 0.0038, "step": 15076 }, { "epoch": 3.67, "learning_rate": 3.464245033085367e-07, "loss": 0.003, "step": 15078 }, { "epoch": 3.67, "learning_rate": 3.453956275762527e-07, "loss": 0.0051, "step": 15080 }, { "epoch": 3.67, "learning_rate": 3.443682551396954e-07, "loss": 0.0029, "step": 15082 }, { "epoch": 3.68, "learning_rate": 3.433423861588325e-07, "loss": 0.0012, "step": 15084 }, { "epoch": 3.68, "learning_rate": 3.423180207934029e-07, "loss": 0.0013, "step": 15086 }, { "epoch": 3.68, "learning_rate": 3.4129515920290455e-07, "loss": 0.0026, "step": 15088 }, { "epoch": 3.68, "learning_rate": 3.4027380154660893e-07, "loss": 0.002, "step": 15090 }, { "epoch": 3.68, "learning_rate": 3.3925394798354437e-07, "loss": 0.0028, "step": 15092 }, { "epoch": 3.68, "learning_rate": 3.382355986725139e-07, "loss": 0.0043, "step": 15094 }, { "epoch": 3.68, "learning_rate": 3.3721875377208056e-07, "loss": 0.003, "step": 15096 }, { "epoch": 3.68, "learning_rate": 3.362034134405756e-07, "loss": 0.0016, "step": 15098 }, { "epoch": 3.68, "learning_rate": 3.3518957783609476e-07, "loss": 0.0019, "step": 15100 }, { "epoch": 3.68, "learning_rate": 3.3417724711649944e-07, "loss": 0.0016, "step": 15102 }, { "epoch": 3.68, "learning_rate": 3.3316642143941814e-07, "loss": 0.0032, "step": 15104 }, { "epoch": 3.68, "learning_rate": 3.3215710096224483e-07, "loss": 0.0046, "step": 15106 }, { "epoch": 3.68, "learning_rate": 3.3114928584213614e-07, "loss": 0.0008, "step": 15108 }, { "epoch": 3.68, "learning_rate": 3.3014297623601865e-07, "loss": 0.0005, "step": 15110 }, { "epoch": 3.68, "learning_rate": 3.2913817230058265e-07, "loss": 0.0021, "step": 15112 }, { "epoch": 3.68, "learning_rate": 3.2813487419228295e-07, "loss": 0.0018, "step": 15114 }, { "epoch": 3.68, "learning_rate": 3.2713308206733907e-07, "loss": 0.0031, "step": 15116 }, { "epoch": 3.68, "learning_rate": 3.2613279608173953e-07, "loss": 0.0012, "step": 15118 }, { "epoch": 3.68, "learning_rate": 3.2513401639123643e-07, "loss": 0.0028, "step": 15120 }, { "epoch": 3.68, "learning_rate": 3.241367431513487e-07, "loss": 0.0021, "step": 15122 }, { "epoch": 3.69, "learning_rate": 3.2314097651735657e-07, "loss": 0.0026, "step": 15124 }, { "epoch": 3.69, "learning_rate": 3.221467166443115e-07, "loss": 0.0011, "step": 15126 }, { "epoch": 3.69, "learning_rate": 3.211539636870242e-07, "loss": 0.0025, "step": 15128 }, { "epoch": 3.69, "learning_rate": 3.2016271780007766e-07, "loss": 0.0038, "step": 15130 }, { "epoch": 3.69, "learning_rate": 3.1917297913781176e-07, "loss": 0.0001, "step": 15132 }, { "epoch": 3.69, "learning_rate": 3.1818474785433985e-07, "loss": 0.0046, "step": 15134 }, { "epoch": 3.69, "learning_rate": 3.171980241035355e-07, "loss": 0.0042, "step": 15136 }, { "epoch": 3.69, "learning_rate": 3.162128080390414e-07, "loss": 0.0014, "step": 15138 }, { "epoch": 3.69, "learning_rate": 3.152290998142604e-07, "loss": 0.0024, "step": 15140 }, { "epoch": 3.69, "learning_rate": 3.1424689958236556e-07, "loss": 0.0024, "step": 15142 }, { "epoch": 3.69, "learning_rate": 3.132662074962911e-07, "loss": 0.0007, "step": 15144 }, { "epoch": 3.69, "learning_rate": 3.122870237087405e-07, "loss": 0.0037, "step": 15146 }, { "epoch": 3.69, "learning_rate": 3.113093483721774e-07, "loss": 0.0016, "step": 15148 }, { "epoch": 3.69, "learning_rate": 3.1033318163883553e-07, "loss": 0.0036, "step": 15150 }, { "epoch": 3.69, "learning_rate": 3.0935852366070995e-07, "loss": 0.0018, "step": 15152 }, { "epoch": 3.69, "learning_rate": 3.0838537458956487e-07, "loss": 0.0032, "step": 15154 }, { "epoch": 3.69, "learning_rate": 3.074137345769257e-07, "loss": 0.004, "step": 15156 }, { "epoch": 3.69, "learning_rate": 3.064436037740814e-07, "loss": 0.0039, "step": 15158 }, { "epoch": 3.69, "learning_rate": 3.054749823320924e-07, "loss": 0.0009, "step": 15160 }, { "epoch": 3.69, "learning_rate": 3.045078704017801e-07, "loss": 0.0027, "step": 15162 }, { "epoch": 3.69, "learning_rate": 3.0354226813372967e-07, "loss": 0.0024, "step": 15164 }, { "epoch": 3.7, "learning_rate": 3.025781756782931e-07, "loss": 0.0009, "step": 15166 }, { "epoch": 3.7, "learning_rate": 3.0161559318558796e-07, "loss": 0.0025, "step": 15168 }, { "epoch": 3.7, "learning_rate": 3.0065452080549564e-07, "loss": 0.0012, "step": 15170 }, { "epoch": 3.7, "learning_rate": 2.996949586876607e-07, "loss": 0.0029, "step": 15172 }, { "epoch": 3.7, "learning_rate": 2.98736906981496e-07, "loss": 0.0028, "step": 15174 }, { "epoch": 3.7, "learning_rate": 2.9778036583617664e-07, "loss": 0.004, "step": 15176 }, { "epoch": 3.7, "learning_rate": 2.9682533540064564e-07, "loss": 0.0019, "step": 15178 }, { "epoch": 3.7, "learning_rate": 2.958718158236051e-07, "loss": 0.0023, "step": 15180 }, { "epoch": 3.7, "learning_rate": 2.9491980725352753e-07, "loss": 0.0034, "step": 15182 }, { "epoch": 3.7, "learning_rate": 2.939693098386465e-07, "loss": 0.0015, "step": 15184 }, { "epoch": 3.7, "learning_rate": 2.9302032372696356e-07, "loss": 0.0026, "step": 15186 }, { "epoch": 3.7, "learning_rate": 2.920728490662417e-07, "loss": 0.0061, "step": 15188 }, { "epoch": 3.7, "learning_rate": 2.911268860040095e-07, "loss": 0.0012, "step": 15190 }, { "epoch": 3.7, "learning_rate": 2.901824346875626e-07, "loss": 0.0055, "step": 15192 }, { "epoch": 3.7, "learning_rate": 2.892394952639588e-07, "loss": 0.0004, "step": 15194 }, { "epoch": 3.7, "learning_rate": 2.8829806788001846e-07, "loss": 0.0033, "step": 15196 }, { "epoch": 3.7, "learning_rate": 2.8735815268233323e-07, "loss": 0.0029, "step": 15198 }, { "epoch": 3.7, "learning_rate": 2.864197498172516e-07, "loss": 0.0042, "step": 15200 }, { "epoch": 3.7, "learning_rate": 2.8548285943089226e-07, "loss": 0.0037, "step": 15202 }, { "epoch": 3.7, "learning_rate": 2.845474816691329e-07, "loss": 0.003, "step": 15204 }, { "epoch": 3.71, "learning_rate": 2.836136166776227e-07, "loss": 0.0044, "step": 15206 }, { "epoch": 3.71, "learning_rate": 2.826812646017696e-07, "loss": 0.0014, "step": 15208 }, { "epoch": 3.71, "learning_rate": 2.8175042558675094e-07, "loss": 0.004, "step": 15210 }, { "epoch": 3.71, "learning_rate": 2.808210997775018e-07, "loss": 0.0014, "step": 15212 }, { "epoch": 3.71, "learning_rate": 2.7989328731872543e-07, "loss": 0.002, "step": 15214 }, { "epoch": 3.71, "learning_rate": 2.7896698835489065e-07, "loss": 0.0053, "step": 15216 }, { "epoch": 3.71, "learning_rate": 2.78042203030231e-07, "loss": 0.0015, "step": 15218 }, { "epoch": 3.71, "learning_rate": 2.7711893148873904e-07, "loss": 0.0025, "step": 15220 }, { "epoch": 3.71, "learning_rate": 2.7619717387417645e-07, "loss": 0.0013, "step": 15222 }, { "epoch": 3.71, "learning_rate": 2.7527693033007063e-07, "loss": 0.0002, "step": 15224 }, { "epoch": 3.71, "learning_rate": 2.74358200999707e-07, "loss": 0.0009, "step": 15226 }, { "epoch": 3.71, "learning_rate": 2.7344098602614e-07, "loss": 0.0017, "step": 15228 }, { "epoch": 3.71, "learning_rate": 2.725252855521865e-07, "loss": 0.0033, "step": 15230 }, { "epoch": 3.71, "learning_rate": 2.716110997204291e-07, "loss": 0.005, "step": 15232 }, { "epoch": 3.71, "learning_rate": 2.706984286732139e-07, "loss": 0.003, "step": 15234 }, { "epoch": 3.71, "learning_rate": 2.697872725526496e-07, "loss": 0.003, "step": 15236 }, { "epoch": 3.71, "learning_rate": 2.6887763150060917e-07, "loss": 0.0028, "step": 15238 }, { "epoch": 3.71, "learning_rate": 2.679695056587339e-07, "loss": 0.0028, "step": 15240 }, { "epoch": 3.71, "learning_rate": 2.67062895168424e-07, "loss": 0.0025, "step": 15242 }, { "epoch": 3.71, "learning_rate": 2.661578001708442e-07, "loss": 0.0015, "step": 15244 }, { "epoch": 3.71, "learning_rate": 2.6525422080692644e-07, "loss": 0.0043, "step": 15246 }, { "epoch": 3.72, "learning_rate": 2.6435215721736575e-07, "loss": 0.0012, "step": 15248 }, { "epoch": 3.72, "learning_rate": 2.6345160954261874e-07, "loss": 0.0024, "step": 15250 }, { "epoch": 3.72, "learning_rate": 2.625525779229077e-07, "loss": 0.0019, "step": 15252 }, { "epoch": 3.72, "learning_rate": 2.616550624982206e-07, "loss": 0.0028, "step": 15254 }, { "epoch": 3.72, "learning_rate": 2.607590634083046e-07, "loss": 0.0037, "step": 15256 }, { "epoch": 3.72, "learning_rate": 2.5986458079267587e-07, "loss": 0.0015, "step": 15258 }, { "epoch": 3.72, "learning_rate": 2.5897161479061073e-07, "loss": 0.0037, "step": 15260 }, { "epoch": 3.72, "learning_rate": 2.5808016554115136e-07, "loss": 0.0013, "step": 15262 }, { "epoch": 3.72, "learning_rate": 2.571902331831033e-07, "loss": 0.0022, "step": 15264 }, { "epoch": 3.72, "learning_rate": 2.5630181785503583e-07, "loss": 0.0026, "step": 15266 }, { "epoch": 3.72, "learning_rate": 2.5541491969528264e-07, "loss": 0.0045, "step": 15268 }, { "epoch": 3.72, "learning_rate": 2.5452953884193996e-07, "loss": 0.0026, "step": 15270 }, { "epoch": 3.72, "learning_rate": 2.536456754328664e-07, "loss": 0.0007, "step": 15272 }, { "epoch": 3.72, "learning_rate": 2.527633296056908e-07, "loss": 0.0029, "step": 15274 }, { "epoch": 3.72, "learning_rate": 2.518825014977966e-07, "loss": 0.0011, "step": 15276 }, { "epoch": 3.72, "learning_rate": 2.5100319124633734e-07, "loss": 0.0035, "step": 15278 }, { "epoch": 3.72, "learning_rate": 2.501253989882302e-07, "loss": 0.0026, "step": 15280 }, { "epoch": 3.72, "learning_rate": 2.492491248601503e-07, "loss": 0.0031, "step": 15282 }, { "epoch": 3.72, "learning_rate": 2.4837436899854407e-07, "loss": 0.0021, "step": 15284 }, { "epoch": 3.72, "learning_rate": 2.4750113153961477e-07, "loss": 0.0016, "step": 15286 }, { "epoch": 3.73, "learning_rate": 2.466294126193325e-07, "loss": 0.0021, "step": 15288 }, { "epoch": 3.73, "learning_rate": 2.4575921237343316e-07, "loss": 0.0017, "step": 15290 }, { "epoch": 3.73, "learning_rate": 2.4489053093741055e-07, "loss": 0.0028, "step": 15292 }, { "epoch": 3.73, "learning_rate": 2.440233684465254e-07, "loss": 0.0027, "step": 15294 }, { "epoch": 3.73, "learning_rate": 2.4315772503580416e-07, "loss": 0.0015, "step": 15296 }, { "epoch": 3.73, "learning_rate": 2.422936008400323e-07, "loss": 0.0029, "step": 15298 }, { "epoch": 3.73, "learning_rate": 2.414309959937589e-07, "loss": 0.0027, "step": 15300 }, { "epoch": 3.73, "learning_rate": 2.4056991063130084e-07, "loss": 0.0048, "step": 15302 }, { "epoch": 3.73, "learning_rate": 2.397103448867344e-07, "loss": 0.0012, "step": 15304 }, { "epoch": 3.73, "learning_rate": 2.3885229889390126e-07, "loss": 0.0019, "step": 15306 }, { "epoch": 3.73, "learning_rate": 2.3799577278640463e-07, "loss": 0.0007, "step": 15308 }, { "epoch": 3.73, "learning_rate": 2.3714076669761333e-07, "loss": 0.0008, "step": 15310 }, { "epoch": 3.73, "learning_rate": 2.3628728076065754e-07, "loss": 0.0012, "step": 15312 }, { "epoch": 3.73, "learning_rate": 2.354353151084321e-07, "loss": 0.0033, "step": 15314 }, { "epoch": 3.73, "learning_rate": 2.345848698735942e-07, "loss": 0.0013, "step": 15316 }, { "epoch": 3.73, "learning_rate": 2.3373594518856458e-07, "loss": 0.0012, "step": 15318 }, { "epoch": 3.73, "learning_rate": 2.3288854118552639e-07, "loss": 0.0019, "step": 15320 }, { "epoch": 3.73, "learning_rate": 2.3204265799643077e-07, "loss": 0.0038, "step": 15322 }, { "epoch": 3.73, "learning_rate": 2.311982957529846e-07, "loss": 0.0029, "step": 15324 }, { "epoch": 3.73, "learning_rate": 2.3035545458666154e-07, "loss": 0.0051, "step": 15326 }, { "epoch": 3.73, "learning_rate": 2.2951413462869886e-07, "loss": 0.0038, "step": 15328 }, { "epoch": 3.74, "learning_rate": 2.286743360100985e-07, "loss": 0.0027, "step": 15330 }, { "epoch": 3.74, "learning_rate": 2.2783605886162018e-07, "loss": 0.0033, "step": 15332 }, { "epoch": 3.74, "learning_rate": 2.2699930331379182e-07, "loss": 0.0017, "step": 15334 }, { "epoch": 3.74, "learning_rate": 2.2616406949690362e-07, "loss": 0.0017, "step": 15336 }, { "epoch": 3.74, "learning_rate": 2.2533035754100708e-07, "loss": 0.001, "step": 15338 }, { "epoch": 3.74, "learning_rate": 2.2449816757591835e-07, "loss": 0.0048, "step": 15340 }, { "epoch": 3.74, "learning_rate": 2.236674997312127e-07, "loss": 0.0025, "step": 15342 }, { "epoch": 3.74, "learning_rate": 2.2283835413623444e-07, "loss": 0.0007, "step": 15344 }, { "epoch": 3.74, "learning_rate": 2.2201073092008696e-07, "loss": 0.0021, "step": 15346 }, { "epoch": 3.74, "learning_rate": 2.2118463021163715e-07, "loss": 0.0062, "step": 15348 }, { "epoch": 3.74, "learning_rate": 2.2036005213951662e-07, "loss": 0.0038, "step": 15350 }, { "epoch": 3.74, "learning_rate": 2.1953699683211704e-07, "loss": 0.0028, "step": 15352 }, { "epoch": 3.74, "learning_rate": 2.1871546441759484e-07, "loss": 0.0022, "step": 15354 }, { "epoch": 3.74, "learning_rate": 2.1789545502386877e-07, "loss": 0.0025, "step": 15356 }, { "epoch": 3.74, "learning_rate": 2.1707696877862005e-07, "loss": 0.001, "step": 15358 }, { "epoch": 3.74, "learning_rate": 2.162600058092945e-07, "loss": 0.0004, "step": 15360 }, { "epoch": 3.74, "learning_rate": 2.1544456624309927e-07, "loss": 0.002, "step": 15362 }, { "epoch": 3.74, "learning_rate": 2.146306502070039e-07, "loss": 0.0005, "step": 15364 }, { "epoch": 3.74, "learning_rate": 2.1381825782774145e-07, "loss": 0.0017, "step": 15366 }, { "epoch": 3.74, "learning_rate": 2.130073892318074e-07, "loss": 0.0014, "step": 15368 }, { "epoch": 3.75, "learning_rate": 2.1219804454546188e-07, "loss": 0.0021, "step": 15370 }, { "epoch": 3.75, "learning_rate": 2.1139022389472297e-07, "loss": 0.0017, "step": 15372 }, { "epoch": 3.75, "learning_rate": 2.105839274053767e-07, "loss": 0.0033, "step": 15374 }, { "epoch": 3.75, "learning_rate": 2.0977915520297042e-07, "loss": 0.001, "step": 15376 }, { "epoch": 3.75, "learning_rate": 2.089759074128117e-07, "loss": 0.0022, "step": 15378 }, { "epoch": 3.75, "learning_rate": 2.081741841599727e-07, "loss": 0.0046, "step": 15380 }, { "epoch": 3.75, "learning_rate": 2.0737398556928689e-07, "loss": 0.0031, "step": 15382 }, { "epoch": 3.75, "learning_rate": 2.065753117653535e-07, "loss": 0.0041, "step": 15384 }, { "epoch": 3.75, "learning_rate": 2.0577816287253082e-07, "loss": 0.0013, "step": 15386 }, { "epoch": 3.75, "learning_rate": 2.049825390149396e-07, "loss": 0.0039, "step": 15388 }, { "epoch": 3.75, "learning_rate": 2.0418844031646735e-07, "loss": 0.0016, "step": 15390 }, { "epoch": 3.75, "learning_rate": 2.0339586690076074e-07, "loss": 0.0018, "step": 15392 }, { "epoch": 3.75, "learning_rate": 2.0260481889122775e-07, "loss": 0.0023, "step": 15394 }, { "epoch": 3.75, "learning_rate": 2.0181529641104315e-07, "loss": 0.0026, "step": 15396 }, { "epoch": 3.75, "learning_rate": 2.010272995831386e-07, "loss": 0.0019, "step": 15398 }, { "epoch": 3.75, "learning_rate": 2.0024082853021487e-07, "loss": 0.0012, "step": 15400 }, { "epoch": 3.75, "learning_rate": 1.9945588337472733e-07, "loss": 0.0046, "step": 15402 }, { "epoch": 3.75, "learning_rate": 1.986724642389004e-07, "loss": 0.0037, "step": 15404 }, { "epoch": 3.75, "learning_rate": 1.9789057124471876e-07, "loss": 0.0014, "step": 15406 }, { "epoch": 3.75, "learning_rate": 1.9711020451392837e-07, "loss": 0.0023, "step": 15408 }, { "epoch": 3.75, "learning_rate": 1.9633136416803867e-07, "loss": 0.0034, "step": 15410 }, { "epoch": 3.76, "learning_rate": 1.9555405032832043e-07, "loss": 0.0009, "step": 15412 }, { "epoch": 3.76, "learning_rate": 1.9477826311580793e-07, "loss": 0.0004, "step": 15414 }, { "epoch": 3.76, "learning_rate": 1.9400400265129682e-07, "loss": 0.0018, "step": 15416 }, { "epoch": 3.76, "learning_rate": 1.9323126905534618e-07, "loss": 0.0031, "step": 15418 }, { "epoch": 3.76, "learning_rate": 1.924600624482742e-07, "loss": 0.0018, "step": 15420 }, { "epoch": 3.76, "learning_rate": 1.9169038295016707e-07, "loss": 0.0026, "step": 15422 }, { "epoch": 3.76, "learning_rate": 1.9092223068086891e-07, "loss": 0.0022, "step": 15424 }, { "epoch": 3.76, "learning_rate": 1.9015560575998403e-07, "loss": 0.001, "step": 15426 }, { "epoch": 3.76, "learning_rate": 1.8939050830688366e-07, "loss": 0.0017, "step": 15428 }, { "epoch": 3.76, "learning_rate": 1.8862693844070023e-07, "loss": 0.0011, "step": 15430 }, { "epoch": 3.76, "learning_rate": 1.8786489628032644e-07, "loss": 0.0014, "step": 15432 }, { "epoch": 3.76, "learning_rate": 1.8710438194441848e-07, "loss": 0.0022, "step": 15434 }, { "epoch": 3.76, "learning_rate": 1.8634539555139385e-07, "loss": 0.0028, "step": 15436 }, { "epoch": 3.76, "learning_rate": 1.8558793721943248e-07, "loss": 0.0011, "step": 15438 }, { "epoch": 3.76, "learning_rate": 1.8483200706647774e-07, "loss": 0.0033, "step": 15440 }, { "epoch": 3.76, "learning_rate": 1.840776052102322e-07, "loss": 0.0009, "step": 15442 }, { "epoch": 3.76, "learning_rate": 1.833247317681619e-07, "loss": 0.0039, "step": 15444 }, { "epoch": 3.76, "learning_rate": 1.8257338685749526e-07, "loss": 0.0014, "step": 15446 }, { "epoch": 3.76, "learning_rate": 1.8182357059522427e-07, "loss": 0.0016, "step": 15448 }, { "epoch": 3.76, "learning_rate": 1.8107528309809774e-07, "loss": 0.0042, "step": 15450 }, { "epoch": 3.77, "learning_rate": 1.8032852448263248e-07, "loss": 0.0042, "step": 15452 }, { "epoch": 3.77, "learning_rate": 1.795832948651033e-07, "loss": 0.0018, "step": 15454 }, { "epoch": 3.77, "learning_rate": 1.7883959436154952e-07, "loss": 0.0017, "step": 15456 }, { "epoch": 3.77, "learning_rate": 1.7809742308776856e-07, "loss": 0.0027, "step": 15458 }, { "epoch": 3.77, "learning_rate": 1.7735678115932353e-07, "loss": 0.0015, "step": 15460 }, { "epoch": 3.77, "learning_rate": 1.7661766869153772e-07, "loss": 0.0005, "step": 15462 }, { "epoch": 3.77, "learning_rate": 1.75880085799498e-07, "loss": 0.0011, "step": 15464 }, { "epoch": 3.77, "learning_rate": 1.7514403259805135e-07, "loss": 0.0027, "step": 15466 }, { "epoch": 3.77, "learning_rate": 1.7440950920180388e-07, "loss": 0.0037, "step": 15468 }, { "epoch": 3.77, "learning_rate": 1.7367651572512967e-07, "loss": 0.003, "step": 15470 }, { "epoch": 3.77, "learning_rate": 1.7294505228216075e-07, "loss": 0.0019, "step": 15472 }, { "epoch": 3.77, "learning_rate": 1.7221511898679043e-07, "loss": 0.0007, "step": 15474 }, { "epoch": 3.77, "learning_rate": 1.7148671595267564e-07, "loss": 0.0031, "step": 15476 }, { "epoch": 3.77, "learning_rate": 1.707598432932356e-07, "loss": 0.0016, "step": 15478 }, { "epoch": 3.77, "learning_rate": 1.7003450112164866e-07, "loss": 0.0022, "step": 15480 }, { "epoch": 3.77, "learning_rate": 1.693106895508556e-07, "loss": 0.0018, "step": 15482 }, { "epoch": 3.77, "learning_rate": 1.6858840869355963e-07, "loss": 0.0034, "step": 15484 }, { "epoch": 3.77, "learning_rate": 1.6786765866222522e-07, "loss": 0.0047, "step": 15486 }, { "epoch": 3.77, "learning_rate": 1.6714843956908145e-07, "loss": 0.0012, "step": 15488 }, { "epoch": 3.77, "learning_rate": 1.664307515261121e-07, "loss": 0.0024, "step": 15490 }, { "epoch": 3.77, "learning_rate": 1.6571459464506888e-07, "loss": 0.0024, "step": 15492 }, { "epoch": 3.78, "learning_rate": 1.6499996903746374e-07, "loss": 0.0003, "step": 15494 }, { "epoch": 3.78, "learning_rate": 1.6428687481456762e-07, "loss": 0.0031, "step": 15496 }, { "epoch": 3.78, "learning_rate": 1.6357531208741507e-07, "loss": 0.0025, "step": 15498 }, { "epoch": 3.78, "learning_rate": 1.62865280966803e-07, "loss": 0.003, "step": 15500 }, { "epoch": 3.78, "learning_rate": 1.621567815632863e-07, "loss": 0.0018, "step": 15502 }, { "epoch": 3.78, "learning_rate": 1.6144981398718674e-07, "loss": 0.002, "step": 15504 }, { "epoch": 3.78, "learning_rate": 1.6074437834858293e-07, "loss": 0.0009, "step": 15506 }, { "epoch": 3.78, "learning_rate": 1.6004047475731587e-07, "loss": 0.0021, "step": 15508 }, { "epoch": 3.78, "learning_rate": 1.5933810332299015e-07, "loss": 0.003, "step": 15510 }, { "epoch": 3.78, "learning_rate": 1.586372641549705e-07, "loss": 0.0018, "step": 15512 }, { "epoch": 3.78, "learning_rate": 1.5793795736238182e-07, "loss": 0.003, "step": 15514 }, { "epoch": 3.78, "learning_rate": 1.5724018305411148e-07, "loss": 0.0012, "step": 15516 }, { "epoch": 3.78, "learning_rate": 1.5654394133880812e-07, "loss": 0.0024, "step": 15518 }, { "epoch": 3.78, "learning_rate": 1.5584923232488502e-07, "loss": 0.0027, "step": 15520 }, { "epoch": 3.78, "learning_rate": 1.551560561205101e-07, "loss": 0.0062, "step": 15522 }, { "epoch": 3.78, "learning_rate": 1.544644128336159e-07, "loss": 0.0016, "step": 15524 }, { "epoch": 3.78, "learning_rate": 1.5377430257189962e-07, "loss": 0.0034, "step": 15526 }, { "epoch": 3.78, "learning_rate": 1.530857254428153e-07, "loss": 0.0034, "step": 15528 }, { "epoch": 3.78, "learning_rate": 1.523986815535783e-07, "loss": 0.0028, "step": 15530 }, { "epoch": 3.78, "learning_rate": 1.5171317101116746e-07, "loss": 0.0035, "step": 15532 }, { "epoch": 3.79, "learning_rate": 1.5102919392232407e-07, "loss": 0.0012, "step": 15534 }, { "epoch": 3.79, "learning_rate": 1.5034675039354517e-07, "loss": 0.0018, "step": 15536 }, { "epoch": 3.79, "learning_rate": 1.4966584053109579e-07, "loss": 0.002, "step": 15538 }, { "epoch": 3.79, "learning_rate": 1.4898646444099553e-07, "loss": 0.0032, "step": 15540 }, { "epoch": 3.79, "learning_rate": 1.4830862222902975e-07, "loss": 0.0014, "step": 15542 }, { "epoch": 3.79, "learning_rate": 1.4763231400074519e-07, "loss": 0.0016, "step": 15544 }, { "epoch": 3.79, "learning_rate": 1.4695753986144646e-07, "loss": 0.0011, "step": 15546 }, { "epoch": 3.79, "learning_rate": 1.4628429991620175e-07, "loss": 0.0007, "step": 15548 }, { "epoch": 3.79, "learning_rate": 1.4561259426983942e-07, "loss": 0.0013, "step": 15550 }, { "epoch": 3.79, "learning_rate": 1.4494242302695029e-07, "loss": 0.0008, "step": 15552 }, { "epoch": 3.79, "learning_rate": 1.4427378629188306e-07, "loss": 0.0032, "step": 15554 }, { "epoch": 3.79, "learning_rate": 1.4360668416875002e-07, "loss": 0.0028, "step": 15556 }, { "epoch": 3.79, "learning_rate": 1.4294111676142474e-07, "loss": 0.0045, "step": 15558 }, { "epoch": 3.79, "learning_rate": 1.4227708417354213e-07, "loss": 0.0025, "step": 15560 }, { "epoch": 3.79, "learning_rate": 1.4161458650849502e-07, "loss": 0.0014, "step": 15562 }, { "epoch": 3.79, "learning_rate": 1.409536238694409e-07, "loss": 0.0007, "step": 15564 }, { "epoch": 3.79, "learning_rate": 1.402941963592963e-07, "loss": 0.0012, "step": 15566 }, { "epoch": 3.79, "learning_rate": 1.3963630408073914e-07, "loss": 0.0016, "step": 15568 }, { "epoch": 3.79, "learning_rate": 1.3897994713620855e-07, "loss": 0.0009, "step": 15570 }, { "epoch": 3.79, "learning_rate": 1.383251256279028e-07, "loss": 0.0021, "step": 15572 }, { "epoch": 3.79, "learning_rate": 1.3767183965778365e-07, "loss": 0.0049, "step": 15574 }, { "epoch": 3.8, "learning_rate": 1.3702008932757416e-07, "loss": 0.0031, "step": 15576 }, { "epoch": 3.8, "learning_rate": 1.363698747387554e-07, "loss": 0.0005, "step": 15578 }, { "epoch": 3.8, "learning_rate": 1.3572119599257083e-07, "loss": 0.0006, "step": 15580 }, { "epoch": 3.8, "learning_rate": 1.350740531900241e-07, "loss": 0.0013, "step": 15582 }, { "epoch": 3.8, "learning_rate": 1.3442844643188124e-07, "loss": 0.0029, "step": 15584 }, { "epoch": 3.8, "learning_rate": 1.3378437581866855e-07, "loss": 0.0029, "step": 15586 }, { "epoch": 3.8, "learning_rate": 1.3314184145067023e-07, "loss": 0.0015, "step": 15588 }, { "epoch": 3.8, "learning_rate": 1.3250084342793734e-07, "loss": 0.0008, "step": 15590 }, { "epoch": 3.8, "learning_rate": 1.3186138185027563e-07, "loss": 0.0021, "step": 15592 }, { "epoch": 3.8, "learning_rate": 1.3122345681725545e-07, "loss": 0.0014, "step": 15594 }, { "epoch": 3.8, "learning_rate": 1.3058706842820624e-07, "loss": 0.0017, "step": 15596 }, { "epoch": 3.8, "learning_rate": 1.2995221678221758e-07, "loss": 0.0022, "step": 15598 }, { "epoch": 3.8, "learning_rate": 1.2931890197814267e-07, "loss": 0.0011, "step": 15600 }, { "epoch": 3.8, "learning_rate": 1.286871241145915e-07, "loss": 0.0034, "step": 15602 }, { "epoch": 3.8, "learning_rate": 1.280568832899376e-07, "loss": 0.0027, "step": 15604 }, { "epoch": 3.8, "learning_rate": 1.274281796023158e-07, "loss": 0.0021, "step": 15606 }, { "epoch": 3.8, "learning_rate": 1.268010131496178e-07, "loss": 0.0011, "step": 15608 }, { "epoch": 3.8, "learning_rate": 1.261753840294977e-07, "loss": 0.0011, "step": 15610 }, { "epoch": 3.8, "learning_rate": 1.2555129233937313e-07, "loss": 0.0014, "step": 15612 }, { "epoch": 3.8, "learning_rate": 1.249287381764186e-07, "loss": 0.0029, "step": 15614 }, { "epoch": 3.81, "learning_rate": 1.24307721637571e-07, "loss": 0.0016, "step": 15616 }, { "epoch": 3.81, "learning_rate": 1.2368824281952629e-07, "loss": 0.0015, "step": 15618 }, { "epoch": 3.81, "learning_rate": 1.2307030181874402e-07, "loss": 0.0019, "step": 15620 }, { "epoch": 3.81, "learning_rate": 1.2245389873144052e-07, "loss": 0.0005, "step": 15622 }, { "epoch": 3.81, "learning_rate": 1.2183903365359574e-07, "loss": 0.0019, "step": 15624 }, { "epoch": 3.81, "learning_rate": 1.2122570668094746e-07, "loss": 0.0022, "step": 15626 }, { "epoch": 3.81, "learning_rate": 1.2061391790899602e-07, "loss": 0.001, "step": 15628 }, { "epoch": 3.81, "learning_rate": 1.2000366743300184e-07, "loss": 0.0057, "step": 15630 }, { "epoch": 3.81, "learning_rate": 1.1939495534798672e-07, "loss": 0.005, "step": 15632 }, { "epoch": 3.81, "learning_rate": 1.1878778174872929e-07, "loss": 0.0065, "step": 15634 }, { "epoch": 3.81, "learning_rate": 1.181821467297728e-07, "loss": 0.0006, "step": 15636 }, { "epoch": 3.81, "learning_rate": 1.1757805038541958e-07, "loss": 0.0013, "step": 15638 }, { "epoch": 3.81, "learning_rate": 1.1697549280973108e-07, "loss": 0.0018, "step": 15640 }, { "epoch": 3.81, "learning_rate": 1.1637447409653002e-07, "loss": 0.0024, "step": 15642 }, { "epoch": 3.81, "learning_rate": 1.1577499433939932e-07, "loss": 0.0136, "step": 15644 }, { "epoch": 3.81, "learning_rate": 1.151770536316843e-07, "loss": 0.0039, "step": 15646 }, { "epoch": 3.81, "learning_rate": 1.1458065206648717e-07, "loss": 0.0037, "step": 15648 }, { "epoch": 3.81, "learning_rate": 1.1398578973667362e-07, "loss": 0.0018, "step": 15650 }, { "epoch": 3.81, "learning_rate": 1.1339246673486737e-07, "loss": 0.0013, "step": 15652 }, { "epoch": 3.81, "learning_rate": 1.1280068315345338e-07, "loss": 0.0047, "step": 15654 }, { "epoch": 3.81, "learning_rate": 1.1221043908457685e-07, "loss": 0.0013, "step": 15656 }, { "epoch": 3.82, "learning_rate": 1.1162173462014313e-07, "loss": 0.0044, "step": 15658 }, { "epoch": 3.82, "learning_rate": 1.1103456985181893e-07, "loss": 0.0008, "step": 15660 }, { "epoch": 3.82, "learning_rate": 1.1044894487102997e-07, "loss": 0.0013, "step": 15662 }, { "epoch": 3.82, "learning_rate": 1.098648597689611e-07, "loss": 0.0038, "step": 15664 }, { "epoch": 3.82, "learning_rate": 1.0928231463656069e-07, "loss": 0.0025, "step": 15666 }, { "epoch": 3.82, "learning_rate": 1.0870130956453396e-07, "loss": 0.0016, "step": 15668 }, { "epoch": 3.82, "learning_rate": 1.0812184464334963e-07, "loss": 0.0016, "step": 15670 }, { "epoch": 3.82, "learning_rate": 1.0754391996323332e-07, "loss": 0.0009, "step": 15672 }, { "epoch": 3.82, "learning_rate": 1.0696753561417306e-07, "loss": 0.0024, "step": 15674 }, { "epoch": 3.82, "learning_rate": 1.0639269168591703e-07, "loss": 0.002, "step": 15676 }, { "epoch": 3.82, "learning_rate": 1.0581938826797034e-07, "loss": 0.0021, "step": 15678 }, { "epoch": 3.82, "learning_rate": 1.0524762544960265e-07, "loss": 0.0011, "step": 15680 }, { "epoch": 3.82, "learning_rate": 1.0467740331984166e-07, "loss": 0.0019, "step": 15682 }, { "epoch": 3.82, "learning_rate": 1.0410872196747524e-07, "loss": 0.0012, "step": 15684 }, { "epoch": 3.82, "learning_rate": 1.0354158148105032e-07, "loss": 0.0014, "step": 15686 }, { "epoch": 3.82, "learning_rate": 1.0297598194887736e-07, "loss": 0.0012, "step": 15688 }, { "epoch": 3.82, "learning_rate": 1.0241192345902152e-07, "loss": 0.0019, "step": 15690 }, { "epoch": 3.82, "learning_rate": 1.018494060993136e-07, "loss": 0.0013, "step": 15692 }, { "epoch": 3.82, "learning_rate": 1.0128842995734023e-07, "loss": 0.0017, "step": 15694 }, { "epoch": 3.82, "learning_rate": 1.0072899512045154e-07, "loss": 0.0009, "step": 15696 }, { "epoch": 3.83, "learning_rate": 1.001711016757534e-07, "loss": 0.0012, "step": 15698 }, { "epoch": 3.83, "learning_rate": 9.96147497101152e-08, "loss": 0.0034, "step": 15700 }, { "epoch": 3.83, "learning_rate": 9.905993931016544e-08, "loss": 0.0014, "step": 15702 }, { "epoch": 3.83, "learning_rate": 9.85066705622928e-08, "loss": 0.0021, "step": 15704 }, { "epoch": 3.83, "learning_rate": 9.795494355264502e-08, "loss": 0.0026, "step": 15706 }, { "epoch": 3.83, "learning_rate": 9.740475836713003e-08, "loss": 0.0015, "step": 15708 }, { "epoch": 3.83, "learning_rate": 9.68561150914149e-08, "loss": 0.0042, "step": 15710 }, { "epoch": 3.83, "learning_rate": 9.630901381092905e-08, "loss": 0.0011, "step": 15712 }, { "epoch": 3.83, "learning_rate": 9.57634546108599e-08, "loss": 0.007, "step": 15714 }, { "epoch": 3.83, "learning_rate": 9.521943757615393e-08, "loss": 0.0008, "step": 15716 }, { "epoch": 3.83, "learning_rate": 9.467696279152116e-08, "loss": 0.0025, "step": 15718 }, { "epoch": 3.83, "learning_rate": 9.413603034142849e-08, "loss": 0.0027, "step": 15720 }, { "epoch": 3.83, "learning_rate": 9.359664031010185e-08, "loss": 0.0046, "step": 15722 }, { "epoch": 3.83, "learning_rate": 9.305879278152851e-08, "loss": 0.0028, "step": 15724 }, { "epoch": 3.83, "learning_rate": 9.252248783945594e-08, "loss": 0.0006, "step": 15726 }, { "epoch": 3.83, "learning_rate": 9.198772556739177e-08, "loss": 0.0041, "step": 15728 }, { "epoch": 3.83, "learning_rate": 9.145450604860163e-08, "loss": 0.0022, "step": 15730 }, { "epoch": 3.83, "learning_rate": 9.09228293661124e-08, "loss": 0.0023, "step": 15732 }, { "epoch": 3.83, "learning_rate": 9.039269560271013e-08, "loss": 0.0021, "step": 15734 }, { "epoch": 3.83, "learning_rate": 8.986410484093988e-08, "loss": 0.0048, "step": 15736 }, { "epoch": 3.83, "learning_rate": 8.933705716310804e-08, "loss": 0.0042, "step": 15738 }, { "epoch": 3.84, "learning_rate": 8.881155265128005e-08, "loss": 0.0018, "step": 15740 }, { "epoch": 3.84, "learning_rate": 8.828759138727939e-08, "loss": 0.0017, "step": 15742 }, { "epoch": 3.84, "learning_rate": 8.776517345269408e-08, "loss": 0.0018, "step": 15744 }, { "epoch": 3.84, "learning_rate": 8.724429892886577e-08, "loss": 0.002, "step": 15746 }, { "epoch": 3.84, "learning_rate": 8.672496789689955e-08, "loss": 0.0029, "step": 15748 }, { "epoch": 3.84, "learning_rate": 8.620718043765853e-08, "loss": 0.0038, "step": 15750 }, { "epoch": 3.84, "learning_rate": 8.56909366317682e-08, "loss": 0.0009, "step": 15752 }, { "epoch": 3.84, "learning_rate": 8.517623655960872e-08, "loss": 0.0007, "step": 15754 }, { "epoch": 3.84, "learning_rate": 8.466308030132597e-08, "loss": 0.0024, "step": 15756 }, { "epoch": 3.84, "learning_rate": 8.415146793681939e-08, "loss": 0.0026, "step": 15758 }, { "epoch": 3.84, "learning_rate": 8.364139954575301e-08, "loss": 0.0042, "step": 15760 }, { "epoch": 3.84, "learning_rate": 8.313287520754776e-08, "loss": 0.0029, "step": 15762 }, { "epoch": 3.84, "learning_rate": 8.262589500138473e-08, "loss": 0.0017, "step": 15764 }, { "epoch": 3.84, "learning_rate": 8.212045900620414e-08, "loss": 0.0027, "step": 15766 }, { "epoch": 3.84, "learning_rate": 8.161656730070744e-08, "loss": 0.0023, "step": 15768 }, { "epoch": 3.84, "learning_rate": 8.1114219963353e-08, "loss": 0.0019, "step": 15770 }, { "epoch": 3.84, "learning_rate": 8.061341707236048e-08, "loss": 0.003, "step": 15772 }, { "epoch": 3.84, "learning_rate": 8.011415870570971e-08, "loss": 0.0031, "step": 15774 }, { "epoch": 3.84, "learning_rate": 7.961644494113741e-08, "loss": 0.0038, "step": 15776 }, { "epoch": 3.84, "learning_rate": 7.912027585614268e-08, "loss": 0.0025, "step": 15778 }, { "epoch": 3.85, "learning_rate": 7.862565152798263e-08, "loss": 0.0019, "step": 15780 }, { "epoch": 3.85, "learning_rate": 7.813257203367452e-08, "loss": 0.0038, "step": 15782 }, { "epoch": 3.85, "learning_rate": 7.764103744999363e-08, "loss": 0.0009, "step": 15784 }, { "epoch": 3.85, "learning_rate": 7.715104785347539e-08, "loss": 0.0025, "step": 15786 }, { "epoch": 3.85, "learning_rate": 7.666260332041653e-08, "loss": 0.0027, "step": 15788 }, { "epoch": 3.85, "learning_rate": 7.617570392686958e-08, "loss": 0.0026, "step": 15790 }, { "epoch": 3.85, "learning_rate": 7.569034974865053e-08, "loss": 0.0016, "step": 15792 }, { "epoch": 3.85, "learning_rate": 7.520654086133117e-08, "loss": 0.0019, "step": 15794 }, { "epoch": 3.85, "learning_rate": 7.472427734024567e-08, "loss": 0.0043, "step": 15796 }, { "epoch": 3.85, "learning_rate": 7.4243559260484e-08, "loss": 0.0025, "step": 15798 }, { "epoch": 3.85, "learning_rate": 7.376438669690067e-08, "loss": 0.0005, "step": 15800 }, { "epoch": 3.85, "learning_rate": 7.328675972410271e-08, "loss": 0.0061, "step": 15802 }, { "epoch": 3.85, "learning_rate": 7.281067841646394e-08, "loss": 0.0043, "step": 15804 }, { "epoch": 3.85, "learning_rate": 7.233614284811174e-08, "loss": 0.0011, "step": 15806 }, { "epoch": 3.85, "learning_rate": 7.18631530929359e-08, "loss": 0.0032, "step": 15808 }, { "epoch": 3.85, "learning_rate": 7.139170922458417e-08, "loss": 0.0009, "step": 15810 }, { "epoch": 3.85, "learning_rate": 7.092181131646336e-08, "loss": 0.0016, "step": 15812 }, { "epoch": 3.85, "learning_rate": 7.045345944174053e-08, "loss": 0.0038, "step": 15814 }, { "epoch": 3.85, "learning_rate": 6.99866536733429e-08, "loss": 0.001, "step": 15816 }, { "epoch": 3.85, "learning_rate": 6.952139408395342e-08, "loss": 0.0006, "step": 15818 }, { "epoch": 3.85, "learning_rate": 6.905768074601859e-08, "loss": 0.0015, "step": 15820 }, { "epoch": 3.86, "learning_rate": 6.859551373174067e-08, "loss": 0.0017, "step": 15822 }, { "epoch": 3.86, "learning_rate": 6.81348931130843e-08, "loss": 0.0032, "step": 15824 }, { "epoch": 3.86, "learning_rate": 6.767581896176989e-08, "loss": 0.0047, "step": 15826 }, { "epoch": 3.86, "learning_rate": 6.721829134927915e-08, "loss": 0.0038, "step": 15828 }, { "epoch": 3.86, "learning_rate": 6.676231034685398e-08, "loss": 0.0034, "step": 15830 }, { "epoch": 3.86, "learning_rate": 6.630787602549204e-08, "loss": 0.0009, "step": 15832 }, { "epoch": 3.86, "learning_rate": 6.58549884559545e-08, "loss": 0.0031, "step": 15834 }, { "epoch": 3.86, "learning_rate": 6.540364770875717e-08, "loss": 0.0015, "step": 15836 }, { "epoch": 3.86, "learning_rate": 6.495385385417829e-08, "loss": 0.0017, "step": 15838 }, { "epoch": 3.86, "learning_rate": 6.450560696225405e-08, "loss": 0.0032, "step": 15840 }, { "epoch": 3.86, "learning_rate": 6.405890710278084e-08, "loss": 0.002, "step": 15842 }, { "epoch": 3.86, "learning_rate": 6.361375434531192e-08, "loss": 0.0027, "step": 15844 }, { "epoch": 3.86, "learning_rate": 6.317014875916183e-08, "loss": 0.0002, "step": 15846 }, { "epoch": 3.86, "learning_rate": 6.272809041340311e-08, "loss": 0.0004, "step": 15848 }, { "epoch": 3.86, "learning_rate": 6.228757937686735e-08, "loss": 0.0014, "step": 15850 }, { "epoch": 3.86, "learning_rate": 6.184861571814527e-08, "loss": 0.0018, "step": 15852 }, { "epoch": 3.86, "learning_rate": 6.141119950558772e-08, "loss": 0.0031, "step": 15854 }, { "epoch": 3.86, "learning_rate": 6.097533080730466e-08, "loss": 0.0019, "step": 15856 }, { "epoch": 3.86, "learning_rate": 6.054100969116072e-08, "loss": 0.0014, "step": 15858 }, { "epoch": 3.86, "learning_rate": 6.010823622478623e-08, "loss": 0.0013, "step": 15860 }, { "epoch": 3.87, "learning_rate": 5.967701047556618e-08, "loss": 0.0018, "step": 15862 }, { "epoch": 3.87, "learning_rate": 5.924733251064574e-08, "loss": 0.0015, "step": 15864 }, { "epoch": 3.87, "learning_rate": 5.881920239692918e-08, "loss": 0.002, "step": 15866 }, { "epoch": 3.87, "learning_rate": 5.839262020107872e-08, "loss": 0.0035, "step": 15868 }, { "epoch": 3.87, "learning_rate": 5.796758598951791e-08, "loss": 0.0026, "step": 15870 }, { "epoch": 3.87, "learning_rate": 5.7544099828428237e-08, "loss": 0.001, "step": 15872 }, { "epoch": 3.87, "learning_rate": 5.7122161783748074e-08, "loss": 0.0019, "step": 15874 }, { "epoch": 3.87, "learning_rate": 5.670177192117599e-08, "loss": 0.0005, "step": 15876 }, { "epoch": 3.87, "learning_rate": 5.6282930306171824e-08, "loss": 0.0025, "step": 15878 }, { "epoch": 3.87, "learning_rate": 5.58656370039512e-08, "loss": 0.0008, "step": 15880 }, { "epoch": 3.87, "learning_rate": 5.544989207949103e-08, "loss": 0.0004, "step": 15882 }, { "epoch": 3.87, "learning_rate": 5.5035695597523977e-08, "loss": 0.0024, "step": 15884 }, { "epoch": 3.87, "learning_rate": 5.4623047622546225e-08, "loss": 0.0036, "step": 15886 }, { "epoch": 3.87, "learning_rate": 5.421194821880749e-08, "loss": 0.0021, "step": 15888 }, { "epoch": 3.87, "learning_rate": 5.380239745032101e-08, "loss": 0.0018, "step": 15890 }, { "epoch": 3.87, "learning_rate": 5.339439538085578e-08, "loss": 0.0026, "step": 15892 }, { "epoch": 3.87, "learning_rate": 5.2987942073943196e-08, "loss": 0.0006, "step": 15894 }, { "epoch": 3.87, "learning_rate": 5.258303759286709e-08, "loss": 0.0035, "step": 15896 }, { "epoch": 3.87, "learning_rate": 5.2179682000677023e-08, "loss": 0.0029, "step": 15898 }, { "epoch": 3.87, "learning_rate": 5.1777875360178307e-08, "loss": 0.0019, "step": 15900 }, { "epoch": 3.87, "learning_rate": 5.137761773393535e-08, "loss": 0.0041, "step": 15902 }, { "epoch": 3.88, "learning_rate": 5.097890918427162e-08, "loss": 0.0031, "step": 15904 }, { "epoch": 3.88, "learning_rate": 5.058174977326746e-08, "loss": 0.0031, "step": 15906 }, { "epoch": 3.88, "learning_rate": 5.0186139562764526e-08, "loss": 0.0021, "step": 15908 }, { "epoch": 3.88, "learning_rate": 4.979207861436242e-08, "loss": 0.001, "step": 15910 }, { "epoch": 3.88, "learning_rate": 4.9399566989419834e-08, "loss": 0.0023, "step": 15912 }, { "epoch": 3.88, "learning_rate": 4.900860474905234e-08, "loss": 0.0012, "step": 15914 }, { "epoch": 3.88, "learning_rate": 4.861919195413789e-08, "loss": 0.002, "step": 15916 }, { "epoch": 3.88, "learning_rate": 4.8231328665310216e-08, "loss": 0.0019, "step": 15918 }, { "epoch": 3.88, "learning_rate": 4.784501494296212e-08, "loss": 0.0015, "step": 15920 }, { "epoch": 3.88, "learning_rate": 4.746025084724548e-08, "loss": 0.0007, "step": 15922 }, { "epoch": 3.88, "learning_rate": 4.707703643807127e-08, "loss": 0.0021, "step": 15924 }, { "epoch": 3.88, "learning_rate": 4.669537177510952e-08, "loss": 0.0008, "step": 15926 }, { "epoch": 3.88, "learning_rate": 4.6315256917787155e-08, "loss": 0.0036, "step": 15928 }, { "epoch": 3.88, "learning_rate": 4.593669192529237e-08, "loss": 0.002, "step": 15930 }, { "epoch": 3.88, "learning_rate": 4.5559676856570255e-08, "loss": 0.0026, "step": 15932 }, { "epoch": 3.88, "learning_rate": 4.518421177032384e-08, "loss": 0.0012, "step": 15934 }, { "epoch": 3.88, "learning_rate": 4.481029672501635e-08, "loss": 0.0012, "step": 15936 }, { "epoch": 3.88, "learning_rate": 4.4437931778870127e-08, "loss": 0.0013, "step": 15938 }, { "epoch": 3.88, "learning_rate": 4.4067116989864325e-08, "loss": 0.0007, "step": 15940 }, { "epoch": 3.88, "learning_rate": 4.369785241573832e-08, "loss": 0.0008, "step": 15942 }, { "epoch": 3.88, "learning_rate": 4.333013811398834e-08, "loss": 0.0019, "step": 15944 }, { "epoch": 3.89, "learning_rate": 4.296397414187192e-08, "loss": 0.003, "step": 15946 }, { "epoch": 3.89, "learning_rate": 4.259936055640235e-08, "loss": 0.0012, "step": 15948 }, { "epoch": 3.89, "learning_rate": 4.22362974143542e-08, "loss": 0.0029, "step": 15950 }, { "epoch": 3.89, "learning_rate": 4.1874784772256707e-08, "loss": 0.006, "step": 15952 }, { "epoch": 3.89, "learning_rate": 4.15148226864015e-08, "loss": 0.0028, "step": 15954 }, { "epoch": 3.89, "learning_rate": 4.11564112128382e-08, "loss": 0.0016, "step": 15956 }, { "epoch": 3.89, "learning_rate": 4.0799550407373267e-08, "loss": 0.0002, "step": 15958 }, { "epoch": 3.89, "learning_rate": 4.044424032557226e-08, "loss": 0.0046, "step": 15960 }, { "epoch": 3.89, "learning_rate": 4.009048102276092e-08, "loss": 0.0019, "step": 15962 }, { "epoch": 3.89, "learning_rate": 3.973827255402185e-08, "loss": 0.0023, "step": 15964 }, { "epoch": 3.89, "learning_rate": 3.938761497419563e-08, "loss": 0.0035, "step": 15966 }, { "epoch": 3.89, "learning_rate": 3.903850833788303e-08, "loss": 0.0024, "step": 15968 }, { "epoch": 3.89, "learning_rate": 3.86909526994439e-08, "loss": 0.004, "step": 15970 }, { "epoch": 3.89, "learning_rate": 3.834494811299272e-08, "loss": 0.003, "step": 15972 }, { "epoch": 3.89, "learning_rate": 3.800049463240751e-08, "loss": 0.0021, "step": 15974 }, { "epoch": 3.89, "learning_rate": 3.765759231132093e-08, "loss": 0.0042, "step": 15976 }, { "epoch": 3.89, "learning_rate": 3.731624120312582e-08, "loss": 0.0004, "step": 15978 }, { "epoch": 3.89, "learning_rate": 3.6976441360971896e-08, "loss": 0.0025, "step": 15980 }, { "epoch": 3.89, "learning_rate": 3.663819283777126e-08, "loss": 0.0024, "step": 15982 }, { "epoch": 3.89, "learning_rate": 3.630149568618957e-08, "loss": 0.0023, "step": 15984 }, { "epoch": 3.9, "learning_rate": 3.596634995865489e-08, "loss": 0.0027, "step": 15986 }, { "epoch": 3.9, "learning_rate": 3.563275570735103e-08, "loss": 0.0012, "step": 15988 }, { "epoch": 3.9, "learning_rate": 3.5300712984219774e-08, "loss": 0.0026, "step": 15990 }, { "epoch": 3.9, "learning_rate": 3.497022184096532e-08, "loss": 0.0019, "step": 15992 }, { "epoch": 3.9, "learning_rate": 3.46412823290454e-08, "loss": 0.0026, "step": 15994 }, { "epoch": 3.9, "learning_rate": 3.4313894499680146e-08, "loss": 0.0021, "step": 15996 }, { "epoch": 3.9, "learning_rate": 3.398805840384545e-08, "loss": 0.0016, "step": 15998 }, { "epoch": 3.9, "learning_rate": 3.366377409227739e-08, "loss": 0.0016, "step": 16000 }, { "epoch": 3.9, "learning_rate": 3.334104161546781e-08, "loss": 0.0006, "step": 16002 }, { "epoch": 3.9, "learning_rate": 3.301986102366983e-08, "loss": 0.004, "step": 16004 }, { "epoch": 3.9, "learning_rate": 3.270023236689457e-08, "loss": 0.0039, "step": 16006 }, { "epoch": 3.9, "learning_rate": 3.238215569490777e-08, "loss": 0.0021, "step": 16008 }, { "epoch": 3.9, "learning_rate": 3.206563105723981e-08, "loss": 0.003, "step": 16010 }, { "epoch": 3.9, "learning_rate": 3.175065850317349e-08, "loss": 0.003, "step": 16012 }, { "epoch": 3.9, "learning_rate": 3.143723808175403e-08, "loss": 0.0093, "step": 16014 }, { "epoch": 3.9, "learning_rate": 3.112536984178238e-08, "loss": 0.0018, "step": 16016 }, { "epoch": 3.9, "learning_rate": 3.0815053831818596e-08, "loss": 0.0023, "step": 16018 }, { "epoch": 3.9, "learning_rate": 3.050629010018291e-08, "loss": 0.0016, "step": 16020 }, { "epoch": 3.9, "learning_rate": 3.01990786949502e-08, "loss": 0.0007, "step": 16022 }, { "epoch": 3.9, "learning_rate": 2.989341966395665e-08, "loss": 0.003, "step": 16024 }, { "epoch": 3.9, "learning_rate": 2.95893130547964e-08, "loss": 0.0009, "step": 16026 }, { "epoch": 3.91, "learning_rate": 2.9286758914819358e-08, "loss": 0.0016, "step": 16028 }, { "epoch": 3.91, "learning_rate": 2.8985757291137838e-08, "loss": 0.0021, "step": 16030 }, { "epoch": 3.91, "learning_rate": 2.868630823061769e-08, "loss": 0.0045, "step": 16032 }, { "epoch": 3.91, "learning_rate": 2.838841177988605e-08, "loss": 0.0033, "step": 16034 }, { "epoch": 3.91, "learning_rate": 2.809206798532915e-08, "loss": 0.0021, "step": 16036 }, { "epoch": 3.91, "learning_rate": 2.7797276893087865e-08, "loss": 0.0009, "step": 16038 }, { "epoch": 3.91, "learning_rate": 2.7504038549065472e-08, "loss": 0.0014, "step": 16040 }, { "epoch": 3.91, "learning_rate": 2.721235299891989e-08, "loss": 0.0036, "step": 16042 }, { "epoch": 3.91, "learning_rate": 2.6922220288070345e-08, "loss": 0.0018, "step": 16044 }, { "epoch": 3.91, "learning_rate": 2.6633640461691812e-08, "loss": 0.004, "step": 16046 }, { "epoch": 3.91, "learning_rate": 2.634661356471724e-08, "loss": 0.0051, "step": 16048 }, { "epoch": 3.91, "learning_rate": 2.606113964183976e-08, "loss": 0.0027, "step": 16050 }, { "epoch": 3.91, "learning_rate": 2.57772187375116e-08, "loss": 0.0017, "step": 16052 }, { "epoch": 3.91, "learning_rate": 2.5494850895938505e-08, "loss": 0.0031, "step": 16054 }, { "epoch": 3.91, "learning_rate": 2.521403616108975e-08, "loss": 0.0031, "step": 16056 }, { "epoch": 3.91, "learning_rate": 2.4934774576688137e-08, "loss": 0.0015, "step": 16058 }, { "epoch": 3.91, "learning_rate": 2.465706618621888e-08, "loss": 0.0028, "step": 16060 }, { "epoch": 3.91, "learning_rate": 2.4380911032921838e-08, "loss": 0.0025, "step": 16062 }, { "epoch": 3.91, "learning_rate": 2.410630915979817e-08, "loss": 0.0013, "step": 16064 }, { "epoch": 3.91, "learning_rate": 2.383326060960256e-08, "loss": 0.0023, "step": 16066 }, { "epoch": 3.92, "learning_rate": 2.356176542485322e-08, "loss": 0.0015, "step": 16068 }, { "epoch": 3.92, "learning_rate": 2.329182364782412e-08, "loss": 0.0009, "step": 16070 }, { "epoch": 3.92, "learning_rate": 2.3023435320546073e-08, "loss": 0.0013, "step": 16072 }, { "epoch": 3.92, "learning_rate": 2.2756600484808987e-08, "loss": 0.0056, "step": 16074 }, { "epoch": 3.92, "learning_rate": 2.2491319182162964e-08, "loss": 0.0044, "step": 16076 }, { "epoch": 3.92, "learning_rate": 2.2227591453911623e-08, "loss": 0.0015, "step": 16078 }, { "epoch": 3.92, "learning_rate": 2.196541734112212e-08, "loss": 0.0028, "step": 16080 }, { "epoch": 3.92, "learning_rate": 2.170479688461513e-08, "loss": 0.0013, "step": 16082 }, { "epoch": 3.92, "learning_rate": 2.144573012497153e-08, "loss": 0.0026, "step": 16084 }, { "epoch": 3.92, "learning_rate": 2.118821710253127e-08, "loss": 0.0015, "step": 16086 }, { "epoch": 3.92, "learning_rate": 2.093225785738895e-08, "loss": 0.0035, "step": 16088 }, { "epoch": 3.92, "learning_rate": 2.0677852429400458e-08, "loss": 0.0021, "step": 16090 }, { "epoch": 3.92, "learning_rate": 2.0425000858179665e-08, "loss": 0.0027, "step": 16092 }, { "epoch": 3.92, "learning_rate": 2.0173703183096194e-08, "loss": 0.0011, "step": 16094 }, { "epoch": 3.92, "learning_rate": 1.992395944327874e-08, "loss": 0.0032, "step": 16096 }, { "epoch": 3.92, "learning_rate": 1.9675769677616196e-08, "loss": 0.0005, "step": 16098 }, { "epoch": 3.92, "learning_rate": 1.9429133924752096e-08, "loss": 0.002, "step": 16100 }, { "epoch": 3.92, "learning_rate": 1.9184052223089054e-08, "loss": 0.0039, "step": 16102 }, { "epoch": 3.92, "learning_rate": 1.8940524610789878e-08, "loss": 0.0036, "step": 16104 }, { "epoch": 3.92, "learning_rate": 1.8698551125772015e-08, "loss": 0.0007, "step": 16106 }, { "epoch": 3.92, "learning_rate": 1.8458131805713096e-08, "loss": 0.0015, "step": 16108 }, { "epoch": 3.93, "learning_rate": 1.8219266688048742e-08, "loss": 0.0017, "step": 16110 }, { "epoch": 3.93, "learning_rate": 1.798195580997142e-08, "loss": 0.0011, "step": 16112 }, { "epoch": 3.93, "learning_rate": 1.774619920843268e-08, "loss": 0.0008, "step": 16114 }, { "epoch": 3.93, "learning_rate": 1.751199692014205e-08, "loss": 0.0007, "step": 16116 }, { "epoch": 3.93, "learning_rate": 1.7279348981565914e-08, "loss": 0.0023, "step": 16118 }, { "epoch": 3.93, "learning_rate": 1.704825542892974e-08, "loss": 0.0021, "step": 16120 }, { "epoch": 3.93, "learning_rate": 1.6818716298215855e-08, "loss": 0.0044, "step": 16122 }, { "epoch": 3.93, "learning_rate": 1.6590731625165666e-08, "loss": 0.0032, "step": 16124 }, { "epoch": 3.93, "learning_rate": 1.6364301445278563e-08, "loss": 0.0015, "step": 16126 }, { "epoch": 3.93, "learning_rate": 1.613942579381189e-08, "loss": 0.0027, "step": 16128 }, { "epoch": 3.93, "learning_rate": 1.5916104705778757e-08, "loss": 0.0008, "step": 16130 }, { "epoch": 3.93, "learning_rate": 1.5694338215952453e-08, "loss": 0.0025, "step": 16132 }, { "epoch": 3.93, "learning_rate": 1.547412635886536e-08, "loss": 0.0008, "step": 16134 }, { "epoch": 3.93, "learning_rate": 1.5255469168804492e-08, "loss": 0.0013, "step": 16136 }, { "epoch": 3.93, "learning_rate": 1.503836667981595e-08, "loss": 0.0026, "step": 16138 }, { "epoch": 3.93, "learning_rate": 1.4822818925707139e-08, "loss": 0.0024, "step": 16140 }, { "epoch": 3.93, "learning_rate": 1.4608825940036764e-08, "loss": 0.0012, "step": 16142 }, { "epoch": 3.93, "learning_rate": 1.4396387756128172e-08, "loss": 0.0018, "step": 16144 }, { "epoch": 3.93, "learning_rate": 1.4185504407058237e-08, "loss": 0.0018, "step": 16146 }, { "epoch": 3.93, "learning_rate": 1.3976175925662917e-08, "loss": 0.0012, "step": 16148 }, { "epoch": 3.94, "learning_rate": 1.3768402344537246e-08, "loss": 0.0007, "step": 16150 }, { "epoch": 3.94, "learning_rate": 1.3562183696032016e-08, "loss": 0.0004, "step": 16152 }, { "epoch": 3.94, "learning_rate": 1.3357520012258207e-08, "loss": 0.0048, "step": 16154 }, { "epoch": 3.94, "learning_rate": 1.315441132508255e-08, "loss": 0.0031, "step": 16156 }, { "epoch": 3.94, "learning_rate": 1.2952857666130858e-08, "loss": 0.0025, "step": 16158 }, { "epoch": 3.94, "learning_rate": 1.2752859066786915e-08, "loss": 0.0039, "step": 16160 }, { "epoch": 3.94, "learning_rate": 1.2554415558191368e-08, "loss": 0.0012, "step": 16162 }, { "epoch": 3.94, "learning_rate": 1.2357527171243943e-08, "loss": 0.0025, "step": 16164 }, { "epoch": 3.94, "learning_rate": 1.216219393660234e-08, "loss": 0.0018, "step": 16166 }, { "epoch": 3.94, "learning_rate": 1.1968415884680007e-08, "loss": 0.0005, "step": 16168 }, { "epoch": 3.94, "learning_rate": 1.1776193045650585e-08, "loss": 0.0023, "step": 16170 }, { "epoch": 3.94, "learning_rate": 1.1585525449443468e-08, "loss": 0.001, "step": 16172 }, { "epoch": 3.94, "learning_rate": 1.1396413125749351e-08, "loss": 0.0007, "step": 16174 }, { "epoch": 3.94, "learning_rate": 1.1208856104012455e-08, "loss": 0.0005, "step": 16176 }, { "epoch": 3.94, "learning_rate": 1.10228544134372e-08, "loss": 0.004, "step": 16178 }, { "epoch": 3.94, "learning_rate": 1.0838408082985974e-08, "loss": 0.0009, "step": 16180 }, { "epoch": 3.94, "learning_rate": 1.0655517141378024e-08, "loss": 0.0021, "step": 16182 }, { "epoch": 3.94, "learning_rate": 1.0474181617091684e-08, "loss": 0.0018, "step": 16184 }, { "epoch": 3.94, "learning_rate": 1.0294401538361032e-08, "loss": 0.0036, "step": 16186 }, { "epoch": 3.94, "learning_rate": 1.0116176933180343e-08, "loss": 0.0013, "step": 16188 }, { "epoch": 3.94, "learning_rate": 9.939507829299644e-09, "loss": 0.0021, "step": 16190 }, { "epoch": 3.95, "learning_rate": 9.764394254228037e-09, "loss": 0.0047, "step": 16192 }, { "epoch": 3.95, "learning_rate": 9.590836235232604e-09, "loss": 0.002, "step": 16194 }, { "epoch": 3.95, "learning_rate": 9.41883379933728e-09, "loss": 0.0014, "step": 16196 }, { "epoch": 3.95, "learning_rate": 9.248386973323975e-09, "loss": 0.0024, "step": 16198 }, { "epoch": 3.95, "learning_rate": 9.079495783731462e-09, "loss": 0.0027, "step": 16200 }, { "epoch": 3.95, "learning_rate": 8.912160256859813e-09, "loss": 0.0018, "step": 16202 }, { "epoch": 3.95, "learning_rate": 8.746380418762635e-09, "loss": 0.003, "step": 16204 }, { "epoch": 3.95, "learning_rate": 8.58215629525372e-09, "loss": 0.0026, "step": 16206 }, { "epoch": 3.95, "learning_rate": 8.419487911903724e-09, "loss": 0.0027, "step": 16208 }, { "epoch": 3.95, "learning_rate": 8.258375294042386e-09, "loss": 0.0023, "step": 16210 }, { "epoch": 3.95, "learning_rate": 8.098818466755198e-09, "loss": 0.0013, "step": 16212 }, { "epoch": 3.95, "learning_rate": 7.940817454885619e-09, "loss": 0.0027, "step": 16214 }, { "epoch": 3.95, "learning_rate": 7.7843722830373e-09, "loss": 0.0028, "step": 16216 }, { "epoch": 3.95, "learning_rate": 7.629482975569646e-09, "loss": 0.0014, "step": 16218 }, { "epoch": 3.95, "learning_rate": 7.476149556598922e-09, "loss": 0.0035, "step": 16220 }, { "epoch": 3.95, "learning_rate": 7.324372050001583e-09, "loss": 0.0036, "step": 16222 }, { "epoch": 3.95, "learning_rate": 7.174150479409836e-09, "loss": 0.001, "step": 16224 }, { "epoch": 3.95, "learning_rate": 7.025484868213861e-09, "loss": 0.0032, "step": 16226 }, { "epoch": 3.95, "learning_rate": 6.878375239562918e-09, "loss": 0.0018, "step": 16228 }, { "epoch": 3.95, "learning_rate": 6.732821616363128e-09, "loss": 0.0011, "step": 16230 }, { "epoch": 3.96, "learning_rate": 6.588824021278584e-09, "loss": 0.0018, "step": 16232 }, { "epoch": 3.96, "learning_rate": 6.4463824767291294e-09, "loss": 0.0022, "step": 16234 }, { "epoch": 3.96, "learning_rate": 6.3054970048959105e-09, "loss": 0.0014, "step": 16236 }, { "epoch": 3.96, "learning_rate": 6.166167627715825e-09, "loss": 0.0024, "step": 16238 }, { "epoch": 3.96, "learning_rate": 6.028394366881518e-09, "loss": 0.0042, "step": 16240 }, { "epoch": 3.96, "learning_rate": 5.892177243846942e-09, "loss": 0.0014, "step": 16242 }, { "epoch": 3.96, "learning_rate": 5.757516279821796e-09, "loss": 0.0014, "step": 16244 }, { "epoch": 3.96, "learning_rate": 5.624411495774862e-09, "loss": 0.0023, "step": 16246 }, { "epoch": 3.96, "learning_rate": 5.492862912429564e-09, "loss": 0.0027, "step": 16248 }, { "epoch": 3.96, "learning_rate": 5.362870550269517e-09, "loss": 0.0017, "step": 16250 }, { "epoch": 3.96, "learning_rate": 5.234434429537416e-09, "loss": 0.0017, "step": 16252 }, { "epoch": 3.96, "learning_rate": 5.107554570229489e-09, "loss": 0.0011, "step": 16254 }, { "epoch": 3.96, "learning_rate": 4.982230992103265e-09, "loss": 0.0033, "step": 16256 }, { "epoch": 3.96, "learning_rate": 4.8584637146709135e-09, "loss": 0.0032, "step": 16258 }, { "epoch": 3.96, "learning_rate": 4.736252757205906e-09, "loss": 0.0047, "step": 16260 }, { "epoch": 3.96, "learning_rate": 4.615598138737465e-09, "loss": 0.002, "step": 16262 }, { "epoch": 3.96, "learning_rate": 4.496499878050564e-09, "loss": 0.0015, "step": 16264 }, { "epoch": 3.96, "learning_rate": 4.3789579936914795e-09, "loss": 0.0022, "step": 16266 }, { "epoch": 3.96, "learning_rate": 4.262972503961127e-09, "loss": 0.0025, "step": 16268 }, { "epoch": 3.96, "learning_rate": 4.148543426919505e-09, "loss": 0.0004, "step": 16270 }, { "epoch": 3.96, "learning_rate": 4.035670780385692e-09, "loss": 0.0024, "step": 16272 }, { "epoch": 3.97, "learning_rate": 3.924354581932299e-09, "loss": 0.002, "step": 16274 }, { "epoch": 3.97, "learning_rate": 3.814594848894349e-09, "loss": 0.001, "step": 16276 }, { "epoch": 3.97, "learning_rate": 3.7063915983603925e-09, "loss": 0.0024, "step": 16278 }, { "epoch": 3.97, "learning_rate": 3.5997448471802866e-09, "loss": 0.0017, "step": 16280 }, { "epoch": 3.97, "learning_rate": 3.4946546119585257e-09, "loss": 0.0015, "step": 16282 }, { "epoch": 3.97, "learning_rate": 3.3911209090586874e-09, "loss": 0.0024, "step": 16284 }, { "epoch": 3.97, "learning_rate": 3.2891437546023196e-09, "loss": 0.0016, "step": 16286 }, { "epoch": 3.97, "learning_rate": 3.188723164467833e-09, "loss": 0.002, "step": 16288 }, { "epoch": 3.97, "learning_rate": 3.089859154290498e-09, "loss": 0.0012, "step": 16290 }, { "epoch": 3.97, "learning_rate": 2.9925517394657764e-09, "loss": 0.0006, "step": 16292 }, { "epoch": 3.97, "learning_rate": 2.896800935143773e-09, "loss": 0.0017, "step": 16294 }, { "epoch": 3.97, "learning_rate": 2.8026067562347824e-09, "loss": 0.0025, "step": 16296 }, { "epoch": 3.97, "learning_rate": 2.709969217404851e-09, "loss": 0.0016, "step": 16298 }, { "epoch": 3.97, "learning_rate": 2.6188883330779958e-09, "loss": 0.0008, "step": 16300 }, { "epoch": 3.97, "learning_rate": 2.529364117437316e-09, "loss": 0.0033, "step": 16302 }, { "epoch": 3.97, "learning_rate": 2.4413965844216625e-09, "loss": 0.0017, "step": 16304 }, { "epoch": 3.97, "learning_rate": 2.354985747727856e-09, "loss": 0.0008, "step": 16306 }, { "epoch": 3.97, "learning_rate": 2.2701316208117997e-09, "loss": 0.0014, "step": 16308 }, { "epoch": 3.97, "learning_rate": 2.1868342168851474e-09, "loss": 0.0017, "step": 16310 }, { "epoch": 3.97, "learning_rate": 2.105093548918635e-09, "loss": 0.0018, "step": 16312 }, { "epoch": 3.98, "learning_rate": 2.0249096296387495e-09, "loss": 0.0051, "step": 16314 }, { "epoch": 3.98, "learning_rate": 1.946282471532168e-09, "loss": 0.0016, "step": 16316 }, { "epoch": 3.98, "learning_rate": 1.869212086841321e-09, "loss": 0.0014, "step": 16318 }, { "epoch": 3.98, "learning_rate": 1.7936984875654983e-09, "loss": 0.002, "step": 16320 }, { "epoch": 3.98, "learning_rate": 1.7197416854641824e-09, "loss": 0.0024, "step": 16322 }, { "epoch": 3.98, "learning_rate": 1.6473416920526064e-09, "loss": 0.0044, "step": 16324 }, { "epoch": 3.98, "learning_rate": 1.5764985186028648e-09, "loss": 0.0054, "step": 16326 }, { "epoch": 3.98, "learning_rate": 1.5072121761472436e-09, "loss": 0.0033, "step": 16328 }, { "epoch": 3.98, "learning_rate": 1.43948267547378e-09, "loss": 0.0007, "step": 16330 }, { "epoch": 3.98, "learning_rate": 1.3733100271284826e-09, "loss": 0.0009, "step": 16332 }, { "epoch": 3.98, "learning_rate": 1.3086942414153315e-09, "loss": 0.0027, "step": 16334 }, { "epoch": 3.98, "learning_rate": 1.2456353283940569e-09, "loss": 0.0021, "step": 16336 }, { "epoch": 3.98, "learning_rate": 1.184133297884582e-09, "loss": 0.0021, "step": 16338 }, { "epoch": 3.98, "learning_rate": 1.1241881594636905e-09, "loss": 0.0022, "step": 16340 }, { "epoch": 3.98, "learning_rate": 1.0657999224639171e-09, "loss": 0.0019, "step": 16342 }, { "epoch": 3.98, "learning_rate": 1.0089685959779882e-09, "loss": 0.0027, "step": 16344 }, { "epoch": 3.98, "learning_rate": 9.536941888532713e-10, "loss": 0.0055, "step": 16346 }, { "epoch": 3.98, "learning_rate": 8.999767096984358e-10, "loss": 0.0026, "step": 16348 }, { "epoch": 3.98, "learning_rate": 8.478161668779017e-10, "loss": 0.0034, "step": 16350 }, { "epoch": 3.98, "learning_rate": 7.972125685107301e-10, "loss": 0.0007, "step": 16352 }, { "epoch": 3.98, "learning_rate": 7.481659224783943e-10, "loss": 0.0008, "step": 16354 }, { "epoch": 3.99, "learning_rate": 7.006762364181185e-10, "loss": 0.0029, "step": 16356 }, { "epoch": 3.99, "learning_rate": 6.547435177228778e-10, "loss": 0.0014, "step": 16358 }, { "epoch": 3.99, "learning_rate": 6.103677735458391e-10, "loss": 0.0014, "step": 16360 }, { "epoch": 3.99, "learning_rate": 5.675490107970305e-10, "loss": 0.0013, "step": 16362 }, { "epoch": 3.99, "learning_rate": 5.262872361422311e-10, "loss": 0.0029, "step": 16364 }, { "epoch": 3.99, "learning_rate": 4.865824560074118e-10, "loss": 0.0017, "step": 16366 }, { "epoch": 3.99, "learning_rate": 4.484346765742942e-10, "loss": 0.0025, "step": 16368 }, { "epoch": 3.99, "learning_rate": 4.1184390378257167e-10, "loss": 0.0023, "step": 16370 }, { "epoch": 3.99, "learning_rate": 3.7681014333101897e-10, "loss": 0.0054, "step": 16372 }, { "epoch": 3.99, "learning_rate": 3.433334006730515e-10, "loss": 0.0036, "step": 16374 }, { "epoch": 3.99, "learning_rate": 3.1141368102227676e-10, "loss": 0.0018, "step": 16376 }, { "epoch": 3.99, "learning_rate": 2.8105098934916307e-10, "loss": 0.0035, "step": 16378 }, { "epoch": 3.99, "learning_rate": 2.522453303799299e-10, "loss": 0.0036, "step": 16380 }, { "epoch": 3.99, "learning_rate": 2.2499670860098855e-10, "loss": 0.0026, "step": 16382 }, { "epoch": 3.99, "learning_rate": 1.993051282545011e-10, "loss": 0.0013, "step": 16384 }, { "epoch": 3.99, "learning_rate": 1.751705933417114e-10, "loss": 0.0025, "step": 16386 }, { "epoch": 3.99, "learning_rate": 1.525931076196141e-10, "loss": 0.001, "step": 16388 }, { "epoch": 3.99, "learning_rate": 1.3157267460428557e-10, "loss": 0.0037, "step": 16390 }, { "epoch": 3.99, "learning_rate": 1.1210929756866329e-10, "loss": 0.0015, "step": 16392 }, { "epoch": 3.99, "learning_rate": 9.420297954254587e-11, "loss": 0.003, "step": 16394 }, { "epoch": 4.0, "learning_rate": 7.785372331592378e-11, "loss": 0.0026, "step": 16396 }, { "epoch": 4.0, "learning_rate": 6.306153143231797e-11, "loss": 0.0019, "step": 16398 }, { "epoch": 4.0, "learning_rate": 4.982640619766166e-11, "loss": 0.0039, "step": 16400 }, { "epoch": 4.0, "learning_rate": 3.8148349669198113e-11, "loss": 0.0028, "step": 16402 }, { "epoch": 4.0, "learning_rate": 2.802736366880332e-11, "loss": 0.0032, "step": 16404 }, { "epoch": 4.0, "learning_rate": 1.9463449770773522e-11, "loss": 0.002, "step": 16406 }, { "epoch": 4.0, "learning_rate": 1.2456609307376355e-11, "loss": 0.0014, "step": 16408 }, { "epoch": 4.0, "learning_rate": 7.006843372181493e-12, "loss": 0.0008, "step": 16410 }, { "epoch": 4.0, "learning_rate": 3.114152812289106e-12, "loss": 0.0011, "step": 16412 }, { "epoch": 4.0, "learning_rate": 7.785382327707425e-13, "loss": 0.0029, "step": 16414 }, { "epoch": 4.0, "learning_rate": 0.0, "loss": 0.001, "step": 16416 }, { "epoch": 4.0, "step": 16416, "total_flos": 5.43675641046342e+18, "train_loss": 0.10646877170433743, "train_runtime": 71465.0126, "train_samples_per_second": 7.35, "train_steps_per_second": 0.23 } ], "max_steps": 16416, "num_train_epochs": 4, "total_flos": 5.43675641046342e+18, "trial_name": null, "trial_params": null }