diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,49273 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "global_step": 16416, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 8.113590263691684e-08, + "loss": 1.2614, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.6227180527383367e-07, + "loss": 0.5289, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.434077079107505e-07, + "loss": 0.4145, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 3.2454361054766735e-07, + "loss": 0.4136, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.056795131845842e-07, + "loss": 0.414, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.86815415821501e-07, + "loss": 0.4016, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 5.679513184584178e-07, + "loss": 0.4036, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 6.490872210953347e-07, + "loss": 0.3919, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 7.302231237322515e-07, + "loss": 0.3831, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 8.113590263691684e-07, + "loss": 0.3727, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8.924949290060852e-07, + "loss": 0.3496, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 9.73630831643002e-07, + "loss": 0.3871, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 1.0547667342799188e-06, + "loss": 0.3724, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 1.1359026369168357e-06, + "loss": 0.3791, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 1.2170385395537525e-06, + "loss": 0.3617, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 1.2981744421906694e-06, + "loss": 0.3574, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 1.3793103448275862e-06, + "loss": 0.3616, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 1.460446247464503e-06, + "loss": 0.3585, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 1.54158215010142e-06, + "loss": 0.3509, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 1.6227180527383368e-06, + "loss": 0.3897, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.7038539553752536e-06, + "loss": 0.3582, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 1.7849898580121705e-06, + "loss": 0.3566, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 1.8661257606490873e-06, + "loss": 0.3368, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 1.947261663286004e-06, + "loss": 0.3712, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 2.028397565922921e-06, + "loss": 0.3633, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2.1095334685598377e-06, + "loss": 0.3518, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 2.1906693711967548e-06, + "loss": 0.3629, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 2.2718052738336714e-06, + "loss": 0.3377, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 2.3529411764705885e-06, + "loss": 0.3539, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 2.434077079107505e-06, + "loss": 0.3736, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 2.515212981744422e-06, + "loss": 0.3617, + "step": 62 + }, + { + "epoch": 0.02, + "learning_rate": 2.596348884381339e-06, + "loss": 0.3517, + "step": 64 + }, + { + "epoch": 0.02, + "learning_rate": 2.677484787018256e-06, + "loss": 0.3694, + "step": 66 + }, + { + "epoch": 0.02, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.3483, + "step": 68 + }, + { + "epoch": 0.02, + "learning_rate": 2.8397565922920896e-06, + "loss": 0.3322, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 2.920892494929006e-06, + "loss": 0.3295, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 3.0020283975659233e-06, + "loss": 0.3514, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 3.08316430020284e-06, + "loss": 0.3366, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 3.164300202839757e-06, + "loss": 0.3086, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 3.2454361054766736e-06, + "loss": 0.3452, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 3.3265720081135907e-06, + "loss": 0.2971, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 3.4077079107505073e-06, + "loss": 0.3384, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 3.4888438133874244e-06, + "loss": 0.3428, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 3.569979716024341e-06, + "loss": 0.3635, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 3.651115618661258e-06, + "loss": 0.3625, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 3.7322515212981747e-06, + "loss": 0.3502, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 3.8133874239350913e-06, + "loss": 0.3678, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 3.894523326572008e-06, + "loss": 0.3365, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 3.975659229208925e-06, + "loss": 0.3364, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 4.056795131845842e-06, + "loss": 0.3526, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 4.137931034482759e-06, + "loss": 0.3411, + "step": 102 + }, + { + "epoch": 0.03, + "learning_rate": 4.219066937119675e-06, + "loss": 0.3537, + "step": 104 + }, + { + "epoch": 0.03, + "learning_rate": 4.3002028397565924e-06, + "loss": 0.3394, + "step": 106 + }, + { + "epoch": 0.03, + "learning_rate": 4.3813387423935095e-06, + "loss": 0.3474, + "step": 108 + }, + { + "epoch": 0.03, + "learning_rate": 4.4624746450304266e-06, + "loss": 0.3248, + "step": 110 + }, + { + "epoch": 0.03, + "learning_rate": 4.543610547667343e-06, + "loss": 0.3571, + "step": 112 + }, + { + "epoch": 0.03, + "learning_rate": 4.62474645030426e-06, + "loss": 0.3544, + "step": 114 + }, + { + "epoch": 0.03, + "learning_rate": 4.705882352941177e-06, + "loss": 0.3519, + "step": 116 + }, + { + "epoch": 0.03, + "learning_rate": 4.787018255578094e-06, + "loss": 0.3528, + "step": 118 + }, + { + "epoch": 0.03, + "learning_rate": 4.86815415821501e-06, + "loss": 0.3595, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 4.949290060851927e-06, + "loss": 0.3573, + "step": 122 + }, + { + "epoch": 0.03, + "learning_rate": 5.030425963488844e-06, + "loss": 0.3196, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 5.111561866125761e-06, + "loss": 0.3401, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 5.192697768762678e-06, + "loss": 0.3253, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 5.2738336713995955e-06, + "loss": 0.3246, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 5.354969574036512e-06, + "loss": 0.3478, + "step": 132 + }, + { + "epoch": 0.03, + "learning_rate": 5.436105476673429e-06, + "loss": 0.3498, + "step": 134 + }, + { + "epoch": 0.03, + "learning_rate": 5.517241379310345e-06, + "loss": 0.3404, + "step": 136 + }, + { + "epoch": 0.03, + "learning_rate": 5.598377281947263e-06, + "loss": 0.3578, + "step": 138 + }, + { + "epoch": 0.03, + "learning_rate": 5.679513184584179e-06, + "loss": 0.3445, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 5.760649087221096e-06, + "loss": 0.3386, + "step": 142 + }, + { + "epoch": 0.04, + "learning_rate": 5.841784989858012e-06, + "loss": 0.3562, + "step": 144 + }, + { + "epoch": 0.04, + "learning_rate": 5.92292089249493e-06, + "loss": 0.3339, + "step": 146 + }, + { + "epoch": 0.04, + "learning_rate": 6.0040567951318465e-06, + "loss": 0.3415, + "step": 148 + }, + { + "epoch": 0.04, + "learning_rate": 6.0851926977687636e-06, + "loss": 0.3352, + "step": 150 + }, + { + "epoch": 0.04, + "learning_rate": 6.16632860040568e-06, + "loss": 0.3339, + "step": 152 + }, + { + "epoch": 0.04, + "learning_rate": 6.247464503042598e-06, + "loss": 0.3511, + "step": 154 + }, + { + "epoch": 0.04, + "learning_rate": 6.328600405679514e-06, + "loss": 0.3339, + "step": 156 + }, + { + "epoch": 0.04, + "learning_rate": 6.409736308316431e-06, + "loss": 0.312, + "step": 158 + }, + { + "epoch": 0.04, + "learning_rate": 6.490872210953347e-06, + "loss": 0.3313, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 6.572008113590265e-06, + "loss": 0.3529, + "step": 162 + }, + { + "epoch": 0.04, + "learning_rate": 6.653144016227181e-06, + "loss": 0.3632, + "step": 164 + }, + { + "epoch": 0.04, + "learning_rate": 6.734279918864098e-06, + "loss": 0.3218, + "step": 166 + }, + { + "epoch": 0.04, + "learning_rate": 6.815415821501015e-06, + "loss": 0.3444, + "step": 168 + }, + { + "epoch": 0.04, + "learning_rate": 6.896551724137932e-06, + "loss": 0.3473, + "step": 170 + }, + { + "epoch": 0.04, + "learning_rate": 6.977687626774849e-06, + "loss": 0.3323, + "step": 172 + }, + { + "epoch": 0.04, + "learning_rate": 7.058823529411766e-06, + "loss": 0.3382, + "step": 174 + }, + { + "epoch": 0.04, + "learning_rate": 7.139959432048682e-06, + "loss": 0.324, + "step": 176 + }, + { + "epoch": 0.04, + "learning_rate": 7.221095334685599e-06, + "loss": 0.3314, + "step": 178 + }, + { + "epoch": 0.04, + "learning_rate": 7.302231237322516e-06, + "loss": 0.3148, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 7.383367139959433e-06, + "loss": 0.3384, + "step": 182 + }, + { + "epoch": 0.04, + "learning_rate": 7.464503042596349e-06, + "loss": 0.3375, + "step": 184 + }, + { + "epoch": 0.05, + "learning_rate": 7.5456389452332665e-06, + "loss": 0.3501, + "step": 186 + }, + { + "epoch": 0.05, + "learning_rate": 7.626774847870183e-06, + "loss": 0.3491, + "step": 188 + }, + { + "epoch": 0.05, + "learning_rate": 7.7079107505071e-06, + "loss": 0.353, + "step": 190 + }, + { + "epoch": 0.05, + "learning_rate": 7.789046653144016e-06, + "loss": 0.3278, + "step": 192 + }, + { + "epoch": 0.05, + "learning_rate": 7.870182555780935e-06, + "loss": 0.3406, + "step": 194 + }, + { + "epoch": 0.05, + "learning_rate": 7.95131845841785e-06, + "loss": 0.3584, + "step": 196 + }, + { + "epoch": 0.05, + "learning_rate": 8.032454361054767e-06, + "loss": 0.3401, + "step": 198 + }, + { + "epoch": 0.05, + "learning_rate": 8.113590263691684e-06, + "loss": 0.3356, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 8.194726166328601e-06, + "loss": 0.3439, + "step": 202 + }, + { + "epoch": 0.05, + "learning_rate": 8.275862068965518e-06, + "loss": 0.3372, + "step": 204 + }, + { + "epoch": 0.05, + "learning_rate": 8.356997971602435e-06, + "loss": 0.3373, + "step": 206 + }, + { + "epoch": 0.05, + "learning_rate": 8.43813387423935e-06, + "loss": 0.3266, + "step": 208 + }, + { + "epoch": 0.05, + "learning_rate": 8.51926977687627e-06, + "loss": 0.3466, + "step": 210 + }, + { + "epoch": 0.05, + "learning_rate": 8.600405679513185e-06, + "loss": 0.3455, + "step": 212 + }, + { + "epoch": 0.05, + "learning_rate": 8.681541582150102e-06, + "loss": 0.3269, + "step": 214 + }, + { + "epoch": 0.05, + "learning_rate": 8.762677484787019e-06, + "loss": 0.3052, + "step": 216 + }, + { + "epoch": 0.05, + "learning_rate": 8.843813387423936e-06, + "loss": 0.3405, + "step": 218 + }, + { + "epoch": 0.05, + "learning_rate": 8.924949290060853e-06, + "loss": 0.3277, + "step": 220 + }, + { + "epoch": 0.05, + "learning_rate": 9.00608519269777e-06, + "loss": 0.3151, + "step": 222 + }, + { + "epoch": 0.05, + "learning_rate": 9.087221095334686e-06, + "loss": 0.3159, + "step": 224 + }, + { + "epoch": 0.06, + "learning_rate": 9.168356997971604e-06, + "loss": 0.3493, + "step": 226 + }, + { + "epoch": 0.06, + "learning_rate": 9.24949290060852e-06, + "loss": 0.3403, + "step": 228 + }, + { + "epoch": 0.06, + "learning_rate": 9.330628803245437e-06, + "loss": 0.3386, + "step": 230 + }, + { + "epoch": 0.06, + "learning_rate": 9.411764705882354e-06, + "loss": 0.3393, + "step": 232 + }, + { + "epoch": 0.06, + "learning_rate": 9.492900608519271e-06, + "loss": 0.3426, + "step": 234 + }, + { + "epoch": 0.06, + "learning_rate": 9.574036511156188e-06, + "loss": 0.3531, + "step": 236 + }, + { + "epoch": 0.06, + "learning_rate": 9.655172413793105e-06, + "loss": 0.351, + "step": 238 + }, + { + "epoch": 0.06, + "learning_rate": 9.73630831643002e-06, + "loss": 0.3543, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 9.817444219066939e-06, + "loss": 0.3378, + "step": 242 + }, + { + "epoch": 0.06, + "learning_rate": 9.898580121703854e-06, + "loss": 0.3288, + "step": 244 + }, + { + "epoch": 0.06, + "learning_rate": 9.979716024340772e-06, + "loss": 0.3512, + "step": 246 + }, + { + "epoch": 0.06, + "learning_rate": 1.0060851926977689e-05, + "loss": 0.3391, + "step": 248 + }, + { + "epoch": 0.06, + "learning_rate": 1.0141987829614606e-05, + "loss": 0.3461, + "step": 250 + }, + { + "epoch": 0.06, + "learning_rate": 1.0223123732251523e-05, + "loss": 0.3246, + "step": 252 + }, + { + "epoch": 0.06, + "learning_rate": 1.0304259634888438e-05, + "loss": 0.325, + "step": 254 + }, + { + "epoch": 0.06, + "learning_rate": 1.0385395537525355e-05, + "loss": 0.3457, + "step": 256 + }, + { + "epoch": 0.06, + "learning_rate": 1.0466531440162272e-05, + "loss": 0.3072, + "step": 258 + }, + { + "epoch": 0.06, + "learning_rate": 1.0547667342799191e-05, + "loss": 0.3352, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 1.0628803245436106e-05, + "loss": 0.3282, + "step": 262 + }, + { + "epoch": 0.06, + "learning_rate": 1.0709939148073023e-05, + "loss": 0.3274, + "step": 264 + }, + { + "epoch": 0.06, + "learning_rate": 1.079107505070994e-05, + "loss": 0.3467, + "step": 266 + }, + { + "epoch": 0.07, + "learning_rate": 1.0872210953346858e-05, + "loss": 0.3319, + "step": 268 + }, + { + "epoch": 0.07, + "learning_rate": 1.0953346855983773e-05, + "loss": 0.3206, + "step": 270 + }, + { + "epoch": 0.07, + "learning_rate": 1.103448275862069e-05, + "loss": 0.3468, + "step": 272 + }, + { + "epoch": 0.07, + "learning_rate": 1.1115618661257607e-05, + "loss": 0.3423, + "step": 274 + }, + { + "epoch": 0.07, + "learning_rate": 1.1196754563894526e-05, + "loss": 0.3467, + "step": 276 + }, + { + "epoch": 0.07, + "learning_rate": 1.1277890466531441e-05, + "loss": 0.3367, + "step": 278 + }, + { + "epoch": 0.07, + "learning_rate": 1.1359026369168358e-05, + "loss": 0.3399, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 1.1440162271805275e-05, + "loss": 0.33, + "step": 282 + }, + { + "epoch": 0.07, + "learning_rate": 1.1521298174442192e-05, + "loss": 0.3267, + "step": 284 + }, + { + "epoch": 0.07, + "learning_rate": 1.1602434077079108e-05, + "loss": 0.3496, + "step": 286 + }, + { + "epoch": 0.07, + "learning_rate": 1.1683569979716025e-05, + "loss": 0.3487, + "step": 288 + }, + { + "epoch": 0.07, + "learning_rate": 1.1764705882352942e-05, + "loss": 0.3477, + "step": 290 + }, + { + "epoch": 0.07, + "learning_rate": 1.184584178498986e-05, + "loss": 0.3298, + "step": 292 + }, + { + "epoch": 0.07, + "learning_rate": 1.1926977687626774e-05, + "loss": 0.3296, + "step": 294 + }, + { + "epoch": 0.07, + "learning_rate": 1.2008113590263693e-05, + "loss": 0.3371, + "step": 296 + }, + { + "epoch": 0.07, + "learning_rate": 1.208924949290061e-05, + "loss": 0.3444, + "step": 298 + }, + { + "epoch": 0.07, + "learning_rate": 1.2170385395537527e-05, + "loss": 0.3382, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 1.2251521298174443e-05, + "loss": 0.3259, + "step": 302 + }, + { + "epoch": 0.07, + "learning_rate": 1.233265720081136e-05, + "loss": 0.3469, + "step": 304 + }, + { + "epoch": 0.07, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.317, + "step": 306 + }, + { + "epoch": 0.08, + "learning_rate": 1.2494929006085195e-05, + "loss": 0.3263, + "step": 308 + }, + { + "epoch": 0.08, + "learning_rate": 1.2576064908722109e-05, + "loss": 0.3204, + "step": 310 + }, + { + "epoch": 0.08, + "learning_rate": 1.2657200811359028e-05, + "loss": 0.3197, + "step": 312 + }, + { + "epoch": 0.08, + "learning_rate": 1.2738336713995945e-05, + "loss": 0.3275, + "step": 314 + }, + { + "epoch": 0.08, + "learning_rate": 1.2819472616632862e-05, + "loss": 0.3183, + "step": 316 + }, + { + "epoch": 0.08, + "learning_rate": 1.2900608519269777e-05, + "loss": 0.3363, + "step": 318 + }, + { + "epoch": 0.08, + "learning_rate": 1.2981744421906694e-05, + "loss": 0.3328, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 1.3062880324543611e-05, + "loss": 0.3409, + "step": 322 + }, + { + "epoch": 0.08, + "learning_rate": 1.314401622718053e-05, + "loss": 0.3373, + "step": 324 + }, + { + "epoch": 0.08, + "learning_rate": 1.3225152129817444e-05, + "loss": 0.3331, + "step": 326 + }, + { + "epoch": 0.08, + "learning_rate": 1.3306288032454363e-05, + "loss": 0.3247, + "step": 328 + }, + { + "epoch": 0.08, + "learning_rate": 1.338742393509128e-05, + "loss": 0.3305, + "step": 330 + }, + { + "epoch": 0.08, + "learning_rate": 1.3468559837728197e-05, + "loss": 0.3329, + "step": 332 + }, + { + "epoch": 0.08, + "learning_rate": 1.3549695740365112e-05, + "loss": 0.332, + "step": 334 + }, + { + "epoch": 0.08, + "learning_rate": 1.363083164300203e-05, + "loss": 0.3168, + "step": 336 + }, + { + "epoch": 0.08, + "learning_rate": 1.3711967545638946e-05, + "loss": 0.3176, + "step": 338 + }, + { + "epoch": 0.08, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.312, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 1.3874239350912779e-05, + "loss": 0.3378, + "step": 342 + }, + { + "epoch": 0.08, + "learning_rate": 1.3955375253549697e-05, + "loss": 0.338, + "step": 344 + }, + { + "epoch": 0.08, + "learning_rate": 1.4036511156186615e-05, + "loss": 0.3286, + "step": 346 + }, + { + "epoch": 0.08, + "learning_rate": 1.4117647058823532e-05, + "loss": 0.3103, + "step": 348 + }, + { + "epoch": 0.09, + "learning_rate": 1.4198782961460447e-05, + "loss": 0.3186, + "step": 350 + }, + { + "epoch": 0.09, + "learning_rate": 1.4279918864097364e-05, + "loss": 0.3559, + "step": 352 + }, + { + "epoch": 0.09, + "learning_rate": 1.4361054766734281e-05, + "loss": 0.313, + "step": 354 + }, + { + "epoch": 0.09, + "learning_rate": 1.4442190669371198e-05, + "loss": 0.3274, + "step": 356 + }, + { + "epoch": 0.09, + "learning_rate": 1.4523326572008113e-05, + "loss": 0.3427, + "step": 358 + }, + { + "epoch": 0.09, + "learning_rate": 1.4604462474645032e-05, + "loss": 0.3379, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 1.468559837728195e-05, + "loss": 0.3384, + "step": 362 + }, + { + "epoch": 0.09, + "learning_rate": 1.4766734279918866e-05, + "loss": 0.3395, + "step": 364 + }, + { + "epoch": 0.09, + "learning_rate": 1.4847870182555782e-05, + "loss": 0.3361, + "step": 366 + }, + { + "epoch": 0.09, + "learning_rate": 1.4929006085192699e-05, + "loss": 0.3459, + "step": 368 + }, + { + "epoch": 0.09, + "learning_rate": 1.5010141987829616e-05, + "loss": 0.3415, + "step": 370 + }, + { + "epoch": 0.09, + "learning_rate": 1.5091277890466533e-05, + "loss": 0.3399, + "step": 372 + }, + { + "epoch": 0.09, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.3289, + "step": 374 + }, + { + "epoch": 0.09, + "learning_rate": 1.5253549695740365e-05, + "loss": 0.3346, + "step": 376 + }, + { + "epoch": 0.09, + "learning_rate": 1.5334685598377284e-05, + "loss": 0.3278, + "step": 378 + }, + { + "epoch": 0.09, + "learning_rate": 1.54158215010142e-05, + "loss": 0.3178, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 1.5496957403651115e-05, + "loss": 0.3179, + "step": 382 + }, + { + "epoch": 0.09, + "learning_rate": 1.5578093306288032e-05, + "loss": 0.3195, + "step": 384 + }, + { + "epoch": 0.09, + "learning_rate": 1.5659229208924952e-05, + "loss": 0.3376, + "step": 386 + }, + { + "epoch": 0.09, + "learning_rate": 1.574036511156187e-05, + "loss": 0.3371, + "step": 388 + }, + { + "epoch": 0.1, + "learning_rate": 1.5821501014198783e-05, + "loss": 0.3258, + "step": 390 + }, + { + "epoch": 0.1, + "learning_rate": 1.59026369168357e-05, + "loss": 0.3207, + "step": 392 + }, + { + "epoch": 0.1, + "learning_rate": 1.5983772819472617e-05, + "loss": 0.3065, + "step": 394 + }, + { + "epoch": 0.1, + "learning_rate": 1.6064908722109534e-05, + "loss": 0.3562, + "step": 396 + }, + { + "epoch": 0.1, + "learning_rate": 1.614604462474645e-05, + "loss": 0.3228, + "step": 398 + }, + { + "epoch": 0.1, + "learning_rate": 1.622718052738337e-05, + "loss": 0.3368, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 1.6308316430020285e-05, + "loss": 0.3478, + "step": 402 + }, + { + "epoch": 0.1, + "learning_rate": 1.6389452332657203e-05, + "loss": 0.3398, + "step": 404 + }, + { + "epoch": 0.1, + "learning_rate": 1.647058823529412e-05, + "loss": 0.3244, + "step": 406 + }, + { + "epoch": 0.1, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.3294, + "step": 408 + }, + { + "epoch": 0.1, + "learning_rate": 1.6632860040567954e-05, + "loss": 0.3381, + "step": 410 + }, + { + "epoch": 0.1, + "learning_rate": 1.671399594320487e-05, + "loss": 0.3363, + "step": 412 + }, + { + "epoch": 0.1, + "learning_rate": 1.6795131845841784e-05, + "loss": 0.3429, + "step": 414 + }, + { + "epoch": 0.1, + "learning_rate": 1.68762677484787e-05, + "loss": 0.337, + "step": 416 + }, + { + "epoch": 0.1, + "learning_rate": 1.6957403651115622e-05, + "loss": 0.327, + "step": 418 + }, + { + "epoch": 0.1, + "learning_rate": 1.703853955375254e-05, + "loss": 0.3235, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 1.7119675456389453e-05, + "loss": 0.3387, + "step": 422 + }, + { + "epoch": 0.1, + "learning_rate": 1.720081135902637e-05, + "loss": 0.336, + "step": 424 + }, + { + "epoch": 0.1, + "learning_rate": 1.7281947261663287e-05, + "loss": 0.3452, + "step": 426 + }, + { + "epoch": 0.1, + "learning_rate": 1.7363083164300204e-05, + "loss": 0.3181, + "step": 428 + }, + { + "epoch": 0.1, + "learning_rate": 1.744421906693712e-05, + "loss": 0.3369, + "step": 430 + }, + { + "epoch": 0.11, + "learning_rate": 1.7525354969574038e-05, + "loss": 0.3506, + "step": 432 + }, + { + "epoch": 0.11, + "learning_rate": 1.7606490872210955e-05, + "loss": 0.3135, + "step": 434 + }, + { + "epoch": 0.11, + "learning_rate": 1.7687626774847872e-05, + "loss": 0.3127, + "step": 436 + }, + { + "epoch": 0.11, + "learning_rate": 1.776876267748479e-05, + "loss": 0.35, + "step": 438 + }, + { + "epoch": 0.11, + "learning_rate": 1.7849898580121706e-05, + "loss": 0.3423, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.3362, + "step": 442 + }, + { + "epoch": 0.11, + "learning_rate": 1.801217038539554e-05, + "loss": 0.3, + "step": 444 + }, + { + "epoch": 0.11, + "learning_rate": 1.8093306288032454e-05, + "loss": 0.3476, + "step": 446 + }, + { + "epoch": 0.11, + "learning_rate": 1.817444219066937e-05, + "loss": 0.3289, + "step": 448 + }, + { + "epoch": 0.11, + "learning_rate": 1.8255578093306288e-05, + "loss": 0.3418, + "step": 450 + }, + { + "epoch": 0.11, + "learning_rate": 1.833671399594321e-05, + "loss": 0.3152, + "step": 452 + }, + { + "epoch": 0.11, + "learning_rate": 1.8417849898580122e-05, + "loss": 0.3405, + "step": 454 + }, + { + "epoch": 0.11, + "learning_rate": 1.849898580121704e-05, + "loss": 0.3378, + "step": 456 + }, + { + "epoch": 0.11, + "learning_rate": 1.8580121703853956e-05, + "loss": 0.3447, + "step": 458 + }, + { + "epoch": 0.11, + "learning_rate": 1.8661257606490873e-05, + "loss": 0.3415, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 1.874239350912779e-05, + "loss": 0.2988, + "step": 462 + }, + { + "epoch": 0.11, + "learning_rate": 1.8823529411764708e-05, + "loss": 0.3443, + "step": 464 + }, + { + "epoch": 0.11, + "learning_rate": 1.8904665314401625e-05, + "loss": 0.3468, + "step": 466 + }, + { + "epoch": 0.11, + "learning_rate": 1.8985801217038542e-05, + "loss": 0.3389, + "step": 468 + }, + { + "epoch": 0.11, + "learning_rate": 1.906693711967546e-05, + "loss": 0.3202, + "step": 470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9148073022312376e-05, + "loss": 0.3352, + "step": 472 + }, + { + "epoch": 0.12, + "learning_rate": 1.9229208924949293e-05, + "loss": 0.3316, + "step": 474 + }, + { + "epoch": 0.12, + "learning_rate": 1.931034482758621e-05, + "loss": 0.3248, + "step": 476 + }, + { + "epoch": 0.12, + "learning_rate": 1.9391480730223124e-05, + "loss": 0.3419, + "step": 478 + }, + { + "epoch": 0.12, + "learning_rate": 1.947261663286004e-05, + "loss": 0.3124, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9553752535496958e-05, + "loss": 0.4068, + "step": 482 + }, + { + "epoch": 0.12, + "learning_rate": 1.9634888438133878e-05, + "loss": 0.3444, + "step": 484 + }, + { + "epoch": 0.12, + "learning_rate": 1.9716024340770792e-05, + "loss": 0.3553, + "step": 486 + }, + { + "epoch": 0.12, + "learning_rate": 1.979716024340771e-05, + "loss": 0.3286, + "step": 488 + }, + { + "epoch": 0.12, + "learning_rate": 1.9878296146044626e-05, + "loss": 0.3579, + "step": 490 + }, + { + "epoch": 0.12, + "learning_rate": 1.9959432048681543e-05, + "loss": 0.3307, + "step": 492 + }, + { + "epoch": 0.12, + "learning_rate": 1.999999980536544e-05, + "loss": 0.3232, + "step": 494 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999998248289005e-05, + "loss": 0.3437, + "step": 496 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999995134136373e-05, + "loss": 0.328, + "step": 498 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999990462908037e-05, + "loss": 0.3468, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999984234604716e-05, + "loss": 0.3406, + "step": 502 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999976449227386e-05, + "loss": 0.3182, + "step": 504 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999967106777253e-05, + "loss": 0.3249, + "step": 506 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999956207255773e-05, + "loss": 0.3209, + "step": 508 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999943750664653e-05, + "loss": 0.3341, + "step": 510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9999929737005818e-05, + "loss": 0.3335, + "step": 512 + }, + { + "epoch": 0.13, + "learning_rate": 1.999991416628146e-05, + "loss": 0.3344, + "step": 514 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999897038494e-05, + "loss": 0.3374, + "step": 516 + }, + { + "epoch": 0.13, + "learning_rate": 1.999987835364611e-05, + "loss": 0.3262, + "step": 518 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999858111740688e-05, + "loss": 0.3144, + "step": 520 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999836312780895e-05, + "loss": 0.3221, + "step": 522 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999812956770125e-05, + "loss": 0.3367, + "step": 524 + }, + { + "epoch": 0.13, + "learning_rate": 1.999978804371201e-05, + "loss": 0.3143, + "step": 526 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999761573610432e-05, + "loss": 0.3397, + "step": 528 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999733546469514e-05, + "loss": 0.3183, + "step": 530 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999703962293612e-05, + "loss": 0.3474, + "step": 532 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999672821087347e-05, + "loss": 0.3183, + "step": 534 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999640122855556e-05, + "loss": 0.3377, + "step": 536 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999605867603333e-05, + "loss": 0.3219, + "step": 538 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999570055336014e-05, + "loss": 0.3197, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999532686059175e-05, + "loss": 0.3282, + "step": 542 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999493759778635e-05, + "loss": 0.3137, + "step": 544 + }, + { + "epoch": 0.13, + "learning_rate": 1.999945327650045e-05, + "loss": 0.3431, + "step": 546 + }, + { + "epoch": 0.13, + "learning_rate": 1.999941123623093e-05, + "loss": 0.3354, + "step": 548 + }, + { + "epoch": 0.13, + "learning_rate": 1.999936763897662e-05, + "loss": 0.3173, + "step": 550 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999322484744305e-05, + "loss": 0.3322, + "step": 552 + }, + { + "epoch": 0.13, + "learning_rate": 1.9999275773541016e-05, + "loss": 0.3082, + "step": 554 + }, + { + "epoch": 0.14, + "learning_rate": 1.9999227505374033e-05, + "loss": 0.3208, + "step": 556 + }, + { + "epoch": 0.14, + "learning_rate": 1.9999177680250863e-05, + "loss": 0.3308, + "step": 558 + }, + { + "epoch": 0.14, + "learning_rate": 1.999912629817927e-05, + "loss": 0.3151, + "step": 560 + }, + { + "epoch": 0.14, + "learning_rate": 1.999907335916725e-05, + "loss": 0.295, + "step": 562 + }, + { + "epoch": 0.14, + "learning_rate": 1.999901886322305e-05, + "loss": 0.3134, + "step": 564 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998962810355152e-05, + "loss": 0.3051, + "step": 566 + }, + { + "epoch": 0.14, + "learning_rate": 1.999890520057229e-05, + "loss": 0.327, + "step": 568 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998846033883427e-05, + "loss": 0.3553, + "step": 570 + }, + { + "epoch": 0.14, + "learning_rate": 1.999878531029778e-05, + "loss": 0.3285, + "step": 572 + }, + { + "epoch": 0.14, + "learning_rate": 1.99987230298248e-05, + "loss": 0.2928, + "step": 574 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998659192474193e-05, + "loss": 0.3042, + "step": 576 + }, + { + "epoch": 0.14, + "learning_rate": 1.999859379825589e-05, + "loss": 0.3498, + "step": 578 + }, + { + "epoch": 0.14, + "learning_rate": 1.999852684718008e-05, + "loss": 0.3228, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 1.999845833925718e-05, + "loss": 0.3335, + "step": 582 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998388274497864e-05, + "loss": 0.3374, + "step": 584 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998316652913038e-05, + "loss": 0.3328, + "step": 586 + }, + { + "epoch": 0.14, + "learning_rate": 1.999824347451386e-05, + "loss": 0.3158, + "step": 588 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998168739311715e-05, + "loss": 0.2963, + "step": 590 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998092447318247e-05, + "loss": 0.3467, + "step": 592 + }, + { + "epoch": 0.14, + "learning_rate": 1.9998014598545335e-05, + "loss": 0.3319, + "step": 594 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997935193005093e-05, + "loss": 0.3027, + "step": 596 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997854230709896e-05, + "loss": 0.32, + "step": 598 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997771711672343e-05, + "loss": 0.315, + "step": 600 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997687635905284e-05, + "loss": 0.331, + "step": 602 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997602003421815e-05, + "loss": 0.3158, + "step": 604 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997514814235264e-05, + "loss": 0.2994, + "step": 606 + }, + { + "epoch": 0.15, + "learning_rate": 1.999742606835921e-05, + "loss": 0.3224, + "step": 608 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997335765807463e-05, + "loss": 0.3146, + "step": 610 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997243906594098e-05, + "loss": 0.3013, + "step": 612 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997150490733412e-05, + "loss": 0.3083, + "step": 614 + }, + { + "epoch": 0.15, + "learning_rate": 1.9997055518239947e-05, + "loss": 0.3039, + "step": 616 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996958989128498e-05, + "loss": 0.3265, + "step": 618 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996860903414085e-05, + "loss": 0.3186, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 1.999676126111199e-05, + "loss": 0.3373, + "step": 622 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996660062237723e-05, + "loss": 0.2947, + "step": 624 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996557306807046e-05, + "loss": 0.3157, + "step": 626 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996452994835954e-05, + "loss": 0.3326, + "step": 628 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996347126340692e-05, + "loss": 0.3033, + "step": 630 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996239701337744e-05, + "loss": 0.3376, + "step": 632 + }, + { + "epoch": 0.15, + "learning_rate": 1.999613071984384e-05, + "loss": 0.3279, + "step": 634 + }, + { + "epoch": 0.15, + "learning_rate": 1.9996020181875942e-05, + "loss": 0.3207, + "step": 636 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995908087451264e-05, + "loss": 0.3149, + "step": 638 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995794436587266e-05, + "loss": 0.3375, + "step": 640 + }, + { + "epoch": 0.16, + "learning_rate": 1.999567922930164e-05, + "loss": 0.3287, + "step": 642 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995562465612322e-05, + "loss": 0.3353, + "step": 644 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995444145537494e-05, + "loss": 0.3311, + "step": 646 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995324269095585e-05, + "loss": 0.3122, + "step": 648 + }, + { + "epoch": 0.16, + "learning_rate": 1.9995202836305255e-05, + "loss": 0.2925, + "step": 650 + }, + { + "epoch": 0.16, + "learning_rate": 1.999507984718541e-05, + "loss": 0.3346, + "step": 652 + }, + { + "epoch": 0.16, + "learning_rate": 1.999495530175521e-05, + "loss": 0.3232, + "step": 654 + }, + { + "epoch": 0.16, + "learning_rate": 1.9994829200034037e-05, + "loss": 0.3189, + "step": 656 + }, + { + "epoch": 0.16, + "learning_rate": 1.9994701542041533e-05, + "loss": 0.3218, + "step": 658 + }, + { + "epoch": 0.16, + "learning_rate": 1.999457232779757e-05, + "loss": 0.3283, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 1.9994441557322273e-05, + "loss": 0.3224, + "step": 662 + }, + { + "epoch": 0.16, + "learning_rate": 1.9994309230636003e-05, + "loss": 0.3298, + "step": 664 + }, + { + "epoch": 0.16, + "learning_rate": 1.999417534775936e-05, + "loss": 0.3048, + "step": 666 + }, + { + "epoch": 0.16, + "learning_rate": 1.9994039908713194e-05, + "loss": 0.3109, + "step": 668 + }, + { + "epoch": 0.16, + "learning_rate": 1.9993902913518593e-05, + "loss": 0.3329, + "step": 670 + }, + { + "epoch": 0.16, + "learning_rate": 1.999376436219689e-05, + "loss": 0.3356, + "step": 672 + }, + { + "epoch": 0.16, + "learning_rate": 1.9993624254769655e-05, + "loss": 0.314, + "step": 674 + }, + { + "epoch": 0.16, + "learning_rate": 1.9993482591258706e-05, + "loss": 0.3093, + "step": 676 + }, + { + "epoch": 0.17, + "learning_rate": 1.99933393716861e-05, + "loss": 0.3187, + "step": 678 + }, + { + "epoch": 0.17, + "learning_rate": 1.999319459607414e-05, + "loss": 0.3038, + "step": 680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9993048264445368e-05, + "loss": 0.3096, + "step": 682 + }, + { + "epoch": 0.17, + "learning_rate": 1.999290037682257e-05, + "loss": 0.3198, + "step": 684 + }, + { + "epoch": 0.17, + "learning_rate": 1.9992750933228768e-05, + "loss": 0.3216, + "step": 686 + }, + { + "epoch": 0.17, + "learning_rate": 1.9992599933687233e-05, + "loss": 0.3128, + "step": 688 + }, + { + "epoch": 0.17, + "learning_rate": 1.999244737822148e-05, + "loss": 0.3169, + "step": 690 + }, + { + "epoch": 0.17, + "learning_rate": 1.999229326685526e-05, + "loss": 0.3119, + "step": 692 + }, + { + "epoch": 0.17, + "learning_rate": 1.9992137599612573e-05, + "loss": 0.3272, + "step": 694 + }, + { + "epoch": 0.17, + "learning_rate": 1.999198037651765e-05, + "loss": 0.3413, + "step": 696 + }, + { + "epoch": 0.17, + "learning_rate": 1.9991821597594983e-05, + "loss": 0.3305, + "step": 698 + }, + { + "epoch": 0.17, + "learning_rate": 1.999166126286929e-05, + "loss": 0.3104, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 1.999149937236553e-05, + "loss": 0.3277, + "step": 702 + }, + { + "epoch": 0.17, + "learning_rate": 1.9991335926108923e-05, + "loss": 0.3228, + "step": 704 + }, + { + "epoch": 0.17, + "learning_rate": 1.9991170924124906e-05, + "loss": 0.3249, + "step": 706 + }, + { + "epoch": 0.17, + "learning_rate": 1.999100436643918e-05, + "loss": 0.3304, + "step": 708 + }, + { + "epoch": 0.17, + "learning_rate": 1.9990836253077677e-05, + "loss": 0.3435, + "step": 710 + }, + { + "epoch": 0.17, + "learning_rate": 1.999066658406657e-05, + "loss": 0.3404, + "step": 712 + }, + { + "epoch": 0.17, + "learning_rate": 1.9990495359432286e-05, + "loss": 0.3116, + "step": 714 + }, + { + "epoch": 0.17, + "learning_rate": 1.9990322579201476e-05, + "loss": 0.3237, + "step": 716 + }, + { + "epoch": 0.17, + "learning_rate": 1.9990148243401048e-05, + "loss": 0.3107, + "step": 718 + }, + { + "epoch": 0.18, + "learning_rate": 1.998997235205815e-05, + "loss": 0.3326, + "step": 720 + }, + { + "epoch": 0.18, + "learning_rate": 1.9989794905200167e-05, + "loss": 0.3296, + "step": 722 + }, + { + "epoch": 0.18, + "learning_rate": 1.9989615902854726e-05, + "loss": 0.3097, + "step": 724 + }, + { + "epoch": 0.18, + "learning_rate": 1.9989435345049704e-05, + "loss": 0.3424, + "step": 726 + }, + { + "epoch": 0.18, + "learning_rate": 1.998925323181321e-05, + "loss": 0.3295, + "step": 728 + }, + { + "epoch": 0.18, + "learning_rate": 1.9989069563173606e-05, + "loss": 0.3117, + "step": 730 + }, + { + "epoch": 0.18, + "learning_rate": 1.9988884339159487e-05, + "loss": 0.3138, + "step": 732 + }, + { + "epoch": 0.18, + "learning_rate": 1.9988697559799696e-05, + "loss": 0.3237, + "step": 734 + }, + { + "epoch": 0.18, + "learning_rate": 1.9988509225123317e-05, + "loss": 0.2955, + "step": 736 + }, + { + "epoch": 0.18, + "learning_rate": 1.998831933515967e-05, + "loss": 0.3375, + "step": 738 + }, + { + "epoch": 0.18, + "learning_rate": 1.9988127889938324e-05, + "loss": 0.3289, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 1.998793488948909e-05, + "loss": 0.3463, + "step": 742 + }, + { + "epoch": 0.18, + "learning_rate": 1.998774033384202e-05, + "loss": 0.3198, + "step": 744 + }, + { + "epoch": 0.18, + "learning_rate": 1.9987544223027412e-05, + "loss": 0.3082, + "step": 746 + }, + { + "epoch": 0.18, + "learning_rate": 1.9987346557075792e-05, + "loss": 0.3383, + "step": 748 + }, + { + "epoch": 0.18, + "learning_rate": 1.9987147336017945e-05, + "loss": 0.325, + "step": 750 + }, + { + "epoch": 0.18, + "learning_rate": 1.998694655988489e-05, + "loss": 0.3152, + "step": 752 + }, + { + "epoch": 0.18, + "learning_rate": 1.9986744228707888e-05, + "loss": 0.3224, + "step": 754 + }, + { + "epoch": 0.18, + "learning_rate": 1.9986540342518445e-05, + "loss": 0.3282, + "step": 756 + }, + { + "epoch": 0.18, + "learning_rate": 1.9986334901348307e-05, + "loss": 0.3152, + "step": 758 + }, + { + "epoch": 0.19, + "learning_rate": 1.9986127905229463e-05, + "loss": 0.3278, + "step": 760 + }, + { + "epoch": 0.19, + "learning_rate": 1.9985919354194142e-05, + "loss": 0.3435, + "step": 762 + }, + { + "epoch": 0.19, + "learning_rate": 1.9985709248274822e-05, + "loss": 0.3143, + "step": 764 + }, + { + "epoch": 0.19, + "learning_rate": 1.9985497587504213e-05, + "loss": 0.326, + "step": 766 + }, + { + "epoch": 0.19, + "learning_rate": 1.9985284371915273e-05, + "loss": 0.3432, + "step": 768 + }, + { + "epoch": 0.19, + "learning_rate": 1.99850696015412e-05, + "loss": 0.3417, + "step": 770 + }, + { + "epoch": 0.19, + "learning_rate": 1.9984853276415444e-05, + "loss": 0.3378, + "step": 772 + }, + { + "epoch": 0.19, + "learning_rate": 1.998463539657168e-05, + "loss": 0.324, + "step": 774 + }, + { + "epoch": 0.19, + "learning_rate": 1.9984415962043835e-05, + "loss": 0.3303, + "step": 776 + }, + { + "epoch": 0.19, + "learning_rate": 1.9984194972866076e-05, + "loss": 0.2976, + "step": 778 + }, + { + "epoch": 0.19, + "learning_rate": 1.9983972429072814e-05, + "loss": 0.3077, + "step": 780 + }, + { + "epoch": 0.19, + "learning_rate": 1.9983748330698703e-05, + "loss": 0.3179, + "step": 782 + }, + { + "epoch": 0.19, + "learning_rate": 1.9983522677778634e-05, + "loss": 0.3089, + "step": 784 + }, + { + "epoch": 0.19, + "learning_rate": 1.9983295470347745e-05, + "loss": 0.3315, + "step": 786 + }, + { + "epoch": 0.19, + "learning_rate": 1.998306670844141e-05, + "loss": 0.3128, + "step": 788 + }, + { + "epoch": 0.19, + "learning_rate": 1.998283639209525e-05, + "loss": 0.3377, + "step": 790 + }, + { + "epoch": 0.19, + "learning_rate": 1.9982604521345132e-05, + "loss": 0.3234, + "step": 792 + }, + { + "epoch": 0.19, + "learning_rate": 1.9982371096227155e-05, + "loss": 0.3373, + "step": 794 + }, + { + "epoch": 0.19, + "learning_rate": 1.9982136116777666e-05, + "loss": 0.3241, + "step": 796 + }, + { + "epoch": 0.19, + "learning_rate": 1.9981899583033255e-05, + "loss": 0.3161, + "step": 798 + }, + { + "epoch": 0.19, + "learning_rate": 1.998166149503075e-05, + "loss": 0.3227, + "step": 800 + }, + { + "epoch": 0.2, + "learning_rate": 1.9981421852807227e-05, + "loss": 0.3276, + "step": 802 + }, + { + "epoch": 0.2, + "learning_rate": 1.9981180656399993e-05, + "loss": 0.3242, + "step": 804 + }, + { + "epoch": 0.2, + "learning_rate": 1.998093790584661e-05, + "loss": 0.3239, + "step": 806 + }, + { + "epoch": 0.2, + "learning_rate": 1.9980693601184874e-05, + "loss": 0.3241, + "step": 808 + }, + { + "epoch": 0.2, + "learning_rate": 1.9980447742452823e-05, + "loss": 0.3365, + "step": 810 + }, + { + "epoch": 0.2, + "learning_rate": 1.9980200329688742e-05, + "loss": 0.3305, + "step": 812 + }, + { + "epoch": 0.2, + "learning_rate": 1.997995136293116e-05, + "loss": 0.3351, + "step": 814 + }, + { + "epoch": 0.2, + "learning_rate": 1.9979700842218833e-05, + "loss": 0.3169, + "step": 816 + }, + { + "epoch": 0.2, + "learning_rate": 1.9979448767590776e-05, + "loss": 0.3424, + "step": 818 + }, + { + "epoch": 0.2, + "learning_rate": 1.9979195139086235e-05, + "loss": 0.3177, + "step": 820 + }, + { + "epoch": 0.2, + "learning_rate": 1.99789399567447e-05, + "loss": 0.3276, + "step": 822 + }, + { + "epoch": 0.2, + "learning_rate": 1.9978683220605912e-05, + "loss": 0.335, + "step": 824 + }, + { + "epoch": 0.2, + "learning_rate": 1.9978424930709842e-05, + "loss": 0.3291, + "step": 826 + }, + { + "epoch": 0.2, + "learning_rate": 1.997816508709671e-05, + "loss": 0.3103, + "step": 828 + }, + { + "epoch": 0.2, + "learning_rate": 1.9977903689806975e-05, + "loss": 0.3105, + "step": 830 + }, + { + "epoch": 0.2, + "learning_rate": 1.9977640738881337e-05, + "loss": 0.3115, + "step": 832 + }, + { + "epoch": 0.2, + "learning_rate": 1.997737623436074e-05, + "loss": 0.3296, + "step": 834 + }, + { + "epoch": 0.2, + "learning_rate": 1.997711017628637e-05, + "loss": 0.3187, + "step": 836 + }, + { + "epoch": 0.2, + "learning_rate": 1.9976842564699654e-05, + "loss": 0.299, + "step": 838 + }, + { + "epoch": 0.2, + "learning_rate": 1.997657339964226e-05, + "loss": 0.3225, + "step": 840 + }, + { + "epoch": 0.21, + "learning_rate": 1.9976302681156103e-05, + "loss": 0.3118, + "step": 842 + }, + { + "epoch": 0.21, + "learning_rate": 1.997603040928333e-05, + "loss": 0.3034, + "step": 844 + }, + { + "epoch": 0.21, + "learning_rate": 1.997575658406634e-05, + "loss": 0.2916, + "step": 846 + }, + { + "epoch": 0.21, + "learning_rate": 1.997548120554777e-05, + "loss": 0.3229, + "step": 848 + }, + { + "epoch": 0.21, + "learning_rate": 1.9975204273770497e-05, + "loss": 0.3357, + "step": 850 + }, + { + "epoch": 0.21, + "learning_rate": 1.9974925788777642e-05, + "loss": 0.3207, + "step": 852 + }, + { + "epoch": 0.21, + "learning_rate": 1.9974645750612568e-05, + "loss": 0.3423, + "step": 854 + }, + { + "epoch": 0.21, + "learning_rate": 1.9974364159318876e-05, + "loss": 0.3072, + "step": 856 + }, + { + "epoch": 0.21, + "learning_rate": 1.9974081014940415e-05, + "loss": 0.3288, + "step": 858 + }, + { + "epoch": 0.21, + "learning_rate": 1.997379631752127e-05, + "loss": 0.3285, + "step": 860 + }, + { + "epoch": 0.21, + "learning_rate": 1.9973510067105774e-05, + "loss": 0.307, + "step": 862 + }, + { + "epoch": 0.21, + "learning_rate": 1.9973222263738495e-05, + "loss": 0.3245, + "step": 864 + }, + { + "epoch": 0.21, + "learning_rate": 1.997293290746425e-05, + "loss": 0.3332, + "step": 866 + }, + { + "epoch": 0.21, + "learning_rate": 1.9972641998328086e-05, + "loss": 0.3261, + "step": 868 + }, + { + "epoch": 0.21, + "learning_rate": 1.9972349536375312e-05, + "loss": 0.3223, + "step": 870 + }, + { + "epoch": 0.21, + "learning_rate": 1.997205552165146e-05, + "loss": 0.3134, + "step": 872 + }, + { + "epoch": 0.21, + "learning_rate": 1.9971759954202306e-05, + "loss": 0.3199, + "step": 874 + }, + { + "epoch": 0.21, + "learning_rate": 1.9971462834073878e-05, + "loss": 0.3102, + "step": 876 + }, + { + "epoch": 0.21, + "learning_rate": 1.997116416131244e-05, + "loss": 0.3278, + "step": 878 + }, + { + "epoch": 0.21, + "learning_rate": 1.9970863935964496e-05, + "loss": 0.3101, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 1.9970562158076793e-05, + "loss": 0.3196, + "step": 882 + }, + { + "epoch": 0.22, + "learning_rate": 1.997025882769632e-05, + "loss": 0.2994, + "step": 884 + }, + { + "epoch": 0.22, + "learning_rate": 1.996995394487031e-05, + "loss": 0.3033, + "step": 886 + }, + { + "epoch": 0.22, + "learning_rate": 1.9969647509646234e-05, + "loss": 0.3227, + "step": 888 + }, + { + "epoch": 0.22, + "learning_rate": 1.99693395220718e-05, + "loss": 0.3207, + "step": 890 + }, + { + "epoch": 0.22, + "learning_rate": 1.9969029982194978e-05, + "loss": 0.3291, + "step": 892 + }, + { + "epoch": 0.22, + "learning_rate": 1.9968718890063952e-05, + "loss": 0.3326, + "step": 894 + }, + { + "epoch": 0.22, + "learning_rate": 1.9968406245727175e-05, + "loss": 0.309, + "step": 896 + }, + { + "epoch": 0.22, + "learning_rate": 1.9968092049233317e-05, + "loss": 0.3268, + "step": 898 + }, + { + "epoch": 0.22, + "learning_rate": 1.9967776300631302e-05, + "loss": 0.3132, + "step": 900 + }, + { + "epoch": 0.22, + "learning_rate": 1.9967458999970302e-05, + "loss": 0.3, + "step": 902 + }, + { + "epoch": 0.22, + "learning_rate": 1.9967140147299714e-05, + "loss": 0.3339, + "step": 904 + }, + { + "epoch": 0.22, + "learning_rate": 1.996681974266919e-05, + "loss": 0.3136, + "step": 906 + }, + { + "epoch": 0.22, + "learning_rate": 1.996649778612862e-05, + "loss": 0.3094, + "step": 908 + }, + { + "epoch": 0.22, + "learning_rate": 1.9966174277728135e-05, + "loss": 0.3212, + "step": 910 + }, + { + "epoch": 0.22, + "learning_rate": 1.9965849217518107e-05, + "loss": 0.2945, + "step": 912 + }, + { + "epoch": 0.22, + "learning_rate": 1.996552260554915e-05, + "loss": 0.3019, + "step": 914 + }, + { + "epoch": 0.22, + "learning_rate": 1.996519444187212e-05, + "loss": 0.3295, + "step": 916 + }, + { + "epoch": 0.22, + "learning_rate": 1.9964864726538117e-05, + "loss": 0.3355, + "step": 918 + }, + { + "epoch": 0.22, + "learning_rate": 1.9964533459598473e-05, + "loss": 0.3212, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 1.996420064110478e-05, + "loss": 0.3067, + "step": 922 + }, + { + "epoch": 0.23, + "learning_rate": 1.9963866271108854e-05, + "loss": 0.3124, + "step": 924 + }, + { + "epoch": 0.23, + "learning_rate": 1.9963530349662754e-05, + "loss": 0.3017, + "step": 926 + }, + { + "epoch": 0.23, + "learning_rate": 1.9963192876818797e-05, + "loss": 0.3132, + "step": 928 + }, + { + "epoch": 0.23, + "learning_rate": 1.996285385262952e-05, + "loss": 0.31, + "step": 930 + }, + { + "epoch": 0.23, + "learning_rate": 1.996251327714772e-05, + "loss": 0.3187, + "step": 932 + }, + { + "epoch": 0.23, + "learning_rate": 1.9962171150426418e-05, + "loss": 0.323, + "step": 934 + }, + { + "epoch": 0.23, + "learning_rate": 1.996182747251889e-05, + "loss": 0.3116, + "step": 936 + }, + { + "epoch": 0.23, + "learning_rate": 1.9961482243478654e-05, + "loss": 0.3026, + "step": 938 + }, + { + "epoch": 0.23, + "learning_rate": 1.996113546335946e-05, + "loss": 0.3032, + "step": 940 + }, + { + "epoch": 0.23, + "learning_rate": 1.9960787132215305e-05, + "loss": 0.2994, + "step": 942 + }, + { + "epoch": 0.23, + "learning_rate": 1.9960437250100427e-05, + "loss": 0.325, + "step": 944 + }, + { + "epoch": 0.23, + "learning_rate": 1.99600858170693e-05, + "loss": 0.3098, + "step": 946 + }, + { + "epoch": 0.23, + "learning_rate": 1.9959732833176656e-05, + "loss": 0.3191, + "step": 948 + }, + { + "epoch": 0.23, + "learning_rate": 1.995937829847745e-05, + "loss": 0.3162, + "step": 950 + }, + { + "epoch": 0.23, + "learning_rate": 1.995902221302689e-05, + "loss": 0.2874, + "step": 952 + }, + { + "epoch": 0.23, + "learning_rate": 1.9958664576880412e-05, + "loss": 0.3069, + "step": 954 + }, + { + "epoch": 0.23, + "learning_rate": 1.9958305390093714e-05, + "loss": 0.3371, + "step": 956 + }, + { + "epoch": 0.23, + "learning_rate": 1.9957944652722716e-05, + "loss": 0.3269, + "step": 958 + }, + { + "epoch": 0.23, + "learning_rate": 1.995758236482359e-05, + "loss": 0.3234, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 1.995721852645275e-05, + "loss": 0.2994, + "step": 962 + }, + { + "epoch": 0.23, + "learning_rate": 1.9956853137666842e-05, + "loss": 0.2938, + "step": 964 + }, + { + "epoch": 0.24, + "learning_rate": 1.9956486198522767e-05, + "loss": 0.3126, + "step": 966 + }, + { + "epoch": 0.24, + "learning_rate": 1.995611770907766e-05, + "loss": 0.32, + "step": 968 + }, + { + "epoch": 0.24, + "learning_rate": 1.9955747669388893e-05, + "loss": 0.3068, + "step": 970 + }, + { + "epoch": 0.24, + "learning_rate": 1.9955376079514083e-05, + "loss": 0.3029, + "step": 972 + }, + { + "epoch": 0.24, + "learning_rate": 1.9955002939511093e-05, + "loss": 0.3227, + "step": 974 + }, + { + "epoch": 0.24, + "learning_rate": 1.9954628249438023e-05, + "loss": 0.2945, + "step": 976 + }, + { + "epoch": 0.24, + "learning_rate": 1.9954252009353217e-05, + "loss": 0.325, + "step": 978 + }, + { + "epoch": 0.24, + "learning_rate": 1.9953874219315256e-05, + "loss": 0.3219, + "step": 980 + }, + { + "epoch": 0.24, + "learning_rate": 1.9953494879382963e-05, + "loss": 0.315, + "step": 982 + }, + { + "epoch": 0.24, + "learning_rate": 1.995311398961541e-05, + "loss": 0.3171, + "step": 984 + }, + { + "epoch": 0.24, + "learning_rate": 1.9952731550071894e-05, + "loss": 0.3189, + "step": 986 + }, + { + "epoch": 0.24, + "learning_rate": 1.9952347560811977e-05, + "loss": 0.3194, + "step": 988 + }, + { + "epoch": 0.24, + "learning_rate": 1.995196202189544e-05, + "loss": 0.3283, + "step": 990 + }, + { + "epoch": 0.24, + "learning_rate": 1.995157493338232e-05, + "loss": 0.3157, + "step": 992 + }, + { + "epoch": 0.24, + "learning_rate": 1.9951186295332882e-05, + "loss": 0.3113, + "step": 994 + }, + { + "epoch": 0.24, + "learning_rate": 1.9950796107807648e-05, + "loss": 0.309, + "step": 996 + }, + { + "epoch": 0.24, + "learning_rate": 1.9950404370867368e-05, + "loss": 0.2968, + "step": 998 + }, + { + "epoch": 0.24, + "learning_rate": 1.9950011084573042e-05, + "loss": 0.3049, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 1.9949616248985904e-05, + "loss": 0.3273, + "step": 1002 + }, + { + "epoch": 0.24, + "learning_rate": 1.994921986416744e-05, + "loss": 0.3085, + "step": 1004 + }, + { + "epoch": 0.25, + "learning_rate": 1.9948821930179357e-05, + "loss": 0.3198, + "step": 1006 + }, + { + "epoch": 0.25, + "learning_rate": 1.9948422447083628e-05, + "loss": 0.292, + "step": 1008 + }, + { + "epoch": 0.25, + "learning_rate": 1.994802141494245e-05, + "loss": 0.3178, + "step": 1010 + }, + { + "epoch": 0.25, + "learning_rate": 1.994761883381827e-05, + "loss": 0.3165, + "step": 1012 + }, + { + "epoch": 0.25, + "learning_rate": 1.9947214703773773e-05, + "loss": 0.2986, + "step": 1014 + }, + { + "epoch": 0.25, + "learning_rate": 1.9946809024871884e-05, + "loss": 0.3241, + "step": 1016 + }, + { + "epoch": 0.25, + "learning_rate": 1.9946401797175767e-05, + "loss": 0.3061, + "step": 1018 + }, + { + "epoch": 0.25, + "learning_rate": 1.9945993020748834e-05, + "loss": 0.3012, + "step": 1020 + }, + { + "epoch": 0.25, + "learning_rate": 1.9945582695654738e-05, + "loss": 0.3203, + "step": 1022 + }, + { + "epoch": 0.25, + "learning_rate": 1.994517082195736e-05, + "loss": 0.3154, + "step": 1024 + }, + { + "epoch": 0.25, + "learning_rate": 1.9944757399720843e-05, + "loss": 0.323, + "step": 1026 + }, + { + "epoch": 0.25, + "learning_rate": 1.994434242900955e-05, + "loss": 0.3123, + "step": 1028 + }, + { + "epoch": 0.25, + "learning_rate": 1.9943925909888103e-05, + "loss": 0.3145, + "step": 1030 + }, + { + "epoch": 0.25, + "learning_rate": 1.9943507842421357e-05, + "loss": 0.3033, + "step": 1032 + }, + { + "epoch": 0.25, + "learning_rate": 1.99430882266744e-05, + "loss": 0.3297, + "step": 1034 + }, + { + "epoch": 0.25, + "learning_rate": 1.994266706271258e-05, + "loss": 0.3268, + "step": 1036 + }, + { + "epoch": 0.25, + "learning_rate": 1.9942244350601462e-05, + "loss": 0.3109, + "step": 1038 + }, + { + "epoch": 0.25, + "learning_rate": 1.994182009040688e-05, + "loss": 0.2908, + "step": 1040 + }, + { + "epoch": 0.25, + "learning_rate": 1.9941394282194887e-05, + "loss": 0.3152, + "step": 1042 + }, + { + "epoch": 0.25, + "learning_rate": 1.9940966926031788e-05, + "loss": 0.29, + "step": 1044 + }, + { + "epoch": 0.25, + "learning_rate": 1.994053802198412e-05, + "loss": 0.3145, + "step": 1046 + }, + { + "epoch": 0.26, + "learning_rate": 1.994010757011867e-05, + "loss": 0.3104, + "step": 1048 + }, + { + "epoch": 0.26, + "learning_rate": 1.9939675570502467e-05, + "loss": 0.2953, + "step": 1050 + }, + { + "epoch": 0.26, + "learning_rate": 1.993924202320277e-05, + "loss": 0.3167, + "step": 1052 + }, + { + "epoch": 0.26, + "learning_rate": 1.9938806928287086e-05, + "loss": 0.3192, + "step": 1054 + }, + { + "epoch": 0.26, + "learning_rate": 1.9938370285823167e-05, + "loss": 0.326, + "step": 1056 + }, + { + "epoch": 0.26, + "learning_rate": 1.9937932095879e-05, + "loss": 0.3085, + "step": 1058 + }, + { + "epoch": 0.26, + "learning_rate": 1.993749235852281e-05, + "loss": 0.3399, + "step": 1060 + }, + { + "epoch": 0.26, + "learning_rate": 1.9937051073823074e-05, + "loss": 0.3125, + "step": 1062 + }, + { + "epoch": 0.26, + "learning_rate": 1.99366082418485e-05, + "loss": 0.3179, + "step": 1064 + }, + { + "epoch": 0.26, + "learning_rate": 1.9936163862668043e-05, + "loss": 0.3049, + "step": 1066 + }, + { + "epoch": 0.26, + "learning_rate": 1.9935717936350894e-05, + "loss": 0.3066, + "step": 1068 + }, + { + "epoch": 0.26, + "learning_rate": 1.9935270462966484e-05, + "loss": 0.2742, + "step": 1070 + }, + { + "epoch": 0.26, + "learning_rate": 1.9934821442584495e-05, + "loss": 0.3001, + "step": 1072 + }, + { + "epoch": 0.26, + "learning_rate": 1.9934370875274836e-05, + "loss": 0.3153, + "step": 1074 + }, + { + "epoch": 0.26, + "learning_rate": 1.993391876110767e-05, + "loss": 0.3057, + "step": 1076 + }, + { + "epoch": 0.26, + "learning_rate": 1.9933465100153388e-05, + "loss": 0.3336, + "step": 1078 + }, + { + "epoch": 0.26, + "learning_rate": 1.9933009892482636e-05, + "loss": 0.3095, + "step": 1080 + }, + { + "epoch": 0.26, + "learning_rate": 1.9932553138166287e-05, + "loss": 0.3327, + "step": 1082 + }, + { + "epoch": 0.26, + "learning_rate": 1.9932094837275465e-05, + "loss": 0.3228, + "step": 1084 + }, + { + "epoch": 0.26, + "learning_rate": 1.993163498988153e-05, + "loss": 0.309, + "step": 1086 + }, + { + "epoch": 0.27, + "learning_rate": 1.9931173596056085e-05, + "loss": 0.3207, + "step": 1088 + }, + { + "epoch": 0.27, + "learning_rate": 1.9930710655870967e-05, + "loss": 0.32, + "step": 1090 + }, + { + "epoch": 0.27, + "learning_rate": 1.993024616939826e-05, + "loss": 0.325, + "step": 1092 + }, + { + "epoch": 0.27, + "learning_rate": 1.99297801367103e-05, + "loss": 0.3103, + "step": 1094 + }, + { + "epoch": 0.27, + "learning_rate": 1.9929312557879638e-05, + "loss": 0.316, + "step": 1096 + }, + { + "epoch": 0.27, + "learning_rate": 1.9928843432979084e-05, + "loss": 0.3174, + "step": 1098 + }, + { + "epoch": 0.27, + "learning_rate": 1.9928372762081686e-05, + "loss": 0.3038, + "step": 1100 + }, + { + "epoch": 0.27, + "learning_rate": 1.9927900545260735e-05, + "loss": 0.287, + "step": 1102 + }, + { + "epoch": 0.27, + "learning_rate": 1.992742678258975e-05, + "loss": 0.3089, + "step": 1104 + }, + { + "epoch": 0.27, + "learning_rate": 1.99269514741425e-05, + "loss": 0.2913, + "step": 1106 + }, + { + "epoch": 0.27, + "learning_rate": 1.9926474619993e-05, + "loss": 0.2994, + "step": 1108 + }, + { + "epoch": 0.27, + "learning_rate": 1.99259962202155e-05, + "loss": 0.317, + "step": 1110 + }, + { + "epoch": 0.27, + "learning_rate": 1.9925516274884487e-05, + "loss": 0.3194, + "step": 1112 + }, + { + "epoch": 0.27, + "learning_rate": 1.9925034784074692e-05, + "loss": 0.3157, + "step": 1114 + }, + { + "epoch": 0.27, + "learning_rate": 1.9924551747861088e-05, + "loss": 0.3334, + "step": 1116 + }, + { + "epoch": 0.27, + "learning_rate": 1.9924067166318884e-05, + "loss": 0.3092, + "step": 1118 + }, + { + "epoch": 0.27, + "learning_rate": 1.992358103952354e-05, + "loss": 0.3108, + "step": 1120 + }, + { + "epoch": 0.27, + "learning_rate": 1.9923093367550747e-05, + "loss": 0.277, + "step": 1122 + }, + { + "epoch": 0.27, + "learning_rate": 1.9922604150476435e-05, + "loss": 0.3012, + "step": 1124 + }, + { + "epoch": 0.27, + "learning_rate": 1.9922113388376786e-05, + "loss": 0.276, + "step": 1126 + }, + { + "epoch": 0.27, + "learning_rate": 1.9921621081328207e-05, + "loss": 0.3107, + "step": 1128 + }, + { + "epoch": 0.28, + "learning_rate": 1.9921127229407363e-05, + "loss": 0.3169, + "step": 1130 + }, + { + "epoch": 0.28, + "learning_rate": 1.9920631832691143e-05, + "loss": 0.3211, + "step": 1132 + }, + { + "epoch": 0.28, + "learning_rate": 1.9920134891256685e-05, + "loss": 0.3012, + "step": 1134 + }, + { + "epoch": 0.28, + "learning_rate": 1.9919636405181375e-05, + "loss": 0.317, + "step": 1136 + }, + { + "epoch": 0.28, + "learning_rate": 1.991913637454282e-05, + "loss": 0.3059, + "step": 1138 + }, + { + "epoch": 0.28, + "learning_rate": 1.9918634799418886e-05, + "loss": 0.308, + "step": 1140 + }, + { + "epoch": 0.28, + "learning_rate": 1.9918131679887668e-05, + "loss": 0.3348, + "step": 1142 + }, + { + "epoch": 0.28, + "learning_rate": 1.9917627016027504e-05, + "loss": 0.3083, + "step": 1144 + }, + { + "epoch": 0.28, + "learning_rate": 1.991712080791698e-05, + "loss": 0.2949, + "step": 1146 + }, + { + "epoch": 0.28, + "learning_rate": 1.9916613055634914e-05, + "loss": 0.3144, + "step": 1148 + }, + { + "epoch": 0.28, + "learning_rate": 1.9916103759260363e-05, + "loss": 0.3472, + "step": 1150 + }, + { + "epoch": 0.28, + "learning_rate": 1.9915592918872635e-05, + "loss": 0.3034, + "step": 1152 + }, + { + "epoch": 0.28, + "learning_rate": 1.991508053455127e-05, + "loss": 0.3044, + "step": 1154 + }, + { + "epoch": 0.28, + "learning_rate": 1.9914566606376045e-05, + "loss": 0.3265, + "step": 1156 + }, + { + "epoch": 0.28, + "learning_rate": 1.9914051134426987e-05, + "loss": 0.3078, + "step": 1158 + }, + { + "epoch": 0.28, + "learning_rate": 1.9913534118784358e-05, + "loss": 0.2931, + "step": 1160 + }, + { + "epoch": 0.28, + "learning_rate": 1.9913015559528662e-05, + "loss": 0.351, + "step": 1162 + }, + { + "epoch": 0.28, + "learning_rate": 1.9912495456740642e-05, + "loss": 0.3184, + "step": 1164 + }, + { + "epoch": 0.28, + "learning_rate": 1.991197381050128e-05, + "loss": 0.3217, + "step": 1166 + }, + { + "epoch": 0.28, + "learning_rate": 1.9911450620891807e-05, + "loss": 0.2977, + "step": 1168 + }, + { + "epoch": 0.29, + "learning_rate": 1.9910925887993676e-05, + "loss": 0.3109, + "step": 1170 + }, + { + "epoch": 0.29, + "learning_rate": 1.9910399611888604e-05, + "loss": 0.3246, + "step": 1172 + }, + { + "epoch": 0.29, + "learning_rate": 1.990987179265853e-05, + "loss": 0.2781, + "step": 1174 + }, + { + "epoch": 0.29, + "learning_rate": 1.990934243038564e-05, + "loss": 0.2934, + "step": 1176 + }, + { + "epoch": 0.29, + "learning_rate": 1.9908811525152362e-05, + "loss": 0.3316, + "step": 1178 + }, + { + "epoch": 0.29, + "learning_rate": 1.990827907704136e-05, + "loss": 0.346, + "step": 1180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9907745086135538e-05, + "loss": 0.3196, + "step": 1182 + }, + { + "epoch": 0.29, + "learning_rate": 1.9907209552518046e-05, + "loss": 0.3159, + "step": 1184 + }, + { + "epoch": 0.29, + "learning_rate": 1.990667247627227e-05, + "loss": 0.3162, + "step": 1186 + }, + { + "epoch": 0.29, + "learning_rate": 1.9906133857481837e-05, + "loss": 0.3112, + "step": 1188 + }, + { + "epoch": 0.29, + "learning_rate": 1.9905593696230615e-05, + "loss": 0.3185, + "step": 1190 + }, + { + "epoch": 0.29, + "learning_rate": 1.9905051992602708e-05, + "loss": 0.3425, + "step": 1192 + }, + { + "epoch": 0.29, + "learning_rate": 1.9904508746682465e-05, + "loss": 0.307, + "step": 1194 + }, + { + "epoch": 0.29, + "learning_rate": 1.9903963958554474e-05, + "loss": 0.3286, + "step": 1196 + }, + { + "epoch": 0.29, + "learning_rate": 1.9903417628303565e-05, + "loss": 0.3038, + "step": 1198 + }, + { + "epoch": 0.29, + "learning_rate": 1.99028697560148e-05, + "loss": 0.3197, + "step": 1200 + }, + { + "epoch": 0.29, + "learning_rate": 1.990232034177349e-05, + "loss": 0.3308, + "step": 1202 + }, + { + "epoch": 0.29, + "learning_rate": 1.9901769385665185e-05, + "loss": 0.2893, + "step": 1204 + }, + { + "epoch": 0.29, + "learning_rate": 1.990121688777567e-05, + "loss": 0.2965, + "step": 1206 + }, + { + "epoch": 0.29, + "learning_rate": 1.9900662848190977e-05, + "loss": 0.3098, + "step": 1208 + }, + { + "epoch": 0.29, + "learning_rate": 1.9900107266997367e-05, + "loss": 0.3128, + "step": 1210 + }, + { + "epoch": 0.3, + "learning_rate": 1.9899550144281358e-05, + "loss": 0.3294, + "step": 1212 + }, + { + "epoch": 0.3, + "learning_rate": 1.9898991480129692e-05, + "loss": 0.3076, + "step": 1214 + }, + { + "epoch": 0.3, + "learning_rate": 1.9898431274629356e-05, + "loss": 0.3294, + "step": 1216 + }, + { + "epoch": 0.3, + "learning_rate": 1.9897869527867582e-05, + "loss": 0.3314, + "step": 1218 + }, + { + "epoch": 0.3, + "learning_rate": 1.9897306239931837e-05, + "loss": 0.3265, + "step": 1220 + }, + { + "epoch": 0.3, + "learning_rate": 1.989674141090983e-05, + "loss": 0.3109, + "step": 1222 + }, + { + "epoch": 0.3, + "learning_rate": 1.989617504088951e-05, + "loss": 0.2841, + "step": 1224 + }, + { + "epoch": 0.3, + "learning_rate": 1.9895607129959058e-05, + "loss": 0.2836, + "step": 1226 + }, + { + "epoch": 0.3, + "learning_rate": 1.989503767820691e-05, + "loss": 0.2899, + "step": 1228 + }, + { + "epoch": 0.3, + "learning_rate": 1.9894466685721734e-05, + "loss": 0.3002, + "step": 1230 + }, + { + "epoch": 0.3, + "learning_rate": 1.9893894152592433e-05, + "loss": 0.3189, + "step": 1232 + }, + { + "epoch": 0.3, + "learning_rate": 1.989332007890816e-05, + "loss": 0.3337, + "step": 1234 + }, + { + "epoch": 0.3, + "learning_rate": 1.9892744464758295e-05, + "loss": 0.3012, + "step": 1236 + }, + { + "epoch": 0.3, + "learning_rate": 1.9892167310232473e-05, + "loss": 0.3007, + "step": 1238 + }, + { + "epoch": 0.3, + "learning_rate": 1.9891588615420558e-05, + "loss": 0.3122, + "step": 1240 + }, + { + "epoch": 0.3, + "learning_rate": 1.989100838041266e-05, + "loss": 0.3058, + "step": 1242 + }, + { + "epoch": 0.3, + "learning_rate": 1.9890426605299125e-05, + "loss": 0.3151, + "step": 1244 + }, + { + "epoch": 0.3, + "learning_rate": 1.9889843290170535e-05, + "loss": 0.2995, + "step": 1246 + }, + { + "epoch": 0.3, + "learning_rate": 1.9889258435117723e-05, + "loss": 0.3267, + "step": 1248 + }, + { + "epoch": 0.3, + "learning_rate": 1.9888672040231753e-05, + "loss": 0.3216, + "step": 1250 + }, + { + "epoch": 0.31, + "learning_rate": 1.988808410560393e-05, + "loss": 0.3212, + "step": 1252 + }, + { + "epoch": 0.31, + "learning_rate": 1.9887494631325805e-05, + "loss": 0.303, + "step": 1254 + }, + { + "epoch": 0.31, + "learning_rate": 1.9886903617489156e-05, + "loss": 0.3135, + "step": 1256 + }, + { + "epoch": 0.31, + "learning_rate": 1.9886311064186012e-05, + "loss": 0.3279, + "step": 1258 + }, + { + "epoch": 0.31, + "learning_rate": 1.988571697150864e-05, + "loss": 0.3013, + "step": 1260 + }, + { + "epoch": 0.31, + "learning_rate": 1.988512133954954e-05, + "loss": 0.3068, + "step": 1262 + }, + { + "epoch": 0.31, + "learning_rate": 1.988452416840146e-05, + "loss": 0.3294, + "step": 1264 + }, + { + "epoch": 0.31, + "learning_rate": 1.9883925458157386e-05, + "loss": 0.3274, + "step": 1266 + }, + { + "epoch": 0.31, + "learning_rate": 1.9883325208910537e-05, + "loss": 0.3023, + "step": 1268 + }, + { + "epoch": 0.31, + "learning_rate": 1.988272342075438e-05, + "loss": 0.327, + "step": 1270 + }, + { + "epoch": 0.31, + "learning_rate": 1.9882120093782616e-05, + "loss": 0.2948, + "step": 1272 + }, + { + "epoch": 0.31, + "learning_rate": 1.9881515228089188e-05, + "loss": 0.3254, + "step": 1274 + }, + { + "epoch": 0.31, + "learning_rate": 1.988090882376828e-05, + "loss": 0.2967, + "step": 1276 + }, + { + "epoch": 0.31, + "learning_rate": 1.988030088091431e-05, + "loss": 0.2976, + "step": 1278 + }, + { + "epoch": 0.31, + "learning_rate": 1.987969139962194e-05, + "loss": 0.2674, + "step": 1280 + }, + { + "epoch": 0.31, + "learning_rate": 1.9879080379986074e-05, + "loss": 0.3203, + "step": 1282 + }, + { + "epoch": 0.31, + "learning_rate": 1.9878467822101853e-05, + "loss": 0.2864, + "step": 1284 + }, + { + "epoch": 0.31, + "learning_rate": 1.9877853726064655e-05, + "loss": 0.327, + "step": 1286 + }, + { + "epoch": 0.31, + "learning_rate": 1.98772380919701e-05, + "loss": 0.3233, + "step": 1288 + }, + { + "epoch": 0.31, + "learning_rate": 1.9876620919914044e-05, + "loss": 0.3, + "step": 1290 + }, + { + "epoch": 0.31, + "learning_rate": 1.9876002209992586e-05, + "loss": 0.3088, + "step": 1292 + }, + { + "epoch": 0.32, + "learning_rate": 1.987538196230207e-05, + "loss": 0.2999, + "step": 1294 + }, + { + "epoch": 0.32, + "learning_rate": 1.9874760176939066e-05, + "loss": 0.3079, + "step": 1296 + }, + { + "epoch": 0.32, + "learning_rate": 1.9874136854000398e-05, + "loss": 0.3162, + "step": 1298 + }, + { + "epoch": 0.32, + "learning_rate": 1.9873511993583114e-05, + "loss": 0.3089, + "step": 1300 + }, + { + "epoch": 0.32, + "learning_rate": 1.987288559578451e-05, + "loss": 0.3147, + "step": 1302 + }, + { + "epoch": 0.32, + "learning_rate": 1.9872257660702126e-05, + "loss": 0.3282, + "step": 1304 + }, + { + "epoch": 0.32, + "learning_rate": 1.987162818843374e-05, + "loss": 0.3074, + "step": 1306 + }, + { + "epoch": 0.32, + "learning_rate": 1.9870997179077353e-05, + "loss": 0.3278, + "step": 1308 + }, + { + "epoch": 0.32, + "learning_rate": 1.987036463273123e-05, + "loss": 0.3033, + "step": 1310 + }, + { + "epoch": 0.32, + "learning_rate": 1.9869730549493857e-05, + "loss": 0.3363, + "step": 1312 + }, + { + "epoch": 0.32, + "learning_rate": 1.9869094929463967e-05, + "loss": 0.3086, + "step": 1314 + }, + { + "epoch": 0.32, + "learning_rate": 1.986845777274053e-05, + "loss": 0.3084, + "step": 1316 + }, + { + "epoch": 0.32, + "learning_rate": 1.9867819079422758e-05, + "loss": 0.3092, + "step": 1318 + }, + { + "epoch": 0.32, + "learning_rate": 1.98671788496101e-05, + "loss": 0.2931, + "step": 1320 + }, + { + "epoch": 0.32, + "learning_rate": 1.9866537083402243e-05, + "loss": 0.3154, + "step": 1322 + }, + { + "epoch": 0.32, + "learning_rate": 1.986589378089912e-05, + "loss": 0.2928, + "step": 1324 + }, + { + "epoch": 0.32, + "learning_rate": 1.986524894220089e-05, + "loss": 0.3224, + "step": 1326 + }, + { + "epoch": 0.32, + "learning_rate": 1.9864602567407962e-05, + "loss": 0.2886, + "step": 1328 + }, + { + "epoch": 0.32, + "learning_rate": 1.9863954656620987e-05, + "loss": 0.3036, + "step": 1330 + }, + { + "epoch": 0.32, + "learning_rate": 1.9863305209940843e-05, + "loss": 0.3011, + "step": 1332 + }, + { + "epoch": 0.33, + "learning_rate": 1.986265422746866e-05, + "loss": 0.34, + "step": 1334 + }, + { + "epoch": 0.33, + "learning_rate": 1.9862001709305793e-05, + "loss": 0.3024, + "step": 1336 + }, + { + "epoch": 0.33, + "learning_rate": 1.9861347655553852e-05, + "loss": 0.3307, + "step": 1338 + }, + { + "epoch": 0.33, + "learning_rate": 1.9860692066314676e-05, + "loss": 0.2974, + "step": 1340 + }, + { + "epoch": 0.33, + "learning_rate": 1.9860034941690342e-05, + "loss": 0.3023, + "step": 1342 + }, + { + "epoch": 0.33, + "learning_rate": 1.9859376281783168e-05, + "loss": 0.3131, + "step": 1344 + }, + { + "epoch": 0.33, + "learning_rate": 1.9858716086695723e-05, + "loss": 0.3038, + "step": 1346 + }, + { + "epoch": 0.33, + "learning_rate": 1.985805435653079e-05, + "loss": 0.3196, + "step": 1348 + }, + { + "epoch": 0.33, + "learning_rate": 1.9857391091391415e-05, + "loss": 0.3102, + "step": 1350 + }, + { + "epoch": 0.33, + "learning_rate": 1.9856726291380872e-05, + "loss": 0.307, + "step": 1352 + }, + { + "epoch": 0.33, + "learning_rate": 1.9856059956602675e-05, + "loss": 0.2894, + "step": 1354 + }, + { + "epoch": 0.33, + "learning_rate": 1.9855392087160576e-05, + "loss": 0.3243, + "step": 1356 + }, + { + "epoch": 0.33, + "learning_rate": 1.985472268315857e-05, + "loss": 0.3037, + "step": 1358 + }, + { + "epoch": 0.33, + "learning_rate": 1.9854051744700884e-05, + "loss": 0.3132, + "step": 1360 + }, + { + "epoch": 0.33, + "learning_rate": 1.9853379271891994e-05, + "loss": 0.2907, + "step": 1362 + }, + { + "epoch": 0.33, + "learning_rate": 1.9852705264836602e-05, + "loss": 0.328, + "step": 1364 + }, + { + "epoch": 0.33, + "learning_rate": 1.9852029723639663e-05, + "loss": 0.2926, + "step": 1366 + }, + { + "epoch": 0.33, + "learning_rate": 1.985135264840636e-05, + "loss": 0.2987, + "step": 1368 + }, + { + "epoch": 0.33, + "learning_rate": 1.9850674039242117e-05, + "loss": 0.3277, + "step": 1370 + }, + { + "epoch": 0.33, + "learning_rate": 1.9849993896252604e-05, + "loss": 0.2985, + "step": 1372 + }, + { + "epoch": 0.33, + "learning_rate": 1.9849312219543723e-05, + "loss": 0.3122, + "step": 1374 + }, + { + "epoch": 0.34, + "learning_rate": 1.9848629009221615e-05, + "loss": 0.3129, + "step": 1376 + }, + { + "epoch": 0.34, + "learning_rate": 1.984794426539266e-05, + "loss": 0.3185, + "step": 1378 + }, + { + "epoch": 0.34, + "learning_rate": 1.984725798816348e-05, + "loss": 0.2946, + "step": 1380 + }, + { + "epoch": 0.34, + "learning_rate": 1.984657017764093e-05, + "loss": 0.3036, + "step": 1382 + }, + { + "epoch": 0.34, + "learning_rate": 1.9845880833932113e-05, + "loss": 0.2939, + "step": 1384 + }, + { + "epoch": 0.34, + "learning_rate": 1.9845189957144358e-05, + "loss": 0.3104, + "step": 1386 + }, + { + "epoch": 0.34, + "learning_rate": 1.984449754738525e-05, + "loss": 0.3117, + "step": 1388 + }, + { + "epoch": 0.34, + "learning_rate": 1.9843803604762594e-05, + "loss": 0.3189, + "step": 1390 + }, + { + "epoch": 0.34, + "learning_rate": 1.9843108129384444e-05, + "loss": 0.316, + "step": 1392 + }, + { + "epoch": 0.34, + "learning_rate": 1.984241112135909e-05, + "loss": 0.3162, + "step": 1394 + }, + { + "epoch": 0.34, + "learning_rate": 1.984171258079507e-05, + "loss": 0.3082, + "step": 1396 + }, + { + "epoch": 0.34, + "learning_rate": 1.9841012507801136e-05, + "loss": 0.27, + "step": 1398 + }, + { + "epoch": 0.34, + "learning_rate": 1.9840310902486308e-05, + "loss": 0.292, + "step": 1400 + }, + { + "epoch": 0.34, + "learning_rate": 1.9839607764959827e-05, + "loss": 0.2984, + "step": 1402 + }, + { + "epoch": 0.34, + "learning_rate": 1.9838903095331175e-05, + "loss": 0.3186, + "step": 1404 + }, + { + "epoch": 0.34, + "learning_rate": 1.983819689371008e-05, + "loss": 0.3414, + "step": 1406 + }, + { + "epoch": 0.34, + "learning_rate": 1.9837489160206495e-05, + "loss": 0.3212, + "step": 1408 + }, + { + "epoch": 0.34, + "learning_rate": 1.9836779894930623e-05, + "loss": 0.28, + "step": 1410 + }, + { + "epoch": 0.34, + "learning_rate": 1.9836069097992906e-05, + "loss": 0.2762, + "step": 1412 + }, + { + "epoch": 0.34, + "learning_rate": 1.983535676950402e-05, + "loss": 0.3076, + "step": 1414 + }, + { + "epoch": 0.35, + "learning_rate": 1.9834642909574875e-05, + "loss": 0.324, + "step": 1416 + }, + { + "epoch": 0.35, + "learning_rate": 1.9833927518316625e-05, + "loss": 0.305, + "step": 1418 + }, + { + "epoch": 0.35, + "learning_rate": 1.9833210595840667e-05, + "loss": 0.2914, + "step": 1420 + }, + { + "epoch": 0.35, + "learning_rate": 1.983249214225863e-05, + "loss": 0.277, + "step": 1422 + }, + { + "epoch": 0.35, + "learning_rate": 1.9831772157682375e-05, + "loss": 0.302, + "step": 1424 + }, + { + "epoch": 0.35, + "learning_rate": 1.9831050642224017e-05, + "loss": 0.3169, + "step": 1426 + }, + { + "epoch": 0.35, + "learning_rate": 1.9830327595995898e-05, + "loss": 0.3141, + "step": 1428 + }, + { + "epoch": 0.35, + "learning_rate": 1.9829603019110607e-05, + "loss": 0.3157, + "step": 1430 + }, + { + "epoch": 0.35, + "learning_rate": 1.9828876911680962e-05, + "loss": 0.2737, + "step": 1432 + }, + { + "epoch": 0.35, + "learning_rate": 1.9828149273820017e-05, + "loss": 0.339, + "step": 1434 + }, + { + "epoch": 0.35, + "learning_rate": 1.9827420105641086e-05, + "loss": 0.3179, + "step": 1436 + }, + { + "epoch": 0.35, + "learning_rate": 1.9826689407257694e-05, + "loss": 0.3278, + "step": 1438 + }, + { + "epoch": 0.35, + "learning_rate": 1.9825957178783622e-05, + "loss": 0.3079, + "step": 1440 + }, + { + "epoch": 0.35, + "learning_rate": 1.982522342033288e-05, + "loss": 0.2928, + "step": 1442 + }, + { + "epoch": 0.35, + "learning_rate": 1.9824488132019717e-05, + "loss": 0.3072, + "step": 1444 + }, + { + "epoch": 0.35, + "learning_rate": 1.9823751313958634e-05, + "loss": 0.3153, + "step": 1446 + }, + { + "epoch": 0.35, + "learning_rate": 1.9823012966264353e-05, + "loss": 0.3207, + "step": 1448 + }, + { + "epoch": 0.35, + "learning_rate": 1.9822273089051834e-05, + "loss": 0.3214, + "step": 1450 + }, + { + "epoch": 0.35, + "learning_rate": 1.9821531682436293e-05, + "loss": 0.3116, + "step": 1452 + }, + { + "epoch": 0.35, + "learning_rate": 1.9820788746533165e-05, + "loss": 0.3103, + "step": 1454 + }, + { + "epoch": 0.35, + "learning_rate": 1.9820044281458136e-05, + "loss": 0.3026, + "step": 1456 + }, + { + "epoch": 0.36, + "learning_rate": 1.9819298287327114e-05, + "loss": 0.3033, + "step": 1458 + }, + { + "epoch": 0.36, + "learning_rate": 1.9818550764256273e-05, + "loss": 0.3103, + "step": 1460 + }, + { + "epoch": 0.36, + "learning_rate": 1.981780171236199e-05, + "loss": 0.2929, + "step": 1462 + }, + { + "epoch": 0.36, + "learning_rate": 1.9817051131760915e-05, + "loss": 0.3138, + "step": 1464 + }, + { + "epoch": 0.36, + "learning_rate": 1.981629902256991e-05, + "loss": 0.3056, + "step": 1466 + }, + { + "epoch": 0.36, + "learning_rate": 1.9815545384906083e-05, + "loss": 0.3235, + "step": 1468 + }, + { + "epoch": 0.36, + "learning_rate": 1.9814790218886783e-05, + "loss": 0.3198, + "step": 1470 + }, + { + "epoch": 0.36, + "learning_rate": 1.98140335246296e-05, + "loss": 0.3209, + "step": 1472 + }, + { + "epoch": 0.36, + "learning_rate": 1.9813275302252347e-05, + "loss": 0.3038, + "step": 1474 + }, + { + "epoch": 0.36, + "learning_rate": 1.9812515551873093e-05, + "loss": 0.3263, + "step": 1476 + }, + { + "epoch": 0.36, + "learning_rate": 1.9811754273610138e-05, + "loss": 0.3314, + "step": 1478 + }, + { + "epoch": 0.36, + "learning_rate": 1.9810991467582013e-05, + "loss": 0.2907, + "step": 1480 + }, + { + "epoch": 0.36, + "learning_rate": 1.9810227133907492e-05, + "loss": 0.3088, + "step": 1482 + }, + { + "epoch": 0.36, + "learning_rate": 1.9809461272705595e-05, + "loss": 0.3254, + "step": 1484 + }, + { + "epoch": 0.36, + "learning_rate": 1.9808693884095568e-05, + "loss": 0.3263, + "step": 1486 + }, + { + "epoch": 0.36, + "learning_rate": 1.9807924968196897e-05, + "loss": 0.3091, + "step": 1488 + }, + { + "epoch": 0.36, + "learning_rate": 1.9807154525129314e-05, + "loss": 0.3114, + "step": 1490 + }, + { + "epoch": 0.36, + "learning_rate": 1.9806382555012777e-05, + "loss": 0.2993, + "step": 1492 + }, + { + "epoch": 0.36, + "learning_rate": 1.980560905796749e-05, + "loss": 0.3116, + "step": 1494 + }, + { + "epoch": 0.36, + "learning_rate": 1.9804834034113893e-05, + "loss": 0.2834, + "step": 1496 + }, + { + "epoch": 0.37, + "learning_rate": 1.9804057483572663e-05, + "loss": 0.2976, + "step": 1498 + }, + { + "epoch": 0.37, + "learning_rate": 1.9803279406464714e-05, + "loss": 0.3019, + "step": 1500 + }, + { + "epoch": 0.37, + "learning_rate": 1.98024998029112e-05, + "loss": 0.3148, + "step": 1502 + }, + { + "epoch": 0.37, + "learning_rate": 1.980171867303351e-05, + "loss": 0.2935, + "step": 1504 + }, + { + "epoch": 0.37, + "learning_rate": 1.9800936016953277e-05, + "loss": 0.3229, + "step": 1506 + }, + { + "epoch": 0.37, + "learning_rate": 1.9800151834792355e-05, + "loss": 0.3158, + "step": 1508 + }, + { + "epoch": 0.37, + "learning_rate": 1.9799366126672858e-05, + "loss": 0.2974, + "step": 1510 + }, + { + "epoch": 0.37, + "learning_rate": 1.979857889271712e-05, + "loss": 0.2913, + "step": 1512 + }, + { + "epoch": 0.37, + "learning_rate": 1.9797790133047722e-05, + "loss": 0.2826, + "step": 1514 + }, + { + "epoch": 0.37, + "learning_rate": 1.9796999847787485e-05, + "loss": 0.2959, + "step": 1516 + }, + { + "epoch": 0.37, + "learning_rate": 1.9796208037059454e-05, + "loss": 0.327, + "step": 1518 + }, + { + "epoch": 0.37, + "learning_rate": 1.9795414700986922e-05, + "loss": 0.3133, + "step": 1520 + }, + { + "epoch": 0.37, + "learning_rate": 1.9794619839693417e-05, + "loss": 0.2951, + "step": 1522 + }, + { + "epoch": 0.37, + "learning_rate": 1.9793823453302712e-05, + "loss": 0.3277, + "step": 1524 + }, + { + "epoch": 0.37, + "learning_rate": 1.9793025541938805e-05, + "loss": 0.3191, + "step": 1526 + }, + { + "epoch": 0.37, + "learning_rate": 1.9792226105725935e-05, + "loss": 0.3309, + "step": 1528 + }, + { + "epoch": 0.37, + "learning_rate": 1.9791425144788583e-05, + "loss": 0.2971, + "step": 1530 + }, + { + "epoch": 0.37, + "learning_rate": 1.9790622659251465e-05, + "loss": 0.316, + "step": 1532 + }, + { + "epoch": 0.37, + "learning_rate": 1.9789818649239533e-05, + "loss": 0.3063, + "step": 1534 + }, + { + "epoch": 0.37, + "learning_rate": 1.9789013114877978e-05, + "loss": 0.3034, + "step": 1536 + }, + { + "epoch": 0.37, + "learning_rate": 1.978820605629223e-05, + "loss": 0.3228, + "step": 1538 + }, + { + "epoch": 0.38, + "learning_rate": 1.9787397473607947e-05, + "loss": 0.3186, + "step": 1540 + }, + { + "epoch": 0.38, + "learning_rate": 1.978658736695104e-05, + "loss": 0.3237, + "step": 1542 + }, + { + "epoch": 0.38, + "learning_rate": 1.9785775736447644e-05, + "loss": 0.2878, + "step": 1544 + }, + { + "epoch": 0.38, + "learning_rate": 1.978496258222414e-05, + "loss": 0.288, + "step": 1546 + }, + { + "epoch": 0.38, + "learning_rate": 1.978414790440714e-05, + "loss": 0.3085, + "step": 1548 + }, + { + "epoch": 0.38, + "learning_rate": 1.978333170312349e-05, + "loss": 0.3123, + "step": 1550 + }, + { + "epoch": 0.38, + "learning_rate": 1.978251397850029e-05, + "loss": 0.337, + "step": 1552 + }, + { + "epoch": 0.38, + "learning_rate": 1.9781694730664855e-05, + "loss": 0.2834, + "step": 1554 + }, + { + "epoch": 0.38, + "learning_rate": 1.9780873959744754e-05, + "loss": 0.3375, + "step": 1556 + }, + { + "epoch": 0.38, + "learning_rate": 1.9780051665867792e-05, + "loss": 0.3127, + "step": 1558 + }, + { + "epoch": 0.38, + "learning_rate": 1.9779227849161998e-05, + "loss": 0.3209, + "step": 1560 + }, + { + "epoch": 0.38, + "learning_rate": 1.9778402509755645e-05, + "loss": 0.3221, + "step": 1562 + }, + { + "epoch": 0.38, + "learning_rate": 1.9777575647777256e-05, + "loss": 0.3113, + "step": 1564 + }, + { + "epoch": 0.38, + "learning_rate": 1.9776747263355566e-05, + "loss": 0.3399, + "step": 1566 + }, + { + "epoch": 0.38, + "learning_rate": 1.9775917356619575e-05, + "loss": 0.3173, + "step": 1568 + }, + { + "epoch": 0.38, + "learning_rate": 1.9775085927698496e-05, + "loss": 0.3113, + "step": 1570 + }, + { + "epoch": 0.38, + "learning_rate": 1.977425297672179e-05, + "loss": 0.3062, + "step": 1572 + }, + { + "epoch": 0.38, + "learning_rate": 1.9773418503819153e-05, + "loss": 0.3285, + "step": 1574 + }, + { + "epoch": 0.38, + "learning_rate": 1.9772582509120525e-05, + "loss": 0.3211, + "step": 1576 + }, + { + "epoch": 0.38, + "learning_rate": 1.977174499275607e-05, + "loss": 0.3244, + "step": 1578 + }, + { + "epoch": 0.38, + "learning_rate": 1.97709059548562e-05, + "loss": 0.3001, + "step": 1580 + }, + { + "epoch": 0.39, + "learning_rate": 1.9770065395551562e-05, + "loss": 0.3094, + "step": 1582 + }, + { + "epoch": 0.39, + "learning_rate": 1.9769223314973027e-05, + "loss": 0.3091, + "step": 1584 + }, + { + "epoch": 0.39, + "learning_rate": 1.9768379713251725e-05, + "loss": 0.3053, + "step": 1586 + }, + { + "epoch": 0.39, + "learning_rate": 1.9767534590519e-05, + "loss": 0.3255, + "step": 1588 + }, + { + "epoch": 0.39, + "learning_rate": 1.976668794690646e-05, + "loss": 0.3164, + "step": 1590 + }, + { + "epoch": 0.39, + "learning_rate": 1.9765839782545916e-05, + "loss": 0.3025, + "step": 1592 + }, + { + "epoch": 0.39, + "learning_rate": 1.9764990097569445e-05, + "loss": 0.2943, + "step": 1594 + }, + { + "epoch": 0.39, + "learning_rate": 1.9764138892109344e-05, + "loss": 0.3292, + "step": 1596 + }, + { + "epoch": 0.39, + "learning_rate": 1.9763286166298157e-05, + "loss": 0.3143, + "step": 1598 + }, + { + "epoch": 0.39, + "learning_rate": 1.976243192026866e-05, + "loss": 0.2977, + "step": 1600 + }, + { + "epoch": 0.39, + "learning_rate": 1.9761576154153855e-05, + "loss": 0.3237, + "step": 1602 + }, + { + "epoch": 0.39, + "learning_rate": 1.9760718868087006e-05, + "loss": 0.3173, + "step": 1604 + }, + { + "epoch": 0.39, + "learning_rate": 1.975986006220159e-05, + "loss": 0.324, + "step": 1606 + }, + { + "epoch": 0.39, + "learning_rate": 1.9758999736631336e-05, + "loss": 0.3094, + "step": 1608 + }, + { + "epoch": 0.39, + "learning_rate": 1.9758137891510194e-05, + "loss": 0.3136, + "step": 1610 + }, + { + "epoch": 0.39, + "learning_rate": 1.9757274526972367e-05, + "loss": 0.2919, + "step": 1612 + }, + { + "epoch": 0.39, + "learning_rate": 1.9756409643152287e-05, + "loss": 0.2911, + "step": 1614 + }, + { + "epoch": 0.39, + "learning_rate": 1.975554324018462e-05, + "loss": 0.3079, + "step": 1616 + }, + { + "epoch": 0.39, + "learning_rate": 1.9754675318204275e-05, + "loss": 0.3119, + "step": 1618 + }, + { + "epoch": 0.39, + "learning_rate": 1.975380587734639e-05, + "loss": 0.294, + "step": 1620 + }, + { + "epoch": 0.4, + "learning_rate": 1.9752934917746346e-05, + "loss": 0.3157, + "step": 1622 + }, + { + "epoch": 0.4, + "learning_rate": 1.975206243953976e-05, + "loss": 0.3027, + "step": 1624 + }, + { + "epoch": 0.4, + "learning_rate": 1.9751188442862485e-05, + "loss": 0.2889, + "step": 1626 + }, + { + "epoch": 0.4, + "learning_rate": 1.97503129278506e-05, + "loss": 0.2995, + "step": 1628 + }, + { + "epoch": 0.4, + "learning_rate": 1.9749435894640438e-05, + "loss": 0.2985, + "step": 1630 + }, + { + "epoch": 0.4, + "learning_rate": 1.9748557343368554e-05, + "loss": 0.2941, + "step": 1632 + }, + { + "epoch": 0.4, + "learning_rate": 1.9747677274171745e-05, + "loss": 0.2984, + "step": 1634 + }, + { + "epoch": 0.4, + "learning_rate": 1.9746795687187054e-05, + "loss": 0.3193, + "step": 1636 + }, + { + "epoch": 0.4, + "learning_rate": 1.974591258255174e-05, + "loss": 0.3129, + "step": 1638 + }, + { + "epoch": 0.4, + "learning_rate": 1.9745027960403312e-05, + "loss": 0.3223, + "step": 1640 + }, + { + "epoch": 0.4, + "learning_rate": 1.974414182087952e-05, + "loss": 0.3171, + "step": 1642 + }, + { + "epoch": 0.4, + "learning_rate": 1.974325416411833e-05, + "loss": 0.3139, + "step": 1644 + }, + { + "epoch": 0.4, + "learning_rate": 1.9742364990257966e-05, + "loss": 0.3186, + "step": 1646 + }, + { + "epoch": 0.4, + "learning_rate": 1.9741474299436877e-05, + "loss": 0.3171, + "step": 1648 + }, + { + "epoch": 0.4, + "learning_rate": 1.974058209179375e-05, + "loss": 0.275, + "step": 1650 + }, + { + "epoch": 0.4, + "learning_rate": 1.9739688367467508e-05, + "loss": 0.2959, + "step": 1652 + }, + { + "epoch": 0.4, + "learning_rate": 1.973879312659731e-05, + "loss": 0.3034, + "step": 1654 + }, + { + "epoch": 0.4, + "learning_rate": 1.9737896369322555e-05, + "loss": 0.3057, + "step": 1656 + }, + { + "epoch": 0.4, + "learning_rate": 1.973699809578287e-05, + "loss": 0.3162, + "step": 1658 + }, + { + "epoch": 0.4, + "learning_rate": 1.973609830611813e-05, + "loss": 0.3393, + "step": 1660 + }, + { + "epoch": 0.4, + "learning_rate": 1.9735197000468435e-05, + "loss": 0.3355, + "step": 1662 + }, + { + "epoch": 0.41, + "learning_rate": 1.9734294178974123e-05, + "loss": 0.3039, + "step": 1664 + }, + { + "epoch": 0.41, + "learning_rate": 1.9733389841775773e-05, + "loss": 0.2998, + "step": 1666 + }, + { + "epoch": 0.41, + "learning_rate": 1.9732483989014197e-05, + "loss": 0.3127, + "step": 1668 + }, + { + "epoch": 0.41, + "learning_rate": 1.9731576620830444e-05, + "loss": 0.2932, + "step": 1670 + }, + { + "epoch": 0.41, + "learning_rate": 1.9730667737365796e-05, + "loss": 0.3171, + "step": 1672 + }, + { + "epoch": 0.41, + "learning_rate": 1.9729757338761775e-05, + "loss": 0.3, + "step": 1674 + }, + { + "epoch": 0.41, + "learning_rate": 1.9728845425160137e-05, + "loss": 0.3099, + "step": 1676 + }, + { + "epoch": 0.41, + "learning_rate": 1.972793199670287e-05, + "loss": 0.2798, + "step": 1678 + }, + { + "epoch": 0.41, + "learning_rate": 1.9727017053532207e-05, + "loss": 0.3084, + "step": 1680 + }, + { + "epoch": 0.41, + "learning_rate": 1.972610059579061e-05, + "loss": 0.3135, + "step": 1682 + }, + { + "epoch": 0.41, + "learning_rate": 1.9725182623620777e-05, + "loss": 0.3098, + "step": 1684 + }, + { + "epoch": 0.41, + "learning_rate": 1.9724263137165648e-05, + "loss": 0.2916, + "step": 1686 + }, + { + "epoch": 0.41, + "learning_rate": 1.9723342136568385e-05, + "loss": 0.319, + "step": 1688 + }, + { + "epoch": 0.41, + "learning_rate": 1.9722419621972405e-05, + "loss": 0.2902, + "step": 1690 + }, + { + "epoch": 0.41, + "learning_rate": 1.9721495593521343e-05, + "loss": 0.3347, + "step": 1692 + }, + { + "epoch": 0.41, + "learning_rate": 1.9720570051359084e-05, + "loss": 0.3115, + "step": 1694 + }, + { + "epoch": 0.41, + "learning_rate": 1.9719642995629735e-05, + "loss": 0.3027, + "step": 1696 + }, + { + "epoch": 0.41, + "learning_rate": 1.9718714426477646e-05, + "loss": 0.3016, + "step": 1698 + }, + { + "epoch": 0.41, + "learning_rate": 1.9717784344047408e-05, + "loss": 0.3083, + "step": 1700 + }, + { + "epoch": 0.41, + "learning_rate": 1.971685274848384e-05, + "loss": 0.3114, + "step": 1702 + }, + { + "epoch": 0.42, + "learning_rate": 1.9715919639931996e-05, + "loss": 0.3163, + "step": 1704 + }, + { + "epoch": 0.42, + "learning_rate": 1.9714985018537173e-05, + "loss": 0.3084, + "step": 1706 + }, + { + "epoch": 0.42, + "learning_rate": 1.9714048884444894e-05, + "loss": 0.3248, + "step": 1708 + }, + { + "epoch": 0.42, + "learning_rate": 1.971311123780092e-05, + "loss": 0.3078, + "step": 1710 + }, + { + "epoch": 0.42, + "learning_rate": 1.971217207875126e-05, + "loss": 0.3056, + "step": 1712 + }, + { + "epoch": 0.42, + "learning_rate": 1.9711231407442136e-05, + "loss": 0.3068, + "step": 1714 + }, + { + "epoch": 0.42, + "learning_rate": 1.9710289224020028e-05, + "loss": 0.3245, + "step": 1716 + }, + { + "epoch": 0.42, + "learning_rate": 1.9709345528631637e-05, + "loss": 0.2868, + "step": 1718 + }, + { + "epoch": 0.42, + "learning_rate": 1.9708400321423897e-05, + "loss": 0.3025, + "step": 1720 + }, + { + "epoch": 0.42, + "learning_rate": 1.9707453602543994e-05, + "loss": 0.2995, + "step": 1722 + }, + { + "epoch": 0.42, + "learning_rate": 1.9706505372139337e-05, + "loss": 0.3032, + "step": 1724 + }, + { + "epoch": 0.42, + "learning_rate": 1.970555563035757e-05, + "loss": 0.3164, + "step": 1726 + }, + { + "epoch": 0.42, + "learning_rate": 1.9704604377346577e-05, + "loss": 0.2949, + "step": 1728 + }, + { + "epoch": 0.42, + "learning_rate": 1.9703651613254475e-05, + "loss": 0.318, + "step": 1730 + }, + { + "epoch": 0.42, + "learning_rate": 1.9702697338229613e-05, + "loss": 0.3144, + "step": 1732 + }, + { + "epoch": 0.42, + "learning_rate": 1.9701741552420587e-05, + "loss": 0.2944, + "step": 1734 + }, + { + "epoch": 0.42, + "learning_rate": 1.9700784255976212e-05, + "loss": 0.3185, + "step": 1736 + }, + { + "epoch": 0.42, + "learning_rate": 1.9699825449045548e-05, + "loss": 0.3026, + "step": 1738 + }, + { + "epoch": 0.42, + "learning_rate": 1.9698865131777896e-05, + "loss": 0.3052, + "step": 1740 + }, + { + "epoch": 0.42, + "learning_rate": 1.9697903304322775e-05, + "loss": 0.314, + "step": 1742 + }, + { + "epoch": 0.42, + "learning_rate": 1.9696939966829957e-05, + "loss": 0.3105, + "step": 1744 + }, + { + "epoch": 0.43, + "learning_rate": 1.9695975119449434e-05, + "loss": 0.3298, + "step": 1746 + }, + { + "epoch": 0.43, + "learning_rate": 1.9695008762331444e-05, + "loss": 0.3048, + "step": 1748 + }, + { + "epoch": 0.43, + "learning_rate": 1.9694040895626452e-05, + "loss": 0.2939, + "step": 1750 + }, + { + "epoch": 0.43, + "learning_rate": 1.969307151948517e-05, + "loss": 0.3049, + "step": 1752 + }, + { + "epoch": 0.43, + "learning_rate": 1.969210063405853e-05, + "loss": 0.2966, + "step": 1754 + }, + { + "epoch": 0.43, + "learning_rate": 1.969112823949771e-05, + "loss": 0.3224, + "step": 1756 + }, + { + "epoch": 0.43, + "learning_rate": 1.969015433595412e-05, + "loss": 0.3018, + "step": 1758 + }, + { + "epoch": 0.43, + "learning_rate": 1.9689178923579404e-05, + "loss": 0.2984, + "step": 1760 + }, + { + "epoch": 0.43, + "learning_rate": 1.9688202002525437e-05, + "loss": 0.2696, + "step": 1762 + }, + { + "epoch": 0.43, + "learning_rate": 1.9687223572944337e-05, + "loss": 0.2899, + "step": 1764 + }, + { + "epoch": 0.43, + "learning_rate": 1.968624363498845e-05, + "loss": 0.3124, + "step": 1766 + }, + { + "epoch": 0.43, + "learning_rate": 1.9685262188810365e-05, + "loss": 0.3092, + "step": 1768 + }, + { + "epoch": 0.43, + "learning_rate": 1.9684279234562894e-05, + "loss": 0.3199, + "step": 1770 + }, + { + "epoch": 0.43, + "learning_rate": 1.9683294772399094e-05, + "loss": 0.3017, + "step": 1772 + }, + { + "epoch": 0.43, + "learning_rate": 1.9682308802472256e-05, + "loss": 0.3229, + "step": 1774 + }, + { + "epoch": 0.43, + "learning_rate": 1.9681321324935897e-05, + "loss": 0.3124, + "step": 1776 + }, + { + "epoch": 0.43, + "learning_rate": 1.968033233994378e-05, + "loss": 0.3186, + "step": 1778 + }, + { + "epoch": 0.43, + "learning_rate": 1.9679341847649894e-05, + "loss": 0.3194, + "step": 1780 + }, + { + "epoch": 0.43, + "learning_rate": 1.967834984820847e-05, + "loss": 0.3029, + "step": 1782 + }, + { + "epoch": 0.43, + "learning_rate": 1.9677356341773968e-05, + "loss": 0.314, + "step": 1784 + }, + { + "epoch": 0.44, + "learning_rate": 1.967636132850108e-05, + "loss": 0.2938, + "step": 1786 + }, + { + "epoch": 0.44, + "learning_rate": 1.9675364808544745e-05, + "loss": 0.3223, + "step": 1788 + }, + { + "epoch": 0.44, + "learning_rate": 1.9674366782060126e-05, + "loss": 0.3082, + "step": 1790 + }, + { + "epoch": 0.44, + "learning_rate": 1.9673367249202623e-05, + "loss": 0.309, + "step": 1792 + }, + { + "epoch": 0.44, + "learning_rate": 1.9672366210127863e-05, + "loss": 0.3034, + "step": 1794 + }, + { + "epoch": 0.44, + "learning_rate": 1.9671363664991728e-05, + "loss": 0.2849, + "step": 1796 + }, + { + "epoch": 0.44, + "learning_rate": 1.967035961395032e-05, + "loss": 0.2915, + "step": 1798 + }, + { + "epoch": 0.44, + "learning_rate": 1.966935405715997e-05, + "loss": 0.3152, + "step": 1800 + }, + { + "epoch": 0.44, + "learning_rate": 1.9668346994777257e-05, + "loss": 0.3072, + "step": 1802 + }, + { + "epoch": 0.44, + "learning_rate": 1.9667338426958986e-05, + "loss": 0.3013, + "step": 1804 + }, + { + "epoch": 0.44, + "learning_rate": 1.96663283538622e-05, + "loss": 0.303, + "step": 1806 + }, + { + "epoch": 0.44, + "learning_rate": 1.9665316775644174e-05, + "loss": 0.2886, + "step": 1808 + }, + { + "epoch": 0.44, + "learning_rate": 1.9664303692462417e-05, + "loss": 0.3094, + "step": 1810 + }, + { + "epoch": 0.44, + "learning_rate": 1.9663289104474675e-05, + "loss": 0.3039, + "step": 1812 + }, + { + "epoch": 0.44, + "learning_rate": 1.966227301183893e-05, + "loss": 0.3065, + "step": 1814 + }, + { + "epoch": 0.44, + "learning_rate": 1.9661255414713394e-05, + "loss": 0.3259, + "step": 1816 + }, + { + "epoch": 0.44, + "learning_rate": 1.9660236313256508e-05, + "loss": 0.3194, + "step": 1818 + }, + { + "epoch": 0.44, + "learning_rate": 1.9659215707626968e-05, + "loss": 0.3159, + "step": 1820 + }, + { + "epoch": 0.44, + "learning_rate": 1.9658193597983673e-05, + "loss": 0.3046, + "step": 1822 + }, + { + "epoch": 0.44, + "learning_rate": 1.9657169984485785e-05, + "loss": 0.3013, + "step": 1824 + }, + { + "epoch": 0.44, + "learning_rate": 1.965614486729269e-05, + "loss": 0.3137, + "step": 1826 + }, + { + "epoch": 0.45, + "learning_rate": 1.9655118246563996e-05, + "loss": 0.2932, + "step": 1828 + }, + { + "epoch": 0.45, + "learning_rate": 1.9654090122459566e-05, + "loss": 0.3086, + "step": 1830 + }, + { + "epoch": 0.45, + "learning_rate": 1.965306049513948e-05, + "loss": 0.2965, + "step": 1832 + }, + { + "epoch": 0.45, + "learning_rate": 1.965202936476406e-05, + "loss": 0.297, + "step": 1834 + }, + { + "epoch": 0.45, + "learning_rate": 1.9650996731493866e-05, + "loss": 0.308, + "step": 1836 + }, + { + "epoch": 0.45, + "learning_rate": 1.964996259548968e-05, + "loss": 0.3188, + "step": 1838 + }, + { + "epoch": 0.45, + "learning_rate": 1.964892695691253e-05, + "loss": 0.309, + "step": 1840 + }, + { + "epoch": 0.45, + "learning_rate": 1.9647889815923672e-05, + "loss": 0.3366, + "step": 1842 + }, + { + "epoch": 0.45, + "learning_rate": 1.9646851172684593e-05, + "loss": 0.2949, + "step": 1844 + }, + { + "epoch": 0.45, + "learning_rate": 1.9645811027357017e-05, + "loss": 0.3055, + "step": 1846 + }, + { + "epoch": 0.45, + "learning_rate": 1.9644769380102912e-05, + "loss": 0.2972, + "step": 1848 + }, + { + "epoch": 0.45, + "learning_rate": 1.964372623108446e-05, + "loss": 0.3134, + "step": 1850 + }, + { + "epoch": 0.45, + "learning_rate": 1.9642681580464095e-05, + "loss": 0.2692, + "step": 1852 + }, + { + "epoch": 0.45, + "learning_rate": 1.9641635428404475e-05, + "loss": 0.3146, + "step": 1854 + }, + { + "epoch": 0.45, + "learning_rate": 1.964058777506849e-05, + "loss": 0.3048, + "step": 1856 + }, + { + "epoch": 0.45, + "learning_rate": 1.9639538620619266e-05, + "loss": 0.3155, + "step": 1858 + }, + { + "epoch": 0.45, + "learning_rate": 1.9638487965220176e-05, + "loss": 0.3291, + "step": 1860 + }, + { + "epoch": 0.45, + "learning_rate": 1.96374358090348e-05, + "loss": 0.2986, + "step": 1862 + }, + { + "epoch": 0.45, + "learning_rate": 1.963638215222698e-05, + "loss": 0.2878, + "step": 1864 + }, + { + "epoch": 0.45, + "learning_rate": 1.9635326994960773e-05, + "loss": 0.3019, + "step": 1866 + }, + { + "epoch": 0.46, + "learning_rate": 1.9634270337400474e-05, + "loss": 0.2835, + "step": 1868 + }, + { + "epoch": 0.46, + "learning_rate": 1.9633212179710613e-05, + "loss": 0.2899, + "step": 1870 + }, + { + "epoch": 0.46, + "learning_rate": 1.9632152522055953e-05, + "loss": 0.2793, + "step": 1872 + }, + { + "epoch": 0.46, + "learning_rate": 1.9631091364601495e-05, + "loss": 0.29, + "step": 1874 + }, + { + "epoch": 0.46, + "learning_rate": 1.9630028707512462e-05, + "loss": 0.3209, + "step": 1876 + }, + { + "epoch": 0.46, + "learning_rate": 1.9628964550954325e-05, + "loss": 0.3049, + "step": 1878 + }, + { + "epoch": 0.46, + "learning_rate": 1.9627898895092773e-05, + "loss": 0.3086, + "step": 1880 + }, + { + "epoch": 0.46, + "learning_rate": 1.9626831740093745e-05, + "loss": 0.3065, + "step": 1882 + }, + { + "epoch": 0.46, + "learning_rate": 1.9625763086123403e-05, + "loss": 0.3178, + "step": 1884 + }, + { + "epoch": 0.46, + "learning_rate": 1.9624692933348142e-05, + "loss": 0.3102, + "step": 1886 + }, + { + "epoch": 0.46, + "learning_rate": 1.96236212819346e-05, + "loss": 0.2425, + "step": 1888 + }, + { + "epoch": 0.46, + "learning_rate": 1.962254813204963e-05, + "loss": 0.2794, + "step": 1890 + }, + { + "epoch": 0.46, + "learning_rate": 1.9621473483860335e-05, + "loss": 0.3106, + "step": 1892 + }, + { + "epoch": 0.46, + "learning_rate": 1.962039733753405e-05, + "loss": 0.3126, + "step": 1894 + }, + { + "epoch": 0.46, + "learning_rate": 1.9619319693238333e-05, + "loss": 0.3055, + "step": 1896 + }, + { + "epoch": 0.46, + "learning_rate": 1.9618240551140985e-05, + "loss": 0.2972, + "step": 1898 + }, + { + "epoch": 0.46, + "learning_rate": 1.9617159911410037e-05, + "loss": 0.2988, + "step": 1900 + }, + { + "epoch": 0.46, + "learning_rate": 1.961607777421375e-05, + "loss": 0.2913, + "step": 1902 + }, + { + "epoch": 0.46, + "learning_rate": 1.9614994139720624e-05, + "loss": 0.3034, + "step": 1904 + }, + { + "epoch": 0.46, + "learning_rate": 1.9613909008099387e-05, + "loss": 0.3074, + "step": 1906 + }, + { + "epoch": 0.46, + "learning_rate": 1.9612822379519004e-05, + "loss": 0.3059, + "step": 1908 + }, + { + "epoch": 0.47, + "learning_rate": 1.961173425414867e-05, + "loss": 0.3056, + "step": 1910 + }, + { + "epoch": 0.47, + "learning_rate": 1.9610644632157814e-05, + "loss": 0.2983, + "step": 1912 + }, + { + "epoch": 0.47, + "learning_rate": 1.96095535137161e-05, + "loss": 0.2777, + "step": 1914 + }, + { + "epoch": 0.47, + "learning_rate": 1.9608460898993425e-05, + "loss": 0.2874, + "step": 1916 + }, + { + "epoch": 0.47, + "learning_rate": 1.9607366788159914e-05, + "loss": 0.2735, + "step": 1918 + }, + { + "epoch": 0.47, + "learning_rate": 1.960627118138593e-05, + "loss": 0.2974, + "step": 1920 + }, + { + "epoch": 0.47, + "learning_rate": 1.960517407884207e-05, + "loss": 0.3067, + "step": 1922 + }, + { + "epoch": 0.47, + "learning_rate": 1.9604075480699155e-05, + "loss": 0.2966, + "step": 1924 + }, + { + "epoch": 0.47, + "learning_rate": 1.960297538712825e-05, + "loss": 0.2996, + "step": 1926 + }, + { + "epoch": 0.47, + "learning_rate": 1.9601873798300647e-05, + "loss": 0.2881, + "step": 1928 + }, + { + "epoch": 0.47, + "learning_rate": 1.960077071438787e-05, + "loss": 0.2806, + "step": 1930 + }, + { + "epoch": 0.47, + "learning_rate": 1.959966613556168e-05, + "loss": 0.2916, + "step": 1932 + }, + { + "epoch": 0.47, + "learning_rate": 1.9598560061994067e-05, + "loss": 0.3156, + "step": 1934 + }, + { + "epoch": 0.47, + "learning_rate": 1.9597452493857253e-05, + "loss": 0.3111, + "step": 1936 + }, + { + "epoch": 0.47, + "learning_rate": 1.95963434313237e-05, + "loss": 0.3016, + "step": 1938 + }, + { + "epoch": 0.47, + "learning_rate": 1.9595232874566098e-05, + "loss": 0.3144, + "step": 1940 + }, + { + "epoch": 0.47, + "learning_rate": 1.959412082375736e-05, + "loss": 0.3074, + "step": 1942 + }, + { + "epoch": 0.47, + "learning_rate": 1.9593007279070654e-05, + "loss": 0.3026, + "step": 1944 + }, + { + "epoch": 0.47, + "learning_rate": 1.9591892240679353e-05, + "loss": 0.2951, + "step": 1946 + }, + { + "epoch": 0.47, + "learning_rate": 1.959077570875709e-05, + "loss": 0.3124, + "step": 1948 + }, + { + "epoch": 0.48, + "learning_rate": 1.9589657683477708e-05, + "loss": 0.2941, + "step": 1950 + }, + { + "epoch": 0.48, + "learning_rate": 1.9588538165015297e-05, + "loss": 0.298, + "step": 1952 + }, + { + "epoch": 0.48, + "learning_rate": 1.958741715354417e-05, + "loss": 0.3097, + "step": 1954 + }, + { + "epoch": 0.48, + "learning_rate": 1.9586294649238885e-05, + "loss": 0.2838, + "step": 1956 + }, + { + "epoch": 0.48, + "learning_rate": 1.958517065227422e-05, + "loss": 0.3099, + "step": 1958 + }, + { + "epoch": 0.48, + "learning_rate": 1.9584045162825188e-05, + "loss": 0.3099, + "step": 1960 + }, + { + "epoch": 0.48, + "learning_rate": 1.9582918181067043e-05, + "loss": 0.2977, + "step": 1962 + }, + { + "epoch": 0.48, + "learning_rate": 1.9581789707175254e-05, + "loss": 0.3225, + "step": 1964 + }, + { + "epoch": 0.48, + "learning_rate": 1.9580659741325544e-05, + "loss": 0.3154, + "step": 1966 + }, + { + "epoch": 0.48, + "learning_rate": 1.9579528283693846e-05, + "loss": 0.303, + "step": 1968 + }, + { + "epoch": 0.48, + "learning_rate": 1.9578395334456348e-05, + "loss": 0.2989, + "step": 1970 + }, + { + "epoch": 0.48, + "learning_rate": 1.957726089378945e-05, + "loss": 0.3123, + "step": 1972 + }, + { + "epoch": 0.48, + "learning_rate": 1.95761249618698e-05, + "loss": 0.3198, + "step": 1974 + }, + { + "epoch": 0.48, + "learning_rate": 1.957498753887427e-05, + "loss": 0.2754, + "step": 1976 + }, + { + "epoch": 0.48, + "learning_rate": 1.957384862497996e-05, + "loss": 0.2932, + "step": 1978 + }, + { + "epoch": 0.48, + "learning_rate": 1.9572708220364212e-05, + "loss": 0.3162, + "step": 1980 + }, + { + "epoch": 0.48, + "learning_rate": 1.9571566325204593e-05, + "loss": 0.31, + "step": 1982 + }, + { + "epoch": 0.48, + "learning_rate": 1.957042293967891e-05, + "loss": 0.3051, + "step": 1984 + }, + { + "epoch": 0.48, + "learning_rate": 1.9569278063965193e-05, + "loss": 0.2821, + "step": 1986 + }, + { + "epoch": 0.48, + "learning_rate": 1.9568131698241708e-05, + "loss": 0.3124, + "step": 1988 + }, + { + "epoch": 0.48, + "learning_rate": 1.9566983842686954e-05, + "loss": 0.2982, + "step": 1990 + }, + { + "epoch": 0.49, + "learning_rate": 1.956583449747966e-05, + "loss": 0.3054, + "step": 1992 + }, + { + "epoch": 0.49, + "learning_rate": 1.9564683662798788e-05, + "loss": 0.3064, + "step": 1994 + }, + { + "epoch": 0.49, + "learning_rate": 1.9563531338823528e-05, + "loss": 0.2784, + "step": 1996 + }, + { + "epoch": 0.49, + "learning_rate": 1.9562377525733314e-05, + "loss": 0.297, + "step": 1998 + }, + { + "epoch": 0.49, + "learning_rate": 1.95612222237078e-05, + "loss": 0.279, + "step": 2000 + }, + { + "epoch": 0.49, + "learning_rate": 1.956006543292687e-05, + "loss": 0.2912, + "step": 2002 + }, + { + "epoch": 0.49, + "learning_rate": 1.9558907153570654e-05, + "loss": 0.324, + "step": 2004 + }, + { + "epoch": 0.49, + "learning_rate": 1.95577473858195e-05, + "loss": 0.2992, + "step": 2006 + }, + { + "epoch": 0.49, + "learning_rate": 1.9556586129853993e-05, + "loss": 0.2856, + "step": 2008 + }, + { + "epoch": 0.49, + "learning_rate": 1.9555423385854948e-05, + "loss": 0.2596, + "step": 2010 + }, + { + "epoch": 0.49, + "learning_rate": 1.9554259154003415e-05, + "loss": 0.2967, + "step": 2012 + }, + { + "epoch": 0.49, + "learning_rate": 1.9553093434480673e-05, + "loss": 0.3143, + "step": 2014 + }, + { + "epoch": 0.49, + "learning_rate": 1.9551926227468238e-05, + "loss": 0.3187, + "step": 2016 + }, + { + "epoch": 0.49, + "learning_rate": 1.9550757533147845e-05, + "loss": 0.3061, + "step": 2018 + }, + { + "epoch": 0.49, + "learning_rate": 1.9549587351701474e-05, + "loss": 0.3104, + "step": 2020 + }, + { + "epoch": 0.49, + "learning_rate": 1.954841568331133e-05, + "loss": 0.3155, + "step": 2022 + }, + { + "epoch": 0.49, + "learning_rate": 1.9547242528159852e-05, + "loss": 0.2866, + "step": 2024 + }, + { + "epoch": 0.49, + "learning_rate": 1.9546067886429705e-05, + "loss": 0.3206, + "step": 2026 + }, + { + "epoch": 0.49, + "learning_rate": 1.9544891758303795e-05, + "loss": 0.3331, + "step": 2028 + }, + { + "epoch": 0.49, + "learning_rate": 1.9543714143965252e-05, + "loss": 0.31, + "step": 2030 + }, + { + "epoch": 0.5, + "learning_rate": 1.954253504359744e-05, + "loss": 0.3137, + "step": 2032 + }, + { + "epoch": 0.5, + "learning_rate": 1.954135445738395e-05, + "loss": 0.3016, + "step": 2034 + }, + { + "epoch": 0.5, + "learning_rate": 1.9540172385508614e-05, + "loss": 0.2827, + "step": 2036 + }, + { + "epoch": 0.5, + "learning_rate": 1.9538988828155484e-05, + "loss": 0.32, + "step": 2038 + }, + { + "epoch": 0.5, + "learning_rate": 1.9537803785508854e-05, + "loss": 0.303, + "step": 2040 + }, + { + "epoch": 0.5, + "learning_rate": 1.9536617257753246e-05, + "loss": 0.3173, + "step": 2042 + }, + { + "epoch": 0.5, + "learning_rate": 1.9535429245073407e-05, + "loss": 0.3248, + "step": 2044 + }, + { + "epoch": 0.5, + "learning_rate": 1.9534239747654316e-05, + "loss": 0.3261, + "step": 2046 + }, + { + "epoch": 0.5, + "learning_rate": 1.953304876568119e-05, + "loss": 0.3067, + "step": 2048 + }, + { + "epoch": 0.5, + "learning_rate": 1.9531856299339483e-05, + "loss": 0.2753, + "step": 2050 + }, + { + "epoch": 0.5, + "learning_rate": 1.9530662348814858e-05, + "loss": 0.3137, + "step": 2052 + }, + { + "epoch": 0.5, + "learning_rate": 1.952946691429323e-05, + "loss": 0.3059, + "step": 2054 + }, + { + "epoch": 0.5, + "learning_rate": 1.9528269995960732e-05, + "loss": 0.2983, + "step": 2056 + }, + { + "epoch": 0.5, + "learning_rate": 1.952707159400374e-05, + "loss": 0.2921, + "step": 2058 + }, + { + "epoch": 0.5, + "learning_rate": 1.9525871708608847e-05, + "loss": 0.3104, + "step": 2060 + }, + { + "epoch": 0.5, + "learning_rate": 1.952467033996289e-05, + "loss": 0.2975, + "step": 2062 + }, + { + "epoch": 0.5, + "learning_rate": 1.952346748825293e-05, + "loss": 0.2984, + "step": 2064 + }, + { + "epoch": 0.5, + "learning_rate": 1.9522263153666257e-05, + "loss": 0.2981, + "step": 2066 + }, + { + "epoch": 0.5, + "learning_rate": 1.9521057336390398e-05, + "loss": 0.3075, + "step": 2068 + }, + { + "epoch": 0.5, + "learning_rate": 1.951985003661311e-05, + "loss": 0.308, + "step": 2070 + }, + { + "epoch": 0.5, + "learning_rate": 1.9518641254522374e-05, + "loss": 0.3135, + "step": 2072 + }, + { + "epoch": 0.51, + "learning_rate": 1.951743099030641e-05, + "loss": 0.3081, + "step": 2074 + }, + { + "epoch": 0.51, + "learning_rate": 1.9516219244153666e-05, + "loss": 0.2991, + "step": 2076 + }, + { + "epoch": 0.51, + "learning_rate": 1.9515006016252816e-05, + "loss": 0.2934, + "step": 2078 + }, + { + "epoch": 0.51, + "learning_rate": 1.951379130679277e-05, + "loss": 0.3083, + "step": 2080 + }, + { + "epoch": 0.51, + "learning_rate": 1.951257511596267e-05, + "loss": 0.3318, + "step": 2082 + }, + { + "epoch": 0.51, + "learning_rate": 1.9511357443951885e-05, + "loss": 0.3098, + "step": 2084 + }, + { + "epoch": 0.51, + "learning_rate": 1.951013829095002e-05, + "loss": 0.3099, + "step": 2086 + }, + { + "epoch": 0.51, + "learning_rate": 1.95089176571469e-05, + "loss": 0.3306, + "step": 2088 + }, + { + "epoch": 0.51, + "learning_rate": 1.950769554273259e-05, + "loss": 0.2969, + "step": 2090 + }, + { + "epoch": 0.51, + "learning_rate": 1.9506471947897378e-05, + "loss": 0.2976, + "step": 2092 + }, + { + "epoch": 0.51, + "learning_rate": 1.9505246872831792e-05, + "loss": 0.3151, + "step": 2094 + }, + { + "epoch": 0.51, + "learning_rate": 1.9504020317726585e-05, + "loss": 0.3159, + "step": 2096 + }, + { + "epoch": 0.51, + "learning_rate": 1.950279228277274e-05, + "loss": 0.3117, + "step": 2098 + }, + { + "epoch": 0.51, + "learning_rate": 1.9501562768161477e-05, + "loss": 0.2997, + "step": 2100 + }, + { + "epoch": 0.51, + "learning_rate": 1.9500331774084232e-05, + "loss": 0.2662, + "step": 2102 + }, + { + "epoch": 0.51, + "learning_rate": 1.9499099300732687e-05, + "loss": 0.3126, + "step": 2104 + }, + { + "epoch": 0.51, + "learning_rate": 1.9497865348298738e-05, + "loss": 0.3011, + "step": 2106 + }, + { + "epoch": 0.51, + "learning_rate": 1.9496629916974533e-05, + "loss": 0.3095, + "step": 2108 + }, + { + "epoch": 0.51, + "learning_rate": 1.9495393006952433e-05, + "loss": 0.3251, + "step": 2110 + }, + { + "epoch": 0.51, + "learning_rate": 1.949415461842503e-05, + "loss": 0.3057, + "step": 2112 + }, + { + "epoch": 0.52, + "learning_rate": 1.9492914751585157e-05, + "loss": 0.3205, + "step": 2114 + }, + { + "epoch": 0.52, + "learning_rate": 1.949167340662587e-05, + "loss": 0.2909, + "step": 2116 + }, + { + "epoch": 0.52, + "learning_rate": 1.949043058374045e-05, + "loss": 0.3098, + "step": 2118 + }, + { + "epoch": 0.52, + "learning_rate": 1.9489186283122417e-05, + "loss": 0.2958, + "step": 2120 + }, + { + "epoch": 0.52, + "learning_rate": 1.9487940504965525e-05, + "loss": 0.3054, + "step": 2122 + }, + { + "epoch": 0.52, + "learning_rate": 1.948669324946374e-05, + "loss": 0.3021, + "step": 2124 + }, + { + "epoch": 0.52, + "learning_rate": 1.948544451681128e-05, + "loss": 0.2918, + "step": 2126 + }, + { + "epoch": 0.52, + "learning_rate": 1.9484194307202573e-05, + "loss": 0.2922, + "step": 2128 + }, + { + "epoch": 0.52, + "learning_rate": 1.948294262083229e-05, + "loss": 0.3322, + "step": 2130 + }, + { + "epoch": 0.52, + "learning_rate": 1.948168945789533e-05, + "loss": 0.311, + "step": 2132 + }, + { + "epoch": 0.52, + "learning_rate": 1.9480434818586817e-05, + "loss": 0.3313, + "step": 2134 + }, + { + "epoch": 0.52, + "learning_rate": 1.9479178703102108e-05, + "loss": 0.2972, + "step": 2136 + }, + { + "epoch": 0.52, + "learning_rate": 1.9477921111636794e-05, + "loss": 0.3031, + "step": 2138 + }, + { + "epoch": 0.52, + "learning_rate": 1.9476662044386687e-05, + "loss": 0.2851, + "step": 2140 + }, + { + "epoch": 0.52, + "learning_rate": 1.9475401501547835e-05, + "loss": 0.2924, + "step": 2142 + }, + { + "epoch": 0.52, + "learning_rate": 1.9474139483316512e-05, + "loss": 0.2908, + "step": 2144 + }, + { + "epoch": 0.52, + "learning_rate": 1.9472875989889227e-05, + "loss": 0.2872, + "step": 2146 + }, + { + "epoch": 0.52, + "learning_rate": 1.947161102146272e-05, + "loss": 0.297, + "step": 2148 + }, + { + "epoch": 0.52, + "learning_rate": 1.9470344578233942e-05, + "loss": 0.2732, + "step": 2150 + }, + { + "epoch": 0.52, + "learning_rate": 1.9469076660400104e-05, + "loss": 0.3101, + "step": 2152 + }, + { + "epoch": 0.52, + "learning_rate": 1.946780726815862e-05, + "loss": 0.3093, + "step": 2154 + }, + { + "epoch": 0.53, + "learning_rate": 1.9466536401707146e-05, + "loss": 0.3066, + "step": 2156 + }, + { + "epoch": 0.53, + "learning_rate": 1.946526406124357e-05, + "loss": 0.3087, + "step": 2158 + }, + { + "epoch": 0.53, + "learning_rate": 1.9463990246966e-05, + "loss": 0.3039, + "step": 2160 + }, + { + "epoch": 0.53, + "learning_rate": 1.946271495907278e-05, + "loss": 0.3063, + "step": 2162 + }, + { + "epoch": 0.53, + "learning_rate": 1.9461438197762485e-05, + "loss": 0.2598, + "step": 2164 + }, + { + "epoch": 0.53, + "learning_rate": 1.9460159963233916e-05, + "loss": 0.3244, + "step": 2166 + }, + { + "epoch": 0.53, + "learning_rate": 1.94588802556861e-05, + "loss": 0.2994, + "step": 2168 + }, + { + "epoch": 0.53, + "learning_rate": 1.9457599075318297e-05, + "loss": 0.3135, + "step": 2170 + }, + { + "epoch": 0.53, + "learning_rate": 1.9456316422330003e-05, + "loss": 0.3108, + "step": 2172 + }, + { + "epoch": 0.53, + "learning_rate": 1.9455032296920932e-05, + "loss": 0.3212, + "step": 2174 + }, + { + "epoch": 0.53, + "learning_rate": 1.945374669929103e-05, + "loss": 0.2943, + "step": 2176 + }, + { + "epoch": 0.53, + "learning_rate": 1.945245962964048e-05, + "loss": 0.2851, + "step": 2178 + }, + { + "epoch": 0.53, + "learning_rate": 1.9451171088169684e-05, + "loss": 0.292, + "step": 2180 + }, + { + "epoch": 0.53, + "learning_rate": 1.9449881075079282e-05, + "loss": 0.2911, + "step": 2182 + }, + { + "epoch": 0.53, + "learning_rate": 1.9448589590570136e-05, + "loss": 0.3014, + "step": 2184 + }, + { + "epoch": 0.53, + "learning_rate": 1.944729663484334e-05, + "loss": 0.3048, + "step": 2186 + }, + { + "epoch": 0.53, + "learning_rate": 1.944600220810022e-05, + "loss": 0.281, + "step": 2188 + }, + { + "epoch": 0.53, + "learning_rate": 1.9444706310542326e-05, + "loss": 0.3162, + "step": 2190 + }, + { + "epoch": 0.53, + "learning_rate": 1.9443408942371435e-05, + "loss": 0.3266, + "step": 2192 + }, + { + "epoch": 0.53, + "learning_rate": 1.944211010378956e-05, + "loss": 0.3273, + "step": 2194 + }, + { + "epoch": 0.54, + "learning_rate": 1.9440809794998947e-05, + "loss": 0.2907, + "step": 2196 + }, + { + "epoch": 0.54, + "learning_rate": 1.9439508016202057e-05, + "loss": 0.3187, + "step": 2198 + }, + { + "epoch": 0.54, + "learning_rate": 1.9438204767601586e-05, + "loss": 0.2982, + "step": 2200 + }, + { + "epoch": 0.54, + "learning_rate": 1.9436900049400466e-05, + "loss": 0.305, + "step": 2202 + }, + { + "epoch": 0.54, + "learning_rate": 1.9435593861801846e-05, + "loss": 0.2946, + "step": 2204 + }, + { + "epoch": 0.54, + "learning_rate": 1.943428620500911e-05, + "loss": 0.3075, + "step": 2206 + }, + { + "epoch": 0.54, + "learning_rate": 1.943297707922587e-05, + "loss": 0.2992, + "step": 2208 + }, + { + "epoch": 0.54, + "learning_rate": 1.943166648465597e-05, + "loss": 0.2891, + "step": 2210 + }, + { + "epoch": 0.54, + "learning_rate": 1.9430354421503476e-05, + "loss": 0.2712, + "step": 2212 + }, + { + "epoch": 0.54, + "learning_rate": 1.942904088997269e-05, + "loss": 0.2869, + "step": 2214 + }, + { + "epoch": 0.54, + "learning_rate": 1.9427725890268133e-05, + "loss": 0.2955, + "step": 2216 + }, + { + "epoch": 0.54, + "learning_rate": 1.942640942259457e-05, + "loss": 0.2933, + "step": 2218 + }, + { + "epoch": 0.54, + "learning_rate": 1.9425091487156976e-05, + "loss": 0.3002, + "step": 2220 + }, + { + "epoch": 0.54, + "learning_rate": 1.9423772084160565e-05, + "loss": 0.3053, + "step": 2222 + }, + { + "epoch": 0.54, + "learning_rate": 1.9422451213810786e-05, + "loss": 0.3091, + "step": 2224 + }, + { + "epoch": 0.54, + "learning_rate": 1.9421128876313297e-05, + "loss": 0.3076, + "step": 2226 + }, + { + "epoch": 0.54, + "learning_rate": 1.941980507187401e-05, + "loss": 0.2878, + "step": 2228 + }, + { + "epoch": 0.54, + "learning_rate": 1.9418479800699035e-05, + "loss": 0.3039, + "step": 2230 + }, + { + "epoch": 0.54, + "learning_rate": 1.9417153062994737e-05, + "loss": 0.3059, + "step": 2232 + }, + { + "epoch": 0.54, + "learning_rate": 1.94158248589677e-05, + "loss": 0.2913, + "step": 2234 + }, + { + "epoch": 0.54, + "learning_rate": 1.941449518882473e-05, + "loss": 0.2824, + "step": 2236 + }, + { + "epoch": 0.55, + "learning_rate": 1.941316405277287e-05, + "loss": 0.3135, + "step": 2238 + }, + { + "epoch": 0.55, + "learning_rate": 1.941183145101939e-05, + "loss": 0.2874, + "step": 2240 + }, + { + "epoch": 0.55, + "learning_rate": 1.9410497383771782e-05, + "loss": 0.3155, + "step": 2242 + }, + { + "epoch": 0.55, + "learning_rate": 1.9409161851237775e-05, + "loss": 0.3064, + "step": 2244 + }, + { + "epoch": 0.55, + "learning_rate": 1.9407824853625316e-05, + "loss": 0.3088, + "step": 2246 + }, + { + "epoch": 0.55, + "learning_rate": 1.9406486391142593e-05, + "loss": 0.2959, + "step": 2248 + }, + { + "epoch": 0.55, + "learning_rate": 1.940514646399801e-05, + "loss": 0.2983, + "step": 2250 + }, + { + "epoch": 0.55, + "learning_rate": 1.94038050724002e-05, + "loss": 0.2917, + "step": 2252 + }, + { + "epoch": 0.55, + "learning_rate": 1.940246221655804e-05, + "loss": 0.3225, + "step": 2254 + }, + { + "epoch": 0.55, + "learning_rate": 1.9401117896680607e-05, + "loss": 0.313, + "step": 2256 + }, + { + "epoch": 0.55, + "learning_rate": 1.9399772112977234e-05, + "loss": 0.2962, + "step": 2258 + }, + { + "epoch": 0.55, + "learning_rate": 1.939842486565747e-05, + "loss": 0.3086, + "step": 2260 + }, + { + "epoch": 0.55, + "learning_rate": 1.9397076154931086e-05, + "loss": 0.2956, + "step": 2262 + }, + { + "epoch": 0.55, + "learning_rate": 1.9395725981008084e-05, + "loss": 0.3017, + "step": 2264 + }, + { + "epoch": 0.55, + "learning_rate": 1.9394374344098706e-05, + "loss": 0.3099, + "step": 2266 + }, + { + "epoch": 0.55, + "learning_rate": 1.9393021244413406e-05, + "loss": 0.3061, + "step": 2268 + }, + { + "epoch": 0.55, + "learning_rate": 1.939166668216287e-05, + "loss": 0.2888, + "step": 2270 + }, + { + "epoch": 0.55, + "learning_rate": 1.939031065755802e-05, + "loss": 0.2944, + "step": 2272 + }, + { + "epoch": 0.55, + "learning_rate": 1.9388953170809996e-05, + "loss": 0.3022, + "step": 2274 + }, + { + "epoch": 0.55, + "learning_rate": 1.938759422213017e-05, + "loss": 0.3085, + "step": 2276 + }, + { + "epoch": 0.56, + "learning_rate": 1.9386233811730136e-05, + "loss": 0.2693, + "step": 2278 + }, + { + "epoch": 0.56, + "learning_rate": 1.938487193982173e-05, + "loss": 0.3024, + "step": 2280 + }, + { + "epoch": 0.56, + "learning_rate": 1.9383508606616994e-05, + "loss": 0.3012, + "step": 2282 + }, + { + "epoch": 0.56, + "learning_rate": 1.9382143812328218e-05, + "loss": 0.3065, + "step": 2284 + }, + { + "epoch": 0.56, + "learning_rate": 1.9380777557167908e-05, + "loss": 0.3001, + "step": 2286 + }, + { + "epoch": 0.56, + "learning_rate": 1.9379409841348805e-05, + "loss": 0.2973, + "step": 2288 + }, + { + "epoch": 0.56, + "learning_rate": 1.9378040665083862e-05, + "loss": 0.3029, + "step": 2290 + }, + { + "epoch": 0.56, + "learning_rate": 1.9376670028586283e-05, + "loss": 0.3005, + "step": 2292 + }, + { + "epoch": 0.56, + "learning_rate": 1.9375297932069477e-05, + "loss": 0.3084, + "step": 2294 + }, + { + "epoch": 0.56, + "learning_rate": 1.9373924375747095e-05, + "loss": 0.289, + "step": 2296 + }, + { + "epoch": 0.56, + "learning_rate": 1.9372549359833005e-05, + "loss": 0.3064, + "step": 2298 + }, + { + "epoch": 0.56, + "learning_rate": 1.9371172884541316e-05, + "loss": 0.2915, + "step": 2300 + }, + { + "epoch": 0.56, + "learning_rate": 1.9369794950086346e-05, + "loss": 0.2964, + "step": 2302 + }, + { + "epoch": 0.56, + "learning_rate": 1.936841555668266e-05, + "loss": 0.2922, + "step": 2304 + }, + { + "epoch": 0.56, + "learning_rate": 1.936703470454503e-05, + "loss": 0.2912, + "step": 2306 + }, + { + "epoch": 0.56, + "learning_rate": 1.9365652393888476e-05, + "loss": 0.3013, + "step": 2308 + }, + { + "epoch": 0.56, + "learning_rate": 1.9364268624928223e-05, + "loss": 0.3095, + "step": 2310 + }, + { + "epoch": 0.56, + "learning_rate": 1.9362883397879738e-05, + "loss": 0.327, + "step": 2312 + }, + { + "epoch": 0.56, + "learning_rate": 1.9361496712958718e-05, + "loss": 0.3151, + "step": 2314 + }, + { + "epoch": 0.56, + "learning_rate": 1.9360108570381073e-05, + "loss": 0.2809, + "step": 2316 + }, + { + "epoch": 0.56, + "learning_rate": 1.9358718970362954e-05, + "loss": 0.3027, + "step": 2318 + }, + { + "epoch": 0.57, + "learning_rate": 1.9357327913120723e-05, + "loss": 0.31, + "step": 2320 + }, + { + "epoch": 0.57, + "learning_rate": 1.9355935398870987e-05, + "loss": 0.3004, + "step": 2322 + }, + { + "epoch": 0.57, + "learning_rate": 1.9354541427830566e-05, + "loss": 0.311, + "step": 2324 + }, + { + "epoch": 0.57, + "learning_rate": 1.9353146000216517e-05, + "loss": 0.2999, + "step": 2326 + }, + { + "epoch": 0.57, + "learning_rate": 1.935174911624611e-05, + "loss": 0.303, + "step": 2328 + }, + { + "epoch": 0.57, + "learning_rate": 1.9350350776136862e-05, + "loss": 0.2858, + "step": 2330 + }, + { + "epoch": 0.57, + "learning_rate": 1.9348950980106496e-05, + "loss": 0.3085, + "step": 2332 + }, + { + "epoch": 0.57, + "learning_rate": 1.9347549728372977e-05, + "loss": 0.3017, + "step": 2334 + }, + { + "epoch": 0.57, + "learning_rate": 1.9346147021154485e-05, + "loss": 0.3166, + "step": 2336 + }, + { + "epoch": 0.57, + "learning_rate": 1.9344742858669435e-05, + "loss": 0.3032, + "step": 2338 + }, + { + "epoch": 0.57, + "learning_rate": 1.9343337241136466e-05, + "loss": 0.2859, + "step": 2340 + }, + { + "epoch": 0.57, + "learning_rate": 1.9341930168774446e-05, + "loss": 0.3238, + "step": 2342 + }, + { + "epoch": 0.57, + "learning_rate": 1.9340521641802464e-05, + "loss": 0.2853, + "step": 2344 + }, + { + "epoch": 0.57, + "learning_rate": 1.9339111660439835e-05, + "loss": 0.3151, + "step": 2346 + }, + { + "epoch": 0.57, + "learning_rate": 1.933770022490611e-05, + "loss": 0.2785, + "step": 2348 + }, + { + "epoch": 0.57, + "learning_rate": 1.9336287335421057e-05, + "loss": 0.3166, + "step": 2350 + }, + { + "epoch": 0.57, + "learning_rate": 1.9334872992204677e-05, + "loss": 0.3239, + "step": 2352 + }, + { + "epoch": 0.57, + "learning_rate": 1.933345719547719e-05, + "loss": 0.3006, + "step": 2354 + }, + { + "epoch": 0.57, + "learning_rate": 1.9332039945459044e-05, + "loss": 0.2962, + "step": 2356 + }, + { + "epoch": 0.57, + "learning_rate": 1.9330621242370924e-05, + "loss": 0.3114, + "step": 2358 + }, + { + "epoch": 0.58, + "learning_rate": 1.932920108643373e-05, + "loss": 0.2896, + "step": 2360 + }, + { + "epoch": 0.58, + "learning_rate": 1.9327779477868586e-05, + "loss": 0.3102, + "step": 2362 + }, + { + "epoch": 0.58, + "learning_rate": 1.932635641689685e-05, + "loss": 0.2954, + "step": 2364 + }, + { + "epoch": 0.58, + "learning_rate": 1.932493190374011e-05, + "loss": 0.2728, + "step": 2366 + }, + { + "epoch": 0.58, + "learning_rate": 1.9323505938620163e-05, + "loss": 0.2819, + "step": 2368 + }, + { + "epoch": 0.58, + "learning_rate": 1.932207852175905e-05, + "loss": 0.2837, + "step": 2370 + }, + { + "epoch": 0.58, + "learning_rate": 1.932064965337903e-05, + "loss": 0.3101, + "step": 2372 + }, + { + "epoch": 0.58, + "learning_rate": 1.9319219333702584e-05, + "loss": 0.3016, + "step": 2374 + }, + { + "epoch": 0.58, + "learning_rate": 1.931778756295243e-05, + "loss": 0.2733, + "step": 2376 + }, + { + "epoch": 0.58, + "learning_rate": 1.9316354341351497e-05, + "loss": 0.3171, + "step": 2378 + }, + { + "epoch": 0.58, + "learning_rate": 1.931491966912296e-05, + "loss": 0.2806, + "step": 2380 + }, + { + "epoch": 0.58, + "learning_rate": 1.93134835464902e-05, + "loss": 0.3011, + "step": 2382 + }, + { + "epoch": 0.58, + "learning_rate": 1.9312045973676835e-05, + "loss": 0.3103, + "step": 2384 + }, + { + "epoch": 0.58, + "learning_rate": 1.9310606950906706e-05, + "loss": 0.2665, + "step": 2386 + }, + { + "epoch": 0.58, + "learning_rate": 1.9309166478403882e-05, + "loss": 0.305, + "step": 2388 + }, + { + "epoch": 0.58, + "learning_rate": 1.9307724556392654e-05, + "loss": 0.3093, + "step": 2390 + }, + { + "epoch": 0.58, + "learning_rate": 1.9306281185097532e-05, + "loss": 0.268, + "step": 2392 + }, + { + "epoch": 0.58, + "learning_rate": 1.9304836364743277e-05, + "loss": 0.2995, + "step": 2394 + }, + { + "epoch": 0.58, + "learning_rate": 1.9303390095554842e-05, + "loss": 0.2973, + "step": 2396 + }, + { + "epoch": 0.58, + "learning_rate": 1.9301942377757433e-05, + "loss": 0.3085, + "step": 2398 + }, + { + "epoch": 0.58, + "learning_rate": 1.9300493211576467e-05, + "loss": 0.287, + "step": 2400 + }, + { + "epoch": 0.59, + "learning_rate": 1.929904259723759e-05, + "loss": 0.2907, + "step": 2402 + }, + { + "epoch": 0.59, + "learning_rate": 1.9297590534966676e-05, + "loss": 0.288, + "step": 2404 + }, + { + "epoch": 0.59, + "learning_rate": 1.9296137024989818e-05, + "loss": 0.2706, + "step": 2406 + }, + { + "epoch": 0.59, + "learning_rate": 1.9294682067533338e-05, + "loss": 0.2976, + "step": 2408 + }, + { + "epoch": 0.59, + "learning_rate": 1.9293225662823788e-05, + "loss": 0.2883, + "step": 2410 + }, + { + "epoch": 0.59, + "learning_rate": 1.9291767811087942e-05, + "loss": 0.3068, + "step": 2412 + }, + { + "epoch": 0.59, + "learning_rate": 1.92903085125528e-05, + "loss": 0.3056, + "step": 2414 + }, + { + "epoch": 0.59, + "learning_rate": 1.928884776744558e-05, + "loss": 0.3094, + "step": 2416 + }, + { + "epoch": 0.59, + "learning_rate": 1.928738557599373e-05, + "loss": 0.2917, + "step": 2418 + }, + { + "epoch": 0.59, + "learning_rate": 1.9285921938424934e-05, + "loss": 0.3092, + "step": 2420 + }, + { + "epoch": 0.59, + "learning_rate": 1.9284456854967086e-05, + "loss": 0.3042, + "step": 2422 + }, + { + "epoch": 0.59, + "learning_rate": 1.9282990325848307e-05, + "loss": 0.2973, + "step": 2424 + }, + { + "epoch": 0.59, + "learning_rate": 1.9281522351296952e-05, + "loss": 0.3205, + "step": 2426 + }, + { + "epoch": 0.59, + "learning_rate": 1.9280052931541594e-05, + "loss": 0.2851, + "step": 2428 + }, + { + "epoch": 0.59, + "learning_rate": 1.9278582066811034e-05, + "loss": 0.3056, + "step": 2430 + }, + { + "epoch": 0.59, + "learning_rate": 1.9277109757334296e-05, + "loss": 0.3115, + "step": 2432 + }, + { + "epoch": 0.59, + "learning_rate": 1.927563600334063e-05, + "loss": 0.2996, + "step": 2434 + }, + { + "epoch": 0.59, + "learning_rate": 1.9274160805059513e-05, + "loss": 0.297, + "step": 2436 + }, + { + "epoch": 0.59, + "learning_rate": 1.927268416272064e-05, + "loss": 0.3132, + "step": 2438 + }, + { + "epoch": 0.59, + "learning_rate": 1.9271206076553936e-05, + "loss": 0.296, + "step": 2440 + }, + { + "epoch": 0.6, + "learning_rate": 1.9269726546789553e-05, + "loss": 0.3107, + "step": 2442 + }, + { + "epoch": 0.6, + "learning_rate": 1.9268245573657867e-05, + "loss": 0.3028, + "step": 2444 + }, + { + "epoch": 0.6, + "learning_rate": 1.926676315738947e-05, + "loss": 0.3078, + "step": 2446 + }, + { + "epoch": 0.6, + "learning_rate": 1.9265279298215197e-05, + "loss": 0.2909, + "step": 2448 + }, + { + "epoch": 0.6, + "learning_rate": 1.9263793996366083e-05, + "loss": 0.3159, + "step": 2450 + }, + { + "epoch": 0.6, + "learning_rate": 1.926230725207341e-05, + "loss": 0.2733, + "step": 2452 + }, + { + "epoch": 0.6, + "learning_rate": 1.9260819065568667e-05, + "loss": 0.291, + "step": 2454 + }, + { + "epoch": 0.6, + "learning_rate": 1.9259329437083586e-05, + "loss": 0.3063, + "step": 2456 + }, + { + "epoch": 0.6, + "learning_rate": 1.9257838366850106e-05, + "loss": 0.2884, + "step": 2458 + }, + { + "epoch": 0.6, + "learning_rate": 1.92563458551004e-05, + "loss": 0.2785, + "step": 2460 + }, + { + "epoch": 0.6, + "learning_rate": 1.9254851902066867e-05, + "loss": 0.3104, + "step": 2462 + }, + { + "epoch": 0.6, + "learning_rate": 1.9253356507982125e-05, + "loss": 0.2991, + "step": 2464 + }, + { + "epoch": 0.6, + "learning_rate": 1.9251859673079016e-05, + "loss": 0.2842, + "step": 2466 + }, + { + "epoch": 0.6, + "learning_rate": 1.925036139759061e-05, + "loss": 0.2965, + "step": 2468 + }, + { + "epoch": 0.6, + "learning_rate": 1.92488616817502e-05, + "loss": 0.2986, + "step": 2470 + }, + { + "epoch": 0.6, + "learning_rate": 1.9247360525791303e-05, + "loss": 0.2956, + "step": 2472 + }, + { + "epoch": 0.6, + "learning_rate": 1.924585792994766e-05, + "loss": 0.2855, + "step": 2474 + }, + { + "epoch": 0.6, + "learning_rate": 1.924435389445324e-05, + "loss": 0.2867, + "step": 2476 + }, + { + "epoch": 0.6, + "learning_rate": 1.9242848419542232e-05, + "loss": 0.3022, + "step": 2478 + }, + { + "epoch": 0.6, + "learning_rate": 1.9241341505449043e-05, + "loss": 0.3224, + "step": 2480 + }, + { + "epoch": 0.6, + "learning_rate": 1.923983315240832e-05, + "loss": 0.2938, + "step": 2482 + }, + { + "epoch": 0.61, + "learning_rate": 1.923832336065492e-05, + "loss": 0.3167, + "step": 2484 + }, + { + "epoch": 0.61, + "learning_rate": 1.923681213042393e-05, + "loss": 0.3131, + "step": 2486 + }, + { + "epoch": 0.61, + "learning_rate": 1.923529946195066e-05, + "loss": 0.2955, + "step": 2488 + }, + { + "epoch": 0.61, + "learning_rate": 1.9233785355470645e-05, + "loss": 0.3071, + "step": 2490 + }, + { + "epoch": 0.61, + "learning_rate": 1.9232269811219643e-05, + "loss": 0.3017, + "step": 2492 + }, + { + "epoch": 0.61, + "learning_rate": 1.923075282943364e-05, + "loss": 0.2925, + "step": 2494 + }, + { + "epoch": 0.61, + "learning_rate": 1.922923441034883e-05, + "loss": 0.3116, + "step": 2496 + }, + { + "epoch": 0.61, + "learning_rate": 1.9227714554201654e-05, + "loss": 0.3121, + "step": 2498 + }, + { + "epoch": 0.61, + "learning_rate": 1.922619326122876e-05, + "loss": 0.2915, + "step": 2500 + }, + { + "epoch": 0.61, + "learning_rate": 1.9224670531667023e-05, + "loss": 0.2796, + "step": 2502 + }, + { + "epoch": 0.61, + "learning_rate": 1.9223146365753548e-05, + "loss": 0.2623, + "step": 2504 + }, + { + "epoch": 0.61, + "learning_rate": 1.9221620763725658e-05, + "loss": 0.2923, + "step": 2506 + }, + { + "epoch": 0.61, + "learning_rate": 1.9220093725820903e-05, + "loss": 0.3046, + "step": 2508 + }, + { + "epoch": 0.61, + "learning_rate": 1.9218565252277047e-05, + "loss": 0.3085, + "step": 2510 + }, + { + "epoch": 0.61, + "learning_rate": 1.9217035343332094e-05, + "loss": 0.2772, + "step": 2512 + }, + { + "epoch": 0.61, + "learning_rate": 1.921550399922426e-05, + "loss": 0.2952, + "step": 2514 + }, + { + "epoch": 0.61, + "learning_rate": 1.921397122019198e-05, + "loss": 0.2839, + "step": 2516 + }, + { + "epoch": 0.61, + "learning_rate": 1.9212437006473927e-05, + "loss": 0.3098, + "step": 2518 + }, + { + "epoch": 0.61, + "learning_rate": 1.921090135830899e-05, + "loss": 0.2951, + "step": 2520 + }, + { + "epoch": 0.61, + "learning_rate": 1.9209364275936278e-05, + "loss": 0.3081, + "step": 2522 + }, + { + "epoch": 0.62, + "learning_rate": 1.9207825759595126e-05, + "loss": 0.2788, + "step": 2524 + }, + { + "epoch": 0.62, + "learning_rate": 1.9206285809525097e-05, + "loss": 0.2964, + "step": 2526 + }, + { + "epoch": 0.62, + "learning_rate": 1.9204744425965967e-05, + "loss": 0.2875, + "step": 2528 + }, + { + "epoch": 0.62, + "learning_rate": 1.9203201609157746e-05, + "loss": 0.2919, + "step": 2530 + }, + { + "epoch": 0.62, + "learning_rate": 1.9201657359340662e-05, + "loss": 0.3252, + "step": 2532 + }, + { + "epoch": 0.62, + "learning_rate": 1.9200111676755164e-05, + "loss": 0.3178, + "step": 2534 + }, + { + "epoch": 0.62, + "learning_rate": 1.9198564561641927e-05, + "loss": 0.3143, + "step": 2536 + }, + { + "epoch": 0.62, + "learning_rate": 1.9197016014241853e-05, + "loss": 0.3072, + "step": 2538 + }, + { + "epoch": 0.62, + "learning_rate": 1.9195466034796056e-05, + "loss": 0.3021, + "step": 2540 + }, + { + "epoch": 0.62, + "learning_rate": 1.919391462354588e-05, + "loss": 0.3162, + "step": 2542 + }, + { + "epoch": 0.62, + "learning_rate": 1.9192361780732896e-05, + "loss": 0.301, + "step": 2544 + }, + { + "epoch": 0.62, + "learning_rate": 1.9190807506598895e-05, + "loss": 0.2746, + "step": 2546 + }, + { + "epoch": 0.62, + "learning_rate": 1.918925180138589e-05, + "loss": 0.2919, + "step": 2548 + }, + { + "epoch": 0.62, + "learning_rate": 1.9187694665336106e-05, + "loss": 0.3085, + "step": 2550 + }, + { + "epoch": 0.62, + "learning_rate": 1.9186136098692008e-05, + "loss": 0.302, + "step": 2552 + }, + { + "epoch": 0.62, + "learning_rate": 1.9184576101696277e-05, + "loss": 0.3078, + "step": 2554 + }, + { + "epoch": 0.62, + "learning_rate": 1.918301467459182e-05, + "loss": 0.2958, + "step": 2556 + }, + { + "epoch": 0.62, + "learning_rate": 1.9181451817621753e-05, + "loss": 0.2922, + "step": 2558 + }, + { + "epoch": 0.62, + "learning_rate": 1.917988753102943e-05, + "loss": 0.3217, + "step": 2560 + }, + { + "epoch": 0.62, + "learning_rate": 1.9178321815058426e-05, + "loss": 0.285, + "step": 2562 + }, + { + "epoch": 0.62, + "learning_rate": 1.9176754669952534e-05, + "loss": 0.3077, + "step": 2564 + }, + { + "epoch": 0.63, + "learning_rate": 1.9175186095955766e-05, + "loss": 0.308, + "step": 2566 + }, + { + "epoch": 0.63, + "learning_rate": 1.9173616093312364e-05, + "loss": 0.2936, + "step": 2568 + }, + { + "epoch": 0.63, + "learning_rate": 1.9172044662266787e-05, + "loss": 0.2961, + "step": 2570 + }, + { + "epoch": 0.63, + "learning_rate": 1.917047180306372e-05, + "loss": 0.2918, + "step": 2572 + }, + { + "epoch": 0.63, + "learning_rate": 1.9168897515948074e-05, + "loss": 0.314, + "step": 2574 + }, + { + "epoch": 0.63, + "learning_rate": 1.916732180116497e-05, + "loss": 0.3068, + "step": 2576 + }, + { + "epoch": 0.63, + "learning_rate": 1.9165744658959763e-05, + "loss": 0.2924, + "step": 2578 + }, + { + "epoch": 0.63, + "learning_rate": 1.9164166089578026e-05, + "loss": 0.2987, + "step": 2580 + }, + { + "epoch": 0.63, + "learning_rate": 1.9162586093265555e-05, + "loss": 0.315, + "step": 2582 + }, + { + "epoch": 0.63, + "learning_rate": 1.9161004670268365e-05, + "loss": 0.2955, + "step": 2584 + }, + { + "epoch": 0.63, + "learning_rate": 1.9159421820832696e-05, + "loss": 0.3077, + "step": 2586 + }, + { + "epoch": 0.63, + "learning_rate": 1.915783754520501e-05, + "loss": 0.3106, + "step": 2588 + }, + { + "epoch": 0.63, + "learning_rate": 1.9156251843631996e-05, + "loss": 0.3177, + "step": 2590 + }, + { + "epoch": 0.63, + "learning_rate": 1.915466471636055e-05, + "loss": 0.3211, + "step": 2592 + }, + { + "epoch": 0.63, + "learning_rate": 1.9153076163637806e-05, + "loss": 0.2919, + "step": 2594 + }, + { + "epoch": 0.63, + "learning_rate": 1.9151486185711113e-05, + "loss": 0.2859, + "step": 2596 + }, + { + "epoch": 0.63, + "learning_rate": 1.9149894782828044e-05, + "loss": 0.3109, + "step": 2598 + }, + { + "epoch": 0.63, + "learning_rate": 1.914830195523639e-05, + "loss": 0.2911, + "step": 2600 + }, + { + "epoch": 0.63, + "learning_rate": 1.9146707703184168e-05, + "loss": 0.2569, + "step": 2602 + }, + { + "epoch": 0.63, + "learning_rate": 1.9145112026919615e-05, + "loss": 0.3109, + "step": 2604 + }, + { + "epoch": 0.63, + "learning_rate": 1.914351492669119e-05, + "loss": 0.3088, + "step": 2606 + }, + { + "epoch": 0.64, + "learning_rate": 1.9141916402747576e-05, + "loss": 0.2975, + "step": 2608 + }, + { + "epoch": 0.64, + "learning_rate": 1.914031645533767e-05, + "loss": 0.2903, + "step": 2610 + }, + { + "epoch": 0.64, + "learning_rate": 1.91387150847106e-05, + "loss": 0.3029, + "step": 2612 + }, + { + "epoch": 0.64, + "learning_rate": 1.9137112291115713e-05, + "loss": 0.2918, + "step": 2614 + }, + { + "epoch": 0.64, + "learning_rate": 1.913550807480257e-05, + "loss": 0.3048, + "step": 2616 + }, + { + "epoch": 0.64, + "learning_rate": 1.9133902436020966e-05, + "loss": 0.2966, + "step": 2618 + }, + { + "epoch": 0.64, + "learning_rate": 1.913229537502091e-05, + "loss": 0.2864, + "step": 2620 + }, + { + "epoch": 0.64, + "learning_rate": 1.9130686892052628e-05, + "loss": 0.3041, + "step": 2622 + }, + { + "epoch": 0.64, + "learning_rate": 1.912907698736658e-05, + "loss": 0.2901, + "step": 2624 + }, + { + "epoch": 0.64, + "learning_rate": 1.912746566121344e-05, + "loss": 0.3059, + "step": 2626 + }, + { + "epoch": 0.64, + "learning_rate": 1.9125852913844102e-05, + "loss": 0.2821, + "step": 2628 + }, + { + "epoch": 0.64, + "learning_rate": 1.9124238745509682e-05, + "loss": 0.3068, + "step": 2630 + }, + { + "epoch": 0.64, + "learning_rate": 1.912262315646152e-05, + "loss": 0.3024, + "step": 2632 + }, + { + "epoch": 0.64, + "learning_rate": 1.9121006146951172e-05, + "loss": 0.2663, + "step": 2634 + }, + { + "epoch": 0.64, + "learning_rate": 1.9119387717230427e-05, + "loss": 0.283, + "step": 2636 + }, + { + "epoch": 0.64, + "learning_rate": 1.911776786755128e-05, + "loss": 0.2837, + "step": 2638 + }, + { + "epoch": 0.64, + "learning_rate": 1.9116146598165954e-05, + "loss": 0.3077, + "step": 2640 + }, + { + "epoch": 0.64, + "learning_rate": 1.9114523909326892e-05, + "loss": 0.274, + "step": 2642 + }, + { + "epoch": 0.64, + "learning_rate": 1.9112899801286766e-05, + "loss": 0.2965, + "step": 2644 + }, + { + "epoch": 0.64, + "learning_rate": 1.9111274274298457e-05, + "loss": 0.3009, + "step": 2646 + }, + { + "epoch": 0.65, + "learning_rate": 1.9109647328615074e-05, + "loss": 0.2753, + "step": 2648 + }, + { + "epoch": 0.65, + "learning_rate": 1.9108018964489942e-05, + "loss": 0.2705, + "step": 2650 + }, + { + "epoch": 0.65, + "learning_rate": 1.910638918217661e-05, + "loss": 0.2892, + "step": 2652 + }, + { + "epoch": 0.65, + "learning_rate": 1.9104757981928852e-05, + "loss": 0.3046, + "step": 2654 + }, + { + "epoch": 0.65, + "learning_rate": 1.910312536400065e-05, + "loss": 0.2998, + "step": 2656 + }, + { + "epoch": 0.65, + "learning_rate": 1.9101491328646228e-05, + "loss": 0.2896, + "step": 2658 + }, + { + "epoch": 0.65, + "learning_rate": 1.9099855876120005e-05, + "loss": 0.2855, + "step": 2660 + }, + { + "epoch": 0.65, + "learning_rate": 1.909821900667664e-05, + "loss": 0.2924, + "step": 2662 + }, + { + "epoch": 0.65, + "learning_rate": 1.9096580720571004e-05, + "loss": 0.2856, + "step": 2664 + }, + { + "epoch": 0.65, + "learning_rate": 1.909494101805819e-05, + "loss": 0.2791, + "step": 2666 + }, + { + "epoch": 0.65, + "learning_rate": 1.909329989939352e-05, + "loss": 0.2848, + "step": 2668 + }, + { + "epoch": 0.65, + "learning_rate": 1.9091657364832518e-05, + "loss": 0.3059, + "step": 2670 + }, + { + "epoch": 0.65, + "learning_rate": 1.9090013414630943e-05, + "loss": 0.2832, + "step": 2672 + }, + { + "epoch": 0.65, + "learning_rate": 1.908836804904477e-05, + "loss": 0.2779, + "step": 2674 + }, + { + "epoch": 0.65, + "learning_rate": 1.90867212683302e-05, + "loss": 0.2676, + "step": 2676 + }, + { + "epoch": 0.65, + "learning_rate": 1.9085073072743644e-05, + "loss": 0.3032, + "step": 2678 + }, + { + "epoch": 0.65, + "learning_rate": 1.908342346254174e-05, + "loss": 0.3164, + "step": 2680 + }, + { + "epoch": 0.65, + "learning_rate": 1.9081772437981344e-05, + "loss": 0.2889, + "step": 2682 + }, + { + "epoch": 0.65, + "learning_rate": 1.9080119999319536e-05, + "loss": 0.2923, + "step": 2684 + }, + { + "epoch": 0.65, + "learning_rate": 1.907846614681361e-05, + "loss": 0.2842, + "step": 2686 + }, + { + "epoch": 0.65, + "learning_rate": 1.9076810880721085e-05, + "loss": 0.3022, + "step": 2688 + }, + { + "epoch": 0.66, + "learning_rate": 1.90751542012997e-05, + "loss": 0.2779, + "step": 2690 + }, + { + "epoch": 0.66, + "learning_rate": 1.907349610880741e-05, + "loss": 0.2785, + "step": 2692 + }, + { + "epoch": 0.66, + "learning_rate": 1.9071836603502397e-05, + "loss": 0.2906, + "step": 2694 + }, + { + "epoch": 0.66, + "learning_rate": 1.9070175685643054e-05, + "loss": 0.2717, + "step": 2696 + }, + { + "epoch": 0.66, + "learning_rate": 1.9068513355487996e-05, + "loss": 0.2896, + "step": 2698 + }, + { + "epoch": 0.66, + "learning_rate": 1.906684961329607e-05, + "loss": 0.2838, + "step": 2700 + }, + { + "epoch": 0.66, + "learning_rate": 1.9065184459326328e-05, + "loss": 0.3062, + "step": 2702 + }, + { + "epoch": 0.66, + "learning_rate": 1.9063517893838048e-05, + "loss": 0.3351, + "step": 2704 + }, + { + "epoch": 0.66, + "learning_rate": 1.906184991709072e-05, + "loss": 0.3178, + "step": 2706 + }, + { + "epoch": 0.66, + "learning_rate": 1.906018052934407e-05, + "loss": 0.313, + "step": 2708 + }, + { + "epoch": 0.66, + "learning_rate": 1.9058509730858037e-05, + "loss": 0.2599, + "step": 2710 + }, + { + "epoch": 0.66, + "learning_rate": 1.9056837521892765e-05, + "loss": 0.2795, + "step": 2712 + }, + { + "epoch": 0.66, + "learning_rate": 1.9055163902708642e-05, + "loss": 0.289, + "step": 2714 + }, + { + "epoch": 0.66, + "learning_rate": 1.905348887356625e-05, + "loss": 0.2986, + "step": 2716 + }, + { + "epoch": 0.66, + "learning_rate": 1.9051812434726418e-05, + "loss": 0.2873, + "step": 2718 + }, + { + "epoch": 0.66, + "learning_rate": 1.9050134586450175e-05, + "loss": 0.3006, + "step": 2720 + }, + { + "epoch": 0.66, + "learning_rate": 1.904845532899877e-05, + "loss": 0.3107, + "step": 2722 + }, + { + "epoch": 0.66, + "learning_rate": 1.904677466263368e-05, + "loss": 0.2941, + "step": 2724 + }, + { + "epoch": 0.66, + "learning_rate": 1.9045092587616598e-05, + "loss": 0.3029, + "step": 2726 + }, + { + "epoch": 0.66, + "learning_rate": 1.9043409104209436e-05, + "loss": 0.2769, + "step": 2728 + }, + { + "epoch": 0.67, + "learning_rate": 1.9041724212674323e-05, + "loss": 0.2872, + "step": 2730 + }, + { + "epoch": 0.67, + "learning_rate": 1.9040037913273616e-05, + "loss": 0.287, + "step": 2732 + }, + { + "epoch": 0.67, + "learning_rate": 1.9038350206269874e-05, + "loss": 0.2954, + "step": 2734 + }, + { + "epoch": 0.67, + "learning_rate": 1.9036661091925894e-05, + "loss": 0.3083, + "step": 2736 + }, + { + "epoch": 0.67, + "learning_rate": 1.9034970570504684e-05, + "loss": 0.2589, + "step": 2738 + }, + { + "epoch": 0.67, + "learning_rate": 1.9033278642269467e-05, + "loss": 0.3008, + "step": 2740 + }, + { + "epoch": 0.67, + "learning_rate": 1.9031585307483688e-05, + "loss": 0.2895, + "step": 2742 + }, + { + "epoch": 0.67, + "learning_rate": 1.902989056641102e-05, + "loss": 0.3067, + "step": 2744 + }, + { + "epoch": 0.67, + "learning_rate": 1.9028194419315338e-05, + "loss": 0.2846, + "step": 2746 + }, + { + "epoch": 0.67, + "learning_rate": 1.9026496866460753e-05, + "loss": 0.2877, + "step": 2748 + }, + { + "epoch": 0.67, + "learning_rate": 1.902479790811158e-05, + "loss": 0.3036, + "step": 2750 + }, + { + "epoch": 0.67, + "learning_rate": 1.9023097544532368e-05, + "loss": 0.3012, + "step": 2752 + }, + { + "epoch": 0.67, + "learning_rate": 1.902139577598787e-05, + "loss": 0.2911, + "step": 2754 + }, + { + "epoch": 0.67, + "learning_rate": 1.9019692602743067e-05, + "loss": 0.2929, + "step": 2756 + }, + { + "epoch": 0.67, + "learning_rate": 1.9017988025063153e-05, + "loss": 0.283, + "step": 2758 + }, + { + "epoch": 0.67, + "learning_rate": 1.9016282043213544e-05, + "loss": 0.2908, + "step": 2760 + }, + { + "epoch": 0.67, + "learning_rate": 1.901457465745988e-05, + "loss": 0.2935, + "step": 2762 + }, + { + "epoch": 0.67, + "learning_rate": 1.901286586806801e-05, + "loss": 0.3021, + "step": 2764 + }, + { + "epoch": 0.67, + "learning_rate": 1.9011155675304006e-05, + "loss": 0.2932, + "step": 2766 + }, + { + "epoch": 0.67, + "learning_rate": 1.9009444079434162e-05, + "loss": 0.3196, + "step": 2768 + }, + { + "epoch": 0.67, + "learning_rate": 1.9007731080724977e-05, + "loss": 0.2999, + "step": 2770 + }, + { + "epoch": 0.68, + "learning_rate": 1.9006016679443187e-05, + "loss": 0.2936, + "step": 2772 + }, + { + "epoch": 0.68, + "learning_rate": 1.9004300875855734e-05, + "loss": 0.285, + "step": 2774 + }, + { + "epoch": 0.68, + "learning_rate": 1.9002583670229778e-05, + "loss": 0.2927, + "step": 2776 + }, + { + "epoch": 0.68, + "learning_rate": 1.9000865062832708e-05, + "loss": 0.2814, + "step": 2778 + }, + { + "epoch": 0.68, + "learning_rate": 1.899914505393212e-05, + "loss": 0.3099, + "step": 2780 + }, + { + "epoch": 0.68, + "learning_rate": 1.8997423643795836e-05, + "loss": 0.3047, + "step": 2782 + }, + { + "epoch": 0.68, + "learning_rate": 1.899570083269189e-05, + "loss": 0.2742, + "step": 2784 + }, + { + "epoch": 0.68, + "learning_rate": 1.8993976620888536e-05, + "loss": 0.3233, + "step": 2786 + }, + { + "epoch": 0.68, + "learning_rate": 1.8992251008654248e-05, + "loss": 0.3003, + "step": 2788 + }, + { + "epoch": 0.68, + "learning_rate": 1.899052399625772e-05, + "loss": 0.3147, + "step": 2790 + }, + { + "epoch": 0.68, + "learning_rate": 1.8988795583967855e-05, + "loss": 0.2998, + "step": 2792 + }, + { + "epoch": 0.68, + "learning_rate": 1.8987065772053785e-05, + "loss": 0.2711, + "step": 2794 + }, + { + "epoch": 0.68, + "learning_rate": 1.8985334560784853e-05, + "loss": 0.3152, + "step": 2796 + }, + { + "epoch": 0.68, + "learning_rate": 1.8983601950430623e-05, + "loss": 0.3011, + "step": 2798 + }, + { + "epoch": 0.68, + "learning_rate": 1.8981867941260875e-05, + "loss": 0.2822, + "step": 2800 + }, + { + "epoch": 0.68, + "learning_rate": 1.8980132533545605e-05, + "loss": 0.2903, + "step": 2802 + }, + { + "epoch": 0.68, + "learning_rate": 1.8978395727555034e-05, + "loss": 0.3006, + "step": 2804 + }, + { + "epoch": 0.68, + "learning_rate": 1.8976657523559592e-05, + "loss": 0.318, + "step": 2806 + }, + { + "epoch": 0.68, + "learning_rate": 1.8974917921829934e-05, + "loss": 0.2895, + "step": 2808 + }, + { + "epoch": 0.68, + "learning_rate": 1.8973176922636927e-05, + "loss": 0.3041, + "step": 2810 + }, + { + "epoch": 0.69, + "learning_rate": 1.897143452625166e-05, + "loss": 0.3035, + "step": 2812 + }, + { + "epoch": 0.69, + "learning_rate": 1.896969073294543e-05, + "loss": 0.2651, + "step": 2814 + }, + { + "epoch": 0.69, + "learning_rate": 1.896794554298977e-05, + "loss": 0.2857, + "step": 2816 + }, + { + "epoch": 0.69, + "learning_rate": 1.896619895665641e-05, + "loss": 0.2826, + "step": 2818 + }, + { + "epoch": 0.69, + "learning_rate": 1.8964450974217317e-05, + "loss": 0.2946, + "step": 2820 + }, + { + "epoch": 0.69, + "learning_rate": 1.8962701595944657e-05, + "loss": 0.295, + "step": 2822 + }, + { + "epoch": 0.69, + "learning_rate": 1.8960950822110822e-05, + "loss": 0.3086, + "step": 2824 + }, + { + "epoch": 0.69, + "learning_rate": 1.8959198652988423e-05, + "loss": 0.2817, + "step": 2826 + }, + { + "epoch": 0.69, + "learning_rate": 1.8957445088850286e-05, + "loss": 0.2671, + "step": 2828 + }, + { + "epoch": 0.69, + "learning_rate": 1.8955690129969455e-05, + "loss": 0.2744, + "step": 2830 + }, + { + "epoch": 0.69, + "learning_rate": 1.8953933776619192e-05, + "loss": 0.2972, + "step": 2832 + }, + { + "epoch": 0.69, + "learning_rate": 1.8952176029072968e-05, + "loss": 0.3035, + "step": 2834 + }, + { + "epoch": 0.69, + "learning_rate": 1.8950416887604485e-05, + "loss": 0.3069, + "step": 2836 + }, + { + "epoch": 0.69, + "learning_rate": 1.8948656352487652e-05, + "loss": 0.2702, + "step": 2838 + }, + { + "epoch": 0.69, + "learning_rate": 1.8946894423996592e-05, + "loss": 0.299, + "step": 2840 + }, + { + "epoch": 0.69, + "learning_rate": 1.8945131102405664e-05, + "loss": 0.2958, + "step": 2842 + }, + { + "epoch": 0.69, + "learning_rate": 1.894336638798942e-05, + "loss": 0.2513, + "step": 2844 + }, + { + "epoch": 0.69, + "learning_rate": 1.894160028102264e-05, + "loss": 0.316, + "step": 2846 + }, + { + "epoch": 0.69, + "learning_rate": 1.8939832781780326e-05, + "loss": 0.2811, + "step": 2848 + }, + { + "epoch": 0.69, + "learning_rate": 1.8938063890537687e-05, + "loss": 0.2888, + "step": 2850 + }, + { + "epoch": 0.69, + "learning_rate": 1.8936293607570154e-05, + "loss": 0.2695, + "step": 2852 + }, + { + "epoch": 0.7, + "learning_rate": 1.8934521933153376e-05, + "loss": 0.299, + "step": 2854 + }, + { + "epoch": 0.7, + "learning_rate": 1.893274886756321e-05, + "loss": 0.2679, + "step": 2856 + }, + { + "epoch": 0.7, + "learning_rate": 1.8930974411075743e-05, + "loss": 0.2949, + "step": 2858 + }, + { + "epoch": 0.7, + "learning_rate": 1.8929198563967265e-05, + "loss": 0.2841, + "step": 2860 + }, + { + "epoch": 0.7, + "learning_rate": 1.8927421326514296e-05, + "loss": 0.2693, + "step": 2862 + }, + { + "epoch": 0.7, + "learning_rate": 1.892564269899356e-05, + "loss": 0.2988, + "step": 2864 + }, + { + "epoch": 0.7, + "learning_rate": 1.8923862681682005e-05, + "loss": 0.3143, + "step": 2866 + }, + { + "epoch": 0.7, + "learning_rate": 1.892208127485679e-05, + "loss": 0.2889, + "step": 2868 + }, + { + "epoch": 0.7, + "learning_rate": 1.8920298478795298e-05, + "loss": 0.2918, + "step": 2870 + }, + { + "epoch": 0.7, + "learning_rate": 1.8918514293775123e-05, + "loss": 0.2983, + "step": 2872 + }, + { + "epoch": 0.7, + "learning_rate": 1.8916728720074077e-05, + "loss": 0.2775, + "step": 2874 + }, + { + "epoch": 0.7, + "learning_rate": 1.8914941757970184e-05, + "loss": 0.2812, + "step": 2876 + }, + { + "epoch": 0.7, + "learning_rate": 1.891315340774169e-05, + "loss": 0.2889, + "step": 2878 + }, + { + "epoch": 0.7, + "learning_rate": 1.8911363669667052e-05, + "loss": 0.2653, + "step": 2880 + }, + { + "epoch": 0.7, + "learning_rate": 1.8909572544024955e-05, + "loss": 0.2936, + "step": 2882 + }, + { + "epoch": 0.7, + "learning_rate": 1.8907780031094282e-05, + "loss": 0.2886, + "step": 2884 + }, + { + "epoch": 0.7, + "learning_rate": 1.8905986131154138e-05, + "loss": 0.2668, + "step": 2886 + }, + { + "epoch": 0.7, + "learning_rate": 1.890419084448386e-05, + "loss": 0.3083, + "step": 2888 + }, + { + "epoch": 0.7, + "learning_rate": 1.8902394171362975e-05, + "loss": 0.2789, + "step": 2890 + }, + { + "epoch": 0.7, + "learning_rate": 1.8900596112071247e-05, + "loss": 0.2871, + "step": 2892 + }, + { + "epoch": 0.71, + "learning_rate": 1.8898796666888645e-05, + "loss": 0.276, + "step": 2894 + }, + { + "epoch": 0.71, + "learning_rate": 1.889699583609535e-05, + "loss": 0.2866, + "step": 2896 + }, + { + "epoch": 0.71, + "learning_rate": 1.889519361997178e-05, + "loss": 0.2888, + "step": 2898 + }, + { + "epoch": 0.71, + "learning_rate": 1.8893390018798538e-05, + "loss": 0.2905, + "step": 2900 + }, + { + "epoch": 0.71, + "learning_rate": 1.889158503285647e-05, + "loss": 0.3007, + "step": 2902 + }, + { + "epoch": 0.71, + "learning_rate": 1.888977866242662e-05, + "loss": 0.2915, + "step": 2904 + }, + { + "epoch": 0.71, + "learning_rate": 1.8887970907790255e-05, + "loss": 0.308, + "step": 2906 + }, + { + "epoch": 0.71, + "learning_rate": 1.8886161769228857e-05, + "loss": 0.2867, + "step": 2908 + }, + { + "epoch": 0.71, + "learning_rate": 1.888435124702412e-05, + "loss": 0.2805, + "step": 2910 + }, + { + "epoch": 0.71, + "learning_rate": 1.888253934145796e-05, + "loss": 0.2939, + "step": 2912 + }, + { + "epoch": 0.71, + "learning_rate": 1.8880726052812502e-05, + "loss": 0.298, + "step": 2914 + }, + { + "epoch": 0.71, + "learning_rate": 1.887891138137009e-05, + "loss": 0.2902, + "step": 2916 + }, + { + "epoch": 0.71, + "learning_rate": 1.8877095327413283e-05, + "loss": 0.2854, + "step": 2918 + }, + { + "epoch": 0.71, + "learning_rate": 1.8875277891224854e-05, + "loss": 0.2569, + "step": 2920 + }, + { + "epoch": 0.71, + "learning_rate": 1.887345907308779e-05, + "loss": 0.3057, + "step": 2922 + }, + { + "epoch": 0.71, + "learning_rate": 1.8871638873285295e-05, + "loss": 0.2779, + "step": 2924 + }, + { + "epoch": 0.71, + "learning_rate": 1.8869817292100792e-05, + "loss": 0.2838, + "step": 2926 + }, + { + "epoch": 0.71, + "learning_rate": 1.886799432981791e-05, + "loss": 0.3311, + "step": 2928 + }, + { + "epoch": 0.71, + "learning_rate": 1.88661699867205e-05, + "loss": 0.2978, + "step": 2930 + }, + { + "epoch": 0.71, + "learning_rate": 1.8864344263092627e-05, + "loss": 0.2899, + "step": 2932 + }, + { + "epoch": 0.71, + "learning_rate": 1.886251715921857e-05, + "loss": 0.2747, + "step": 2934 + }, + { + "epoch": 0.72, + "learning_rate": 1.8860688675382823e-05, + "loss": 0.2994, + "step": 2936 + }, + { + "epoch": 0.72, + "learning_rate": 1.8858858811870094e-05, + "loss": 0.2803, + "step": 2938 + }, + { + "epoch": 0.72, + "learning_rate": 1.885702756896531e-05, + "loss": 0.2816, + "step": 2940 + }, + { + "epoch": 0.72, + "learning_rate": 1.8855194946953605e-05, + "loss": 0.2603, + "step": 2942 + }, + { + "epoch": 0.72, + "learning_rate": 1.885336094612033e-05, + "loss": 0.3026, + "step": 2944 + }, + { + "epoch": 0.72, + "learning_rate": 1.8851525566751062e-05, + "loss": 0.2952, + "step": 2946 + }, + { + "epoch": 0.72, + "learning_rate": 1.8849688809131578e-05, + "loss": 0.2844, + "step": 2948 + }, + { + "epoch": 0.72, + "learning_rate": 1.8847850673547877e-05, + "loss": 0.2769, + "step": 2950 + }, + { + "epoch": 0.72, + "learning_rate": 1.8846011160286168e-05, + "loss": 0.3053, + "step": 2952 + }, + { + "epoch": 0.72, + "learning_rate": 1.8844170269632877e-05, + "loss": 0.2921, + "step": 2954 + }, + { + "epoch": 0.72, + "learning_rate": 1.884232800187465e-05, + "loss": 0.2964, + "step": 2956 + }, + { + "epoch": 0.72, + "learning_rate": 1.8840484357298338e-05, + "loss": 0.2978, + "step": 2958 + }, + { + "epoch": 0.72, + "learning_rate": 1.8838639336191012e-05, + "loss": 0.2862, + "step": 2960 + }, + { + "epoch": 0.72, + "learning_rate": 1.8836792938839955e-05, + "loss": 0.245, + "step": 2962 + }, + { + "epoch": 0.72, + "learning_rate": 1.8834945165532662e-05, + "loss": 0.2917, + "step": 2964 + }, + { + "epoch": 0.72, + "learning_rate": 1.8833096016556855e-05, + "loss": 0.2604, + "step": 2966 + }, + { + "epoch": 0.72, + "learning_rate": 1.883124549220045e-05, + "loss": 0.2775, + "step": 2968 + }, + { + "epoch": 0.72, + "learning_rate": 1.8829393592751594e-05, + "loss": 0.2993, + "step": 2970 + }, + { + "epoch": 0.72, + "learning_rate": 1.8827540318498644e-05, + "loss": 0.2926, + "step": 2972 + }, + { + "epoch": 0.72, + "learning_rate": 1.882568566973016e-05, + "loss": 0.3071, + "step": 2974 + }, + { + "epoch": 0.73, + "learning_rate": 1.882382964673493e-05, + "loss": 0.2846, + "step": 2976 + }, + { + "epoch": 0.73, + "learning_rate": 1.8821972249801956e-05, + "loss": 0.2873, + "step": 2978 + }, + { + "epoch": 0.73, + "learning_rate": 1.8820113479220442e-05, + "loss": 0.2839, + "step": 2980 + }, + { + "epoch": 0.73, + "learning_rate": 1.8818253335279815e-05, + "loss": 0.2907, + "step": 2982 + }, + { + "epoch": 0.73, + "learning_rate": 1.881639181826971e-05, + "loss": 0.2969, + "step": 2984 + }, + { + "epoch": 0.73, + "learning_rate": 1.881452892847999e-05, + "loss": 0.2998, + "step": 2986 + }, + { + "epoch": 0.73, + "learning_rate": 1.881266466620071e-05, + "loss": 0.3013, + "step": 2988 + }, + { + "epoch": 0.73, + "learning_rate": 1.8810799031722153e-05, + "loss": 0.2812, + "step": 2990 + }, + { + "epoch": 0.73, + "learning_rate": 1.8808932025334816e-05, + "loss": 0.3009, + "step": 2992 + }, + { + "epoch": 0.73, + "learning_rate": 1.8807063647329402e-05, + "loss": 0.287, + "step": 2994 + }, + { + "epoch": 0.73, + "learning_rate": 1.8805193897996834e-05, + "loss": 0.2658, + "step": 2996 + }, + { + "epoch": 0.73, + "learning_rate": 1.8803322777628247e-05, + "loss": 0.2631, + "step": 2998 + }, + { + "epoch": 0.73, + "learning_rate": 1.880145028651499e-05, + "loss": 0.2893, + "step": 3000 + }, + { + "epoch": 0.73, + "learning_rate": 1.8799576424948616e-05, + "loss": 0.3094, + "step": 3002 + }, + { + "epoch": 0.73, + "learning_rate": 1.8797701193220908e-05, + "loss": 0.2807, + "step": 3004 + }, + { + "epoch": 0.73, + "learning_rate": 1.8795824591623853e-05, + "loss": 0.3014, + "step": 3006 + }, + { + "epoch": 0.73, + "learning_rate": 1.8793946620449647e-05, + "loss": 0.2875, + "step": 3008 + }, + { + "epoch": 0.73, + "learning_rate": 1.879206727999071e-05, + "loss": 0.2879, + "step": 3010 + }, + { + "epoch": 0.73, + "learning_rate": 1.879018657053967e-05, + "loss": 0.2772, + "step": 3012 + }, + { + "epoch": 0.73, + "learning_rate": 1.878830449238936e-05, + "loss": 0.275, + "step": 3014 + }, + { + "epoch": 0.73, + "learning_rate": 1.8786421045832842e-05, + "loss": 0.2864, + "step": 3016 + }, + { + "epoch": 0.74, + "learning_rate": 1.878453623116338e-05, + "loss": 0.3137, + "step": 3018 + }, + { + "epoch": 0.74, + "learning_rate": 1.8782650048674456e-05, + "loss": 0.2784, + "step": 3020 + }, + { + "epoch": 0.74, + "learning_rate": 1.8780762498659762e-05, + "loss": 0.2983, + "step": 3022 + }, + { + "epoch": 0.74, + "learning_rate": 1.87788735814132e-05, + "loss": 0.2735, + "step": 3024 + }, + { + "epoch": 0.74, + "learning_rate": 1.8776983297228896e-05, + "loss": 0.2899, + "step": 3026 + }, + { + "epoch": 0.74, + "learning_rate": 1.8775091646401177e-05, + "loss": 0.3009, + "step": 3028 + }, + { + "epoch": 0.74, + "learning_rate": 1.8773198629224588e-05, + "loss": 0.2586, + "step": 3030 + }, + { + "epoch": 0.74, + "learning_rate": 1.8771304245993888e-05, + "loss": 0.2723, + "step": 3032 + }, + { + "epoch": 0.74, + "learning_rate": 1.876940849700404e-05, + "loss": 0.278, + "step": 3034 + }, + { + "epoch": 0.74, + "learning_rate": 1.876751138255024e-05, + "loss": 0.2593, + "step": 3036 + }, + { + "epoch": 0.74, + "learning_rate": 1.8765612902927874e-05, + "loss": 0.2761, + "step": 3038 + }, + { + "epoch": 0.74, + "learning_rate": 1.876371305843255e-05, + "loss": 0.3046, + "step": 3040 + }, + { + "epoch": 0.74, + "learning_rate": 1.8761811849360093e-05, + "loss": 0.2837, + "step": 3042 + }, + { + "epoch": 0.74, + "learning_rate": 1.8759909276006527e-05, + "loss": 0.2996, + "step": 3044 + }, + { + "epoch": 0.74, + "learning_rate": 1.8758005338668105e-05, + "loss": 0.2737, + "step": 3046 + }, + { + "epoch": 0.74, + "learning_rate": 1.8756100037641282e-05, + "loss": 0.267, + "step": 3048 + }, + { + "epoch": 0.74, + "learning_rate": 1.8754193373222732e-05, + "loss": 0.2913, + "step": 3050 + }, + { + "epoch": 0.74, + "learning_rate": 1.875228534570933e-05, + "loss": 0.2832, + "step": 3052 + }, + { + "epoch": 0.74, + "learning_rate": 1.8750375955398176e-05, + "loss": 0.3088, + "step": 3054 + }, + { + "epoch": 0.74, + "learning_rate": 1.874846520258657e-05, + "loss": 0.2808, + "step": 3056 + }, + { + "epoch": 0.75, + "learning_rate": 1.8746553087572042e-05, + "loss": 0.2693, + "step": 3058 + }, + { + "epoch": 0.75, + "learning_rate": 1.874463961065231e-05, + "loss": 0.2798, + "step": 3060 + }, + { + "epoch": 0.75, + "learning_rate": 1.8742724772125326e-05, + "loss": 0.2919, + "step": 3062 + }, + { + "epoch": 0.75, + "learning_rate": 1.8740808572289246e-05, + "loss": 0.2757, + "step": 3064 + }, + { + "epoch": 0.75, + "learning_rate": 1.873889101144243e-05, + "loss": 0.2987, + "step": 3066 + }, + { + "epoch": 0.75, + "learning_rate": 1.873697208988346e-05, + "loss": 0.2629, + "step": 3068 + }, + { + "epoch": 0.75, + "learning_rate": 1.8735051807911127e-05, + "loss": 0.2788, + "step": 3070 + }, + { + "epoch": 0.75, + "learning_rate": 1.8733130165824437e-05, + "loss": 0.2753, + "step": 3072 + }, + { + "epoch": 0.75, + "learning_rate": 1.8731207163922597e-05, + "loss": 0.3008, + "step": 3074 + }, + { + "epoch": 0.75, + "learning_rate": 1.8729282802505036e-05, + "loss": 0.2813, + "step": 3076 + }, + { + "epoch": 0.75, + "learning_rate": 1.8727357081871398e-05, + "loss": 0.2807, + "step": 3078 + }, + { + "epoch": 0.75, + "learning_rate": 1.872543000232152e-05, + "loss": 0.2905, + "step": 3080 + }, + { + "epoch": 0.75, + "learning_rate": 1.8723501564155477e-05, + "loss": 0.2691, + "step": 3082 + }, + { + "epoch": 0.75, + "learning_rate": 1.8721571767673534e-05, + "loss": 0.2876, + "step": 3084 + }, + { + "epoch": 0.75, + "learning_rate": 1.8719640613176175e-05, + "loss": 0.302, + "step": 3086 + }, + { + "epoch": 0.75, + "learning_rate": 1.8717708100964095e-05, + "loss": 0.2913, + "step": 3088 + }, + { + "epoch": 0.75, + "learning_rate": 1.8715774231338203e-05, + "loss": 0.2932, + "step": 3090 + }, + { + "epoch": 0.75, + "learning_rate": 1.871383900459962e-05, + "loss": 0.3084, + "step": 3092 + }, + { + "epoch": 0.75, + "learning_rate": 1.8711902421049668e-05, + "loss": 0.2886, + "step": 3094 + }, + { + "epoch": 0.75, + "learning_rate": 1.8709964480989896e-05, + "loss": 0.3087, + "step": 3096 + }, + { + "epoch": 0.75, + "learning_rate": 1.8708025184722046e-05, + "loss": 0.2581, + "step": 3098 + }, + { + "epoch": 0.76, + "learning_rate": 1.8706084532548093e-05, + "loss": 0.2853, + "step": 3100 + }, + { + "epoch": 0.76, + "learning_rate": 1.8704142524770207e-05, + "loss": 0.2844, + "step": 3102 + }, + { + "epoch": 0.76, + "learning_rate": 1.870219916169077e-05, + "loss": 0.2671, + "step": 3104 + }, + { + "epoch": 0.76, + "learning_rate": 1.8700254443612376e-05, + "loss": 0.2533, + "step": 3106 + }, + { + "epoch": 0.76, + "learning_rate": 1.8698308370837847e-05, + "loss": 0.2754, + "step": 3108 + }, + { + "epoch": 0.76, + "learning_rate": 1.869636094367018e-05, + "loss": 0.2697, + "step": 3110 + }, + { + "epoch": 0.76, + "learning_rate": 1.8694412162412626e-05, + "loss": 0.3111, + "step": 3112 + }, + { + "epoch": 0.76, + "learning_rate": 1.869246202736861e-05, + "loss": 0.2928, + "step": 3114 + }, + { + "epoch": 0.76, + "learning_rate": 1.8690510538841792e-05, + "loss": 0.2738, + "step": 3116 + }, + { + "epoch": 0.76, + "learning_rate": 1.868855769713603e-05, + "loss": 0.2643, + "step": 3118 + }, + { + "epoch": 0.76, + "learning_rate": 1.868660350255539e-05, + "loss": 0.2714, + "step": 3120 + }, + { + "epoch": 0.76, + "learning_rate": 1.8684647955404168e-05, + "loss": 0.2977, + "step": 3122 + }, + { + "epoch": 0.76, + "learning_rate": 1.8682691055986847e-05, + "loss": 0.2676, + "step": 3124 + }, + { + "epoch": 0.76, + "learning_rate": 1.8680732804608136e-05, + "loss": 0.27, + "step": 3126 + }, + { + "epoch": 0.76, + "learning_rate": 1.8678773201572948e-05, + "loss": 0.2979, + "step": 3128 + }, + { + "epoch": 0.76, + "learning_rate": 1.867681224718641e-05, + "loss": 0.2846, + "step": 3130 + }, + { + "epoch": 0.76, + "learning_rate": 1.8674849941753857e-05, + "loss": 0.2798, + "step": 3132 + }, + { + "epoch": 0.76, + "learning_rate": 1.8672886285580835e-05, + "loss": 0.3039, + "step": 3134 + }, + { + "epoch": 0.76, + "learning_rate": 1.8670921278973098e-05, + "loss": 0.2681, + "step": 3136 + }, + { + "epoch": 0.76, + "learning_rate": 1.8668954922236613e-05, + "loss": 0.3153, + "step": 3138 + }, + { + "epoch": 0.77, + "learning_rate": 1.866698721567756e-05, + "loss": 0.3031, + "step": 3140 + }, + { + "epoch": 0.77, + "learning_rate": 1.8665018159602323e-05, + "loss": 0.2815, + "step": 3142 + }, + { + "epoch": 0.77, + "learning_rate": 1.8663047754317503e-05, + "loss": 0.2812, + "step": 3144 + }, + { + "epoch": 0.77, + "learning_rate": 1.8661076000129902e-05, + "loss": 0.2441, + "step": 3146 + }, + { + "epoch": 0.77, + "learning_rate": 1.8659102897346543e-05, + "loss": 0.2887, + "step": 3148 + }, + { + "epoch": 0.77, + "learning_rate": 1.8657128446274644e-05, + "loss": 0.2726, + "step": 3150 + }, + { + "epoch": 0.77, + "learning_rate": 1.8655152647221652e-05, + "loss": 0.3039, + "step": 3152 + }, + { + "epoch": 0.77, + "learning_rate": 1.865317550049521e-05, + "loss": 0.2891, + "step": 3154 + }, + { + "epoch": 0.77, + "learning_rate": 1.8651197006403173e-05, + "loss": 0.2501, + "step": 3156 + }, + { + "epoch": 0.77, + "learning_rate": 1.8649217165253608e-05, + "loss": 0.3005, + "step": 3158 + }, + { + "epoch": 0.77, + "learning_rate": 1.8647235977354796e-05, + "loss": 0.2675, + "step": 3160 + }, + { + "epoch": 0.77, + "learning_rate": 1.864525344301522e-05, + "loss": 0.2938, + "step": 3162 + }, + { + "epoch": 0.77, + "learning_rate": 1.8643269562543573e-05, + "loss": 0.2947, + "step": 3164 + }, + { + "epoch": 0.77, + "learning_rate": 1.8641284336248763e-05, + "loss": 0.2873, + "step": 3166 + }, + { + "epoch": 0.77, + "learning_rate": 1.8639297764439908e-05, + "loss": 0.2959, + "step": 3168 + }, + { + "epoch": 0.77, + "learning_rate": 1.863730984742633e-05, + "loss": 0.2689, + "step": 3170 + }, + { + "epoch": 0.77, + "learning_rate": 1.8635320585517558e-05, + "loss": 0.2744, + "step": 3172 + }, + { + "epoch": 0.77, + "learning_rate": 1.863332997902334e-05, + "loss": 0.2702, + "step": 3174 + }, + { + "epoch": 0.77, + "learning_rate": 1.863133802825363e-05, + "loss": 0.2874, + "step": 3176 + }, + { + "epoch": 0.77, + "learning_rate": 1.8629344733518587e-05, + "loss": 0.2631, + "step": 3178 + }, + { + "epoch": 0.77, + "learning_rate": 1.8627350095128583e-05, + "loss": 0.3093, + "step": 3180 + }, + { + "epoch": 0.78, + "learning_rate": 1.8625354113394202e-05, + "loss": 0.2725, + "step": 3182 + }, + { + "epoch": 0.78, + "learning_rate": 1.862335678862623e-05, + "loss": 0.2925, + "step": 3184 + }, + { + "epoch": 0.78, + "learning_rate": 1.8621358121135665e-05, + "loss": 0.2886, + "step": 3186 + }, + { + "epoch": 0.78, + "learning_rate": 1.8619358111233717e-05, + "loss": 0.2929, + "step": 3188 + }, + { + "epoch": 0.78, + "learning_rate": 1.8617356759231798e-05, + "loss": 0.2846, + "step": 3190 + }, + { + "epoch": 0.78, + "learning_rate": 1.861535406544154e-05, + "loss": 0.2764, + "step": 3192 + }, + { + "epoch": 0.78, + "learning_rate": 1.8613350030174777e-05, + "loss": 0.2613, + "step": 3194 + }, + { + "epoch": 0.78, + "learning_rate": 1.8611344653743552e-05, + "loss": 0.2757, + "step": 3196 + }, + { + "epoch": 0.78, + "learning_rate": 1.860933793646011e-05, + "loss": 0.2532, + "step": 3198 + }, + { + "epoch": 0.78, + "learning_rate": 1.8607329878636925e-05, + "loss": 0.2976, + "step": 3200 + }, + { + "epoch": 0.78, + "learning_rate": 1.8605320480586658e-05, + "loss": 0.2663, + "step": 3202 + }, + { + "epoch": 0.78, + "learning_rate": 1.860330974262219e-05, + "loss": 0.2917, + "step": 3204 + }, + { + "epoch": 0.78, + "learning_rate": 1.860129766505661e-05, + "loss": 0.2856, + "step": 3206 + }, + { + "epoch": 0.78, + "learning_rate": 1.8599284248203215e-05, + "loss": 0.2747, + "step": 3208 + }, + { + "epoch": 0.78, + "learning_rate": 1.85972694923755e-05, + "loss": 0.2939, + "step": 3210 + }, + { + "epoch": 0.78, + "learning_rate": 1.8595253397887187e-05, + "loss": 0.2551, + "step": 3212 + }, + { + "epoch": 0.78, + "learning_rate": 1.85932359650522e-05, + "loss": 0.2922, + "step": 3214 + }, + { + "epoch": 0.78, + "learning_rate": 1.859121719418466e-05, + "loss": 0.2691, + "step": 3216 + }, + { + "epoch": 0.78, + "learning_rate": 1.8589197085598907e-05, + "loss": 0.2693, + "step": 3218 + }, + { + "epoch": 0.78, + "learning_rate": 1.8587175639609492e-05, + "loss": 0.2956, + "step": 3220 + }, + { + "epoch": 0.79, + "learning_rate": 1.8585152856531166e-05, + "loss": 0.2831, + "step": 3222 + }, + { + "epoch": 0.79, + "learning_rate": 1.858312873667889e-05, + "loss": 0.2593, + "step": 3224 + }, + { + "epoch": 0.79, + "learning_rate": 1.858110328036784e-05, + "loss": 0.2913, + "step": 3226 + }, + { + "epoch": 0.79, + "learning_rate": 1.8579076487913394e-05, + "loss": 0.2999, + "step": 3228 + }, + { + "epoch": 0.79, + "learning_rate": 1.8577048359631134e-05, + "loss": 0.2493, + "step": 3230 + }, + { + "epoch": 0.79, + "learning_rate": 1.857501889583686e-05, + "loss": 0.2561, + "step": 3232 + }, + { + "epoch": 0.79, + "learning_rate": 1.857298809684658e-05, + "loss": 0.2595, + "step": 3234 + }, + { + "epoch": 0.79, + "learning_rate": 1.857095596297649e-05, + "loss": 0.2918, + "step": 3236 + }, + { + "epoch": 0.79, + "learning_rate": 1.8568922494543023e-05, + "loss": 0.2856, + "step": 3238 + }, + { + "epoch": 0.79, + "learning_rate": 1.8566887691862797e-05, + "loss": 0.3029, + "step": 3240 + }, + { + "epoch": 0.79, + "learning_rate": 1.856485155525265e-05, + "loss": 0.2972, + "step": 3242 + }, + { + "epoch": 0.79, + "learning_rate": 1.8562814085029622e-05, + "loss": 0.3018, + "step": 3244 + }, + { + "epoch": 0.79, + "learning_rate": 1.8560775281510968e-05, + "loss": 0.2929, + "step": 3246 + }, + { + "epoch": 0.79, + "learning_rate": 1.855873514501414e-05, + "loss": 0.2733, + "step": 3248 + }, + { + "epoch": 0.79, + "learning_rate": 1.85566936758568e-05, + "loss": 0.3046, + "step": 3250 + }, + { + "epoch": 0.79, + "learning_rate": 1.855465087435683e-05, + "loss": 0.3002, + "step": 3252 + }, + { + "epoch": 0.79, + "learning_rate": 1.85526067408323e-05, + "loss": 0.2803, + "step": 3254 + }, + { + "epoch": 0.79, + "learning_rate": 1.8550561275601504e-05, + "loss": 0.266, + "step": 3256 + }, + { + "epoch": 0.79, + "learning_rate": 1.8548514478982934e-05, + "loss": 0.2744, + "step": 3258 + }, + { + "epoch": 0.79, + "learning_rate": 1.8546466351295293e-05, + "loss": 0.3024, + "step": 3260 + }, + { + "epoch": 0.79, + "learning_rate": 1.854441689285749e-05, + "loss": 0.2852, + "step": 3262 + }, + { + "epoch": 0.8, + "learning_rate": 1.8542366103988637e-05, + "loss": 0.2911, + "step": 3264 + }, + { + "epoch": 0.8, + "learning_rate": 1.8540313985008063e-05, + "loss": 0.3099, + "step": 3266 + }, + { + "epoch": 0.8, + "learning_rate": 1.85382605362353e-05, + "loss": 0.2841, + "step": 3268 + }, + { + "epoch": 0.8, + "learning_rate": 1.8536205757990077e-05, + "loss": 0.307, + "step": 3270 + }, + { + "epoch": 0.8, + "learning_rate": 1.853414965059235e-05, + "loss": 0.2702, + "step": 3272 + }, + { + "epoch": 0.8, + "learning_rate": 1.853209221436226e-05, + "loss": 0.2847, + "step": 3274 + }, + { + "epoch": 0.8, + "learning_rate": 1.853003344962017e-05, + "loss": 0.2993, + "step": 3276 + }, + { + "epoch": 0.8, + "learning_rate": 1.8527973356686647e-05, + "loss": 0.2662, + "step": 3278 + }, + { + "epoch": 0.8, + "learning_rate": 1.8525911935882463e-05, + "loss": 0.2886, + "step": 3280 + }, + { + "epoch": 0.8, + "learning_rate": 1.8523849187528596e-05, + "loss": 0.2778, + "step": 3282 + }, + { + "epoch": 0.8, + "learning_rate": 1.8521785111946227e-05, + "loss": 0.2657, + "step": 3284 + }, + { + "epoch": 0.8, + "learning_rate": 1.851971970945676e-05, + "loss": 0.285, + "step": 3286 + }, + { + "epoch": 0.8, + "learning_rate": 1.851765298038178e-05, + "loss": 0.2591, + "step": 3288 + }, + { + "epoch": 0.8, + "learning_rate": 1.8515584925043105e-05, + "loss": 0.2755, + "step": 3290 + }, + { + "epoch": 0.8, + "learning_rate": 1.851351554376274e-05, + "loss": 0.2671, + "step": 3292 + }, + { + "epoch": 0.8, + "learning_rate": 1.8511444836862905e-05, + "loss": 0.2477, + "step": 3294 + }, + { + "epoch": 0.8, + "learning_rate": 1.8509372804666022e-05, + "loss": 0.2592, + "step": 3296 + }, + { + "epoch": 0.8, + "learning_rate": 1.8507299447494728e-05, + "loss": 0.2712, + "step": 3298 + }, + { + "epoch": 0.8, + "learning_rate": 1.8505224765671857e-05, + "loss": 0.2794, + "step": 3300 + }, + { + "epoch": 0.8, + "learning_rate": 1.850314875952045e-05, + "loss": 0.2924, + "step": 3302 + }, + { + "epoch": 0.81, + "learning_rate": 1.8501071429363768e-05, + "loss": 0.2983, + "step": 3304 + }, + { + "epoch": 0.81, + "learning_rate": 1.8498992775525255e-05, + "loss": 0.3002, + "step": 3306 + }, + { + "epoch": 0.81, + "learning_rate": 1.849691279832858e-05, + "loss": 0.2934, + "step": 3308 + }, + { + "epoch": 0.81, + "learning_rate": 1.849483149809761e-05, + "loss": 0.2765, + "step": 3310 + }, + { + "epoch": 0.81, + "learning_rate": 1.8492748875156414e-05, + "loss": 0.3018, + "step": 3312 + }, + { + "epoch": 0.81, + "learning_rate": 1.849066492982928e-05, + "loss": 0.2974, + "step": 3314 + }, + { + "epoch": 0.81, + "learning_rate": 1.848857966244069e-05, + "loss": 0.2681, + "step": 3316 + }, + { + "epoch": 0.81, + "learning_rate": 1.848649307331534e-05, + "loss": 0.314, + "step": 3318 + }, + { + "epoch": 0.81, + "learning_rate": 1.8484405162778122e-05, + "loss": 0.2714, + "step": 3320 + }, + { + "epoch": 0.81, + "learning_rate": 1.8482315931154146e-05, + "loss": 0.2594, + "step": 3322 + }, + { + "epoch": 0.81, + "learning_rate": 1.8480225378768717e-05, + "loss": 0.2724, + "step": 3324 + }, + { + "epoch": 0.81, + "learning_rate": 1.847813350594735e-05, + "loss": 0.2667, + "step": 3326 + }, + { + "epoch": 0.81, + "learning_rate": 1.8476040313015767e-05, + "loss": 0.2719, + "step": 3328 + }, + { + "epoch": 0.81, + "learning_rate": 1.8473945800299896e-05, + "loss": 0.2878, + "step": 3330 + }, + { + "epoch": 0.81, + "learning_rate": 1.8471849968125867e-05, + "loss": 0.2531, + "step": 3332 + }, + { + "epoch": 0.81, + "learning_rate": 1.8469752816820013e-05, + "loss": 0.2869, + "step": 3334 + }, + { + "epoch": 0.81, + "learning_rate": 1.846765434670888e-05, + "loss": 0.2731, + "step": 3336 + }, + { + "epoch": 0.81, + "learning_rate": 1.846555455811922e-05, + "loss": 0.2727, + "step": 3338 + }, + { + "epoch": 0.81, + "learning_rate": 1.846345345137798e-05, + "loss": 0.278, + "step": 3340 + }, + { + "epoch": 0.81, + "learning_rate": 1.8461351026812318e-05, + "loss": 0.2972, + "step": 3342 + }, + { + "epoch": 0.81, + "learning_rate": 1.8459247284749607e-05, + "loss": 0.2508, + "step": 3344 + }, + { + "epoch": 0.82, + "learning_rate": 1.8457142225517404e-05, + "loss": 0.295, + "step": 3346 + }, + { + "epoch": 0.82, + "learning_rate": 1.845503584944349e-05, + "loss": 0.2679, + "step": 3348 + }, + { + "epoch": 0.82, + "learning_rate": 1.845292815685584e-05, + "loss": 0.2655, + "step": 3350 + }, + { + "epoch": 0.82, + "learning_rate": 1.8450819148082638e-05, + "loss": 0.2707, + "step": 3352 + }, + { + "epoch": 0.82, + "learning_rate": 1.8448708823452275e-05, + "loss": 0.2818, + "step": 3354 + }, + { + "epoch": 0.82, + "learning_rate": 1.8446597183293345e-05, + "loss": 0.2954, + "step": 3356 + }, + { + "epoch": 0.82, + "learning_rate": 1.8444484227934644e-05, + "loss": 0.2882, + "step": 3358 + }, + { + "epoch": 0.82, + "learning_rate": 1.8442369957705178e-05, + "loss": 0.2732, + "step": 3360 + }, + { + "epoch": 0.82, + "learning_rate": 1.844025437293415e-05, + "loss": 0.2784, + "step": 3362 + }, + { + "epoch": 0.82, + "learning_rate": 1.8438137473950984e-05, + "loss": 0.2874, + "step": 3364 + }, + { + "epoch": 0.82, + "learning_rate": 1.843601926108528e-05, + "loss": 0.2865, + "step": 3366 + }, + { + "epoch": 0.82, + "learning_rate": 1.843389973466688e-05, + "loss": 0.2693, + "step": 3368 + }, + { + "epoch": 0.82, + "learning_rate": 1.8431778895025795e-05, + "loss": 0.2997, + "step": 3370 + }, + { + "epoch": 0.82, + "learning_rate": 1.8429656742492263e-05, + "loss": 0.2787, + "step": 3372 + }, + { + "epoch": 0.82, + "learning_rate": 1.8427533277396713e-05, + "loss": 0.2914, + "step": 3374 + }, + { + "epoch": 0.82, + "learning_rate": 1.8425408500069794e-05, + "loss": 0.261, + "step": 3376 + }, + { + "epoch": 0.82, + "learning_rate": 1.8423282410842344e-05, + "loss": 0.2572, + "step": 3378 + }, + { + "epoch": 0.82, + "learning_rate": 1.8421155010045414e-05, + "loss": 0.2966, + "step": 3380 + }, + { + "epoch": 0.82, + "learning_rate": 1.8419026298010256e-05, + "loss": 0.2618, + "step": 3382 + }, + { + "epoch": 0.82, + "learning_rate": 1.841689627506832e-05, + "loss": 0.2907, + "step": 3384 + }, + { + "epoch": 0.83, + "learning_rate": 1.8414764941551277e-05, + "loss": 0.2892, + "step": 3386 + }, + { + "epoch": 0.83, + "learning_rate": 1.8412632297790985e-05, + "loss": 0.2807, + "step": 3388 + }, + { + "epoch": 0.83, + "learning_rate": 1.841049834411952e-05, + "loss": 0.2901, + "step": 3390 + }, + { + "epoch": 0.83, + "learning_rate": 1.8408363080869153e-05, + "loss": 0.2693, + "step": 3392 + }, + { + "epoch": 0.83, + "learning_rate": 1.840622650837235e-05, + "loss": 0.2874, + "step": 3394 + }, + { + "epoch": 0.83, + "learning_rate": 1.8404088626961803e-05, + "loss": 0.2841, + "step": 3396 + }, + { + "epoch": 0.83, + "learning_rate": 1.8401949436970397e-05, + "loss": 0.2967, + "step": 3398 + }, + { + "epoch": 0.83, + "learning_rate": 1.8399808938731218e-05, + "loss": 0.2746, + "step": 3400 + }, + { + "epoch": 0.83, + "learning_rate": 1.839766713257755e-05, + "loss": 0.2864, + "step": 3402 + }, + { + "epoch": 0.83, + "learning_rate": 1.8395524018842904e-05, + "loss": 0.2806, + "step": 3404 + }, + { + "epoch": 0.83, + "learning_rate": 1.8393379597860965e-05, + "loss": 0.2732, + "step": 3406 + }, + { + "epoch": 0.83, + "learning_rate": 1.839123386996564e-05, + "loss": 0.303, + "step": 3408 + }, + { + "epoch": 0.83, + "learning_rate": 1.838908683549104e-05, + "loss": 0.278, + "step": 3410 + }, + { + "epoch": 0.83, + "learning_rate": 1.8386938494771473e-05, + "loss": 0.298, + "step": 3412 + }, + { + "epoch": 0.83, + "learning_rate": 1.8384788848141447e-05, + "loss": 0.2732, + "step": 3414 + }, + { + "epoch": 0.83, + "learning_rate": 1.838263789593568e-05, + "loss": 0.2543, + "step": 3416 + }, + { + "epoch": 0.83, + "learning_rate": 1.8380485638489096e-05, + "loss": 0.2586, + "step": 3418 + }, + { + "epoch": 0.83, + "learning_rate": 1.8378332076136818e-05, + "loss": 0.2696, + "step": 3420 + }, + { + "epoch": 0.83, + "learning_rate": 1.8376177209214166e-05, + "loss": 0.3002, + "step": 3422 + }, + { + "epoch": 0.83, + "learning_rate": 1.8374021038056673e-05, + "loss": 0.2638, + "step": 3424 + }, + { + "epoch": 0.83, + "learning_rate": 1.837186356300007e-05, + "loss": 0.283, + "step": 3426 + }, + { + "epoch": 0.84, + "learning_rate": 1.8369704784380298e-05, + "loss": 0.2841, + "step": 3428 + }, + { + "epoch": 0.84, + "learning_rate": 1.8367544702533487e-05, + "loss": 0.3038, + "step": 3430 + }, + { + "epoch": 0.84, + "learning_rate": 1.8365383317795983e-05, + "loss": 0.2626, + "step": 3432 + }, + { + "epoch": 0.84, + "learning_rate": 1.836322063050433e-05, + "loss": 0.2508, + "step": 3434 + }, + { + "epoch": 0.84, + "learning_rate": 1.8361056640995275e-05, + "loss": 0.2866, + "step": 3436 + }, + { + "epoch": 0.84, + "learning_rate": 1.8358891349605764e-05, + "loss": 0.2871, + "step": 3438 + }, + { + "epoch": 0.84, + "learning_rate": 1.8356724756672955e-05, + "loss": 0.2821, + "step": 3440 + }, + { + "epoch": 0.84, + "learning_rate": 1.83545568625342e-05, + "loss": 0.2614, + "step": 3442 + }, + { + "epoch": 0.84, + "learning_rate": 1.8352387667527057e-05, + "loss": 0.2998, + "step": 3444 + }, + { + "epoch": 0.84, + "learning_rate": 1.8350217171989287e-05, + "loss": 0.2834, + "step": 3446 + }, + { + "epoch": 0.84, + "learning_rate": 1.834804537625885e-05, + "loss": 0.2654, + "step": 3448 + }, + { + "epoch": 0.84, + "learning_rate": 1.834587228067391e-05, + "loss": 0.2774, + "step": 3450 + }, + { + "epoch": 0.84, + "learning_rate": 1.8343697885572845e-05, + "loss": 0.2714, + "step": 3452 + }, + { + "epoch": 0.84, + "learning_rate": 1.8341522191294216e-05, + "loss": 0.273, + "step": 3454 + }, + { + "epoch": 0.84, + "learning_rate": 1.8339345198176798e-05, + "loss": 0.2867, + "step": 3456 + }, + { + "epoch": 0.84, + "learning_rate": 1.833716690655956e-05, + "loss": 0.2783, + "step": 3458 + }, + { + "epoch": 0.84, + "learning_rate": 1.833498731678169e-05, + "loss": 0.2828, + "step": 3460 + }, + { + "epoch": 0.84, + "learning_rate": 1.8332806429182556e-05, + "loss": 0.2855, + "step": 3462 + }, + { + "epoch": 0.84, + "learning_rate": 1.8330624244101742e-05, + "loss": 0.2501, + "step": 3464 + }, + { + "epoch": 0.84, + "learning_rate": 1.8328440761879033e-05, + "loss": 0.263, + "step": 3466 + }, + { + "epoch": 0.85, + "learning_rate": 1.832625598285441e-05, + "loss": 0.2848, + "step": 3468 + }, + { + "epoch": 0.85, + "learning_rate": 1.8324069907368068e-05, + "loss": 0.2734, + "step": 3470 + }, + { + "epoch": 0.85, + "learning_rate": 1.8321882535760386e-05, + "loss": 0.2838, + "step": 3472 + }, + { + "epoch": 0.85, + "learning_rate": 1.831969386837196e-05, + "loss": 0.2597, + "step": 3474 + }, + { + "epoch": 0.85, + "learning_rate": 1.831750390554358e-05, + "loss": 0.2614, + "step": 3476 + }, + { + "epoch": 0.85, + "learning_rate": 1.8315312647616245e-05, + "loss": 0.2845, + "step": 3478 + }, + { + "epoch": 0.85, + "learning_rate": 1.831312009493114e-05, + "loss": 0.2673, + "step": 3480 + }, + { + "epoch": 0.85, + "learning_rate": 1.8310926247829672e-05, + "loss": 0.28, + "step": 3482 + }, + { + "epoch": 0.85, + "learning_rate": 1.8308731106653436e-05, + "loss": 0.2865, + "step": 3484 + }, + { + "epoch": 0.85, + "learning_rate": 1.8306534671744234e-05, + "loss": 0.2754, + "step": 3486 + }, + { + "epoch": 0.85, + "learning_rate": 1.8304336943444066e-05, + "loss": 0.2809, + "step": 3488 + }, + { + "epoch": 0.85, + "learning_rate": 1.8302137922095133e-05, + "loss": 0.2749, + "step": 3490 + }, + { + "epoch": 0.85, + "learning_rate": 1.8299937608039845e-05, + "loss": 0.2943, + "step": 3492 + }, + { + "epoch": 0.85, + "learning_rate": 1.8297736001620805e-05, + "loss": 0.2843, + "step": 3494 + }, + { + "epoch": 0.85, + "learning_rate": 1.829553310318082e-05, + "loss": 0.2729, + "step": 3496 + }, + { + "epoch": 0.85, + "learning_rate": 1.8293328913062892e-05, + "loss": 0.2629, + "step": 3498 + }, + { + "epoch": 0.85, + "learning_rate": 1.829112343161024e-05, + "loss": 0.2886, + "step": 3500 + }, + { + "epoch": 0.85, + "learning_rate": 1.828891665916627e-05, + "loss": 0.2669, + "step": 3502 + }, + { + "epoch": 0.85, + "learning_rate": 1.8286708596074596e-05, + "loss": 0.2594, + "step": 3504 + }, + { + "epoch": 0.85, + "learning_rate": 1.8284499242679027e-05, + "loss": 0.2645, + "step": 3506 + }, + { + "epoch": 0.85, + "learning_rate": 1.8282288599323576e-05, + "loss": 0.2906, + "step": 3508 + }, + { + "epoch": 0.86, + "learning_rate": 1.8280076666352457e-05, + "loss": 0.2944, + "step": 3510 + }, + { + "epoch": 0.86, + "learning_rate": 1.827786344411009e-05, + "loss": 0.2694, + "step": 3512 + }, + { + "epoch": 0.86, + "learning_rate": 1.8275648932941085e-05, + "loss": 0.2701, + "step": 3514 + }, + { + "epoch": 0.86, + "learning_rate": 1.827343313319026e-05, + "loss": 0.2529, + "step": 3516 + }, + { + "epoch": 0.86, + "learning_rate": 1.8271216045202627e-05, + "loss": 0.2737, + "step": 3518 + }, + { + "epoch": 0.86, + "learning_rate": 1.8268997669323418e-05, + "loss": 0.2881, + "step": 3520 + }, + { + "epoch": 0.86, + "learning_rate": 1.8266778005898035e-05, + "loss": 0.2758, + "step": 3522 + }, + { + "epoch": 0.86, + "learning_rate": 1.826455705527211e-05, + "loss": 0.2675, + "step": 3524 + }, + { + "epoch": 0.86, + "learning_rate": 1.8262334817791448e-05, + "loss": 0.2757, + "step": 3526 + }, + { + "epoch": 0.86, + "learning_rate": 1.826011129380208e-05, + "loss": 0.279, + "step": 3528 + }, + { + "epoch": 0.86, + "learning_rate": 1.825788648365022e-05, + "loss": 0.2403, + "step": 3530 + }, + { + "epoch": 0.86, + "learning_rate": 1.8255660387682287e-05, + "loss": 0.263, + "step": 3532 + }, + { + "epoch": 0.86, + "learning_rate": 1.8253433006244907e-05, + "loss": 0.2824, + "step": 3534 + }, + { + "epoch": 0.86, + "learning_rate": 1.8251204339684894e-05, + "loss": 0.2693, + "step": 3536 + }, + { + "epoch": 0.86, + "learning_rate": 1.8248974388349275e-05, + "loss": 0.2676, + "step": 3538 + }, + { + "epoch": 0.86, + "learning_rate": 1.8246743152585264e-05, + "loss": 0.2564, + "step": 3540 + }, + { + "epoch": 0.86, + "learning_rate": 1.8244510632740285e-05, + "loss": 0.2905, + "step": 3542 + }, + { + "epoch": 0.86, + "learning_rate": 1.824227682916196e-05, + "loss": 0.2779, + "step": 3544 + }, + { + "epoch": 0.86, + "learning_rate": 1.8240041742198102e-05, + "loss": 0.2716, + "step": 3546 + }, + { + "epoch": 0.86, + "learning_rate": 1.8237805372196737e-05, + "loss": 0.2858, + "step": 3548 + }, + { + "epoch": 0.87, + "learning_rate": 1.8235567719506084e-05, + "loss": 0.2716, + "step": 3550 + }, + { + "epoch": 0.87, + "learning_rate": 1.8233328784474564e-05, + "loss": 0.2582, + "step": 3552 + }, + { + "epoch": 0.87, + "learning_rate": 1.8231088567450793e-05, + "loss": 0.2474, + "step": 3554 + }, + { + "epoch": 0.87, + "learning_rate": 1.8228847068783598e-05, + "loss": 0.2637, + "step": 3556 + }, + { + "epoch": 0.87, + "learning_rate": 1.8226604288821982e-05, + "loss": 0.2809, + "step": 3558 + }, + { + "epoch": 0.87, + "learning_rate": 1.822436022791518e-05, + "loss": 0.2794, + "step": 3560 + }, + { + "epoch": 0.87, + "learning_rate": 1.8222114886412598e-05, + "loss": 0.2815, + "step": 3562 + }, + { + "epoch": 0.87, + "learning_rate": 1.8219868264663854e-05, + "loss": 0.2772, + "step": 3564 + }, + { + "epoch": 0.87, + "learning_rate": 1.8217620363018773e-05, + "loss": 0.2892, + "step": 3566 + }, + { + "epoch": 0.87, + "learning_rate": 1.8215371181827358e-05, + "loss": 0.2637, + "step": 3568 + }, + { + "epoch": 0.87, + "learning_rate": 1.8213120721439837e-05, + "loss": 0.2556, + "step": 3570 + }, + { + "epoch": 0.87, + "learning_rate": 1.821086898220661e-05, + "loss": 0.246, + "step": 3572 + }, + { + "epoch": 0.87, + "learning_rate": 1.82086159644783e-05, + "loss": 0.2754, + "step": 3574 + }, + { + "epoch": 0.87, + "learning_rate": 1.8206361668605717e-05, + "loss": 0.2892, + "step": 3576 + }, + { + "epoch": 0.87, + "learning_rate": 1.8204106094939868e-05, + "loss": 0.2541, + "step": 3578 + }, + { + "epoch": 0.87, + "learning_rate": 1.8201849243831966e-05, + "loss": 0.261, + "step": 3580 + }, + { + "epoch": 0.87, + "learning_rate": 1.8199591115633424e-05, + "loss": 0.2728, + "step": 3582 + }, + { + "epoch": 0.87, + "learning_rate": 1.8197331710695843e-05, + "loss": 0.2745, + "step": 3584 + }, + { + "epoch": 0.87, + "learning_rate": 1.8195071029371032e-05, + "loss": 0.2728, + "step": 3586 + }, + { + "epoch": 0.87, + "learning_rate": 1.8192809072010998e-05, + "loss": 0.2754, + "step": 3588 + }, + { + "epoch": 0.87, + "learning_rate": 1.8190545838967945e-05, + "loss": 0.2568, + "step": 3590 + }, + { + "epoch": 0.88, + "learning_rate": 1.818828133059427e-05, + "loss": 0.2508, + "step": 3592 + }, + { + "epoch": 0.88, + "learning_rate": 1.8186015547242582e-05, + "loss": 0.2784, + "step": 3594 + }, + { + "epoch": 0.88, + "learning_rate": 1.8183748489265683e-05, + "loss": 0.2851, + "step": 3596 + }, + { + "epoch": 0.88, + "learning_rate": 1.8181480157016558e-05, + "loss": 0.2759, + "step": 3598 + }, + { + "epoch": 0.88, + "learning_rate": 1.8179210550848413e-05, + "loss": 0.2819, + "step": 3600 + }, + { + "epoch": 0.88, + "learning_rate": 1.8176939671114645e-05, + "loss": 0.2839, + "step": 3602 + }, + { + "epoch": 0.88, + "learning_rate": 1.817466751816884e-05, + "loss": 0.2627, + "step": 3604 + }, + { + "epoch": 0.88, + "learning_rate": 1.81723940923648e-05, + "loss": 0.2747, + "step": 3606 + }, + { + "epoch": 0.88, + "learning_rate": 1.81701193940565e-05, + "loss": 0.2798, + "step": 3608 + }, + { + "epoch": 0.88, + "learning_rate": 1.816784342359814e-05, + "loss": 0.2587, + "step": 3610 + }, + { + "epoch": 0.88, + "learning_rate": 1.8165566181344104e-05, + "loss": 0.2338, + "step": 3612 + }, + { + "epoch": 0.88, + "learning_rate": 1.8163287667648974e-05, + "loss": 0.2786, + "step": 3614 + }, + { + "epoch": 0.88, + "learning_rate": 1.816100788286753e-05, + "loss": 0.2696, + "step": 3616 + }, + { + "epoch": 0.88, + "learning_rate": 1.8158726827354756e-05, + "loss": 0.2438, + "step": 3618 + }, + { + "epoch": 0.88, + "learning_rate": 1.8156444501465827e-05, + "loss": 0.2606, + "step": 3620 + }, + { + "epoch": 0.88, + "learning_rate": 1.815416090555612e-05, + "loss": 0.2681, + "step": 3622 + }, + { + "epoch": 0.88, + "learning_rate": 1.8151876039981208e-05, + "loss": 0.2857, + "step": 3624 + }, + { + "epoch": 0.88, + "learning_rate": 1.8149589905096866e-05, + "loss": 0.2803, + "step": 3626 + }, + { + "epoch": 0.88, + "learning_rate": 1.8147302501259055e-05, + "loss": 0.2647, + "step": 3628 + }, + { + "epoch": 0.88, + "learning_rate": 1.8145013828823943e-05, + "loss": 0.278, + "step": 3630 + }, + { + "epoch": 0.88, + "learning_rate": 1.8142723888147897e-05, + "loss": 0.2824, + "step": 3632 + }, + { + "epoch": 0.89, + "learning_rate": 1.814043267958748e-05, + "loss": 0.2832, + "step": 3634 + }, + { + "epoch": 0.89, + "learning_rate": 1.8138140203499443e-05, + "loss": 0.2856, + "step": 3636 + }, + { + "epoch": 0.89, + "learning_rate": 1.8135846460240752e-05, + "loss": 0.2615, + "step": 3638 + }, + { + "epoch": 0.89, + "learning_rate": 1.8133551450168553e-05, + "loss": 0.2731, + "step": 3640 + }, + { + "epoch": 0.89, + "learning_rate": 1.8131255173640196e-05, + "loss": 0.268, + "step": 3642 + }, + { + "epoch": 0.89, + "learning_rate": 1.8128957631013235e-05, + "loss": 0.2652, + "step": 3644 + }, + { + "epoch": 0.89, + "learning_rate": 1.812665882264541e-05, + "loss": 0.2669, + "step": 3646 + }, + { + "epoch": 0.89, + "learning_rate": 1.8124358748894667e-05, + "loss": 0.2823, + "step": 3648 + }, + { + "epoch": 0.89, + "learning_rate": 1.812205741011914e-05, + "loss": 0.2815, + "step": 3650 + }, + { + "epoch": 0.89, + "learning_rate": 1.8119754806677167e-05, + "loss": 0.2726, + "step": 3652 + }, + { + "epoch": 0.89, + "learning_rate": 1.811745093892728e-05, + "loss": 0.2767, + "step": 3654 + }, + { + "epoch": 0.89, + "learning_rate": 1.8115145807228215e-05, + "loss": 0.2743, + "step": 3656 + }, + { + "epoch": 0.89, + "learning_rate": 1.8112839411938892e-05, + "loss": 0.2806, + "step": 3658 + }, + { + "epoch": 0.89, + "learning_rate": 1.811053175341844e-05, + "loss": 0.2809, + "step": 3660 + }, + { + "epoch": 0.89, + "learning_rate": 1.810822283202617e-05, + "loss": 0.2689, + "step": 3662 + }, + { + "epoch": 0.89, + "learning_rate": 1.810591264812161e-05, + "loss": 0.2431, + "step": 3664 + }, + { + "epoch": 0.89, + "learning_rate": 1.8103601202064463e-05, + "loss": 0.2743, + "step": 3666 + }, + { + "epoch": 0.89, + "learning_rate": 1.8101288494214647e-05, + "loss": 0.2466, + "step": 3668 + }, + { + "epoch": 0.89, + "learning_rate": 1.809897452493226e-05, + "loss": 0.2607, + "step": 3670 + }, + { + "epoch": 0.89, + "learning_rate": 1.8096659294577612e-05, + "loss": 0.2422, + "step": 3672 + }, + { + "epoch": 0.9, + "learning_rate": 1.80943428035112e-05, + "loss": 0.2809, + "step": 3674 + }, + { + "epoch": 0.9, + "learning_rate": 1.809202505209372e-05, + "loss": 0.2642, + "step": 3676 + }, + { + "epoch": 0.9, + "learning_rate": 1.808970604068606e-05, + "loss": 0.2734, + "step": 3678 + }, + { + "epoch": 0.9, + "learning_rate": 1.808738576964931e-05, + "loss": 0.2741, + "step": 3680 + }, + { + "epoch": 0.9, + "learning_rate": 1.8085064239344757e-05, + "loss": 0.2887, + "step": 3682 + }, + { + "epoch": 0.9, + "learning_rate": 1.8082741450133875e-05, + "loss": 0.2625, + "step": 3684 + }, + { + "epoch": 0.9, + "learning_rate": 1.8080417402378347e-05, + "loss": 0.27, + "step": 3686 + }, + { + "epoch": 0.9, + "learning_rate": 1.8078092096440038e-05, + "loss": 0.2485, + "step": 3688 + }, + { + "epoch": 0.9, + "learning_rate": 1.807576553268102e-05, + "loss": 0.2768, + "step": 3690 + }, + { + "epoch": 0.9, + "learning_rate": 1.807343771146356e-05, + "loss": 0.2637, + "step": 3692 + }, + { + "epoch": 0.9, + "learning_rate": 1.807110863315011e-05, + "loss": 0.2496, + "step": 3694 + }, + { + "epoch": 0.9, + "learning_rate": 1.8068778298103327e-05, + "loss": 0.2774, + "step": 3696 + }, + { + "epoch": 0.9, + "learning_rate": 1.8066446706686066e-05, + "loss": 0.2708, + "step": 3698 + }, + { + "epoch": 0.9, + "learning_rate": 1.8064113859261373e-05, + "loss": 0.2626, + "step": 3700 + }, + { + "epoch": 0.9, + "learning_rate": 1.8061779756192485e-05, + "loss": 0.2615, + "step": 3702 + }, + { + "epoch": 0.9, + "learning_rate": 1.8059444397842846e-05, + "loss": 0.2869, + "step": 3704 + }, + { + "epoch": 0.9, + "learning_rate": 1.8057107784576088e-05, + "loss": 0.2622, + "step": 3706 + }, + { + "epoch": 0.9, + "learning_rate": 1.805476991675603e-05, + "loss": 0.2866, + "step": 3708 + }, + { + "epoch": 0.9, + "learning_rate": 1.8052430794746714e-05, + "loss": 0.2508, + "step": 3710 + }, + { + "epoch": 0.9, + "learning_rate": 1.8050090418912343e-05, + "loss": 0.2642, + "step": 3712 + }, + { + "epoch": 0.9, + "learning_rate": 1.8047748789617342e-05, + "loss": 0.2361, + "step": 3714 + }, + { + "epoch": 0.91, + "learning_rate": 1.8045405907226312e-05, + "loss": 0.2506, + "step": 3716 + }, + { + "epoch": 0.91, + "learning_rate": 1.8043061772104064e-05, + "loss": 0.2407, + "step": 3718 + }, + { + "epoch": 0.91, + "learning_rate": 1.8040716384615596e-05, + "loss": 0.266, + "step": 3720 + }, + { + "epoch": 0.91, + "learning_rate": 1.80383697451261e-05, + "loss": 0.25, + "step": 3722 + }, + { + "epoch": 0.91, + "learning_rate": 1.8036021854000968e-05, + "loss": 0.2429, + "step": 3724 + }, + { + "epoch": 0.91, + "learning_rate": 1.803367271160579e-05, + "loss": 0.262, + "step": 3726 + }, + { + "epoch": 0.91, + "learning_rate": 1.8031322318306333e-05, + "loss": 0.2723, + "step": 3728 + }, + { + "epoch": 0.91, + "learning_rate": 1.8028970674468583e-05, + "loss": 0.2677, + "step": 3730 + }, + { + "epoch": 0.91, + "learning_rate": 1.8026617780458703e-05, + "loss": 0.2422, + "step": 3732 + }, + { + "epoch": 0.91, + "learning_rate": 1.8024263636643057e-05, + "loss": 0.2729, + "step": 3734 + }, + { + "epoch": 0.91, + "learning_rate": 1.8021908243388205e-05, + "loss": 0.2511, + "step": 3736 + }, + { + "epoch": 0.91, + "learning_rate": 1.8019551601060897e-05, + "loss": 0.2589, + "step": 3738 + }, + { + "epoch": 0.91, + "learning_rate": 1.8017193710028084e-05, + "loss": 0.2637, + "step": 3740 + }, + { + "epoch": 0.91, + "learning_rate": 1.8014834570656903e-05, + "loss": 0.2846, + "step": 3742 + }, + { + "epoch": 0.91, + "learning_rate": 1.8012474183314697e-05, + "loss": 0.2552, + "step": 3744 + }, + { + "epoch": 0.91, + "learning_rate": 1.801011254836899e-05, + "loss": 0.2708, + "step": 3746 + }, + { + "epoch": 0.91, + "learning_rate": 1.800774966618751e-05, + "loss": 0.2664, + "step": 3748 + }, + { + "epoch": 0.91, + "learning_rate": 1.8005385537138172e-05, + "loss": 0.2536, + "step": 3750 + }, + { + "epoch": 0.91, + "learning_rate": 1.8003020161589094e-05, + "loss": 0.252, + "step": 3752 + }, + { + "epoch": 0.91, + "learning_rate": 1.8000653539908575e-05, + "loss": 0.2624, + "step": 3754 + }, + { + "epoch": 0.92, + "learning_rate": 1.7998285672465128e-05, + "loss": 0.2758, + "step": 3756 + }, + { + "epoch": 0.92, + "learning_rate": 1.7995916559627437e-05, + "loss": 0.2431, + "step": 3758 + }, + { + "epoch": 0.92, + "learning_rate": 1.79935462017644e-05, + "loss": 0.2682, + "step": 3760 + }, + { + "epoch": 0.92, + "learning_rate": 1.7991174599245094e-05, + "loss": 0.273, + "step": 3762 + }, + { + "epoch": 0.92, + "learning_rate": 1.7988801752438797e-05, + "loss": 0.2567, + "step": 3764 + }, + { + "epoch": 0.92, + "learning_rate": 1.798642766171498e-05, + "loss": 0.279, + "step": 3766 + }, + { + "epoch": 0.92, + "learning_rate": 1.7984052327443305e-05, + "loss": 0.2389, + "step": 3768 + }, + { + "epoch": 0.92, + "learning_rate": 1.7981675749993633e-05, + "loss": 0.25, + "step": 3770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7979297929736012e-05, + "loss": 0.2561, + "step": 3772 + }, + { + "epoch": 0.92, + "learning_rate": 1.797691886704069e-05, + "loss": 0.245, + "step": 3774 + }, + { + "epoch": 0.92, + "learning_rate": 1.79745385622781e-05, + "loss": 0.2615, + "step": 3776 + }, + { + "epoch": 0.92, + "learning_rate": 1.7972157015818882e-05, + "loss": 0.2782, + "step": 3778 + }, + { + "epoch": 0.92, + "learning_rate": 1.7969774228033857e-05, + "loss": 0.2681, + "step": 3780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7967390199294038e-05, + "loss": 0.2712, + "step": 3782 + }, + { + "epoch": 0.92, + "learning_rate": 1.7965004929970645e-05, + "loss": 0.2597, + "step": 3784 + }, + { + "epoch": 0.92, + "learning_rate": 1.7962618420435075e-05, + "loss": 0.2627, + "step": 3786 + }, + { + "epoch": 0.92, + "learning_rate": 1.7960230671058935e-05, + "loss": 0.2557, + "step": 3788 + }, + { + "epoch": 0.92, + "learning_rate": 1.7957841682214005e-05, + "loss": 0.2708, + "step": 3790 + }, + { + "epoch": 0.92, + "learning_rate": 1.7955451454272278e-05, + "loss": 0.2446, + "step": 3792 + }, + { + "epoch": 0.92, + "learning_rate": 1.7953059987605926e-05, + "loss": 0.2617, + "step": 3794 + }, + { + "epoch": 0.92, + "learning_rate": 1.795066728258732e-05, + "loss": 0.2812, + "step": 3796 + }, + { + "epoch": 0.93, + "learning_rate": 1.794827333958902e-05, + "loss": 0.256, + "step": 3798 + }, + { + "epoch": 0.93, + "learning_rate": 1.794587815898378e-05, + "loss": 0.2581, + "step": 3800 + }, + { + "epoch": 0.93, + "learning_rate": 1.794348174114456e-05, + "loss": 0.2626, + "step": 3802 + }, + { + "epoch": 0.93, + "learning_rate": 1.7941084086444486e-05, + "loss": 0.2621, + "step": 3804 + }, + { + "epoch": 0.93, + "learning_rate": 1.79386851952569e-05, + "loss": 0.269, + "step": 3806 + }, + { + "epoch": 0.93, + "learning_rate": 1.7936285067955324e-05, + "loss": 0.2676, + "step": 3808 + }, + { + "epoch": 0.93, + "learning_rate": 1.7933883704913475e-05, + "loss": 0.2468, + "step": 3810 + }, + { + "epoch": 0.93, + "learning_rate": 1.7931481106505266e-05, + "loss": 0.2564, + "step": 3812 + }, + { + "epoch": 0.93, + "learning_rate": 1.79290772731048e-05, + "loss": 0.2806, + "step": 3814 + }, + { + "epoch": 0.93, + "learning_rate": 1.7926672205086374e-05, + "loss": 0.2671, + "step": 3816 + }, + { + "epoch": 0.93, + "learning_rate": 1.7924265902824468e-05, + "loss": 0.2566, + "step": 3818 + }, + { + "epoch": 0.93, + "learning_rate": 1.7921858366693774e-05, + "loss": 0.2603, + "step": 3820 + }, + { + "epoch": 0.93, + "learning_rate": 1.791944959706915e-05, + "loss": 0.2804, + "step": 3822 + }, + { + "epoch": 0.93, + "learning_rate": 1.791703959432567e-05, + "loss": 0.2598, + "step": 3824 + }, + { + "epoch": 0.93, + "learning_rate": 1.7914628358838586e-05, + "loss": 0.2566, + "step": 3826 + }, + { + "epoch": 0.93, + "learning_rate": 1.7912215890983344e-05, + "loss": 0.2532, + "step": 3828 + }, + { + "epoch": 0.93, + "learning_rate": 1.7909802191135588e-05, + "loss": 0.2795, + "step": 3830 + }, + { + "epoch": 0.93, + "learning_rate": 1.790738725967115e-05, + "loss": 0.2441, + "step": 3832 + }, + { + "epoch": 0.93, + "learning_rate": 1.7904971096966044e-05, + "loss": 0.2764, + "step": 3834 + }, + { + "epoch": 0.93, + "learning_rate": 1.7902553703396493e-05, + "loss": 0.2491, + "step": 3836 + }, + { + "epoch": 0.94, + "learning_rate": 1.7900135079338907e-05, + "loss": 0.2561, + "step": 3838 + }, + { + "epoch": 0.94, + "learning_rate": 1.7897715225169876e-05, + "loss": 0.2609, + "step": 3840 + }, + { + "epoch": 0.94, + "learning_rate": 1.789529414126619e-05, + "loss": 0.2748, + "step": 3842 + }, + { + "epoch": 0.94, + "learning_rate": 1.7892871828004843e-05, + "loss": 0.2354, + "step": 3844 + }, + { + "epoch": 0.94, + "learning_rate": 1.7890448285762992e-05, + "loss": 0.2548, + "step": 3846 + }, + { + "epoch": 0.94, + "learning_rate": 1.788802351491801e-05, + "loss": 0.2576, + "step": 3848 + }, + { + "epoch": 0.94, + "learning_rate": 1.7885597515847448e-05, + "loss": 0.2618, + "step": 3850 + }, + { + "epoch": 0.94, + "learning_rate": 1.7883170288929054e-05, + "loss": 0.2448, + "step": 3852 + }, + { + "epoch": 0.94, + "learning_rate": 1.788074183454077e-05, + "loss": 0.2654, + "step": 3854 + }, + { + "epoch": 0.94, + "learning_rate": 1.7878312153060714e-05, + "loss": 0.2575, + "step": 3856 + }, + { + "epoch": 0.94, + "learning_rate": 1.787588124486722e-05, + "loss": 0.2793, + "step": 3858 + }, + { + "epoch": 0.94, + "learning_rate": 1.787344911033879e-05, + "loss": 0.2445, + "step": 3860 + }, + { + "epoch": 0.94, + "learning_rate": 1.7871015749854124e-05, + "loss": 0.2633, + "step": 3862 + }, + { + "epoch": 0.94, + "learning_rate": 1.786858116379212e-05, + "loss": 0.2619, + "step": 3864 + }, + { + "epoch": 0.94, + "learning_rate": 1.7866145352531865e-05, + "loss": 0.2529, + "step": 3866 + }, + { + "epoch": 0.94, + "learning_rate": 1.7863708316452625e-05, + "loss": 0.2612, + "step": 3868 + }, + { + "epoch": 0.94, + "learning_rate": 1.786127005593387e-05, + "loss": 0.2554, + "step": 3870 + }, + { + "epoch": 0.94, + "learning_rate": 1.7858830571355258e-05, + "loss": 0.2319, + "step": 3872 + }, + { + "epoch": 0.94, + "learning_rate": 1.7856389863096626e-05, + "loss": 0.2341, + "step": 3874 + }, + { + "epoch": 0.94, + "learning_rate": 1.7853947931538023e-05, + "loss": 0.2753, + "step": 3876 + }, + { + "epoch": 0.94, + "learning_rate": 1.785150477705967e-05, + "loss": 0.2645, + "step": 3878 + }, + { + "epoch": 0.95, + "learning_rate": 1.784906040004198e-05, + "loss": 0.2741, + "step": 3880 + }, + { + "epoch": 0.95, + "learning_rate": 1.784661480086557e-05, + "loss": 0.235, + "step": 3882 + }, + { + "epoch": 0.95, + "learning_rate": 1.7844167979911238e-05, + "loss": 0.2461, + "step": 3884 + }, + { + "epoch": 0.95, + "learning_rate": 1.784171993755997e-05, + "loss": 0.2653, + "step": 3886 + }, + { + "epoch": 0.95, + "learning_rate": 1.7839270674192942e-05, + "loss": 0.2591, + "step": 3888 + }, + { + "epoch": 0.95, + "learning_rate": 1.7836820190191524e-05, + "loss": 0.2519, + "step": 3890 + }, + { + "epoch": 0.95, + "learning_rate": 1.7834368485937278e-05, + "loss": 0.2516, + "step": 3892 + }, + { + "epoch": 0.95, + "learning_rate": 1.7831915561811955e-05, + "loss": 0.2179, + "step": 3894 + }, + { + "epoch": 0.95, + "learning_rate": 1.7829461418197492e-05, + "loss": 0.2696, + "step": 3896 + }, + { + "epoch": 0.95, + "learning_rate": 1.7827006055476013e-05, + "loss": 0.2476, + "step": 3898 + }, + { + "epoch": 0.95, + "learning_rate": 1.782454947402984e-05, + "loss": 0.2823, + "step": 3900 + }, + { + "epoch": 0.95, + "learning_rate": 1.7822091674241487e-05, + "loss": 0.2498, + "step": 3902 + }, + { + "epoch": 0.95, + "learning_rate": 1.7819632656493647e-05, + "loss": 0.2632, + "step": 3904 + }, + { + "epoch": 0.95, + "learning_rate": 1.7817172421169206e-05, + "loss": 0.2461, + "step": 3906 + }, + { + "epoch": 0.95, + "learning_rate": 1.7814710968651244e-05, + "loss": 0.2687, + "step": 3908 + }, + { + "epoch": 0.95, + "learning_rate": 1.7812248299323026e-05, + "loss": 0.2754, + "step": 3910 + }, + { + "epoch": 0.95, + "learning_rate": 1.780978441356801e-05, + "loss": 0.259, + "step": 3912 + }, + { + "epoch": 0.95, + "learning_rate": 1.780731931176985e-05, + "loss": 0.2468, + "step": 3914 + }, + { + "epoch": 0.95, + "learning_rate": 1.7804852994312365e-05, + "loss": 0.2661, + "step": 3916 + }, + { + "epoch": 0.95, + "learning_rate": 1.7802385461579593e-05, + "loss": 0.2485, + "step": 3918 + }, + { + "epoch": 0.96, + "learning_rate": 1.779991671395574e-05, + "loss": 0.2621, + "step": 3920 + }, + { + "epoch": 0.96, + "learning_rate": 1.779744675182521e-05, + "loss": 0.2483, + "step": 3922 + }, + { + "epoch": 0.96, + "learning_rate": 1.7794975575572596e-05, + "loss": 0.255, + "step": 3924 + }, + { + "epoch": 0.96, + "learning_rate": 1.7792503185582684e-05, + "loss": 0.2348, + "step": 3926 + }, + { + "epoch": 0.96, + "learning_rate": 1.7790029582240436e-05, + "loss": 0.2623, + "step": 3928 + }, + { + "epoch": 0.96, + "learning_rate": 1.7787554765931013e-05, + "loss": 0.2423, + "step": 3930 + }, + { + "epoch": 0.96, + "learning_rate": 1.7785078737039766e-05, + "loss": 0.2559, + "step": 3932 + }, + { + "epoch": 0.96, + "learning_rate": 1.7782601495952232e-05, + "loss": 0.2484, + "step": 3934 + }, + { + "epoch": 0.96, + "learning_rate": 1.7780123043054135e-05, + "loss": 0.2284, + "step": 3936 + }, + { + "epoch": 0.96, + "learning_rate": 1.7777643378731384e-05, + "loss": 0.2216, + "step": 3938 + }, + { + "epoch": 0.96, + "learning_rate": 1.777516250337009e-05, + "loss": 0.2606, + "step": 3940 + }, + { + "epoch": 0.96, + "learning_rate": 1.7772680417356537e-05, + "loss": 0.2244, + "step": 3942 + }, + { + "epoch": 0.96, + "learning_rate": 1.7770197121077207e-05, + "loss": 0.279, + "step": 3944 + }, + { + "epoch": 0.96, + "learning_rate": 1.7767712614918773e-05, + "loss": 0.2562, + "step": 3946 + }, + { + "epoch": 0.96, + "learning_rate": 1.7765226899268085e-05, + "loss": 0.2466, + "step": 3948 + }, + { + "epoch": 0.96, + "learning_rate": 1.7762739974512194e-05, + "loss": 0.2496, + "step": 3950 + }, + { + "epoch": 0.96, + "learning_rate": 1.7760251841038325e-05, + "loss": 0.2598, + "step": 3952 + }, + { + "epoch": 0.96, + "learning_rate": 1.775776249923391e-05, + "loss": 0.2678, + "step": 3954 + }, + { + "epoch": 0.96, + "learning_rate": 1.775527194948655e-05, + "loss": 0.2676, + "step": 3956 + }, + { + "epoch": 0.96, + "learning_rate": 1.7752780192184043e-05, + "loss": 0.2677, + "step": 3958 + }, + { + "epoch": 0.96, + "learning_rate": 1.7750287227714382e-05, + "loss": 0.2566, + "step": 3960 + }, + { + "epoch": 0.97, + "learning_rate": 1.7747793056465734e-05, + "loss": 0.2087, + "step": 3962 + }, + { + "epoch": 0.97, + "learning_rate": 1.7745297678826464e-05, + "loss": 0.2527, + "step": 3964 + }, + { + "epoch": 0.97, + "learning_rate": 1.774280109518512e-05, + "loss": 0.2456, + "step": 3966 + }, + { + "epoch": 0.97, + "learning_rate": 1.7740303305930437e-05, + "loss": 0.2669, + "step": 3968 + }, + { + "epoch": 0.97, + "learning_rate": 1.773780431145134e-05, + "loss": 0.2549, + "step": 3970 + }, + { + "epoch": 0.97, + "learning_rate": 1.7735304112136947e-05, + "loss": 0.205, + "step": 3972 + }, + { + "epoch": 0.97, + "learning_rate": 1.7732802708376554e-05, + "loss": 0.2572, + "step": 3974 + }, + { + "epoch": 0.97, + "learning_rate": 1.7730300100559645e-05, + "loss": 0.2716, + "step": 3976 + }, + { + "epoch": 0.97, + "learning_rate": 1.7727796289075905e-05, + "loss": 0.2565, + "step": 3978 + }, + { + "epoch": 0.97, + "learning_rate": 1.7725291274315188e-05, + "loss": 0.2251, + "step": 3980 + }, + { + "epoch": 0.97, + "learning_rate": 1.772278505666755e-05, + "loss": 0.2422, + "step": 3982 + }, + { + "epoch": 0.97, + "learning_rate": 1.772027763652322e-05, + "loss": 0.2535, + "step": 3984 + }, + { + "epoch": 0.97, + "learning_rate": 1.771776901427263e-05, + "loss": 0.2694, + "step": 3986 + }, + { + "epoch": 0.97, + "learning_rate": 1.7715259190306393e-05, + "loss": 0.2421, + "step": 3988 + }, + { + "epoch": 0.97, + "learning_rate": 1.77127481650153e-05, + "loss": 0.2569, + "step": 3990 + }, + { + "epoch": 0.97, + "learning_rate": 1.7710235938790344e-05, + "loss": 0.2304, + "step": 3992 + }, + { + "epoch": 0.97, + "learning_rate": 1.7707722512022692e-05, + "loss": 0.2402, + "step": 3994 + }, + { + "epoch": 0.97, + "learning_rate": 1.7705207885103707e-05, + "loss": 0.211, + "step": 3996 + }, + { + "epoch": 0.97, + "learning_rate": 1.770269205842494e-05, + "loss": 0.265, + "step": 3998 + }, + { + "epoch": 0.97, + "learning_rate": 1.7700175032378114e-05, + "loss": 0.2543, + "step": 4000 + }, + { + "epoch": 0.98, + "learning_rate": 1.7697656807355157e-05, + "loss": 0.2455, + "step": 4002 + }, + { + "epoch": 0.98, + "learning_rate": 1.7695137383748172e-05, + "loss": 0.226, + "step": 4004 + }, + { + "epoch": 0.98, + "learning_rate": 1.7692616761949458e-05, + "loss": 0.2697, + "step": 4006 + }, + { + "epoch": 0.98, + "learning_rate": 1.7690094942351488e-05, + "loss": 0.2476, + "step": 4008 + }, + { + "epoch": 0.98, + "learning_rate": 1.7687571925346934e-05, + "loss": 0.2375, + "step": 4010 + }, + { + "epoch": 0.98, + "learning_rate": 1.7685047711328643e-05, + "loss": 0.2509, + "step": 4012 + }, + { + "epoch": 0.98, + "learning_rate": 1.7682522300689665e-05, + "loss": 0.264, + "step": 4014 + }, + { + "epoch": 0.98, + "learning_rate": 1.7679995693823217e-05, + "loss": 0.2397, + "step": 4016 + }, + { + "epoch": 0.98, + "learning_rate": 1.767746789112271e-05, + "loss": 0.2345, + "step": 4018 + }, + { + "epoch": 0.98, + "learning_rate": 1.7674938892981747e-05, + "loss": 0.256, + "step": 4020 + }, + { + "epoch": 0.98, + "learning_rate": 1.767240869979411e-05, + "loss": 0.2561, + "step": 4022 + }, + { + "epoch": 0.98, + "learning_rate": 1.7669877311953773e-05, + "loss": 0.2506, + "step": 4024 + }, + { + "epoch": 0.98, + "learning_rate": 1.766734472985489e-05, + "loss": 0.2642, + "step": 4026 + }, + { + "epoch": 0.98, + "learning_rate": 1.76648109538918e-05, + "loss": 0.235, + "step": 4028 + }, + { + "epoch": 0.98, + "learning_rate": 1.7662275984459036e-05, + "loss": 0.2313, + "step": 4030 + }, + { + "epoch": 0.98, + "learning_rate": 1.765973982195131e-05, + "loss": 0.2288, + "step": 4032 + }, + { + "epoch": 0.98, + "learning_rate": 1.7657202466763523e-05, + "loss": 0.2154, + "step": 4034 + }, + { + "epoch": 0.98, + "learning_rate": 1.765466391929076e-05, + "loss": 0.247, + "step": 4036 + }, + { + "epoch": 0.98, + "learning_rate": 1.765212417992829e-05, + "loss": 0.2406, + "step": 4038 + }, + { + "epoch": 0.98, + "learning_rate": 1.7649583249071574e-05, + "loss": 0.234, + "step": 4040 + }, + { + "epoch": 0.98, + "learning_rate": 1.764704112711625e-05, + "loss": 0.2357, + "step": 4042 + }, + { + "epoch": 0.99, + "learning_rate": 1.7644497814458153e-05, + "loss": 0.2465, + "step": 4044 + }, + { + "epoch": 0.99, + "learning_rate": 1.764195331149329e-05, + "loss": 0.2609, + "step": 4046 + }, + { + "epoch": 0.99, + "learning_rate": 1.763940761861786e-05, + "loss": 0.2645, + "step": 4048 + }, + { + "epoch": 0.99, + "learning_rate": 1.763686073622825e-05, + "loss": 0.2594, + "step": 4050 + }, + { + "epoch": 0.99, + "learning_rate": 1.763431266472102e-05, + "loss": 0.2444, + "step": 4052 + }, + { + "epoch": 0.99, + "learning_rate": 1.7631763404492935e-05, + "loss": 0.2453, + "step": 4054 + }, + { + "epoch": 0.99, + "learning_rate": 1.7629212955940934e-05, + "loss": 0.2682, + "step": 4056 + }, + { + "epoch": 0.99, + "learning_rate": 1.7626661319462134e-05, + "loss": 0.2622, + "step": 4058 + }, + { + "epoch": 0.99, + "learning_rate": 1.7624108495453848e-05, + "loss": 0.2333, + "step": 4060 + }, + { + "epoch": 0.99, + "learning_rate": 1.7621554484313573e-05, + "loss": 0.2378, + "step": 4062 + }, + { + "epoch": 0.99, + "learning_rate": 1.7618999286438986e-05, + "loss": 0.2577, + "step": 4064 + }, + { + "epoch": 0.99, + "learning_rate": 1.761644290222795e-05, + "loss": 0.238, + "step": 4066 + }, + { + "epoch": 0.99, + "learning_rate": 1.7613885332078508e-05, + "loss": 0.233, + "step": 4068 + }, + { + "epoch": 0.99, + "learning_rate": 1.7611326576388905e-05, + "loss": 0.2374, + "step": 4070 + }, + { + "epoch": 0.99, + "learning_rate": 1.7608766635557552e-05, + "loss": 0.2344, + "step": 4072 + }, + { + "epoch": 0.99, + "learning_rate": 1.7606205509983048e-05, + "loss": 0.2529, + "step": 4074 + }, + { + "epoch": 0.99, + "learning_rate": 1.7603643200064188e-05, + "loss": 0.217, + "step": 4076 + }, + { + "epoch": 0.99, + "learning_rate": 1.760107970619994e-05, + "loss": 0.2563, + "step": 4078 + }, + { + "epoch": 0.99, + "learning_rate": 1.759851502878946e-05, + "loss": 0.2288, + "step": 4080 + }, + { + "epoch": 0.99, + "learning_rate": 1.7595949168232083e-05, + "loss": 0.2457, + "step": 4082 + }, + { + "epoch": 1.0, + "learning_rate": 1.759338212492734e-05, + "loss": 0.2308, + "step": 4084 + }, + { + "epoch": 1.0, + "learning_rate": 1.759081389927493e-05, + "loss": 0.2734, + "step": 4086 + }, + { + "epoch": 1.0, + "learning_rate": 1.758824449167476e-05, + "loss": 0.2532, + "step": 4088 + }, + { + "epoch": 1.0, + "learning_rate": 1.7585673902526893e-05, + "loss": 0.2262, + "step": 4090 + }, + { + "epoch": 1.0, + "learning_rate": 1.7583102132231594e-05, + "loss": 0.2583, + "step": 4092 + }, + { + "epoch": 1.0, + "learning_rate": 1.758052918118931e-05, + "loss": 0.2481, + "step": 4094 + }, + { + "epoch": 1.0, + "learning_rate": 1.757795504980067e-05, + "loss": 0.2223, + "step": 4096 + }, + { + "epoch": 1.0, + "learning_rate": 1.7575379738466475e-05, + "loss": 0.2158, + "step": 4098 + }, + { + "epoch": 1.0, + "learning_rate": 1.7572803247587734e-05, + "loss": 0.2527, + "step": 4100 + }, + { + "epoch": 1.0, + "learning_rate": 1.7570225577565622e-05, + "loss": 0.2227, + "step": 4102 + }, + { + "epoch": 1.0, + "learning_rate": 1.7567646728801497e-05, + "loss": 0.2587, + "step": 4104 + }, + { + "epoch": 1.0, + "learning_rate": 1.756506670169691e-05, + "loss": 0.1924, + "step": 4106 + }, + { + "epoch": 1.0, + "learning_rate": 1.756248549665359e-05, + "loss": 0.191, + "step": 4108 + }, + { + "epoch": 1.0, + "learning_rate": 1.755990311407345e-05, + "loss": 0.2099, + "step": 4110 + }, + { + "epoch": 1.0, + "learning_rate": 1.755731955435859e-05, + "loss": 0.1711, + "step": 4112 + }, + { + "epoch": 1.0, + "learning_rate": 1.7554734817911283e-05, + "loss": 0.2189, + "step": 4114 + }, + { + "epoch": 1.0, + "learning_rate": 1.7552148905133995e-05, + "loss": 0.2072, + "step": 4116 + }, + { + "epoch": 1.0, + "learning_rate": 1.754956181642938e-05, + "loss": 0.196, + "step": 4118 + }, + { + "epoch": 1.0, + "learning_rate": 1.7546973552200256e-05, + "loss": 0.2036, + "step": 4120 + }, + { + "epoch": 1.0, + "learning_rate": 1.7544384112849648e-05, + "loss": 0.2007, + "step": 4122 + }, + { + "epoch": 1.0, + "learning_rate": 1.7541793498780738e-05, + "loss": 0.208, + "step": 4124 + }, + { + "epoch": 1.01, + "learning_rate": 1.7539201710396913e-05, + "loss": 0.2203, + "step": 4126 + }, + { + "epoch": 1.01, + "learning_rate": 1.7536608748101728e-05, + "loss": 0.209, + "step": 4128 + }, + { + "epoch": 1.01, + "learning_rate": 1.753401461229894e-05, + "loss": 0.2173, + "step": 4130 + }, + { + "epoch": 1.01, + "learning_rate": 1.753141930339246e-05, + "loss": 0.2103, + "step": 4132 + }, + { + "epoch": 1.01, + "learning_rate": 1.7528822821786403e-05, + "loss": 0.2167, + "step": 4134 + }, + { + "epoch": 1.01, + "learning_rate": 1.7526225167885068e-05, + "loss": 0.2148, + "step": 4136 + }, + { + "epoch": 1.01, + "learning_rate": 1.752362634209292e-05, + "loss": 0.1986, + "step": 4138 + }, + { + "epoch": 1.01, + "learning_rate": 1.752102634481462e-05, + "loss": 0.1869, + "step": 4140 + }, + { + "epoch": 1.01, + "learning_rate": 1.7518425176455008e-05, + "loss": 0.2136, + "step": 4142 + }, + { + "epoch": 1.01, + "learning_rate": 1.751582283741911e-05, + "loss": 0.1973, + "step": 4144 + }, + { + "epoch": 1.01, + "learning_rate": 1.751321932811212e-05, + "loss": 0.2078, + "step": 4146 + }, + { + "epoch": 1.01, + "learning_rate": 1.7510614648939432e-05, + "loss": 0.2088, + "step": 4148 + }, + { + "epoch": 1.01, + "learning_rate": 1.750800880030661e-05, + "loss": 0.1904, + "step": 4150 + }, + { + "epoch": 1.01, + "learning_rate": 1.7505401782619408e-05, + "loss": 0.2362, + "step": 4152 + }, + { + "epoch": 1.01, + "learning_rate": 1.7502793596283756e-05, + "loss": 0.2193, + "step": 4154 + }, + { + "epoch": 1.01, + "learning_rate": 1.750018424170577e-05, + "loss": 0.2163, + "step": 4156 + }, + { + "epoch": 1.01, + "learning_rate": 1.749757371929175e-05, + "loss": 0.1923, + "step": 4158 + }, + { + "epoch": 1.01, + "learning_rate": 1.7494962029448165e-05, + "loss": 0.226, + "step": 4160 + }, + { + "epoch": 1.01, + "learning_rate": 1.749234917258168e-05, + "loss": 0.1786, + "step": 4162 + }, + { + "epoch": 1.01, + "learning_rate": 1.7489735149099143e-05, + "loss": 0.235, + "step": 4164 + }, + { + "epoch": 1.02, + "learning_rate": 1.748711995940757e-05, + "loss": 0.228, + "step": 4166 + }, + { + "epoch": 1.02, + "learning_rate": 1.748450360391417e-05, + "loss": 0.2078, + "step": 4168 + }, + { + "epoch": 1.02, + "learning_rate": 1.7481886083026323e-05, + "loss": 0.2359, + "step": 4170 + }, + { + "epoch": 1.02, + "learning_rate": 1.7479267397151604e-05, + "loss": 0.2137, + "step": 4172 + }, + { + "epoch": 1.02, + "learning_rate": 1.7476647546697758e-05, + "loss": 0.2122, + "step": 4174 + }, + { + "epoch": 1.02, + "learning_rate": 1.747402653207272e-05, + "loss": 0.1959, + "step": 4176 + }, + { + "epoch": 1.02, + "learning_rate": 1.74714043536846e-05, + "loss": 0.2113, + "step": 4178 + }, + { + "epoch": 1.02, + "learning_rate": 1.746878101194169e-05, + "loss": 0.2128, + "step": 4180 + }, + { + "epoch": 1.02, + "learning_rate": 1.7466156507252462e-05, + "loss": 0.1976, + "step": 4182 + }, + { + "epoch": 1.02, + "learning_rate": 1.7463530840025578e-05, + "loss": 0.2239, + "step": 4184 + }, + { + "epoch": 1.02, + "learning_rate": 1.746090401066987e-05, + "loss": 0.219, + "step": 4186 + }, + { + "epoch": 1.02, + "learning_rate": 1.745827601959436e-05, + "loss": 0.2199, + "step": 4188 + }, + { + "epoch": 1.02, + "learning_rate": 1.7455646867208237e-05, + "loss": 0.1982, + "step": 4190 + }, + { + "epoch": 1.02, + "learning_rate": 1.7453016553920893e-05, + "loss": 0.1771, + "step": 4192 + }, + { + "epoch": 1.02, + "learning_rate": 1.7450385080141874e-05, + "loss": 0.2305, + "step": 4194 + }, + { + "epoch": 1.02, + "learning_rate": 1.7447752446280933e-05, + "loss": 0.2265, + "step": 4196 + }, + { + "epoch": 1.02, + "learning_rate": 1.7445118652747983e-05, + "loss": 0.186, + "step": 4198 + }, + { + "epoch": 1.02, + "learning_rate": 1.7442483699953128e-05, + "loss": 0.1697, + "step": 4200 + }, + { + "epoch": 1.02, + "learning_rate": 1.7439847588306653e-05, + "loss": 0.2082, + "step": 4202 + }, + { + "epoch": 1.02, + "learning_rate": 1.7437210318219017e-05, + "loss": 0.2284, + "step": 4204 + }, + { + "epoch": 1.02, + "learning_rate": 1.7434571890100864e-05, + "loss": 0.1919, + "step": 4206 + }, + { + "epoch": 1.03, + "learning_rate": 1.743193230436302e-05, + "loss": 0.1843, + "step": 4208 + }, + { + "epoch": 1.03, + "learning_rate": 1.7429291561416485e-05, + "loss": 0.1933, + "step": 4210 + }, + { + "epoch": 1.03, + "learning_rate": 1.7426649661672446e-05, + "loss": 0.2136, + "step": 4212 + }, + { + "epoch": 1.03, + "learning_rate": 1.7424006605542266e-05, + "loss": 0.2131, + "step": 4214 + }, + { + "epoch": 1.03, + "learning_rate": 1.742136239343749e-05, + "loss": 0.1739, + "step": 4216 + }, + { + "epoch": 1.03, + "learning_rate": 1.7418717025769835e-05, + "loss": 0.212, + "step": 4218 + }, + { + "epoch": 1.03, + "learning_rate": 1.7416070502951215e-05, + "loss": 0.2331, + "step": 4220 + }, + { + "epoch": 1.03, + "learning_rate": 1.741342282539371e-05, + "loss": 0.2165, + "step": 4222 + }, + { + "epoch": 1.03, + "learning_rate": 1.7410773993509585e-05, + "loss": 0.2241, + "step": 4224 + }, + { + "epoch": 1.03, + "learning_rate": 1.740812400771128e-05, + "loss": 0.2422, + "step": 4226 + }, + { + "epoch": 1.03, + "learning_rate": 1.7405472868411416e-05, + "loss": 0.1957, + "step": 4228 + }, + { + "epoch": 1.03, + "learning_rate": 1.74028205760228e-05, + "loss": 0.2168, + "step": 4230 + }, + { + "epoch": 1.03, + "learning_rate": 1.740016713095842e-05, + "loss": 0.1938, + "step": 4232 + }, + { + "epoch": 1.03, + "learning_rate": 1.7397512533631424e-05, + "loss": 0.1813, + "step": 4234 + }, + { + "epoch": 1.03, + "learning_rate": 1.7394856784455164e-05, + "loss": 0.1872, + "step": 4236 + }, + { + "epoch": 1.03, + "learning_rate": 1.7392199883843156e-05, + "loss": 0.2159, + "step": 4238 + }, + { + "epoch": 1.03, + "learning_rate": 1.73895418322091e-05, + "loss": 0.1855, + "step": 4240 + }, + { + "epoch": 1.03, + "learning_rate": 1.7386882629966875e-05, + "loss": 0.2032, + "step": 4242 + }, + { + "epoch": 1.03, + "learning_rate": 1.7384222277530544e-05, + "loss": 0.2146, + "step": 4244 + }, + { + "epoch": 1.03, + "learning_rate": 1.7381560775314336e-05, + "loss": 0.2007, + "step": 4246 + }, + { + "epoch": 1.04, + "learning_rate": 1.737889812373267e-05, + "loss": 0.2126, + "step": 4248 + }, + { + "epoch": 1.04, + "learning_rate": 1.7376234323200144e-05, + "loss": 0.1904, + "step": 4250 + }, + { + "epoch": 1.04, + "learning_rate": 1.737356937413153e-05, + "loss": 0.2157, + "step": 4252 + }, + { + "epoch": 1.04, + "learning_rate": 1.737090327694178e-05, + "loss": 0.175, + "step": 4254 + }, + { + "epoch": 1.04, + "learning_rate": 1.7368236032046027e-05, + "loss": 0.1881, + "step": 4256 + }, + { + "epoch": 1.04, + "learning_rate": 1.7365567639859583e-05, + "loss": 0.1764, + "step": 4258 + }, + { + "epoch": 1.04, + "learning_rate": 1.7362898100797938e-05, + "loss": 0.2059, + "step": 4260 + }, + { + "epoch": 1.04, + "learning_rate": 1.7360227415276755e-05, + "loss": 0.2129, + "step": 4262 + }, + { + "epoch": 1.04, + "learning_rate": 1.735755558371188e-05, + "loss": 0.2048, + "step": 4264 + }, + { + "epoch": 1.04, + "learning_rate": 1.735488260651934e-05, + "loss": 0.1988, + "step": 4266 + }, + { + "epoch": 1.04, + "learning_rate": 1.735220848411534e-05, + "loss": 0.2093, + "step": 4268 + }, + { + "epoch": 1.04, + "learning_rate": 1.734953321691626e-05, + "loss": 0.1806, + "step": 4270 + }, + { + "epoch": 1.04, + "learning_rate": 1.734685680533866e-05, + "loss": 0.2161, + "step": 4272 + }, + { + "epoch": 1.04, + "learning_rate": 1.7344179249799274e-05, + "loss": 0.1984, + "step": 4274 + }, + { + "epoch": 1.04, + "learning_rate": 1.734150055071502e-05, + "loss": 0.2153, + "step": 4276 + }, + { + "epoch": 1.04, + "learning_rate": 1.7338820708502994e-05, + "loss": 0.2043, + "step": 4278 + }, + { + "epoch": 1.04, + "learning_rate": 1.733613972358046e-05, + "loss": 0.1908, + "step": 4280 + }, + { + "epoch": 1.04, + "learning_rate": 1.7333457596364885e-05, + "loss": 0.195, + "step": 4282 + }, + { + "epoch": 1.04, + "learning_rate": 1.7330774327273882e-05, + "loss": 0.205, + "step": 4284 + }, + { + "epoch": 1.04, + "learning_rate": 1.7328089916725257e-05, + "loss": 0.2049, + "step": 4286 + }, + { + "epoch": 1.04, + "learning_rate": 1.7325404365136997e-05, + "loss": 0.2158, + "step": 4288 + }, + { + "epoch": 1.05, + "learning_rate": 1.7322717672927266e-05, + "loss": 0.1865, + "step": 4290 + }, + { + "epoch": 1.05, + "learning_rate": 1.7320029840514395e-05, + "loss": 0.2017, + "step": 4292 + }, + { + "epoch": 1.05, + "learning_rate": 1.7317340868316907e-05, + "loss": 0.1783, + "step": 4294 + }, + { + "epoch": 1.05, + "learning_rate": 1.7314650756753494e-05, + "loss": 0.1964, + "step": 4296 + }, + { + "epoch": 1.05, + "learning_rate": 1.731195950624302e-05, + "loss": 0.2124, + "step": 4298 + }, + { + "epoch": 1.05, + "learning_rate": 1.7309267117204545e-05, + "loss": 0.2162, + "step": 4300 + }, + { + "epoch": 1.05, + "learning_rate": 1.7306573590057284e-05, + "loss": 0.1979, + "step": 4302 + }, + { + "epoch": 1.05, + "learning_rate": 1.7303878925220646e-05, + "loss": 0.1782, + "step": 4304 + }, + { + "epoch": 1.05, + "learning_rate": 1.730118312311421e-05, + "loss": 0.2197, + "step": 4306 + }, + { + "epoch": 1.05, + "learning_rate": 1.729848618415773e-05, + "loss": 0.1779, + "step": 4308 + }, + { + "epoch": 1.05, + "learning_rate": 1.7295788108771145e-05, + "loss": 0.2027, + "step": 4310 + }, + { + "epoch": 1.05, + "learning_rate": 1.729308889737456e-05, + "loss": 0.1925, + "step": 4312 + }, + { + "epoch": 1.05, + "learning_rate": 1.7290388550388267e-05, + "loss": 0.2162, + "step": 4314 + }, + { + "epoch": 1.05, + "learning_rate": 1.728768706823273e-05, + "loss": 0.1943, + "step": 4316 + }, + { + "epoch": 1.05, + "learning_rate": 1.728498445132859e-05, + "loss": 0.1996, + "step": 4318 + }, + { + "epoch": 1.05, + "learning_rate": 1.728228070009667e-05, + "loss": 0.1945, + "step": 4320 + }, + { + "epoch": 1.05, + "learning_rate": 1.7279575814957952e-05, + "loss": 0.2198, + "step": 4322 + }, + { + "epoch": 1.05, + "learning_rate": 1.7276869796333616e-05, + "loss": 0.2125, + "step": 4324 + }, + { + "epoch": 1.05, + "learning_rate": 1.7274162644645013e-05, + "loss": 0.2211, + "step": 4326 + }, + { + "epoch": 1.05, + "learning_rate": 1.7271454360313663e-05, + "loss": 0.2254, + "step": 4328 + }, + { + "epoch": 1.06, + "learning_rate": 1.7268744943761264e-05, + "loss": 0.2025, + "step": 4330 + }, + { + "epoch": 1.06, + "learning_rate": 1.7266034395409698e-05, + "loss": 0.2073, + "step": 4332 + }, + { + "epoch": 1.06, + "learning_rate": 1.7263322715681015e-05, + "loss": 0.215, + "step": 4334 + }, + { + "epoch": 1.06, + "learning_rate": 1.7260609904997445e-05, + "loss": 0.247, + "step": 4336 + }, + { + "epoch": 1.06, + "learning_rate": 1.725789596378139e-05, + "loss": 0.1902, + "step": 4338 + }, + { + "epoch": 1.06, + "learning_rate": 1.725518089245544e-05, + "loss": 0.1993, + "step": 4340 + }, + { + "epoch": 1.06, + "learning_rate": 1.725246469144235e-05, + "loss": 0.1685, + "step": 4342 + }, + { + "epoch": 1.06, + "learning_rate": 1.7249747361165044e-05, + "loss": 0.1995, + "step": 4344 + }, + { + "epoch": 1.06, + "learning_rate": 1.724702890204664e-05, + "loss": 0.2143, + "step": 4346 + }, + { + "epoch": 1.06, + "learning_rate": 1.724430931451042e-05, + "loss": 0.2039, + "step": 4348 + }, + { + "epoch": 1.06, + "learning_rate": 1.7241588598979845e-05, + "loss": 0.2046, + "step": 4350 + }, + { + "epoch": 1.06, + "learning_rate": 1.723886675587855e-05, + "loss": 0.2055, + "step": 4352 + }, + { + "epoch": 1.06, + "learning_rate": 1.7236143785630347e-05, + "loss": 0.174, + "step": 4354 + }, + { + "epoch": 1.06, + "learning_rate": 1.7233419688659228e-05, + "loss": 0.1643, + "step": 4356 + }, + { + "epoch": 1.06, + "learning_rate": 1.7230694465389352e-05, + "loss": 0.2063, + "step": 4358 + }, + { + "epoch": 1.06, + "learning_rate": 1.7227968116245058e-05, + "loss": 0.2005, + "step": 4360 + }, + { + "epoch": 1.06, + "learning_rate": 1.7225240641650854e-05, + "loss": 0.2329, + "step": 4362 + }, + { + "epoch": 1.06, + "learning_rate": 1.7222512042031438e-05, + "loss": 0.219, + "step": 4364 + }, + { + "epoch": 1.06, + "learning_rate": 1.7219782317811663e-05, + "loss": 0.2296, + "step": 4366 + }, + { + "epoch": 1.06, + "learning_rate": 1.721705146941658e-05, + "loss": 0.1989, + "step": 4368 + }, + { + "epoch": 1.06, + "learning_rate": 1.721431949727139e-05, + "loss": 0.2158, + "step": 4370 + }, + { + "epoch": 1.07, + "learning_rate": 1.7211586401801498e-05, + "loss": 0.2015, + "step": 4372 + }, + { + "epoch": 1.07, + "learning_rate": 1.7208852183432455e-05, + "loss": 0.2065, + "step": 4374 + }, + { + "epoch": 1.07, + "learning_rate": 1.720611684259e-05, + "loss": 0.1761, + "step": 4376 + }, + { + "epoch": 1.07, + "learning_rate": 1.7203380379700053e-05, + "loss": 0.1877, + "step": 4378 + }, + { + "epoch": 1.07, + "learning_rate": 1.7200642795188697e-05, + "loss": 0.1953, + "step": 4380 + }, + { + "epoch": 1.07, + "learning_rate": 1.71979040894822e-05, + "loss": 0.1861, + "step": 4382 + }, + { + "epoch": 1.07, + "learning_rate": 1.7195164263006995e-05, + "loss": 0.2139, + "step": 4384 + }, + { + "epoch": 1.07, + "learning_rate": 1.7192423316189696e-05, + "loss": 0.1917, + "step": 4386 + }, + { + "epoch": 1.07, + "learning_rate": 1.718968124945709e-05, + "loss": 0.187, + "step": 4388 + }, + { + "epoch": 1.07, + "learning_rate": 1.7186938063236133e-05, + "loss": 0.2014, + "step": 4390 + }, + { + "epoch": 1.07, + "learning_rate": 1.7184193757953963e-05, + "loss": 0.2031, + "step": 4392 + }, + { + "epoch": 1.07, + "learning_rate": 1.7181448334037894e-05, + "loss": 0.1763, + "step": 4394 + }, + { + "epoch": 1.07, + "learning_rate": 1.7178701791915404e-05, + "loss": 0.196, + "step": 4396 + }, + { + "epoch": 1.07, + "learning_rate": 1.717595413201415e-05, + "loss": 0.2152, + "step": 4398 + }, + { + "epoch": 1.07, + "learning_rate": 1.717320535476197e-05, + "loss": 0.1932, + "step": 4400 + }, + { + "epoch": 1.07, + "learning_rate": 1.7170455460586858e-05, + "loss": 0.2013, + "step": 4402 + }, + { + "epoch": 1.07, + "learning_rate": 1.7167704449917006e-05, + "loss": 0.2072, + "step": 4404 + }, + { + "epoch": 1.07, + "learning_rate": 1.716495232318076e-05, + "loss": 0.1828, + "step": 4406 + }, + { + "epoch": 1.07, + "learning_rate": 1.7162199080806652e-05, + "loss": 0.1907, + "step": 4408 + }, + { + "epoch": 1.07, + "learning_rate": 1.715944472322338e-05, + "loss": 0.2076, + "step": 4410 + }, + { + "epoch": 1.08, + "learning_rate": 1.7156689250859817e-05, + "loss": 0.193, + "step": 4412 + }, + { + "epoch": 1.08, + "learning_rate": 1.7153932664145015e-05, + "loss": 0.1934, + "step": 4414 + }, + { + "epoch": 1.08, + "learning_rate": 1.7151174963508192e-05, + "loss": 0.1976, + "step": 4416 + }, + { + "epoch": 1.08, + "learning_rate": 1.7148416149378743e-05, + "loss": 0.1774, + "step": 4418 + }, + { + "epoch": 1.08, + "learning_rate": 1.714565622218624e-05, + "loss": 0.196, + "step": 4420 + }, + { + "epoch": 1.08, + "learning_rate": 1.714289518236042e-05, + "loss": 0.2089, + "step": 4422 + }, + { + "epoch": 1.08, + "learning_rate": 1.7140133030331205e-05, + "loss": 0.1858, + "step": 4424 + }, + { + "epoch": 1.08, + "learning_rate": 1.713736976652868e-05, + "loss": 0.1916, + "step": 4426 + }, + { + "epoch": 1.08, + "learning_rate": 1.71346053913831e-05, + "loss": 0.2183, + "step": 4428 + }, + { + "epoch": 1.08, + "learning_rate": 1.713183990532491e-05, + "loss": 0.2112, + "step": 4430 + }, + { + "epoch": 1.08, + "learning_rate": 1.712907330878471e-05, + "loss": 0.1865, + "step": 4432 + }, + { + "epoch": 1.08, + "learning_rate": 1.7126305602193282e-05, + "loss": 0.1848, + "step": 4434 + }, + { + "epoch": 1.08, + "learning_rate": 1.7123536785981582e-05, + "loss": 0.2214, + "step": 4436 + }, + { + "epoch": 1.08, + "learning_rate": 1.712076686058073e-05, + "loss": 0.1895, + "step": 4438 + }, + { + "epoch": 1.08, + "learning_rate": 1.7117995826422028e-05, + "loss": 0.2154, + "step": 4440 + }, + { + "epoch": 1.08, + "learning_rate": 1.7115223683936948e-05, + "loss": 0.201, + "step": 4442 + }, + { + "epoch": 1.08, + "learning_rate": 1.711245043355713e-05, + "loss": 0.2141, + "step": 4444 + }, + { + "epoch": 1.08, + "learning_rate": 1.7109676075714396e-05, + "loss": 0.1983, + "step": 4446 + }, + { + "epoch": 1.08, + "learning_rate": 1.710690061084073e-05, + "loss": 0.2037, + "step": 4448 + }, + { + "epoch": 1.08, + "learning_rate": 1.7104124039368296e-05, + "loss": 0.1779, + "step": 4450 + }, + { + "epoch": 1.08, + "learning_rate": 1.7101346361729426e-05, + "loss": 0.208, + "step": 4452 + }, + { + "epoch": 1.09, + "learning_rate": 1.7098567578356626e-05, + "loss": 0.2189, + "step": 4454 + }, + { + "epoch": 1.09, + "learning_rate": 1.709578768968258e-05, + "loss": 0.1942, + "step": 4456 + }, + { + "epoch": 1.09, + "learning_rate": 1.7093006696140123e-05, + "loss": 0.1858, + "step": 4458 + }, + { + "epoch": 1.09, + "learning_rate": 1.7090224598162287e-05, + "loss": 0.1859, + "step": 4460 + }, + { + "epoch": 1.09, + "learning_rate": 1.7087441396182267e-05, + "loss": 0.1605, + "step": 4462 + }, + { + "epoch": 1.09, + "learning_rate": 1.708465709063342e-05, + "loss": 0.2249, + "step": 4464 + }, + { + "epoch": 1.09, + "learning_rate": 1.70818716819493e-05, + "loss": 0.2052, + "step": 4466 + }, + { + "epoch": 1.09, + "learning_rate": 1.7079085170563602e-05, + "loss": 0.1763, + "step": 4468 + }, + { + "epoch": 1.09, + "learning_rate": 1.707629755691021e-05, + "loss": 0.1703, + "step": 4470 + }, + { + "epoch": 1.09, + "learning_rate": 1.707350884142318e-05, + "loss": 0.1929, + "step": 4472 + }, + { + "epoch": 1.09, + "learning_rate": 1.7070719024536736e-05, + "loss": 0.1979, + "step": 4474 + }, + { + "epoch": 1.09, + "learning_rate": 1.7067928106685273e-05, + "loss": 0.1613, + "step": 4476 + }, + { + "epoch": 1.09, + "learning_rate": 1.7065136088303356e-05, + "loss": 0.192, + "step": 4478 + }, + { + "epoch": 1.09, + "learning_rate": 1.7062342969825724e-05, + "loss": 0.1922, + "step": 4480 + }, + { + "epoch": 1.09, + "learning_rate": 1.705954875168729e-05, + "loss": 0.1291, + "step": 4482 + }, + { + "epoch": 1.09, + "learning_rate": 1.7056753434323134e-05, + "loss": 0.2087, + "step": 4484 + }, + { + "epoch": 1.09, + "learning_rate": 1.7053957018168506e-05, + "loss": 0.2033, + "step": 4486 + }, + { + "epoch": 1.09, + "learning_rate": 1.7051159503658833e-05, + "loss": 0.1272, + "step": 4488 + }, + { + "epoch": 1.09, + "learning_rate": 1.704836089122971e-05, + "loss": 0.1954, + "step": 4490 + }, + { + "epoch": 1.09, + "learning_rate": 1.7045561181316894e-05, + "loss": 0.1574, + "step": 4492 + }, + { + "epoch": 1.1, + "learning_rate": 1.7042760374356327e-05, + "loss": 0.1813, + "step": 4494 + }, + { + "epoch": 1.1, + "learning_rate": 1.7039958470784125e-05, + "loss": 0.1815, + "step": 4496 + }, + { + "epoch": 1.1, + "learning_rate": 1.7037155471036547e-05, + "loss": 0.1988, + "step": 4498 + }, + { + "epoch": 1.1, + "learning_rate": 1.7034351375550055e-05, + "loss": 0.1873, + "step": 4500 + }, + { + "epoch": 1.1, + "learning_rate": 1.7031546184761264e-05, + "loss": 0.1639, + "step": 4502 + }, + { + "epoch": 1.1, + "learning_rate": 1.7028739899106966e-05, + "loss": 0.1751, + "step": 4504 + }, + { + "epoch": 1.1, + "learning_rate": 1.702593251902412e-05, + "loss": 0.1922, + "step": 4506 + }, + { + "epoch": 1.1, + "learning_rate": 1.7023124044949855e-05, + "loss": 0.1884, + "step": 4508 + }, + { + "epoch": 1.1, + "learning_rate": 1.7020314477321472e-05, + "loss": 0.2247, + "step": 4510 + }, + { + "epoch": 1.1, + "learning_rate": 1.7017503816576444e-05, + "loss": 0.1862, + "step": 4512 + }, + { + "epoch": 1.1, + "learning_rate": 1.7014692063152408e-05, + "loss": 0.2017, + "step": 4514 + }, + { + "epoch": 1.1, + "learning_rate": 1.7011879217487183e-05, + "loss": 0.2115, + "step": 4516 + }, + { + "epoch": 1.1, + "learning_rate": 1.7009065280018745e-05, + "loss": 0.2063, + "step": 4518 + }, + { + "epoch": 1.1, + "learning_rate": 1.700625025118525e-05, + "loss": 0.1763, + "step": 4520 + }, + { + "epoch": 1.1, + "learning_rate": 1.700343413142501e-05, + "loss": 0.1738, + "step": 4522 + }, + { + "epoch": 1.1, + "learning_rate": 1.7000616921176528e-05, + "loss": 0.1802, + "step": 4524 + }, + { + "epoch": 1.1, + "learning_rate": 1.6997798620878457e-05, + "loss": 0.2135, + "step": 4526 + }, + { + "epoch": 1.1, + "learning_rate": 1.6994979230969634e-05, + "loss": 0.185, + "step": 4528 + }, + { + "epoch": 1.1, + "learning_rate": 1.6992158751889055e-05, + "loss": 0.2059, + "step": 4530 + }, + { + "epoch": 1.1, + "learning_rate": 1.698933718407589e-05, + "loss": 0.1831, + "step": 4532 + }, + { + "epoch": 1.1, + "learning_rate": 1.6986514527969486e-05, + "loss": 0.1846, + "step": 4534 + }, + { + "epoch": 1.11, + "learning_rate": 1.6983690784009343e-05, + "loss": 0.1925, + "step": 4536 + }, + { + "epoch": 1.11, + "learning_rate": 1.6980865952635143e-05, + "loss": 0.1735, + "step": 4538 + }, + { + "epoch": 1.11, + "learning_rate": 1.697804003428673e-05, + "loss": 0.1924, + "step": 4540 + }, + { + "epoch": 1.11, + "learning_rate": 1.697521302940413e-05, + "loss": 0.162, + "step": 4542 + }, + { + "epoch": 1.11, + "learning_rate": 1.697238493842752e-05, + "loss": 0.2009, + "step": 4544 + }, + { + "epoch": 1.11, + "learning_rate": 1.6969555761797266e-05, + "loss": 0.2054, + "step": 4546 + }, + { + "epoch": 1.11, + "learning_rate": 1.6966725499953885e-05, + "loss": 0.1751, + "step": 4548 + }, + { + "epoch": 1.11, + "learning_rate": 1.696389415333807e-05, + "loss": 0.1728, + "step": 4550 + }, + { + "epoch": 1.11, + "learning_rate": 1.6961061722390683e-05, + "loss": 0.1744, + "step": 4552 + }, + { + "epoch": 1.11, + "learning_rate": 1.6958228207552755e-05, + "loss": 0.2016, + "step": 4554 + }, + { + "epoch": 1.11, + "learning_rate": 1.6955393609265494e-05, + "loss": 0.1812, + "step": 4556 + }, + { + "epoch": 1.11, + "learning_rate": 1.695255792797026e-05, + "loss": 0.1804, + "step": 4558 + }, + { + "epoch": 1.11, + "learning_rate": 1.6949721164108594e-05, + "loss": 0.1967, + "step": 4560 + }, + { + "epoch": 1.11, + "learning_rate": 1.69468833181222e-05, + "loss": 0.1877, + "step": 4562 + }, + { + "epoch": 1.11, + "learning_rate": 1.694404439045295e-05, + "loss": 0.1795, + "step": 4564 + }, + { + "epoch": 1.11, + "learning_rate": 1.694120438154289e-05, + "loss": 0.232, + "step": 4566 + }, + { + "epoch": 1.11, + "learning_rate": 1.6938363291834234e-05, + "loss": 0.2077, + "step": 4568 + }, + { + "epoch": 1.11, + "learning_rate": 1.693552112176936e-05, + "loss": 0.1965, + "step": 4570 + }, + { + "epoch": 1.11, + "learning_rate": 1.6932677871790807e-05, + "loss": 0.1888, + "step": 4572 + }, + { + "epoch": 1.11, + "learning_rate": 1.6929833542341303e-05, + "loss": 0.163, + "step": 4574 + }, + { + "epoch": 1.12, + "learning_rate": 1.6926988133863728e-05, + "loss": 0.1926, + "step": 4576 + }, + { + "epoch": 1.12, + "learning_rate": 1.6924141646801126e-05, + "loss": 0.2078, + "step": 4578 + }, + { + "epoch": 1.12, + "learning_rate": 1.6921294081596727e-05, + "loss": 0.202, + "step": 4580 + }, + { + "epoch": 1.12, + "learning_rate": 1.6918445438693912e-05, + "loss": 0.1746, + "step": 4582 + }, + { + "epoch": 1.12, + "learning_rate": 1.6915595718536244e-05, + "loss": 0.2046, + "step": 4584 + }, + { + "epoch": 1.12, + "learning_rate": 1.6912744921567437e-05, + "loss": 0.162, + "step": 4586 + }, + { + "epoch": 1.12, + "learning_rate": 1.690989304823139e-05, + "loss": 0.2115, + "step": 4588 + }, + { + "epoch": 1.12, + "learning_rate": 1.6907040098972153e-05, + "loss": 0.1968, + "step": 4590 + }, + { + "epoch": 1.12, + "learning_rate": 1.6904186074233962e-05, + "loss": 0.1774, + "step": 4592 + }, + { + "epoch": 1.12, + "learning_rate": 1.6901330974461203e-05, + "loss": 0.1958, + "step": 4594 + }, + { + "epoch": 1.12, + "learning_rate": 1.6898474800098438e-05, + "loss": 0.2087, + "step": 4596 + }, + { + "epoch": 1.12, + "learning_rate": 1.6895617551590398e-05, + "loss": 0.1676, + "step": 4598 + }, + { + "epoch": 1.12, + "learning_rate": 1.689275922938198e-05, + "loss": 0.1793, + "step": 4600 + }, + { + "epoch": 1.12, + "learning_rate": 1.6889899833918237e-05, + "loss": 0.184, + "step": 4602 + }, + { + "epoch": 1.12, + "learning_rate": 1.6887039365644407e-05, + "loss": 0.1909, + "step": 4604 + }, + { + "epoch": 1.12, + "learning_rate": 1.6884177825005886e-05, + "loss": 0.2137, + "step": 4606 + }, + { + "epoch": 1.12, + "learning_rate": 1.6881315212448238e-05, + "loss": 0.1776, + "step": 4608 + }, + { + "epoch": 1.12, + "learning_rate": 1.687845152841719e-05, + "loss": 0.182, + "step": 4610 + }, + { + "epoch": 1.12, + "learning_rate": 1.6875586773358645e-05, + "loss": 0.1627, + "step": 4612 + }, + { + "epoch": 1.12, + "learning_rate": 1.6872720947718663e-05, + "loss": 0.194, + "step": 4614 + }, + { + "epoch": 1.12, + "learning_rate": 1.6869854051943476e-05, + "loss": 0.1551, + "step": 4616 + }, + { + "epoch": 1.13, + "learning_rate": 1.686698608647948e-05, + "loss": 0.1795, + "step": 4618 + }, + { + "epoch": 1.13, + "learning_rate": 1.6864117051773242e-05, + "loss": 0.2038, + "step": 4620 + }, + { + "epoch": 1.13, + "learning_rate": 1.6861246948271494e-05, + "loss": 0.1884, + "step": 4622 + }, + { + "epoch": 1.13, + "learning_rate": 1.685837577642113e-05, + "loss": 0.2006, + "step": 4624 + }, + { + "epoch": 1.13, + "learning_rate": 1.685550353666921e-05, + "loss": 0.2014, + "step": 4626 + }, + { + "epoch": 1.13, + "learning_rate": 1.685263022946297e-05, + "loss": 0.2005, + "step": 4628 + }, + { + "epoch": 1.13, + "learning_rate": 1.6849755855249803e-05, + "loss": 0.1806, + "step": 4630 + }, + { + "epoch": 1.13, + "learning_rate": 1.6846880414477275e-05, + "loss": 0.1985, + "step": 4632 + }, + { + "epoch": 1.13, + "learning_rate": 1.6844003907593104e-05, + "loss": 0.2016, + "step": 4634 + }, + { + "epoch": 1.13, + "learning_rate": 1.68411263350452e-05, + "loss": 0.1854, + "step": 4636 + }, + { + "epoch": 1.13, + "learning_rate": 1.6838247697281604e-05, + "loss": 0.2079, + "step": 4638 + }, + { + "epoch": 1.13, + "learning_rate": 1.6835367994750558e-05, + "loss": 0.1965, + "step": 4640 + }, + { + "epoch": 1.13, + "learning_rate": 1.6832487227900448e-05, + "loss": 0.1885, + "step": 4642 + }, + { + "epoch": 1.13, + "learning_rate": 1.682960539717983e-05, + "loss": 0.1874, + "step": 4644 + }, + { + "epoch": 1.13, + "learning_rate": 1.6826722503037424e-05, + "loss": 0.176, + "step": 4646 + }, + { + "epoch": 1.13, + "learning_rate": 1.682383854592213e-05, + "loss": 0.2045, + "step": 4648 + }, + { + "epoch": 1.13, + "learning_rate": 1.682095352628299e-05, + "loss": 0.1989, + "step": 4650 + }, + { + "epoch": 1.13, + "learning_rate": 1.6818067444569227e-05, + "loss": 0.1943, + "step": 4652 + }, + { + "epoch": 1.13, + "learning_rate": 1.681518030123023e-05, + "loss": 0.1746, + "step": 4654 + }, + { + "epoch": 1.13, + "learning_rate": 1.6812292096715546e-05, + "loss": 0.1709, + "step": 4656 + }, + { + "epoch": 1.13, + "learning_rate": 1.680940283147489e-05, + "loss": 0.1619, + "step": 4658 + }, + { + "epoch": 1.14, + "learning_rate": 1.6806512505958148e-05, + "loss": 0.2099, + "step": 4660 + }, + { + "epoch": 1.14, + "learning_rate": 1.6803621120615357e-05, + "loss": 0.1606, + "step": 4662 + }, + { + "epoch": 1.14, + "learning_rate": 1.6800728675896737e-05, + "loss": 0.1682, + "step": 4664 + }, + { + "epoch": 1.14, + "learning_rate": 1.679783517225266e-05, + "loss": 0.2007, + "step": 4666 + }, + { + "epoch": 1.14, + "learning_rate": 1.6794940610133665e-05, + "loss": 0.1717, + "step": 4668 + }, + { + "epoch": 1.14, + "learning_rate": 1.6792044989990453e-05, + "loss": 0.1863, + "step": 4670 + }, + { + "epoch": 1.14, + "learning_rate": 1.6789148312273905e-05, + "loss": 0.1652, + "step": 4672 + }, + { + "epoch": 1.14, + "learning_rate": 1.678625057743505e-05, + "loss": 0.1698, + "step": 4674 + }, + { + "epoch": 1.14, + "learning_rate": 1.678335178592509e-05, + "loss": 0.184, + "step": 4676 + }, + { + "epoch": 1.14, + "learning_rate": 1.6780451938195382e-05, + "loss": 0.163, + "step": 4678 + }, + { + "epoch": 1.14, + "learning_rate": 1.677755103469746e-05, + "loss": 0.1996, + "step": 4680 + }, + { + "epoch": 1.14, + "learning_rate": 1.677464907588302e-05, + "loss": 0.2059, + "step": 4682 + }, + { + "epoch": 1.14, + "learning_rate": 1.6771746062203914e-05, + "loss": 0.1709, + "step": 4684 + }, + { + "epoch": 1.14, + "learning_rate": 1.6768841994112165e-05, + "loss": 0.1587, + "step": 4686 + }, + { + "epoch": 1.14, + "learning_rate": 1.676593687205996e-05, + "loss": 0.1776, + "step": 4688 + }, + { + "epoch": 1.14, + "learning_rate": 1.676303069649964e-05, + "loss": 0.1927, + "step": 4690 + }, + { + "epoch": 1.14, + "learning_rate": 1.6760123467883733e-05, + "loss": 0.1881, + "step": 4692 + }, + { + "epoch": 1.14, + "learning_rate": 1.6757215186664907e-05, + "loss": 0.1775, + "step": 4694 + }, + { + "epoch": 1.14, + "learning_rate": 1.675430585329601e-05, + "loss": 0.1765, + "step": 4696 + }, + { + "epoch": 1.14, + "learning_rate": 1.6751395468230042e-05, + "loss": 0.2045, + "step": 4698 + }, + { + "epoch": 1.15, + "learning_rate": 1.6748484031920173e-05, + "loss": 0.1908, + "step": 4700 + }, + { + "epoch": 1.15, + "learning_rate": 1.6745571544819735e-05, + "loss": 0.1479, + "step": 4702 + }, + { + "epoch": 1.15, + "learning_rate": 1.674265800738223e-05, + "loss": 0.1763, + "step": 4704 + }, + { + "epoch": 1.15, + "learning_rate": 1.6739743420061314e-05, + "loss": 0.1842, + "step": 4706 + }, + { + "epoch": 1.15, + "learning_rate": 1.673682778331081e-05, + "loss": 0.1549, + "step": 4708 + }, + { + "epoch": 1.15, + "learning_rate": 1.6733911097584707e-05, + "loss": 0.1494, + "step": 4710 + }, + { + "epoch": 1.15, + "learning_rate": 1.673099336333715e-05, + "loss": 0.1831, + "step": 4712 + }, + { + "epoch": 1.15, + "learning_rate": 1.6728074581022462e-05, + "loss": 0.1716, + "step": 4714 + }, + { + "epoch": 1.15, + "learning_rate": 1.672515475109511e-05, + "loss": 0.1693, + "step": 4716 + }, + { + "epoch": 1.15, + "learning_rate": 1.6722233874009745e-05, + "loss": 0.1735, + "step": 4718 + }, + { + "epoch": 1.15, + "learning_rate": 1.6719311950221158e-05, + "loss": 0.2006, + "step": 4720 + }, + { + "epoch": 1.15, + "learning_rate": 1.671638898018432e-05, + "loss": 0.1862, + "step": 4722 + }, + { + "epoch": 1.15, + "learning_rate": 1.6713464964354363e-05, + "loss": 0.1965, + "step": 4724 + }, + { + "epoch": 1.15, + "learning_rate": 1.671053990318657e-05, + "loss": 0.1649, + "step": 4726 + }, + { + "epoch": 1.15, + "learning_rate": 1.670761379713641e-05, + "loss": 0.1991, + "step": 4728 + }, + { + "epoch": 1.15, + "learning_rate": 1.670468664665948e-05, + "loss": 0.1855, + "step": 4730 + }, + { + "epoch": 1.15, + "learning_rate": 1.670175845221158e-05, + "loss": 0.1344, + "step": 4732 + }, + { + "epoch": 1.15, + "learning_rate": 1.669882921424864e-05, + "loss": 0.1736, + "step": 4734 + }, + { + "epoch": 1.15, + "learning_rate": 1.6695898933226768e-05, + "loss": 0.163, + "step": 4736 + }, + { + "epoch": 1.15, + "learning_rate": 1.669296760960223e-05, + "loss": 0.2212, + "step": 4738 + }, + { + "epoch": 1.15, + "learning_rate": 1.6690035243831455e-05, + "loss": 0.1596, + "step": 4740 + }, + { + "epoch": 1.16, + "learning_rate": 1.668710183637104e-05, + "loss": 0.1738, + "step": 4742 + }, + { + "epoch": 1.16, + "learning_rate": 1.668416738767773e-05, + "loss": 0.1881, + "step": 4744 + }, + { + "epoch": 1.16, + "learning_rate": 1.668123189820845e-05, + "loss": 0.1818, + "step": 4746 + }, + { + "epoch": 1.16, + "learning_rate": 1.6678295368420273e-05, + "loss": 0.208, + "step": 4748 + }, + { + "epoch": 1.16, + "learning_rate": 1.667535779877044e-05, + "loss": 0.1328, + "step": 4750 + }, + { + "epoch": 1.16, + "learning_rate": 1.6672419189716354e-05, + "loss": 0.1954, + "step": 4752 + }, + { + "epoch": 1.16, + "learning_rate": 1.6669479541715577e-05, + "loss": 0.2125, + "step": 4754 + }, + { + "epoch": 1.16, + "learning_rate": 1.666653885522584e-05, + "loss": 0.1761, + "step": 4756 + }, + { + "epoch": 1.16, + "learning_rate": 1.6663597130705023e-05, + "loss": 0.1873, + "step": 4758 + }, + { + "epoch": 1.16, + "learning_rate": 1.6660654368611183e-05, + "loss": 0.1924, + "step": 4760 + }, + { + "epoch": 1.16, + "learning_rate": 1.6657710569402522e-05, + "loss": 0.1692, + "step": 4762 + }, + { + "epoch": 1.16, + "learning_rate": 1.665476573353742e-05, + "loss": 0.1395, + "step": 4764 + }, + { + "epoch": 1.16, + "learning_rate": 1.6651819861474402e-05, + "loss": 0.1405, + "step": 4766 + }, + { + "epoch": 1.16, + "learning_rate": 1.6648872953672172e-05, + "loss": 0.1608, + "step": 4768 + }, + { + "epoch": 1.16, + "learning_rate": 1.6645925010589576e-05, + "loss": 0.173, + "step": 4770 + }, + { + "epoch": 1.16, + "learning_rate": 1.664297603268564e-05, + "loss": 0.1847, + "step": 4772 + }, + { + "epoch": 1.16, + "learning_rate": 1.664002602041954e-05, + "loss": 0.1991, + "step": 4774 + }, + { + "epoch": 1.16, + "learning_rate": 1.6637074974250614e-05, + "loss": 0.1877, + "step": 4776 + }, + { + "epoch": 1.16, + "learning_rate": 1.663412289463836e-05, + "loss": 0.1613, + "step": 4778 + }, + { + "epoch": 1.16, + "learning_rate": 1.6631169782042446e-05, + "loss": 0.1652, + "step": 4780 + }, + { + "epoch": 1.17, + "learning_rate": 1.662821563692269e-05, + "loss": 0.2009, + "step": 4782 + }, + { + "epoch": 1.17, + "learning_rate": 1.6625260459739077e-05, + "loss": 0.1781, + "step": 4784 + }, + { + "epoch": 1.17, + "learning_rate": 1.6622304250951745e-05, + "loss": 0.1937, + "step": 4786 + }, + { + "epoch": 1.17, + "learning_rate": 1.661934701102101e-05, + "loss": 0.19, + "step": 4788 + }, + { + "epoch": 1.17, + "learning_rate": 1.6616388740407325e-05, + "loss": 0.1968, + "step": 4790 + }, + { + "epoch": 1.17, + "learning_rate": 1.661342943957132e-05, + "loss": 0.171, + "step": 4792 + }, + { + "epoch": 1.17, + "learning_rate": 1.661046910897378e-05, + "loss": 0.1429, + "step": 4794 + }, + { + "epoch": 1.17, + "learning_rate": 1.660750774907565e-05, + "loss": 0.1749, + "step": 4796 + }, + { + "epoch": 1.17, + "learning_rate": 1.660454536033804e-05, + "loss": 0.1703, + "step": 4798 + }, + { + "epoch": 1.17, + "learning_rate": 1.6601581943222217e-05, + "loss": 0.1988, + "step": 4800 + }, + { + "epoch": 1.17, + "learning_rate": 1.6598617498189604e-05, + "loss": 0.1599, + "step": 4802 + }, + { + "epoch": 1.17, + "learning_rate": 1.6595652025701788e-05, + "loss": 0.18, + "step": 4804 + }, + { + "epoch": 1.17, + "learning_rate": 1.6592685526220516e-05, + "loss": 0.1812, + "step": 4806 + }, + { + "epoch": 1.17, + "learning_rate": 1.65897180002077e-05, + "loss": 0.1677, + "step": 4808 + }, + { + "epoch": 1.17, + "learning_rate": 1.65867494481254e-05, + "loss": 0.169, + "step": 4810 + }, + { + "epoch": 1.17, + "learning_rate": 1.6583779870435845e-05, + "loss": 0.1723, + "step": 4812 + }, + { + "epoch": 1.17, + "learning_rate": 1.658080926760142e-05, + "loss": 0.1933, + "step": 4814 + }, + { + "epoch": 1.17, + "learning_rate": 1.6577837640084672e-05, + "loss": 0.1681, + "step": 4816 + }, + { + "epoch": 1.17, + "learning_rate": 1.65748649883483e-05, + "loss": 0.2008, + "step": 4818 + }, + { + "epoch": 1.17, + "learning_rate": 1.657189131285518e-05, + "loss": 0.1612, + "step": 4820 + }, + { + "epoch": 1.17, + "learning_rate": 1.6568916614068323e-05, + "loss": 0.1622, + "step": 4822 + }, + { + "epoch": 1.18, + "learning_rate": 1.6565940892450925e-05, + "loss": 0.1727, + "step": 4824 + }, + { + "epoch": 1.18, + "learning_rate": 1.6562964148466318e-05, + "loss": 0.1719, + "step": 4826 + }, + { + "epoch": 1.18, + "learning_rate": 1.655998638257801e-05, + "loss": 0.1945, + "step": 4828 + }, + { + "epoch": 1.18, + "learning_rate": 1.6557007595249663e-05, + "loss": 0.1706, + "step": 4830 + }, + { + "epoch": 1.18, + "learning_rate": 1.655402778694509e-05, + "loss": 0.1743, + "step": 4832 + }, + { + "epoch": 1.18, + "learning_rate": 1.6551046958128275e-05, + "loss": 0.19, + "step": 4834 + }, + { + "epoch": 1.18, + "learning_rate": 1.654806510926336e-05, + "loss": 0.1869, + "step": 4836 + }, + { + "epoch": 1.18, + "learning_rate": 1.654508224081463e-05, + "loss": 0.1662, + "step": 4838 + }, + { + "epoch": 1.18, + "learning_rate": 1.654209835324655e-05, + "loss": 0.1768, + "step": 4840 + }, + { + "epoch": 1.18, + "learning_rate": 1.6539113447023733e-05, + "loss": 0.1614, + "step": 4842 + }, + { + "epoch": 1.18, + "learning_rate": 1.6536127522610946e-05, + "loss": 0.169, + "step": 4844 + }, + { + "epoch": 1.18, + "learning_rate": 1.6533140580473128e-05, + "loss": 0.1682, + "step": 4846 + }, + { + "epoch": 1.18, + "learning_rate": 1.6530152621075364e-05, + "loss": 0.1838, + "step": 4848 + }, + { + "epoch": 1.18, + "learning_rate": 1.6527163644882902e-05, + "loss": 0.1891, + "step": 4850 + }, + { + "epoch": 1.18, + "learning_rate": 1.652417365236115e-05, + "loss": 0.1951, + "step": 4852 + }, + { + "epoch": 1.18, + "learning_rate": 1.6521182643975675e-05, + "loss": 0.1473, + "step": 4854 + }, + { + "epoch": 1.18, + "learning_rate": 1.651819062019219e-05, + "loss": 0.1976, + "step": 4856 + }, + { + "epoch": 1.18, + "learning_rate": 1.651519758147659e-05, + "loss": 0.1624, + "step": 4858 + }, + { + "epoch": 1.18, + "learning_rate": 1.6512203528294902e-05, + "loss": 0.1665, + "step": 4860 + }, + { + "epoch": 1.18, + "learning_rate": 1.650920846111333e-05, + "loss": 0.1799, + "step": 4862 + }, + { + "epoch": 1.19, + "learning_rate": 1.650621238039823e-05, + "loss": 0.181, + "step": 4864 + }, + { + "epoch": 1.19, + "learning_rate": 1.650321528661611e-05, + "loss": 0.1849, + "step": 4866 + }, + { + "epoch": 1.19, + "learning_rate": 1.650021718023364e-05, + "loss": 0.1286, + "step": 4868 + }, + { + "epoch": 1.19, + "learning_rate": 1.6497218061717654e-05, + "loss": 0.1518, + "step": 4870 + }, + { + "epoch": 1.19, + "learning_rate": 1.649421793153513e-05, + "loss": 0.1738, + "step": 4872 + }, + { + "epoch": 1.19, + "learning_rate": 1.6491216790153214e-05, + "loss": 0.1603, + "step": 4874 + }, + { + "epoch": 1.19, + "learning_rate": 1.6488214638039213e-05, + "loss": 0.1952, + "step": 4876 + }, + { + "epoch": 1.19, + "learning_rate": 1.6485211475660577e-05, + "loss": 0.1931, + "step": 4878 + }, + { + "epoch": 1.19, + "learning_rate": 1.6482207303484924e-05, + "loss": 0.1886, + "step": 4880 + }, + { + "epoch": 1.19, + "learning_rate": 1.6479202121980024e-05, + "loss": 0.1882, + "step": 4882 + }, + { + "epoch": 1.19, + "learning_rate": 1.6476195931613815e-05, + "loss": 0.165, + "step": 4884 + }, + { + "epoch": 1.19, + "learning_rate": 1.6473188732854374e-05, + "loss": 0.1808, + "step": 4886 + }, + { + "epoch": 1.19, + "learning_rate": 1.647018052616995e-05, + "loss": 0.1829, + "step": 4888 + }, + { + "epoch": 1.19, + "learning_rate": 1.6467171312028945e-05, + "loss": 0.1655, + "step": 4890 + }, + { + "epoch": 1.19, + "learning_rate": 1.6464161090899914e-05, + "loss": 0.1568, + "step": 4892 + }, + { + "epoch": 1.19, + "learning_rate": 1.646114986325157e-05, + "loss": 0.1608, + "step": 4894 + }, + { + "epoch": 1.19, + "learning_rate": 1.6458137629552785e-05, + "loss": 0.1738, + "step": 4896 + }, + { + "epoch": 1.19, + "learning_rate": 1.645512439027259e-05, + "loss": 0.1875, + "step": 4898 + }, + { + "epoch": 1.19, + "learning_rate": 1.6452110145880167e-05, + "loss": 0.1516, + "step": 4900 + }, + { + "epoch": 1.19, + "learning_rate": 1.6449094896844856e-05, + "loss": 0.1574, + "step": 4902 + }, + { + "epoch": 1.19, + "learning_rate": 1.644607864363616e-05, + "loss": 0.1701, + "step": 4904 + }, + { + "epoch": 1.2, + "learning_rate": 1.6443061386723722e-05, + "loss": 0.1677, + "step": 4906 + }, + { + "epoch": 1.2, + "learning_rate": 1.6440043126577364e-05, + "loss": 0.2029, + "step": 4908 + }, + { + "epoch": 1.2, + "learning_rate": 1.6437023863667043e-05, + "loss": 0.162, + "step": 4910 + }, + { + "epoch": 1.2, + "learning_rate": 1.6434003598462886e-05, + "loss": 0.1602, + "step": 4912 + }, + { + "epoch": 1.2, + "learning_rate": 1.643098233143517e-05, + "loss": 0.1662, + "step": 4914 + }, + { + "epoch": 1.2, + "learning_rate": 1.642796006305433e-05, + "loss": 0.163, + "step": 4916 + }, + { + "epoch": 1.2, + "learning_rate": 1.6424936793790954e-05, + "loss": 0.1472, + "step": 4918 + }, + { + "epoch": 1.2, + "learning_rate": 1.6421912524115794e-05, + "loss": 0.175, + "step": 4920 + }, + { + "epoch": 1.2, + "learning_rate": 1.6418887254499744e-05, + "loss": 0.1747, + "step": 4922 + }, + { + "epoch": 1.2, + "learning_rate": 1.6415860985413865e-05, + "loss": 0.15, + "step": 4924 + }, + { + "epoch": 1.2, + "learning_rate": 1.641283371732937e-05, + "loss": 0.1647, + "step": 4926 + }, + { + "epoch": 1.2, + "learning_rate": 1.6409805450717633e-05, + "loss": 0.1705, + "step": 4928 + }, + { + "epoch": 1.2, + "learning_rate": 1.6406776186050168e-05, + "loss": 0.1574, + "step": 4930 + }, + { + "epoch": 1.2, + "learning_rate": 1.6403745923798666e-05, + "loss": 0.1969, + "step": 4932 + }, + { + "epoch": 1.2, + "learning_rate": 1.6400714664434957e-05, + "loss": 0.1738, + "step": 4934 + }, + { + "epoch": 1.2, + "learning_rate": 1.6397682408431025e-05, + "loss": 0.1752, + "step": 4936 + }, + { + "epoch": 1.2, + "learning_rate": 1.6394649156259027e-05, + "loss": 0.1621, + "step": 4938 + }, + { + "epoch": 1.2, + "learning_rate": 1.6391614908391254e-05, + "loss": 0.1694, + "step": 4940 + }, + { + "epoch": 1.2, + "learning_rate": 1.6388579665300165e-05, + "loss": 0.1404, + "step": 4942 + }, + { + "epoch": 1.2, + "learning_rate": 1.6385543427458374e-05, + "loss": 0.1475, + "step": 4944 + }, + { + "epoch": 1.21, + "learning_rate": 1.638250619533864e-05, + "loss": 0.1527, + "step": 4946 + }, + { + "epoch": 1.21, + "learning_rate": 1.6379467969413888e-05, + "loss": 0.1312, + "step": 4948 + }, + { + "epoch": 1.21, + "learning_rate": 1.637642875015719e-05, + "loss": 0.1693, + "step": 4950 + }, + { + "epoch": 1.21, + "learning_rate": 1.6373388538041778e-05, + "loss": 0.1595, + "step": 4952 + }, + { + "epoch": 1.21, + "learning_rate": 1.637034733354104e-05, + "loss": 0.1671, + "step": 4954 + }, + { + "epoch": 1.21, + "learning_rate": 1.6367305137128507e-05, + "loss": 0.1916, + "step": 4956 + }, + { + "epoch": 1.21, + "learning_rate": 1.6364261949277876e-05, + "loss": 0.1269, + "step": 4958 + }, + { + "epoch": 1.21, + "learning_rate": 1.636121777046299e-05, + "loss": 0.211, + "step": 4960 + }, + { + "epoch": 1.21, + "learning_rate": 1.635817260115786e-05, + "loss": 0.178, + "step": 4962 + }, + { + "epoch": 1.21, + "learning_rate": 1.6355126441836637e-05, + "loss": 0.1628, + "step": 4964 + }, + { + "epoch": 1.21, + "learning_rate": 1.635207929297363e-05, + "loss": 0.1891, + "step": 4966 + }, + { + "epoch": 1.21, + "learning_rate": 1.6349031155043303e-05, + "loss": 0.1496, + "step": 4968 + }, + { + "epoch": 1.21, + "learning_rate": 1.6345982028520284e-05, + "loss": 0.1683, + "step": 4970 + }, + { + "epoch": 1.21, + "learning_rate": 1.6342931913879328e-05, + "loss": 0.1483, + "step": 4972 + }, + { + "epoch": 1.21, + "learning_rate": 1.6339880811595377e-05, + "loss": 0.1297, + "step": 4974 + }, + { + "epoch": 1.21, + "learning_rate": 1.63368287221435e-05, + "loss": 0.1338, + "step": 4976 + }, + { + "epoch": 1.21, + "learning_rate": 1.6333775645998938e-05, + "loss": 0.1618, + "step": 4978 + }, + { + "epoch": 1.21, + "learning_rate": 1.6330721583637078e-05, + "loss": 0.1563, + "step": 4980 + }, + { + "epoch": 1.21, + "learning_rate": 1.632766653553346e-05, + "loss": 0.168, + "step": 4982 + }, + { + "epoch": 1.21, + "learning_rate": 1.632461050216377e-05, + "loss": 0.1493, + "step": 4984 + }, + { + "epoch": 1.21, + "learning_rate": 1.632155348400387e-05, + "loss": 0.1942, + "step": 4986 + }, + { + "epoch": 1.22, + "learning_rate": 1.6318495481529746e-05, + "loss": 0.16, + "step": 4988 + }, + { + "epoch": 1.22, + "learning_rate": 1.6315436495217567e-05, + "loss": 0.1347, + "step": 4990 + }, + { + "epoch": 1.22, + "learning_rate": 1.6312376525543633e-05, + "loss": 0.1761, + "step": 4992 + }, + { + "epoch": 1.22, + "learning_rate": 1.63093155729844e-05, + "loss": 0.1847, + "step": 4994 + }, + { + "epoch": 1.22, + "learning_rate": 1.630625363801649e-05, + "loss": 0.1803, + "step": 4996 + }, + { + "epoch": 1.22, + "learning_rate": 1.630319072111667e-05, + "loss": 0.1562, + "step": 4998 + }, + { + "epoch": 1.22, + "learning_rate": 1.6300126822761853e-05, + "loss": 0.1568, + "step": 5000 + }, + { + "epoch": 1.22, + "learning_rate": 1.629706194342912e-05, + "loss": 0.1618, + "step": 5002 + }, + { + "epoch": 1.22, + "learning_rate": 1.6293996083595683e-05, + "loss": 0.1238, + "step": 5004 + }, + { + "epoch": 1.22, + "learning_rate": 1.629092924373893e-05, + "loss": 0.1588, + "step": 5006 + }, + { + "epoch": 1.22, + "learning_rate": 1.6287861424336392e-05, + "loss": 0.1736, + "step": 5008 + }, + { + "epoch": 1.22, + "learning_rate": 1.628479262586575e-05, + "loss": 0.1463, + "step": 5010 + }, + { + "epoch": 1.22, + "learning_rate": 1.6281722848804836e-05, + "loss": 0.1548, + "step": 5012 + }, + { + "epoch": 1.22, + "learning_rate": 1.627865209363164e-05, + "loss": 0.1759, + "step": 5014 + }, + { + "epoch": 1.22, + "learning_rate": 1.62755803608243e-05, + "loss": 0.1406, + "step": 5016 + }, + { + "epoch": 1.22, + "learning_rate": 1.627250765086112e-05, + "loss": 0.1887, + "step": 5018 + }, + { + "epoch": 1.22, + "learning_rate": 1.6269433964220525e-05, + "loss": 0.1939, + "step": 5020 + }, + { + "epoch": 1.22, + "learning_rate": 1.6266359301381128e-05, + "loss": 0.1574, + "step": 5022 + }, + { + "epoch": 1.22, + "learning_rate": 1.6263283662821666e-05, + "loss": 0.1493, + "step": 5024 + }, + { + "epoch": 1.22, + "learning_rate": 1.626020704902105e-05, + "loss": 0.1571, + "step": 5026 + }, + { + "epoch": 1.23, + "learning_rate": 1.625712946045832e-05, + "loss": 0.1496, + "step": 5028 + }, + { + "epoch": 1.23, + "learning_rate": 1.6254050897612694e-05, + "loss": 0.1453, + "step": 5030 + }, + { + "epoch": 1.23, + "learning_rate": 1.6250971360963515e-05, + "loss": 0.1798, + "step": 5032 + }, + { + "epoch": 1.23, + "learning_rate": 1.62478908509903e-05, + "loss": 0.1395, + "step": 5034 + }, + { + "epoch": 1.23, + "learning_rate": 1.62448093681727e-05, + "loss": 0.167, + "step": 5036 + }, + { + "epoch": 1.23, + "learning_rate": 1.6241726912990534e-05, + "loss": 0.1424, + "step": 5038 + }, + { + "epoch": 1.23, + "learning_rate": 1.623864348592376e-05, + "loss": 0.1638, + "step": 5040 + }, + { + "epoch": 1.23, + "learning_rate": 1.6235559087452485e-05, + "loss": 0.1719, + "step": 5042 + }, + { + "epoch": 1.23, + "learning_rate": 1.6232473718056986e-05, + "loss": 0.1934, + "step": 5044 + }, + { + "epoch": 1.23, + "learning_rate": 1.6229387378217665e-05, + "loss": 0.186, + "step": 5046 + }, + { + "epoch": 1.23, + "learning_rate": 1.6226300068415096e-05, + "loss": 0.1315, + "step": 5048 + }, + { + "epoch": 1.23, + "learning_rate": 1.622321178913e-05, + "loss": 0.1764, + "step": 5050 + }, + { + "epoch": 1.23, + "learning_rate": 1.622012254084324e-05, + "loss": 0.1671, + "step": 5052 + }, + { + "epoch": 1.23, + "learning_rate": 1.6217032324035837e-05, + "loss": 0.1717, + "step": 5054 + }, + { + "epoch": 1.23, + "learning_rate": 1.6213941139188963e-05, + "loss": 0.1749, + "step": 5056 + }, + { + "epoch": 1.23, + "learning_rate": 1.6210848986783933e-05, + "loss": 0.149, + "step": 5058 + }, + { + "epoch": 1.23, + "learning_rate": 1.620775586730223e-05, + "loss": 0.1757, + "step": 5060 + }, + { + "epoch": 1.23, + "learning_rate": 1.6204661781225464e-05, + "loss": 0.1637, + "step": 5062 + }, + { + "epoch": 1.23, + "learning_rate": 1.6201566729035418e-05, + "loss": 0.1762, + "step": 5064 + }, + { + "epoch": 1.23, + "learning_rate": 1.6198470711214008e-05, + "loss": 0.14, + "step": 5066 + }, + { + "epoch": 1.23, + "learning_rate": 1.6195373728243312e-05, + "loss": 0.146, + "step": 5068 + }, + { + "epoch": 1.24, + "learning_rate": 1.6192275780605556e-05, + "loss": 0.1846, + "step": 5070 + }, + { + "epoch": 1.24, + "learning_rate": 1.6189176868783105e-05, + "loss": 0.1927, + "step": 5072 + }, + { + "epoch": 1.24, + "learning_rate": 1.6186076993258493e-05, + "loss": 0.1685, + "step": 5074 + }, + { + "epoch": 1.24, + "learning_rate": 1.618297615451439e-05, + "loss": 0.1297, + "step": 5076 + }, + { + "epoch": 1.24, + "learning_rate": 1.6179874353033618e-05, + "loss": 0.1731, + "step": 5078 + }, + { + "epoch": 1.24, + "learning_rate": 1.6176771589299156e-05, + "loss": 0.1626, + "step": 5080 + }, + { + "epoch": 1.24, + "learning_rate": 1.6173667863794124e-05, + "loss": 0.2089, + "step": 5082 + }, + { + "epoch": 1.24, + "learning_rate": 1.61705631770018e-05, + "loss": 0.1399, + "step": 5084 + }, + { + "epoch": 1.24, + "learning_rate": 1.6167457529405607e-05, + "loss": 0.1658, + "step": 5086 + }, + { + "epoch": 1.24, + "learning_rate": 1.6164350921489112e-05, + "loss": 0.1587, + "step": 5088 + }, + { + "epoch": 1.24, + "learning_rate": 1.6161243353736044e-05, + "loss": 0.1445, + "step": 5090 + }, + { + "epoch": 1.24, + "learning_rate": 1.615813482663027e-05, + "loss": 0.1835, + "step": 5092 + }, + { + "epoch": 1.24, + "learning_rate": 1.615502534065582e-05, + "loss": 0.1539, + "step": 5094 + }, + { + "epoch": 1.24, + "learning_rate": 1.6151914896296856e-05, + "loss": 0.157, + "step": 5096 + }, + { + "epoch": 1.24, + "learning_rate": 1.61488034940377e-05, + "loss": 0.1584, + "step": 5098 + }, + { + "epoch": 1.24, + "learning_rate": 1.6145691134362823e-05, + "loss": 0.1792, + "step": 5100 + }, + { + "epoch": 1.24, + "learning_rate": 1.6142577817756847e-05, + "loss": 0.1999, + "step": 5102 + }, + { + "epoch": 1.24, + "learning_rate": 1.613946354470453e-05, + "loss": 0.1591, + "step": 5104 + }, + { + "epoch": 1.24, + "learning_rate": 1.6136348315690794e-05, + "loss": 0.1248, + "step": 5106 + }, + { + "epoch": 1.24, + "learning_rate": 1.61332321312007e-05, + "loss": 0.1505, + "step": 5108 + }, + { + "epoch": 1.25, + "learning_rate": 1.6130114991719468e-05, + "loss": 0.1611, + "step": 5110 + }, + { + "epoch": 1.25, + "learning_rate": 1.6126996897732456e-05, + "loss": 0.1611, + "step": 5112 + }, + { + "epoch": 1.25, + "learning_rate": 1.6123877849725174e-05, + "loss": 0.1464, + "step": 5114 + }, + { + "epoch": 1.25, + "learning_rate": 1.6120757848183285e-05, + "loss": 0.1447, + "step": 5116 + }, + { + "epoch": 1.25, + "learning_rate": 1.6117636893592595e-05, + "loss": 0.1368, + "step": 5118 + }, + { + "epoch": 1.25, + "learning_rate": 1.611451498643906e-05, + "loss": 0.1685, + "step": 5120 + }, + { + "epoch": 1.25, + "learning_rate": 1.6111392127208788e-05, + "loss": 0.1594, + "step": 5122 + }, + { + "epoch": 1.25, + "learning_rate": 1.610826831638803e-05, + "loss": 0.1472, + "step": 5124 + }, + { + "epoch": 1.25, + "learning_rate": 1.610514355446319e-05, + "loss": 0.1431, + "step": 5126 + }, + { + "epoch": 1.25, + "learning_rate": 1.610201784192081e-05, + "loss": 0.139, + "step": 5128 + }, + { + "epoch": 1.25, + "learning_rate": 1.60988911792476e-05, + "loss": 0.1428, + "step": 5130 + }, + { + "epoch": 1.25, + "learning_rate": 1.6095763566930388e-05, + "loss": 0.1593, + "step": 5132 + }, + { + "epoch": 1.25, + "learning_rate": 1.6092635005456178e-05, + "loss": 0.1303, + "step": 5134 + }, + { + "epoch": 1.25, + "learning_rate": 1.608950549531211e-05, + "loss": 0.1821, + "step": 5136 + }, + { + "epoch": 1.25, + "learning_rate": 1.6086375036985477e-05, + "loss": 0.1179, + "step": 5138 + }, + { + "epoch": 1.25, + "learning_rate": 1.6083243630963705e-05, + "loss": 0.1674, + "step": 5140 + }, + { + "epoch": 1.25, + "learning_rate": 1.6080111277734384e-05, + "loss": 0.1468, + "step": 5142 + }, + { + "epoch": 1.25, + "learning_rate": 1.6076977977785245e-05, + "loss": 0.1852, + "step": 5144 + }, + { + "epoch": 1.25, + "learning_rate": 1.6073843731604172e-05, + "loss": 0.1528, + "step": 5146 + }, + { + "epoch": 1.25, + "learning_rate": 1.6070708539679175e-05, + "loss": 0.1183, + "step": 5148 + }, + { + "epoch": 1.25, + "learning_rate": 1.6067572402498447e-05, + "loss": 0.1815, + "step": 5150 + }, + { + "epoch": 1.26, + "learning_rate": 1.6064435320550292e-05, + "loss": 0.1327, + "step": 5152 + }, + { + "epoch": 1.26, + "learning_rate": 1.6061297294323187e-05, + "loss": 0.1351, + "step": 5154 + }, + { + "epoch": 1.26, + "learning_rate": 1.6058158324305747e-05, + "loss": 0.1657, + "step": 5156 + }, + { + "epoch": 1.26, + "learning_rate": 1.6055018410986732e-05, + "loss": 0.1488, + "step": 5158 + }, + { + "epoch": 1.26, + "learning_rate": 1.605187755485505e-05, + "loss": 0.1529, + "step": 5160 + }, + { + "epoch": 1.26, + "learning_rate": 1.6048735756399755e-05, + "loss": 0.1373, + "step": 5162 + }, + { + "epoch": 1.26, + "learning_rate": 1.6045593016110047e-05, + "loss": 0.1778, + "step": 5164 + }, + { + "epoch": 1.26, + "learning_rate": 1.604244933447528e-05, + "loss": 0.1656, + "step": 5166 + }, + { + "epoch": 1.26, + "learning_rate": 1.6039304711984947e-05, + "loss": 0.1701, + "step": 5168 + }, + { + "epoch": 1.26, + "learning_rate": 1.603615914912869e-05, + "loss": 0.1554, + "step": 5170 + }, + { + "epoch": 1.26, + "learning_rate": 1.6033012646396297e-05, + "loss": 0.1599, + "step": 5172 + }, + { + "epoch": 1.26, + "learning_rate": 1.6029865204277704e-05, + "loss": 0.1491, + "step": 5174 + }, + { + "epoch": 1.26, + "learning_rate": 1.6026716823262988e-05, + "loss": 0.1581, + "step": 5176 + }, + { + "epoch": 1.26, + "learning_rate": 1.602356750384238e-05, + "loss": 0.1128, + "step": 5178 + }, + { + "epoch": 1.26, + "learning_rate": 1.6020417246506252e-05, + "loss": 0.1509, + "step": 5180 + }, + { + "epoch": 1.26, + "learning_rate": 1.601726605174512e-05, + "loss": 0.1501, + "step": 5182 + }, + { + "epoch": 1.26, + "learning_rate": 1.6014113920049656e-05, + "loss": 0.1542, + "step": 5184 + }, + { + "epoch": 1.26, + "learning_rate": 1.6010960851910665e-05, + "loss": 0.1658, + "step": 5186 + }, + { + "epoch": 1.26, + "learning_rate": 1.60078068478191e-05, + "loss": 0.1627, + "step": 5188 + }, + { + "epoch": 1.26, + "learning_rate": 1.600465190826608e-05, + "loss": 0.1456, + "step": 5190 + }, + { + "epoch": 1.27, + "learning_rate": 1.6001496033742833e-05, + "loss": 0.1601, + "step": 5192 + }, + { + "epoch": 1.27, + "learning_rate": 1.5998339224740767e-05, + "loss": 0.1657, + "step": 5194 + }, + { + "epoch": 1.27, + "learning_rate": 1.5995181481751415e-05, + "loss": 0.1278, + "step": 5196 + }, + { + "epoch": 1.27, + "learning_rate": 1.5992022805266466e-05, + "loss": 0.1563, + "step": 5198 + }, + { + "epoch": 1.27, + "learning_rate": 1.5988863195777745e-05, + "loss": 0.1191, + "step": 5200 + }, + { + "epoch": 1.27, + "learning_rate": 1.5985702653777228e-05, + "loss": 0.1368, + "step": 5202 + }, + { + "epoch": 1.27, + "learning_rate": 1.5982541179757038e-05, + "loss": 0.1869, + "step": 5204 + }, + { + "epoch": 1.27, + "learning_rate": 1.5979378774209443e-05, + "loss": 0.149, + "step": 5206 + }, + { + "epoch": 1.27, + "learning_rate": 1.5976215437626845e-05, + "loss": 0.166, + "step": 5208 + }, + { + "epoch": 1.27, + "learning_rate": 1.5973051170501814e-05, + "loss": 0.1343, + "step": 5210 + }, + { + "epoch": 1.27, + "learning_rate": 1.5969885973327034e-05, + "loss": 0.1548, + "step": 5212 + }, + { + "epoch": 1.27, + "learning_rate": 1.596671984659536e-05, + "loss": 0.1496, + "step": 5214 + }, + { + "epoch": 1.27, + "learning_rate": 1.596355279079978e-05, + "loss": 0.2045, + "step": 5216 + }, + { + "epoch": 1.27, + "learning_rate": 1.596038480643343e-05, + "loss": 0.1596, + "step": 5218 + }, + { + "epoch": 1.27, + "learning_rate": 1.595721589398959e-05, + "loss": 0.1504, + "step": 5220 + }, + { + "epoch": 1.27, + "learning_rate": 1.5954046053961683e-05, + "loss": 0.1384, + "step": 5222 + }, + { + "epoch": 1.27, + "learning_rate": 1.5950875286843274e-05, + "loss": 0.1435, + "step": 5224 + }, + { + "epoch": 1.27, + "learning_rate": 1.5947703593128084e-05, + "loss": 0.1179, + "step": 5226 + }, + { + "epoch": 1.27, + "learning_rate": 1.594453097330996e-05, + "loss": 0.1327, + "step": 5228 + }, + { + "epoch": 1.27, + "learning_rate": 1.594135742788291e-05, + "loss": 0.1396, + "step": 5230 + }, + { + "epoch": 1.27, + "learning_rate": 1.5938182957341072e-05, + "loss": 0.1433, + "step": 5232 + }, + { + "epoch": 1.28, + "learning_rate": 1.5935007562178747e-05, + "loss": 0.1705, + "step": 5234 + }, + { + "epoch": 1.28, + "learning_rate": 1.5931831242890357e-05, + "loss": 0.1546, + "step": 5236 + }, + { + "epoch": 1.28, + "learning_rate": 1.5928653999970486e-05, + "loss": 0.141, + "step": 5238 + }, + { + "epoch": 1.28, + "learning_rate": 1.592547583391385e-05, + "loss": 0.1632, + "step": 5240 + }, + { + "epoch": 1.28, + "learning_rate": 1.592229674521532e-05, + "loss": 0.1296, + "step": 5242 + }, + { + "epoch": 1.28, + "learning_rate": 1.59191167343699e-05, + "loss": 0.1326, + "step": 5244 + }, + { + "epoch": 1.28, + "learning_rate": 1.5915935801872742e-05, + "loss": 0.1522, + "step": 5246 + }, + { + "epoch": 1.28, + "learning_rate": 1.5912753948219143e-05, + "loss": 0.159, + "step": 5248 + }, + { + "epoch": 1.28, + "learning_rate": 1.5909571173904543e-05, + "loss": 0.1287, + "step": 5250 + }, + { + "epoch": 1.28, + "learning_rate": 1.5906387479424523e-05, + "loss": 0.1781, + "step": 5252 + }, + { + "epoch": 1.28, + "learning_rate": 1.5903202865274807e-05, + "loss": 0.1649, + "step": 5254 + }, + { + "epoch": 1.28, + "learning_rate": 1.5900017331951267e-05, + "loss": 0.129, + "step": 5256 + }, + { + "epoch": 1.28, + "learning_rate": 1.589683087994991e-05, + "loss": 0.1417, + "step": 5258 + }, + { + "epoch": 1.28, + "learning_rate": 1.5893643509766895e-05, + "loss": 0.141, + "step": 5260 + }, + { + "epoch": 1.28, + "learning_rate": 1.589045522189852e-05, + "loss": 0.1519, + "step": 5262 + }, + { + "epoch": 1.28, + "learning_rate": 1.588726601684122e-05, + "loss": 0.1618, + "step": 5264 + }, + { + "epoch": 1.28, + "learning_rate": 1.588407589509159e-05, + "loss": 0.168, + "step": 5266 + }, + { + "epoch": 1.28, + "learning_rate": 1.5880884857146343e-05, + "loss": 0.1387, + "step": 5268 + }, + { + "epoch": 1.28, + "learning_rate": 1.587769290350236e-05, + "loss": 0.1507, + "step": 5270 + }, + { + "epoch": 1.28, + "learning_rate": 1.5874500034656646e-05, + "loss": 0.1274, + "step": 5272 + }, + { + "epoch": 1.29, + "learning_rate": 1.5871306251106354e-05, + "loss": 0.168, + "step": 5274 + }, + { + "epoch": 1.29, + "learning_rate": 1.5868111553348787e-05, + "loss": 0.1014, + "step": 5276 + }, + { + "epoch": 1.29, + "learning_rate": 1.5864915941881378e-05, + "loss": 0.1572, + "step": 5278 + }, + { + "epoch": 1.29, + "learning_rate": 1.5861719417201707e-05, + "loss": 0.1601, + "step": 5280 + }, + { + "epoch": 1.29, + "learning_rate": 1.5858521979807508e-05, + "loss": 0.1349, + "step": 5282 + }, + { + "epoch": 1.29, + "learning_rate": 1.585532363019663e-05, + "loss": 0.1456, + "step": 5284 + }, + { + "epoch": 1.29, + "learning_rate": 1.5852124368867093e-05, + "loss": 0.1686, + "step": 5286 + }, + { + "epoch": 1.29, + "learning_rate": 1.5848924196317042e-05, + "loss": 0.1299, + "step": 5288 + }, + { + "epoch": 1.29, + "learning_rate": 1.5845723113044773e-05, + "loss": 0.1532, + "step": 5290 + }, + { + "epoch": 1.29, + "learning_rate": 1.5842521119548706e-05, + "loss": 0.16, + "step": 5292 + }, + { + "epoch": 1.29, + "learning_rate": 1.583931821632743e-05, + "loss": 0.1419, + "step": 5294 + }, + { + "epoch": 1.29, + "learning_rate": 1.5836114403879655e-05, + "loss": 0.1427, + "step": 5296 + }, + { + "epoch": 1.29, + "learning_rate": 1.5832909682704246e-05, + "loss": 0.1639, + "step": 5298 + }, + { + "epoch": 1.29, + "learning_rate": 1.582970405330019e-05, + "loss": 0.1237, + "step": 5300 + }, + { + "epoch": 1.29, + "learning_rate": 1.5826497516166642e-05, + "loss": 0.1219, + "step": 5302 + }, + { + "epoch": 1.29, + "learning_rate": 1.5823290071802872e-05, + "loss": 0.1622, + "step": 5304 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820081720708313e-05, + "loss": 0.1287, + "step": 5306 + }, + { + "epoch": 1.29, + "learning_rate": 1.581687246338252e-05, + "loss": 0.1344, + "step": 5308 + }, + { + "epoch": 1.29, + "learning_rate": 1.581366230032521e-05, + "loss": 0.1493, + "step": 5310 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810451232036226e-05, + "loss": 0.1333, + "step": 5312 + }, + { + "epoch": 1.29, + "learning_rate": 1.5807239259015553e-05, + "loss": 0.1301, + "step": 5314 + }, + { + "epoch": 1.3, + "learning_rate": 1.5804026381763324e-05, + "loss": 0.1138, + "step": 5316 + }, + { + "epoch": 1.3, + "learning_rate": 1.58008126007798e-05, + "loss": 0.112, + "step": 5318 + }, + { + "epoch": 1.3, + "learning_rate": 1.5797597916565402e-05, + "loss": 0.1443, + "step": 5320 + }, + { + "epoch": 1.3, + "learning_rate": 1.5794382329620672e-05, + "loss": 0.1454, + "step": 5322 + }, + { + "epoch": 1.3, + "learning_rate": 1.579116584044631e-05, + "loss": 0.1356, + "step": 5324 + }, + { + "epoch": 1.3, + "learning_rate": 1.578794844954314e-05, + "loss": 0.119, + "step": 5326 + }, + { + "epoch": 1.3, + "learning_rate": 1.578473015741214e-05, + "loss": 0.1512, + "step": 5328 + }, + { + "epoch": 1.3, + "learning_rate": 1.578151096455442e-05, + "loss": 0.132, + "step": 5330 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778290871471234e-05, + "loss": 0.146, + "step": 5332 + }, + { + "epoch": 1.3, + "learning_rate": 1.577506987866398e-05, + "loss": 0.1439, + "step": 5334 + }, + { + "epoch": 1.3, + "learning_rate": 1.5771847986634178e-05, + "loss": 0.1358, + "step": 5336 + }, + { + "epoch": 1.3, + "learning_rate": 1.576862519588351e-05, + "loss": 0.1098, + "step": 5338 + }, + { + "epoch": 1.3, + "learning_rate": 1.576540150691379e-05, + "loss": 0.123, + "step": 5340 + }, + { + "epoch": 1.3, + "learning_rate": 1.576217692022697e-05, + "loss": 0.1648, + "step": 5342 + }, + { + "epoch": 1.3, + "learning_rate": 1.5758951436325142e-05, + "loss": 0.1709, + "step": 5344 + }, + { + "epoch": 1.3, + "learning_rate": 1.5755725055710537e-05, + "loss": 0.149, + "step": 5346 + }, + { + "epoch": 1.3, + "learning_rate": 1.5752497778885532e-05, + "loss": 0.1494, + "step": 5348 + }, + { + "epoch": 1.3, + "learning_rate": 1.5749269606352632e-05, + "loss": 0.1305, + "step": 5350 + }, + { + "epoch": 1.3, + "learning_rate": 1.5746040538614493e-05, + "loss": 0.1163, + "step": 5352 + }, + { + "epoch": 1.3, + "learning_rate": 1.5742810576173907e-05, + "loss": 0.1354, + "step": 5354 + }, + { + "epoch": 1.31, + "learning_rate": 1.5739579719533797e-05, + "loss": 0.1426, + "step": 5356 + }, + { + "epoch": 1.31, + "learning_rate": 1.5736347969197235e-05, + "loss": 0.1567, + "step": 5358 + }, + { + "epoch": 1.31, + "learning_rate": 1.5733115325667435e-05, + "loss": 0.1307, + "step": 5360 + }, + { + "epoch": 1.31, + "learning_rate": 1.572988178944774e-05, + "loss": 0.1419, + "step": 5362 + }, + { + "epoch": 1.31, + "learning_rate": 1.5726647361041632e-05, + "loss": 0.1363, + "step": 5364 + }, + { + "epoch": 1.31, + "learning_rate": 1.572341204095274e-05, + "loss": 0.1426, + "step": 5366 + }, + { + "epoch": 1.31, + "learning_rate": 1.572017582968483e-05, + "loss": 0.1329, + "step": 5368 + }, + { + "epoch": 1.31, + "learning_rate": 1.5716938727741803e-05, + "loss": 0.1406, + "step": 5370 + }, + { + "epoch": 1.31, + "learning_rate": 1.5713700735627703e-05, + "loss": 0.1342, + "step": 5372 + }, + { + "epoch": 1.31, + "learning_rate": 1.5710461853846702e-05, + "loss": 0.172, + "step": 5374 + }, + { + "epoch": 1.31, + "learning_rate": 1.570722208290313e-05, + "loss": 0.1423, + "step": 5376 + }, + { + "epoch": 1.31, + "learning_rate": 1.5703981423301438e-05, + "loss": 0.1524, + "step": 5378 + }, + { + "epoch": 1.31, + "learning_rate": 1.570073987554622e-05, + "loss": 0.1668, + "step": 5380 + }, + { + "epoch": 1.31, + "learning_rate": 1.5697497440142214e-05, + "loss": 0.1692, + "step": 5382 + }, + { + "epoch": 1.31, + "learning_rate": 1.5694254117594286e-05, + "loss": 0.1663, + "step": 5384 + }, + { + "epoch": 1.31, + "learning_rate": 1.5691009908407454e-05, + "loss": 0.11, + "step": 5386 + }, + { + "epoch": 1.31, + "learning_rate": 1.5687764813086856e-05, + "loss": 0.1256, + "step": 5388 + }, + { + "epoch": 1.31, + "learning_rate": 1.5684518832137793e-05, + "loss": 0.0977, + "step": 5390 + }, + { + "epoch": 1.31, + "learning_rate": 1.5681271966065673e-05, + "loss": 0.1296, + "step": 5392 + }, + { + "epoch": 1.31, + "learning_rate": 1.5678024215376067e-05, + "loss": 0.1265, + "step": 5394 + }, + { + "epoch": 1.31, + "learning_rate": 1.5674775580574674e-05, + "loss": 0.1173, + "step": 5396 + }, + { + "epoch": 1.32, + "learning_rate": 1.567152606216733e-05, + "loss": 0.1524, + "step": 5398 + }, + { + "epoch": 1.32, + "learning_rate": 1.5668275660660012e-05, + "loss": 0.1154, + "step": 5400 + }, + { + "epoch": 1.32, + "learning_rate": 1.5665024376558826e-05, + "loss": 0.1707, + "step": 5402 + }, + { + "epoch": 1.32, + "learning_rate": 1.5661772210370028e-05, + "loss": 0.1331, + "step": 5404 + }, + { + "epoch": 1.32, + "learning_rate": 1.5658519162600003e-05, + "loss": 0.1561, + "step": 5406 + }, + { + "epoch": 1.32, + "learning_rate": 1.5655265233755276e-05, + "loss": 0.1632, + "step": 5408 + }, + { + "epoch": 1.32, + "learning_rate": 1.565201042434251e-05, + "loss": 0.1741, + "step": 5410 + }, + { + "epoch": 1.32, + "learning_rate": 1.5648754734868498e-05, + "loss": 0.1444, + "step": 5412 + }, + { + "epoch": 1.32, + "learning_rate": 1.5645498165840183e-05, + "loss": 0.1654, + "step": 5414 + }, + { + "epoch": 1.32, + "learning_rate": 1.5642240717764634e-05, + "loss": 0.1294, + "step": 5416 + }, + { + "epoch": 1.32, + "learning_rate": 1.563898239114906e-05, + "loss": 0.1418, + "step": 5418 + }, + { + "epoch": 1.32, + "learning_rate": 1.563572318650081e-05, + "loss": 0.158, + "step": 5420 + }, + { + "epoch": 1.32, + "learning_rate": 1.5632463104327363e-05, + "loss": 0.1268, + "step": 5422 + }, + { + "epoch": 1.32, + "learning_rate": 1.5629202145136343e-05, + "loss": 0.1379, + "step": 5424 + }, + { + "epoch": 1.32, + "learning_rate": 1.5625940309435503e-05, + "loss": 0.1185, + "step": 5426 + }, + { + "epoch": 1.32, + "learning_rate": 1.5622677597732735e-05, + "loss": 0.1172, + "step": 5428 + }, + { + "epoch": 1.32, + "learning_rate": 1.5619414010536077e-05, + "loss": 0.1336, + "step": 5430 + }, + { + "epoch": 1.32, + "learning_rate": 1.5616149548353682e-05, + "loss": 0.1179, + "step": 5432 + }, + { + "epoch": 1.32, + "learning_rate": 1.5612884211693863e-05, + "loss": 0.1264, + "step": 5434 + }, + { + "epoch": 1.32, + "learning_rate": 1.560961800106505e-05, + "loss": 0.1161, + "step": 5436 + }, + { + "epoch": 1.33, + "learning_rate": 1.5606350916975815e-05, + "loss": 0.1395, + "step": 5438 + }, + { + "epoch": 1.33, + "learning_rate": 1.5603082959934877e-05, + "loss": 0.1206, + "step": 5440 + }, + { + "epoch": 1.33, + "learning_rate": 1.5599814130451078e-05, + "loss": 0.1248, + "step": 5442 + }, + { + "epoch": 1.33, + "learning_rate": 1.5596544429033395e-05, + "loss": 0.1335, + "step": 5444 + }, + { + "epoch": 1.33, + "learning_rate": 1.5593273856190957e-05, + "loss": 0.1212, + "step": 5446 + }, + { + "epoch": 1.33, + "learning_rate": 1.5590002412433004e-05, + "loss": 0.145, + "step": 5448 + }, + { + "epoch": 1.33, + "learning_rate": 1.558673009826893e-05, + "loss": 0.1463, + "step": 5450 + }, + { + "epoch": 1.33, + "learning_rate": 1.558345691420826e-05, + "loss": 0.15, + "step": 5452 + }, + { + "epoch": 1.33, + "learning_rate": 1.558018286076066e-05, + "loss": 0.11, + "step": 5454 + }, + { + "epoch": 1.33, + "learning_rate": 1.557690793843591e-05, + "loss": 0.176, + "step": 5456 + }, + { + "epoch": 1.33, + "learning_rate": 1.5573632147743953e-05, + "loss": 0.1572, + "step": 5458 + }, + { + "epoch": 1.33, + "learning_rate": 1.5570355489194852e-05, + "loss": 0.1118, + "step": 5460 + }, + { + "epoch": 1.33, + "learning_rate": 1.5567077963298806e-05, + "loss": 0.1183, + "step": 5462 + }, + { + "epoch": 1.33, + "learning_rate": 1.5563799570566152e-05, + "loss": 0.1191, + "step": 5464 + }, + { + "epoch": 1.33, + "learning_rate": 1.556052031150736e-05, + "loss": 0.1051, + "step": 5466 + }, + { + "epoch": 1.33, + "learning_rate": 1.5557240186633037e-05, + "loss": 0.1357, + "step": 5468 + }, + { + "epoch": 1.33, + "learning_rate": 1.5553959196453922e-05, + "loss": 0.1501, + "step": 5470 + }, + { + "epoch": 1.33, + "learning_rate": 1.555067734148089e-05, + "loss": 0.1298, + "step": 5472 + }, + { + "epoch": 1.33, + "learning_rate": 1.554739462222495e-05, + "loss": 0.1183, + "step": 5474 + }, + { + "epoch": 1.33, + "learning_rate": 1.5544111039197255e-05, + "loss": 0.1113, + "step": 5476 + }, + { + "epoch": 1.33, + "learning_rate": 1.554082659290907e-05, + "loss": 0.1249, + "step": 5478 + }, + { + "epoch": 1.34, + "learning_rate": 1.553754128387182e-05, + "loss": 0.1607, + "step": 5480 + }, + { + "epoch": 1.34, + "learning_rate": 1.5534255112597047e-05, + "loss": 0.1129, + "step": 5482 + }, + { + "epoch": 1.34, + "learning_rate": 1.5530968079596438e-05, + "loss": 0.1558, + "step": 5484 + }, + { + "epoch": 1.34, + "learning_rate": 1.5527680185381803e-05, + "loss": 0.1407, + "step": 5486 + }, + { + "epoch": 1.34, + "learning_rate": 1.5524391430465094e-05, + "loss": 0.1204, + "step": 5488 + }, + { + "epoch": 1.34, + "learning_rate": 1.5521101815358396e-05, + "loss": 0.1346, + "step": 5490 + }, + { + "epoch": 1.34, + "learning_rate": 1.5517811340573926e-05, + "loss": 0.1014, + "step": 5492 + }, + { + "epoch": 1.34, + "learning_rate": 1.5514520006624038e-05, + "loss": 0.1221, + "step": 5494 + }, + { + "epoch": 1.34, + "learning_rate": 1.5511227814021223e-05, + "loss": 0.1233, + "step": 5496 + }, + { + "epoch": 1.34, + "learning_rate": 1.550793476327809e-05, + "loss": 0.1165, + "step": 5498 + }, + { + "epoch": 1.34, + "learning_rate": 1.55046408549074e-05, + "loss": 0.1417, + "step": 5500 + }, + { + "epoch": 1.34, + "learning_rate": 1.550134608942203e-05, + "loss": 0.1238, + "step": 5502 + }, + { + "epoch": 1.34, + "learning_rate": 1.5498050467335012e-05, + "loss": 0.1351, + "step": 5504 + }, + { + "epoch": 1.34, + "learning_rate": 1.5494753989159493e-05, + "loss": 0.1548, + "step": 5506 + }, + { + "epoch": 1.34, + "learning_rate": 1.5491456655408767e-05, + "loss": 0.1529, + "step": 5508 + }, + { + "epoch": 1.34, + "learning_rate": 1.548815846659624e-05, + "loss": 0.1242, + "step": 5510 + }, + { + "epoch": 1.34, + "learning_rate": 1.548485942323548e-05, + "loss": 0.1164, + "step": 5512 + }, + { + "epoch": 1.34, + "learning_rate": 1.5481559525840167e-05, + "loss": 0.1456, + "step": 5514 + }, + { + "epoch": 1.34, + "learning_rate": 1.5478258774924117e-05, + "loss": 0.1564, + "step": 5516 + }, + { + "epoch": 1.34, + "learning_rate": 1.5474957171001287e-05, + "loss": 0.14, + "step": 5518 + }, + { + "epoch": 1.35, + "learning_rate": 1.547165471458576e-05, + "loss": 0.1094, + "step": 5520 + }, + { + "epoch": 1.35, + "learning_rate": 1.546835140619175e-05, + "loss": 0.1484, + "step": 5522 + }, + { + "epoch": 1.35, + "learning_rate": 1.546504724633362e-05, + "loss": 0.1461, + "step": 5524 + }, + { + "epoch": 1.35, + "learning_rate": 1.546174223552584e-05, + "loss": 0.1358, + "step": 5526 + }, + { + "epoch": 1.35, + "learning_rate": 1.545843637428303e-05, + "loss": 0.1604, + "step": 5528 + }, + { + "epoch": 1.35, + "learning_rate": 1.5455129663119936e-05, + "loss": 0.139, + "step": 5530 + }, + { + "epoch": 1.35, + "learning_rate": 1.545182210255144e-05, + "loss": 0.1456, + "step": 5532 + }, + { + "epoch": 1.35, + "learning_rate": 1.5448513693092558e-05, + "loss": 0.1082, + "step": 5534 + }, + { + "epoch": 1.35, + "learning_rate": 1.5445204435258427e-05, + "loss": 0.1419, + "step": 5536 + }, + { + "epoch": 1.35, + "learning_rate": 1.544189432956433e-05, + "loss": 0.1547, + "step": 5538 + }, + { + "epoch": 1.35, + "learning_rate": 1.5438583376525676e-05, + "loss": 0.1146, + "step": 5540 + }, + { + "epoch": 1.35, + "learning_rate": 1.5435271576658e-05, + "loss": 0.1392, + "step": 5542 + }, + { + "epoch": 1.35, + "learning_rate": 1.543195893047698e-05, + "loss": 0.1439, + "step": 5544 + }, + { + "epoch": 1.35, + "learning_rate": 1.5428645438498416e-05, + "loss": 0.1478, + "step": 5546 + }, + { + "epoch": 1.35, + "learning_rate": 1.5425331101238246e-05, + "loss": 0.1426, + "step": 5548 + }, + { + "epoch": 1.35, + "learning_rate": 1.5422015919212538e-05, + "loss": 0.1713, + "step": 5550 + }, + { + "epoch": 1.35, + "learning_rate": 1.5418699892937494e-05, + "loss": 0.1137, + "step": 5552 + }, + { + "epoch": 1.35, + "learning_rate": 1.541538302292944e-05, + "loss": 0.1055, + "step": 5554 + }, + { + "epoch": 1.35, + "learning_rate": 1.541206530970484e-05, + "loss": 0.1285, + "step": 5556 + }, + { + "epoch": 1.35, + "learning_rate": 1.5408746753780288e-05, + "loss": 0.1351, + "step": 5558 + }, + { + "epoch": 1.35, + "learning_rate": 1.5405427355672506e-05, + "loss": 0.1664, + "step": 5560 + }, + { + "epoch": 1.36, + "learning_rate": 1.540210711589835e-05, + "loss": 0.1539, + "step": 5562 + }, + { + "epoch": 1.36, + "learning_rate": 1.539878603497481e-05, + "loss": 0.1294, + "step": 5564 + }, + { + "epoch": 1.36, + "learning_rate": 1.5395464113419003e-05, + "loss": 0.1472, + "step": 5566 + }, + { + "epoch": 1.36, + "learning_rate": 1.5392141351748175e-05, + "loss": 0.1132, + "step": 5568 + }, + { + "epoch": 1.36, + "learning_rate": 1.5388817750479706e-05, + "loss": 0.1146, + "step": 5570 + }, + { + "epoch": 1.36, + "learning_rate": 1.5385493310131106e-05, + "loss": 0.1115, + "step": 5572 + }, + { + "epoch": 1.36, + "learning_rate": 1.538216803122002e-05, + "loss": 0.1531, + "step": 5574 + }, + { + "epoch": 1.36, + "learning_rate": 1.5378841914264217e-05, + "loss": 0.1595, + "step": 5576 + }, + { + "epoch": 1.36, + "learning_rate": 1.537551495978159e-05, + "loss": 0.0974, + "step": 5578 + }, + { + "epoch": 1.36, + "learning_rate": 1.5372187168290186e-05, + "loss": 0.1615, + "step": 5580 + }, + { + "epoch": 1.36, + "learning_rate": 1.5368858540308158e-05, + "loss": 0.1167, + "step": 5582 + }, + { + "epoch": 1.36, + "learning_rate": 1.5365529076353803e-05, + "loss": 0.1464, + "step": 5584 + }, + { + "epoch": 1.36, + "learning_rate": 1.536219877694554e-05, + "loss": 0.1424, + "step": 5586 + }, + { + "epoch": 1.36, + "learning_rate": 1.5358867642601928e-05, + "loss": 0.1117, + "step": 5588 + }, + { + "epoch": 1.36, + "learning_rate": 1.5355535673841643e-05, + "loss": 0.1558, + "step": 5590 + }, + { + "epoch": 1.36, + "learning_rate": 1.5352202871183504e-05, + "loss": 0.1282, + "step": 5592 + }, + { + "epoch": 1.36, + "learning_rate": 1.534886923514645e-05, + "loss": 0.1326, + "step": 5594 + }, + { + "epoch": 1.36, + "learning_rate": 1.534553476624956e-05, + "loss": 0.1486, + "step": 5596 + }, + { + "epoch": 1.36, + "learning_rate": 1.5342199465012026e-05, + "loss": 0.1142, + "step": 5598 + }, + { + "epoch": 1.36, + "learning_rate": 1.5338863331953183e-05, + "loss": 0.117, + "step": 5600 + }, + { + "epoch": 1.37, + "learning_rate": 1.5335526367592503e-05, + "loss": 0.1362, + "step": 5602 + }, + { + "epoch": 1.37, + "learning_rate": 1.533218857244956e-05, + "loss": 0.1052, + "step": 5604 + }, + { + "epoch": 1.37, + "learning_rate": 1.5328849947044088e-05, + "loss": 0.0941, + "step": 5606 + }, + { + "epoch": 1.37, + "learning_rate": 1.5325510491895934e-05, + "loss": 0.1076, + "step": 5608 + }, + { + "epoch": 1.37, + "learning_rate": 1.532217020752507e-05, + "loss": 0.1427, + "step": 5610 + }, + { + "epoch": 1.37, + "learning_rate": 1.531882909445161e-05, + "loss": 0.1112, + "step": 5612 + }, + { + "epoch": 1.37, + "learning_rate": 1.531548715319579e-05, + "loss": 0.1277, + "step": 5614 + }, + { + "epoch": 1.37, + "learning_rate": 1.5312144384277966e-05, + "loss": 0.0868, + "step": 5616 + }, + { + "epoch": 1.37, + "learning_rate": 1.530880078821865e-05, + "loss": 0.1269, + "step": 5618 + }, + { + "epoch": 1.37, + "learning_rate": 1.5305456365538455e-05, + "loss": 0.1594, + "step": 5620 + }, + { + "epoch": 1.37, + "learning_rate": 1.5302111116758132e-05, + "loss": 0.1352, + "step": 5622 + }, + { + "epoch": 1.37, + "learning_rate": 1.5298765042398567e-05, + "loss": 0.1203, + "step": 5624 + }, + { + "epoch": 1.37, + "learning_rate": 1.5295418142980766e-05, + "loss": 0.1415, + "step": 5626 + }, + { + "epoch": 1.37, + "learning_rate": 1.5292070419025868e-05, + "loss": 0.1195, + "step": 5628 + }, + { + "epoch": 1.37, + "learning_rate": 1.528872187105514e-05, + "loss": 0.1074, + "step": 5630 + }, + { + "epoch": 1.37, + "learning_rate": 1.5285372499589978e-05, + "loss": 0.1317, + "step": 5632 + }, + { + "epoch": 1.37, + "learning_rate": 1.5282022305151897e-05, + "loss": 0.1093, + "step": 5634 + }, + { + "epoch": 1.37, + "learning_rate": 1.5278671288262558e-05, + "loss": 0.1401, + "step": 5636 + }, + { + "epoch": 1.37, + "learning_rate": 1.5275319449443733e-05, + "loss": 0.1387, + "step": 5638 + }, + { + "epoch": 1.37, + "learning_rate": 1.527196678921733e-05, + "loss": 0.0884, + "step": 5640 + }, + { + "epoch": 1.37, + "learning_rate": 1.526861330810539e-05, + "loss": 0.1308, + "step": 5642 + }, + { + "epoch": 1.38, + "learning_rate": 1.5265259006630065e-05, + "loss": 0.0974, + "step": 5644 + }, + { + "epoch": 1.38, + "learning_rate": 1.526190388531365e-05, + "loss": 0.1652, + "step": 5646 + }, + { + "epoch": 1.38, + "learning_rate": 1.525854794467857e-05, + "loss": 0.1436, + "step": 5648 + }, + { + "epoch": 1.38, + "learning_rate": 1.5255191185247362e-05, + "loss": 0.1153, + "step": 5650 + }, + { + "epoch": 1.38, + "learning_rate": 1.5251833607542703e-05, + "loss": 0.1627, + "step": 5652 + }, + { + "epoch": 1.38, + "learning_rate": 1.524847521208739e-05, + "loss": 0.1208, + "step": 5654 + }, + { + "epoch": 1.38, + "learning_rate": 1.5245115999404356e-05, + "loss": 0.1362, + "step": 5656 + }, + { + "epoch": 1.38, + "learning_rate": 1.5241755970016651e-05, + "loss": 0.1521, + "step": 5658 + }, + { + "epoch": 1.38, + "learning_rate": 1.523839512444746e-05, + "loss": 0.119, + "step": 5660 + }, + { + "epoch": 1.38, + "learning_rate": 1.5235033463220092e-05, + "loss": 0.1285, + "step": 5662 + }, + { + "epoch": 1.38, + "learning_rate": 1.5231670986857987e-05, + "loss": 0.0939, + "step": 5664 + }, + { + "epoch": 1.38, + "learning_rate": 1.5228307695884699e-05, + "loss": 0.1077, + "step": 5666 + }, + { + "epoch": 1.38, + "learning_rate": 1.5224943590823926e-05, + "loss": 0.1158, + "step": 5668 + }, + { + "epoch": 1.38, + "learning_rate": 1.5221578672199484e-05, + "loss": 0.1278, + "step": 5670 + }, + { + "epoch": 1.38, + "learning_rate": 1.5218212940535312e-05, + "loss": 0.1345, + "step": 5672 + }, + { + "epoch": 1.38, + "learning_rate": 1.5214846396355484e-05, + "loss": 0.1321, + "step": 5674 + }, + { + "epoch": 1.38, + "learning_rate": 1.5211479040184198e-05, + "loss": 0.1213, + "step": 5676 + }, + { + "epoch": 1.38, + "learning_rate": 1.5208110872545776e-05, + "loss": 0.1339, + "step": 5678 + }, + { + "epoch": 1.38, + "learning_rate": 1.5204741893964663e-05, + "loss": 0.1485, + "step": 5680 + }, + { + "epoch": 1.38, + "learning_rate": 1.5201372104965439e-05, + "loss": 0.1245, + "step": 5682 + }, + { + "epoch": 1.38, + "learning_rate": 1.5198001506072806e-05, + "loss": 0.1426, + "step": 5684 + }, + { + "epoch": 1.39, + "learning_rate": 1.5194630097811591e-05, + "loss": 0.1319, + "step": 5686 + }, + { + "epoch": 1.39, + "learning_rate": 1.519125788070675e-05, + "loss": 0.1286, + "step": 5688 + }, + { + "epoch": 1.39, + "learning_rate": 1.5187884855283358e-05, + "loss": 0.1152, + "step": 5690 + }, + { + "epoch": 1.39, + "learning_rate": 1.518451102206663e-05, + "loss": 0.1348, + "step": 5692 + }, + { + "epoch": 1.39, + "learning_rate": 1.5181136381581884e-05, + "loss": 0.1375, + "step": 5694 + }, + { + "epoch": 1.39, + "learning_rate": 1.5177760934354591e-05, + "loss": 0.1217, + "step": 5696 + }, + { + "epoch": 1.39, + "learning_rate": 1.5174384680910323e-05, + "loss": 0.1631, + "step": 5698 + }, + { + "epoch": 1.39, + "learning_rate": 1.5171007621774798e-05, + "loss": 0.127, + "step": 5700 + }, + { + "epoch": 1.39, + "learning_rate": 1.5167629757473843e-05, + "loss": 0.1128, + "step": 5702 + }, + { + "epoch": 1.39, + "learning_rate": 1.516425108853342e-05, + "loss": 0.0953, + "step": 5704 + }, + { + "epoch": 1.39, + "learning_rate": 1.5160871615479613e-05, + "loss": 0.1366, + "step": 5706 + }, + { + "epoch": 1.39, + "learning_rate": 1.5157491338838633e-05, + "loss": 0.1395, + "step": 5708 + }, + { + "epoch": 1.39, + "learning_rate": 1.5154110259136813e-05, + "loss": 0.1358, + "step": 5710 + }, + { + "epoch": 1.39, + "learning_rate": 1.5150728376900614e-05, + "loss": 0.1308, + "step": 5712 + }, + { + "epoch": 1.39, + "learning_rate": 1.5147345692656622e-05, + "loss": 0.1258, + "step": 5714 + }, + { + "epoch": 1.39, + "learning_rate": 1.5143962206931545e-05, + "loss": 0.1314, + "step": 5716 + }, + { + "epoch": 1.39, + "learning_rate": 1.5140577920252217e-05, + "loss": 0.1048, + "step": 5718 + }, + { + "epoch": 1.39, + "learning_rate": 1.5137192833145604e-05, + "loss": 0.1118, + "step": 5720 + }, + { + "epoch": 1.39, + "learning_rate": 1.5133806946138779e-05, + "loss": 0.0972, + "step": 5722 + }, + { + "epoch": 1.39, + "learning_rate": 1.5130420259758958e-05, + "loss": 0.1365, + "step": 5724 + }, + { + "epoch": 1.4, + "learning_rate": 1.5127032774533468e-05, + "loss": 0.1101, + "step": 5726 + }, + { + "epoch": 1.4, + "learning_rate": 1.5123644490989777e-05, + "loss": 0.1262, + "step": 5728 + }, + { + "epoch": 1.4, + "learning_rate": 1.5120255409655456e-05, + "loss": 0.1442, + "step": 5730 + }, + { + "epoch": 1.4, + "learning_rate": 1.5116865531058216e-05, + "loss": 0.1052, + "step": 5732 + }, + { + "epoch": 1.4, + "learning_rate": 1.5113474855725886e-05, + "loss": 0.1113, + "step": 5734 + }, + { + "epoch": 1.4, + "learning_rate": 1.511008338418642e-05, + "loss": 0.1176, + "step": 5736 + }, + { + "epoch": 1.4, + "learning_rate": 1.5106691116967895e-05, + "loss": 0.1197, + "step": 5738 + }, + { + "epoch": 1.4, + "learning_rate": 1.5103298054598515e-05, + "loss": 0.1444, + "step": 5740 + }, + { + "epoch": 1.4, + "learning_rate": 1.5099904197606605e-05, + "loss": 0.0986, + "step": 5742 + }, + { + "epoch": 1.4, + "learning_rate": 1.5096509546520614e-05, + "loss": 0.1298, + "step": 5744 + }, + { + "epoch": 1.4, + "learning_rate": 1.5093114101869115e-05, + "loss": 0.1464, + "step": 5746 + }, + { + "epoch": 1.4, + "learning_rate": 1.5089717864180804e-05, + "loss": 0.0886, + "step": 5748 + }, + { + "epoch": 1.4, + "learning_rate": 1.5086320833984504e-05, + "loss": 0.1282, + "step": 5750 + }, + { + "epoch": 1.4, + "learning_rate": 1.508292301180916e-05, + "loss": 0.1222, + "step": 5752 + }, + { + "epoch": 1.4, + "learning_rate": 1.5079524398183826e-05, + "loss": 0.1261, + "step": 5754 + }, + { + "epoch": 1.4, + "learning_rate": 1.5076124993637713e-05, + "loss": 0.126, + "step": 5756 + }, + { + "epoch": 1.4, + "learning_rate": 1.5072724798700119e-05, + "loss": 0.078, + "step": 5758 + }, + { + "epoch": 1.4, + "learning_rate": 1.506932381390048e-05, + "loss": 0.1408, + "step": 5760 + }, + { + "epoch": 1.4, + "learning_rate": 1.5065922039768364e-05, + "loss": 0.1541, + "step": 5762 + }, + { + "epoch": 1.4, + "learning_rate": 1.5062519476833451e-05, + "loss": 0.1278, + "step": 5764 + }, + { + "epoch": 1.4, + "learning_rate": 1.5059116125625538e-05, + "loss": 0.1084, + "step": 5766 + }, + { + "epoch": 1.41, + "learning_rate": 1.5055711986674564e-05, + "loss": 0.0804, + "step": 5768 + }, + { + "epoch": 1.41, + "learning_rate": 1.5052307060510574e-05, + "loss": 0.0902, + "step": 5770 + }, + { + "epoch": 1.41, + "learning_rate": 1.504890134766374e-05, + "loss": 0.102, + "step": 5772 + }, + { + "epoch": 1.41, + "learning_rate": 1.5045494848664359e-05, + "loss": 0.1267, + "step": 5774 + }, + { + "epoch": 1.41, + "learning_rate": 1.5042087564042849e-05, + "loss": 0.1299, + "step": 5776 + }, + { + "epoch": 1.41, + "learning_rate": 1.503867949432975e-05, + "loss": 0.1404, + "step": 5778 + }, + { + "epoch": 1.41, + "learning_rate": 1.5035270640055726e-05, + "loss": 0.1019, + "step": 5780 + }, + { + "epoch": 1.41, + "learning_rate": 1.5031861001751558e-05, + "loss": 0.1227, + "step": 5782 + }, + { + "epoch": 1.41, + "learning_rate": 1.502845057994816e-05, + "loss": 0.1436, + "step": 5784 + }, + { + "epoch": 1.41, + "learning_rate": 1.5025039375176552e-05, + "loss": 0.1125, + "step": 5786 + }, + { + "epoch": 1.41, + "learning_rate": 1.5021627387967889e-05, + "loss": 0.1081, + "step": 5788 + }, + { + "epoch": 1.41, + "learning_rate": 1.5018214618853443e-05, + "loss": 0.0876, + "step": 5790 + }, + { + "epoch": 1.41, + "learning_rate": 1.5014801068364609e-05, + "loss": 0.1656, + "step": 5792 + }, + { + "epoch": 1.41, + "learning_rate": 1.50113867370329e-05, + "loss": 0.1332, + "step": 5794 + }, + { + "epoch": 1.41, + "learning_rate": 1.5007971625389956e-05, + "loss": 0.144, + "step": 5796 + }, + { + "epoch": 1.41, + "learning_rate": 1.500455573396754e-05, + "loss": 0.0871, + "step": 5798 + }, + { + "epoch": 1.41, + "learning_rate": 1.5001139063297525e-05, + "loss": 0.1082, + "step": 5800 + }, + { + "epoch": 1.41, + "learning_rate": 1.4997721613911916e-05, + "loss": 0.101, + "step": 5802 + }, + { + "epoch": 1.41, + "learning_rate": 1.4994303386342837e-05, + "loss": 0.1037, + "step": 5804 + }, + { + "epoch": 1.41, + "learning_rate": 1.4990884381122527e-05, + "loss": 0.1269, + "step": 5806 + }, + { + "epoch": 1.42, + "learning_rate": 1.498746459878336e-05, + "loss": 0.1443, + "step": 5808 + }, + { + "epoch": 1.42, + "learning_rate": 1.4984044039857814e-05, + "loss": 0.1405, + "step": 5810 + }, + { + "epoch": 1.42, + "learning_rate": 1.4980622704878505e-05, + "loss": 0.1277, + "step": 5812 + }, + { + "epoch": 1.42, + "learning_rate": 1.4977200594378152e-05, + "loss": 0.1255, + "step": 5814 + }, + { + "epoch": 1.42, + "learning_rate": 1.4973777708889608e-05, + "loss": 0.1199, + "step": 5816 + }, + { + "epoch": 1.42, + "learning_rate": 1.497035404894584e-05, + "loss": 0.1337, + "step": 5818 + }, + { + "epoch": 1.42, + "learning_rate": 1.4966929615079946e-05, + "loss": 0.1169, + "step": 5820 + }, + { + "epoch": 1.42, + "learning_rate": 1.4963504407825125e-05, + "loss": 0.1119, + "step": 5822 + }, + { + "epoch": 1.42, + "learning_rate": 1.4960078427714715e-05, + "loss": 0.1142, + "step": 5824 + }, + { + "epoch": 1.42, + "learning_rate": 1.4956651675282165e-05, + "loss": 0.0938, + "step": 5826 + }, + { + "epoch": 1.42, + "learning_rate": 1.495322415106105e-05, + "loss": 0.1182, + "step": 5828 + }, + { + "epoch": 1.42, + "learning_rate": 1.4949795855585056e-05, + "loss": 0.1009, + "step": 5830 + }, + { + "epoch": 1.42, + "learning_rate": 1.4946366789387998e-05, + "loss": 0.1079, + "step": 5832 + }, + { + "epoch": 1.42, + "learning_rate": 1.494293695300381e-05, + "loss": 0.1111, + "step": 5834 + }, + { + "epoch": 1.42, + "learning_rate": 1.4939506346966541e-05, + "loss": 0.1, + "step": 5836 + }, + { + "epoch": 1.42, + "learning_rate": 1.4936074971810361e-05, + "loss": 0.0966, + "step": 5838 + }, + { + "epoch": 1.42, + "learning_rate": 1.4932642828069568e-05, + "loss": 0.1247, + "step": 5840 + }, + { + "epoch": 1.42, + "learning_rate": 1.4929209916278564e-05, + "loss": 0.108, + "step": 5842 + }, + { + "epoch": 1.42, + "learning_rate": 1.4925776236971885e-05, + "loss": 0.0985, + "step": 5844 + }, + { + "epoch": 1.42, + "learning_rate": 1.4922341790684182e-05, + "loss": 0.1109, + "step": 5846 + }, + { + "epoch": 1.42, + "learning_rate": 1.4918906577950223e-05, + "loss": 0.1093, + "step": 5848 + }, + { + "epoch": 1.43, + "learning_rate": 1.4915470599304893e-05, + "loss": 0.1403, + "step": 5850 + }, + { + "epoch": 1.43, + "learning_rate": 1.4912033855283205e-05, + "loss": 0.1137, + "step": 5852 + }, + { + "epoch": 1.43, + "learning_rate": 1.4908596346420288e-05, + "loss": 0.1273, + "step": 5854 + }, + { + "epoch": 1.43, + "learning_rate": 1.4905158073251385e-05, + "loss": 0.1217, + "step": 5856 + }, + { + "epoch": 1.43, + "learning_rate": 1.4901719036311857e-05, + "loss": 0.1224, + "step": 5858 + }, + { + "epoch": 1.43, + "learning_rate": 1.4898279236137199e-05, + "loss": 0.1389, + "step": 5860 + }, + { + "epoch": 1.43, + "learning_rate": 1.4894838673263006e-05, + "loss": 0.1226, + "step": 5862 + }, + { + "epoch": 1.43, + "learning_rate": 1.4891397348225004e-05, + "loss": 0.1578, + "step": 5864 + }, + { + "epoch": 1.43, + "learning_rate": 1.4887955261559028e-05, + "loss": 0.1173, + "step": 5866 + }, + { + "epoch": 1.43, + "learning_rate": 1.4884512413801049e-05, + "loss": 0.0972, + "step": 5868 + }, + { + "epoch": 1.43, + "learning_rate": 1.488106880548713e-05, + "loss": 0.1213, + "step": 5870 + }, + { + "epoch": 1.43, + "learning_rate": 1.4877624437153477e-05, + "loss": 0.1422, + "step": 5872 + }, + { + "epoch": 1.43, + "learning_rate": 1.4874179309336398e-05, + "loss": 0.1236, + "step": 5874 + }, + { + "epoch": 1.43, + "learning_rate": 1.4870733422572337e-05, + "loss": 0.1105, + "step": 5876 + }, + { + "epoch": 1.43, + "learning_rate": 1.4867286777397829e-05, + "loss": 0.0917, + "step": 5878 + }, + { + "epoch": 1.43, + "learning_rate": 1.4863839374349555e-05, + "loss": 0.1134, + "step": 5880 + }, + { + "epoch": 1.43, + "learning_rate": 1.48603912139643e-05, + "loss": 0.1181, + "step": 5882 + }, + { + "epoch": 1.43, + "learning_rate": 1.4856942296778967e-05, + "loss": 0.1018, + "step": 5884 + }, + { + "epoch": 1.43, + "learning_rate": 1.4853492623330574e-05, + "loss": 0.1018, + "step": 5886 + }, + { + "epoch": 1.43, + "learning_rate": 1.485004219415627e-05, + "loss": 0.1067, + "step": 5888 + }, + { + "epoch": 1.44, + "learning_rate": 1.4846591009793313e-05, + "loss": 0.1152, + "step": 5890 + }, + { + "epoch": 1.44, + "learning_rate": 1.4843139070779073e-05, + "loss": 0.0953, + "step": 5892 + }, + { + "epoch": 1.44, + "learning_rate": 1.4839686377651044e-05, + "loss": 0.1135, + "step": 5894 + }, + { + "epoch": 1.44, + "learning_rate": 1.4836232930946844e-05, + "loss": 0.0802, + "step": 5896 + }, + { + "epoch": 1.44, + "learning_rate": 1.483277873120419e-05, + "loss": 0.1216, + "step": 5898 + }, + { + "epoch": 1.44, + "learning_rate": 1.4829323778960933e-05, + "loss": 0.1143, + "step": 5900 + }, + { + "epoch": 1.44, + "learning_rate": 1.4825868074755037e-05, + "loss": 0.1223, + "step": 5902 + }, + { + "epoch": 1.44, + "learning_rate": 1.4822411619124581e-05, + "loss": 0.1322, + "step": 5904 + }, + { + "epoch": 1.44, + "learning_rate": 1.481895441260776e-05, + "loss": 0.0934, + "step": 5906 + }, + { + "epoch": 1.44, + "learning_rate": 1.4815496455742889e-05, + "loss": 0.106, + "step": 5908 + }, + { + "epoch": 1.44, + "learning_rate": 1.4812037749068396e-05, + "loss": 0.0994, + "step": 5910 + }, + { + "epoch": 1.44, + "learning_rate": 1.480857829312283e-05, + "loss": 0.1217, + "step": 5912 + }, + { + "epoch": 1.44, + "learning_rate": 1.4805118088444852e-05, + "loss": 0.1033, + "step": 5914 + }, + { + "epoch": 1.44, + "learning_rate": 1.480165713557325e-05, + "loss": 0.1132, + "step": 5916 + }, + { + "epoch": 1.44, + "learning_rate": 1.4798195435046911e-05, + "loss": 0.1144, + "step": 5918 + }, + { + "epoch": 1.44, + "learning_rate": 1.4794732987404853e-05, + "loss": 0.1049, + "step": 5920 + }, + { + "epoch": 1.44, + "learning_rate": 1.4791269793186204e-05, + "loss": 0.0806, + "step": 5922 + }, + { + "epoch": 1.44, + "learning_rate": 1.4787805852930216e-05, + "loss": 0.132, + "step": 5924 + }, + { + "epoch": 1.44, + "learning_rate": 1.4784341167176242e-05, + "loss": 0.1218, + "step": 5926 + }, + { + "epoch": 1.44, + "learning_rate": 1.4780875736463764e-05, + "loss": 0.1071, + "step": 5928 + }, + { + "epoch": 1.44, + "learning_rate": 1.4777409561332376e-05, + "loss": 0.111, + "step": 5930 + }, + { + "epoch": 1.45, + "learning_rate": 1.4773942642321792e-05, + "loss": 0.1218, + "step": 5932 + }, + { + "epoch": 1.45, + "learning_rate": 1.477047497997183e-05, + "loss": 0.1157, + "step": 5934 + }, + { + "epoch": 1.45, + "learning_rate": 1.4767006574822437e-05, + "loss": 0.0967, + "step": 5936 + }, + { + "epoch": 1.45, + "learning_rate": 1.4763537427413669e-05, + "loss": 0.1028, + "step": 5938 + }, + { + "epoch": 1.45, + "learning_rate": 1.4760067538285698e-05, + "loss": 0.1493, + "step": 5940 + }, + { + "epoch": 1.45, + "learning_rate": 1.475659690797881e-05, + "loss": 0.1018, + "step": 5942 + }, + { + "epoch": 1.45, + "learning_rate": 1.4753125537033415e-05, + "loss": 0.1384, + "step": 5944 + }, + { + "epoch": 1.45, + "learning_rate": 1.4749653425990027e-05, + "loss": 0.0771, + "step": 5946 + }, + { + "epoch": 1.45, + "learning_rate": 1.4746180575389284e-05, + "loss": 0.1299, + "step": 5948 + }, + { + "epoch": 1.45, + "learning_rate": 1.4742706985771928e-05, + "loss": 0.1114, + "step": 5950 + }, + { + "epoch": 1.45, + "learning_rate": 1.4739232657678832e-05, + "loss": 0.0903, + "step": 5952 + }, + { + "epoch": 1.45, + "learning_rate": 1.4735757591650972e-05, + "loss": 0.1031, + "step": 5954 + }, + { + "epoch": 1.45, + "learning_rate": 1.4732281788229443e-05, + "loss": 0.1015, + "step": 5956 + }, + { + "epoch": 1.45, + "learning_rate": 1.4728805247955448e-05, + "loss": 0.1063, + "step": 5958 + }, + { + "epoch": 1.45, + "learning_rate": 1.4725327971370325e-05, + "loss": 0.1123, + "step": 5960 + }, + { + "epoch": 1.45, + "learning_rate": 1.4721849959015498e-05, + "loss": 0.0838, + "step": 5962 + }, + { + "epoch": 1.45, + "learning_rate": 1.4718371211432527e-05, + "loss": 0.1304, + "step": 5964 + }, + { + "epoch": 1.45, + "learning_rate": 1.471489172916308e-05, + "loss": 0.0669, + "step": 5966 + }, + { + "epoch": 1.45, + "learning_rate": 1.4711411512748936e-05, + "loss": 0.1024, + "step": 5968 + }, + { + "epoch": 1.45, + "learning_rate": 1.4707930562731994e-05, + "loss": 0.132, + "step": 5970 + }, + { + "epoch": 1.46, + "learning_rate": 1.4704448879654264e-05, + "loss": 0.1174, + "step": 5972 + }, + { + "epoch": 1.46, + "learning_rate": 1.4700966464057868e-05, + "loss": 0.0985, + "step": 5974 + }, + { + "epoch": 1.46, + "learning_rate": 1.4697483316485048e-05, + "loss": 0.1103, + "step": 5976 + }, + { + "epoch": 1.46, + "learning_rate": 1.4693999437478158e-05, + "loss": 0.1277, + "step": 5978 + }, + { + "epoch": 1.46, + "learning_rate": 1.4690514827579658e-05, + "loss": 0.1202, + "step": 5980 + }, + { + "epoch": 1.46, + "learning_rate": 1.4687029487332137e-05, + "loss": 0.1218, + "step": 5982 + }, + { + "epoch": 1.46, + "learning_rate": 1.4683543417278281e-05, + "loss": 0.1117, + "step": 5984 + }, + { + "epoch": 1.46, + "learning_rate": 1.4680056617960903e-05, + "loss": 0.1059, + "step": 5986 + }, + { + "epoch": 1.46, + "learning_rate": 1.4676569089922923e-05, + "loss": 0.1184, + "step": 5988 + }, + { + "epoch": 1.46, + "learning_rate": 1.4673080833707376e-05, + "loss": 0.093, + "step": 5990 + }, + { + "epoch": 1.46, + "learning_rate": 1.4669591849857407e-05, + "loss": 0.1105, + "step": 5992 + }, + { + "epoch": 1.46, + "learning_rate": 1.4666102138916282e-05, + "loss": 0.1102, + "step": 5994 + }, + { + "epoch": 1.46, + "learning_rate": 1.4662611701427376e-05, + "loss": 0.12, + "step": 5996 + }, + { + "epoch": 1.46, + "learning_rate": 1.4659120537934173e-05, + "loss": 0.0972, + "step": 5998 + }, + { + "epoch": 1.46, + "learning_rate": 1.4655628648980273e-05, + "loss": 0.1231, + "step": 6000 + }, + { + "epoch": 1.46, + "learning_rate": 1.4652136035109394e-05, + "loss": 0.1563, + "step": 6002 + }, + { + "epoch": 1.46, + "learning_rate": 1.4648642696865363e-05, + "loss": 0.088, + "step": 6004 + }, + { + "epoch": 1.46, + "learning_rate": 1.4645148634792116e-05, + "loss": 0.1007, + "step": 6006 + }, + { + "epoch": 1.46, + "learning_rate": 1.4641653849433709e-05, + "loss": 0.0866, + "step": 6008 + }, + { + "epoch": 1.46, + "learning_rate": 1.4638158341334303e-05, + "loss": 0.0987, + "step": 6010 + }, + { + "epoch": 1.46, + "learning_rate": 1.4634662111038176e-05, + "loss": 0.1255, + "step": 6012 + }, + { + "epoch": 1.47, + "learning_rate": 1.463116515908972e-05, + "loss": 0.0836, + "step": 6014 + }, + { + "epoch": 1.47, + "learning_rate": 1.4627667486033435e-05, + "loss": 0.1185, + "step": 6016 + }, + { + "epoch": 1.47, + "learning_rate": 1.4624169092413935e-05, + "loss": 0.1086, + "step": 6018 + }, + { + "epoch": 1.47, + "learning_rate": 1.462066997877595e-05, + "loss": 0.1047, + "step": 6020 + }, + { + "epoch": 1.47, + "learning_rate": 1.4617170145664314e-05, + "loss": 0.0858, + "step": 6022 + }, + { + "epoch": 1.47, + "learning_rate": 1.4613669593623985e-05, + "loss": 0.112, + "step": 6024 + }, + { + "epoch": 1.47, + "learning_rate": 1.4610168323200017e-05, + "loss": 0.1033, + "step": 6026 + }, + { + "epoch": 1.47, + "learning_rate": 1.4606666334937589e-05, + "loss": 0.1323, + "step": 6028 + }, + { + "epoch": 1.47, + "learning_rate": 1.4603163629381988e-05, + "loss": 0.1033, + "step": 6030 + }, + { + "epoch": 1.47, + "learning_rate": 1.4599660207078614e-05, + "loss": 0.1045, + "step": 6032 + }, + { + "epoch": 1.47, + "learning_rate": 1.459615606857297e-05, + "loss": 0.1361, + "step": 6034 + }, + { + "epoch": 1.47, + "learning_rate": 1.4592651214410683e-05, + "loss": 0.0967, + "step": 6036 + }, + { + "epoch": 1.47, + "learning_rate": 1.4589145645137483e-05, + "loss": 0.1178, + "step": 6038 + }, + { + "epoch": 1.47, + "learning_rate": 1.4585639361299213e-05, + "loss": 0.1041, + "step": 6040 + }, + { + "epoch": 1.47, + "learning_rate": 1.4582132363441832e-05, + "loss": 0.1004, + "step": 6042 + }, + { + "epoch": 1.47, + "learning_rate": 1.4578624652111403e-05, + "loss": 0.1071, + "step": 6044 + }, + { + "epoch": 1.47, + "learning_rate": 1.4575116227854105e-05, + "loss": 0.1068, + "step": 6046 + }, + { + "epoch": 1.47, + "learning_rate": 1.4571607091216223e-05, + "loss": 0.0852, + "step": 6048 + }, + { + "epoch": 1.47, + "learning_rate": 1.4568097242744161e-05, + "loss": 0.106, + "step": 6050 + }, + { + "epoch": 1.47, + "learning_rate": 1.456458668298443e-05, + "loss": 0.0901, + "step": 6052 + }, + { + "epoch": 1.48, + "learning_rate": 1.4561075412483648e-05, + "loss": 0.1098, + "step": 6054 + }, + { + "epoch": 1.48, + "learning_rate": 1.4557563431788545e-05, + "loss": 0.123, + "step": 6056 + }, + { + "epoch": 1.48, + "learning_rate": 1.4554050741445967e-05, + "loss": 0.0801, + "step": 6058 + }, + { + "epoch": 1.48, + "learning_rate": 1.4550537342002865e-05, + "loss": 0.0853, + "step": 6060 + }, + { + "epoch": 1.48, + "learning_rate": 1.4547023234006304e-05, + "loss": 0.1047, + "step": 6062 + }, + { + "epoch": 1.48, + "learning_rate": 1.4543508418003452e-05, + "loss": 0.1323, + "step": 6064 + }, + { + "epoch": 1.48, + "learning_rate": 1.4539992894541602e-05, + "loss": 0.0731, + "step": 6066 + }, + { + "epoch": 1.48, + "learning_rate": 1.4536476664168137e-05, + "loss": 0.132, + "step": 6068 + }, + { + "epoch": 1.48, + "learning_rate": 1.4532959727430571e-05, + "loss": 0.1445, + "step": 6070 + }, + { + "epoch": 1.48, + "learning_rate": 1.4529442084876513e-05, + "loss": 0.0888, + "step": 6072 + }, + { + "epoch": 1.48, + "learning_rate": 1.4525923737053687e-05, + "loss": 0.1083, + "step": 6074 + }, + { + "epoch": 1.48, + "learning_rate": 1.4522404684509923e-05, + "loss": 0.0778, + "step": 6076 + }, + { + "epoch": 1.48, + "learning_rate": 1.4518884927793174e-05, + "loss": 0.1409, + "step": 6078 + }, + { + "epoch": 1.48, + "learning_rate": 1.4515364467451485e-05, + "loss": 0.112, + "step": 6080 + }, + { + "epoch": 1.48, + "learning_rate": 1.4511843304033022e-05, + "loss": 0.0801, + "step": 6082 + }, + { + "epoch": 1.48, + "learning_rate": 1.4508321438086052e-05, + "loss": 0.0965, + "step": 6084 + }, + { + "epoch": 1.48, + "learning_rate": 1.4504798870158964e-05, + "loss": 0.0899, + "step": 6086 + }, + { + "epoch": 1.48, + "learning_rate": 1.4501275600800244e-05, + "loss": 0.0907, + "step": 6088 + }, + { + "epoch": 1.48, + "learning_rate": 1.4497751630558497e-05, + "loss": 0.0885, + "step": 6090 + }, + { + "epoch": 1.48, + "learning_rate": 1.4494226959982423e-05, + "loss": 0.0759, + "step": 6092 + }, + { + "epoch": 1.48, + "learning_rate": 1.449070158962085e-05, + "loss": 0.1384, + "step": 6094 + }, + { + "epoch": 1.49, + "learning_rate": 1.4487175520022699e-05, + "loss": 0.0863, + "step": 6096 + }, + { + "epoch": 1.49, + "learning_rate": 1.4483648751737007e-05, + "loss": 0.1147, + "step": 6098 + }, + { + "epoch": 1.49, + "learning_rate": 1.4480121285312917e-05, + "loss": 0.1112, + "step": 6100 + }, + { + "epoch": 1.49, + "learning_rate": 1.447659312129969e-05, + "loss": 0.1479, + "step": 6102 + }, + { + "epoch": 1.49, + "learning_rate": 1.447306426024668e-05, + "loss": 0.1094, + "step": 6104 + }, + { + "epoch": 1.49, + "learning_rate": 1.4469534702703363e-05, + "loss": 0.1542, + "step": 6106 + }, + { + "epoch": 1.49, + "learning_rate": 1.4466004449219315e-05, + "loss": 0.0746, + "step": 6108 + }, + { + "epoch": 1.49, + "learning_rate": 1.4462473500344222e-05, + "loss": 0.0985, + "step": 6110 + }, + { + "epoch": 1.49, + "learning_rate": 1.4458941856627884e-05, + "loss": 0.1136, + "step": 6112 + }, + { + "epoch": 1.49, + "learning_rate": 1.4455409518620203e-05, + "loss": 0.0918, + "step": 6114 + }, + { + "epoch": 1.49, + "learning_rate": 1.445187648687119e-05, + "loss": 0.1103, + "step": 6116 + }, + { + "epoch": 1.49, + "learning_rate": 1.4448342761930966e-05, + "loss": 0.0859, + "step": 6118 + }, + { + "epoch": 1.49, + "learning_rate": 1.4444808344349758e-05, + "loss": 0.0783, + "step": 6120 + }, + { + "epoch": 1.49, + "learning_rate": 1.4441273234677908e-05, + "loss": 0.1161, + "step": 6122 + }, + { + "epoch": 1.49, + "learning_rate": 1.443773743346585e-05, + "loss": 0.0838, + "step": 6124 + }, + { + "epoch": 1.49, + "learning_rate": 1.4434200941264141e-05, + "loss": 0.1051, + "step": 6126 + }, + { + "epoch": 1.49, + "learning_rate": 1.4430663758623437e-05, + "loss": 0.0745, + "step": 6128 + }, + { + "epoch": 1.49, + "learning_rate": 1.4427125886094512e-05, + "loss": 0.1197, + "step": 6130 + }, + { + "epoch": 1.49, + "learning_rate": 1.4423587324228225e-05, + "loss": 0.1153, + "step": 6132 + }, + { + "epoch": 1.49, + "learning_rate": 1.4420048073575573e-05, + "loss": 0.1036, + "step": 6134 + }, + { + "epoch": 1.5, + "learning_rate": 1.4416508134687634e-05, + "loss": 0.1269, + "step": 6136 + }, + { + "epoch": 1.5, + "learning_rate": 1.441296750811561e-05, + "loss": 0.0646, + "step": 6138 + }, + { + "epoch": 1.5, + "learning_rate": 1.4409426194410796e-05, + "loss": 0.1054, + "step": 6140 + }, + { + "epoch": 1.5, + "learning_rate": 1.4405884194124608e-05, + "loss": 0.0993, + "step": 6142 + }, + { + "epoch": 1.5, + "learning_rate": 1.440234150780856e-05, + "loss": 0.0915, + "step": 6144 + }, + { + "epoch": 1.5, + "learning_rate": 1.439879813601428e-05, + "loss": 0.0799, + "step": 6146 + }, + { + "epoch": 1.5, + "learning_rate": 1.4395254079293488e-05, + "loss": 0.0987, + "step": 6148 + }, + { + "epoch": 1.5, + "learning_rate": 1.4391709338198032e-05, + "loss": 0.1367, + "step": 6150 + }, + { + "epoch": 1.5, + "learning_rate": 1.4388163913279849e-05, + "loss": 0.0956, + "step": 6152 + }, + { + "epoch": 1.5, + "learning_rate": 1.438461780509099e-05, + "loss": 0.095, + "step": 6154 + }, + { + "epoch": 1.5, + "learning_rate": 1.438107101418361e-05, + "loss": 0.1123, + "step": 6156 + }, + { + "epoch": 1.5, + "learning_rate": 1.4377523541109975e-05, + "loss": 0.074, + "step": 6158 + }, + { + "epoch": 1.5, + "learning_rate": 1.4373975386422448e-05, + "loss": 0.1176, + "step": 6160 + }, + { + "epoch": 1.5, + "learning_rate": 1.4370426550673507e-05, + "loss": 0.1224, + "step": 6162 + }, + { + "epoch": 1.5, + "learning_rate": 1.4366877034415736e-05, + "loss": 0.1145, + "step": 6164 + }, + { + "epoch": 1.5, + "learning_rate": 1.4363326838201817e-05, + "loss": 0.0839, + "step": 6166 + }, + { + "epoch": 1.5, + "learning_rate": 1.435977596258454e-05, + "loss": 0.1347, + "step": 6168 + }, + { + "epoch": 1.5, + "learning_rate": 1.4356224408116814e-05, + "loss": 0.1113, + "step": 6170 + }, + { + "epoch": 1.5, + "learning_rate": 1.4352672175351638e-05, + "loss": 0.1176, + "step": 6172 + }, + { + "epoch": 1.5, + "learning_rate": 1.4349119264842117e-05, + "loss": 0.1102, + "step": 6174 + }, + { + "epoch": 1.5, + "learning_rate": 1.4345565677141472e-05, + "loss": 0.0892, + "step": 6176 + }, + { + "epoch": 1.51, + "learning_rate": 1.434201141280302e-05, + "loss": 0.1004, + "step": 6178 + }, + { + "epoch": 1.51, + "learning_rate": 1.4338456472380193e-05, + "loss": 0.0913, + "step": 6180 + }, + { + "epoch": 1.51, + "learning_rate": 1.4334900856426516e-05, + "loss": 0.0929, + "step": 6182 + }, + { + "epoch": 1.51, + "learning_rate": 1.4331344565495628e-05, + "loss": 0.0892, + "step": 6184 + }, + { + "epoch": 1.51, + "learning_rate": 1.4327787600141274e-05, + "loss": 0.0961, + "step": 6186 + }, + { + "epoch": 1.51, + "learning_rate": 1.4324229960917293e-05, + "loss": 0.0726, + "step": 6188 + }, + { + "epoch": 1.51, + "learning_rate": 1.4320671648377645e-05, + "loss": 0.086, + "step": 6190 + }, + { + "epoch": 1.51, + "learning_rate": 1.4317112663076382e-05, + "loss": 0.0861, + "step": 6192 + }, + { + "epoch": 1.51, + "learning_rate": 1.4313553005567664e-05, + "loss": 0.1012, + "step": 6194 + }, + { + "epoch": 1.51, + "learning_rate": 1.430999267640576e-05, + "loss": 0.0886, + "step": 6196 + }, + { + "epoch": 1.51, + "learning_rate": 1.4306431676145038e-05, + "loss": 0.0944, + "step": 6198 + }, + { + "epoch": 1.51, + "learning_rate": 1.4302870005339975e-05, + "loss": 0.1189, + "step": 6200 + }, + { + "epoch": 1.51, + "learning_rate": 1.4299307664545152e-05, + "loss": 0.0685, + "step": 6202 + }, + { + "epoch": 1.51, + "learning_rate": 1.4295744654315247e-05, + "loss": 0.0749, + "step": 6204 + }, + { + "epoch": 1.51, + "learning_rate": 1.4292180975205052e-05, + "loss": 0.092, + "step": 6206 + }, + { + "epoch": 1.51, + "learning_rate": 1.4288616627769458e-05, + "loss": 0.061, + "step": 6208 + }, + { + "epoch": 1.51, + "learning_rate": 1.4285051612563462e-05, + "loss": 0.0825, + "step": 6210 + }, + { + "epoch": 1.51, + "learning_rate": 1.4281485930142163e-05, + "loss": 0.0969, + "step": 6212 + }, + { + "epoch": 1.51, + "learning_rate": 1.4277919581060771e-05, + "loss": 0.0764, + "step": 6214 + }, + { + "epoch": 1.51, + "learning_rate": 1.4274352565874581e-05, + "loss": 0.1221, + "step": 6216 + }, + { + "epoch": 1.52, + "learning_rate": 1.4270784885139015e-05, + "loss": 0.1147, + "step": 6218 + }, + { + "epoch": 1.52, + "learning_rate": 1.4267216539409588e-05, + "loss": 0.1137, + "step": 6220 + }, + { + "epoch": 1.52, + "learning_rate": 1.4263647529241914e-05, + "loss": 0.1143, + "step": 6222 + }, + { + "epoch": 1.52, + "learning_rate": 1.4260077855191714e-05, + "loss": 0.1059, + "step": 6224 + }, + { + "epoch": 1.52, + "learning_rate": 1.4256507517814818e-05, + "loss": 0.0878, + "step": 6226 + }, + { + "epoch": 1.52, + "learning_rate": 1.4252936517667156e-05, + "loss": 0.1016, + "step": 6228 + }, + { + "epoch": 1.52, + "learning_rate": 1.4249364855304755e-05, + "loss": 0.1202, + "step": 6230 + }, + { + "epoch": 1.52, + "learning_rate": 1.4245792531283754e-05, + "loss": 0.1082, + "step": 6232 + }, + { + "epoch": 1.52, + "learning_rate": 1.4242219546160388e-05, + "loss": 0.1111, + "step": 6234 + }, + { + "epoch": 1.52, + "learning_rate": 1.4238645900491e-05, + "loss": 0.1022, + "step": 6236 + }, + { + "epoch": 1.52, + "learning_rate": 1.4235071594832036e-05, + "loss": 0.0812, + "step": 6238 + }, + { + "epoch": 1.52, + "learning_rate": 1.4231496629740038e-05, + "loss": 0.0813, + "step": 6240 + }, + { + "epoch": 1.52, + "learning_rate": 1.4227921005771661e-05, + "loss": 0.0885, + "step": 6242 + }, + { + "epoch": 1.52, + "learning_rate": 1.4224344723483652e-05, + "loss": 0.1035, + "step": 6244 + }, + { + "epoch": 1.52, + "learning_rate": 1.4220767783432868e-05, + "loss": 0.1093, + "step": 6246 + }, + { + "epoch": 1.52, + "learning_rate": 1.4217190186176266e-05, + "loss": 0.1197, + "step": 6248 + }, + { + "epoch": 1.52, + "learning_rate": 1.4213611932270903e-05, + "loss": 0.1098, + "step": 6250 + }, + { + "epoch": 1.52, + "learning_rate": 1.421003302227394e-05, + "loss": 0.1052, + "step": 6252 + }, + { + "epoch": 1.52, + "learning_rate": 1.4206453456742649e-05, + "loss": 0.0923, + "step": 6254 + }, + { + "epoch": 1.52, + "learning_rate": 1.4202873236234384e-05, + "loss": 0.116, + "step": 6256 + }, + { + "epoch": 1.52, + "learning_rate": 1.4199292361306623e-05, + "loss": 0.0929, + "step": 6258 + }, + { + "epoch": 1.53, + "learning_rate": 1.4195710832516926e-05, + "loss": 0.0794, + "step": 6260 + }, + { + "epoch": 1.53, + "learning_rate": 1.4192128650422974e-05, + "loss": 0.0726, + "step": 6262 + }, + { + "epoch": 1.53, + "learning_rate": 1.4188545815582532e-05, + "loss": 0.1018, + "step": 6264 + }, + { + "epoch": 1.53, + "learning_rate": 1.418496232855348e-05, + "loss": 0.088, + "step": 6266 + }, + { + "epoch": 1.53, + "learning_rate": 1.4181378189893788e-05, + "loss": 0.1082, + "step": 6268 + }, + { + "epoch": 1.53, + "learning_rate": 1.4177793400161545e-05, + "loss": 0.0993, + "step": 6270 + }, + { + "epoch": 1.53, + "learning_rate": 1.4174207959914919e-05, + "loss": 0.0931, + "step": 6272 + }, + { + "epoch": 1.53, + "learning_rate": 1.4170621869712197e-05, + "loss": 0.1065, + "step": 6274 + }, + { + "epoch": 1.53, + "learning_rate": 1.4167035130111758e-05, + "loss": 0.0877, + "step": 6276 + }, + { + "epoch": 1.53, + "learning_rate": 1.4163447741672087e-05, + "loss": 0.1107, + "step": 6278 + }, + { + "epoch": 1.53, + "learning_rate": 1.4159859704951762e-05, + "loss": 0.0969, + "step": 6280 + }, + { + "epoch": 1.53, + "learning_rate": 1.4156271020509476e-05, + "loss": 0.1076, + "step": 6282 + }, + { + "epoch": 1.53, + "learning_rate": 1.4152681688904009e-05, + "loss": 0.0982, + "step": 6284 + }, + { + "epoch": 1.53, + "learning_rate": 1.4149091710694246e-05, + "loss": 0.1082, + "step": 6286 + }, + { + "epoch": 1.53, + "learning_rate": 1.414550108643918e-05, + "loss": 0.0902, + "step": 6288 + }, + { + "epoch": 1.53, + "learning_rate": 1.4141909816697896e-05, + "loss": 0.1011, + "step": 6290 + }, + { + "epoch": 1.53, + "learning_rate": 1.4138317902029582e-05, + "loss": 0.0917, + "step": 6292 + }, + { + "epoch": 1.53, + "learning_rate": 1.4134725342993524e-05, + "loss": 0.1049, + "step": 6294 + }, + { + "epoch": 1.53, + "learning_rate": 1.4131132140149114e-05, + "loss": 0.1184, + "step": 6296 + }, + { + "epoch": 1.53, + "learning_rate": 1.4127538294055845e-05, + "loss": 0.1051, + "step": 6298 + }, + { + "epoch": 1.54, + "learning_rate": 1.4123943805273298e-05, + "loss": 0.1399, + "step": 6300 + }, + { + "epoch": 1.54, + "learning_rate": 1.4120348674361167e-05, + "loss": 0.118, + "step": 6302 + }, + { + "epoch": 1.54, + "learning_rate": 1.4116752901879236e-05, + "loss": 0.1346, + "step": 6304 + }, + { + "epoch": 1.54, + "learning_rate": 1.4113156488387405e-05, + "loss": 0.1015, + "step": 6306 + }, + { + "epoch": 1.54, + "learning_rate": 1.4109559434445652e-05, + "loss": 0.0904, + "step": 6308 + }, + { + "epoch": 1.54, + "learning_rate": 1.4105961740614076e-05, + "loss": 0.0995, + "step": 6310 + }, + { + "epoch": 1.54, + "learning_rate": 1.4102363407452857e-05, + "loss": 0.0938, + "step": 6312 + }, + { + "epoch": 1.54, + "learning_rate": 1.4098764435522288e-05, + "loss": 0.0961, + "step": 6314 + }, + { + "epoch": 1.54, + "learning_rate": 1.409516482538275e-05, + "loss": 0.1001, + "step": 6316 + }, + { + "epoch": 1.54, + "learning_rate": 1.4091564577594739e-05, + "loss": 0.0802, + "step": 6318 + }, + { + "epoch": 1.54, + "learning_rate": 1.4087963692718833e-05, + "loss": 0.0751, + "step": 6320 + }, + { + "epoch": 1.54, + "learning_rate": 1.4084362171315723e-05, + "loss": 0.0737, + "step": 6322 + }, + { + "epoch": 1.54, + "learning_rate": 1.408076001394619e-05, + "loss": 0.1007, + "step": 6324 + }, + { + "epoch": 1.54, + "learning_rate": 1.4077157221171121e-05, + "loss": 0.0879, + "step": 6326 + }, + { + "epoch": 1.54, + "learning_rate": 1.4073553793551495e-05, + "loss": 0.079, + "step": 6328 + }, + { + "epoch": 1.54, + "learning_rate": 1.4069949731648394e-05, + "loss": 0.1108, + "step": 6330 + }, + { + "epoch": 1.54, + "learning_rate": 1.4066345036022998e-05, + "loss": 0.066, + "step": 6332 + }, + { + "epoch": 1.54, + "learning_rate": 1.4062739707236588e-05, + "loss": 0.1281, + "step": 6334 + }, + { + "epoch": 1.54, + "learning_rate": 1.4059133745850534e-05, + "loss": 0.1125, + "step": 6336 + }, + { + "epoch": 1.54, + "learning_rate": 1.4055527152426323e-05, + "loss": 0.0769, + "step": 6338 + }, + { + "epoch": 1.54, + "learning_rate": 1.4051919927525521e-05, + "loss": 0.1046, + "step": 6340 + }, + { + "epoch": 1.55, + "learning_rate": 1.4048312071709803e-05, + "loss": 0.0938, + "step": 6342 + }, + { + "epoch": 1.55, + "learning_rate": 1.4044703585540935e-05, + "loss": 0.0938, + "step": 6344 + }, + { + "epoch": 1.55, + "learning_rate": 1.4041094469580796e-05, + "loss": 0.0689, + "step": 6346 + }, + { + "epoch": 1.55, + "learning_rate": 1.4037484724391345e-05, + "loss": 0.0901, + "step": 6348 + }, + { + "epoch": 1.55, + "learning_rate": 1.4033874350534648e-05, + "loss": 0.0418, + "step": 6350 + }, + { + "epoch": 1.55, + "learning_rate": 1.403026334857287e-05, + "loss": 0.0833, + "step": 6352 + }, + { + "epoch": 1.55, + "learning_rate": 1.4026651719068271e-05, + "loss": 0.0939, + "step": 6354 + }, + { + "epoch": 1.55, + "learning_rate": 1.402303946258321e-05, + "loss": 0.1081, + "step": 6356 + }, + { + "epoch": 1.55, + "learning_rate": 1.4019426579680143e-05, + "loss": 0.0756, + "step": 6358 + }, + { + "epoch": 1.55, + "learning_rate": 1.4015813070921617e-05, + "loss": 0.1146, + "step": 6360 + }, + { + "epoch": 1.55, + "learning_rate": 1.4012198936870294e-05, + "loss": 0.1008, + "step": 6362 + }, + { + "epoch": 1.55, + "learning_rate": 1.4008584178088914e-05, + "loss": 0.0906, + "step": 6364 + }, + { + "epoch": 1.55, + "learning_rate": 1.4004968795140324e-05, + "loss": 0.0532, + "step": 6366 + }, + { + "epoch": 1.55, + "learning_rate": 1.400135278858747e-05, + "loss": 0.0909, + "step": 6368 + }, + { + "epoch": 1.55, + "learning_rate": 1.3997736158993388e-05, + "loss": 0.0986, + "step": 6370 + }, + { + "epoch": 1.55, + "learning_rate": 1.3994118906921219e-05, + "loss": 0.1116, + "step": 6372 + }, + { + "epoch": 1.55, + "learning_rate": 1.3990501032934193e-05, + "loss": 0.0664, + "step": 6374 + }, + { + "epoch": 1.55, + "learning_rate": 1.3986882537595646e-05, + "loss": 0.0952, + "step": 6376 + }, + { + "epoch": 1.55, + "learning_rate": 1.3983263421468998e-05, + "loss": 0.1138, + "step": 6378 + }, + { + "epoch": 1.55, + "learning_rate": 1.3979643685117775e-05, + "loss": 0.0898, + "step": 6380 + }, + { + "epoch": 1.56, + "learning_rate": 1.3976023329105601e-05, + "loss": 0.0842, + "step": 6382 + }, + { + "epoch": 1.56, + "learning_rate": 1.3972402353996193e-05, + "loss": 0.0735, + "step": 6384 + }, + { + "epoch": 1.56, + "learning_rate": 1.396878076035336e-05, + "loss": 0.1043, + "step": 6386 + }, + { + "epoch": 1.56, + "learning_rate": 1.3965158548741016e-05, + "loss": 0.1062, + "step": 6388 + }, + { + "epoch": 1.56, + "learning_rate": 1.3961535719723168e-05, + "loss": 0.0908, + "step": 6390 + }, + { + "epoch": 1.56, + "learning_rate": 1.3957912273863912e-05, + "loss": 0.0975, + "step": 6392 + }, + { + "epoch": 1.56, + "learning_rate": 1.3954288211727454e-05, + "loss": 0.0786, + "step": 6394 + }, + { + "epoch": 1.56, + "learning_rate": 1.3950663533878084e-05, + "loss": 0.0687, + "step": 6396 + }, + { + "epoch": 1.56, + "learning_rate": 1.3947038240880191e-05, + "loss": 0.1054, + "step": 6398 + }, + { + "epoch": 1.56, + "learning_rate": 1.3943412333298261e-05, + "loss": 0.0816, + "step": 6400 + }, + { + "epoch": 1.56, + "learning_rate": 1.3939785811696878e-05, + "loss": 0.1035, + "step": 6402 + }, + { + "epoch": 1.56, + "learning_rate": 1.3936158676640719e-05, + "loss": 0.0964, + "step": 6404 + }, + { + "epoch": 1.56, + "learning_rate": 1.3932530928694555e-05, + "loss": 0.0817, + "step": 6406 + }, + { + "epoch": 1.56, + "learning_rate": 1.3928902568423252e-05, + "loss": 0.0969, + "step": 6408 + }, + { + "epoch": 1.56, + "learning_rate": 1.392527359639178e-05, + "loss": 0.1049, + "step": 6410 + }, + { + "epoch": 1.56, + "learning_rate": 1.3921644013165192e-05, + "loss": 0.0704, + "step": 6412 + }, + { + "epoch": 1.56, + "learning_rate": 1.3918013819308644e-05, + "loss": 0.0889, + "step": 6414 + }, + { + "epoch": 1.56, + "learning_rate": 1.3914383015387382e-05, + "loss": 0.1163, + "step": 6416 + }, + { + "epoch": 1.56, + "learning_rate": 1.391075160196676e-05, + "loss": 0.0687, + "step": 6418 + }, + { + "epoch": 1.56, + "learning_rate": 1.3907119579612203e-05, + "loss": 0.1149, + "step": 6420 + }, + { + "epoch": 1.56, + "learning_rate": 1.390348694888925e-05, + "loss": 0.1164, + "step": 6422 + }, + { + "epoch": 1.57, + "learning_rate": 1.3899853710363536e-05, + "loss": 0.091, + "step": 6424 + }, + { + "epoch": 1.57, + "learning_rate": 1.3896219864600778e-05, + "loss": 0.059, + "step": 6426 + }, + { + "epoch": 1.57, + "learning_rate": 1.3892585412166788e-05, + "loss": 0.0787, + "step": 6428 + }, + { + "epoch": 1.57, + "learning_rate": 1.3888950353627489e-05, + "loss": 0.1243, + "step": 6430 + }, + { + "epoch": 1.57, + "learning_rate": 1.388531468954888e-05, + "loss": 0.0921, + "step": 6432 + }, + { + "epoch": 1.57, + "learning_rate": 1.3881678420497067e-05, + "loss": 0.1013, + "step": 6434 + }, + { + "epoch": 1.57, + "learning_rate": 1.3878041547038238e-05, + "loss": 0.0861, + "step": 6436 + }, + { + "epoch": 1.57, + "learning_rate": 1.3874404069738692e-05, + "loss": 0.1073, + "step": 6438 + }, + { + "epoch": 1.57, + "learning_rate": 1.38707659891648e-05, + "loss": 0.0689, + "step": 6440 + }, + { + "epoch": 1.57, + "learning_rate": 1.386712730588305e-05, + "loss": 0.0693, + "step": 6442 + }, + { + "epoch": 1.57, + "learning_rate": 1.3863488020460004e-05, + "loss": 0.108, + "step": 6444 + }, + { + "epoch": 1.57, + "learning_rate": 1.3859848133462335e-05, + "loss": 0.0884, + "step": 6446 + }, + { + "epoch": 1.57, + "learning_rate": 1.3856207645456792e-05, + "loss": 0.0763, + "step": 6448 + }, + { + "epoch": 1.57, + "learning_rate": 1.3852566557010234e-05, + "loss": 0.0818, + "step": 6450 + }, + { + "epoch": 1.57, + "learning_rate": 1.3848924868689604e-05, + "loss": 0.1001, + "step": 6452 + }, + { + "epoch": 1.57, + "learning_rate": 1.384528258106194e-05, + "loss": 0.0767, + "step": 6454 + }, + { + "epoch": 1.57, + "learning_rate": 1.3841639694694373e-05, + "loss": 0.1017, + "step": 6456 + }, + { + "epoch": 1.57, + "learning_rate": 1.3837996210154132e-05, + "loss": 0.0771, + "step": 6458 + }, + { + "epoch": 1.57, + "learning_rate": 1.3834352128008533e-05, + "loss": 0.1086, + "step": 6460 + }, + { + "epoch": 1.57, + "learning_rate": 1.383070744882499e-05, + "loss": 0.0926, + "step": 6462 + }, + { + "epoch": 1.58, + "learning_rate": 1.3827062173170998e-05, + "loss": 0.0757, + "step": 6464 + }, + { + "epoch": 1.58, + "learning_rate": 1.3823416301614168e-05, + "loss": 0.0725, + "step": 6466 + }, + { + "epoch": 1.58, + "learning_rate": 1.381976983472218e-05, + "loss": 0.1125, + "step": 6468 + }, + { + "epoch": 1.58, + "learning_rate": 1.3816122773062824e-05, + "loss": 0.1036, + "step": 6470 + }, + { + "epoch": 1.58, + "learning_rate": 1.3812475117203968e-05, + "loss": 0.099, + "step": 6472 + }, + { + "epoch": 1.58, + "learning_rate": 1.3808826867713587e-05, + "loss": 0.1099, + "step": 6474 + }, + { + "epoch": 1.58, + "learning_rate": 1.3805178025159734e-05, + "loss": 0.1017, + "step": 6476 + }, + { + "epoch": 1.58, + "learning_rate": 1.3801528590110566e-05, + "loss": 0.0859, + "step": 6478 + }, + { + "epoch": 1.58, + "learning_rate": 1.3797878563134327e-05, + "loss": 0.0944, + "step": 6480 + }, + { + "epoch": 1.58, + "learning_rate": 1.3794227944799362e-05, + "loss": 0.1067, + "step": 6482 + }, + { + "epoch": 1.58, + "learning_rate": 1.3790576735674085e-05, + "loss": 0.0733, + "step": 6484 + }, + { + "epoch": 1.58, + "learning_rate": 1.378692493632703e-05, + "loss": 0.0918, + "step": 6486 + }, + { + "epoch": 1.58, + "learning_rate": 1.37832725473268e-05, + "loss": 0.0913, + "step": 6488 + }, + { + "epoch": 1.58, + "learning_rate": 1.3779619569242109e-05, + "loss": 0.1443, + "step": 6490 + }, + { + "epoch": 1.58, + "learning_rate": 1.3775966002641747e-05, + "loss": 0.0941, + "step": 6492 + }, + { + "epoch": 1.58, + "learning_rate": 1.3772311848094607e-05, + "loss": 0.1119, + "step": 6494 + }, + { + "epoch": 1.58, + "learning_rate": 1.3768657106169668e-05, + "loss": 0.0961, + "step": 6496 + }, + { + "epoch": 1.58, + "learning_rate": 1.3765001777435996e-05, + "loss": 0.1158, + "step": 6498 + }, + { + "epoch": 1.58, + "learning_rate": 1.376134586246276e-05, + "loss": 0.0841, + "step": 6500 + }, + { + "epoch": 1.58, + "learning_rate": 1.3757689361819209e-05, + "loss": 0.0953, + "step": 6502 + }, + { + "epoch": 1.58, + "learning_rate": 1.3754032276074695e-05, + "loss": 0.0758, + "step": 6504 + }, + { + "epoch": 1.59, + "learning_rate": 1.3750374605798647e-05, + "loss": 0.1309, + "step": 6506 + }, + { + "epoch": 1.59, + "learning_rate": 1.3746716351560597e-05, + "loss": 0.1379, + "step": 6508 + }, + { + "epoch": 1.59, + "learning_rate": 1.3743057513930161e-05, + "loss": 0.0891, + "step": 6510 + }, + { + "epoch": 1.59, + "learning_rate": 1.3739398093477045e-05, + "loss": 0.0682, + "step": 6512 + }, + { + "epoch": 1.59, + "learning_rate": 1.3735738090771053e-05, + "loss": 0.0714, + "step": 6514 + }, + { + "epoch": 1.59, + "learning_rate": 1.3732077506382075e-05, + "loss": 0.0855, + "step": 6516 + }, + { + "epoch": 1.59, + "learning_rate": 1.3728416340880093e-05, + "loss": 0.0952, + "step": 6518 + }, + { + "epoch": 1.59, + "learning_rate": 1.3724754594835178e-05, + "loss": 0.0899, + "step": 6520 + }, + { + "epoch": 1.59, + "learning_rate": 1.3721092268817484e-05, + "loss": 0.0974, + "step": 6522 + }, + { + "epoch": 1.59, + "learning_rate": 1.3717429363397276e-05, + "loss": 0.075, + "step": 6524 + }, + { + "epoch": 1.59, + "learning_rate": 1.3713765879144886e-05, + "loss": 0.103, + "step": 6526 + }, + { + "epoch": 1.59, + "learning_rate": 1.3710101816630755e-05, + "loss": 0.0908, + "step": 6528 + }, + { + "epoch": 1.59, + "learning_rate": 1.3706437176425399e-05, + "loss": 0.0942, + "step": 6530 + }, + { + "epoch": 1.59, + "learning_rate": 1.3702771959099434e-05, + "loss": 0.1166, + "step": 6532 + }, + { + "epoch": 1.59, + "learning_rate": 1.3699106165223558e-05, + "loss": 0.0825, + "step": 6534 + }, + { + "epoch": 1.59, + "learning_rate": 1.369543979536857e-05, + "loss": 0.0648, + "step": 6536 + }, + { + "epoch": 1.59, + "learning_rate": 1.3691772850105348e-05, + "loss": 0.115, + "step": 6538 + }, + { + "epoch": 1.59, + "learning_rate": 1.3688105330004862e-05, + "loss": 0.0745, + "step": 6540 + }, + { + "epoch": 1.59, + "learning_rate": 1.3684437235638173e-05, + "loss": 0.111, + "step": 6542 + }, + { + "epoch": 1.59, + "learning_rate": 1.3680768567576434e-05, + "loss": 0.1024, + "step": 6544 + }, + { + "epoch": 1.6, + "learning_rate": 1.3677099326390886e-05, + "loss": 0.1281, + "step": 6546 + }, + { + "epoch": 1.6, + "learning_rate": 1.3673429512652854e-05, + "loss": 0.0773, + "step": 6548 + }, + { + "epoch": 1.6, + "learning_rate": 1.3669759126933754e-05, + "loss": 0.0829, + "step": 6550 + }, + { + "epoch": 1.6, + "learning_rate": 1.3666088169805102e-05, + "loss": 0.0926, + "step": 6552 + }, + { + "epoch": 1.6, + "learning_rate": 1.3662416641838486e-05, + "loss": 0.077, + "step": 6554 + }, + { + "epoch": 1.6, + "learning_rate": 1.3658744543605594e-05, + "loss": 0.0866, + "step": 6556 + }, + { + "epoch": 1.6, + "learning_rate": 1.3655071875678197e-05, + "loss": 0.0599, + "step": 6558 + }, + { + "epoch": 1.6, + "learning_rate": 1.3651398638628164e-05, + "loss": 0.1072, + "step": 6560 + }, + { + "epoch": 1.6, + "learning_rate": 1.3647724833027437e-05, + "loss": 0.1026, + "step": 6562 + }, + { + "epoch": 1.6, + "learning_rate": 1.3644050459448066e-05, + "loss": 0.1246, + "step": 6564 + }, + { + "epoch": 1.6, + "learning_rate": 1.364037551846217e-05, + "loss": 0.1003, + "step": 6566 + }, + { + "epoch": 1.6, + "learning_rate": 1.3636700010641972e-05, + "loss": 0.0968, + "step": 6568 + }, + { + "epoch": 1.6, + "learning_rate": 1.363302393655977e-05, + "loss": 0.0981, + "step": 6570 + }, + { + "epoch": 1.6, + "learning_rate": 1.3629347296787963e-05, + "loss": 0.09, + "step": 6572 + }, + { + "epoch": 1.6, + "learning_rate": 1.362567009189903e-05, + "loss": 0.0695, + "step": 6574 + }, + { + "epoch": 1.6, + "learning_rate": 1.362199232246554e-05, + "loss": 0.1228, + "step": 6576 + }, + { + "epoch": 1.6, + "learning_rate": 1.3618313989060146e-05, + "loss": 0.0869, + "step": 6578 + }, + { + "epoch": 1.6, + "learning_rate": 1.3614635092255597e-05, + "loss": 0.0724, + "step": 6580 + }, + { + "epoch": 1.6, + "learning_rate": 1.3610955632624726e-05, + "loss": 0.1333, + "step": 6582 + }, + { + "epoch": 1.6, + "learning_rate": 1.3607275610740451e-05, + "loss": 0.0725, + "step": 6584 + }, + { + "epoch": 1.6, + "learning_rate": 1.3603595027175777e-05, + "loss": 0.0734, + "step": 6586 + }, + { + "epoch": 1.61, + "learning_rate": 1.3599913882503808e-05, + "loss": 0.0788, + "step": 6588 + }, + { + "epoch": 1.61, + "learning_rate": 1.3596232177297714e-05, + "loss": 0.0734, + "step": 6590 + }, + { + "epoch": 1.61, + "learning_rate": 1.3592549912130775e-05, + "loss": 0.0769, + "step": 6592 + }, + { + "epoch": 1.61, + "learning_rate": 1.358886708757634e-05, + "loss": 0.1177, + "step": 6594 + }, + { + "epoch": 1.61, + "learning_rate": 1.3585183704207861e-05, + "loss": 0.0876, + "step": 6596 + }, + { + "epoch": 1.61, + "learning_rate": 1.358149976259886e-05, + "loss": 0.0744, + "step": 6598 + }, + { + "epoch": 1.61, + "learning_rate": 1.3577815263322962e-05, + "loss": 0.0821, + "step": 6600 + }, + { + "epoch": 1.61, + "learning_rate": 1.357413020695387e-05, + "loss": 0.0667, + "step": 6602 + }, + { + "epoch": 1.61, + "learning_rate": 1.3570444594065375e-05, + "loss": 0.0631, + "step": 6604 + }, + { + "epoch": 1.61, + "learning_rate": 1.356675842523135e-05, + "loss": 0.0886, + "step": 6606 + }, + { + "epoch": 1.61, + "learning_rate": 1.356307170102577e-05, + "loss": 0.0796, + "step": 6608 + }, + { + "epoch": 1.61, + "learning_rate": 1.3559384422022677e-05, + "loss": 0.0657, + "step": 6610 + }, + { + "epoch": 1.61, + "learning_rate": 1.3555696588796214e-05, + "loss": 0.0681, + "step": 6612 + }, + { + "epoch": 1.61, + "learning_rate": 1.3552008201920602e-05, + "loss": 0.0894, + "step": 6614 + }, + { + "epoch": 1.61, + "learning_rate": 1.3548319261970152e-05, + "loss": 0.0864, + "step": 6616 + }, + { + "epoch": 1.61, + "learning_rate": 1.354462976951926e-05, + "loss": 0.0782, + "step": 6618 + }, + { + "epoch": 1.61, + "learning_rate": 1.354093972514241e-05, + "loss": 0.0908, + "step": 6620 + }, + { + "epoch": 1.61, + "learning_rate": 1.3537249129414166e-05, + "loss": 0.0792, + "step": 6622 + }, + { + "epoch": 1.61, + "learning_rate": 1.353355798290919e-05, + "loss": 0.0967, + "step": 6624 + }, + { + "epoch": 1.61, + "learning_rate": 1.3529866286202209e-05, + "loss": 0.1142, + "step": 6626 + }, + { + "epoch": 1.62, + "learning_rate": 1.3526174039868059e-05, + "loss": 0.0679, + "step": 6628 + }, + { + "epoch": 1.62, + "learning_rate": 1.3522481244481647e-05, + "loss": 0.0894, + "step": 6630 + }, + { + "epoch": 1.62, + "learning_rate": 1.351878790061797e-05, + "loss": 0.0971, + "step": 6632 + }, + { + "epoch": 1.62, + "learning_rate": 1.351509400885211e-05, + "loss": 0.0876, + "step": 6634 + }, + { + "epoch": 1.62, + "learning_rate": 1.3511399569759234e-05, + "loss": 0.064, + "step": 6636 + }, + { + "epoch": 1.62, + "learning_rate": 1.3507704583914594e-05, + "loss": 0.0719, + "step": 6638 + }, + { + "epoch": 1.62, + "learning_rate": 1.3504009051893529e-05, + "loss": 0.101, + "step": 6640 + }, + { + "epoch": 1.62, + "learning_rate": 1.350031297427146e-05, + "loss": 0.0776, + "step": 6642 + }, + { + "epoch": 1.62, + "learning_rate": 1.3496616351623898e-05, + "loss": 0.0993, + "step": 6644 + }, + { + "epoch": 1.62, + "learning_rate": 1.349291918452643e-05, + "loss": 0.0617, + "step": 6646 + }, + { + "epoch": 1.62, + "learning_rate": 1.3489221473554735e-05, + "loss": 0.1, + "step": 6648 + }, + { + "epoch": 1.62, + "learning_rate": 1.3485523219284578e-05, + "loss": 0.0901, + "step": 6650 + }, + { + "epoch": 1.62, + "learning_rate": 1.34818244222918e-05, + "loss": 0.088, + "step": 6652 + }, + { + "epoch": 1.62, + "learning_rate": 1.3478125083152337e-05, + "loss": 0.0493, + "step": 6654 + }, + { + "epoch": 1.62, + "learning_rate": 1.3474425202442204e-05, + "loss": 0.0586, + "step": 6656 + }, + { + "epoch": 1.62, + "learning_rate": 1.3470724780737498e-05, + "loss": 0.0762, + "step": 6658 + }, + { + "epoch": 1.62, + "learning_rate": 1.3467023818614404e-05, + "loss": 0.0733, + "step": 6660 + }, + { + "epoch": 1.62, + "learning_rate": 1.346332231664919e-05, + "loss": 0.0833, + "step": 6662 + }, + { + "epoch": 1.62, + "learning_rate": 1.345962027541821e-05, + "loss": 0.053, + "step": 6664 + }, + { + "epoch": 1.62, + "learning_rate": 1.3455917695497898e-05, + "loss": 0.097, + "step": 6666 + }, + { + "epoch": 1.62, + "learning_rate": 1.3452214577464772e-05, + "loss": 0.065, + "step": 6668 + }, + { + "epoch": 1.63, + "learning_rate": 1.3448510921895441e-05, + "loss": 0.0837, + "step": 6670 + }, + { + "epoch": 1.63, + "learning_rate": 1.3444806729366593e-05, + "loss": 0.0551, + "step": 6672 + }, + { + "epoch": 1.63, + "learning_rate": 1.344110200045499e-05, + "loss": 0.0591, + "step": 6674 + }, + { + "epoch": 1.63, + "learning_rate": 1.3437396735737495e-05, + "loss": 0.0876, + "step": 6676 + }, + { + "epoch": 1.63, + "learning_rate": 1.3433690935791045e-05, + "loss": 0.084, + "step": 6678 + }, + { + "epoch": 1.63, + "learning_rate": 1.342998460119266e-05, + "loss": 0.0704, + "step": 6680 + }, + { + "epoch": 1.63, + "learning_rate": 1.3426277732519442e-05, + "loss": 0.0588, + "step": 6682 + }, + { + "epoch": 1.63, + "learning_rate": 1.3422570330348583e-05, + "loss": 0.0687, + "step": 6684 + }, + { + "epoch": 1.63, + "learning_rate": 1.3418862395257353e-05, + "loss": 0.0762, + "step": 6686 + }, + { + "epoch": 1.63, + "learning_rate": 1.3415153927823105e-05, + "loss": 0.0834, + "step": 6688 + }, + { + "epoch": 1.63, + "learning_rate": 1.3411444928623274e-05, + "loss": 0.0674, + "step": 6690 + }, + { + "epoch": 1.63, + "learning_rate": 1.3407735398235384e-05, + "loss": 0.0903, + "step": 6692 + }, + { + "epoch": 1.63, + "learning_rate": 1.3404025337237033e-05, + "loss": 0.0852, + "step": 6694 + }, + { + "epoch": 1.63, + "learning_rate": 1.3400314746205909e-05, + "loss": 0.0731, + "step": 6696 + }, + { + "epoch": 1.63, + "learning_rate": 1.3396603625719777e-05, + "loss": 0.0913, + "step": 6698 + }, + { + "epoch": 1.63, + "learning_rate": 1.339289197635649e-05, + "loss": 0.0917, + "step": 6700 + }, + { + "epoch": 1.63, + "learning_rate": 1.3389179798693974e-05, + "loss": 0.0653, + "step": 6702 + }, + { + "epoch": 1.63, + "learning_rate": 1.3385467093310251e-05, + "loss": 0.1114, + "step": 6704 + }, + { + "epoch": 1.63, + "learning_rate": 1.338175386078341e-05, + "loss": 0.1016, + "step": 6706 + }, + { + "epoch": 1.63, + "learning_rate": 1.337804010169164e-05, + "loss": 0.0744, + "step": 6708 + }, + { + "epoch": 1.63, + "learning_rate": 1.3374325816613189e-05, + "loss": 0.0955, + "step": 6710 + }, + { + "epoch": 1.64, + "learning_rate": 1.337061100612641e-05, + "loss": 0.1176, + "step": 6712 + }, + { + "epoch": 1.64, + "learning_rate": 1.3366895670809723e-05, + "loss": 0.0606, + "step": 6714 + }, + { + "epoch": 1.64, + "learning_rate": 1.3363179811241635e-05, + "loss": 0.0663, + "step": 6716 + }, + { + "epoch": 1.64, + "learning_rate": 1.3359463428000732e-05, + "loss": 0.0803, + "step": 6718 + }, + { + "epoch": 1.64, + "learning_rate": 1.3355746521665685e-05, + "loss": 0.0724, + "step": 6720 + }, + { + "epoch": 1.64, + "learning_rate": 1.3352029092815244e-05, + "loss": 0.0864, + "step": 6722 + }, + { + "epoch": 1.64, + "learning_rate": 1.3348311142028242e-05, + "loss": 0.0666, + "step": 6724 + }, + { + "epoch": 1.64, + "learning_rate": 1.3344592669883591e-05, + "loss": 0.1026, + "step": 6726 + }, + { + "epoch": 1.64, + "learning_rate": 1.334087367696029e-05, + "loss": 0.0513, + "step": 6728 + }, + { + "epoch": 1.64, + "learning_rate": 1.3337154163837406e-05, + "loss": 0.0928, + "step": 6730 + }, + { + "epoch": 1.64, + "learning_rate": 1.3333434131094103e-05, + "loss": 0.0912, + "step": 6732 + }, + { + "epoch": 1.64, + "learning_rate": 1.3329713579309615e-05, + "loss": 0.0679, + "step": 6734 + }, + { + "epoch": 1.64, + "learning_rate": 1.3325992509063266e-05, + "loss": 0.0704, + "step": 6736 + }, + { + "epoch": 1.64, + "learning_rate": 1.3322270920934443e-05, + "loss": 0.0817, + "step": 6738 + }, + { + "epoch": 1.64, + "learning_rate": 1.3318548815502638e-05, + "loss": 0.0806, + "step": 6740 + }, + { + "epoch": 1.64, + "learning_rate": 1.3314826193347408e-05, + "loss": 0.0747, + "step": 6742 + }, + { + "epoch": 1.64, + "learning_rate": 1.331110305504839e-05, + "loss": 0.068, + "step": 6744 + }, + { + "epoch": 1.64, + "learning_rate": 1.3307379401185307e-05, + "loss": 0.0744, + "step": 6746 + }, + { + "epoch": 1.64, + "learning_rate": 1.3303655232337962e-05, + "loss": 0.0704, + "step": 6748 + }, + { + "epoch": 1.64, + "learning_rate": 1.3299930549086235e-05, + "loss": 0.0685, + "step": 6750 + }, + { + "epoch": 1.65, + "learning_rate": 1.329620535201009e-05, + "loss": 0.0694, + "step": 6752 + }, + { + "epoch": 1.65, + "learning_rate": 1.3292479641689565e-05, + "loss": 0.0921, + "step": 6754 + }, + { + "epoch": 1.65, + "learning_rate": 1.3288753418704783e-05, + "loss": 0.0766, + "step": 6756 + }, + { + "epoch": 1.65, + "learning_rate": 1.3285026683635947e-05, + "loss": 0.0638, + "step": 6758 + }, + { + "epoch": 1.65, + "learning_rate": 1.3281299437063336e-05, + "loss": 0.0723, + "step": 6760 + }, + { + "epoch": 1.65, + "learning_rate": 1.3277571679567309e-05, + "loss": 0.0767, + "step": 6762 + }, + { + "epoch": 1.65, + "learning_rate": 1.3273843411728314e-05, + "loss": 0.0923, + "step": 6764 + }, + { + "epoch": 1.65, + "learning_rate": 1.3270114634126862e-05, + "loss": 0.0905, + "step": 6766 + }, + { + "epoch": 1.65, + "learning_rate": 1.3266385347343557e-05, + "loss": 0.0921, + "step": 6768 + }, + { + "epoch": 1.65, + "learning_rate": 1.3262655551959079e-05, + "loss": 0.0729, + "step": 6770 + }, + { + "epoch": 1.65, + "learning_rate": 1.3258925248554183e-05, + "loss": 0.0848, + "step": 6772 + }, + { + "epoch": 1.65, + "learning_rate": 1.3255194437709702e-05, + "loss": 0.067, + "step": 6774 + }, + { + "epoch": 1.65, + "learning_rate": 1.3251463120006558e-05, + "loss": 0.0976, + "step": 6776 + }, + { + "epoch": 1.65, + "learning_rate": 1.3247731296025743e-05, + "loss": 0.0812, + "step": 6778 + }, + { + "epoch": 1.65, + "learning_rate": 1.3243998966348333e-05, + "loss": 0.093, + "step": 6780 + }, + { + "epoch": 1.65, + "learning_rate": 1.3240266131555475e-05, + "loss": 0.0781, + "step": 6782 + }, + { + "epoch": 1.65, + "learning_rate": 1.3236532792228405e-05, + "loss": 0.1145, + "step": 6784 + }, + { + "epoch": 1.65, + "learning_rate": 1.323279894894843e-05, + "loss": 0.0821, + "step": 6786 + }, + { + "epoch": 1.65, + "learning_rate": 1.322906460229694e-05, + "loss": 0.0943, + "step": 6788 + }, + { + "epoch": 1.65, + "learning_rate": 1.3225329752855395e-05, + "loss": 0.115, + "step": 6790 + }, + { + "epoch": 1.65, + "learning_rate": 1.3221594401205354e-05, + "loss": 0.1004, + "step": 6792 + }, + { + "epoch": 1.66, + "learning_rate": 1.3217858547928422e-05, + "loss": 0.0497, + "step": 6794 + }, + { + "epoch": 1.66, + "learning_rate": 1.3214122193606312e-05, + "loss": 0.0828, + "step": 6796 + }, + { + "epoch": 1.66, + "learning_rate": 1.3210385338820798e-05, + "loss": 0.0554, + "step": 6798 + }, + { + "epoch": 1.66, + "learning_rate": 1.3206647984153738e-05, + "loss": 0.0439, + "step": 6800 + }, + { + "epoch": 1.66, + "learning_rate": 1.3202910130187066e-05, + "loss": 0.0718, + "step": 6802 + }, + { + "epoch": 1.66, + "learning_rate": 1.3199171777502796e-05, + "loss": 0.0786, + "step": 6804 + }, + { + "epoch": 1.66, + "learning_rate": 1.3195432926683016e-05, + "loss": 0.0559, + "step": 6806 + }, + { + "epoch": 1.66, + "learning_rate": 1.3191693578309898e-05, + "loss": 0.0879, + "step": 6808 + }, + { + "epoch": 1.66, + "learning_rate": 1.3187953732965681e-05, + "loss": 0.0609, + "step": 6810 + }, + { + "epoch": 1.66, + "learning_rate": 1.3184213391232693e-05, + "loss": 0.075, + "step": 6812 + }, + { + "epoch": 1.66, + "learning_rate": 1.3180472553693329e-05, + "loss": 0.0882, + "step": 6814 + }, + { + "epoch": 1.66, + "learning_rate": 1.317673122093007e-05, + "loss": 0.1017, + "step": 6816 + }, + { + "epoch": 1.66, + "learning_rate": 1.3172989393525469e-05, + "loss": 0.0492, + "step": 6818 + }, + { + "epoch": 1.66, + "learning_rate": 1.3169247072062158e-05, + "loss": 0.0848, + "step": 6820 + }, + { + "epoch": 1.66, + "learning_rate": 1.3165504257122838e-05, + "loss": 0.079, + "step": 6822 + }, + { + "epoch": 1.66, + "learning_rate": 1.3161760949290306e-05, + "loss": 0.0513, + "step": 6824 + }, + { + "epoch": 1.66, + "learning_rate": 1.3158017149147415e-05, + "loss": 0.0672, + "step": 6826 + }, + { + "epoch": 1.66, + "learning_rate": 1.3154272857277107e-05, + "loss": 0.069, + "step": 6828 + }, + { + "epoch": 1.66, + "learning_rate": 1.315052807426239e-05, + "loss": 0.1033, + "step": 6830 + }, + { + "epoch": 1.66, + "learning_rate": 1.3146782800686366e-05, + "loss": 0.0862, + "step": 6832 + }, + { + "epoch": 1.67, + "learning_rate": 1.3143037037132195e-05, + "loss": 0.0774, + "step": 6834 + }, + { + "epoch": 1.67, + "learning_rate": 1.3139290784183127e-05, + "loss": 0.0861, + "step": 6836 + }, + { + "epoch": 1.67, + "learning_rate": 1.3135544042422475e-05, + "loss": 0.099, + "step": 6838 + }, + { + "epoch": 1.67, + "learning_rate": 1.3131796812433641e-05, + "loss": 0.0701, + "step": 6840 + }, + { + "epoch": 1.67, + "learning_rate": 1.3128049094800095e-05, + "loss": 0.1021, + "step": 6842 + }, + { + "epoch": 1.67, + "learning_rate": 1.3124300890105388e-05, + "loss": 0.0735, + "step": 6844 + }, + { + "epoch": 1.67, + "learning_rate": 1.3120552198933138e-05, + "loss": 0.0844, + "step": 6846 + }, + { + "epoch": 1.67, + "learning_rate": 1.3116803021867054e-05, + "loss": 0.0757, + "step": 6848 + }, + { + "epoch": 1.67, + "learning_rate": 1.3113053359490901e-05, + "loss": 0.0762, + "step": 6850 + }, + { + "epoch": 1.67, + "learning_rate": 1.3109303212388538e-05, + "loss": 0.0775, + "step": 6852 + }, + { + "epoch": 1.67, + "learning_rate": 1.310555258114389e-05, + "loss": 0.065, + "step": 6854 + }, + { + "epoch": 1.67, + "learning_rate": 1.3101801466340958e-05, + "loss": 0.0842, + "step": 6856 + }, + { + "epoch": 1.67, + "learning_rate": 1.3098049868563818e-05, + "loss": 0.0922, + "step": 6858 + }, + { + "epoch": 1.67, + "learning_rate": 1.3094297788396623e-05, + "loss": 0.0922, + "step": 6860 + }, + { + "epoch": 1.67, + "learning_rate": 1.3090545226423604e-05, + "loss": 0.0887, + "step": 6862 + }, + { + "epoch": 1.67, + "learning_rate": 1.308679218322906e-05, + "loss": 0.0852, + "step": 6864 + }, + { + "epoch": 1.67, + "learning_rate": 1.3083038659397367e-05, + "loss": 0.0745, + "step": 6866 + }, + { + "epoch": 1.67, + "learning_rate": 1.307928465551298e-05, + "loss": 0.0971, + "step": 6868 + }, + { + "epoch": 1.67, + "learning_rate": 1.3075530172160428e-05, + "loss": 0.075, + "step": 6870 + }, + { + "epoch": 1.67, + "learning_rate": 1.3071775209924313e-05, + "loss": 0.0622, + "step": 6872 + }, + { + "epoch": 1.67, + "learning_rate": 1.3068019769389304e-05, + "loss": 0.0791, + "step": 6874 + }, + { + "epoch": 1.68, + "learning_rate": 1.3064263851140163e-05, + "loss": 0.06, + "step": 6876 + }, + { + "epoch": 1.68, + "learning_rate": 1.3060507455761702e-05, + "loss": 0.0656, + "step": 6878 + }, + { + "epoch": 1.68, + "learning_rate": 1.3056750583838831e-05, + "loss": 0.0621, + "step": 6880 + }, + { + "epoch": 1.68, + "learning_rate": 1.3052993235956519e-05, + "loss": 0.0763, + "step": 6882 + }, + { + "epoch": 1.68, + "learning_rate": 1.3049235412699818e-05, + "loss": 0.0601, + "step": 6884 + }, + { + "epoch": 1.68, + "learning_rate": 1.3045477114653844e-05, + "loss": 0.0569, + "step": 6886 + }, + { + "epoch": 1.68, + "learning_rate": 1.3041718342403796e-05, + "loss": 0.0619, + "step": 6888 + }, + { + "epoch": 1.68, + "learning_rate": 1.3037959096534943e-05, + "loss": 0.0899, + "step": 6890 + }, + { + "epoch": 1.68, + "learning_rate": 1.303419937763263e-05, + "loss": 0.0981, + "step": 6892 + }, + { + "epoch": 1.68, + "learning_rate": 1.3030439186282269e-05, + "loss": 0.0686, + "step": 6894 + }, + { + "epoch": 1.68, + "learning_rate": 1.3026678523069355e-05, + "loss": 0.072, + "step": 6896 + }, + { + "epoch": 1.68, + "learning_rate": 1.3022917388579455e-05, + "loss": 0.0743, + "step": 6898 + }, + { + "epoch": 1.68, + "learning_rate": 1.3019155783398199e-05, + "loss": 0.0568, + "step": 6900 + }, + { + "epoch": 1.68, + "learning_rate": 1.3015393708111299e-05, + "loss": 0.0667, + "step": 6902 + }, + { + "epoch": 1.68, + "learning_rate": 1.3011631163304548e-05, + "loss": 0.0629, + "step": 6904 + }, + { + "epoch": 1.68, + "learning_rate": 1.300786814956379e-05, + "loss": 0.0757, + "step": 6906 + }, + { + "epoch": 1.68, + "learning_rate": 1.3004104667474962e-05, + "loss": 0.066, + "step": 6908 + }, + { + "epoch": 1.68, + "learning_rate": 1.3000340717624064e-05, + "loss": 0.0786, + "step": 6910 + }, + { + "epoch": 1.68, + "learning_rate": 1.2996576300597181e-05, + "loss": 0.0548, + "step": 6912 + }, + { + "epoch": 1.68, + "learning_rate": 1.2992811416980446e-05, + "loss": 0.0579, + "step": 6914 + }, + { + "epoch": 1.69, + "learning_rate": 1.2989046067360093e-05, + "loss": 0.0517, + "step": 6916 + }, + { + "epoch": 1.69, + "learning_rate": 1.298528025232241e-05, + "loss": 0.0682, + "step": 6918 + }, + { + "epoch": 1.69, + "learning_rate": 1.2981513972453766e-05, + "loss": 0.0604, + "step": 6920 + }, + { + "epoch": 1.69, + "learning_rate": 1.2977747228340594e-05, + "loss": 0.0646, + "step": 6922 + }, + { + "epoch": 1.69, + "learning_rate": 1.2973980020569413e-05, + "loss": 0.085, + "step": 6924 + }, + { + "epoch": 1.69, + "learning_rate": 1.2970212349726798e-05, + "loss": 0.0795, + "step": 6926 + }, + { + "epoch": 1.69, + "learning_rate": 1.296644421639941e-05, + "loss": 0.062, + "step": 6928 + }, + { + "epoch": 1.69, + "learning_rate": 1.2962675621173972e-05, + "loss": 0.0899, + "step": 6930 + }, + { + "epoch": 1.69, + "learning_rate": 1.2958906564637287e-05, + "loss": 0.0663, + "step": 6932 + }, + { + "epoch": 1.69, + "learning_rate": 1.2955137047376227e-05, + "loss": 0.0601, + "step": 6934 + }, + { + "epoch": 1.69, + "learning_rate": 1.295136706997773e-05, + "loss": 0.0444, + "step": 6936 + }, + { + "epoch": 1.69, + "learning_rate": 1.2947596633028808e-05, + "loss": 0.0382, + "step": 6938 + }, + { + "epoch": 1.69, + "learning_rate": 1.2943825737116558e-05, + "loss": 0.0671, + "step": 6940 + }, + { + "epoch": 1.69, + "learning_rate": 1.2940054382828124e-05, + "loss": 0.0548, + "step": 6942 + }, + { + "epoch": 1.69, + "learning_rate": 1.2936282570750745e-05, + "loss": 0.0785, + "step": 6944 + }, + { + "epoch": 1.69, + "learning_rate": 1.2932510301471714e-05, + "loss": 0.0671, + "step": 6946 + }, + { + "epoch": 1.69, + "learning_rate": 1.2928737575578407e-05, + "loss": 0.0715, + "step": 6948 + }, + { + "epoch": 1.69, + "learning_rate": 1.2924964393658263e-05, + "loss": 0.0892, + "step": 6950 + }, + { + "epoch": 1.69, + "learning_rate": 1.2921190756298798e-05, + "loss": 0.0792, + "step": 6952 + }, + { + "epoch": 1.69, + "learning_rate": 1.2917416664087596e-05, + "loss": 0.0751, + "step": 6954 + }, + { + "epoch": 1.69, + "learning_rate": 1.2913642117612311e-05, + "loss": 0.0708, + "step": 6956 + }, + { + "epoch": 1.7, + "learning_rate": 1.2909867117460664e-05, + "loss": 0.1013, + "step": 6958 + }, + { + "epoch": 1.7, + "learning_rate": 1.2906091664220461e-05, + "loss": 0.0618, + "step": 6960 + }, + { + "epoch": 1.7, + "learning_rate": 1.2902315758479562e-05, + "loss": 0.0884, + "step": 6962 + }, + { + "epoch": 1.7, + "learning_rate": 1.289853940082591e-05, + "loss": 0.0489, + "step": 6964 + }, + { + "epoch": 1.7, + "learning_rate": 1.2894762591847502e-05, + "loss": 0.0689, + "step": 6966 + }, + { + "epoch": 1.7, + "learning_rate": 1.2890985332132432e-05, + "loss": 0.0461, + "step": 6968 + }, + { + "epoch": 1.7, + "learning_rate": 1.2887207622268831e-05, + "loss": 0.0751, + "step": 6970 + }, + { + "epoch": 1.7, + "learning_rate": 1.288342946284493e-05, + "loss": 0.0822, + "step": 6972 + }, + { + "epoch": 1.7, + "learning_rate": 1.2879650854449013e-05, + "loss": 0.0637, + "step": 6974 + }, + { + "epoch": 1.7, + "learning_rate": 1.287587179766944e-05, + "loss": 0.0525, + "step": 6976 + }, + { + "epoch": 1.7, + "learning_rate": 1.2872092293094631e-05, + "loss": 0.0633, + "step": 6978 + }, + { + "epoch": 1.7, + "learning_rate": 1.2868312341313096e-05, + "loss": 0.0648, + "step": 6980 + }, + { + "epoch": 1.7, + "learning_rate": 1.2864531942913394e-05, + "loss": 0.0624, + "step": 6982 + }, + { + "epoch": 1.7, + "learning_rate": 1.2860751098484165e-05, + "loss": 0.0744, + "step": 6984 + }, + { + "epoch": 1.7, + "learning_rate": 1.2856969808614115e-05, + "loss": 0.0659, + "step": 6986 + }, + { + "epoch": 1.7, + "learning_rate": 1.285318807389202e-05, + "loss": 0.0635, + "step": 6988 + }, + { + "epoch": 1.7, + "learning_rate": 1.2849405894906724e-05, + "loss": 0.0604, + "step": 6990 + }, + { + "epoch": 1.7, + "learning_rate": 1.2845623272247142e-05, + "loss": 0.0527, + "step": 6992 + }, + { + "epoch": 1.7, + "learning_rate": 1.2841840206502254e-05, + "loss": 0.0505, + "step": 6994 + }, + { + "epoch": 1.7, + "learning_rate": 1.2838056698261122e-05, + "loss": 0.0698, + "step": 6996 + }, + { + "epoch": 1.71, + "learning_rate": 1.2834272748112855e-05, + "loss": 0.0681, + "step": 6998 + }, + { + "epoch": 1.71, + "learning_rate": 1.283048835664665e-05, + "loss": 0.073, + "step": 7000 + }, + { + "epoch": 1.71, + "learning_rate": 1.2826703524451764e-05, + "loss": 0.0798, + "step": 7002 + }, + { + "epoch": 1.71, + "learning_rate": 1.2822918252117525e-05, + "loss": 0.0707, + "step": 7004 + }, + { + "epoch": 1.71, + "learning_rate": 1.2819132540233326e-05, + "loss": 0.0698, + "step": 7006 + }, + { + "epoch": 1.71, + "learning_rate": 1.2815346389388637e-05, + "loss": 0.0702, + "step": 7008 + }, + { + "epoch": 1.71, + "learning_rate": 1.2811559800172986e-05, + "loss": 0.0673, + "step": 7010 + }, + { + "epoch": 1.71, + "learning_rate": 1.2807772773175975e-05, + "loss": 0.0444, + "step": 7012 + }, + { + "epoch": 1.71, + "learning_rate": 1.280398530898727e-05, + "loss": 0.0665, + "step": 7014 + }, + { + "epoch": 1.71, + "learning_rate": 1.2800197408196616e-05, + "loss": 0.052, + "step": 7016 + }, + { + "epoch": 1.71, + "learning_rate": 1.279640907139381e-05, + "loss": 0.0917, + "step": 7018 + }, + { + "epoch": 1.71, + "learning_rate": 1.279262029916873e-05, + "loss": 0.0769, + "step": 7020 + }, + { + "epoch": 1.71, + "learning_rate": 1.2788831092111316e-05, + "loss": 0.0858, + "step": 7022 + }, + { + "epoch": 1.71, + "learning_rate": 1.2785041450811574e-05, + "loss": 0.0873, + "step": 7024 + }, + { + "epoch": 1.71, + "learning_rate": 1.2781251375859585e-05, + "loss": 0.0654, + "step": 7026 + }, + { + "epoch": 1.71, + "learning_rate": 1.2777460867845485e-05, + "loss": 0.0692, + "step": 7028 + }, + { + "epoch": 1.71, + "learning_rate": 1.2773669927359494e-05, + "loss": 0.0733, + "step": 7030 + }, + { + "epoch": 1.71, + "learning_rate": 1.2769878554991882e-05, + "loss": 0.0602, + "step": 7032 + }, + { + "epoch": 1.71, + "learning_rate": 1.2766086751333e-05, + "loss": 0.0904, + "step": 7034 + }, + { + "epoch": 1.71, + "learning_rate": 1.276229451697326e-05, + "loss": 0.0866, + "step": 7036 + }, + { + "epoch": 1.71, + "learning_rate": 1.275850185250314e-05, + "loss": 0.065, + "step": 7038 + }, + { + "epoch": 1.72, + "learning_rate": 1.2754708758513192e-05, + "loss": 0.0648, + "step": 7040 + }, + { + "epoch": 1.72, + "learning_rate": 1.2750915235594023e-05, + "loss": 0.0751, + "step": 7042 + }, + { + "epoch": 1.72, + "learning_rate": 1.2747121284336317e-05, + "loss": 0.0602, + "step": 7044 + }, + { + "epoch": 1.72, + "learning_rate": 1.2743326905330822e-05, + "loss": 0.0571, + "step": 7046 + }, + { + "epoch": 1.72, + "learning_rate": 1.2739532099168347e-05, + "loss": 0.0627, + "step": 7048 + }, + { + "epoch": 1.72, + "learning_rate": 1.273573686643978e-05, + "loss": 0.0493, + "step": 7050 + }, + { + "epoch": 1.72, + "learning_rate": 1.2731941207736063e-05, + "loss": 0.0748, + "step": 7052 + }, + { + "epoch": 1.72, + "learning_rate": 1.272814512364821e-05, + "loss": 0.0552, + "step": 7054 + }, + { + "epoch": 1.72, + "learning_rate": 1.2724348614767296e-05, + "loss": 0.0575, + "step": 7056 + }, + { + "epoch": 1.72, + "learning_rate": 1.2720551681684475e-05, + "loss": 0.0828, + "step": 7058 + }, + { + "epoch": 1.72, + "learning_rate": 1.2716754324990952e-05, + "loss": 0.0615, + "step": 7060 + }, + { + "epoch": 1.72, + "learning_rate": 1.2712956545278008e-05, + "loss": 0.0633, + "step": 7062 + }, + { + "epoch": 1.72, + "learning_rate": 1.2709158343136983e-05, + "loss": 0.0567, + "step": 7064 + }, + { + "epoch": 1.72, + "learning_rate": 1.2705359719159288e-05, + "loss": 0.0764, + "step": 7066 + }, + { + "epoch": 1.72, + "learning_rate": 1.2701560673936399e-05, + "loss": 0.0633, + "step": 7068 + }, + { + "epoch": 1.72, + "learning_rate": 1.2697761208059856e-05, + "loss": 0.0733, + "step": 7070 + }, + { + "epoch": 1.72, + "learning_rate": 1.2693961322121262e-05, + "loss": 0.0731, + "step": 7072 + }, + { + "epoch": 1.72, + "learning_rate": 1.2690161016712295e-05, + "loss": 0.0748, + "step": 7074 + }, + { + "epoch": 1.72, + "learning_rate": 1.268636029242468e-05, + "loss": 0.0657, + "step": 7076 + }, + { + "epoch": 1.72, + "learning_rate": 1.2682559149850229e-05, + "loss": 0.0762, + "step": 7078 + }, + { + "epoch": 1.73, + "learning_rate": 1.2678757589580805e-05, + "loss": 0.0656, + "step": 7080 + }, + { + "epoch": 1.73, + "learning_rate": 1.2674955612208343e-05, + "loss": 0.0784, + "step": 7082 + }, + { + "epoch": 1.73, + "learning_rate": 1.2671153218324834e-05, + "loss": 0.0761, + "step": 7084 + }, + { + "epoch": 1.73, + "learning_rate": 1.2667350408522347e-05, + "loss": 0.0839, + "step": 7086 + }, + { + "epoch": 1.73, + "learning_rate": 1.2663547183393006e-05, + "loss": 0.0711, + "step": 7088 + }, + { + "epoch": 1.73, + "learning_rate": 1.2659743543529e-05, + "loss": 0.0582, + "step": 7090 + }, + { + "epoch": 1.73, + "learning_rate": 1.2655939489522582e-05, + "loss": 0.0774, + "step": 7092 + }, + { + "epoch": 1.73, + "learning_rate": 1.2652135021966081e-05, + "loss": 0.0714, + "step": 7094 + }, + { + "epoch": 1.73, + "learning_rate": 1.2648330141451877e-05, + "loss": 0.0388, + "step": 7096 + }, + { + "epoch": 1.73, + "learning_rate": 1.2644524848572419e-05, + "loss": 0.0422, + "step": 7098 + }, + { + "epoch": 1.73, + "learning_rate": 1.2640719143920216e-05, + "loss": 0.0827, + "step": 7100 + }, + { + "epoch": 1.73, + "learning_rate": 1.2636913028087856e-05, + "loss": 0.0647, + "step": 7102 + }, + { + "epoch": 1.73, + "learning_rate": 1.2633106501667971e-05, + "loss": 0.0638, + "step": 7104 + }, + { + "epoch": 1.73, + "learning_rate": 1.2629299565253268e-05, + "loss": 0.061, + "step": 7106 + }, + { + "epoch": 1.73, + "learning_rate": 1.2625492219436517e-05, + "loss": 0.0665, + "step": 7108 + }, + { + "epoch": 1.73, + "learning_rate": 1.2621684464810556e-05, + "loss": 0.0547, + "step": 7110 + }, + { + "epoch": 1.73, + "learning_rate": 1.2617876301968273e-05, + "loss": 0.0812, + "step": 7112 + }, + { + "epoch": 1.73, + "learning_rate": 1.261406773150263e-05, + "loss": 0.0571, + "step": 7114 + }, + { + "epoch": 1.73, + "learning_rate": 1.2610258754006656e-05, + "loss": 0.075, + "step": 7116 + }, + { + "epoch": 1.73, + "learning_rate": 1.2606449370073435e-05, + "loss": 0.0742, + "step": 7118 + }, + { + "epoch": 1.73, + "learning_rate": 1.2602639580296113e-05, + "loss": 0.0594, + "step": 7120 + }, + { + "epoch": 1.74, + "learning_rate": 1.2598829385267908e-05, + "loss": 0.0501, + "step": 7122 + }, + { + "epoch": 1.74, + "learning_rate": 1.2595018785582096e-05, + "loss": 0.0565, + "step": 7124 + }, + { + "epoch": 1.74, + "learning_rate": 1.2591207781832015e-05, + "loss": 0.044, + "step": 7126 + }, + { + "epoch": 1.74, + "learning_rate": 1.258739637461107e-05, + "loss": 0.0562, + "step": 7128 + }, + { + "epoch": 1.74, + "learning_rate": 1.2583584564512723e-05, + "loss": 0.0556, + "step": 7130 + }, + { + "epoch": 1.74, + "learning_rate": 1.2579772352130503e-05, + "loss": 0.0764, + "step": 7132 + }, + { + "epoch": 1.74, + "learning_rate": 1.2575959738058004e-05, + "loss": 0.0508, + "step": 7134 + }, + { + "epoch": 1.74, + "learning_rate": 1.2572146722888871e-05, + "loss": 0.0608, + "step": 7136 + }, + { + "epoch": 1.74, + "learning_rate": 1.256833330721683e-05, + "loss": 0.0632, + "step": 7138 + }, + { + "epoch": 1.74, + "learning_rate": 1.2564519491635651e-05, + "loss": 0.0377, + "step": 7140 + }, + { + "epoch": 1.74, + "learning_rate": 1.256070527673918e-05, + "loss": 0.043, + "step": 7142 + }, + { + "epoch": 1.74, + "learning_rate": 1.2556890663121314e-05, + "loss": 0.0714, + "step": 7144 + }, + { + "epoch": 1.74, + "learning_rate": 1.2553075651376023e-05, + "loss": 0.0483, + "step": 7146 + }, + { + "epoch": 1.74, + "learning_rate": 1.2549260242097328e-05, + "loss": 0.0759, + "step": 7148 + }, + { + "epoch": 1.74, + "learning_rate": 1.254544443587932e-05, + "loss": 0.0351, + "step": 7150 + }, + { + "epoch": 1.74, + "learning_rate": 1.254162823331615e-05, + "loss": 0.0651, + "step": 7152 + }, + { + "epoch": 1.74, + "learning_rate": 1.253781163500203e-05, + "loss": 0.0596, + "step": 7154 + }, + { + "epoch": 1.74, + "learning_rate": 1.253399464153123e-05, + "loss": 0.0418, + "step": 7156 + }, + { + "epoch": 1.74, + "learning_rate": 1.253017725349809e-05, + "loss": 0.0747, + "step": 7158 + }, + { + "epoch": 1.74, + "learning_rate": 1.2526359471497004e-05, + "loss": 0.0731, + "step": 7160 + }, + { + "epoch": 1.75, + "learning_rate": 1.252254129612243e-05, + "loss": 0.0658, + "step": 7162 + }, + { + "epoch": 1.75, + "learning_rate": 1.2518722727968886e-05, + "loss": 0.0795, + "step": 7164 + }, + { + "epoch": 1.75, + "learning_rate": 1.2514903767630957e-05, + "loss": 0.0771, + "step": 7166 + }, + { + "epoch": 1.75, + "learning_rate": 1.2511084415703276e-05, + "loss": 0.0422, + "step": 7168 + }, + { + "epoch": 1.75, + "learning_rate": 1.2507264672780553e-05, + "loss": 0.052, + "step": 7170 + }, + { + "epoch": 1.75, + "learning_rate": 1.250344453945755e-05, + "loss": 0.0968, + "step": 7172 + }, + { + "epoch": 1.75, + "learning_rate": 1.2499624016329086e-05, + "loss": 0.057, + "step": 7174 + }, + { + "epoch": 1.75, + "learning_rate": 1.2495803103990047e-05, + "loss": 0.0577, + "step": 7176 + }, + { + "epoch": 1.75, + "learning_rate": 1.2491981803035384e-05, + "loss": 0.0737, + "step": 7178 + }, + { + "epoch": 1.75, + "learning_rate": 1.2488160114060099e-05, + "loss": 0.0838, + "step": 7180 + }, + { + "epoch": 1.75, + "learning_rate": 1.2484338037659258e-05, + "loss": 0.0762, + "step": 7182 + }, + { + "epoch": 1.75, + "learning_rate": 1.2480515574427985e-05, + "loss": 0.0482, + "step": 7184 + }, + { + "epoch": 1.75, + "learning_rate": 1.247669272496147e-05, + "loss": 0.0824, + "step": 7186 + }, + { + "epoch": 1.75, + "learning_rate": 1.2472869489854961e-05, + "loss": 0.0583, + "step": 7188 + }, + { + "epoch": 1.75, + "learning_rate": 1.2469045869703764e-05, + "loss": 0.0522, + "step": 7190 + }, + { + "epoch": 1.75, + "learning_rate": 1.2465221865103243e-05, + "loss": 0.071, + "step": 7192 + }, + { + "epoch": 1.75, + "learning_rate": 1.2461397476648828e-05, + "loss": 0.0606, + "step": 7194 + }, + { + "epoch": 1.75, + "learning_rate": 1.2457572704936004e-05, + "loss": 0.0634, + "step": 7196 + }, + { + "epoch": 1.75, + "learning_rate": 1.2453747550560317e-05, + "loss": 0.0957, + "step": 7198 + }, + { + "epoch": 1.75, + "learning_rate": 1.2449922014117376e-05, + "loss": 0.0689, + "step": 7200 + }, + { + "epoch": 1.75, + "learning_rate": 1.2446096096202843e-05, + "loss": 0.0602, + "step": 7202 + }, + { + "epoch": 1.76, + "learning_rate": 1.244226979741244e-05, + "loss": 0.0644, + "step": 7204 + }, + { + "epoch": 1.76, + "learning_rate": 1.2438443118341957e-05, + "loss": 0.0556, + "step": 7206 + }, + { + "epoch": 1.76, + "learning_rate": 1.2434616059587235e-05, + "loss": 0.0592, + "step": 7208 + }, + { + "epoch": 1.76, + "learning_rate": 1.2430788621744174e-05, + "loss": 0.0664, + "step": 7210 + }, + { + "epoch": 1.76, + "learning_rate": 1.2426960805408739e-05, + "loss": 0.0707, + "step": 7212 + }, + { + "epoch": 1.76, + "learning_rate": 1.2423132611176947e-05, + "loss": 0.0505, + "step": 7214 + }, + { + "epoch": 1.76, + "learning_rate": 1.241930403964488e-05, + "loss": 0.0452, + "step": 7216 + }, + { + "epoch": 1.76, + "learning_rate": 1.2415475091408675e-05, + "loss": 0.057, + "step": 7218 + }, + { + "epoch": 1.76, + "learning_rate": 1.2411645767064524e-05, + "loss": 0.0425, + "step": 7220 + }, + { + "epoch": 1.76, + "learning_rate": 1.2407816067208692e-05, + "loss": 0.0651, + "step": 7222 + }, + { + "epoch": 1.76, + "learning_rate": 1.2403985992437482e-05, + "loss": 0.073, + "step": 7224 + }, + { + "epoch": 1.76, + "learning_rate": 1.2400155543347272e-05, + "loss": 0.0842, + "step": 7226 + }, + { + "epoch": 1.76, + "learning_rate": 1.2396324720534491e-05, + "loss": 0.0768, + "step": 7228 + }, + { + "epoch": 1.76, + "learning_rate": 1.2392493524595629e-05, + "loss": 0.089, + "step": 7230 + }, + { + "epoch": 1.76, + "learning_rate": 1.2388661956127225e-05, + "loss": 0.0947, + "step": 7232 + }, + { + "epoch": 1.76, + "learning_rate": 1.2384830015725893e-05, + "loss": 0.0511, + "step": 7234 + }, + { + "epoch": 1.76, + "learning_rate": 1.2380997703988294e-05, + "loss": 0.0574, + "step": 7236 + }, + { + "epoch": 1.76, + "learning_rate": 1.2377165021511142e-05, + "loss": 0.0552, + "step": 7238 + }, + { + "epoch": 1.76, + "learning_rate": 1.2373331968891217e-05, + "loss": 0.059, + "step": 7240 + }, + { + "epoch": 1.76, + "learning_rate": 1.236949854672536e-05, + "loss": 0.0623, + "step": 7242 + }, + { + "epoch": 1.77, + "learning_rate": 1.2365664755610461e-05, + "loss": 0.0496, + "step": 7244 + }, + { + "epoch": 1.77, + "learning_rate": 1.236183059614347e-05, + "loss": 0.0766, + "step": 7246 + }, + { + "epoch": 1.77, + "learning_rate": 1.2357996068921392e-05, + "loss": 0.0592, + "step": 7248 + }, + { + "epoch": 1.77, + "learning_rate": 1.23541611745413e-05, + "loss": 0.0564, + "step": 7250 + }, + { + "epoch": 1.77, + "learning_rate": 1.2350325913600307e-05, + "loss": 0.0574, + "step": 7252 + }, + { + "epoch": 1.77, + "learning_rate": 1.2346490286695601e-05, + "loss": 0.0581, + "step": 7254 + }, + { + "epoch": 1.77, + "learning_rate": 1.2342654294424413e-05, + "loss": 0.0536, + "step": 7256 + }, + { + "epoch": 1.77, + "learning_rate": 1.2338817937384038e-05, + "loss": 0.054, + "step": 7258 + }, + { + "epoch": 1.77, + "learning_rate": 1.2334981216171823e-05, + "loss": 0.0912, + "step": 7260 + }, + { + "epoch": 1.77, + "learning_rate": 1.2331144131385183e-05, + "loss": 0.043, + "step": 7262 + }, + { + "epoch": 1.77, + "learning_rate": 1.2327306683621573e-05, + "loss": 0.0451, + "step": 7264 + }, + { + "epoch": 1.77, + "learning_rate": 1.2323468873478518e-05, + "loss": 0.0478, + "step": 7266 + }, + { + "epoch": 1.77, + "learning_rate": 1.2319630701553589e-05, + "loss": 0.0463, + "step": 7268 + }, + { + "epoch": 1.77, + "learning_rate": 1.2315792168444424e-05, + "loss": 0.0596, + "step": 7270 + }, + { + "epoch": 1.77, + "learning_rate": 1.2311953274748714e-05, + "loss": 0.053, + "step": 7272 + }, + { + "epoch": 1.77, + "learning_rate": 1.2308114021064197e-05, + "loss": 0.0691, + "step": 7274 + }, + { + "epoch": 1.77, + "learning_rate": 1.2304274407988676e-05, + "loss": 0.0751, + "step": 7276 + }, + { + "epoch": 1.77, + "learning_rate": 1.2300434436120017e-05, + "loss": 0.0597, + "step": 7278 + }, + { + "epoch": 1.77, + "learning_rate": 1.2296594106056118e-05, + "loss": 0.0776, + "step": 7280 + }, + { + "epoch": 1.77, + "learning_rate": 1.2292753418394958e-05, + "loss": 0.0516, + "step": 7282 + }, + { + "epoch": 1.77, + "learning_rate": 1.2288912373734558e-05, + "loss": 0.0512, + "step": 7284 + }, + { + "epoch": 1.78, + "learning_rate": 1.2285070972673004e-05, + "loss": 0.0703, + "step": 7286 + }, + { + "epoch": 1.78, + "learning_rate": 1.228122921580842e-05, + "loss": 0.0541, + "step": 7288 + }, + { + "epoch": 1.78, + "learning_rate": 1.2277387103739006e-05, + "loss": 0.048, + "step": 7290 + }, + { + "epoch": 1.78, + "learning_rate": 1.2273544637063006e-05, + "loss": 0.0695, + "step": 7292 + }, + { + "epoch": 1.78, + "learning_rate": 1.2269701816378721e-05, + "loss": 0.0592, + "step": 7294 + }, + { + "epoch": 1.78, + "learning_rate": 1.2265858642284505e-05, + "loss": 0.0675, + "step": 7296 + }, + { + "epoch": 1.78, + "learning_rate": 1.2262015115378774e-05, + "loss": 0.0494, + "step": 7298 + }, + { + "epoch": 1.78, + "learning_rate": 1.2258171236259993e-05, + "loss": 0.0649, + "step": 7300 + }, + { + "epoch": 1.78, + "learning_rate": 1.2254327005526683e-05, + "loss": 0.0679, + "step": 7302 + }, + { + "epoch": 1.78, + "learning_rate": 1.2250482423777419e-05, + "loss": 0.0423, + "step": 7304 + }, + { + "epoch": 1.78, + "learning_rate": 1.2246637491610835e-05, + "loss": 0.0632, + "step": 7306 + }, + { + "epoch": 1.78, + "learning_rate": 1.2242792209625613e-05, + "loss": 0.0735, + "step": 7308 + }, + { + "epoch": 1.78, + "learning_rate": 1.2238946578420493e-05, + "loss": 0.0536, + "step": 7310 + }, + { + "epoch": 1.78, + "learning_rate": 1.2235100598594271e-05, + "loss": 0.0353, + "step": 7312 + }, + { + "epoch": 1.78, + "learning_rate": 1.2231254270745798e-05, + "loss": 0.0562, + "step": 7314 + }, + { + "epoch": 1.78, + "learning_rate": 1.222740759547397e-05, + "loss": 0.0477, + "step": 7316 + }, + { + "epoch": 1.78, + "learning_rate": 1.2223560573377749e-05, + "loss": 0.0357, + "step": 7318 + }, + { + "epoch": 1.78, + "learning_rate": 1.2219713205056143e-05, + "loss": 0.0444, + "step": 7320 + }, + { + "epoch": 1.78, + "learning_rate": 1.2215865491108216e-05, + "loss": 0.0584, + "step": 7322 + }, + { + "epoch": 1.78, + "learning_rate": 1.221201743213309e-05, + "loss": 0.0737, + "step": 7324 + }, + { + "epoch": 1.79, + "learning_rate": 1.2208169028729934e-05, + "loss": 0.0562, + "step": 7326 + }, + { + "epoch": 1.79, + "learning_rate": 1.2204320281497977e-05, + "loss": 0.0666, + "step": 7328 + }, + { + "epoch": 1.79, + "learning_rate": 1.2200471191036496e-05, + "loss": 0.0591, + "step": 7330 + }, + { + "epoch": 1.79, + "learning_rate": 1.2196621757944822e-05, + "loss": 0.0802, + "step": 7332 + }, + { + "epoch": 1.79, + "learning_rate": 1.2192771982822346e-05, + "loss": 0.0904, + "step": 7334 + }, + { + "epoch": 1.79, + "learning_rate": 1.2188921866268503e-05, + "loss": 0.0593, + "step": 7336 + }, + { + "epoch": 1.79, + "learning_rate": 1.2185071408882792e-05, + "loss": 0.0706, + "step": 7338 + }, + { + "epoch": 1.79, + "learning_rate": 1.2181220611264748e-05, + "loss": 0.0537, + "step": 7340 + }, + { + "epoch": 1.79, + "learning_rate": 1.217736947401398e-05, + "loss": 0.0791, + "step": 7342 + }, + { + "epoch": 1.79, + "learning_rate": 1.2173517997730133e-05, + "loss": 0.0722, + "step": 7344 + }, + { + "epoch": 1.79, + "learning_rate": 1.2169666183012915e-05, + "loss": 0.0609, + "step": 7346 + }, + { + "epoch": 1.79, + "learning_rate": 1.2165814030462083e-05, + "loss": 0.0408, + "step": 7348 + }, + { + "epoch": 1.79, + "learning_rate": 1.2161961540677442e-05, + "loss": 0.0463, + "step": 7350 + }, + { + "epoch": 1.79, + "learning_rate": 1.2158108714258859e-05, + "loss": 0.0581, + "step": 7352 + }, + { + "epoch": 1.79, + "learning_rate": 1.2154255551806246e-05, + "loss": 0.051, + "step": 7354 + }, + { + "epoch": 1.79, + "learning_rate": 1.2150402053919571e-05, + "loss": 0.0894, + "step": 7356 + }, + { + "epoch": 1.79, + "learning_rate": 1.2146548221198855e-05, + "loss": 0.0349, + "step": 7358 + }, + { + "epoch": 1.79, + "learning_rate": 1.2142694054244164e-05, + "loss": 0.0638, + "step": 7360 + }, + { + "epoch": 1.79, + "learning_rate": 1.2138839553655625e-05, + "loss": 0.0809, + "step": 7362 + }, + { + "epoch": 1.79, + "learning_rate": 1.2134984720033414e-05, + "loss": 0.0432, + "step": 7364 + }, + { + "epoch": 1.79, + "learning_rate": 1.2131129553977756e-05, + "loss": 0.0694, + "step": 7366 + }, + { + "epoch": 1.8, + "learning_rate": 1.212727405608893e-05, + "loss": 0.0583, + "step": 7368 + }, + { + "epoch": 1.8, + "learning_rate": 1.212341822696727e-05, + "loss": 0.0403, + "step": 7370 + }, + { + "epoch": 1.8, + "learning_rate": 1.2119562067213148e-05, + "loss": 0.0793, + "step": 7372 + }, + { + "epoch": 1.8, + "learning_rate": 1.211570557742701e-05, + "loss": 0.0586, + "step": 7374 + }, + { + "epoch": 1.8, + "learning_rate": 1.2111848758209335e-05, + "loss": 0.067, + "step": 7376 + }, + { + "epoch": 1.8, + "learning_rate": 1.2107991610160662e-05, + "loss": 0.0443, + "step": 7378 + }, + { + "epoch": 1.8, + "learning_rate": 1.2104134133881568e-05, + "loss": 0.0592, + "step": 7380 + }, + { + "epoch": 1.8, + "learning_rate": 1.2100276329972707e-05, + "loss": 0.0659, + "step": 7382 + }, + { + "epoch": 1.8, + "learning_rate": 1.2096418199034761e-05, + "loss": 0.0783, + "step": 7384 + }, + { + "epoch": 1.8, + "learning_rate": 1.2092559741668469e-05, + "loss": 0.0566, + "step": 7386 + }, + { + "epoch": 1.8, + "learning_rate": 1.2088700958474622e-05, + "loss": 0.0612, + "step": 7388 + }, + { + "epoch": 1.8, + "learning_rate": 1.2084841850054067e-05, + "loss": 0.0438, + "step": 7390 + }, + { + "epoch": 1.8, + "learning_rate": 1.2080982417007694e-05, + "loss": 0.0786, + "step": 7392 + }, + { + "epoch": 1.8, + "learning_rate": 1.2077122659936446e-05, + "loss": 0.0555, + "step": 7394 + }, + { + "epoch": 1.8, + "learning_rate": 1.2073262579441317e-05, + "loss": 0.0824, + "step": 7396 + }, + { + "epoch": 1.8, + "learning_rate": 1.2069402176123351e-05, + "loss": 0.0639, + "step": 7398 + }, + { + "epoch": 1.8, + "learning_rate": 1.206554145058364e-05, + "loss": 0.0706, + "step": 7400 + }, + { + "epoch": 1.8, + "learning_rate": 1.2061680403423333e-05, + "loss": 0.0571, + "step": 7402 + }, + { + "epoch": 1.8, + "learning_rate": 1.2057819035243622e-05, + "loss": 0.0457, + "step": 7404 + }, + { + "epoch": 1.8, + "learning_rate": 1.2053957346645751e-05, + "loss": 0.0595, + "step": 7406 + }, + { + "epoch": 1.81, + "learning_rate": 1.2050095338231015e-05, + "loss": 0.0435, + "step": 7408 + }, + { + "epoch": 1.81, + "learning_rate": 1.2046233010600758e-05, + "loss": 0.0591, + "step": 7410 + }, + { + "epoch": 1.81, + "learning_rate": 1.2042370364356375e-05, + "loss": 0.0569, + "step": 7412 + }, + { + "epoch": 1.81, + "learning_rate": 1.203850740009931e-05, + "loss": 0.0637, + "step": 7414 + }, + { + "epoch": 1.81, + "learning_rate": 1.2034644118431054e-05, + "loss": 0.0537, + "step": 7416 + }, + { + "epoch": 1.81, + "learning_rate": 1.2030780519953149e-05, + "loss": 0.0588, + "step": 7418 + }, + { + "epoch": 1.81, + "learning_rate": 1.2026916605267191e-05, + "loss": 0.0919, + "step": 7420 + }, + { + "epoch": 1.81, + "learning_rate": 1.202305237497482e-05, + "loss": 0.0683, + "step": 7422 + }, + { + "epoch": 1.81, + "learning_rate": 1.201918782967772e-05, + "loss": 0.0617, + "step": 7424 + }, + { + "epoch": 1.81, + "learning_rate": 1.2015322969977638e-05, + "loss": 0.0796, + "step": 7426 + }, + { + "epoch": 1.81, + "learning_rate": 1.2011457796476359e-05, + "loss": 0.0273, + "step": 7428 + }, + { + "epoch": 1.81, + "learning_rate": 1.200759230977572e-05, + "loss": 0.0583, + "step": 7430 + }, + { + "epoch": 1.81, + "learning_rate": 1.2003726510477605e-05, + "loss": 0.0635, + "step": 7432 + }, + { + "epoch": 1.81, + "learning_rate": 1.1999860399183954e-05, + "loss": 0.0606, + "step": 7434 + }, + { + "epoch": 1.81, + "learning_rate": 1.1995993976496742e-05, + "loss": 0.0623, + "step": 7436 + }, + { + "epoch": 1.81, + "learning_rate": 1.1992127243018009e-05, + "loss": 0.0387, + "step": 7438 + }, + { + "epoch": 1.81, + "learning_rate": 1.198826019934983e-05, + "loss": 0.0544, + "step": 7440 + }, + { + "epoch": 1.81, + "learning_rate": 1.1984392846094335e-05, + "loss": 0.0673, + "step": 7442 + }, + { + "epoch": 1.81, + "learning_rate": 1.1980525183853697e-05, + "loss": 0.056, + "step": 7444 + }, + { + "epoch": 1.81, + "learning_rate": 1.1976657213230147e-05, + "loss": 0.0527, + "step": 7446 + }, + { + "epoch": 1.81, + "learning_rate": 1.1972788934825953e-05, + "loss": 0.0665, + "step": 7448 + }, + { + "epoch": 1.82, + "learning_rate": 1.1968920349243435e-05, + "loss": 0.0574, + "step": 7450 + }, + { + "epoch": 1.82, + "learning_rate": 1.1965051457084965e-05, + "loss": 0.046, + "step": 7452 + }, + { + "epoch": 1.82, + "learning_rate": 1.1961182258952958e-05, + "loss": 0.082, + "step": 7454 + }, + { + "epoch": 1.82, + "learning_rate": 1.1957312755449874e-05, + "loss": 0.0669, + "step": 7456 + }, + { + "epoch": 1.82, + "learning_rate": 1.1953442947178228e-05, + "loss": 0.0371, + "step": 7458 + }, + { + "epoch": 1.82, + "learning_rate": 1.194957283474058e-05, + "loss": 0.0458, + "step": 7460 + }, + { + "epoch": 1.82, + "learning_rate": 1.1945702418739533e-05, + "loss": 0.0513, + "step": 7462 + }, + { + "epoch": 1.82, + "learning_rate": 1.1941831699777738e-05, + "loss": 0.0445, + "step": 7464 + }, + { + "epoch": 1.82, + "learning_rate": 1.1937960678457902e-05, + "loss": 0.0513, + "step": 7466 + }, + { + "epoch": 1.82, + "learning_rate": 1.193408935538277e-05, + "loss": 0.0386, + "step": 7468 + }, + { + "epoch": 1.82, + "learning_rate": 1.1930217731155133e-05, + "loss": 0.0375, + "step": 7470 + }, + { + "epoch": 1.82, + "learning_rate": 1.1926345806377837e-05, + "loss": 0.0555, + "step": 7472 + }, + { + "epoch": 1.82, + "learning_rate": 1.192247358165377e-05, + "loss": 0.0475, + "step": 7474 + }, + { + "epoch": 1.82, + "learning_rate": 1.1918601057585866e-05, + "loss": 0.0672, + "step": 7476 + }, + { + "epoch": 1.82, + "learning_rate": 1.1914728234777104e-05, + "loss": 0.0506, + "step": 7478 + }, + { + "epoch": 1.82, + "learning_rate": 1.1910855113830515e-05, + "loss": 0.0644, + "step": 7480 + }, + { + "epoch": 1.82, + "learning_rate": 1.1906981695349178e-05, + "loss": 0.0561, + "step": 7482 + }, + { + "epoch": 1.82, + "learning_rate": 1.1903107979936203e-05, + "loss": 0.0377, + "step": 7484 + }, + { + "epoch": 1.82, + "learning_rate": 1.1899233968194766e-05, + "loss": 0.0492, + "step": 7486 + }, + { + "epoch": 1.82, + "learning_rate": 1.1895359660728073e-05, + "loss": 0.0442, + "step": 7488 + }, + { + "epoch": 1.83, + "learning_rate": 1.1891485058139396e-05, + "loss": 0.082, + "step": 7490 + }, + { + "epoch": 1.83, + "learning_rate": 1.1887610161032026e-05, + "loss": 0.0474, + "step": 7492 + }, + { + "epoch": 1.83, + "learning_rate": 1.1883734970009321e-05, + "loss": 0.062, + "step": 7494 + }, + { + "epoch": 1.83, + "learning_rate": 1.1879859485674678e-05, + "loss": 0.0484, + "step": 7496 + }, + { + "epoch": 1.83, + "learning_rate": 1.1875983708631538e-05, + "loss": 0.067, + "step": 7498 + }, + { + "epoch": 1.83, + "learning_rate": 1.1872107639483389e-05, + "loss": 0.0289, + "step": 7500 + }, + { + "epoch": 1.83, + "learning_rate": 1.1868231278833764e-05, + "loss": 0.0618, + "step": 7502 + }, + { + "epoch": 1.83, + "learning_rate": 1.1864354627286245e-05, + "loss": 0.0663, + "step": 7504 + }, + { + "epoch": 1.83, + "learning_rate": 1.1860477685444456e-05, + "loss": 0.0584, + "step": 7506 + }, + { + "epoch": 1.83, + "learning_rate": 1.1856600453912062e-05, + "loss": 0.0609, + "step": 7508 + }, + { + "epoch": 1.83, + "learning_rate": 1.1852722933292781e-05, + "loss": 0.054, + "step": 7510 + }, + { + "epoch": 1.83, + "learning_rate": 1.1848845124190374e-05, + "loss": 0.06, + "step": 7512 + }, + { + "epoch": 1.83, + "learning_rate": 1.1844967027208642e-05, + "loss": 0.0598, + "step": 7514 + }, + { + "epoch": 1.83, + "learning_rate": 1.1841088642951434e-05, + "loss": 0.0688, + "step": 7516 + }, + { + "epoch": 1.83, + "learning_rate": 1.1837209972022652e-05, + "loss": 0.0468, + "step": 7518 + }, + { + "epoch": 1.83, + "learning_rate": 1.1833331015026224e-05, + "loss": 0.0393, + "step": 7520 + }, + { + "epoch": 1.83, + "learning_rate": 1.182945177256614e-05, + "loss": 0.0479, + "step": 7522 + }, + { + "epoch": 1.83, + "learning_rate": 1.1825572245246426e-05, + "loss": 0.0501, + "step": 7524 + }, + { + "epoch": 1.83, + "learning_rate": 1.1821692433671154e-05, + "loss": 0.046, + "step": 7526 + }, + { + "epoch": 1.83, + "learning_rate": 1.1817812338444437e-05, + "loss": 0.0462, + "step": 7528 + }, + { + "epoch": 1.83, + "learning_rate": 1.1813931960170442e-05, + "loss": 0.0494, + "step": 7530 + }, + { + "epoch": 1.84, + "learning_rate": 1.181005129945337e-05, + "loss": 0.069, + "step": 7532 + }, + { + "epoch": 1.84, + "learning_rate": 1.1806170356897471e-05, + "loss": 0.0546, + "step": 7534 + }, + { + "epoch": 1.84, + "learning_rate": 1.1802289133107032e-05, + "loss": 0.0531, + "step": 7536 + }, + { + "epoch": 1.84, + "learning_rate": 1.1798407628686396e-05, + "loss": 0.0584, + "step": 7538 + }, + { + "epoch": 1.84, + "learning_rate": 1.1794525844239941e-05, + "loss": 0.0441, + "step": 7540 + }, + { + "epoch": 1.84, + "learning_rate": 1.179064378037209e-05, + "loss": 0.0678, + "step": 7542 + }, + { + "epoch": 1.84, + "learning_rate": 1.1786761437687311e-05, + "loss": 0.0746, + "step": 7544 + }, + { + "epoch": 1.84, + "learning_rate": 1.1782878816790114e-05, + "loss": 0.041, + "step": 7546 + }, + { + "epoch": 1.84, + "learning_rate": 1.1778995918285047e-05, + "loss": 0.0991, + "step": 7548 + }, + { + "epoch": 1.84, + "learning_rate": 1.1775112742776715e-05, + "loss": 0.0414, + "step": 7550 + }, + { + "epoch": 1.84, + "learning_rate": 1.1771229290869757e-05, + "loss": 0.0695, + "step": 7552 + }, + { + "epoch": 1.84, + "learning_rate": 1.1767345563168852e-05, + "loss": 0.0457, + "step": 7554 + }, + { + "epoch": 1.84, + "learning_rate": 1.176346156027873e-05, + "loss": 0.0418, + "step": 7556 + }, + { + "epoch": 1.84, + "learning_rate": 1.1759577282804157e-05, + "loss": 0.0333, + "step": 7558 + }, + { + "epoch": 1.84, + "learning_rate": 1.1755692731349947e-05, + "loss": 0.0486, + "step": 7560 + }, + { + "epoch": 1.84, + "learning_rate": 1.1751807906520956e-05, + "loss": 0.0484, + "step": 7562 + }, + { + "epoch": 1.84, + "learning_rate": 1.1747922808922074e-05, + "loss": 0.0518, + "step": 7564 + }, + { + "epoch": 1.84, + "learning_rate": 1.1744037439158247e-05, + "loss": 0.0293, + "step": 7566 + }, + { + "epoch": 1.84, + "learning_rate": 1.1740151797834459e-05, + "loss": 0.0558, + "step": 7568 + }, + { + "epoch": 1.84, + "learning_rate": 1.1736265885555722e-05, + "loss": 0.0539, + "step": 7570 + }, + { + "epoch": 1.85, + "learning_rate": 1.1732379702927114e-05, + "loss": 0.0306, + "step": 7572 + }, + { + "epoch": 1.85, + "learning_rate": 1.172849325055374e-05, + "loss": 0.0461, + "step": 7574 + }, + { + "epoch": 1.85, + "learning_rate": 1.1724606529040749e-05, + "loss": 0.0704, + "step": 7576 + }, + { + "epoch": 1.85, + "learning_rate": 1.1720719538993332e-05, + "loss": 0.0465, + "step": 7578 + }, + { + "epoch": 1.85, + "learning_rate": 1.1716832281016726e-05, + "loss": 0.0553, + "step": 7580 + }, + { + "epoch": 1.85, + "learning_rate": 1.1712944755716207e-05, + "loss": 0.0388, + "step": 7582 + }, + { + "epoch": 1.85, + "learning_rate": 1.1709056963697091e-05, + "loss": 0.0304, + "step": 7584 + }, + { + "epoch": 1.85, + "learning_rate": 1.1705168905564737e-05, + "loss": 0.0426, + "step": 7586 + }, + { + "epoch": 1.85, + "learning_rate": 1.1701280581924546e-05, + "loss": 0.0628, + "step": 7588 + }, + { + "epoch": 1.85, + "learning_rate": 1.1697391993381956e-05, + "loss": 0.0755, + "step": 7590 + }, + { + "epoch": 1.85, + "learning_rate": 1.169350314054246e-05, + "loss": 0.0762, + "step": 7592 + }, + { + "epoch": 1.85, + "learning_rate": 1.1689614024011569e-05, + "loss": 0.0569, + "step": 7594 + }, + { + "epoch": 1.85, + "learning_rate": 1.1685724644394858e-05, + "loss": 0.0615, + "step": 7596 + }, + { + "epoch": 1.85, + "learning_rate": 1.1681835002297927e-05, + "loss": 0.0571, + "step": 7598 + }, + { + "epoch": 1.85, + "learning_rate": 1.1677945098326429e-05, + "loss": 0.0297, + "step": 7600 + }, + { + "epoch": 1.85, + "learning_rate": 1.1674054933086048e-05, + "loss": 0.0459, + "step": 7602 + }, + { + "epoch": 1.85, + "learning_rate": 1.167016450718251e-05, + "loss": 0.0535, + "step": 7604 + }, + { + "epoch": 1.85, + "learning_rate": 1.1666273821221588e-05, + "loss": 0.0368, + "step": 7606 + }, + { + "epoch": 1.85, + "learning_rate": 1.1662382875809094e-05, + "loss": 0.0658, + "step": 7608 + }, + { + "epoch": 1.85, + "learning_rate": 1.165849167155087e-05, + "loss": 0.0469, + "step": 7610 + }, + { + "epoch": 1.85, + "learning_rate": 1.1654600209052815e-05, + "loss": 0.0301, + "step": 7612 + }, + { + "epoch": 1.86, + "learning_rate": 1.1650708488920851e-05, + "loss": 0.0431, + "step": 7614 + }, + { + "epoch": 1.86, + "learning_rate": 1.1646816511760956e-05, + "loss": 0.0431, + "step": 7616 + }, + { + "epoch": 1.86, + "learning_rate": 1.1642924278179137e-05, + "loss": 0.0605, + "step": 7618 + }, + { + "epoch": 1.86, + "learning_rate": 1.1639031788781446e-05, + "loss": 0.0425, + "step": 7620 + }, + { + "epoch": 1.86, + "learning_rate": 1.1635139044173969e-05, + "loss": 0.0449, + "step": 7622 + }, + { + "epoch": 1.86, + "learning_rate": 1.1631246044962846e-05, + "loss": 0.0392, + "step": 7624 + }, + { + "epoch": 1.86, + "learning_rate": 1.1627352791754232e-05, + "loss": 0.0475, + "step": 7626 + }, + { + "epoch": 1.86, + "learning_rate": 1.1623459285154347e-05, + "loss": 0.0607, + "step": 7628 + }, + { + "epoch": 1.86, + "learning_rate": 1.1619565525769441e-05, + "loss": 0.0473, + "step": 7630 + }, + { + "epoch": 1.86, + "learning_rate": 1.1615671514205798e-05, + "loss": 0.0601, + "step": 7632 + }, + { + "epoch": 1.86, + "learning_rate": 1.161177725106974e-05, + "loss": 0.0459, + "step": 7634 + }, + { + "epoch": 1.86, + "learning_rate": 1.1607882736967643e-05, + "loss": 0.0704, + "step": 7636 + }, + { + "epoch": 1.86, + "learning_rate": 1.160398797250591e-05, + "loss": 0.0571, + "step": 7638 + }, + { + "epoch": 1.86, + "learning_rate": 1.1600092958290985e-05, + "loss": 0.0279, + "step": 7640 + }, + { + "epoch": 1.86, + "learning_rate": 1.1596197694929348e-05, + "loss": 0.0464, + "step": 7642 + }, + { + "epoch": 1.86, + "learning_rate": 1.1592302183027526e-05, + "loss": 0.0503, + "step": 7644 + }, + { + "epoch": 1.86, + "learning_rate": 1.1588406423192077e-05, + "loss": 0.0279, + "step": 7646 + }, + { + "epoch": 1.86, + "learning_rate": 1.1584510416029607e-05, + "loss": 0.0315, + "step": 7648 + }, + { + "epoch": 1.86, + "learning_rate": 1.1580614162146742e-05, + "loss": 0.0345, + "step": 7650 + }, + { + "epoch": 1.86, + "learning_rate": 1.157671766215017e-05, + "loss": 0.0339, + "step": 7652 + }, + { + "epoch": 1.87, + "learning_rate": 1.15728209166466e-05, + "loss": 0.0618, + "step": 7654 + }, + { + "epoch": 1.87, + "learning_rate": 1.1568923926242786e-05, + "loss": 0.0564, + "step": 7656 + }, + { + "epoch": 1.87, + "learning_rate": 1.156502669154552e-05, + "loss": 0.068, + "step": 7658 + }, + { + "epoch": 1.87, + "learning_rate": 1.1561129213161633e-05, + "loss": 0.0486, + "step": 7660 + }, + { + "epoch": 1.87, + "learning_rate": 1.1557231491697987e-05, + "loss": 0.0585, + "step": 7662 + }, + { + "epoch": 1.87, + "learning_rate": 1.1553333527761493e-05, + "loss": 0.0451, + "step": 7664 + }, + { + "epoch": 1.87, + "learning_rate": 1.1549435321959087e-05, + "loss": 0.0424, + "step": 7666 + }, + { + "epoch": 1.87, + "learning_rate": 1.1545536874897758e-05, + "loss": 0.0484, + "step": 7668 + }, + { + "epoch": 1.87, + "learning_rate": 1.1541638187184514e-05, + "loss": 0.0546, + "step": 7670 + }, + { + "epoch": 1.87, + "learning_rate": 1.153773925942642e-05, + "loss": 0.0494, + "step": 7672 + }, + { + "epoch": 1.87, + "learning_rate": 1.1533840092230564e-05, + "loss": 0.0472, + "step": 7674 + }, + { + "epoch": 1.87, + "learning_rate": 1.1529940686204078e-05, + "loss": 0.051, + "step": 7676 + }, + { + "epoch": 1.87, + "learning_rate": 1.1526041041954126e-05, + "loss": 0.0327, + "step": 7678 + }, + { + "epoch": 1.87, + "learning_rate": 1.1522141160087916e-05, + "loss": 0.0485, + "step": 7680 + }, + { + "epoch": 1.87, + "learning_rate": 1.1518241041212686e-05, + "loss": 0.0634, + "step": 7682 + }, + { + "epoch": 1.87, + "learning_rate": 1.1514340685935719e-05, + "loss": 0.0518, + "step": 7684 + }, + { + "epoch": 1.87, + "learning_rate": 1.1510440094864328e-05, + "loss": 0.0507, + "step": 7686 + }, + { + "epoch": 1.87, + "learning_rate": 1.1506539268605863e-05, + "loss": 0.0374, + "step": 7688 + }, + { + "epoch": 1.87, + "learning_rate": 1.1502638207767712e-05, + "loss": 0.0627, + "step": 7690 + }, + { + "epoch": 1.87, + "learning_rate": 1.1498736912957304e-05, + "loss": 0.0582, + "step": 7692 + }, + { + "epoch": 1.87, + "learning_rate": 1.1494835384782099e-05, + "loss": 0.0506, + "step": 7694 + }, + { + "epoch": 1.88, + "learning_rate": 1.1490933623849594e-05, + "loss": 0.0514, + "step": 7696 + }, + { + "epoch": 1.88, + "learning_rate": 1.148703163076732e-05, + "loss": 0.038, + "step": 7698 + }, + { + "epoch": 1.88, + "learning_rate": 1.1483129406142855e-05, + "loss": 0.0517, + "step": 7700 + }, + { + "epoch": 1.88, + "learning_rate": 1.1479226950583797e-05, + "loss": 0.045, + "step": 7702 + }, + { + "epoch": 1.88, + "learning_rate": 1.1475324264697793e-05, + "loss": 0.0536, + "step": 7704 + }, + { + "epoch": 1.88, + "learning_rate": 1.1471421349092517e-05, + "loss": 0.0401, + "step": 7706 + }, + { + "epoch": 1.88, + "learning_rate": 1.1467518204375692e-05, + "loss": 0.0395, + "step": 7708 + }, + { + "epoch": 1.88, + "learning_rate": 1.1463614831155054e-05, + "loss": 0.0255, + "step": 7710 + }, + { + "epoch": 1.88, + "learning_rate": 1.14597112300384e-05, + "loss": 0.0377, + "step": 7712 + }, + { + "epoch": 1.88, + "learning_rate": 1.145580740163354e-05, + "loss": 0.0363, + "step": 7714 + }, + { + "epoch": 1.88, + "learning_rate": 1.1451903346548343e-05, + "loss": 0.0424, + "step": 7716 + }, + { + "epoch": 1.88, + "learning_rate": 1.1447999065390686e-05, + "loss": 0.0533, + "step": 7718 + }, + { + "epoch": 1.88, + "learning_rate": 1.1444094558768506e-05, + "loss": 0.0512, + "step": 7720 + }, + { + "epoch": 1.88, + "learning_rate": 1.144018982728976e-05, + "loss": 0.0423, + "step": 7722 + }, + { + "epoch": 1.88, + "learning_rate": 1.1436284871562446e-05, + "loss": 0.0439, + "step": 7724 + }, + { + "epoch": 1.88, + "learning_rate": 1.1432379692194593e-05, + "loss": 0.0202, + "step": 7726 + }, + { + "epoch": 1.88, + "learning_rate": 1.1428474289794269e-05, + "loss": 0.0439, + "step": 7728 + }, + { + "epoch": 1.88, + "learning_rate": 1.1424568664969578e-05, + "loss": 0.0575, + "step": 7730 + }, + { + "epoch": 1.88, + "learning_rate": 1.1420662818328649e-05, + "loss": 0.0481, + "step": 7732 + }, + { + "epoch": 1.88, + "learning_rate": 1.1416756750479657e-05, + "loss": 0.0439, + "step": 7734 + }, + { + "epoch": 1.88, + "learning_rate": 1.1412850462030806e-05, + "loss": 0.0269, + "step": 7736 + }, + { + "epoch": 1.89, + "learning_rate": 1.1408943953590335e-05, + "loss": 0.0288, + "step": 7738 + }, + { + "epoch": 1.89, + "learning_rate": 1.1405037225766518e-05, + "loss": 0.0515, + "step": 7740 + }, + { + "epoch": 1.89, + "learning_rate": 1.1401130279167655e-05, + "loss": 0.0356, + "step": 7742 + }, + { + "epoch": 1.89, + "learning_rate": 1.13972231144021e-05, + "loss": 0.0463, + "step": 7744 + }, + { + "epoch": 1.89, + "learning_rate": 1.1393315732078219e-05, + "loss": 0.0463, + "step": 7746 + }, + { + "epoch": 1.89, + "learning_rate": 1.1389408132804426e-05, + "loss": 0.036, + "step": 7748 + }, + { + "epoch": 1.89, + "learning_rate": 1.1385500317189163e-05, + "loss": 0.0508, + "step": 7750 + }, + { + "epoch": 1.89, + "learning_rate": 1.1381592285840903e-05, + "loss": 0.0498, + "step": 7752 + }, + { + "epoch": 1.89, + "learning_rate": 1.137768403936816e-05, + "loss": 0.0297, + "step": 7754 + }, + { + "epoch": 1.89, + "learning_rate": 1.137377557837948e-05, + "loss": 0.0464, + "step": 7756 + }, + { + "epoch": 1.89, + "learning_rate": 1.1369866903483437e-05, + "loss": 0.052, + "step": 7758 + }, + { + "epoch": 1.89, + "learning_rate": 1.136595801528864e-05, + "loss": 0.0511, + "step": 7760 + }, + { + "epoch": 1.89, + "learning_rate": 1.1362048914403736e-05, + "loss": 0.0447, + "step": 7762 + }, + { + "epoch": 1.89, + "learning_rate": 1.1358139601437402e-05, + "loss": 0.0638, + "step": 7764 + }, + { + "epoch": 1.89, + "learning_rate": 1.1354230076998347e-05, + "loss": 0.0534, + "step": 7766 + }, + { + "epoch": 1.89, + "learning_rate": 1.1350320341695314e-05, + "loss": 0.0392, + "step": 7768 + }, + { + "epoch": 1.89, + "learning_rate": 1.1346410396137075e-05, + "loss": 0.0487, + "step": 7770 + }, + { + "epoch": 1.89, + "learning_rate": 1.1342500240932445e-05, + "loss": 0.0457, + "step": 7772 + }, + { + "epoch": 1.89, + "learning_rate": 1.1338589876690261e-05, + "loss": 0.0435, + "step": 7774 + }, + { + "epoch": 1.89, + "learning_rate": 1.1334679304019397e-05, + "loss": 0.0728, + "step": 7776 + }, + { + "epoch": 1.9, + "learning_rate": 1.1330768523528761e-05, + "loss": 0.0512, + "step": 7778 + }, + { + "epoch": 1.9, + "learning_rate": 1.132685753582729e-05, + "loss": 0.0334, + "step": 7780 + }, + { + "epoch": 1.9, + "learning_rate": 1.132294634152395e-05, + "loss": 0.055, + "step": 7782 + }, + { + "epoch": 1.9, + "learning_rate": 1.1319034941227754e-05, + "loss": 0.052, + "step": 7784 + }, + { + "epoch": 1.9, + "learning_rate": 1.131512333554773e-05, + "loss": 0.0687, + "step": 7786 + }, + { + "epoch": 1.9, + "learning_rate": 1.1311211525092947e-05, + "loss": 0.0563, + "step": 7788 + }, + { + "epoch": 1.9, + "learning_rate": 1.13072995104725e-05, + "loss": 0.0289, + "step": 7790 + }, + { + "epoch": 1.9, + "learning_rate": 1.1303387292295524e-05, + "loss": 0.0318, + "step": 7792 + }, + { + "epoch": 1.9, + "learning_rate": 1.1299474871171184e-05, + "loss": 0.0357, + "step": 7794 + }, + { + "epoch": 1.9, + "learning_rate": 1.1295562247708665e-05, + "loss": 0.0283, + "step": 7796 + }, + { + "epoch": 1.9, + "learning_rate": 1.1291649422517196e-05, + "loss": 0.0341, + "step": 7798 + }, + { + "epoch": 1.9, + "learning_rate": 1.1287736396206043e-05, + "loss": 0.0473, + "step": 7800 + }, + { + "epoch": 1.9, + "learning_rate": 1.128382316938448e-05, + "loss": 0.0694, + "step": 7802 + }, + { + "epoch": 1.9, + "learning_rate": 1.1279909742661834e-05, + "loss": 0.0444, + "step": 7804 + }, + { + "epoch": 1.9, + "learning_rate": 1.1275996116647454e-05, + "loss": 0.0306, + "step": 7806 + }, + { + "epoch": 1.9, + "learning_rate": 1.1272082291950723e-05, + "loss": 0.0405, + "step": 7808 + }, + { + "epoch": 1.9, + "learning_rate": 1.1268168269181047e-05, + "loss": 0.051, + "step": 7810 + }, + { + "epoch": 1.9, + "learning_rate": 1.126425404894788e-05, + "loss": 0.051, + "step": 7812 + }, + { + "epoch": 1.9, + "learning_rate": 1.1260339631860688e-05, + "loss": 0.0346, + "step": 7814 + }, + { + "epoch": 1.9, + "learning_rate": 1.125642501852898e-05, + "loss": 0.0586, + "step": 7816 + }, + { + "epoch": 1.9, + "learning_rate": 1.1252510209562284e-05, + "loss": 0.0491, + "step": 7818 + }, + { + "epoch": 1.91, + "learning_rate": 1.1248595205570174e-05, + "loss": 0.0461, + "step": 7820 + }, + { + "epoch": 1.91, + "learning_rate": 1.1244680007162246e-05, + "loss": 0.0478, + "step": 7822 + }, + { + "epoch": 1.91, + "learning_rate": 1.124076461494812e-05, + "loss": 0.0301, + "step": 7824 + }, + { + "epoch": 1.91, + "learning_rate": 1.1236849029537453e-05, + "loss": 0.0567, + "step": 7826 + }, + { + "epoch": 1.91, + "learning_rate": 1.1232933251539941e-05, + "loss": 0.0397, + "step": 7828 + }, + { + "epoch": 1.91, + "learning_rate": 1.1229017281565288e-05, + "loss": 0.0522, + "step": 7830 + }, + { + "epoch": 1.91, + "learning_rate": 1.122510112022325e-05, + "loss": 0.0357, + "step": 7832 + }, + { + "epoch": 1.91, + "learning_rate": 1.1221184768123598e-05, + "loss": 0.0452, + "step": 7834 + }, + { + "epoch": 1.91, + "learning_rate": 1.121726822587614e-05, + "loss": 0.0441, + "step": 7836 + }, + { + "epoch": 1.91, + "learning_rate": 1.121335149409071e-05, + "loss": 0.0346, + "step": 7838 + }, + { + "epoch": 1.91, + "learning_rate": 1.1209434573377176e-05, + "loss": 0.0482, + "step": 7840 + }, + { + "epoch": 1.91, + "learning_rate": 1.120551746434543e-05, + "loss": 0.0434, + "step": 7842 + }, + { + "epoch": 1.91, + "learning_rate": 1.1201600167605397e-05, + "loss": 0.0564, + "step": 7844 + }, + { + "epoch": 1.91, + "learning_rate": 1.1197682683767028e-05, + "loss": 0.0537, + "step": 7846 + }, + { + "epoch": 1.91, + "learning_rate": 1.119376501344031e-05, + "loss": 0.0479, + "step": 7848 + }, + { + "epoch": 1.91, + "learning_rate": 1.1189847157235249e-05, + "loss": 0.0449, + "step": 7850 + }, + { + "epoch": 1.91, + "learning_rate": 1.1185929115761889e-05, + "loss": 0.0575, + "step": 7852 + }, + { + "epoch": 1.91, + "learning_rate": 1.1182010889630295e-05, + "loss": 0.0479, + "step": 7854 + }, + { + "epoch": 1.91, + "learning_rate": 1.117809247945057e-05, + "loss": 0.0384, + "step": 7856 + }, + { + "epoch": 1.91, + "learning_rate": 1.1174173885832835e-05, + "loss": 0.0481, + "step": 7858 + }, + { + "epoch": 1.92, + "learning_rate": 1.1170255109387248e-05, + "loss": 0.0357, + "step": 7860 + }, + { + "epoch": 1.92, + "learning_rate": 1.1166336150723992e-05, + "loss": 0.0313, + "step": 7862 + }, + { + "epoch": 1.92, + "learning_rate": 1.1162417010453281e-05, + "loss": 0.0547, + "step": 7864 + }, + { + "epoch": 1.92, + "learning_rate": 1.1158497689185347e-05, + "loss": 0.0415, + "step": 7866 + }, + { + "epoch": 1.92, + "learning_rate": 1.115457818753047e-05, + "loss": 0.0375, + "step": 7868 + }, + { + "epoch": 1.92, + "learning_rate": 1.1150658506098938e-05, + "loss": 0.0392, + "step": 7870 + }, + { + "epoch": 1.92, + "learning_rate": 1.114673864550108e-05, + "loss": 0.0445, + "step": 7872 + }, + { + "epoch": 1.92, + "learning_rate": 1.1142818606347243e-05, + "loss": 0.0353, + "step": 7874 + }, + { + "epoch": 1.92, + "learning_rate": 1.1138898389247812e-05, + "loss": 0.0281, + "step": 7876 + }, + { + "epoch": 1.92, + "learning_rate": 1.1134977994813191e-05, + "loss": 0.0302, + "step": 7878 + }, + { + "epoch": 1.92, + "learning_rate": 1.1131057423653822e-05, + "loss": 0.0335, + "step": 7880 + }, + { + "epoch": 1.92, + "learning_rate": 1.112713667638016e-05, + "loss": 0.0299, + "step": 7882 + }, + { + "epoch": 1.92, + "learning_rate": 1.11232157536027e-05, + "loss": 0.0306, + "step": 7884 + }, + { + "epoch": 1.92, + "learning_rate": 1.1119294655931956e-05, + "loss": 0.0358, + "step": 7886 + }, + { + "epoch": 1.92, + "learning_rate": 1.1115373383978478e-05, + "loss": 0.0605, + "step": 7888 + }, + { + "epoch": 1.92, + "learning_rate": 1.1111451938352833e-05, + "loss": 0.0521, + "step": 7890 + }, + { + "epoch": 1.92, + "learning_rate": 1.1107530319665625e-05, + "loss": 0.0637, + "step": 7892 + }, + { + "epoch": 1.92, + "learning_rate": 1.1103608528527475e-05, + "loss": 0.0534, + "step": 7894 + }, + { + "epoch": 1.92, + "learning_rate": 1.109968656554904e-05, + "loss": 0.0549, + "step": 7896 + }, + { + "epoch": 1.92, + "learning_rate": 1.1095764431340996e-05, + "loss": 0.0276, + "step": 7898 + }, + { + "epoch": 1.92, + "learning_rate": 1.1091842126514052e-05, + "loss": 0.0486, + "step": 7900 + }, + { + "epoch": 1.93, + "learning_rate": 1.1087919651678938e-05, + "loss": 0.0407, + "step": 7902 + }, + { + "epoch": 1.93, + "learning_rate": 1.1083997007446418e-05, + "loss": 0.0588, + "step": 7904 + }, + { + "epoch": 1.93, + "learning_rate": 1.1080074194427275e-05, + "loss": 0.0533, + "step": 7906 + }, + { + "epoch": 1.93, + "learning_rate": 1.107615121323232e-05, + "loss": 0.0421, + "step": 7908 + }, + { + "epoch": 1.93, + "learning_rate": 1.1072228064472391e-05, + "loss": 0.0633, + "step": 7910 + }, + { + "epoch": 1.93, + "learning_rate": 1.1068304748758356e-05, + "loss": 0.0433, + "step": 7912 + }, + { + "epoch": 1.93, + "learning_rate": 1.10643812667011e-05, + "loss": 0.074, + "step": 7914 + }, + { + "epoch": 1.93, + "learning_rate": 1.1060457618911544e-05, + "loss": 0.0198, + "step": 7916 + }, + { + "epoch": 1.93, + "learning_rate": 1.1056533806000625e-05, + "loss": 0.0433, + "step": 7918 + }, + { + "epoch": 1.93, + "learning_rate": 1.1052609828579318e-05, + "loss": 0.0421, + "step": 7920 + }, + { + "epoch": 1.93, + "learning_rate": 1.1048685687258607e-05, + "loss": 0.0387, + "step": 7922 + }, + { + "epoch": 1.93, + "learning_rate": 1.1044761382649519e-05, + "loss": 0.0298, + "step": 7924 + }, + { + "epoch": 1.93, + "learning_rate": 1.1040836915363093e-05, + "loss": 0.0515, + "step": 7926 + }, + { + "epoch": 1.93, + "learning_rate": 1.1036912286010402e-05, + "loss": 0.0444, + "step": 7928 + }, + { + "epoch": 1.93, + "learning_rate": 1.1032987495202536e-05, + "loss": 0.0399, + "step": 7930 + }, + { + "epoch": 1.93, + "learning_rate": 1.1029062543550619e-05, + "loss": 0.055, + "step": 7932 + }, + { + "epoch": 1.93, + "learning_rate": 1.1025137431665798e-05, + "loss": 0.0416, + "step": 7934 + }, + { + "epoch": 1.93, + "learning_rate": 1.1021212160159238e-05, + "loss": 0.0374, + "step": 7936 + }, + { + "epoch": 1.93, + "learning_rate": 1.1017286729642133e-05, + "loss": 0.0323, + "step": 7938 + }, + { + "epoch": 1.93, + "learning_rate": 1.1013361140725712e-05, + "loss": 0.0488, + "step": 7940 + }, + { + "epoch": 1.94, + "learning_rate": 1.1009435394021208e-05, + "loss": 0.0487, + "step": 7942 + }, + { + "epoch": 1.94, + "learning_rate": 1.1005509490139897e-05, + "loss": 0.0384, + "step": 7944 + }, + { + "epoch": 1.94, + "learning_rate": 1.1001583429693063e-05, + "loss": 0.0501, + "step": 7946 + }, + { + "epoch": 1.94, + "learning_rate": 1.099765721329204e-05, + "loss": 0.0436, + "step": 7948 + }, + { + "epoch": 1.94, + "learning_rate": 1.0993730841548153e-05, + "loss": 0.0422, + "step": 7950 + }, + { + "epoch": 1.94, + "learning_rate": 1.0989804315072779e-05, + "loss": 0.038, + "step": 7952 + }, + { + "epoch": 1.94, + "learning_rate": 1.0985877634477301e-05, + "loss": 0.045, + "step": 7954 + }, + { + "epoch": 1.94, + "learning_rate": 1.098195080037314e-05, + "loss": 0.0301, + "step": 7956 + }, + { + "epoch": 1.94, + "learning_rate": 1.0978023813371728e-05, + "loss": 0.0396, + "step": 7958 + }, + { + "epoch": 1.94, + "learning_rate": 1.0974096674084531e-05, + "loss": 0.0443, + "step": 7960 + }, + { + "epoch": 1.94, + "learning_rate": 1.0970169383123035e-05, + "loss": 0.0487, + "step": 7962 + }, + { + "epoch": 1.94, + "learning_rate": 1.0966241941098745e-05, + "loss": 0.0412, + "step": 7964 + }, + { + "epoch": 1.94, + "learning_rate": 1.0962314348623196e-05, + "loss": 0.0621, + "step": 7966 + }, + { + "epoch": 1.94, + "learning_rate": 1.0958386606307947e-05, + "loss": 0.0469, + "step": 7968 + }, + { + "epoch": 1.94, + "learning_rate": 1.0954458714764573e-05, + "loss": 0.0413, + "step": 7970 + }, + { + "epoch": 1.94, + "learning_rate": 1.095053067460468e-05, + "loss": 0.0447, + "step": 7972 + }, + { + "epoch": 1.94, + "learning_rate": 1.094660248643989e-05, + "loss": 0.0226, + "step": 7974 + }, + { + "epoch": 1.94, + "learning_rate": 1.0942674150881859e-05, + "loss": 0.0208, + "step": 7976 + }, + { + "epoch": 1.94, + "learning_rate": 1.093874566854225e-05, + "loss": 0.043, + "step": 7978 + }, + { + "epoch": 1.94, + "learning_rate": 1.0934817040032763e-05, + "loss": 0.0375, + "step": 7980 + }, + { + "epoch": 1.94, + "learning_rate": 1.0930888265965116e-05, + "loss": 0.0679, + "step": 7982 + }, + { + "epoch": 1.95, + "learning_rate": 1.0926959346951046e-05, + "loss": 0.0555, + "step": 7984 + }, + { + "epoch": 1.95, + "learning_rate": 1.0923030283602318e-05, + "loss": 0.0534, + "step": 7986 + }, + { + "epoch": 1.95, + "learning_rate": 1.0919101076530719e-05, + "loss": 0.072, + "step": 7988 + }, + { + "epoch": 1.95, + "learning_rate": 1.0915171726348053e-05, + "loss": 0.034, + "step": 7990 + }, + { + "epoch": 1.95, + "learning_rate": 1.0911242233666152e-05, + "loss": 0.0437, + "step": 7992 + }, + { + "epoch": 1.95, + "learning_rate": 1.0907312599096864e-05, + "loss": 0.0326, + "step": 7994 + }, + { + "epoch": 1.95, + "learning_rate": 1.0903382823252069e-05, + "loss": 0.0421, + "step": 7996 + }, + { + "epoch": 1.95, + "learning_rate": 1.0899452906743662e-05, + "loss": 0.0288, + "step": 7998 + }, + { + "epoch": 1.95, + "learning_rate": 1.0895522850183557e-05, + "loss": 0.0327, + "step": 8000 + }, + { + "epoch": 1.95, + "learning_rate": 1.0891592654183695e-05, + "loss": 0.0469, + "step": 8002 + }, + { + "epoch": 1.95, + "learning_rate": 1.0887662319356045e-05, + "loss": 0.0251, + "step": 8004 + }, + { + "epoch": 1.95, + "learning_rate": 1.088373184631258e-05, + "loss": 0.0282, + "step": 8006 + }, + { + "epoch": 1.95, + "learning_rate": 1.0879801235665311e-05, + "loss": 0.0486, + "step": 8008 + }, + { + "epoch": 1.95, + "learning_rate": 1.087587048802626e-05, + "loss": 0.0395, + "step": 8010 + }, + { + "epoch": 1.95, + "learning_rate": 1.0871939604007477e-05, + "loss": 0.0366, + "step": 8012 + }, + { + "epoch": 1.95, + "learning_rate": 1.086800858422103e-05, + "loss": 0.0515, + "step": 8014 + }, + { + "epoch": 1.95, + "learning_rate": 1.086407742927901e-05, + "loss": 0.0379, + "step": 8016 + }, + { + "epoch": 1.95, + "learning_rate": 1.0860146139793525e-05, + "loss": 0.0424, + "step": 8018 + }, + { + "epoch": 1.95, + "learning_rate": 1.0856214716376712e-05, + "loss": 0.0439, + "step": 8020 + }, + { + "epoch": 1.95, + "learning_rate": 1.0852283159640718e-05, + "loss": 0.0494, + "step": 8022 + }, + { + "epoch": 1.96, + "learning_rate": 1.084835147019772e-05, + "loss": 0.0484, + "step": 8024 + }, + { + "epoch": 1.96, + "learning_rate": 1.0844419648659912e-05, + "loss": 0.0313, + "step": 8026 + }, + { + "epoch": 1.96, + "learning_rate": 1.0840487695639506e-05, + "loss": 0.052, + "step": 8028 + }, + { + "epoch": 1.96, + "learning_rate": 1.0836555611748739e-05, + "loss": 0.0334, + "step": 8030 + }, + { + "epoch": 1.96, + "learning_rate": 1.0832623397599869e-05, + "loss": 0.0577, + "step": 8032 + }, + { + "epoch": 1.96, + "learning_rate": 1.0828691053805165e-05, + "loss": 0.0274, + "step": 8034 + }, + { + "epoch": 1.96, + "learning_rate": 1.0824758580976929e-05, + "loss": 0.0532, + "step": 8036 + }, + { + "epoch": 1.96, + "learning_rate": 1.0820825979727477e-05, + "loss": 0.0234, + "step": 8038 + }, + { + "epoch": 1.96, + "learning_rate": 1.081689325066914e-05, + "loss": 0.05, + "step": 8040 + }, + { + "epoch": 1.96, + "learning_rate": 1.0812960394414278e-05, + "loss": 0.0279, + "step": 8042 + }, + { + "epoch": 1.96, + "learning_rate": 1.0809027411575267e-05, + "loss": 0.0333, + "step": 8044 + }, + { + "epoch": 1.96, + "learning_rate": 1.0805094302764505e-05, + "loss": 0.0405, + "step": 8046 + }, + { + "epoch": 1.96, + "learning_rate": 1.0801161068594401e-05, + "loss": 0.0307, + "step": 8048 + }, + { + "epoch": 1.96, + "learning_rate": 1.079722770967739e-05, + "loss": 0.0375, + "step": 8050 + }, + { + "epoch": 1.96, + "learning_rate": 1.0793294226625932e-05, + "loss": 0.0394, + "step": 8052 + }, + { + "epoch": 1.96, + "learning_rate": 1.0789360620052496e-05, + "loss": 0.0318, + "step": 8054 + }, + { + "epoch": 1.96, + "learning_rate": 1.0785426890569575e-05, + "loss": 0.0292, + "step": 8056 + }, + { + "epoch": 1.96, + "learning_rate": 1.0781493038789682e-05, + "loss": 0.0332, + "step": 8058 + }, + { + "epoch": 1.96, + "learning_rate": 1.0777559065325347e-05, + "loss": 0.0598, + "step": 8060 + }, + { + "epoch": 1.96, + "learning_rate": 1.0773624970789118e-05, + "loss": 0.0342, + "step": 8062 + }, + { + "epoch": 1.96, + "learning_rate": 1.0769690755793567e-05, + "loss": 0.0371, + "step": 8064 + }, + { + "epoch": 1.97, + "learning_rate": 1.076575642095128e-05, + "loss": 0.0434, + "step": 8066 + }, + { + "epoch": 1.97, + "learning_rate": 1.0761821966874862e-05, + "loss": 0.0435, + "step": 8068 + }, + { + "epoch": 1.97, + "learning_rate": 1.075788739417694e-05, + "loss": 0.0477, + "step": 8070 + }, + { + "epoch": 1.97, + "learning_rate": 1.0753952703470152e-05, + "loss": 0.0393, + "step": 8072 + }, + { + "epoch": 1.97, + "learning_rate": 1.0750017895367165e-05, + "loss": 0.0469, + "step": 8074 + }, + { + "epoch": 1.97, + "learning_rate": 1.0746082970480658e-05, + "loss": 0.0254, + "step": 8076 + }, + { + "epoch": 1.97, + "learning_rate": 1.0742147929423326e-05, + "loss": 0.0331, + "step": 8078 + }, + { + "epoch": 1.97, + "learning_rate": 1.0738212772807883e-05, + "loss": 0.0347, + "step": 8080 + }, + { + "epoch": 1.97, + "learning_rate": 1.0734277501247073e-05, + "loss": 0.0454, + "step": 8082 + }, + { + "epoch": 1.97, + "learning_rate": 1.0730342115353635e-05, + "loss": 0.052, + "step": 8084 + }, + { + "epoch": 1.97, + "learning_rate": 1.072640661574035e-05, + "loss": 0.036, + "step": 8086 + }, + { + "epoch": 1.97, + "learning_rate": 1.0722471003019998e-05, + "loss": 0.0569, + "step": 8088 + }, + { + "epoch": 1.97, + "learning_rate": 1.0718535277805388e-05, + "loss": 0.049, + "step": 8090 + }, + { + "epoch": 1.97, + "learning_rate": 1.071459944070934e-05, + "loss": 0.0353, + "step": 8092 + }, + { + "epoch": 1.97, + "learning_rate": 1.0710663492344693e-05, + "loss": 0.0394, + "step": 8094 + }, + { + "epoch": 1.97, + "learning_rate": 1.0706727433324308e-05, + "loss": 0.0485, + "step": 8096 + }, + { + "epoch": 1.97, + "learning_rate": 1.0702791264261056e-05, + "loss": 0.0335, + "step": 8098 + }, + { + "epoch": 1.97, + "learning_rate": 1.0698854985767831e-05, + "loss": 0.0302, + "step": 8100 + }, + { + "epoch": 1.97, + "learning_rate": 1.069491859845754e-05, + "loss": 0.0371, + "step": 8102 + }, + { + "epoch": 1.97, + "learning_rate": 1.0690982102943113e-05, + "loss": 0.0345, + "step": 8104 + }, + { + "epoch": 1.98, + "learning_rate": 1.0687045499837486e-05, + "loss": 0.0313, + "step": 8106 + }, + { + "epoch": 1.98, + "learning_rate": 1.0683108789753618e-05, + "loss": 0.0341, + "step": 8108 + }, + { + "epoch": 1.98, + "learning_rate": 1.0679171973304494e-05, + "loss": 0.0459, + "step": 8110 + }, + { + "epoch": 1.98, + "learning_rate": 1.0675235051103097e-05, + "loss": 0.0301, + "step": 8112 + }, + { + "epoch": 1.98, + "learning_rate": 1.067129802376244e-05, + "loss": 0.0269, + "step": 8114 + }, + { + "epoch": 1.98, + "learning_rate": 1.0667360891895547e-05, + "loss": 0.0452, + "step": 8116 + }, + { + "epoch": 1.98, + "learning_rate": 1.0663423656115461e-05, + "loss": 0.0285, + "step": 8118 + }, + { + "epoch": 1.98, + "learning_rate": 1.0659486317035237e-05, + "loss": 0.0586, + "step": 8120 + }, + { + "epoch": 1.98, + "learning_rate": 1.0655548875267951e-05, + "loss": 0.0384, + "step": 8122 + }, + { + "epoch": 1.98, + "learning_rate": 1.0651611331426694e-05, + "loss": 0.0415, + "step": 8124 + }, + { + "epoch": 1.98, + "learning_rate": 1.064767368612457e-05, + "loss": 0.0362, + "step": 8126 + }, + { + "epoch": 1.98, + "learning_rate": 1.0643735939974698e-05, + "loss": 0.0274, + "step": 8128 + }, + { + "epoch": 1.98, + "learning_rate": 1.0639798093590221e-05, + "loss": 0.0448, + "step": 8130 + }, + { + "epoch": 1.98, + "learning_rate": 1.0635860147584286e-05, + "loss": 0.0221, + "step": 8132 + }, + { + "epoch": 1.98, + "learning_rate": 1.0631922102570066e-05, + "loss": 0.0387, + "step": 8134 + }, + { + "epoch": 1.98, + "learning_rate": 1.0627983959160739e-05, + "loss": 0.0381, + "step": 8136 + }, + { + "epoch": 1.98, + "learning_rate": 1.0624045717969514e-05, + "loss": 0.0307, + "step": 8138 + }, + { + "epoch": 1.98, + "learning_rate": 1.0620107379609592e-05, + "loss": 0.0443, + "step": 8140 + }, + { + "epoch": 1.98, + "learning_rate": 1.0616168944694212e-05, + "loss": 0.0449, + "step": 8142 + }, + { + "epoch": 1.98, + "learning_rate": 1.0612230413836615e-05, + "loss": 0.0474, + "step": 8144 + }, + { + "epoch": 1.98, + "learning_rate": 1.0608291787650064e-05, + "loss": 0.0362, + "step": 8146 + }, + { + "epoch": 1.99, + "learning_rate": 1.0604353066747825e-05, + "loss": 0.0477, + "step": 8148 + }, + { + "epoch": 1.99, + "learning_rate": 1.0600414251743195e-05, + "loss": 0.032, + "step": 8150 + }, + { + "epoch": 1.99, + "learning_rate": 1.0596475343249478e-05, + "loss": 0.0534, + "step": 8152 + }, + { + "epoch": 1.99, + "learning_rate": 1.0592536341879986e-05, + "loss": 0.0288, + "step": 8154 + }, + { + "epoch": 1.99, + "learning_rate": 1.0588597248248054e-05, + "loss": 0.0239, + "step": 8156 + }, + { + "epoch": 1.99, + "learning_rate": 1.058465806296703e-05, + "loss": 0.0279, + "step": 8158 + }, + { + "epoch": 1.99, + "learning_rate": 1.0580718786650275e-05, + "loss": 0.0401, + "step": 8160 + }, + { + "epoch": 1.99, + "learning_rate": 1.0576779419911165e-05, + "loss": 0.0421, + "step": 8162 + }, + { + "epoch": 1.99, + "learning_rate": 1.0572839963363088e-05, + "loss": 0.0413, + "step": 8164 + }, + { + "epoch": 1.99, + "learning_rate": 1.056890041761945e-05, + "loss": 0.0301, + "step": 8166 + }, + { + "epoch": 1.99, + "learning_rate": 1.0564960783293666e-05, + "loss": 0.0297, + "step": 8168 + }, + { + "epoch": 1.99, + "learning_rate": 1.0561021060999168e-05, + "loss": 0.037, + "step": 8170 + }, + { + "epoch": 1.99, + "learning_rate": 1.05570812513494e-05, + "loss": 0.0246, + "step": 8172 + }, + { + "epoch": 1.99, + "learning_rate": 1.0553141354957823e-05, + "loss": 0.0507, + "step": 8174 + }, + { + "epoch": 1.99, + "learning_rate": 1.0549201372437905e-05, + "loss": 0.0384, + "step": 8176 + }, + { + "epoch": 1.99, + "learning_rate": 1.0545261304403137e-05, + "loss": 0.0504, + "step": 8178 + }, + { + "epoch": 1.99, + "learning_rate": 1.0541321151467012e-05, + "loss": 0.0367, + "step": 8180 + }, + { + "epoch": 1.99, + "learning_rate": 1.0537380914243045e-05, + "loss": 0.0399, + "step": 8182 + }, + { + "epoch": 1.99, + "learning_rate": 1.053344059334476e-05, + "loss": 0.0241, + "step": 8184 + }, + { + "epoch": 1.99, + "learning_rate": 1.0529500189385696e-05, + "loss": 0.0202, + "step": 8186 + }, + { + "epoch": 2.0, + "learning_rate": 1.0525559702979402e-05, + "loss": 0.0375, + "step": 8188 + }, + { + "epoch": 2.0, + "learning_rate": 1.0521619134739447e-05, + "loss": 0.0425, + "step": 8190 + }, + { + "epoch": 2.0, + "learning_rate": 1.05176784852794e-05, + "loss": 0.0536, + "step": 8192 + }, + { + "epoch": 2.0, + "learning_rate": 1.0513737755212856e-05, + "loss": 0.0386, + "step": 8194 + }, + { + "epoch": 2.0, + "learning_rate": 1.0509796945153416e-05, + "loss": 0.0288, + "step": 8196 + }, + { + "epoch": 2.0, + "learning_rate": 1.0505856055714693e-05, + "loss": 0.0298, + "step": 8198 + }, + { + "epoch": 2.0, + "learning_rate": 1.0501915087510314e-05, + "loss": 0.0213, + "step": 8200 + }, + { + "epoch": 2.0, + "learning_rate": 1.049797404115392e-05, + "loss": 0.0503, + "step": 8202 + }, + { + "epoch": 2.0, + "learning_rate": 1.0494032917259159e-05, + "loss": 0.0498, + "step": 8204 + }, + { + "epoch": 2.0, + "learning_rate": 1.0490091716439693e-05, + "loss": 0.0372, + "step": 8206 + }, + { + "epoch": 2.0, + "learning_rate": 1.04861504393092e-05, + "loss": 0.0302, + "step": 8208 + }, + { + "epoch": 2.0, + "learning_rate": 1.0482209086481368e-05, + "loss": 0.0288, + "step": 8210 + }, + { + "epoch": 2.0, + "learning_rate": 1.047826765856989e-05, + "loss": 0.0146, + "step": 8212 + }, + { + "epoch": 2.0, + "learning_rate": 1.0474326156188483e-05, + "loss": 0.0127, + "step": 8214 + }, + { + "epoch": 2.0, + "learning_rate": 1.0470384579950867e-05, + "loss": 0.0208, + "step": 8216 + }, + { + "epoch": 2.0, + "learning_rate": 1.0466442930470775e-05, + "loss": 0.0144, + "step": 8218 + }, + { + "epoch": 2.0, + "learning_rate": 1.0462501208361948e-05, + "loss": 0.0075, + "step": 8220 + }, + { + "epoch": 2.0, + "learning_rate": 1.0458559414238152e-05, + "loss": 0.0204, + "step": 8222 + }, + { + "epoch": 2.0, + "learning_rate": 1.0454617548713147e-05, + "loss": 0.0187, + "step": 8224 + }, + { + "epoch": 2.0, + "learning_rate": 1.045067561240071e-05, + "loss": 0.0132, + "step": 8226 + }, + { + "epoch": 2.0, + "learning_rate": 1.0446733605914636e-05, + "loss": 0.0206, + "step": 8228 + }, + { + "epoch": 2.01, + "learning_rate": 1.0442791529868727e-05, + "loss": 0.0268, + "step": 8230 + }, + { + "epoch": 2.01, + "learning_rate": 1.0438849384876789e-05, + "loss": 0.0134, + "step": 8232 + }, + { + "epoch": 2.01, + "learning_rate": 1.0434907171552644e-05, + "loss": 0.014, + "step": 8234 + }, + { + "epoch": 2.01, + "learning_rate": 1.0430964890510129e-05, + "loss": 0.0223, + "step": 8236 + }, + { + "epoch": 2.01, + "learning_rate": 1.0427022542363082e-05, + "loss": 0.0163, + "step": 8238 + }, + { + "epoch": 2.01, + "learning_rate": 1.0423080127725362e-05, + "loss": 0.0229, + "step": 8240 + }, + { + "epoch": 2.01, + "learning_rate": 1.041913764721083e-05, + "loss": 0.0164, + "step": 8242 + }, + { + "epoch": 2.01, + "learning_rate": 1.0415195101433364e-05, + "loss": 0.0242, + "step": 8244 + }, + { + "epoch": 2.01, + "learning_rate": 1.0411252491006844e-05, + "loss": 0.0133, + "step": 8246 + }, + { + "epoch": 2.01, + "learning_rate": 1.0407309816545166e-05, + "loss": 0.0088, + "step": 8248 + }, + { + "epoch": 2.01, + "learning_rate": 1.0403367078662236e-05, + "loss": 0.0196, + "step": 8250 + }, + { + "epoch": 2.01, + "learning_rate": 1.0399424277971968e-05, + "loss": 0.0146, + "step": 8252 + }, + { + "epoch": 2.01, + "learning_rate": 1.0395481415088284e-05, + "loss": 0.0186, + "step": 8254 + }, + { + "epoch": 2.01, + "learning_rate": 1.039153849062512e-05, + "loss": 0.0188, + "step": 8256 + }, + { + "epoch": 2.01, + "learning_rate": 1.0387595505196421e-05, + "loss": 0.0091, + "step": 8258 + }, + { + "epoch": 2.01, + "learning_rate": 1.0383652459416134e-05, + "loss": 0.0209, + "step": 8260 + }, + { + "epoch": 2.01, + "learning_rate": 1.0379709353898229e-05, + "loss": 0.0091, + "step": 8262 + }, + { + "epoch": 2.01, + "learning_rate": 1.0375766189256673e-05, + "loss": 0.014, + "step": 8264 + }, + { + "epoch": 2.01, + "learning_rate": 1.0371822966105449e-05, + "loss": 0.012, + "step": 8266 + }, + { + "epoch": 2.01, + "learning_rate": 1.0367879685058542e-05, + "loss": 0.0188, + "step": 8268 + }, + { + "epoch": 2.02, + "learning_rate": 1.036393634672996e-05, + "loss": 0.0137, + "step": 8270 + }, + { + "epoch": 2.02, + "learning_rate": 1.0359992951733704e-05, + "loss": 0.0175, + "step": 8272 + }, + { + "epoch": 2.02, + "learning_rate": 1.0356049500683792e-05, + "loss": 0.0181, + "step": 8274 + }, + { + "epoch": 2.02, + "learning_rate": 1.0352105994194248e-05, + "loss": 0.0202, + "step": 8276 + }, + { + "epoch": 2.02, + "learning_rate": 1.0348162432879111e-05, + "loss": 0.0104, + "step": 8278 + }, + { + "epoch": 2.02, + "learning_rate": 1.0344218817352422e-05, + "loss": 0.0095, + "step": 8280 + }, + { + "epoch": 2.02, + "learning_rate": 1.034027514822823e-05, + "loss": 0.0235, + "step": 8282 + }, + { + "epoch": 2.02, + "learning_rate": 1.0336331426120595e-05, + "loss": 0.0153, + "step": 8284 + }, + { + "epoch": 2.02, + "learning_rate": 1.0332387651643587e-05, + "loss": 0.0207, + "step": 8286 + }, + { + "epoch": 2.02, + "learning_rate": 1.0328443825411276e-05, + "loss": 0.015, + "step": 8288 + }, + { + "epoch": 2.02, + "learning_rate": 1.0324499948037753e-05, + "loss": 0.0144, + "step": 8290 + }, + { + "epoch": 2.02, + "learning_rate": 1.0320556020137104e-05, + "loss": 0.0096, + "step": 8292 + }, + { + "epoch": 2.02, + "learning_rate": 1.0316612042323438e-05, + "loss": 0.0229, + "step": 8294 + }, + { + "epoch": 2.02, + "learning_rate": 1.0312668015210848e-05, + "loss": 0.0082, + "step": 8296 + }, + { + "epoch": 2.02, + "learning_rate": 1.030872393941346e-05, + "loss": 0.0131, + "step": 8298 + }, + { + "epoch": 2.02, + "learning_rate": 1.0304779815545396e-05, + "loss": 0.0146, + "step": 8300 + }, + { + "epoch": 2.02, + "learning_rate": 1.0300835644220783e-05, + "loss": 0.016, + "step": 8302 + }, + { + "epoch": 2.02, + "learning_rate": 1.0296891426053759e-05, + "loss": 0.0113, + "step": 8304 + }, + { + "epoch": 2.02, + "learning_rate": 1.029294716165847e-05, + "loss": 0.0298, + "step": 8306 + }, + { + "epoch": 2.02, + "learning_rate": 1.0289002851649068e-05, + "loss": 0.0168, + "step": 8308 + }, + { + "epoch": 2.02, + "learning_rate": 1.0285058496639713e-05, + "loss": 0.0171, + "step": 8310 + }, + { + "epoch": 2.03, + "learning_rate": 1.0281114097244568e-05, + "loss": 0.0072, + "step": 8312 + }, + { + "epoch": 2.03, + "learning_rate": 1.0277169654077813e-05, + "loss": 0.0126, + "step": 8314 + }, + { + "epoch": 2.03, + "learning_rate": 1.0273225167753618e-05, + "loss": 0.0237, + "step": 8316 + }, + { + "epoch": 2.03, + "learning_rate": 1.0269280638886178e-05, + "loss": 0.0141, + "step": 8318 + }, + { + "epoch": 2.03, + "learning_rate": 1.0265336068089682e-05, + "loss": 0.015, + "step": 8320 + }, + { + "epoch": 2.03, + "learning_rate": 1.0261391455978335e-05, + "loss": 0.0087, + "step": 8322 + }, + { + "epoch": 2.03, + "learning_rate": 1.0257446803166336e-05, + "loss": 0.0226, + "step": 8324 + }, + { + "epoch": 2.03, + "learning_rate": 1.02535021102679e-05, + "loss": 0.0223, + "step": 8326 + }, + { + "epoch": 2.03, + "learning_rate": 1.0249557377897251e-05, + "loss": 0.0152, + "step": 8328 + }, + { + "epoch": 2.03, + "learning_rate": 1.0245612606668609e-05, + "loss": 0.0081, + "step": 8330 + }, + { + "epoch": 2.03, + "learning_rate": 1.0241667797196202e-05, + "loss": 0.0234, + "step": 8332 + }, + { + "epoch": 2.03, + "learning_rate": 1.0237722950094275e-05, + "loss": 0.0155, + "step": 8334 + }, + { + "epoch": 2.03, + "learning_rate": 1.0233778065977067e-05, + "loss": 0.013, + "step": 8336 + }, + { + "epoch": 2.03, + "learning_rate": 1.0229833145458825e-05, + "loss": 0.0254, + "step": 8338 + }, + { + "epoch": 2.03, + "learning_rate": 1.0225888189153803e-05, + "loss": 0.0144, + "step": 8340 + }, + { + "epoch": 2.03, + "learning_rate": 1.0221943197676265e-05, + "loss": 0.0224, + "step": 8342 + }, + { + "epoch": 2.03, + "learning_rate": 1.0217998171640475e-05, + "loss": 0.0247, + "step": 8344 + }, + { + "epoch": 2.03, + "learning_rate": 1.0214053111660702e-05, + "loss": 0.019, + "step": 8346 + }, + { + "epoch": 2.03, + "learning_rate": 1.0210108018351222e-05, + "loss": 0.0122, + "step": 8348 + }, + { + "epoch": 2.03, + "learning_rate": 1.020616289232632e-05, + "loss": 0.0255, + "step": 8350 + }, + { + "epoch": 2.04, + "learning_rate": 1.0202217734200273e-05, + "loss": 0.0143, + "step": 8352 + }, + { + "epoch": 2.04, + "learning_rate": 1.0198272544587382e-05, + "loss": 0.0308, + "step": 8354 + }, + { + "epoch": 2.04, + "learning_rate": 1.019432732410194e-05, + "loss": 0.0123, + "step": 8356 + }, + { + "epoch": 2.04, + "learning_rate": 1.019038207335825e-05, + "loss": 0.014, + "step": 8358 + }, + { + "epoch": 2.04, + "learning_rate": 1.0186436792970608e-05, + "loss": 0.018, + "step": 8360 + }, + { + "epoch": 2.04, + "learning_rate": 1.0182491483553339e-05, + "loss": 0.0066, + "step": 8362 + }, + { + "epoch": 2.04, + "learning_rate": 1.0178546145720746e-05, + "loss": 0.0208, + "step": 8364 + }, + { + "epoch": 2.04, + "learning_rate": 1.0174600780087155e-05, + "loss": 0.0116, + "step": 8366 + }, + { + "epoch": 2.04, + "learning_rate": 1.0170655387266888e-05, + "loss": 0.0172, + "step": 8368 + }, + { + "epoch": 2.04, + "learning_rate": 1.016670996787427e-05, + "loss": 0.0105, + "step": 8370 + }, + { + "epoch": 2.04, + "learning_rate": 1.0162764522523638e-05, + "loss": 0.0158, + "step": 8372 + }, + { + "epoch": 2.04, + "learning_rate": 1.0158819051829325e-05, + "loss": 0.0128, + "step": 8374 + }, + { + "epoch": 2.04, + "learning_rate": 1.015487355640567e-05, + "loss": 0.008, + "step": 8376 + }, + { + "epoch": 2.04, + "learning_rate": 1.0150928036867019e-05, + "loss": 0.0125, + "step": 8378 + }, + { + "epoch": 2.04, + "learning_rate": 1.0146982493827717e-05, + "loss": 0.014, + "step": 8380 + }, + { + "epoch": 2.04, + "learning_rate": 1.0143036927902116e-05, + "loss": 0.0122, + "step": 8382 + }, + { + "epoch": 2.04, + "learning_rate": 1.0139091339704574e-05, + "loss": 0.0234, + "step": 8384 + }, + { + "epoch": 2.04, + "learning_rate": 1.0135145729849448e-05, + "loss": 0.0164, + "step": 8386 + }, + { + "epoch": 2.04, + "learning_rate": 1.0131200098951092e-05, + "loss": 0.0187, + "step": 8388 + }, + { + "epoch": 2.04, + "learning_rate": 1.0127254447623885e-05, + "loss": 0.0103, + "step": 8390 + }, + { + "epoch": 2.04, + "learning_rate": 1.0123308776482184e-05, + "loss": 0.0194, + "step": 8392 + }, + { + "epoch": 2.05, + "learning_rate": 1.0119363086140363e-05, + "loss": 0.0128, + "step": 8394 + }, + { + "epoch": 2.05, + "learning_rate": 1.0115417377212797e-05, + "loss": 0.0118, + "step": 8396 + }, + { + "epoch": 2.05, + "learning_rate": 1.0111471650313862e-05, + "loss": 0.0204, + "step": 8398 + }, + { + "epoch": 2.05, + "learning_rate": 1.0107525906057943e-05, + "loss": 0.0154, + "step": 8400 + }, + { + "epoch": 2.05, + "learning_rate": 1.0103580145059415e-05, + "loss": 0.0073, + "step": 8402 + }, + { + "epoch": 2.05, + "learning_rate": 1.0099634367932664e-05, + "loss": 0.024, + "step": 8404 + }, + { + "epoch": 2.05, + "learning_rate": 1.0095688575292087e-05, + "loss": 0.0142, + "step": 8406 + }, + { + "epoch": 2.05, + "learning_rate": 1.009174276775206e-05, + "loss": 0.0155, + "step": 8408 + }, + { + "epoch": 2.05, + "learning_rate": 1.0087796945926984e-05, + "loss": 0.0136, + "step": 8410 + }, + { + "epoch": 2.05, + "learning_rate": 1.0083851110431256e-05, + "loss": 0.0253, + "step": 8412 + }, + { + "epoch": 2.05, + "learning_rate": 1.0079905261879265e-05, + "loss": 0.0132, + "step": 8414 + }, + { + "epoch": 2.05, + "learning_rate": 1.0075959400885413e-05, + "loss": 0.0079, + "step": 8416 + }, + { + "epoch": 2.05, + "learning_rate": 1.0072013528064107e-05, + "loss": 0.0107, + "step": 8418 + }, + { + "epoch": 2.05, + "learning_rate": 1.006806764402974e-05, + "loss": 0.0183, + "step": 8420 + }, + { + "epoch": 2.05, + "learning_rate": 1.006412174939672e-05, + "loss": 0.0249, + "step": 8422 + }, + { + "epoch": 2.05, + "learning_rate": 1.0060175844779454e-05, + "loss": 0.0171, + "step": 8424 + }, + { + "epoch": 2.05, + "learning_rate": 1.0056229930792349e-05, + "loss": 0.0104, + "step": 8426 + }, + { + "epoch": 2.05, + "learning_rate": 1.0052284008049813e-05, + "loss": 0.0132, + "step": 8428 + }, + { + "epoch": 2.05, + "learning_rate": 1.0048338077166259e-05, + "loss": 0.0159, + "step": 8430 + }, + { + "epoch": 2.05, + "learning_rate": 1.0044392138756094e-05, + "loss": 0.0111, + "step": 8432 + }, + { + "epoch": 2.06, + "learning_rate": 1.0040446193433737e-05, + "loss": 0.0086, + "step": 8434 + }, + { + "epoch": 2.06, + "learning_rate": 1.0036500241813596e-05, + "loss": 0.0108, + "step": 8436 + }, + { + "epoch": 2.06, + "learning_rate": 1.003255428451009e-05, + "loss": 0.0134, + "step": 8438 + }, + { + "epoch": 2.06, + "learning_rate": 1.0028608322137632e-05, + "loss": 0.0162, + "step": 8440 + }, + { + "epoch": 2.06, + "learning_rate": 1.002466235531064e-05, + "loss": 0.0192, + "step": 8442 + }, + { + "epoch": 2.06, + "learning_rate": 1.0020716384643527e-05, + "loss": 0.0276, + "step": 8444 + }, + { + "epoch": 2.06, + "learning_rate": 1.0016770410750718e-05, + "loss": 0.0091, + "step": 8446 + }, + { + "epoch": 2.06, + "learning_rate": 1.0012824434246628e-05, + "loss": 0.012, + "step": 8448 + }, + { + "epoch": 2.06, + "learning_rate": 1.0008878455745676e-05, + "loss": 0.0144, + "step": 8450 + }, + { + "epoch": 2.06, + "learning_rate": 1.0004932475862277e-05, + "loss": 0.0139, + "step": 8452 + }, + { + "epoch": 2.06, + "learning_rate": 1.0000986495210858e-05, + "loss": 0.0213, + "step": 8454 + }, + { + "epoch": 2.06, + "learning_rate": 9.997040514405832e-06, + "loss": 0.0126, + "step": 8456 + }, + { + "epoch": 2.06, + "learning_rate": 9.99309453406162e-06, + "loss": 0.0117, + "step": 8458 + }, + { + "epoch": 2.06, + "learning_rate": 9.989148554792645e-06, + "loss": 0.0072, + "step": 8460 + }, + { + "epoch": 2.06, + "learning_rate": 9.98520257721332e-06, + "loss": 0.0191, + "step": 8462 + }, + { + "epoch": 2.06, + "learning_rate": 9.981256601938067e-06, + "loss": 0.0095, + "step": 8464 + }, + { + "epoch": 2.06, + "learning_rate": 9.977310629581303e-06, + "loss": 0.0122, + "step": 8466 + }, + { + "epoch": 2.06, + "learning_rate": 9.973364660757453e-06, + "loss": 0.0201, + "step": 8468 + }, + { + "epoch": 2.06, + "learning_rate": 9.969418696080926e-06, + "loss": 0.0141, + "step": 8470 + }, + { + "epoch": 2.06, + "learning_rate": 9.96547273616614e-06, + "loss": 0.0156, + "step": 8472 + }, + { + "epoch": 2.06, + "learning_rate": 9.961526781627512e-06, + "loss": 0.0204, + "step": 8474 + }, + { + "epoch": 2.07, + "learning_rate": 9.957580833079463e-06, + "loss": 0.0163, + "step": 8476 + }, + { + "epoch": 2.07, + "learning_rate": 9.953634891136401e-06, + "loss": 0.013, + "step": 8478 + }, + { + "epoch": 2.07, + "learning_rate": 9.949688956412739e-06, + "loss": 0.0126, + "step": 8480 + }, + { + "epoch": 2.07, + "learning_rate": 9.945743029522893e-06, + "loss": 0.0136, + "step": 8482 + }, + { + "epoch": 2.07, + "learning_rate": 9.941797111081273e-06, + "loss": 0.0182, + "step": 8484 + }, + { + "epoch": 2.07, + "learning_rate": 9.937851201702285e-06, + "loss": 0.0131, + "step": 8486 + }, + { + "epoch": 2.07, + "learning_rate": 9.933905302000341e-06, + "loss": 0.0114, + "step": 8488 + }, + { + "epoch": 2.07, + "learning_rate": 9.929959412589848e-06, + "loss": 0.0122, + "step": 8490 + }, + { + "epoch": 2.07, + "learning_rate": 9.926013534085212e-06, + "loss": 0.0173, + "step": 8492 + }, + { + "epoch": 2.07, + "learning_rate": 9.922067667100829e-06, + "loss": 0.0238, + "step": 8494 + }, + { + "epoch": 2.07, + "learning_rate": 9.91812181225111e-06, + "loss": 0.0137, + "step": 8496 + }, + { + "epoch": 2.07, + "learning_rate": 9.914175970150449e-06, + "loss": 0.0096, + "step": 8498 + }, + { + "epoch": 2.07, + "learning_rate": 9.910230141413249e-06, + "loss": 0.011, + "step": 8500 + }, + { + "epoch": 2.07, + "learning_rate": 9.9062843266539e-06, + "loss": 0.0176, + "step": 8502 + }, + { + "epoch": 2.07, + "learning_rate": 9.902338526486799e-06, + "loss": 0.0095, + "step": 8504 + }, + { + "epoch": 2.07, + "learning_rate": 9.898392741526333e-06, + "loss": 0.0195, + "step": 8506 + }, + { + "epoch": 2.07, + "learning_rate": 9.8944469723869e-06, + "loss": 0.0152, + "step": 8508 + }, + { + "epoch": 2.07, + "learning_rate": 9.890501219682874e-06, + "loss": 0.0199, + "step": 8510 + }, + { + "epoch": 2.07, + "learning_rate": 9.88655548402865e-06, + "loss": 0.0102, + "step": 8512 + }, + { + "epoch": 2.07, + "learning_rate": 9.882609766038602e-06, + "loss": 0.0204, + "step": 8514 + }, + { + "epoch": 2.08, + "learning_rate": 9.878664066327114e-06, + "loss": 0.021, + "step": 8516 + }, + { + "epoch": 2.08, + "learning_rate": 9.874718385508555e-06, + "loss": 0.0066, + "step": 8518 + }, + { + "epoch": 2.08, + "learning_rate": 9.870772724197304e-06, + "loss": 0.0185, + "step": 8520 + }, + { + "epoch": 2.08, + "learning_rate": 9.866827083007725e-06, + "loss": 0.0136, + "step": 8522 + }, + { + "epoch": 2.08, + "learning_rate": 9.862881462554192e-06, + "loss": 0.0178, + "step": 8524 + }, + { + "epoch": 2.08, + "learning_rate": 9.858935863451062e-06, + "loss": 0.02, + "step": 8526 + }, + { + "epoch": 2.08, + "learning_rate": 9.854990286312695e-06, + "loss": 0.0274, + "step": 8528 + }, + { + "epoch": 2.08, + "learning_rate": 9.851044731753448e-06, + "loss": 0.0165, + "step": 8530 + }, + { + "epoch": 2.08, + "learning_rate": 9.847099200387682e-06, + "loss": 0.0165, + "step": 8532 + }, + { + "epoch": 2.08, + "learning_rate": 9.843153692829734e-06, + "loss": 0.0119, + "step": 8534 + }, + { + "epoch": 2.08, + "learning_rate": 9.839208209693956e-06, + "loss": 0.0188, + "step": 8536 + }, + { + "epoch": 2.08, + "learning_rate": 9.83526275159469e-06, + "loss": 0.0159, + "step": 8538 + }, + { + "epoch": 2.08, + "learning_rate": 9.831317319146277e-06, + "loss": 0.015, + "step": 8540 + }, + { + "epoch": 2.08, + "learning_rate": 9.827371912963042e-06, + "loss": 0.0186, + "step": 8542 + }, + { + "epoch": 2.08, + "learning_rate": 9.82342653365932e-06, + "loss": 0.0152, + "step": 8544 + }, + { + "epoch": 2.08, + "learning_rate": 9.819481181849439e-06, + "loss": 0.017, + "step": 8546 + }, + { + "epoch": 2.08, + "learning_rate": 9.81553585814772e-06, + "loss": 0.0277, + "step": 8548 + }, + { + "epoch": 2.08, + "learning_rate": 9.811590563168475e-06, + "loss": 0.0112, + "step": 8550 + }, + { + "epoch": 2.08, + "learning_rate": 9.807645297526019e-06, + "loss": 0.0134, + "step": 8552 + }, + { + "epoch": 2.08, + "learning_rate": 9.803700061834665e-06, + "loss": 0.0073, + "step": 8554 + }, + { + "epoch": 2.08, + "learning_rate": 9.799754856708713e-06, + "loss": 0.0141, + "step": 8556 + }, + { + "epoch": 2.09, + "learning_rate": 9.795809682762457e-06, + "loss": 0.0192, + "step": 8558 + }, + { + "epoch": 2.09, + "learning_rate": 9.791864540610198e-06, + "loss": 0.0137, + "step": 8560 + }, + { + "epoch": 2.09, + "learning_rate": 9.787919430866218e-06, + "loss": 0.02, + "step": 8562 + }, + { + "epoch": 2.09, + "learning_rate": 9.78397435414481e-06, + "loss": 0.0134, + "step": 8564 + }, + { + "epoch": 2.09, + "learning_rate": 9.780029311060241e-06, + "loss": 0.0227, + "step": 8566 + }, + { + "epoch": 2.09, + "learning_rate": 9.776084302226794e-06, + "loss": 0.0155, + "step": 8568 + }, + { + "epoch": 2.09, + "learning_rate": 9.77213932825873e-06, + "loss": 0.0134, + "step": 8570 + }, + { + "epoch": 2.09, + "learning_rate": 9.76819438977032e-06, + "loss": 0.0065, + "step": 8572 + }, + { + "epoch": 2.09, + "learning_rate": 9.764249487375812e-06, + "loss": 0.0187, + "step": 8574 + }, + { + "epoch": 2.09, + "learning_rate": 9.760304621689462e-06, + "loss": 0.0065, + "step": 8576 + }, + { + "epoch": 2.09, + "learning_rate": 9.756359793325515e-06, + "loss": 0.0164, + "step": 8578 + }, + { + "epoch": 2.09, + "learning_rate": 9.752415002898214e-06, + "loss": 0.0157, + "step": 8580 + }, + { + "epoch": 2.09, + "learning_rate": 9.74847025102179e-06, + "loss": 0.016, + "step": 8582 + }, + { + "epoch": 2.09, + "learning_rate": 9.744525538310468e-06, + "loss": 0.0155, + "step": 8584 + }, + { + "epoch": 2.09, + "learning_rate": 9.740580865378478e-06, + "loss": 0.0143, + "step": 8586 + }, + { + "epoch": 2.09, + "learning_rate": 9.736636232840026e-06, + "loss": 0.016, + "step": 8588 + }, + { + "epoch": 2.09, + "learning_rate": 9.732691641309328e-06, + "loss": 0.0172, + "step": 8590 + }, + { + "epoch": 2.09, + "learning_rate": 9.728747091400585e-06, + "loss": 0.0132, + "step": 8592 + }, + { + "epoch": 2.09, + "learning_rate": 9.724802583727996e-06, + "loss": 0.0129, + "step": 8594 + }, + { + "epoch": 2.09, + "learning_rate": 9.720858118905747e-06, + "loss": 0.0088, + "step": 8596 + }, + { + "epoch": 2.1, + "learning_rate": 9.716913697548025e-06, + "loss": 0.0079, + "step": 8598 + }, + { + "epoch": 2.1, + "learning_rate": 9.712969320269002e-06, + "loss": 0.0179, + "step": 8600 + }, + { + "epoch": 2.1, + "learning_rate": 9.709024987682855e-06, + "loss": 0.0148, + "step": 8602 + }, + { + "epoch": 2.1, + "learning_rate": 9.705080700403741e-06, + "loss": 0.0107, + "step": 8604 + }, + { + "epoch": 2.1, + "learning_rate": 9.701136459045814e-06, + "loss": 0.0142, + "step": 8606 + }, + { + "epoch": 2.1, + "learning_rate": 9.697192264223226e-06, + "loss": 0.0241, + "step": 8608 + }, + { + "epoch": 2.1, + "learning_rate": 9.693248116550123e-06, + "loss": 0.0194, + "step": 8610 + }, + { + "epoch": 2.1, + "learning_rate": 9.689304016640631e-06, + "loss": 0.0123, + "step": 8612 + }, + { + "epoch": 2.1, + "learning_rate": 9.685359965108878e-06, + "loss": 0.0185, + "step": 8614 + }, + { + "epoch": 2.1, + "learning_rate": 9.681415962568986e-06, + "loss": 0.0157, + "step": 8616 + }, + { + "epoch": 2.1, + "learning_rate": 9.677472009635064e-06, + "loss": 0.0108, + "step": 8618 + }, + { + "epoch": 2.1, + "learning_rate": 9.673528106921218e-06, + "loss": 0.0156, + "step": 8620 + }, + { + "epoch": 2.1, + "learning_rate": 9.669584255041538e-06, + "loss": 0.011, + "step": 8622 + }, + { + "epoch": 2.1, + "learning_rate": 9.66564045461012e-06, + "loss": 0.0096, + "step": 8624 + }, + { + "epoch": 2.1, + "learning_rate": 9.661696706241042e-06, + "loss": 0.0235, + "step": 8626 + }, + { + "epoch": 2.1, + "learning_rate": 9.657753010548368e-06, + "loss": 0.0199, + "step": 8628 + }, + { + "epoch": 2.1, + "learning_rate": 9.653809368146169e-06, + "loss": 0.0106, + "step": 8630 + }, + { + "epoch": 2.1, + "learning_rate": 9.6498657796485e-06, + "loss": 0.0185, + "step": 8632 + }, + { + "epoch": 2.1, + "learning_rate": 9.645922245669408e-06, + "loss": 0.0255, + "step": 8634 + }, + { + "epoch": 2.1, + "learning_rate": 9.641978766822925e-06, + "loss": 0.0211, + "step": 8636 + }, + { + "epoch": 2.1, + "learning_rate": 9.63803534372309e-06, + "loss": 0.0173, + "step": 8638 + }, + { + "epoch": 2.11, + "learning_rate": 9.634091976983916e-06, + "loss": 0.0089, + "step": 8640 + }, + { + "epoch": 2.11, + "learning_rate": 9.630148667219424e-06, + "loss": 0.012, + "step": 8642 + }, + { + "epoch": 2.11, + "learning_rate": 9.62620541504361e-06, + "loss": 0.0126, + "step": 8644 + }, + { + "epoch": 2.11, + "learning_rate": 9.62226222107047e-06, + "loss": 0.0112, + "step": 8646 + }, + { + "epoch": 2.11, + "learning_rate": 9.61831908591399e-06, + "loss": 0.0138, + "step": 8648 + }, + { + "epoch": 2.11, + "learning_rate": 9.614376010188151e-06, + "loss": 0.0267, + "step": 8650 + }, + { + "epoch": 2.11, + "learning_rate": 9.610432994506912e-06, + "loss": 0.0218, + "step": 8652 + }, + { + "epoch": 2.11, + "learning_rate": 9.606490039484237e-06, + "loss": 0.013, + "step": 8654 + }, + { + "epoch": 2.11, + "learning_rate": 9.602547145734069e-06, + "loss": 0.0159, + "step": 8656 + }, + { + "epoch": 2.11, + "learning_rate": 9.598604313870355e-06, + "loss": 0.0112, + "step": 8658 + }, + { + "epoch": 2.11, + "learning_rate": 9.594661544507015e-06, + "loss": 0.0212, + "step": 8660 + }, + { + "epoch": 2.11, + "learning_rate": 9.590718838257973e-06, + "loss": 0.013, + "step": 8662 + }, + { + "epoch": 2.11, + "learning_rate": 9.586776195737133e-06, + "loss": 0.0105, + "step": 8664 + }, + { + "epoch": 2.11, + "learning_rate": 9.582833617558406e-06, + "loss": 0.0126, + "step": 8666 + }, + { + "epoch": 2.11, + "learning_rate": 9.578891104335673e-06, + "loss": 0.0173, + "step": 8668 + }, + { + "epoch": 2.11, + "learning_rate": 9.574948656682813e-06, + "loss": 0.0141, + "step": 8670 + }, + { + "epoch": 2.11, + "learning_rate": 9.571006275213695e-06, + "loss": 0.018, + "step": 8672 + }, + { + "epoch": 2.11, + "learning_rate": 9.567063960542187e-06, + "loss": 0.0111, + "step": 8674 + }, + { + "epoch": 2.11, + "learning_rate": 9.563121713282126e-06, + "loss": 0.0166, + "step": 8676 + }, + { + "epoch": 2.11, + "learning_rate": 9.559179534047355e-06, + "loss": 0.012, + "step": 8678 + }, + { + "epoch": 2.12, + "learning_rate": 9.555237423451704e-06, + "loss": 0.0143, + "step": 8680 + }, + { + "epoch": 2.12, + "learning_rate": 9.551295382108988e-06, + "loss": 0.0153, + "step": 8682 + }, + { + "epoch": 2.12, + "learning_rate": 9.547353410633009e-06, + "loss": 0.0134, + "step": 8684 + }, + { + "epoch": 2.12, + "learning_rate": 9.543411509637564e-06, + "loss": 0.0213, + "step": 8686 + }, + { + "epoch": 2.12, + "learning_rate": 9.53946967973644e-06, + "loss": 0.012, + "step": 8688 + }, + { + "epoch": 2.12, + "learning_rate": 9.535527921543411e-06, + "loss": 0.0155, + "step": 8690 + }, + { + "epoch": 2.12, + "learning_rate": 9.531586235672232e-06, + "loss": 0.0325, + "step": 8692 + }, + { + "epoch": 2.12, + "learning_rate": 9.52764462273666e-06, + "loss": 0.0147, + "step": 8694 + }, + { + "epoch": 2.12, + "learning_rate": 9.52370308335043e-06, + "loss": 0.0074, + "step": 8696 + }, + { + "epoch": 2.12, + "learning_rate": 9.519761618127277e-06, + "loss": 0.0156, + "step": 8698 + }, + { + "epoch": 2.12, + "learning_rate": 9.515820227680909e-06, + "loss": 0.0173, + "step": 8700 + }, + { + "epoch": 2.12, + "learning_rate": 9.511878912625035e-06, + "loss": 0.0114, + "step": 8702 + }, + { + "epoch": 2.12, + "learning_rate": 9.507937673573343e-06, + "loss": 0.0156, + "step": 8704 + }, + { + "epoch": 2.12, + "learning_rate": 9.503996511139523e-06, + "loss": 0.017, + "step": 8706 + }, + { + "epoch": 2.12, + "learning_rate": 9.500055425937235e-06, + "loss": 0.0145, + "step": 8708 + }, + { + "epoch": 2.12, + "learning_rate": 9.496114418580144e-06, + "loss": 0.0165, + "step": 8710 + }, + { + "epoch": 2.12, + "learning_rate": 9.492173489681888e-06, + "loss": 0.0136, + "step": 8712 + }, + { + "epoch": 2.12, + "learning_rate": 9.488232639856106e-06, + "loss": 0.0187, + "step": 8714 + }, + { + "epoch": 2.12, + "learning_rate": 9.484291869716414e-06, + "loss": 0.018, + "step": 8716 + }, + { + "epoch": 2.12, + "learning_rate": 9.480351179876417e-06, + "loss": 0.0174, + "step": 8718 + }, + { + "epoch": 2.12, + "learning_rate": 9.476410570949718e-06, + "loss": 0.0121, + "step": 8720 + }, + { + "epoch": 2.13, + "learning_rate": 9.4724700435499e-06, + "loss": 0.0255, + "step": 8722 + }, + { + "epoch": 2.13, + "learning_rate": 9.468529598290526e-06, + "loss": 0.0123, + "step": 8724 + }, + { + "epoch": 2.13, + "learning_rate": 9.464589235785157e-06, + "loss": 0.0186, + "step": 8726 + }, + { + "epoch": 2.13, + "learning_rate": 9.460648956647339e-06, + "loss": 0.0174, + "step": 8728 + }, + { + "epoch": 2.13, + "learning_rate": 9.456708761490606e-06, + "loss": 0.0144, + "step": 8730 + }, + { + "epoch": 2.13, + "learning_rate": 9.452768650928472e-06, + "loss": 0.0173, + "step": 8732 + }, + { + "epoch": 2.13, + "learning_rate": 9.44882862557444e-06, + "loss": 0.0058, + "step": 8734 + }, + { + "epoch": 2.13, + "learning_rate": 9.444888686042007e-06, + "loss": 0.017, + "step": 8736 + }, + { + "epoch": 2.13, + "learning_rate": 9.440948832944654e-06, + "loss": 0.0162, + "step": 8738 + }, + { + "epoch": 2.13, + "learning_rate": 9.437009066895837e-06, + "loss": 0.0137, + "step": 8740 + }, + { + "epoch": 2.13, + "learning_rate": 9.433069388509015e-06, + "loss": 0.0128, + "step": 8742 + }, + { + "epoch": 2.13, + "learning_rate": 9.429129798397625e-06, + "loss": 0.0152, + "step": 8744 + }, + { + "epoch": 2.13, + "learning_rate": 9.425190297175096e-06, + "loss": 0.0218, + "step": 8746 + }, + { + "epoch": 2.13, + "learning_rate": 9.421250885454825e-06, + "loss": 0.012, + "step": 8748 + }, + { + "epoch": 2.13, + "learning_rate": 9.417311563850218e-06, + "loss": 0.0131, + "step": 8750 + }, + { + "epoch": 2.13, + "learning_rate": 9.41337233297466e-06, + "loss": 0.0191, + "step": 8752 + }, + { + "epoch": 2.13, + "learning_rate": 9.409433193441516e-06, + "loss": 0.0172, + "step": 8754 + }, + { + "epoch": 2.13, + "learning_rate": 9.405494145864134e-06, + "loss": 0.014, + "step": 8756 + }, + { + "epoch": 2.13, + "learning_rate": 9.401555190855863e-06, + "loss": 0.013, + "step": 8758 + }, + { + "epoch": 2.13, + "learning_rate": 9.397616329030023e-06, + "loss": 0.0126, + "step": 8760 + }, + { + "epoch": 2.13, + "learning_rate": 9.39367756099993e-06, + "loss": 0.0152, + "step": 8762 + }, + { + "epoch": 2.14, + "learning_rate": 9.389738887378873e-06, + "loss": 0.0269, + "step": 8764 + }, + { + "epoch": 2.14, + "learning_rate": 9.385800308780141e-06, + "loss": 0.0092, + "step": 8766 + }, + { + "epoch": 2.14, + "learning_rate": 9.381861825816995e-06, + "loss": 0.012, + "step": 8768 + }, + { + "epoch": 2.14, + "learning_rate": 9.377923439102693e-06, + "loss": 0.015, + "step": 8770 + }, + { + "epoch": 2.14, + "learning_rate": 9.373985149250464e-06, + "loss": 0.0181, + "step": 8772 + }, + { + "epoch": 2.14, + "learning_rate": 9.370046956873536e-06, + "loss": 0.0339, + "step": 8774 + }, + { + "epoch": 2.14, + "learning_rate": 9.366108862585113e-06, + "loss": 0.0125, + "step": 8776 + }, + { + "epoch": 2.14, + "learning_rate": 9.362170866998391e-06, + "loss": 0.0288, + "step": 8778 + }, + { + "epoch": 2.14, + "learning_rate": 9.358232970726541e-06, + "loss": 0.0241, + "step": 8780 + }, + { + "epoch": 2.14, + "learning_rate": 9.354295174382721e-06, + "loss": 0.006, + "step": 8782 + }, + { + "epoch": 2.14, + "learning_rate": 9.350357478580082e-06, + "loss": 0.0203, + "step": 8784 + }, + { + "epoch": 2.14, + "learning_rate": 9.346419883931752e-06, + "loss": 0.0129, + "step": 8786 + }, + { + "epoch": 2.14, + "learning_rate": 9.342482391050844e-06, + "loss": 0.0073, + "step": 8788 + }, + { + "epoch": 2.14, + "learning_rate": 9.338545000550453e-06, + "loss": 0.0185, + "step": 8790 + }, + { + "epoch": 2.14, + "learning_rate": 9.334607713043666e-06, + "loss": 0.0166, + "step": 8792 + }, + { + "epoch": 2.14, + "learning_rate": 9.330670529143545e-06, + "loss": 0.0145, + "step": 8794 + }, + { + "epoch": 2.14, + "learning_rate": 9.326733449463143e-06, + "loss": 0.0123, + "step": 8796 + }, + { + "epoch": 2.14, + "learning_rate": 9.322796474615487e-06, + "loss": 0.013, + "step": 8798 + }, + { + "epoch": 2.14, + "learning_rate": 9.3188596052136e-06, + "loss": 0.0127, + "step": 8800 + }, + { + "epoch": 2.14, + "learning_rate": 9.314922841870484e-06, + "loss": 0.0127, + "step": 8802 + }, + { + "epoch": 2.15, + "learning_rate": 9.310986185199115e-06, + "loss": 0.0096, + "step": 8804 + }, + { + "epoch": 2.15, + "learning_rate": 9.307049635812466e-06, + "loss": 0.0285, + "step": 8806 + }, + { + "epoch": 2.15, + "learning_rate": 9.303113194323491e-06, + "loss": 0.0219, + "step": 8808 + }, + { + "epoch": 2.15, + "learning_rate": 9.29917686134512e-06, + "loss": 0.0225, + "step": 8810 + }, + { + "epoch": 2.15, + "learning_rate": 9.295240637490269e-06, + "loss": 0.025, + "step": 8812 + }, + { + "epoch": 2.15, + "learning_rate": 9.29130452337184e-06, + "loss": 0.0174, + "step": 8814 + }, + { + "epoch": 2.15, + "learning_rate": 9.287368519602715e-06, + "loss": 0.0128, + "step": 8816 + }, + { + "epoch": 2.15, + "learning_rate": 9.283432626795764e-06, + "loss": 0.0231, + "step": 8818 + }, + { + "epoch": 2.15, + "learning_rate": 9.279496845563828e-06, + "loss": 0.0112, + "step": 8820 + }, + { + "epoch": 2.15, + "learning_rate": 9.275561176519747e-06, + "loss": 0.0259, + "step": 8822 + }, + { + "epoch": 2.15, + "learning_rate": 9.271625620276326e-06, + "loss": 0.019, + "step": 8824 + }, + { + "epoch": 2.15, + "learning_rate": 9.26769017744637e-06, + "loss": 0.0207, + "step": 8826 + }, + { + "epoch": 2.15, + "learning_rate": 9.263754848642649e-06, + "loss": 0.0091, + "step": 8828 + }, + { + "epoch": 2.15, + "learning_rate": 9.25981963447793e-06, + "loss": 0.0089, + "step": 8830 + }, + { + "epoch": 2.15, + "learning_rate": 9.255884535564952e-06, + "loss": 0.0198, + "step": 8832 + }, + { + "epoch": 2.15, + "learning_rate": 9.251949552516447e-06, + "loss": 0.0227, + "step": 8834 + }, + { + "epoch": 2.15, + "learning_rate": 9.248014685945113e-06, + "loss": 0.012, + "step": 8836 + }, + { + "epoch": 2.15, + "learning_rate": 9.24407993646364e-06, + "loss": 0.0074, + "step": 8838 + }, + { + "epoch": 2.15, + "learning_rate": 9.240145304684701e-06, + "loss": 0.0172, + "step": 8840 + }, + { + "epoch": 2.15, + "learning_rate": 9.236210791220956e-06, + "loss": 0.0239, + "step": 8842 + }, + { + "epoch": 2.15, + "learning_rate": 9.232276396685025e-06, + "loss": 0.0112, + "step": 8844 + }, + { + "epoch": 2.16, + "learning_rate": 9.228342121689531e-06, + "loss": 0.0156, + "step": 8846 + }, + { + "epoch": 2.16, + "learning_rate": 9.224407966847067e-06, + "loss": 0.0159, + "step": 8848 + }, + { + "epoch": 2.16, + "learning_rate": 9.220473932770217e-06, + "loss": 0.0163, + "step": 8850 + }, + { + "epoch": 2.16, + "learning_rate": 9.216540020071537e-06, + "loss": 0.0195, + "step": 8852 + }, + { + "epoch": 2.16, + "learning_rate": 9.212606229363563e-06, + "loss": 0.0209, + "step": 8854 + }, + { + "epoch": 2.16, + "learning_rate": 9.20867256125882e-06, + "loss": 0.0172, + "step": 8856 + }, + { + "epoch": 2.16, + "learning_rate": 9.204739016369817e-06, + "loss": 0.0261, + "step": 8858 + }, + { + "epoch": 2.16, + "learning_rate": 9.200805595309023e-06, + "loss": 0.0099, + "step": 8860 + }, + { + "epoch": 2.16, + "learning_rate": 9.196872298688908e-06, + "loss": 0.0131, + "step": 8862 + }, + { + "epoch": 2.16, + "learning_rate": 9.192939127121922e-06, + "loss": 0.0156, + "step": 8864 + }, + { + "epoch": 2.16, + "learning_rate": 9.189006081220486e-06, + "loss": 0.0161, + "step": 8866 + }, + { + "epoch": 2.16, + "learning_rate": 9.185073161597e-06, + "loss": 0.0178, + "step": 8868 + }, + { + "epoch": 2.16, + "learning_rate": 9.181140368863857e-06, + "loss": 0.0088, + "step": 8870 + }, + { + "epoch": 2.16, + "learning_rate": 9.177207703633417e-06, + "loss": 0.0244, + "step": 8872 + }, + { + "epoch": 2.16, + "learning_rate": 9.173275166518035e-06, + "loss": 0.014, + "step": 8874 + }, + { + "epoch": 2.16, + "learning_rate": 9.169342758130026e-06, + "loss": 0.0125, + "step": 8876 + }, + { + "epoch": 2.16, + "learning_rate": 9.165410479081704e-06, + "loss": 0.014, + "step": 8878 + }, + { + "epoch": 2.16, + "learning_rate": 9.161478329985349e-06, + "loss": 0.0164, + "step": 8880 + }, + { + "epoch": 2.16, + "learning_rate": 9.157546311453235e-06, + "loss": 0.0166, + "step": 8882 + }, + { + "epoch": 2.16, + "learning_rate": 9.153614424097597e-06, + "loss": 0.0131, + "step": 8884 + }, + { + "epoch": 2.17, + "learning_rate": 9.14968266853067e-06, + "loss": 0.0255, + "step": 8886 + }, + { + "epoch": 2.17, + "learning_rate": 9.145751045364648e-06, + "loss": 0.0166, + "step": 8888 + }, + { + "epoch": 2.17, + "learning_rate": 9.141819555211728e-06, + "loss": 0.0094, + "step": 8890 + }, + { + "epoch": 2.17, + "learning_rate": 9.137888198684061e-06, + "loss": 0.0128, + "step": 8892 + }, + { + "epoch": 2.17, + "learning_rate": 9.133956976393791e-06, + "loss": 0.0218, + "step": 8894 + }, + { + "epoch": 2.17, + "learning_rate": 9.130025888953044e-06, + "loss": 0.0141, + "step": 8896 + }, + { + "epoch": 2.17, + "learning_rate": 9.126094936973922e-06, + "loss": 0.0085, + "step": 8898 + }, + { + "epoch": 2.17, + "learning_rate": 9.122164121068498e-06, + "loss": 0.0211, + "step": 8900 + }, + { + "epoch": 2.17, + "learning_rate": 9.118233441848832e-06, + "loss": 0.0129, + "step": 8902 + }, + { + "epoch": 2.17, + "learning_rate": 9.11430289992696e-06, + "loss": 0.0174, + "step": 8904 + }, + { + "epoch": 2.17, + "learning_rate": 9.110372495914906e-06, + "loss": 0.0088, + "step": 8906 + }, + { + "epoch": 2.17, + "learning_rate": 9.106442230424654e-06, + "loss": 0.0166, + "step": 8908 + }, + { + "epoch": 2.17, + "learning_rate": 9.102512104068175e-06, + "loss": 0.0204, + "step": 8910 + }, + { + "epoch": 2.17, + "learning_rate": 9.098582117457429e-06, + "loss": 0.0136, + "step": 8912 + }, + { + "epoch": 2.17, + "learning_rate": 9.09465227120434e-06, + "loss": 0.0101, + "step": 8914 + }, + { + "epoch": 2.17, + "learning_rate": 9.090722565920813e-06, + "loss": 0.0125, + "step": 8916 + }, + { + "epoch": 2.17, + "learning_rate": 9.086793002218735e-06, + "loss": 0.0211, + "step": 8918 + }, + { + "epoch": 2.17, + "learning_rate": 9.08286358070997e-06, + "loss": 0.0104, + "step": 8920 + }, + { + "epoch": 2.17, + "learning_rate": 9.078934302006363e-06, + "loss": 0.0131, + "step": 8922 + }, + { + "epoch": 2.17, + "learning_rate": 9.075005166719724e-06, + "loss": 0.0114, + "step": 8924 + }, + { + "epoch": 2.17, + "learning_rate": 9.071076175461851e-06, + "loss": 0.0162, + "step": 8926 + }, + { + "epoch": 2.18, + "learning_rate": 9.067147328844526e-06, + "loss": 0.0186, + "step": 8928 + }, + { + "epoch": 2.18, + "learning_rate": 9.063218627479494e-06, + "loss": 0.0189, + "step": 8930 + }, + { + "epoch": 2.18, + "learning_rate": 9.059290071978482e-06, + "loss": 0.0134, + "step": 8932 + }, + { + "epoch": 2.18, + "learning_rate": 9.055361662953202e-06, + "loss": 0.0118, + "step": 8934 + }, + { + "epoch": 2.18, + "learning_rate": 9.051433401015333e-06, + "loss": 0.0183, + "step": 8936 + }, + { + "epoch": 2.18, + "learning_rate": 9.04750528677654e-06, + "loss": 0.0157, + "step": 8938 + }, + { + "epoch": 2.18, + "learning_rate": 9.043577320848452e-06, + "loss": 0.0164, + "step": 8940 + }, + { + "epoch": 2.18, + "learning_rate": 9.039649503842691e-06, + "loss": 0.0108, + "step": 8942 + }, + { + "epoch": 2.18, + "learning_rate": 9.035721836370845e-06, + "loss": 0.01, + "step": 8944 + }, + { + "epoch": 2.18, + "learning_rate": 9.031794319044486e-06, + "loss": 0.0088, + "step": 8946 + }, + { + "epoch": 2.18, + "learning_rate": 9.02786695247515e-06, + "loss": 0.015, + "step": 8948 + }, + { + "epoch": 2.18, + "learning_rate": 9.023939737274366e-06, + "loss": 0.0224, + "step": 8950 + }, + { + "epoch": 2.18, + "learning_rate": 9.020012674053627e-06, + "loss": 0.0158, + "step": 8952 + }, + { + "epoch": 2.18, + "learning_rate": 9.016085763424411e-06, + "loss": 0.0248, + "step": 8954 + }, + { + "epoch": 2.18, + "learning_rate": 9.012159005998165e-06, + "loss": 0.0136, + "step": 8956 + }, + { + "epoch": 2.18, + "learning_rate": 9.00823240238631e-06, + "loss": 0.0182, + "step": 8958 + }, + { + "epoch": 2.18, + "learning_rate": 9.004305953200258e-06, + "loss": 0.0144, + "step": 8960 + }, + { + "epoch": 2.18, + "learning_rate": 9.000379659051383e-06, + "loss": 0.0132, + "step": 8962 + }, + { + "epoch": 2.18, + "learning_rate": 8.99645352055104e-06, + "loss": 0.0215, + "step": 8964 + }, + { + "epoch": 2.18, + "learning_rate": 8.992527538310554e-06, + "loss": 0.0095, + "step": 8966 + }, + { + "epoch": 2.19, + "learning_rate": 8.988601712941237e-06, + "loss": 0.0113, + "step": 8968 + }, + { + "epoch": 2.19, + "learning_rate": 8.984676045054365e-06, + "loss": 0.0118, + "step": 8970 + }, + { + "epoch": 2.19, + "learning_rate": 8.980750535261199e-06, + "loss": 0.0148, + "step": 8972 + }, + { + "epoch": 2.19, + "learning_rate": 8.976825184172966e-06, + "loss": 0.0191, + "step": 8974 + }, + { + "epoch": 2.19, + "learning_rate": 8.972899992400878e-06, + "loss": 0.0156, + "step": 8976 + }, + { + "epoch": 2.19, + "learning_rate": 8.968974960556117e-06, + "loss": 0.0088, + "step": 8978 + }, + { + "epoch": 2.19, + "learning_rate": 8.965050089249832e-06, + "loss": 0.0155, + "step": 8980 + }, + { + "epoch": 2.19, + "learning_rate": 8.961125379093165e-06, + "loss": 0.0198, + "step": 8982 + }, + { + "epoch": 2.19, + "learning_rate": 8.95720083069722e-06, + "loss": 0.0202, + "step": 8984 + }, + { + "epoch": 2.19, + "learning_rate": 8.953276444673084e-06, + "loss": 0.0103, + "step": 8986 + }, + { + "epoch": 2.19, + "learning_rate": 8.949352221631805e-06, + "loss": 0.0183, + "step": 8988 + }, + { + "epoch": 2.19, + "learning_rate": 8.94542816218442e-06, + "loss": 0.0138, + "step": 8990 + }, + { + "epoch": 2.19, + "learning_rate": 8.941504266941932e-06, + "loss": 0.011, + "step": 8992 + }, + { + "epoch": 2.19, + "learning_rate": 8.937580536515328e-06, + "loss": 0.0115, + "step": 8994 + }, + { + "epoch": 2.19, + "learning_rate": 8.933656971515554e-06, + "loss": 0.0231, + "step": 8996 + }, + { + "epoch": 2.19, + "learning_rate": 8.929733572553546e-06, + "loss": 0.0164, + "step": 8998 + }, + { + "epoch": 2.19, + "learning_rate": 8.925810340240203e-06, + "loss": 0.0102, + "step": 9000 + }, + { + "epoch": 2.19, + "learning_rate": 8.921887275186408e-06, + "loss": 0.016, + "step": 9002 + }, + { + "epoch": 2.19, + "learning_rate": 8.917964378003003e-06, + "loss": 0.0266, + "step": 9004 + }, + { + "epoch": 2.19, + "learning_rate": 8.914041649300822e-06, + "loss": 0.0162, + "step": 9006 + }, + { + "epoch": 2.19, + "learning_rate": 8.910119089690657e-06, + "loss": 0.0125, + "step": 9008 + }, + { + "epoch": 2.2, + "learning_rate": 8.906196699783286e-06, + "loss": 0.0257, + "step": 9010 + }, + { + "epoch": 2.2, + "learning_rate": 8.902274480189454e-06, + "loss": 0.0151, + "step": 9012 + }, + { + "epoch": 2.2, + "learning_rate": 8.898352431519876e-06, + "loss": 0.009, + "step": 9014 + }, + { + "epoch": 2.2, + "learning_rate": 8.894430554385249e-06, + "loss": 0.0126, + "step": 9016 + }, + { + "epoch": 2.2, + "learning_rate": 8.890508849396243e-06, + "loss": 0.0139, + "step": 9018 + }, + { + "epoch": 2.2, + "learning_rate": 8.88658731716349e-06, + "loss": 0.0221, + "step": 9020 + }, + { + "epoch": 2.2, + "learning_rate": 8.882665958297603e-06, + "loss": 0.0121, + "step": 9022 + }, + { + "epoch": 2.2, + "learning_rate": 8.878744773409171e-06, + "loss": 0.0136, + "step": 9024 + }, + { + "epoch": 2.2, + "learning_rate": 8.874823763108755e-06, + "loss": 0.0203, + "step": 9026 + }, + { + "epoch": 2.2, + "learning_rate": 8.870902928006882e-06, + "loss": 0.0075, + "step": 9028 + }, + { + "epoch": 2.2, + "learning_rate": 8.866982268714054e-06, + "loss": 0.0207, + "step": 9030 + }, + { + "epoch": 2.2, + "learning_rate": 8.863061785840753e-06, + "loss": 0.0118, + "step": 9032 + }, + { + "epoch": 2.2, + "learning_rate": 8.859141479997427e-06, + "loss": 0.0199, + "step": 9034 + }, + { + "epoch": 2.2, + "learning_rate": 8.855221351794493e-06, + "loss": 0.0201, + "step": 9036 + }, + { + "epoch": 2.2, + "learning_rate": 8.851301401842348e-06, + "loss": 0.0138, + "step": 9038 + }, + { + "epoch": 2.2, + "learning_rate": 8.84738163075136e-06, + "loss": 0.024, + "step": 9040 + }, + { + "epoch": 2.2, + "learning_rate": 8.843462039131869e-06, + "loss": 0.0081, + "step": 9042 + }, + { + "epoch": 2.2, + "learning_rate": 8.839542627594177e-06, + "loss": 0.0243, + "step": 9044 + }, + { + "epoch": 2.2, + "learning_rate": 8.835623396748574e-06, + "loss": 0.0228, + "step": 9046 + }, + { + "epoch": 2.2, + "learning_rate": 8.83170434720531e-06, + "loss": 0.0102, + "step": 9048 + }, + { + "epoch": 2.21, + "learning_rate": 8.827785479574616e-06, + "loss": 0.0072, + "step": 9050 + }, + { + "epoch": 2.21, + "learning_rate": 8.823866794466683e-06, + "loss": 0.0144, + "step": 9052 + }, + { + "epoch": 2.21, + "learning_rate": 8.819948292491686e-06, + "loss": 0.0083, + "step": 9054 + }, + { + "epoch": 2.21, + "learning_rate": 8.81602997425976e-06, + "loss": 0.0149, + "step": 9056 + }, + { + "epoch": 2.21, + "learning_rate": 8.812111840381025e-06, + "loss": 0.005, + "step": 9058 + }, + { + "epoch": 2.21, + "learning_rate": 8.808193891465555e-06, + "loss": 0.0089, + "step": 9060 + }, + { + "epoch": 2.21, + "learning_rate": 8.804276128123412e-06, + "loss": 0.0104, + "step": 9062 + }, + { + "epoch": 2.21, + "learning_rate": 8.800358550964618e-06, + "loss": 0.0129, + "step": 9064 + }, + { + "epoch": 2.21, + "learning_rate": 8.796441160599175e-06, + "loss": 0.0119, + "step": 9066 + }, + { + "epoch": 2.21, + "learning_rate": 8.792523957637043e-06, + "loss": 0.0098, + "step": 9068 + }, + { + "epoch": 2.21, + "learning_rate": 8.788606942688163e-06, + "loss": 0.0156, + "step": 9070 + }, + { + "epoch": 2.21, + "learning_rate": 8.784690116362444e-06, + "loss": 0.0075, + "step": 9072 + }, + { + "epoch": 2.21, + "learning_rate": 8.780773479269773e-06, + "loss": 0.0176, + "step": 9074 + }, + { + "epoch": 2.21, + "learning_rate": 8.776857032019991e-06, + "loss": 0.017, + "step": 9076 + }, + { + "epoch": 2.21, + "learning_rate": 8.772940775222918e-06, + "loss": 0.0137, + "step": 9078 + }, + { + "epoch": 2.21, + "learning_rate": 8.769024709488353e-06, + "loss": 0.0114, + "step": 9080 + }, + { + "epoch": 2.21, + "learning_rate": 8.765108835426057e-06, + "loss": 0.0167, + "step": 9082 + }, + { + "epoch": 2.21, + "learning_rate": 8.761193153645753e-06, + "loss": 0.014, + "step": 9084 + }, + { + "epoch": 2.21, + "learning_rate": 8.757277664757148e-06, + "loss": 0.0091, + "step": 9086 + }, + { + "epoch": 2.21, + "learning_rate": 8.753362369369915e-06, + "loss": 0.0123, + "step": 9088 + }, + { + "epoch": 2.21, + "learning_rate": 8.749447268093697e-06, + "loss": 0.0146, + "step": 9090 + }, + { + "epoch": 2.22, + "learning_rate": 8.745532361538094e-06, + "loss": 0.0148, + "step": 9092 + }, + { + "epoch": 2.22, + "learning_rate": 8.741617650312699e-06, + "loss": 0.0202, + "step": 9094 + }, + { + "epoch": 2.22, + "learning_rate": 8.737703135027058e-06, + "loss": 0.0158, + "step": 9096 + }, + { + "epoch": 2.22, + "learning_rate": 8.733788816290694e-06, + "loss": 0.0101, + "step": 9098 + }, + { + "epoch": 2.22, + "learning_rate": 8.729874694713089e-06, + "loss": 0.0253, + "step": 9100 + }, + { + "epoch": 2.22, + "learning_rate": 8.725960770903706e-06, + "loss": 0.017, + "step": 9102 + }, + { + "epoch": 2.22, + "learning_rate": 8.722047045471977e-06, + "loss": 0.0148, + "step": 9104 + }, + { + "epoch": 2.22, + "learning_rate": 8.718133519027295e-06, + "loss": 0.0163, + "step": 9106 + }, + { + "epoch": 2.22, + "learning_rate": 8.714220192179023e-06, + "loss": 0.0191, + "step": 9108 + }, + { + "epoch": 2.22, + "learning_rate": 8.7103070655365e-06, + "loss": 0.009, + "step": 9110 + }, + { + "epoch": 2.22, + "learning_rate": 8.706394139709032e-06, + "loss": 0.0096, + "step": 9112 + }, + { + "epoch": 2.22, + "learning_rate": 8.702481415305885e-06, + "loss": 0.0175, + "step": 9114 + }, + { + "epoch": 2.22, + "learning_rate": 8.698568892936301e-06, + "loss": 0.022, + "step": 9116 + }, + { + "epoch": 2.22, + "learning_rate": 8.694656573209494e-06, + "loss": 0.0122, + "step": 9118 + }, + { + "epoch": 2.22, + "learning_rate": 8.690744456734644e-06, + "loss": 0.0156, + "step": 9120 + }, + { + "epoch": 2.22, + "learning_rate": 8.68683254412089e-06, + "loss": 0.0205, + "step": 9122 + }, + { + "epoch": 2.22, + "learning_rate": 8.682920835977347e-06, + "loss": 0.018, + "step": 9124 + }, + { + "epoch": 2.22, + "learning_rate": 8.679009332913106e-06, + "loss": 0.0079, + "step": 9126 + }, + { + "epoch": 2.22, + "learning_rate": 8.675098035537214e-06, + "loss": 0.014, + "step": 9128 + }, + { + "epoch": 2.22, + "learning_rate": 8.671186944458685e-06, + "loss": 0.0166, + "step": 9130 + }, + { + "epoch": 2.23, + "learning_rate": 8.667276060286511e-06, + "loss": 0.0133, + "step": 9132 + }, + { + "epoch": 2.23, + "learning_rate": 8.663365383629643e-06, + "loss": 0.0172, + "step": 9134 + }, + { + "epoch": 2.23, + "learning_rate": 8.659454915097011e-06, + "loss": 0.0172, + "step": 9136 + }, + { + "epoch": 2.23, + "learning_rate": 8.655544655297494e-06, + "loss": 0.0196, + "step": 9138 + }, + { + "epoch": 2.23, + "learning_rate": 8.651634604839958e-06, + "loss": 0.0117, + "step": 9140 + }, + { + "epoch": 2.23, + "learning_rate": 8.647724764333223e-06, + "loss": 0.012, + "step": 9142 + }, + { + "epoch": 2.23, + "learning_rate": 8.643815134386085e-06, + "loss": 0.012, + "step": 9144 + }, + { + "epoch": 2.23, + "learning_rate": 8.6399057156073e-06, + "loss": 0.0103, + "step": 9146 + }, + { + "epoch": 2.23, + "learning_rate": 8.635996508605593e-06, + "loss": 0.008, + "step": 9148 + }, + { + "epoch": 2.23, + "learning_rate": 8.63208751398966e-06, + "loss": 0.0147, + "step": 9150 + }, + { + "epoch": 2.23, + "learning_rate": 8.628178732368164e-06, + "loss": 0.0117, + "step": 9152 + }, + { + "epoch": 2.23, + "learning_rate": 8.624270164349726e-06, + "loss": 0.0149, + "step": 9154 + }, + { + "epoch": 2.23, + "learning_rate": 8.620361810542944e-06, + "loss": 0.0156, + "step": 9156 + }, + { + "epoch": 2.23, + "learning_rate": 8.616453671556375e-06, + "loss": 0.0113, + "step": 9158 + }, + { + "epoch": 2.23, + "learning_rate": 8.612545747998555e-06, + "loss": 0.0112, + "step": 9160 + }, + { + "epoch": 2.23, + "learning_rate": 8.60863804047797e-06, + "loss": 0.0132, + "step": 9162 + }, + { + "epoch": 2.23, + "learning_rate": 8.604730549603077e-06, + "loss": 0.0187, + "step": 9164 + }, + { + "epoch": 2.23, + "learning_rate": 8.60082327598231e-06, + "loss": 0.0096, + "step": 9166 + }, + { + "epoch": 2.23, + "learning_rate": 8.59691622022406e-06, + "loss": 0.006, + "step": 9168 + }, + { + "epoch": 2.23, + "learning_rate": 8.593009382936679e-06, + "loss": 0.0238, + "step": 9170 + }, + { + "epoch": 2.23, + "learning_rate": 8.589102764728495e-06, + "loss": 0.0159, + "step": 9172 + }, + { + "epoch": 2.24, + "learning_rate": 8.585196366207803e-06, + "loss": 0.026, + "step": 9174 + }, + { + "epoch": 2.24, + "learning_rate": 8.581290187982855e-06, + "loss": 0.0217, + "step": 9176 + }, + { + "epoch": 2.24, + "learning_rate": 8.57738423066187e-06, + "loss": 0.014, + "step": 9178 + }, + { + "epoch": 2.24, + "learning_rate": 8.57347849485304e-06, + "loss": 0.0229, + "step": 9180 + }, + { + "epoch": 2.24, + "learning_rate": 8.569572981164516e-06, + "loss": 0.0201, + "step": 9182 + }, + { + "epoch": 2.24, + "learning_rate": 8.56566769020442e-06, + "loss": 0.0182, + "step": 9184 + }, + { + "epoch": 2.24, + "learning_rate": 8.56176262258083e-06, + "loss": 0.0146, + "step": 9186 + }, + { + "epoch": 2.24, + "learning_rate": 8.557857778901798e-06, + "loss": 0.0101, + "step": 9188 + }, + { + "epoch": 2.24, + "learning_rate": 8.553953159775335e-06, + "loss": 0.0198, + "step": 9190 + }, + { + "epoch": 2.24, + "learning_rate": 8.550048765809427e-06, + "loss": 0.0132, + "step": 9192 + }, + { + "epoch": 2.24, + "learning_rate": 8.54614459761201e-06, + "loss": 0.0035, + "step": 9194 + }, + { + "epoch": 2.24, + "learning_rate": 8.542240655790997e-06, + "loss": 0.0085, + "step": 9196 + }, + { + "epoch": 2.24, + "learning_rate": 8.538336940954259e-06, + "loss": 0.0122, + "step": 9198 + }, + { + "epoch": 2.24, + "learning_rate": 8.534433453709642e-06, + "loss": 0.0296, + "step": 9200 + }, + { + "epoch": 2.24, + "learning_rate": 8.530530194664937e-06, + "loss": 0.0117, + "step": 9202 + }, + { + "epoch": 2.24, + "learning_rate": 8.526627164427919e-06, + "loss": 0.0095, + "step": 9204 + }, + { + "epoch": 2.24, + "learning_rate": 8.522724363606317e-06, + "loss": 0.0139, + "step": 9206 + }, + { + "epoch": 2.24, + "learning_rate": 8.518821792807832e-06, + "loss": 0.0198, + "step": 9208 + }, + { + "epoch": 2.24, + "learning_rate": 8.514919452640117e-06, + "loss": 0.0138, + "step": 9210 + }, + { + "epoch": 2.24, + "learning_rate": 8.511017343710796e-06, + "loss": 0.0154, + "step": 9212 + }, + { + "epoch": 2.25, + "learning_rate": 8.507115466627462e-06, + "loss": 0.0152, + "step": 9214 + }, + { + "epoch": 2.25, + "learning_rate": 8.50321382199767e-06, + "loss": 0.0068, + "step": 9216 + }, + { + "epoch": 2.25, + "learning_rate": 8.499312410428928e-06, + "loss": 0.0157, + "step": 9218 + }, + { + "epoch": 2.25, + "learning_rate": 8.495411232528718e-06, + "loss": 0.0143, + "step": 9220 + }, + { + "epoch": 2.25, + "learning_rate": 8.491510288904485e-06, + "loss": 0.0238, + "step": 9222 + }, + { + "epoch": 2.25, + "learning_rate": 8.487609580163636e-06, + "loss": 0.0176, + "step": 9224 + }, + { + "epoch": 2.25, + "learning_rate": 8.48370910691354e-06, + "loss": 0.0176, + "step": 9226 + }, + { + "epoch": 2.25, + "learning_rate": 8.479808869761528e-06, + "loss": 0.0179, + "step": 9228 + }, + { + "epoch": 2.25, + "learning_rate": 8.475908869314901e-06, + "loss": 0.0164, + "step": 9230 + }, + { + "epoch": 2.25, + "learning_rate": 8.47200910618092e-06, + "loss": 0.0071, + "step": 9232 + }, + { + "epoch": 2.25, + "learning_rate": 8.4681095809668e-06, + "loss": 0.0126, + "step": 9234 + }, + { + "epoch": 2.25, + "learning_rate": 8.464210294279734e-06, + "loss": 0.0111, + "step": 9236 + }, + { + "epoch": 2.25, + "learning_rate": 8.46031124672687e-06, + "loss": 0.0146, + "step": 9238 + }, + { + "epoch": 2.25, + "learning_rate": 8.45641243891532e-06, + "loss": 0.0155, + "step": 9240 + }, + { + "epoch": 2.25, + "learning_rate": 8.452513871452153e-06, + "loss": 0.0194, + "step": 9242 + }, + { + "epoch": 2.25, + "learning_rate": 8.448615544944412e-06, + "loss": 0.0065, + "step": 9244 + }, + { + "epoch": 2.25, + "learning_rate": 8.44471745999909e-06, + "loss": 0.009, + "step": 9246 + }, + { + "epoch": 2.25, + "learning_rate": 8.440819617223157e-06, + "loss": 0.0129, + "step": 9248 + }, + { + "epoch": 2.25, + "learning_rate": 8.436922017223528e-06, + "loss": 0.0103, + "step": 9250 + }, + { + "epoch": 2.25, + "learning_rate": 8.433024660607094e-06, + "loss": 0.0141, + "step": 9252 + }, + { + "epoch": 2.25, + "learning_rate": 8.429127547980702e-06, + "loss": 0.009, + "step": 9254 + }, + { + "epoch": 2.26, + "learning_rate": 8.425230679951166e-06, + "loss": 0.0178, + "step": 9256 + }, + { + "epoch": 2.26, + "learning_rate": 8.42133405712525e-06, + "loss": 0.0091, + "step": 9258 + }, + { + "epoch": 2.26, + "learning_rate": 8.417437680109697e-06, + "loss": 0.015, + "step": 9260 + }, + { + "epoch": 2.26, + "learning_rate": 8.413541549511195e-06, + "loss": 0.0134, + "step": 9262 + }, + { + "epoch": 2.26, + "learning_rate": 8.40964566593641e-06, + "loss": 0.0182, + "step": 9264 + }, + { + "epoch": 2.26, + "learning_rate": 8.405750029991953e-06, + "loss": 0.0218, + "step": 9266 + }, + { + "epoch": 2.26, + "learning_rate": 8.401854642284405e-06, + "loss": 0.0159, + "step": 9268 + }, + { + "epoch": 2.26, + "learning_rate": 8.39795950342031e-06, + "loss": 0.0115, + "step": 9270 + }, + { + "epoch": 2.26, + "learning_rate": 8.394064614006174e-06, + "loss": 0.0147, + "step": 9272 + }, + { + "epoch": 2.26, + "learning_rate": 8.390169974648456e-06, + "loss": 0.0139, + "step": 9274 + }, + { + "epoch": 2.26, + "learning_rate": 8.38627558595358e-06, + "loss": 0.0148, + "step": 9276 + }, + { + "epoch": 2.26, + "learning_rate": 8.382381448527934e-06, + "loss": 0.017, + "step": 9278 + }, + { + "epoch": 2.26, + "learning_rate": 8.378487562977872e-06, + "loss": 0.0147, + "step": 9280 + }, + { + "epoch": 2.26, + "learning_rate": 8.374593929909694e-06, + "loss": 0.0081, + "step": 9282 + }, + { + "epoch": 2.26, + "learning_rate": 8.370700549929667e-06, + "loss": 0.0156, + "step": 9284 + }, + { + "epoch": 2.26, + "learning_rate": 8.366807423644025e-06, + "loss": 0.0098, + "step": 9286 + }, + { + "epoch": 2.26, + "learning_rate": 8.362914551658958e-06, + "loss": 0.0168, + "step": 9288 + }, + { + "epoch": 2.26, + "learning_rate": 8.359021934580607e-06, + "loss": 0.0082, + "step": 9290 + }, + { + "epoch": 2.26, + "learning_rate": 8.355129573015092e-06, + "loss": 0.0168, + "step": 9292 + }, + { + "epoch": 2.26, + "learning_rate": 8.35123746756848e-06, + "loss": 0.0085, + "step": 9294 + }, + { + "epoch": 2.27, + "learning_rate": 8.347345618846807e-06, + "loss": 0.0146, + "step": 9296 + }, + { + "epoch": 2.27, + "learning_rate": 8.343454027456051e-06, + "loss": 0.0139, + "step": 9298 + }, + { + "epoch": 2.27, + "learning_rate": 8.339562694002171e-06, + "loss": 0.0111, + "step": 9300 + }, + { + "epoch": 2.27, + "learning_rate": 8.33567161909108e-06, + "loss": 0.0099, + "step": 9302 + }, + { + "epoch": 2.27, + "learning_rate": 8.331780803328643e-06, + "loss": 0.0134, + "step": 9304 + }, + { + "epoch": 2.27, + "learning_rate": 8.32789024732069e-06, + "loss": 0.0132, + "step": 9306 + }, + { + "epoch": 2.27, + "learning_rate": 8.323999951673013e-06, + "loss": 0.0239, + "step": 9308 + }, + { + "epoch": 2.27, + "learning_rate": 8.320109916991357e-06, + "loss": 0.0204, + "step": 9310 + }, + { + "epoch": 2.27, + "learning_rate": 8.316220143881438e-06, + "loss": 0.0128, + "step": 9312 + }, + { + "epoch": 2.27, + "learning_rate": 8.312330632948911e-06, + "loss": 0.0132, + "step": 9314 + }, + { + "epoch": 2.27, + "learning_rate": 8.308441384799413e-06, + "loss": 0.0197, + "step": 9316 + }, + { + "epoch": 2.27, + "learning_rate": 8.304552400038523e-06, + "loss": 0.0251, + "step": 9318 + }, + { + "epoch": 2.27, + "learning_rate": 8.300663679271794e-06, + "loss": 0.0129, + "step": 9320 + }, + { + "epoch": 2.27, + "learning_rate": 8.296775223104722e-06, + "loss": 0.0171, + "step": 9322 + }, + { + "epoch": 2.27, + "learning_rate": 8.292887032142772e-06, + "loss": 0.0094, + "step": 9324 + }, + { + "epoch": 2.27, + "learning_rate": 8.288999106991363e-06, + "loss": 0.0074, + "step": 9326 + }, + { + "epoch": 2.27, + "learning_rate": 8.28511144825588e-06, + "loss": 0.0112, + "step": 9328 + }, + { + "epoch": 2.27, + "learning_rate": 8.281224056541655e-06, + "loss": 0.0143, + "step": 9330 + }, + { + "epoch": 2.27, + "learning_rate": 8.277336932453985e-06, + "loss": 0.0112, + "step": 9332 + }, + { + "epoch": 2.27, + "learning_rate": 8.27345007659813e-06, + "loss": 0.0205, + "step": 9334 + }, + { + "epoch": 2.27, + "learning_rate": 8.269563489579302e-06, + "loss": 0.0104, + "step": 9336 + }, + { + "epoch": 2.28, + "learning_rate": 8.265677172002669e-06, + "loss": 0.0109, + "step": 9338 + }, + { + "epoch": 2.28, + "learning_rate": 8.261791124473362e-06, + "loss": 0.0121, + "step": 9340 + }, + { + "epoch": 2.28, + "learning_rate": 8.257905347596468e-06, + "loss": 0.0133, + "step": 9342 + }, + { + "epoch": 2.28, + "learning_rate": 8.254019841977036e-06, + "loss": 0.0299, + "step": 9344 + }, + { + "epoch": 2.28, + "learning_rate": 8.25013460822006e-06, + "loss": 0.0064, + "step": 9346 + }, + { + "epoch": 2.28, + "learning_rate": 8.246249646930507e-06, + "loss": 0.0174, + "step": 9348 + }, + { + "epoch": 2.28, + "learning_rate": 8.242364958713297e-06, + "loss": 0.0208, + "step": 9350 + }, + { + "epoch": 2.28, + "learning_rate": 8.238480544173304e-06, + "loss": 0.0113, + "step": 9352 + }, + { + "epoch": 2.28, + "learning_rate": 8.234596403915357e-06, + "loss": 0.0244, + "step": 9354 + }, + { + "epoch": 2.28, + "learning_rate": 8.23071253854425e-06, + "loss": 0.0177, + "step": 9356 + }, + { + "epoch": 2.28, + "learning_rate": 8.22682894866473e-06, + "loss": 0.0095, + "step": 9358 + }, + { + "epoch": 2.28, + "learning_rate": 8.222945634881504e-06, + "loss": 0.0213, + "step": 9360 + }, + { + "epoch": 2.28, + "learning_rate": 8.219062597799227e-06, + "loss": 0.027, + "step": 9362 + }, + { + "epoch": 2.28, + "learning_rate": 8.215179838022524e-06, + "loss": 0.0194, + "step": 9364 + }, + { + "epoch": 2.28, + "learning_rate": 8.211297356155966e-06, + "loss": 0.0125, + "step": 9366 + }, + { + "epoch": 2.28, + "learning_rate": 8.207415152804091e-06, + "loss": 0.0077, + "step": 9368 + }, + { + "epoch": 2.28, + "learning_rate": 8.20353322857138e-06, + "loss": 0.0225, + "step": 9370 + }, + { + "epoch": 2.28, + "learning_rate": 8.199651584062285e-06, + "loss": 0.0196, + "step": 9372 + }, + { + "epoch": 2.28, + "learning_rate": 8.195770219881203e-06, + "loss": 0.0177, + "step": 9374 + }, + { + "epoch": 2.28, + "learning_rate": 8.191889136632498e-06, + "loss": 0.022, + "step": 9376 + }, + { + "epoch": 2.29, + "learning_rate": 8.188008334920475e-06, + "loss": 0.0162, + "step": 9378 + }, + { + "epoch": 2.29, + "learning_rate": 8.184127815349415e-06, + "loss": 0.0109, + "step": 9380 + }, + { + "epoch": 2.29, + "learning_rate": 8.180247578523535e-06, + "loss": 0.0177, + "step": 9382 + }, + { + "epoch": 2.29, + "learning_rate": 8.176367625047026e-06, + "loss": 0.0119, + "step": 9384 + }, + { + "epoch": 2.29, + "learning_rate": 8.172487955524022e-06, + "loss": 0.0108, + "step": 9386 + }, + { + "epoch": 2.29, + "learning_rate": 8.168608570558614e-06, + "loss": 0.015, + "step": 9388 + }, + { + "epoch": 2.29, + "learning_rate": 8.164729470754855e-06, + "loss": 0.0097, + "step": 9390 + }, + { + "epoch": 2.29, + "learning_rate": 8.160850656716757e-06, + "loss": 0.0104, + "step": 9392 + }, + { + "epoch": 2.29, + "learning_rate": 8.156972129048273e-06, + "loss": 0.0126, + "step": 9394 + }, + { + "epoch": 2.29, + "learning_rate": 8.153093888353318e-06, + "loss": 0.0094, + "step": 9396 + }, + { + "epoch": 2.29, + "learning_rate": 8.14921593523577e-06, + "loss": 0.015, + "step": 9398 + }, + { + "epoch": 2.29, + "learning_rate": 8.145338270299454e-06, + "loss": 0.0091, + "step": 9400 + }, + { + "epoch": 2.29, + "learning_rate": 8.141460894148149e-06, + "loss": 0.0176, + "step": 9402 + }, + { + "epoch": 2.29, + "learning_rate": 8.137583807385595e-06, + "loss": 0.0122, + "step": 9404 + }, + { + "epoch": 2.29, + "learning_rate": 8.133707010615485e-06, + "loss": 0.0136, + "step": 9406 + }, + { + "epoch": 2.29, + "learning_rate": 8.129830504441466e-06, + "loss": 0.0128, + "step": 9408 + }, + { + "epoch": 2.29, + "learning_rate": 8.125954289467132e-06, + "loss": 0.0161, + "step": 9410 + }, + { + "epoch": 2.29, + "learning_rate": 8.122078366296048e-06, + "loss": 0.015, + "step": 9412 + }, + { + "epoch": 2.29, + "learning_rate": 8.118202735531724e-06, + "loss": 0.0145, + "step": 9414 + }, + { + "epoch": 2.29, + "learning_rate": 8.114327397777624e-06, + "loss": 0.0213, + "step": 9416 + }, + { + "epoch": 2.29, + "learning_rate": 8.110452353637165e-06, + "loss": 0.0215, + "step": 9418 + }, + { + "epoch": 2.3, + "learning_rate": 8.106577603713724e-06, + "loss": 0.0125, + "step": 9420 + }, + { + "epoch": 2.3, + "learning_rate": 8.102703148610627e-06, + "loss": 0.0101, + "step": 9422 + }, + { + "epoch": 2.3, + "learning_rate": 8.098828988931162e-06, + "loss": 0.0123, + "step": 9424 + }, + { + "epoch": 2.3, + "learning_rate": 8.094955125278555e-06, + "loss": 0.0113, + "step": 9426 + }, + { + "epoch": 2.3, + "learning_rate": 8.091081558256006e-06, + "loss": 0.0085, + "step": 9428 + }, + { + "epoch": 2.3, + "learning_rate": 8.087208288466653e-06, + "loss": 0.0138, + "step": 9430 + }, + { + "epoch": 2.3, + "learning_rate": 8.0833353165136e-06, + "loss": 0.0117, + "step": 9432 + }, + { + "epoch": 2.3, + "learning_rate": 8.07946264299989e-06, + "loss": 0.0185, + "step": 9434 + }, + { + "epoch": 2.3, + "learning_rate": 8.075590268528535e-06, + "loss": 0.0208, + "step": 9436 + }, + { + "epoch": 2.3, + "learning_rate": 8.071718193702486e-06, + "loss": 0.0065, + "step": 9438 + }, + { + "epoch": 2.3, + "learning_rate": 8.067846419124665e-06, + "loss": 0.0176, + "step": 9440 + }, + { + "epoch": 2.3, + "learning_rate": 8.06397494539793e-06, + "loss": 0.0116, + "step": 9442 + }, + { + "epoch": 2.3, + "learning_rate": 8.060103773125097e-06, + "loss": 0.0127, + "step": 9444 + }, + { + "epoch": 2.3, + "learning_rate": 8.056232902908941e-06, + "loss": 0.0172, + "step": 9446 + }, + { + "epoch": 2.3, + "learning_rate": 8.052362335352189e-06, + "loss": 0.018, + "step": 9448 + }, + { + "epoch": 2.3, + "learning_rate": 8.048492071057512e-06, + "loss": 0.0145, + "step": 9450 + }, + { + "epoch": 2.3, + "learning_rate": 8.044622110627539e-06, + "loss": 0.0108, + "step": 9452 + }, + { + "epoch": 2.3, + "learning_rate": 8.040752454664855e-06, + "loss": 0.0137, + "step": 9454 + }, + { + "epoch": 2.3, + "learning_rate": 8.036883103772001e-06, + "loss": 0.0173, + "step": 9456 + }, + { + "epoch": 2.3, + "learning_rate": 8.033014058551458e-06, + "loss": 0.0145, + "step": 9458 + }, + { + "epoch": 2.31, + "learning_rate": 8.029145319605664e-06, + "loss": 0.0145, + "step": 9460 + }, + { + "epoch": 2.31, + "learning_rate": 8.025276887537016e-06, + "loss": 0.0199, + "step": 9462 + }, + { + "epoch": 2.31, + "learning_rate": 8.021408762947859e-06, + "loss": 0.0123, + "step": 9464 + }, + { + "epoch": 2.31, + "learning_rate": 8.017540946440482e-06, + "loss": 0.0148, + "step": 9466 + }, + { + "epoch": 2.31, + "learning_rate": 8.01367343861714e-06, + "loss": 0.0122, + "step": 9468 + }, + { + "epoch": 2.31, + "learning_rate": 8.009806240080034e-06, + "loss": 0.0135, + "step": 9470 + }, + { + "epoch": 2.31, + "learning_rate": 8.005939351431316e-06, + "loss": 0.0161, + "step": 9472 + }, + { + "epoch": 2.31, + "learning_rate": 8.002072773273087e-06, + "loss": 0.0202, + "step": 9474 + }, + { + "epoch": 2.31, + "learning_rate": 7.998206506207402e-06, + "loss": 0.0211, + "step": 9476 + }, + { + "epoch": 2.31, + "learning_rate": 7.994340550836275e-06, + "loss": 0.0059, + "step": 9478 + }, + { + "epoch": 2.31, + "learning_rate": 7.990474907761664e-06, + "loss": 0.0134, + "step": 9480 + }, + { + "epoch": 2.31, + "learning_rate": 7.986609577585469e-06, + "loss": 0.0088, + "step": 9482 + }, + { + "epoch": 2.31, + "learning_rate": 7.98274456090956e-06, + "loss": 0.0142, + "step": 9484 + }, + { + "epoch": 2.31, + "learning_rate": 7.978879858335748e-06, + "loss": 0.0106, + "step": 9486 + }, + { + "epoch": 2.31, + "learning_rate": 7.9750154704658e-06, + "loss": 0.0182, + "step": 9488 + }, + { + "epoch": 2.31, + "learning_rate": 7.971151397901425e-06, + "loss": 0.019, + "step": 9490 + }, + { + "epoch": 2.31, + "learning_rate": 7.967287641244292e-06, + "loss": 0.0097, + "step": 9492 + }, + { + "epoch": 2.31, + "learning_rate": 7.963424201096014e-06, + "loss": 0.0113, + "step": 9494 + }, + { + "epoch": 2.31, + "learning_rate": 7.959561078058168e-06, + "loss": 0.0183, + "step": 9496 + }, + { + "epoch": 2.31, + "learning_rate": 7.955698272732261e-06, + "loss": 0.0228, + "step": 9498 + }, + { + "epoch": 2.31, + "learning_rate": 7.95183578571976e-06, + "loss": 0.0149, + "step": 9500 + }, + { + "epoch": 2.32, + "learning_rate": 7.947973617622093e-06, + "loss": 0.0146, + "step": 9502 + }, + { + "epoch": 2.32, + "learning_rate": 7.944111769040628e-06, + "loss": 0.006, + "step": 9504 + }, + { + "epoch": 2.32, + "learning_rate": 7.940250240576679e-06, + "loss": 0.0257, + "step": 9506 + }, + { + "epoch": 2.32, + "learning_rate": 7.936389032831514e-06, + "loss": 0.0198, + "step": 9508 + }, + { + "epoch": 2.32, + "learning_rate": 7.932528146406357e-06, + "loss": 0.0162, + "step": 9510 + }, + { + "epoch": 2.32, + "learning_rate": 7.928667581902382e-06, + "loss": 0.0113, + "step": 9512 + }, + { + "epoch": 2.32, + "learning_rate": 7.924807339920701e-06, + "loss": 0.0226, + "step": 9514 + }, + { + "epoch": 2.32, + "learning_rate": 7.920947421062383e-06, + "loss": 0.0074, + "step": 9516 + }, + { + "epoch": 2.32, + "learning_rate": 7.91708782592845e-06, + "loss": 0.0139, + "step": 9518 + }, + { + "epoch": 2.32, + "learning_rate": 7.913228555119875e-06, + "loss": 0.0107, + "step": 9520 + }, + { + "epoch": 2.32, + "learning_rate": 7.909369609237564e-06, + "loss": 0.0153, + "step": 9522 + }, + { + "epoch": 2.32, + "learning_rate": 7.905510988882392e-06, + "loss": 0.0057, + "step": 9524 + }, + { + "epoch": 2.32, + "learning_rate": 7.901652694655177e-06, + "loss": 0.0184, + "step": 9526 + }, + { + "epoch": 2.32, + "learning_rate": 7.897794727156685e-06, + "loss": 0.0128, + "step": 9528 + }, + { + "epoch": 2.32, + "learning_rate": 7.893937086987626e-06, + "loss": 0.0153, + "step": 9530 + }, + { + "epoch": 2.32, + "learning_rate": 7.890079774748665e-06, + "loss": 0.0091, + "step": 9532 + }, + { + "epoch": 2.32, + "learning_rate": 7.886222791040418e-06, + "loss": 0.0156, + "step": 9534 + }, + { + "epoch": 2.32, + "learning_rate": 7.88236613646345e-06, + "loss": 0.0139, + "step": 9536 + }, + { + "epoch": 2.32, + "learning_rate": 7.878509811618263e-06, + "loss": 0.0112, + "step": 9538 + }, + { + "epoch": 2.32, + "learning_rate": 7.874653817105322e-06, + "loss": 0.0147, + "step": 9540 + }, + { + "epoch": 2.33, + "learning_rate": 7.87079815352503e-06, + "loss": 0.0065, + "step": 9542 + }, + { + "epoch": 2.33, + "learning_rate": 7.866942821477754e-06, + "loss": 0.0136, + "step": 9544 + }, + { + "epoch": 2.33, + "learning_rate": 7.863087821563785e-06, + "loss": 0.0099, + "step": 9546 + }, + { + "epoch": 2.33, + "learning_rate": 7.859233154383386e-06, + "loss": 0.0071, + "step": 9548 + }, + { + "epoch": 2.33, + "learning_rate": 7.855378820536751e-06, + "loss": 0.0076, + "step": 9550 + }, + { + "epoch": 2.33, + "learning_rate": 7.851524820624039e-06, + "loss": 0.0191, + "step": 9552 + }, + { + "epoch": 2.33, + "learning_rate": 7.847671155245334e-06, + "loss": 0.0099, + "step": 9554 + }, + { + "epoch": 2.33, + "learning_rate": 7.843817825000693e-06, + "loss": 0.0131, + "step": 9556 + }, + { + "epoch": 2.33, + "learning_rate": 7.839964830490102e-06, + "loss": 0.0197, + "step": 9558 + }, + { + "epoch": 2.33, + "learning_rate": 7.836112172313505e-06, + "loss": 0.017, + "step": 9560 + }, + { + "epoch": 2.33, + "learning_rate": 7.83225985107079e-06, + "loss": 0.0148, + "step": 9562 + }, + { + "epoch": 2.33, + "learning_rate": 7.828407867361789e-06, + "loss": 0.0177, + "step": 9564 + }, + { + "epoch": 2.33, + "learning_rate": 7.82455622178629e-06, + "loss": 0.0087, + "step": 9566 + }, + { + "epoch": 2.33, + "learning_rate": 7.820704914944024e-06, + "loss": 0.0184, + "step": 9568 + }, + { + "epoch": 2.33, + "learning_rate": 7.816853947434666e-06, + "loss": 0.0102, + "step": 9570 + }, + { + "epoch": 2.33, + "learning_rate": 7.81300331985784e-06, + "loss": 0.0165, + "step": 9572 + }, + { + "epoch": 2.33, + "learning_rate": 7.809153032813124e-06, + "loss": 0.0221, + "step": 9574 + }, + { + "epoch": 2.33, + "learning_rate": 7.805303086900032e-06, + "loss": 0.0121, + "step": 9576 + }, + { + "epoch": 2.33, + "learning_rate": 7.801453482718032e-06, + "loss": 0.0091, + "step": 9578 + }, + { + "epoch": 2.33, + "learning_rate": 7.797604220866532e-06, + "loss": 0.0097, + "step": 9580 + }, + { + "epoch": 2.33, + "learning_rate": 7.7937553019449e-06, + "loss": 0.0174, + "step": 9582 + }, + { + "epoch": 2.34, + "learning_rate": 7.789906726552439e-06, + "loss": 0.0077, + "step": 9584 + }, + { + "epoch": 2.34, + "learning_rate": 7.786058495288396e-06, + "loss": 0.0095, + "step": 9586 + }, + { + "epoch": 2.34, + "learning_rate": 7.782210608751975e-06, + "loss": 0.0087, + "step": 9588 + }, + { + "epoch": 2.34, + "learning_rate": 7.778363067542325e-06, + "loss": 0.0143, + "step": 9590 + }, + { + "epoch": 2.34, + "learning_rate": 7.774515872258533e-06, + "loss": 0.0138, + "step": 9592 + }, + { + "epoch": 2.34, + "learning_rate": 7.770669023499633e-06, + "loss": 0.0096, + "step": 9594 + }, + { + "epoch": 2.34, + "learning_rate": 7.766822521864617e-06, + "loss": 0.0193, + "step": 9596 + }, + { + "epoch": 2.34, + "learning_rate": 7.762976367952406e-06, + "loss": 0.015, + "step": 9598 + }, + { + "epoch": 2.34, + "learning_rate": 7.759130562361888e-06, + "loss": 0.0059, + "step": 9600 + }, + { + "epoch": 2.34, + "learning_rate": 7.755285105691869e-06, + "loss": 0.0164, + "step": 9602 + }, + { + "epoch": 2.34, + "learning_rate": 7.751439998541127e-06, + "loss": 0.0144, + "step": 9604 + }, + { + "epoch": 2.34, + "learning_rate": 7.747595241508368e-06, + "loss": 0.0067, + "step": 9606 + }, + { + "epoch": 2.34, + "learning_rate": 7.74375083519226e-06, + "loss": 0.0157, + "step": 9608 + }, + { + "epoch": 2.34, + "learning_rate": 7.739906780191394e-06, + "loss": 0.0109, + "step": 9610 + }, + { + "epoch": 2.34, + "learning_rate": 7.736063077104326e-06, + "loss": 0.0132, + "step": 9612 + }, + { + "epoch": 2.34, + "learning_rate": 7.732219726529546e-06, + "loss": 0.0088, + "step": 9614 + }, + { + "epoch": 2.34, + "learning_rate": 7.728376729065501e-06, + "loss": 0.0078, + "step": 9616 + }, + { + "epoch": 2.34, + "learning_rate": 7.724534085310568e-06, + "loss": 0.0066, + "step": 9618 + }, + { + "epoch": 2.34, + "learning_rate": 7.720691795863075e-06, + "loss": 0.0221, + "step": 9620 + }, + { + "epoch": 2.34, + "learning_rate": 7.716849861321296e-06, + "loss": 0.0173, + "step": 9622 + }, + { + "epoch": 2.35, + "learning_rate": 7.71300828228346e-06, + "loss": 0.0055, + "step": 9624 + }, + { + "epoch": 2.35, + "learning_rate": 7.709167059347718e-06, + "loss": 0.0128, + "step": 9626 + }, + { + "epoch": 2.35, + "learning_rate": 7.70532619311218e-06, + "loss": 0.0121, + "step": 9628 + }, + { + "epoch": 2.35, + "learning_rate": 7.701485684174905e-06, + "loss": 0.0158, + "step": 9630 + }, + { + "epoch": 2.35, + "learning_rate": 7.697645533133883e-06, + "loss": 0.0123, + "step": 9632 + }, + { + "epoch": 2.35, + "learning_rate": 7.693805740587055e-06, + "loss": 0.0128, + "step": 9634 + }, + { + "epoch": 2.35, + "learning_rate": 7.689966307132306e-06, + "loss": 0.0195, + "step": 9636 + }, + { + "epoch": 2.35, + "learning_rate": 7.686127233367473e-06, + "loss": 0.0097, + "step": 9638 + }, + { + "epoch": 2.35, + "learning_rate": 7.68228851989032e-06, + "loss": 0.0116, + "step": 9640 + }, + { + "epoch": 2.35, + "learning_rate": 7.678450167298566e-06, + "loss": 0.011, + "step": 9642 + }, + { + "epoch": 2.35, + "learning_rate": 7.674612176189872e-06, + "loss": 0.0098, + "step": 9644 + }, + { + "epoch": 2.35, + "learning_rate": 7.67077454716185e-06, + "loss": 0.0153, + "step": 9646 + }, + { + "epoch": 2.35, + "learning_rate": 7.666937280812038e-06, + "loss": 0.0067, + "step": 9648 + }, + { + "epoch": 2.35, + "learning_rate": 7.66310037773793e-06, + "loss": 0.0141, + "step": 9650 + }, + { + "epoch": 2.35, + "learning_rate": 7.659263838536963e-06, + "loss": 0.0088, + "step": 9652 + }, + { + "epoch": 2.35, + "learning_rate": 7.655427663806517e-06, + "loss": 0.0082, + "step": 9654 + }, + { + "epoch": 2.35, + "learning_rate": 7.651591854143911e-06, + "loss": 0.0061, + "step": 9656 + }, + { + "epoch": 2.35, + "learning_rate": 7.64775641014641e-06, + "loss": 0.0111, + "step": 9658 + }, + { + "epoch": 2.35, + "learning_rate": 7.643921332411223e-06, + "loss": 0.0082, + "step": 9660 + }, + { + "epoch": 2.35, + "learning_rate": 7.640086621535504e-06, + "loss": 0.0145, + "step": 9662 + }, + { + "epoch": 2.35, + "learning_rate": 7.636252278116338e-06, + "loss": 0.0108, + "step": 9664 + }, + { + "epoch": 2.36, + "learning_rate": 7.632418302750767e-06, + "loss": 0.021, + "step": 9666 + }, + { + "epoch": 2.36, + "learning_rate": 7.628584696035775e-06, + "loss": 0.0179, + "step": 9668 + }, + { + "epoch": 2.36, + "learning_rate": 7.624751458568279e-06, + "loss": 0.0153, + "step": 9670 + }, + { + "epoch": 2.36, + "learning_rate": 7.6209185909451414e-06, + "loss": 0.0088, + "step": 9672 + }, + { + "epoch": 2.36, + "learning_rate": 7.6170860937631705e-06, + "loss": 0.0093, + "step": 9674 + }, + { + "epoch": 2.36, + "learning_rate": 7.613253967619117e-06, + "loss": 0.0111, + "step": 9676 + }, + { + "epoch": 2.36, + "learning_rate": 7.6094222131096725e-06, + "loss": 0.0097, + "step": 9678 + }, + { + "epoch": 2.36, + "learning_rate": 7.6055908308314675e-06, + "loss": 0.0202, + "step": 9680 + }, + { + "epoch": 2.36, + "learning_rate": 7.601759821381081e-06, + "loss": 0.0094, + "step": 9682 + }, + { + "epoch": 2.36, + "learning_rate": 7.597929185355025e-06, + "loss": 0.0118, + "step": 9684 + }, + { + "epoch": 2.36, + "learning_rate": 7.594098923349769e-06, + "loss": 0.019, + "step": 9686 + }, + { + "epoch": 2.36, + "learning_rate": 7.590269035961701e-06, + "loss": 0.0196, + "step": 9688 + }, + { + "epoch": 2.36, + "learning_rate": 7.586439523787176e-06, + "loss": 0.0096, + "step": 9690 + }, + { + "epoch": 2.36, + "learning_rate": 7.582610387422468e-06, + "loss": 0.0165, + "step": 9692 + }, + { + "epoch": 2.36, + "learning_rate": 7.578781627463814e-06, + "loss": 0.0173, + "step": 9694 + }, + { + "epoch": 2.36, + "learning_rate": 7.574953244507371e-06, + "loss": 0.016, + "step": 9696 + }, + { + "epoch": 2.36, + "learning_rate": 7.57112523914925e-06, + "loss": 0.0121, + "step": 9698 + }, + { + "epoch": 2.36, + "learning_rate": 7.5672976119855e-06, + "loss": 0.0125, + "step": 9700 + }, + { + "epoch": 2.36, + "learning_rate": 7.563470363612121e-06, + "loss": 0.0199, + "step": 9702 + }, + { + "epoch": 2.36, + "learning_rate": 7.559643494625034e-06, + "loss": 0.0136, + "step": 9704 + }, + { + "epoch": 2.37, + "learning_rate": 7.555817005620114e-06, + "loss": 0.015, + "step": 9706 + }, + { + "epoch": 2.37, + "learning_rate": 7.551990897193175e-06, + "loss": 0.016, + "step": 9708 + }, + { + "epoch": 2.37, + "learning_rate": 7.548165169939978e-06, + "loss": 0.0163, + "step": 9710 + }, + { + "epoch": 2.37, + "learning_rate": 7.54433982445621e-06, + "loss": 0.0153, + "step": 9712 + }, + { + "epoch": 2.37, + "learning_rate": 7.540514861337506e-06, + "loss": 0.0104, + "step": 9714 + }, + { + "epoch": 2.37, + "learning_rate": 7.5366902811794465e-06, + "loss": 0.0131, + "step": 9716 + }, + { + "epoch": 2.37, + "learning_rate": 7.53286608457755e-06, + "loss": 0.016, + "step": 9718 + }, + { + "epoch": 2.37, + "learning_rate": 7.529042272127264e-06, + "loss": 0.0126, + "step": 9720 + }, + { + "epoch": 2.37, + "learning_rate": 7.525218844423991e-06, + "loss": 0.0173, + "step": 9722 + }, + { + "epoch": 2.37, + "learning_rate": 7.52139580206307e-06, + "loss": 0.0161, + "step": 9724 + }, + { + "epoch": 2.37, + "learning_rate": 7.5175731456397785e-06, + "loss": 0.0143, + "step": 9726 + }, + { + "epoch": 2.37, + "learning_rate": 7.513750875749326e-06, + "loss": 0.0215, + "step": 9728 + }, + { + "epoch": 2.37, + "learning_rate": 7.509928992986872e-06, + "loss": 0.0105, + "step": 9730 + }, + { + "epoch": 2.37, + "learning_rate": 7.5061074979475194e-06, + "loss": 0.01, + "step": 9732 + }, + { + "epoch": 2.37, + "learning_rate": 7.502286391226299e-06, + "loss": 0.01, + "step": 9734 + }, + { + "epoch": 2.37, + "learning_rate": 7.4984656734181825e-06, + "loss": 0.0057, + "step": 9736 + }, + { + "epoch": 2.37, + "learning_rate": 7.494645345118092e-06, + "loss": 0.02, + "step": 9738 + }, + { + "epoch": 2.37, + "learning_rate": 7.490825406920878e-06, + "loss": 0.0176, + "step": 9740 + }, + { + "epoch": 2.37, + "learning_rate": 7.487005859421337e-06, + "loss": 0.0113, + "step": 9742 + }, + { + "epoch": 2.37, + "learning_rate": 7.4831867032141955e-06, + "loss": 0.0071, + "step": 9744 + }, + { + "epoch": 2.37, + "learning_rate": 7.479367938894133e-06, + "loss": 0.0069, + "step": 9746 + }, + { + "epoch": 2.38, + "learning_rate": 7.475549567055754e-06, + "loss": 0.0039, + "step": 9748 + }, + { + "epoch": 2.38, + "learning_rate": 7.471731588293616e-06, + "loss": 0.0118, + "step": 9750 + }, + { + "epoch": 2.38, + "learning_rate": 7.467914003202197e-06, + "loss": 0.0216, + "step": 9752 + }, + { + "epoch": 2.38, + "learning_rate": 7.464096812375932e-06, + "loss": 0.0217, + "step": 9754 + }, + { + "epoch": 2.38, + "learning_rate": 7.460280016409183e-06, + "loss": 0.0105, + "step": 9756 + }, + { + "epoch": 2.38, + "learning_rate": 7.45646361589626e-06, + "loss": 0.0128, + "step": 9758 + }, + { + "epoch": 2.38, + "learning_rate": 7.4526476114313986e-06, + "loss": 0.0167, + "step": 9760 + }, + { + "epoch": 2.38, + "learning_rate": 7.448832003608781e-06, + "loss": 0.0199, + "step": 9762 + }, + { + "epoch": 2.38, + "learning_rate": 7.4450167930225295e-06, + "loss": 0.0041, + "step": 9764 + }, + { + "epoch": 2.38, + "learning_rate": 7.4412019802667036e-06, + "loss": 0.006, + "step": 9766 + }, + { + "epoch": 2.38, + "learning_rate": 7.437387565935294e-06, + "loss": 0.0146, + "step": 9768 + }, + { + "epoch": 2.38, + "learning_rate": 7.433573550622233e-06, + "loss": 0.0118, + "step": 9770 + }, + { + "epoch": 2.38, + "learning_rate": 7.429759934921397e-06, + "loss": 0.0041, + "step": 9772 + }, + { + "epoch": 2.38, + "learning_rate": 7.4259467194265955e-06, + "loss": 0.0113, + "step": 9774 + }, + { + "epoch": 2.38, + "learning_rate": 7.422133904731568e-06, + "loss": 0.0215, + "step": 9776 + }, + { + "epoch": 2.38, + "learning_rate": 7.418321491430003e-06, + "loss": 0.0164, + "step": 9778 + }, + { + "epoch": 2.38, + "learning_rate": 7.4145094801155255e-06, + "loss": 0.0062, + "step": 9780 + }, + { + "epoch": 2.38, + "learning_rate": 7.410697871381695e-06, + "loss": 0.0106, + "step": 9782 + }, + { + "epoch": 2.38, + "learning_rate": 7.406886665822001e-06, + "loss": 0.0196, + "step": 9784 + }, + { + "epoch": 2.38, + "learning_rate": 7.403075864029881e-06, + "loss": 0.0233, + "step": 9786 + }, + { + "epoch": 2.38, + "learning_rate": 7.3992654665987095e-06, + "loss": 0.0073, + "step": 9788 + }, + { + "epoch": 2.39, + "learning_rate": 7.395455474121793e-06, + "loss": 0.0136, + "step": 9790 + }, + { + "epoch": 2.39, + "learning_rate": 7.391645887192374e-06, + "loss": 0.02, + "step": 9792 + }, + { + "epoch": 2.39, + "learning_rate": 7.387836706403635e-06, + "loss": 0.0122, + "step": 9794 + }, + { + "epoch": 2.39, + "learning_rate": 7.384027932348692e-06, + "loss": 0.0195, + "step": 9796 + }, + { + "epoch": 2.39, + "learning_rate": 7.38021956562061e-06, + "loss": 0.0178, + "step": 9798 + }, + { + "epoch": 2.39, + "learning_rate": 7.376411606812368e-06, + "loss": 0.0148, + "step": 9800 + }, + { + "epoch": 2.39, + "learning_rate": 7.372604056516904e-06, + "loss": 0.0127, + "step": 9802 + }, + { + "epoch": 2.39, + "learning_rate": 7.368796915327076e-06, + "loss": 0.0118, + "step": 9804 + }, + { + "epoch": 2.39, + "learning_rate": 7.364990183835694e-06, + "loss": 0.0137, + "step": 9806 + }, + { + "epoch": 2.39, + "learning_rate": 7.361183862635484e-06, + "loss": 0.0136, + "step": 9808 + }, + { + "epoch": 2.39, + "learning_rate": 7.357377952319127e-06, + "loss": 0.0158, + "step": 9810 + }, + { + "epoch": 2.39, + "learning_rate": 7.353572453479228e-06, + "loss": 0.0131, + "step": 9812 + }, + { + "epoch": 2.39, + "learning_rate": 7.349767366708338e-06, + "loss": 0.0174, + "step": 9814 + }, + { + "epoch": 2.39, + "learning_rate": 7.345962692598934e-06, + "loss": 0.0109, + "step": 9816 + }, + { + "epoch": 2.39, + "learning_rate": 7.342158431743429e-06, + "loss": 0.01, + "step": 9818 + }, + { + "epoch": 2.39, + "learning_rate": 7.338354584734182e-06, + "loss": 0.0067, + "step": 9820 + }, + { + "epoch": 2.39, + "learning_rate": 7.334551152163481e-06, + "loss": 0.0179, + "step": 9822 + }, + { + "epoch": 2.39, + "learning_rate": 7.330748134623546e-06, + "loss": 0.0075, + "step": 9824 + }, + { + "epoch": 2.39, + "learning_rate": 7.326945532706535e-06, + "loss": 0.0112, + "step": 9826 + }, + { + "epoch": 2.39, + "learning_rate": 7.323143347004547e-06, + "loss": 0.0096, + "step": 9828 + }, + { + "epoch": 2.4, + "learning_rate": 7.319341578109609e-06, + "loss": 0.0133, + "step": 9830 + }, + { + "epoch": 2.4, + "learning_rate": 7.315540226613684e-06, + "loss": 0.0258, + "step": 9832 + }, + { + "epoch": 2.4, + "learning_rate": 7.3117392931086726e-06, + "loss": 0.0186, + "step": 9834 + }, + { + "epoch": 2.4, + "learning_rate": 7.30793877818641e-06, + "loss": 0.0155, + "step": 9836 + }, + { + "epoch": 2.4, + "learning_rate": 7.304138682438669e-06, + "loss": 0.0241, + "step": 9838 + }, + { + "epoch": 2.4, + "learning_rate": 7.3003390064571425e-06, + "loss": 0.0139, + "step": 9840 + }, + { + "epoch": 2.4, + "learning_rate": 7.296539750833478e-06, + "loss": 0.0162, + "step": 9842 + }, + { + "epoch": 2.4, + "learning_rate": 7.292740916159249e-06, + "loss": 0.0088, + "step": 9844 + }, + { + "epoch": 2.4, + "learning_rate": 7.288942503025962e-06, + "loss": 0.0211, + "step": 9846 + }, + { + "epoch": 2.4, + "learning_rate": 7.285144512025053e-06, + "loss": 0.0162, + "step": 9848 + }, + { + "epoch": 2.4, + "learning_rate": 7.281346943747907e-06, + "loss": 0.0156, + "step": 9850 + }, + { + "epoch": 2.4, + "learning_rate": 7.277549798785825e-06, + "loss": 0.0235, + "step": 9852 + }, + { + "epoch": 2.4, + "learning_rate": 7.273753077730065e-06, + "loss": 0.0255, + "step": 9854 + }, + { + "epoch": 2.4, + "learning_rate": 7.269956781171792e-06, + "loss": 0.0194, + "step": 9856 + }, + { + "epoch": 2.4, + "learning_rate": 7.266160909702125e-06, + "loss": 0.0117, + "step": 9858 + }, + { + "epoch": 2.4, + "learning_rate": 7.262365463912108e-06, + "loss": 0.0144, + "step": 9860 + }, + { + "epoch": 2.4, + "learning_rate": 7.258570444392725e-06, + "loss": 0.0096, + "step": 9862 + }, + { + "epoch": 2.4, + "learning_rate": 7.254775851734883e-06, + "loss": 0.0076, + "step": 9864 + }, + { + "epoch": 2.4, + "learning_rate": 7.250981686529436e-06, + "loss": 0.0104, + "step": 9866 + }, + { + "epoch": 2.4, + "learning_rate": 7.247187949367158e-06, + "loss": 0.0096, + "step": 9868 + }, + { + "epoch": 2.4, + "learning_rate": 7.24339464083877e-06, + "loss": 0.0133, + "step": 9870 + }, + { + "epoch": 2.41, + "learning_rate": 7.239601761534913e-06, + "loss": 0.022, + "step": 9872 + }, + { + "epoch": 2.41, + "learning_rate": 7.235809312046169e-06, + "loss": 0.0146, + "step": 9874 + }, + { + "epoch": 2.41, + "learning_rate": 7.232017292963049e-06, + "loss": 0.0084, + "step": 9876 + }, + { + "epoch": 2.41, + "learning_rate": 7.22822570487601e-06, + "loss": 0.0192, + "step": 9878 + }, + { + "epoch": 2.41, + "learning_rate": 7.224434548375419e-06, + "loss": 0.017, + "step": 9880 + }, + { + "epoch": 2.41, + "learning_rate": 7.220643824051592e-06, + "loss": 0.0135, + "step": 9882 + }, + { + "epoch": 2.41, + "learning_rate": 7.216853532494773e-06, + "loss": 0.021, + "step": 9884 + }, + { + "epoch": 2.41, + "learning_rate": 7.213063674295146e-06, + "loss": 0.0179, + "step": 9886 + }, + { + "epoch": 2.41, + "learning_rate": 7.2092742500428126e-06, + "loss": 0.0178, + "step": 9888 + }, + { + "epoch": 2.41, + "learning_rate": 7.205485260327817e-06, + "loss": 0.0146, + "step": 9890 + }, + { + "epoch": 2.41, + "learning_rate": 7.201696705740137e-06, + "loss": 0.0105, + "step": 9892 + }, + { + "epoch": 2.41, + "learning_rate": 7.19790858686968e-06, + "loss": 0.012, + "step": 9894 + }, + { + "epoch": 2.41, + "learning_rate": 7.194120904306277e-06, + "loss": 0.0092, + "step": 9896 + }, + { + "epoch": 2.41, + "learning_rate": 7.190333658639705e-06, + "loss": 0.0071, + "step": 9898 + }, + { + "epoch": 2.41, + "learning_rate": 7.1865468504596704e-06, + "loss": 0.0152, + "step": 9900 + }, + { + "epoch": 2.41, + "learning_rate": 7.182760480355806e-06, + "loss": 0.0131, + "step": 9902 + }, + { + "epoch": 2.41, + "learning_rate": 7.178974548917672e-06, + "loss": 0.0161, + "step": 9904 + }, + { + "epoch": 2.41, + "learning_rate": 7.175189056734774e-06, + "loss": 0.0138, + "step": 9906 + }, + { + "epoch": 2.41, + "learning_rate": 7.17140400439654e-06, + "loss": 0.0083, + "step": 9908 + }, + { + "epoch": 2.41, + "learning_rate": 7.167619392492337e-06, + "loss": 0.0108, + "step": 9910 + }, + { + "epoch": 2.42, + "learning_rate": 7.163835221611446e-06, + "loss": 0.0103, + "step": 9912 + }, + { + "epoch": 2.42, + "learning_rate": 7.160051492343101e-06, + "loss": 0.009, + "step": 9914 + }, + { + "epoch": 2.42, + "learning_rate": 7.156268205276453e-06, + "loss": 0.013, + "step": 9916 + }, + { + "epoch": 2.42, + "learning_rate": 7.152485361000595e-06, + "loss": 0.0129, + "step": 9918 + }, + { + "epoch": 2.42, + "learning_rate": 7.148702960104535e-06, + "loss": 0.0141, + "step": 9920 + }, + { + "epoch": 2.42, + "learning_rate": 7.14492100317723e-06, + "loss": 0.0107, + "step": 9922 + }, + { + "epoch": 2.42, + "learning_rate": 7.141139490807554e-06, + "loss": 0.0219, + "step": 9924 + }, + { + "epoch": 2.42, + "learning_rate": 7.137358423584324e-06, + "loss": 0.0121, + "step": 9926 + }, + { + "epoch": 2.42, + "learning_rate": 7.133577802096274e-06, + "loss": 0.0141, + "step": 9928 + }, + { + "epoch": 2.42, + "learning_rate": 7.12979762693208e-06, + "loss": 0.0201, + "step": 9930 + }, + { + "epoch": 2.42, + "learning_rate": 7.1260178986803415e-06, + "loss": 0.0046, + "step": 9932 + }, + { + "epoch": 2.42, + "learning_rate": 7.122238617929596e-06, + "loss": 0.0123, + "step": 9934 + }, + { + "epoch": 2.42, + "learning_rate": 7.118459785268301e-06, + "loss": 0.0125, + "step": 9936 + }, + { + "epoch": 2.42, + "learning_rate": 7.114681401284848e-06, + "loss": 0.0191, + "step": 9938 + }, + { + "epoch": 2.42, + "learning_rate": 7.110903466567567e-06, + "loss": 0.0147, + "step": 9940 + }, + { + "epoch": 2.42, + "learning_rate": 7.10712598170471e-06, + "loss": 0.0215, + "step": 9942 + }, + { + "epoch": 2.42, + "learning_rate": 7.1033489472844566e-06, + "loss": 0.0156, + "step": 9944 + }, + { + "epoch": 2.42, + "learning_rate": 7.0995723638949195e-06, + "loss": 0.0105, + "step": 9946 + }, + { + "epoch": 2.42, + "learning_rate": 7.095796232124148e-06, + "loss": 0.0089, + "step": 9948 + }, + { + "epoch": 2.42, + "learning_rate": 7.092020552560111e-06, + "loss": 0.013, + "step": 9950 + }, + { + "epoch": 2.42, + "learning_rate": 7.088245325790705e-06, + "loss": 0.0085, + "step": 9952 + }, + { + "epoch": 2.43, + "learning_rate": 7.084470552403769e-06, + "loss": 0.0188, + "step": 9954 + }, + { + "epoch": 2.43, + "learning_rate": 7.080696232987063e-06, + "loss": 0.0135, + "step": 9956 + }, + { + "epoch": 2.43, + "learning_rate": 7.07692236812828e-06, + "loss": 0.0071, + "step": 9958 + }, + { + "epoch": 2.43, + "learning_rate": 7.07314895841503e-06, + "loss": 0.0109, + "step": 9960 + }, + { + "epoch": 2.43, + "learning_rate": 7.0693760044348695e-06, + "loss": 0.0116, + "step": 9962 + }, + { + "epoch": 2.43, + "learning_rate": 7.065603506775276e-06, + "loss": 0.0155, + "step": 9964 + }, + { + "epoch": 2.43, + "learning_rate": 7.061831466023656e-06, + "loss": 0.0096, + "step": 9966 + }, + { + "epoch": 2.43, + "learning_rate": 7.058059882767341e-06, + "loss": 0.0105, + "step": 9968 + }, + { + "epoch": 2.43, + "learning_rate": 7.054288757593599e-06, + "loss": 0.0113, + "step": 9970 + }, + { + "epoch": 2.43, + "learning_rate": 7.050518091089621e-06, + "loss": 0.0111, + "step": 9972 + }, + { + "epoch": 2.43, + "learning_rate": 7.046747883842534e-06, + "loss": 0.0068, + "step": 9974 + }, + { + "epoch": 2.43, + "learning_rate": 7.04297813643938e-06, + "loss": 0.0163, + "step": 9976 + }, + { + "epoch": 2.43, + "learning_rate": 7.039208849467143e-06, + "loss": 0.0243, + "step": 9978 + }, + { + "epoch": 2.43, + "learning_rate": 7.0354400235127264e-06, + "loss": 0.01, + "step": 9980 + }, + { + "epoch": 2.43, + "learning_rate": 7.0316716591629706e-06, + "loss": 0.0097, + "step": 9982 + }, + { + "epoch": 2.43, + "learning_rate": 7.0279037570046325e-06, + "loss": 0.0091, + "step": 9984 + }, + { + "epoch": 2.43, + "learning_rate": 7.024136317624407e-06, + "loss": 0.0141, + "step": 9986 + }, + { + "epoch": 2.43, + "learning_rate": 7.02036934160891e-06, + "loss": 0.0088, + "step": 9988 + }, + { + "epoch": 2.43, + "learning_rate": 7.016602829544696e-06, + "loss": 0.0174, + "step": 9990 + }, + { + "epoch": 2.43, + "learning_rate": 7.012836782018232e-06, + "loss": 0.0346, + "step": 9992 + }, + { + "epoch": 2.44, + "learning_rate": 7.00907119961592e-06, + "loss": 0.0124, + "step": 9994 + }, + { + "epoch": 2.44, + "learning_rate": 7.005306082924094e-06, + "loss": 0.0128, + "step": 9996 + }, + { + "epoch": 2.44, + "learning_rate": 7.001541432529013e-06, + "loss": 0.0114, + "step": 9998 + }, + { + "epoch": 2.44, + "learning_rate": 6.9977772490168594e-06, + "loss": 0.0196, + "step": 10000 + }, + { + "epoch": 2.44, + "learning_rate": 6.9940135329737404e-06, + "loss": 0.0138, + "step": 10002 + }, + { + "epoch": 2.44, + "learning_rate": 6.9902502849857036e-06, + "loss": 0.0127, + "step": 10004 + }, + { + "epoch": 2.44, + "learning_rate": 6.98648750563871e-06, + "loss": 0.0162, + "step": 10006 + }, + { + "epoch": 2.44, + "learning_rate": 6.982725195518658e-06, + "loss": 0.016, + "step": 10008 + }, + { + "epoch": 2.44, + "learning_rate": 6.978963355211361e-06, + "loss": 0.0138, + "step": 10010 + }, + { + "epoch": 2.44, + "learning_rate": 6.975201985302573e-06, + "loss": 0.0205, + "step": 10012 + }, + { + "epoch": 2.44, + "learning_rate": 6.971441086377968e-06, + "loss": 0.0087, + "step": 10014 + }, + { + "epoch": 2.44, + "learning_rate": 6.96768065902314e-06, + "loss": 0.0135, + "step": 10016 + }, + { + "epoch": 2.44, + "learning_rate": 6.963920703823619e-06, + "loss": 0.0127, + "step": 10018 + }, + { + "epoch": 2.44, + "learning_rate": 6.960161221364864e-06, + "loss": 0.0092, + "step": 10020 + }, + { + "epoch": 2.44, + "learning_rate": 6.956402212232254e-06, + "loss": 0.0209, + "step": 10022 + }, + { + "epoch": 2.44, + "learning_rate": 6.952643677011086e-06, + "loss": 0.0194, + "step": 10024 + }, + { + "epoch": 2.44, + "learning_rate": 6.948885616286605e-06, + "loss": 0.0136, + "step": 10026 + }, + { + "epoch": 2.44, + "learning_rate": 6.945128030643959e-06, + "loss": 0.0159, + "step": 10028 + }, + { + "epoch": 2.44, + "learning_rate": 6.941370920668244e-06, + "loss": 0.0192, + "step": 10030 + }, + { + "epoch": 2.44, + "learning_rate": 6.937614286944461e-06, + "loss": 0.019, + "step": 10032 + }, + { + "epoch": 2.44, + "learning_rate": 6.933858130057553e-06, + "loss": 0.0139, + "step": 10034 + }, + { + "epoch": 2.45, + "learning_rate": 6.930102450592376e-06, + "loss": 0.0094, + "step": 10036 + }, + { + "epoch": 2.45, + "learning_rate": 6.926347249133727e-06, + "loss": 0.0107, + "step": 10038 + }, + { + "epoch": 2.45, + "learning_rate": 6.922592526266312e-06, + "loss": 0.0154, + "step": 10040 + }, + { + "epoch": 2.45, + "learning_rate": 6.9188382825747715e-06, + "loss": 0.0082, + "step": 10042 + }, + { + "epoch": 2.45, + "learning_rate": 6.915084518643671e-06, + "loss": 0.0134, + "step": 10044 + }, + { + "epoch": 2.45, + "learning_rate": 6.9113312350575035e-06, + "loss": 0.0054, + "step": 10046 + }, + { + "epoch": 2.45, + "learning_rate": 6.907578432400679e-06, + "loss": 0.0104, + "step": 10048 + }, + { + "epoch": 2.45, + "learning_rate": 6.903826111257536e-06, + "loss": 0.0139, + "step": 10050 + }, + { + "epoch": 2.45, + "learning_rate": 6.9000742722123445e-06, + "loss": 0.0105, + "step": 10052 + }, + { + "epoch": 2.45, + "learning_rate": 6.896322915849298e-06, + "loss": 0.0087, + "step": 10054 + }, + { + "epoch": 2.45, + "learning_rate": 6.892572042752501e-06, + "loss": 0.0124, + "step": 10056 + }, + { + "epoch": 2.45, + "learning_rate": 6.8888216535059985e-06, + "loss": 0.0083, + "step": 10058 + }, + { + "epoch": 2.45, + "learning_rate": 6.885071748693755e-06, + "loss": 0.0198, + "step": 10060 + }, + { + "epoch": 2.45, + "learning_rate": 6.881322328899661e-06, + "loss": 0.0112, + "step": 10062 + }, + { + "epoch": 2.45, + "learning_rate": 6.8775733947075265e-06, + "loss": 0.0087, + "step": 10064 + }, + { + "epoch": 2.45, + "learning_rate": 6.873824946701089e-06, + "loss": 0.0084, + "step": 10066 + }, + { + "epoch": 2.45, + "learning_rate": 6.8700769854640135e-06, + "loss": 0.0107, + "step": 10068 + }, + { + "epoch": 2.45, + "learning_rate": 6.866329511579888e-06, + "loss": 0.0079, + "step": 10070 + }, + { + "epoch": 2.45, + "learning_rate": 6.862582525632215e-06, + "loss": 0.0123, + "step": 10072 + }, + { + "epoch": 2.45, + "learning_rate": 6.858836028204434e-06, + "loss": 0.0209, + "step": 10074 + }, + { + "epoch": 2.46, + "learning_rate": 6.855090019879904e-06, + "loss": 0.0169, + "step": 10076 + }, + { + "epoch": 2.46, + "learning_rate": 6.851344501241908e-06, + "loss": 0.0159, + "step": 10078 + }, + { + "epoch": 2.46, + "learning_rate": 6.847599472873646e-06, + "loss": 0.0178, + "step": 10080 + }, + { + "epoch": 2.46, + "learning_rate": 6.843854935358252e-06, + "loss": 0.0126, + "step": 10082 + }, + { + "epoch": 2.46, + "learning_rate": 6.84011088927878e-06, + "loss": 0.0099, + "step": 10084 + }, + { + "epoch": 2.46, + "learning_rate": 6.836367335218206e-06, + "loss": 0.0093, + "step": 10086 + }, + { + "epoch": 2.46, + "learning_rate": 6.832624273759428e-06, + "loss": 0.0159, + "step": 10088 + }, + { + "epoch": 2.46, + "learning_rate": 6.8288817054852685e-06, + "loss": 0.0127, + "step": 10090 + }, + { + "epoch": 2.46, + "learning_rate": 6.8251396309784764e-06, + "loss": 0.0093, + "step": 10092 + }, + { + "epoch": 2.46, + "learning_rate": 6.8213980508217235e-06, + "loss": 0.0197, + "step": 10094 + }, + { + "epoch": 2.46, + "learning_rate": 6.817656965597597e-06, + "loss": 0.0064, + "step": 10096 + }, + { + "epoch": 2.46, + "learning_rate": 6.813916375888617e-06, + "loss": 0.0117, + "step": 10098 + }, + { + "epoch": 2.46, + "learning_rate": 6.8101762822772176e-06, + "loss": 0.0255, + "step": 10100 + }, + { + "epoch": 2.46, + "learning_rate": 6.806436685345768e-06, + "loss": 0.0069, + "step": 10102 + }, + { + "epoch": 2.46, + "learning_rate": 6.802697585676543e-06, + "loss": 0.0172, + "step": 10104 + }, + { + "epoch": 2.46, + "learning_rate": 6.798958983851751e-06, + "loss": 0.0057, + "step": 10106 + }, + { + "epoch": 2.46, + "learning_rate": 6.795220880453521e-06, + "loss": 0.0053, + "step": 10108 + }, + { + "epoch": 2.46, + "learning_rate": 6.791483276063913e-06, + "loss": 0.0086, + "step": 10110 + }, + { + "epoch": 2.46, + "learning_rate": 6.7877461712648885e-06, + "loss": 0.0133, + "step": 10112 + }, + { + "epoch": 2.46, + "learning_rate": 6.784009566638348e-06, + "loss": 0.0176, + "step": 10114 + }, + { + "epoch": 2.46, + "learning_rate": 6.780273462766107e-06, + "loss": 0.0153, + "step": 10116 + }, + { + "epoch": 2.47, + "learning_rate": 6.776537860229915e-06, + "loss": 0.0103, + "step": 10118 + }, + { + "epoch": 2.47, + "learning_rate": 6.772802759611423e-06, + "loss": 0.011, + "step": 10120 + }, + { + "epoch": 2.47, + "learning_rate": 6.769068161492217e-06, + "loss": 0.0133, + "step": 10122 + }, + { + "epoch": 2.47, + "learning_rate": 6.7653340664538055e-06, + "loss": 0.0054, + "step": 10124 + }, + { + "epoch": 2.47, + "learning_rate": 6.761600475077618e-06, + "loss": 0.0069, + "step": 10126 + }, + { + "epoch": 2.47, + "learning_rate": 6.757867387944994e-06, + "loss": 0.0104, + "step": 10128 + }, + { + "epoch": 2.47, + "learning_rate": 6.7541348056372095e-06, + "loss": 0.0124, + "step": 10130 + }, + { + "epoch": 2.47, + "learning_rate": 6.750402728735457e-06, + "loss": 0.0179, + "step": 10132 + }, + { + "epoch": 2.47, + "learning_rate": 6.7466711578208524e-06, + "loss": 0.0147, + "step": 10134 + }, + { + "epoch": 2.47, + "learning_rate": 6.74294009347442e-06, + "loss": 0.0161, + "step": 10136 + }, + { + "epoch": 2.47, + "learning_rate": 6.73920953627712e-06, + "loss": 0.0139, + "step": 10138 + }, + { + "epoch": 2.47, + "learning_rate": 6.735479486809832e-06, + "loss": 0.0109, + "step": 10140 + }, + { + "epoch": 2.47, + "learning_rate": 6.731749945653352e-06, + "loss": 0.0101, + "step": 10142 + }, + { + "epoch": 2.47, + "learning_rate": 6.728020913388393e-06, + "loss": 0.0125, + "step": 10144 + }, + { + "epoch": 2.47, + "learning_rate": 6.7242923905956e-06, + "loss": 0.0073, + "step": 10146 + }, + { + "epoch": 2.47, + "learning_rate": 6.720564377855527e-06, + "loss": 0.0079, + "step": 10148 + }, + { + "epoch": 2.47, + "learning_rate": 6.716836875748663e-06, + "loss": 0.0185, + "step": 10150 + }, + { + "epoch": 2.47, + "learning_rate": 6.713109884855397e-06, + "loss": 0.0111, + "step": 10152 + }, + { + "epoch": 2.47, + "learning_rate": 6.709383405756058e-06, + "loss": 0.0103, + "step": 10154 + }, + { + "epoch": 2.47, + "learning_rate": 6.705657439030888e-06, + "loss": 0.0181, + "step": 10156 + }, + { + "epoch": 2.48, + "learning_rate": 6.7019319852600396e-06, + "loss": 0.0216, + "step": 10158 + }, + { + "epoch": 2.48, + "learning_rate": 6.6982070450236014e-06, + "loss": 0.0158, + "step": 10160 + }, + { + "epoch": 2.48, + "learning_rate": 6.694482618901575e-06, + "loss": 0.0095, + "step": 10162 + }, + { + "epoch": 2.48, + "learning_rate": 6.690758707473883e-06, + "loss": 0.0123, + "step": 10164 + }, + { + "epoch": 2.48, + "learning_rate": 6.687035311320363e-06, + "loss": 0.0069, + "step": 10166 + }, + { + "epoch": 2.48, + "learning_rate": 6.6833124310207785e-06, + "loss": 0.0223, + "step": 10168 + }, + { + "epoch": 2.48, + "learning_rate": 6.679590067154807e-06, + "loss": 0.008, + "step": 10170 + }, + { + "epoch": 2.48, + "learning_rate": 6.675868220302057e-06, + "loss": 0.029, + "step": 10172 + }, + { + "epoch": 2.48, + "learning_rate": 6.672146891042041e-06, + "loss": 0.006, + "step": 10174 + }, + { + "epoch": 2.48, + "learning_rate": 6.668426079954201e-06, + "loss": 0.0089, + "step": 10176 + }, + { + "epoch": 2.48, + "learning_rate": 6.664705787617894e-06, + "loss": 0.0099, + "step": 10178 + }, + { + "epoch": 2.48, + "learning_rate": 6.660986014612405e-06, + "loss": 0.0066, + "step": 10180 + }, + { + "epoch": 2.48, + "learning_rate": 6.65726676151692e-06, + "loss": 0.0062, + "step": 10182 + }, + { + "epoch": 2.48, + "learning_rate": 6.653548028910565e-06, + "loss": 0.0174, + "step": 10184 + }, + { + "epoch": 2.48, + "learning_rate": 6.649829817372369e-06, + "loss": 0.0059, + "step": 10186 + }, + { + "epoch": 2.48, + "learning_rate": 6.64611212748129e-06, + "loss": 0.0071, + "step": 10188 + }, + { + "epoch": 2.48, + "learning_rate": 6.642394959816198e-06, + "loss": 0.0125, + "step": 10190 + }, + { + "epoch": 2.48, + "learning_rate": 6.638678314955882e-06, + "loss": 0.0108, + "step": 10192 + }, + { + "epoch": 2.48, + "learning_rate": 6.634962193479057e-06, + "loss": 0.0113, + "step": 10194 + }, + { + "epoch": 2.48, + "learning_rate": 6.631246595964354e-06, + "loss": 0.0139, + "step": 10196 + }, + { + "epoch": 2.48, + "learning_rate": 6.627531522990311e-06, + "loss": 0.0217, + "step": 10198 + }, + { + "epoch": 2.49, + "learning_rate": 6.623816975135398e-06, + "loss": 0.0177, + "step": 10200 + }, + { + "epoch": 2.49, + "learning_rate": 6.620102952978e-06, + "loss": 0.0096, + "step": 10202 + }, + { + "epoch": 2.49, + "learning_rate": 6.616389457096417e-06, + "loss": 0.01, + "step": 10204 + }, + { + "epoch": 2.49, + "learning_rate": 6.61267648806887e-06, + "loss": 0.0123, + "step": 10206 + }, + { + "epoch": 2.49, + "learning_rate": 6.608964046473491e-06, + "loss": 0.0145, + "step": 10208 + }, + { + "epoch": 2.49, + "learning_rate": 6.605252132888345e-06, + "loss": 0.0123, + "step": 10210 + }, + { + "epoch": 2.49, + "learning_rate": 6.6015407478914e-06, + "loss": 0.0123, + "step": 10212 + }, + { + "epoch": 2.49, + "learning_rate": 6.597829892060544e-06, + "loss": 0.0151, + "step": 10214 + }, + { + "epoch": 2.49, + "learning_rate": 6.594119565973589e-06, + "loss": 0.0147, + "step": 10216 + }, + { + "epoch": 2.49, + "learning_rate": 6.5904097702082635e-06, + "loss": 0.0127, + "step": 10218 + }, + { + "epoch": 2.49, + "learning_rate": 6.586700505342212e-06, + "loss": 0.0095, + "step": 10220 + }, + { + "epoch": 2.49, + "learning_rate": 6.582991771952986e-06, + "loss": 0.0146, + "step": 10222 + }, + { + "epoch": 2.49, + "learning_rate": 6.5792835706180725e-06, + "loss": 0.0133, + "step": 10224 + }, + { + "epoch": 2.49, + "learning_rate": 6.5755759019148616e-06, + "loss": 0.0155, + "step": 10226 + }, + { + "epoch": 2.49, + "learning_rate": 6.571868766420672e-06, + "loss": 0.0127, + "step": 10228 + }, + { + "epoch": 2.49, + "learning_rate": 6.568162164712727e-06, + "loss": 0.0148, + "step": 10230 + }, + { + "epoch": 2.49, + "learning_rate": 6.564456097368176e-06, + "loss": 0.0095, + "step": 10232 + }, + { + "epoch": 2.49, + "learning_rate": 6.560750564964079e-06, + "loss": 0.0073, + "step": 10234 + }, + { + "epoch": 2.49, + "learning_rate": 6.557045568077422e-06, + "loss": 0.0126, + "step": 10236 + }, + { + "epoch": 2.49, + "learning_rate": 6.553341107285092e-06, + "loss": 0.0202, + "step": 10238 + }, + { + "epoch": 2.5, + "learning_rate": 6.549637183163911e-06, + "loss": 0.0122, + "step": 10240 + }, + { + "epoch": 2.5, + "learning_rate": 6.5459337962906e-06, + "loss": 0.0146, + "step": 10242 + }, + { + "epoch": 2.5, + "learning_rate": 6.542230947241815e-06, + "loss": 0.0085, + "step": 10244 + }, + { + "epoch": 2.5, + "learning_rate": 6.538528636594108e-06, + "loss": 0.0157, + "step": 10246 + }, + { + "epoch": 2.5, + "learning_rate": 6.53482686492396e-06, + "loss": 0.0123, + "step": 10248 + }, + { + "epoch": 2.5, + "learning_rate": 6.531125632807767e-06, + "loss": 0.012, + "step": 10250 + }, + { + "epoch": 2.5, + "learning_rate": 6.52742494082184e-06, + "loss": 0.0161, + "step": 10252 + }, + { + "epoch": 2.5, + "learning_rate": 6.523724789542404e-06, + "loss": 0.0084, + "step": 10254 + }, + { + "epoch": 2.5, + "learning_rate": 6.520025179545597e-06, + "loss": 0.0177, + "step": 10256 + }, + { + "epoch": 2.5, + "learning_rate": 6.516326111407478e-06, + "loss": 0.015, + "step": 10258 + }, + { + "epoch": 2.5, + "learning_rate": 6.512627585704028e-06, + "loss": 0.008, + "step": 10260 + }, + { + "epoch": 2.5, + "learning_rate": 6.508929603011127e-06, + "loss": 0.0166, + "step": 10262 + }, + { + "epoch": 2.5, + "learning_rate": 6.50523216390458e-06, + "loss": 0.0187, + "step": 10264 + }, + { + "epoch": 2.5, + "learning_rate": 6.501535268960109e-06, + "loss": 0.0083, + "step": 10266 + }, + { + "epoch": 2.5, + "learning_rate": 6.497838918753352e-06, + "loss": 0.0102, + "step": 10268 + }, + { + "epoch": 2.5, + "learning_rate": 6.49414311385985e-06, + "loss": 0.0068, + "step": 10270 + }, + { + "epoch": 2.5, + "learning_rate": 6.4904478548550746e-06, + "loss": 0.009, + "step": 10272 + }, + { + "epoch": 2.5, + "learning_rate": 6.486753142314406e-06, + "loss": 0.015, + "step": 10274 + }, + { + "epoch": 2.5, + "learning_rate": 6.483058976813139e-06, + "loss": 0.0126, + "step": 10276 + }, + { + "epoch": 2.5, + "learning_rate": 6.479365358926477e-06, + "loss": 0.0074, + "step": 10278 + }, + { + "epoch": 2.5, + "learning_rate": 6.475672289229555e-06, + "loss": 0.0134, + "step": 10280 + }, + { + "epoch": 2.51, + "learning_rate": 6.471979768297402e-06, + "loss": 0.0106, + "step": 10282 + }, + { + "epoch": 2.51, + "learning_rate": 6.468287796704982e-06, + "loss": 0.0104, + "step": 10284 + }, + { + "epoch": 2.51, + "learning_rate": 6.4645963750271525e-06, + "loss": 0.0093, + "step": 10286 + }, + { + "epoch": 2.51, + "learning_rate": 6.4609055038387045e-06, + "loss": 0.0043, + "step": 10288 + }, + { + "epoch": 2.51, + "learning_rate": 6.4572151837143295e-06, + "loss": 0.0154, + "step": 10290 + }, + { + "epoch": 2.51, + "learning_rate": 6.453525415228645e-06, + "loss": 0.0152, + "step": 10292 + }, + { + "epoch": 2.51, + "learning_rate": 6.449836198956168e-06, + "loss": 0.0079, + "step": 10294 + }, + { + "epoch": 2.51, + "learning_rate": 6.4461475354713435e-06, + "loss": 0.0161, + "step": 10296 + }, + { + "epoch": 2.51, + "learning_rate": 6.442459425348522e-06, + "loss": 0.0104, + "step": 10298 + }, + { + "epoch": 2.51, + "learning_rate": 6.4387718691619735e-06, + "loss": 0.0116, + "step": 10300 + }, + { + "epoch": 2.51, + "learning_rate": 6.435084867485875e-06, + "loss": 0.0076, + "step": 10302 + }, + { + "epoch": 2.51, + "learning_rate": 6.431398420894322e-06, + "loss": 0.0146, + "step": 10304 + }, + { + "epoch": 2.51, + "learning_rate": 6.42771252996132e-06, + "loss": 0.0126, + "step": 10306 + }, + { + "epoch": 2.51, + "learning_rate": 6.424027195260798e-06, + "loss": 0.0124, + "step": 10308 + }, + { + "epoch": 2.51, + "learning_rate": 6.420342417366584e-06, + "loss": 0.0017, + "step": 10310 + }, + { + "epoch": 2.51, + "learning_rate": 6.416658196852426e-06, + "loss": 0.0137, + "step": 10312 + }, + { + "epoch": 2.51, + "learning_rate": 6.412974534291988e-06, + "loss": 0.0175, + "step": 10314 + }, + { + "epoch": 2.51, + "learning_rate": 6.409291430258847e-06, + "loss": 0.0082, + "step": 10316 + }, + { + "epoch": 2.51, + "learning_rate": 6.405608885326486e-06, + "loss": 0.0068, + "step": 10318 + }, + { + "epoch": 2.51, + "learning_rate": 6.401926900068304e-06, + "loss": 0.0082, + "step": 10320 + }, + { + "epoch": 2.52, + "learning_rate": 6.3982454750576185e-06, + "loss": 0.0104, + "step": 10322 + }, + { + "epoch": 2.52, + "learning_rate": 6.394564610867656e-06, + "loss": 0.0115, + "step": 10324 + }, + { + "epoch": 2.52, + "learning_rate": 6.39088430807155e-06, + "loss": 0.0149, + "step": 10326 + }, + { + "epoch": 2.52, + "learning_rate": 6.3872045672423545e-06, + "loss": 0.0098, + "step": 10328 + }, + { + "epoch": 2.52, + "learning_rate": 6.383525388953036e-06, + "loss": 0.0144, + "step": 10330 + }, + { + "epoch": 2.52, + "learning_rate": 6.379846773776469e-06, + "loss": 0.0182, + "step": 10332 + }, + { + "epoch": 2.52, + "learning_rate": 6.376168722285438e-06, + "loss": 0.0096, + "step": 10334 + }, + { + "epoch": 2.52, + "learning_rate": 6.3724912350526465e-06, + "loss": 0.0189, + "step": 10336 + }, + { + "epoch": 2.52, + "learning_rate": 6.368814312650712e-06, + "loss": 0.0121, + "step": 10338 + }, + { + "epoch": 2.52, + "learning_rate": 6.365137955652156e-06, + "loss": 0.0111, + "step": 10340 + }, + { + "epoch": 2.52, + "learning_rate": 6.36146216462941e-06, + "loss": 0.0099, + "step": 10342 + }, + { + "epoch": 2.52, + "learning_rate": 6.357786940154832e-06, + "loss": 0.0058, + "step": 10344 + }, + { + "epoch": 2.52, + "learning_rate": 6.354112282800675e-06, + "loss": 0.0102, + "step": 10346 + }, + { + "epoch": 2.52, + "learning_rate": 6.350438193139118e-06, + "loss": 0.0128, + "step": 10348 + }, + { + "epoch": 2.52, + "learning_rate": 6.346764671742238e-06, + "loss": 0.016, + "step": 10350 + }, + { + "epoch": 2.52, + "learning_rate": 6.3430917191820354e-06, + "loss": 0.0173, + "step": 10352 + }, + { + "epoch": 2.52, + "learning_rate": 6.339419336030412e-06, + "loss": 0.009, + "step": 10354 + }, + { + "epoch": 2.52, + "learning_rate": 6.335747522859195e-06, + "loss": 0.01, + "step": 10356 + }, + { + "epoch": 2.52, + "learning_rate": 6.332076280240103e-06, + "loss": 0.0157, + "step": 10358 + }, + { + "epoch": 2.52, + "learning_rate": 6.328405608744783e-06, + "loss": 0.0083, + "step": 10360 + }, + { + "epoch": 2.52, + "learning_rate": 6.324735508944783e-06, + "loss": 0.0108, + "step": 10362 + }, + { + "epoch": 2.53, + "learning_rate": 6.321065981411574e-06, + "loss": 0.0125, + "step": 10364 + }, + { + "epoch": 2.53, + "learning_rate": 6.317397026716519e-06, + "loss": 0.0059, + "step": 10366 + }, + { + "epoch": 2.53, + "learning_rate": 6.313728645430906e-06, + "loss": 0.0078, + "step": 10368 + }, + { + "epoch": 2.53, + "learning_rate": 6.310060838125929e-06, + "loss": 0.0077, + "step": 10370 + }, + { + "epoch": 2.53, + "learning_rate": 6.3063936053726994e-06, + "loss": 0.0148, + "step": 10372 + }, + { + "epoch": 2.53, + "learning_rate": 6.302726947742228e-06, + "loss": 0.0064, + "step": 10374 + }, + { + "epoch": 2.53, + "learning_rate": 6.29906086580544e-06, + "loss": 0.0113, + "step": 10376 + }, + { + "epoch": 2.53, + "learning_rate": 6.295395360133177e-06, + "loss": 0.0119, + "step": 10378 + }, + { + "epoch": 2.53, + "learning_rate": 6.2917304312961865e-06, + "loss": 0.0064, + "step": 10380 + }, + { + "epoch": 2.53, + "learning_rate": 6.288066079865121e-06, + "loss": 0.0118, + "step": 10382 + }, + { + "epoch": 2.53, + "learning_rate": 6.2844023064105484e-06, + "loss": 0.0148, + "step": 10384 + }, + { + "epoch": 2.53, + "learning_rate": 6.280739111502951e-06, + "loss": 0.0096, + "step": 10386 + }, + { + "epoch": 2.53, + "learning_rate": 6.277076495712718e-06, + "loss": 0.0071, + "step": 10388 + }, + { + "epoch": 2.53, + "learning_rate": 6.2734144596101364e-06, + "loss": 0.0129, + "step": 10390 + }, + { + "epoch": 2.53, + "learning_rate": 6.26975300376542e-06, + "loss": 0.0076, + "step": 10392 + }, + { + "epoch": 2.53, + "learning_rate": 6.266092128748687e-06, + "loss": 0.0062, + "step": 10394 + }, + { + "epoch": 2.53, + "learning_rate": 6.262431835129963e-06, + "loss": 0.0171, + "step": 10396 + }, + { + "epoch": 2.53, + "learning_rate": 6.2587721234791774e-06, + "loss": 0.0168, + "step": 10398 + }, + { + "epoch": 2.53, + "learning_rate": 6.2551129943661825e-06, + "loss": 0.0126, + "step": 10400 + }, + { + "epoch": 2.53, + "learning_rate": 6.251454448360729e-06, + "loss": 0.0073, + "step": 10402 + }, + { + "epoch": 2.54, + "learning_rate": 6.2477964860324844e-06, + "loss": 0.0189, + "step": 10404 + }, + { + "epoch": 2.54, + "learning_rate": 6.244139107951017e-06, + "loss": 0.0055, + "step": 10406 + }, + { + "epoch": 2.54, + "learning_rate": 6.240482314685811e-06, + "loss": 0.0117, + "step": 10408 + }, + { + "epoch": 2.54, + "learning_rate": 6.236826106806253e-06, + "loss": 0.0074, + "step": 10410 + }, + { + "epoch": 2.54, + "learning_rate": 6.233170484881652e-06, + "loss": 0.0185, + "step": 10412 + }, + { + "epoch": 2.54, + "learning_rate": 6.229515449481205e-06, + "loss": 0.0093, + "step": 10414 + }, + { + "epoch": 2.54, + "learning_rate": 6.225861001174036e-06, + "loss": 0.0148, + "step": 10416 + }, + { + "epoch": 2.54, + "learning_rate": 6.222207140529167e-06, + "loss": 0.0112, + "step": 10418 + }, + { + "epoch": 2.54, + "learning_rate": 6.218553868115538e-06, + "loss": 0.0103, + "step": 10420 + }, + { + "epoch": 2.54, + "learning_rate": 6.214901184501984e-06, + "loss": 0.0092, + "step": 10422 + }, + { + "epoch": 2.54, + "learning_rate": 6.211249090257259e-06, + "loss": 0.0095, + "step": 10424 + }, + { + "epoch": 2.54, + "learning_rate": 6.20759758595002e-06, + "loss": 0.0123, + "step": 10426 + }, + { + "epoch": 2.54, + "learning_rate": 6.2039466721488405e-06, + "loss": 0.0098, + "step": 10428 + }, + { + "epoch": 2.54, + "learning_rate": 6.20029634942219e-06, + "loss": 0.0106, + "step": 10430 + }, + { + "epoch": 2.54, + "learning_rate": 6.19664661833845e-06, + "loss": 0.0074, + "step": 10432 + }, + { + "epoch": 2.54, + "learning_rate": 6.192997479465914e-06, + "loss": 0.015, + "step": 10434 + }, + { + "epoch": 2.54, + "learning_rate": 6.189348933372787e-06, + "loss": 0.0095, + "step": 10436 + }, + { + "epoch": 2.54, + "learning_rate": 6.185700980627167e-06, + "loss": 0.0063, + "step": 10438 + }, + { + "epoch": 2.54, + "learning_rate": 6.182053621797068e-06, + "loss": 0.0142, + "step": 10440 + }, + { + "epoch": 2.54, + "learning_rate": 6.178406857450417e-06, + "loss": 0.0157, + "step": 10442 + }, + { + "epoch": 2.54, + "learning_rate": 6.174760688155044e-06, + "loss": 0.0135, + "step": 10444 + }, + { + "epoch": 2.55, + "learning_rate": 6.171115114478677e-06, + "loss": 0.0107, + "step": 10446 + }, + { + "epoch": 2.55, + "learning_rate": 6.167470136988964e-06, + "loss": 0.003, + "step": 10448 + }, + { + "epoch": 2.55, + "learning_rate": 6.163825756253461e-06, + "loss": 0.0075, + "step": 10450 + }, + { + "epoch": 2.55, + "learning_rate": 6.1601819728396216e-06, + "loss": 0.0155, + "step": 10452 + }, + { + "epoch": 2.55, + "learning_rate": 6.156538787314808e-06, + "loss": 0.0164, + "step": 10454 + }, + { + "epoch": 2.55, + "learning_rate": 6.152896200246297e-06, + "loss": 0.01, + "step": 10456 + }, + { + "epoch": 2.55, + "learning_rate": 6.149254212201261e-06, + "loss": 0.0061, + "step": 10458 + }, + { + "epoch": 2.55, + "learning_rate": 6.145612823746795e-06, + "loss": 0.0114, + "step": 10460 + }, + { + "epoch": 2.55, + "learning_rate": 6.141972035449881e-06, + "loss": 0.0159, + "step": 10462 + }, + { + "epoch": 2.55, + "learning_rate": 6.1383318478774255e-06, + "loss": 0.0092, + "step": 10464 + }, + { + "epoch": 2.55, + "learning_rate": 6.134692261596227e-06, + "loss": 0.0128, + "step": 10466 + }, + { + "epoch": 2.55, + "learning_rate": 6.131053277173003e-06, + "loss": 0.0094, + "step": 10468 + }, + { + "epoch": 2.55, + "learning_rate": 6.127414895174366e-06, + "loss": 0.0189, + "step": 10470 + }, + { + "epoch": 2.55, + "learning_rate": 6.123777116166844e-06, + "loss": 0.0069, + "step": 10472 + }, + { + "epoch": 2.55, + "learning_rate": 6.120139940716862e-06, + "loss": 0.0061, + "step": 10474 + }, + { + "epoch": 2.55, + "learning_rate": 6.116503369390764e-06, + "loss": 0.017, + "step": 10476 + }, + { + "epoch": 2.55, + "learning_rate": 6.112867402754785e-06, + "loss": 0.0162, + "step": 10478 + }, + { + "epoch": 2.55, + "learning_rate": 6.1092320413750725e-06, + "loss": 0.006, + "step": 10480 + }, + { + "epoch": 2.55, + "learning_rate": 6.105597285817682e-06, + "loss": 0.0069, + "step": 10482 + }, + { + "epoch": 2.55, + "learning_rate": 6.1019631366485785e-06, + "loss": 0.0195, + "step": 10484 + }, + { + "epoch": 2.56, + "learning_rate": 6.09832959443362e-06, + "loss": 0.0135, + "step": 10486 + }, + { + "epoch": 2.56, + "learning_rate": 6.094696659738575e-06, + "loss": 0.0094, + "step": 10488 + }, + { + "epoch": 2.56, + "learning_rate": 6.091064333129123e-06, + "loss": 0.0122, + "step": 10490 + }, + { + "epoch": 2.56, + "learning_rate": 6.087432615170849e-06, + "loss": 0.0074, + "step": 10492 + }, + { + "epoch": 2.56, + "learning_rate": 6.0838015064292325e-06, + "loss": 0.0127, + "step": 10494 + }, + { + "epoch": 2.56, + "learning_rate": 6.080171007469664e-06, + "loss": 0.0098, + "step": 10496 + }, + { + "epoch": 2.56, + "learning_rate": 6.076541118857448e-06, + "loss": 0.0142, + "step": 10498 + }, + { + "epoch": 2.56, + "learning_rate": 6.07291184115778e-06, + "loss": 0.0136, + "step": 10500 + }, + { + "epoch": 2.56, + "learning_rate": 6.069283174935766e-06, + "loss": 0.0149, + "step": 10502 + }, + { + "epoch": 2.56, + "learning_rate": 6.065655120756417e-06, + "loss": 0.0119, + "step": 10504 + }, + { + "epoch": 2.56, + "learning_rate": 6.062027679184653e-06, + "loss": 0.0124, + "step": 10506 + }, + { + "epoch": 2.56, + "learning_rate": 6.058400850785293e-06, + "loss": 0.0088, + "step": 10508 + }, + { + "epoch": 2.56, + "learning_rate": 6.054774636123058e-06, + "loss": 0.0107, + "step": 10510 + }, + { + "epoch": 2.56, + "learning_rate": 6.051149035762578e-06, + "loss": 0.013, + "step": 10512 + }, + { + "epoch": 2.56, + "learning_rate": 6.047524050268392e-06, + "loss": 0.0104, + "step": 10514 + }, + { + "epoch": 2.56, + "learning_rate": 6.043899680204937e-06, + "loss": 0.0077, + "step": 10516 + }, + { + "epoch": 2.56, + "learning_rate": 6.040275926136547e-06, + "loss": 0.0138, + "step": 10518 + }, + { + "epoch": 2.56, + "learning_rate": 6.036652788627477e-06, + "loss": 0.0157, + "step": 10520 + }, + { + "epoch": 2.56, + "learning_rate": 6.033030268241871e-06, + "loss": 0.0064, + "step": 10522 + }, + { + "epoch": 2.56, + "learning_rate": 6.029408365543792e-06, + "loss": 0.0137, + "step": 10524 + }, + { + "epoch": 2.56, + "learning_rate": 6.025787081097188e-06, + "loss": 0.0159, + "step": 10526 + }, + { + "epoch": 2.57, + "learning_rate": 6.022166415465925e-06, + "loss": 0.0268, + "step": 10528 + }, + { + "epoch": 2.57, + "learning_rate": 6.0185463692137666e-06, + "loss": 0.0108, + "step": 10530 + }, + { + "epoch": 2.57, + "learning_rate": 6.014926942904388e-06, + "loss": 0.0054, + "step": 10532 + }, + { + "epoch": 2.57, + "learning_rate": 6.011308137101355e-06, + "loss": 0.0074, + "step": 10534 + }, + { + "epoch": 2.57, + "learning_rate": 6.007689952368144e-06, + "loss": 0.0118, + "step": 10536 + }, + { + "epoch": 2.57, + "learning_rate": 6.004072389268134e-06, + "loss": 0.0085, + "step": 10538 + }, + { + "epoch": 2.57, + "learning_rate": 6.0004554483646135e-06, + "loss": 0.0085, + "step": 10540 + }, + { + "epoch": 2.57, + "learning_rate": 5.996839130220761e-06, + "loss": 0.0138, + "step": 10542 + }, + { + "epoch": 2.57, + "learning_rate": 5.993223435399663e-06, + "loss": 0.0068, + "step": 10544 + }, + { + "epoch": 2.57, + "learning_rate": 5.989608364464317e-06, + "loss": 0.0113, + "step": 10546 + }, + { + "epoch": 2.57, + "learning_rate": 5.9859939179776164e-06, + "loss": 0.0078, + "step": 10548 + }, + { + "epoch": 2.57, + "learning_rate": 5.982380096502355e-06, + "loss": 0.0091, + "step": 10550 + }, + { + "epoch": 2.57, + "learning_rate": 5.978766900601232e-06, + "loss": 0.0098, + "step": 10552 + }, + { + "epoch": 2.57, + "learning_rate": 5.975154330836854e-06, + "loss": 0.0102, + "step": 10554 + }, + { + "epoch": 2.57, + "learning_rate": 5.971542387771725e-06, + "loss": 0.0094, + "step": 10556 + }, + { + "epoch": 2.57, + "learning_rate": 5.967931071968246e-06, + "loss": 0.0062, + "step": 10558 + }, + { + "epoch": 2.57, + "learning_rate": 5.964320383988731e-06, + "loss": 0.017, + "step": 10560 + }, + { + "epoch": 2.57, + "learning_rate": 5.960710324395394e-06, + "loss": 0.0115, + "step": 10562 + }, + { + "epoch": 2.57, + "learning_rate": 5.957100893750349e-06, + "loss": 0.0073, + "step": 10564 + }, + { + "epoch": 2.57, + "learning_rate": 5.9534920926156044e-06, + "loss": 0.0121, + "step": 10566 + }, + { + "epoch": 2.58, + "learning_rate": 5.9498839215530846e-06, + "loss": 0.0122, + "step": 10568 + }, + { + "epoch": 2.58, + "learning_rate": 5.9462763811246095e-06, + "loss": 0.0111, + "step": 10570 + }, + { + "epoch": 2.58, + "learning_rate": 5.9426694718919e-06, + "loss": 0.0117, + "step": 10572 + }, + { + "epoch": 2.58, + "learning_rate": 5.9390631944165764e-06, + "loss": 0.0084, + "step": 10574 + }, + { + "epoch": 2.58, + "learning_rate": 5.935457549260167e-06, + "loss": 0.0136, + "step": 10576 + }, + { + "epoch": 2.58, + "learning_rate": 5.931852536984096e-06, + "loss": 0.009, + "step": 10578 + }, + { + "epoch": 2.58, + "learning_rate": 5.928248158149697e-06, + "loss": 0.0111, + "step": 10580 + }, + { + "epoch": 2.58, + "learning_rate": 5.9246444133181905e-06, + "loss": 0.0069, + "step": 10582 + }, + { + "epoch": 2.58, + "learning_rate": 5.921041303050713e-06, + "loss": 0.0072, + "step": 10584 + }, + { + "epoch": 2.58, + "learning_rate": 5.917438827908293e-06, + "loss": 0.0122, + "step": 10586 + }, + { + "epoch": 2.58, + "learning_rate": 5.913836988451871e-06, + "loss": 0.0061, + "step": 10588 + }, + { + "epoch": 2.58, + "learning_rate": 5.9102357852422695e-06, + "loss": 0.0112, + "step": 10590 + }, + { + "epoch": 2.58, + "learning_rate": 5.906635218840233e-06, + "loss": 0.0127, + "step": 10592 + }, + { + "epoch": 2.58, + "learning_rate": 5.903035289806389e-06, + "loss": 0.0091, + "step": 10594 + }, + { + "epoch": 2.58, + "learning_rate": 5.899435998701285e-06, + "loss": 0.0106, + "step": 10596 + }, + { + "epoch": 2.58, + "learning_rate": 5.895837346085349e-06, + "loss": 0.0122, + "step": 10598 + }, + { + "epoch": 2.58, + "learning_rate": 5.892239332518919e-06, + "loss": 0.0081, + "step": 10600 + }, + { + "epoch": 2.58, + "learning_rate": 5.888641958562236e-06, + "loss": 0.0098, + "step": 10602 + }, + { + "epoch": 2.58, + "learning_rate": 5.885045224775441e-06, + "loss": 0.0131, + "step": 10604 + }, + { + "epoch": 2.58, + "learning_rate": 5.881449131718568e-06, + "loss": 0.0168, + "step": 10606 + }, + { + "epoch": 2.58, + "learning_rate": 5.877853679951557e-06, + "loss": 0.0173, + "step": 10608 + }, + { + "epoch": 2.59, + "learning_rate": 5.8742588700342505e-06, + "loss": 0.0098, + "step": 10610 + }, + { + "epoch": 2.59, + "learning_rate": 5.870664702526387e-06, + "loss": 0.0067, + "step": 10612 + }, + { + "epoch": 2.59, + "learning_rate": 5.867071177987604e-06, + "loss": 0.0151, + "step": 10614 + }, + { + "epoch": 2.59, + "learning_rate": 5.8634782969774395e-06, + "loss": 0.0109, + "step": 10616 + }, + { + "epoch": 2.59, + "learning_rate": 5.859886060055338e-06, + "loss": 0.0069, + "step": 10618 + }, + { + "epoch": 2.59, + "learning_rate": 5.8562944677806346e-06, + "loss": 0.0134, + "step": 10620 + }, + { + "epoch": 2.59, + "learning_rate": 5.8527035207125656e-06, + "loss": 0.0088, + "step": 10622 + }, + { + "epoch": 2.59, + "learning_rate": 5.849113219410272e-06, + "loss": 0.0118, + "step": 10624 + }, + { + "epoch": 2.59, + "learning_rate": 5.845523564432791e-06, + "loss": 0.0104, + "step": 10626 + }, + { + "epoch": 2.59, + "learning_rate": 5.841934556339062e-06, + "loss": 0.0151, + "step": 10628 + }, + { + "epoch": 2.59, + "learning_rate": 5.838346195687915e-06, + "loss": 0.0098, + "step": 10630 + }, + { + "epoch": 2.59, + "learning_rate": 5.834758483038087e-06, + "loss": 0.0123, + "step": 10632 + }, + { + "epoch": 2.59, + "learning_rate": 5.8311714189482115e-06, + "loss": 0.0137, + "step": 10634 + }, + { + "epoch": 2.59, + "learning_rate": 5.82758500397683e-06, + "loss": 0.0087, + "step": 10636 + }, + { + "epoch": 2.59, + "learning_rate": 5.823999238682363e-06, + "loss": 0.0121, + "step": 10638 + }, + { + "epoch": 2.59, + "learning_rate": 5.8204141236231485e-06, + "loss": 0.0104, + "step": 10640 + }, + { + "epoch": 2.59, + "learning_rate": 5.816829659357417e-06, + "loss": 0.0177, + "step": 10642 + }, + { + "epoch": 2.59, + "learning_rate": 5.813245846443295e-06, + "loss": 0.0169, + "step": 10644 + }, + { + "epoch": 2.59, + "learning_rate": 5.809662685438806e-06, + "loss": 0.0071, + "step": 10646 + }, + { + "epoch": 2.59, + "learning_rate": 5.806080176901879e-06, + "loss": 0.0133, + "step": 10648 + }, + { + "epoch": 2.6, + "learning_rate": 5.8024983213903374e-06, + "loss": 0.0064, + "step": 10650 + }, + { + "epoch": 2.6, + "learning_rate": 5.798917119461908e-06, + "loss": 0.0059, + "step": 10652 + }, + { + "epoch": 2.6, + "learning_rate": 5.795336571674203e-06, + "loss": 0.0094, + "step": 10654 + }, + { + "epoch": 2.6, + "learning_rate": 5.791756678584746e-06, + "loss": 0.0103, + "step": 10656 + }, + { + "epoch": 2.6, + "learning_rate": 5.788177440750958e-06, + "loss": 0.0109, + "step": 10658 + }, + { + "epoch": 2.6, + "learning_rate": 5.784598858730146e-06, + "loss": 0.0067, + "step": 10660 + }, + { + "epoch": 2.6, + "learning_rate": 5.781020933079524e-06, + "loss": 0.0089, + "step": 10662 + }, + { + "epoch": 2.6, + "learning_rate": 5.777443664356203e-06, + "loss": 0.0142, + "step": 10664 + }, + { + "epoch": 2.6, + "learning_rate": 5.773867053117192e-06, + "loss": 0.0147, + "step": 10666 + }, + { + "epoch": 2.6, + "learning_rate": 5.7702910999194e-06, + "loss": 0.0105, + "step": 10668 + }, + { + "epoch": 2.6, + "learning_rate": 5.766715805319623e-06, + "loss": 0.0058, + "step": 10670 + }, + { + "epoch": 2.6, + "learning_rate": 5.76314116987457e-06, + "loss": 0.0097, + "step": 10672 + }, + { + "epoch": 2.6, + "learning_rate": 5.759567194140834e-06, + "loss": 0.0142, + "step": 10674 + }, + { + "epoch": 2.6, + "learning_rate": 5.755993878674908e-06, + "loss": 0.0119, + "step": 10676 + }, + { + "epoch": 2.6, + "learning_rate": 5.752421224033187e-06, + "loss": 0.011, + "step": 10678 + }, + { + "epoch": 2.6, + "learning_rate": 5.74884923077196e-06, + "loss": 0.0136, + "step": 10680 + }, + { + "epoch": 2.6, + "learning_rate": 5.745277899447421e-06, + "loss": 0.0132, + "step": 10682 + }, + { + "epoch": 2.6, + "learning_rate": 5.741707230615643e-06, + "loss": 0.0099, + "step": 10684 + }, + { + "epoch": 2.6, + "learning_rate": 5.738137224832614e-06, + "loss": 0.0094, + "step": 10686 + }, + { + "epoch": 2.6, + "learning_rate": 5.734567882654204e-06, + "loss": 0.0151, + "step": 10688 + }, + { + "epoch": 2.6, + "learning_rate": 5.730999204636195e-06, + "loss": 0.0065, + "step": 10690 + }, + { + "epoch": 2.61, + "learning_rate": 5.727431191334249e-06, + "loss": 0.01, + "step": 10692 + }, + { + "epoch": 2.61, + "learning_rate": 5.723863843303938e-06, + "loss": 0.0113, + "step": 10694 + }, + { + "epoch": 2.61, + "learning_rate": 5.720297161100725e-06, + "loss": 0.0031, + "step": 10696 + }, + { + "epoch": 2.61, + "learning_rate": 5.7167311452799745e-06, + "loss": 0.0104, + "step": 10698 + }, + { + "epoch": 2.61, + "learning_rate": 5.713165796396931e-06, + "loss": 0.0108, + "step": 10700 + }, + { + "epoch": 2.61, + "learning_rate": 5.709601115006759e-06, + "loss": 0.0087, + "step": 10702 + }, + { + "epoch": 2.61, + "learning_rate": 5.706037101664495e-06, + "loss": 0.0117, + "step": 10704 + }, + { + "epoch": 2.61, + "learning_rate": 5.702473756925093e-06, + "loss": 0.0142, + "step": 10706 + }, + { + "epoch": 2.61, + "learning_rate": 5.698911081343386e-06, + "loss": 0.0122, + "step": 10708 + }, + { + "epoch": 2.61, + "learning_rate": 5.69534907547411e-06, + "loss": 0.0132, + "step": 10710 + }, + { + "epoch": 2.61, + "learning_rate": 5.691787739871901e-06, + "loss": 0.0085, + "step": 10712 + }, + { + "epoch": 2.61, + "learning_rate": 5.688227075091288e-06, + "loss": 0.0137, + "step": 10714 + }, + { + "epoch": 2.61, + "learning_rate": 5.68466708168669e-06, + "loss": 0.0102, + "step": 10716 + }, + { + "epoch": 2.61, + "learning_rate": 5.681107760212422e-06, + "loss": 0.0135, + "step": 10718 + }, + { + "epoch": 2.61, + "learning_rate": 5.6775491112227e-06, + "loss": 0.0211, + "step": 10720 + }, + { + "epoch": 2.61, + "learning_rate": 5.673991135271637e-06, + "loss": 0.0136, + "step": 10722 + }, + { + "epoch": 2.61, + "learning_rate": 5.670433832913231e-06, + "loss": 0.0135, + "step": 10724 + }, + { + "epoch": 2.61, + "learning_rate": 5.666877204701383e-06, + "loss": 0.0175, + "step": 10726 + }, + { + "epoch": 2.61, + "learning_rate": 5.663321251189893e-06, + "loss": 0.0111, + "step": 10728 + }, + { + "epoch": 2.61, + "learning_rate": 5.659765972932445e-06, + "loss": 0.0077, + "step": 10730 + }, + { + "epoch": 2.62, + "learning_rate": 5.656211370482618e-06, + "loss": 0.0137, + "step": 10732 + }, + { + "epoch": 2.62, + "learning_rate": 5.652657444393898e-06, + "loss": 0.0064, + "step": 10734 + }, + { + "epoch": 2.62, + "learning_rate": 5.649104195219654e-06, + "loss": 0.0127, + "step": 10736 + }, + { + "epoch": 2.62, + "learning_rate": 5.645551623513163e-06, + "loss": 0.0078, + "step": 10738 + }, + { + "epoch": 2.62, + "learning_rate": 5.641999729827576e-06, + "loss": 0.0062, + "step": 10740 + }, + { + "epoch": 2.62, + "learning_rate": 5.638448514715959e-06, + "loss": 0.0103, + "step": 10742 + }, + { + "epoch": 2.62, + "learning_rate": 5.634897978731257e-06, + "loss": 0.0067, + "step": 10744 + }, + { + "epoch": 2.62, + "learning_rate": 5.6313481224263215e-06, + "loss": 0.0105, + "step": 10746 + }, + { + "epoch": 2.62, + "learning_rate": 5.627798946353884e-06, + "loss": 0.0062, + "step": 10748 + }, + { + "epoch": 2.62, + "learning_rate": 5.624250451066584e-06, + "loss": 0.0146, + "step": 10750 + }, + { + "epoch": 2.62, + "learning_rate": 5.6207026371169485e-06, + "loss": 0.0105, + "step": 10752 + }, + { + "epoch": 2.62, + "learning_rate": 5.6171555050574035e-06, + "loss": 0.0085, + "step": 10754 + }, + { + "epoch": 2.62, + "learning_rate": 5.613609055440256e-06, + "loss": 0.015, + "step": 10756 + }, + { + "epoch": 2.62, + "learning_rate": 5.610063288817723e-06, + "loss": 0.0162, + "step": 10758 + }, + { + "epoch": 2.62, + "learning_rate": 5.606518205741902e-06, + "loss": 0.0036, + "step": 10760 + }, + { + "epoch": 2.62, + "learning_rate": 5.602973806764794e-06, + "loss": 0.006, + "step": 10762 + }, + { + "epoch": 2.62, + "learning_rate": 5.599430092438285e-06, + "loss": 0.0117, + "step": 10764 + }, + { + "epoch": 2.62, + "learning_rate": 5.595887063314158e-06, + "loss": 0.0151, + "step": 10766 + }, + { + "epoch": 2.62, + "learning_rate": 5.5923447199440935e-06, + "loss": 0.008, + "step": 10768 + }, + { + "epoch": 2.62, + "learning_rate": 5.588803062879663e-06, + "loss": 0.014, + "step": 10770 + }, + { + "epoch": 2.62, + "learning_rate": 5.585262092672328e-06, + "loss": 0.0097, + "step": 10772 + }, + { + "epoch": 2.63, + "learning_rate": 5.581721809873437e-06, + "loss": 0.0054, + "step": 10774 + }, + { + "epoch": 2.63, + "learning_rate": 5.578182215034247e-06, + "loss": 0.0086, + "step": 10776 + }, + { + "epoch": 2.63, + "learning_rate": 5.5746433087059e-06, + "loss": 0.0097, + "step": 10778 + }, + { + "epoch": 2.63, + "learning_rate": 5.571105091439427e-06, + "loss": 0.0081, + "step": 10780 + }, + { + "epoch": 2.63, + "learning_rate": 5.567567563785758e-06, + "loss": 0.0059, + "step": 10782 + }, + { + "epoch": 2.63, + "learning_rate": 5.564030726295715e-06, + "loss": 0.0051, + "step": 10784 + }, + { + "epoch": 2.63, + "learning_rate": 5.560494579520008e-06, + "loss": 0.0142, + "step": 10786 + }, + { + "epoch": 2.63, + "learning_rate": 5.55695912400924e-06, + "loss": 0.015, + "step": 10788 + }, + { + "epoch": 2.63, + "learning_rate": 5.553424360313909e-06, + "loss": 0.0122, + "step": 10790 + }, + { + "epoch": 2.63, + "learning_rate": 5.549890288984408e-06, + "loss": 0.0113, + "step": 10792 + }, + { + "epoch": 2.63, + "learning_rate": 5.54635691057102e-06, + "loss": 0.0076, + "step": 10794 + }, + { + "epoch": 2.63, + "learning_rate": 5.542824225623914e-06, + "loss": 0.0106, + "step": 10796 + }, + { + "epoch": 2.63, + "learning_rate": 5.539292234693158e-06, + "loss": 0.0074, + "step": 10798 + }, + { + "epoch": 2.63, + "learning_rate": 5.535760938328714e-06, + "loss": 0.0049, + "step": 10800 + }, + { + "epoch": 2.63, + "learning_rate": 5.532230337080429e-06, + "loss": 0.0107, + "step": 10802 + }, + { + "epoch": 2.63, + "learning_rate": 5.52870043149804e-06, + "loss": 0.0073, + "step": 10804 + }, + { + "epoch": 2.63, + "learning_rate": 5.5251712221311834e-06, + "loss": 0.0131, + "step": 10806 + }, + { + "epoch": 2.63, + "learning_rate": 5.521642709529387e-06, + "loss": 0.0067, + "step": 10808 + }, + { + "epoch": 2.63, + "learning_rate": 5.518114894242067e-06, + "loss": 0.0128, + "step": 10810 + }, + { + "epoch": 2.63, + "learning_rate": 5.514587776818526e-06, + "loss": 0.0078, + "step": 10812 + }, + { + "epoch": 2.63, + "learning_rate": 5.511061357807971e-06, + "loss": 0.0125, + "step": 10814 + }, + { + "epoch": 2.64, + "learning_rate": 5.507535637759483e-06, + "loss": 0.0063, + "step": 10816 + }, + { + "epoch": 2.64, + "learning_rate": 5.504010617222053e-06, + "loss": 0.0068, + "step": 10818 + }, + { + "epoch": 2.64, + "learning_rate": 5.500486296744546e-06, + "loss": 0.0066, + "step": 10820 + }, + { + "epoch": 2.64, + "learning_rate": 5.496962676875728e-06, + "loss": 0.0121, + "step": 10822 + }, + { + "epoch": 2.64, + "learning_rate": 5.493439758164254e-06, + "loss": 0.0057, + "step": 10824 + }, + { + "epoch": 2.64, + "learning_rate": 5.489917541158674e-06, + "loss": 0.0117, + "step": 10826 + }, + { + "epoch": 2.64, + "learning_rate": 5.4863960264074215e-06, + "loss": 0.0059, + "step": 10828 + }, + { + "epoch": 2.64, + "learning_rate": 5.482875214458816e-06, + "loss": 0.0098, + "step": 10830 + }, + { + "epoch": 2.64, + "learning_rate": 5.479355105861081e-06, + "loss": 0.0161, + "step": 10832 + }, + { + "epoch": 2.64, + "learning_rate": 5.475835701162326e-06, + "loss": 0.0062, + "step": 10834 + }, + { + "epoch": 2.64, + "learning_rate": 5.472317000910545e-06, + "loss": 0.0122, + "step": 10836 + }, + { + "epoch": 2.64, + "learning_rate": 5.468799005653629e-06, + "loss": 0.0156, + "step": 10838 + }, + { + "epoch": 2.64, + "learning_rate": 5.465281715939359e-06, + "loss": 0.0084, + "step": 10840 + }, + { + "epoch": 2.64, + "learning_rate": 5.461765132315402e-06, + "loss": 0.0037, + "step": 10842 + }, + { + "epoch": 2.64, + "learning_rate": 5.458249255329311e-06, + "loss": 0.0048, + "step": 10844 + }, + { + "epoch": 2.64, + "learning_rate": 5.454734085528541e-06, + "loss": 0.013, + "step": 10846 + }, + { + "epoch": 2.64, + "learning_rate": 5.451219623460431e-06, + "loss": 0.0088, + "step": 10848 + }, + { + "epoch": 2.64, + "learning_rate": 5.447705869672211e-06, + "loss": 0.0178, + "step": 10850 + }, + { + "epoch": 2.64, + "learning_rate": 5.444192824710994e-06, + "loss": 0.0071, + "step": 10852 + }, + { + "epoch": 2.64, + "learning_rate": 5.440680489123791e-06, + "loss": 0.0062, + "step": 10854 + }, + { + "epoch": 2.65, + "learning_rate": 5.4371688634575024e-06, + "loss": 0.0117, + "step": 10856 + }, + { + "epoch": 2.65, + "learning_rate": 5.433657948258912e-06, + "loss": 0.0089, + "step": 10858 + }, + { + "epoch": 2.65, + "learning_rate": 5.430147744074693e-06, + "loss": 0.0055, + "step": 10860 + }, + { + "epoch": 2.65, + "learning_rate": 5.426638251451414e-06, + "loss": 0.0092, + "step": 10862 + }, + { + "epoch": 2.65, + "learning_rate": 5.423129470935531e-06, + "loss": 0.0051, + "step": 10864 + }, + { + "epoch": 2.65, + "learning_rate": 5.41962140307339e-06, + "loss": 0.0111, + "step": 10866 + }, + { + "epoch": 2.65, + "learning_rate": 5.4161140484112165e-06, + "loss": 0.0163, + "step": 10868 + }, + { + "epoch": 2.65, + "learning_rate": 5.412607407495143e-06, + "loss": 0.0126, + "step": 10870 + }, + { + "epoch": 2.65, + "learning_rate": 5.409101480871168e-06, + "loss": 0.0079, + "step": 10872 + }, + { + "epoch": 2.65, + "learning_rate": 5.4055962690852025e-06, + "loss": 0.0057, + "step": 10874 + }, + { + "epoch": 2.65, + "learning_rate": 5.402091772683027e-06, + "loss": 0.0079, + "step": 10876 + }, + { + "epoch": 2.65, + "learning_rate": 5.39858799221032e-06, + "loss": 0.0072, + "step": 10878 + }, + { + "epoch": 2.65, + "learning_rate": 5.395084928212648e-06, + "loss": 0.0107, + "step": 10880 + }, + { + "epoch": 2.65, + "learning_rate": 5.391582581235468e-06, + "loss": 0.0132, + "step": 10882 + }, + { + "epoch": 2.65, + "learning_rate": 5.388080951824121e-06, + "loss": 0.0103, + "step": 10884 + }, + { + "epoch": 2.65, + "learning_rate": 5.38458004052383e-06, + "loss": 0.0076, + "step": 10886 + }, + { + "epoch": 2.65, + "learning_rate": 5.38107984787972e-06, + "loss": 0.0104, + "step": 10888 + }, + { + "epoch": 2.65, + "learning_rate": 5.377580374436801e-06, + "loss": 0.0071, + "step": 10890 + }, + { + "epoch": 2.65, + "learning_rate": 5.374081620739959e-06, + "loss": 0.0075, + "step": 10892 + }, + { + "epoch": 2.65, + "learning_rate": 5.3705835873339814e-06, + "loss": 0.0058, + "step": 10894 + }, + { + "epoch": 2.65, + "learning_rate": 5.367086274763544e-06, + "loss": 0.0094, + "step": 10896 + }, + { + "epoch": 2.66, + "learning_rate": 5.3635896835731945e-06, + "loss": 0.0072, + "step": 10898 + }, + { + "epoch": 2.66, + "learning_rate": 5.36009381430739e-06, + "loss": 0.0105, + "step": 10900 + }, + { + "epoch": 2.66, + "learning_rate": 5.356598667510453e-06, + "loss": 0.0052, + "step": 10902 + }, + { + "epoch": 2.66, + "learning_rate": 5.35310424372661e-06, + "loss": 0.0088, + "step": 10904 + }, + { + "epoch": 2.66, + "learning_rate": 5.349610543499973e-06, + "loss": 0.0075, + "step": 10906 + }, + { + "epoch": 2.66, + "learning_rate": 5.346117567374531e-06, + "loss": 0.0109, + "step": 10908 + }, + { + "epoch": 2.66, + "learning_rate": 5.34262531589417e-06, + "loss": 0.0062, + "step": 10910 + }, + { + "epoch": 2.66, + "learning_rate": 5.339133789602666e-06, + "loss": 0.008, + "step": 10912 + }, + { + "epoch": 2.66, + "learning_rate": 5.33564298904367e-06, + "loss": 0.0048, + "step": 10914 + }, + { + "epoch": 2.66, + "learning_rate": 5.3321529147607224e-06, + "loss": 0.0058, + "step": 10916 + }, + { + "epoch": 2.66, + "learning_rate": 5.328663567297261e-06, + "loss": 0.0063, + "step": 10918 + }, + { + "epoch": 2.66, + "learning_rate": 5.325174947196601e-06, + "loss": 0.0078, + "step": 10920 + }, + { + "epoch": 2.66, + "learning_rate": 5.321687055001953e-06, + "loss": 0.0065, + "step": 10922 + }, + { + "epoch": 2.66, + "learning_rate": 5.318199891256399e-06, + "loss": 0.0201, + "step": 10924 + }, + { + "epoch": 2.66, + "learning_rate": 5.314713456502928e-06, + "loss": 0.0052, + "step": 10926 + }, + { + "epoch": 2.66, + "learning_rate": 5.3112277512843935e-06, + "loss": 0.0088, + "step": 10928 + }, + { + "epoch": 2.66, + "learning_rate": 5.307742776143555e-06, + "loss": 0.0102, + "step": 10930 + }, + { + "epoch": 2.66, + "learning_rate": 5.304258531623043e-06, + "loss": 0.0042, + "step": 10932 + }, + { + "epoch": 2.66, + "learning_rate": 5.300775018265385e-06, + "loss": 0.0073, + "step": 10934 + }, + { + "epoch": 2.66, + "learning_rate": 5.297292236612989e-06, + "loss": 0.0134, + "step": 10936 + }, + { + "epoch": 2.67, + "learning_rate": 5.293810187208155e-06, + "loss": 0.011, + "step": 10938 + }, + { + "epoch": 2.67, + "learning_rate": 5.290328870593062e-06, + "loss": 0.0084, + "step": 10940 + }, + { + "epoch": 2.67, + "learning_rate": 5.286848287309774e-06, + "loss": 0.0066, + "step": 10942 + }, + { + "epoch": 2.67, + "learning_rate": 5.283368437900247e-06, + "loss": 0.013, + "step": 10944 + }, + { + "epoch": 2.67, + "learning_rate": 5.2798893229063245e-06, + "loss": 0.0103, + "step": 10946 + }, + { + "epoch": 2.67, + "learning_rate": 5.276410942869723e-06, + "loss": 0.0053, + "step": 10948 + }, + { + "epoch": 2.67, + "learning_rate": 5.272933298332059e-06, + "loss": 0.0124, + "step": 10950 + }, + { + "epoch": 2.67, + "learning_rate": 5.269456389834825e-06, + "loss": 0.0061, + "step": 10952 + }, + { + "epoch": 2.67, + "learning_rate": 5.265980217919408e-06, + "loss": 0.0062, + "step": 10954 + }, + { + "epoch": 2.67, + "learning_rate": 5.262504783127071e-06, + "loss": 0.0097, + "step": 10956 + }, + { + "epoch": 2.67, + "learning_rate": 5.259030085998962e-06, + "loss": 0.0066, + "step": 10958 + }, + { + "epoch": 2.67, + "learning_rate": 5.25555612707612e-06, + "loss": 0.0064, + "step": 10960 + }, + { + "epoch": 2.67, + "learning_rate": 5.2520829068994724e-06, + "loss": 0.0094, + "step": 10962 + }, + { + "epoch": 2.67, + "learning_rate": 5.248610426009818e-06, + "loss": 0.0116, + "step": 10964 + }, + { + "epoch": 2.67, + "learning_rate": 5.245138684947853e-06, + "loss": 0.006, + "step": 10966 + }, + { + "epoch": 2.67, + "learning_rate": 5.241667684254157e-06, + "loss": 0.0085, + "step": 10968 + }, + { + "epoch": 2.67, + "learning_rate": 5.238197424469187e-06, + "loss": 0.0092, + "step": 10970 + }, + { + "epoch": 2.67, + "learning_rate": 5.234727906133287e-06, + "loss": 0.0129, + "step": 10972 + }, + { + "epoch": 2.67, + "learning_rate": 5.23125912978669e-06, + "loss": 0.0088, + "step": 10974 + }, + { + "epoch": 2.67, + "learning_rate": 5.227791095969512e-06, + "loss": 0.0081, + "step": 10976 + }, + { + "epoch": 2.67, + "learning_rate": 5.224323805221755e-06, + "loss": 0.0089, + "step": 10978 + }, + { + "epoch": 2.68, + "learning_rate": 5.220857258083296e-06, + "loss": 0.0062, + "step": 10980 + }, + { + "epoch": 2.68, + "learning_rate": 5.21739145509391e-06, + "loss": 0.0169, + "step": 10982 + }, + { + "epoch": 2.68, + "learning_rate": 5.213926396793241e-06, + "loss": 0.0072, + "step": 10984 + }, + { + "epoch": 2.68, + "learning_rate": 5.210462083720833e-06, + "loss": 0.0093, + "step": 10986 + }, + { + "epoch": 2.68, + "learning_rate": 5.206998516416099e-06, + "loss": 0.0104, + "step": 10988 + }, + { + "epoch": 2.68, + "learning_rate": 5.203535695418348e-06, + "loss": 0.0099, + "step": 10990 + }, + { + "epoch": 2.68, + "learning_rate": 5.200073621266765e-06, + "loss": 0.0167, + "step": 10992 + }, + { + "epoch": 2.68, + "learning_rate": 5.196612294500426e-06, + "loss": 0.0058, + "step": 10994 + }, + { + "epoch": 2.68, + "learning_rate": 5.1931517156582835e-06, + "loss": 0.0083, + "step": 10996 + }, + { + "epoch": 2.68, + "learning_rate": 5.189691885279171e-06, + "loss": 0.0147, + "step": 10998 + }, + { + "epoch": 2.68, + "learning_rate": 5.186232803901814e-06, + "loss": 0.0099, + "step": 11000 + }, + { + "epoch": 2.68, + "learning_rate": 5.182774472064822e-06, + "loss": 0.0081, + "step": 11002 + }, + { + "epoch": 2.68, + "learning_rate": 5.179316890306678e-06, + "loss": 0.0085, + "step": 11004 + }, + { + "epoch": 2.68, + "learning_rate": 5.175860059165756e-06, + "loss": 0.0063, + "step": 11006 + }, + { + "epoch": 2.68, + "learning_rate": 5.17240397918031e-06, + "loss": 0.0041, + "step": 11008 + }, + { + "epoch": 2.68, + "learning_rate": 5.168948650888486e-06, + "loss": 0.0143, + "step": 11010 + }, + { + "epoch": 2.68, + "learning_rate": 5.165494074828296e-06, + "loss": 0.007, + "step": 11012 + }, + { + "epoch": 2.68, + "learning_rate": 5.1620402515376435e-06, + "loss": 0.0099, + "step": 11014 + }, + { + "epoch": 2.68, + "learning_rate": 5.158587181554318e-06, + "loss": 0.0082, + "step": 11016 + }, + { + "epoch": 2.68, + "learning_rate": 5.155134865415992e-06, + "loss": 0.0136, + "step": 11018 + }, + { + "epoch": 2.69, + "learning_rate": 5.151683303660211e-06, + "loss": 0.0093, + "step": 11020 + }, + { + "epoch": 2.69, + "learning_rate": 5.148232496824412e-06, + "loss": 0.0081, + "step": 11022 + }, + { + "epoch": 2.69, + "learning_rate": 5.144782445445918e-06, + "loss": 0.0104, + "step": 11024 + }, + { + "epoch": 2.69, + "learning_rate": 5.141333150061924e-06, + "loss": 0.0093, + "step": 11026 + }, + { + "epoch": 2.69, + "learning_rate": 5.137884611209506e-06, + "loss": 0.0069, + "step": 11028 + }, + { + "epoch": 2.69, + "learning_rate": 5.134436829425633e-06, + "loss": 0.0088, + "step": 11030 + }, + { + "epoch": 2.69, + "learning_rate": 5.130989805247152e-06, + "loss": 0.0109, + "step": 11032 + }, + { + "epoch": 2.69, + "learning_rate": 5.127543539210793e-06, + "loss": 0.0052, + "step": 11034 + }, + { + "epoch": 2.69, + "learning_rate": 5.1240980318531595e-06, + "loss": 0.0092, + "step": 11036 + }, + { + "epoch": 2.69, + "learning_rate": 5.12065328371075e-06, + "loss": 0.0189, + "step": 11038 + }, + { + "epoch": 2.69, + "learning_rate": 5.117209295319931e-06, + "loss": 0.0108, + "step": 11040 + }, + { + "epoch": 2.69, + "learning_rate": 5.113766067216967e-06, + "loss": 0.0086, + "step": 11042 + }, + { + "epoch": 2.69, + "learning_rate": 5.110323599937985e-06, + "loss": 0.0156, + "step": 11044 + }, + { + "epoch": 2.69, + "learning_rate": 5.1068818940190065e-06, + "loss": 0.0117, + "step": 11046 + }, + { + "epoch": 2.69, + "learning_rate": 5.103440949995936e-06, + "loss": 0.014, + "step": 11048 + }, + { + "epoch": 2.69, + "learning_rate": 5.100000768404554e-06, + "loss": 0.0044, + "step": 11050 + }, + { + "epoch": 2.69, + "learning_rate": 5.096561349780518e-06, + "loss": 0.0054, + "step": 11052 + }, + { + "epoch": 2.69, + "learning_rate": 5.093122694659377e-06, + "loss": 0.0062, + "step": 11054 + }, + { + "epoch": 2.69, + "learning_rate": 5.089684803576551e-06, + "loss": 0.0118, + "step": 11056 + }, + { + "epoch": 2.69, + "learning_rate": 5.0862476770673516e-06, + "loss": 0.0041, + "step": 11058 + }, + { + "epoch": 2.69, + "learning_rate": 5.08281131566696e-06, + "loss": 0.0075, + "step": 11060 + }, + { + "epoch": 2.7, + "learning_rate": 5.079375719910446e-06, + "loss": 0.0083, + "step": 11062 + }, + { + "epoch": 2.7, + "learning_rate": 5.075940890332758e-06, + "loss": 0.0038, + "step": 11064 + }, + { + "epoch": 2.7, + "learning_rate": 5.072506827468731e-06, + "loss": 0.0097, + "step": 11066 + }, + { + "epoch": 2.7, + "learning_rate": 5.069073531853068e-06, + "loss": 0.0089, + "step": 11068 + }, + { + "epoch": 2.7, + "learning_rate": 5.065641004020359e-06, + "loss": 0.011, + "step": 11070 + }, + { + "epoch": 2.7, + "learning_rate": 5.062209244505076e-06, + "loss": 0.0109, + "step": 11072 + }, + { + "epoch": 2.7, + "learning_rate": 5.058778253841577e-06, + "loss": 0.0087, + "step": 11074 + }, + { + "epoch": 2.7, + "learning_rate": 5.055348032564081e-06, + "loss": 0.0095, + "step": 11076 + }, + { + "epoch": 2.7, + "learning_rate": 5.051918581206708e-06, + "loss": 0.0103, + "step": 11078 + }, + { + "epoch": 2.7, + "learning_rate": 5.0484899003034515e-06, + "loss": 0.0099, + "step": 11080 + }, + { + "epoch": 2.7, + "learning_rate": 5.0450619903881805e-06, + "loss": 0.0054, + "step": 11082 + }, + { + "epoch": 2.7, + "learning_rate": 5.041634851994642e-06, + "loss": 0.0048, + "step": 11084 + }, + { + "epoch": 2.7, + "learning_rate": 5.038208485656471e-06, + "loss": 0.0101, + "step": 11086 + }, + { + "epoch": 2.7, + "learning_rate": 5.034782891907182e-06, + "loss": 0.0079, + "step": 11088 + }, + { + "epoch": 2.7, + "learning_rate": 5.031358071280165e-06, + "loss": 0.0067, + "step": 11090 + }, + { + "epoch": 2.7, + "learning_rate": 5.02793402430869e-06, + "loss": 0.0061, + "step": 11092 + }, + { + "epoch": 2.7, + "learning_rate": 5.024510751525908e-06, + "loss": 0.0131, + "step": 11094 + }, + { + "epoch": 2.7, + "learning_rate": 5.0210882534648455e-06, + "loss": 0.0082, + "step": 11096 + }, + { + "epoch": 2.7, + "learning_rate": 5.017666530658416e-06, + "loss": 0.0057, + "step": 11098 + }, + { + "epoch": 2.7, + "learning_rate": 5.014245583639403e-06, + "loss": 0.0064, + "step": 11100 + }, + { + "epoch": 2.71, + "learning_rate": 5.010825412940477e-06, + "loss": 0.0124, + "step": 11102 + }, + { + "epoch": 2.71, + "learning_rate": 5.007406019094185e-06, + "loss": 0.0118, + "step": 11104 + }, + { + "epoch": 2.71, + "learning_rate": 5.003987402632955e-06, + "loss": 0.0091, + "step": 11106 + }, + { + "epoch": 2.71, + "learning_rate": 5.000569564089086e-06, + "loss": 0.0134, + "step": 11108 + }, + { + "epoch": 2.71, + "learning_rate": 4.997152503994769e-06, + "loss": 0.0163, + "step": 11110 + }, + { + "epoch": 2.71, + "learning_rate": 4.993736222882058e-06, + "loss": 0.0102, + "step": 11112 + }, + { + "epoch": 2.71, + "learning_rate": 4.990320721282902e-06, + "loss": 0.0057, + "step": 11114 + }, + { + "epoch": 2.71, + "learning_rate": 4.986905999729114e-06, + "loss": 0.0087, + "step": 11116 + }, + { + "epoch": 2.71, + "learning_rate": 4.9834920587523935e-06, + "loss": 0.004, + "step": 11118 + }, + { + "epoch": 2.71, + "learning_rate": 4.9800788988843205e-06, + "loss": 0.0056, + "step": 11120 + }, + { + "epoch": 2.71, + "learning_rate": 4.976666520656352e-06, + "loss": 0.0083, + "step": 11122 + }, + { + "epoch": 2.71, + "learning_rate": 4.973254924599818e-06, + "loss": 0.0104, + "step": 11124 + }, + { + "epoch": 2.71, + "learning_rate": 4.9698441112459266e-06, + "loss": 0.0136, + "step": 11126 + }, + { + "epoch": 2.71, + "learning_rate": 4.966434081125769e-06, + "loss": 0.0049, + "step": 11128 + }, + { + "epoch": 2.71, + "learning_rate": 4.96302483477032e-06, + "loss": 0.0073, + "step": 11130 + }, + { + "epoch": 2.71, + "learning_rate": 4.959616372710416e-06, + "loss": 0.0115, + "step": 11132 + }, + { + "epoch": 2.71, + "learning_rate": 4.956208695476786e-06, + "loss": 0.0112, + "step": 11134 + }, + { + "epoch": 2.71, + "learning_rate": 4.952801803600033e-06, + "loss": 0.0104, + "step": 11136 + }, + { + "epoch": 2.71, + "learning_rate": 4.949395697610634e-06, + "loss": 0.0085, + "step": 11138 + }, + { + "epoch": 2.71, + "learning_rate": 4.945990378038941e-06, + "loss": 0.013, + "step": 11140 + }, + { + "epoch": 2.71, + "learning_rate": 4.942585845415192e-06, + "loss": 0.0053, + "step": 11142 + }, + { + "epoch": 2.72, + "learning_rate": 4.9391821002694996e-06, + "loss": 0.0064, + "step": 11144 + }, + { + "epoch": 2.72, + "learning_rate": 4.935779143131856e-06, + "loss": 0.0063, + "step": 11146 + }, + { + "epoch": 2.72, + "learning_rate": 4.932376974532121e-06, + "loss": 0.0125, + "step": 11148 + }, + { + "epoch": 2.72, + "learning_rate": 4.928975595000044e-06, + "loss": 0.0064, + "step": 11150 + }, + { + "epoch": 2.72, + "learning_rate": 4.925575005065241e-06, + "loss": 0.0174, + "step": 11152 + }, + { + "epoch": 2.72, + "learning_rate": 4.922175205257215e-06, + "loss": 0.0052, + "step": 11154 + }, + { + "epoch": 2.72, + "learning_rate": 4.9187761961053335e-06, + "loss": 0.0023, + "step": 11156 + }, + { + "epoch": 2.72, + "learning_rate": 4.915377978138853e-06, + "loss": 0.0089, + "step": 11158 + }, + { + "epoch": 2.72, + "learning_rate": 4.911980551886902e-06, + "loss": 0.0051, + "step": 11160 + }, + { + "epoch": 2.72, + "learning_rate": 4.908583917878489e-06, + "loss": 0.01, + "step": 11162 + }, + { + "epoch": 2.72, + "learning_rate": 4.9051880766424885e-06, + "loss": 0.0129, + "step": 11164 + }, + { + "epoch": 2.72, + "learning_rate": 4.901793028707666e-06, + "loss": 0.0093, + "step": 11166 + }, + { + "epoch": 2.72, + "learning_rate": 4.898398774602651e-06, + "loss": 0.0065, + "step": 11168 + }, + { + "epoch": 2.72, + "learning_rate": 4.895005314855959e-06, + "loss": 0.0135, + "step": 11170 + }, + { + "epoch": 2.72, + "learning_rate": 4.891612649995973e-06, + "loss": 0.0138, + "step": 11172 + }, + { + "epoch": 2.72, + "learning_rate": 4.888220780550958e-06, + "loss": 0.0057, + "step": 11174 + }, + { + "epoch": 2.72, + "learning_rate": 4.884829707049057e-06, + "loss": 0.0112, + "step": 11176 + }, + { + "epoch": 2.72, + "learning_rate": 4.881439430018288e-06, + "loss": 0.0129, + "step": 11178 + }, + { + "epoch": 2.72, + "learning_rate": 4.878049949986539e-06, + "loss": 0.0106, + "step": 11180 + }, + { + "epoch": 2.72, + "learning_rate": 4.874661267481574e-06, + "loss": 0.007, + "step": 11182 + }, + { + "epoch": 2.73, + "learning_rate": 4.871273383031043e-06, + "loss": 0.005, + "step": 11184 + }, + { + "epoch": 2.73, + "learning_rate": 4.867886297162467e-06, + "loss": 0.0087, + "step": 11186 + }, + { + "epoch": 2.73, + "learning_rate": 4.864500010403234e-06, + "loss": 0.0104, + "step": 11188 + }, + { + "epoch": 2.73, + "learning_rate": 4.861114523280619e-06, + "loss": 0.0109, + "step": 11190 + }, + { + "epoch": 2.73, + "learning_rate": 4.857729836321772e-06, + "loss": 0.0042, + "step": 11192 + }, + { + "epoch": 2.73, + "learning_rate": 4.854345950053711e-06, + "loss": 0.0053, + "step": 11194 + }, + { + "epoch": 2.73, + "learning_rate": 4.850962865003327e-06, + "loss": 0.012, + "step": 11196 + }, + { + "epoch": 2.73, + "learning_rate": 4.8475805816974e-06, + "loss": 0.0092, + "step": 11198 + }, + { + "epoch": 2.73, + "learning_rate": 4.844199100662578e-06, + "loss": 0.0084, + "step": 11200 + }, + { + "epoch": 2.73, + "learning_rate": 4.840818422425376e-06, + "loss": 0.0074, + "step": 11202 + }, + { + "epoch": 2.73, + "learning_rate": 4.837438547512197e-06, + "loss": 0.0122, + "step": 11204 + }, + { + "epoch": 2.73, + "learning_rate": 4.834059476449312e-06, + "loss": 0.0114, + "step": 11206 + }, + { + "epoch": 2.73, + "learning_rate": 4.830681209762873e-06, + "loss": 0.0086, + "step": 11208 + }, + { + "epoch": 2.73, + "learning_rate": 4.827303747978898e-06, + "loss": 0.0051, + "step": 11210 + }, + { + "epoch": 2.73, + "learning_rate": 4.823927091623278e-06, + "loss": 0.0142, + "step": 11212 + }, + { + "epoch": 2.73, + "learning_rate": 4.820551241221791e-06, + "loss": 0.0071, + "step": 11214 + }, + { + "epoch": 2.73, + "learning_rate": 4.8171761973000845e-06, + "loss": 0.0087, + "step": 11216 + }, + { + "epoch": 2.73, + "learning_rate": 4.813801960383672e-06, + "loss": 0.0064, + "step": 11218 + }, + { + "epoch": 2.73, + "learning_rate": 4.81042853099795e-06, + "loss": 0.0104, + "step": 11220 + }, + { + "epoch": 2.73, + "learning_rate": 4.807055909668193e-06, + "loss": 0.0082, + "step": 11222 + }, + { + "epoch": 2.73, + "learning_rate": 4.80368409691954e-06, + "loss": 0.0087, + "step": 11224 + }, + { + "epoch": 2.74, + "learning_rate": 4.800313093277002e-06, + "loss": 0.0078, + "step": 11226 + }, + { + "epoch": 2.74, + "learning_rate": 4.7969428992654775e-06, + "loss": 0.0045, + "step": 11228 + }, + { + "epoch": 2.74, + "learning_rate": 4.793573515409729e-06, + "loss": 0.0042, + "step": 11230 + }, + { + "epoch": 2.74, + "learning_rate": 4.7902049422344e-06, + "loss": 0.0071, + "step": 11232 + }, + { + "epoch": 2.74, + "learning_rate": 4.786837180263994e-06, + "loss": 0.0095, + "step": 11234 + }, + { + "epoch": 2.74, + "learning_rate": 4.783470230022908e-06, + "loss": 0.0131, + "step": 11236 + }, + { + "epoch": 2.74, + "learning_rate": 4.780104092035392e-06, + "loss": 0.0091, + "step": 11238 + }, + { + "epoch": 2.74, + "learning_rate": 4.776738766825585e-06, + "loss": 0.0087, + "step": 11240 + }, + { + "epoch": 2.74, + "learning_rate": 4.773374254917491e-06, + "loss": 0.012, + "step": 11242 + }, + { + "epoch": 2.74, + "learning_rate": 4.7700105568349905e-06, + "loss": 0.0114, + "step": 11244 + }, + { + "epoch": 2.74, + "learning_rate": 4.766647673101839e-06, + "loss": 0.0095, + "step": 11246 + }, + { + "epoch": 2.74, + "learning_rate": 4.763285604241665e-06, + "loss": 0.0039, + "step": 11248 + }, + { + "epoch": 2.74, + "learning_rate": 4.759924350777968e-06, + "loss": 0.0097, + "step": 11250 + }, + { + "epoch": 2.74, + "learning_rate": 4.756563913234113e-06, + "loss": 0.0119, + "step": 11252 + }, + { + "epoch": 2.74, + "learning_rate": 4.753204292133352e-06, + "loss": 0.0145, + "step": 11254 + }, + { + "epoch": 2.74, + "learning_rate": 4.749845487998806e-06, + "loss": 0.0067, + "step": 11256 + }, + { + "epoch": 2.74, + "learning_rate": 4.7464875013534614e-06, + "loss": 0.0075, + "step": 11258 + }, + { + "epoch": 2.74, + "learning_rate": 4.743130332720184e-06, + "loss": 0.0043, + "step": 11260 + }, + { + "epoch": 2.74, + "learning_rate": 4.739773982621712e-06, + "loss": 0.0077, + "step": 11262 + }, + { + "epoch": 2.74, + "learning_rate": 4.736418451580656e-06, + "loss": 0.0097, + "step": 11264 + }, + { + "epoch": 2.75, + "learning_rate": 4.733063740119497e-06, + "loss": 0.0044, + "step": 11266 + }, + { + "epoch": 2.75, + "learning_rate": 4.729709848760584e-06, + "loss": 0.0041, + "step": 11268 + }, + { + "epoch": 2.75, + "learning_rate": 4.726356778026148e-06, + "loss": 0.0147, + "step": 11270 + }, + { + "epoch": 2.75, + "learning_rate": 4.723004528438291e-06, + "loss": 0.0047, + "step": 11272 + }, + { + "epoch": 2.75, + "learning_rate": 4.719653100518976e-06, + "loss": 0.0129, + "step": 11274 + }, + { + "epoch": 2.75, + "learning_rate": 4.7163024947900505e-06, + "loss": 0.0111, + "step": 11276 + }, + { + "epoch": 2.75, + "learning_rate": 4.712952711773233e-06, + "loss": 0.0086, + "step": 11278 + }, + { + "epoch": 2.75, + "learning_rate": 4.7096037519901065e-06, + "loss": 0.0077, + "step": 11280 + }, + { + "epoch": 2.75, + "learning_rate": 4.706255615962127e-06, + "loss": 0.0088, + "step": 11282 + }, + { + "epoch": 2.75, + "learning_rate": 4.702908304210625e-06, + "loss": 0.0122, + "step": 11284 + }, + { + "epoch": 2.75, + "learning_rate": 4.6995618172568066e-06, + "loss": 0.011, + "step": 11286 + }, + { + "epoch": 2.75, + "learning_rate": 4.696216155621748e-06, + "loss": 0.0049, + "step": 11288 + }, + { + "epoch": 2.75, + "learning_rate": 4.692871319826385e-06, + "loss": 0.0045, + "step": 11290 + }, + { + "epoch": 2.75, + "learning_rate": 4.689527310391544e-06, + "loss": 0.0065, + "step": 11292 + }, + { + "epoch": 2.75, + "learning_rate": 4.686184127837905e-06, + "loss": 0.0143, + "step": 11294 + }, + { + "epoch": 2.75, + "learning_rate": 4.682841772686033e-06, + "loss": 0.0078, + "step": 11296 + }, + { + "epoch": 2.75, + "learning_rate": 4.679500245456352e-06, + "loss": 0.0071, + "step": 11298 + }, + { + "epoch": 2.75, + "learning_rate": 4.676159546669167e-06, + "loss": 0.0039, + "step": 11300 + }, + { + "epoch": 2.75, + "learning_rate": 4.672819676844649e-06, + "loss": 0.0079, + "step": 11302 + }, + { + "epoch": 2.75, + "learning_rate": 4.669480636502847e-06, + "loss": 0.0052, + "step": 11304 + }, + { + "epoch": 2.75, + "learning_rate": 4.666142426163667e-06, + "loss": 0.0088, + "step": 11306 + }, + { + "epoch": 2.76, + "learning_rate": 4.662805046346901e-06, + "loss": 0.0042, + "step": 11308 + }, + { + "epoch": 2.76, + "learning_rate": 4.6594684975721974e-06, + "loss": 0.0102, + "step": 11310 + }, + { + "epoch": 2.76, + "learning_rate": 4.656132780359089e-06, + "loss": 0.0039, + "step": 11312 + }, + { + "epoch": 2.76, + "learning_rate": 4.652797895226966e-06, + "loss": 0.0132, + "step": 11314 + }, + { + "epoch": 2.76, + "learning_rate": 4.649463842695099e-06, + "loss": 0.0032, + "step": 11316 + }, + { + "epoch": 2.76, + "learning_rate": 4.646130623282625e-06, + "loss": 0.0061, + "step": 11318 + }, + { + "epoch": 2.76, + "learning_rate": 4.642798237508555e-06, + "loss": 0.0055, + "step": 11320 + }, + { + "epoch": 2.76, + "learning_rate": 4.639466685891766e-06, + "loss": 0.0071, + "step": 11322 + }, + { + "epoch": 2.76, + "learning_rate": 4.636135968951e-06, + "loss": 0.0126, + "step": 11324 + }, + { + "epoch": 2.76, + "learning_rate": 4.632806087204878e-06, + "loss": 0.0097, + "step": 11326 + }, + { + "epoch": 2.76, + "learning_rate": 4.629477041171895e-06, + "loss": 0.0104, + "step": 11328 + }, + { + "epoch": 2.76, + "learning_rate": 4.6261488313703985e-06, + "loss": 0.0064, + "step": 11330 + }, + { + "epoch": 2.76, + "learning_rate": 4.6228214583186205e-06, + "loss": 0.0154, + "step": 11332 + }, + { + "epoch": 2.76, + "learning_rate": 4.619494922534663e-06, + "loss": 0.0102, + "step": 11334 + }, + { + "epoch": 2.76, + "learning_rate": 4.616169224536489e-06, + "loss": 0.013, + "step": 11336 + }, + { + "epoch": 2.76, + "learning_rate": 4.612844364841931e-06, + "loss": 0.0042, + "step": 11338 + }, + { + "epoch": 2.76, + "learning_rate": 4.6095203439686985e-06, + "loss": 0.0059, + "step": 11340 + }, + { + "epoch": 2.76, + "learning_rate": 4.606197162434368e-06, + "loss": 0.0044, + "step": 11342 + }, + { + "epoch": 2.76, + "learning_rate": 4.602874820756388e-06, + "loss": 0.007, + "step": 11344 + }, + { + "epoch": 2.76, + "learning_rate": 4.599553319452065e-06, + "loss": 0.0096, + "step": 11346 + }, + { + "epoch": 2.77, + "learning_rate": 4.596232659038588e-06, + "loss": 0.0086, + "step": 11348 + }, + { + "epoch": 2.77, + "learning_rate": 4.5929128400330035e-06, + "loss": 0.0071, + "step": 11350 + }, + { + "epoch": 2.77, + "learning_rate": 4.5895938629522396e-06, + "loss": 0.0156, + "step": 11352 + }, + { + "epoch": 2.77, + "learning_rate": 4.58627572831308e-06, + "loss": 0.0077, + "step": 11354 + }, + { + "epoch": 2.77, + "learning_rate": 4.582958436632185e-06, + "loss": 0.0052, + "step": 11356 + }, + { + "epoch": 2.77, + "learning_rate": 4.579641988426084e-06, + "loss": 0.008, + "step": 11358 + }, + { + "epoch": 2.77, + "learning_rate": 4.576326384211177e-06, + "loss": 0.006, + "step": 11360 + }, + { + "epoch": 2.77, + "learning_rate": 4.573011624503721e-06, + "loss": 0.0058, + "step": 11362 + }, + { + "epoch": 2.77, + "learning_rate": 4.569697709819857e-06, + "loss": 0.0174, + "step": 11364 + }, + { + "epoch": 2.77, + "learning_rate": 4.566384640675579e-06, + "loss": 0.0012, + "step": 11366 + }, + { + "epoch": 2.77, + "learning_rate": 4.563072417586767e-06, + "loss": 0.0041, + "step": 11368 + }, + { + "epoch": 2.77, + "learning_rate": 4.559761041069149e-06, + "loss": 0.0123, + "step": 11370 + }, + { + "epoch": 2.77, + "learning_rate": 4.556450511638336e-06, + "loss": 0.0089, + "step": 11372 + }, + { + "epoch": 2.77, + "learning_rate": 4.553140829809804e-06, + "loss": 0.0056, + "step": 11374 + }, + { + "epoch": 2.77, + "learning_rate": 4.5498319960988975e-06, + "loss": 0.0022, + "step": 11376 + }, + { + "epoch": 2.77, + "learning_rate": 4.546524011020826e-06, + "loss": 0.0062, + "step": 11378 + }, + { + "epoch": 2.77, + "learning_rate": 4.543216875090663e-06, + "loss": 0.0078, + "step": 11380 + }, + { + "epoch": 2.77, + "learning_rate": 4.539910588823359e-06, + "loss": 0.0044, + "step": 11382 + }, + { + "epoch": 2.77, + "learning_rate": 4.53660515273373e-06, + "loss": 0.0097, + "step": 11384 + }, + { + "epoch": 2.77, + "learning_rate": 4.533300567336454e-06, + "loss": 0.0136, + "step": 11386 + }, + { + "epoch": 2.77, + "learning_rate": 4.52999683314608e-06, + "loss": 0.0097, + "step": 11388 + }, + { + "epoch": 2.78, + "learning_rate": 4.5266939506770305e-06, + "loss": 0.0061, + "step": 11390 + }, + { + "epoch": 2.78, + "learning_rate": 4.523391920443584e-06, + "loss": 0.0036, + "step": 11392 + }, + { + "epoch": 2.78, + "learning_rate": 4.5200907429598906e-06, + "loss": 0.0095, + "step": 11394 + }, + { + "epoch": 2.78, + "learning_rate": 4.516790418739972e-06, + "loss": 0.0091, + "step": 11396 + }, + { + "epoch": 2.78, + "learning_rate": 4.513490948297713e-06, + "loss": 0.0052, + "step": 11398 + }, + { + "epoch": 2.78, + "learning_rate": 4.51019233214687e-06, + "loss": 0.0093, + "step": 11400 + }, + { + "epoch": 2.78, + "learning_rate": 4.5068945708010556e-06, + "loss": 0.0086, + "step": 11402 + }, + { + "epoch": 2.78, + "learning_rate": 4.503597664773761e-06, + "loss": 0.0096, + "step": 11404 + }, + { + "epoch": 2.78, + "learning_rate": 4.500301614578343e-06, + "loss": 0.0039, + "step": 11406 + }, + { + "epoch": 2.78, + "learning_rate": 4.4970064207280175e-06, + "loss": 0.0068, + "step": 11408 + }, + { + "epoch": 2.78, + "learning_rate": 4.493712083735868e-06, + "loss": 0.0082, + "step": 11410 + }, + { + "epoch": 2.78, + "learning_rate": 4.490418604114852e-06, + "loss": 0.0083, + "step": 11412 + }, + { + "epoch": 2.78, + "learning_rate": 4.487125982377789e-06, + "loss": 0.0079, + "step": 11414 + }, + { + "epoch": 2.78, + "learning_rate": 4.483834219037369e-06, + "loss": 0.0095, + "step": 11416 + }, + { + "epoch": 2.78, + "learning_rate": 4.480543314606138e-06, + "loss": 0.0043, + "step": 11418 + }, + { + "epoch": 2.78, + "learning_rate": 4.477253269596522e-06, + "loss": 0.0054, + "step": 11420 + }, + { + "epoch": 2.78, + "learning_rate": 4.473964084520799e-06, + "loss": 0.0028, + "step": 11422 + }, + { + "epoch": 2.78, + "learning_rate": 4.470675759891126e-06, + "loss": 0.006, + "step": 11424 + }, + { + "epoch": 2.78, + "learning_rate": 4.467388296219516e-06, + "loss": 0.0034, + "step": 11426 + }, + { + "epoch": 2.78, + "learning_rate": 4.4641016940178535e-06, + "loss": 0.0079, + "step": 11428 + }, + { + "epoch": 2.79, + "learning_rate": 4.460815953797889e-06, + "loss": 0.0072, + "step": 11430 + }, + { + "epoch": 2.79, + "learning_rate": 4.4575310760712395e-06, + "loss": 0.0098, + "step": 11432 + }, + { + "epoch": 2.79, + "learning_rate": 4.4542470613493824e-06, + "loss": 0.0097, + "step": 11434 + }, + { + "epoch": 2.79, + "learning_rate": 4.450963910143662e-06, + "loss": 0.0127, + "step": 11436 + }, + { + "epoch": 2.79, + "learning_rate": 4.447681622965292e-06, + "loss": 0.0068, + "step": 11438 + }, + { + "epoch": 2.79, + "learning_rate": 4.444400200325353e-06, + "loss": 0.0026, + "step": 11440 + }, + { + "epoch": 2.79, + "learning_rate": 4.441119642734781e-06, + "loss": 0.0093, + "step": 11442 + }, + { + "epoch": 2.79, + "learning_rate": 4.437839950704388e-06, + "loss": 0.0104, + "step": 11444 + }, + { + "epoch": 2.79, + "learning_rate": 4.434561124744849e-06, + "loss": 0.0123, + "step": 11446 + }, + { + "epoch": 2.79, + "learning_rate": 4.431283165366701e-06, + "loss": 0.0111, + "step": 11448 + }, + { + "epoch": 2.79, + "learning_rate": 4.428006073080342e-06, + "loss": 0.0109, + "step": 11450 + }, + { + "epoch": 2.79, + "learning_rate": 4.424729848396045e-06, + "loss": 0.0086, + "step": 11452 + }, + { + "epoch": 2.79, + "learning_rate": 4.421454491823942e-06, + "loss": 0.0134, + "step": 11454 + }, + { + "epoch": 2.79, + "learning_rate": 4.418180003874036e-06, + "loss": 0.003, + "step": 11456 + }, + { + "epoch": 2.79, + "learning_rate": 4.4149063850561825e-06, + "loss": 0.0056, + "step": 11458 + }, + { + "epoch": 2.79, + "learning_rate": 4.411633635880112e-06, + "loss": 0.0057, + "step": 11460 + }, + { + "epoch": 2.79, + "learning_rate": 4.408361756855419e-06, + "loss": 0.0064, + "step": 11462 + }, + { + "epoch": 2.79, + "learning_rate": 4.405090748491558e-06, + "loss": 0.0122, + "step": 11464 + }, + { + "epoch": 2.79, + "learning_rate": 4.4018206112978475e-06, + "loss": 0.0118, + "step": 11466 + }, + { + "epoch": 2.79, + "learning_rate": 4.398551345783474e-06, + "loss": 0.0109, + "step": 11468 + }, + { + "epoch": 2.79, + "learning_rate": 4.395282952457489e-06, + "loss": 0.0077, + "step": 11470 + }, + { + "epoch": 2.8, + "learning_rate": 4.392015431828809e-06, + "loss": 0.0095, + "step": 11472 + }, + { + "epoch": 2.8, + "learning_rate": 4.388748784406205e-06, + "loss": 0.004, + "step": 11474 + }, + { + "epoch": 2.8, + "learning_rate": 4.385483010698326e-06, + "loss": 0.006, + "step": 11476 + }, + { + "epoch": 2.8, + "learning_rate": 4.382218111213671e-06, + "loss": 0.0066, + "step": 11478 + }, + { + "epoch": 2.8, + "learning_rate": 4.378954086460616e-06, + "loss": 0.0054, + "step": 11480 + }, + { + "epoch": 2.8, + "learning_rate": 4.37569093694739e-06, + "loss": 0.0054, + "step": 11482 + }, + { + "epoch": 2.8, + "learning_rate": 4.372428663182091e-06, + "loss": 0.0091, + "step": 11484 + }, + { + "epoch": 2.8, + "learning_rate": 4.369167265672681e-06, + "loss": 0.0098, + "step": 11486 + }, + { + "epoch": 2.8, + "learning_rate": 4.36590674492699e-06, + "loss": 0.0049, + "step": 11488 + }, + { + "epoch": 2.8, + "learning_rate": 4.362647101452699e-06, + "loss": 0.0075, + "step": 11490 + }, + { + "epoch": 2.8, + "learning_rate": 4.359388335757358e-06, + "loss": 0.0094, + "step": 11492 + }, + { + "epoch": 2.8, + "learning_rate": 4.3561304483483855e-06, + "loss": 0.0071, + "step": 11494 + }, + { + "epoch": 2.8, + "learning_rate": 4.352873439733063e-06, + "loss": 0.0049, + "step": 11496 + }, + { + "epoch": 2.8, + "learning_rate": 4.349617310418523e-06, + "loss": 0.0124, + "step": 11498 + }, + { + "epoch": 2.8, + "learning_rate": 4.346362060911774e-06, + "loss": 0.0144, + "step": 11500 + }, + { + "epoch": 2.8, + "learning_rate": 4.343107691719688e-06, + "loss": 0.0186, + "step": 11502 + }, + { + "epoch": 2.8, + "learning_rate": 4.339854203348987e-06, + "loss": 0.0121, + "step": 11504 + }, + { + "epoch": 2.8, + "learning_rate": 4.3366015963062714e-06, + "loss": 0.0165, + "step": 11506 + }, + { + "epoch": 2.8, + "learning_rate": 4.333349871097988e-06, + "loss": 0.0066, + "step": 11508 + }, + { + "epoch": 2.8, + "learning_rate": 4.330099028230462e-06, + "loss": 0.0093, + "step": 11510 + }, + { + "epoch": 2.81, + "learning_rate": 4.326849068209877e-06, + "loss": 0.0078, + "step": 11512 + }, + { + "epoch": 2.81, + "learning_rate": 4.323599991542269e-06, + "loss": 0.0138, + "step": 11514 + }, + { + "epoch": 2.81, + "learning_rate": 4.320351798733547e-06, + "loss": 0.0073, + "step": 11516 + }, + { + "epoch": 2.81, + "learning_rate": 4.317104490289484e-06, + "loss": 0.0069, + "step": 11518 + }, + { + "epoch": 2.81, + "learning_rate": 4.313858066715707e-06, + "loss": 0.007, + "step": 11520 + }, + { + "epoch": 2.81, + "learning_rate": 4.310612528517706e-06, + "loss": 0.012, + "step": 11522 + }, + { + "epoch": 2.81, + "learning_rate": 4.307367876200839e-06, + "loss": 0.0055, + "step": 11524 + }, + { + "epoch": 2.81, + "learning_rate": 4.3041241102703225e-06, + "loss": 0.0059, + "step": 11526 + }, + { + "epoch": 2.81, + "learning_rate": 4.30088123123124e-06, + "loss": 0.0056, + "step": 11528 + }, + { + "epoch": 2.81, + "learning_rate": 4.297639239588526e-06, + "loss": 0.0045, + "step": 11530 + }, + { + "epoch": 2.81, + "learning_rate": 4.2943981358469885e-06, + "loss": 0.0111, + "step": 11532 + }, + { + "epoch": 2.81, + "learning_rate": 4.291157920511289e-06, + "loss": 0.0085, + "step": 11534 + }, + { + "epoch": 2.81, + "learning_rate": 4.287918594085957e-06, + "loss": 0.007, + "step": 11536 + }, + { + "epoch": 2.81, + "learning_rate": 4.284680157075374e-06, + "loss": 0.0066, + "step": 11538 + }, + { + "epoch": 2.81, + "learning_rate": 4.281442609983793e-06, + "loss": 0.0082, + "step": 11540 + }, + { + "epoch": 2.81, + "learning_rate": 4.278205953315327e-06, + "loss": 0.0046, + "step": 11542 + }, + { + "epoch": 2.81, + "learning_rate": 4.2749701875739505e-06, + "loss": 0.0043, + "step": 11544 + }, + { + "epoch": 2.81, + "learning_rate": 4.271735313263493e-06, + "loss": 0.0065, + "step": 11546 + }, + { + "epoch": 2.81, + "learning_rate": 4.268501330887644e-06, + "loss": 0.0097, + "step": 11548 + }, + { + "epoch": 2.81, + "learning_rate": 4.2652682409499666e-06, + "loss": 0.006, + "step": 11550 + }, + { + "epoch": 2.81, + "learning_rate": 4.262036043953878e-06, + "loss": 0.0099, + "step": 11552 + }, + { + "epoch": 2.82, + "learning_rate": 4.25880474040265e-06, + "loss": 0.0189, + "step": 11554 + }, + { + "epoch": 2.82, + "learning_rate": 4.255574330799426e-06, + "loss": 0.0045, + "step": 11556 + }, + { + "epoch": 2.82, + "learning_rate": 4.252344815647202e-06, + "loss": 0.0038, + "step": 11558 + }, + { + "epoch": 2.82, + "learning_rate": 4.249116195448845e-06, + "loss": 0.0036, + "step": 11560 + }, + { + "epoch": 2.82, + "learning_rate": 4.245888470707074e-06, + "loss": 0.0075, + "step": 11562 + }, + { + "epoch": 2.82, + "learning_rate": 4.242661641924461e-06, + "loss": 0.0086, + "step": 11564 + }, + { + "epoch": 2.82, + "learning_rate": 4.239435709603455e-06, + "loss": 0.0082, + "step": 11566 + }, + { + "epoch": 2.82, + "learning_rate": 4.2362106742463635e-06, + "loss": 0.0134, + "step": 11568 + }, + { + "epoch": 2.82, + "learning_rate": 4.232986536355339e-06, + "loss": 0.0052, + "step": 11570 + }, + { + "epoch": 2.82, + "learning_rate": 4.229763296432409e-06, + "loss": 0.004, + "step": 11572 + }, + { + "epoch": 2.82, + "learning_rate": 4.226540954979461e-06, + "loss": 0.0085, + "step": 11574 + }, + { + "epoch": 2.82, + "learning_rate": 4.223319512498233e-06, + "loss": 0.0068, + "step": 11576 + }, + { + "epoch": 2.82, + "learning_rate": 4.220098969490326e-06, + "loss": 0.0092, + "step": 11578 + }, + { + "epoch": 2.82, + "learning_rate": 4.216879326457206e-06, + "loss": 0.0054, + "step": 11580 + }, + { + "epoch": 2.82, + "learning_rate": 4.213660583900198e-06, + "loss": 0.0015, + "step": 11582 + }, + { + "epoch": 2.82, + "learning_rate": 4.210442742320485e-06, + "loss": 0.0137, + "step": 11584 + }, + { + "epoch": 2.82, + "learning_rate": 4.207225802219105e-06, + "loss": 0.0078, + "step": 11586 + }, + { + "epoch": 2.82, + "learning_rate": 4.204009764096966e-06, + "loss": 0.0043, + "step": 11588 + }, + { + "epoch": 2.82, + "learning_rate": 4.200794628454823e-06, + "loss": 0.0084, + "step": 11590 + }, + { + "epoch": 2.82, + "learning_rate": 4.197580395793305e-06, + "loss": 0.0045, + "step": 11592 + }, + { + "epoch": 2.83, + "learning_rate": 4.194367066612884e-06, + "loss": 0.0051, + "step": 11594 + }, + { + "epoch": 2.83, + "learning_rate": 4.191154641413905e-06, + "loss": 0.0075, + "step": 11596 + }, + { + "epoch": 2.83, + "learning_rate": 4.187943120696567e-06, + "loss": 0.0052, + "step": 11598 + }, + { + "epoch": 2.83, + "learning_rate": 4.184732504960931e-06, + "loss": 0.0052, + "step": 11600 + }, + { + "epoch": 2.83, + "learning_rate": 4.18152279470691e-06, + "loss": 0.0042, + "step": 11602 + }, + { + "epoch": 2.83, + "learning_rate": 4.178313990434281e-06, + "loss": 0.0059, + "step": 11604 + }, + { + "epoch": 2.83, + "learning_rate": 4.1751060926426775e-06, + "loss": 0.0061, + "step": 11606 + }, + { + "epoch": 2.83, + "learning_rate": 4.1718991018316015e-06, + "loss": 0.0072, + "step": 11608 + }, + { + "epoch": 2.83, + "learning_rate": 4.168693018500396e-06, + "loss": 0.0083, + "step": 11610 + }, + { + "epoch": 2.83, + "learning_rate": 4.1654878431482784e-06, + "loss": 0.009, + "step": 11612 + }, + { + "epoch": 2.83, + "learning_rate": 4.162283576274317e-06, + "loss": 0.0079, + "step": 11614 + }, + { + "epoch": 2.83, + "learning_rate": 4.159080218377447e-06, + "loss": 0.0076, + "step": 11616 + }, + { + "epoch": 2.83, + "learning_rate": 4.15587776995645e-06, + "loss": 0.0058, + "step": 11618 + }, + { + "epoch": 2.83, + "learning_rate": 4.152676231509968e-06, + "loss": 0.0074, + "step": 11620 + }, + { + "epoch": 2.83, + "learning_rate": 4.149475603536509e-06, + "loss": 0.0151, + "step": 11622 + }, + { + "epoch": 2.83, + "learning_rate": 4.14627588653444e-06, + "loss": 0.0109, + "step": 11624 + }, + { + "epoch": 2.83, + "learning_rate": 4.143077081001973e-06, + "loss": 0.0078, + "step": 11626 + }, + { + "epoch": 2.83, + "learning_rate": 4.13987918743719e-06, + "loss": 0.0077, + "step": 11628 + }, + { + "epoch": 2.83, + "learning_rate": 4.136682206338031e-06, + "loss": 0.0066, + "step": 11630 + }, + { + "epoch": 2.83, + "learning_rate": 4.133486138202288e-06, + "loss": 0.0096, + "step": 11632 + }, + { + "epoch": 2.83, + "learning_rate": 4.1302909835276084e-06, + "loss": 0.007, + "step": 11634 + }, + { + "epoch": 2.84, + "learning_rate": 4.127096742811506e-06, + "loss": 0.0073, + "step": 11636 + }, + { + "epoch": 2.84, + "learning_rate": 4.12390341655135e-06, + "loss": 0.0064, + "step": 11638 + }, + { + "epoch": 2.84, + "learning_rate": 4.1207110052443675e-06, + "loss": 0.0103, + "step": 11640 + }, + { + "epoch": 2.84, + "learning_rate": 4.117519509387634e-06, + "loss": 0.0043, + "step": 11642 + }, + { + "epoch": 2.84, + "learning_rate": 4.114328929478098e-06, + "loss": 0.0103, + "step": 11644 + }, + { + "epoch": 2.84, + "learning_rate": 4.111139266012551e-06, + "loss": 0.0079, + "step": 11646 + }, + { + "epoch": 2.84, + "learning_rate": 4.107950519487653e-06, + "loss": 0.0045, + "step": 11648 + }, + { + "epoch": 2.84, + "learning_rate": 4.1047626903999106e-06, + "loss": 0.0066, + "step": 11650 + }, + { + "epoch": 2.84, + "learning_rate": 4.101575779245696e-06, + "loss": 0.0126, + "step": 11652 + }, + { + "epoch": 2.84, + "learning_rate": 4.098389786521234e-06, + "loss": 0.0107, + "step": 11654 + }, + { + "epoch": 2.84, + "learning_rate": 4.095204712722614e-06, + "loss": 0.0064, + "step": 11656 + }, + { + "epoch": 2.84, + "learning_rate": 4.092020558345771e-06, + "loss": 0.0102, + "step": 11658 + }, + { + "epoch": 2.84, + "learning_rate": 4.088837323886504e-06, + "loss": 0.008, + "step": 11660 + }, + { + "epoch": 2.84, + "learning_rate": 4.0856550098404645e-06, + "loss": 0.006, + "step": 11662 + }, + { + "epoch": 2.84, + "learning_rate": 4.082473616703167e-06, + "loss": 0.0055, + "step": 11664 + }, + { + "epoch": 2.84, + "learning_rate": 4.0792931449699745e-06, + "loss": 0.0074, + "step": 11666 + }, + { + "epoch": 2.84, + "learning_rate": 4.076113595136113e-06, + "loss": 0.0069, + "step": 11668 + }, + { + "epoch": 2.84, + "learning_rate": 4.0729349676966625e-06, + "loss": 0.0075, + "step": 11670 + }, + { + "epoch": 2.84, + "learning_rate": 4.069757263146562e-06, + "loss": 0.0077, + "step": 11672 + }, + { + "epoch": 2.84, + "learning_rate": 4.066580481980603e-06, + "loss": 0.0055, + "step": 11674 + }, + { + "epoch": 2.85, + "learning_rate": 4.06340462469343e-06, + "loss": 0.0113, + "step": 11676 + }, + { + "epoch": 2.85, + "learning_rate": 4.060229691779552e-06, + "loss": 0.0084, + "step": 11678 + }, + { + "epoch": 2.85, + "learning_rate": 4.057055683733334e-06, + "loss": 0.007, + "step": 11680 + }, + { + "epoch": 2.85, + "learning_rate": 4.053882601048987e-06, + "loss": 0.0068, + "step": 11682 + }, + { + "epoch": 2.85, + "learning_rate": 4.050710444220585e-06, + "loss": 0.0025, + "step": 11684 + }, + { + "epoch": 2.85, + "learning_rate": 4.047539213742064e-06, + "loss": 0.012, + "step": 11686 + }, + { + "epoch": 2.85, + "learning_rate": 4.044368910107204e-06, + "loss": 0.0068, + "step": 11688 + }, + { + "epoch": 2.85, + "learning_rate": 4.041199533809641e-06, + "loss": 0.0075, + "step": 11690 + }, + { + "epoch": 2.85, + "learning_rate": 4.038031085342875e-06, + "loss": 0.009, + "step": 11692 + }, + { + "epoch": 2.85, + "learning_rate": 4.034863565200259e-06, + "loss": 0.0105, + "step": 11694 + }, + { + "epoch": 2.85, + "learning_rate": 4.031696973875003e-06, + "loss": 0.0142, + "step": 11696 + }, + { + "epoch": 2.85, + "learning_rate": 4.028531311860161e-06, + "loss": 0.0038, + "step": 11698 + }, + { + "epoch": 2.85, + "learning_rate": 4.02536657964866e-06, + "loss": 0.0039, + "step": 11700 + }, + { + "epoch": 2.85, + "learning_rate": 4.022202777733264e-06, + "loss": 0.0039, + "step": 11702 + }, + { + "epoch": 2.85, + "learning_rate": 4.01903990660661e-06, + "loss": 0.0032, + "step": 11704 + }, + { + "epoch": 2.85, + "learning_rate": 4.015877966761173e-06, + "loss": 0.0074, + "step": 11706 + }, + { + "epoch": 2.85, + "learning_rate": 4.0127169586892955e-06, + "loss": 0.0067, + "step": 11708 + }, + { + "epoch": 2.85, + "learning_rate": 4.00955688288317e-06, + "loss": 0.0041, + "step": 11710 + }, + { + "epoch": 2.85, + "learning_rate": 4.006397739834848e-06, + "loss": 0.0051, + "step": 11712 + }, + { + "epoch": 2.85, + "learning_rate": 4.003239530036226e-06, + "loss": 0.007, + "step": 11714 + }, + { + "epoch": 2.85, + "learning_rate": 4.0000822539790675e-06, + "loss": 0.0054, + "step": 11716 + }, + { + "epoch": 2.86, + "learning_rate": 3.9969259121549805e-06, + "loss": 0.0051, + "step": 11718 + }, + { + "epoch": 2.86, + "learning_rate": 3.99377050505543e-06, + "loss": 0.0128, + "step": 11720 + }, + { + "epoch": 2.86, + "learning_rate": 3.990616033171738e-06, + "loss": 0.0083, + "step": 11722 + }, + { + "epoch": 2.86, + "learning_rate": 3.987462496995082e-06, + "loss": 0.0076, + "step": 11724 + }, + { + "epoch": 2.86, + "learning_rate": 3.984309897016495e-06, + "loss": 0.0052, + "step": 11726 + }, + { + "epoch": 2.86, + "learning_rate": 3.981158233726854e-06, + "loss": 0.0057, + "step": 11728 + }, + { + "epoch": 2.86, + "learning_rate": 3.978007507616903e-06, + "loss": 0.0045, + "step": 11730 + }, + { + "epoch": 2.86, + "learning_rate": 3.974857719177227e-06, + "loss": 0.0074, + "step": 11732 + }, + { + "epoch": 2.86, + "learning_rate": 3.971708868898279e-06, + "loss": 0.0034, + "step": 11734 + }, + { + "epoch": 2.86, + "learning_rate": 3.9685609572703544e-06, + "loss": 0.004, + "step": 11736 + }, + { + "epoch": 2.86, + "learning_rate": 3.965413984783609e-06, + "loss": 0.0062, + "step": 11738 + }, + { + "epoch": 2.86, + "learning_rate": 3.9622679519280504e-06, + "loss": 0.0044, + "step": 11740 + }, + { + "epoch": 2.86, + "learning_rate": 3.959122859193543e-06, + "loss": 0.0083, + "step": 11742 + }, + { + "epoch": 2.86, + "learning_rate": 3.9559787070698e-06, + "loss": 0.006, + "step": 11744 + }, + { + "epoch": 2.86, + "learning_rate": 3.952835496046383e-06, + "loss": 0.0018, + "step": 11746 + }, + { + "epoch": 2.86, + "learning_rate": 3.949693226612722e-06, + "loss": 0.0035, + "step": 11748 + }, + { + "epoch": 2.86, + "learning_rate": 3.946551899258093e-06, + "loss": 0.0053, + "step": 11750 + }, + { + "epoch": 2.86, + "learning_rate": 3.943411514471619e-06, + "loss": 0.009, + "step": 11752 + }, + { + "epoch": 2.86, + "learning_rate": 3.9402720727422835e-06, + "loss": 0.006, + "step": 11754 + }, + { + "epoch": 2.86, + "learning_rate": 3.937133574558925e-06, + "loss": 0.0121, + "step": 11756 + }, + { + "epoch": 2.87, + "learning_rate": 3.933996020410231e-06, + "loss": 0.0074, + "step": 11758 + }, + { + "epoch": 2.87, + "learning_rate": 3.930859410784742e-06, + "loss": 0.0041, + "step": 11760 + }, + { + "epoch": 2.87, + "learning_rate": 3.927723746170848e-06, + "loss": 0.0058, + "step": 11762 + }, + { + "epoch": 2.87, + "learning_rate": 3.9245890270568e-06, + "loss": 0.0102, + "step": 11764 + }, + { + "epoch": 2.87, + "learning_rate": 3.921455253930699e-06, + "loss": 0.0054, + "step": 11766 + }, + { + "epoch": 2.87, + "learning_rate": 3.918322427280493e-06, + "loss": 0.0042, + "step": 11768 + }, + { + "epoch": 2.87, + "learning_rate": 3.9151905475939886e-06, + "loss": 0.0079, + "step": 11770 + }, + { + "epoch": 2.87, + "learning_rate": 3.912059615358849e-06, + "loss": 0.0049, + "step": 11772 + }, + { + "epoch": 2.87, + "learning_rate": 3.908929631062579e-06, + "loss": 0.005, + "step": 11774 + }, + { + "epoch": 2.87, + "learning_rate": 3.905800595192538e-06, + "loss": 0.0103, + "step": 11776 + }, + { + "epoch": 2.87, + "learning_rate": 3.902672508235945e-06, + "loss": 0.0124, + "step": 11778 + }, + { + "epoch": 2.87, + "learning_rate": 3.899545370679867e-06, + "loss": 0.0067, + "step": 11780 + }, + { + "epoch": 2.87, + "learning_rate": 3.896419183011226e-06, + "loss": 0.0107, + "step": 11782 + }, + { + "epoch": 2.87, + "learning_rate": 3.893293945716786e-06, + "loss": 0.0078, + "step": 11784 + }, + { + "epoch": 2.87, + "learning_rate": 3.89016965928318e-06, + "loss": 0.007, + "step": 11786 + }, + { + "epoch": 2.87, + "learning_rate": 3.887046324196873e-06, + "loss": 0.0039, + "step": 11788 + }, + { + "epoch": 2.87, + "learning_rate": 3.8839239409442e-06, + "loss": 0.0046, + "step": 11790 + }, + { + "epoch": 2.87, + "learning_rate": 3.8808025100113335e-06, + "loss": 0.0074, + "step": 11792 + }, + { + "epoch": 2.87, + "learning_rate": 3.877682031884308e-06, + "loss": 0.0121, + "step": 11794 + }, + { + "epoch": 2.87, + "learning_rate": 3.874562507049005e-06, + "loss": 0.0084, + "step": 11796 + }, + { + "epoch": 2.87, + "learning_rate": 3.871443935991161e-06, + "loss": 0.0074, + "step": 11798 + }, + { + "epoch": 2.88, + "learning_rate": 3.868326319196362e-06, + "loss": 0.0057, + "step": 11800 + }, + { + "epoch": 2.88, + "learning_rate": 3.865209657150036e-06, + "loss": 0.0042, + "step": 11802 + }, + { + "epoch": 2.88, + "learning_rate": 3.8620939503374775e-06, + "loss": 0.0046, + "step": 11804 + }, + { + "epoch": 2.88, + "learning_rate": 3.85897919924383e-06, + "loss": 0.0045, + "step": 11806 + }, + { + "epoch": 2.88, + "learning_rate": 3.855865404354074e-06, + "loss": 0.0045, + "step": 11808 + }, + { + "epoch": 2.88, + "learning_rate": 3.852752566153059e-06, + "loss": 0.0062, + "step": 11810 + }, + { + "epoch": 2.88, + "learning_rate": 3.849640685125473e-06, + "loss": 0.0094, + "step": 11812 + }, + { + "epoch": 2.88, + "learning_rate": 3.846529761755867e-06, + "loss": 0.0069, + "step": 11814 + }, + { + "epoch": 2.88, + "learning_rate": 3.8434197965286304e-06, + "loss": 0.0027, + "step": 11816 + }, + { + "epoch": 2.88, + "learning_rate": 3.840310789928005e-06, + "loss": 0.0061, + "step": 11818 + }, + { + "epoch": 2.88, + "learning_rate": 3.83720274243809e-06, + "loss": 0.0053, + "step": 11820 + }, + { + "epoch": 2.88, + "learning_rate": 3.834095654542836e-06, + "loss": 0.0128, + "step": 11822 + }, + { + "epoch": 2.88, + "learning_rate": 3.830989526726036e-06, + "loss": 0.0097, + "step": 11824 + }, + { + "epoch": 2.88, + "learning_rate": 3.8278843594713365e-06, + "loss": 0.0048, + "step": 11826 + }, + { + "epoch": 2.88, + "learning_rate": 3.824780153262242e-06, + "loss": 0.0069, + "step": 11828 + }, + { + "epoch": 2.88, + "learning_rate": 3.821676908582098e-06, + "loss": 0.0068, + "step": 11830 + }, + { + "epoch": 2.88, + "learning_rate": 3.8185746259141e-06, + "loss": 0.0068, + "step": 11832 + }, + { + "epoch": 2.88, + "learning_rate": 3.815473305741299e-06, + "loss": 0.0024, + "step": 11834 + }, + { + "epoch": 2.88, + "learning_rate": 3.8123729485465953e-06, + "loss": 0.0063, + "step": 11836 + }, + { + "epoch": 2.88, + "learning_rate": 3.8092735548127413e-06, + "loss": 0.0139, + "step": 11838 + }, + { + "epoch": 2.88, + "learning_rate": 3.80617512502233e-06, + "loss": 0.0045, + "step": 11840 + }, + { + "epoch": 2.89, + "learning_rate": 3.8030776596578177e-06, + "loss": 0.0048, + "step": 11842 + }, + { + "epoch": 2.89, + "learning_rate": 3.7999811592014956e-06, + "loss": 0.0071, + "step": 11844 + }, + { + "epoch": 2.89, + "learning_rate": 3.79688562413552e-06, + "loss": 0.0088, + "step": 11846 + }, + { + "epoch": 2.89, + "learning_rate": 3.793791054941882e-06, + "loss": 0.0034, + "step": 11848 + }, + { + "epoch": 2.89, + "learning_rate": 3.7906974521024343e-06, + "loss": 0.0032, + "step": 11850 + }, + { + "epoch": 2.89, + "learning_rate": 3.7876048160988734e-06, + "loss": 0.0063, + "step": 11852 + }, + { + "epoch": 2.89, + "learning_rate": 3.7845131474127493e-06, + "loss": 0.0059, + "step": 11854 + }, + { + "epoch": 2.89, + "learning_rate": 3.7814224465254525e-06, + "loss": 0.0053, + "step": 11856 + }, + { + "epoch": 2.89, + "learning_rate": 3.7783327139182357e-06, + "loss": 0.0126, + "step": 11858 + }, + { + "epoch": 2.89, + "learning_rate": 3.775243950072187e-06, + "loss": 0.0067, + "step": 11860 + }, + { + "epoch": 2.89, + "learning_rate": 3.772156155468257e-06, + "loss": 0.0061, + "step": 11862 + }, + { + "epoch": 2.89, + "learning_rate": 3.7690693305872327e-06, + "loss": 0.0047, + "step": 11864 + }, + { + "epoch": 2.89, + "learning_rate": 3.7659834759097603e-06, + "loss": 0.0024, + "step": 11866 + }, + { + "epoch": 2.89, + "learning_rate": 3.7628985919163284e-06, + "loss": 0.0093, + "step": 11868 + }, + { + "epoch": 2.89, + "learning_rate": 3.7598146790872825e-06, + "loss": 0.0043, + "step": 11870 + }, + { + "epoch": 2.89, + "learning_rate": 3.7567317379028077e-06, + "loss": 0.0055, + "step": 11872 + }, + { + "epoch": 2.89, + "learning_rate": 3.7536497688429384e-06, + "loss": 0.0022, + "step": 11874 + }, + { + "epoch": 2.89, + "learning_rate": 3.750568772387564e-06, + "loss": 0.0065, + "step": 11876 + }, + { + "epoch": 2.89, + "learning_rate": 3.7474887490164213e-06, + "loss": 0.0083, + "step": 11878 + }, + { + "epoch": 2.89, + "learning_rate": 3.744409699209088e-06, + "loss": 0.0071, + "step": 11880 + }, + { + "epoch": 2.9, + "learning_rate": 3.741331623444999e-06, + "loss": 0.0079, + "step": 11882 + }, + { + "epoch": 2.9, + "learning_rate": 3.7382545222034385e-06, + "loss": 0.0039, + "step": 11884 + }, + { + "epoch": 2.9, + "learning_rate": 3.735178395963529e-06, + "loss": 0.0046, + "step": 11886 + }, + { + "epoch": 2.9, + "learning_rate": 3.732103245204245e-06, + "loss": 0.0032, + "step": 11888 + }, + { + "epoch": 2.9, + "learning_rate": 3.729029070404414e-06, + "loss": 0.0051, + "step": 11890 + }, + { + "epoch": 2.9, + "learning_rate": 3.725955872042709e-06, + "loss": 0.0062, + "step": 11892 + }, + { + "epoch": 2.9, + "learning_rate": 3.722883650597654e-06, + "loss": 0.0053, + "step": 11894 + }, + { + "epoch": 2.9, + "learning_rate": 3.719812406547609e-06, + "loss": 0.005, + "step": 11896 + }, + { + "epoch": 2.9, + "learning_rate": 3.716742140370799e-06, + "loss": 0.0156, + "step": 11898 + }, + { + "epoch": 2.9, + "learning_rate": 3.7136728525452803e-06, + "loss": 0.007, + "step": 11900 + }, + { + "epoch": 2.9, + "learning_rate": 3.710604543548971e-06, + "loss": 0.0022, + "step": 11902 + }, + { + "epoch": 2.9, + "learning_rate": 3.707537213859623e-06, + "loss": 0.0054, + "step": 11904 + }, + { + "epoch": 2.9, + "learning_rate": 3.7044708639548477e-06, + "loss": 0.0023, + "step": 11906 + }, + { + "epoch": 2.9, + "learning_rate": 3.701405494312099e-06, + "loss": 0.0041, + "step": 11908 + }, + { + "epoch": 2.9, + "learning_rate": 3.6983411054086804e-06, + "loss": 0.0046, + "step": 11910 + }, + { + "epoch": 2.9, + "learning_rate": 3.6952776977217355e-06, + "loss": 0.0056, + "step": 11912 + }, + { + "epoch": 2.9, + "learning_rate": 3.6922152717282667e-06, + "loss": 0.0034, + "step": 11914 + }, + { + "epoch": 2.9, + "learning_rate": 3.68915382790511e-06, + "loss": 0.0014, + "step": 11916 + }, + { + "epoch": 2.9, + "learning_rate": 3.686093366728962e-06, + "loss": 0.0099, + "step": 11918 + }, + { + "epoch": 2.9, + "learning_rate": 3.683033888676354e-06, + "loss": 0.0078, + "step": 11920 + }, + { + "epoch": 2.9, + "learning_rate": 3.679975394223673e-06, + "loss": 0.0072, + "step": 11922 + }, + { + "epoch": 2.91, + "learning_rate": 3.6769178838471508e-06, + "loss": 0.006, + "step": 11924 + }, + { + "epoch": 2.91, + "learning_rate": 3.6738613580228664e-06, + "loss": 0.0055, + "step": 11926 + }, + { + "epoch": 2.91, + "learning_rate": 3.6708058172267434e-06, + "loss": 0.008, + "step": 11928 + }, + { + "epoch": 2.91, + "learning_rate": 3.667751261934549e-06, + "loss": 0.009, + "step": 11930 + }, + { + "epoch": 2.91, + "learning_rate": 3.6646976926219025e-06, + "loss": 0.0031, + "step": 11932 + }, + { + "epoch": 2.91, + "learning_rate": 3.6616451097642734e-06, + "loss": 0.0076, + "step": 11934 + }, + { + "epoch": 2.91, + "learning_rate": 3.6585935138369644e-06, + "loss": 0.0048, + "step": 11936 + }, + { + "epoch": 2.91, + "learning_rate": 3.655542905315135e-06, + "loss": 0.0109, + "step": 11938 + }, + { + "epoch": 2.91, + "learning_rate": 3.6524932846737926e-06, + "loss": 0.0058, + "step": 11940 + }, + { + "epoch": 2.91, + "learning_rate": 3.6494446523877835e-06, + "loss": 0.0073, + "step": 11942 + }, + { + "epoch": 2.91, + "learning_rate": 3.646397008931799e-06, + "loss": 0.0032, + "step": 11944 + }, + { + "epoch": 2.91, + "learning_rate": 3.643350354780384e-06, + "loss": 0.0053, + "step": 11946 + }, + { + "epoch": 2.91, + "learning_rate": 3.6403046904079255e-06, + "loss": 0.0047, + "step": 11948 + }, + { + "epoch": 2.91, + "learning_rate": 3.6372600162886605e-06, + "loss": 0.0059, + "step": 11950 + }, + { + "epoch": 2.91, + "learning_rate": 3.6342163328966617e-06, + "loss": 0.0057, + "step": 11952 + }, + { + "epoch": 2.91, + "learning_rate": 3.6311736407058607e-06, + "loss": 0.0032, + "step": 11954 + }, + { + "epoch": 2.91, + "learning_rate": 3.6281319401900194e-06, + "loss": 0.0028, + "step": 11956 + }, + { + "epoch": 2.91, + "learning_rate": 3.625091231822763e-06, + "loss": 0.0053, + "step": 11958 + }, + { + "epoch": 2.91, + "learning_rate": 3.6220515160775436e-06, + "loss": 0.0049, + "step": 11960 + }, + { + "epoch": 2.91, + "learning_rate": 3.6190127934276743e-06, + "loss": 0.0043, + "step": 11962 + }, + { + "epoch": 2.92, + "learning_rate": 3.6159750643463064e-06, + "loss": 0.0045, + "step": 11964 + }, + { + "epoch": 2.92, + "learning_rate": 3.61293832930644e-06, + "loss": 0.0095, + "step": 11966 + }, + { + "epoch": 2.92, + "learning_rate": 3.6099025887809123e-06, + "loss": 0.0041, + "step": 11968 + }, + { + "epoch": 2.92, + "learning_rate": 3.6068678432424175e-06, + "loss": 0.0018, + "step": 11970 + }, + { + "epoch": 2.92, + "learning_rate": 3.603834093163483e-06, + "loss": 0.0045, + "step": 11972 + }, + { + "epoch": 2.92, + "learning_rate": 3.600801339016492e-06, + "loss": 0.0076, + "step": 11974 + }, + { + "epoch": 2.92, + "learning_rate": 3.5977695812736613e-06, + "loss": 0.0091, + "step": 11976 + }, + { + "epoch": 2.92, + "learning_rate": 3.594738820407063e-06, + "loss": 0.0041, + "step": 11978 + }, + { + "epoch": 2.92, + "learning_rate": 3.5917090568886092e-06, + "loss": 0.0048, + "step": 11980 + }, + { + "epoch": 2.92, + "learning_rate": 3.588680291190061e-06, + "loss": 0.0072, + "step": 11982 + }, + { + "epoch": 2.92, + "learning_rate": 3.5856525237830164e-06, + "loss": 0.0093, + "step": 11984 + }, + { + "epoch": 2.92, + "learning_rate": 3.582625755138918e-06, + "loss": 0.0062, + "step": 11986 + }, + { + "epoch": 2.92, + "learning_rate": 3.579599985729062e-06, + "loss": 0.0066, + "step": 11988 + }, + { + "epoch": 2.92, + "learning_rate": 3.5765752160245848e-06, + "loss": 0.0067, + "step": 11990 + }, + { + "epoch": 2.92, + "learning_rate": 3.5735514464964615e-06, + "loss": 0.0056, + "step": 11992 + }, + { + "epoch": 2.92, + "learning_rate": 3.570528677615519e-06, + "loss": 0.0019, + "step": 11994 + }, + { + "epoch": 2.92, + "learning_rate": 3.5675069098524282e-06, + "loss": 0.0092, + "step": 11996 + }, + { + "epoch": 2.92, + "learning_rate": 3.564486143677699e-06, + "loss": 0.0089, + "step": 11998 + }, + { + "epoch": 2.92, + "learning_rate": 3.5614663795616843e-06, + "loss": 0.0064, + "step": 12000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5584476179745876e-06, + "loss": 0.0041, + "step": 12002 + }, + { + "epoch": 2.92, + "learning_rate": 3.5554298593864523e-06, + "loss": 0.0055, + "step": 12004 + }, + { + "epoch": 2.93, + "learning_rate": 3.5524131042671705e-06, + "loss": 0.0069, + "step": 12006 + }, + { + "epoch": 2.93, + "learning_rate": 3.549397353086468e-06, + "loss": 0.0064, + "step": 12008 + }, + { + "epoch": 2.93, + "learning_rate": 3.546382606313923e-06, + "loss": 0.0086, + "step": 12010 + }, + { + "epoch": 2.93, + "learning_rate": 3.5433688644189577e-06, + "loss": 0.0112, + "step": 12012 + }, + { + "epoch": 2.93, + "learning_rate": 3.540356127870833e-06, + "loss": 0.007, + "step": 12014 + }, + { + "epoch": 2.93, + "learning_rate": 3.53734439713865e-06, + "loss": 0.0087, + "step": 12016 + }, + { + "epoch": 2.93, + "learning_rate": 3.5343336726913614e-06, + "loss": 0.0026, + "step": 12018 + }, + { + "epoch": 2.93, + "learning_rate": 3.5313239549977606e-06, + "loss": 0.0118, + "step": 12020 + }, + { + "epoch": 2.93, + "learning_rate": 3.5283152445264877e-06, + "loss": 0.0107, + "step": 12022 + }, + { + "epoch": 2.93, + "learning_rate": 3.5253075417460146e-06, + "loss": 0.0105, + "step": 12024 + }, + { + "epoch": 2.93, + "learning_rate": 3.52230084712467e-06, + "loss": 0.007, + "step": 12026 + }, + { + "epoch": 2.93, + "learning_rate": 3.519295161130614e-06, + "loss": 0.0048, + "step": 12028 + }, + { + "epoch": 2.93, + "learning_rate": 3.516290484231859e-06, + "loss": 0.0048, + "step": 12030 + }, + { + "epoch": 2.93, + "learning_rate": 3.513286816896251e-06, + "loss": 0.0074, + "step": 12032 + }, + { + "epoch": 2.93, + "learning_rate": 3.510284159591488e-06, + "loss": 0.0034, + "step": 12034 + }, + { + "epoch": 2.93, + "learning_rate": 3.507282512785105e-06, + "loss": 0.0056, + "step": 12036 + }, + { + "epoch": 2.93, + "learning_rate": 3.5042818769444866e-06, + "loss": 0.0075, + "step": 12038 + }, + { + "epoch": 2.93, + "learning_rate": 3.5012822525368506e-06, + "loss": 0.0042, + "step": 12040 + }, + { + "epoch": 2.93, + "learning_rate": 3.4982836400292573e-06, + "loss": 0.0121, + "step": 12042 + }, + { + "epoch": 2.93, + "learning_rate": 3.495286039888618e-06, + "loss": 0.0046, + "step": 12044 + }, + { + "epoch": 2.94, + "learning_rate": 3.492289452581684e-06, + "loss": 0.0029, + "step": 12046 + }, + { + "epoch": 2.94, + "learning_rate": 3.489293878575042e-06, + "loss": 0.0044, + "step": 12048 + }, + { + "epoch": 2.94, + "learning_rate": 3.486299318335128e-06, + "loss": 0.0053, + "step": 12050 + }, + { + "epoch": 2.94, + "learning_rate": 3.4833057723282214e-06, + "loss": 0.0048, + "step": 12052 + }, + { + "epoch": 2.94, + "learning_rate": 3.4803132410204364e-06, + "loss": 0.0069, + "step": 12054 + }, + { + "epoch": 2.94, + "learning_rate": 3.4773217248777313e-06, + "loss": 0.0027, + "step": 12056 + }, + { + "epoch": 2.94, + "learning_rate": 3.47433122436591e-06, + "loss": 0.0055, + "step": 12058 + }, + { + "epoch": 2.94, + "learning_rate": 3.4713417399506154e-06, + "loss": 0.0033, + "step": 12060 + }, + { + "epoch": 2.94, + "learning_rate": 3.468353272097339e-06, + "loss": 0.0087, + "step": 12062 + }, + { + "epoch": 2.94, + "learning_rate": 3.4653658212714003e-06, + "loss": 0.0089, + "step": 12064 + }, + { + "epoch": 2.94, + "learning_rate": 3.4623793879379695e-06, + "loss": 0.0032, + "step": 12066 + }, + { + "epoch": 2.94, + "learning_rate": 3.459393972562064e-06, + "loss": 0.0053, + "step": 12068 + }, + { + "epoch": 2.94, + "learning_rate": 3.4564095756085302e-06, + "loss": 0.0092, + "step": 12070 + }, + { + "epoch": 2.94, + "learning_rate": 3.4534261975420578e-06, + "loss": 0.0104, + "step": 12072 + }, + { + "epoch": 2.94, + "learning_rate": 3.4504438388271866e-06, + "loss": 0.0027, + "step": 12074 + }, + { + "epoch": 2.94, + "learning_rate": 3.4474624999282923e-06, + "loss": 0.0097, + "step": 12076 + }, + { + "epoch": 2.94, + "learning_rate": 3.444482181309594e-06, + "loss": 0.0064, + "step": 12078 + }, + { + "epoch": 2.94, + "learning_rate": 3.441502883435146e-06, + "loss": 0.0073, + "step": 12080 + }, + { + "epoch": 2.94, + "learning_rate": 3.4385246067688517e-06, + "loss": 0.0048, + "step": 12082 + }, + { + "epoch": 2.94, + "learning_rate": 3.4355473517744464e-06, + "loss": 0.0047, + "step": 12084 + }, + { + "epoch": 2.94, + "learning_rate": 3.4325711189155177e-06, + "loss": 0.0017, + "step": 12086 + }, + { + "epoch": 2.95, + "learning_rate": 3.429595908655482e-06, + "loss": 0.0051, + "step": 12088 + }, + { + "epoch": 2.95, + "learning_rate": 3.4266217214576035e-06, + "loss": 0.0096, + "step": 12090 + }, + { + "epoch": 2.95, + "learning_rate": 3.4236485577849886e-06, + "loss": 0.0037, + "step": 12092 + }, + { + "epoch": 2.95, + "learning_rate": 3.4206764181005834e-06, + "loss": 0.0064, + "step": 12094 + }, + { + "epoch": 2.95, + "learning_rate": 3.4177053028671693e-06, + "loss": 0.0048, + "step": 12096 + }, + { + "epoch": 2.95, + "learning_rate": 3.4147352125473687e-06, + "loss": 0.0041, + "step": 12098 + }, + { + "epoch": 2.95, + "learning_rate": 3.4117661476036514e-06, + "loss": 0.0068, + "step": 12100 + }, + { + "epoch": 2.95, + "learning_rate": 3.4087981084983258e-06, + "loss": 0.0042, + "step": 12102 + }, + { + "epoch": 2.95, + "learning_rate": 3.4058310956935314e-06, + "loss": 0.0079, + "step": 12104 + }, + { + "epoch": 2.95, + "learning_rate": 3.40286510965126e-06, + "loss": 0.0055, + "step": 12106 + }, + { + "epoch": 2.95, + "learning_rate": 3.3999001508333396e-06, + "loss": 0.0041, + "step": 12108 + }, + { + "epoch": 2.95, + "learning_rate": 3.39693621970143e-06, + "loss": 0.0081, + "step": 12110 + }, + { + "epoch": 2.95, + "learning_rate": 3.3939733167170475e-06, + "loss": 0.0016, + "step": 12112 + }, + { + "epoch": 2.95, + "learning_rate": 3.3910114423415296e-06, + "loss": 0.0027, + "step": 12114 + }, + { + "epoch": 2.95, + "learning_rate": 3.3880505970360667e-06, + "loss": 0.0033, + "step": 12116 + }, + { + "epoch": 2.95, + "learning_rate": 3.3850907812616873e-06, + "loss": 0.0088, + "step": 12118 + }, + { + "epoch": 2.95, + "learning_rate": 3.3821319954792533e-06, + "loss": 0.0082, + "step": 12120 + }, + { + "epoch": 2.95, + "learning_rate": 3.379174240149472e-06, + "loss": 0.0142, + "step": 12122 + }, + { + "epoch": 2.95, + "learning_rate": 3.3762175157328915e-06, + "loss": 0.0031, + "step": 12124 + }, + { + "epoch": 2.95, + "learning_rate": 3.3732618226898937e-06, + "loss": 0.0034, + "step": 12126 + }, + { + "epoch": 2.96, + "learning_rate": 3.3703071614806994e-06, + "loss": 0.0055, + "step": 12128 + }, + { + "epoch": 2.96, + "learning_rate": 3.367353532565375e-06, + "loss": 0.0066, + "step": 12130 + }, + { + "epoch": 2.96, + "learning_rate": 3.364400936403822e-06, + "loss": 0.0012, + "step": 12132 + }, + { + "epoch": 2.96, + "learning_rate": 3.3614493734557884e-06, + "loss": 0.0087, + "step": 12134 + }, + { + "epoch": 2.96, + "learning_rate": 3.358498844180845e-06, + "loss": 0.0184, + "step": 12136 + }, + { + "epoch": 2.96, + "learning_rate": 3.3555493490384215e-06, + "loss": 0.0033, + "step": 12138 + }, + { + "epoch": 2.96, + "learning_rate": 3.3526008884877683e-06, + "loss": 0.0092, + "step": 12140 + }, + { + "epoch": 2.96, + "learning_rate": 3.3496534629879905e-06, + "loss": 0.0051, + "step": 12142 + }, + { + "epoch": 2.96, + "learning_rate": 3.3467070729980177e-06, + "loss": 0.0097, + "step": 12144 + }, + { + "epoch": 2.96, + "learning_rate": 3.34376171897663e-06, + "loss": 0.0036, + "step": 12146 + }, + { + "epoch": 2.96, + "learning_rate": 3.3408174013824402e-06, + "loss": 0.0054, + "step": 12148 + }, + { + "epoch": 2.96, + "learning_rate": 3.337874120673904e-06, + "loss": 0.0057, + "step": 12150 + }, + { + "epoch": 2.96, + "learning_rate": 3.334931877309311e-06, + "loss": 0.0063, + "step": 12152 + }, + { + "epoch": 2.96, + "learning_rate": 3.3319906717467864e-06, + "loss": 0.0076, + "step": 12154 + }, + { + "epoch": 2.96, + "learning_rate": 3.3290505044443023e-06, + "loss": 0.0033, + "step": 12156 + }, + { + "epoch": 2.96, + "learning_rate": 3.3261113758596686e-06, + "loss": 0.0041, + "step": 12158 + }, + { + "epoch": 2.96, + "learning_rate": 3.323173286450523e-06, + "loss": 0.009, + "step": 12160 + }, + { + "epoch": 2.96, + "learning_rate": 3.3202362366743523e-06, + "loss": 0.001, + "step": 12162 + }, + { + "epoch": 2.96, + "learning_rate": 3.317300226988477e-06, + "loss": 0.0087, + "step": 12164 + }, + { + "epoch": 2.96, + "learning_rate": 3.3143652578500607e-06, + "loss": 0.0063, + "step": 12166 + }, + { + "epoch": 2.96, + "learning_rate": 3.311431329716096e-06, + "loss": 0.0048, + "step": 12168 + }, + { + "epoch": 2.97, + "learning_rate": 3.3084984430434152e-06, + "loss": 0.004, + "step": 12170 + }, + { + "epoch": 2.97, + "learning_rate": 3.305566598288694e-06, + "loss": 0.0045, + "step": 12172 + }, + { + "epoch": 2.97, + "learning_rate": 3.3026357959084465e-06, + "loss": 0.0094, + "step": 12174 + }, + { + "epoch": 2.97, + "learning_rate": 3.299706036359015e-06, + "loss": 0.0046, + "step": 12176 + }, + { + "epoch": 2.97, + "learning_rate": 3.296777320096589e-06, + "loss": 0.0077, + "step": 12178 + }, + { + "epoch": 2.97, + "learning_rate": 3.293849647577194e-06, + "loss": 0.008, + "step": 12180 + }, + { + "epoch": 2.97, + "learning_rate": 3.29092301925669e-06, + "loss": 0.0025, + "step": 12182 + }, + { + "epoch": 2.97, + "learning_rate": 3.2879974355907684e-06, + "loss": 0.003, + "step": 12184 + }, + { + "epoch": 2.97, + "learning_rate": 3.2850728970349722e-06, + "loss": 0.0138, + "step": 12186 + }, + { + "epoch": 2.97, + "learning_rate": 3.282149404044672e-06, + "loss": 0.0045, + "step": 12188 + }, + { + "epoch": 2.97, + "learning_rate": 3.279226957075081e-06, + "loss": 0.0034, + "step": 12190 + }, + { + "epoch": 2.97, + "learning_rate": 3.276305556581243e-06, + "loss": 0.006, + "step": 12192 + }, + { + "epoch": 2.97, + "learning_rate": 3.2733852030180444e-06, + "loss": 0.0038, + "step": 12194 + }, + { + "epoch": 2.97, + "learning_rate": 3.270465896840205e-06, + "loss": 0.004, + "step": 12196 + }, + { + "epoch": 2.97, + "learning_rate": 3.2675476385022853e-06, + "loss": 0.0033, + "step": 12198 + }, + { + "epoch": 2.97, + "learning_rate": 3.264630428458676e-06, + "loss": 0.0039, + "step": 12200 + }, + { + "epoch": 2.97, + "learning_rate": 3.2617142671636127e-06, + "loss": 0.0042, + "step": 12202 + }, + { + "epoch": 2.97, + "learning_rate": 3.2587991550711627e-06, + "loss": 0.0049, + "step": 12204 + }, + { + "epoch": 2.97, + "learning_rate": 3.255885092635236e-06, + "loss": 0.0054, + "step": 12206 + }, + { + "epoch": 2.97, + "learning_rate": 3.252972080309569e-06, + "loss": 0.0091, + "step": 12208 + }, + { + "epoch": 2.98, + "learning_rate": 3.250060118547739e-06, + "loss": 0.0052, + "step": 12210 + }, + { + "epoch": 2.98, + "learning_rate": 3.247149207803163e-06, + "loss": 0.0076, + "step": 12212 + }, + { + "epoch": 2.98, + "learning_rate": 3.2442393485290947e-06, + "loss": 0.0073, + "step": 12214 + }, + { + "epoch": 2.98, + "learning_rate": 3.2413305411786155e-06, + "loss": 0.0051, + "step": 12216 + }, + { + "epoch": 2.98, + "learning_rate": 3.2384227862046525e-06, + "loss": 0.0035, + "step": 12218 + }, + { + "epoch": 2.98, + "learning_rate": 3.2355160840599654e-06, + "loss": 0.0072, + "step": 12220 + }, + { + "epoch": 2.98, + "learning_rate": 3.232610435197152e-06, + "loss": 0.0072, + "step": 12222 + }, + { + "epoch": 2.98, + "learning_rate": 3.2297058400686433e-06, + "loss": 0.0031, + "step": 12224 + }, + { + "epoch": 2.98, + "learning_rate": 3.2268022991267e-06, + "loss": 0.005, + "step": 12226 + }, + { + "epoch": 2.98, + "learning_rate": 3.2238998128234324e-06, + "loss": 0.0043, + "step": 12228 + }, + { + "epoch": 2.98, + "learning_rate": 3.2209983816107816e-06, + "loss": 0.0055, + "step": 12230 + }, + { + "epoch": 2.98, + "learning_rate": 3.2180980059405166e-06, + "loss": 0.0052, + "step": 12232 + }, + { + "epoch": 2.98, + "learning_rate": 3.2151986862642504e-06, + "loss": 0.005, + "step": 12234 + }, + { + "epoch": 2.98, + "learning_rate": 3.212300423033432e-06, + "loss": 0.0046, + "step": 12236 + }, + { + "epoch": 2.98, + "learning_rate": 3.2094032166993425e-06, + "loss": 0.0086, + "step": 12238 + }, + { + "epoch": 2.98, + "learning_rate": 3.2065070677130938e-06, + "loss": 0.0026, + "step": 12240 + }, + { + "epoch": 2.98, + "learning_rate": 3.203611976525641e-06, + "loss": 0.0073, + "step": 12242 + }, + { + "epoch": 2.98, + "learning_rate": 3.200717943587778e-06, + "loss": 0.0089, + "step": 12244 + }, + { + "epoch": 2.98, + "learning_rate": 3.1978249693501185e-06, + "loss": 0.0077, + "step": 12246 + }, + { + "epoch": 2.98, + "learning_rate": 3.194933054263126e-06, + "loss": 0.0051, + "step": 12248 + }, + { + "epoch": 2.98, + "learning_rate": 3.1920421987770956e-06, + "loss": 0.008, + "step": 12250 + }, + { + "epoch": 2.99, + "learning_rate": 3.1891524033421527e-06, + "loss": 0.004, + "step": 12252 + }, + { + "epoch": 2.99, + "learning_rate": 3.1862636684082582e-06, + "loss": 0.0069, + "step": 12254 + }, + { + "epoch": 2.99, + "learning_rate": 3.183375994425213e-06, + "loss": 0.0092, + "step": 12256 + }, + { + "epoch": 2.99, + "learning_rate": 3.1804893818426498e-06, + "loss": 0.009, + "step": 12258 + }, + { + "epoch": 2.99, + "learning_rate": 3.177603831110039e-06, + "loss": 0.006, + "step": 12260 + }, + { + "epoch": 2.99, + "learning_rate": 3.1747193426766763e-06, + "loss": 0.0082, + "step": 12262 + }, + { + "epoch": 2.99, + "learning_rate": 3.1718359169917033e-06, + "loss": 0.0062, + "step": 12264 + }, + { + "epoch": 2.99, + "learning_rate": 3.168953554504094e-06, + "loss": 0.0022, + "step": 12266 + }, + { + "epoch": 2.99, + "learning_rate": 3.16607225566265e-06, + "loss": 0.0059, + "step": 12268 + }, + { + "epoch": 2.99, + "learning_rate": 3.1631920209160106e-06, + "loss": 0.0032, + "step": 12270 + }, + { + "epoch": 2.99, + "learning_rate": 3.1603128507126515e-06, + "loss": 0.005, + "step": 12272 + }, + { + "epoch": 2.99, + "learning_rate": 3.1574347455008813e-06, + "loss": 0.004, + "step": 12274 + }, + { + "epoch": 2.99, + "learning_rate": 3.154557705728849e-06, + "loss": 0.0037, + "step": 12276 + }, + { + "epoch": 2.99, + "learning_rate": 3.1516817318445225e-06, + "loss": 0.0105, + "step": 12278 + }, + { + "epoch": 2.99, + "learning_rate": 3.148806824295719e-06, + "loss": 0.005, + "step": 12280 + }, + { + "epoch": 2.99, + "learning_rate": 3.14593298353008e-06, + "loss": 0.0061, + "step": 12282 + }, + { + "epoch": 2.99, + "learning_rate": 3.1430602099950892e-06, + "loss": 0.0064, + "step": 12284 + }, + { + "epoch": 2.99, + "learning_rate": 3.140188504138053e-06, + "loss": 0.0035, + "step": 12286 + }, + { + "epoch": 2.99, + "learning_rate": 3.137317866406121e-06, + "loss": 0.0061, + "step": 12288 + }, + { + "epoch": 2.99, + "learning_rate": 3.134448297246274e-06, + "loss": 0.0032, + "step": 12290 + }, + { + "epoch": 3.0, + "learning_rate": 3.1315797971053295e-06, + "loss": 0.0025, + "step": 12292 + }, + { + "epoch": 3.0, + "learning_rate": 3.12871236642993e-06, + "loss": 0.0024, + "step": 12294 + }, + { + "epoch": 3.0, + "learning_rate": 3.1258460056665553e-06, + "loss": 0.0048, + "step": 12296 + }, + { + "epoch": 3.0, + "learning_rate": 3.1229807152615198e-06, + "loss": 0.0053, + "step": 12298 + }, + { + "epoch": 3.0, + "learning_rate": 3.1201164956609777e-06, + "loss": 0.0025, + "step": 12300 + }, + { + "epoch": 3.0, + "learning_rate": 3.1172533473109022e-06, + "loss": 0.0006, + "step": 12302 + }, + { + "epoch": 3.0, + "learning_rate": 3.11439127065711e-06, + "loss": 0.0017, + "step": 12304 + }, + { + "epoch": 3.0, + "learning_rate": 3.1115302661452527e-06, + "loss": 0.0041, + "step": 12306 + }, + { + "epoch": 3.0, + "learning_rate": 3.1086703342208026e-06, + "loss": 0.0037, + "step": 12308 + }, + { + "epoch": 3.0, + "learning_rate": 3.10581147532908e-06, + "loss": 0.0053, + "step": 12310 + }, + { + "epoch": 3.0, + "learning_rate": 3.1029536899152257e-06, + "loss": 0.0054, + "step": 12312 + }, + { + "epoch": 3.0, + "learning_rate": 3.1000969784242217e-06, + "loss": 0.0023, + "step": 12314 + }, + { + "epoch": 3.0, + "learning_rate": 3.097241341300882e-06, + "loss": 0.0053, + "step": 12316 + }, + { + "epoch": 3.0, + "learning_rate": 3.0943867789898453e-06, + "loss": 0.004, + "step": 12318 + }, + { + "epoch": 3.0, + "learning_rate": 3.0915332919355912e-06, + "loss": 0.0024, + "step": 12320 + }, + { + "epoch": 3.0, + "learning_rate": 3.088680880582434e-06, + "loss": 0.0017, + "step": 12322 + }, + { + "epoch": 3.0, + "learning_rate": 3.0858295453745123e-06, + "loss": 0.0019, + "step": 12324 + }, + { + "epoch": 3.0, + "learning_rate": 3.0829792867557973e-06, + "loss": 0.004, + "step": 12326 + }, + { + "epoch": 3.0, + "learning_rate": 3.0801301051700992e-06, + "loss": 0.0037, + "step": 12328 + }, + { + "epoch": 3.0, + "learning_rate": 3.077282001061057e-06, + "loss": 0.002, + "step": 12330 + }, + { + "epoch": 3.0, + "learning_rate": 3.074434974872147e-06, + "loss": 0.0034, + "step": 12332 + }, + { + "epoch": 3.01, + "learning_rate": 3.0715890270466652e-06, + "loss": 0.0009, + "step": 12334 + }, + { + "epoch": 3.01, + "learning_rate": 3.0687441580277546e-06, + "loss": 0.0026, + "step": 12336 + }, + { + "epoch": 3.01, + "learning_rate": 3.0659003682583766e-06, + "loss": 0.0038, + "step": 12338 + }, + { + "epoch": 3.01, + "learning_rate": 3.0630576581813365e-06, + "loss": 0.004, + "step": 12340 + }, + { + "epoch": 3.01, + "learning_rate": 3.06021602823926e-06, + "loss": 0.0034, + "step": 12342 + }, + { + "epoch": 3.01, + "learning_rate": 3.057375478874615e-06, + "loss": 0.0027, + "step": 12344 + }, + { + "epoch": 3.01, + "learning_rate": 3.054536010529694e-06, + "loss": 0.0017, + "step": 12346 + }, + { + "epoch": 3.01, + "learning_rate": 3.05169762364663e-06, + "loss": 0.0055, + "step": 12348 + }, + { + "epoch": 3.01, + "learning_rate": 3.0488603186673772e-06, + "loss": 0.0036, + "step": 12350 + }, + { + "epoch": 3.01, + "learning_rate": 3.046024096033723e-06, + "loss": 0.0037, + "step": 12352 + }, + { + "epoch": 3.01, + "learning_rate": 3.0431889561872905e-06, + "loss": 0.0031, + "step": 12354 + }, + { + "epoch": 3.01, + "learning_rate": 3.0403548995695387e-06, + "loss": 0.0063, + "step": 12356 + }, + { + "epoch": 3.01, + "learning_rate": 3.0375219266217428e-06, + "loss": 0.003, + "step": 12358 + }, + { + "epoch": 3.01, + "learning_rate": 3.0346900377850218e-06, + "loss": 0.0024, + "step": 12360 + }, + { + "epoch": 3.01, + "learning_rate": 3.031859233500325e-06, + "loss": 0.0021, + "step": 12362 + }, + { + "epoch": 3.01, + "learning_rate": 3.02902951420843e-06, + "loss": 0.0017, + "step": 12364 + }, + { + "epoch": 3.01, + "learning_rate": 3.0262008803499455e-06, + "loss": 0.0026, + "step": 12366 + }, + { + "epoch": 3.01, + "learning_rate": 3.023373332365306e-06, + "loss": 0.0063, + "step": 12368 + }, + { + "epoch": 3.01, + "learning_rate": 3.020546870694787e-06, + "loss": 0.0027, + "step": 12370 + }, + { + "epoch": 3.01, + "learning_rate": 3.017721495778493e-06, + "loss": 0.0034, + "step": 12372 + }, + { + "epoch": 3.02, + "learning_rate": 3.01489720805635e-06, + "loss": 0.0023, + "step": 12374 + }, + { + "epoch": 3.02, + "learning_rate": 3.0120740079681233e-06, + "loss": 0.004, + "step": 12376 + }, + { + "epoch": 3.02, + "learning_rate": 3.0092518959534125e-06, + "loss": 0.0037, + "step": 12378 + }, + { + "epoch": 3.02, + "learning_rate": 3.0064308724516357e-06, + "loss": 0.0056, + "step": 12380 + }, + { + "epoch": 3.02, + "learning_rate": 3.0036109379020474e-06, + "loss": 0.0012, + "step": 12382 + }, + { + "epoch": 3.02, + "learning_rate": 3.000792092743735e-06, + "loss": 0.0037, + "step": 12384 + }, + { + "epoch": 3.02, + "learning_rate": 2.9979743374156136e-06, + "loss": 0.0066, + "step": 12386 + }, + { + "epoch": 3.02, + "learning_rate": 2.9951576723564335e-06, + "loss": 0.002, + "step": 12388 + }, + { + "epoch": 3.02, + "learning_rate": 2.9923420980047647e-06, + "loss": 0.0041, + "step": 12390 + }, + { + "epoch": 3.02, + "learning_rate": 2.9895276147990192e-06, + "loss": 0.0025, + "step": 12392 + }, + { + "epoch": 3.02, + "learning_rate": 2.986714223177427e-06, + "loss": 0.0019, + "step": 12394 + }, + { + "epoch": 3.02, + "learning_rate": 2.983901923578062e-06, + "loss": 0.0038, + "step": 12396 + }, + { + "epoch": 3.02, + "learning_rate": 2.981090716438814e-06, + "loss": 0.0045, + "step": 12398 + }, + { + "epoch": 3.02, + "learning_rate": 2.978280602197412e-06, + "loss": 0.0033, + "step": 12400 + }, + { + "epoch": 3.02, + "learning_rate": 2.9754715812914135e-06, + "loss": 0.0026, + "step": 12402 + }, + { + "epoch": 3.02, + "learning_rate": 2.9726636541582076e-06, + "loss": 0.0022, + "step": 12404 + }, + { + "epoch": 3.02, + "learning_rate": 2.9698568212350056e-06, + "loss": 0.0018, + "step": 12406 + }, + { + "epoch": 3.02, + "learning_rate": 2.9670510829588508e-06, + "loss": 0.0045, + "step": 12408 + }, + { + "epoch": 3.02, + "learning_rate": 2.9642464397666202e-06, + "loss": 0.0013, + "step": 12410 + }, + { + "epoch": 3.02, + "learning_rate": 2.9614428920950234e-06, + "loss": 0.0051, + "step": 12412 + }, + { + "epoch": 3.02, + "learning_rate": 2.9586404403805847e-06, + "loss": 0.0036, + "step": 12414 + }, + { + "epoch": 3.03, + "learning_rate": 2.9558390850596743e-06, + "loss": 0.0057, + "step": 12416 + }, + { + "epoch": 3.03, + "learning_rate": 2.9530388265684806e-06, + "loss": 0.0029, + "step": 12418 + }, + { + "epoch": 3.03, + "learning_rate": 2.950239665343032e-06, + "loss": 0.0027, + "step": 12420 + }, + { + "epoch": 3.03, + "learning_rate": 2.947441601819173e-06, + "loss": 0.002, + "step": 12422 + }, + { + "epoch": 3.03, + "learning_rate": 2.944644636432583e-06, + "loss": 0.0051, + "step": 12424 + }, + { + "epoch": 3.03, + "learning_rate": 2.941848769618774e-06, + "loss": 0.0067, + "step": 12426 + }, + { + "epoch": 3.03, + "learning_rate": 2.939054001813084e-06, + "loss": 0.0042, + "step": 12428 + }, + { + "epoch": 3.03, + "learning_rate": 2.9362603334506755e-06, + "loss": 0.0019, + "step": 12430 + }, + { + "epoch": 3.03, + "learning_rate": 2.933467764966548e-06, + "loss": 0.0042, + "step": 12432 + }, + { + "epoch": 3.03, + "learning_rate": 2.930676296795527e-06, + "loss": 0.0038, + "step": 12434 + }, + { + "epoch": 3.03, + "learning_rate": 2.927885929372264e-06, + "loss": 0.003, + "step": 12436 + }, + { + "epoch": 3.03, + "learning_rate": 2.925096663131237e-06, + "loss": 0.0052, + "step": 12438 + }, + { + "epoch": 3.03, + "learning_rate": 2.9223084985067583e-06, + "loss": 0.0061, + "step": 12440 + }, + { + "epoch": 3.03, + "learning_rate": 2.919521435932967e-06, + "loss": 0.0026, + "step": 12442 + }, + { + "epoch": 3.03, + "learning_rate": 2.9167354758438337e-06, + "loss": 0.0026, + "step": 12444 + }, + { + "epoch": 3.03, + "learning_rate": 2.9139506186731457e-06, + "loss": 0.0017, + "step": 12446 + }, + { + "epoch": 3.03, + "learning_rate": 2.9111668648545354e-06, + "loss": 0.0028, + "step": 12448 + }, + { + "epoch": 3.03, + "learning_rate": 2.908384214821446e-06, + "loss": 0.0021, + "step": 12450 + }, + { + "epoch": 3.03, + "learning_rate": 2.9056026690071646e-06, + "loss": 0.0045, + "step": 12452 + }, + { + "epoch": 3.03, + "learning_rate": 2.9028222278447936e-06, + "loss": 0.0021, + "step": 12454 + }, + { + "epoch": 3.04, + "learning_rate": 2.9000428917672717e-06, + "loss": 0.0039, + "step": 12456 + }, + { + "epoch": 3.04, + "learning_rate": 2.897264661207362e-06, + "loss": 0.0037, + "step": 12458 + }, + { + "epoch": 3.04, + "learning_rate": 2.894487536597659e-06, + "loss": 0.0041, + "step": 12460 + }, + { + "epoch": 3.04, + "learning_rate": 2.891711518370578e-06, + "loss": 0.0013, + "step": 12462 + }, + { + "epoch": 3.04, + "learning_rate": 2.8889366069583703e-06, + "loss": 0.0029, + "step": 12464 + }, + { + "epoch": 3.04, + "learning_rate": 2.8861628027931065e-06, + "loss": 0.0063, + "step": 12466 + }, + { + "epoch": 3.04, + "learning_rate": 2.883390106306694e-06, + "loss": 0.0018, + "step": 12468 + }, + { + "epoch": 3.04, + "learning_rate": 2.8806185179308567e-06, + "loss": 0.0007, + "step": 12470 + }, + { + "epoch": 3.04, + "learning_rate": 2.877848038097156e-06, + "loss": 0.0053, + "step": 12472 + }, + { + "epoch": 3.04, + "learning_rate": 2.8750786672369757e-06, + "loss": 0.0016, + "step": 12474 + }, + { + "epoch": 3.04, + "learning_rate": 2.872310405781533e-06, + "loss": 0.0059, + "step": 12476 + }, + { + "epoch": 3.04, + "learning_rate": 2.8695432541618627e-06, + "loss": 0.0007, + "step": 12478 + }, + { + "epoch": 3.04, + "learning_rate": 2.866777212808829e-06, + "loss": 0.009, + "step": 12480 + }, + { + "epoch": 3.04, + "learning_rate": 2.8640122821531278e-06, + "loss": 0.0036, + "step": 12482 + }, + { + "epoch": 3.04, + "learning_rate": 2.8612484626252836e-06, + "loss": 0.0019, + "step": 12484 + }, + { + "epoch": 3.04, + "learning_rate": 2.8584857546556387e-06, + "loss": 0.0013, + "step": 12486 + }, + { + "epoch": 3.04, + "learning_rate": 2.855724158674371e-06, + "loss": 0.0007, + "step": 12488 + }, + { + "epoch": 3.04, + "learning_rate": 2.852963675111484e-06, + "loss": 0.0045, + "step": 12490 + }, + { + "epoch": 3.04, + "learning_rate": 2.8502043043968042e-06, + "loss": 0.0081, + "step": 12492 + }, + { + "epoch": 3.04, + "learning_rate": 2.847446046959982e-06, + "loss": 0.0025, + "step": 12494 + }, + { + "epoch": 3.04, + "learning_rate": 2.844688903230505e-06, + "loss": 0.0019, + "step": 12496 + }, + { + "epoch": 3.05, + "learning_rate": 2.841932873637678e-06, + "loss": 0.0026, + "step": 12498 + }, + { + "epoch": 3.05, + "learning_rate": 2.8391779586106427e-06, + "loss": 0.0037, + "step": 12500 + }, + { + "epoch": 3.05, + "learning_rate": 2.8364241585783514e-06, + "loss": 0.0047, + "step": 12502 + }, + { + "epoch": 3.05, + "learning_rate": 2.8336714739695993e-06, + "loss": 0.0047, + "step": 12504 + }, + { + "epoch": 3.05, + "learning_rate": 2.8309199052129944e-06, + "loss": 0.0006, + "step": 12506 + }, + { + "epoch": 3.05, + "learning_rate": 2.828169452736983e-06, + "loss": 0.0023, + "step": 12508 + }, + { + "epoch": 3.05, + "learning_rate": 2.8254201169698246e-06, + "loss": 0.0106, + "step": 12510 + }, + { + "epoch": 3.05, + "learning_rate": 2.822671898339615e-06, + "loss": 0.0034, + "step": 12512 + }, + { + "epoch": 3.05, + "learning_rate": 2.8199247972742747e-06, + "loss": 0.0014, + "step": 12514 + }, + { + "epoch": 3.05, + "learning_rate": 2.817178814201549e-06, + "loss": 0.004, + "step": 12516 + }, + { + "epoch": 3.05, + "learning_rate": 2.8144339495490035e-06, + "loss": 0.0031, + "step": 12518 + }, + { + "epoch": 3.05, + "learning_rate": 2.8116902037440408e-06, + "loss": 0.0031, + "step": 12520 + }, + { + "epoch": 3.05, + "learning_rate": 2.8089475772138775e-06, + "loss": 0.004, + "step": 12522 + }, + { + "epoch": 3.05, + "learning_rate": 2.8062060703855666e-06, + "loss": 0.0053, + "step": 12524 + }, + { + "epoch": 3.05, + "learning_rate": 2.803465683685975e-06, + "loss": 0.0024, + "step": 12526 + }, + { + "epoch": 3.05, + "learning_rate": 2.800726417541807e-06, + "loss": 0.0038, + "step": 12528 + }, + { + "epoch": 3.05, + "learning_rate": 2.797988272379586e-06, + "loss": 0.0035, + "step": 12530 + }, + { + "epoch": 3.05, + "learning_rate": 2.7952512486256646e-06, + "loss": 0.003, + "step": 12532 + }, + { + "epoch": 3.05, + "learning_rate": 2.792515346706216e-06, + "loss": 0.0062, + "step": 12534 + }, + { + "epoch": 3.05, + "learning_rate": 2.789780567047239e-06, + "loss": 0.0017, + "step": 12536 + }, + { + "epoch": 3.06, + "learning_rate": 2.7870469100745612e-06, + "loss": 0.0041, + "step": 12538 + }, + { + "epoch": 3.06, + "learning_rate": 2.784314376213837e-06, + "loss": 0.0019, + "step": 12540 + }, + { + "epoch": 3.06, + "learning_rate": 2.781582965890538e-06, + "loss": 0.0024, + "step": 12542 + }, + { + "epoch": 3.06, + "learning_rate": 2.7788526795299675e-06, + "loss": 0.0006, + "step": 12544 + }, + { + "epoch": 3.06, + "learning_rate": 2.7761235175572554e-06, + "loss": 0.0016, + "step": 12546 + }, + { + "epoch": 3.06, + "learning_rate": 2.7733954803973505e-06, + "loss": 0.001, + "step": 12548 + }, + { + "epoch": 3.06, + "learning_rate": 2.7706685684750245e-06, + "loss": 0.004, + "step": 12550 + }, + { + "epoch": 3.06, + "learning_rate": 2.767942782214884e-06, + "loss": 0.003, + "step": 12552 + }, + { + "epoch": 3.06, + "learning_rate": 2.7652181220413523e-06, + "loss": 0.0029, + "step": 12554 + }, + { + "epoch": 3.06, + "learning_rate": 2.762494588378685e-06, + "loss": 0.0024, + "step": 12556 + }, + { + "epoch": 3.06, + "learning_rate": 2.7597721816509482e-06, + "loss": 0.0023, + "step": 12558 + }, + { + "epoch": 3.06, + "learning_rate": 2.757050902282051e-06, + "loss": 0.0028, + "step": 12560 + }, + { + "epoch": 3.06, + "learning_rate": 2.7543307506957084e-06, + "loss": 0.0008, + "step": 12562 + }, + { + "epoch": 3.06, + "learning_rate": 2.7516117273154765e-06, + "loss": 0.0022, + "step": 12564 + }, + { + "epoch": 3.06, + "learning_rate": 2.7488938325647217e-06, + "loss": 0.0012, + "step": 12566 + }, + { + "epoch": 3.06, + "learning_rate": 2.746177066866643e-06, + "loss": 0.0027, + "step": 12568 + }, + { + "epoch": 3.06, + "learning_rate": 2.743461430644264e-06, + "loss": 0.0058, + "step": 12570 + }, + { + "epoch": 3.06, + "learning_rate": 2.740746924320431e-06, + "loss": 0.0035, + "step": 12572 + }, + { + "epoch": 3.06, + "learning_rate": 2.7380335483178068e-06, + "loss": 0.0027, + "step": 12574 + }, + { + "epoch": 3.06, + "learning_rate": 2.735321303058893e-06, + "loss": 0.0013, + "step": 12576 + }, + { + "epoch": 3.06, + "learning_rate": 2.7326101889659983e-06, + "loss": 0.0055, + "step": 12578 + }, + { + "epoch": 3.07, + "learning_rate": 2.7299002064612734e-06, + "loss": 0.0036, + "step": 12580 + }, + { + "epoch": 3.07, + "learning_rate": 2.7271913559666742e-06, + "loss": 0.0033, + "step": 12582 + }, + { + "epoch": 3.07, + "learning_rate": 2.724483637903993e-06, + "loss": 0.0017, + "step": 12584 + }, + { + "epoch": 3.07, + "learning_rate": 2.721777052694844e-06, + "loss": 0.0032, + "step": 12586 + }, + { + "epoch": 3.07, + "learning_rate": 2.7190716007606634e-06, + "loss": 0.0045, + "step": 12588 + }, + { + "epoch": 3.07, + "learning_rate": 2.7163672825227104e-06, + "loss": 0.0021, + "step": 12590 + }, + { + "epoch": 3.07, + "learning_rate": 2.7136640984020636e-06, + "loss": 0.0073, + "step": 12592 + }, + { + "epoch": 3.07, + "learning_rate": 2.710962048819633e-06, + "loss": 0.0043, + "step": 12594 + }, + { + "epoch": 3.07, + "learning_rate": 2.7082611341961506e-06, + "loss": 0.0044, + "step": 12596 + }, + { + "epoch": 3.07, + "learning_rate": 2.7055613549521643e-06, + "loss": 0.002, + "step": 12598 + }, + { + "epoch": 3.07, + "learning_rate": 2.702862711508053e-06, + "loss": 0.0052, + "step": 12600 + }, + { + "epoch": 3.07, + "learning_rate": 2.70016520428402e-06, + "loss": 0.0032, + "step": 12602 + }, + { + "epoch": 3.07, + "learning_rate": 2.6974688337000832e-06, + "loss": 0.0029, + "step": 12604 + }, + { + "epoch": 3.07, + "learning_rate": 2.694773600176085e-06, + "loss": 0.0054, + "step": 12606 + }, + { + "epoch": 3.07, + "learning_rate": 2.6920795041316994e-06, + "loss": 0.0024, + "step": 12608 + }, + { + "epoch": 3.07, + "learning_rate": 2.6893865459864154e-06, + "loss": 0.001, + "step": 12610 + }, + { + "epoch": 3.07, + "learning_rate": 2.686694726159551e-06, + "loss": 0.0049, + "step": 12612 + }, + { + "epoch": 3.07, + "learning_rate": 2.6840040450702366e-06, + "loss": 0.0027, + "step": 12614 + }, + { + "epoch": 3.07, + "learning_rate": 2.6813145031374356e-06, + "loss": 0.0036, + "step": 12616 + }, + { + "epoch": 3.07, + "learning_rate": 2.678626100779933e-06, + "loss": 0.0044, + "step": 12618 + }, + { + "epoch": 3.08, + "learning_rate": 2.67593883841633e-06, + "loss": 0.0016, + "step": 12620 + }, + { + "epoch": 3.08, + "learning_rate": 2.6732527164650523e-06, + "loss": 0.0041, + "step": 12622 + }, + { + "epoch": 3.08, + "learning_rate": 2.6705677353443514e-06, + "loss": 0.0021, + "step": 12624 + }, + { + "epoch": 3.08, + "learning_rate": 2.6678838954722997e-06, + "loss": 0.0043, + "step": 12626 + }, + { + "epoch": 3.08, + "learning_rate": 2.6652011972667945e-06, + "loss": 0.0044, + "step": 12628 + }, + { + "epoch": 3.08, + "learning_rate": 2.6625196411455456e-06, + "loss": 0.0021, + "step": 12630 + }, + { + "epoch": 3.08, + "learning_rate": 2.6598392275261e-06, + "loss": 0.0029, + "step": 12632 + }, + { + "epoch": 3.08, + "learning_rate": 2.657159956825811e-06, + "loss": 0.0051, + "step": 12634 + }, + { + "epoch": 3.08, + "learning_rate": 2.654481829461868e-06, + "loss": 0.0023, + "step": 12636 + }, + { + "epoch": 3.08, + "learning_rate": 2.6518048458512692e-06, + "loss": 0.0072, + "step": 12638 + }, + { + "epoch": 3.08, + "learning_rate": 2.6491290064108454e-06, + "loss": 0.0047, + "step": 12640 + }, + { + "epoch": 3.08, + "learning_rate": 2.646454311557245e-06, + "loss": 0.0029, + "step": 12642 + }, + { + "epoch": 3.08, + "learning_rate": 2.643780761706941e-06, + "loss": 0.0058, + "step": 12644 + }, + { + "epoch": 3.08, + "learning_rate": 2.641108357276223e-06, + "loss": 0.0009, + "step": 12646 + }, + { + "epoch": 3.08, + "learning_rate": 2.6384370986812027e-06, + "loss": 0.0008, + "step": 12648 + }, + { + "epoch": 3.08, + "learning_rate": 2.6357669863378155e-06, + "loss": 0.0003, + "step": 12650 + }, + { + "epoch": 3.08, + "learning_rate": 2.633098020661824e-06, + "loss": 0.005, + "step": 12652 + }, + { + "epoch": 3.08, + "learning_rate": 2.6304302020687997e-06, + "loss": 0.0035, + "step": 12654 + }, + { + "epoch": 3.08, + "learning_rate": 2.627763530974147e-06, + "loss": 0.0042, + "step": 12656 + }, + { + "epoch": 3.08, + "learning_rate": 2.6250980077930864e-06, + "loss": 0.0033, + "step": 12658 + }, + { + "epoch": 3.08, + "learning_rate": 2.6224336329406607e-06, + "loss": 0.0013, + "step": 12660 + }, + { + "epoch": 3.09, + "learning_rate": 2.6197704068317274e-06, + "loss": 0.0012, + "step": 12662 + }, + { + "epoch": 3.09, + "learning_rate": 2.6171083298809773e-06, + "loss": 0.0041, + "step": 12664 + }, + { + "epoch": 3.09, + "learning_rate": 2.6144474025029145e-06, + "loss": 0.0078, + "step": 12666 + }, + { + "epoch": 3.09, + "learning_rate": 2.611787625111868e-06, + "loss": 0.0024, + "step": 12668 + }, + { + "epoch": 3.09, + "learning_rate": 2.6091289981219813e-06, + "loss": 0.0074, + "step": 12670 + }, + { + "epoch": 3.09, + "learning_rate": 2.606471521947225e-06, + "loss": 0.0033, + "step": 12672 + }, + { + "epoch": 3.09, + "learning_rate": 2.6038151970013903e-06, + "loss": 0.0067, + "step": 12674 + }, + { + "epoch": 3.09, + "learning_rate": 2.601160023698086e-06, + "loss": 0.0036, + "step": 12676 + }, + { + "epoch": 3.09, + "learning_rate": 2.5985060024507403e-06, + "loss": 0.0034, + "step": 12678 + }, + { + "epoch": 3.09, + "learning_rate": 2.5958531336726057e-06, + "loss": 0.0026, + "step": 12680 + }, + { + "epoch": 3.09, + "learning_rate": 2.593201417776755e-06, + "loss": 0.0011, + "step": 12682 + }, + { + "epoch": 3.09, + "learning_rate": 2.590550855176085e-06, + "loss": 0.0017, + "step": 12684 + }, + { + "epoch": 3.09, + "learning_rate": 2.5879014462832997e-06, + "loss": 0.0023, + "step": 12686 + }, + { + "epoch": 3.09, + "learning_rate": 2.5852531915109415e-06, + "loss": 0.0032, + "step": 12688 + }, + { + "epoch": 3.09, + "learning_rate": 2.5826060912713544e-06, + "loss": 0.0011, + "step": 12690 + }, + { + "epoch": 3.09, + "learning_rate": 2.579960145976722e-06, + "loss": 0.0019, + "step": 12692 + }, + { + "epoch": 3.09, + "learning_rate": 2.5773153560390297e-06, + "loss": 0.0027, + "step": 12694 + }, + { + "epoch": 3.09, + "learning_rate": 2.5746717218700946e-06, + "loss": 0.0036, + "step": 12696 + }, + { + "epoch": 3.09, + "learning_rate": 2.572029243881552e-06, + "loss": 0.0065, + "step": 12698 + }, + { + "epoch": 3.09, + "learning_rate": 2.5693879224848585e-06, + "loss": 0.0035, + "step": 12700 + }, + { + "epoch": 3.1, + "learning_rate": 2.5667477580912836e-06, + "loss": 0.0034, + "step": 12702 + }, + { + "epoch": 3.1, + "learning_rate": 2.56410875111192e-06, + "loss": 0.0022, + "step": 12704 + }, + { + "epoch": 3.1, + "learning_rate": 2.561470901957683e-06, + "loss": 0.0019, + "step": 12706 + }, + { + "epoch": 3.1, + "learning_rate": 2.5588342110393105e-06, + "loss": 0.0012, + "step": 12708 + }, + { + "epoch": 3.1, + "learning_rate": 2.5561986787673477e-06, + "loss": 0.0047, + "step": 12710 + }, + { + "epoch": 3.1, + "learning_rate": 2.553564305552171e-06, + "loss": 0.0055, + "step": 12712 + }, + { + "epoch": 3.1, + "learning_rate": 2.5509310918039755e-06, + "loss": 0.0014, + "step": 12714 + }, + { + "epoch": 3.1, + "learning_rate": 2.5482990379327654e-06, + "loss": 0.0059, + "step": 12716 + }, + { + "epoch": 3.1, + "learning_rate": 2.545668144348379e-06, + "loss": 0.0036, + "step": 12718 + }, + { + "epoch": 3.1, + "learning_rate": 2.54303841146046e-06, + "loss": 0.0026, + "step": 12720 + }, + { + "epoch": 3.1, + "learning_rate": 2.5404098396784815e-06, + "loss": 0.0042, + "step": 12722 + }, + { + "epoch": 3.1, + "learning_rate": 2.537782429411736e-06, + "loss": 0.0037, + "step": 12724 + }, + { + "epoch": 3.1, + "learning_rate": 2.5351561810693225e-06, + "loss": 0.0022, + "step": 12726 + }, + { + "epoch": 3.1, + "learning_rate": 2.5325310950601734e-06, + "loss": 0.0012, + "step": 12728 + }, + { + "epoch": 3.1, + "learning_rate": 2.5299071717930367e-06, + "loss": 0.0019, + "step": 12730 + }, + { + "epoch": 3.1, + "learning_rate": 2.527284411676475e-06, + "loss": 0.0025, + "step": 12732 + }, + { + "epoch": 3.1, + "learning_rate": 2.524662815118868e-06, + "loss": 0.0027, + "step": 12734 + }, + { + "epoch": 3.1, + "learning_rate": 2.5220423825284223e-06, + "loss": 0.0028, + "step": 12736 + }, + { + "epoch": 3.1, + "learning_rate": 2.519423114313159e-06, + "loss": 0.004, + "step": 12738 + }, + { + "epoch": 3.1, + "learning_rate": 2.5168050108809206e-06, + "loss": 0.0011, + "step": 12740 + }, + { + "epoch": 3.1, + "learning_rate": 2.5141880726393607e-06, + "loss": 0.0071, + "step": 12742 + }, + { + "epoch": 3.11, + "learning_rate": 2.5115722999959636e-06, + "loss": 0.0039, + "step": 12744 + }, + { + "epoch": 3.11, + "learning_rate": 2.508957693358016e-06, + "loss": 0.003, + "step": 12746 + }, + { + "epoch": 3.11, + "learning_rate": 2.506344253132641e-06, + "loss": 0.0011, + "step": 12748 + }, + { + "epoch": 3.11, + "learning_rate": 2.5037319797267635e-06, + "loss": 0.0075, + "step": 12750 + }, + { + "epoch": 3.11, + "learning_rate": 2.5011208735471383e-06, + "loss": 0.0071, + "step": 12752 + }, + { + "epoch": 3.11, + "learning_rate": 2.4985109350003344e-06, + "loss": 0.0055, + "step": 12754 + }, + { + "epoch": 3.11, + "learning_rate": 2.4959021644927427e-06, + "loss": 0.0034, + "step": 12756 + }, + { + "epoch": 3.11, + "learning_rate": 2.493294562430565e-06, + "loss": 0.004, + "step": 12758 + }, + { + "epoch": 3.11, + "learning_rate": 2.4906881292198213e-06, + "loss": 0.0034, + "step": 12760 + }, + { + "epoch": 3.11, + "learning_rate": 2.4880828652663613e-06, + "loss": 0.0025, + "step": 12762 + }, + { + "epoch": 3.11, + "learning_rate": 2.4854787709758366e-06, + "loss": 0.0035, + "step": 12764 + }, + { + "epoch": 3.11, + "learning_rate": 2.4828758467537274e-06, + "loss": 0.0022, + "step": 12766 + }, + { + "epoch": 3.11, + "learning_rate": 2.4802740930053305e-06, + "loss": 0.0021, + "step": 12768 + }, + { + "epoch": 3.11, + "learning_rate": 2.4776735101357606e-06, + "loss": 0.0029, + "step": 12770 + }, + { + "epoch": 3.11, + "learning_rate": 2.4750740985499434e-06, + "loss": 0.0038, + "step": 12772 + }, + { + "epoch": 3.11, + "learning_rate": 2.472475858652632e-06, + "loss": 0.0028, + "step": 12774 + }, + { + "epoch": 3.11, + "learning_rate": 2.4698787908483867e-06, + "loss": 0.0033, + "step": 12776 + }, + { + "epoch": 3.11, + "learning_rate": 2.467282895541597e-06, + "loss": 0.0068, + "step": 12778 + }, + { + "epoch": 3.11, + "learning_rate": 2.464688173136458e-06, + "loss": 0.0032, + "step": 12780 + }, + { + "epoch": 3.11, + "learning_rate": 2.4620946240369904e-06, + "loss": 0.0028, + "step": 12782 + }, + { + "epoch": 3.12, + "learning_rate": 2.459502248647029e-06, + "loss": 0.0026, + "step": 12784 + }, + { + "epoch": 3.12, + "learning_rate": 2.4569110473702306e-06, + "loss": 0.0051, + "step": 12786 + }, + { + "epoch": 3.12, + "learning_rate": 2.454321020610061e-06, + "loss": 0.0024, + "step": 12788 + }, + { + "epoch": 3.12, + "learning_rate": 2.4517321687698047e-06, + "loss": 0.0022, + "step": 12790 + }, + { + "epoch": 3.12, + "learning_rate": 2.4491444922525687e-06, + "loss": 0.0026, + "step": 12792 + }, + { + "epoch": 3.12, + "learning_rate": 2.446557991461277e-06, + "loss": 0.0054, + "step": 12794 + }, + { + "epoch": 3.12, + "learning_rate": 2.443972666798662e-06, + "loss": 0.001, + "step": 12796 + }, + { + "epoch": 3.12, + "learning_rate": 2.4413885186672804e-06, + "loss": 0.0067, + "step": 12798 + }, + { + "epoch": 3.12, + "learning_rate": 2.4388055474695084e-06, + "loss": 0.0021, + "step": 12800 + }, + { + "epoch": 3.12, + "learning_rate": 2.4362237536075295e-06, + "loss": 0.0064, + "step": 12802 + }, + { + "epoch": 3.12, + "learning_rate": 2.433643137483347e-06, + "loss": 0.0017, + "step": 12804 + }, + { + "epoch": 3.12, + "learning_rate": 2.4310636994987856e-06, + "loss": 0.0065, + "step": 12806 + }, + { + "epoch": 3.12, + "learning_rate": 2.4284854400554834e-06, + "loss": 0.0023, + "step": 12808 + }, + { + "epoch": 3.12, + "learning_rate": 2.425908359554897e-06, + "loss": 0.0014, + "step": 12810 + }, + { + "epoch": 3.12, + "learning_rate": 2.423332458398293e-06, + "loss": 0.0024, + "step": 12812 + }, + { + "epoch": 3.12, + "learning_rate": 2.420757736986762e-06, + "loss": 0.0014, + "step": 12814 + }, + { + "epoch": 3.12, + "learning_rate": 2.4181841957212095e-06, + "loss": 0.0004, + "step": 12816 + }, + { + "epoch": 3.12, + "learning_rate": 2.4156118350023527e-06, + "loss": 0.0057, + "step": 12818 + }, + { + "epoch": 3.12, + "learning_rate": 2.413040655230726e-06, + "loss": 0.0036, + "step": 12820 + }, + { + "epoch": 3.12, + "learning_rate": 2.410470656806685e-06, + "loss": 0.0045, + "step": 12822 + }, + { + "epoch": 3.12, + "learning_rate": 2.4079018401303967e-06, + "loss": 0.0046, + "step": 12824 + }, + { + "epoch": 3.13, + "learning_rate": 2.405334205601848e-06, + "loss": 0.0018, + "step": 12826 + }, + { + "epoch": 3.13, + "learning_rate": 2.402767753620835e-06, + "loss": 0.0008, + "step": 12828 + }, + { + "epoch": 3.13, + "learning_rate": 2.4002024845869786e-06, + "loss": 0.0019, + "step": 12830 + }, + { + "epoch": 3.13, + "learning_rate": 2.3976383988997064e-06, + "loss": 0.003, + "step": 12832 + }, + { + "epoch": 3.13, + "learning_rate": 2.39507549695827e-06, + "loss": 0.0013, + "step": 12834 + }, + { + "epoch": 3.13, + "learning_rate": 2.392513779161729e-06, + "loss": 0.004, + "step": 12836 + }, + { + "epoch": 3.13, + "learning_rate": 2.3899532459089634e-06, + "loss": 0.003, + "step": 12838 + }, + { + "epoch": 3.13, + "learning_rate": 2.3873938975986698e-06, + "loss": 0.0016, + "step": 12840 + }, + { + "epoch": 3.13, + "learning_rate": 2.3848357346293593e-06, + "loss": 0.0038, + "step": 12842 + }, + { + "epoch": 3.13, + "learning_rate": 2.3822787573993557e-06, + "loss": 0.0034, + "step": 12844 + }, + { + "epoch": 3.13, + "learning_rate": 2.3797229663067965e-06, + "loss": 0.0011, + "step": 12846 + }, + { + "epoch": 3.13, + "learning_rate": 2.377168361749642e-06, + "loss": 0.0044, + "step": 12848 + }, + { + "epoch": 3.13, + "learning_rate": 2.374614944125665e-06, + "loss": 0.003, + "step": 12850 + }, + { + "epoch": 3.13, + "learning_rate": 2.372062713832447e-06, + "loss": 0.0029, + "step": 12852 + }, + { + "epoch": 3.13, + "learning_rate": 2.3695116712673927e-06, + "loss": 0.0018, + "step": 12854 + }, + { + "epoch": 3.13, + "learning_rate": 2.366961816827721e-06, + "loss": 0.0037, + "step": 12856 + }, + { + "epoch": 3.13, + "learning_rate": 2.364413150910463e-06, + "loss": 0.0024, + "step": 12858 + }, + { + "epoch": 3.13, + "learning_rate": 2.36186567391246e-06, + "loss": 0.0019, + "step": 12860 + }, + { + "epoch": 3.13, + "learning_rate": 2.3593193862303775e-06, + "loss": 0.0063, + "step": 12862 + }, + { + "epoch": 3.13, + "learning_rate": 2.356774288260694e-06, + "loss": 0.002, + "step": 12864 + }, + { + "epoch": 3.13, + "learning_rate": 2.3542303803997004e-06, + "loss": 0.0039, + "step": 12866 + }, + { + "epoch": 3.14, + "learning_rate": 2.3516876630434982e-06, + "loss": 0.0034, + "step": 12868 + }, + { + "epoch": 3.14, + "learning_rate": 2.349146136588011e-06, + "loss": 0.0034, + "step": 12870 + }, + { + "epoch": 3.14, + "learning_rate": 2.346605801428976e-06, + "loss": 0.0062, + "step": 12872 + }, + { + "epoch": 3.14, + "learning_rate": 2.344066657961942e-06, + "loss": 0.0018, + "step": 12874 + }, + { + "epoch": 3.14, + "learning_rate": 2.341528706582268e-06, + "loss": 0.0019, + "step": 12876 + }, + { + "epoch": 3.14, + "learning_rate": 2.3389919476851354e-06, + "loss": 0.0044, + "step": 12878 + }, + { + "epoch": 3.14, + "learning_rate": 2.3364563816655384e-06, + "loss": 0.004, + "step": 12880 + }, + { + "epoch": 3.14, + "learning_rate": 2.333922008918286e-06, + "loss": 0.0028, + "step": 12882 + }, + { + "epoch": 3.14, + "learning_rate": 2.3313888298379937e-06, + "loss": 0.003, + "step": 12884 + }, + { + "epoch": 3.14, + "learning_rate": 2.328856844819103e-06, + "loss": 0.0028, + "step": 12886 + }, + { + "epoch": 3.14, + "learning_rate": 2.3263260542558564e-06, + "loss": 0.0022, + "step": 12888 + }, + { + "epoch": 3.14, + "learning_rate": 2.3237964585423244e-06, + "loss": 0.0051, + "step": 12890 + }, + { + "epoch": 3.14, + "learning_rate": 2.321268058072379e-06, + "loss": 0.0009, + "step": 12892 + }, + { + "epoch": 3.14, + "learning_rate": 2.3187408532397126e-06, + "loss": 0.0035, + "step": 12894 + }, + { + "epoch": 3.14, + "learning_rate": 2.3162148444378318e-06, + "loss": 0.0022, + "step": 12896 + }, + { + "epoch": 3.14, + "learning_rate": 2.3136900320600574e-06, + "loss": 0.0025, + "step": 12898 + }, + { + "epoch": 3.14, + "learning_rate": 2.3111664164995196e-06, + "loss": 0.0041, + "step": 12900 + }, + { + "epoch": 3.14, + "learning_rate": 2.3086439981491616e-06, + "loss": 0.0019, + "step": 12902 + }, + { + "epoch": 3.14, + "learning_rate": 2.306122777401746e-06, + "loss": 0.0026, + "step": 12904 + }, + { + "epoch": 3.14, + "learning_rate": 2.3036027546498495e-06, + "loss": 0.0035, + "step": 12906 + }, + { + "epoch": 3.15, + "learning_rate": 2.3010839302858535e-06, + "loss": 0.0062, + "step": 12908 + }, + { + "epoch": 3.15, + "learning_rate": 2.29856630470196e-06, + "loss": 0.0046, + "step": 12910 + }, + { + "epoch": 3.15, + "learning_rate": 2.296049878290185e-06, + "loss": 0.0034, + "step": 12912 + }, + { + "epoch": 3.15, + "learning_rate": 2.2935346514423517e-06, + "loss": 0.0027, + "step": 12914 + }, + { + "epoch": 3.15, + "learning_rate": 2.2910206245501043e-06, + "loss": 0.0014, + "step": 12916 + }, + { + "epoch": 3.15, + "learning_rate": 2.2885077980048907e-06, + "loss": 0.0145, + "step": 12918 + }, + { + "epoch": 3.15, + "learning_rate": 2.2859961721979807e-06, + "loss": 0.0044, + "step": 12920 + }, + { + "epoch": 3.15, + "learning_rate": 2.2834857475204555e-06, + "loss": 0.0092, + "step": 12922 + }, + { + "epoch": 3.15, + "learning_rate": 2.2809765243632008e-06, + "loss": 0.0017, + "step": 12924 + }, + { + "epoch": 3.15, + "learning_rate": 2.2784685031169275e-06, + "loss": 0.0012, + "step": 12926 + }, + { + "epoch": 3.15, + "learning_rate": 2.275961684172154e-06, + "loss": 0.0012, + "step": 12928 + }, + { + "epoch": 3.15, + "learning_rate": 2.27345606791921e-06, + "loss": 0.0034, + "step": 12930 + }, + { + "epoch": 3.15, + "learning_rate": 2.2709516547482347e-06, + "loss": 0.003, + "step": 12932 + }, + { + "epoch": 3.15, + "learning_rate": 2.2684484450491894e-06, + "loss": 0.0038, + "step": 12934 + }, + { + "epoch": 3.15, + "learning_rate": 2.26594643921184e-06, + "loss": 0.004, + "step": 12936 + }, + { + "epoch": 3.15, + "learning_rate": 2.263445637625774e-06, + "loss": 0.0022, + "step": 12938 + }, + { + "epoch": 3.15, + "learning_rate": 2.2609460406803775e-06, + "loss": 0.0088, + "step": 12940 + }, + { + "epoch": 3.15, + "learning_rate": 2.258447648764863e-06, + "loss": 0.0027, + "step": 12942 + }, + { + "epoch": 3.15, + "learning_rate": 2.2559504622682436e-06, + "loss": 0.0018, + "step": 12944 + }, + { + "epoch": 3.15, + "learning_rate": 2.253454481579357e-06, + "loss": 0.0015, + "step": 12946 + }, + { + "epoch": 3.15, + "learning_rate": 2.2509597070868393e-06, + "loss": 0.0015, + "step": 12948 + }, + { + "epoch": 3.16, + "learning_rate": 2.2484661391791494e-06, + "loss": 0.0017, + "step": 12950 + }, + { + "epoch": 3.16, + "learning_rate": 2.2459737782445546e-06, + "loss": 0.004, + "step": 12952 + }, + { + "epoch": 3.16, + "learning_rate": 2.2434826246711384e-06, + "loss": 0.0011, + "step": 12954 + }, + { + "epoch": 3.16, + "learning_rate": 2.2409926788467883e-06, + "loss": 0.0048, + "step": 12956 + }, + { + "epoch": 3.16, + "learning_rate": 2.238503941159206e-06, + "loss": 0.0041, + "step": 12958 + }, + { + "epoch": 3.16, + "learning_rate": 2.2360164119959093e-06, + "loss": 0.0009, + "step": 12960 + }, + { + "epoch": 3.16, + "learning_rate": 2.2335300917442293e-06, + "loss": 0.0022, + "step": 12962 + }, + { + "epoch": 3.16, + "learning_rate": 2.231044980791298e-06, + "loss": 0.0043, + "step": 12964 + }, + { + "epoch": 3.16, + "learning_rate": 2.2285610795240695e-06, + "loss": 0.0025, + "step": 12966 + }, + { + "epoch": 3.16, + "learning_rate": 2.226078388329307e-06, + "loss": 0.0052, + "step": 12968 + }, + { + "epoch": 3.16, + "learning_rate": 2.223596907593586e-06, + "loss": 0.0025, + "step": 12970 + }, + { + "epoch": 3.16, + "learning_rate": 2.221116637703291e-06, + "loss": 0.0071, + "step": 12972 + }, + { + "epoch": 3.16, + "learning_rate": 2.2186375790446148e-06, + "loss": 0.0079, + "step": 12974 + }, + { + "epoch": 3.16, + "learning_rate": 2.216159732003568e-06, + "loss": 0.0048, + "step": 12976 + }, + { + "epoch": 3.16, + "learning_rate": 2.213683096965975e-06, + "loss": 0.0024, + "step": 12978 + }, + { + "epoch": 3.16, + "learning_rate": 2.2112076743174593e-06, + "loss": 0.0014, + "step": 12980 + }, + { + "epoch": 3.16, + "learning_rate": 2.208733464443468e-06, + "loss": 0.004, + "step": 12982 + }, + { + "epoch": 3.16, + "learning_rate": 2.2062604677292554e-06, + "loss": 0.0035, + "step": 12984 + }, + { + "epoch": 3.16, + "learning_rate": 2.2037886845598845e-06, + "loss": 0.0028, + "step": 12986 + }, + { + "epoch": 3.16, + "learning_rate": 2.201318115320227e-06, + "loss": 0.0032, + "step": 12988 + }, + { + "epoch": 3.17, + "learning_rate": 2.1988487603949726e-06, + "loss": 0.0038, + "step": 12990 + }, + { + "epoch": 3.17, + "learning_rate": 2.19638062016862e-06, + "loss": 0.0045, + "step": 12992 + }, + { + "epoch": 3.17, + "learning_rate": 2.193913695025478e-06, + "loss": 0.0031, + "step": 12994 + }, + { + "epoch": 3.17, + "learning_rate": 2.1914479853496618e-06, + "loss": 0.0019, + "step": 12996 + }, + { + "epoch": 3.17, + "learning_rate": 2.1889834915251063e-06, + "loss": 0.0085, + "step": 12998 + }, + { + "epoch": 3.17, + "learning_rate": 2.1865202139355467e-06, + "loss": 0.0017, + "step": 13000 + }, + { + "epoch": 3.17, + "learning_rate": 2.18405815296454e-06, + "loss": 0.0049, + "step": 13002 + }, + { + "epoch": 3.17, + "learning_rate": 2.181597308995441e-06, + "loss": 0.0026, + "step": 13004 + }, + { + "epoch": 3.17, + "learning_rate": 2.1791376824114265e-06, + "loss": 0.0048, + "step": 13006 + }, + { + "epoch": 3.17, + "learning_rate": 2.176679273595477e-06, + "loss": 0.0019, + "step": 13008 + }, + { + "epoch": 3.17, + "learning_rate": 2.1742220829303904e-06, + "loss": 0.0043, + "step": 13010 + }, + { + "epoch": 3.17, + "learning_rate": 2.1717661107987663e-06, + "loss": 0.001, + "step": 13012 + }, + { + "epoch": 3.17, + "learning_rate": 2.169311357583016e-06, + "loss": 0.0035, + "step": 13014 + }, + { + "epoch": 3.17, + "learning_rate": 2.1668578236653647e-06, + "loss": 0.0027, + "step": 13016 + }, + { + "epoch": 3.17, + "learning_rate": 2.1644055094278515e-06, + "loss": 0.0045, + "step": 13018 + }, + { + "epoch": 3.17, + "learning_rate": 2.1619544152523143e-06, + "loss": 0.0032, + "step": 13020 + }, + { + "epoch": 3.17, + "learning_rate": 2.1595045415204087e-06, + "loss": 0.0011, + "step": 13022 + }, + { + "epoch": 3.17, + "learning_rate": 2.1570558886135997e-06, + "loss": 0.0027, + "step": 13024 + }, + { + "epoch": 3.17, + "learning_rate": 2.154608456913163e-06, + "loss": 0.0017, + "step": 13026 + }, + { + "epoch": 3.17, + "learning_rate": 2.1521622468001814e-06, + "loss": 0.0009, + "step": 13028 + }, + { + "epoch": 3.17, + "learning_rate": 2.1497172586555447e-06, + "loss": 0.0038, + "step": 13030 + }, + { + "epoch": 3.18, + "learning_rate": 2.1472734928599593e-06, + "loss": 0.0014, + "step": 13032 + }, + { + "epoch": 3.18, + "learning_rate": 2.14483094979394e-06, + "loss": 0.0025, + "step": 13034 + }, + { + "epoch": 3.18, + "learning_rate": 2.1423896298378066e-06, + "loss": 0.0036, + "step": 13036 + }, + { + "epoch": 3.18, + "learning_rate": 2.1399495333716912e-06, + "loss": 0.0082, + "step": 13038 + }, + { + "epoch": 3.18, + "learning_rate": 2.137510660775539e-06, + "loss": 0.0008, + "step": 13040 + }, + { + "epoch": 3.18, + "learning_rate": 2.1350730124290985e-06, + "loss": 0.0031, + "step": 13042 + }, + { + "epoch": 3.18, + "learning_rate": 2.132636588711928e-06, + "loss": 0.0055, + "step": 13044 + }, + { + "epoch": 3.18, + "learning_rate": 2.1302013900034e-06, + "loss": 0.0034, + "step": 13046 + }, + { + "epoch": 3.18, + "learning_rate": 2.1277674166826935e-06, + "loss": 0.0052, + "step": 13048 + }, + { + "epoch": 3.18, + "learning_rate": 2.125334669128798e-06, + "loss": 0.0048, + "step": 13050 + }, + { + "epoch": 3.18, + "learning_rate": 2.1229031477205075e-06, + "loss": 0.0017, + "step": 13052 + }, + { + "epoch": 3.18, + "learning_rate": 2.120472852836433e-06, + "loss": 0.0025, + "step": 13054 + }, + { + "epoch": 3.18, + "learning_rate": 2.118043784854984e-06, + "loss": 0.0039, + "step": 13056 + }, + { + "epoch": 3.18, + "learning_rate": 2.115615944154391e-06, + "loss": 0.0025, + "step": 13058 + }, + { + "epoch": 3.18, + "learning_rate": 2.1131893311126815e-06, + "loss": 0.0016, + "step": 13060 + }, + { + "epoch": 3.18, + "learning_rate": 2.1107639461077024e-06, + "loss": 0.0043, + "step": 13062 + }, + { + "epoch": 3.18, + "learning_rate": 2.108339789517102e-06, + "loss": 0.0066, + "step": 13064 + }, + { + "epoch": 3.18, + "learning_rate": 2.1059168617183433e-06, + "loss": 0.0024, + "step": 13066 + }, + { + "epoch": 3.18, + "learning_rate": 2.1034951630886913e-06, + "loss": 0.0016, + "step": 13068 + }, + { + "epoch": 3.18, + "learning_rate": 2.101074694005226e-06, + "loss": 0.0032, + "step": 13070 + }, + { + "epoch": 3.19, + "learning_rate": 2.098655454844829e-06, + "loss": 0.0022, + "step": 13072 + }, + { + "epoch": 3.19, + "learning_rate": 2.0962374459841982e-06, + "loss": 0.001, + "step": 13074 + }, + { + "epoch": 3.19, + "learning_rate": 2.0938206677998328e-06, + "loss": 0.0033, + "step": 13076 + }, + { + "epoch": 3.19, + "learning_rate": 2.091405120668045e-06, + "loss": 0.001, + "step": 13078 + }, + { + "epoch": 3.19, + "learning_rate": 2.0889908049649543e-06, + "loss": 0.0053, + "step": 13080 + }, + { + "epoch": 3.19, + "learning_rate": 2.0865777210664904e-06, + "loss": 0.0048, + "step": 13082 + }, + { + "epoch": 3.19, + "learning_rate": 2.084165869348387e-06, + "loss": 0.0019, + "step": 13084 + }, + { + "epoch": 3.19, + "learning_rate": 2.0817552501861836e-06, + "loss": 0.003, + "step": 13086 + }, + { + "epoch": 3.19, + "learning_rate": 2.0793458639552365e-06, + "loss": 0.004, + "step": 13088 + }, + { + "epoch": 3.19, + "learning_rate": 2.0769377110307076e-06, + "loss": 0.0057, + "step": 13090 + }, + { + "epoch": 3.19, + "learning_rate": 2.074530791787559e-06, + "loss": 0.0038, + "step": 13092 + }, + { + "epoch": 3.19, + "learning_rate": 2.07212510660057e-06, + "loss": 0.0034, + "step": 13094 + }, + { + "epoch": 3.19, + "learning_rate": 2.069720655844325e-06, + "loss": 0.0028, + "step": 13096 + }, + { + "epoch": 3.19, + "learning_rate": 2.067317439893215e-06, + "loss": 0.0033, + "step": 13098 + }, + { + "epoch": 3.19, + "learning_rate": 2.064915459121435e-06, + "loss": 0.0045, + "step": 13100 + }, + { + "epoch": 3.19, + "learning_rate": 2.0625147139029956e-06, + "loss": 0.0027, + "step": 13102 + }, + { + "epoch": 3.19, + "learning_rate": 2.06011520461171e-06, + "loss": 0.0017, + "step": 13104 + }, + { + "epoch": 3.19, + "learning_rate": 2.0577169316212033e-06, + "loss": 0.0024, + "step": 13106 + }, + { + "epoch": 3.19, + "learning_rate": 2.055319895304899e-06, + "loss": 0.0007, + "step": 13108 + }, + { + "epoch": 3.19, + "learning_rate": 2.0529240960360396e-06, + "loss": 0.0081, + "step": 13110 + }, + { + "epoch": 3.19, + "learning_rate": 2.050529534187665e-06, + "loss": 0.0029, + "step": 13112 + }, + { + "epoch": 3.2, + "learning_rate": 2.048136210132632e-06, + "loss": 0.0007, + "step": 13114 + }, + { + "epoch": 3.2, + "learning_rate": 2.045744124243593e-06, + "loss": 0.0005, + "step": 13116 + }, + { + "epoch": 3.2, + "learning_rate": 2.0433532768930176e-06, + "loss": 0.0028, + "step": 13118 + }, + { + "epoch": 3.2, + "learning_rate": 2.0409636684531774e-06, + "loss": 0.0018, + "step": 13120 + }, + { + "epoch": 3.2, + "learning_rate": 2.0385752992961584e-06, + "loss": 0.0033, + "step": 13122 + }, + { + "epoch": 3.2, + "learning_rate": 2.03618816979384e-06, + "loss": 0.0014, + "step": 13124 + }, + { + "epoch": 3.2, + "learning_rate": 2.0338022803179225e-06, + "loss": 0.0038, + "step": 13126 + }, + { + "epoch": 3.2, + "learning_rate": 2.0314176312399024e-06, + "loss": 0.0038, + "step": 13128 + }, + { + "epoch": 3.2, + "learning_rate": 2.0290342229310923e-06, + "loss": 0.0029, + "step": 13130 + }, + { + "epoch": 3.2, + "learning_rate": 2.0266520557626022e-06, + "loss": 0.0027, + "step": 13132 + }, + { + "epoch": 3.2, + "learning_rate": 2.0242711301053573e-06, + "loss": 0.0038, + "step": 13134 + }, + { + "epoch": 3.2, + "learning_rate": 2.0218914463300843e-06, + "loss": 0.0047, + "step": 13136 + }, + { + "epoch": 3.2, + "learning_rate": 2.019513004807322e-06, + "loss": 0.003, + "step": 13138 + }, + { + "epoch": 3.2, + "learning_rate": 2.0171358059074085e-06, + "loss": 0.0044, + "step": 13140 + }, + { + "epoch": 3.2, + "learning_rate": 2.0147598500004904e-06, + "loss": 0.0014, + "step": 13142 + }, + { + "epoch": 3.2, + "learning_rate": 2.012385137456523e-06, + "loss": 0.0049, + "step": 13144 + }, + { + "epoch": 3.2, + "learning_rate": 2.0100116686452708e-06, + "loss": 0.0033, + "step": 13146 + }, + { + "epoch": 3.2, + "learning_rate": 2.0076394439362968e-06, + "loss": 0.0015, + "step": 13148 + }, + { + "epoch": 3.2, + "learning_rate": 2.005268463698975e-06, + "loss": 0.0034, + "step": 13150 + }, + { + "epoch": 3.2, + "learning_rate": 2.0028987283024902e-06, + "loss": 0.0003, + "step": 13152 + }, + { + "epoch": 3.21, + "learning_rate": 2.0005302381158232e-06, + "loss": 0.0015, + "step": 13154 + }, + { + "epoch": 3.21, + "learning_rate": 1.9981629935077663e-06, + "loss": 0.0004, + "step": 13156 + }, + { + "epoch": 3.21, + "learning_rate": 1.995796994846917e-06, + "loss": 0.0046, + "step": 13158 + }, + { + "epoch": 3.21, + "learning_rate": 1.9934322425016816e-06, + "loss": 0.0025, + "step": 13160 + }, + { + "epoch": 3.21, + "learning_rate": 1.9910687368402715e-06, + "loss": 0.0041, + "step": 13162 + }, + { + "epoch": 3.21, + "learning_rate": 1.9887064782306984e-06, + "loss": 0.0049, + "step": 13164 + }, + { + "epoch": 3.21, + "learning_rate": 1.986345467040789e-06, + "loss": 0.001, + "step": 13166 + }, + { + "epoch": 3.21, + "learning_rate": 1.983985703638166e-06, + "loss": 0.0052, + "step": 13168 + }, + { + "epoch": 3.21, + "learning_rate": 1.981627188390266e-06, + "loss": 0.0017, + "step": 13170 + }, + { + "epoch": 3.21, + "learning_rate": 1.979269921664324e-06, + "loss": 0.0027, + "step": 13172 + }, + { + "epoch": 3.21, + "learning_rate": 1.9769139038273875e-06, + "loss": 0.0038, + "step": 13174 + }, + { + "epoch": 3.21, + "learning_rate": 1.9745591352463055e-06, + "loss": 0.0023, + "step": 13176 + }, + { + "epoch": 3.21, + "learning_rate": 1.9722056162877367e-06, + "loss": 0.0024, + "step": 13178 + }, + { + "epoch": 3.21, + "learning_rate": 1.969853347318137e-06, + "loss": 0.0015, + "step": 13180 + }, + { + "epoch": 3.21, + "learning_rate": 1.9675023287037766e-06, + "loss": 0.0046, + "step": 13182 + }, + { + "epoch": 3.21, + "learning_rate": 1.9651525608107237e-06, + "loss": 0.0028, + "step": 13184 + }, + { + "epoch": 3.21, + "learning_rate": 1.962804044004859e-06, + "loss": 0.003, + "step": 13186 + }, + { + "epoch": 3.21, + "learning_rate": 1.960456778651859e-06, + "loss": 0.0027, + "step": 13188 + }, + { + "epoch": 3.21, + "learning_rate": 1.958110765117215e-06, + "loss": 0.0009, + "step": 13190 + }, + { + "epoch": 3.21, + "learning_rate": 1.9557660037662175e-06, + "loss": 0.0037, + "step": 13192 + }, + { + "epoch": 3.21, + "learning_rate": 1.953422494963968e-06, + "loss": 0.0019, + "step": 13194 + }, + { + "epoch": 3.22, + "learning_rate": 1.951080239075366e-06, + "loss": 0.0022, + "step": 13196 + }, + { + "epoch": 3.22, + "learning_rate": 1.9487392364651135e-06, + "loss": 0.0007, + "step": 13198 + }, + { + "epoch": 3.22, + "learning_rate": 1.946399487497729e-06, + "loss": 0.0047, + "step": 13200 + }, + { + "epoch": 3.22, + "learning_rate": 1.9440609925375285e-06, + "loss": 0.0057, + "step": 13202 + }, + { + "epoch": 3.22, + "learning_rate": 1.941723751948631e-06, + "loss": 0.0044, + "step": 13204 + }, + { + "epoch": 3.22, + "learning_rate": 1.9393877660949646e-06, + "loss": 0.0037, + "step": 13206 + }, + { + "epoch": 3.22, + "learning_rate": 1.9370530353402604e-06, + "loss": 0.0029, + "step": 13208 + }, + { + "epoch": 3.22, + "learning_rate": 1.9347195600480552e-06, + "loss": 0.0023, + "step": 13210 + }, + { + "epoch": 3.22, + "learning_rate": 1.9323873405816827e-06, + "loss": 0.0037, + "step": 13212 + }, + { + "epoch": 3.22, + "learning_rate": 1.9300563773042924e-06, + "loss": 0.0056, + "step": 13214 + }, + { + "epoch": 3.22, + "learning_rate": 1.927726670578831e-06, + "loss": 0.0034, + "step": 13216 + }, + { + "epoch": 3.22, + "learning_rate": 1.9253982207680566e-06, + "loss": 0.003, + "step": 13218 + }, + { + "epoch": 3.22, + "learning_rate": 1.92307102823452e-06, + "loss": 0.0011, + "step": 13220 + }, + { + "epoch": 3.22, + "learning_rate": 1.9207450933405857e-06, + "loss": 0.0019, + "step": 13222 + }, + { + "epoch": 3.22, + "learning_rate": 1.9184204164484223e-06, + "loss": 0.0046, + "step": 13224 + }, + { + "epoch": 3.22, + "learning_rate": 1.9160969979199974e-06, + "loss": 0.0014, + "step": 13226 + }, + { + "epoch": 3.22, + "learning_rate": 1.9137748381170818e-06, + "loss": 0.0004, + "step": 13228 + }, + { + "epoch": 3.22, + "learning_rate": 1.9114539374012564e-06, + "loss": 0.0013, + "step": 13230 + }, + { + "epoch": 3.22, + "learning_rate": 1.909134296133903e-06, + "loss": 0.0043, + "step": 13232 + }, + { + "epoch": 3.22, + "learning_rate": 1.906815914676211e-06, + "loss": 0.0046, + "step": 13234 + }, + { + "epoch": 3.23, + "learning_rate": 1.9044987933891635e-06, + "loss": 0.0021, + "step": 13236 + }, + { + "epoch": 3.23, + "learning_rate": 1.9021829326335606e-06, + "loss": 0.0011, + "step": 13238 + }, + { + "epoch": 3.23, + "learning_rate": 1.8998683327699929e-06, + "loss": 0.002, + "step": 13240 + }, + { + "epoch": 3.23, + "learning_rate": 1.8975549941588679e-06, + "loss": 0.0022, + "step": 13242 + }, + { + "epoch": 3.23, + "learning_rate": 1.8952429171603849e-06, + "loss": 0.0015, + "step": 13244 + }, + { + "epoch": 3.23, + "learning_rate": 1.8929321021345526e-06, + "loss": 0.0016, + "step": 13246 + }, + { + "epoch": 3.23, + "learning_rate": 1.8906225494411844e-06, + "loss": 0.0064, + "step": 13248 + }, + { + "epoch": 3.23, + "learning_rate": 1.8883142594398974e-06, + "loss": 0.0023, + "step": 13250 + }, + { + "epoch": 3.23, + "learning_rate": 1.8860072324901079e-06, + "loss": 0.0027, + "step": 13252 + }, + { + "epoch": 3.23, + "learning_rate": 1.8837014689510337e-06, + "loss": 0.0054, + "step": 13254 + }, + { + "epoch": 3.23, + "learning_rate": 1.8813969691817047e-06, + "loss": 0.0017, + "step": 13256 + }, + { + "epoch": 3.23, + "learning_rate": 1.8790937335409487e-06, + "loss": 0.0049, + "step": 13258 + }, + { + "epoch": 3.23, + "learning_rate": 1.8767917623873943e-06, + "loss": 0.0014, + "step": 13260 + }, + { + "epoch": 3.23, + "learning_rate": 1.8744910560794783e-06, + "loss": 0.005, + "step": 13262 + }, + { + "epoch": 3.23, + "learning_rate": 1.8721916149754392e-06, + "loss": 0.0017, + "step": 13264 + }, + { + "epoch": 3.23, + "learning_rate": 1.8698934394333146e-06, + "loss": 0.0019, + "step": 13266 + }, + { + "epoch": 3.23, + "learning_rate": 1.8675965298109522e-06, + "loss": 0.004, + "step": 13268 + }, + { + "epoch": 3.23, + "learning_rate": 1.8653008864659926e-06, + "loss": 0.0031, + "step": 13270 + }, + { + "epoch": 3.23, + "learning_rate": 1.863006509755888e-06, + "loss": 0.004, + "step": 13272 + }, + { + "epoch": 3.23, + "learning_rate": 1.8607134000378945e-06, + "loss": 0.0057, + "step": 13274 + }, + { + "epoch": 3.23, + "learning_rate": 1.858421557669059e-06, + "loss": 0.0022, + "step": 13276 + }, + { + "epoch": 3.24, + "learning_rate": 1.8561309830062434e-06, + "loss": 0.0038, + "step": 13278 + }, + { + "epoch": 3.24, + "learning_rate": 1.8538416764061097e-06, + "loss": 0.0013, + "step": 13280 + }, + { + "epoch": 3.24, + "learning_rate": 1.8515536382251176e-06, + "loss": 0.0009, + "step": 13282 + }, + { + "epoch": 3.24, + "learning_rate": 1.8492668688195293e-06, + "loss": 0.0024, + "step": 13284 + }, + { + "epoch": 3.24, + "learning_rate": 1.8469813685454164e-06, + "loss": 0.0017, + "step": 13286 + }, + { + "epoch": 3.24, + "learning_rate": 1.8446971377586497e-06, + "loss": 0.0015, + "step": 13288 + }, + { + "epoch": 3.24, + "learning_rate": 1.8424141768148962e-06, + "loss": 0.0021, + "step": 13290 + }, + { + "epoch": 3.24, + "learning_rate": 1.8401324860696347e-06, + "loss": 0.0043, + "step": 13292 + }, + { + "epoch": 3.24, + "learning_rate": 1.8378520658781429e-06, + "loss": 0.0035, + "step": 13294 + }, + { + "epoch": 3.24, + "learning_rate": 1.8355729165954973e-06, + "loss": 0.0052, + "step": 13296 + }, + { + "epoch": 3.24, + "learning_rate": 1.833295038576577e-06, + "loss": 0.0015, + "step": 13298 + }, + { + "epoch": 3.24, + "learning_rate": 1.831018432176066e-06, + "loss": 0.0028, + "step": 13300 + }, + { + "epoch": 3.24, + "learning_rate": 1.8287430977484522e-06, + "loss": 0.0016, + "step": 13302 + }, + { + "epoch": 3.24, + "learning_rate": 1.826469035648022e-06, + "loss": 0.0084, + "step": 13304 + }, + { + "epoch": 3.24, + "learning_rate": 1.8241962462288609e-06, + "loss": 0.0057, + "step": 13306 + }, + { + "epoch": 3.24, + "learning_rate": 1.821924729844865e-06, + "loss": 0.0027, + "step": 13308 + }, + { + "epoch": 3.24, + "learning_rate": 1.8196544868497201e-06, + "loss": 0.0024, + "step": 13310 + }, + { + "epoch": 3.24, + "learning_rate": 1.8173855175969268e-06, + "loss": 0.0045, + "step": 13312 + }, + { + "epoch": 3.24, + "learning_rate": 1.8151178224397758e-06, + "loss": 0.0042, + "step": 13314 + }, + { + "epoch": 3.24, + "learning_rate": 1.812851401731367e-06, + "loss": 0.0047, + "step": 13316 + }, + { + "epoch": 3.25, + "learning_rate": 1.8105862558245979e-06, + "loss": 0.0032, + "step": 13318 + }, + { + "epoch": 3.25, + "learning_rate": 1.8083223850721743e-06, + "loss": 0.0015, + "step": 13320 + }, + { + "epoch": 3.25, + "learning_rate": 1.8060597898265907e-06, + "loss": 0.0009, + "step": 13322 + }, + { + "epoch": 3.25, + "learning_rate": 1.8037984704401579e-06, + "loss": 0.0023, + "step": 13324 + }, + { + "epoch": 3.25, + "learning_rate": 1.8015384272649726e-06, + "loss": 0.0023, + "step": 13326 + }, + { + "epoch": 3.25, + "learning_rate": 1.7992796606529483e-06, + "loss": 0.0031, + "step": 13328 + }, + { + "epoch": 3.25, + "learning_rate": 1.7970221709557868e-06, + "loss": 0.0016, + "step": 13330 + }, + { + "epoch": 3.25, + "learning_rate": 1.7947659585249977e-06, + "loss": 0.001, + "step": 13332 + }, + { + "epoch": 3.25, + "learning_rate": 1.7925110237118914e-06, + "loss": 0.0024, + "step": 13334 + }, + { + "epoch": 3.25, + "learning_rate": 1.7902573668675815e-06, + "loss": 0.0025, + "step": 13336 + }, + { + "epoch": 3.25, + "learning_rate": 1.7880049883429774e-06, + "loss": 0.0026, + "step": 13338 + }, + { + "epoch": 3.25, + "learning_rate": 1.785753888488787e-06, + "loss": 0.0028, + "step": 13340 + }, + { + "epoch": 3.25, + "learning_rate": 1.783504067655528e-06, + "loss": 0.0034, + "step": 13342 + }, + { + "epoch": 3.25, + "learning_rate": 1.7812555261935172e-06, + "loss": 0.003, + "step": 13344 + }, + { + "epoch": 3.25, + "learning_rate": 1.779008264452864e-06, + "loss": 0.0033, + "step": 13346 + }, + { + "epoch": 3.25, + "learning_rate": 1.7767622827834873e-06, + "loss": 0.0065, + "step": 13348 + }, + { + "epoch": 3.25, + "learning_rate": 1.7745175815351057e-06, + "loss": 0.0006, + "step": 13350 + }, + { + "epoch": 3.25, + "learning_rate": 1.7722741610572348e-06, + "loss": 0.0037, + "step": 13352 + }, + { + "epoch": 3.25, + "learning_rate": 1.7700320216991874e-06, + "loss": 0.0033, + "step": 13354 + }, + { + "epoch": 3.25, + "learning_rate": 1.7677911638100876e-06, + "loss": 0.0062, + "step": 13356 + }, + { + "epoch": 3.25, + "learning_rate": 1.765551587738852e-06, + "loss": 0.0038, + "step": 13358 + }, + { + "epoch": 3.26, + "learning_rate": 1.7633132938342023e-06, + "loss": 0.003, + "step": 13360 + }, + { + "epoch": 3.26, + "learning_rate": 1.7610762824446537e-06, + "loss": 0.0017, + "step": 13362 + }, + { + "epoch": 3.26, + "learning_rate": 1.7588405539185304e-06, + "loss": 0.0036, + "step": 13364 + }, + { + "epoch": 3.26, + "learning_rate": 1.7566061086039476e-06, + "loss": 0.0031, + "step": 13366 + }, + { + "epoch": 3.26, + "learning_rate": 1.7543729468488301e-06, + "loss": 0.0061, + "step": 13368 + }, + { + "epoch": 3.26, + "learning_rate": 1.7521410690008944e-06, + "loss": 0.0027, + "step": 13370 + }, + { + "epoch": 3.26, + "learning_rate": 1.7499104754076623e-06, + "loss": 0.0007, + "step": 13372 + }, + { + "epoch": 3.26, + "learning_rate": 1.747681166416455e-06, + "loss": 0.0032, + "step": 13374 + }, + { + "epoch": 3.26, + "learning_rate": 1.7454531423743948e-06, + "loss": 0.0012, + "step": 13376 + }, + { + "epoch": 3.26, + "learning_rate": 1.7432264036283986e-06, + "loss": 0.0028, + "step": 13378 + }, + { + "epoch": 3.26, + "learning_rate": 1.741000950525189e-06, + "loss": 0.001, + "step": 13380 + }, + { + "epoch": 3.26, + "learning_rate": 1.7387767834112833e-06, + "loss": 0.0023, + "step": 13382 + }, + { + "epoch": 3.26, + "learning_rate": 1.7365539026330058e-06, + "loss": 0.0067, + "step": 13384 + }, + { + "epoch": 3.26, + "learning_rate": 1.7343323085364717e-06, + "loss": 0.0049, + "step": 13386 + }, + { + "epoch": 3.26, + "learning_rate": 1.7321120014676006e-06, + "loss": 0.0043, + "step": 13388 + }, + { + "epoch": 3.26, + "learning_rate": 1.7298929817721122e-06, + "loss": 0.002, + "step": 13390 + }, + { + "epoch": 3.26, + "learning_rate": 1.7276752497955286e-06, + "loss": 0.0031, + "step": 13392 + }, + { + "epoch": 3.26, + "learning_rate": 1.7254588058831633e-06, + "loss": 0.0047, + "step": 13394 + }, + { + "epoch": 3.26, + "learning_rate": 1.7232436503801332e-06, + "loss": 0.0022, + "step": 13396 + }, + { + "epoch": 3.26, + "learning_rate": 1.721029783631355e-06, + "loss": 0.0018, + "step": 13398 + }, + { + "epoch": 3.27, + "learning_rate": 1.718817205981549e-06, + "loss": 0.004, + "step": 13400 + }, + { + "epoch": 3.27, + "learning_rate": 1.7166059177752249e-06, + "loss": 0.0045, + "step": 13402 + }, + { + "epoch": 3.27, + "learning_rate": 1.7143959193566995e-06, + "loss": 0.0024, + "step": 13404 + }, + { + "epoch": 3.27, + "learning_rate": 1.7121872110700888e-06, + "loss": 0.002, + "step": 13406 + }, + { + "epoch": 3.27, + "learning_rate": 1.7099797932593043e-06, + "loss": 0.0031, + "step": 13408 + }, + { + "epoch": 3.27, + "learning_rate": 1.7077736662680533e-06, + "loss": 0.002, + "step": 13410 + }, + { + "epoch": 3.27, + "learning_rate": 1.705568830439851e-06, + "loss": 0.0057, + "step": 13412 + }, + { + "epoch": 3.27, + "learning_rate": 1.703365286118006e-06, + "loss": 0.0038, + "step": 13414 + }, + { + "epoch": 3.27, + "learning_rate": 1.7011630336456296e-06, + "loss": 0.0031, + "step": 13416 + }, + { + "epoch": 3.27, + "learning_rate": 1.6989620733656253e-06, + "loss": 0.0039, + "step": 13418 + }, + { + "epoch": 3.27, + "learning_rate": 1.696762405620701e-06, + "loss": 0.0052, + "step": 13420 + }, + { + "epoch": 3.27, + "learning_rate": 1.694564030753365e-06, + "loss": 0.0029, + "step": 13422 + }, + { + "epoch": 3.27, + "learning_rate": 1.692366949105918e-06, + "loss": 0.0063, + "step": 13424 + }, + { + "epoch": 3.27, + "learning_rate": 1.6901711610204597e-06, + "loss": 0.0019, + "step": 13426 + }, + { + "epoch": 3.27, + "learning_rate": 1.6879766668388943e-06, + "loss": 0.0046, + "step": 13428 + }, + { + "epoch": 3.27, + "learning_rate": 1.6857834669029216e-06, + "loss": 0.0026, + "step": 13430 + }, + { + "epoch": 3.27, + "learning_rate": 1.683591561554041e-06, + "loss": 0.0025, + "step": 13432 + }, + { + "epoch": 3.27, + "learning_rate": 1.681400951133544e-06, + "loss": 0.0028, + "step": 13434 + }, + { + "epoch": 3.27, + "learning_rate": 1.6792116359825317e-06, + "loss": 0.002, + "step": 13436 + }, + { + "epoch": 3.27, + "learning_rate": 1.6770236164418906e-06, + "loss": 0.0028, + "step": 13438 + }, + { + "epoch": 3.27, + "learning_rate": 1.6748368928523174e-06, + "loss": 0.0017, + "step": 13440 + }, + { + "epoch": 3.28, + "learning_rate": 1.672651465554298e-06, + "loss": 0.0086, + "step": 13442 + }, + { + "epoch": 3.28, + "learning_rate": 1.6704673348881217e-06, + "loss": 0.0029, + "step": 13444 + }, + { + "epoch": 3.28, + "learning_rate": 1.6682845011938742e-06, + "loss": 0.0024, + "step": 13446 + }, + { + "epoch": 3.28, + "learning_rate": 1.6661029648114425e-06, + "loss": 0.0078, + "step": 13448 + }, + { + "epoch": 3.28, + "learning_rate": 1.6639227260805047e-06, + "loss": 0.0035, + "step": 13450 + }, + { + "epoch": 3.28, + "learning_rate": 1.6617437853405395e-06, + "loss": 0.009, + "step": 13452 + }, + { + "epoch": 3.28, + "learning_rate": 1.6595661429308273e-06, + "loss": 0.0015, + "step": 13454 + }, + { + "epoch": 3.28, + "learning_rate": 1.657389799190444e-06, + "loss": 0.0021, + "step": 13456 + }, + { + "epoch": 3.28, + "learning_rate": 1.655214754458261e-06, + "loss": 0.0019, + "step": 13458 + }, + { + "epoch": 3.28, + "learning_rate": 1.6530410090729498e-06, + "loss": 0.001, + "step": 13460 + }, + { + "epoch": 3.28, + "learning_rate": 1.6508685633729826e-06, + "loss": 0.0029, + "step": 13462 + }, + { + "epoch": 3.28, + "learning_rate": 1.648697417696622e-06, + "loss": 0.0033, + "step": 13464 + }, + { + "epoch": 3.28, + "learning_rate": 1.646527572381932e-06, + "loss": 0.0022, + "step": 13466 + }, + { + "epoch": 3.28, + "learning_rate": 1.6443590277667743e-06, + "loss": 0.0009, + "step": 13468 + }, + { + "epoch": 3.28, + "learning_rate": 1.642191784188808e-06, + "loss": 0.0063, + "step": 13470 + }, + { + "epoch": 3.28, + "learning_rate": 1.640025841985493e-06, + "loss": 0.0035, + "step": 13472 + }, + { + "epoch": 3.28, + "learning_rate": 1.6378612014940788e-06, + "loss": 0.0015, + "step": 13474 + }, + { + "epoch": 3.28, + "learning_rate": 1.6356978630516163e-06, + "loss": 0.002, + "step": 13476 + }, + { + "epoch": 3.28, + "learning_rate": 1.6335358269949586e-06, + "loss": 0.0026, + "step": 13478 + }, + { + "epoch": 3.28, + "learning_rate": 1.6313750936607487e-06, + "loss": 0.0035, + "step": 13480 + }, + { + "epoch": 3.29, + "learning_rate": 1.629215663385425e-06, + "loss": 0.0034, + "step": 13482 + }, + { + "epoch": 3.29, + "learning_rate": 1.627057536505232e-06, + "loss": 0.0031, + "step": 13484 + }, + { + "epoch": 3.29, + "learning_rate": 1.6249007133562056e-06, + "loss": 0.0027, + "step": 13486 + }, + { + "epoch": 3.29, + "learning_rate": 1.6227451942741812e-06, + "loss": 0.0017, + "step": 13488 + }, + { + "epoch": 3.29, + "learning_rate": 1.620590979594786e-06, + "loss": 0.0009, + "step": 13490 + }, + { + "epoch": 3.29, + "learning_rate": 1.6184380696534518e-06, + "loss": 0.0023, + "step": 13492 + }, + { + "epoch": 3.29, + "learning_rate": 1.6162864647853993e-06, + "loss": 0.003, + "step": 13494 + }, + { + "epoch": 3.29, + "learning_rate": 1.6141361653256538e-06, + "loss": 0.0048, + "step": 13496 + }, + { + "epoch": 3.29, + "learning_rate": 1.6119871716090285e-06, + "loss": 0.0023, + "step": 13498 + }, + { + "epoch": 3.29, + "learning_rate": 1.6098394839701403e-06, + "loss": 0.0007, + "step": 13500 + }, + { + "epoch": 3.29, + "learning_rate": 1.6076931027434017e-06, + "loss": 0.0035, + "step": 13502 + }, + { + "epoch": 3.29, + "learning_rate": 1.605548028263022e-06, + "loss": 0.006, + "step": 13504 + }, + { + "epoch": 3.29, + "learning_rate": 1.6034042608630041e-06, + "loss": 0.0029, + "step": 13506 + }, + { + "epoch": 3.29, + "learning_rate": 1.6012618008771464e-06, + "loss": 0.0013, + "step": 13508 + }, + { + "epoch": 3.29, + "learning_rate": 1.5991206486390487e-06, + "loss": 0.0018, + "step": 13510 + }, + { + "epoch": 3.29, + "learning_rate": 1.5969808044821068e-06, + "loss": 0.0033, + "step": 13512 + }, + { + "epoch": 3.29, + "learning_rate": 1.594842268739506e-06, + "loss": 0.0023, + "step": 13514 + }, + { + "epoch": 3.29, + "learning_rate": 1.5927050417442347e-06, + "loss": 0.0042, + "step": 13516 + }, + { + "epoch": 3.29, + "learning_rate": 1.5905691238290788e-06, + "loss": 0.0019, + "step": 13518 + }, + { + "epoch": 3.29, + "learning_rate": 1.5884345153266124e-06, + "loss": 0.0035, + "step": 13520 + }, + { + "epoch": 3.29, + "learning_rate": 1.586301216569215e-06, + "loss": 0.0004, + "step": 13522 + }, + { + "epoch": 3.3, + "learning_rate": 1.584169227889052e-06, + "loss": 0.0015, + "step": 13524 + }, + { + "epoch": 3.3, + "learning_rate": 1.5820385496180923e-06, + "loss": 0.001, + "step": 13526 + }, + { + "epoch": 3.3, + "learning_rate": 1.579909182088103e-06, + "loss": 0.002, + "step": 13528 + }, + { + "epoch": 3.3, + "learning_rate": 1.5777811256306374e-06, + "loss": 0.0024, + "step": 13530 + }, + { + "epoch": 3.3, + "learning_rate": 1.575654380577052e-06, + "loss": 0.0031, + "step": 13532 + }, + { + "epoch": 3.3, + "learning_rate": 1.5735289472584991e-06, + "loss": 0.0017, + "step": 13534 + }, + { + "epoch": 3.3, + "learning_rate": 1.5714048260059244e-06, + "loss": 0.0048, + "step": 13536 + }, + { + "epoch": 3.3, + "learning_rate": 1.5692820171500655e-06, + "loss": 0.0022, + "step": 13538 + }, + { + "epoch": 3.3, + "learning_rate": 1.5671605210214647e-06, + "loss": 0.0041, + "step": 13540 + }, + { + "epoch": 3.3, + "learning_rate": 1.565040337950453e-06, + "loss": 0.0036, + "step": 13542 + }, + { + "epoch": 3.3, + "learning_rate": 1.5629214682671623e-06, + "loss": 0.003, + "step": 13544 + }, + { + "epoch": 3.3, + "learning_rate": 1.5608039123015117e-06, + "loss": 0.0065, + "step": 13546 + }, + { + "epoch": 3.3, + "learning_rate": 1.5586876703832254e-06, + "loss": 0.0017, + "step": 13548 + }, + { + "epoch": 3.3, + "learning_rate": 1.5565727428418153e-06, + "loss": 0.0058, + "step": 13550 + }, + { + "epoch": 3.3, + "learning_rate": 1.5544591300065947e-06, + "loss": 0.0019, + "step": 13552 + }, + { + "epoch": 3.3, + "learning_rate": 1.5523468322066659e-06, + "loss": 0.0014, + "step": 13554 + }, + { + "epoch": 3.3, + "learning_rate": 1.5502358497709314e-06, + "loss": 0.001, + "step": 13556 + }, + { + "epoch": 3.3, + "learning_rate": 1.548126183028088e-06, + "loss": 0.0019, + "step": 13558 + }, + { + "epoch": 3.3, + "learning_rate": 1.5460178323066289e-06, + "loss": 0.0019, + "step": 13560 + }, + { + "epoch": 3.3, + "learning_rate": 1.5439107979348366e-06, + "loss": 0.003, + "step": 13562 + }, + { + "epoch": 3.31, + "learning_rate": 1.5418050802407924e-06, + "loss": 0.0033, + "step": 13564 + }, + { + "epoch": 3.31, + "learning_rate": 1.539700679552374e-06, + "loss": 0.0066, + "step": 13566 + }, + { + "epoch": 3.31, + "learning_rate": 1.5375975961972556e-06, + "loss": 0.002, + "step": 13568 + }, + { + "epoch": 3.31, + "learning_rate": 1.5354958305028967e-06, + "loss": 0.0021, + "step": 13570 + }, + { + "epoch": 3.31, + "learning_rate": 1.5333953827965631e-06, + "loss": 0.0009, + "step": 13572 + }, + { + "epoch": 3.31, + "learning_rate": 1.53129625340531e-06, + "loss": 0.0019, + "step": 13574 + }, + { + "epoch": 3.31, + "learning_rate": 1.529198442655989e-06, + "loss": 0.0045, + "step": 13576 + }, + { + "epoch": 3.31, + "learning_rate": 1.5271019508752438e-06, + "loss": 0.0059, + "step": 13578 + }, + { + "epoch": 3.31, + "learning_rate": 1.5250067783895128e-06, + "loss": 0.0022, + "step": 13580 + }, + { + "epoch": 3.31, + "learning_rate": 1.5229129255250309e-06, + "loss": 0.0029, + "step": 13582 + }, + { + "epoch": 3.31, + "learning_rate": 1.5208203926078302e-06, + "loss": 0.0018, + "step": 13584 + }, + { + "epoch": 3.31, + "learning_rate": 1.5187291799637293e-06, + "loss": 0.0016, + "step": 13586 + }, + { + "epoch": 3.31, + "learning_rate": 1.516639287918349e-06, + "loss": 0.0023, + "step": 13588 + }, + { + "epoch": 3.31, + "learning_rate": 1.5145507167971019e-06, + "loss": 0.0016, + "step": 13590 + }, + { + "epoch": 3.31, + "learning_rate": 1.5124634669251948e-06, + "loss": 0.0029, + "step": 13592 + }, + { + "epoch": 3.31, + "learning_rate": 1.5103775386276243e-06, + "loss": 0.0035, + "step": 13594 + }, + { + "epoch": 3.31, + "learning_rate": 1.5082929322291883e-06, + "loss": 0.007, + "step": 13596 + }, + { + "epoch": 3.31, + "learning_rate": 1.5062096480544752e-06, + "loss": 0.0042, + "step": 13598 + }, + { + "epoch": 3.31, + "learning_rate": 1.504127686427872e-06, + "loss": 0.0012, + "step": 13600 + }, + { + "epoch": 3.31, + "learning_rate": 1.5020470476735505e-06, + "loss": 0.0028, + "step": 13602 + }, + { + "epoch": 3.31, + "learning_rate": 1.499967732115487e-06, + "loss": 0.0026, + "step": 13604 + }, + { + "epoch": 3.32, + "learning_rate": 1.4978897400774416e-06, + "loss": 0.0026, + "step": 13606 + }, + { + "epoch": 3.32, + "learning_rate": 1.49581307188298e-06, + "loss": 0.0045, + "step": 13608 + }, + { + "epoch": 3.32, + "learning_rate": 1.4937377278554477e-06, + "loss": 0.002, + "step": 13610 + }, + { + "epoch": 3.32, + "learning_rate": 1.4916637083179964e-06, + "loss": 0.0037, + "step": 13612 + }, + { + "epoch": 3.32, + "learning_rate": 1.4895910135935666e-06, + "loss": 0.0037, + "step": 13614 + }, + { + "epoch": 3.32, + "learning_rate": 1.4875196440048934e-06, + "loss": 0.0022, + "step": 13616 + }, + { + "epoch": 3.32, + "learning_rate": 1.4854495998745044e-06, + "loss": 0.0038, + "step": 13618 + }, + { + "epoch": 3.32, + "learning_rate": 1.4833808815247175e-06, + "loss": 0.0003, + "step": 13620 + }, + { + "epoch": 3.32, + "learning_rate": 1.4813134892776504e-06, + "loss": 0.0049, + "step": 13622 + }, + { + "epoch": 3.32, + "learning_rate": 1.4792474234552156e-06, + "loss": 0.0026, + "step": 13624 + }, + { + "epoch": 3.32, + "learning_rate": 1.4771826843791104e-06, + "loss": 0.0022, + "step": 13626 + }, + { + "epoch": 3.32, + "learning_rate": 1.4751192723708318e-06, + "loss": 0.0038, + "step": 13628 + }, + { + "epoch": 3.32, + "learning_rate": 1.4730571877516686e-06, + "loss": 0.0022, + "step": 13630 + }, + { + "epoch": 3.32, + "learning_rate": 1.470996430842707e-06, + "loss": 0.0023, + "step": 13632 + }, + { + "epoch": 3.32, + "learning_rate": 1.4689370019648198e-06, + "loss": 0.0022, + "step": 13634 + }, + { + "epoch": 3.32, + "learning_rate": 1.466878901438672e-06, + "loss": 0.0081, + "step": 13636 + }, + { + "epoch": 3.32, + "learning_rate": 1.4648221295847298e-06, + "loss": 0.0027, + "step": 13638 + }, + { + "epoch": 3.32, + "learning_rate": 1.4627666867232493e-06, + "loss": 0.0039, + "step": 13640 + }, + { + "epoch": 3.32, + "learning_rate": 1.4607125731742756e-06, + "loss": 0.004, + "step": 13642 + }, + { + "epoch": 3.32, + "learning_rate": 1.4586597892576503e-06, + "loss": 0.0007, + "step": 13644 + }, + { + "epoch": 3.33, + "learning_rate": 1.4566083352930105e-06, + "loss": 0.0021, + "step": 13646 + }, + { + "epoch": 3.33, + "learning_rate": 1.4545582115997825e-06, + "loss": 0.0021, + "step": 13648 + }, + { + "epoch": 3.33, + "learning_rate": 1.4525094184971812e-06, + "loss": 0.0016, + "step": 13650 + }, + { + "epoch": 3.33, + "learning_rate": 1.4504619563042244e-06, + "loss": 0.003, + "step": 13652 + }, + { + "epoch": 3.33, + "learning_rate": 1.4484158253397152e-06, + "loss": 0.0026, + "step": 13654 + }, + { + "epoch": 3.33, + "learning_rate": 1.4463710259222563e-06, + "loss": 0.0022, + "step": 13656 + }, + { + "epoch": 3.33, + "learning_rate": 1.444327558370232e-06, + "loss": 0.0023, + "step": 13658 + }, + { + "epoch": 3.33, + "learning_rate": 1.4422854230018324e-06, + "loss": 0.0014, + "step": 13660 + }, + { + "epoch": 3.33, + "learning_rate": 1.4402446201350273e-06, + "loss": 0.0023, + "step": 13662 + }, + { + "epoch": 3.33, + "learning_rate": 1.4382051500875916e-06, + "loss": 0.0005, + "step": 13664 + }, + { + "epoch": 3.33, + "learning_rate": 1.43616701317708e-06, + "loss": 0.0012, + "step": 13666 + }, + { + "epoch": 3.33, + "learning_rate": 1.4341302097208487e-06, + "loss": 0.0032, + "step": 13668 + }, + { + "epoch": 3.33, + "learning_rate": 1.432094740036044e-06, + "loss": 0.0024, + "step": 13670 + }, + { + "epoch": 3.33, + "learning_rate": 1.430060604439607e-06, + "loss": 0.0031, + "step": 13672 + }, + { + "epoch": 3.33, + "learning_rate": 1.4280278032482631e-06, + "loss": 0.0025, + "step": 13674 + }, + { + "epoch": 3.33, + "learning_rate": 1.425996336778539e-06, + "loss": 0.0042, + "step": 13676 + }, + { + "epoch": 3.33, + "learning_rate": 1.4239662053467452e-06, + "loss": 0.0035, + "step": 13678 + }, + { + "epoch": 3.33, + "learning_rate": 1.4219374092689941e-06, + "loss": 0.0018, + "step": 13680 + }, + { + "epoch": 3.33, + "learning_rate": 1.4199099488611789e-06, + "loss": 0.0023, + "step": 13682 + }, + { + "epoch": 3.33, + "learning_rate": 1.4178838244389937e-06, + "loss": 0.0024, + "step": 13684 + }, + { + "epoch": 3.33, + "learning_rate": 1.4158590363179215e-06, + "loss": 0.0008, + "step": 13686 + }, + { + "epoch": 3.34, + "learning_rate": 1.4138355848132402e-06, + "loss": 0.0013, + "step": 13688 + }, + { + "epoch": 3.34, + "learning_rate": 1.4118134702400133e-06, + "loss": 0.0041, + "step": 13690 + }, + { + "epoch": 3.34, + "learning_rate": 1.409792692913098e-06, + "loss": 0.0016, + "step": 13692 + }, + { + "epoch": 3.34, + "learning_rate": 1.4077732531471455e-06, + "loss": 0.0044, + "step": 13694 + }, + { + "epoch": 3.34, + "learning_rate": 1.4057551512566025e-06, + "loss": 0.0028, + "step": 13696 + }, + { + "epoch": 3.34, + "learning_rate": 1.4037383875556977e-06, + "loss": 0.0016, + "step": 13698 + }, + { + "epoch": 3.34, + "learning_rate": 1.4017229623584583e-06, + "loss": 0.0017, + "step": 13700 + }, + { + "epoch": 3.34, + "learning_rate": 1.3997088759787036e-06, + "loss": 0.0043, + "step": 13702 + }, + { + "epoch": 3.34, + "learning_rate": 1.3976961287300407e-06, + "loss": 0.0031, + "step": 13704 + }, + { + "epoch": 3.34, + "learning_rate": 1.3956847209258672e-06, + "loss": 0.001, + "step": 13706 + }, + { + "epoch": 3.34, + "learning_rate": 1.3936746528793765e-06, + "loss": 0.0018, + "step": 13708 + }, + { + "epoch": 3.34, + "learning_rate": 1.3916659249035514e-06, + "loss": 0.0016, + "step": 13710 + }, + { + "epoch": 3.34, + "learning_rate": 1.3896585373111703e-06, + "loss": 0.0017, + "step": 13712 + }, + { + "epoch": 3.34, + "learning_rate": 1.3876524904147915e-06, + "loss": 0.0041, + "step": 13714 + }, + { + "epoch": 3.34, + "learning_rate": 1.3856477845267791e-06, + "loss": 0.0034, + "step": 13716 + }, + { + "epoch": 3.34, + "learning_rate": 1.3836444199592746e-06, + "loss": 0.0021, + "step": 13718 + }, + { + "epoch": 3.34, + "learning_rate": 1.3816423970242232e-06, + "loss": 0.0039, + "step": 13720 + }, + { + "epoch": 3.34, + "learning_rate": 1.3796417160333485e-06, + "loss": 0.0036, + "step": 13722 + }, + { + "epoch": 3.34, + "learning_rate": 1.3776423772981772e-06, + "loss": 0.0026, + "step": 13724 + }, + { + "epoch": 3.34, + "learning_rate": 1.375644381130019e-06, + "loss": 0.0033, + "step": 13726 + }, + { + "epoch": 3.35, + "learning_rate": 1.3736477278399806e-06, + "loss": 0.0031, + "step": 13728 + }, + { + "epoch": 3.35, + "learning_rate": 1.3716524177389511e-06, + "loss": 0.0072, + "step": 13730 + }, + { + "epoch": 3.35, + "learning_rate": 1.36965845113762e-06, + "loss": 0.0002, + "step": 13732 + }, + { + "epoch": 3.35, + "learning_rate": 1.3676658283464595e-06, + "loss": 0.003, + "step": 13734 + }, + { + "epoch": 3.35, + "learning_rate": 1.3656745496757407e-06, + "loss": 0.0004, + "step": 13736 + }, + { + "epoch": 3.35, + "learning_rate": 1.3636846154355155e-06, + "loss": 0.0021, + "step": 13738 + }, + { + "epoch": 3.35, + "learning_rate": 1.3616960259356348e-06, + "loss": 0.0004, + "step": 13740 + }, + { + "epoch": 3.35, + "learning_rate": 1.3597087814857368e-06, + "loss": 0.0023, + "step": 13742 + }, + { + "epoch": 3.35, + "learning_rate": 1.3577228823952526e-06, + "loss": 0.0011, + "step": 13744 + }, + { + "epoch": 3.35, + "learning_rate": 1.3557383289734004e-06, + "loss": 0.0063, + "step": 13746 + }, + { + "epoch": 3.35, + "learning_rate": 1.353755121529189e-06, + "loss": 0.0044, + "step": 13748 + }, + { + "epoch": 3.35, + "learning_rate": 1.3517732603714183e-06, + "loss": 0.0016, + "step": 13750 + }, + { + "epoch": 3.35, + "learning_rate": 1.3497927458086836e-06, + "loss": 0.0019, + "step": 13752 + }, + { + "epoch": 3.35, + "learning_rate": 1.3478135781493617e-06, + "loss": 0.0035, + "step": 13754 + }, + { + "epoch": 3.35, + "learning_rate": 1.3458357577016257e-06, + "loss": 0.002, + "step": 13756 + }, + { + "epoch": 3.35, + "learning_rate": 1.34385928477344e-06, + "loss": 0.0046, + "step": 13758 + }, + { + "epoch": 3.35, + "learning_rate": 1.341884159672554e-06, + "loss": 0.0054, + "step": 13760 + }, + { + "epoch": 3.35, + "learning_rate": 1.3399103827065075e-06, + "loss": 0.0049, + "step": 13762 + }, + { + "epoch": 3.35, + "learning_rate": 1.3379379541826353e-06, + "loss": 0.0014, + "step": 13764 + }, + { + "epoch": 3.35, + "learning_rate": 1.3359668744080601e-06, + "loss": 0.0022, + "step": 13766 + }, + { + "epoch": 3.35, + "learning_rate": 1.3339971436896949e-06, + "loss": 0.0058, + "step": 13768 + }, + { + "epoch": 3.36, + "learning_rate": 1.3320287623342376e-06, + "loss": 0.004, + "step": 13770 + }, + { + "epoch": 3.36, + "learning_rate": 1.3300617306481833e-06, + "loss": 0.0026, + "step": 13772 + }, + { + "epoch": 3.36, + "learning_rate": 1.328096048937816e-06, + "loss": 0.0038, + "step": 13774 + }, + { + "epoch": 3.36, + "learning_rate": 1.3261317175092047e-06, + "loss": 0.0023, + "step": 13776 + }, + { + "epoch": 3.36, + "learning_rate": 1.32416873666821e-06, + "loss": 0.0042, + "step": 13778 + }, + { + "epoch": 3.36, + "learning_rate": 1.3222071067204822e-06, + "loss": 0.0028, + "step": 13780 + }, + { + "epoch": 3.36, + "learning_rate": 1.3202468279714653e-06, + "loss": 0.0023, + "step": 13782 + }, + { + "epoch": 3.36, + "learning_rate": 1.3182879007263906e-06, + "loss": 0.0017, + "step": 13784 + }, + { + "epoch": 3.36, + "learning_rate": 1.3163303252902727e-06, + "loss": 0.0023, + "step": 13786 + }, + { + "epoch": 3.36, + "learning_rate": 1.3143741019679268e-06, + "loss": 0.0073, + "step": 13788 + }, + { + "epoch": 3.36, + "learning_rate": 1.312419231063946e-06, + "loss": 0.0066, + "step": 13790 + }, + { + "epoch": 3.36, + "learning_rate": 1.3104657128827247e-06, + "loss": 0.0048, + "step": 13792 + }, + { + "epoch": 3.36, + "learning_rate": 1.308513547728436e-06, + "loss": 0.0046, + "step": 13794 + }, + { + "epoch": 3.36, + "learning_rate": 1.3065627359050491e-06, + "loss": 0.0013, + "step": 13796 + }, + { + "epoch": 3.36, + "learning_rate": 1.304613277716319e-06, + "loss": 0.0047, + "step": 13798 + }, + { + "epoch": 3.36, + "learning_rate": 1.3026651734657947e-06, + "loss": 0.0014, + "step": 13800 + }, + { + "epoch": 3.36, + "learning_rate": 1.3007184234568082e-06, + "loss": 0.0037, + "step": 13802 + }, + { + "epoch": 3.36, + "learning_rate": 1.2987730279924815e-06, + "loss": 0.002, + "step": 13804 + }, + { + "epoch": 3.36, + "learning_rate": 1.2968289873757311e-06, + "loss": 0.001, + "step": 13806 + }, + { + "epoch": 3.36, + "learning_rate": 1.294886301909256e-06, + "loss": 0.0045, + "step": 13808 + }, + { + "epoch": 3.37, + "learning_rate": 1.292944971895549e-06, + "loss": 0.0028, + "step": 13810 + }, + { + "epoch": 3.37, + "learning_rate": 1.2910049976368888e-06, + "loss": 0.0025, + "step": 13812 + }, + { + "epoch": 3.37, + "learning_rate": 1.2890663794353476e-06, + "loss": 0.0028, + "step": 13814 + }, + { + "epoch": 3.37, + "learning_rate": 1.2871291175927814e-06, + "loss": 0.0024, + "step": 13816 + }, + { + "epoch": 3.37, + "learning_rate": 1.2851932124108323e-06, + "loss": 0.0019, + "step": 13818 + }, + { + "epoch": 3.37, + "learning_rate": 1.2832586641909405e-06, + "loss": 0.0041, + "step": 13820 + }, + { + "epoch": 3.37, + "learning_rate": 1.2813254732343294e-06, + "loss": 0.0023, + "step": 13822 + }, + { + "epoch": 3.37, + "learning_rate": 1.2793936398420093e-06, + "loss": 0.0036, + "step": 13824 + }, + { + "epoch": 3.37, + "learning_rate": 1.2774631643147827e-06, + "loss": 0.0027, + "step": 13826 + }, + { + "epoch": 3.37, + "learning_rate": 1.2755340469532407e-06, + "loss": 0.0007, + "step": 13828 + }, + { + "epoch": 3.37, + "learning_rate": 1.2736062880577616e-06, + "loss": 0.0019, + "step": 13830 + }, + { + "epoch": 3.37, + "learning_rate": 1.2716798879285108e-06, + "loss": 0.004, + "step": 13832 + }, + { + "epoch": 3.37, + "learning_rate": 1.269754846865443e-06, + "loss": 0.0036, + "step": 13834 + }, + { + "epoch": 3.37, + "learning_rate": 1.2678311651683017e-06, + "loss": 0.0014, + "step": 13836 + }, + { + "epoch": 3.37, + "learning_rate": 1.2659088431366218e-06, + "loss": 0.0033, + "step": 13838 + }, + { + "epoch": 3.37, + "learning_rate": 1.2639878810697204e-06, + "loss": 0.0016, + "step": 13840 + }, + { + "epoch": 3.37, + "learning_rate": 1.262068279266706e-06, + "loss": 0.004, + "step": 13842 + }, + { + "epoch": 3.37, + "learning_rate": 1.2601500380264798e-06, + "loss": 0.0019, + "step": 13844 + }, + { + "epoch": 3.37, + "learning_rate": 1.2582331576477225e-06, + "loss": 0.0027, + "step": 13846 + }, + { + "epoch": 3.37, + "learning_rate": 1.2563176384289055e-06, + "loss": 0.0023, + "step": 13848 + }, + { + "epoch": 3.37, + "learning_rate": 1.2544034806682926e-06, + "loss": 0.0018, + "step": 13850 + }, + { + "epoch": 3.38, + "learning_rate": 1.252490684663933e-06, + "loss": 0.0033, + "step": 13852 + }, + { + "epoch": 3.38, + "learning_rate": 1.2505792507136638e-06, + "loss": 0.0009, + "step": 13854 + }, + { + "epoch": 3.38, + "learning_rate": 1.2486691791151072e-06, + "loss": 0.0027, + "step": 13856 + }, + { + "epoch": 3.38, + "learning_rate": 1.24676047016568e-06, + "loss": 0.0037, + "step": 13858 + }, + { + "epoch": 3.38, + "learning_rate": 1.2448531241625784e-06, + "loss": 0.0015, + "step": 13860 + }, + { + "epoch": 3.38, + "learning_rate": 1.2429471414027949e-06, + "loss": 0.0018, + "step": 13862 + }, + { + "epoch": 3.38, + "learning_rate": 1.2410425221831024e-06, + "loss": 0.0018, + "step": 13864 + }, + { + "epoch": 3.38, + "learning_rate": 1.2391392668000646e-06, + "loss": 0.003, + "step": 13866 + }, + { + "epoch": 3.38, + "learning_rate": 1.2372373755500356e-06, + "loss": 0.003, + "step": 13868 + }, + { + "epoch": 3.38, + "learning_rate": 1.235336848729154e-06, + "loss": 0.0019, + "step": 13870 + }, + { + "epoch": 3.38, + "learning_rate": 1.2334376866333441e-06, + "loss": 0.0044, + "step": 13872 + }, + { + "epoch": 3.38, + "learning_rate": 1.231539889558323e-06, + "loss": 0.0019, + "step": 13874 + }, + { + "epoch": 3.38, + "learning_rate": 1.2296434577995875e-06, + "loss": 0.0029, + "step": 13876 + }, + { + "epoch": 3.38, + "learning_rate": 1.2277483916524324e-06, + "loss": 0.0028, + "step": 13878 + }, + { + "epoch": 3.38, + "learning_rate": 1.2258546914119295e-06, + "loss": 0.0029, + "step": 13880 + }, + { + "epoch": 3.38, + "learning_rate": 1.223962357372943e-06, + "loss": 0.0021, + "step": 13882 + }, + { + "epoch": 3.38, + "learning_rate": 1.222071389830125e-06, + "loss": 0.0031, + "step": 13884 + }, + { + "epoch": 3.38, + "learning_rate": 1.2201817890779155e-06, + "loss": 0.0017, + "step": 13886 + }, + { + "epoch": 3.38, + "learning_rate": 1.2182935554105379e-06, + "loss": 0.002, + "step": 13888 + }, + { + "epoch": 3.38, + "learning_rate": 1.2164066891220017e-06, + "loss": 0.0016, + "step": 13890 + }, + { + "epoch": 3.38, + "learning_rate": 1.2145211905061094e-06, + "loss": 0.0038, + "step": 13892 + }, + { + "epoch": 3.39, + "learning_rate": 1.2126370598564497e-06, + "loss": 0.0013, + "step": 13894 + }, + { + "epoch": 3.39, + "learning_rate": 1.210754297466391e-06, + "loss": 0.0026, + "step": 13896 + }, + { + "epoch": 3.39, + "learning_rate": 1.208872903629097e-06, + "loss": 0.0044, + "step": 13898 + }, + { + "epoch": 3.39, + "learning_rate": 1.2069928786375173e-06, + "loss": 0.0034, + "step": 13900 + }, + { + "epoch": 3.39, + "learning_rate": 1.2051142227843826e-06, + "loss": 0.002, + "step": 13902 + }, + { + "epoch": 3.39, + "learning_rate": 1.203236936362212e-06, + "loss": 0.0052, + "step": 13904 + }, + { + "epoch": 3.39, + "learning_rate": 1.2013610196633174e-06, + "loss": 0.0017, + "step": 13906 + }, + { + "epoch": 3.39, + "learning_rate": 1.1994864729797918e-06, + "loss": 0.0027, + "step": 13908 + }, + { + "epoch": 3.39, + "learning_rate": 1.1976132966035192e-06, + "loss": 0.0031, + "step": 13910 + }, + { + "epoch": 3.39, + "learning_rate": 1.1957414908261623e-06, + "loss": 0.0024, + "step": 13912 + }, + { + "epoch": 3.39, + "learning_rate": 1.1938710559391809e-06, + "loss": 0.001, + "step": 13914 + }, + { + "epoch": 3.39, + "learning_rate": 1.1920019922338112e-06, + "loss": 0.0016, + "step": 13916 + }, + { + "epoch": 3.39, + "learning_rate": 1.1901343000010856e-06, + "loss": 0.0018, + "step": 13918 + }, + { + "epoch": 3.39, + "learning_rate": 1.1882679795318131e-06, + "loss": 0.0018, + "step": 13920 + }, + { + "epoch": 3.39, + "learning_rate": 1.1864030311165965e-06, + "loss": 0.0076, + "step": 13922 + }, + { + "epoch": 3.39, + "learning_rate": 1.1845394550458223e-06, + "loss": 0.0018, + "step": 13924 + }, + { + "epoch": 3.39, + "learning_rate": 1.1826772516096652e-06, + "loss": 0.0018, + "step": 13926 + }, + { + "epoch": 3.39, + "learning_rate": 1.1808164210980821e-06, + "loss": 0.0015, + "step": 13928 + }, + { + "epoch": 3.39, + "learning_rate": 1.1789569638008202e-06, + "loss": 0.0032, + "step": 13930 + }, + { + "epoch": 3.39, + "learning_rate": 1.177098880007409e-06, + "loss": 0.0016, + "step": 13932 + }, + { + "epoch": 3.4, + "learning_rate": 1.1752421700071704e-06, + "loss": 0.0026, + "step": 13934 + }, + { + "epoch": 3.4, + "learning_rate": 1.173386834089203e-06, + "loss": 0.0033, + "step": 13936 + }, + { + "epoch": 3.4, + "learning_rate": 1.1715328725424002e-06, + "loss": 0.0044, + "step": 13938 + }, + { + "epoch": 3.4, + "learning_rate": 1.1696802856554368e-06, + "loss": 0.0043, + "step": 13940 + }, + { + "epoch": 3.4, + "learning_rate": 1.1678290737167785e-06, + "loss": 0.0014, + "step": 13942 + }, + { + "epoch": 3.4, + "learning_rate": 1.1659792370146694e-06, + "loss": 0.0029, + "step": 13944 + }, + { + "epoch": 3.4, + "learning_rate": 1.1641307758371412e-06, + "loss": 0.0011, + "step": 13946 + }, + { + "epoch": 3.4, + "learning_rate": 1.1622836904720159e-06, + "loss": 0.0029, + "step": 13948 + }, + { + "epoch": 3.4, + "learning_rate": 1.160437981206901e-06, + "loss": 0.0022, + "step": 13950 + }, + { + "epoch": 3.4, + "learning_rate": 1.1585936483291837e-06, + "loss": 0.0029, + "step": 13952 + }, + { + "epoch": 3.4, + "learning_rate": 1.156750692126043e-06, + "loss": 0.0019, + "step": 13954 + }, + { + "epoch": 3.4, + "learning_rate": 1.1549091128844425e-06, + "loss": 0.002, + "step": 13956 + }, + { + "epoch": 3.4, + "learning_rate": 1.1530689108911285e-06, + "loss": 0.0017, + "step": 13958 + }, + { + "epoch": 3.4, + "learning_rate": 1.151230086432632e-06, + "loss": 0.0025, + "step": 13960 + }, + { + "epoch": 3.4, + "learning_rate": 1.1493926397952748e-06, + "loss": 0.0032, + "step": 13962 + }, + { + "epoch": 3.4, + "learning_rate": 1.1475565712651615e-06, + "loss": 0.001, + "step": 13964 + }, + { + "epoch": 3.4, + "learning_rate": 1.1457218811281834e-06, + "loss": 0.0013, + "step": 13966 + }, + { + "epoch": 3.4, + "learning_rate": 1.1438885696700108e-06, + "loss": 0.0009, + "step": 13968 + }, + { + "epoch": 3.4, + "learning_rate": 1.142056637176111e-06, + "loss": 0.0036, + "step": 13970 + }, + { + "epoch": 3.4, + "learning_rate": 1.1402260839317237e-06, + "loss": 0.0062, + "step": 13972 + }, + { + "epoch": 3.4, + "learning_rate": 1.138396910221885e-06, + "loss": 0.0024, + "step": 13974 + }, + { + "epoch": 3.41, + "learning_rate": 1.1365691163314074e-06, + "loss": 0.0051, + "step": 13976 + }, + { + "epoch": 3.41, + "learning_rate": 1.1347427025448931e-06, + "loss": 0.0015, + "step": 13978 + }, + { + "epoch": 3.41, + "learning_rate": 1.1329176691467303e-06, + "loss": 0.0047, + "step": 13980 + }, + { + "epoch": 3.41, + "learning_rate": 1.1310940164210916e-06, + "loss": 0.0014, + "step": 13982 + }, + { + "epoch": 3.41, + "learning_rate": 1.1292717446519296e-06, + "loss": 0.0037, + "step": 13984 + }, + { + "epoch": 3.41, + "learning_rate": 1.12745085412299e-06, + "loss": 0.0051, + "step": 13986 + }, + { + "epoch": 3.41, + "learning_rate": 1.1256313451177968e-06, + "loss": 0.0022, + "step": 13988 + }, + { + "epoch": 3.41, + "learning_rate": 1.1238132179196636e-06, + "loss": 0.0016, + "step": 13990 + }, + { + "epoch": 3.41, + "learning_rate": 1.1219964728116839e-06, + "loss": 0.0024, + "step": 13992 + }, + { + "epoch": 3.41, + "learning_rate": 1.12018111007674e-06, + "loss": 0.0027, + "step": 13994 + }, + { + "epoch": 3.41, + "learning_rate": 1.1183671299974984e-06, + "loss": 0.0006, + "step": 13996 + }, + { + "epoch": 3.41, + "learning_rate": 1.116554532856412e-06, + "loss": 0.0037, + "step": 13998 + }, + { + "epoch": 3.41, + "learning_rate": 1.1147433189357125e-06, + "loss": 0.0027, + "step": 14000 + }, + { + "epoch": 3.41, + "learning_rate": 1.1129334885174193e-06, + "loss": 0.0036, + "step": 14002 + }, + { + "epoch": 3.41, + "learning_rate": 1.1111250418833376e-06, + "loss": 0.002, + "step": 14004 + }, + { + "epoch": 3.41, + "learning_rate": 1.1093179793150598e-06, + "loss": 0.0013, + "step": 14006 + }, + { + "epoch": 3.41, + "learning_rate": 1.1075123010939538e-06, + "loss": 0.0023, + "step": 14008 + }, + { + "epoch": 3.41, + "learning_rate": 1.1057080075011805e-06, + "loss": 0.0025, + "step": 14010 + }, + { + "epoch": 3.41, + "learning_rate": 1.1039050988176847e-06, + "loss": 0.0043, + "step": 14012 + }, + { + "epoch": 3.41, + "learning_rate": 1.1021035753241893e-06, + "loss": 0.0058, + "step": 14014 + }, + { + "epoch": 3.42, + "learning_rate": 1.1003034373012055e-06, + "loss": 0.0018, + "step": 14016 + }, + { + "epoch": 3.42, + "learning_rate": 1.0985046850290281e-06, + "loss": 0.0012, + "step": 14018 + }, + { + "epoch": 3.42, + "learning_rate": 1.0967073187877386e-06, + "loss": 0.003, + "step": 14020 + }, + { + "epoch": 3.42, + "learning_rate": 1.094911338857202e-06, + "loss": 0.0023, + "step": 14022 + }, + { + "epoch": 3.42, + "learning_rate": 1.0931167455170622e-06, + "loss": 0.0026, + "step": 14024 + }, + { + "epoch": 3.42, + "learning_rate": 1.0913235390467524e-06, + "loss": 0.0032, + "step": 14026 + }, + { + "epoch": 3.42, + "learning_rate": 1.0895317197254919e-06, + "loss": 0.0038, + "step": 14028 + }, + { + "epoch": 3.42, + "learning_rate": 1.087741287832277e-06, + "loss": 0.0041, + "step": 14030 + }, + { + "epoch": 3.42, + "learning_rate": 1.0859522436458903e-06, + "loss": 0.0017, + "step": 14032 + }, + { + "epoch": 3.42, + "learning_rate": 1.0841645874449025e-06, + "loss": 0.0007, + "step": 14034 + }, + { + "epoch": 3.42, + "learning_rate": 1.0823783195076654e-06, + "loss": 0.0019, + "step": 14036 + }, + { + "epoch": 3.42, + "learning_rate": 1.0805934401123153e-06, + "loss": 0.0018, + "step": 14038 + }, + { + "epoch": 3.42, + "learning_rate": 1.0788099495367677e-06, + "loss": 0.0029, + "step": 14040 + }, + { + "epoch": 3.42, + "learning_rate": 1.0770278480587303e-06, + "loss": 0.0006, + "step": 14042 + }, + { + "epoch": 3.42, + "learning_rate": 1.075247135955686e-06, + "loss": 0.0038, + "step": 14044 + }, + { + "epoch": 3.42, + "learning_rate": 1.0734678135049093e-06, + "loss": 0.0039, + "step": 14046 + }, + { + "epoch": 3.42, + "learning_rate": 1.0716898809834497e-06, + "loss": 0.0027, + "step": 14048 + }, + { + "epoch": 3.42, + "learning_rate": 1.069913338668148e-06, + "loss": 0.0008, + "step": 14050 + }, + { + "epoch": 3.42, + "learning_rate": 1.0681381868356245e-06, + "loss": 0.0014, + "step": 14052 + }, + { + "epoch": 3.42, + "learning_rate": 1.066364425762285e-06, + "loss": 0.0014, + "step": 14054 + }, + { + "epoch": 3.42, + "learning_rate": 1.0645920557243184e-06, + "loss": 0.0031, + "step": 14056 + }, + { + "epoch": 3.43, + "learning_rate": 1.0628210769976922e-06, + "loss": 0.0029, + "step": 14058 + }, + { + "epoch": 3.43, + "learning_rate": 1.0610514898581636e-06, + "loss": 0.0011, + "step": 14060 + }, + { + "epoch": 3.43, + "learning_rate": 1.0592832945812725e-06, + "loss": 0.0033, + "step": 14062 + }, + { + "epoch": 3.43, + "learning_rate": 1.0575164914423374e-06, + "loss": 0.0018, + "step": 14064 + }, + { + "epoch": 3.43, + "learning_rate": 1.055751080716465e-06, + "loss": 0.0019, + "step": 14066 + }, + { + "epoch": 3.43, + "learning_rate": 1.0539870626785452e-06, + "loss": 0.0044, + "step": 14068 + }, + { + "epoch": 3.43, + "learning_rate": 1.0522244376032464e-06, + "loss": 0.0024, + "step": 14070 + }, + { + "epoch": 3.43, + "learning_rate": 1.050463205765021e-06, + "loss": 0.0029, + "step": 14072 + }, + { + "epoch": 3.43, + "learning_rate": 1.0487033674381086e-06, + "loss": 0.0025, + "step": 14074 + }, + { + "epoch": 3.43, + "learning_rate": 1.0469449228965289e-06, + "loss": 0.0038, + "step": 14076 + }, + { + "epoch": 3.43, + "learning_rate": 1.0451878724140884e-06, + "loss": 0.0011, + "step": 14078 + }, + { + "epoch": 3.43, + "learning_rate": 1.0434322162643673e-06, + "loss": 0.0055, + "step": 14080 + }, + { + "epoch": 3.43, + "learning_rate": 1.041677954720739e-06, + "loss": 0.0072, + "step": 14082 + }, + { + "epoch": 3.43, + "learning_rate": 1.039925088056356e-06, + "loss": 0.0028, + "step": 14084 + }, + { + "epoch": 3.43, + "learning_rate": 1.0381736165441514e-06, + "loss": 0.0027, + "step": 14086 + }, + { + "epoch": 3.43, + "learning_rate": 1.0364235404568402e-06, + "loss": 0.0046, + "step": 14088 + }, + { + "epoch": 3.43, + "learning_rate": 1.0346748600669254e-06, + "loss": 0.0031, + "step": 14090 + }, + { + "epoch": 3.43, + "learning_rate": 1.0329275756466905e-06, + "loss": 0.003, + "step": 14092 + }, + { + "epoch": 3.43, + "learning_rate": 1.0311816874682012e-06, + "loss": 0.0007, + "step": 14094 + }, + { + "epoch": 3.43, + "learning_rate": 1.0294371958033023e-06, + "loss": 0.0025, + "step": 14096 + }, + { + "epoch": 3.44, + "learning_rate": 1.0276941009236296e-06, + "loss": 0.0044, + "step": 14098 + }, + { + "epoch": 3.44, + "learning_rate": 1.0259524031005907e-06, + "loss": 0.003, + "step": 14100 + }, + { + "epoch": 3.44, + "learning_rate": 1.0242121026053865e-06, + "loss": 0.0026, + "step": 14102 + }, + { + "epoch": 3.44, + "learning_rate": 1.022473199708991e-06, + "loss": 0.0022, + "step": 14104 + }, + { + "epoch": 3.44, + "learning_rate": 1.0207356946821655e-06, + "loss": 0.0024, + "step": 14106 + }, + { + "epoch": 3.44, + "learning_rate": 1.018999587795454e-06, + "loss": 0.0017, + "step": 14108 + }, + { + "epoch": 3.44, + "learning_rate": 1.0172648793191831e-06, + "loss": 0.0054, + "step": 14110 + }, + { + "epoch": 3.44, + "learning_rate": 1.015531569523459e-06, + "loss": 0.003, + "step": 14112 + }, + { + "epoch": 3.44, + "learning_rate": 1.0137996586781684e-06, + "loss": 0.0035, + "step": 14114 + }, + { + "epoch": 3.44, + "learning_rate": 1.0120691470529843e-06, + "loss": 0.0007, + "step": 14116 + }, + { + "epoch": 3.44, + "learning_rate": 1.0103400349173643e-06, + "loss": 0.0027, + "step": 14118 + }, + { + "epoch": 3.44, + "learning_rate": 1.0086123225405398e-06, + "loss": 0.003, + "step": 14120 + }, + { + "epoch": 3.44, + "learning_rate": 1.0068860101915312e-06, + "loss": 0.0021, + "step": 14122 + }, + { + "epoch": 3.44, + "learning_rate": 1.0051610981391392e-06, + "loss": 0.0027, + "step": 14124 + }, + { + "epoch": 3.44, + "learning_rate": 1.003437586651943e-06, + "loss": 0.0015, + "step": 14126 + }, + { + "epoch": 3.44, + "learning_rate": 1.0017154759983094e-06, + "loss": 0.0037, + "step": 14128 + }, + { + "epoch": 3.44, + "learning_rate": 9.999947664463815e-07, + "loss": 0.0015, + "step": 14130 + }, + { + "epoch": 3.44, + "learning_rate": 9.982754582640885e-07, + "loss": 0.0038, + "step": 14132 + }, + { + "epoch": 3.44, + "learning_rate": 9.965575517191418e-07, + "loss": 0.0008, + "step": 14134 + }, + { + "epoch": 3.44, + "learning_rate": 9.94841047079028e-07, + "loss": 0.0028, + "step": 14136 + }, + { + "epoch": 3.44, + "learning_rate": 9.93125944611023e-07, + "loss": 0.0042, + "step": 14138 + }, + { + "epoch": 3.45, + "learning_rate": 9.914122445821828e-07, + "loss": 0.0031, + "step": 14140 + }, + { + "epoch": 3.45, + "learning_rate": 9.896999472593415e-07, + "loss": 0.003, + "step": 14142 + }, + { + "epoch": 3.45, + "learning_rate": 9.879890529091152e-07, + "loss": 0.0021, + "step": 14144 + }, + { + "epoch": 3.45, + "learning_rate": 9.862795617979049e-07, + "loss": 0.0065, + "step": 14146 + }, + { + "epoch": 3.45, + "learning_rate": 9.84571474191891e-07, + "loss": 0.0049, + "step": 14148 + }, + { + "epoch": 3.45, + "learning_rate": 9.828647903570387e-07, + "loss": 0.0054, + "step": 14150 + }, + { + "epoch": 3.45, + "learning_rate": 9.811595105590876e-07, + "loss": 0.0009, + "step": 14152 + }, + { + "epoch": 3.45, + "learning_rate": 9.794556350635675e-07, + "loss": 0.0033, + "step": 14154 + }, + { + "epoch": 3.45, + "learning_rate": 9.777531641357784e-07, + "loss": 0.0029, + "step": 14156 + }, + { + "epoch": 3.45, + "learning_rate": 9.76052098040815e-07, + "loss": 0.0011, + "step": 14158 + }, + { + "epoch": 3.45, + "learning_rate": 9.74352437043541e-07, + "loss": 0.0027, + "step": 14160 + }, + { + "epoch": 3.45, + "learning_rate": 9.726541814086076e-07, + "loss": 0.0015, + "step": 14162 + }, + { + "epoch": 3.45, + "learning_rate": 9.709573314004473e-07, + "loss": 0.0015, + "step": 14164 + }, + { + "epoch": 3.45, + "learning_rate": 9.692618872832748e-07, + "loss": 0.0023, + "step": 14166 + }, + { + "epoch": 3.45, + "learning_rate": 9.675678493210806e-07, + "loss": 0.0024, + "step": 14168 + }, + { + "epoch": 3.45, + "learning_rate": 9.658752177776386e-07, + "loss": 0.0018, + "step": 14170 + }, + { + "epoch": 3.45, + "learning_rate": 9.641839929165063e-07, + "loss": 0.0026, + "step": 14172 + }, + { + "epoch": 3.45, + "learning_rate": 9.62494175001022e-07, + "loss": 0.001, + "step": 14174 + }, + { + "epoch": 3.45, + "learning_rate": 9.608057642942992e-07, + "loss": 0.0014, + "step": 14176 + }, + { + "epoch": 3.45, + "learning_rate": 9.59118761059238e-07, + "loss": 0.0037, + "step": 14178 + }, + { + "epoch": 3.46, + "learning_rate": 9.57433165558519e-07, + "loss": 0.007, + "step": 14180 + }, + { + "epoch": 3.46, + "learning_rate": 9.557489780546025e-07, + "loss": 0.0017, + "step": 14182 + }, + { + "epoch": 3.46, + "learning_rate": 9.540661988097289e-07, + "loss": 0.0026, + "step": 14184 + }, + { + "epoch": 3.46, + "learning_rate": 9.523848280859172e-07, + "loss": 0.0017, + "step": 14186 + }, + { + "epoch": 3.46, + "learning_rate": 9.507048661449714e-07, + "loss": 0.0029, + "step": 14188 + }, + { + "epoch": 3.46, + "learning_rate": 9.490263132484778e-07, + "loss": 0.0018, + "step": 14190 + }, + { + "epoch": 3.46, + "learning_rate": 9.473491696577941e-07, + "loss": 0.0034, + "step": 14192 + }, + { + "epoch": 3.46, + "learning_rate": 9.456734356340668e-07, + "loss": 0.0012, + "step": 14194 + }, + { + "epoch": 3.46, + "learning_rate": 9.43999111438223e-07, + "loss": 0.0031, + "step": 14196 + }, + { + "epoch": 3.46, + "learning_rate": 9.423261973309661e-07, + "loss": 0.0031, + "step": 14198 + }, + { + "epoch": 3.46, + "learning_rate": 9.40654693572779e-07, + "loss": 0.0013, + "step": 14200 + }, + { + "epoch": 3.46, + "learning_rate": 9.3898460042393e-07, + "loss": 0.0038, + "step": 14202 + }, + { + "epoch": 3.46, + "learning_rate": 9.373159181444647e-07, + "loss": 0.0035, + "step": 14204 + }, + { + "epoch": 3.46, + "learning_rate": 9.356486469942128e-07, + "loss": 0.0025, + "step": 14206 + }, + { + "epoch": 3.46, + "learning_rate": 9.339827872327756e-07, + "loss": 0.0022, + "step": 14208 + }, + { + "epoch": 3.46, + "learning_rate": 9.323183391195457e-07, + "loss": 0.0024, + "step": 14210 + }, + { + "epoch": 3.46, + "learning_rate": 9.306553029136855e-07, + "loss": 0.0037, + "step": 14212 + }, + { + "epoch": 3.46, + "learning_rate": 9.289936788741472e-07, + "loss": 0.0031, + "step": 14214 + }, + { + "epoch": 3.46, + "learning_rate": 9.273334672596535e-07, + "loss": 0.004, + "step": 14216 + }, + { + "epoch": 3.46, + "learning_rate": 9.256746683287143e-07, + "loss": 0.0014, + "step": 14218 + }, + { + "epoch": 3.46, + "learning_rate": 9.240172823396176e-07, + "loss": 0.0027, + "step": 14220 + }, + { + "epoch": 3.47, + "learning_rate": 9.223613095504336e-07, + "loss": 0.0007, + "step": 14222 + }, + { + "epoch": 3.47, + "learning_rate": 9.207067502190037e-07, + "loss": 0.0015, + "step": 14224 + }, + { + "epoch": 3.47, + "learning_rate": 9.190536046029618e-07, + "loss": 0.0018, + "step": 14226 + }, + { + "epoch": 3.47, + "learning_rate": 9.174018729597112e-07, + "loss": 0.0019, + "step": 14228 + }, + { + "epoch": 3.47, + "learning_rate": 9.157515555464414e-07, + "loss": 0.0045, + "step": 14230 + }, + { + "epoch": 3.47, + "learning_rate": 9.14102652620118e-07, + "loss": 0.0008, + "step": 14232 + }, + { + "epoch": 3.47, + "learning_rate": 9.124551644374868e-07, + "loss": 0.0031, + "step": 14234 + }, + { + "epoch": 3.47, + "learning_rate": 9.10809091255076e-07, + "loss": 0.0036, + "step": 14236 + }, + { + "epoch": 3.47, + "learning_rate": 9.091644333291938e-07, + "loss": 0.002, + "step": 14238 + }, + { + "epoch": 3.47, + "learning_rate": 9.075211909159242e-07, + "loss": 0.0047, + "step": 14240 + }, + { + "epoch": 3.47, + "learning_rate": 9.058793642711294e-07, + "loss": 0.0032, + "step": 14242 + }, + { + "epoch": 3.47, + "learning_rate": 9.042389536504581e-07, + "loss": 0.0038, + "step": 14244 + }, + { + "epoch": 3.47, + "learning_rate": 9.025999593093349e-07, + "loss": 0.0024, + "step": 14246 + }, + { + "epoch": 3.47, + "learning_rate": 9.009623815029611e-07, + "loss": 0.0021, + "step": 14248 + }, + { + "epoch": 3.47, + "learning_rate": 8.993262204863218e-07, + "loss": 0.0045, + "step": 14250 + }, + { + "epoch": 3.47, + "learning_rate": 8.976914765141809e-07, + "loss": 0.0021, + "step": 14252 + }, + { + "epoch": 3.47, + "learning_rate": 8.960581498410803e-07, + "loss": 0.0035, + "step": 14254 + }, + { + "epoch": 3.47, + "learning_rate": 8.944262407213378e-07, + "loss": 0.0018, + "step": 14256 + }, + { + "epoch": 3.47, + "learning_rate": 8.927957494090567e-07, + "loss": 0.0007, + "step": 14258 + }, + { + "epoch": 3.47, + "learning_rate": 8.911666761581173e-07, + "loss": 0.0065, + "step": 14260 + }, + { + "epoch": 3.48, + "learning_rate": 8.895390212221811e-07, + "loss": 0.0014, + "step": 14262 + }, + { + "epoch": 3.48, + "learning_rate": 8.879127848546809e-07, + "loss": 0.0007, + "step": 14264 + }, + { + "epoch": 3.48, + "learning_rate": 8.862879673088398e-07, + "loss": 0.0065, + "step": 14266 + }, + { + "epoch": 3.48, + "learning_rate": 8.846645688376488e-07, + "loss": 0.0061, + "step": 14268 + }, + { + "epoch": 3.48, + "learning_rate": 8.830425896938888e-07, + "loss": 0.0015, + "step": 14270 + }, + { + "epoch": 3.48, + "learning_rate": 8.81422030130109e-07, + "loss": 0.0022, + "step": 14272 + }, + { + "epoch": 3.48, + "learning_rate": 8.798028903986467e-07, + "loss": 0.0023, + "step": 14274 + }, + { + "epoch": 3.48, + "learning_rate": 8.781851707516131e-07, + "loss": 0.0019, + "step": 14276 + }, + { + "epoch": 3.48, + "learning_rate": 8.765688714409016e-07, + "loss": 0.0041, + "step": 14278 + }, + { + "epoch": 3.48, + "learning_rate": 8.749539927181782e-07, + "loss": 0.0055, + "step": 14280 + }, + { + "epoch": 3.48, + "learning_rate": 8.733405348348967e-07, + "loss": 0.0025, + "step": 14282 + }, + { + "epoch": 3.48, + "learning_rate": 8.717284980422791e-07, + "loss": 0.0039, + "step": 14284 + }, + { + "epoch": 3.48, + "learning_rate": 8.701178825913382e-07, + "loss": 0.0015, + "step": 14286 + }, + { + "epoch": 3.48, + "learning_rate": 8.685086887328542e-07, + "loss": 0.005, + "step": 14288 + }, + { + "epoch": 3.48, + "learning_rate": 8.669009167173925e-07, + "loss": 0.0014, + "step": 14290 + }, + { + "epoch": 3.48, + "learning_rate": 8.65294566795295e-07, + "loss": 0.0042, + "step": 14292 + }, + { + "epoch": 3.48, + "learning_rate": 8.63689639216686e-07, + "loss": 0.0014, + "step": 14294 + }, + { + "epoch": 3.48, + "learning_rate": 8.620861342314624e-07, + "loss": 0.0025, + "step": 14296 + }, + { + "epoch": 3.48, + "learning_rate": 8.604840520892998e-07, + "loss": 0.0031, + "step": 14298 + }, + { + "epoch": 3.48, + "learning_rate": 8.588833930396578e-07, + "loss": 0.0035, + "step": 14300 + }, + { + "epoch": 3.48, + "learning_rate": 8.572841573317714e-07, + "loss": 0.0045, + "step": 14302 + }, + { + "epoch": 3.49, + "learning_rate": 8.556863452146513e-07, + "loss": 0.003, + "step": 14304 + }, + { + "epoch": 3.49, + "learning_rate": 8.540899569370909e-07, + "loss": 0.004, + "step": 14306 + }, + { + "epoch": 3.49, + "learning_rate": 8.524949927476611e-07, + "loss": 0.002, + "step": 14308 + }, + { + "epoch": 3.49, + "learning_rate": 8.50901452894709e-07, + "loss": 0.0029, + "step": 14310 + }, + { + "epoch": 3.49, + "learning_rate": 8.493093376263584e-07, + "loss": 0.0022, + "step": 14312 + }, + { + "epoch": 3.49, + "learning_rate": 8.477186471905164e-07, + "loss": 0.0008, + "step": 14314 + }, + { + "epoch": 3.49, + "learning_rate": 8.46129381834866e-07, + "loss": 0.0049, + "step": 14316 + }, + { + "epoch": 3.49, + "learning_rate": 8.445415418068681e-07, + "loss": 0.0005, + "step": 14318 + }, + { + "epoch": 3.49, + "learning_rate": 8.429551273537595e-07, + "loss": 0.0034, + "step": 14320 + }, + { + "epoch": 3.49, + "learning_rate": 8.413701387225604e-07, + "loss": 0.0063, + "step": 14322 + }, + { + "epoch": 3.49, + "learning_rate": 8.39786576160061e-07, + "loss": 0.0025, + "step": 14324 + }, + { + "epoch": 3.49, + "learning_rate": 8.382044399128386e-07, + "loss": 0.003, + "step": 14326 + }, + { + "epoch": 3.49, + "learning_rate": 8.366237302272407e-07, + "loss": 0.0029, + "step": 14328 + }, + { + "epoch": 3.49, + "learning_rate": 8.350444473493968e-07, + "loss": 0.0027, + "step": 14330 + }, + { + "epoch": 3.49, + "learning_rate": 8.33466591525216e-07, + "loss": 0.0032, + "step": 14332 + }, + { + "epoch": 3.49, + "learning_rate": 8.318901630003773e-07, + "loss": 0.0048, + "step": 14334 + }, + { + "epoch": 3.49, + "learning_rate": 8.303151620203464e-07, + "loss": 0.0021, + "step": 14336 + }, + { + "epoch": 3.49, + "learning_rate": 8.287415888303641e-07, + "loss": 0.0051, + "step": 14338 + }, + { + "epoch": 3.49, + "learning_rate": 8.271694436754451e-07, + "loss": 0.0035, + "step": 14340 + }, + { + "epoch": 3.49, + "learning_rate": 8.25598726800384e-07, + "loss": 0.0014, + "step": 14342 + }, + { + "epoch": 3.5, + "learning_rate": 8.240294384497538e-07, + "loss": 0.0024, + "step": 14344 + }, + { + "epoch": 3.5, + "learning_rate": 8.224615788679058e-07, + "loss": 0.0036, + "step": 14346 + }, + { + "epoch": 3.5, + "learning_rate": 8.208951482989691e-07, + "loss": 0.003, + "step": 14348 + }, + { + "epoch": 3.5, + "learning_rate": 8.193301469868464e-07, + "loss": 0.0035, + "step": 14350 + }, + { + "epoch": 3.5, + "learning_rate": 8.177665751752217e-07, + "loss": 0.0032, + "step": 14352 + }, + { + "epoch": 3.5, + "learning_rate": 8.162044331075536e-07, + "loss": 0.0016, + "step": 14354 + }, + { + "epoch": 3.5, + "learning_rate": 8.146437210270819e-07, + "loss": 0.0009, + "step": 14356 + }, + { + "epoch": 3.5, + "learning_rate": 8.130844391768189e-07, + "loss": 0.0027, + "step": 14358 + }, + { + "epoch": 3.5, + "learning_rate": 8.11526587799557e-07, + "loss": 0.0026, + "step": 14360 + }, + { + "epoch": 3.5, + "learning_rate": 8.099701671378668e-07, + "loss": 0.0018, + "step": 14362 + }, + { + "epoch": 3.5, + "learning_rate": 8.084151774340965e-07, + "loss": 0.0008, + "step": 14364 + }, + { + "epoch": 3.5, + "learning_rate": 8.068616189303679e-07, + "loss": 0.0014, + "step": 14366 + }, + { + "epoch": 3.5, + "learning_rate": 8.053094918685799e-07, + "loss": 0.003, + "step": 14368 + }, + { + "epoch": 3.5, + "learning_rate": 8.037587964904136e-07, + "loss": 0.0008, + "step": 14370 + }, + { + "epoch": 3.5, + "learning_rate": 8.022095330373236e-07, + "loss": 0.0023, + "step": 14372 + }, + { + "epoch": 3.5, + "learning_rate": 8.006617017505402e-07, + "loss": 0.0011, + "step": 14374 + }, + { + "epoch": 3.5, + "learning_rate": 7.991153028710741e-07, + "loss": 0.0031, + "step": 14376 + }, + { + "epoch": 3.5, + "learning_rate": 7.975703366397114e-07, + "loss": 0.0016, + "step": 14378 + }, + { + "epoch": 3.5, + "learning_rate": 7.960268032970175e-07, + "loss": 0.0003, + "step": 14380 + }, + { + "epoch": 3.5, + "learning_rate": 7.944847030833292e-07, + "loss": 0.0029, + "step": 14382 + }, + { + "epoch": 3.5, + "learning_rate": 7.929440362387619e-07, + "loss": 0.0015, + "step": 14384 + }, + { + "epoch": 3.51, + "learning_rate": 7.914048030032117e-07, + "loss": 0.001, + "step": 14386 + }, + { + "epoch": 3.51, + "learning_rate": 7.898670036163503e-07, + "loss": 0.0038, + "step": 14388 + }, + { + "epoch": 3.51, + "learning_rate": 7.883306383176215e-07, + "loss": 0.0021, + "step": 14390 + }, + { + "epoch": 3.51, + "learning_rate": 7.867957073462507e-07, + "loss": 0.0061, + "step": 14392 + }, + { + "epoch": 3.51, + "learning_rate": 7.8526221094124e-07, + "loss": 0.0035, + "step": 14394 + }, + { + "epoch": 3.51, + "learning_rate": 7.83730149341364e-07, + "loss": 0.0008, + "step": 14396 + }, + { + "epoch": 3.51, + "learning_rate": 7.821995227851775e-07, + "loss": 0.0023, + "step": 14398 + }, + { + "epoch": 3.51, + "learning_rate": 7.806703315110098e-07, + "loss": 0.0026, + "step": 14400 + }, + { + "epoch": 3.51, + "learning_rate": 7.791425757569682e-07, + "loss": 0.0031, + "step": 14402 + }, + { + "epoch": 3.51, + "learning_rate": 7.776162557609379e-07, + "loss": 0.002, + "step": 14404 + }, + { + "epoch": 3.51, + "learning_rate": 7.760913717605756e-07, + "loss": 0.001, + "step": 14406 + }, + { + "epoch": 3.51, + "learning_rate": 7.745679239933202e-07, + "loss": 0.0014, + "step": 14408 + }, + { + "epoch": 3.51, + "learning_rate": 7.730459126963808e-07, + "loss": 0.0051, + "step": 14410 + }, + { + "epoch": 3.51, + "learning_rate": 7.7152533810675e-07, + "loss": 0.005, + "step": 14412 + }, + { + "epoch": 3.51, + "learning_rate": 7.700062004611897e-07, + "loss": 0.0026, + "step": 14414 + }, + { + "epoch": 3.51, + "learning_rate": 7.684884999962428e-07, + "loss": 0.0006, + "step": 14416 + }, + { + "epoch": 3.51, + "learning_rate": 7.669722369482258e-07, + "loss": 0.0015, + "step": 14418 + }, + { + "epoch": 3.51, + "learning_rate": 7.654574115532353e-07, + "loss": 0.004, + "step": 14420 + }, + { + "epoch": 3.51, + "learning_rate": 7.639440240471385e-07, + "loss": 0.0015, + "step": 14422 + }, + { + "epoch": 3.51, + "learning_rate": 7.624320746655811e-07, + "loss": 0.0028, + "step": 14424 + }, + { + "epoch": 3.52, + "learning_rate": 7.60921563643986e-07, + "loss": 0.0024, + "step": 14426 + }, + { + "epoch": 3.52, + "learning_rate": 7.594124912175527e-07, + "loss": 0.0034, + "step": 14428 + }, + { + "epoch": 3.52, + "learning_rate": 7.579048576212534e-07, + "loss": 0.001, + "step": 14430 + }, + { + "epoch": 3.52, + "learning_rate": 7.563986630898379e-07, + "loss": 0.0015, + "step": 14432 + }, + { + "epoch": 3.52, + "learning_rate": 7.548939078578332e-07, + "loss": 0.0014, + "step": 14434 + }, + { + "epoch": 3.52, + "learning_rate": 7.53390592159543e-07, + "loss": 0.0023, + "step": 14436 + }, + { + "epoch": 3.52, + "learning_rate": 7.518887162290433e-07, + "loss": 0.0035, + "step": 14438 + }, + { + "epoch": 3.52, + "learning_rate": 7.50388280300186e-07, + "loss": 0.0033, + "step": 14440 + }, + { + "epoch": 3.52, + "learning_rate": 7.48889284606602e-07, + "loss": 0.0026, + "step": 14442 + }, + { + "epoch": 3.52, + "learning_rate": 7.473917293816979e-07, + "loss": 0.007, + "step": 14444 + }, + { + "epoch": 3.52, + "learning_rate": 7.458956148586516e-07, + "loss": 0.0022, + "step": 14446 + }, + { + "epoch": 3.52, + "learning_rate": 7.444009412704211e-07, + "loss": 0.003, + "step": 14448 + }, + { + "epoch": 3.52, + "learning_rate": 7.429077088497393e-07, + "loss": 0.0048, + "step": 14450 + }, + { + "epoch": 3.52, + "learning_rate": 7.414159178291136e-07, + "loss": 0.0005, + "step": 14452 + }, + { + "epoch": 3.52, + "learning_rate": 7.399255684408246e-07, + "loss": 0.008, + "step": 14454 + }, + { + "epoch": 3.52, + "learning_rate": 7.384366609169336e-07, + "loss": 0.0022, + "step": 14456 + }, + { + "epoch": 3.52, + "learning_rate": 7.369491954892749e-07, + "loss": 0.0035, + "step": 14458 + }, + { + "epoch": 3.52, + "learning_rate": 7.35463172389459e-07, + "loss": 0.0021, + "step": 14460 + }, + { + "epoch": 3.52, + "learning_rate": 7.339785918488673e-07, + "loss": 0.0017, + "step": 14462 + }, + { + "epoch": 3.52, + "learning_rate": 7.32495454098665e-07, + "loss": 0.0013, + "step": 14464 + }, + { + "epoch": 3.52, + "learning_rate": 7.310137593697853e-07, + "loss": 0.0038, + "step": 14466 + }, + { + "epoch": 3.53, + "learning_rate": 7.2953350789294e-07, + "loss": 0.0014, + "step": 14468 + }, + { + "epoch": 3.53, + "learning_rate": 7.280546998986149e-07, + "loss": 0.0025, + "step": 14470 + }, + { + "epoch": 3.53, + "learning_rate": 7.265773356170724e-07, + "loss": 0.0037, + "step": 14472 + }, + { + "epoch": 3.53, + "learning_rate": 7.251014152783487e-07, + "loss": 0.001, + "step": 14474 + }, + { + "epoch": 3.53, + "learning_rate": 7.236269391122586e-07, + "loss": 0.0025, + "step": 14476 + }, + { + "epoch": 3.53, + "learning_rate": 7.221539073483863e-07, + "loss": 0.0036, + "step": 14478 + }, + { + "epoch": 3.53, + "learning_rate": 7.206823202160951e-07, + "loss": 0.0021, + "step": 14480 + }, + { + "epoch": 3.53, + "learning_rate": 7.192121779445227e-07, + "loss": 0.0011, + "step": 14482 + }, + { + "epoch": 3.53, + "learning_rate": 7.177434807625816e-07, + "loss": 0.0038, + "step": 14484 + }, + { + "epoch": 3.53, + "learning_rate": 7.162762288989567e-07, + "loss": 0.0018, + "step": 14486 + }, + { + "epoch": 3.53, + "learning_rate": 7.148104225821128e-07, + "loss": 0.0015, + "step": 14488 + }, + { + "epoch": 3.53, + "learning_rate": 7.133460620402877e-07, + "loss": 0.0024, + "step": 14490 + }, + { + "epoch": 3.53, + "learning_rate": 7.118831475014931e-07, + "loss": 0.0032, + "step": 14492 + }, + { + "epoch": 3.53, + "learning_rate": 7.104216791935148e-07, + "loss": 0.0019, + "step": 14494 + }, + { + "epoch": 3.53, + "learning_rate": 7.089616573439151e-07, + "loss": 0.0016, + "step": 14496 + }, + { + "epoch": 3.53, + "learning_rate": 7.075030821800299e-07, + "loss": 0.0014, + "step": 14498 + }, + { + "epoch": 3.53, + "learning_rate": 7.060459539289733e-07, + "loss": 0.003, + "step": 14500 + }, + { + "epoch": 3.53, + "learning_rate": 7.045902728176268e-07, + "loss": 0.0021, + "step": 14502 + }, + { + "epoch": 3.53, + "learning_rate": 7.03136039072655e-07, + "loss": 0.0036, + "step": 14504 + }, + { + "epoch": 3.53, + "learning_rate": 7.01683252920492e-07, + "loss": 0.0045, + "step": 14506 + }, + { + "epoch": 3.54, + "learning_rate": 7.002319145873482e-07, + "loss": 0.0043, + "step": 14508 + }, + { + "epoch": 3.54, + "learning_rate": 6.98782024299206e-07, + "loss": 0.0058, + "step": 14510 + }, + { + "epoch": 3.54, + "learning_rate": 6.97333582281825e-07, + "loss": 0.0037, + "step": 14512 + }, + { + "epoch": 3.54, + "learning_rate": 6.958865887607402e-07, + "loss": 0.0019, + "step": 14514 + }, + { + "epoch": 3.54, + "learning_rate": 6.944410439612603e-07, + "loss": 0.0009, + "step": 14516 + }, + { + "epoch": 3.54, + "learning_rate": 6.929969481084642e-07, + "loss": 0.0019, + "step": 14518 + }, + { + "epoch": 3.54, + "learning_rate": 6.91554301427213e-07, + "loss": 0.0003, + "step": 14520 + }, + { + "epoch": 3.54, + "learning_rate": 6.901131041421327e-07, + "loss": 0.0005, + "step": 14522 + }, + { + "epoch": 3.54, + "learning_rate": 6.886733564776349e-07, + "loss": 0.004, + "step": 14524 + }, + { + "epoch": 3.54, + "learning_rate": 6.872350586578935e-07, + "loss": 0.0017, + "step": 14526 + }, + { + "epoch": 3.54, + "learning_rate": 6.857982109068639e-07, + "loss": 0.0039, + "step": 14528 + }, + { + "epoch": 3.54, + "learning_rate": 6.843628134482771e-07, + "loss": 0.0013, + "step": 14530 + }, + { + "epoch": 3.54, + "learning_rate": 6.829288665056344e-07, + "loss": 0.0058, + "step": 14532 + }, + { + "epoch": 3.54, + "learning_rate": 6.814963703022104e-07, + "loss": 0.0069, + "step": 14534 + }, + { + "epoch": 3.54, + "learning_rate": 6.800653250610578e-07, + "loss": 0.0012, + "step": 14536 + }, + { + "epoch": 3.54, + "learning_rate": 6.786357310049984e-07, + "loss": 0.0019, + "step": 14538 + }, + { + "epoch": 3.54, + "learning_rate": 6.772075883566353e-07, + "loss": 0.0035, + "step": 14540 + }, + { + "epoch": 3.54, + "learning_rate": 6.757808973383373e-07, + "loss": 0.0036, + "step": 14542 + }, + { + "epoch": 3.54, + "learning_rate": 6.743556581722532e-07, + "loss": 0.003, + "step": 14544 + }, + { + "epoch": 3.54, + "learning_rate": 6.729318710803024e-07, + "loss": 0.0032, + "step": 14546 + }, + { + "epoch": 3.54, + "learning_rate": 6.715095362841817e-07, + "loss": 0.0044, + "step": 14548 + }, + { + "epoch": 3.55, + "learning_rate": 6.700886540053575e-07, + "loss": 0.002, + "step": 14550 + }, + { + "epoch": 3.55, + "learning_rate": 6.686692244650716e-07, + "loss": 0.0019, + "step": 14552 + }, + { + "epoch": 3.55, + "learning_rate": 6.672512478843407e-07, + "loss": 0.003, + "step": 14554 + }, + { + "epoch": 3.55, + "learning_rate": 6.658347244839558e-07, + "loss": 0.0023, + "step": 14556 + }, + { + "epoch": 3.55, + "learning_rate": 6.644196544844784e-07, + "loss": 0.0022, + "step": 14558 + }, + { + "epoch": 3.55, + "learning_rate": 6.630060381062464e-07, + "loss": 0.0019, + "step": 14560 + }, + { + "epoch": 3.55, + "learning_rate": 6.61593875569373e-07, + "loss": 0.0044, + "step": 14562 + }, + { + "epoch": 3.55, + "learning_rate": 6.601831670937409e-07, + "loss": 0.005, + "step": 14564 + }, + { + "epoch": 3.55, + "learning_rate": 6.587739128990056e-07, + "loss": 0.0041, + "step": 14566 + }, + { + "epoch": 3.55, + "learning_rate": 6.573661132046016e-07, + "loss": 0.0021, + "step": 14568 + }, + { + "epoch": 3.55, + "learning_rate": 6.559597682297337e-07, + "loss": 0.0018, + "step": 14570 + }, + { + "epoch": 3.55, + "learning_rate": 6.545548781933819e-07, + "loss": 0.0034, + "step": 14572 + }, + { + "epoch": 3.55, + "learning_rate": 6.53151443314296e-07, + "loss": 0.0037, + "step": 14574 + }, + { + "epoch": 3.55, + "learning_rate": 6.517494638110033e-07, + "loss": 0.0028, + "step": 14576 + }, + { + "epoch": 3.55, + "learning_rate": 6.503489399018004e-07, + "loss": 0.0035, + "step": 14578 + }, + { + "epoch": 3.55, + "learning_rate": 6.489498718047626e-07, + "loss": 0.0038, + "step": 14580 + }, + { + "epoch": 3.55, + "learning_rate": 6.475522597377326e-07, + "loss": 0.002, + "step": 14582 + }, + { + "epoch": 3.55, + "learning_rate": 6.461561039183306e-07, + "loss": 0.0037, + "step": 14584 + }, + { + "epoch": 3.55, + "learning_rate": 6.447614045639494e-07, + "loss": 0.0023, + "step": 14586 + }, + { + "epoch": 3.55, + "learning_rate": 6.433681618917542e-07, + "loss": 0.003, + "step": 14588 + }, + { + "epoch": 3.56, + "learning_rate": 6.419763761186826e-07, + "loss": 0.001, + "step": 14590 + }, + { + "epoch": 3.56, + "learning_rate": 6.405860474614478e-07, + "loss": 0.0041, + "step": 14592 + }, + { + "epoch": 3.56, + "learning_rate": 6.391971761365323e-07, + "loss": 0.0035, + "step": 14594 + }, + { + "epoch": 3.56, + "learning_rate": 6.378097623601964e-07, + "loss": 0.0013, + "step": 14596 + }, + { + "epoch": 3.56, + "learning_rate": 6.364238063484684e-07, + "loss": 0.0037, + "step": 14598 + }, + { + "epoch": 3.56, + "learning_rate": 6.350393083171535e-07, + "loss": 0.0043, + "step": 14600 + }, + { + "epoch": 3.56, + "learning_rate": 6.336562684818292e-07, + "loss": 0.0039, + "step": 14602 + }, + { + "epoch": 3.56, + "learning_rate": 6.322746870578477e-07, + "loss": 0.007, + "step": 14604 + }, + { + "epoch": 3.56, + "learning_rate": 6.308945642603281e-07, + "loss": 0.0037, + "step": 14606 + }, + { + "epoch": 3.56, + "learning_rate": 6.295159003041651e-07, + "loss": 0.0021, + "step": 14608 + }, + { + "epoch": 3.56, + "learning_rate": 6.281386954040303e-07, + "loss": 0.0013, + "step": 14610 + }, + { + "epoch": 3.56, + "learning_rate": 6.267629497743643e-07, + "loss": 0.0011, + "step": 14612 + }, + { + "epoch": 3.56, + "learning_rate": 6.253886636293805e-07, + "loss": 0.0014, + "step": 14614 + }, + { + "epoch": 3.56, + "learning_rate": 6.240158371830662e-07, + "loss": 0.0032, + "step": 14616 + }, + { + "epoch": 3.56, + "learning_rate": 6.226444706491819e-07, + "loss": 0.0019, + "step": 14618 + }, + { + "epoch": 3.56, + "learning_rate": 6.212745642412587e-07, + "loss": 0.0017, + "step": 14620 + }, + { + "epoch": 3.56, + "learning_rate": 6.199061181726007e-07, + "loss": 0.0027, + "step": 14622 + }, + { + "epoch": 3.56, + "learning_rate": 6.185391326562862e-07, + "loss": 0.0028, + "step": 14624 + }, + { + "epoch": 3.56, + "learning_rate": 6.171736079051661e-07, + "loss": 0.0054, + "step": 14626 + }, + { + "epoch": 3.56, + "learning_rate": 6.158095441318634e-07, + "loss": 0.0014, + "step": 14628 + }, + { + "epoch": 3.56, + "learning_rate": 6.144469415487709e-07, + "loss": 0.0021, + "step": 14630 + }, + { + "epoch": 3.57, + "learning_rate": 6.130858003680574e-07, + "loss": 0.0024, + "step": 14632 + }, + { + "epoch": 3.57, + "learning_rate": 6.117261208016645e-07, + "loss": 0.0023, + "step": 14634 + }, + { + "epoch": 3.57, + "learning_rate": 6.103679030613042e-07, + "loss": 0.0007, + "step": 14636 + }, + { + "epoch": 3.57, + "learning_rate": 6.090111473584581e-07, + "loss": 0.0028, + "step": 14638 + }, + { + "epoch": 3.57, + "learning_rate": 6.076558539043875e-07, + "loss": 0.002, + "step": 14640 + }, + { + "epoch": 3.57, + "learning_rate": 6.063020229101191e-07, + "loss": 0.0008, + "step": 14642 + }, + { + "epoch": 3.57, + "learning_rate": 6.049496545864586e-07, + "loss": 0.004, + "step": 14644 + }, + { + "epoch": 3.57, + "learning_rate": 6.035987491439754e-07, + "loss": 0.0012, + "step": 14646 + }, + { + "epoch": 3.57, + "learning_rate": 6.022493067930191e-07, + "loss": 0.0014, + "step": 14648 + }, + { + "epoch": 3.57, + "learning_rate": 6.009013277437059e-07, + "loss": 0.0026, + "step": 14650 + }, + { + "epoch": 3.57, + "learning_rate": 5.995548122059292e-07, + "loss": 0.0056, + "step": 14652 + }, + { + "epoch": 3.57, + "learning_rate": 5.982097603893488e-07, + "loss": 0.0016, + "step": 14654 + }, + { + "epoch": 3.57, + "learning_rate": 5.968661725034008e-07, + "loss": 0.0033, + "step": 14656 + }, + { + "epoch": 3.57, + "learning_rate": 5.955240487572922e-07, + "loss": 0.0039, + "step": 14658 + }, + { + "epoch": 3.57, + "learning_rate": 5.941833893600036e-07, + "loss": 0.0022, + "step": 14660 + }, + { + "epoch": 3.57, + "learning_rate": 5.928441945202846e-07, + "loss": 0.0012, + "step": 14662 + }, + { + "epoch": 3.57, + "learning_rate": 5.915064644466562e-07, + "loss": 0.0006, + "step": 14664 + }, + { + "epoch": 3.57, + "learning_rate": 5.90170199347414e-07, + "loss": 0.0008, + "step": 14666 + }, + { + "epoch": 3.57, + "learning_rate": 5.888353994306273e-07, + "loss": 0.0003, + "step": 14668 + }, + { + "epoch": 3.57, + "learning_rate": 5.875020649041318e-07, + "loss": 0.0054, + "step": 14670 + }, + { + "epoch": 3.58, + "learning_rate": 5.861701959755384e-07, + "loss": 0.0012, + "step": 14672 + }, + { + "epoch": 3.58, + "learning_rate": 5.848397928522309e-07, + "loss": 0.0017, + "step": 14674 + }, + { + "epoch": 3.58, + "learning_rate": 5.835108557413627e-07, + "loss": 0.0016, + "step": 14676 + }, + { + "epoch": 3.58, + "learning_rate": 5.82183384849857e-07, + "loss": 0.0048, + "step": 14678 + }, + { + "epoch": 3.58, + "learning_rate": 5.808573803844131e-07, + "loss": 0.0038, + "step": 14680 + }, + { + "epoch": 3.58, + "learning_rate": 5.795328425515001e-07, + "loss": 0.0023, + "step": 14682 + }, + { + "epoch": 3.58, + "learning_rate": 5.782097715573609e-07, + "loss": 0.0016, + "step": 14684 + }, + { + "epoch": 3.58, + "learning_rate": 5.768881676080029e-07, + "loss": 0.0017, + "step": 14686 + }, + { + "epoch": 3.58, + "learning_rate": 5.755680309092127e-07, + "loss": 0.0025, + "step": 14688 + }, + { + "epoch": 3.58, + "learning_rate": 5.742493616665468e-07, + "loss": 0.0022, + "step": 14690 + }, + { + "epoch": 3.58, + "learning_rate": 5.729321600853311e-07, + "loss": 0.0026, + "step": 14692 + }, + { + "epoch": 3.58, + "learning_rate": 5.716164263706614e-07, + "loss": 0.0028, + "step": 14694 + }, + { + "epoch": 3.58, + "learning_rate": 5.703021607274095e-07, + "loss": 0.0052, + "step": 14696 + }, + { + "epoch": 3.58, + "learning_rate": 5.689893633602173e-07, + "loss": 0.0017, + "step": 14698 + }, + { + "epoch": 3.58, + "learning_rate": 5.676780344734989e-07, + "loss": 0.0023, + "step": 14700 + }, + { + "epoch": 3.58, + "learning_rate": 5.663681742714344e-07, + "loss": 0.0036, + "step": 14702 + }, + { + "epoch": 3.58, + "learning_rate": 5.650597829579818e-07, + "loss": 0.0007, + "step": 14704 + }, + { + "epoch": 3.58, + "learning_rate": 5.637528607368658e-07, + "loss": 0.0029, + "step": 14706 + }, + { + "epoch": 3.58, + "learning_rate": 5.62447407811586e-07, + "loss": 0.0027, + "step": 14708 + }, + { + "epoch": 3.58, + "learning_rate": 5.611434243854097e-07, + "loss": 0.0046, + "step": 14710 + }, + { + "epoch": 3.58, + "learning_rate": 5.598409106613778e-07, + "loss": 0.0028, + "step": 14712 + }, + { + "epoch": 3.59, + "learning_rate": 5.585398668423014e-07, + "loss": 0.0013, + "step": 14714 + }, + { + "epoch": 3.59, + "learning_rate": 5.572402931307641e-07, + "loss": 0.0014, + "step": 14716 + }, + { + "epoch": 3.59, + "learning_rate": 5.559421897291195e-07, + "loss": 0.0041, + "step": 14718 + }, + { + "epoch": 3.59, + "learning_rate": 5.546455568394904e-07, + "loss": 0.0028, + "step": 14720 + }, + { + "epoch": 3.59, + "learning_rate": 5.53350394663772e-07, + "loss": 0.0047, + "step": 14722 + }, + { + "epoch": 3.59, + "learning_rate": 5.520567034036351e-07, + "loss": 0.0031, + "step": 14724 + }, + { + "epoch": 3.59, + "learning_rate": 5.50764483260513e-07, + "loss": 0.0019, + "step": 14726 + }, + { + "epoch": 3.59, + "learning_rate": 5.49473734435615e-07, + "loss": 0.0032, + "step": 14728 + }, + { + "epoch": 3.59, + "learning_rate": 5.481844571299222e-07, + "loss": 0.0024, + "step": 14730 + }, + { + "epoch": 3.59, + "learning_rate": 5.468966515441854e-07, + "loss": 0.0008, + "step": 14732 + }, + { + "epoch": 3.59, + "learning_rate": 5.456103178789252e-07, + "loss": 0.0016, + "step": 14734 + }, + { + "epoch": 3.59, + "learning_rate": 5.443254563344302e-07, + "loss": 0.0039, + "step": 14736 + }, + { + "epoch": 3.59, + "learning_rate": 5.430420671107672e-07, + "loss": 0.0009, + "step": 14738 + }, + { + "epoch": 3.59, + "learning_rate": 5.417601504077686e-07, + "loss": 0.0018, + "step": 14740 + }, + { + "epoch": 3.59, + "learning_rate": 5.404797064250378e-07, + "loss": 0.0034, + "step": 14742 + }, + { + "epoch": 3.59, + "learning_rate": 5.3920073536195e-07, + "loss": 0.0044, + "step": 14744 + }, + { + "epoch": 3.59, + "learning_rate": 5.379232374176524e-07, + "loss": 0.0013, + "step": 14746 + }, + { + "epoch": 3.59, + "learning_rate": 5.366472127910605e-07, + "loss": 0.0047, + "step": 14748 + }, + { + "epoch": 3.59, + "learning_rate": 5.353726616808596e-07, + "loss": 0.0012, + "step": 14750 + }, + { + "epoch": 3.59, + "learning_rate": 5.340995842855068e-07, + "loss": 0.0016, + "step": 14752 + }, + { + "epoch": 3.6, + "learning_rate": 5.328279808032322e-07, + "loss": 0.0004, + "step": 14754 + }, + { + "epoch": 3.6, + "learning_rate": 5.31557851432033e-07, + "loss": 0.0044, + "step": 14756 + }, + { + "epoch": 3.6, + "learning_rate": 5.302891963696788e-07, + "loss": 0.0037, + "step": 14758 + }, + { + "epoch": 3.6, + "learning_rate": 5.290220158137083e-07, + "loss": 0.0042, + "step": 14760 + }, + { + "epoch": 3.6, + "learning_rate": 5.277563099614302e-07, + "loss": 0.0047, + "step": 14762 + }, + { + "epoch": 3.6, + "learning_rate": 5.26492079009927e-07, + "loss": 0.0037, + "step": 14764 + }, + { + "epoch": 3.6, + "learning_rate": 5.252293231560468e-07, + "loss": 0.0014, + "step": 14766 + }, + { + "epoch": 3.6, + "learning_rate": 5.2396804259641e-07, + "loss": 0.0009, + "step": 14768 + }, + { + "epoch": 3.6, + "learning_rate": 5.227082375274095e-07, + "loss": 0.0034, + "step": 14770 + }, + { + "epoch": 3.6, + "learning_rate": 5.214499081452084e-07, + "loss": 0.0015, + "step": 14772 + }, + { + "epoch": 3.6, + "learning_rate": 5.201930546457345e-07, + "loss": 0.0015, + "step": 14774 + }, + { + "epoch": 3.6, + "learning_rate": 5.189376772246901e-07, + "loss": 0.005, + "step": 14776 + }, + { + "epoch": 3.6, + "learning_rate": 5.176837760775466e-07, + "loss": 0.002, + "step": 14778 + }, + { + "epoch": 3.6, + "learning_rate": 5.1643135139955e-07, + "loss": 0.0024, + "step": 14780 + }, + { + "epoch": 3.6, + "learning_rate": 5.151804033857077e-07, + "loss": 0.002, + "step": 14782 + }, + { + "epoch": 3.6, + "learning_rate": 5.139309322308029e-07, + "loss": 0.0014, + "step": 14784 + }, + { + "epoch": 3.6, + "learning_rate": 5.126829381293896e-07, + "loss": 0.0009, + "step": 14786 + }, + { + "epoch": 3.6, + "learning_rate": 5.114364212757894e-07, + "loss": 0.0017, + "step": 14788 + }, + { + "epoch": 3.6, + "learning_rate": 5.101913818640958e-07, + "loss": 0.0018, + "step": 14790 + }, + { + "epoch": 3.6, + "learning_rate": 5.089478200881659e-07, + "loss": 0.0016, + "step": 14792 + }, + { + "epoch": 3.6, + "learning_rate": 5.077057361416371e-07, + "loss": 0.0064, + "step": 14794 + }, + { + "epoch": 3.61, + "learning_rate": 5.064651302179091e-07, + "loss": 0.0027, + "step": 14796 + }, + { + "epoch": 3.61, + "learning_rate": 5.052260025101541e-07, + "loss": 0.0016, + "step": 14798 + }, + { + "epoch": 3.61, + "learning_rate": 5.039883532113132e-07, + "loss": 0.0019, + "step": 14800 + }, + { + "epoch": 3.61, + "learning_rate": 5.027521825140991e-07, + "loss": 0.0047, + "step": 14802 + }, + { + "epoch": 3.61, + "learning_rate": 5.015174906109932e-07, + "loss": 0.0015, + "step": 14804 + }, + { + "epoch": 3.61, + "learning_rate": 5.00284277694244e-07, + "loss": 0.0022, + "step": 14806 + }, + { + "epoch": 3.61, + "learning_rate": 4.990525439558735e-07, + "loss": 0.0046, + "step": 14808 + }, + { + "epoch": 3.61, + "learning_rate": 4.978222895876727e-07, + "loss": 0.003, + "step": 14810 + }, + { + "epoch": 3.61, + "learning_rate": 4.965935147812028e-07, + "loss": 0.0026, + "step": 14812 + }, + { + "epoch": 3.61, + "learning_rate": 4.953662197277898e-07, + "loss": 0.0046, + "step": 14814 + }, + { + "epoch": 3.61, + "learning_rate": 4.941404046185372e-07, + "loss": 0.0032, + "step": 14816 + }, + { + "epoch": 3.61, + "learning_rate": 4.929160696443103e-07, + "loss": 0.0037, + "step": 14818 + }, + { + "epoch": 3.61, + "learning_rate": 4.916932149957488e-07, + "loss": 0.003, + "step": 14820 + }, + { + "epoch": 3.61, + "learning_rate": 4.904718408632602e-07, + "loss": 0.0062, + "step": 14822 + }, + { + "epoch": 3.61, + "learning_rate": 4.892519474370217e-07, + "loss": 0.002, + "step": 14824 + }, + { + "epoch": 3.61, + "learning_rate": 4.880335349069809e-07, + "loss": 0.0017, + "step": 14826 + }, + { + "epoch": 3.61, + "learning_rate": 4.868166034628541e-07, + "loss": 0.0033, + "step": 14828 + }, + { + "epoch": 3.61, + "learning_rate": 4.856011532941252e-07, + "loss": 0.0009, + "step": 14830 + }, + { + "epoch": 3.61, + "learning_rate": 4.843871845900505e-07, + "loss": 0.0022, + "step": 14832 + }, + { + "epoch": 3.61, + "learning_rate": 4.831746975396534e-07, + "loss": 0.0019, + "step": 14834 + }, + { + "epoch": 3.62, + "learning_rate": 4.819636923317284e-07, + "loss": 0.0029, + "step": 14836 + }, + { + "epoch": 3.62, + "learning_rate": 4.807541691548368e-07, + "loss": 0.0045, + "step": 14838 + }, + { + "epoch": 3.62, + "learning_rate": 4.795461281973113e-07, + "loss": 0.0037, + "step": 14840 + }, + { + "epoch": 3.62, + "learning_rate": 4.783395696472526e-07, + "loss": 0.0047, + "step": 14842 + }, + { + "epoch": 3.62, + "learning_rate": 4.771344936925337e-07, + "loss": 0.0018, + "step": 14844 + }, + { + "epoch": 3.62, + "learning_rate": 4.7593090052079237e-07, + "loss": 0.0023, + "step": 14846 + }, + { + "epoch": 3.62, + "learning_rate": 4.747287903194353e-07, + "loss": 0.0046, + "step": 14848 + }, + { + "epoch": 3.62, + "learning_rate": 4.735281632756439e-07, + "loss": 0.003, + "step": 14850 + }, + { + "epoch": 3.62, + "learning_rate": 4.723290195763608e-07, + "loss": 0.0017, + "step": 14852 + }, + { + "epoch": 3.62, + "learning_rate": 4.7113135940830447e-07, + "loss": 0.0025, + "step": 14854 + }, + { + "epoch": 3.62, + "learning_rate": 4.6993518295796015e-07, + "loss": 0.0022, + "step": 14856 + }, + { + "epoch": 3.62, + "learning_rate": 4.6874049041158107e-07, + "loss": 0.0011, + "step": 14858 + }, + { + "epoch": 3.62, + "learning_rate": 4.675472819551907e-07, + "loss": 0.0024, + "step": 14860 + }, + { + "epoch": 3.62, + "learning_rate": 4.663555577745782e-07, + "loss": 0.0028, + "step": 14862 + }, + { + "epoch": 3.62, + "learning_rate": 4.6516531805530615e-07, + "loss": 0.0019, + "step": 14864 + }, + { + "epoch": 3.62, + "learning_rate": 4.639765629827042e-07, + "loss": 0.0001, + "step": 14866 + }, + { + "epoch": 3.62, + "learning_rate": 4.627892927418698e-07, + "loss": 0.002, + "step": 14868 + }, + { + "epoch": 3.62, + "learning_rate": 4.6160350751766945e-07, + "loss": 0.0009, + "step": 14870 + }, + { + "epoch": 3.62, + "learning_rate": 4.604192074947411e-07, + "loss": 0.0038, + "step": 14872 + }, + { + "epoch": 3.62, + "learning_rate": 4.592363928574883e-07, + "loss": 0.003, + "step": 14874 + }, + { + "epoch": 3.62, + "learning_rate": 4.580550637900827e-07, + "loss": 0.0014, + "step": 14876 + }, + { + "epoch": 3.63, + "learning_rate": 4.5687522047646813e-07, + "loss": 0.0021, + "step": 14878 + }, + { + "epoch": 3.63, + "learning_rate": 4.5569686310035444e-07, + "loss": 0.0025, + "step": 14880 + }, + { + "epoch": 3.63, + "learning_rate": 4.5451999184522145e-07, + "loss": 0.0026, + "step": 14882 + }, + { + "epoch": 3.63, + "learning_rate": 4.533446068943159e-07, + "loss": 0.0007, + "step": 14884 + }, + { + "epoch": 3.63, + "learning_rate": 4.5217070843065593e-07, + "loss": 0.0011, + "step": 14886 + }, + { + "epoch": 3.63, + "learning_rate": 4.509982966370252e-07, + "loss": 0.0023, + "step": 14888 + }, + { + "epoch": 3.63, + "learning_rate": 4.498273716959789e-07, + "loss": 0.0034, + "step": 14890 + }, + { + "epoch": 3.63, + "learning_rate": 4.486579337898356e-07, + "loss": 0.0016, + "step": 14892 + }, + { + "epoch": 3.63, + "learning_rate": 4.474899831006885e-07, + "loss": 0.0015, + "step": 14894 + }, + { + "epoch": 3.63, + "learning_rate": 4.4632351981039543e-07, + "loss": 0.002, + "step": 14896 + }, + { + "epoch": 3.63, + "learning_rate": 4.451585441005857e-07, + "loss": 0.0034, + "step": 14898 + }, + { + "epoch": 3.63, + "learning_rate": 4.439950561526507e-07, + "loss": 0.0018, + "step": 14900 + }, + { + "epoch": 3.63, + "learning_rate": 4.4283305614775894e-07, + "loss": 0.0034, + "step": 14902 + }, + { + "epoch": 3.63, + "learning_rate": 4.41672544266839e-07, + "loss": 0.0057, + "step": 14904 + }, + { + "epoch": 3.63, + "learning_rate": 4.4051352069059526e-07, + "loss": 0.0016, + "step": 14906 + }, + { + "epoch": 3.63, + "learning_rate": 4.393559855994922e-07, + "loss": 0.0027, + "step": 14908 + }, + { + "epoch": 3.63, + "learning_rate": 4.381999391737701e-07, + "loss": 0.0024, + "step": 14910 + }, + { + "epoch": 3.63, + "learning_rate": 4.370453815934328e-07, + "loss": 0.0026, + "step": 14912 + }, + { + "epoch": 3.63, + "learning_rate": 4.358923130382553e-07, + "loss": 0.0022, + "step": 14914 + }, + { + "epoch": 3.63, + "learning_rate": 4.3474073368777736e-07, + "loss": 0.0045, + "step": 14916 + }, + { + "epoch": 3.63, + "learning_rate": 4.3359064372130886e-07, + "loss": 0.0027, + "step": 14918 + }, + { + "epoch": 3.64, + "learning_rate": 4.324420433179288e-07, + "loss": 0.0021, + "step": 14920 + }, + { + "epoch": 3.64, + "learning_rate": 4.312949326564819e-07, + "loss": 0.0023, + "step": 14922 + }, + { + "epoch": 3.64, + "learning_rate": 4.3014931191558307e-07, + "loss": 0.0034, + "step": 14924 + }, + { + "epoch": 3.64, + "learning_rate": 4.290051812736129e-07, + "loss": 0.0016, + "step": 14926 + }, + { + "epoch": 3.64, + "learning_rate": 4.278625409087234e-07, + "loss": 0.004, + "step": 14928 + }, + { + "epoch": 3.64, + "learning_rate": 4.2672139099882995e-07, + "loss": 0.004, + "step": 14930 + }, + { + "epoch": 3.64, + "learning_rate": 4.255817317216204e-07, + "loss": 0.0015, + "step": 14932 + }, + { + "epoch": 3.64, + "learning_rate": 4.244435632545463e-07, + "loss": 0.0019, + "step": 14934 + }, + { + "epoch": 3.64, + "learning_rate": 4.2330688577483014e-07, + "loss": 0.0006, + "step": 14936 + }, + { + "epoch": 3.64, + "learning_rate": 4.221716994594627e-07, + "loss": 0.0014, + "step": 14938 + }, + { + "epoch": 3.64, + "learning_rate": 4.2103800448519914e-07, + "loss": 0.0019, + "step": 14940 + }, + { + "epoch": 3.64, + "learning_rate": 4.1990580102856504e-07, + "loss": 0.0014, + "step": 14942 + }, + { + "epoch": 3.64, + "learning_rate": 4.1877508926585486e-07, + "loss": 0.0012, + "step": 14944 + }, + { + "epoch": 3.64, + "learning_rate": 4.176458693731278e-07, + "loss": 0.0028, + "step": 14946 + }, + { + "epoch": 3.64, + "learning_rate": 4.1651814152620985e-07, + "loss": 0.001, + "step": 14948 + }, + { + "epoch": 3.64, + "learning_rate": 4.1539190590069946e-07, + "loss": 0.0011, + "step": 14950 + }, + { + "epoch": 3.64, + "learning_rate": 4.1426716267195853e-07, + "loss": 0.002, + "step": 14952 + }, + { + "epoch": 3.64, + "learning_rate": 4.131439120151215e-07, + "loss": 0.0077, + "step": 14954 + }, + { + "epoch": 3.64, + "learning_rate": 4.1202215410508284e-07, + "loss": 0.0021, + "step": 14956 + }, + { + "epoch": 3.64, + "learning_rate": 4.1090188911651174e-07, + "loss": 0.0025, + "step": 14958 + }, + { + "epoch": 3.65, + "learning_rate": 4.0978311722383977e-07, + "loss": 0.0058, + "step": 14960 + }, + { + "epoch": 3.65, + "learning_rate": 4.0866583860127095e-07, + "loss": 0.003, + "step": 14962 + }, + { + "epoch": 3.65, + "learning_rate": 4.0755005342277167e-07, + "loss": 0.001, + "step": 14964 + }, + { + "epoch": 3.65, + "learning_rate": 4.0643576186207954e-07, + "loss": 0.0015, + "step": 14966 + }, + { + "epoch": 3.65, + "learning_rate": 4.05322964092697e-07, + "loss": 0.0005, + "step": 14968 + }, + { + "epoch": 3.65, + "learning_rate": 4.042116602878976e-07, + "loss": 0.0013, + "step": 14970 + }, + { + "epoch": 3.65, + "learning_rate": 4.031018506207185e-07, + "loss": 0.0034, + "step": 14972 + }, + { + "epoch": 3.65, + "learning_rate": 4.0199353526396477e-07, + "loss": 0.0017, + "step": 14974 + }, + { + "epoch": 3.65, + "learning_rate": 4.0088671439020953e-07, + "loss": 0.0035, + "step": 14976 + }, + { + "epoch": 3.65, + "learning_rate": 3.997813881717949e-07, + "loss": 0.004, + "step": 14978 + }, + { + "epoch": 3.65, + "learning_rate": 3.986775567808265e-07, + "loss": 0.0045, + "step": 14980 + }, + { + "epoch": 3.65, + "learning_rate": 3.9757522038918137e-07, + "loss": 0.0031, + "step": 14982 + }, + { + "epoch": 3.65, + "learning_rate": 3.9647437916849995e-07, + "loss": 0.002, + "step": 14984 + }, + { + "epoch": 3.65, + "learning_rate": 3.95375033290194e-07, + "loss": 0.0018, + "step": 14986 + }, + { + "epoch": 3.65, + "learning_rate": 3.942771829254388e-07, + "loss": 0.0041, + "step": 14988 + }, + { + "epoch": 3.65, + "learning_rate": 3.9318082824517656e-07, + "loss": 0.0027, + "step": 14990 + }, + { + "epoch": 3.65, + "learning_rate": 3.9208596942011956e-07, + "loss": 0.0009, + "step": 14992 + }, + { + "epoch": 3.65, + "learning_rate": 3.909926066207459e-07, + "loss": 0.0049, + "step": 14994 + }, + { + "epoch": 3.65, + "learning_rate": 3.899007400172994e-07, + "loss": 0.0017, + "step": 14996 + }, + { + "epoch": 3.65, + "learning_rate": 3.888103697797929e-07, + "loss": 0.0012, + "step": 14998 + }, + { + "epoch": 3.65, + "learning_rate": 3.8772149607800624e-07, + "loss": 0.0025, + "step": 15000 + }, + { + "epoch": 3.66, + "learning_rate": 3.8663411908148375e-07, + "loss": 0.0029, + "step": 15002 + }, + { + "epoch": 3.66, + "learning_rate": 3.8554823895953885e-07, + "loss": 0.0037, + "step": 15004 + }, + { + "epoch": 3.66, + "learning_rate": 3.844638558812508e-07, + "loss": 0.0008, + "step": 15006 + }, + { + "epoch": 3.66, + "learning_rate": 3.8338097001546783e-07, + "loss": 0.0028, + "step": 15008 + }, + { + "epoch": 3.66, + "learning_rate": 3.822995815308028e-07, + "loss": 0.0008, + "step": 15010 + }, + { + "epoch": 3.66, + "learning_rate": 3.812196905956356e-07, + "loss": 0.0014, + "step": 15012 + }, + { + "epoch": 3.66, + "learning_rate": 3.80141297378116e-07, + "loss": 0.0027, + "step": 15014 + }, + { + "epoch": 3.66, + "learning_rate": 3.7906440204615423e-07, + "loss": 0.0056, + "step": 15016 + }, + { + "epoch": 3.66, + "learning_rate": 3.779890047674339e-07, + "loss": 0.0049, + "step": 15018 + }, + { + "epoch": 3.66, + "learning_rate": 3.7691510570940115e-07, + "loss": 0.0026, + "step": 15020 + }, + { + "epoch": 3.66, + "learning_rate": 3.758427050392699e-07, + "loss": 0.0036, + "step": 15022 + }, + { + "epoch": 3.66, + "learning_rate": 3.7477180292402214e-07, + "loss": 0.0004, + "step": 15024 + }, + { + "epoch": 3.66, + "learning_rate": 3.737023995304079e-07, + "loss": 0.0018, + "step": 15026 + }, + { + "epoch": 3.66, + "learning_rate": 3.726344950249372e-07, + "loss": 0.0021, + "step": 15028 + }, + { + "epoch": 3.66, + "learning_rate": 3.7156808957389266e-07, + "loss": 0.0034, + "step": 15030 + }, + { + "epoch": 3.66, + "learning_rate": 3.7050318334332145e-07, + "loss": 0.0025, + "step": 15032 + }, + { + "epoch": 3.66, + "learning_rate": 3.694397764990398e-07, + "loss": 0.0017, + "step": 15034 + }, + { + "epoch": 3.66, + "learning_rate": 3.6837786920662534e-07, + "loss": 0.0035, + "step": 15036 + }, + { + "epoch": 3.66, + "learning_rate": 3.6731746163142567e-07, + "loss": 0.0026, + "step": 15038 + }, + { + "epoch": 3.66, + "learning_rate": 3.6625855393855437e-07, + "loss": 0.0004, + "step": 15040 + }, + { + "epoch": 3.67, + "learning_rate": 3.6520114629289504e-07, + "loss": 0.0046, + "step": 15042 + }, + { + "epoch": 3.67, + "learning_rate": 3.641452388590894e-07, + "loss": 0.0025, + "step": 15044 + }, + { + "epoch": 3.67, + "learning_rate": 3.6309083180155247e-07, + "loss": 0.0025, + "step": 15046 + }, + { + "epoch": 3.67, + "learning_rate": 3.620379252844619e-07, + "loss": 0.0019, + "step": 15048 + }, + { + "epoch": 3.67, + "learning_rate": 3.6098651947176657e-07, + "loss": 0.0007, + "step": 15050 + }, + { + "epoch": 3.67, + "learning_rate": 3.5993661452717433e-07, + "loss": 0.0011, + "step": 15052 + }, + { + "epoch": 3.67, + "learning_rate": 3.5888821061416556e-07, + "loss": 0.0026, + "step": 15054 + }, + { + "epoch": 3.67, + "learning_rate": 3.578413078959864e-07, + "loss": 0.0027, + "step": 15056 + }, + { + "epoch": 3.67, + "learning_rate": 3.567959065356452e-07, + "loss": 0.0018, + "step": 15058 + }, + { + "epoch": 3.67, + "learning_rate": 3.557520066959186e-07, + "loss": 0.0034, + "step": 15060 + }, + { + "epoch": 3.67, + "learning_rate": 3.5470960853935086e-07, + "loss": 0.001, + "step": 15062 + }, + { + "epoch": 3.67, + "learning_rate": 3.536687122282512e-07, + "loss": 0.0024, + "step": 15064 + }, + { + "epoch": 3.67, + "learning_rate": 3.5262931792469646e-07, + "loss": 0.0022, + "step": 15066 + }, + { + "epoch": 3.67, + "learning_rate": 3.515914257905262e-07, + "loss": 0.0023, + "step": 15068 + }, + { + "epoch": 3.67, + "learning_rate": 3.5055503598734996e-07, + "loss": 0.0026, + "step": 15070 + }, + { + "epoch": 3.67, + "learning_rate": 3.495201486765387e-07, + "loss": 0.004, + "step": 15072 + }, + { + "epoch": 3.67, + "learning_rate": 3.484867640192358e-07, + "loss": 0.0018, + "step": 15074 + }, + { + "epoch": 3.67, + "learning_rate": 3.474548821763446e-07, + "loss": 0.0038, + "step": 15076 + }, + { + "epoch": 3.67, + "learning_rate": 3.464245033085367e-07, + "loss": 0.003, + "step": 15078 + }, + { + "epoch": 3.67, + "learning_rate": 3.453956275762527e-07, + "loss": 0.0051, + "step": 15080 + }, + { + "epoch": 3.67, + "learning_rate": 3.443682551396954e-07, + "loss": 0.0029, + "step": 15082 + }, + { + "epoch": 3.68, + "learning_rate": 3.433423861588325e-07, + "loss": 0.0012, + "step": 15084 + }, + { + "epoch": 3.68, + "learning_rate": 3.423180207934029e-07, + "loss": 0.0013, + "step": 15086 + }, + { + "epoch": 3.68, + "learning_rate": 3.4129515920290455e-07, + "loss": 0.0026, + "step": 15088 + }, + { + "epoch": 3.68, + "learning_rate": 3.4027380154660893e-07, + "loss": 0.002, + "step": 15090 + }, + { + "epoch": 3.68, + "learning_rate": 3.3925394798354437e-07, + "loss": 0.0028, + "step": 15092 + }, + { + "epoch": 3.68, + "learning_rate": 3.382355986725139e-07, + "loss": 0.0043, + "step": 15094 + }, + { + "epoch": 3.68, + "learning_rate": 3.3721875377208056e-07, + "loss": 0.003, + "step": 15096 + }, + { + "epoch": 3.68, + "learning_rate": 3.362034134405756e-07, + "loss": 0.0016, + "step": 15098 + }, + { + "epoch": 3.68, + "learning_rate": 3.3518957783609476e-07, + "loss": 0.0019, + "step": 15100 + }, + { + "epoch": 3.68, + "learning_rate": 3.3417724711649944e-07, + "loss": 0.0016, + "step": 15102 + }, + { + "epoch": 3.68, + "learning_rate": 3.3316642143941814e-07, + "loss": 0.0032, + "step": 15104 + }, + { + "epoch": 3.68, + "learning_rate": 3.3215710096224483e-07, + "loss": 0.0046, + "step": 15106 + }, + { + "epoch": 3.68, + "learning_rate": 3.3114928584213614e-07, + "loss": 0.0008, + "step": 15108 + }, + { + "epoch": 3.68, + "learning_rate": 3.3014297623601865e-07, + "loss": 0.0005, + "step": 15110 + }, + { + "epoch": 3.68, + "learning_rate": 3.2913817230058265e-07, + "loss": 0.0021, + "step": 15112 + }, + { + "epoch": 3.68, + "learning_rate": 3.2813487419228295e-07, + "loss": 0.0018, + "step": 15114 + }, + { + "epoch": 3.68, + "learning_rate": 3.2713308206733907e-07, + "loss": 0.0031, + "step": 15116 + }, + { + "epoch": 3.68, + "learning_rate": 3.2613279608173953e-07, + "loss": 0.0012, + "step": 15118 + }, + { + "epoch": 3.68, + "learning_rate": 3.2513401639123643e-07, + "loss": 0.0028, + "step": 15120 + }, + { + "epoch": 3.68, + "learning_rate": 3.241367431513487e-07, + "loss": 0.0021, + "step": 15122 + }, + { + "epoch": 3.69, + "learning_rate": 3.2314097651735657e-07, + "loss": 0.0026, + "step": 15124 + }, + { + "epoch": 3.69, + "learning_rate": 3.221467166443115e-07, + "loss": 0.0011, + "step": 15126 + }, + { + "epoch": 3.69, + "learning_rate": 3.211539636870242e-07, + "loss": 0.0025, + "step": 15128 + }, + { + "epoch": 3.69, + "learning_rate": 3.2016271780007766e-07, + "loss": 0.0038, + "step": 15130 + }, + { + "epoch": 3.69, + "learning_rate": 3.1917297913781176e-07, + "loss": 0.0001, + "step": 15132 + }, + { + "epoch": 3.69, + "learning_rate": 3.1818474785433985e-07, + "loss": 0.0046, + "step": 15134 + }, + { + "epoch": 3.69, + "learning_rate": 3.171980241035355e-07, + "loss": 0.0042, + "step": 15136 + }, + { + "epoch": 3.69, + "learning_rate": 3.162128080390414e-07, + "loss": 0.0014, + "step": 15138 + }, + { + "epoch": 3.69, + "learning_rate": 3.152290998142604e-07, + "loss": 0.0024, + "step": 15140 + }, + { + "epoch": 3.69, + "learning_rate": 3.1424689958236556e-07, + "loss": 0.0024, + "step": 15142 + }, + { + "epoch": 3.69, + "learning_rate": 3.132662074962911e-07, + "loss": 0.0007, + "step": 15144 + }, + { + "epoch": 3.69, + "learning_rate": 3.122870237087405e-07, + "loss": 0.0037, + "step": 15146 + }, + { + "epoch": 3.69, + "learning_rate": 3.113093483721774e-07, + "loss": 0.0016, + "step": 15148 + }, + { + "epoch": 3.69, + "learning_rate": 3.1033318163883553e-07, + "loss": 0.0036, + "step": 15150 + }, + { + "epoch": 3.69, + "learning_rate": 3.0935852366070995e-07, + "loss": 0.0018, + "step": 15152 + }, + { + "epoch": 3.69, + "learning_rate": 3.0838537458956487e-07, + "loss": 0.0032, + "step": 15154 + }, + { + "epoch": 3.69, + "learning_rate": 3.074137345769257e-07, + "loss": 0.004, + "step": 15156 + }, + { + "epoch": 3.69, + "learning_rate": 3.064436037740814e-07, + "loss": 0.0039, + "step": 15158 + }, + { + "epoch": 3.69, + "learning_rate": 3.054749823320924e-07, + "loss": 0.0009, + "step": 15160 + }, + { + "epoch": 3.69, + "learning_rate": 3.045078704017801e-07, + "loss": 0.0027, + "step": 15162 + }, + { + "epoch": 3.69, + "learning_rate": 3.0354226813372967e-07, + "loss": 0.0024, + "step": 15164 + }, + { + "epoch": 3.7, + "learning_rate": 3.025781756782931e-07, + "loss": 0.0009, + "step": 15166 + }, + { + "epoch": 3.7, + "learning_rate": 3.0161559318558796e-07, + "loss": 0.0025, + "step": 15168 + }, + { + "epoch": 3.7, + "learning_rate": 3.0065452080549564e-07, + "loss": 0.0012, + "step": 15170 + }, + { + "epoch": 3.7, + "learning_rate": 2.996949586876607e-07, + "loss": 0.0029, + "step": 15172 + }, + { + "epoch": 3.7, + "learning_rate": 2.98736906981496e-07, + "loss": 0.0028, + "step": 15174 + }, + { + "epoch": 3.7, + "learning_rate": 2.9778036583617664e-07, + "loss": 0.004, + "step": 15176 + }, + { + "epoch": 3.7, + "learning_rate": 2.9682533540064564e-07, + "loss": 0.0019, + "step": 15178 + }, + { + "epoch": 3.7, + "learning_rate": 2.958718158236051e-07, + "loss": 0.0023, + "step": 15180 + }, + { + "epoch": 3.7, + "learning_rate": 2.9491980725352753e-07, + "loss": 0.0034, + "step": 15182 + }, + { + "epoch": 3.7, + "learning_rate": 2.939693098386465e-07, + "loss": 0.0015, + "step": 15184 + }, + { + "epoch": 3.7, + "learning_rate": 2.9302032372696356e-07, + "loss": 0.0026, + "step": 15186 + }, + { + "epoch": 3.7, + "learning_rate": 2.920728490662417e-07, + "loss": 0.0061, + "step": 15188 + }, + { + "epoch": 3.7, + "learning_rate": 2.911268860040095e-07, + "loss": 0.0012, + "step": 15190 + }, + { + "epoch": 3.7, + "learning_rate": 2.901824346875626e-07, + "loss": 0.0055, + "step": 15192 + }, + { + "epoch": 3.7, + "learning_rate": 2.892394952639588e-07, + "loss": 0.0004, + "step": 15194 + }, + { + "epoch": 3.7, + "learning_rate": 2.8829806788001846e-07, + "loss": 0.0033, + "step": 15196 + }, + { + "epoch": 3.7, + "learning_rate": 2.8735815268233323e-07, + "loss": 0.0029, + "step": 15198 + }, + { + "epoch": 3.7, + "learning_rate": 2.864197498172516e-07, + "loss": 0.0042, + "step": 15200 + }, + { + "epoch": 3.7, + "learning_rate": 2.8548285943089226e-07, + "loss": 0.0037, + "step": 15202 + }, + { + "epoch": 3.7, + "learning_rate": 2.845474816691329e-07, + "loss": 0.003, + "step": 15204 + }, + { + "epoch": 3.71, + "learning_rate": 2.836136166776227e-07, + "loss": 0.0044, + "step": 15206 + }, + { + "epoch": 3.71, + "learning_rate": 2.826812646017696e-07, + "loss": 0.0014, + "step": 15208 + }, + { + "epoch": 3.71, + "learning_rate": 2.8175042558675094e-07, + "loss": 0.004, + "step": 15210 + }, + { + "epoch": 3.71, + "learning_rate": 2.808210997775018e-07, + "loss": 0.0014, + "step": 15212 + }, + { + "epoch": 3.71, + "learning_rate": 2.7989328731872543e-07, + "loss": 0.002, + "step": 15214 + }, + { + "epoch": 3.71, + "learning_rate": 2.7896698835489065e-07, + "loss": 0.0053, + "step": 15216 + }, + { + "epoch": 3.71, + "learning_rate": 2.78042203030231e-07, + "loss": 0.0015, + "step": 15218 + }, + { + "epoch": 3.71, + "learning_rate": 2.7711893148873904e-07, + "loss": 0.0025, + "step": 15220 + }, + { + "epoch": 3.71, + "learning_rate": 2.7619717387417645e-07, + "loss": 0.0013, + "step": 15222 + }, + { + "epoch": 3.71, + "learning_rate": 2.7527693033007063e-07, + "loss": 0.0002, + "step": 15224 + }, + { + "epoch": 3.71, + "learning_rate": 2.74358200999707e-07, + "loss": 0.0009, + "step": 15226 + }, + { + "epoch": 3.71, + "learning_rate": 2.7344098602614e-07, + "loss": 0.0017, + "step": 15228 + }, + { + "epoch": 3.71, + "learning_rate": 2.725252855521865e-07, + "loss": 0.0033, + "step": 15230 + }, + { + "epoch": 3.71, + "learning_rate": 2.716110997204291e-07, + "loss": 0.005, + "step": 15232 + }, + { + "epoch": 3.71, + "learning_rate": 2.706984286732139e-07, + "loss": 0.003, + "step": 15234 + }, + { + "epoch": 3.71, + "learning_rate": 2.697872725526496e-07, + "loss": 0.003, + "step": 15236 + }, + { + "epoch": 3.71, + "learning_rate": 2.6887763150060917e-07, + "loss": 0.0028, + "step": 15238 + }, + { + "epoch": 3.71, + "learning_rate": 2.679695056587339e-07, + "loss": 0.0028, + "step": 15240 + }, + { + "epoch": 3.71, + "learning_rate": 2.67062895168424e-07, + "loss": 0.0025, + "step": 15242 + }, + { + "epoch": 3.71, + "learning_rate": 2.661578001708442e-07, + "loss": 0.0015, + "step": 15244 + }, + { + "epoch": 3.71, + "learning_rate": 2.6525422080692644e-07, + "loss": 0.0043, + "step": 15246 + }, + { + "epoch": 3.72, + "learning_rate": 2.6435215721736575e-07, + "loss": 0.0012, + "step": 15248 + }, + { + "epoch": 3.72, + "learning_rate": 2.6345160954261874e-07, + "loss": 0.0024, + "step": 15250 + }, + { + "epoch": 3.72, + "learning_rate": 2.625525779229077e-07, + "loss": 0.0019, + "step": 15252 + }, + { + "epoch": 3.72, + "learning_rate": 2.616550624982206e-07, + "loss": 0.0028, + "step": 15254 + }, + { + "epoch": 3.72, + "learning_rate": 2.607590634083046e-07, + "loss": 0.0037, + "step": 15256 + }, + { + "epoch": 3.72, + "learning_rate": 2.5986458079267587e-07, + "loss": 0.0015, + "step": 15258 + }, + { + "epoch": 3.72, + "learning_rate": 2.5897161479061073e-07, + "loss": 0.0037, + "step": 15260 + }, + { + "epoch": 3.72, + "learning_rate": 2.5808016554115136e-07, + "loss": 0.0013, + "step": 15262 + }, + { + "epoch": 3.72, + "learning_rate": 2.571902331831033e-07, + "loss": 0.0022, + "step": 15264 + }, + { + "epoch": 3.72, + "learning_rate": 2.5630181785503583e-07, + "loss": 0.0026, + "step": 15266 + }, + { + "epoch": 3.72, + "learning_rate": 2.5541491969528264e-07, + "loss": 0.0045, + "step": 15268 + }, + { + "epoch": 3.72, + "learning_rate": 2.5452953884193996e-07, + "loss": 0.0026, + "step": 15270 + }, + { + "epoch": 3.72, + "learning_rate": 2.536456754328664e-07, + "loss": 0.0007, + "step": 15272 + }, + { + "epoch": 3.72, + "learning_rate": 2.527633296056908e-07, + "loss": 0.0029, + "step": 15274 + }, + { + "epoch": 3.72, + "learning_rate": 2.518825014977966e-07, + "loss": 0.0011, + "step": 15276 + }, + { + "epoch": 3.72, + "learning_rate": 2.5100319124633734e-07, + "loss": 0.0035, + "step": 15278 + }, + { + "epoch": 3.72, + "learning_rate": 2.501253989882302e-07, + "loss": 0.0026, + "step": 15280 + }, + { + "epoch": 3.72, + "learning_rate": 2.492491248601503e-07, + "loss": 0.0031, + "step": 15282 + }, + { + "epoch": 3.72, + "learning_rate": 2.4837436899854407e-07, + "loss": 0.0021, + "step": 15284 + }, + { + "epoch": 3.72, + "learning_rate": 2.4750113153961477e-07, + "loss": 0.0016, + "step": 15286 + }, + { + "epoch": 3.73, + "learning_rate": 2.466294126193325e-07, + "loss": 0.0021, + "step": 15288 + }, + { + "epoch": 3.73, + "learning_rate": 2.4575921237343316e-07, + "loss": 0.0017, + "step": 15290 + }, + { + "epoch": 3.73, + "learning_rate": 2.4489053093741055e-07, + "loss": 0.0028, + "step": 15292 + }, + { + "epoch": 3.73, + "learning_rate": 2.440233684465254e-07, + "loss": 0.0027, + "step": 15294 + }, + { + "epoch": 3.73, + "learning_rate": 2.4315772503580416e-07, + "loss": 0.0015, + "step": 15296 + }, + { + "epoch": 3.73, + "learning_rate": 2.422936008400323e-07, + "loss": 0.0029, + "step": 15298 + }, + { + "epoch": 3.73, + "learning_rate": 2.414309959937589e-07, + "loss": 0.0027, + "step": 15300 + }, + { + "epoch": 3.73, + "learning_rate": 2.4056991063130084e-07, + "loss": 0.0048, + "step": 15302 + }, + { + "epoch": 3.73, + "learning_rate": 2.397103448867344e-07, + "loss": 0.0012, + "step": 15304 + }, + { + "epoch": 3.73, + "learning_rate": 2.3885229889390126e-07, + "loss": 0.0019, + "step": 15306 + }, + { + "epoch": 3.73, + "learning_rate": 2.3799577278640463e-07, + "loss": 0.0007, + "step": 15308 + }, + { + "epoch": 3.73, + "learning_rate": 2.3714076669761333e-07, + "loss": 0.0008, + "step": 15310 + }, + { + "epoch": 3.73, + "learning_rate": 2.3628728076065754e-07, + "loss": 0.0012, + "step": 15312 + }, + { + "epoch": 3.73, + "learning_rate": 2.354353151084321e-07, + "loss": 0.0033, + "step": 15314 + }, + { + "epoch": 3.73, + "learning_rate": 2.345848698735942e-07, + "loss": 0.0013, + "step": 15316 + }, + { + "epoch": 3.73, + "learning_rate": 2.3373594518856458e-07, + "loss": 0.0012, + "step": 15318 + }, + { + "epoch": 3.73, + "learning_rate": 2.3288854118552639e-07, + "loss": 0.0019, + "step": 15320 + }, + { + "epoch": 3.73, + "learning_rate": 2.3204265799643077e-07, + "loss": 0.0038, + "step": 15322 + }, + { + "epoch": 3.73, + "learning_rate": 2.311982957529846e-07, + "loss": 0.0029, + "step": 15324 + }, + { + "epoch": 3.73, + "learning_rate": 2.3035545458666154e-07, + "loss": 0.0051, + "step": 15326 + }, + { + "epoch": 3.73, + "learning_rate": 2.2951413462869886e-07, + "loss": 0.0038, + "step": 15328 + }, + { + "epoch": 3.74, + "learning_rate": 2.286743360100985e-07, + "loss": 0.0027, + "step": 15330 + }, + { + "epoch": 3.74, + "learning_rate": 2.2783605886162018e-07, + "loss": 0.0033, + "step": 15332 + }, + { + "epoch": 3.74, + "learning_rate": 2.2699930331379182e-07, + "loss": 0.0017, + "step": 15334 + }, + { + "epoch": 3.74, + "learning_rate": 2.2616406949690362e-07, + "loss": 0.0017, + "step": 15336 + }, + { + "epoch": 3.74, + "learning_rate": 2.2533035754100708e-07, + "loss": 0.001, + "step": 15338 + }, + { + "epoch": 3.74, + "learning_rate": 2.2449816757591835e-07, + "loss": 0.0048, + "step": 15340 + }, + { + "epoch": 3.74, + "learning_rate": 2.236674997312127e-07, + "loss": 0.0025, + "step": 15342 + }, + { + "epoch": 3.74, + "learning_rate": 2.2283835413623444e-07, + "loss": 0.0007, + "step": 15344 + }, + { + "epoch": 3.74, + "learning_rate": 2.2201073092008696e-07, + "loss": 0.0021, + "step": 15346 + }, + { + "epoch": 3.74, + "learning_rate": 2.2118463021163715e-07, + "loss": 0.0062, + "step": 15348 + }, + { + "epoch": 3.74, + "learning_rate": 2.2036005213951662e-07, + "loss": 0.0038, + "step": 15350 + }, + { + "epoch": 3.74, + "learning_rate": 2.1953699683211704e-07, + "loss": 0.0028, + "step": 15352 + }, + { + "epoch": 3.74, + "learning_rate": 2.1871546441759484e-07, + "loss": 0.0022, + "step": 15354 + }, + { + "epoch": 3.74, + "learning_rate": 2.1789545502386877e-07, + "loss": 0.0025, + "step": 15356 + }, + { + "epoch": 3.74, + "learning_rate": 2.1707696877862005e-07, + "loss": 0.001, + "step": 15358 + }, + { + "epoch": 3.74, + "learning_rate": 2.162600058092945e-07, + "loss": 0.0004, + "step": 15360 + }, + { + "epoch": 3.74, + "learning_rate": 2.1544456624309927e-07, + "loss": 0.002, + "step": 15362 + }, + { + "epoch": 3.74, + "learning_rate": 2.146306502070039e-07, + "loss": 0.0005, + "step": 15364 + }, + { + "epoch": 3.74, + "learning_rate": 2.1381825782774145e-07, + "loss": 0.0017, + "step": 15366 + }, + { + "epoch": 3.74, + "learning_rate": 2.130073892318074e-07, + "loss": 0.0014, + "step": 15368 + }, + { + "epoch": 3.75, + "learning_rate": 2.1219804454546188e-07, + "loss": 0.0021, + "step": 15370 + }, + { + "epoch": 3.75, + "learning_rate": 2.1139022389472297e-07, + "loss": 0.0017, + "step": 15372 + }, + { + "epoch": 3.75, + "learning_rate": 2.105839274053767e-07, + "loss": 0.0033, + "step": 15374 + }, + { + "epoch": 3.75, + "learning_rate": 2.0977915520297042e-07, + "loss": 0.001, + "step": 15376 + }, + { + "epoch": 3.75, + "learning_rate": 2.089759074128117e-07, + "loss": 0.0022, + "step": 15378 + }, + { + "epoch": 3.75, + "learning_rate": 2.081741841599727e-07, + "loss": 0.0046, + "step": 15380 + }, + { + "epoch": 3.75, + "learning_rate": 2.0737398556928689e-07, + "loss": 0.0031, + "step": 15382 + }, + { + "epoch": 3.75, + "learning_rate": 2.065753117653535e-07, + "loss": 0.0041, + "step": 15384 + }, + { + "epoch": 3.75, + "learning_rate": 2.0577816287253082e-07, + "loss": 0.0013, + "step": 15386 + }, + { + "epoch": 3.75, + "learning_rate": 2.049825390149396e-07, + "loss": 0.0039, + "step": 15388 + }, + { + "epoch": 3.75, + "learning_rate": 2.0418844031646735e-07, + "loss": 0.0016, + "step": 15390 + }, + { + "epoch": 3.75, + "learning_rate": 2.0339586690076074e-07, + "loss": 0.0018, + "step": 15392 + }, + { + "epoch": 3.75, + "learning_rate": 2.0260481889122775e-07, + "loss": 0.0023, + "step": 15394 + }, + { + "epoch": 3.75, + "learning_rate": 2.0181529641104315e-07, + "loss": 0.0026, + "step": 15396 + }, + { + "epoch": 3.75, + "learning_rate": 2.010272995831386e-07, + "loss": 0.0019, + "step": 15398 + }, + { + "epoch": 3.75, + "learning_rate": 2.0024082853021487e-07, + "loss": 0.0012, + "step": 15400 + }, + { + "epoch": 3.75, + "learning_rate": 1.9945588337472733e-07, + "loss": 0.0046, + "step": 15402 + }, + { + "epoch": 3.75, + "learning_rate": 1.986724642389004e-07, + "loss": 0.0037, + "step": 15404 + }, + { + "epoch": 3.75, + "learning_rate": 1.9789057124471876e-07, + "loss": 0.0014, + "step": 15406 + }, + { + "epoch": 3.75, + "learning_rate": 1.9711020451392837e-07, + "loss": 0.0023, + "step": 15408 + }, + { + "epoch": 3.75, + "learning_rate": 1.9633136416803867e-07, + "loss": 0.0034, + "step": 15410 + }, + { + "epoch": 3.76, + "learning_rate": 1.9555405032832043e-07, + "loss": 0.0009, + "step": 15412 + }, + { + "epoch": 3.76, + "learning_rate": 1.9477826311580793e-07, + "loss": 0.0004, + "step": 15414 + }, + { + "epoch": 3.76, + "learning_rate": 1.9400400265129682e-07, + "loss": 0.0018, + "step": 15416 + }, + { + "epoch": 3.76, + "learning_rate": 1.9323126905534618e-07, + "loss": 0.0031, + "step": 15418 + }, + { + "epoch": 3.76, + "learning_rate": 1.924600624482742e-07, + "loss": 0.0018, + "step": 15420 + }, + { + "epoch": 3.76, + "learning_rate": 1.9169038295016707e-07, + "loss": 0.0026, + "step": 15422 + }, + { + "epoch": 3.76, + "learning_rate": 1.9092223068086891e-07, + "loss": 0.0022, + "step": 15424 + }, + { + "epoch": 3.76, + "learning_rate": 1.9015560575998403e-07, + "loss": 0.001, + "step": 15426 + }, + { + "epoch": 3.76, + "learning_rate": 1.8939050830688366e-07, + "loss": 0.0017, + "step": 15428 + }, + { + "epoch": 3.76, + "learning_rate": 1.8862693844070023e-07, + "loss": 0.0011, + "step": 15430 + }, + { + "epoch": 3.76, + "learning_rate": 1.8786489628032644e-07, + "loss": 0.0014, + "step": 15432 + }, + { + "epoch": 3.76, + "learning_rate": 1.8710438194441848e-07, + "loss": 0.0022, + "step": 15434 + }, + { + "epoch": 3.76, + "learning_rate": 1.8634539555139385e-07, + "loss": 0.0028, + "step": 15436 + }, + { + "epoch": 3.76, + "learning_rate": 1.8558793721943248e-07, + "loss": 0.0011, + "step": 15438 + }, + { + "epoch": 3.76, + "learning_rate": 1.8483200706647774e-07, + "loss": 0.0033, + "step": 15440 + }, + { + "epoch": 3.76, + "learning_rate": 1.840776052102322e-07, + "loss": 0.0009, + "step": 15442 + }, + { + "epoch": 3.76, + "learning_rate": 1.833247317681619e-07, + "loss": 0.0039, + "step": 15444 + }, + { + "epoch": 3.76, + "learning_rate": 1.8257338685749526e-07, + "loss": 0.0014, + "step": 15446 + }, + { + "epoch": 3.76, + "learning_rate": 1.8182357059522427e-07, + "loss": 0.0016, + "step": 15448 + }, + { + "epoch": 3.76, + "learning_rate": 1.8107528309809774e-07, + "loss": 0.0042, + "step": 15450 + }, + { + "epoch": 3.77, + "learning_rate": 1.8032852448263248e-07, + "loss": 0.0042, + "step": 15452 + }, + { + "epoch": 3.77, + "learning_rate": 1.795832948651033e-07, + "loss": 0.0018, + "step": 15454 + }, + { + "epoch": 3.77, + "learning_rate": 1.7883959436154952e-07, + "loss": 0.0017, + "step": 15456 + }, + { + "epoch": 3.77, + "learning_rate": 1.7809742308776856e-07, + "loss": 0.0027, + "step": 15458 + }, + { + "epoch": 3.77, + "learning_rate": 1.7735678115932353e-07, + "loss": 0.0015, + "step": 15460 + }, + { + "epoch": 3.77, + "learning_rate": 1.7661766869153772e-07, + "loss": 0.0005, + "step": 15462 + }, + { + "epoch": 3.77, + "learning_rate": 1.75880085799498e-07, + "loss": 0.0011, + "step": 15464 + }, + { + "epoch": 3.77, + "learning_rate": 1.7514403259805135e-07, + "loss": 0.0027, + "step": 15466 + }, + { + "epoch": 3.77, + "learning_rate": 1.7440950920180388e-07, + "loss": 0.0037, + "step": 15468 + }, + { + "epoch": 3.77, + "learning_rate": 1.7367651572512967e-07, + "loss": 0.003, + "step": 15470 + }, + { + "epoch": 3.77, + "learning_rate": 1.7294505228216075e-07, + "loss": 0.0019, + "step": 15472 + }, + { + "epoch": 3.77, + "learning_rate": 1.7221511898679043e-07, + "loss": 0.0007, + "step": 15474 + }, + { + "epoch": 3.77, + "learning_rate": 1.7148671595267564e-07, + "loss": 0.0031, + "step": 15476 + }, + { + "epoch": 3.77, + "learning_rate": 1.707598432932356e-07, + "loss": 0.0016, + "step": 15478 + }, + { + "epoch": 3.77, + "learning_rate": 1.7003450112164866e-07, + "loss": 0.0022, + "step": 15480 + }, + { + "epoch": 3.77, + "learning_rate": 1.693106895508556e-07, + "loss": 0.0018, + "step": 15482 + }, + { + "epoch": 3.77, + "learning_rate": 1.6858840869355963e-07, + "loss": 0.0034, + "step": 15484 + }, + { + "epoch": 3.77, + "learning_rate": 1.6786765866222522e-07, + "loss": 0.0047, + "step": 15486 + }, + { + "epoch": 3.77, + "learning_rate": 1.6714843956908145e-07, + "loss": 0.0012, + "step": 15488 + }, + { + "epoch": 3.77, + "learning_rate": 1.664307515261121e-07, + "loss": 0.0024, + "step": 15490 + }, + { + "epoch": 3.77, + "learning_rate": 1.6571459464506888e-07, + "loss": 0.0024, + "step": 15492 + }, + { + "epoch": 3.78, + "learning_rate": 1.6499996903746374e-07, + "loss": 0.0003, + "step": 15494 + }, + { + "epoch": 3.78, + "learning_rate": 1.6428687481456762e-07, + "loss": 0.0031, + "step": 15496 + }, + { + "epoch": 3.78, + "learning_rate": 1.6357531208741507e-07, + "loss": 0.0025, + "step": 15498 + }, + { + "epoch": 3.78, + "learning_rate": 1.62865280966803e-07, + "loss": 0.003, + "step": 15500 + }, + { + "epoch": 3.78, + "learning_rate": 1.621567815632863e-07, + "loss": 0.0018, + "step": 15502 + }, + { + "epoch": 3.78, + "learning_rate": 1.6144981398718674e-07, + "loss": 0.002, + "step": 15504 + }, + { + "epoch": 3.78, + "learning_rate": 1.6074437834858293e-07, + "loss": 0.0009, + "step": 15506 + }, + { + "epoch": 3.78, + "learning_rate": 1.6004047475731587e-07, + "loss": 0.0021, + "step": 15508 + }, + { + "epoch": 3.78, + "learning_rate": 1.5933810332299015e-07, + "loss": 0.003, + "step": 15510 + }, + { + "epoch": 3.78, + "learning_rate": 1.586372641549705e-07, + "loss": 0.0018, + "step": 15512 + }, + { + "epoch": 3.78, + "learning_rate": 1.5793795736238182e-07, + "loss": 0.003, + "step": 15514 + }, + { + "epoch": 3.78, + "learning_rate": 1.5724018305411148e-07, + "loss": 0.0012, + "step": 15516 + }, + { + "epoch": 3.78, + "learning_rate": 1.5654394133880812e-07, + "loss": 0.0024, + "step": 15518 + }, + { + "epoch": 3.78, + "learning_rate": 1.5584923232488502e-07, + "loss": 0.0027, + "step": 15520 + }, + { + "epoch": 3.78, + "learning_rate": 1.551560561205101e-07, + "loss": 0.0062, + "step": 15522 + }, + { + "epoch": 3.78, + "learning_rate": 1.544644128336159e-07, + "loss": 0.0016, + "step": 15524 + }, + { + "epoch": 3.78, + "learning_rate": 1.5377430257189962e-07, + "loss": 0.0034, + "step": 15526 + }, + { + "epoch": 3.78, + "learning_rate": 1.530857254428153e-07, + "loss": 0.0034, + "step": 15528 + }, + { + "epoch": 3.78, + "learning_rate": 1.523986815535783e-07, + "loss": 0.0028, + "step": 15530 + }, + { + "epoch": 3.78, + "learning_rate": 1.5171317101116746e-07, + "loss": 0.0035, + "step": 15532 + }, + { + "epoch": 3.79, + "learning_rate": 1.5102919392232407e-07, + "loss": 0.0012, + "step": 15534 + }, + { + "epoch": 3.79, + "learning_rate": 1.5034675039354517e-07, + "loss": 0.0018, + "step": 15536 + }, + { + "epoch": 3.79, + "learning_rate": 1.4966584053109579e-07, + "loss": 0.002, + "step": 15538 + }, + { + "epoch": 3.79, + "learning_rate": 1.4898646444099553e-07, + "loss": 0.0032, + "step": 15540 + }, + { + "epoch": 3.79, + "learning_rate": 1.4830862222902975e-07, + "loss": 0.0014, + "step": 15542 + }, + { + "epoch": 3.79, + "learning_rate": 1.4763231400074519e-07, + "loss": 0.0016, + "step": 15544 + }, + { + "epoch": 3.79, + "learning_rate": 1.4695753986144646e-07, + "loss": 0.0011, + "step": 15546 + }, + { + "epoch": 3.79, + "learning_rate": 1.4628429991620175e-07, + "loss": 0.0007, + "step": 15548 + }, + { + "epoch": 3.79, + "learning_rate": 1.4561259426983942e-07, + "loss": 0.0013, + "step": 15550 + }, + { + "epoch": 3.79, + "learning_rate": 1.4494242302695029e-07, + "loss": 0.0008, + "step": 15552 + }, + { + "epoch": 3.79, + "learning_rate": 1.4427378629188306e-07, + "loss": 0.0032, + "step": 15554 + }, + { + "epoch": 3.79, + "learning_rate": 1.4360668416875002e-07, + "loss": 0.0028, + "step": 15556 + }, + { + "epoch": 3.79, + "learning_rate": 1.4294111676142474e-07, + "loss": 0.0045, + "step": 15558 + }, + { + "epoch": 3.79, + "learning_rate": 1.4227708417354213e-07, + "loss": 0.0025, + "step": 15560 + }, + { + "epoch": 3.79, + "learning_rate": 1.4161458650849502e-07, + "loss": 0.0014, + "step": 15562 + }, + { + "epoch": 3.79, + "learning_rate": 1.409536238694409e-07, + "loss": 0.0007, + "step": 15564 + }, + { + "epoch": 3.79, + "learning_rate": 1.402941963592963e-07, + "loss": 0.0012, + "step": 15566 + }, + { + "epoch": 3.79, + "learning_rate": 1.3963630408073914e-07, + "loss": 0.0016, + "step": 15568 + }, + { + "epoch": 3.79, + "learning_rate": 1.3897994713620855e-07, + "loss": 0.0009, + "step": 15570 + }, + { + "epoch": 3.79, + "learning_rate": 1.383251256279028e-07, + "loss": 0.0021, + "step": 15572 + }, + { + "epoch": 3.79, + "learning_rate": 1.3767183965778365e-07, + "loss": 0.0049, + "step": 15574 + }, + { + "epoch": 3.8, + "learning_rate": 1.3702008932757416e-07, + "loss": 0.0031, + "step": 15576 + }, + { + "epoch": 3.8, + "learning_rate": 1.363698747387554e-07, + "loss": 0.0005, + "step": 15578 + }, + { + "epoch": 3.8, + "learning_rate": 1.3572119599257083e-07, + "loss": 0.0006, + "step": 15580 + }, + { + "epoch": 3.8, + "learning_rate": 1.350740531900241e-07, + "loss": 0.0013, + "step": 15582 + }, + { + "epoch": 3.8, + "learning_rate": 1.3442844643188124e-07, + "loss": 0.0029, + "step": 15584 + }, + { + "epoch": 3.8, + "learning_rate": 1.3378437581866855e-07, + "loss": 0.0029, + "step": 15586 + }, + { + "epoch": 3.8, + "learning_rate": 1.3314184145067023e-07, + "loss": 0.0015, + "step": 15588 + }, + { + "epoch": 3.8, + "learning_rate": 1.3250084342793734e-07, + "loss": 0.0008, + "step": 15590 + }, + { + "epoch": 3.8, + "learning_rate": 1.3186138185027563e-07, + "loss": 0.0021, + "step": 15592 + }, + { + "epoch": 3.8, + "learning_rate": 1.3122345681725545e-07, + "loss": 0.0014, + "step": 15594 + }, + { + "epoch": 3.8, + "learning_rate": 1.3058706842820624e-07, + "loss": 0.0017, + "step": 15596 + }, + { + "epoch": 3.8, + "learning_rate": 1.2995221678221758e-07, + "loss": 0.0022, + "step": 15598 + }, + { + "epoch": 3.8, + "learning_rate": 1.2931890197814267e-07, + "loss": 0.0011, + "step": 15600 + }, + { + "epoch": 3.8, + "learning_rate": 1.286871241145915e-07, + "loss": 0.0034, + "step": 15602 + }, + { + "epoch": 3.8, + "learning_rate": 1.280568832899376e-07, + "loss": 0.0027, + "step": 15604 + }, + { + "epoch": 3.8, + "learning_rate": 1.274281796023158e-07, + "loss": 0.0021, + "step": 15606 + }, + { + "epoch": 3.8, + "learning_rate": 1.268010131496178e-07, + "loss": 0.0011, + "step": 15608 + }, + { + "epoch": 3.8, + "learning_rate": 1.261753840294977e-07, + "loss": 0.0011, + "step": 15610 + }, + { + "epoch": 3.8, + "learning_rate": 1.2555129233937313e-07, + "loss": 0.0014, + "step": 15612 + }, + { + "epoch": 3.8, + "learning_rate": 1.249287381764186e-07, + "loss": 0.0029, + "step": 15614 + }, + { + "epoch": 3.81, + "learning_rate": 1.24307721637571e-07, + "loss": 0.0016, + "step": 15616 + }, + { + "epoch": 3.81, + "learning_rate": 1.2368824281952629e-07, + "loss": 0.0015, + "step": 15618 + }, + { + "epoch": 3.81, + "learning_rate": 1.2307030181874402e-07, + "loss": 0.0019, + "step": 15620 + }, + { + "epoch": 3.81, + "learning_rate": 1.2245389873144052e-07, + "loss": 0.0005, + "step": 15622 + }, + { + "epoch": 3.81, + "learning_rate": 1.2183903365359574e-07, + "loss": 0.0019, + "step": 15624 + }, + { + "epoch": 3.81, + "learning_rate": 1.2122570668094746e-07, + "loss": 0.0022, + "step": 15626 + }, + { + "epoch": 3.81, + "learning_rate": 1.2061391790899602e-07, + "loss": 0.001, + "step": 15628 + }, + { + "epoch": 3.81, + "learning_rate": 1.2000366743300184e-07, + "loss": 0.0057, + "step": 15630 + }, + { + "epoch": 3.81, + "learning_rate": 1.1939495534798672e-07, + "loss": 0.005, + "step": 15632 + }, + { + "epoch": 3.81, + "learning_rate": 1.1878778174872929e-07, + "loss": 0.0065, + "step": 15634 + }, + { + "epoch": 3.81, + "learning_rate": 1.181821467297728e-07, + "loss": 0.0006, + "step": 15636 + }, + { + "epoch": 3.81, + "learning_rate": 1.1757805038541958e-07, + "loss": 0.0013, + "step": 15638 + }, + { + "epoch": 3.81, + "learning_rate": 1.1697549280973108e-07, + "loss": 0.0018, + "step": 15640 + }, + { + "epoch": 3.81, + "learning_rate": 1.1637447409653002e-07, + "loss": 0.0024, + "step": 15642 + }, + { + "epoch": 3.81, + "learning_rate": 1.1577499433939932e-07, + "loss": 0.0136, + "step": 15644 + }, + { + "epoch": 3.81, + "learning_rate": 1.151770536316843e-07, + "loss": 0.0039, + "step": 15646 + }, + { + "epoch": 3.81, + "learning_rate": 1.1458065206648717e-07, + "loss": 0.0037, + "step": 15648 + }, + { + "epoch": 3.81, + "learning_rate": 1.1398578973667362e-07, + "loss": 0.0018, + "step": 15650 + }, + { + "epoch": 3.81, + "learning_rate": 1.1339246673486737e-07, + "loss": 0.0013, + "step": 15652 + }, + { + "epoch": 3.81, + "learning_rate": 1.1280068315345338e-07, + "loss": 0.0047, + "step": 15654 + }, + { + "epoch": 3.81, + "learning_rate": 1.1221043908457685e-07, + "loss": 0.0013, + "step": 15656 + }, + { + "epoch": 3.82, + "learning_rate": 1.1162173462014313e-07, + "loss": 0.0044, + "step": 15658 + }, + { + "epoch": 3.82, + "learning_rate": 1.1103456985181893e-07, + "loss": 0.0008, + "step": 15660 + }, + { + "epoch": 3.82, + "learning_rate": 1.1044894487102997e-07, + "loss": 0.0013, + "step": 15662 + }, + { + "epoch": 3.82, + "learning_rate": 1.098648597689611e-07, + "loss": 0.0038, + "step": 15664 + }, + { + "epoch": 3.82, + "learning_rate": 1.0928231463656069e-07, + "loss": 0.0025, + "step": 15666 + }, + { + "epoch": 3.82, + "learning_rate": 1.0870130956453396e-07, + "loss": 0.0016, + "step": 15668 + }, + { + "epoch": 3.82, + "learning_rate": 1.0812184464334963e-07, + "loss": 0.0016, + "step": 15670 + }, + { + "epoch": 3.82, + "learning_rate": 1.0754391996323332e-07, + "loss": 0.0009, + "step": 15672 + }, + { + "epoch": 3.82, + "learning_rate": 1.0696753561417306e-07, + "loss": 0.0024, + "step": 15674 + }, + { + "epoch": 3.82, + "learning_rate": 1.0639269168591703e-07, + "loss": 0.002, + "step": 15676 + }, + { + "epoch": 3.82, + "learning_rate": 1.0581938826797034e-07, + "loss": 0.0021, + "step": 15678 + }, + { + "epoch": 3.82, + "learning_rate": 1.0524762544960265e-07, + "loss": 0.0011, + "step": 15680 + }, + { + "epoch": 3.82, + "learning_rate": 1.0467740331984166e-07, + "loss": 0.0019, + "step": 15682 + }, + { + "epoch": 3.82, + "learning_rate": 1.0410872196747524e-07, + "loss": 0.0012, + "step": 15684 + }, + { + "epoch": 3.82, + "learning_rate": 1.0354158148105032e-07, + "loss": 0.0014, + "step": 15686 + }, + { + "epoch": 3.82, + "learning_rate": 1.0297598194887736e-07, + "loss": 0.0012, + "step": 15688 + }, + { + "epoch": 3.82, + "learning_rate": 1.0241192345902152e-07, + "loss": 0.0019, + "step": 15690 + }, + { + "epoch": 3.82, + "learning_rate": 1.018494060993136e-07, + "loss": 0.0013, + "step": 15692 + }, + { + "epoch": 3.82, + "learning_rate": 1.0128842995734023e-07, + "loss": 0.0017, + "step": 15694 + }, + { + "epoch": 3.82, + "learning_rate": 1.0072899512045154e-07, + "loss": 0.0009, + "step": 15696 + }, + { + "epoch": 3.83, + "learning_rate": 1.001711016757534e-07, + "loss": 0.0012, + "step": 15698 + }, + { + "epoch": 3.83, + "learning_rate": 9.96147497101152e-08, + "loss": 0.0034, + "step": 15700 + }, + { + "epoch": 3.83, + "learning_rate": 9.905993931016544e-08, + "loss": 0.0014, + "step": 15702 + }, + { + "epoch": 3.83, + "learning_rate": 9.85066705622928e-08, + "loss": 0.0021, + "step": 15704 + }, + { + "epoch": 3.83, + "learning_rate": 9.795494355264502e-08, + "loss": 0.0026, + "step": 15706 + }, + { + "epoch": 3.83, + "learning_rate": 9.740475836713003e-08, + "loss": 0.0015, + "step": 15708 + }, + { + "epoch": 3.83, + "learning_rate": 9.68561150914149e-08, + "loss": 0.0042, + "step": 15710 + }, + { + "epoch": 3.83, + "learning_rate": 9.630901381092905e-08, + "loss": 0.0011, + "step": 15712 + }, + { + "epoch": 3.83, + "learning_rate": 9.57634546108599e-08, + "loss": 0.007, + "step": 15714 + }, + { + "epoch": 3.83, + "learning_rate": 9.521943757615393e-08, + "loss": 0.0008, + "step": 15716 + }, + { + "epoch": 3.83, + "learning_rate": 9.467696279152116e-08, + "loss": 0.0025, + "step": 15718 + }, + { + "epoch": 3.83, + "learning_rate": 9.413603034142849e-08, + "loss": 0.0027, + "step": 15720 + }, + { + "epoch": 3.83, + "learning_rate": 9.359664031010185e-08, + "loss": 0.0046, + "step": 15722 + }, + { + "epoch": 3.83, + "learning_rate": 9.305879278152851e-08, + "loss": 0.0028, + "step": 15724 + }, + { + "epoch": 3.83, + "learning_rate": 9.252248783945594e-08, + "loss": 0.0006, + "step": 15726 + }, + { + "epoch": 3.83, + "learning_rate": 9.198772556739177e-08, + "loss": 0.0041, + "step": 15728 + }, + { + "epoch": 3.83, + "learning_rate": 9.145450604860163e-08, + "loss": 0.0022, + "step": 15730 + }, + { + "epoch": 3.83, + "learning_rate": 9.09228293661124e-08, + "loss": 0.0023, + "step": 15732 + }, + { + "epoch": 3.83, + "learning_rate": 9.039269560271013e-08, + "loss": 0.0021, + "step": 15734 + }, + { + "epoch": 3.83, + "learning_rate": 8.986410484093988e-08, + "loss": 0.0048, + "step": 15736 + }, + { + "epoch": 3.83, + "learning_rate": 8.933705716310804e-08, + "loss": 0.0042, + "step": 15738 + }, + { + "epoch": 3.84, + "learning_rate": 8.881155265128005e-08, + "loss": 0.0018, + "step": 15740 + }, + { + "epoch": 3.84, + "learning_rate": 8.828759138727939e-08, + "loss": 0.0017, + "step": 15742 + }, + { + "epoch": 3.84, + "learning_rate": 8.776517345269408e-08, + "loss": 0.0018, + "step": 15744 + }, + { + "epoch": 3.84, + "learning_rate": 8.724429892886577e-08, + "loss": 0.002, + "step": 15746 + }, + { + "epoch": 3.84, + "learning_rate": 8.672496789689955e-08, + "loss": 0.0029, + "step": 15748 + }, + { + "epoch": 3.84, + "learning_rate": 8.620718043765853e-08, + "loss": 0.0038, + "step": 15750 + }, + { + "epoch": 3.84, + "learning_rate": 8.56909366317682e-08, + "loss": 0.0009, + "step": 15752 + }, + { + "epoch": 3.84, + "learning_rate": 8.517623655960872e-08, + "loss": 0.0007, + "step": 15754 + }, + { + "epoch": 3.84, + "learning_rate": 8.466308030132597e-08, + "loss": 0.0024, + "step": 15756 + }, + { + "epoch": 3.84, + "learning_rate": 8.415146793681939e-08, + "loss": 0.0026, + "step": 15758 + }, + { + "epoch": 3.84, + "learning_rate": 8.364139954575301e-08, + "loss": 0.0042, + "step": 15760 + }, + { + "epoch": 3.84, + "learning_rate": 8.313287520754776e-08, + "loss": 0.0029, + "step": 15762 + }, + { + "epoch": 3.84, + "learning_rate": 8.262589500138473e-08, + "loss": 0.0017, + "step": 15764 + }, + { + "epoch": 3.84, + "learning_rate": 8.212045900620414e-08, + "loss": 0.0027, + "step": 15766 + }, + { + "epoch": 3.84, + "learning_rate": 8.161656730070744e-08, + "loss": 0.0023, + "step": 15768 + }, + { + "epoch": 3.84, + "learning_rate": 8.1114219963353e-08, + "loss": 0.0019, + "step": 15770 + }, + { + "epoch": 3.84, + "learning_rate": 8.061341707236048e-08, + "loss": 0.003, + "step": 15772 + }, + { + "epoch": 3.84, + "learning_rate": 8.011415870570971e-08, + "loss": 0.0031, + "step": 15774 + }, + { + "epoch": 3.84, + "learning_rate": 7.961644494113741e-08, + "loss": 0.0038, + "step": 15776 + }, + { + "epoch": 3.84, + "learning_rate": 7.912027585614268e-08, + "loss": 0.0025, + "step": 15778 + }, + { + "epoch": 3.85, + "learning_rate": 7.862565152798263e-08, + "loss": 0.0019, + "step": 15780 + }, + { + "epoch": 3.85, + "learning_rate": 7.813257203367452e-08, + "loss": 0.0038, + "step": 15782 + }, + { + "epoch": 3.85, + "learning_rate": 7.764103744999363e-08, + "loss": 0.0009, + "step": 15784 + }, + { + "epoch": 3.85, + "learning_rate": 7.715104785347539e-08, + "loss": 0.0025, + "step": 15786 + }, + { + "epoch": 3.85, + "learning_rate": 7.666260332041653e-08, + "loss": 0.0027, + "step": 15788 + }, + { + "epoch": 3.85, + "learning_rate": 7.617570392686958e-08, + "loss": 0.0026, + "step": 15790 + }, + { + "epoch": 3.85, + "learning_rate": 7.569034974865053e-08, + "loss": 0.0016, + "step": 15792 + }, + { + "epoch": 3.85, + "learning_rate": 7.520654086133117e-08, + "loss": 0.0019, + "step": 15794 + }, + { + "epoch": 3.85, + "learning_rate": 7.472427734024567e-08, + "loss": 0.0043, + "step": 15796 + }, + { + "epoch": 3.85, + "learning_rate": 7.4243559260484e-08, + "loss": 0.0025, + "step": 15798 + }, + { + "epoch": 3.85, + "learning_rate": 7.376438669690067e-08, + "loss": 0.0005, + "step": 15800 + }, + { + "epoch": 3.85, + "learning_rate": 7.328675972410271e-08, + "loss": 0.0061, + "step": 15802 + }, + { + "epoch": 3.85, + "learning_rate": 7.281067841646394e-08, + "loss": 0.0043, + "step": 15804 + }, + { + "epoch": 3.85, + "learning_rate": 7.233614284811174e-08, + "loss": 0.0011, + "step": 15806 + }, + { + "epoch": 3.85, + "learning_rate": 7.18631530929359e-08, + "loss": 0.0032, + "step": 15808 + }, + { + "epoch": 3.85, + "learning_rate": 7.139170922458417e-08, + "loss": 0.0009, + "step": 15810 + }, + { + "epoch": 3.85, + "learning_rate": 7.092181131646336e-08, + "loss": 0.0016, + "step": 15812 + }, + { + "epoch": 3.85, + "learning_rate": 7.045345944174053e-08, + "loss": 0.0038, + "step": 15814 + }, + { + "epoch": 3.85, + "learning_rate": 6.99866536733429e-08, + "loss": 0.001, + "step": 15816 + }, + { + "epoch": 3.85, + "learning_rate": 6.952139408395342e-08, + "loss": 0.0006, + "step": 15818 + }, + { + "epoch": 3.85, + "learning_rate": 6.905768074601859e-08, + "loss": 0.0015, + "step": 15820 + }, + { + "epoch": 3.86, + "learning_rate": 6.859551373174067e-08, + "loss": 0.0017, + "step": 15822 + }, + { + "epoch": 3.86, + "learning_rate": 6.81348931130843e-08, + "loss": 0.0032, + "step": 15824 + }, + { + "epoch": 3.86, + "learning_rate": 6.767581896176989e-08, + "loss": 0.0047, + "step": 15826 + }, + { + "epoch": 3.86, + "learning_rate": 6.721829134927915e-08, + "loss": 0.0038, + "step": 15828 + }, + { + "epoch": 3.86, + "learning_rate": 6.676231034685398e-08, + "loss": 0.0034, + "step": 15830 + }, + { + "epoch": 3.86, + "learning_rate": 6.630787602549204e-08, + "loss": 0.0009, + "step": 15832 + }, + { + "epoch": 3.86, + "learning_rate": 6.58549884559545e-08, + "loss": 0.0031, + "step": 15834 + }, + { + "epoch": 3.86, + "learning_rate": 6.540364770875717e-08, + "loss": 0.0015, + "step": 15836 + }, + { + "epoch": 3.86, + "learning_rate": 6.495385385417829e-08, + "loss": 0.0017, + "step": 15838 + }, + { + "epoch": 3.86, + "learning_rate": 6.450560696225405e-08, + "loss": 0.0032, + "step": 15840 + }, + { + "epoch": 3.86, + "learning_rate": 6.405890710278084e-08, + "loss": 0.002, + "step": 15842 + }, + { + "epoch": 3.86, + "learning_rate": 6.361375434531192e-08, + "loss": 0.0027, + "step": 15844 + }, + { + "epoch": 3.86, + "learning_rate": 6.317014875916183e-08, + "loss": 0.0002, + "step": 15846 + }, + { + "epoch": 3.86, + "learning_rate": 6.272809041340311e-08, + "loss": 0.0004, + "step": 15848 + }, + { + "epoch": 3.86, + "learning_rate": 6.228757937686735e-08, + "loss": 0.0014, + "step": 15850 + }, + { + "epoch": 3.86, + "learning_rate": 6.184861571814527e-08, + "loss": 0.0018, + "step": 15852 + }, + { + "epoch": 3.86, + "learning_rate": 6.141119950558772e-08, + "loss": 0.0031, + "step": 15854 + }, + { + "epoch": 3.86, + "learning_rate": 6.097533080730466e-08, + "loss": 0.0019, + "step": 15856 + }, + { + "epoch": 3.86, + "learning_rate": 6.054100969116072e-08, + "loss": 0.0014, + "step": 15858 + }, + { + "epoch": 3.86, + "learning_rate": 6.010823622478623e-08, + "loss": 0.0013, + "step": 15860 + }, + { + "epoch": 3.87, + "learning_rate": 5.967701047556618e-08, + "loss": 0.0018, + "step": 15862 + }, + { + "epoch": 3.87, + "learning_rate": 5.924733251064574e-08, + "loss": 0.0015, + "step": 15864 + }, + { + "epoch": 3.87, + "learning_rate": 5.881920239692918e-08, + "loss": 0.002, + "step": 15866 + }, + { + "epoch": 3.87, + "learning_rate": 5.839262020107872e-08, + "loss": 0.0035, + "step": 15868 + }, + { + "epoch": 3.87, + "learning_rate": 5.796758598951791e-08, + "loss": 0.0026, + "step": 15870 + }, + { + "epoch": 3.87, + "learning_rate": 5.7544099828428237e-08, + "loss": 0.001, + "step": 15872 + }, + { + "epoch": 3.87, + "learning_rate": 5.7122161783748074e-08, + "loss": 0.0019, + "step": 15874 + }, + { + "epoch": 3.87, + "learning_rate": 5.670177192117599e-08, + "loss": 0.0005, + "step": 15876 + }, + { + "epoch": 3.87, + "learning_rate": 5.6282930306171824e-08, + "loss": 0.0025, + "step": 15878 + }, + { + "epoch": 3.87, + "learning_rate": 5.58656370039512e-08, + "loss": 0.0008, + "step": 15880 + }, + { + "epoch": 3.87, + "learning_rate": 5.544989207949103e-08, + "loss": 0.0004, + "step": 15882 + }, + { + "epoch": 3.87, + "learning_rate": 5.5035695597523977e-08, + "loss": 0.0024, + "step": 15884 + }, + { + "epoch": 3.87, + "learning_rate": 5.4623047622546225e-08, + "loss": 0.0036, + "step": 15886 + }, + { + "epoch": 3.87, + "learning_rate": 5.421194821880749e-08, + "loss": 0.0021, + "step": 15888 + }, + { + "epoch": 3.87, + "learning_rate": 5.380239745032101e-08, + "loss": 0.0018, + "step": 15890 + }, + { + "epoch": 3.87, + "learning_rate": 5.339439538085578e-08, + "loss": 0.0026, + "step": 15892 + }, + { + "epoch": 3.87, + "learning_rate": 5.2987942073943196e-08, + "loss": 0.0006, + "step": 15894 + }, + { + "epoch": 3.87, + "learning_rate": 5.258303759286709e-08, + "loss": 0.0035, + "step": 15896 + }, + { + "epoch": 3.87, + "learning_rate": 5.2179682000677023e-08, + "loss": 0.0029, + "step": 15898 + }, + { + "epoch": 3.87, + "learning_rate": 5.1777875360178307e-08, + "loss": 0.0019, + "step": 15900 + }, + { + "epoch": 3.87, + "learning_rate": 5.137761773393535e-08, + "loss": 0.0041, + "step": 15902 + }, + { + "epoch": 3.88, + "learning_rate": 5.097890918427162e-08, + "loss": 0.0031, + "step": 15904 + }, + { + "epoch": 3.88, + "learning_rate": 5.058174977326746e-08, + "loss": 0.0031, + "step": 15906 + }, + { + "epoch": 3.88, + "learning_rate": 5.0186139562764526e-08, + "loss": 0.0021, + "step": 15908 + }, + { + "epoch": 3.88, + "learning_rate": 4.979207861436242e-08, + "loss": 0.001, + "step": 15910 + }, + { + "epoch": 3.88, + "learning_rate": 4.9399566989419834e-08, + "loss": 0.0023, + "step": 15912 + }, + { + "epoch": 3.88, + "learning_rate": 4.900860474905234e-08, + "loss": 0.0012, + "step": 15914 + }, + { + "epoch": 3.88, + "learning_rate": 4.861919195413789e-08, + "loss": 0.002, + "step": 15916 + }, + { + "epoch": 3.88, + "learning_rate": 4.8231328665310216e-08, + "loss": 0.0019, + "step": 15918 + }, + { + "epoch": 3.88, + "learning_rate": 4.784501494296212e-08, + "loss": 0.0015, + "step": 15920 + }, + { + "epoch": 3.88, + "learning_rate": 4.746025084724548e-08, + "loss": 0.0007, + "step": 15922 + }, + { + "epoch": 3.88, + "learning_rate": 4.707703643807127e-08, + "loss": 0.0021, + "step": 15924 + }, + { + "epoch": 3.88, + "learning_rate": 4.669537177510952e-08, + "loss": 0.0008, + "step": 15926 + }, + { + "epoch": 3.88, + "learning_rate": 4.6315256917787155e-08, + "loss": 0.0036, + "step": 15928 + }, + { + "epoch": 3.88, + "learning_rate": 4.593669192529237e-08, + "loss": 0.002, + "step": 15930 + }, + { + "epoch": 3.88, + "learning_rate": 4.5559676856570255e-08, + "loss": 0.0026, + "step": 15932 + }, + { + "epoch": 3.88, + "learning_rate": 4.518421177032384e-08, + "loss": 0.0012, + "step": 15934 + }, + { + "epoch": 3.88, + "learning_rate": 4.481029672501635e-08, + "loss": 0.0012, + "step": 15936 + }, + { + "epoch": 3.88, + "learning_rate": 4.4437931778870127e-08, + "loss": 0.0013, + "step": 15938 + }, + { + "epoch": 3.88, + "learning_rate": 4.4067116989864325e-08, + "loss": 0.0007, + "step": 15940 + }, + { + "epoch": 3.88, + "learning_rate": 4.369785241573832e-08, + "loss": 0.0008, + "step": 15942 + }, + { + "epoch": 3.88, + "learning_rate": 4.333013811398834e-08, + "loss": 0.0019, + "step": 15944 + }, + { + "epoch": 3.89, + "learning_rate": 4.296397414187192e-08, + "loss": 0.003, + "step": 15946 + }, + { + "epoch": 3.89, + "learning_rate": 4.259936055640235e-08, + "loss": 0.0012, + "step": 15948 + }, + { + "epoch": 3.89, + "learning_rate": 4.22362974143542e-08, + "loss": 0.0029, + "step": 15950 + }, + { + "epoch": 3.89, + "learning_rate": 4.1874784772256707e-08, + "loss": 0.006, + "step": 15952 + }, + { + "epoch": 3.89, + "learning_rate": 4.15148226864015e-08, + "loss": 0.0028, + "step": 15954 + }, + { + "epoch": 3.89, + "learning_rate": 4.11564112128382e-08, + "loss": 0.0016, + "step": 15956 + }, + { + "epoch": 3.89, + "learning_rate": 4.0799550407373267e-08, + "loss": 0.0002, + "step": 15958 + }, + { + "epoch": 3.89, + "learning_rate": 4.044424032557226e-08, + "loss": 0.0046, + "step": 15960 + }, + { + "epoch": 3.89, + "learning_rate": 4.009048102276092e-08, + "loss": 0.0019, + "step": 15962 + }, + { + "epoch": 3.89, + "learning_rate": 3.973827255402185e-08, + "loss": 0.0023, + "step": 15964 + }, + { + "epoch": 3.89, + "learning_rate": 3.938761497419563e-08, + "loss": 0.0035, + "step": 15966 + }, + { + "epoch": 3.89, + "learning_rate": 3.903850833788303e-08, + "loss": 0.0024, + "step": 15968 + }, + { + "epoch": 3.89, + "learning_rate": 3.86909526994439e-08, + "loss": 0.004, + "step": 15970 + }, + { + "epoch": 3.89, + "learning_rate": 3.834494811299272e-08, + "loss": 0.003, + "step": 15972 + }, + { + "epoch": 3.89, + "learning_rate": 3.800049463240751e-08, + "loss": 0.0021, + "step": 15974 + }, + { + "epoch": 3.89, + "learning_rate": 3.765759231132093e-08, + "loss": 0.0042, + "step": 15976 + }, + { + "epoch": 3.89, + "learning_rate": 3.731624120312582e-08, + "loss": 0.0004, + "step": 15978 + }, + { + "epoch": 3.89, + "learning_rate": 3.6976441360971896e-08, + "loss": 0.0025, + "step": 15980 + }, + { + "epoch": 3.89, + "learning_rate": 3.663819283777126e-08, + "loss": 0.0024, + "step": 15982 + }, + { + "epoch": 3.89, + "learning_rate": 3.630149568618957e-08, + "loss": 0.0023, + "step": 15984 + }, + { + "epoch": 3.9, + "learning_rate": 3.596634995865489e-08, + "loss": 0.0027, + "step": 15986 + }, + { + "epoch": 3.9, + "learning_rate": 3.563275570735103e-08, + "loss": 0.0012, + "step": 15988 + }, + { + "epoch": 3.9, + "learning_rate": 3.5300712984219774e-08, + "loss": 0.0026, + "step": 15990 + }, + { + "epoch": 3.9, + "learning_rate": 3.497022184096532e-08, + "loss": 0.0019, + "step": 15992 + }, + { + "epoch": 3.9, + "learning_rate": 3.46412823290454e-08, + "loss": 0.0026, + "step": 15994 + }, + { + "epoch": 3.9, + "learning_rate": 3.4313894499680146e-08, + "loss": 0.0021, + "step": 15996 + }, + { + "epoch": 3.9, + "learning_rate": 3.398805840384545e-08, + "loss": 0.0016, + "step": 15998 + }, + { + "epoch": 3.9, + "learning_rate": 3.366377409227739e-08, + "loss": 0.0016, + "step": 16000 + }, + { + "epoch": 3.9, + "learning_rate": 3.334104161546781e-08, + "loss": 0.0006, + "step": 16002 + }, + { + "epoch": 3.9, + "learning_rate": 3.301986102366983e-08, + "loss": 0.004, + "step": 16004 + }, + { + "epoch": 3.9, + "learning_rate": 3.270023236689457e-08, + "loss": 0.0039, + "step": 16006 + }, + { + "epoch": 3.9, + "learning_rate": 3.238215569490777e-08, + "loss": 0.0021, + "step": 16008 + }, + { + "epoch": 3.9, + "learning_rate": 3.206563105723981e-08, + "loss": 0.003, + "step": 16010 + }, + { + "epoch": 3.9, + "learning_rate": 3.175065850317349e-08, + "loss": 0.003, + "step": 16012 + }, + { + "epoch": 3.9, + "learning_rate": 3.143723808175403e-08, + "loss": 0.0093, + "step": 16014 + }, + { + "epoch": 3.9, + "learning_rate": 3.112536984178238e-08, + "loss": 0.0018, + "step": 16016 + }, + { + "epoch": 3.9, + "learning_rate": 3.0815053831818596e-08, + "loss": 0.0023, + "step": 16018 + }, + { + "epoch": 3.9, + "learning_rate": 3.050629010018291e-08, + "loss": 0.0016, + "step": 16020 + }, + { + "epoch": 3.9, + "learning_rate": 3.01990786949502e-08, + "loss": 0.0007, + "step": 16022 + }, + { + "epoch": 3.9, + "learning_rate": 2.989341966395665e-08, + "loss": 0.003, + "step": 16024 + }, + { + "epoch": 3.9, + "learning_rate": 2.95893130547964e-08, + "loss": 0.0009, + "step": 16026 + }, + { + "epoch": 3.91, + "learning_rate": 2.9286758914819358e-08, + "loss": 0.0016, + "step": 16028 + }, + { + "epoch": 3.91, + "learning_rate": 2.8985757291137838e-08, + "loss": 0.0021, + "step": 16030 + }, + { + "epoch": 3.91, + "learning_rate": 2.868630823061769e-08, + "loss": 0.0045, + "step": 16032 + }, + { + "epoch": 3.91, + "learning_rate": 2.838841177988605e-08, + "loss": 0.0033, + "step": 16034 + }, + { + "epoch": 3.91, + "learning_rate": 2.809206798532915e-08, + "loss": 0.0021, + "step": 16036 + }, + { + "epoch": 3.91, + "learning_rate": 2.7797276893087865e-08, + "loss": 0.0009, + "step": 16038 + }, + { + "epoch": 3.91, + "learning_rate": 2.7504038549065472e-08, + "loss": 0.0014, + "step": 16040 + }, + { + "epoch": 3.91, + "learning_rate": 2.721235299891989e-08, + "loss": 0.0036, + "step": 16042 + }, + { + "epoch": 3.91, + "learning_rate": 2.6922220288070345e-08, + "loss": 0.0018, + "step": 16044 + }, + { + "epoch": 3.91, + "learning_rate": 2.6633640461691812e-08, + "loss": 0.004, + "step": 16046 + }, + { + "epoch": 3.91, + "learning_rate": 2.634661356471724e-08, + "loss": 0.0051, + "step": 16048 + }, + { + "epoch": 3.91, + "learning_rate": 2.606113964183976e-08, + "loss": 0.0027, + "step": 16050 + }, + { + "epoch": 3.91, + "learning_rate": 2.57772187375116e-08, + "loss": 0.0017, + "step": 16052 + }, + { + "epoch": 3.91, + "learning_rate": 2.5494850895938505e-08, + "loss": 0.0031, + "step": 16054 + }, + { + "epoch": 3.91, + "learning_rate": 2.521403616108975e-08, + "loss": 0.0031, + "step": 16056 + }, + { + "epoch": 3.91, + "learning_rate": 2.4934774576688137e-08, + "loss": 0.0015, + "step": 16058 + }, + { + "epoch": 3.91, + "learning_rate": 2.465706618621888e-08, + "loss": 0.0028, + "step": 16060 + }, + { + "epoch": 3.91, + "learning_rate": 2.4380911032921838e-08, + "loss": 0.0025, + "step": 16062 + }, + { + "epoch": 3.91, + "learning_rate": 2.410630915979817e-08, + "loss": 0.0013, + "step": 16064 + }, + { + "epoch": 3.91, + "learning_rate": 2.383326060960256e-08, + "loss": 0.0023, + "step": 16066 + }, + { + "epoch": 3.92, + "learning_rate": 2.356176542485322e-08, + "loss": 0.0015, + "step": 16068 + }, + { + "epoch": 3.92, + "learning_rate": 2.329182364782412e-08, + "loss": 0.0009, + "step": 16070 + }, + { + "epoch": 3.92, + "learning_rate": 2.3023435320546073e-08, + "loss": 0.0013, + "step": 16072 + }, + { + "epoch": 3.92, + "learning_rate": 2.2756600484808987e-08, + "loss": 0.0056, + "step": 16074 + }, + { + "epoch": 3.92, + "learning_rate": 2.2491319182162964e-08, + "loss": 0.0044, + "step": 16076 + }, + { + "epoch": 3.92, + "learning_rate": 2.2227591453911623e-08, + "loss": 0.0015, + "step": 16078 + }, + { + "epoch": 3.92, + "learning_rate": 2.196541734112212e-08, + "loss": 0.0028, + "step": 16080 + }, + { + "epoch": 3.92, + "learning_rate": 2.170479688461513e-08, + "loss": 0.0013, + "step": 16082 + }, + { + "epoch": 3.92, + "learning_rate": 2.144573012497153e-08, + "loss": 0.0026, + "step": 16084 + }, + { + "epoch": 3.92, + "learning_rate": 2.118821710253127e-08, + "loss": 0.0015, + "step": 16086 + }, + { + "epoch": 3.92, + "learning_rate": 2.093225785738895e-08, + "loss": 0.0035, + "step": 16088 + }, + { + "epoch": 3.92, + "learning_rate": 2.0677852429400458e-08, + "loss": 0.0021, + "step": 16090 + }, + { + "epoch": 3.92, + "learning_rate": 2.0425000858179665e-08, + "loss": 0.0027, + "step": 16092 + }, + { + "epoch": 3.92, + "learning_rate": 2.0173703183096194e-08, + "loss": 0.0011, + "step": 16094 + }, + { + "epoch": 3.92, + "learning_rate": 1.992395944327874e-08, + "loss": 0.0032, + "step": 16096 + }, + { + "epoch": 3.92, + "learning_rate": 1.9675769677616196e-08, + "loss": 0.0005, + "step": 16098 + }, + { + "epoch": 3.92, + "learning_rate": 1.9429133924752096e-08, + "loss": 0.002, + "step": 16100 + }, + { + "epoch": 3.92, + "learning_rate": 1.9184052223089054e-08, + "loss": 0.0039, + "step": 16102 + }, + { + "epoch": 3.92, + "learning_rate": 1.8940524610789878e-08, + "loss": 0.0036, + "step": 16104 + }, + { + "epoch": 3.92, + "learning_rate": 1.8698551125772015e-08, + "loss": 0.0007, + "step": 16106 + }, + { + "epoch": 3.92, + "learning_rate": 1.8458131805713096e-08, + "loss": 0.0015, + "step": 16108 + }, + { + "epoch": 3.93, + "learning_rate": 1.8219266688048742e-08, + "loss": 0.0017, + "step": 16110 + }, + { + "epoch": 3.93, + "learning_rate": 1.798195580997142e-08, + "loss": 0.0011, + "step": 16112 + }, + { + "epoch": 3.93, + "learning_rate": 1.774619920843268e-08, + "loss": 0.0008, + "step": 16114 + }, + { + "epoch": 3.93, + "learning_rate": 1.751199692014205e-08, + "loss": 0.0007, + "step": 16116 + }, + { + "epoch": 3.93, + "learning_rate": 1.7279348981565914e-08, + "loss": 0.0023, + "step": 16118 + }, + { + "epoch": 3.93, + "learning_rate": 1.704825542892974e-08, + "loss": 0.0021, + "step": 16120 + }, + { + "epoch": 3.93, + "learning_rate": 1.6818716298215855e-08, + "loss": 0.0044, + "step": 16122 + }, + { + "epoch": 3.93, + "learning_rate": 1.6590731625165666e-08, + "loss": 0.0032, + "step": 16124 + }, + { + "epoch": 3.93, + "learning_rate": 1.6364301445278563e-08, + "loss": 0.0015, + "step": 16126 + }, + { + "epoch": 3.93, + "learning_rate": 1.613942579381189e-08, + "loss": 0.0027, + "step": 16128 + }, + { + "epoch": 3.93, + "learning_rate": 1.5916104705778757e-08, + "loss": 0.0008, + "step": 16130 + }, + { + "epoch": 3.93, + "learning_rate": 1.5694338215952453e-08, + "loss": 0.0025, + "step": 16132 + }, + { + "epoch": 3.93, + "learning_rate": 1.547412635886536e-08, + "loss": 0.0008, + "step": 16134 + }, + { + "epoch": 3.93, + "learning_rate": 1.5255469168804492e-08, + "loss": 0.0013, + "step": 16136 + }, + { + "epoch": 3.93, + "learning_rate": 1.503836667981595e-08, + "loss": 0.0026, + "step": 16138 + }, + { + "epoch": 3.93, + "learning_rate": 1.4822818925707139e-08, + "loss": 0.0024, + "step": 16140 + }, + { + "epoch": 3.93, + "learning_rate": 1.4608825940036764e-08, + "loss": 0.0012, + "step": 16142 + }, + { + "epoch": 3.93, + "learning_rate": 1.4396387756128172e-08, + "loss": 0.0018, + "step": 16144 + }, + { + "epoch": 3.93, + "learning_rate": 1.4185504407058237e-08, + "loss": 0.0018, + "step": 16146 + }, + { + "epoch": 3.93, + "learning_rate": 1.3976175925662917e-08, + "loss": 0.0012, + "step": 16148 + }, + { + "epoch": 3.94, + "learning_rate": 1.3768402344537246e-08, + "loss": 0.0007, + "step": 16150 + }, + { + "epoch": 3.94, + "learning_rate": 1.3562183696032016e-08, + "loss": 0.0004, + "step": 16152 + }, + { + "epoch": 3.94, + "learning_rate": 1.3357520012258207e-08, + "loss": 0.0048, + "step": 16154 + }, + { + "epoch": 3.94, + "learning_rate": 1.315441132508255e-08, + "loss": 0.0031, + "step": 16156 + }, + { + "epoch": 3.94, + "learning_rate": 1.2952857666130858e-08, + "loss": 0.0025, + "step": 16158 + }, + { + "epoch": 3.94, + "learning_rate": 1.2752859066786915e-08, + "loss": 0.0039, + "step": 16160 + }, + { + "epoch": 3.94, + "learning_rate": 1.2554415558191368e-08, + "loss": 0.0012, + "step": 16162 + }, + { + "epoch": 3.94, + "learning_rate": 1.2357527171243943e-08, + "loss": 0.0025, + "step": 16164 + }, + { + "epoch": 3.94, + "learning_rate": 1.216219393660234e-08, + "loss": 0.0018, + "step": 16166 + }, + { + "epoch": 3.94, + "learning_rate": 1.1968415884680007e-08, + "loss": 0.0005, + "step": 16168 + }, + { + "epoch": 3.94, + "learning_rate": 1.1776193045650585e-08, + "loss": 0.0023, + "step": 16170 + }, + { + "epoch": 3.94, + "learning_rate": 1.1585525449443468e-08, + "loss": 0.001, + "step": 16172 + }, + { + "epoch": 3.94, + "learning_rate": 1.1396413125749351e-08, + "loss": 0.0007, + "step": 16174 + }, + { + "epoch": 3.94, + "learning_rate": 1.1208856104012455e-08, + "loss": 0.0005, + "step": 16176 + }, + { + "epoch": 3.94, + "learning_rate": 1.10228544134372e-08, + "loss": 0.004, + "step": 16178 + }, + { + "epoch": 3.94, + "learning_rate": 1.0838408082985974e-08, + "loss": 0.0009, + "step": 16180 + }, + { + "epoch": 3.94, + "learning_rate": 1.0655517141378024e-08, + "loss": 0.0021, + "step": 16182 + }, + { + "epoch": 3.94, + "learning_rate": 1.0474181617091684e-08, + "loss": 0.0018, + "step": 16184 + }, + { + "epoch": 3.94, + "learning_rate": 1.0294401538361032e-08, + "loss": 0.0036, + "step": 16186 + }, + { + "epoch": 3.94, + "learning_rate": 1.0116176933180343e-08, + "loss": 0.0013, + "step": 16188 + }, + { + "epoch": 3.94, + "learning_rate": 9.939507829299644e-09, + "loss": 0.0021, + "step": 16190 + }, + { + "epoch": 3.95, + "learning_rate": 9.764394254228037e-09, + "loss": 0.0047, + "step": 16192 + }, + { + "epoch": 3.95, + "learning_rate": 9.590836235232604e-09, + "loss": 0.002, + "step": 16194 + }, + { + "epoch": 3.95, + "learning_rate": 9.41883379933728e-09, + "loss": 0.0014, + "step": 16196 + }, + { + "epoch": 3.95, + "learning_rate": 9.248386973323975e-09, + "loss": 0.0024, + "step": 16198 + }, + { + "epoch": 3.95, + "learning_rate": 9.079495783731462e-09, + "loss": 0.0027, + "step": 16200 + }, + { + "epoch": 3.95, + "learning_rate": 8.912160256859813e-09, + "loss": 0.0018, + "step": 16202 + }, + { + "epoch": 3.95, + "learning_rate": 8.746380418762635e-09, + "loss": 0.003, + "step": 16204 + }, + { + "epoch": 3.95, + "learning_rate": 8.58215629525372e-09, + "loss": 0.0026, + "step": 16206 + }, + { + "epoch": 3.95, + "learning_rate": 8.419487911903724e-09, + "loss": 0.0027, + "step": 16208 + }, + { + "epoch": 3.95, + "learning_rate": 8.258375294042386e-09, + "loss": 0.0023, + "step": 16210 + }, + { + "epoch": 3.95, + "learning_rate": 8.098818466755198e-09, + "loss": 0.0013, + "step": 16212 + }, + { + "epoch": 3.95, + "learning_rate": 7.940817454885619e-09, + "loss": 0.0027, + "step": 16214 + }, + { + "epoch": 3.95, + "learning_rate": 7.7843722830373e-09, + "loss": 0.0028, + "step": 16216 + }, + { + "epoch": 3.95, + "learning_rate": 7.629482975569646e-09, + "loss": 0.0014, + "step": 16218 + }, + { + "epoch": 3.95, + "learning_rate": 7.476149556598922e-09, + "loss": 0.0035, + "step": 16220 + }, + { + "epoch": 3.95, + "learning_rate": 7.324372050001583e-09, + "loss": 0.0036, + "step": 16222 + }, + { + "epoch": 3.95, + "learning_rate": 7.174150479409836e-09, + "loss": 0.001, + "step": 16224 + }, + { + "epoch": 3.95, + "learning_rate": 7.025484868213861e-09, + "loss": 0.0032, + "step": 16226 + }, + { + "epoch": 3.95, + "learning_rate": 6.878375239562918e-09, + "loss": 0.0018, + "step": 16228 + }, + { + "epoch": 3.95, + "learning_rate": 6.732821616363128e-09, + "loss": 0.0011, + "step": 16230 + }, + { + "epoch": 3.96, + "learning_rate": 6.588824021278584e-09, + "loss": 0.0018, + "step": 16232 + }, + { + "epoch": 3.96, + "learning_rate": 6.4463824767291294e-09, + "loss": 0.0022, + "step": 16234 + }, + { + "epoch": 3.96, + "learning_rate": 6.3054970048959105e-09, + "loss": 0.0014, + "step": 16236 + }, + { + "epoch": 3.96, + "learning_rate": 6.166167627715825e-09, + "loss": 0.0024, + "step": 16238 + }, + { + "epoch": 3.96, + "learning_rate": 6.028394366881518e-09, + "loss": 0.0042, + "step": 16240 + }, + { + "epoch": 3.96, + "learning_rate": 5.892177243846942e-09, + "loss": 0.0014, + "step": 16242 + }, + { + "epoch": 3.96, + "learning_rate": 5.757516279821796e-09, + "loss": 0.0014, + "step": 16244 + }, + { + "epoch": 3.96, + "learning_rate": 5.624411495774862e-09, + "loss": 0.0023, + "step": 16246 + }, + { + "epoch": 3.96, + "learning_rate": 5.492862912429564e-09, + "loss": 0.0027, + "step": 16248 + }, + { + "epoch": 3.96, + "learning_rate": 5.362870550269517e-09, + "loss": 0.0017, + "step": 16250 + }, + { + "epoch": 3.96, + "learning_rate": 5.234434429537416e-09, + "loss": 0.0017, + "step": 16252 + }, + { + "epoch": 3.96, + "learning_rate": 5.107554570229489e-09, + "loss": 0.0011, + "step": 16254 + }, + { + "epoch": 3.96, + "learning_rate": 4.982230992103265e-09, + "loss": 0.0033, + "step": 16256 + }, + { + "epoch": 3.96, + "learning_rate": 4.8584637146709135e-09, + "loss": 0.0032, + "step": 16258 + }, + { + "epoch": 3.96, + "learning_rate": 4.736252757205906e-09, + "loss": 0.0047, + "step": 16260 + }, + { + "epoch": 3.96, + "learning_rate": 4.615598138737465e-09, + "loss": 0.002, + "step": 16262 + }, + { + "epoch": 3.96, + "learning_rate": 4.496499878050564e-09, + "loss": 0.0015, + "step": 16264 + }, + { + "epoch": 3.96, + "learning_rate": 4.3789579936914795e-09, + "loss": 0.0022, + "step": 16266 + }, + { + "epoch": 3.96, + "learning_rate": 4.262972503961127e-09, + "loss": 0.0025, + "step": 16268 + }, + { + "epoch": 3.96, + "learning_rate": 4.148543426919505e-09, + "loss": 0.0004, + "step": 16270 + }, + { + "epoch": 3.96, + "learning_rate": 4.035670780385692e-09, + "loss": 0.0024, + "step": 16272 + }, + { + "epoch": 3.97, + "learning_rate": 3.924354581932299e-09, + "loss": 0.002, + "step": 16274 + }, + { + "epoch": 3.97, + "learning_rate": 3.814594848894349e-09, + "loss": 0.001, + "step": 16276 + }, + { + "epoch": 3.97, + "learning_rate": 3.7063915983603925e-09, + "loss": 0.0024, + "step": 16278 + }, + { + "epoch": 3.97, + "learning_rate": 3.5997448471802866e-09, + "loss": 0.0017, + "step": 16280 + }, + { + "epoch": 3.97, + "learning_rate": 3.4946546119585257e-09, + "loss": 0.0015, + "step": 16282 + }, + { + "epoch": 3.97, + "learning_rate": 3.3911209090586874e-09, + "loss": 0.0024, + "step": 16284 + }, + { + "epoch": 3.97, + "learning_rate": 3.2891437546023196e-09, + "loss": 0.0016, + "step": 16286 + }, + { + "epoch": 3.97, + "learning_rate": 3.188723164467833e-09, + "loss": 0.002, + "step": 16288 + }, + { + "epoch": 3.97, + "learning_rate": 3.089859154290498e-09, + "loss": 0.0012, + "step": 16290 + }, + { + "epoch": 3.97, + "learning_rate": 2.9925517394657764e-09, + "loss": 0.0006, + "step": 16292 + }, + { + "epoch": 3.97, + "learning_rate": 2.896800935143773e-09, + "loss": 0.0017, + "step": 16294 + }, + { + "epoch": 3.97, + "learning_rate": 2.8026067562347824e-09, + "loss": 0.0025, + "step": 16296 + }, + { + "epoch": 3.97, + "learning_rate": 2.709969217404851e-09, + "loss": 0.0016, + "step": 16298 + }, + { + "epoch": 3.97, + "learning_rate": 2.6188883330779958e-09, + "loss": 0.0008, + "step": 16300 + }, + { + "epoch": 3.97, + "learning_rate": 2.529364117437316e-09, + "loss": 0.0033, + "step": 16302 + }, + { + "epoch": 3.97, + "learning_rate": 2.4413965844216625e-09, + "loss": 0.0017, + "step": 16304 + }, + { + "epoch": 3.97, + "learning_rate": 2.354985747727856e-09, + "loss": 0.0008, + "step": 16306 + }, + { + "epoch": 3.97, + "learning_rate": 2.2701316208117997e-09, + "loss": 0.0014, + "step": 16308 + }, + { + "epoch": 3.97, + "learning_rate": 2.1868342168851474e-09, + "loss": 0.0017, + "step": 16310 + }, + { + "epoch": 3.97, + "learning_rate": 2.105093548918635e-09, + "loss": 0.0018, + "step": 16312 + }, + { + "epoch": 3.98, + "learning_rate": 2.0249096296387495e-09, + "loss": 0.0051, + "step": 16314 + }, + { + "epoch": 3.98, + "learning_rate": 1.946282471532168e-09, + "loss": 0.0016, + "step": 16316 + }, + { + "epoch": 3.98, + "learning_rate": 1.869212086841321e-09, + "loss": 0.0014, + "step": 16318 + }, + { + "epoch": 3.98, + "learning_rate": 1.7936984875654983e-09, + "loss": 0.002, + "step": 16320 + }, + { + "epoch": 3.98, + "learning_rate": 1.7197416854641824e-09, + "loss": 0.0024, + "step": 16322 + }, + { + "epoch": 3.98, + "learning_rate": 1.6473416920526064e-09, + "loss": 0.0044, + "step": 16324 + }, + { + "epoch": 3.98, + "learning_rate": 1.5764985186028648e-09, + "loss": 0.0054, + "step": 16326 + }, + { + "epoch": 3.98, + "learning_rate": 1.5072121761472436e-09, + "loss": 0.0033, + "step": 16328 + }, + { + "epoch": 3.98, + "learning_rate": 1.43948267547378e-09, + "loss": 0.0007, + "step": 16330 + }, + { + "epoch": 3.98, + "learning_rate": 1.3733100271284826e-09, + "loss": 0.0009, + "step": 16332 + }, + { + "epoch": 3.98, + "learning_rate": 1.3086942414153315e-09, + "loss": 0.0027, + "step": 16334 + }, + { + "epoch": 3.98, + "learning_rate": 1.2456353283940569e-09, + "loss": 0.0021, + "step": 16336 + }, + { + "epoch": 3.98, + "learning_rate": 1.184133297884582e-09, + "loss": 0.0021, + "step": 16338 + }, + { + "epoch": 3.98, + "learning_rate": 1.1241881594636905e-09, + "loss": 0.0022, + "step": 16340 + }, + { + "epoch": 3.98, + "learning_rate": 1.0657999224639171e-09, + "loss": 0.0019, + "step": 16342 + }, + { + "epoch": 3.98, + "learning_rate": 1.0089685959779882e-09, + "loss": 0.0027, + "step": 16344 + }, + { + "epoch": 3.98, + "learning_rate": 9.536941888532713e-10, + "loss": 0.0055, + "step": 16346 + }, + { + "epoch": 3.98, + "learning_rate": 8.999767096984358e-10, + "loss": 0.0026, + "step": 16348 + }, + { + "epoch": 3.98, + "learning_rate": 8.478161668779017e-10, + "loss": 0.0034, + "step": 16350 + }, + { + "epoch": 3.98, + "learning_rate": 7.972125685107301e-10, + "loss": 0.0007, + "step": 16352 + }, + { + "epoch": 3.98, + "learning_rate": 7.481659224783943e-10, + "loss": 0.0008, + "step": 16354 + }, + { + "epoch": 3.99, + "learning_rate": 7.006762364181185e-10, + "loss": 0.0029, + "step": 16356 + }, + { + "epoch": 3.99, + "learning_rate": 6.547435177228778e-10, + "loss": 0.0014, + "step": 16358 + }, + { + "epoch": 3.99, + "learning_rate": 6.103677735458391e-10, + "loss": 0.0014, + "step": 16360 + }, + { + "epoch": 3.99, + "learning_rate": 5.675490107970305e-10, + "loss": 0.0013, + "step": 16362 + }, + { + "epoch": 3.99, + "learning_rate": 5.262872361422311e-10, + "loss": 0.0029, + "step": 16364 + }, + { + "epoch": 3.99, + "learning_rate": 4.865824560074118e-10, + "loss": 0.0017, + "step": 16366 + }, + { + "epoch": 3.99, + "learning_rate": 4.484346765742942e-10, + "loss": 0.0025, + "step": 16368 + }, + { + "epoch": 3.99, + "learning_rate": 4.1184390378257167e-10, + "loss": 0.0023, + "step": 16370 + }, + { + "epoch": 3.99, + "learning_rate": 3.7681014333101897e-10, + "loss": 0.0054, + "step": 16372 + }, + { + "epoch": 3.99, + "learning_rate": 3.433334006730515e-10, + "loss": 0.0036, + "step": 16374 + }, + { + "epoch": 3.99, + "learning_rate": 3.1141368102227676e-10, + "loss": 0.0018, + "step": 16376 + }, + { + "epoch": 3.99, + "learning_rate": 2.8105098934916307e-10, + "loss": 0.0035, + "step": 16378 + }, + { + "epoch": 3.99, + "learning_rate": 2.522453303799299e-10, + "loss": 0.0036, + "step": 16380 + }, + { + "epoch": 3.99, + "learning_rate": 2.2499670860098855e-10, + "loss": 0.0026, + "step": 16382 + }, + { + "epoch": 3.99, + "learning_rate": 1.993051282545011e-10, + "loss": 0.0013, + "step": 16384 + }, + { + "epoch": 3.99, + "learning_rate": 1.751705933417114e-10, + "loss": 0.0025, + "step": 16386 + }, + { + "epoch": 3.99, + "learning_rate": 1.525931076196141e-10, + "loss": 0.001, + "step": 16388 + }, + { + "epoch": 3.99, + "learning_rate": 1.3157267460428557e-10, + "loss": 0.0037, + "step": 16390 + }, + { + "epoch": 3.99, + "learning_rate": 1.1210929756866329e-10, + "loss": 0.0015, + "step": 16392 + }, + { + "epoch": 3.99, + "learning_rate": 9.420297954254587e-11, + "loss": 0.003, + "step": 16394 + }, + { + "epoch": 4.0, + "learning_rate": 7.785372331592378e-11, + "loss": 0.0026, + "step": 16396 + }, + { + "epoch": 4.0, + "learning_rate": 6.306153143231797e-11, + "loss": 0.0019, + "step": 16398 + }, + { + "epoch": 4.0, + "learning_rate": 4.982640619766166e-11, + "loss": 0.0039, + "step": 16400 + }, + { + "epoch": 4.0, + "learning_rate": 3.8148349669198113e-11, + "loss": 0.0028, + "step": 16402 + }, + { + "epoch": 4.0, + "learning_rate": 2.802736366880332e-11, + "loss": 0.0032, + "step": 16404 + }, + { + "epoch": 4.0, + "learning_rate": 1.9463449770773522e-11, + "loss": 0.002, + "step": 16406 + }, + { + "epoch": 4.0, + "learning_rate": 1.2456609307376355e-11, + "loss": 0.0014, + "step": 16408 + }, + { + "epoch": 4.0, + "learning_rate": 7.006843372181493e-12, + "loss": 0.0008, + "step": 16410 + }, + { + "epoch": 4.0, + "learning_rate": 3.114152812289106e-12, + "loss": 0.0011, + "step": 16412 + }, + { + "epoch": 4.0, + "learning_rate": 7.785382327707425e-13, + "loss": 0.0029, + "step": 16414 + }, + { + "epoch": 4.0, + "learning_rate": 0.0, + "loss": 0.001, + "step": 16416 + }, + { + "epoch": 4.0, + "step": 16416, + "total_flos": 5.43675641046342e+18, + "train_loss": 0.10646877170433743, + "train_runtime": 71465.0126, + "train_samples_per_second": 7.35, + "train_steps_per_second": 0.23 + } + ], + "max_steps": 16416, + "num_train_epochs": 4, + "total_flos": 5.43675641046342e+18, + "trial_name": null, + "trial_params": null +}