{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.807411566535654, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999953209807224e-05, "loss": 0.037, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.999906419614449e-05, "loss": 0.0443, "step": 4 }, { "epoch": 0.0, "learning_rate": 4.999859629421673e-05, "loss": 0.0456, "step": 6 }, { "epoch": 0.0, "learning_rate": 4.999812839228898e-05, "loss": 0.0554, "step": 8 }, { "epoch": 0.0, "learning_rate": 4.999766049036122e-05, "loss": 0.0675, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.9997192588433464e-05, "loss": 0.0875, "step": 12 }, { "epoch": 0.0, "learning_rate": 4.999672468650571e-05, "loss": 0.0372, "step": 14 }, { "epoch": 0.0, "learning_rate": 4.9996256784577956e-05, "loss": 0.0759, "step": 16 }, { "epoch": 0.0, "learning_rate": 4.9995788882650195e-05, "loss": 0.0565, "step": 18 }, { "epoch": 0.0, "learning_rate": 4.999532098072244e-05, "loss": 0.0625, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.999485307879469e-05, "loss": 0.0819, "step": 22 }, { "epoch": 0.0, "learning_rate": 4.999438517686693e-05, "loss": 0.0702, "step": 24 }, { "epoch": 0.0, "learning_rate": 4.999391727493917e-05, "loss": 0.0602, "step": 26 }, { "epoch": 0.0, "learning_rate": 4.999344937301142e-05, "loss": 0.0636, "step": 28 }, { "epoch": 0.0, "learning_rate": 4.9992981471083664e-05, "loss": 0.0527, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.999251356915591e-05, "loss": 0.0542, "step": 32 }, { "epoch": 0.0, "learning_rate": 4.999204566722815e-05, "loss": 0.0554, "step": 34 }, { "epoch": 0.01, "learning_rate": 4.9991577765300395e-05, "loss": 0.097, "step": 36 }, { "epoch": 0.01, "learning_rate": 4.9991109863372634e-05, "loss": 0.0558, "step": 38 }, { "epoch": 0.01, "learning_rate": 4.999064196144489e-05, "loss": 0.0606, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.9990174059517126e-05, "loss": 0.0645, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.998970615758937e-05, "loss": 0.0463, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.998923825566161e-05, "loss": 0.062, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.9988770353733864e-05, "loss": 0.0648, "step": 48 }, { "epoch": 0.01, "learning_rate": 4.99883024518061e-05, "loss": 0.0652, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.998783454987835e-05, "loss": 0.0588, "step": 52 }, { "epoch": 0.01, "learning_rate": 4.998736664795059e-05, "loss": 0.0534, "step": 54 }, { "epoch": 0.01, "learning_rate": 4.9986898746022834e-05, "loss": 0.0591, "step": 56 }, { "epoch": 0.01, "learning_rate": 4.998643084409508e-05, "loss": 0.0734, "step": 58 }, { "epoch": 0.01, "learning_rate": 4.9985962942167326e-05, "loss": 0.0554, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.9985495040239565e-05, "loss": 0.0694, "step": 62 }, { "epoch": 0.01, "learning_rate": 4.998502713831181e-05, "loss": 0.0483, "step": 64 }, { "epoch": 0.01, "learning_rate": 4.9984559236384057e-05, "loss": 0.06, "step": 66 }, { "epoch": 0.01, "learning_rate": 4.99840913344563e-05, "loss": 0.0609, "step": 68 }, { "epoch": 0.01, "learning_rate": 4.998362343252854e-05, "loss": 0.0651, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.998315553060079e-05, "loss": 0.0729, "step": 72 }, { "epoch": 0.01, "learning_rate": 4.998268762867303e-05, "loss": 0.0757, "step": 74 }, { "epoch": 0.01, "learning_rate": 4.998221972674528e-05, "loss": 0.0727, "step": 76 }, { "epoch": 0.01, "learning_rate": 4.998175182481752e-05, "loss": 0.0758, "step": 78 }, { "epoch": 0.01, "learning_rate": 4.9981283922889764e-05, "loss": 0.0541, "step": 80 }, { "epoch": 0.01, "learning_rate": 4.998081602096201e-05, "loss": 0.0508, "step": 82 }, { "epoch": 0.01, "learning_rate": 4.9980348119034256e-05, "loss": 0.061, "step": 84 }, { "epoch": 0.01, "learning_rate": 4.9979880217106495e-05, "loss": 0.065, "step": 86 }, { "epoch": 0.01, "learning_rate": 4.997941231517874e-05, "loss": 0.0799, "step": 88 }, { "epoch": 0.01, "learning_rate": 4.997894441325098e-05, "loss": 0.0496, "step": 90 }, { "epoch": 0.01, "learning_rate": 4.997847651132323e-05, "loss": 0.083, "step": 92 }, { "epoch": 0.01, "learning_rate": 4.997800860939547e-05, "loss": 0.0735, "step": 94 }, { "epoch": 0.01, "learning_rate": 4.997754070746772e-05, "loss": 0.0632, "step": 96 }, { "epoch": 0.01, "learning_rate": 4.997707280553996e-05, "loss": 0.0583, "step": 98 }, { "epoch": 0.01, "learning_rate": 4.99766049036122e-05, "loss": 0.0603, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.997613700168445e-05, "loss": 0.0608, "step": 102 }, { "epoch": 0.01, "learning_rate": 4.9975669099756695e-05, "loss": 0.0535, "step": 104 }, { "epoch": 0.01, "learning_rate": 4.9975201197828934e-05, "loss": 0.0497, "step": 106 }, { "epoch": 0.02, "learning_rate": 4.997473329590118e-05, "loss": 0.0643, "step": 108 }, { "epoch": 0.02, "learning_rate": 4.9974265393973426e-05, "loss": 0.0945, "step": 110 }, { "epoch": 0.02, "learning_rate": 4.997379749204567e-05, "loss": 0.0629, "step": 112 }, { "epoch": 0.02, "learning_rate": 4.997332959011791e-05, "loss": 0.0662, "step": 114 }, { "epoch": 0.02, "learning_rate": 4.997286168819016e-05, "loss": 0.0684, "step": 116 }, { "epoch": 0.02, "learning_rate": 4.99723937862624e-05, "loss": 0.0457, "step": 118 }, { "epoch": 0.02, "learning_rate": 4.997192588433465e-05, "loss": 0.0592, "step": 120 }, { "epoch": 0.02, "learning_rate": 4.997145798240689e-05, "loss": 0.0606, "step": 122 }, { "epoch": 0.02, "learning_rate": 4.9970990080479134e-05, "loss": 0.0658, "step": 124 }, { "epoch": 0.02, "learning_rate": 4.997052217855138e-05, "loss": 0.0649, "step": 126 }, { "epoch": 0.02, "learning_rate": 4.9970054276623626e-05, "loss": 0.082, "step": 128 }, { "epoch": 0.02, "learning_rate": 4.9969586374695865e-05, "loss": 0.064, "step": 130 }, { "epoch": 0.02, "learning_rate": 4.996911847276811e-05, "loss": 0.0648, "step": 132 }, { "epoch": 0.02, "learning_rate": 4.996865057084035e-05, "loss": 0.0601, "step": 134 }, { "epoch": 0.02, "learning_rate": 4.99681826689126e-05, "loss": 0.0887, "step": 136 }, { "epoch": 0.02, "learning_rate": 4.996771476698484e-05, "loss": 0.0643, "step": 138 }, { "epoch": 0.02, "learning_rate": 4.996724686505709e-05, "loss": 0.0608, "step": 140 }, { "epoch": 0.02, "learning_rate": 4.996677896312933e-05, "loss": 0.0526, "step": 142 }, { "epoch": 0.02, "learning_rate": 4.996631106120158e-05, "loss": 0.0693, "step": 144 }, { "epoch": 0.02, "learning_rate": 4.996584315927382e-05, "loss": 0.0642, "step": 146 }, { "epoch": 0.02, "learning_rate": 4.9965375257346064e-05, "loss": 0.0506, "step": 148 }, { "epoch": 0.02, "learning_rate": 4.9964907355418303e-05, "loss": 0.0759, "step": 150 }, { "epoch": 0.02, "learning_rate": 4.996443945349055e-05, "loss": 0.0666, "step": 152 }, { "epoch": 0.02, "learning_rate": 4.9963971551562795e-05, "loss": 0.0838, "step": 154 }, { "epoch": 0.02, "learning_rate": 4.996350364963504e-05, "loss": 0.0516, "step": 156 }, { "epoch": 0.02, "learning_rate": 4.996303574770728e-05, "loss": 0.0821, "step": 158 }, { "epoch": 0.02, "learning_rate": 4.9962567845779526e-05, "loss": 0.0562, "step": 160 }, { "epoch": 0.02, "learning_rate": 4.996209994385177e-05, "loss": 0.0587, "step": 162 }, { "epoch": 0.02, "learning_rate": 4.996163204192402e-05, "loss": 0.0565, "step": 164 }, { "epoch": 0.02, "learning_rate": 4.996116413999626e-05, "loss": 0.0792, "step": 166 }, { "epoch": 0.02, "learning_rate": 4.99606962380685e-05, "loss": 0.0581, "step": 168 }, { "epoch": 0.02, "learning_rate": 4.996022833614075e-05, "loss": 0.0609, "step": 170 }, { "epoch": 0.02, "learning_rate": 4.9959760434212995e-05, "loss": 0.0733, "step": 172 }, { "epoch": 0.02, "learning_rate": 4.9959292532285234e-05, "loss": 0.0535, "step": 174 }, { "epoch": 0.02, "learning_rate": 4.995882463035748e-05, "loss": 0.0691, "step": 176 }, { "epoch": 0.02, "learning_rate": 4.9958356728429726e-05, "loss": 0.0531, "step": 178 }, { "epoch": 0.03, "learning_rate": 4.995788882650197e-05, "loss": 0.0678, "step": 180 }, { "epoch": 0.03, "learning_rate": 4.995742092457421e-05, "loss": 0.071, "step": 182 }, { "epoch": 0.03, "learning_rate": 4.995695302264646e-05, "loss": 0.0658, "step": 184 }, { "epoch": 0.03, "learning_rate": 4.9956485120718696e-05, "loss": 0.0588, "step": 186 }, { "epoch": 0.03, "learning_rate": 4.995601721879095e-05, "loss": 0.076, "step": 188 }, { "epoch": 0.03, "learning_rate": 4.995554931686319e-05, "loss": 0.082, "step": 190 }, { "epoch": 0.03, "learning_rate": 4.9955081414935434e-05, "loss": 0.0749, "step": 192 }, { "epoch": 0.03, "learning_rate": 4.995461351300767e-05, "loss": 0.0777, "step": 194 }, { "epoch": 0.03, "learning_rate": 4.9954145611079926e-05, "loss": 0.0482, "step": 196 }, { "epoch": 0.03, "learning_rate": 4.9953677709152165e-05, "loss": 0.0717, "step": 198 }, { "epoch": 0.03, "learning_rate": 4.995320980722441e-05, "loss": 0.0564, "step": 200 }, { "epoch": 0.03, "learning_rate": 4.995274190529665e-05, "loss": 0.0572, "step": 202 }, { "epoch": 0.03, "learning_rate": 4.9952274003368896e-05, "loss": 0.0558, "step": 204 }, { "epoch": 0.03, "learning_rate": 4.995180610144114e-05, "loss": 0.0597, "step": 206 }, { "epoch": 0.03, "learning_rate": 4.995133819951339e-05, "loss": 0.0753, "step": 208 }, { "epoch": 0.03, "learning_rate": 4.995087029758563e-05, "loss": 0.0674, "step": 210 }, { "epoch": 0.03, "learning_rate": 4.995040239565787e-05, "loss": 0.0579, "step": 212 }, { "epoch": 0.03, "learning_rate": 4.994993449373012e-05, "loss": 0.0707, "step": 214 }, { "epoch": 0.03, "learning_rate": 4.9949466591802364e-05, "loss": 0.0689, "step": 216 }, { "epoch": 0.03, "learning_rate": 4.9948998689874604e-05, "loss": 0.052, "step": 218 }, { "epoch": 0.03, "learning_rate": 4.994853078794685e-05, "loss": 0.0635, "step": 220 }, { "epoch": 0.03, "learning_rate": 4.9948062886019095e-05, "loss": 0.0776, "step": 222 }, { "epoch": 0.03, "learning_rate": 4.994759498409134e-05, "loss": 0.0745, "step": 224 }, { "epoch": 0.03, "learning_rate": 4.994712708216358e-05, "loss": 0.0715, "step": 226 }, { "epoch": 0.03, "learning_rate": 4.9946659180235826e-05, "loss": 0.0822, "step": 228 }, { "epoch": 0.03, "learning_rate": 4.9946191278308065e-05, "loss": 0.0503, "step": 230 }, { "epoch": 0.03, "learning_rate": 4.994572337638032e-05, "loss": 0.0659, "step": 232 }, { "epoch": 0.03, "learning_rate": 4.994525547445256e-05, "loss": 0.0601, "step": 234 }, { "epoch": 0.03, "learning_rate": 4.99447875725248e-05, "loss": 0.0695, "step": 236 }, { "epoch": 0.03, "learning_rate": 4.994431967059704e-05, "loss": 0.0656, "step": 238 }, { "epoch": 0.03, "learning_rate": 4.9943851768669295e-05, "loss": 0.0464, "step": 240 }, { "epoch": 0.03, "learning_rate": 4.9943383866741534e-05, "loss": 0.0717, "step": 242 }, { "epoch": 0.03, "learning_rate": 4.994291596481378e-05, "loss": 0.0721, "step": 244 }, { "epoch": 0.03, "learning_rate": 4.994244806288602e-05, "loss": 0.0659, "step": 246 }, { "epoch": 0.03, "learning_rate": 4.9941980160958265e-05, "loss": 0.0624, "step": 248 }, { "epoch": 0.04, "learning_rate": 4.994151225903051e-05, "loss": 0.0821, "step": 250 }, { "epoch": 0.04, "learning_rate": 4.994104435710276e-05, "loss": 0.0659, "step": 252 }, { "epoch": 0.04, "learning_rate": 4.9940576455174996e-05, "loss": 0.0722, "step": 254 }, { "epoch": 0.04, "learning_rate": 4.994010855324724e-05, "loss": 0.0651, "step": 256 }, { "epoch": 0.04, "learning_rate": 4.993964065131949e-05, "loss": 0.063, "step": 258 }, { "epoch": 0.04, "learning_rate": 4.993917274939173e-05, "loss": 0.0858, "step": 260 }, { "epoch": 0.04, "learning_rate": 4.993870484746397e-05, "loss": 0.065, "step": 262 }, { "epoch": 0.04, "learning_rate": 4.993823694553621e-05, "loss": 0.0508, "step": 264 }, { "epoch": 0.04, "learning_rate": 4.9937769043608465e-05, "loss": 0.0733, "step": 266 }, { "epoch": 0.04, "learning_rate": 4.9937301141680704e-05, "loss": 0.0532, "step": 268 }, { "epoch": 0.04, "learning_rate": 4.993683323975295e-05, "loss": 0.0611, "step": 270 }, { "epoch": 0.04, "learning_rate": 4.993636533782519e-05, "loss": 0.0675, "step": 272 }, { "epoch": 0.04, "learning_rate": 4.993589743589744e-05, "loss": 0.0732, "step": 274 }, { "epoch": 0.04, "learning_rate": 4.993542953396968e-05, "loss": 0.0837, "step": 276 }, { "epoch": 0.04, "learning_rate": 4.993496163204193e-05, "loss": 0.0673, "step": 278 }, { "epoch": 0.04, "learning_rate": 4.9934493730114166e-05, "loss": 0.0694, "step": 280 }, { "epoch": 0.04, "learning_rate": 4.993402582818641e-05, "loss": 0.0588, "step": 282 }, { "epoch": 0.04, "learning_rate": 4.993355792625866e-05, "loss": 0.0712, "step": 284 }, { "epoch": 0.04, "learning_rate": 4.9933090024330904e-05, "loss": 0.0729, "step": 286 }, { "epoch": 0.04, "learning_rate": 4.993262212240314e-05, "loss": 0.0628, "step": 288 }, { "epoch": 0.04, "learning_rate": 4.993215422047539e-05, "loss": 0.0781, "step": 290 }, { "epoch": 0.04, "learning_rate": 4.9931686318547634e-05, "loss": 0.0719, "step": 292 }, { "epoch": 0.04, "learning_rate": 4.993121841661988e-05, "loss": 0.0735, "step": 294 }, { "epoch": 0.04, "learning_rate": 4.993075051469212e-05, "loss": 0.0831, "step": 296 }, { "epoch": 0.04, "learning_rate": 4.9930282612764365e-05, "loss": 0.0536, "step": 298 }, { "epoch": 0.04, "learning_rate": 4.992981471083661e-05, "loss": 0.0818, "step": 300 }, { "epoch": 0.04, "learning_rate": 4.992934680890886e-05, "loss": 0.0652, "step": 302 }, { "epoch": 0.04, "learning_rate": 4.9928878906981096e-05, "loss": 0.0744, "step": 304 }, { "epoch": 0.04, "learning_rate": 4.992841100505334e-05, "loss": 0.0641, "step": 306 }, { "epoch": 0.04, "learning_rate": 4.992794310312559e-05, "loss": 0.0605, "step": 308 }, { "epoch": 0.04, "learning_rate": 4.9927475201197834e-05, "loss": 0.0653, "step": 310 }, { "epoch": 0.04, "learning_rate": 4.992700729927007e-05, "loss": 0.0743, "step": 312 }, { "epoch": 0.04, "learning_rate": 4.992653939734232e-05, "loss": 0.0614, "step": 314 }, { "epoch": 0.04, "learning_rate": 4.992607149541456e-05, "loss": 0.0706, "step": 316 }, { "epoch": 0.04, "learning_rate": 4.992560359348681e-05, "loss": 0.0712, "step": 318 }, { "epoch": 0.04, "learning_rate": 4.992513569155905e-05, "loss": 0.0667, "step": 320 }, { "epoch": 0.05, "learning_rate": 4.9924667789631296e-05, "loss": 0.0829, "step": 322 }, { "epoch": 0.05, "learning_rate": 4.9924199887703535e-05, "loss": 0.0632, "step": 324 }, { "epoch": 0.05, "learning_rate": 4.992373198577579e-05, "loss": 0.0668, "step": 326 }, { "epoch": 0.05, "learning_rate": 4.992326408384803e-05, "loss": 0.074, "step": 328 }, { "epoch": 0.05, "learning_rate": 4.992279618192027e-05, "loss": 0.0648, "step": 330 }, { "epoch": 0.05, "learning_rate": 4.992232827999251e-05, "loss": 0.0703, "step": 332 }, { "epoch": 0.05, "learning_rate": 4.992186037806476e-05, "loss": 0.0689, "step": 334 }, { "epoch": 0.05, "learning_rate": 4.9921392476137004e-05, "loss": 0.0587, "step": 336 }, { "epoch": 0.05, "learning_rate": 4.992092457420925e-05, "loss": 0.0486, "step": 338 }, { "epoch": 0.05, "learning_rate": 4.992045667228149e-05, "loss": 0.0688, "step": 340 }, { "epoch": 0.05, "learning_rate": 4.9919988770353735e-05, "loss": 0.0746, "step": 342 }, { "epoch": 0.05, "learning_rate": 4.991952086842598e-05, "loss": 0.064, "step": 344 }, { "epoch": 0.05, "learning_rate": 4.991905296649823e-05, "loss": 0.0703, "step": 346 }, { "epoch": 0.05, "learning_rate": 4.9918585064570466e-05, "loss": 0.0582, "step": 348 }, { "epoch": 0.05, "learning_rate": 4.991811716264271e-05, "loss": 0.0926, "step": 350 }, { "epoch": 0.05, "learning_rate": 4.991764926071496e-05, "loss": 0.073, "step": 352 }, { "epoch": 0.05, "learning_rate": 4.9917181358787204e-05, "loss": 0.0732, "step": 354 }, { "epoch": 0.05, "learning_rate": 4.991671345685944e-05, "loss": 0.0916, "step": 356 }, { "epoch": 0.05, "learning_rate": 4.991624555493169e-05, "loss": 0.055, "step": 358 }, { "epoch": 0.05, "learning_rate": 4.9915777653003934e-05, "loss": 0.0583, "step": 360 }, { "epoch": 0.05, "learning_rate": 4.991530975107618e-05, "loss": 0.0512, "step": 362 }, { "epoch": 0.05, "learning_rate": 4.991484184914842e-05, "loss": 0.0686, "step": 364 }, { "epoch": 0.05, "learning_rate": 4.9914373947220665e-05, "loss": 0.0654, "step": 366 }, { "epoch": 0.05, "learning_rate": 4.9913906045292905e-05, "loss": 0.0615, "step": 368 }, { "epoch": 0.05, "learning_rate": 4.991343814336516e-05, "loss": 0.0862, "step": 370 }, { "epoch": 0.05, "learning_rate": 4.9912970241437396e-05, "loss": 0.0641, "step": 372 }, { "epoch": 0.05, "learning_rate": 4.991250233950964e-05, "loss": 0.0601, "step": 374 }, { "epoch": 0.05, "learning_rate": 4.991203443758188e-05, "loss": 0.0584, "step": 376 }, { "epoch": 0.05, "learning_rate": 4.991156653565413e-05, "loss": 0.0663, "step": 378 }, { "epoch": 0.05, "learning_rate": 4.991109863372637e-05, "loss": 0.0858, "step": 380 }, { "epoch": 0.05, "learning_rate": 4.991063073179862e-05, "loss": 0.0638, "step": 382 }, { "epoch": 0.05, "learning_rate": 4.991016282987086e-05, "loss": 0.0453, "step": 384 }, { "epoch": 0.05, "learning_rate": 4.9909694927943104e-05, "loss": 0.0616, "step": 386 }, { "epoch": 0.05, "learning_rate": 4.990922702601535e-05, "loss": 0.0885, "step": 388 }, { "epoch": 0.05, "learning_rate": 4.9908759124087596e-05, "loss": 0.078, "step": 390 }, { "epoch": 0.06, "learning_rate": 4.9908291222159835e-05, "loss": 0.0484, "step": 392 }, { "epoch": 0.06, "learning_rate": 4.990782332023208e-05, "loss": 0.0595, "step": 394 }, { "epoch": 0.06, "learning_rate": 4.990735541830433e-05, "loss": 0.0726, "step": 396 }, { "epoch": 0.06, "learning_rate": 4.990688751637657e-05, "loss": 0.0838, "step": 398 }, { "epoch": 0.06, "learning_rate": 4.990641961444881e-05, "loss": 0.0652, "step": 400 }, { "epoch": 0.06, "learning_rate": 4.990595171252106e-05, "loss": 0.0673, "step": 402 }, { "epoch": 0.06, "learning_rate": 4.9905483810593304e-05, "loss": 0.055, "step": 404 }, { "epoch": 0.06, "learning_rate": 4.990501590866555e-05, "loss": 0.0759, "step": 406 }, { "epoch": 0.06, "learning_rate": 4.990454800673779e-05, "loss": 0.0616, "step": 408 }, { "epoch": 0.06, "learning_rate": 4.9904080104810035e-05, "loss": 0.0933, "step": 410 }, { "epoch": 0.06, "learning_rate": 4.9903612202882274e-05, "loss": 0.0647, "step": 412 }, { "epoch": 0.06, "learning_rate": 4.990314430095453e-05, "loss": 0.0527, "step": 414 }, { "epoch": 0.06, "learning_rate": 4.9902676399026766e-05, "loss": 0.0578, "step": 416 }, { "epoch": 0.06, "learning_rate": 4.990220849709901e-05, "loss": 0.0808, "step": 418 }, { "epoch": 0.06, "learning_rate": 4.990174059517125e-05, "loss": 0.0793, "step": 420 }, { "epoch": 0.06, "learning_rate": 4.9901272693243504e-05, "loss": 0.0648, "step": 422 }, { "epoch": 0.06, "learning_rate": 4.990080479131574e-05, "loss": 0.0716, "step": 424 }, { "epoch": 0.06, "learning_rate": 4.990033688938799e-05, "loss": 0.0825, "step": 426 }, { "epoch": 0.06, "learning_rate": 4.989986898746023e-05, "loss": 0.0908, "step": 428 }, { "epoch": 0.06, "learning_rate": 4.9899401085532474e-05, "loss": 0.0688, "step": 430 }, { "epoch": 0.06, "learning_rate": 4.989893318360472e-05, "loss": 0.0669, "step": 432 }, { "epoch": 0.06, "learning_rate": 4.9898465281676965e-05, "loss": 0.0735, "step": 434 }, { "epoch": 0.06, "learning_rate": 4.9897997379749205e-05, "loss": 0.0817, "step": 436 }, { "epoch": 0.06, "learning_rate": 4.989752947782145e-05, "loss": 0.0792, "step": 438 }, { "epoch": 0.06, "learning_rate": 4.9897061575893696e-05, "loss": 0.0509, "step": 440 }, { "epoch": 0.06, "learning_rate": 4.989659367396594e-05, "loss": 0.0809, "step": 442 }, { "epoch": 0.06, "learning_rate": 4.989612577203818e-05, "loss": 0.0654, "step": 444 }, { "epoch": 0.06, "learning_rate": 4.989565787011043e-05, "loss": 0.0608, "step": 446 }, { "epoch": 0.06, "learning_rate": 4.989518996818267e-05, "loss": 0.0928, "step": 448 }, { "epoch": 0.06, "learning_rate": 4.989472206625492e-05, "loss": 0.0554, "step": 450 }, { "epoch": 0.06, "learning_rate": 4.989425416432716e-05, "loss": 0.0559, "step": 452 }, { "epoch": 0.06, "learning_rate": 4.9893786262399404e-05, "loss": 0.0871, "step": 454 }, { "epoch": 0.06, "learning_rate": 4.989331836047165e-05, "loss": 0.0724, "step": 456 }, { "epoch": 0.06, "learning_rate": 4.9892850458543896e-05, "loss": 0.0672, "step": 458 }, { "epoch": 0.06, "learning_rate": 4.9892382556616135e-05, "loss": 0.0602, "step": 460 }, { "epoch": 0.06, "learning_rate": 4.989191465468838e-05, "loss": 0.0616, "step": 462 }, { "epoch": 0.07, "learning_rate": 4.989144675276062e-05, "loss": 0.0732, "step": 464 }, { "epoch": 0.07, "learning_rate": 4.989097885083287e-05, "loss": 0.0668, "step": 466 }, { "epoch": 0.07, "learning_rate": 4.989051094890511e-05, "loss": 0.0848, "step": 468 }, { "epoch": 0.07, "learning_rate": 4.989004304697736e-05, "loss": 0.0656, "step": 470 }, { "epoch": 0.07, "learning_rate": 4.98895751450496e-05, "loss": 0.0609, "step": 472 }, { "epoch": 0.07, "learning_rate": 4.988910724312185e-05, "loss": 0.067, "step": 474 }, { "epoch": 0.07, "learning_rate": 4.988863934119409e-05, "loss": 0.0814, "step": 476 }, { "epoch": 0.07, "learning_rate": 4.9888171439266335e-05, "loss": 0.0736, "step": 478 }, { "epoch": 0.07, "learning_rate": 4.9887703537338574e-05, "loss": 0.0638, "step": 480 }, { "epoch": 0.07, "learning_rate": 4.988723563541082e-05, "loss": 0.0587, "step": 482 }, { "epoch": 0.07, "learning_rate": 4.9886767733483066e-05, "loss": 0.056, "step": 484 }, { "epoch": 0.07, "learning_rate": 4.988629983155531e-05, "loss": 0.0593, "step": 486 }, { "epoch": 0.07, "learning_rate": 4.988583192962755e-05, "loss": 0.0738, "step": 488 }, { "epoch": 0.07, "learning_rate": 4.98853640276998e-05, "loss": 0.0849, "step": 490 }, { "epoch": 0.07, "learning_rate": 4.988489612577204e-05, "loss": 0.0673, "step": 492 }, { "epoch": 0.07, "learning_rate": 4.988442822384429e-05, "loss": 0.0864, "step": 494 }, { "epoch": 0.07, "learning_rate": 4.988396032191653e-05, "loss": 0.0738, "step": 496 }, { "epoch": 0.07, "learning_rate": 4.9883492419988774e-05, "loss": 0.0748, "step": 498 }, { "epoch": 0.07, "learning_rate": 4.988302451806102e-05, "loss": 0.0529, "step": 500 }, { "epoch": 0.07, "learning_rate": 4.9882556616133265e-05, "loss": 0.0544, "step": 502 }, { "epoch": 0.07, "learning_rate": 4.9882088714205505e-05, "loss": 0.077, "step": 504 }, { "epoch": 0.07, "learning_rate": 4.988162081227775e-05, "loss": 0.0706, "step": 506 }, { "epoch": 0.07, "learning_rate": 4.988115291034999e-05, "loss": 0.0729, "step": 508 }, { "epoch": 0.07, "learning_rate": 4.9880685008422236e-05, "loss": 0.0812, "step": 510 }, { "epoch": 0.07, "learning_rate": 4.988021710649448e-05, "loss": 0.067, "step": 512 }, { "epoch": 0.07, "learning_rate": 4.987974920456672e-05, "loss": 0.0565, "step": 514 }, { "epoch": 0.07, "learning_rate": 4.9879281302638967e-05, "loss": 0.0597, "step": 516 }, { "epoch": 0.07, "learning_rate": 4.987881340071121e-05, "loss": 0.095, "step": 518 }, { "epoch": 0.07, "learning_rate": 4.987834549878346e-05, "loss": 0.0729, "step": 520 }, { "epoch": 0.07, "learning_rate": 4.98778775968557e-05, "loss": 0.0743, "step": 522 }, { "epoch": 0.07, "learning_rate": 4.9877409694927943e-05, "loss": 0.0656, "step": 524 }, { "epoch": 0.07, "learning_rate": 4.987694179300019e-05, "loss": 0.0656, "step": 526 }, { "epoch": 0.07, "learning_rate": 4.9876473891072435e-05, "loss": 0.0761, "step": 528 }, { "epoch": 0.07, "learning_rate": 4.9876005989144674e-05, "loss": 0.0566, "step": 530 }, { "epoch": 0.07, "learning_rate": 4.987553808721692e-05, "loss": 0.0812, "step": 532 }, { "epoch": 0.07, "learning_rate": 4.9875070185289166e-05, "loss": 0.0558, "step": 534 }, { "epoch": 0.08, "learning_rate": 4.987460228336141e-05, "loss": 0.08, "step": 536 }, { "epoch": 0.08, "learning_rate": 4.987413438143365e-05, "loss": 0.0568, "step": 538 }, { "epoch": 0.08, "learning_rate": 4.98736664795059e-05, "loss": 0.059, "step": 540 }, { "epoch": 0.08, "learning_rate": 4.9873198577578136e-05, "loss": 0.0801, "step": 542 }, { "epoch": 0.08, "learning_rate": 4.987273067565039e-05, "loss": 0.0843, "step": 544 }, { "epoch": 0.08, "learning_rate": 4.987226277372263e-05, "loss": 0.0663, "step": 546 }, { "epoch": 0.08, "learning_rate": 4.9871794871794874e-05, "loss": 0.0791, "step": 548 }, { "epoch": 0.08, "learning_rate": 4.987132696986711e-05, "loss": 0.0614, "step": 550 }, { "epoch": 0.08, "learning_rate": 4.9870859067939366e-05, "loss": 0.0644, "step": 552 }, { "epoch": 0.08, "learning_rate": 4.9870391166011605e-05, "loss": 0.0819, "step": 554 }, { "epoch": 0.08, "learning_rate": 4.986992326408385e-05, "loss": 0.0612, "step": 556 }, { "epoch": 0.08, "learning_rate": 4.986945536215609e-05, "loss": 0.0671, "step": 558 }, { "epoch": 0.08, "learning_rate": 4.9868987460228336e-05, "loss": 0.0697, "step": 560 }, { "epoch": 0.08, "learning_rate": 4.986851955830058e-05, "loss": 0.0684, "step": 562 }, { "epoch": 0.08, "learning_rate": 4.986805165637283e-05, "loss": 0.0503, "step": 564 }, { "epoch": 0.08, "learning_rate": 4.986758375444507e-05, "loss": 0.1045, "step": 566 }, { "epoch": 0.08, "learning_rate": 4.986711585251731e-05, "loss": 0.0755, "step": 568 }, { "epoch": 0.08, "learning_rate": 4.986664795058956e-05, "loss": 0.0808, "step": 570 }, { "epoch": 0.08, "learning_rate": 4.9866180048661805e-05, "loss": 0.0615, "step": 572 }, { "epoch": 0.08, "learning_rate": 4.9865712146734044e-05, "loss": 0.0703, "step": 574 }, { "epoch": 0.08, "learning_rate": 4.986524424480629e-05, "loss": 0.0816, "step": 576 }, { "epoch": 0.08, "learning_rate": 4.9864776342878536e-05, "loss": 0.1, "step": 578 }, { "epoch": 0.08, "learning_rate": 4.986430844095078e-05, "loss": 0.0772, "step": 580 }, { "epoch": 0.08, "learning_rate": 4.986384053902302e-05, "loss": 0.0676, "step": 582 }, { "epoch": 0.08, "learning_rate": 4.9863372637095267e-05, "loss": 0.065, "step": 584 }, { "epoch": 0.08, "learning_rate": 4.986290473516751e-05, "loss": 0.0518, "step": 586 }, { "epoch": 0.08, "learning_rate": 4.986243683323976e-05, "loss": 0.0837, "step": 588 }, { "epoch": 0.08, "learning_rate": 4.9861968931312e-05, "loss": 0.0935, "step": 590 }, { "epoch": 0.08, "learning_rate": 4.9861501029384243e-05, "loss": 0.0638, "step": 592 }, { "epoch": 0.08, "learning_rate": 4.986103312745648e-05, "loss": 0.0594, "step": 594 }, { "epoch": 0.08, "learning_rate": 4.9860565225528735e-05, "loss": 0.0801, "step": 596 }, { "epoch": 0.08, "learning_rate": 4.9860097323600974e-05, "loss": 0.083, "step": 598 }, { "epoch": 0.08, "learning_rate": 4.985962942167322e-05, "loss": 0.0821, "step": 600 }, { "epoch": 0.08, "learning_rate": 4.985916151974546e-05, "loss": 0.0725, "step": 602 }, { "epoch": 0.08, "learning_rate": 4.985869361781771e-05, "loss": 0.0954, "step": 604 }, { "epoch": 0.09, "learning_rate": 4.985822571588995e-05, "loss": 0.0728, "step": 606 }, { "epoch": 0.09, "learning_rate": 4.98577578139622e-05, "loss": 0.0852, "step": 608 }, { "epoch": 0.09, "learning_rate": 4.9857289912034436e-05, "loss": 0.0623, "step": 610 }, { "epoch": 0.09, "learning_rate": 4.985682201010668e-05, "loss": 0.0678, "step": 612 }, { "epoch": 0.09, "learning_rate": 4.985635410817893e-05, "loss": 0.0754, "step": 614 }, { "epoch": 0.09, "learning_rate": 4.9855886206251174e-05, "loss": 0.0643, "step": 616 }, { "epoch": 0.09, "learning_rate": 4.985541830432341e-05, "loss": 0.0607, "step": 618 }, { "epoch": 0.09, "learning_rate": 4.985495040239566e-05, "loss": 0.0761, "step": 620 }, { "epoch": 0.09, "learning_rate": 4.9854482500467905e-05, "loss": 0.0524, "step": 622 }, { "epoch": 0.09, "learning_rate": 4.985401459854015e-05, "loss": 0.0739, "step": 624 }, { "epoch": 0.09, "learning_rate": 4.985354669661239e-05, "loss": 0.0671, "step": 626 }, { "epoch": 0.09, "learning_rate": 4.9853078794684636e-05, "loss": 0.1179, "step": 628 }, { "epoch": 0.09, "learning_rate": 4.985261089275688e-05, "loss": 0.0855, "step": 630 }, { "epoch": 0.09, "learning_rate": 4.985214299082913e-05, "loss": 0.0785, "step": 632 }, { "epoch": 0.09, "learning_rate": 4.985167508890137e-05, "loss": 0.085, "step": 634 }, { "epoch": 0.09, "learning_rate": 4.985120718697361e-05, "loss": 0.0555, "step": 636 }, { "epoch": 0.09, "learning_rate": 4.985073928504586e-05, "loss": 0.0648, "step": 638 }, { "epoch": 0.09, "learning_rate": 4.9850271383118105e-05, "loss": 0.0983, "step": 640 }, { "epoch": 0.09, "learning_rate": 4.9849803481190344e-05, "loss": 0.065, "step": 642 }, { "epoch": 0.09, "learning_rate": 4.984933557926259e-05, "loss": 0.0807, "step": 644 }, { "epoch": 0.09, "learning_rate": 4.984886767733483e-05, "loss": 0.081, "step": 646 }, { "epoch": 0.09, "learning_rate": 4.984839977540708e-05, "loss": 0.067, "step": 648 }, { "epoch": 0.09, "learning_rate": 4.984793187347932e-05, "loss": 0.0582, "step": 650 }, { "epoch": 0.09, "learning_rate": 4.9847463971551567e-05, "loss": 0.061, "step": 652 }, { "epoch": 0.09, "learning_rate": 4.9846996069623806e-05, "loss": 0.0615, "step": 654 }, { "epoch": 0.09, "learning_rate": 4.984652816769605e-05, "loss": 0.0744, "step": 656 }, { "epoch": 0.09, "learning_rate": 4.98460602657683e-05, "loss": 0.0589, "step": 658 }, { "epoch": 0.09, "learning_rate": 4.9845592363840543e-05, "loss": 0.0687, "step": 660 }, { "epoch": 0.09, "learning_rate": 4.984512446191278e-05, "loss": 0.0741, "step": 662 }, { "epoch": 0.09, "learning_rate": 4.984465655998503e-05, "loss": 0.0739, "step": 664 }, { "epoch": 0.09, "learning_rate": 4.9844188658057274e-05, "loss": 0.0726, "step": 666 }, { "epoch": 0.09, "learning_rate": 4.984372075612952e-05, "loss": 0.0584, "step": 668 }, { "epoch": 0.09, "learning_rate": 4.984325285420176e-05, "loss": 0.0637, "step": 670 }, { "epoch": 0.09, "learning_rate": 4.9842784952274005e-05, "loss": 0.0623, "step": 672 }, { "epoch": 0.09, "learning_rate": 4.984231705034625e-05, "loss": 0.0817, "step": 674 }, { "epoch": 0.09, "learning_rate": 4.98418491484185e-05, "loss": 0.0579, "step": 676 }, { "epoch": 0.1, "learning_rate": 4.9841381246490736e-05, "loss": 0.0798, "step": 678 }, { "epoch": 0.1, "learning_rate": 4.984091334456298e-05, "loss": 0.08, "step": 680 }, { "epoch": 0.1, "learning_rate": 4.984044544263523e-05, "loss": 0.0769, "step": 682 }, { "epoch": 0.1, "learning_rate": 4.9839977540707474e-05, "loss": 0.0791, "step": 684 }, { "epoch": 0.1, "learning_rate": 4.983950963877971e-05, "loss": 0.0704, "step": 686 }, { "epoch": 0.1, "learning_rate": 4.983904173685196e-05, "loss": 0.0652, "step": 688 }, { "epoch": 0.1, "learning_rate": 4.98385738349242e-05, "loss": 0.0667, "step": 690 }, { "epoch": 0.1, "learning_rate": 4.983810593299645e-05, "loss": 0.1066, "step": 692 }, { "epoch": 0.1, "learning_rate": 4.983763803106869e-05, "loss": 0.0738, "step": 694 }, { "epoch": 0.1, "learning_rate": 4.9837170129140936e-05, "loss": 0.0779, "step": 696 }, { "epoch": 0.1, "learning_rate": 4.9836702227213175e-05, "loss": 0.0779, "step": 698 }, { "epoch": 0.1, "learning_rate": 4.983623432528543e-05, "loss": 0.0608, "step": 700 }, { "epoch": 0.1, "learning_rate": 4.983576642335767e-05, "loss": 0.082, "step": 702 }, { "epoch": 0.1, "learning_rate": 4.983529852142991e-05, "loss": 0.0693, "step": 704 }, { "epoch": 0.1, "learning_rate": 4.983483061950215e-05, "loss": 0.0889, "step": 706 }, { "epoch": 0.1, "learning_rate": 4.98343627175744e-05, "loss": 0.0858, "step": 708 }, { "epoch": 0.1, "learning_rate": 4.9833894815646644e-05, "loss": 0.0742, "step": 710 }, { "epoch": 0.1, "learning_rate": 4.983342691371889e-05, "loss": 0.0764, "step": 712 }, { "epoch": 0.1, "learning_rate": 4.983295901179113e-05, "loss": 0.0766, "step": 714 }, { "epoch": 0.1, "learning_rate": 4.9832491109863375e-05, "loss": 0.0643, "step": 716 }, { "epoch": 0.1, "learning_rate": 4.983202320793562e-05, "loss": 0.0594, "step": 718 }, { "epoch": 0.1, "learning_rate": 4.9831555306007867e-05, "loss": 0.0705, "step": 720 }, { "epoch": 0.1, "learning_rate": 4.9831087404080106e-05, "loss": 0.0614, "step": 722 }, { "epoch": 0.1, "learning_rate": 4.983061950215235e-05, "loss": 0.0734, "step": 724 }, { "epoch": 0.1, "learning_rate": 4.98301516002246e-05, "loss": 0.0642, "step": 726 }, { "epoch": 0.1, "learning_rate": 4.9829683698296843e-05, "loss": 0.0855, "step": 728 }, { "epoch": 0.1, "learning_rate": 4.982921579636908e-05, "loss": 0.0674, "step": 730 }, { "epoch": 0.1, "learning_rate": 4.982874789444133e-05, "loss": 0.0818, "step": 732 }, { "epoch": 0.1, "learning_rate": 4.9828279992513574e-05, "loss": 0.0786, "step": 734 }, { "epoch": 0.1, "learning_rate": 4.982781209058582e-05, "loss": 0.0543, "step": 736 }, { "epoch": 0.1, "learning_rate": 4.982734418865806e-05, "loss": 0.0741, "step": 738 }, { "epoch": 0.1, "learning_rate": 4.9826876286730305e-05, "loss": 0.0657, "step": 740 }, { "epoch": 0.1, "learning_rate": 4.9826408384802545e-05, "loss": 0.0634, "step": 742 }, { "epoch": 0.1, "learning_rate": 4.98259404828748e-05, "loss": 0.0747, "step": 744 }, { "epoch": 0.1, "learning_rate": 4.9825472580947036e-05, "loss": 0.0765, "step": 746 }, { "epoch": 0.1, "learning_rate": 4.982500467901928e-05, "loss": 0.0785, "step": 748 }, { "epoch": 0.11, "learning_rate": 4.982453677709152e-05, "loss": 0.0719, "step": 750 }, { "epoch": 0.11, "learning_rate": 4.9824068875163774e-05, "loss": 0.0752, "step": 752 }, { "epoch": 0.11, "learning_rate": 4.982360097323601e-05, "loss": 0.0667, "step": 754 }, { "epoch": 0.11, "learning_rate": 4.982313307130826e-05, "loss": 0.0795, "step": 756 }, { "epoch": 0.11, "learning_rate": 4.98226651693805e-05, "loss": 0.0718, "step": 758 }, { "epoch": 0.11, "learning_rate": 4.9822197267452744e-05, "loss": 0.0618, "step": 760 }, { "epoch": 0.11, "learning_rate": 4.982172936552499e-05, "loss": 0.0672, "step": 762 }, { "epoch": 0.11, "learning_rate": 4.982126146359723e-05, "loss": 0.0586, "step": 764 }, { "epoch": 0.11, "learning_rate": 4.9820793561669475e-05, "loss": 0.0782, "step": 766 }, { "epoch": 0.11, "learning_rate": 4.982032565974172e-05, "loss": 0.086, "step": 768 }, { "epoch": 0.11, "learning_rate": 4.981985775781397e-05, "loss": 0.0726, "step": 770 }, { "epoch": 0.11, "learning_rate": 4.9819389855886206e-05, "loss": 0.0705, "step": 772 }, { "epoch": 0.11, "learning_rate": 4.981892195395845e-05, "loss": 0.0829, "step": 774 }, { "epoch": 0.11, "learning_rate": 4.981845405203069e-05, "loss": 0.0539, "step": 776 }, { "epoch": 0.11, "learning_rate": 4.9817986150102944e-05, "loss": 0.0653, "step": 778 }, { "epoch": 0.11, "learning_rate": 4.981751824817518e-05, "loss": 0.0795, "step": 780 }, { "epoch": 0.11, "learning_rate": 4.981705034624743e-05, "loss": 0.0688, "step": 782 }, { "epoch": 0.11, "learning_rate": 4.981658244431967e-05, "loss": 0.0733, "step": 784 }, { "epoch": 0.11, "learning_rate": 4.981611454239192e-05, "loss": 0.079, "step": 786 }, { "epoch": 0.11, "learning_rate": 4.981564664046416e-05, "loss": 0.0719, "step": 788 }, { "epoch": 0.11, "learning_rate": 4.9815178738536406e-05, "loss": 0.0719, "step": 790 }, { "epoch": 0.11, "learning_rate": 4.9814710836608645e-05, "loss": 0.0795, "step": 792 }, { "epoch": 0.11, "learning_rate": 4.981424293468089e-05, "loss": 0.0839, "step": 794 }, { "epoch": 0.11, "learning_rate": 4.981377503275314e-05, "loss": 0.0543, "step": 796 }, { "epoch": 0.11, "learning_rate": 4.981330713082538e-05, "loss": 0.0901, "step": 798 }, { "epoch": 0.11, "learning_rate": 4.981283922889762e-05, "loss": 0.085, "step": 800 }, { "epoch": 0.11, "learning_rate": 4.981237132696987e-05, "loss": 0.0841, "step": 802 }, { "epoch": 0.11, "learning_rate": 4.9811903425042114e-05, "loss": 0.0873, "step": 804 }, { "epoch": 0.11, "learning_rate": 4.981143552311436e-05, "loss": 0.0738, "step": 806 }, { "epoch": 0.11, "learning_rate": 4.98109676211866e-05, "loss": 0.0911, "step": 808 }, { "epoch": 0.11, "learning_rate": 4.9810499719258845e-05, "loss": 0.0635, "step": 810 }, { "epoch": 0.11, "learning_rate": 4.981003181733109e-05, "loss": 0.0814, "step": 812 }, { "epoch": 0.11, "learning_rate": 4.9809563915403336e-05, "loss": 0.0875, "step": 814 }, { "epoch": 0.11, "learning_rate": 4.9809096013475575e-05, "loss": 0.0586, "step": 816 }, { "epoch": 0.11, "learning_rate": 4.980862811154782e-05, "loss": 0.076, "step": 818 }, { "epoch": 0.12, "learning_rate": 4.980816020962006e-05, "loss": 0.0733, "step": 820 }, { "epoch": 0.12, "learning_rate": 4.980769230769231e-05, "loss": 0.0748, "step": 822 }, { "epoch": 0.12, "learning_rate": 4.980722440576455e-05, "loss": 0.0799, "step": 824 }, { "epoch": 0.12, "learning_rate": 4.98067565038368e-05, "loss": 0.0687, "step": 826 }, { "epoch": 0.12, "learning_rate": 4.980628860190904e-05, "loss": 0.0638, "step": 828 }, { "epoch": 0.12, "learning_rate": 4.980582069998129e-05, "loss": 0.0611, "step": 830 }, { "epoch": 0.12, "learning_rate": 4.980535279805353e-05, "loss": 0.0895, "step": 832 }, { "epoch": 0.12, "learning_rate": 4.9804884896125775e-05, "loss": 0.0886, "step": 834 }, { "epoch": 0.12, "learning_rate": 4.9804416994198014e-05, "loss": 0.0741, "step": 836 }, { "epoch": 0.12, "learning_rate": 4.980394909227026e-05, "loss": 0.0634, "step": 838 }, { "epoch": 0.12, "learning_rate": 4.9803481190342506e-05, "loss": 0.0722, "step": 840 }, { "epoch": 0.12, "learning_rate": 4.980301328841475e-05, "loss": 0.079, "step": 842 }, { "epoch": 0.12, "learning_rate": 4.980254538648699e-05, "loss": 0.0603, "step": 844 }, { "epoch": 0.12, "learning_rate": 4.980207748455924e-05, "loss": 0.0672, "step": 846 }, { "epoch": 0.12, "learning_rate": 4.980160958263148e-05, "loss": 0.0855, "step": 848 }, { "epoch": 0.12, "learning_rate": 4.980114168070373e-05, "loss": 0.0704, "step": 850 }, { "epoch": 0.12, "learning_rate": 4.980067377877597e-05, "loss": 0.0627, "step": 852 }, { "epoch": 0.12, "learning_rate": 4.9800205876848214e-05, "loss": 0.0737, "step": 854 }, { "epoch": 0.12, "learning_rate": 4.979973797492046e-05, "loss": 0.064, "step": 856 }, { "epoch": 0.12, "learning_rate": 4.9799270072992706e-05, "loss": 0.0789, "step": 858 }, { "epoch": 0.12, "learning_rate": 4.9798802171064945e-05, "loss": 0.0821, "step": 860 }, { "epoch": 0.12, "learning_rate": 4.979833426913719e-05, "loss": 0.0795, "step": 862 }, { "epoch": 0.12, "learning_rate": 4.979786636720944e-05, "loss": 0.0738, "step": 864 }, { "epoch": 0.12, "learning_rate": 4.979739846528168e-05, "loss": 0.0792, "step": 866 }, { "epoch": 0.12, "learning_rate": 4.979693056335392e-05, "loss": 0.0799, "step": 868 }, { "epoch": 0.12, "learning_rate": 4.979646266142617e-05, "loss": 0.0891, "step": 870 }, { "epoch": 0.12, "learning_rate": 4.979599475949841e-05, "loss": 0.076, "step": 872 }, { "epoch": 0.12, "learning_rate": 4.979552685757066e-05, "loss": 0.084, "step": 874 }, { "epoch": 0.12, "learning_rate": 4.97950589556429e-05, "loss": 0.1022, "step": 876 }, { "epoch": 0.12, "learning_rate": 4.9794591053715145e-05, "loss": 0.0674, "step": 878 }, { "epoch": 0.12, "learning_rate": 4.9794123151787384e-05, "loss": 0.0637, "step": 880 }, { "epoch": 0.12, "learning_rate": 4.9793655249859636e-05, "loss": 0.0675, "step": 882 }, { "epoch": 0.12, "learning_rate": 4.9793187347931876e-05, "loss": 0.0714, "step": 884 }, { "epoch": 0.12, "learning_rate": 4.979271944600412e-05, "loss": 0.0624, "step": 886 }, { "epoch": 0.12, "learning_rate": 4.979225154407636e-05, "loss": 0.0751, "step": 888 }, { "epoch": 0.12, "learning_rate": 4.9791783642148606e-05, "loss": 0.0722, "step": 890 }, { "epoch": 0.13, "learning_rate": 4.979131574022085e-05, "loss": 0.0787, "step": 892 }, { "epoch": 0.13, "learning_rate": 4.97908478382931e-05, "loss": 0.0885, "step": 894 }, { "epoch": 0.13, "learning_rate": 4.979037993636534e-05, "loss": 0.084, "step": 896 }, { "epoch": 0.13, "learning_rate": 4.978991203443758e-05, "loss": 0.0701, "step": 898 }, { "epoch": 0.13, "learning_rate": 4.978944413250983e-05, "loss": 0.0832, "step": 900 }, { "epoch": 0.13, "learning_rate": 4.9788976230582075e-05, "loss": 0.0629, "step": 902 }, { "epoch": 0.13, "learning_rate": 4.9788508328654314e-05, "loss": 0.0877, "step": 904 }, { "epoch": 0.13, "learning_rate": 4.978804042672656e-05, "loss": 0.0544, "step": 906 }, { "epoch": 0.13, "learning_rate": 4.9787572524798806e-05, "loss": 0.0992, "step": 908 }, { "epoch": 0.13, "learning_rate": 4.978710462287105e-05, "loss": 0.0708, "step": 910 }, { "epoch": 0.13, "learning_rate": 4.978663672094329e-05, "loss": 0.1194, "step": 912 }, { "epoch": 0.13, "learning_rate": 4.978616881901554e-05, "loss": 0.0667, "step": 914 }, { "epoch": 0.13, "learning_rate": 4.978570091708778e-05, "loss": 0.0704, "step": 916 }, { "epoch": 0.13, "learning_rate": 4.978523301516003e-05, "loss": 0.0848, "step": 918 }, { "epoch": 0.13, "learning_rate": 4.978476511323227e-05, "loss": 0.0714, "step": 920 }, { "epoch": 0.13, "learning_rate": 4.9784297211304514e-05, "loss": 0.0827, "step": 922 }, { "epoch": 0.13, "learning_rate": 4.978382930937675e-05, "loss": 0.0753, "step": 924 }, { "epoch": 0.13, "learning_rate": 4.9783361407449006e-05, "loss": 0.0912, "step": 926 }, { "epoch": 0.13, "learning_rate": 4.9782893505521245e-05, "loss": 0.0688, "step": 928 }, { "epoch": 0.13, "learning_rate": 4.978242560359349e-05, "loss": 0.0669, "step": 930 }, { "epoch": 0.13, "learning_rate": 4.978195770166573e-05, "loss": 0.0779, "step": 932 }, { "epoch": 0.13, "learning_rate": 4.9781489799737976e-05, "loss": 0.0745, "step": 934 }, { "epoch": 0.13, "learning_rate": 4.978102189781022e-05, "loss": 0.0952, "step": 936 }, { "epoch": 0.13, "learning_rate": 4.978055399588247e-05, "loss": 0.0925, "step": 938 }, { "epoch": 0.13, "learning_rate": 4.978008609395471e-05, "loss": 0.0822, "step": 940 }, { "epoch": 0.13, "learning_rate": 4.977961819202695e-05, "loss": 0.0715, "step": 942 }, { "epoch": 0.13, "learning_rate": 4.97791502900992e-05, "loss": 0.0817, "step": 944 }, { "epoch": 0.13, "learning_rate": 4.9778682388171445e-05, "loss": 0.0675, "step": 946 }, { "epoch": 0.13, "learning_rate": 4.9778214486243684e-05, "loss": 0.0612, "step": 948 }, { "epoch": 0.13, "learning_rate": 4.977774658431593e-05, "loss": 0.0768, "step": 950 }, { "epoch": 0.13, "learning_rate": 4.9777278682388176e-05, "loss": 0.1107, "step": 952 }, { "epoch": 0.13, "learning_rate": 4.977681078046042e-05, "loss": 0.0712, "step": 954 }, { "epoch": 0.13, "learning_rate": 4.977634287853266e-05, "loss": 0.0712, "step": 956 }, { "epoch": 0.13, "learning_rate": 4.9775874976604906e-05, "loss": 0.0705, "step": 958 }, { "epoch": 0.13, "learning_rate": 4.977540707467715e-05, "loss": 0.0801, "step": 960 }, { "epoch": 0.14, "learning_rate": 4.97749391727494e-05, "loss": 0.0883, "step": 962 }, { "epoch": 0.14, "learning_rate": 4.977447127082164e-05, "loss": 0.0704, "step": 964 }, { "epoch": 0.14, "learning_rate": 4.977400336889388e-05, "loss": 0.0827, "step": 966 }, { "epoch": 0.14, "learning_rate": 4.977353546696612e-05, "loss": 0.0675, "step": 968 }, { "epoch": 0.14, "learning_rate": 4.9773067565038375e-05, "loss": 0.0628, "step": 970 }, { "epoch": 0.14, "learning_rate": 4.9772599663110614e-05, "loss": 0.0976, "step": 972 }, { "epoch": 0.14, "learning_rate": 4.977213176118286e-05, "loss": 0.0806, "step": 974 }, { "epoch": 0.14, "learning_rate": 4.97716638592551e-05, "loss": 0.0684, "step": 976 }, { "epoch": 0.14, "learning_rate": 4.977119595732735e-05, "loss": 0.069, "step": 978 }, { "epoch": 0.14, "learning_rate": 4.977072805539959e-05, "loss": 0.1089, "step": 980 }, { "epoch": 0.14, "learning_rate": 4.977026015347184e-05, "loss": 0.0738, "step": 982 }, { "epoch": 0.14, "learning_rate": 4.9769792251544076e-05, "loss": 0.0895, "step": 984 }, { "epoch": 0.14, "learning_rate": 4.976932434961632e-05, "loss": 0.0637, "step": 986 }, { "epoch": 0.14, "learning_rate": 4.976885644768857e-05, "loss": 0.0724, "step": 988 }, { "epoch": 0.14, "learning_rate": 4.9768388545760814e-05, "loss": 0.0606, "step": 990 }, { "epoch": 0.14, "learning_rate": 4.976792064383305e-05, "loss": 0.0679, "step": 992 }, { "epoch": 0.14, "learning_rate": 4.97674527419053e-05, "loss": 0.071, "step": 994 }, { "epoch": 0.14, "learning_rate": 4.9766984839977545e-05, "loss": 0.0872, "step": 996 }, { "epoch": 0.14, "learning_rate": 4.976651693804979e-05, "loss": 0.0745, "step": 998 }, { "epoch": 0.14, "learning_rate": 4.976604903612203e-05, "loss": 0.0872, "step": 1000 }, { "epoch": 0.14, "eval_gen_len": 30.8621, "eval_loss": 1.0442978143692017, "eval_meteor": 0.0398, "eval_runtime": 16.8946, "eval_samples_per_second": 3.433, "eval_steps_per_second": 0.474, "step": 1000 }, { "epoch": 0.14, "learning_rate": 4.9765581134194276e-05, "loss": 0.0728, "step": 1002 }, { "epoch": 0.14, "learning_rate": 4.976511323226652e-05, "loss": 0.0724, "step": 1004 }, { "epoch": 0.14, "learning_rate": 4.976464533033877e-05, "loss": 0.0668, "step": 1006 }, { "epoch": 0.14, "learning_rate": 4.976417742841101e-05, "loss": 0.0669, "step": 1008 }, { "epoch": 0.14, "learning_rate": 4.976370952648325e-05, "loss": 0.0745, "step": 1010 }, { "epoch": 0.14, "learning_rate": 4.97632416245555e-05, "loss": 0.0742, "step": 1012 }, { "epoch": 0.14, "learning_rate": 4.976277372262774e-05, "loss": 0.0789, "step": 1014 }, { "epoch": 0.14, "learning_rate": 4.9762305820699984e-05, "loss": 0.0911, "step": 1016 }, { "epoch": 0.14, "learning_rate": 4.976183791877222e-05, "loss": 0.0868, "step": 1018 }, { "epoch": 0.14, "learning_rate": 4.976137001684447e-05, "loss": 0.0685, "step": 1020 }, { "epoch": 0.14, "learning_rate": 4.9760902114916715e-05, "loss": 0.0819, "step": 1022 }, { "epoch": 0.14, "learning_rate": 4.976043421298896e-05, "loss": 0.0797, "step": 1024 }, { "epoch": 0.14, "learning_rate": 4.97599663110612e-05, "loss": 0.0794, "step": 1026 }, { "epoch": 0.14, "learning_rate": 4.9759498409133446e-05, "loss": 0.0893, "step": 1028 }, { "epoch": 0.14, "learning_rate": 4.975903050720569e-05, "loss": 0.1106, "step": 1030 }, { "epoch": 0.14, "learning_rate": 4.975856260527794e-05, "loss": 0.0689, "step": 1032 }, { "epoch": 0.15, "learning_rate": 4.9758094703350177e-05, "loss": 0.0797, "step": 1034 }, { "epoch": 0.15, "learning_rate": 4.975762680142242e-05, "loss": 0.0948, "step": 1036 }, { "epoch": 0.15, "learning_rate": 4.975715889949467e-05, "loss": 0.0694, "step": 1038 }, { "epoch": 0.15, "learning_rate": 4.9756690997566914e-05, "loss": 0.0811, "step": 1040 }, { "epoch": 0.15, "learning_rate": 4.9756223095639153e-05, "loss": 0.0649, "step": 1042 }, { "epoch": 0.15, "learning_rate": 4.97557551937114e-05, "loss": 0.0638, "step": 1044 }, { "epoch": 0.15, "learning_rate": 4.9755287291783645e-05, "loss": 0.0785, "step": 1046 }, { "epoch": 0.15, "learning_rate": 4.975481938985589e-05, "loss": 0.0799, "step": 1048 }, { "epoch": 0.15, "learning_rate": 4.975435148792813e-05, "loss": 0.0764, "step": 1050 }, { "epoch": 0.15, "learning_rate": 4.9753883586000376e-05, "loss": 0.0614, "step": 1052 }, { "epoch": 0.15, "learning_rate": 4.9753415684072615e-05, "loss": 0.096, "step": 1054 }, { "epoch": 0.15, "learning_rate": 4.975294778214487e-05, "loss": 0.0754, "step": 1056 }, { "epoch": 0.15, "learning_rate": 4.975247988021711e-05, "loss": 0.0629, "step": 1058 }, { "epoch": 0.15, "learning_rate": 4.975201197828935e-05, "loss": 0.0691, "step": 1060 }, { "epoch": 0.15, "learning_rate": 4.975154407636159e-05, "loss": 0.0723, "step": 1062 }, { "epoch": 0.15, "learning_rate": 4.9751076174433845e-05, "loss": 0.0716, "step": 1064 }, { "epoch": 0.15, "learning_rate": 4.9750608272506084e-05, "loss": 0.103, "step": 1066 }, { "epoch": 0.15, "learning_rate": 4.975014037057833e-05, "loss": 0.0746, "step": 1068 }, { "epoch": 0.15, "learning_rate": 4.974967246865057e-05, "loss": 0.0882, "step": 1070 }, { "epoch": 0.15, "learning_rate": 4.9749204566722815e-05, "loss": 0.0734, "step": 1072 }, { "epoch": 0.15, "learning_rate": 4.974873666479506e-05, "loss": 0.083, "step": 1074 }, { "epoch": 0.15, "learning_rate": 4.974826876286731e-05, "loss": 0.0744, "step": 1076 }, { "epoch": 0.15, "learning_rate": 4.9747800860939546e-05, "loss": 0.0658, "step": 1078 }, { "epoch": 0.15, "learning_rate": 4.974733295901179e-05, "loss": 0.0993, "step": 1080 }, { "epoch": 0.15, "learning_rate": 4.974686505708404e-05, "loss": 0.092, "step": 1082 }, { "epoch": 0.15, "learning_rate": 4.9746397155156284e-05, "loss": 0.0776, "step": 1084 }, { "epoch": 0.15, "learning_rate": 4.974592925322852e-05, "loss": 0.091, "step": 1086 }, { "epoch": 0.15, "learning_rate": 4.974546135130077e-05, "loss": 0.0613, "step": 1088 }, { "epoch": 0.15, "learning_rate": 4.9744993449373015e-05, "loss": 0.0721, "step": 1090 }, { "epoch": 0.15, "learning_rate": 4.974452554744526e-05, "loss": 0.0725, "step": 1092 }, { "epoch": 0.15, "learning_rate": 4.97440576455175e-05, "loss": 0.0686, "step": 1094 }, { "epoch": 0.15, "learning_rate": 4.9743589743589746e-05, "loss": 0.0711, "step": 1096 }, { "epoch": 0.15, "learning_rate": 4.974312184166199e-05, "loss": 0.0771, "step": 1098 }, { "epoch": 0.15, "learning_rate": 4.974265393973424e-05, "loss": 0.0777, "step": 1100 }, { "epoch": 0.15, "learning_rate": 4.974218603780648e-05, "loss": 0.0696, "step": 1102 }, { "epoch": 0.15, "learning_rate": 4.974171813587872e-05, "loss": 0.0804, "step": 1104 }, { "epoch": 0.16, "learning_rate": 4.974125023395096e-05, "loss": 0.0923, "step": 1106 }, { "epoch": 0.16, "learning_rate": 4.9740782332023214e-05, "loss": 0.0609, "step": 1108 }, { "epoch": 0.16, "learning_rate": 4.9740314430095453e-05, "loss": 0.095, "step": 1110 }, { "epoch": 0.16, "learning_rate": 4.97398465281677e-05, "loss": 0.0916, "step": 1112 }, { "epoch": 0.16, "learning_rate": 4.973937862623994e-05, "loss": 0.0593, "step": 1114 }, { "epoch": 0.16, "learning_rate": 4.9738910724312184e-05, "loss": 0.0868, "step": 1116 }, { "epoch": 0.16, "learning_rate": 4.973844282238443e-05, "loss": 0.0645, "step": 1118 }, { "epoch": 0.16, "learning_rate": 4.9737974920456676e-05, "loss": 0.0774, "step": 1120 }, { "epoch": 0.16, "learning_rate": 4.9737507018528915e-05, "loss": 0.0776, "step": 1122 }, { "epoch": 0.16, "learning_rate": 4.973703911660116e-05, "loss": 0.0825, "step": 1124 }, { "epoch": 0.16, "learning_rate": 4.973657121467341e-05, "loss": 0.0764, "step": 1126 }, { "epoch": 0.16, "learning_rate": 4.973610331274565e-05, "loss": 0.0721, "step": 1128 }, { "epoch": 0.16, "learning_rate": 4.973563541081789e-05, "loss": 0.0665, "step": 1130 }, { "epoch": 0.16, "learning_rate": 4.973516750889014e-05, "loss": 0.0799, "step": 1132 }, { "epoch": 0.16, "learning_rate": 4.9734699606962384e-05, "loss": 0.0929, "step": 1134 }, { "epoch": 0.16, "learning_rate": 4.973423170503463e-05, "loss": 0.0776, "step": 1136 }, { "epoch": 0.16, "learning_rate": 4.973376380310687e-05, "loss": 0.0908, "step": 1138 }, { "epoch": 0.16, "learning_rate": 4.9733295901179115e-05, "loss": 0.0844, "step": 1140 }, { "epoch": 0.16, "learning_rate": 4.973282799925136e-05, "loss": 0.0699, "step": 1142 }, { "epoch": 0.16, "learning_rate": 4.973236009732361e-05, "loss": 0.0734, "step": 1144 }, { "epoch": 0.16, "learning_rate": 4.9731892195395846e-05, "loss": 0.0761, "step": 1146 }, { "epoch": 0.16, "learning_rate": 4.973142429346809e-05, "loss": 0.0918, "step": 1148 }, { "epoch": 0.16, "learning_rate": 4.973095639154033e-05, "loss": 0.0646, "step": 1150 }, { "epoch": 0.16, "learning_rate": 4.9730488489612584e-05, "loss": 0.0588, "step": 1152 }, { "epoch": 0.16, "learning_rate": 4.973002058768482e-05, "loss": 0.113, "step": 1154 }, { "epoch": 0.16, "learning_rate": 4.972955268575707e-05, "loss": 0.0782, "step": 1156 }, { "epoch": 0.16, "learning_rate": 4.972908478382931e-05, "loss": 0.0707, "step": 1158 }, { "epoch": 0.16, "learning_rate": 4.972861688190156e-05, "loss": 0.0718, "step": 1160 }, { "epoch": 0.16, "learning_rate": 4.97281489799738e-05, "loss": 0.0861, "step": 1162 }, { "epoch": 0.16, "learning_rate": 4.9727681078046046e-05, "loss": 0.0731, "step": 1164 }, { "epoch": 0.16, "learning_rate": 4.9727213176118285e-05, "loss": 0.0554, "step": 1166 }, { "epoch": 0.16, "learning_rate": 4.972674527419053e-05, "loss": 0.0887, "step": 1168 }, { "epoch": 0.16, "learning_rate": 4.972627737226278e-05, "loss": 0.07, "step": 1170 }, { "epoch": 0.16, "learning_rate": 4.972580947033502e-05, "loss": 0.0858, "step": 1172 }, { "epoch": 0.16, "learning_rate": 4.972534156840726e-05, "loss": 0.0911, "step": 1174 }, { "epoch": 0.17, "learning_rate": 4.972487366647951e-05, "loss": 0.1087, "step": 1176 }, { "epoch": 0.17, "learning_rate": 4.9724405764551754e-05, "loss": 0.0649, "step": 1178 }, { "epoch": 0.17, "learning_rate": 4.9723937862624e-05, "loss": 0.074, "step": 1180 }, { "epoch": 0.17, "learning_rate": 4.972346996069624e-05, "loss": 0.0718, "step": 1182 }, { "epoch": 0.17, "learning_rate": 4.9723002058768484e-05, "loss": 0.0853, "step": 1184 }, { "epoch": 0.17, "learning_rate": 4.972253415684073e-05, "loss": 0.0804, "step": 1186 }, { "epoch": 0.17, "learning_rate": 4.9722066254912976e-05, "loss": 0.0734, "step": 1188 }, { "epoch": 0.17, "learning_rate": 4.9721598352985215e-05, "loss": 0.0886, "step": 1190 }, { "epoch": 0.17, "learning_rate": 4.972113045105746e-05, "loss": 0.0633, "step": 1192 }, { "epoch": 0.17, "learning_rate": 4.972066254912971e-05, "loss": 0.0832, "step": 1194 }, { "epoch": 0.17, "learning_rate": 4.972019464720195e-05, "loss": 0.064, "step": 1196 }, { "epoch": 0.17, "learning_rate": 4.971972674527419e-05, "loss": 0.093, "step": 1198 }, { "epoch": 0.17, "learning_rate": 4.971925884334644e-05, "loss": 0.0718, "step": 1200 }, { "epoch": 0.17, "learning_rate": 4.971879094141868e-05, "loss": 0.0691, "step": 1202 }, { "epoch": 0.17, "learning_rate": 4.971832303949093e-05, "loss": 0.071, "step": 1204 }, { "epoch": 0.17, "learning_rate": 4.971785513756317e-05, "loss": 0.0604, "step": 1206 }, { "epoch": 0.17, "learning_rate": 4.9717387235635415e-05, "loss": 0.0821, "step": 1208 }, { "epoch": 0.17, "learning_rate": 4.9716919333707654e-05, "loss": 0.0756, "step": 1210 }, { "epoch": 0.17, "learning_rate": 4.97164514317799e-05, "loss": 0.0643, "step": 1212 }, { "epoch": 0.17, "learning_rate": 4.9715983529852146e-05, "loss": 0.1184, "step": 1214 }, { "epoch": 0.17, "learning_rate": 4.971551562792439e-05, "loss": 0.0718, "step": 1216 }, { "epoch": 0.17, "learning_rate": 4.971504772599663e-05, "loss": 0.0878, "step": 1218 }, { "epoch": 0.17, "learning_rate": 4.971457982406888e-05, "loss": 0.0799, "step": 1220 }, { "epoch": 0.17, "learning_rate": 4.971411192214112e-05, "loss": 0.0843, "step": 1222 }, { "epoch": 0.17, "learning_rate": 4.971364402021337e-05, "loss": 0.085, "step": 1224 }, { "epoch": 0.17, "learning_rate": 4.971317611828561e-05, "loss": 0.0735, "step": 1226 }, { "epoch": 0.17, "learning_rate": 4.9712708216357854e-05, "loss": 0.0596, "step": 1228 }, { "epoch": 0.17, "learning_rate": 4.97122403144301e-05, "loss": 0.0913, "step": 1230 }, { "epoch": 0.17, "learning_rate": 4.9711772412502346e-05, "loss": 0.0649, "step": 1232 }, { "epoch": 0.17, "learning_rate": 4.9711304510574585e-05, "loss": 0.0815, "step": 1234 }, { "epoch": 0.17, "learning_rate": 4.971083660864683e-05, "loss": 0.0671, "step": 1236 }, { "epoch": 0.17, "learning_rate": 4.971036870671908e-05, "loss": 0.0909, "step": 1238 }, { "epoch": 0.17, "learning_rate": 4.970990080479132e-05, "loss": 0.0842, "step": 1240 }, { "epoch": 0.17, "learning_rate": 4.970943290286356e-05, "loss": 0.0687, "step": 1242 }, { "epoch": 0.17, "learning_rate": 4.970896500093581e-05, "loss": 0.0708, "step": 1244 }, { "epoch": 0.17, "learning_rate": 4.970849709900805e-05, "loss": 0.0764, "step": 1246 }, { "epoch": 0.18, "learning_rate": 4.97080291970803e-05, "loss": 0.097, "step": 1248 }, { "epoch": 0.18, "learning_rate": 4.970756129515254e-05, "loss": 0.0811, "step": 1250 }, { "epoch": 0.18, "learning_rate": 4.9707093393224784e-05, "loss": 0.0683, "step": 1252 }, { "epoch": 0.18, "learning_rate": 4.9706625491297024e-05, "loss": 0.0878, "step": 1254 }, { "epoch": 0.18, "learning_rate": 4.9706157589369276e-05, "loss": 0.0864, "step": 1256 }, { "epoch": 0.18, "learning_rate": 4.9705689687441515e-05, "loss": 0.0921, "step": 1258 }, { "epoch": 0.18, "learning_rate": 4.970522178551376e-05, "loss": 0.0966, "step": 1260 }, { "epoch": 0.18, "learning_rate": 4.9704753883586e-05, "loss": 0.081, "step": 1262 }, { "epoch": 0.18, "learning_rate": 4.9704285981658246e-05, "loss": 0.1004, "step": 1264 }, { "epoch": 0.18, "learning_rate": 4.970381807973049e-05, "loss": 0.0775, "step": 1266 }, { "epoch": 0.18, "learning_rate": 4.970335017780273e-05, "loss": 0.0964, "step": 1268 }, { "epoch": 0.18, "learning_rate": 4.970288227587498e-05, "loss": 0.0648, "step": 1270 }, { "epoch": 0.18, "learning_rate": 4.970241437394722e-05, "loss": 0.0906, "step": 1272 }, { "epoch": 0.18, "learning_rate": 4.970194647201947e-05, "loss": 0.0682, "step": 1274 }, { "epoch": 0.18, "learning_rate": 4.970147857009171e-05, "loss": 0.0713, "step": 1276 }, { "epoch": 0.18, "learning_rate": 4.9701010668163954e-05, "loss": 0.0702, "step": 1278 }, { "epoch": 0.18, "learning_rate": 4.970054276623619e-05, "loss": 0.0809, "step": 1280 }, { "epoch": 0.18, "learning_rate": 4.9700074864308446e-05, "loss": 0.0708, "step": 1282 }, { "epoch": 0.18, "learning_rate": 4.9699606962380685e-05, "loss": 0.0661, "step": 1284 }, { "epoch": 0.18, "learning_rate": 4.969913906045293e-05, "loss": 0.0763, "step": 1286 }, { "epoch": 0.18, "learning_rate": 4.969867115852517e-05, "loss": 0.0946, "step": 1288 }, { "epoch": 0.18, "learning_rate": 4.969820325659742e-05, "loss": 0.0696, "step": 1290 }, { "epoch": 0.18, "learning_rate": 4.969773535466966e-05, "loss": 0.1217, "step": 1292 }, { "epoch": 0.18, "learning_rate": 4.969726745274191e-05, "loss": 0.0738, "step": 1294 }, { "epoch": 0.18, "learning_rate": 4.969679955081415e-05, "loss": 0.0721, "step": 1296 }, { "epoch": 0.18, "learning_rate": 4.969633164888639e-05, "loss": 0.0921, "step": 1298 }, { "epoch": 0.18, "learning_rate": 4.969586374695864e-05, "loss": 0.0764, "step": 1300 }, { "epoch": 0.18, "learning_rate": 4.9695395845030885e-05, "loss": 0.0703, "step": 1302 }, { "epoch": 0.18, "learning_rate": 4.9694927943103124e-05, "loss": 0.0706, "step": 1304 }, { "epoch": 0.18, "learning_rate": 4.969446004117537e-05, "loss": 0.0656, "step": 1306 }, { "epoch": 0.18, "learning_rate": 4.9693992139247616e-05, "loss": 0.0598, "step": 1308 }, { "epoch": 0.18, "learning_rate": 4.969352423731986e-05, "loss": 0.1019, "step": 1310 }, { "epoch": 0.18, "learning_rate": 4.96930563353921e-05, "loss": 0.0666, "step": 1312 }, { "epoch": 0.18, "learning_rate": 4.969258843346435e-05, "loss": 0.0836, "step": 1314 }, { "epoch": 0.18, "learning_rate": 4.969212053153659e-05, "loss": 0.1002, "step": 1316 }, { "epoch": 0.19, "learning_rate": 4.969165262960884e-05, "loss": 0.0828, "step": 1318 }, { "epoch": 0.19, "learning_rate": 4.969118472768108e-05, "loss": 0.0883, "step": 1320 }, { "epoch": 0.19, "learning_rate": 4.9690716825753324e-05, "loss": 0.0744, "step": 1322 }, { "epoch": 0.19, "learning_rate": 4.969024892382557e-05, "loss": 0.0969, "step": 1324 }, { "epoch": 0.19, "learning_rate": 4.9689781021897815e-05, "loss": 0.0907, "step": 1326 }, { "epoch": 0.19, "learning_rate": 4.9689313119970055e-05, "loss": 0.0732, "step": 1328 }, { "epoch": 0.19, "learning_rate": 4.96888452180423e-05, "loss": 0.0717, "step": 1330 }, { "epoch": 0.19, "learning_rate": 4.968837731611454e-05, "loss": 0.073, "step": 1332 }, { "epoch": 0.19, "learning_rate": 4.968790941418679e-05, "loss": 0.0989, "step": 1334 }, { "epoch": 0.19, "learning_rate": 4.968744151225903e-05, "loss": 0.0718, "step": 1336 }, { "epoch": 0.19, "learning_rate": 4.968697361033128e-05, "loss": 0.0672, "step": 1338 }, { "epoch": 0.19, "learning_rate": 4.9686505708403517e-05, "loss": 0.0547, "step": 1340 }, { "epoch": 0.19, "learning_rate": 4.968603780647577e-05, "loss": 0.0972, "step": 1342 }, { "epoch": 0.19, "learning_rate": 4.968556990454801e-05, "loss": 0.0707, "step": 1344 }, { "epoch": 0.19, "learning_rate": 4.9685102002620254e-05, "loss": 0.0644, "step": 1346 }, { "epoch": 0.19, "learning_rate": 4.968463410069249e-05, "loss": 0.0886, "step": 1348 }, { "epoch": 0.19, "learning_rate": 4.968416619876474e-05, "loss": 0.1001, "step": 1350 }, { "epoch": 0.19, "learning_rate": 4.9683698296836985e-05, "loss": 0.0992, "step": 1352 }, { "epoch": 0.19, "learning_rate": 4.968323039490923e-05, "loss": 0.0732, "step": 1354 }, { "epoch": 0.19, "learning_rate": 4.968276249298147e-05, "loss": 0.086, "step": 1356 }, { "epoch": 0.19, "learning_rate": 4.9682294591053716e-05, "loss": 0.08, "step": 1358 }, { "epoch": 0.19, "learning_rate": 4.968182668912596e-05, "loss": 0.1046, "step": 1360 }, { "epoch": 0.19, "learning_rate": 4.968135878719821e-05, "loss": 0.0528, "step": 1362 }, { "epoch": 0.19, "learning_rate": 4.968089088527045e-05, "loss": 0.0786, "step": 1364 }, { "epoch": 0.19, "learning_rate": 4.968042298334269e-05, "loss": 0.0762, "step": 1366 }, { "epoch": 0.19, "learning_rate": 4.967995508141494e-05, "loss": 0.098, "step": 1368 }, { "epoch": 0.19, "learning_rate": 4.9679487179487185e-05, "loss": 0.0655, "step": 1370 }, { "epoch": 0.19, "learning_rate": 4.9679019277559424e-05, "loss": 0.079, "step": 1372 }, { "epoch": 0.19, "learning_rate": 4.967855137563167e-05, "loss": 0.0746, "step": 1374 }, { "epoch": 0.19, "learning_rate": 4.9678083473703916e-05, "loss": 0.0864, "step": 1376 }, { "epoch": 0.19, "learning_rate": 4.967761557177616e-05, "loss": 0.0659, "step": 1378 }, { "epoch": 0.19, "learning_rate": 4.96771476698484e-05, "loss": 0.0851, "step": 1380 }, { "epoch": 0.19, "learning_rate": 4.967667976792065e-05, "loss": 0.0959, "step": 1382 }, { "epoch": 0.19, "learning_rate": 4.9676211865992886e-05, "loss": 0.103, "step": 1384 }, { "epoch": 0.19, "learning_rate": 4.967574396406514e-05, "loss": 0.0967, "step": 1386 }, { "epoch": 0.19, "learning_rate": 4.967527606213738e-05, "loss": 0.0623, "step": 1388 }, { "epoch": 0.2, "learning_rate": 4.9674808160209624e-05, "loss": 0.0926, "step": 1390 }, { "epoch": 0.2, "learning_rate": 4.967434025828186e-05, "loss": 0.09, "step": 1392 }, { "epoch": 0.2, "learning_rate": 4.967387235635411e-05, "loss": 0.0794, "step": 1394 }, { "epoch": 0.2, "learning_rate": 4.9673404454426355e-05, "loss": 0.096, "step": 1396 }, { "epoch": 0.2, "learning_rate": 4.96729365524986e-05, "loss": 0.0793, "step": 1398 }, { "epoch": 0.2, "learning_rate": 4.967246865057084e-05, "loss": 0.0692, "step": 1400 }, { "epoch": 0.2, "learning_rate": 4.9672000748643086e-05, "loss": 0.0781, "step": 1402 }, { "epoch": 0.2, "learning_rate": 4.967153284671533e-05, "loss": 0.0773, "step": 1404 }, { "epoch": 0.2, "learning_rate": 4.967106494478758e-05, "loss": 0.0641, "step": 1406 }, { "epoch": 0.2, "learning_rate": 4.9670597042859817e-05, "loss": 0.0957, "step": 1408 }, { "epoch": 0.2, "learning_rate": 4.967012914093206e-05, "loss": 0.0772, "step": 1410 }, { "epoch": 0.2, "learning_rate": 4.966966123900431e-05, "loss": 0.0795, "step": 1412 }, { "epoch": 0.2, "learning_rate": 4.9669193337076554e-05, "loss": 0.0783, "step": 1414 }, { "epoch": 0.2, "learning_rate": 4.9668725435148793e-05, "loss": 0.0703, "step": 1416 }, { "epoch": 0.2, "learning_rate": 4.966825753322104e-05, "loss": 0.0983, "step": 1418 }, { "epoch": 0.2, "learning_rate": 4.9667789631293285e-05, "loss": 0.1161, "step": 1420 }, { "epoch": 0.2, "learning_rate": 4.966732172936553e-05, "loss": 0.0694, "step": 1422 }, { "epoch": 0.2, "learning_rate": 4.966685382743777e-05, "loss": 0.0845, "step": 1424 }, { "epoch": 0.2, "learning_rate": 4.9666385925510016e-05, "loss": 0.0769, "step": 1426 }, { "epoch": 0.2, "learning_rate": 4.9665918023582255e-05, "loss": 0.0982, "step": 1428 }, { "epoch": 0.2, "learning_rate": 4.966545012165451e-05, "loss": 0.08, "step": 1430 }, { "epoch": 0.2, "learning_rate": 4.966498221972675e-05, "loss": 0.0748, "step": 1432 }, { "epoch": 0.2, "learning_rate": 4.966451431779899e-05, "loss": 0.1047, "step": 1434 }, { "epoch": 0.2, "learning_rate": 4.966404641587123e-05, "loss": 0.0887, "step": 1436 }, { "epoch": 0.2, "learning_rate": 4.9663578513943485e-05, "loss": 0.0544, "step": 1438 }, { "epoch": 0.2, "learning_rate": 4.9663110612015724e-05, "loss": 0.068, "step": 1440 }, { "epoch": 0.2, "learning_rate": 4.966264271008797e-05, "loss": 0.0825, "step": 1442 }, { "epoch": 0.2, "learning_rate": 4.966217480816021e-05, "loss": 0.0847, "step": 1444 }, { "epoch": 0.2, "learning_rate": 4.9661706906232455e-05, "loss": 0.0638, "step": 1446 }, { "epoch": 0.2, "learning_rate": 4.96612390043047e-05, "loss": 0.0694, "step": 1448 }, { "epoch": 0.2, "learning_rate": 4.966077110237695e-05, "loss": 0.1114, "step": 1450 }, { "epoch": 0.2, "learning_rate": 4.9660303200449186e-05, "loss": 0.084, "step": 1452 }, { "epoch": 0.2, "learning_rate": 4.965983529852143e-05, "loss": 0.0949, "step": 1454 }, { "epoch": 0.2, "learning_rate": 4.965936739659368e-05, "loss": 0.0833, "step": 1456 }, { "epoch": 0.2, "learning_rate": 4.9658899494665924e-05, "loss": 0.0765, "step": 1458 }, { "epoch": 0.2, "learning_rate": 4.965843159273816e-05, "loss": 0.1126, "step": 1460 }, { "epoch": 0.21, "learning_rate": 4.965796369081041e-05, "loss": 0.0751, "step": 1462 }, { "epoch": 0.21, "learning_rate": 4.9657495788882655e-05, "loss": 0.0913, "step": 1464 }, { "epoch": 0.21, "learning_rate": 4.96570278869549e-05, "loss": 0.0793, "step": 1466 }, { "epoch": 0.21, "learning_rate": 4.965655998502714e-05, "loss": 0.0663, "step": 1468 }, { "epoch": 0.21, "learning_rate": 4.9656092083099386e-05, "loss": 0.0732, "step": 1470 }, { "epoch": 0.21, "learning_rate": 4.965562418117163e-05, "loss": 0.0885, "step": 1472 }, { "epoch": 0.21, "learning_rate": 4.965515627924388e-05, "loss": 0.0798, "step": 1474 }, { "epoch": 0.21, "learning_rate": 4.9654688377316117e-05, "loss": 0.083, "step": 1476 }, { "epoch": 0.21, "learning_rate": 4.965422047538836e-05, "loss": 0.0648, "step": 1478 }, { "epoch": 0.21, "learning_rate": 4.96537525734606e-05, "loss": 0.0861, "step": 1480 }, { "epoch": 0.21, "learning_rate": 4.9653284671532854e-05, "loss": 0.0934, "step": 1482 }, { "epoch": 0.21, "learning_rate": 4.9652816769605093e-05, "loss": 0.0993, "step": 1484 }, { "epoch": 0.21, "learning_rate": 4.965234886767734e-05, "loss": 0.0917, "step": 1486 }, { "epoch": 0.21, "learning_rate": 4.965188096574958e-05, "loss": 0.0759, "step": 1488 }, { "epoch": 0.21, "learning_rate": 4.965141306382183e-05, "loss": 0.0847, "step": 1490 }, { "epoch": 0.21, "learning_rate": 4.965094516189407e-05, "loss": 0.0854, "step": 1492 }, { "epoch": 0.21, "learning_rate": 4.9650477259966316e-05, "loss": 0.0781, "step": 1494 }, { "epoch": 0.21, "learning_rate": 4.9650009358038555e-05, "loss": 0.089, "step": 1496 }, { "epoch": 0.21, "learning_rate": 4.96495414561108e-05, "loss": 0.0684, "step": 1498 }, { "epoch": 0.21, "learning_rate": 4.964907355418305e-05, "loss": 0.0786, "step": 1500 }, { "epoch": 0.21, "learning_rate": 4.964860565225529e-05, "loss": 0.089, "step": 1502 }, { "epoch": 0.21, "learning_rate": 4.964813775032753e-05, "loss": 0.0815, "step": 1504 }, { "epoch": 0.21, "learning_rate": 4.964766984839978e-05, "loss": 0.067, "step": 1506 }, { "epoch": 0.21, "learning_rate": 4.9647201946472024e-05, "loss": 0.0887, "step": 1508 }, { "epoch": 0.21, "learning_rate": 4.964673404454427e-05, "loss": 0.0788, "step": 1510 }, { "epoch": 0.21, "learning_rate": 4.964626614261651e-05, "loss": 0.062, "step": 1512 }, { "epoch": 0.21, "learning_rate": 4.9645798240688755e-05, "loss": 0.0913, "step": 1514 }, { "epoch": 0.21, "learning_rate": 4.9645330338761e-05, "loss": 0.0805, "step": 1516 }, { "epoch": 0.21, "learning_rate": 4.964486243683325e-05, "loss": 0.0733, "step": 1518 }, { "epoch": 0.21, "learning_rate": 4.9644394534905486e-05, "loss": 0.0832, "step": 1520 }, { "epoch": 0.21, "learning_rate": 4.9643926632977725e-05, "loss": 0.0691, "step": 1522 }, { "epoch": 0.21, "learning_rate": 4.964345873104997e-05, "loss": 0.0676, "step": 1524 }, { "epoch": 0.21, "learning_rate": 4.964299082912222e-05, "loss": 0.069, "step": 1526 }, { "epoch": 0.21, "learning_rate": 4.964252292719446e-05, "loss": 0.0831, "step": 1528 }, { "epoch": 0.21, "learning_rate": 4.96420550252667e-05, "loss": 0.0967, "step": 1530 }, { "epoch": 0.22, "learning_rate": 4.964158712333895e-05, "loss": 0.0859, "step": 1532 }, { "epoch": 0.22, "learning_rate": 4.9641119221411194e-05, "loss": 0.0698, "step": 1534 }, { "epoch": 0.22, "learning_rate": 4.964065131948344e-05, "loss": 0.0774, "step": 1536 }, { "epoch": 0.22, "learning_rate": 4.964018341755568e-05, "loss": 0.0931, "step": 1538 }, { "epoch": 0.22, "learning_rate": 4.9639715515627925e-05, "loss": 0.0637, "step": 1540 }, { "epoch": 0.22, "learning_rate": 4.963924761370017e-05, "loss": 0.0944, "step": 1542 }, { "epoch": 0.22, "learning_rate": 4.9638779711772417e-05, "loss": 0.0829, "step": 1544 }, { "epoch": 0.22, "learning_rate": 4.9638311809844656e-05, "loss": 0.0705, "step": 1546 }, { "epoch": 0.22, "learning_rate": 4.96378439079169e-05, "loss": 0.0874, "step": 1548 }, { "epoch": 0.22, "learning_rate": 4.963737600598915e-05, "loss": 0.0955, "step": 1550 }, { "epoch": 0.22, "learning_rate": 4.9636908104061393e-05, "loss": 0.0767, "step": 1552 }, { "epoch": 0.22, "learning_rate": 4.963644020213363e-05, "loss": 0.0824, "step": 1554 }, { "epoch": 0.22, "learning_rate": 4.963597230020588e-05, "loss": 0.0621, "step": 1556 }, { "epoch": 0.22, "learning_rate": 4.963550439827812e-05, "loss": 0.0668, "step": 1558 }, { "epoch": 0.22, "learning_rate": 4.963503649635037e-05, "loss": 0.0944, "step": 1560 }, { "epoch": 0.22, "learning_rate": 4.963456859442261e-05, "loss": 0.0821, "step": 1562 }, { "epoch": 0.22, "learning_rate": 4.9634100692494855e-05, "loss": 0.0802, "step": 1564 }, { "epoch": 0.22, "learning_rate": 4.9633632790567094e-05, "loss": 0.0609, "step": 1566 }, { "epoch": 0.22, "learning_rate": 4.963316488863935e-05, "loss": 0.0801, "step": 1568 }, { "epoch": 0.22, "learning_rate": 4.9632696986711586e-05, "loss": 0.0837, "step": 1570 }, { "epoch": 0.22, "learning_rate": 4.963222908478383e-05, "loss": 0.0898, "step": 1572 }, { "epoch": 0.22, "learning_rate": 4.963176118285607e-05, "loss": 0.0818, "step": 1574 }, { "epoch": 0.22, "learning_rate": 4.963129328092832e-05, "loss": 0.0602, "step": 1576 }, { "epoch": 0.22, "learning_rate": 4.963082537900056e-05, "loss": 0.0868, "step": 1578 }, { "epoch": 0.22, "learning_rate": 4.963035747707281e-05, "loss": 0.077, "step": 1580 }, { "epoch": 0.22, "learning_rate": 4.962988957514505e-05, "loss": 0.1011, "step": 1582 }, { "epoch": 0.22, "learning_rate": 4.9629421673217294e-05, "loss": 0.0658, "step": 1584 }, { "epoch": 0.22, "learning_rate": 4.962895377128954e-05, "loss": 0.0791, "step": 1586 }, { "epoch": 0.22, "learning_rate": 4.9628485869361786e-05, "loss": 0.0889, "step": 1588 }, { "epoch": 0.22, "learning_rate": 4.9628017967434025e-05, "loss": 0.0783, "step": 1590 }, { "epoch": 0.22, "learning_rate": 4.962755006550627e-05, "loss": 0.0944, "step": 1592 }, { "epoch": 0.22, "learning_rate": 4.962708216357852e-05, "loss": 0.0814, "step": 1594 }, { "epoch": 0.22, "learning_rate": 4.962661426165076e-05, "loss": 0.1023, "step": 1596 }, { "epoch": 0.22, "learning_rate": 4.9626146359723e-05, "loss": 0.0762, "step": 1598 }, { "epoch": 0.22, "learning_rate": 4.962567845779525e-05, "loss": 0.0939, "step": 1600 }, { "epoch": 0.22, "learning_rate": 4.9625210555867494e-05, "loss": 0.0706, "step": 1602 }, { "epoch": 0.23, "learning_rate": 4.962474265393974e-05, "loss": 0.0997, "step": 1604 }, { "epoch": 0.23, "learning_rate": 4.962427475201198e-05, "loss": 0.0837, "step": 1606 }, { "epoch": 0.23, "learning_rate": 4.9623806850084225e-05, "loss": 0.0888, "step": 1608 }, { "epoch": 0.23, "learning_rate": 4.9623338948156464e-05, "loss": 0.1059, "step": 1610 }, { "epoch": 0.23, "learning_rate": 4.9622871046228717e-05, "loss": 0.0897, "step": 1612 }, { "epoch": 0.23, "learning_rate": 4.9622403144300956e-05, "loss": 0.0891, "step": 1614 }, { "epoch": 0.23, "learning_rate": 4.96219352423732e-05, "loss": 0.0714, "step": 1616 }, { "epoch": 0.23, "learning_rate": 4.962146734044544e-05, "loss": 0.1006, "step": 1618 }, { "epoch": 0.23, "learning_rate": 4.9620999438517693e-05, "loss": 0.0806, "step": 1620 }, { "epoch": 0.23, "learning_rate": 4.962053153658993e-05, "loss": 0.0737, "step": 1622 }, { "epoch": 0.23, "learning_rate": 4.962006363466218e-05, "loss": 0.0902, "step": 1624 }, { "epoch": 0.23, "learning_rate": 4.961959573273442e-05, "loss": 0.0863, "step": 1626 }, { "epoch": 0.23, "learning_rate": 4.9619127830806664e-05, "loss": 0.0983, "step": 1628 }, { "epoch": 0.23, "learning_rate": 4.961865992887891e-05, "loss": 0.083, "step": 1630 }, { "epoch": 0.23, "learning_rate": 4.9618192026951155e-05, "loss": 0.0634, "step": 1632 }, { "epoch": 0.23, "learning_rate": 4.9617724125023395e-05, "loss": 0.0901, "step": 1634 }, { "epoch": 0.23, "learning_rate": 4.961725622309564e-05, "loss": 0.0803, "step": 1636 }, { "epoch": 0.23, "learning_rate": 4.9616788321167886e-05, "loss": 0.083, "step": 1638 }, { "epoch": 0.23, "learning_rate": 4.961632041924013e-05, "loss": 0.0967, "step": 1640 }, { "epoch": 0.23, "learning_rate": 4.961585251731237e-05, "loss": 0.0762, "step": 1642 }, { "epoch": 0.23, "learning_rate": 4.961538461538462e-05, "loss": 0.0673, "step": 1644 }, { "epoch": 0.23, "learning_rate": 4.961491671345686e-05, "loss": 0.0974, "step": 1646 }, { "epoch": 0.23, "learning_rate": 4.961444881152911e-05, "loss": 0.0882, "step": 1648 }, { "epoch": 0.23, "learning_rate": 4.961398090960135e-05, "loss": 0.0851, "step": 1650 }, { "epoch": 0.23, "learning_rate": 4.9613513007673594e-05, "loss": 0.0705, "step": 1652 }, { "epoch": 0.23, "learning_rate": 4.961304510574584e-05, "loss": 0.0681, "step": 1654 }, { "epoch": 0.23, "learning_rate": 4.9612577203818086e-05, "loss": 0.0806, "step": 1656 }, { "epoch": 0.23, "learning_rate": 4.9612109301890325e-05, "loss": 0.085, "step": 1658 }, { "epoch": 0.23, "learning_rate": 4.961164139996257e-05, "loss": 0.0866, "step": 1660 }, { "epoch": 0.23, "learning_rate": 4.961117349803481e-05, "loss": 0.07, "step": 1662 }, { "epoch": 0.23, "learning_rate": 4.961070559610706e-05, "loss": 0.0758, "step": 1664 }, { "epoch": 0.23, "learning_rate": 4.96102376941793e-05, "loss": 0.0642, "step": 1666 }, { "epoch": 0.23, "learning_rate": 4.960976979225155e-05, "loss": 0.0705, "step": 1668 }, { "epoch": 0.23, "learning_rate": 4.960930189032379e-05, "loss": 0.0739, "step": 1670 }, { "epoch": 0.23, "learning_rate": 4.960883398839603e-05, "loss": 0.1064, "step": 1672 }, { "epoch": 0.23, "learning_rate": 4.960836608646828e-05, "loss": 0.0583, "step": 1674 }, { "epoch": 0.24, "learning_rate": 4.9607898184540525e-05, "loss": 0.0852, "step": 1676 }, { "epoch": 0.24, "learning_rate": 4.9607430282612764e-05, "loss": 0.0754, "step": 1678 }, { "epoch": 0.24, "learning_rate": 4.960696238068501e-05, "loss": 0.0618, "step": 1680 }, { "epoch": 0.24, "learning_rate": 4.9606494478757256e-05, "loss": 0.0784, "step": 1682 }, { "epoch": 0.24, "learning_rate": 4.96060265768295e-05, "loss": 0.0766, "step": 1684 }, { "epoch": 0.24, "learning_rate": 4.960555867490174e-05, "loss": 0.0703, "step": 1686 }, { "epoch": 0.24, "learning_rate": 4.960509077297399e-05, "loss": 0.0819, "step": 1688 }, { "epoch": 0.24, "learning_rate": 4.960462287104623e-05, "loss": 0.0892, "step": 1690 }, { "epoch": 0.24, "learning_rate": 4.960415496911848e-05, "loss": 0.0824, "step": 1692 }, { "epoch": 0.24, "learning_rate": 4.960368706719072e-05, "loss": 0.0842, "step": 1694 }, { "epoch": 0.24, "learning_rate": 4.9603219165262964e-05, "loss": 0.0952, "step": 1696 }, { "epoch": 0.24, "learning_rate": 4.960275126333521e-05, "loss": 0.0705, "step": 1698 }, { "epoch": 0.24, "learning_rate": 4.9602283361407455e-05, "loss": 0.0768, "step": 1700 }, { "epoch": 0.24, "learning_rate": 4.9601815459479695e-05, "loss": 0.0775, "step": 1702 }, { "epoch": 0.24, "learning_rate": 4.960134755755194e-05, "loss": 0.0928, "step": 1704 }, { "epoch": 0.24, "learning_rate": 4.960087965562418e-05, "loss": 0.0663, "step": 1706 }, { "epoch": 0.24, "learning_rate": 4.960041175369643e-05, "loss": 0.075, "step": 1708 }, { "epoch": 0.24, "learning_rate": 4.959994385176867e-05, "loss": 0.1009, "step": 1710 }, { "epoch": 0.24, "learning_rate": 4.959947594984092e-05, "loss": 0.0737, "step": 1712 }, { "epoch": 0.24, "learning_rate": 4.9599008047913156e-05, "loss": 0.0872, "step": 1714 }, { "epoch": 0.24, "learning_rate": 4.959854014598541e-05, "loss": 0.0707, "step": 1716 }, { "epoch": 0.24, "learning_rate": 4.959807224405765e-05, "loss": 0.0861, "step": 1718 }, { "epoch": 0.24, "learning_rate": 4.9597604342129894e-05, "loss": 0.0675, "step": 1720 }, { "epoch": 0.24, "learning_rate": 4.959713644020213e-05, "loss": 0.0742, "step": 1722 }, { "epoch": 0.24, "learning_rate": 4.959666853827438e-05, "loss": 0.0651, "step": 1724 }, { "epoch": 0.24, "learning_rate": 4.9596200636346625e-05, "loss": 0.0899, "step": 1726 }, { "epoch": 0.24, "learning_rate": 4.959573273441887e-05, "loss": 0.0718, "step": 1728 }, { "epoch": 0.24, "learning_rate": 4.959526483249111e-05, "loss": 0.0892, "step": 1730 }, { "epoch": 0.24, "learning_rate": 4.9594796930563356e-05, "loss": 0.072, "step": 1732 }, { "epoch": 0.24, "learning_rate": 4.95943290286356e-05, "loss": 0.0897, "step": 1734 }, { "epoch": 0.24, "learning_rate": 4.959386112670785e-05, "loss": 0.0705, "step": 1736 }, { "epoch": 0.24, "learning_rate": 4.959339322478009e-05, "loss": 0.0808, "step": 1738 }, { "epoch": 0.24, "learning_rate": 4.959292532285233e-05, "loss": 0.076, "step": 1740 }, { "epoch": 0.24, "learning_rate": 4.959245742092458e-05, "loss": 0.0787, "step": 1742 }, { "epoch": 0.24, "learning_rate": 4.9591989518996825e-05, "loss": 0.1035, "step": 1744 }, { "epoch": 0.25, "learning_rate": 4.9591521617069064e-05, "loss": 0.0927, "step": 1746 }, { "epoch": 0.25, "learning_rate": 4.959105371514131e-05, "loss": 0.0718, "step": 1748 }, { "epoch": 0.25, "learning_rate": 4.9590585813213556e-05, "loss": 0.0716, "step": 1750 }, { "epoch": 0.25, "learning_rate": 4.95901179112858e-05, "loss": 0.0739, "step": 1752 }, { "epoch": 0.25, "learning_rate": 4.958965000935804e-05, "loss": 0.0586, "step": 1754 }, { "epoch": 0.25, "learning_rate": 4.958918210743029e-05, "loss": 0.0888, "step": 1756 }, { "epoch": 0.25, "learning_rate": 4.9588714205502526e-05, "loss": 0.0948, "step": 1758 }, { "epoch": 0.25, "learning_rate": 4.958824630357478e-05, "loss": 0.0726, "step": 1760 }, { "epoch": 0.25, "learning_rate": 4.958777840164702e-05, "loss": 0.0675, "step": 1762 }, { "epoch": 0.25, "learning_rate": 4.9587310499719264e-05, "loss": 0.0904, "step": 1764 }, { "epoch": 0.25, "learning_rate": 4.95868425977915e-05, "loss": 0.0768, "step": 1766 }, { "epoch": 0.25, "learning_rate": 4.9586374695863755e-05, "loss": 0.09, "step": 1768 }, { "epoch": 0.25, "learning_rate": 4.9585906793935995e-05, "loss": 0.1021, "step": 1770 }, { "epoch": 0.25, "learning_rate": 4.958543889200824e-05, "loss": 0.0889, "step": 1772 }, { "epoch": 0.25, "learning_rate": 4.958497099008048e-05, "loss": 0.0765, "step": 1774 }, { "epoch": 0.25, "learning_rate": 4.9584503088152726e-05, "loss": 0.0895, "step": 1776 }, { "epoch": 0.25, "learning_rate": 4.958403518622497e-05, "loss": 0.0798, "step": 1778 }, { "epoch": 0.25, "learning_rate": 4.958356728429721e-05, "loss": 0.1208, "step": 1780 }, { "epoch": 0.25, "learning_rate": 4.9583099382369456e-05, "loss": 0.0793, "step": 1782 }, { "epoch": 0.25, "learning_rate": 4.95826314804417e-05, "loss": 0.0725, "step": 1784 }, { "epoch": 0.25, "learning_rate": 4.958216357851395e-05, "loss": 0.0829, "step": 1786 }, { "epoch": 0.25, "learning_rate": 4.958169567658619e-05, "loss": 0.069, "step": 1788 }, { "epoch": 0.25, "learning_rate": 4.958122777465843e-05, "loss": 0.069, "step": 1790 }, { "epoch": 0.25, "learning_rate": 4.958075987273067e-05, "loss": 0.0839, "step": 1792 }, { "epoch": 0.25, "learning_rate": 4.9580291970802925e-05, "loss": 0.0852, "step": 1794 }, { "epoch": 0.25, "learning_rate": 4.9579824068875164e-05, "loss": 0.1122, "step": 1796 }, { "epoch": 0.25, "learning_rate": 4.957935616694741e-05, "loss": 0.1048, "step": 1798 }, { "epoch": 0.25, "learning_rate": 4.957888826501965e-05, "loss": 0.0711, "step": 1800 }, { "epoch": 0.25, "learning_rate": 4.9578420363091895e-05, "loss": 0.073, "step": 1802 }, { "epoch": 0.25, "learning_rate": 4.957795246116414e-05, "loss": 0.1139, "step": 1804 }, { "epoch": 0.25, "learning_rate": 4.957748455923639e-05, "loss": 0.0678, "step": 1806 }, { "epoch": 0.25, "learning_rate": 4.9577016657308626e-05, "loss": 0.0936, "step": 1808 }, { "epoch": 0.25, "learning_rate": 4.957654875538087e-05, "loss": 0.0722, "step": 1810 }, { "epoch": 0.25, "learning_rate": 4.957608085345312e-05, "loss": 0.0779, "step": 1812 }, { "epoch": 0.25, "learning_rate": 4.9575612951525364e-05, "loss": 0.0842, "step": 1814 }, { "epoch": 0.25, "learning_rate": 4.95751450495976e-05, "loss": 0.1146, "step": 1816 }, { "epoch": 0.26, "learning_rate": 4.957467714766985e-05, "loss": 0.0801, "step": 1818 }, { "epoch": 0.26, "learning_rate": 4.9574209245742095e-05, "loss": 0.0851, "step": 1820 }, { "epoch": 0.26, "learning_rate": 4.957374134381434e-05, "loss": 0.0865, "step": 1822 }, { "epoch": 0.26, "learning_rate": 4.957327344188658e-05, "loss": 0.0789, "step": 1824 }, { "epoch": 0.26, "learning_rate": 4.9572805539958826e-05, "loss": 0.0866, "step": 1826 }, { "epoch": 0.26, "learning_rate": 4.957233763803107e-05, "loss": 0.0828, "step": 1828 }, { "epoch": 0.26, "learning_rate": 4.957186973610332e-05, "loss": 0.1413, "step": 1830 }, { "epoch": 0.26, "learning_rate": 4.957140183417556e-05, "loss": 0.0734, "step": 1832 }, { "epoch": 0.26, "learning_rate": 4.95709339322478e-05, "loss": 0.0669, "step": 1834 }, { "epoch": 0.26, "learning_rate": 4.957046603032004e-05, "loss": 0.0824, "step": 1836 }, { "epoch": 0.26, "learning_rate": 4.9569998128392295e-05, "loss": 0.0812, "step": 1838 }, { "epoch": 0.26, "learning_rate": 4.9569530226464534e-05, "loss": 0.0805, "step": 1840 }, { "epoch": 0.26, "learning_rate": 4.956906232453678e-05, "loss": 0.1054, "step": 1842 }, { "epoch": 0.26, "learning_rate": 4.956859442260902e-05, "loss": 0.0721, "step": 1844 }, { "epoch": 0.26, "learning_rate": 4.956812652068127e-05, "loss": 0.0824, "step": 1846 }, { "epoch": 0.26, "learning_rate": 4.956765861875351e-05, "loss": 0.0905, "step": 1848 }, { "epoch": 0.26, "learning_rate": 4.9567190716825756e-05, "loss": 0.0787, "step": 1850 }, { "epoch": 0.26, "learning_rate": 4.9566722814897996e-05, "loss": 0.0864, "step": 1852 }, { "epoch": 0.26, "learning_rate": 4.956625491297024e-05, "loss": 0.0656, "step": 1854 }, { "epoch": 0.26, "learning_rate": 4.956578701104249e-05, "loss": 0.1018, "step": 1856 }, { "epoch": 0.26, "learning_rate": 4.956531910911473e-05, "loss": 0.0841, "step": 1858 }, { "epoch": 0.26, "learning_rate": 4.956485120718697e-05, "loss": 0.0772, "step": 1860 }, { "epoch": 0.26, "learning_rate": 4.956438330525922e-05, "loss": 0.0838, "step": 1862 }, { "epoch": 0.26, "learning_rate": 4.9563915403331464e-05, "loss": 0.0848, "step": 1864 }, { "epoch": 0.26, "learning_rate": 4.956344750140371e-05, "loss": 0.0752, "step": 1866 }, { "epoch": 0.26, "learning_rate": 4.956297959947595e-05, "loss": 0.0683, "step": 1868 }, { "epoch": 0.26, "learning_rate": 4.9562511697548195e-05, "loss": 0.0707, "step": 1870 }, { "epoch": 0.26, "learning_rate": 4.956204379562044e-05, "loss": 0.0966, "step": 1872 }, { "epoch": 0.26, "learning_rate": 4.956157589369269e-05, "loss": 0.1103, "step": 1874 }, { "epoch": 0.26, "learning_rate": 4.9561107991764926e-05, "loss": 0.0819, "step": 1876 }, { "epoch": 0.26, "learning_rate": 4.956064008983717e-05, "loss": 0.0815, "step": 1878 }, { "epoch": 0.26, "learning_rate": 4.956017218790942e-05, "loss": 0.087, "step": 1880 }, { "epoch": 0.26, "learning_rate": 4.9559704285981664e-05, "loss": 0.0682, "step": 1882 }, { "epoch": 0.26, "learning_rate": 4.95592363840539e-05, "loss": 0.0894, "step": 1884 }, { "epoch": 0.26, "learning_rate": 4.955876848212615e-05, "loss": 0.0693, "step": 1886 }, { "epoch": 0.27, "learning_rate": 4.955830058019839e-05, "loss": 0.0699, "step": 1888 }, { "epoch": 0.27, "learning_rate": 4.955783267827064e-05, "loss": 0.0955, "step": 1890 }, { "epoch": 0.27, "learning_rate": 4.955736477634288e-05, "loss": 0.0724, "step": 1892 }, { "epoch": 0.27, "learning_rate": 4.9556896874415126e-05, "loss": 0.0868, "step": 1894 }, { "epoch": 0.27, "learning_rate": 4.9556428972487365e-05, "loss": 0.079, "step": 1896 }, { "epoch": 0.27, "learning_rate": 4.955596107055962e-05, "loss": 0.0919, "step": 1898 }, { "epoch": 0.27, "learning_rate": 4.955549316863186e-05, "loss": 0.0741, "step": 1900 }, { "epoch": 0.27, "learning_rate": 4.95550252667041e-05, "loss": 0.0702, "step": 1902 }, { "epoch": 0.27, "learning_rate": 4.955455736477634e-05, "loss": 0.0649, "step": 1904 }, { "epoch": 0.27, "learning_rate": 4.955408946284859e-05, "loss": 0.1234, "step": 1906 }, { "epoch": 0.27, "learning_rate": 4.9553621560920834e-05, "loss": 0.0759, "step": 1908 }, { "epoch": 0.27, "learning_rate": 4.955315365899308e-05, "loss": 0.0899, "step": 1910 }, { "epoch": 0.27, "learning_rate": 4.955268575706532e-05, "loss": 0.0933, "step": 1912 }, { "epoch": 0.27, "learning_rate": 4.9552217855137565e-05, "loss": 0.1276, "step": 1914 }, { "epoch": 0.27, "learning_rate": 4.955174995320981e-05, "loss": 0.0794, "step": 1916 }, { "epoch": 0.27, "learning_rate": 4.9551282051282056e-05, "loss": 0.1122, "step": 1918 }, { "epoch": 0.27, "learning_rate": 4.9550814149354296e-05, "loss": 0.1009, "step": 1920 }, { "epoch": 0.27, "learning_rate": 4.955034624742654e-05, "loss": 0.0805, "step": 1922 }, { "epoch": 0.27, "learning_rate": 4.954987834549879e-05, "loss": 0.0824, "step": 1924 }, { "epoch": 0.27, "learning_rate": 4.954941044357103e-05, "loss": 0.0767, "step": 1926 }, { "epoch": 0.27, "learning_rate": 4.954894254164327e-05, "loss": 0.0992, "step": 1928 }, { "epoch": 0.27, "learning_rate": 4.954847463971552e-05, "loss": 0.0582, "step": 1930 }, { "epoch": 0.27, "learning_rate": 4.9548006737787764e-05, "loss": 0.0696, "step": 1932 }, { "epoch": 0.27, "learning_rate": 4.954753883586001e-05, "loss": 0.1047, "step": 1934 }, { "epoch": 0.27, "learning_rate": 4.954707093393225e-05, "loss": 0.0732, "step": 1936 }, { "epoch": 0.27, "learning_rate": 4.9546603032004495e-05, "loss": 0.0788, "step": 1938 }, { "epoch": 0.27, "learning_rate": 4.9546135130076734e-05, "loss": 0.0817, "step": 1940 }, { "epoch": 0.27, "learning_rate": 4.954566722814899e-05, "loss": 0.0883, "step": 1942 }, { "epoch": 0.27, "learning_rate": 4.9545199326221226e-05, "loss": 0.0814, "step": 1944 }, { "epoch": 0.27, "learning_rate": 4.954473142429347e-05, "loss": 0.0878, "step": 1946 }, { "epoch": 0.27, "learning_rate": 4.954426352236571e-05, "loss": 0.0919, "step": 1948 }, { "epoch": 0.27, "learning_rate": 4.954379562043796e-05, "loss": 0.0728, "step": 1950 }, { "epoch": 0.27, "learning_rate": 4.95433277185102e-05, "loss": 0.0961, "step": 1952 }, { "epoch": 0.27, "learning_rate": 4.954285981658245e-05, "loss": 0.0727, "step": 1954 }, { "epoch": 0.27, "learning_rate": 4.954239191465469e-05, "loss": 0.0969, "step": 1956 }, { "epoch": 0.27, "learning_rate": 4.9541924012726934e-05, "loss": 0.0743, "step": 1958 }, { "epoch": 0.28, "learning_rate": 4.954145611079918e-05, "loss": 0.0761, "step": 1960 }, { "epoch": 0.28, "learning_rate": 4.9540988208871426e-05, "loss": 0.0691, "step": 1962 }, { "epoch": 0.28, "learning_rate": 4.9540520306943665e-05, "loss": 0.0725, "step": 1964 }, { "epoch": 0.28, "learning_rate": 4.954005240501591e-05, "loss": 0.1018, "step": 1966 }, { "epoch": 0.28, "learning_rate": 4.953958450308816e-05, "loss": 0.0669, "step": 1968 }, { "epoch": 0.28, "learning_rate": 4.95391166011604e-05, "loss": 0.0777, "step": 1970 }, { "epoch": 0.28, "learning_rate": 4.953864869923264e-05, "loss": 0.1015, "step": 1972 }, { "epoch": 0.28, "learning_rate": 4.953818079730489e-05, "loss": 0.099, "step": 1974 }, { "epoch": 0.28, "learning_rate": 4.9537712895377134e-05, "loss": 0.0754, "step": 1976 }, { "epoch": 0.28, "learning_rate": 4.953724499344938e-05, "loss": 0.0703, "step": 1978 }, { "epoch": 0.28, "learning_rate": 4.953677709152162e-05, "loss": 0.0935, "step": 1980 }, { "epoch": 0.28, "learning_rate": 4.9536309189593865e-05, "loss": 0.0701, "step": 1982 }, { "epoch": 0.28, "learning_rate": 4.9535841287666104e-05, "loss": 0.1014, "step": 1984 }, { "epoch": 0.28, "learning_rate": 4.9535373385738357e-05, "loss": 0.0649, "step": 1986 }, { "epoch": 0.28, "learning_rate": 4.9534905483810596e-05, "loss": 0.0947, "step": 1988 }, { "epoch": 0.28, "learning_rate": 4.953443758188284e-05, "loss": 0.0857, "step": 1990 }, { "epoch": 0.28, "learning_rate": 4.953396967995508e-05, "loss": 0.0781, "step": 1992 }, { "epoch": 0.28, "learning_rate": 4.953350177802733e-05, "loss": 0.0849, "step": 1994 }, { "epoch": 0.28, "learning_rate": 4.953303387609957e-05, "loss": 0.0767, "step": 1996 }, { "epoch": 0.28, "learning_rate": 4.953256597417182e-05, "loss": 0.1001, "step": 1998 }, { "epoch": 0.28, "learning_rate": 4.953209807224406e-05, "loss": 0.09, "step": 2000 }, { "epoch": 0.28, "eval_gen_len": 27.0862, "eval_loss": 1.035199761390686, "eval_meteor": 0.0382, "eval_runtime": 13.218, "eval_samples_per_second": 4.388, "eval_steps_per_second": 0.605, "step": 2000 }, { "epoch": 0.28, "learning_rate": 4.9531630170316303e-05, "loss": 0.1195, "step": 2002 }, { "epoch": 0.28, "learning_rate": 4.953116226838855e-05, "loss": 0.0717, "step": 2004 }, { "epoch": 0.28, "learning_rate": 4.9530694366460795e-05, "loss": 0.101, "step": 2006 }, { "epoch": 0.28, "learning_rate": 4.9530226464533034e-05, "loss": 0.0867, "step": 2008 }, { "epoch": 0.28, "learning_rate": 4.952975856260528e-05, "loss": 0.0734, "step": 2010 }, { "epoch": 0.28, "learning_rate": 4.9529290660677526e-05, "loss": 0.0957, "step": 2012 }, { "epoch": 0.28, "learning_rate": 4.952882275874977e-05, "loss": 0.1171, "step": 2014 }, { "epoch": 0.28, "learning_rate": 4.952835485682201e-05, "loss": 0.103, "step": 2016 }, { "epoch": 0.28, "learning_rate": 4.952788695489426e-05, "loss": 0.0913, "step": 2018 }, { "epoch": 0.28, "learning_rate": 4.95274190529665e-05, "loss": 0.0911, "step": 2020 }, { "epoch": 0.28, "learning_rate": 4.952695115103875e-05, "loss": 0.0803, "step": 2022 }, { "epoch": 0.28, "learning_rate": 4.952648324911099e-05, "loss": 0.0831, "step": 2024 }, { "epoch": 0.28, "learning_rate": 4.952601534718323e-05, "loss": 0.0769, "step": 2026 }, { "epoch": 0.28, "learning_rate": 4.952554744525548e-05, "loss": 0.072, "step": 2028 }, { "epoch": 0.28, "learning_rate": 4.952507954332772e-05, "loss": 0.0863, "step": 2030 }, { "epoch": 0.29, "learning_rate": 4.9524611641399965e-05, "loss": 0.0999, "step": 2032 }, { "epoch": 0.29, "learning_rate": 4.9524143739472204e-05, "loss": 0.097, "step": 2034 }, { "epoch": 0.29, "learning_rate": 4.952367583754445e-05, "loss": 0.0917, "step": 2036 }, { "epoch": 0.29, "learning_rate": 4.9523207935616696e-05, "loss": 0.0717, "step": 2038 }, { "epoch": 0.29, "learning_rate": 4.952274003368894e-05, "loss": 0.0909, "step": 2040 }, { "epoch": 0.29, "learning_rate": 4.952227213176118e-05, "loss": 0.0911, "step": 2042 }, { "epoch": 0.29, "learning_rate": 4.952180422983343e-05, "loss": 0.0811, "step": 2044 }, { "epoch": 0.29, "learning_rate": 4.952133632790567e-05, "loss": 0.0892, "step": 2046 }, { "epoch": 0.29, "learning_rate": 4.952086842597792e-05, "loss": 0.0879, "step": 2048 }, { "epoch": 0.29, "learning_rate": 4.952040052405016e-05, "loss": 0.084, "step": 2050 }, { "epoch": 0.29, "learning_rate": 4.9519932622122404e-05, "loss": 0.075, "step": 2052 }, { "epoch": 0.29, "learning_rate": 4.951946472019465e-05, "loss": 0.0923, "step": 2054 }, { "epoch": 0.29, "learning_rate": 4.9518996818266896e-05, "loss": 0.0782, "step": 2056 }, { "epoch": 0.29, "learning_rate": 4.9518528916339135e-05, "loss": 0.1017, "step": 2058 }, { "epoch": 0.29, "learning_rate": 4.951806101441138e-05, "loss": 0.0976, "step": 2060 }, { "epoch": 0.29, "learning_rate": 4.951759311248363e-05, "loss": 0.0843, "step": 2062 }, { "epoch": 0.29, "learning_rate": 4.951712521055587e-05, "loss": 0.1091, "step": 2064 }, { "epoch": 0.29, "learning_rate": 4.951665730862811e-05, "loss": 0.0778, "step": 2066 }, { "epoch": 0.29, "learning_rate": 4.951618940670036e-05, "loss": 0.08, "step": 2068 }, { "epoch": 0.29, "learning_rate": 4.95157215047726e-05, "loss": 0.0813, "step": 2070 }, { "epoch": 0.29, "learning_rate": 4.951525360284485e-05, "loss": 0.1102, "step": 2072 }, { "epoch": 0.29, "learning_rate": 4.951478570091709e-05, "loss": 0.0795, "step": 2074 }, { "epoch": 0.29, "learning_rate": 4.9514317798989334e-05, "loss": 0.0959, "step": 2076 }, { "epoch": 0.29, "learning_rate": 4.9513849897061574e-05, "loss": 0.0886, "step": 2078 }, { "epoch": 0.29, "learning_rate": 4.9513381995133826e-05, "loss": 0.0764, "step": 2080 }, { "epoch": 0.29, "learning_rate": 4.9512914093206065e-05, "loss": 0.0692, "step": 2082 }, { "epoch": 0.29, "learning_rate": 4.951244619127831e-05, "loss": 0.0869, "step": 2084 }, { "epoch": 0.29, "learning_rate": 4.951197828935055e-05, "loss": 0.0853, "step": 2086 }, { "epoch": 0.29, "learning_rate": 4.9511510387422796e-05, "loss": 0.1056, "step": 2088 }, { "epoch": 0.29, "learning_rate": 4.951104248549504e-05, "loss": 0.0794, "step": 2090 }, { "epoch": 0.29, "learning_rate": 4.951057458356729e-05, "loss": 0.0834, "step": 2092 }, { "epoch": 0.29, "learning_rate": 4.951010668163953e-05, "loss": 0.1245, "step": 2094 }, { "epoch": 0.29, "learning_rate": 4.950963877971177e-05, "loss": 0.0811, "step": 2096 }, { "epoch": 0.29, "learning_rate": 4.950917087778402e-05, "loss": 0.0852, "step": 2098 }, { "epoch": 0.29, "learning_rate": 4.9508702975856265e-05, "loss": 0.0776, "step": 2100 }, { "epoch": 0.3, "learning_rate": 4.9508235073928504e-05, "loss": 0.0913, "step": 2102 }, { "epoch": 0.3, "learning_rate": 4.950776717200075e-05, "loss": 0.074, "step": 2104 }, { "epoch": 0.3, "learning_rate": 4.9507299270072996e-05, "loss": 0.0734, "step": 2106 }, { "epoch": 0.3, "learning_rate": 4.950683136814524e-05, "loss": 0.0771, "step": 2108 }, { "epoch": 0.3, "learning_rate": 4.950636346621748e-05, "loss": 0.0767, "step": 2110 }, { "epoch": 0.3, "learning_rate": 4.950589556428973e-05, "loss": 0.0856, "step": 2112 }, { "epoch": 0.3, "learning_rate": 4.9505427662361966e-05, "loss": 0.0851, "step": 2114 }, { "epoch": 0.3, "learning_rate": 4.950495976043422e-05, "loss": 0.0734, "step": 2116 }, { "epoch": 0.3, "learning_rate": 4.950449185850646e-05, "loss": 0.0829, "step": 2118 }, { "epoch": 0.3, "learning_rate": 4.9504023956578704e-05, "loss": 0.0889, "step": 2120 }, { "epoch": 0.3, "learning_rate": 4.950355605465094e-05, "loss": 0.0918, "step": 2122 }, { "epoch": 0.3, "learning_rate": 4.9503088152723196e-05, "loss": 0.1152, "step": 2124 }, { "epoch": 0.3, "learning_rate": 4.9502620250795435e-05, "loss": 0.1038, "step": 2126 }, { "epoch": 0.3, "learning_rate": 4.950215234886768e-05, "loss": 0.0785, "step": 2128 }, { "epoch": 0.3, "learning_rate": 4.950168444693992e-05, "loss": 0.0699, "step": 2130 }, { "epoch": 0.3, "learning_rate": 4.9501216545012166e-05, "loss": 0.0966, "step": 2132 }, { "epoch": 0.3, "learning_rate": 4.950074864308441e-05, "loss": 0.1138, "step": 2134 }, { "epoch": 0.3, "learning_rate": 4.950028074115666e-05, "loss": 0.086, "step": 2136 }, { "epoch": 0.3, "learning_rate": 4.94998128392289e-05, "loss": 0.0641, "step": 2138 }, { "epoch": 0.3, "learning_rate": 4.949934493730114e-05, "loss": 0.0762, "step": 2140 }, { "epoch": 0.3, "learning_rate": 4.949887703537339e-05, "loss": 0.1115, "step": 2142 }, { "epoch": 0.3, "learning_rate": 4.9498409133445634e-05, "loss": 0.1117, "step": 2144 }, { "epoch": 0.3, "learning_rate": 4.9497941231517874e-05, "loss": 0.089, "step": 2146 }, { "epoch": 0.3, "learning_rate": 4.949747332959012e-05, "loss": 0.0802, "step": 2148 }, { "epoch": 0.3, "learning_rate": 4.9497005427662365e-05, "loss": 0.0747, "step": 2150 }, { "epoch": 0.3, "learning_rate": 4.949653752573461e-05, "loss": 0.085, "step": 2152 }, { "epoch": 0.3, "learning_rate": 4.949606962380685e-05, "loss": 0.0879, "step": 2154 }, { "epoch": 0.3, "learning_rate": 4.9495601721879096e-05, "loss": 0.0959, "step": 2156 }, { "epoch": 0.3, "learning_rate": 4.949513381995134e-05, "loss": 0.0959, "step": 2158 }, { "epoch": 0.3, "learning_rate": 4.949466591802359e-05, "loss": 0.0837, "step": 2160 }, { "epoch": 0.3, "learning_rate": 4.949419801609583e-05, "loss": 0.0731, "step": 2162 }, { "epoch": 0.3, "learning_rate": 4.949373011416807e-05, "loss": 0.0748, "step": 2164 }, { "epoch": 0.3, "learning_rate": 4.949326221224031e-05, "loss": 0.0935, "step": 2166 }, { "epoch": 0.3, "learning_rate": 4.9492794310312565e-05, "loss": 0.0672, "step": 2168 }, { "epoch": 0.3, "learning_rate": 4.9492326408384804e-05, "loss": 0.1234, "step": 2170 }, { "epoch": 0.3, "learning_rate": 4.949185850645705e-05, "loss": 0.0684, "step": 2172 }, { "epoch": 0.31, "learning_rate": 4.949139060452929e-05, "loss": 0.0832, "step": 2174 }, { "epoch": 0.31, "learning_rate": 4.949092270260154e-05, "loss": 0.0881, "step": 2176 }, { "epoch": 0.31, "learning_rate": 4.949045480067378e-05, "loss": 0.0823, "step": 2178 }, { "epoch": 0.31, "learning_rate": 4.948998689874603e-05, "loss": 0.1178, "step": 2180 }, { "epoch": 0.31, "learning_rate": 4.9489518996818266e-05, "loss": 0.091, "step": 2182 }, { "epoch": 0.31, "learning_rate": 4.948905109489051e-05, "loss": 0.1102, "step": 2184 }, { "epoch": 0.31, "learning_rate": 4.948858319296276e-05, "loss": 0.0945, "step": 2186 }, { "epoch": 0.31, "learning_rate": 4.9488115291035004e-05, "loss": 0.097, "step": 2188 }, { "epoch": 0.31, "learning_rate": 4.948764738910724e-05, "loss": 0.0815, "step": 2190 }, { "epoch": 0.31, "learning_rate": 4.948717948717949e-05, "loss": 0.0947, "step": 2192 }, { "epoch": 0.31, "learning_rate": 4.9486711585251735e-05, "loss": 0.0601, "step": 2194 }, { "epoch": 0.31, "learning_rate": 4.948624368332398e-05, "loss": 0.079, "step": 2196 }, { "epoch": 0.31, "learning_rate": 4.948577578139622e-05, "loss": 0.1026, "step": 2198 }, { "epoch": 0.31, "learning_rate": 4.9485307879468466e-05, "loss": 0.099, "step": 2200 }, { "epoch": 0.31, "learning_rate": 4.948483997754071e-05, "loss": 0.0662, "step": 2202 }, { "epoch": 0.31, "learning_rate": 4.948437207561296e-05, "loss": 0.0896, "step": 2204 }, { "epoch": 0.31, "learning_rate": 4.94839041736852e-05, "loss": 0.0807, "step": 2206 }, { "epoch": 0.31, "learning_rate": 4.948343627175744e-05, "loss": 0.1046, "step": 2208 }, { "epoch": 0.31, "learning_rate": 4.948296836982969e-05, "loss": 0.0735, "step": 2210 }, { "epoch": 0.31, "learning_rate": 4.9482500467901934e-05, "loss": 0.0695, "step": 2212 }, { "epoch": 0.31, "learning_rate": 4.9482032565974174e-05, "loss": 0.0924, "step": 2214 }, { "epoch": 0.31, "learning_rate": 4.948156466404642e-05, "loss": 0.1034, "step": 2216 }, { "epoch": 0.31, "learning_rate": 4.948109676211866e-05, "loss": 0.0763, "step": 2218 }, { "epoch": 0.31, "learning_rate": 4.948062886019091e-05, "loss": 0.0851, "step": 2220 }, { "epoch": 0.31, "learning_rate": 4.948016095826315e-05, "loss": 0.0719, "step": 2222 }, { "epoch": 0.31, "learning_rate": 4.9479693056335396e-05, "loss": 0.0899, "step": 2224 }, { "epoch": 0.31, "learning_rate": 4.9479225154407636e-05, "loss": 0.0821, "step": 2226 }, { "epoch": 0.31, "learning_rate": 4.947875725247988e-05, "loss": 0.0697, "step": 2228 }, { "epoch": 0.31, "learning_rate": 4.947828935055213e-05, "loss": 0.0901, "step": 2230 }, { "epoch": 0.31, "learning_rate": 4.947782144862437e-05, "loss": 0.0631, "step": 2232 }, { "epoch": 0.31, "learning_rate": 4.947735354669661e-05, "loss": 0.078, "step": 2234 }, { "epoch": 0.31, "learning_rate": 4.947688564476886e-05, "loss": 0.0959, "step": 2236 }, { "epoch": 0.31, "learning_rate": 4.9476417742841104e-05, "loss": 0.0922, "step": 2238 }, { "epoch": 0.31, "learning_rate": 4.947594984091335e-05, "loss": 0.1077, "step": 2240 }, { "epoch": 0.31, "learning_rate": 4.947548193898559e-05, "loss": 0.0851, "step": 2242 }, { "epoch": 0.31, "learning_rate": 4.9475014037057835e-05, "loss": 0.0806, "step": 2244 }, { "epoch": 0.32, "learning_rate": 4.947454613513008e-05, "loss": 0.0849, "step": 2246 }, { "epoch": 0.32, "learning_rate": 4.947407823320233e-05, "loss": 0.093, "step": 2248 }, { "epoch": 0.32, "learning_rate": 4.9473610331274566e-05, "loss": 0.0759, "step": 2250 }, { "epoch": 0.32, "learning_rate": 4.947314242934681e-05, "loss": 0.0918, "step": 2252 }, { "epoch": 0.32, "learning_rate": 4.947267452741906e-05, "loss": 0.0587, "step": 2254 }, { "epoch": 0.32, "learning_rate": 4.9472206625491304e-05, "loss": 0.0857, "step": 2256 }, { "epoch": 0.32, "learning_rate": 4.947173872356354e-05, "loss": 0.0774, "step": 2258 }, { "epoch": 0.32, "learning_rate": 4.947127082163579e-05, "loss": 0.0937, "step": 2260 }, { "epoch": 0.32, "learning_rate": 4.947080291970803e-05, "loss": 0.076, "step": 2262 }, { "epoch": 0.32, "learning_rate": 4.947033501778028e-05, "loss": 0.0994, "step": 2264 }, { "epoch": 0.32, "learning_rate": 4.946986711585252e-05, "loss": 0.0815, "step": 2266 }, { "epoch": 0.32, "learning_rate": 4.9469399213924766e-05, "loss": 0.108, "step": 2268 }, { "epoch": 0.32, "learning_rate": 4.9468931311997005e-05, "loss": 0.1041, "step": 2270 }, { "epoch": 0.32, "learning_rate": 4.946846341006926e-05, "loss": 0.0705, "step": 2272 }, { "epoch": 0.32, "learning_rate": 4.94679955081415e-05, "loss": 0.1079, "step": 2274 }, { "epoch": 0.32, "learning_rate": 4.946752760621374e-05, "loss": 0.0984, "step": 2276 }, { "epoch": 0.32, "learning_rate": 4.946705970428598e-05, "loss": 0.0885, "step": 2278 }, { "epoch": 0.32, "learning_rate": 4.946659180235823e-05, "loss": 0.0901, "step": 2280 }, { "epoch": 0.32, "learning_rate": 4.9466123900430474e-05, "loss": 0.0859, "step": 2282 }, { "epoch": 0.32, "learning_rate": 4.946565599850271e-05, "loss": 0.0949, "step": 2284 }, { "epoch": 0.32, "learning_rate": 4.946518809657496e-05, "loss": 0.0816, "step": 2286 }, { "epoch": 0.32, "learning_rate": 4.9464720194647205e-05, "loss": 0.1089, "step": 2288 }, { "epoch": 0.32, "learning_rate": 4.946425229271945e-05, "loss": 0.1052, "step": 2290 }, { "epoch": 0.32, "learning_rate": 4.946378439079169e-05, "loss": 0.0762, "step": 2292 }, { "epoch": 0.32, "learning_rate": 4.9463316488863936e-05, "loss": 0.0896, "step": 2294 }, { "epoch": 0.32, "learning_rate": 4.9462848586936175e-05, "loss": 0.0887, "step": 2296 }, { "epoch": 0.32, "learning_rate": 4.946238068500843e-05, "loss": 0.0937, "step": 2298 }, { "epoch": 0.32, "learning_rate": 4.9461912783080667e-05, "loss": 0.0998, "step": 2300 }, { "epoch": 0.32, "learning_rate": 4.946144488115291e-05, "loss": 0.0834, "step": 2302 }, { "epoch": 0.32, "learning_rate": 4.946097697922515e-05, "loss": 0.0535, "step": 2304 }, { "epoch": 0.32, "learning_rate": 4.9460509077297404e-05, "loss": 0.0749, "step": 2306 }, { "epoch": 0.32, "learning_rate": 4.9460041175369643e-05, "loss": 0.072, "step": 2308 }, { "epoch": 0.32, "learning_rate": 4.945957327344189e-05, "loss": 0.0742, "step": 2310 }, { "epoch": 0.32, "learning_rate": 4.945910537151413e-05, "loss": 0.092, "step": 2312 }, { "epoch": 0.32, "learning_rate": 4.9458637469586374e-05, "loss": 0.0796, "step": 2314 }, { "epoch": 0.33, "learning_rate": 4.945816956765862e-05, "loss": 0.0694, "step": 2316 }, { "epoch": 0.33, "learning_rate": 4.9457701665730866e-05, "loss": 0.0766, "step": 2318 }, { "epoch": 0.33, "learning_rate": 4.9457233763803105e-05, "loss": 0.0923, "step": 2320 }, { "epoch": 0.33, "learning_rate": 4.945676586187535e-05, "loss": 0.1028, "step": 2322 }, { "epoch": 0.33, "learning_rate": 4.94562979599476e-05, "loss": 0.0795, "step": 2324 }, { "epoch": 0.33, "learning_rate": 4.945583005801984e-05, "loss": 0.078, "step": 2326 }, { "epoch": 0.33, "learning_rate": 4.945536215609208e-05, "loss": 0.0799, "step": 2328 }, { "epoch": 0.33, "learning_rate": 4.945489425416433e-05, "loss": 0.1079, "step": 2330 }, { "epoch": 0.33, "learning_rate": 4.9454426352236574e-05, "loss": 0.0796, "step": 2332 }, { "epoch": 0.33, "learning_rate": 4.945395845030882e-05, "loss": 0.098, "step": 2334 }, { "epoch": 0.33, "learning_rate": 4.945349054838106e-05, "loss": 0.0919, "step": 2336 }, { "epoch": 0.33, "learning_rate": 4.9453022646453305e-05, "loss": 0.0801, "step": 2338 }, { "epoch": 0.33, "learning_rate": 4.945255474452555e-05, "loss": 0.0834, "step": 2340 }, { "epoch": 0.33, "learning_rate": 4.94520868425978e-05, "loss": 0.0893, "step": 2342 }, { "epoch": 0.33, "learning_rate": 4.9451618940670036e-05, "loss": 0.1059, "step": 2344 }, { "epoch": 0.33, "learning_rate": 4.945115103874228e-05, "loss": 0.0724, "step": 2346 }, { "epoch": 0.33, "learning_rate": 4.945068313681452e-05, "loss": 0.0923, "step": 2348 }, { "epoch": 0.33, "learning_rate": 4.9450215234886774e-05, "loss": 0.075, "step": 2350 }, { "epoch": 0.33, "learning_rate": 4.944974733295901e-05, "loss": 0.0787, "step": 2352 }, { "epoch": 0.33, "learning_rate": 4.944927943103126e-05, "loss": 0.0868, "step": 2354 }, { "epoch": 0.33, "learning_rate": 4.94488115291035e-05, "loss": 0.0791, "step": 2356 }, { "epoch": 0.33, "learning_rate": 4.944834362717575e-05, "loss": 0.0778, "step": 2358 }, { "epoch": 0.33, "learning_rate": 4.944787572524799e-05, "loss": 0.0834, "step": 2360 }, { "epoch": 0.33, "learning_rate": 4.9447407823320236e-05, "loss": 0.077, "step": 2362 }, { "epoch": 0.33, "learning_rate": 4.9446939921392475e-05, "loss": 0.0619, "step": 2364 }, { "epoch": 0.33, "learning_rate": 4.944647201946472e-05, "loss": 0.0895, "step": 2366 }, { "epoch": 0.33, "learning_rate": 4.9446004117536967e-05, "loss": 0.0713, "step": 2368 }, { "epoch": 0.33, "learning_rate": 4.944553621560921e-05, "loss": 0.0743, "step": 2370 }, { "epoch": 0.33, "learning_rate": 4.944506831368145e-05, "loss": 0.075, "step": 2372 }, { "epoch": 0.33, "learning_rate": 4.94446004117537e-05, "loss": 0.0811, "step": 2374 }, { "epoch": 0.33, "learning_rate": 4.9444132509825943e-05, "loss": 0.0738, "step": 2376 }, { "epoch": 0.33, "learning_rate": 4.944366460789819e-05, "loss": 0.0831, "step": 2378 }, { "epoch": 0.33, "learning_rate": 4.944319670597043e-05, "loss": 0.0905, "step": 2380 }, { "epoch": 0.33, "learning_rate": 4.9442728804042674e-05, "loss": 0.1058, "step": 2382 }, { "epoch": 0.33, "learning_rate": 4.944226090211492e-05, "loss": 0.0913, "step": 2384 }, { "epoch": 0.33, "learning_rate": 4.9441793000187166e-05, "loss": 0.0977, "step": 2386 }, { "epoch": 0.34, "learning_rate": 4.9441325098259405e-05, "loss": 0.0655, "step": 2388 }, { "epoch": 0.34, "learning_rate": 4.944085719633165e-05, "loss": 0.0907, "step": 2390 }, { "epoch": 0.34, "learning_rate": 4.94403892944039e-05, "loss": 0.1052, "step": 2392 }, { "epoch": 0.34, "learning_rate": 4.943992139247614e-05, "loss": 0.0933, "step": 2394 }, { "epoch": 0.34, "learning_rate": 4.943945349054838e-05, "loss": 0.075, "step": 2396 }, { "epoch": 0.34, "learning_rate": 4.943898558862063e-05, "loss": 0.0644, "step": 2398 }, { "epoch": 0.34, "learning_rate": 4.943851768669287e-05, "loss": 0.0824, "step": 2400 }, { "epoch": 0.34, "learning_rate": 4.943804978476512e-05, "loss": 0.09, "step": 2402 }, { "epoch": 0.34, "learning_rate": 4.943758188283736e-05, "loss": 0.0828, "step": 2404 }, { "epoch": 0.34, "learning_rate": 4.9437113980909605e-05, "loss": 0.0981, "step": 2406 }, { "epoch": 0.34, "learning_rate": 4.9436646078981844e-05, "loss": 0.0962, "step": 2408 }, { "epoch": 0.34, "learning_rate": 4.943617817705409e-05, "loss": 0.0749, "step": 2410 }, { "epoch": 0.34, "learning_rate": 4.9435710275126336e-05, "loss": 0.1149, "step": 2412 }, { "epoch": 0.34, "learning_rate": 4.943524237319858e-05, "loss": 0.1069, "step": 2414 }, { "epoch": 0.34, "learning_rate": 4.943477447127082e-05, "loss": 0.0873, "step": 2416 }, { "epoch": 0.34, "learning_rate": 4.943430656934307e-05, "loss": 0.0975, "step": 2418 }, { "epoch": 0.34, "learning_rate": 4.943383866741531e-05, "loss": 0.0846, "step": 2420 }, { "epoch": 0.34, "learning_rate": 4.943337076548756e-05, "loss": 0.0992, "step": 2422 }, { "epoch": 0.34, "learning_rate": 4.94329028635598e-05, "loss": 0.0794, "step": 2424 }, { "epoch": 0.34, "learning_rate": 4.9432434961632044e-05, "loss": 0.0782, "step": 2426 }, { "epoch": 0.34, "learning_rate": 4.943196705970429e-05, "loss": 0.0959, "step": 2428 }, { "epoch": 0.34, "learning_rate": 4.9431499157776536e-05, "loss": 0.0882, "step": 2430 }, { "epoch": 0.34, "learning_rate": 4.9431031255848775e-05, "loss": 0.0715, "step": 2432 }, { "epoch": 0.34, "learning_rate": 4.943056335392102e-05, "loss": 0.074, "step": 2434 }, { "epoch": 0.34, "learning_rate": 4.9430095451993267e-05, "loss": 0.089, "step": 2436 }, { "epoch": 0.34, "learning_rate": 4.942962755006551e-05, "loss": 0.0881, "step": 2438 }, { "epoch": 0.34, "learning_rate": 4.942915964813775e-05, "loss": 0.1152, "step": 2440 }, { "epoch": 0.34, "learning_rate": 4.942869174621e-05, "loss": 0.0627, "step": 2442 }, { "epoch": 0.34, "learning_rate": 4.942822384428224e-05, "loss": 0.0979, "step": 2444 }, { "epoch": 0.34, "learning_rate": 4.942775594235449e-05, "loss": 0.0961, "step": 2446 }, { "epoch": 0.34, "learning_rate": 4.942728804042673e-05, "loss": 0.1038, "step": 2448 }, { "epoch": 0.34, "learning_rate": 4.9426820138498974e-05, "loss": 0.0805, "step": 2450 }, { "epoch": 0.34, "learning_rate": 4.9426352236571214e-05, "loss": 0.0731, "step": 2452 }, { "epoch": 0.34, "learning_rate": 4.9425884334643466e-05, "loss": 0.0756, "step": 2454 }, { "epoch": 0.34, "learning_rate": 4.9425416432715705e-05, "loss": 0.0756, "step": 2456 }, { "epoch": 0.35, "learning_rate": 4.942494853078795e-05, "loss": 0.109, "step": 2458 }, { "epoch": 0.35, "learning_rate": 4.942448062886019e-05, "loss": 0.0913, "step": 2460 }, { "epoch": 0.35, "learning_rate": 4.9424012726932436e-05, "loss": 0.0695, "step": 2462 }, { "epoch": 0.35, "learning_rate": 4.942354482500468e-05, "loss": 0.0919, "step": 2464 }, { "epoch": 0.35, "learning_rate": 4.942307692307693e-05, "loss": 0.0844, "step": 2466 }, { "epoch": 0.35, "learning_rate": 4.942260902114917e-05, "loss": 0.0902, "step": 2468 }, { "epoch": 0.35, "learning_rate": 4.942214111922141e-05, "loss": 0.0867, "step": 2470 }, { "epoch": 0.35, "learning_rate": 4.942167321729366e-05, "loss": 0.1038, "step": 2472 }, { "epoch": 0.35, "learning_rate": 4.9421205315365905e-05, "loss": 0.0879, "step": 2474 }, { "epoch": 0.35, "learning_rate": 4.9420737413438144e-05, "loss": 0.0834, "step": 2476 }, { "epoch": 0.35, "learning_rate": 4.942026951151039e-05, "loss": 0.0751, "step": 2478 }, { "epoch": 0.35, "learning_rate": 4.9419801609582636e-05, "loss": 0.0794, "step": 2480 }, { "epoch": 0.35, "learning_rate": 4.941933370765488e-05, "loss": 0.0741, "step": 2482 }, { "epoch": 0.35, "learning_rate": 4.941886580572712e-05, "loss": 0.0955, "step": 2484 }, { "epoch": 0.35, "learning_rate": 4.941839790379937e-05, "loss": 0.0644, "step": 2486 }, { "epoch": 0.35, "learning_rate": 4.941793000187161e-05, "loss": 0.0998, "step": 2488 }, { "epoch": 0.35, "learning_rate": 4.941746209994386e-05, "loss": 0.0783, "step": 2490 }, { "epoch": 0.35, "learning_rate": 4.94169941980161e-05, "loss": 0.0843, "step": 2492 }, { "epoch": 0.35, "learning_rate": 4.9416526296088344e-05, "loss": 0.0812, "step": 2494 }, { "epoch": 0.35, "learning_rate": 4.941605839416058e-05, "loss": 0.0885, "step": 2496 }, { "epoch": 0.35, "learning_rate": 4.9415590492232836e-05, "loss": 0.0798, "step": 2498 }, { "epoch": 0.35, "learning_rate": 4.9415122590305075e-05, "loss": 0.0867, "step": 2500 }, { "epoch": 0.35, "learning_rate": 4.941465468837732e-05, "loss": 0.0652, "step": 2502 }, { "epoch": 0.35, "learning_rate": 4.941418678644956e-05, "loss": 0.074, "step": 2504 }, { "epoch": 0.35, "learning_rate": 4.9413718884521806e-05, "loss": 0.0926, "step": 2506 }, { "epoch": 0.35, "learning_rate": 4.941325098259405e-05, "loss": 0.0821, "step": 2508 }, { "epoch": 0.35, "learning_rate": 4.94127830806663e-05, "loss": 0.0757, "step": 2510 }, { "epoch": 0.35, "learning_rate": 4.941231517873854e-05, "loss": 0.0862, "step": 2512 }, { "epoch": 0.35, "learning_rate": 4.941184727681078e-05, "loss": 0.0833, "step": 2514 }, { "epoch": 0.35, "learning_rate": 4.941137937488303e-05, "loss": 0.086, "step": 2516 }, { "epoch": 0.35, "learning_rate": 4.9410911472955274e-05, "loss": 0.082, "step": 2518 }, { "epoch": 0.35, "learning_rate": 4.9410443571027514e-05, "loss": 0.0901, "step": 2520 }, { "epoch": 0.35, "learning_rate": 4.940997566909976e-05, "loss": 0.0925, "step": 2522 }, { "epoch": 0.35, "learning_rate": 4.9409507767172005e-05, "loss": 0.0891, "step": 2524 }, { "epoch": 0.35, "learning_rate": 4.940903986524425e-05, "loss": 0.0839, "step": 2526 }, { "epoch": 0.35, "learning_rate": 4.940857196331649e-05, "loss": 0.0825, "step": 2528 }, { "epoch": 0.36, "learning_rate": 4.9408104061388736e-05, "loss": 0.1103, "step": 2530 }, { "epoch": 0.36, "learning_rate": 4.940763615946098e-05, "loss": 0.0753, "step": 2532 }, { "epoch": 0.36, "learning_rate": 4.940716825753322e-05, "loss": 0.1045, "step": 2534 }, { "epoch": 0.36, "learning_rate": 4.940670035560547e-05, "loss": 0.0909, "step": 2536 }, { "epoch": 0.36, "learning_rate": 4.9406232453677706e-05, "loss": 0.0801, "step": 2538 }, { "epoch": 0.36, "learning_rate": 4.940576455174995e-05, "loss": 0.0956, "step": 2540 }, { "epoch": 0.36, "learning_rate": 4.94052966498222e-05, "loss": 0.0888, "step": 2542 }, { "epoch": 0.36, "learning_rate": 4.9404828747894444e-05, "loss": 0.0784, "step": 2544 }, { "epoch": 0.36, "learning_rate": 4.940436084596668e-05, "loss": 0.0807, "step": 2546 }, { "epoch": 0.36, "learning_rate": 4.940389294403893e-05, "loss": 0.0854, "step": 2548 }, { "epoch": 0.36, "learning_rate": 4.9403425042111175e-05, "loss": 0.0882, "step": 2550 }, { "epoch": 0.36, "learning_rate": 4.940295714018342e-05, "loss": 0.0825, "step": 2552 }, { "epoch": 0.36, "learning_rate": 4.940248923825566e-05, "loss": 0.0677, "step": 2554 }, { "epoch": 0.36, "learning_rate": 4.9402021336327906e-05, "loss": 0.0805, "step": 2556 }, { "epoch": 0.36, "learning_rate": 4.940155343440015e-05, "loss": 0.063, "step": 2558 }, { "epoch": 0.36, "learning_rate": 4.94010855324724e-05, "loss": 0.0733, "step": 2560 }, { "epoch": 0.36, "learning_rate": 4.940061763054464e-05, "loss": 0.0732, "step": 2562 }, { "epoch": 0.36, "learning_rate": 4.940014972861688e-05, "loss": 0.1077, "step": 2564 }, { "epoch": 0.36, "learning_rate": 4.939968182668913e-05, "loss": 0.0888, "step": 2566 }, { "epoch": 0.36, "learning_rate": 4.9399213924761375e-05, "loss": 0.0857, "step": 2568 }, { "epoch": 0.36, "learning_rate": 4.9398746022833614e-05, "loss": 0.0794, "step": 2570 }, { "epoch": 0.36, "learning_rate": 4.939827812090586e-05, "loss": 0.0967, "step": 2572 }, { "epoch": 0.36, "learning_rate": 4.93978102189781e-05, "loss": 0.1005, "step": 2574 }, { "epoch": 0.36, "learning_rate": 4.939734231705035e-05, "loss": 0.0941, "step": 2576 }, { "epoch": 0.36, "learning_rate": 4.939687441512259e-05, "loss": 0.1091, "step": 2578 }, { "epoch": 0.36, "learning_rate": 4.939640651319484e-05, "loss": 0.0826, "step": 2580 }, { "epoch": 0.36, "learning_rate": 4.9395938611267076e-05, "loss": 0.0791, "step": 2582 }, { "epoch": 0.36, "learning_rate": 4.939547070933933e-05, "loss": 0.0869, "step": 2584 }, { "epoch": 0.36, "learning_rate": 4.939500280741157e-05, "loss": 0.0631, "step": 2586 }, { "epoch": 0.36, "learning_rate": 4.9394534905483814e-05, "loss": 0.0909, "step": 2588 }, { "epoch": 0.36, "learning_rate": 4.939406700355605e-05, "loss": 0.0927, "step": 2590 }, { "epoch": 0.36, "learning_rate": 4.93935991016283e-05, "loss": 0.0659, "step": 2592 }, { "epoch": 0.36, "learning_rate": 4.9393131199700545e-05, "loss": 0.0723, "step": 2594 }, { "epoch": 0.36, "learning_rate": 4.939266329777279e-05, "loss": 0.1038, "step": 2596 }, { "epoch": 0.36, "learning_rate": 4.939219539584503e-05, "loss": 0.0827, "step": 2598 }, { "epoch": 0.36, "learning_rate": 4.9391727493917275e-05, "loss": 0.0997, "step": 2600 }, { "epoch": 0.37, "learning_rate": 4.939125959198952e-05, "loss": 0.0814, "step": 2602 }, { "epoch": 0.37, "learning_rate": 4.939079169006177e-05, "loss": 0.0988, "step": 2604 }, { "epoch": 0.37, "learning_rate": 4.9390323788134006e-05, "loss": 0.094, "step": 2606 }, { "epoch": 0.37, "learning_rate": 4.938985588620625e-05, "loss": 0.0926, "step": 2608 }, { "epoch": 0.37, "learning_rate": 4.93893879842785e-05, "loss": 0.0775, "step": 2610 }, { "epoch": 0.37, "learning_rate": 4.9388920082350744e-05, "loss": 0.0849, "step": 2612 }, { "epoch": 0.37, "learning_rate": 4.938845218042298e-05, "loss": 0.0768, "step": 2614 }, { "epoch": 0.37, "learning_rate": 4.938798427849523e-05, "loss": 0.1012, "step": 2616 }, { "epoch": 0.37, "learning_rate": 4.9387516376567475e-05, "loss": 0.1016, "step": 2618 }, { "epoch": 0.37, "learning_rate": 4.938704847463972e-05, "loss": 0.0953, "step": 2620 }, { "epoch": 0.37, "learning_rate": 4.938658057271196e-05, "loss": 0.1024, "step": 2622 }, { "epoch": 0.37, "learning_rate": 4.9386112670784206e-05, "loss": 0.1112, "step": 2624 }, { "epoch": 0.37, "learning_rate": 4.9385644768856445e-05, "loss": 0.0912, "step": 2626 }, { "epoch": 0.37, "learning_rate": 4.93851768669287e-05, "loss": 0.0913, "step": 2628 }, { "epoch": 0.37, "learning_rate": 4.938470896500094e-05, "loss": 0.081, "step": 2630 }, { "epoch": 0.37, "learning_rate": 4.938424106307318e-05, "loss": 0.0804, "step": 2632 }, { "epoch": 0.37, "learning_rate": 4.938377316114542e-05, "loss": 0.0689, "step": 2634 }, { "epoch": 0.37, "learning_rate": 4.9383305259217675e-05, "loss": 0.0885, "step": 2636 }, { "epoch": 0.37, "learning_rate": 4.9382837357289914e-05, "loss": 0.0695, "step": 2638 }, { "epoch": 0.37, "learning_rate": 4.938236945536216e-05, "loss": 0.0922, "step": 2640 }, { "epoch": 0.37, "learning_rate": 4.93819015534344e-05, "loss": 0.0983, "step": 2642 }, { "epoch": 0.37, "learning_rate": 4.9381433651506645e-05, "loss": 0.0997, "step": 2644 }, { "epoch": 0.37, "learning_rate": 4.938096574957889e-05, "loss": 0.0841, "step": 2646 }, { "epoch": 0.37, "learning_rate": 4.938049784765114e-05, "loss": 0.0863, "step": 2648 }, { "epoch": 0.37, "learning_rate": 4.9380029945723376e-05, "loss": 0.0944, "step": 2650 }, { "epoch": 0.37, "learning_rate": 4.937956204379562e-05, "loss": 0.1024, "step": 2652 }, { "epoch": 0.37, "learning_rate": 4.937909414186787e-05, "loss": 0.0689, "step": 2654 }, { "epoch": 0.37, "learning_rate": 4.9378626239940114e-05, "loss": 0.0946, "step": 2656 }, { "epoch": 0.37, "learning_rate": 4.937815833801235e-05, "loss": 0.0825, "step": 2658 }, { "epoch": 0.37, "learning_rate": 4.93776904360846e-05, "loss": 0.0975, "step": 2660 }, { "epoch": 0.37, "learning_rate": 4.9377222534156845e-05, "loss": 0.0915, "step": 2662 }, { "epoch": 0.37, "learning_rate": 4.937675463222909e-05, "loss": 0.0676, "step": 2664 }, { "epoch": 0.37, "learning_rate": 4.937628673030133e-05, "loss": 0.102, "step": 2666 }, { "epoch": 0.37, "learning_rate": 4.9375818828373575e-05, "loss": 0.1083, "step": 2668 }, { "epoch": 0.37, "learning_rate": 4.937535092644582e-05, "loss": 0.0844, "step": 2670 }, { "epoch": 0.38, "learning_rate": 4.937488302451807e-05, "loss": 0.0943, "step": 2672 }, { "epoch": 0.38, "learning_rate": 4.9374415122590306e-05, "loss": 0.0872, "step": 2674 }, { "epoch": 0.38, "learning_rate": 4.937394722066255e-05, "loss": 0.0766, "step": 2676 }, { "epoch": 0.38, "learning_rate": 4.937347931873479e-05, "loss": 0.069, "step": 2678 }, { "epoch": 0.38, "learning_rate": 4.9373011416807044e-05, "loss": 0.0887, "step": 2680 }, { "epoch": 0.38, "learning_rate": 4.937254351487928e-05, "loss": 0.0843, "step": 2682 }, { "epoch": 0.38, "learning_rate": 4.937207561295153e-05, "loss": 0.062, "step": 2684 }, { "epoch": 0.38, "learning_rate": 4.937160771102377e-05, "loss": 0.1122, "step": 2686 }, { "epoch": 0.38, "learning_rate": 4.9371139809096014e-05, "loss": 0.0852, "step": 2688 }, { "epoch": 0.38, "learning_rate": 4.937067190716826e-05, "loss": 0.0827, "step": 2690 }, { "epoch": 0.38, "learning_rate": 4.9370204005240506e-05, "loss": 0.0627, "step": 2692 }, { "epoch": 0.38, "learning_rate": 4.9369736103312745e-05, "loss": 0.1137, "step": 2694 }, { "epoch": 0.38, "learning_rate": 4.936926820138499e-05, "loss": 0.1036, "step": 2696 }, { "epoch": 0.38, "learning_rate": 4.936880029945724e-05, "loss": 0.0767, "step": 2698 }, { "epoch": 0.38, "learning_rate": 4.936833239752948e-05, "loss": 0.0915, "step": 2700 }, { "epoch": 0.38, "learning_rate": 4.936786449560172e-05, "loss": 0.1017, "step": 2702 }, { "epoch": 0.38, "learning_rate": 4.936739659367397e-05, "loss": 0.0586, "step": 2704 }, { "epoch": 0.38, "learning_rate": 4.9366928691746214e-05, "loss": 0.1029, "step": 2706 }, { "epoch": 0.38, "learning_rate": 4.936646078981846e-05, "loss": 0.0846, "step": 2708 }, { "epoch": 0.38, "learning_rate": 4.93659928878907e-05, "loss": 0.0791, "step": 2710 }, { "epoch": 0.38, "learning_rate": 4.9365524985962945e-05, "loss": 0.0909, "step": 2712 }, { "epoch": 0.38, "learning_rate": 4.936505708403519e-05, "loss": 0.102, "step": 2714 }, { "epoch": 0.38, "learning_rate": 4.936458918210744e-05, "loss": 0.0842, "step": 2716 }, { "epoch": 0.38, "learning_rate": 4.9364121280179676e-05, "loss": 0.0893, "step": 2718 }, { "epoch": 0.38, "learning_rate": 4.936365337825192e-05, "loss": 0.1152, "step": 2720 }, { "epoch": 0.38, "learning_rate": 4.936318547632416e-05, "loss": 0.0785, "step": 2722 }, { "epoch": 0.38, "learning_rate": 4.9362717574396414e-05, "loss": 0.0631, "step": 2724 }, { "epoch": 0.38, "learning_rate": 4.936224967246865e-05, "loss": 0.0979, "step": 2726 }, { "epoch": 0.38, "learning_rate": 4.93617817705409e-05, "loss": 0.0591, "step": 2728 }, { "epoch": 0.38, "learning_rate": 4.936131386861314e-05, "loss": 0.1029, "step": 2730 }, { "epoch": 0.38, "learning_rate": 4.936084596668539e-05, "loss": 0.1041, "step": 2732 }, { "epoch": 0.38, "learning_rate": 4.936037806475763e-05, "loss": 0.0838, "step": 2734 }, { "epoch": 0.38, "learning_rate": 4.9359910162829876e-05, "loss": 0.1018, "step": 2736 }, { "epoch": 0.38, "learning_rate": 4.9359442260902115e-05, "loss": 0.1033, "step": 2738 }, { "epoch": 0.38, "learning_rate": 4.935897435897436e-05, "loss": 0.0843, "step": 2740 }, { "epoch": 0.38, "learning_rate": 4.9358506457046606e-05, "loss": 0.0835, "step": 2742 }, { "epoch": 0.39, "learning_rate": 4.935803855511885e-05, "loss": 0.1003, "step": 2744 }, { "epoch": 0.39, "learning_rate": 4.935757065319109e-05, "loss": 0.0987, "step": 2746 }, { "epoch": 0.39, "learning_rate": 4.935710275126334e-05, "loss": 0.0769, "step": 2748 }, { "epoch": 0.39, "learning_rate": 4.935663484933558e-05, "loss": 0.0842, "step": 2750 }, { "epoch": 0.39, "learning_rate": 4.935616694740783e-05, "loss": 0.0742, "step": 2752 }, { "epoch": 0.39, "learning_rate": 4.935569904548007e-05, "loss": 0.0921, "step": 2754 }, { "epoch": 0.39, "learning_rate": 4.9355231143552314e-05, "loss": 0.0929, "step": 2756 }, { "epoch": 0.39, "learning_rate": 4.935476324162456e-05, "loss": 0.0645, "step": 2758 }, { "epoch": 0.39, "learning_rate": 4.9354295339696806e-05, "loss": 0.0891, "step": 2760 }, { "epoch": 0.39, "learning_rate": 4.9353827437769045e-05, "loss": 0.0806, "step": 2762 }, { "epoch": 0.39, "learning_rate": 4.935335953584129e-05, "loss": 0.1043, "step": 2764 }, { "epoch": 0.39, "learning_rate": 4.935289163391354e-05, "loss": 0.1009, "step": 2766 }, { "epoch": 0.39, "learning_rate": 4.935242373198578e-05, "loss": 0.0715, "step": 2768 }, { "epoch": 0.39, "learning_rate": 4.935195583005802e-05, "loss": 0.0918, "step": 2770 }, { "epoch": 0.39, "learning_rate": 4.935148792813027e-05, "loss": 0.0957, "step": 2772 }, { "epoch": 0.39, "learning_rate": 4.935102002620251e-05, "loss": 0.0927, "step": 2774 }, { "epoch": 0.39, "learning_rate": 4.935055212427476e-05, "loss": 0.1075, "step": 2776 }, { "epoch": 0.39, "learning_rate": 4.9350084222347e-05, "loss": 0.0845, "step": 2778 }, { "epoch": 0.39, "learning_rate": 4.9349616320419245e-05, "loss": 0.0982, "step": 2780 }, { "epoch": 0.39, "learning_rate": 4.9349148418491484e-05, "loss": 0.1105, "step": 2782 }, { "epoch": 0.39, "learning_rate": 4.934868051656374e-05, "loss": 0.1015, "step": 2784 }, { "epoch": 0.39, "learning_rate": 4.9348212614635976e-05, "loss": 0.0815, "step": 2786 }, { "epoch": 0.39, "learning_rate": 4.9347744712708215e-05, "loss": 0.1058, "step": 2788 }, { "epoch": 0.39, "learning_rate": 4.934727681078046e-05, "loss": 0.0769, "step": 2790 }, { "epoch": 0.39, "learning_rate": 4.934680890885271e-05, "loss": 0.0965, "step": 2792 }, { "epoch": 0.39, "learning_rate": 4.934634100692495e-05, "loss": 0.0706, "step": 2794 }, { "epoch": 0.39, "learning_rate": 4.934587310499719e-05, "loss": 0.077, "step": 2796 }, { "epoch": 0.39, "learning_rate": 4.934540520306944e-05, "loss": 0.0854, "step": 2798 }, { "epoch": 0.39, "learning_rate": 4.9344937301141684e-05, "loss": 0.0888, "step": 2800 }, { "epoch": 0.39, "learning_rate": 4.934446939921393e-05, "loss": 0.07, "step": 2802 }, { "epoch": 0.39, "learning_rate": 4.934400149728617e-05, "loss": 0.0731, "step": 2804 }, { "epoch": 0.39, "learning_rate": 4.9343533595358415e-05, "loss": 0.0812, "step": 2806 }, { "epoch": 0.39, "learning_rate": 4.9343065693430654e-05, "loss": 0.0853, "step": 2808 }, { "epoch": 0.39, "learning_rate": 4.9342597791502906e-05, "loss": 0.084, "step": 2810 }, { "epoch": 0.39, "learning_rate": 4.9342129889575146e-05, "loss": 0.093, "step": 2812 }, { "epoch": 0.4, "learning_rate": 4.934166198764739e-05, "loss": 0.0735, "step": 2814 }, { "epoch": 0.4, "learning_rate": 4.934119408571963e-05, "loss": 0.07, "step": 2816 }, { "epoch": 0.4, "learning_rate": 4.9340726183791877e-05, "loss": 0.1021, "step": 2818 }, { "epoch": 0.4, "learning_rate": 4.934025828186412e-05, "loss": 0.0915, "step": 2820 }, { "epoch": 0.4, "learning_rate": 4.933979037993637e-05, "loss": 0.0718, "step": 2822 }, { "epoch": 0.4, "learning_rate": 4.933932247800861e-05, "loss": 0.0619, "step": 2824 }, { "epoch": 0.4, "learning_rate": 4.9338854576080853e-05, "loss": 0.1078, "step": 2826 }, { "epoch": 0.4, "learning_rate": 4.93383866741531e-05, "loss": 0.1037, "step": 2828 }, { "epoch": 0.4, "learning_rate": 4.9337918772225345e-05, "loss": 0.104, "step": 2830 }, { "epoch": 0.4, "learning_rate": 4.9337450870297584e-05, "loss": 0.0801, "step": 2832 }, { "epoch": 0.4, "learning_rate": 4.933698296836983e-05, "loss": 0.079, "step": 2834 }, { "epoch": 0.4, "learning_rate": 4.9336515066442076e-05, "loss": 0.0832, "step": 2836 }, { "epoch": 0.4, "learning_rate": 4.933604716451432e-05, "loss": 0.1078, "step": 2838 }, { "epoch": 0.4, "learning_rate": 4.933557926258656e-05, "loss": 0.1121, "step": 2840 }, { "epoch": 0.4, "learning_rate": 4.933511136065881e-05, "loss": 0.0727, "step": 2842 }, { "epoch": 0.4, "learning_rate": 4.933464345873105e-05, "loss": 0.0748, "step": 2844 }, { "epoch": 0.4, "learning_rate": 4.93341755568033e-05, "loss": 0.1016, "step": 2846 }, { "epoch": 0.4, "learning_rate": 4.933370765487554e-05, "loss": 0.0796, "step": 2848 }, { "epoch": 0.4, "learning_rate": 4.9333239752947784e-05, "loss": 0.0688, "step": 2850 }, { "epoch": 0.4, "learning_rate": 4.933277185102002e-05, "loss": 0.0842, "step": 2852 }, { "epoch": 0.4, "learning_rate": 4.9332303949092276e-05, "loss": 0.079, "step": 2854 }, { "epoch": 0.4, "learning_rate": 4.9331836047164515e-05, "loss": 0.0742, "step": 2856 }, { "epoch": 0.4, "learning_rate": 4.933136814523676e-05, "loss": 0.1007, "step": 2858 }, { "epoch": 0.4, "learning_rate": 4.9330900243309e-05, "loss": 0.1008, "step": 2860 }, { "epoch": 0.4, "learning_rate": 4.933043234138125e-05, "loss": 0.1142, "step": 2862 }, { "epoch": 0.4, "learning_rate": 4.932996443945349e-05, "loss": 0.0813, "step": 2864 }, { "epoch": 0.4, "learning_rate": 4.932949653752574e-05, "loss": 0.1144, "step": 2866 }, { "epoch": 0.4, "learning_rate": 4.932902863559798e-05, "loss": 0.0992, "step": 2868 }, { "epoch": 0.4, "learning_rate": 4.932856073367022e-05, "loss": 0.0968, "step": 2870 }, { "epoch": 0.4, "learning_rate": 4.932809283174247e-05, "loss": 0.1014, "step": 2872 }, { "epoch": 0.4, "learning_rate": 4.9327624929814715e-05, "loss": 0.1101, "step": 2874 }, { "epoch": 0.4, "learning_rate": 4.9327157027886954e-05, "loss": 0.1064, "step": 2876 }, { "epoch": 0.4, "learning_rate": 4.93266891259592e-05, "loss": 0.0975, "step": 2878 }, { "epoch": 0.4, "learning_rate": 4.9326221224031446e-05, "loss": 0.0771, "step": 2880 }, { "epoch": 0.4, "learning_rate": 4.932575332210369e-05, "loss": 0.0935, "step": 2882 }, { "epoch": 0.4, "learning_rate": 4.932528542017593e-05, "loss": 0.0904, "step": 2884 }, { "epoch": 0.41, "learning_rate": 4.9324817518248177e-05, "loss": 0.089, "step": 2886 }, { "epoch": 0.41, "learning_rate": 4.932434961632042e-05, "loss": 0.0849, "step": 2888 }, { "epoch": 0.41, "learning_rate": 4.932388171439267e-05, "loss": 0.0978, "step": 2890 }, { "epoch": 0.41, "learning_rate": 4.932341381246491e-05, "loss": 0.0862, "step": 2892 }, { "epoch": 0.41, "learning_rate": 4.9322945910537153e-05, "loss": 0.1218, "step": 2894 }, { "epoch": 0.41, "learning_rate": 4.93224780086094e-05, "loss": 0.0879, "step": 2896 }, { "epoch": 0.41, "learning_rate": 4.9322010106681645e-05, "loss": 0.0632, "step": 2898 }, { "epoch": 0.41, "learning_rate": 4.9321542204753884e-05, "loss": 0.0839, "step": 2900 }, { "epoch": 0.41, "learning_rate": 4.932107430282613e-05, "loss": 0.0785, "step": 2902 }, { "epoch": 0.41, "learning_rate": 4.932060640089837e-05, "loss": 0.081, "step": 2904 }, { "epoch": 0.41, "learning_rate": 4.932013849897062e-05, "loss": 0.0749, "step": 2906 }, { "epoch": 0.41, "learning_rate": 4.931967059704286e-05, "loss": 0.0961, "step": 2908 }, { "epoch": 0.41, "learning_rate": 4.931920269511511e-05, "loss": 0.0717, "step": 2910 }, { "epoch": 0.41, "learning_rate": 4.9318734793187346e-05, "loss": 0.0949, "step": 2912 }, { "epoch": 0.41, "learning_rate": 4.93182668912596e-05, "loss": 0.1055, "step": 2914 }, { "epoch": 0.41, "learning_rate": 4.931779898933184e-05, "loss": 0.1064, "step": 2916 }, { "epoch": 0.41, "learning_rate": 4.9317331087404084e-05, "loss": 0.0878, "step": 2918 }, { "epoch": 0.41, "learning_rate": 4.931686318547632e-05, "loss": 0.1077, "step": 2920 }, { "epoch": 0.41, "learning_rate": 4.931639528354857e-05, "loss": 0.097, "step": 2922 }, { "epoch": 0.41, "learning_rate": 4.9315927381620815e-05, "loss": 0.0931, "step": 2924 }, { "epoch": 0.41, "learning_rate": 4.931545947969306e-05, "loss": 0.0806, "step": 2926 }, { "epoch": 0.41, "learning_rate": 4.93149915777653e-05, "loss": 0.1012, "step": 2928 }, { "epoch": 0.41, "learning_rate": 4.9314523675837546e-05, "loss": 0.0795, "step": 2930 }, { "epoch": 0.41, "learning_rate": 4.931405577390979e-05, "loss": 0.0863, "step": 2932 }, { "epoch": 0.41, "learning_rate": 4.931358787198204e-05, "loss": 0.1046, "step": 2934 }, { "epoch": 0.41, "learning_rate": 4.931311997005428e-05, "loss": 0.0997, "step": 2936 }, { "epoch": 0.41, "learning_rate": 4.931265206812652e-05, "loss": 0.0833, "step": 2938 }, { "epoch": 0.41, "learning_rate": 4.931218416619877e-05, "loss": 0.0915, "step": 2940 }, { "epoch": 0.41, "learning_rate": 4.9311716264271015e-05, "loss": 0.0701, "step": 2942 }, { "epoch": 0.41, "learning_rate": 4.9311248362343254e-05, "loss": 0.0793, "step": 2944 }, { "epoch": 0.41, "learning_rate": 4.93107804604155e-05, "loss": 0.0771, "step": 2946 }, { "epoch": 0.41, "learning_rate": 4.9310312558487746e-05, "loss": 0.0764, "step": 2948 }, { "epoch": 0.41, "learning_rate": 4.930984465655999e-05, "loss": 0.1202, "step": 2950 }, { "epoch": 0.41, "learning_rate": 4.930937675463223e-05, "loss": 0.0874, "step": 2952 }, { "epoch": 0.41, "learning_rate": 4.930890885270448e-05, "loss": 0.0884, "step": 2954 }, { "epoch": 0.41, "learning_rate": 4.9308440950776716e-05, "loss": 0.0876, "step": 2956 }, { "epoch": 0.42, "learning_rate": 4.930797304884897e-05, "loss": 0.0696, "step": 2958 }, { "epoch": 0.42, "learning_rate": 4.930750514692121e-05, "loss": 0.0949, "step": 2960 }, { "epoch": 0.42, "learning_rate": 4.9307037244993453e-05, "loss": 0.0941, "step": 2962 }, { "epoch": 0.42, "learning_rate": 4.930656934306569e-05, "loss": 0.0809, "step": 2964 }, { "epoch": 0.42, "learning_rate": 4.930610144113794e-05, "loss": 0.0756, "step": 2966 }, { "epoch": 0.42, "learning_rate": 4.9305633539210184e-05, "loss": 0.0949, "step": 2968 }, { "epoch": 0.42, "learning_rate": 4.930516563728243e-05, "loss": 0.0901, "step": 2970 }, { "epoch": 0.42, "learning_rate": 4.930469773535467e-05, "loss": 0.0894, "step": 2972 }, { "epoch": 0.42, "learning_rate": 4.9304229833426915e-05, "loss": 0.0945, "step": 2974 }, { "epoch": 0.42, "learning_rate": 4.930376193149916e-05, "loss": 0.089, "step": 2976 }, { "epoch": 0.42, "learning_rate": 4.930329402957141e-05, "loss": 0.1, "step": 2978 }, { "epoch": 0.42, "learning_rate": 4.9302826127643646e-05, "loss": 0.0658, "step": 2980 }, { "epoch": 0.42, "learning_rate": 4.930235822571589e-05, "loss": 0.1, "step": 2982 }, { "epoch": 0.42, "learning_rate": 4.930189032378814e-05, "loss": 0.0922, "step": 2984 }, { "epoch": 0.42, "learning_rate": 4.9301422421860384e-05, "loss": 0.0654, "step": 2986 }, { "epoch": 0.42, "learning_rate": 4.930095451993262e-05, "loss": 0.1063, "step": 2988 }, { "epoch": 0.42, "learning_rate": 4.930048661800487e-05, "loss": 0.0785, "step": 2990 }, { "epoch": 0.42, "learning_rate": 4.9300018716077115e-05, "loss": 0.0839, "step": 2992 }, { "epoch": 0.42, "learning_rate": 4.929955081414936e-05, "loss": 0.0981, "step": 2994 }, { "epoch": 0.42, "learning_rate": 4.92990829122216e-05, "loss": 0.0758, "step": 2996 }, { "epoch": 0.42, "learning_rate": 4.9298615010293846e-05, "loss": 0.08, "step": 2998 }, { "epoch": 0.42, "learning_rate": 4.9298147108366085e-05, "loss": 0.104, "step": 3000 }, { "epoch": 0.42, "eval_gen_len": 31.5172, "eval_loss": 1.0198298692703247, "eval_meteor": 0.0411, "eval_runtime": 15.656, "eval_samples_per_second": 3.705, "eval_steps_per_second": 0.511, "step": 3000 }, { "epoch": 0.42, "learning_rate": 4.929767920643834e-05, "loss": 0.1136, "step": 3002 }, { "epoch": 0.42, "learning_rate": 4.929721130451058e-05, "loss": 0.0673, "step": 3004 }, { "epoch": 0.42, "learning_rate": 4.929674340258282e-05, "loss": 0.1076, "step": 3006 }, { "epoch": 0.42, "learning_rate": 4.929627550065506e-05, "loss": 0.0948, "step": 3008 }, { "epoch": 0.42, "learning_rate": 4.9295807598727315e-05, "loss": 0.0816, "step": 3010 }, { "epoch": 0.42, "learning_rate": 4.9295339696799554e-05, "loss": 0.0813, "step": 3012 }, { "epoch": 0.42, "learning_rate": 4.92948717948718e-05, "loss": 0.087, "step": 3014 }, { "epoch": 0.42, "learning_rate": 4.929440389294404e-05, "loss": 0.0923, "step": 3016 }, { "epoch": 0.42, "learning_rate": 4.9293935991016285e-05, "loss": 0.0832, "step": 3018 }, { "epoch": 0.42, "learning_rate": 4.929346808908853e-05, "loss": 0.121, "step": 3020 }, { "epoch": 0.42, "learning_rate": 4.929300018716078e-05, "loss": 0.0928, "step": 3022 }, { "epoch": 0.42, "learning_rate": 4.9292532285233016e-05, "loss": 0.0719, "step": 3024 }, { "epoch": 0.42, "learning_rate": 4.929206438330526e-05, "loss": 0.0995, "step": 3026 }, { "epoch": 0.43, "learning_rate": 4.929159648137751e-05, "loss": 0.0781, "step": 3028 }, { "epoch": 0.43, "learning_rate": 4.9291128579449754e-05, "loss": 0.1279, "step": 3030 }, { "epoch": 0.43, "learning_rate": 4.929066067752199e-05, "loss": 0.0931, "step": 3032 }, { "epoch": 0.43, "learning_rate": 4.929019277559424e-05, "loss": 0.0818, "step": 3034 }, { "epoch": 0.43, "learning_rate": 4.9289724873666484e-05, "loss": 0.0922, "step": 3036 }, { "epoch": 0.43, "learning_rate": 4.928925697173873e-05, "loss": 0.0798, "step": 3038 }, { "epoch": 0.43, "learning_rate": 4.928878906981097e-05, "loss": 0.0919, "step": 3040 }, { "epoch": 0.43, "learning_rate": 4.928832116788321e-05, "loss": 0.0774, "step": 3042 }, { "epoch": 0.43, "learning_rate": 4.928785326595546e-05, "loss": 0.0971, "step": 3044 }, { "epoch": 0.43, "learning_rate": 4.92873853640277e-05, "loss": 0.0831, "step": 3046 }, { "epoch": 0.43, "learning_rate": 4.9286917462099946e-05, "loss": 0.1036, "step": 3048 }, { "epoch": 0.43, "learning_rate": 4.9286449560172186e-05, "loss": 0.0944, "step": 3050 }, { "epoch": 0.43, "learning_rate": 4.928598165824443e-05, "loss": 0.0763, "step": 3052 }, { "epoch": 0.43, "learning_rate": 4.928551375631668e-05, "loss": 0.0883, "step": 3054 }, { "epoch": 0.43, "learning_rate": 4.928504585438892e-05, "loss": 0.0872, "step": 3056 }, { "epoch": 0.43, "learning_rate": 4.928457795246116e-05, "loss": 0.1013, "step": 3058 }, { "epoch": 0.43, "learning_rate": 4.928411005053341e-05, "loss": 0.0789, "step": 3060 }, { "epoch": 0.43, "learning_rate": 4.9283642148605654e-05, "loss": 0.0916, "step": 3062 }, { "epoch": 0.43, "learning_rate": 4.92831742466779e-05, "loss": 0.0894, "step": 3064 }, { "epoch": 0.43, "learning_rate": 4.928270634475014e-05, "loss": 0.0807, "step": 3066 }, { "epoch": 0.43, "learning_rate": 4.9282238442822385e-05, "loss": 0.0952, "step": 3068 }, { "epoch": 0.43, "learning_rate": 4.928177054089463e-05, "loss": 0.1163, "step": 3070 }, { "epoch": 0.43, "learning_rate": 4.928130263896688e-05, "loss": 0.0867, "step": 3072 }, { "epoch": 0.43, "learning_rate": 4.9280834737039116e-05, "loss": 0.0805, "step": 3074 }, { "epoch": 0.43, "learning_rate": 4.928036683511136e-05, "loss": 0.0857, "step": 3076 }, { "epoch": 0.43, "learning_rate": 4.927989893318361e-05, "loss": 0.103, "step": 3078 }, { "epoch": 0.43, "learning_rate": 4.9279431031255854e-05, "loss": 0.0869, "step": 3080 }, { "epoch": 0.43, "learning_rate": 4.927896312932809e-05, "loss": 0.0897, "step": 3082 }, { "epoch": 0.43, "learning_rate": 4.927849522740034e-05, "loss": 0.0912, "step": 3084 }, { "epoch": 0.43, "learning_rate": 4.927802732547258e-05, "loss": 0.1038, "step": 3086 }, { "epoch": 0.43, "learning_rate": 4.927755942354483e-05, "loss": 0.0832, "step": 3088 }, { "epoch": 0.43, "learning_rate": 4.927709152161707e-05, "loss": 0.1027, "step": 3090 }, { "epoch": 0.43, "learning_rate": 4.9276623619689316e-05, "loss": 0.0765, "step": 3092 }, { "epoch": 0.43, "learning_rate": 4.9276155717761555e-05, "loss": 0.0644, "step": 3094 }, { "epoch": 0.43, "learning_rate": 4.927568781583381e-05, "loss": 0.0852, "step": 3096 }, { "epoch": 0.43, "learning_rate": 4.927521991390605e-05, "loss": 0.1084, "step": 3098 }, { "epoch": 0.44, "learning_rate": 4.927475201197829e-05, "loss": 0.1356, "step": 3100 }, { "epoch": 0.44, "learning_rate": 4.927428411005053e-05, "loss": 0.1009, "step": 3102 }, { "epoch": 0.44, "learning_rate": 4.927381620812278e-05, "loss": 0.0932, "step": 3104 }, { "epoch": 0.44, "learning_rate": 4.9273348306195024e-05, "loss": 0.0939, "step": 3106 }, { "epoch": 0.44, "learning_rate": 4.927288040426727e-05, "loss": 0.1043, "step": 3108 }, { "epoch": 0.44, "learning_rate": 4.927241250233951e-05, "loss": 0.0928, "step": 3110 }, { "epoch": 0.44, "learning_rate": 4.9271944600411755e-05, "loss": 0.0829, "step": 3112 }, { "epoch": 0.44, "learning_rate": 4.9271476698484e-05, "loss": 0.0711, "step": 3114 }, { "epoch": 0.44, "learning_rate": 4.9271008796556246e-05, "loss": 0.0959, "step": 3116 }, { "epoch": 0.44, "learning_rate": 4.9270540894628486e-05, "loss": 0.0769, "step": 3118 }, { "epoch": 0.44, "learning_rate": 4.927007299270073e-05, "loss": 0.087, "step": 3120 }, { "epoch": 0.44, "learning_rate": 4.926960509077298e-05, "loss": 0.0955, "step": 3122 }, { "epoch": 0.44, "learning_rate": 4.926913718884522e-05, "loss": 0.0665, "step": 3124 }, { "epoch": 0.44, "learning_rate": 4.926866928691746e-05, "loss": 0.108, "step": 3126 }, { "epoch": 0.44, "learning_rate": 4.926820138498971e-05, "loss": 0.0868, "step": 3128 }, { "epoch": 0.44, "learning_rate": 4.926773348306195e-05, "loss": 0.0698, "step": 3130 }, { "epoch": 0.44, "learning_rate": 4.92672655811342e-05, "loss": 0.1027, "step": 3132 }, { "epoch": 0.44, "learning_rate": 4.926679767920644e-05, "loss": 0.0982, "step": 3134 }, { "epoch": 0.44, "learning_rate": 4.9266329777278685e-05, "loss": 0.1106, "step": 3136 }, { "epoch": 0.44, "learning_rate": 4.9265861875350924e-05, "loss": 0.106, "step": 3138 }, { "epoch": 0.44, "learning_rate": 4.926539397342318e-05, "loss": 0.0866, "step": 3140 }, { "epoch": 0.44, "learning_rate": 4.9264926071495416e-05, "loss": 0.0842, "step": 3142 }, { "epoch": 0.44, "learning_rate": 4.926445816956766e-05, "loss": 0.0922, "step": 3144 }, { "epoch": 0.44, "learning_rate": 4.92639902676399e-05, "loss": 0.0924, "step": 3146 }, { "epoch": 0.44, "learning_rate": 4.926352236571215e-05, "loss": 0.0974, "step": 3148 }, { "epoch": 0.44, "learning_rate": 4.926305446378439e-05, "loss": 0.0888, "step": 3150 }, { "epoch": 0.44, "learning_rate": 4.926258656185664e-05, "loss": 0.0867, "step": 3152 }, { "epoch": 0.44, "learning_rate": 4.926211865992888e-05, "loss": 0.084, "step": 3154 }, { "epoch": 0.44, "learning_rate": 4.9261650758001124e-05, "loss": 0.0833, "step": 3156 }, { "epoch": 0.44, "learning_rate": 4.926118285607337e-05, "loss": 0.0991, "step": 3158 }, { "epoch": 0.44, "learning_rate": 4.9260714954145616e-05, "loss": 0.0926, "step": 3160 }, { "epoch": 0.44, "learning_rate": 4.9260247052217855e-05, "loss": 0.0762, "step": 3162 }, { "epoch": 0.44, "learning_rate": 4.92597791502901e-05, "loss": 0.0903, "step": 3164 }, { "epoch": 0.44, "learning_rate": 4.925931124836235e-05, "loss": 0.0944, "step": 3166 }, { "epoch": 0.44, "learning_rate": 4.925884334643459e-05, "loss": 0.0783, "step": 3168 }, { "epoch": 0.44, "learning_rate": 4.925837544450683e-05, "loss": 0.0828, "step": 3170 }, { "epoch": 0.45, "learning_rate": 4.925790754257908e-05, "loss": 0.0821, "step": 3172 }, { "epoch": 0.45, "learning_rate": 4.9257439640651324e-05, "loss": 0.0774, "step": 3174 }, { "epoch": 0.45, "learning_rate": 4.925697173872357e-05, "loss": 0.0988, "step": 3176 }, { "epoch": 0.45, "learning_rate": 4.925650383679581e-05, "loss": 0.0883, "step": 3178 }, { "epoch": 0.45, "learning_rate": 4.9256035934868055e-05, "loss": 0.0986, "step": 3180 }, { "epoch": 0.45, "learning_rate": 4.9255568032940294e-05, "loss": 0.0735, "step": 3182 }, { "epoch": 0.45, "learning_rate": 4.9255100131012546e-05, "loss": 0.085, "step": 3184 }, { "epoch": 0.45, "learning_rate": 4.9254632229084786e-05, "loss": 0.0614, "step": 3186 }, { "epoch": 0.45, "learning_rate": 4.925416432715703e-05, "loss": 0.1033, "step": 3188 }, { "epoch": 0.45, "learning_rate": 4.925369642522927e-05, "loss": 0.0694, "step": 3190 }, { "epoch": 0.45, "learning_rate": 4.925322852330152e-05, "loss": 0.1439, "step": 3192 }, { "epoch": 0.45, "learning_rate": 4.925276062137376e-05, "loss": 0.0791, "step": 3194 }, { "epoch": 0.45, "learning_rate": 4.925229271944601e-05, "loss": 0.0969, "step": 3196 }, { "epoch": 0.45, "learning_rate": 4.925182481751825e-05, "loss": 0.1305, "step": 3198 }, { "epoch": 0.45, "learning_rate": 4.925135691559049e-05, "loss": 0.1025, "step": 3200 }, { "epoch": 0.45, "learning_rate": 4.925088901366274e-05, "loss": 0.0891, "step": 3202 }, { "epoch": 0.45, "learning_rate": 4.9250421111734985e-05, "loss": 0.0853, "step": 3204 }, { "epoch": 0.45, "learning_rate": 4.9249953209807224e-05, "loss": 0.0804, "step": 3206 }, { "epoch": 0.45, "learning_rate": 4.924948530787947e-05, "loss": 0.1006, "step": 3208 }, { "epoch": 0.45, "learning_rate": 4.9249017405951716e-05, "loss": 0.0594, "step": 3210 }, { "epoch": 0.45, "learning_rate": 4.924854950402396e-05, "loss": 0.0936, "step": 3212 }, { "epoch": 0.45, "learning_rate": 4.92480816020962e-05, "loss": 0.0892, "step": 3214 }, { "epoch": 0.45, "learning_rate": 4.924761370016845e-05, "loss": 0.0965, "step": 3216 }, { "epoch": 0.45, "learning_rate": 4.924714579824069e-05, "loss": 0.0896, "step": 3218 }, { "epoch": 0.45, "learning_rate": 4.924667789631294e-05, "loss": 0.0858, "step": 3220 }, { "epoch": 0.45, "learning_rate": 4.924620999438518e-05, "loss": 0.1017, "step": 3222 }, { "epoch": 0.45, "learning_rate": 4.9245742092457424e-05, "loss": 0.1014, "step": 3224 }, { "epoch": 0.45, "learning_rate": 4.924527419052967e-05, "loss": 0.1362, "step": 3226 }, { "epoch": 0.45, "learning_rate": 4.9244806288601916e-05, "loss": 0.0861, "step": 3228 }, { "epoch": 0.45, "learning_rate": 4.9244338386674155e-05, "loss": 0.0786, "step": 3230 }, { "epoch": 0.45, "learning_rate": 4.92438704847464e-05, "loss": 0.0862, "step": 3232 }, { "epoch": 0.45, "learning_rate": 4.924340258281864e-05, "loss": 0.0964, "step": 3234 }, { "epoch": 0.45, "learning_rate": 4.924293468089089e-05, "loss": 0.0864, "step": 3236 }, { "epoch": 0.45, "learning_rate": 4.924246677896313e-05, "loss": 0.0824, "step": 3238 }, { "epoch": 0.45, "learning_rate": 4.924199887703538e-05, "loss": 0.0787, "step": 3240 }, { "epoch": 0.46, "learning_rate": 4.924153097510762e-05, "loss": 0.0682, "step": 3242 }, { "epoch": 0.46, "learning_rate": 4.924106307317986e-05, "loss": 0.0717, "step": 3244 }, { "epoch": 0.46, "learning_rate": 4.924059517125211e-05, "loss": 0.0737, "step": 3246 }, { "epoch": 0.46, "learning_rate": 4.9240127269324355e-05, "loss": 0.0897, "step": 3248 }, { "epoch": 0.46, "learning_rate": 4.9239659367396594e-05, "loss": 0.0791, "step": 3250 }, { "epoch": 0.46, "learning_rate": 4.923919146546884e-05, "loss": 0.0948, "step": 3252 }, { "epoch": 0.46, "learning_rate": 4.9238723563541086e-05, "loss": 0.0983, "step": 3254 }, { "epoch": 0.46, "learning_rate": 4.923825566161333e-05, "loss": 0.09, "step": 3256 }, { "epoch": 0.46, "learning_rate": 4.923778775968557e-05, "loss": 0.0888, "step": 3258 }, { "epoch": 0.46, "learning_rate": 4.9237319857757817e-05, "loss": 0.0903, "step": 3260 }, { "epoch": 0.46, "learning_rate": 4.923685195583006e-05, "loss": 0.0757, "step": 3262 }, { "epoch": 0.46, "learning_rate": 4.923638405390231e-05, "loss": 0.1054, "step": 3264 }, { "epoch": 0.46, "learning_rate": 4.923591615197455e-05, "loss": 0.0773, "step": 3266 }, { "epoch": 0.46, "learning_rate": 4.9235448250046793e-05, "loss": 0.1131, "step": 3268 }, { "epoch": 0.46, "learning_rate": 4.923498034811904e-05, "loss": 0.0962, "step": 3270 }, { "epoch": 0.46, "learning_rate": 4.9234512446191285e-05, "loss": 0.1024, "step": 3272 }, { "epoch": 0.46, "learning_rate": 4.9234044544263524e-05, "loss": 0.0872, "step": 3274 }, { "epoch": 0.46, "learning_rate": 4.923357664233577e-05, "loss": 0.0985, "step": 3276 }, { "epoch": 0.46, "learning_rate": 4.923310874040801e-05, "loss": 0.0879, "step": 3278 }, { "epoch": 0.46, "learning_rate": 4.923264083848026e-05, "loss": 0.1048, "step": 3280 }, { "epoch": 0.46, "learning_rate": 4.92321729365525e-05, "loss": 0.0863, "step": 3282 }, { "epoch": 0.46, "learning_rate": 4.923170503462475e-05, "loss": 0.0783, "step": 3284 }, { "epoch": 0.46, "learning_rate": 4.9231237132696986e-05, "loss": 0.0929, "step": 3286 }, { "epoch": 0.46, "learning_rate": 4.923076923076924e-05, "loss": 0.0823, "step": 3288 }, { "epoch": 0.46, "learning_rate": 4.923030132884148e-05, "loss": 0.0793, "step": 3290 }, { "epoch": 0.46, "learning_rate": 4.922983342691372e-05, "loss": 0.1179, "step": 3292 }, { "epoch": 0.46, "learning_rate": 4.922936552498596e-05, "loss": 0.0903, "step": 3294 }, { "epoch": 0.46, "learning_rate": 4.922889762305821e-05, "loss": 0.0917, "step": 3296 }, { "epoch": 0.46, "learning_rate": 4.9228429721130455e-05, "loss": 0.0788, "step": 3298 }, { "epoch": 0.46, "learning_rate": 4.9227961819202694e-05, "loss": 0.076, "step": 3300 }, { "epoch": 0.46, "learning_rate": 4.922749391727494e-05, "loss": 0.0905, "step": 3302 }, { "epoch": 0.46, "learning_rate": 4.9227026015347186e-05, "loss": 0.0986, "step": 3304 }, { "epoch": 0.46, "learning_rate": 4.922655811341943e-05, "loss": 0.0629, "step": 3306 }, { "epoch": 0.46, "learning_rate": 4.922609021149167e-05, "loss": 0.0851, "step": 3308 }, { "epoch": 0.46, "learning_rate": 4.922562230956392e-05, "loss": 0.0812, "step": 3310 }, { "epoch": 0.46, "learning_rate": 4.9225154407636156e-05, "loss": 0.0563, "step": 3312 }, { "epoch": 0.47, "learning_rate": 4.922468650570841e-05, "loss": 0.1, "step": 3314 }, { "epoch": 0.47, "learning_rate": 4.922421860378065e-05, "loss": 0.0857, "step": 3316 }, { "epoch": 0.47, "learning_rate": 4.9223750701852894e-05, "loss": 0.079, "step": 3318 }, { "epoch": 0.47, "learning_rate": 4.922328279992513e-05, "loss": 0.0901, "step": 3320 }, { "epoch": 0.47, "learning_rate": 4.9222814897997386e-05, "loss": 0.082, "step": 3322 }, { "epoch": 0.47, "learning_rate": 4.9222346996069625e-05, "loss": 0.0875, "step": 3324 }, { "epoch": 0.47, "learning_rate": 4.922187909414187e-05, "loss": 0.0915, "step": 3326 }, { "epoch": 0.47, "learning_rate": 4.922141119221411e-05, "loss": 0.0988, "step": 3328 }, { "epoch": 0.47, "learning_rate": 4.9220943290286356e-05, "loss": 0.0775, "step": 3330 }, { "epoch": 0.47, "learning_rate": 4.92204753883586e-05, "loss": 0.077, "step": 3332 }, { "epoch": 0.47, "learning_rate": 4.922000748643085e-05, "loss": 0.0846, "step": 3334 }, { "epoch": 0.47, "learning_rate": 4.921953958450309e-05, "loss": 0.0951, "step": 3336 }, { "epoch": 0.47, "learning_rate": 4.921907168257533e-05, "loss": 0.0938, "step": 3338 }, { "epoch": 0.47, "learning_rate": 4.921860378064758e-05, "loss": 0.0741, "step": 3340 }, { "epoch": 0.47, "learning_rate": 4.9218135878719824e-05, "loss": 0.092, "step": 3342 }, { "epoch": 0.47, "learning_rate": 4.9217667976792064e-05, "loss": 0.0932, "step": 3344 }, { "epoch": 0.47, "learning_rate": 4.921720007486431e-05, "loss": 0.0955, "step": 3346 }, { "epoch": 0.47, "learning_rate": 4.9216732172936555e-05, "loss": 0.1082, "step": 3348 }, { "epoch": 0.47, "learning_rate": 4.92162642710088e-05, "loss": 0.0878, "step": 3350 }, { "epoch": 0.47, "learning_rate": 4.921579636908104e-05, "loss": 0.0945, "step": 3352 }, { "epoch": 0.47, "learning_rate": 4.9215328467153286e-05, "loss": 0.0896, "step": 3354 }, { "epoch": 0.47, "learning_rate": 4.921486056522553e-05, "loss": 0.0855, "step": 3356 }, { "epoch": 0.47, "learning_rate": 4.921439266329778e-05, "loss": 0.1167, "step": 3358 }, { "epoch": 0.47, "learning_rate": 4.921392476137002e-05, "loss": 0.104, "step": 3360 }, { "epoch": 0.47, "learning_rate": 4.921345685944226e-05, "loss": 0.0772, "step": 3362 }, { "epoch": 0.47, "learning_rate": 4.92129889575145e-05, "loss": 0.0921, "step": 3364 }, { "epoch": 0.47, "learning_rate": 4.9212521055586755e-05, "loss": 0.0733, "step": 3366 }, { "epoch": 0.47, "learning_rate": 4.9212053153658994e-05, "loss": 0.105, "step": 3368 }, { "epoch": 0.47, "learning_rate": 4.921158525173124e-05, "loss": 0.0738, "step": 3370 }, { "epoch": 0.47, "learning_rate": 4.921111734980348e-05, "loss": 0.1045, "step": 3372 }, { "epoch": 0.47, "learning_rate": 4.921064944787573e-05, "loss": 0.0756, "step": 3374 }, { "epoch": 0.47, "learning_rate": 4.921018154594797e-05, "loss": 0.0824, "step": 3376 }, { "epoch": 0.47, "learning_rate": 4.920971364402022e-05, "loss": 0.0694, "step": 3378 }, { "epoch": 0.47, "learning_rate": 4.9209245742092456e-05, "loss": 0.0884, "step": 3380 }, { "epoch": 0.47, "learning_rate": 4.92087778401647e-05, "loss": 0.0801, "step": 3382 }, { "epoch": 0.48, "learning_rate": 4.920830993823695e-05, "loss": 0.0818, "step": 3384 }, { "epoch": 0.48, "learning_rate": 4.9207842036309194e-05, "loss": 0.102, "step": 3386 }, { "epoch": 0.48, "learning_rate": 4.920737413438143e-05, "loss": 0.0931, "step": 3388 }, { "epoch": 0.48, "learning_rate": 4.920690623245368e-05, "loss": 0.0782, "step": 3390 }, { "epoch": 0.48, "learning_rate": 4.9206438330525925e-05, "loss": 0.0823, "step": 3392 }, { "epoch": 0.48, "learning_rate": 4.920597042859817e-05, "loss": 0.0899, "step": 3394 }, { "epoch": 0.48, "learning_rate": 4.920550252667041e-05, "loss": 0.0896, "step": 3396 }, { "epoch": 0.48, "learning_rate": 4.9205034624742656e-05, "loss": 0.0832, "step": 3398 }, { "epoch": 0.48, "learning_rate": 4.92045667228149e-05, "loss": 0.0772, "step": 3400 }, { "epoch": 0.48, "learning_rate": 4.920409882088715e-05, "loss": 0.0912, "step": 3402 }, { "epoch": 0.48, "learning_rate": 4.920363091895939e-05, "loss": 0.0745, "step": 3404 }, { "epoch": 0.48, "learning_rate": 4.920316301703163e-05, "loss": 0.0826, "step": 3406 }, { "epoch": 0.48, "learning_rate": 4.920269511510387e-05, "loss": 0.0785, "step": 3408 }, { "epoch": 0.48, "learning_rate": 4.9202227213176124e-05, "loss": 0.0764, "step": 3410 }, { "epoch": 0.48, "learning_rate": 4.9201759311248364e-05, "loss": 0.0993, "step": 3412 }, { "epoch": 0.48, "learning_rate": 4.920129140932061e-05, "loss": 0.0614, "step": 3414 }, { "epoch": 0.48, "learning_rate": 4.920082350739285e-05, "loss": 0.0791, "step": 3416 }, { "epoch": 0.48, "learning_rate": 4.92003556054651e-05, "loss": 0.0739, "step": 3418 }, { "epoch": 0.48, "learning_rate": 4.919988770353734e-05, "loss": 0.0815, "step": 3420 }, { "epoch": 0.48, "learning_rate": 4.9199419801609586e-05, "loss": 0.0853, "step": 3422 }, { "epoch": 0.48, "learning_rate": 4.9198951899681825e-05, "loss": 0.0914, "step": 3424 }, { "epoch": 0.48, "learning_rate": 4.919848399775407e-05, "loss": 0.099, "step": 3426 }, { "epoch": 0.48, "learning_rate": 4.919801609582632e-05, "loss": 0.0893, "step": 3428 }, { "epoch": 0.48, "learning_rate": 4.919754819389856e-05, "loss": 0.0865, "step": 3430 }, { "epoch": 0.48, "learning_rate": 4.91970802919708e-05, "loss": 0.0925, "step": 3432 }, { "epoch": 0.48, "learning_rate": 4.919661239004305e-05, "loss": 0.088, "step": 3434 }, { "epoch": 0.48, "learning_rate": 4.9196144488115294e-05, "loss": 0.1066, "step": 3436 }, { "epoch": 0.48, "learning_rate": 4.919567658618754e-05, "loss": 0.0967, "step": 3438 }, { "epoch": 0.48, "learning_rate": 4.919520868425978e-05, "loss": 0.1086, "step": 3440 }, { "epoch": 0.48, "learning_rate": 4.9194740782332025e-05, "loss": 0.078, "step": 3442 }, { "epoch": 0.48, "learning_rate": 4.919427288040427e-05, "loss": 0.0949, "step": 3444 }, { "epoch": 0.48, "learning_rate": 4.919380497847652e-05, "loss": 0.0922, "step": 3446 }, { "epoch": 0.48, "learning_rate": 4.9193337076548756e-05, "loss": 0.0815, "step": 3448 }, { "epoch": 0.48, "learning_rate": 4.9192869174621e-05, "loss": 0.0727, "step": 3450 }, { "epoch": 0.48, "learning_rate": 4.919240127269325e-05, "loss": 0.0812, "step": 3452 }, { "epoch": 0.48, "learning_rate": 4.9191933370765494e-05, "loss": 0.0907, "step": 3454 }, { "epoch": 0.49, "learning_rate": 4.919146546883773e-05, "loss": 0.0951, "step": 3456 }, { "epoch": 0.49, "learning_rate": 4.919099756690998e-05, "loss": 0.0956, "step": 3458 }, { "epoch": 0.49, "learning_rate": 4.919052966498222e-05, "loss": 0.0864, "step": 3460 }, { "epoch": 0.49, "learning_rate": 4.919006176305447e-05, "loss": 0.0829, "step": 3462 }, { "epoch": 0.49, "learning_rate": 4.918959386112671e-05, "loss": 0.0738, "step": 3464 }, { "epoch": 0.49, "learning_rate": 4.9189125959198956e-05, "loss": 0.0959, "step": 3466 }, { "epoch": 0.49, "learning_rate": 4.9188658057271195e-05, "loss": 0.0988, "step": 3468 }, { "epoch": 0.49, "learning_rate": 4.918819015534345e-05, "loss": 0.087, "step": 3470 }, { "epoch": 0.49, "learning_rate": 4.918772225341569e-05, "loss": 0.0897, "step": 3472 }, { "epoch": 0.49, "learning_rate": 4.918725435148793e-05, "loss": 0.1108, "step": 3474 }, { "epoch": 0.49, "learning_rate": 4.918678644956017e-05, "loss": 0.0892, "step": 3476 }, { "epoch": 0.49, "learning_rate": 4.918631854763242e-05, "loss": 0.0837, "step": 3478 }, { "epoch": 0.49, "learning_rate": 4.9185850645704664e-05, "loss": 0.0924, "step": 3480 }, { "epoch": 0.49, "learning_rate": 4.918538274377691e-05, "loss": 0.0895, "step": 3482 }, { "epoch": 0.49, "learning_rate": 4.918491484184915e-05, "loss": 0.0828, "step": 3484 }, { "epoch": 0.49, "learning_rate": 4.9184446939921395e-05, "loss": 0.1, "step": 3486 }, { "epoch": 0.49, "learning_rate": 4.918397903799364e-05, "loss": 0.0853, "step": 3488 }, { "epoch": 0.49, "learning_rate": 4.9183511136065886e-05, "loss": 0.1041, "step": 3490 }, { "epoch": 0.49, "learning_rate": 4.9183043234138125e-05, "loss": 0.0838, "step": 3492 }, { "epoch": 0.49, "learning_rate": 4.918257533221037e-05, "loss": 0.084, "step": 3494 }, { "epoch": 0.49, "learning_rate": 4.918210743028262e-05, "loss": 0.0974, "step": 3496 }, { "epoch": 0.49, "learning_rate": 4.918163952835486e-05, "loss": 0.0789, "step": 3498 }, { "epoch": 0.49, "learning_rate": 4.91811716264271e-05, "loss": 0.088, "step": 3500 }, { "epoch": 0.49, "learning_rate": 4.918070372449935e-05, "loss": 0.0601, "step": 3502 }, { "epoch": 0.49, "learning_rate": 4.9180235822571594e-05, "loss": 0.1086, "step": 3504 }, { "epoch": 0.49, "learning_rate": 4.917976792064384e-05, "loss": 0.0887, "step": 3506 }, { "epoch": 0.49, "learning_rate": 4.917930001871608e-05, "loss": 0.0916, "step": 3508 }, { "epoch": 0.49, "learning_rate": 4.9178832116788325e-05, "loss": 0.0867, "step": 3510 }, { "epoch": 0.49, "learning_rate": 4.9178364214860564e-05, "loss": 0.0938, "step": 3512 }, { "epoch": 0.49, "learning_rate": 4.917789631293282e-05, "loss": 0.0803, "step": 3514 }, { "epoch": 0.49, "learning_rate": 4.9177428411005056e-05, "loss": 0.0915, "step": 3516 }, { "epoch": 0.49, "learning_rate": 4.91769605090773e-05, "loss": 0.0574, "step": 3518 }, { "epoch": 0.49, "learning_rate": 4.917649260714954e-05, "loss": 0.0889, "step": 3520 }, { "epoch": 0.49, "learning_rate": 4.917602470522179e-05, "loss": 0.0987, "step": 3522 }, { "epoch": 0.49, "learning_rate": 4.917555680329403e-05, "loss": 0.0592, "step": 3524 }, { "epoch": 0.49, "learning_rate": 4.917508890136628e-05, "loss": 0.1026, "step": 3526 }, { "epoch": 0.5, "learning_rate": 4.917462099943852e-05, "loss": 0.1007, "step": 3528 }, { "epoch": 0.5, "learning_rate": 4.9174153097510764e-05, "loss": 0.0932, "step": 3530 }, { "epoch": 0.5, "learning_rate": 4.917368519558301e-05, "loss": 0.1059, "step": 3532 }, { "epoch": 0.5, "learning_rate": 4.9173217293655256e-05, "loss": 0.0822, "step": 3534 }, { "epoch": 0.5, "learning_rate": 4.9172749391727495e-05, "loss": 0.0979, "step": 3536 }, { "epoch": 0.5, "learning_rate": 4.917228148979974e-05, "loss": 0.1007, "step": 3538 }, { "epoch": 0.5, "learning_rate": 4.917181358787199e-05, "loss": 0.0944, "step": 3540 }, { "epoch": 0.5, "learning_rate": 4.917134568594423e-05, "loss": 0.102, "step": 3542 }, { "epoch": 0.5, "learning_rate": 4.917087778401647e-05, "loss": 0.0743, "step": 3544 }, { "epoch": 0.5, "learning_rate": 4.917040988208871e-05, "loss": 0.0897, "step": 3546 }, { "epoch": 0.5, "learning_rate": 4.9169941980160964e-05, "loss": 0.1136, "step": 3548 }, { "epoch": 0.5, "learning_rate": 4.91694740782332e-05, "loss": 0.0874, "step": 3550 }, { "epoch": 0.5, "learning_rate": 4.916900617630545e-05, "loss": 0.079, "step": 3552 }, { "epoch": 0.5, "learning_rate": 4.916853827437769e-05, "loss": 0.0844, "step": 3554 }, { "epoch": 0.5, "learning_rate": 4.9168070372449934e-05, "loss": 0.1033, "step": 3556 }, { "epoch": 0.5, "learning_rate": 4.916760247052218e-05, "loss": 0.1093, "step": 3558 }, { "epoch": 0.5, "learning_rate": 4.9167134568594425e-05, "loss": 0.0963, "step": 3560 }, { "epoch": 0.5, "learning_rate": 4.9166666666666665e-05, "loss": 0.101, "step": 3562 }, { "epoch": 0.5, "learning_rate": 4.916619876473891e-05, "loss": 0.0802, "step": 3564 }, { "epoch": 0.5, "learning_rate": 4.9165730862811156e-05, "loss": 0.0968, "step": 3566 }, { "epoch": 0.5, "learning_rate": 4.91652629608834e-05, "loss": 0.0982, "step": 3568 }, { "epoch": 0.5, "learning_rate": 4.916479505895564e-05, "loss": 0.0885, "step": 3570 }, { "epoch": 0.5, "learning_rate": 4.916432715702789e-05, "loss": 0.0921, "step": 3572 }, { "epoch": 0.5, "learning_rate": 4.916385925510013e-05, "loss": 0.0744, "step": 3574 }, { "epoch": 0.5, "learning_rate": 4.916339135317238e-05, "loss": 0.0824, "step": 3576 }, { "epoch": 0.5, "learning_rate": 4.916292345124462e-05, "loss": 0.1129, "step": 3578 }, { "epoch": 0.5, "learning_rate": 4.9162455549316864e-05, "loss": 0.0937, "step": 3580 }, { "epoch": 0.5, "learning_rate": 4.916198764738911e-05, "loss": 0.0924, "step": 3582 }, { "epoch": 0.5, "learning_rate": 4.9161519745461356e-05, "loss": 0.0828, "step": 3584 }, { "epoch": 0.5, "learning_rate": 4.9161051843533595e-05, "loss": 0.0917, "step": 3586 }, { "epoch": 0.5, "learning_rate": 4.916058394160584e-05, "loss": 0.0766, "step": 3588 }, { "epoch": 0.5, "learning_rate": 4.916011603967808e-05, "loss": 0.0963, "step": 3590 }, { "epoch": 0.5, "learning_rate": 4.915964813775033e-05, "loss": 0.0912, "step": 3592 }, { "epoch": 0.5, "learning_rate": 4.915918023582257e-05, "loss": 0.0903, "step": 3594 }, { "epoch": 0.5, "learning_rate": 4.915871233389482e-05, "loss": 0.0843, "step": 3596 }, { "epoch": 0.51, "learning_rate": 4.915824443196706e-05, "loss": 0.0897, "step": 3598 }, { "epoch": 0.51, "learning_rate": 4.915777653003931e-05, "loss": 0.0859, "step": 3600 }, { "epoch": 0.51, "learning_rate": 4.915730862811155e-05, "loss": 0.0991, "step": 3602 }, { "epoch": 0.51, "learning_rate": 4.9156840726183795e-05, "loss": 0.1032, "step": 3604 }, { "epoch": 0.51, "learning_rate": 4.9156372824256034e-05, "loss": 0.0866, "step": 3606 }, { "epoch": 0.51, "learning_rate": 4.915590492232828e-05, "loss": 0.0994, "step": 3608 }, { "epoch": 0.51, "learning_rate": 4.9155437020400526e-05, "loss": 0.0831, "step": 3610 }, { "epoch": 0.51, "learning_rate": 4.915496911847277e-05, "loss": 0.1023, "step": 3612 }, { "epoch": 0.51, "learning_rate": 4.915450121654501e-05, "loss": 0.1046, "step": 3614 }, { "epoch": 0.51, "learning_rate": 4.915403331461726e-05, "loss": 0.0863, "step": 3616 }, { "epoch": 0.51, "learning_rate": 4.91535654126895e-05, "loss": 0.0761, "step": 3618 }, { "epoch": 0.51, "learning_rate": 4.915309751076175e-05, "loss": 0.1039, "step": 3620 }, { "epoch": 0.51, "learning_rate": 4.915262960883399e-05, "loss": 0.0741, "step": 3622 }, { "epoch": 0.51, "learning_rate": 4.9152161706906234e-05, "loss": 0.0682, "step": 3624 }, { "epoch": 0.51, "learning_rate": 4.915169380497848e-05, "loss": 0.0898, "step": 3626 }, { "epoch": 0.51, "learning_rate": 4.9151225903050725e-05, "loss": 0.1086, "step": 3628 }, { "epoch": 0.51, "learning_rate": 4.9150758001122965e-05, "loss": 0.1067, "step": 3630 }, { "epoch": 0.51, "learning_rate": 4.915029009919521e-05, "loss": 0.0946, "step": 3632 }, { "epoch": 0.51, "learning_rate": 4.9149822197267456e-05, "loss": 0.0644, "step": 3634 }, { "epoch": 0.51, "learning_rate": 4.91493542953397e-05, "loss": 0.0866, "step": 3636 }, { "epoch": 0.51, "learning_rate": 4.914888639341194e-05, "loss": 0.1014, "step": 3638 }, { "epoch": 0.51, "learning_rate": 4.914841849148419e-05, "loss": 0.0865, "step": 3640 }, { "epoch": 0.51, "learning_rate": 4.9147950589556427e-05, "loss": 0.0764, "step": 3642 }, { "epoch": 0.51, "learning_rate": 4.914748268762868e-05, "loss": 0.0872, "step": 3644 }, { "epoch": 0.51, "learning_rate": 4.914701478570092e-05, "loss": 0.1115, "step": 3646 }, { "epoch": 0.51, "learning_rate": 4.9146546883773164e-05, "loss": 0.0901, "step": 3648 }, { "epoch": 0.51, "learning_rate": 4.9146078981845403e-05, "loss": 0.0947, "step": 3650 }, { "epoch": 0.51, "learning_rate": 4.9145611079917656e-05, "loss": 0.0965, "step": 3652 }, { "epoch": 0.51, "learning_rate": 4.9145143177989895e-05, "loss": 0.0938, "step": 3654 }, { "epoch": 0.51, "learning_rate": 4.914467527606214e-05, "loss": 0.1238, "step": 3656 }, { "epoch": 0.51, "learning_rate": 4.914420737413438e-05, "loss": 0.097, "step": 3658 }, { "epoch": 0.51, "learning_rate": 4.9143739472206626e-05, "loss": 0.098, "step": 3660 }, { "epoch": 0.51, "learning_rate": 4.914327157027887e-05, "loss": 0.0833, "step": 3662 }, { "epoch": 0.51, "learning_rate": 4.914280366835112e-05, "loss": 0.0798, "step": 3664 }, { "epoch": 0.51, "learning_rate": 4.914233576642336e-05, "loss": 0.0802, "step": 3666 }, { "epoch": 0.51, "learning_rate": 4.91418678644956e-05, "loss": 0.0726, "step": 3668 }, { "epoch": 0.52, "learning_rate": 4.914139996256785e-05, "loss": 0.0939, "step": 3670 }, { "epoch": 0.52, "learning_rate": 4.9140932060640095e-05, "loss": 0.0983, "step": 3672 }, { "epoch": 0.52, "learning_rate": 4.9140464158712334e-05, "loss": 0.0997, "step": 3674 }, { "epoch": 0.52, "learning_rate": 4.913999625678458e-05, "loss": 0.0726, "step": 3676 }, { "epoch": 0.52, "learning_rate": 4.9139528354856826e-05, "loss": 0.0967, "step": 3678 }, { "epoch": 0.52, "learning_rate": 4.913906045292907e-05, "loss": 0.0933, "step": 3680 }, { "epoch": 0.52, "learning_rate": 4.913859255100131e-05, "loss": 0.0727, "step": 3682 }, { "epoch": 0.52, "learning_rate": 4.913812464907356e-05, "loss": 0.1104, "step": 3684 }, { "epoch": 0.52, "learning_rate": 4.91376567471458e-05, "loss": 0.0989, "step": 3686 }, { "epoch": 0.52, "learning_rate": 4.913718884521805e-05, "loss": 0.0867, "step": 3688 }, { "epoch": 0.52, "learning_rate": 4.913672094329029e-05, "loss": 0.0684, "step": 3690 }, { "epoch": 0.52, "learning_rate": 4.9136253041362534e-05, "loss": 0.0805, "step": 3692 }, { "epoch": 0.52, "learning_rate": 4.913578513943477e-05, "loss": 0.1268, "step": 3694 }, { "epoch": 0.52, "learning_rate": 4.9135317237507026e-05, "loss": 0.1143, "step": 3696 }, { "epoch": 0.52, "learning_rate": 4.9134849335579265e-05, "loss": 0.119, "step": 3698 }, { "epoch": 0.52, "learning_rate": 4.913438143365151e-05, "loss": 0.0887, "step": 3700 }, { "epoch": 0.52, "learning_rate": 4.913391353172375e-05, "loss": 0.0864, "step": 3702 }, { "epoch": 0.52, "learning_rate": 4.9133445629795996e-05, "loss": 0.088, "step": 3704 }, { "epoch": 0.52, "learning_rate": 4.913297772786824e-05, "loss": 0.1035, "step": 3706 }, { "epoch": 0.52, "learning_rate": 4.913250982594049e-05, "loss": 0.1182, "step": 3708 }, { "epoch": 0.52, "learning_rate": 4.9132041924012727e-05, "loss": 0.0731, "step": 3710 }, { "epoch": 0.52, "learning_rate": 4.913157402208497e-05, "loss": 0.0962, "step": 3712 }, { "epoch": 0.52, "learning_rate": 4.913110612015722e-05, "loss": 0.1324, "step": 3714 }, { "epoch": 0.52, "learning_rate": 4.9130638218229464e-05, "loss": 0.0934, "step": 3716 }, { "epoch": 0.52, "learning_rate": 4.9130170316301703e-05, "loss": 0.0941, "step": 3718 }, { "epoch": 0.52, "learning_rate": 4.912970241437395e-05, "loss": 0.0777, "step": 3720 }, { "epoch": 0.52, "learning_rate": 4.9129234512446195e-05, "loss": 0.0773, "step": 3722 }, { "epoch": 0.52, "learning_rate": 4.912876661051844e-05, "loss": 0.0969, "step": 3724 }, { "epoch": 0.52, "learning_rate": 4.912829870859068e-05, "loss": 0.0824, "step": 3726 }, { "epoch": 0.52, "learning_rate": 4.9127830806662926e-05, "loss": 0.0849, "step": 3728 }, { "epoch": 0.52, "learning_rate": 4.912736290473517e-05, "loss": 0.0925, "step": 3730 }, { "epoch": 0.52, "learning_rate": 4.912689500280742e-05, "loss": 0.1111, "step": 3732 }, { "epoch": 0.52, "learning_rate": 4.912642710087966e-05, "loss": 0.0961, "step": 3734 }, { "epoch": 0.52, "learning_rate": 4.91259591989519e-05, "loss": 0.0834, "step": 3736 }, { "epoch": 0.52, "learning_rate": 4.912549129702414e-05, "loss": 0.0897, "step": 3738 }, { "epoch": 0.52, "learning_rate": 4.9125023395096395e-05, "loss": 0.0979, "step": 3740 }, { "epoch": 0.53, "learning_rate": 4.9124555493168634e-05, "loss": 0.1118, "step": 3742 }, { "epoch": 0.53, "learning_rate": 4.912408759124088e-05, "loss": 0.0778, "step": 3744 }, { "epoch": 0.53, "learning_rate": 4.912361968931312e-05, "loss": 0.074, "step": 3746 }, { "epoch": 0.53, "learning_rate": 4.912315178738537e-05, "loss": 0.0897, "step": 3748 }, { "epoch": 0.53, "learning_rate": 4.912268388545761e-05, "loss": 0.0941, "step": 3750 }, { "epoch": 0.53, "learning_rate": 4.912221598352986e-05, "loss": 0.0834, "step": 3752 }, { "epoch": 0.53, "learning_rate": 4.9121748081602096e-05, "loss": 0.1081, "step": 3754 }, { "epoch": 0.53, "learning_rate": 4.912128017967434e-05, "loss": 0.0938, "step": 3756 }, { "epoch": 0.53, "learning_rate": 4.912081227774659e-05, "loss": 0.0984, "step": 3758 }, { "epoch": 0.53, "learning_rate": 4.9120344375818834e-05, "loss": 0.099, "step": 3760 }, { "epoch": 0.53, "learning_rate": 4.911987647389107e-05, "loss": 0.0916, "step": 3762 }, { "epoch": 0.53, "learning_rate": 4.911940857196332e-05, "loss": 0.0924, "step": 3764 }, { "epoch": 0.53, "learning_rate": 4.9118940670035565e-05, "loss": 0.0822, "step": 3766 }, { "epoch": 0.53, "learning_rate": 4.911847276810781e-05, "loss": 0.0792, "step": 3768 }, { "epoch": 0.53, "learning_rate": 4.911800486618005e-05, "loss": 0.1024, "step": 3770 }, { "epoch": 0.53, "learning_rate": 4.9117536964252296e-05, "loss": 0.1246, "step": 3772 }, { "epoch": 0.53, "learning_rate": 4.911706906232454e-05, "loss": 0.0895, "step": 3774 }, { "epoch": 0.53, "learning_rate": 4.911660116039679e-05, "loss": 0.0821, "step": 3776 }, { "epoch": 0.53, "learning_rate": 4.9116133258469027e-05, "loss": 0.0959, "step": 3778 }, { "epoch": 0.53, "learning_rate": 4.911566535654127e-05, "loss": 0.0906, "step": 3780 }, { "epoch": 0.53, "learning_rate": 4.911519745461352e-05, "loss": 0.0815, "step": 3782 }, { "epoch": 0.53, "learning_rate": 4.9114729552685764e-05, "loss": 0.1142, "step": 3784 }, { "epoch": 0.53, "learning_rate": 4.9114261650758003e-05, "loss": 0.0801, "step": 3786 }, { "epoch": 0.53, "learning_rate": 4.911379374883025e-05, "loss": 0.07, "step": 3788 }, { "epoch": 0.53, "learning_rate": 4.911332584690249e-05, "loss": 0.0859, "step": 3790 }, { "epoch": 0.53, "learning_rate": 4.911285794497474e-05, "loss": 0.084, "step": 3792 }, { "epoch": 0.53, "learning_rate": 4.911239004304698e-05, "loss": 0.1333, "step": 3794 }, { "epoch": 0.53, "learning_rate": 4.9111922141119226e-05, "loss": 0.0816, "step": 3796 }, { "epoch": 0.53, "learning_rate": 4.9111454239191465e-05, "loss": 0.0963, "step": 3798 }, { "epoch": 0.53, "learning_rate": 4.911098633726371e-05, "loss": 0.1001, "step": 3800 }, { "epoch": 0.53, "learning_rate": 4.911051843533596e-05, "loss": 0.0969, "step": 3802 }, { "epoch": 0.53, "learning_rate": 4.9110050533408196e-05, "loss": 0.0817, "step": 3804 }, { "epoch": 0.53, "learning_rate": 4.910958263148044e-05, "loss": 0.1269, "step": 3806 }, { "epoch": 0.53, "learning_rate": 4.910911472955269e-05, "loss": 0.1191, "step": 3808 }, { "epoch": 0.53, "learning_rate": 4.9108646827624934e-05, "loss": 0.1094, "step": 3810 }, { "epoch": 0.54, "learning_rate": 4.910817892569717e-05, "loss": 0.0941, "step": 3812 }, { "epoch": 0.54, "learning_rate": 4.910771102376942e-05, "loss": 0.1328, "step": 3814 }, { "epoch": 0.54, "learning_rate": 4.9107243121841665e-05, "loss": 0.0866, "step": 3816 }, { "epoch": 0.54, "learning_rate": 4.910677521991391e-05, "loss": 0.1208, "step": 3818 }, { "epoch": 0.54, "learning_rate": 4.910630731798615e-05, "loss": 0.0855, "step": 3820 }, { "epoch": 0.54, "learning_rate": 4.9105839416058396e-05, "loss": 0.0963, "step": 3822 }, { "epoch": 0.54, "learning_rate": 4.9105371514130635e-05, "loss": 0.0908, "step": 3824 }, { "epoch": 0.54, "learning_rate": 4.910490361220289e-05, "loss": 0.0924, "step": 3826 }, { "epoch": 0.54, "learning_rate": 4.910443571027513e-05, "loss": 0.0927, "step": 3828 }, { "epoch": 0.54, "learning_rate": 4.910396780834737e-05, "loss": 0.1203, "step": 3830 }, { "epoch": 0.54, "learning_rate": 4.910349990641961e-05, "loss": 0.0944, "step": 3832 }, { "epoch": 0.54, "learning_rate": 4.910303200449186e-05, "loss": 0.0923, "step": 3834 }, { "epoch": 0.54, "learning_rate": 4.9102564102564104e-05, "loss": 0.09, "step": 3836 }, { "epoch": 0.54, "learning_rate": 4.910209620063635e-05, "loss": 0.0838, "step": 3838 }, { "epoch": 0.54, "learning_rate": 4.910162829870859e-05, "loss": 0.0979, "step": 3840 }, { "epoch": 0.54, "learning_rate": 4.9101160396780835e-05, "loss": 0.0925, "step": 3842 }, { "epoch": 0.54, "learning_rate": 4.910069249485308e-05, "loss": 0.0911, "step": 3844 }, { "epoch": 0.54, "learning_rate": 4.9100224592925327e-05, "loss": 0.0723, "step": 3846 }, { "epoch": 0.54, "learning_rate": 4.9099756690997566e-05, "loss": 0.0795, "step": 3848 }, { "epoch": 0.54, "learning_rate": 4.909928878906981e-05, "loss": 0.0956, "step": 3850 }, { "epoch": 0.54, "learning_rate": 4.909882088714206e-05, "loss": 0.0885, "step": 3852 }, { "epoch": 0.54, "learning_rate": 4.9098352985214303e-05, "loss": 0.1086, "step": 3854 }, { "epoch": 0.54, "learning_rate": 4.909788508328654e-05, "loss": 0.0614, "step": 3856 }, { "epoch": 0.54, "learning_rate": 4.909741718135879e-05, "loss": 0.1023, "step": 3858 }, { "epoch": 0.54, "learning_rate": 4.9096949279431034e-05, "loss": 0.1041, "step": 3860 }, { "epoch": 0.54, "learning_rate": 4.909648137750328e-05, "loss": 0.0934, "step": 3862 }, { "epoch": 0.54, "learning_rate": 4.909601347557552e-05, "loss": 0.0782, "step": 3864 }, { "epoch": 0.54, "learning_rate": 4.9095545573647765e-05, "loss": 0.1001, "step": 3866 }, { "epoch": 0.54, "learning_rate": 4.9095077671720005e-05, "loss": 0.082, "step": 3868 }, { "epoch": 0.54, "learning_rate": 4.909460976979226e-05, "loss": 0.0836, "step": 3870 }, { "epoch": 0.54, "learning_rate": 4.9094141867864496e-05, "loss": 0.0753, "step": 3872 }, { "epoch": 0.54, "learning_rate": 4.909367396593674e-05, "loss": 0.0935, "step": 3874 }, { "epoch": 0.54, "learning_rate": 4.909320606400898e-05, "loss": 0.093, "step": 3876 }, { "epoch": 0.54, "learning_rate": 4.9092738162081234e-05, "loss": 0.0815, "step": 3878 }, { "epoch": 0.54, "learning_rate": 4.909227026015347e-05, "loss": 0.0994, "step": 3880 }, { "epoch": 0.54, "learning_rate": 4.909180235822572e-05, "loss": 0.0939, "step": 3882 }, { "epoch": 0.55, "learning_rate": 4.909133445629796e-05, "loss": 0.078, "step": 3884 }, { "epoch": 0.55, "learning_rate": 4.9090866554370204e-05, "loss": 0.0785, "step": 3886 }, { "epoch": 0.55, "learning_rate": 4.909039865244245e-05, "loss": 0.1125, "step": 3888 }, { "epoch": 0.55, "learning_rate": 4.9089930750514696e-05, "loss": 0.0668, "step": 3890 }, { "epoch": 0.55, "learning_rate": 4.9089462848586935e-05, "loss": 0.1003, "step": 3892 }, { "epoch": 0.55, "learning_rate": 4.908899494665918e-05, "loss": 0.0932, "step": 3894 }, { "epoch": 0.55, "learning_rate": 4.908852704473143e-05, "loss": 0.095, "step": 3896 }, { "epoch": 0.55, "learning_rate": 4.908805914280367e-05, "loss": 0.1071, "step": 3898 }, { "epoch": 0.55, "learning_rate": 4.908759124087591e-05, "loss": 0.0721, "step": 3900 }, { "epoch": 0.55, "learning_rate": 4.908712333894816e-05, "loss": 0.0861, "step": 3902 }, { "epoch": 0.55, "learning_rate": 4.9086655437020404e-05, "loss": 0.0828, "step": 3904 }, { "epoch": 0.55, "learning_rate": 4.908618753509265e-05, "loss": 0.0956, "step": 3906 }, { "epoch": 0.55, "learning_rate": 4.908571963316489e-05, "loss": 0.0569, "step": 3908 }, { "epoch": 0.55, "learning_rate": 4.9085251731237135e-05, "loss": 0.1102, "step": 3910 }, { "epoch": 0.55, "learning_rate": 4.908478382930938e-05, "loss": 0.0839, "step": 3912 }, { "epoch": 0.55, "learning_rate": 4.908431592738163e-05, "loss": 0.093, "step": 3914 }, { "epoch": 0.55, "learning_rate": 4.9083848025453866e-05, "loss": 0.0756, "step": 3916 }, { "epoch": 0.55, "learning_rate": 4.908338012352611e-05, "loss": 0.0863, "step": 3918 }, { "epoch": 0.55, "learning_rate": 4.908291222159835e-05, "loss": 0.0755, "step": 3920 }, { "epoch": 0.55, "learning_rate": 4.9082444319670603e-05, "loss": 0.0991, "step": 3922 }, { "epoch": 0.55, "learning_rate": 4.908197641774284e-05, "loss": 0.0932, "step": 3924 }, { "epoch": 0.55, "learning_rate": 4.908150851581509e-05, "loss": 0.1011, "step": 3926 }, { "epoch": 0.55, "learning_rate": 4.908104061388733e-05, "loss": 0.1346, "step": 3928 }, { "epoch": 0.55, "learning_rate": 4.908057271195958e-05, "loss": 0.1027, "step": 3930 }, { "epoch": 0.55, "learning_rate": 4.908010481003182e-05, "loss": 0.062, "step": 3932 }, { "epoch": 0.55, "learning_rate": 4.9079636908104065e-05, "loss": 0.11, "step": 3934 }, { "epoch": 0.55, "learning_rate": 4.9079169006176305e-05, "loss": 0.0779, "step": 3936 }, { "epoch": 0.55, "learning_rate": 4.907870110424855e-05, "loss": 0.0715, "step": 3938 }, { "epoch": 0.55, "learning_rate": 4.9078233202320796e-05, "loss": 0.0872, "step": 3940 }, { "epoch": 0.55, "learning_rate": 4.907776530039304e-05, "loss": 0.0855, "step": 3942 }, { "epoch": 0.55, "learning_rate": 4.907729739846528e-05, "loss": 0.0987, "step": 3944 }, { "epoch": 0.55, "learning_rate": 4.907682949653753e-05, "loss": 0.0932, "step": 3946 }, { "epoch": 0.55, "learning_rate": 4.907636159460977e-05, "loss": 0.0953, "step": 3948 }, { "epoch": 0.55, "learning_rate": 4.907589369268202e-05, "loss": 0.0986, "step": 3950 }, { "epoch": 0.55, "learning_rate": 4.907542579075426e-05, "loss": 0.0859, "step": 3952 }, { "epoch": 0.56, "learning_rate": 4.9074957888826504e-05, "loss": 0.0915, "step": 3954 }, { "epoch": 0.56, "learning_rate": 4.907448998689875e-05, "loss": 0.0753, "step": 3956 }, { "epoch": 0.56, "learning_rate": 4.9074022084970996e-05, "loss": 0.0887, "step": 3958 }, { "epoch": 0.56, "learning_rate": 4.9073554183043235e-05, "loss": 0.1035, "step": 3960 }, { "epoch": 0.56, "learning_rate": 4.907308628111548e-05, "loss": 0.1388, "step": 3962 }, { "epoch": 0.56, "learning_rate": 4.907261837918773e-05, "loss": 0.0965, "step": 3964 }, { "epoch": 0.56, "learning_rate": 4.907215047725997e-05, "loss": 0.1309, "step": 3966 }, { "epoch": 0.56, "learning_rate": 4.907168257533221e-05, "loss": 0.1106, "step": 3968 }, { "epoch": 0.56, "learning_rate": 4.907121467340446e-05, "loss": 0.0858, "step": 3970 }, { "epoch": 0.56, "learning_rate": 4.90707467714767e-05, "loss": 0.0735, "step": 3972 }, { "epoch": 0.56, "learning_rate": 4.907027886954895e-05, "loss": 0.093, "step": 3974 }, { "epoch": 0.56, "learning_rate": 4.906981096762119e-05, "loss": 0.1556, "step": 3976 }, { "epoch": 0.56, "learning_rate": 4.9069343065693435e-05, "loss": 0.0656, "step": 3978 }, { "epoch": 0.56, "learning_rate": 4.9068875163765674e-05, "loss": 0.0825, "step": 3980 }, { "epoch": 0.56, "learning_rate": 4.906840726183792e-05, "loss": 0.0945, "step": 3982 }, { "epoch": 0.56, "learning_rate": 4.9067939359910166e-05, "loss": 0.0892, "step": 3984 }, { "epoch": 0.56, "learning_rate": 4.906747145798241e-05, "loss": 0.089, "step": 3986 }, { "epoch": 0.56, "learning_rate": 4.906700355605465e-05, "loss": 0.1058, "step": 3988 }, { "epoch": 0.56, "learning_rate": 4.90665356541269e-05, "loss": 0.0915, "step": 3990 }, { "epoch": 0.56, "learning_rate": 4.906606775219914e-05, "loss": 0.0894, "step": 3992 }, { "epoch": 0.56, "learning_rate": 4.906559985027139e-05, "loss": 0.0821, "step": 3994 }, { "epoch": 0.56, "learning_rate": 4.906513194834363e-05, "loss": 0.081, "step": 3996 }, { "epoch": 0.56, "learning_rate": 4.9064664046415874e-05, "loss": 0.096, "step": 3998 }, { "epoch": 0.56, "learning_rate": 4.906419614448812e-05, "loss": 0.0845, "step": 4000 }, { "epoch": 0.56, "eval_gen_len": 33.3103, "eval_loss": 1.031044363975525, "eval_meteor": 0.0499, "eval_runtime": 17.245, "eval_samples_per_second": 3.363, "eval_steps_per_second": 0.464, "step": 4000 }, { "epoch": 0.56, "learning_rate": 4.9063728242560365e-05, "loss": 0.0821, "step": 4002 }, { "epoch": 0.56, "learning_rate": 4.9063260340632605e-05, "loss": 0.0703, "step": 4004 }, { "epoch": 0.56, "learning_rate": 4.906279243870485e-05, "loss": 0.0763, "step": 4006 }, { "epoch": 0.56, "learning_rate": 4.9062324536777096e-05, "loss": 0.083, "step": 4008 }, { "epoch": 0.56, "learning_rate": 4.906185663484934e-05, "loss": 0.0703, "step": 4010 }, { "epoch": 0.56, "learning_rate": 4.906138873292158e-05, "loss": 0.104, "step": 4012 }, { "epoch": 0.56, "learning_rate": 4.906092083099383e-05, "loss": 0.0943, "step": 4014 }, { "epoch": 0.56, "learning_rate": 4.9060452929066066e-05, "loss": 0.086, "step": 4016 }, { "epoch": 0.56, "learning_rate": 4.905998502713832e-05, "loss": 0.0977, "step": 4018 }, { "epoch": 0.56, "learning_rate": 4.905951712521056e-05, "loss": 0.1004, "step": 4020 }, { "epoch": 0.56, "learning_rate": 4.9059049223282804e-05, "loss": 0.1224, "step": 4022 }, { "epoch": 0.56, "learning_rate": 4.905858132135504e-05, "loss": 0.094, "step": 4024 }, { "epoch": 0.57, "learning_rate": 4.9058113419427296e-05, "loss": 0.0824, "step": 4026 }, { "epoch": 0.57, "learning_rate": 4.9057645517499535e-05, "loss": 0.1008, "step": 4028 }, { "epoch": 0.57, "learning_rate": 4.905717761557178e-05, "loss": 0.1034, "step": 4030 }, { "epoch": 0.57, "learning_rate": 4.905670971364402e-05, "loss": 0.1208, "step": 4032 }, { "epoch": 0.57, "learning_rate": 4.9056241811716266e-05, "loss": 0.0767, "step": 4034 }, { "epoch": 0.57, "learning_rate": 4.905577390978851e-05, "loss": 0.1051, "step": 4036 }, { "epoch": 0.57, "learning_rate": 4.905530600786076e-05, "loss": 0.1172, "step": 4038 }, { "epoch": 0.57, "learning_rate": 4.9054838105933e-05, "loss": 0.1016, "step": 4040 }, { "epoch": 0.57, "learning_rate": 4.905437020400524e-05, "loss": 0.09, "step": 4042 }, { "epoch": 0.57, "learning_rate": 4.905390230207749e-05, "loss": 0.0693, "step": 4044 }, { "epoch": 0.57, "learning_rate": 4.9053434400149735e-05, "loss": 0.0866, "step": 4046 }, { "epoch": 0.57, "learning_rate": 4.9052966498221974e-05, "loss": 0.1154, "step": 4048 }, { "epoch": 0.57, "learning_rate": 4.905249859629422e-05, "loss": 0.1125, "step": 4050 }, { "epoch": 0.57, "learning_rate": 4.9052030694366466e-05, "loss": 0.0926, "step": 4052 }, { "epoch": 0.57, "learning_rate": 4.9051562792438705e-05, "loss": 0.0813, "step": 4054 }, { "epoch": 0.57, "learning_rate": 4.905109489051095e-05, "loss": 0.104, "step": 4056 }, { "epoch": 0.57, "learning_rate": 4.905062698858319e-05, "loss": 0.0821, "step": 4058 }, { "epoch": 0.57, "learning_rate": 4.905015908665544e-05, "loss": 0.0744, "step": 4060 }, { "epoch": 0.57, "learning_rate": 4.904969118472768e-05, "loss": 0.1002, "step": 4062 }, { "epoch": 0.57, "learning_rate": 4.904922328279993e-05, "loss": 0.0927, "step": 4064 }, { "epoch": 0.57, "learning_rate": 4.904875538087217e-05, "loss": 0.0597, "step": 4066 }, { "epoch": 0.57, "learning_rate": 4.904828747894441e-05, "loss": 0.0818, "step": 4068 }, { "epoch": 0.57, "learning_rate": 4.904781957701666e-05, "loss": 0.0856, "step": 4070 }, { "epoch": 0.57, "learning_rate": 4.9047351675088905e-05, "loss": 0.0839, "step": 4072 }, { "epoch": 0.57, "learning_rate": 4.9046883773161144e-05, "loss": 0.0904, "step": 4074 }, { "epoch": 0.57, "learning_rate": 4.904641587123339e-05, "loss": 0.1198, "step": 4076 }, { "epoch": 0.57, "learning_rate": 4.9045947969305636e-05, "loss": 0.0997, "step": 4078 }, { "epoch": 0.57, "learning_rate": 4.904548006737788e-05, "loss": 0.091, "step": 4080 }, { "epoch": 0.57, "learning_rate": 4.904501216545012e-05, "loss": 0.0827, "step": 4082 }, { "epoch": 0.57, "learning_rate": 4.9044544263522367e-05, "loss": 0.0801, "step": 4084 }, { "epoch": 0.57, "learning_rate": 4.904407636159461e-05, "loss": 0.099, "step": 4086 }, { "epoch": 0.57, "learning_rate": 4.904360845966686e-05, "loss": 0.1048, "step": 4088 }, { "epoch": 0.57, "learning_rate": 4.90431405577391e-05, "loss": 0.089, "step": 4090 }, { "epoch": 0.57, "learning_rate": 4.904267265581134e-05, "loss": 0.0893, "step": 4092 }, { "epoch": 0.57, "learning_rate": 4.904220475388359e-05, "loss": 0.0834, "step": 4094 }, { "epoch": 0.57, "learning_rate": 4.9041736851955835e-05, "loss": 0.0814, "step": 4096 }, { "epoch": 0.58, "learning_rate": 4.9041268950028074e-05, "loss": 0.0911, "step": 4098 }, { "epoch": 0.58, "learning_rate": 4.904080104810032e-05, "loss": 0.1082, "step": 4100 }, { "epoch": 0.58, "learning_rate": 4.904033314617256e-05, "loss": 0.1039, "step": 4102 }, { "epoch": 0.58, "learning_rate": 4.903986524424481e-05, "loss": 0.0719, "step": 4104 }, { "epoch": 0.58, "learning_rate": 4.903939734231705e-05, "loss": 0.086, "step": 4106 }, { "epoch": 0.58, "learning_rate": 4.90389294403893e-05, "loss": 0.0852, "step": 4108 }, { "epoch": 0.58, "learning_rate": 4.9038461538461536e-05, "loss": 0.1041, "step": 4110 }, { "epoch": 0.58, "learning_rate": 4.903799363653378e-05, "loss": 0.1025, "step": 4112 }, { "epoch": 0.58, "learning_rate": 4.903752573460603e-05, "loss": 0.1016, "step": 4114 }, { "epoch": 0.58, "learning_rate": 4.9037057832678274e-05, "loss": 0.1134, "step": 4116 }, { "epoch": 0.58, "learning_rate": 4.903658993075051e-05, "loss": 0.1048, "step": 4118 }, { "epoch": 0.58, "learning_rate": 4.903612202882276e-05, "loss": 0.0856, "step": 4120 }, { "epoch": 0.58, "learning_rate": 4.9035654126895005e-05, "loss": 0.0737, "step": 4122 }, { "epoch": 0.58, "learning_rate": 4.903518622496725e-05, "loss": 0.0931, "step": 4124 }, { "epoch": 0.58, "learning_rate": 4.903471832303949e-05, "loss": 0.0875, "step": 4126 }, { "epoch": 0.58, "learning_rate": 4.9034250421111736e-05, "loss": 0.0841, "step": 4128 }, { "epoch": 0.58, "learning_rate": 4.903378251918398e-05, "loss": 0.1084, "step": 4130 }, { "epoch": 0.58, "learning_rate": 4.903331461725623e-05, "loss": 0.0762, "step": 4132 }, { "epoch": 0.58, "learning_rate": 4.903284671532847e-05, "loss": 0.1076, "step": 4134 }, { "epoch": 0.58, "learning_rate": 4.903237881340071e-05, "loss": 0.0912, "step": 4136 }, { "epoch": 0.58, "learning_rate": 4.903191091147296e-05, "loss": 0.0866, "step": 4138 }, { "epoch": 0.58, "learning_rate": 4.9031443009545205e-05, "loss": 0.092, "step": 4140 }, { "epoch": 0.58, "learning_rate": 4.9030975107617444e-05, "loss": 0.1047, "step": 4142 }, { "epoch": 0.58, "learning_rate": 4.903050720568969e-05, "loss": 0.0932, "step": 4144 }, { "epoch": 0.58, "learning_rate": 4.903003930376193e-05, "loss": 0.1197, "step": 4146 }, { "epoch": 0.58, "learning_rate": 4.902957140183418e-05, "loss": 0.1017, "step": 4148 }, { "epoch": 0.58, "learning_rate": 4.902910349990642e-05, "loss": 0.0966, "step": 4150 }, { "epoch": 0.58, "learning_rate": 4.9028635597978667e-05, "loss": 0.1375, "step": 4152 }, { "epoch": 0.58, "learning_rate": 4.9028167696050906e-05, "loss": 0.096, "step": 4154 }, { "epoch": 0.58, "learning_rate": 4.902769979412316e-05, "loss": 0.0814, "step": 4156 }, { "epoch": 0.58, "learning_rate": 4.90272318921954e-05, "loss": 0.0854, "step": 4158 }, { "epoch": 0.58, "learning_rate": 4.9026763990267643e-05, "loss": 0.0935, "step": 4160 }, { "epoch": 0.58, "learning_rate": 4.902629608833988e-05, "loss": 0.0972, "step": 4162 }, { "epoch": 0.58, "learning_rate": 4.902582818641213e-05, "loss": 0.1046, "step": 4164 }, { "epoch": 0.58, "learning_rate": 4.9025360284484374e-05, "loss": 0.1404, "step": 4166 }, { "epoch": 0.59, "learning_rate": 4.902489238255662e-05, "loss": 0.0898, "step": 4168 }, { "epoch": 0.59, "learning_rate": 4.902442448062886e-05, "loss": 0.082, "step": 4170 }, { "epoch": 0.59, "learning_rate": 4.9023956578701105e-05, "loss": 0.1063, "step": 4172 }, { "epoch": 0.59, "learning_rate": 4.902348867677335e-05, "loss": 0.0762, "step": 4174 }, { "epoch": 0.59, "learning_rate": 4.90230207748456e-05, "loss": 0.1002, "step": 4176 }, { "epoch": 0.59, "learning_rate": 4.9022552872917836e-05, "loss": 0.0935, "step": 4178 }, { "epoch": 0.59, "learning_rate": 4.902208497099008e-05, "loss": 0.1117, "step": 4180 }, { "epoch": 0.59, "learning_rate": 4.902161706906233e-05, "loss": 0.1026, "step": 4182 }, { "epoch": 0.59, "learning_rate": 4.9021149167134574e-05, "loss": 0.0976, "step": 4184 }, { "epoch": 0.59, "learning_rate": 4.902068126520681e-05, "loss": 0.1024, "step": 4186 }, { "epoch": 0.59, "learning_rate": 4.902021336327906e-05, "loss": 0.0984, "step": 4188 }, { "epoch": 0.59, "learning_rate": 4.9019745461351305e-05, "loss": 0.0916, "step": 4190 }, { "epoch": 0.59, "learning_rate": 4.901927755942355e-05, "loss": 0.1315, "step": 4192 }, { "epoch": 0.59, "learning_rate": 4.901880965749579e-05, "loss": 0.0902, "step": 4194 }, { "epoch": 0.59, "learning_rate": 4.9018341755568036e-05, "loss": 0.1066, "step": 4196 }, { "epoch": 0.59, "learning_rate": 4.9017873853640275e-05, "loss": 0.0916, "step": 4198 }, { "epoch": 0.59, "learning_rate": 4.901740595171253e-05, "loss": 0.1009, "step": 4200 }, { "epoch": 0.59, "learning_rate": 4.901693804978477e-05, "loss": 0.099, "step": 4202 }, { "epoch": 0.59, "learning_rate": 4.901647014785701e-05, "loss": 0.1022, "step": 4204 }, { "epoch": 0.59, "learning_rate": 4.901600224592925e-05, "loss": 0.1264, "step": 4206 }, { "epoch": 0.59, "learning_rate": 4.9015534344001505e-05, "loss": 0.0855, "step": 4208 }, { "epoch": 0.59, "learning_rate": 4.9015066442073744e-05, "loss": 0.0807, "step": 4210 }, { "epoch": 0.59, "learning_rate": 4.901459854014599e-05, "loss": 0.098, "step": 4212 }, { "epoch": 0.59, "learning_rate": 4.901413063821823e-05, "loss": 0.0941, "step": 4214 }, { "epoch": 0.59, "learning_rate": 4.9013662736290475e-05, "loss": 0.0892, "step": 4216 }, { "epoch": 0.59, "learning_rate": 4.901319483436272e-05, "loss": 0.086, "step": 4218 }, { "epoch": 0.59, "learning_rate": 4.9012726932434967e-05, "loss": 0.0854, "step": 4220 }, { "epoch": 0.59, "learning_rate": 4.9012259030507206e-05, "loss": 0.0884, "step": 4222 }, { "epoch": 0.59, "learning_rate": 4.901179112857945e-05, "loss": 0.14, "step": 4224 }, { "epoch": 0.59, "learning_rate": 4.90113232266517e-05, "loss": 0.1044, "step": 4226 }, { "epoch": 0.59, "learning_rate": 4.9010855324723943e-05, "loss": 0.1168, "step": 4228 }, { "epoch": 0.59, "learning_rate": 4.901038742279618e-05, "loss": 0.099, "step": 4230 }, { "epoch": 0.59, "learning_rate": 4.900991952086843e-05, "loss": 0.0921, "step": 4232 }, { "epoch": 0.59, "learning_rate": 4.9009451618940674e-05, "loss": 0.1101, "step": 4234 }, { "epoch": 0.59, "learning_rate": 4.900898371701292e-05, "loss": 0.0668, "step": 4236 }, { "epoch": 0.59, "learning_rate": 4.900851581508516e-05, "loss": 0.1024, "step": 4238 }, { "epoch": 0.6, "learning_rate": 4.9008047913157405e-05, "loss": 0.1071, "step": 4240 }, { "epoch": 0.6, "learning_rate": 4.900758001122965e-05, "loss": 0.0846, "step": 4242 }, { "epoch": 0.6, "learning_rate": 4.90071121093019e-05, "loss": 0.0732, "step": 4244 }, { "epoch": 0.6, "learning_rate": 4.9006644207374136e-05, "loss": 0.0998, "step": 4246 }, { "epoch": 0.6, "learning_rate": 4.900617630544638e-05, "loss": 0.1033, "step": 4248 }, { "epoch": 0.6, "learning_rate": 4.900570840351862e-05, "loss": 0.0899, "step": 4250 }, { "epoch": 0.6, "learning_rate": 4.9005240501590874e-05, "loss": 0.09, "step": 4252 }, { "epoch": 0.6, "learning_rate": 4.900477259966311e-05, "loss": 0.1022, "step": 4254 }, { "epoch": 0.6, "learning_rate": 4.900430469773536e-05, "loss": 0.0846, "step": 4256 }, { "epoch": 0.6, "learning_rate": 4.90038367958076e-05, "loss": 0.1168, "step": 4258 }, { "epoch": 0.6, "learning_rate": 4.9003368893879844e-05, "loss": 0.094, "step": 4260 }, { "epoch": 0.6, "learning_rate": 4.900290099195209e-05, "loss": 0.0767, "step": 4262 }, { "epoch": 0.6, "learning_rate": 4.9002433090024336e-05, "loss": 0.0952, "step": 4264 }, { "epoch": 0.6, "learning_rate": 4.9001965188096575e-05, "loss": 0.0958, "step": 4266 }, { "epoch": 0.6, "learning_rate": 4.900149728616882e-05, "loss": 0.0928, "step": 4268 }, { "epoch": 0.6, "learning_rate": 4.900102938424107e-05, "loss": 0.0999, "step": 4270 }, { "epoch": 0.6, "learning_rate": 4.900056148231331e-05, "loss": 0.1167, "step": 4272 }, { "epoch": 0.6, "learning_rate": 4.900009358038555e-05, "loss": 0.0872, "step": 4274 }, { "epoch": 0.6, "learning_rate": 4.89996256784578e-05, "loss": 0.0778, "step": 4276 }, { "epoch": 0.6, "learning_rate": 4.8999157776530044e-05, "loss": 0.097, "step": 4278 }, { "epoch": 0.6, "learning_rate": 4.899868987460229e-05, "loss": 0.0768, "step": 4280 }, { "epoch": 0.6, "learning_rate": 4.899822197267453e-05, "loss": 0.0844, "step": 4282 }, { "epoch": 0.6, "learning_rate": 4.8997754070746775e-05, "loss": 0.1015, "step": 4284 }, { "epoch": 0.6, "learning_rate": 4.899728616881902e-05, "loss": 0.0848, "step": 4286 }, { "epoch": 0.6, "learning_rate": 4.8996818266891267e-05, "loss": 0.0916, "step": 4288 }, { "epoch": 0.6, "learning_rate": 4.8996350364963506e-05, "loss": 0.091, "step": 4290 }, { "epoch": 0.6, "learning_rate": 4.899588246303575e-05, "loss": 0.085, "step": 4292 }, { "epoch": 0.6, "learning_rate": 4.899541456110799e-05, "loss": 0.0828, "step": 4294 }, { "epoch": 0.6, "learning_rate": 4.8994946659180243e-05, "loss": 0.0947, "step": 4296 }, { "epoch": 0.6, "learning_rate": 4.899447875725248e-05, "loss": 0.1177, "step": 4298 }, { "epoch": 0.6, "learning_rate": 4.899401085532473e-05, "loss": 0.0947, "step": 4300 }, { "epoch": 0.6, "learning_rate": 4.899354295339697e-05, "loss": 0.0855, "step": 4302 }, { "epoch": 0.6, "learning_rate": 4.899307505146922e-05, "loss": 0.0957, "step": 4304 }, { "epoch": 0.6, "learning_rate": 4.899260714954146e-05, "loss": 0.0863, "step": 4306 }, { "epoch": 0.6, "learning_rate": 4.89921392476137e-05, "loss": 0.1037, "step": 4308 }, { "epoch": 0.6, "learning_rate": 4.8991671345685944e-05, "loss": 0.1227, "step": 4310 }, { "epoch": 0.61, "learning_rate": 4.899120344375819e-05, "loss": 0.0743, "step": 4312 }, { "epoch": 0.61, "learning_rate": 4.8990735541830436e-05, "loss": 0.1011, "step": 4314 }, { "epoch": 0.61, "learning_rate": 4.8990267639902675e-05, "loss": 0.0948, "step": 4316 }, { "epoch": 0.61, "learning_rate": 4.898979973797492e-05, "loss": 0.0914, "step": 4318 }, { "epoch": 0.61, "learning_rate": 4.898933183604717e-05, "loss": 0.0866, "step": 4320 }, { "epoch": 0.61, "learning_rate": 4.898886393411941e-05, "loss": 0.0906, "step": 4322 }, { "epoch": 0.61, "learning_rate": 4.898839603219165e-05, "loss": 0.0916, "step": 4324 }, { "epoch": 0.61, "learning_rate": 4.89879281302639e-05, "loss": 0.0887, "step": 4326 }, { "epoch": 0.61, "learning_rate": 4.898746022833614e-05, "loss": 0.0896, "step": 4328 }, { "epoch": 0.61, "learning_rate": 4.898699232640839e-05, "loss": 0.1005, "step": 4330 }, { "epoch": 0.61, "learning_rate": 4.898652442448063e-05, "loss": 0.0842, "step": 4332 }, { "epoch": 0.61, "learning_rate": 4.8986056522552875e-05, "loss": 0.0873, "step": 4334 }, { "epoch": 0.61, "learning_rate": 4.8985588620625114e-05, "loss": 0.1558, "step": 4336 }, { "epoch": 0.61, "learning_rate": 4.898512071869737e-05, "loss": 0.0988, "step": 4338 }, { "epoch": 0.61, "learning_rate": 4.8984652816769606e-05, "loss": 0.0885, "step": 4340 }, { "epoch": 0.61, "learning_rate": 4.898418491484185e-05, "loss": 0.0964, "step": 4342 }, { "epoch": 0.61, "learning_rate": 4.898371701291409e-05, "loss": 0.0886, "step": 4344 }, { "epoch": 0.61, "learning_rate": 4.898324911098634e-05, "loss": 0.0924, "step": 4346 }, { "epoch": 0.61, "learning_rate": 4.898278120905858e-05, "loss": 0.1097, "step": 4348 }, { "epoch": 0.61, "learning_rate": 4.898231330713083e-05, "loss": 0.0865, "step": 4350 }, { "epoch": 0.61, "learning_rate": 4.898184540520307e-05, "loss": 0.1014, "step": 4352 }, { "epoch": 0.61, "learning_rate": 4.8981377503275314e-05, "loss": 0.1189, "step": 4354 }, { "epoch": 0.61, "learning_rate": 4.898090960134756e-05, "loss": 0.0735, "step": 4356 }, { "epoch": 0.61, "learning_rate": 4.8980441699419806e-05, "loss": 0.0957, "step": 4358 }, { "epoch": 0.61, "learning_rate": 4.8979973797492045e-05, "loss": 0.1077, "step": 4360 }, { "epoch": 0.61, "learning_rate": 4.897950589556429e-05, "loss": 0.0805, "step": 4362 }, { "epoch": 0.61, "learning_rate": 4.897903799363654e-05, "loss": 0.0992, "step": 4364 }, { "epoch": 0.61, "learning_rate": 4.897857009170878e-05, "loss": 0.1078, "step": 4366 }, { "epoch": 0.61, "learning_rate": 4.897810218978102e-05, "loss": 0.0871, "step": 4368 }, { "epoch": 0.61, "learning_rate": 4.897763428785327e-05, "loss": 0.0985, "step": 4370 }, { "epoch": 0.61, "learning_rate": 4.8977166385925514e-05, "loss": 0.0938, "step": 4372 }, { "epoch": 0.61, "learning_rate": 4.897669848399776e-05, "loss": 0.1007, "step": 4374 }, { "epoch": 0.61, "learning_rate": 4.897623058207e-05, "loss": 0.0793, "step": 4376 }, { "epoch": 0.61, "learning_rate": 4.8975762680142244e-05, "loss": 0.0948, "step": 4378 }, { "epoch": 0.61, "learning_rate": 4.8975294778214484e-05, "loss": 0.0967, "step": 4380 }, { "epoch": 0.62, "learning_rate": 4.8974826876286736e-05, "loss": 0.0931, "step": 4382 }, { "epoch": 0.62, "learning_rate": 4.8974358974358975e-05, "loss": 0.0887, "step": 4384 }, { "epoch": 0.62, "learning_rate": 4.897389107243122e-05, "loss": 0.0804, "step": 4386 }, { "epoch": 0.62, "learning_rate": 4.897342317050346e-05, "loss": 0.0771, "step": 4388 }, { "epoch": 0.62, "learning_rate": 4.897295526857571e-05, "loss": 0.0911, "step": 4390 }, { "epoch": 0.62, "learning_rate": 4.897248736664795e-05, "loss": 0.0988, "step": 4392 }, { "epoch": 0.62, "learning_rate": 4.89720194647202e-05, "loss": 0.0744, "step": 4394 }, { "epoch": 0.62, "learning_rate": 4.897155156279244e-05, "loss": 0.0785, "step": 4396 }, { "epoch": 0.62, "learning_rate": 4.897108366086468e-05, "loss": 0.0776, "step": 4398 }, { "epoch": 0.62, "learning_rate": 4.897061575893693e-05, "loss": 0.0814, "step": 4400 }, { "epoch": 0.62, "learning_rate": 4.8970147857009175e-05, "loss": 0.097, "step": 4402 }, { "epoch": 0.62, "learning_rate": 4.8969679955081414e-05, "loss": 0.0763, "step": 4404 }, { "epoch": 0.62, "learning_rate": 4.896921205315366e-05, "loss": 0.1057, "step": 4406 }, { "epoch": 0.62, "learning_rate": 4.8968744151225906e-05, "loss": 0.0981, "step": 4408 }, { "epoch": 0.62, "learning_rate": 4.896827624929815e-05, "loss": 0.0961, "step": 4410 }, { "epoch": 0.62, "learning_rate": 4.896780834737039e-05, "loss": 0.0917, "step": 4412 }, { "epoch": 0.62, "learning_rate": 4.896734044544264e-05, "loss": 0.0941, "step": 4414 }, { "epoch": 0.62, "learning_rate": 4.896687254351488e-05, "loss": 0.0752, "step": 4416 }, { "epoch": 0.62, "learning_rate": 4.896640464158713e-05, "loss": 0.1232, "step": 4418 }, { "epoch": 0.62, "learning_rate": 4.896593673965937e-05, "loss": 0.0823, "step": 4420 }, { "epoch": 0.62, "learning_rate": 4.8965468837731614e-05, "loss": 0.0992, "step": 4422 }, { "epoch": 0.62, "learning_rate": 4.896500093580385e-05, "loss": 0.0677, "step": 4424 }, { "epoch": 0.62, "learning_rate": 4.8964533033876106e-05, "loss": 0.0863, "step": 4426 }, { "epoch": 0.62, "learning_rate": 4.8964065131948345e-05, "loss": 0.1048, "step": 4428 }, { "epoch": 0.62, "learning_rate": 4.896359723002059e-05, "loss": 0.0841, "step": 4430 }, { "epoch": 0.62, "learning_rate": 4.896312932809283e-05, "loss": 0.1079, "step": 4432 }, { "epoch": 0.62, "learning_rate": 4.896266142616508e-05, "loss": 0.0667, "step": 4434 }, { "epoch": 0.62, "learning_rate": 4.896219352423732e-05, "loss": 0.0898, "step": 4436 }, { "epoch": 0.62, "learning_rate": 4.896172562230957e-05, "loss": 0.1009, "step": 4438 }, { "epoch": 0.62, "learning_rate": 4.896125772038181e-05, "loss": 0.0881, "step": 4440 }, { "epoch": 0.62, "learning_rate": 4.896078981845405e-05, "loss": 0.0873, "step": 4442 }, { "epoch": 0.62, "learning_rate": 4.89603219165263e-05, "loss": 0.1066, "step": 4444 }, { "epoch": 0.62, "learning_rate": 4.8959854014598545e-05, "loss": 0.1005, "step": 4446 }, { "epoch": 0.62, "learning_rate": 4.8959386112670784e-05, "loss": 0.0918, "step": 4448 }, { "epoch": 0.62, "learning_rate": 4.895891821074303e-05, "loss": 0.0812, "step": 4450 }, { "epoch": 0.62, "learning_rate": 4.8958450308815275e-05, "loss": 0.0732, "step": 4452 }, { "epoch": 0.63, "learning_rate": 4.895798240688752e-05, "loss": 0.1159, "step": 4454 }, { "epoch": 0.63, "learning_rate": 4.895751450495976e-05, "loss": 0.1127, "step": 4456 }, { "epoch": 0.63, "learning_rate": 4.8957046603032006e-05, "loss": 0.0782, "step": 4458 }, { "epoch": 0.63, "learning_rate": 4.895657870110425e-05, "loss": 0.1067, "step": 4460 }, { "epoch": 0.63, "learning_rate": 4.89561107991765e-05, "loss": 0.078, "step": 4462 }, { "epoch": 0.63, "learning_rate": 4.895564289724874e-05, "loss": 0.1023, "step": 4464 }, { "epoch": 0.63, "learning_rate": 4.895517499532098e-05, "loss": 0.0884, "step": 4466 }, { "epoch": 0.63, "learning_rate": 4.895470709339323e-05, "loss": 0.0833, "step": 4468 }, { "epoch": 0.63, "learning_rate": 4.8954239191465475e-05, "loss": 0.0966, "step": 4470 }, { "epoch": 0.63, "learning_rate": 4.8953771289537714e-05, "loss": 0.0709, "step": 4472 }, { "epoch": 0.63, "learning_rate": 4.895330338760996e-05, "loss": 0.0861, "step": 4474 }, { "epoch": 0.63, "learning_rate": 4.89528354856822e-05, "loss": 0.1094, "step": 4476 }, { "epoch": 0.63, "learning_rate": 4.895236758375445e-05, "loss": 0.1049, "step": 4478 }, { "epoch": 0.63, "learning_rate": 4.895189968182669e-05, "loss": 0.0909, "step": 4480 }, { "epoch": 0.63, "learning_rate": 4.895143177989894e-05, "loss": 0.0886, "step": 4482 }, { "epoch": 0.63, "learning_rate": 4.8950963877971176e-05, "loss": 0.112, "step": 4484 }, { "epoch": 0.63, "learning_rate": 4.895049597604343e-05, "loss": 0.0861, "step": 4486 }, { "epoch": 0.63, "learning_rate": 4.895002807411567e-05, "loss": 0.086, "step": 4488 }, { "epoch": 0.63, "learning_rate": 4.8949560172187914e-05, "loss": 0.0954, "step": 4490 }, { "epoch": 0.63, "learning_rate": 4.894909227026015e-05, "loss": 0.1179, "step": 4492 }, { "epoch": 0.63, "learning_rate": 4.89486243683324e-05, "loss": 0.0842, "step": 4494 }, { "epoch": 0.63, "learning_rate": 4.8948156466404645e-05, "loss": 0.1064, "step": 4496 }, { "epoch": 0.63, "learning_rate": 4.894768856447689e-05, "loss": 0.1053, "step": 4498 }, { "epoch": 0.63, "learning_rate": 4.894722066254913e-05, "loss": 0.0951, "step": 4500 }, { "epoch": 0.63, "learning_rate": 4.8946752760621376e-05, "loss": 0.1047, "step": 4502 }, { "epoch": 0.63, "learning_rate": 4.894628485869362e-05, "loss": 0.0911, "step": 4504 }, { "epoch": 0.63, "learning_rate": 4.894581695676587e-05, "loss": 0.0806, "step": 4506 }, { "epoch": 0.63, "learning_rate": 4.894534905483811e-05, "loss": 0.0861, "step": 4508 }, { "epoch": 0.63, "learning_rate": 4.894488115291035e-05, "loss": 0.09, "step": 4510 }, { "epoch": 0.63, "learning_rate": 4.89444132509826e-05, "loss": 0.0681, "step": 4512 }, { "epoch": 0.63, "learning_rate": 4.8943945349054845e-05, "loss": 0.1114, "step": 4514 }, { "epoch": 0.63, "learning_rate": 4.8943477447127084e-05, "loss": 0.1009, "step": 4516 }, { "epoch": 0.63, "learning_rate": 4.894300954519933e-05, "loss": 0.0783, "step": 4518 }, { "epoch": 0.63, "learning_rate": 4.8942541643271575e-05, "loss": 0.0956, "step": 4520 }, { "epoch": 0.63, "learning_rate": 4.894207374134382e-05, "loss": 0.0938, "step": 4522 }, { "epoch": 0.64, "learning_rate": 4.894160583941606e-05, "loss": 0.0783, "step": 4524 }, { "epoch": 0.64, "learning_rate": 4.8941137937488306e-05, "loss": 0.095, "step": 4526 }, { "epoch": 0.64, "learning_rate": 4.8940670035560546e-05, "loss": 0.1184, "step": 4528 }, { "epoch": 0.64, "learning_rate": 4.89402021336328e-05, "loss": 0.0978, "step": 4530 }, { "epoch": 0.64, "learning_rate": 4.893973423170504e-05, "loss": 0.1171, "step": 4532 }, { "epoch": 0.64, "learning_rate": 4.893926632977728e-05, "loss": 0.0834, "step": 4534 }, { "epoch": 0.64, "learning_rate": 4.893879842784952e-05, "loss": 0.1069, "step": 4536 }, { "epoch": 0.64, "learning_rate": 4.893833052592177e-05, "loss": 0.1139, "step": 4538 }, { "epoch": 0.64, "learning_rate": 4.8937862623994014e-05, "loss": 0.1091, "step": 4540 }, { "epoch": 0.64, "learning_rate": 4.893739472206626e-05, "loss": 0.1117, "step": 4542 }, { "epoch": 0.64, "learning_rate": 4.89369268201385e-05, "loss": 0.1039, "step": 4544 }, { "epoch": 0.64, "learning_rate": 4.8936458918210745e-05, "loss": 0.0875, "step": 4546 }, { "epoch": 0.64, "learning_rate": 4.893599101628299e-05, "loss": 0.0898, "step": 4548 }, { "epoch": 0.64, "learning_rate": 4.893552311435524e-05, "loss": 0.0719, "step": 4550 }, { "epoch": 0.64, "learning_rate": 4.8935055212427476e-05, "loss": 0.1235, "step": 4552 }, { "epoch": 0.64, "learning_rate": 4.893458731049972e-05, "loss": 0.0776, "step": 4554 }, { "epoch": 0.64, "learning_rate": 4.893411940857197e-05, "loss": 0.0873, "step": 4556 }, { "epoch": 0.64, "learning_rate": 4.8933651506644214e-05, "loss": 0.0984, "step": 4558 }, { "epoch": 0.64, "learning_rate": 4.893318360471645e-05, "loss": 0.0908, "step": 4560 }, { "epoch": 0.64, "learning_rate": 4.893271570278869e-05, "loss": 0.0766, "step": 4562 }, { "epoch": 0.64, "learning_rate": 4.8932247800860945e-05, "loss": 0.0991, "step": 4564 }, { "epoch": 0.64, "learning_rate": 4.8931779898933184e-05, "loss": 0.0929, "step": 4566 }, { "epoch": 0.64, "learning_rate": 4.893131199700543e-05, "loss": 0.0673, "step": 4568 }, { "epoch": 0.64, "learning_rate": 4.893084409507767e-05, "loss": 0.0776, "step": 4570 }, { "epoch": 0.64, "learning_rate": 4.8930376193149915e-05, "loss": 0.0835, "step": 4572 }, { "epoch": 0.64, "learning_rate": 4.892990829122216e-05, "loss": 0.1216, "step": 4574 }, { "epoch": 0.64, "learning_rate": 4.892944038929441e-05, "loss": 0.0918, "step": 4576 }, { "epoch": 0.64, "learning_rate": 4.8928972487366646e-05, "loss": 0.1165, "step": 4578 }, { "epoch": 0.64, "learning_rate": 4.892850458543889e-05, "loss": 0.0949, "step": 4580 }, { "epoch": 0.64, "learning_rate": 4.892803668351114e-05, "loss": 0.106, "step": 4582 }, { "epoch": 0.64, "learning_rate": 4.8927568781583384e-05, "loss": 0.0911, "step": 4584 }, { "epoch": 0.64, "learning_rate": 4.892710087965562e-05, "loss": 0.0676, "step": 4586 }, { "epoch": 0.64, "learning_rate": 4.892663297772787e-05, "loss": 0.0986, "step": 4588 }, { "epoch": 0.64, "learning_rate": 4.8926165075800115e-05, "loss": 0.1082, "step": 4590 }, { "epoch": 0.64, "learning_rate": 4.892569717387236e-05, "loss": 0.104, "step": 4592 }, { "epoch": 0.64, "learning_rate": 4.89252292719446e-05, "loss": 0.0883, "step": 4594 }, { "epoch": 0.65, "learning_rate": 4.8924761370016846e-05, "loss": 0.1203, "step": 4596 }, { "epoch": 0.65, "learning_rate": 4.892429346808909e-05, "loss": 0.1259, "step": 4598 }, { "epoch": 0.65, "learning_rate": 4.892382556616134e-05, "loss": 0.088, "step": 4600 }, { "epoch": 0.65, "learning_rate": 4.8923357664233577e-05, "loss": 0.0992, "step": 4602 }, { "epoch": 0.65, "learning_rate": 4.892288976230582e-05, "loss": 0.0931, "step": 4604 }, { "epoch": 0.65, "learning_rate": 4.892242186037806e-05, "loss": 0.112, "step": 4606 }, { "epoch": 0.65, "learning_rate": 4.8921953958450314e-05, "loss": 0.1198, "step": 4608 }, { "epoch": 0.65, "learning_rate": 4.8921486056522553e-05, "loss": 0.0877, "step": 4610 }, { "epoch": 0.65, "learning_rate": 4.89210181545948e-05, "loss": 0.0897, "step": 4612 }, { "epoch": 0.65, "learning_rate": 4.892055025266704e-05, "loss": 0.093, "step": 4614 }, { "epoch": 0.65, "learning_rate": 4.892008235073929e-05, "loss": 0.1275, "step": 4616 }, { "epoch": 0.65, "learning_rate": 4.891961444881153e-05, "loss": 0.1018, "step": 4618 }, { "epoch": 0.65, "learning_rate": 4.8919146546883776e-05, "loss": 0.1001, "step": 4620 }, { "epoch": 0.65, "learning_rate": 4.8918678644956015e-05, "loss": 0.0741, "step": 4622 }, { "epoch": 0.65, "learning_rate": 4.891821074302826e-05, "loss": 0.0864, "step": 4624 }, { "epoch": 0.65, "learning_rate": 4.891774284110051e-05, "loss": 0.098, "step": 4626 }, { "epoch": 0.65, "learning_rate": 4.891727493917275e-05, "loss": 0.0803, "step": 4628 }, { "epoch": 0.65, "learning_rate": 4.891680703724499e-05, "loss": 0.0959, "step": 4630 }, { "epoch": 0.65, "learning_rate": 4.891633913531724e-05, "loss": 0.0874, "step": 4632 }, { "epoch": 0.65, "learning_rate": 4.8915871233389484e-05, "loss": 0.0949, "step": 4634 }, { "epoch": 0.65, "learning_rate": 4.891540333146173e-05, "loss": 0.0782, "step": 4636 }, { "epoch": 0.65, "learning_rate": 4.891493542953397e-05, "loss": 0.0949, "step": 4638 }, { "epoch": 0.65, "learning_rate": 4.8914467527606215e-05, "loss": 0.1179, "step": 4640 }, { "epoch": 0.65, "learning_rate": 4.891399962567846e-05, "loss": 0.0777, "step": 4642 }, { "epoch": 0.65, "learning_rate": 4.891353172375071e-05, "loss": 0.0822, "step": 4644 }, { "epoch": 0.65, "learning_rate": 4.8913063821822946e-05, "loss": 0.1081, "step": 4646 }, { "epoch": 0.65, "learning_rate": 4.891259591989519e-05, "loss": 0.139, "step": 4648 }, { "epoch": 0.65, "learning_rate": 4.891212801796744e-05, "loss": 0.0901, "step": 4650 }, { "epoch": 0.65, "learning_rate": 4.8911660116039684e-05, "loss": 0.0992, "step": 4652 }, { "epoch": 0.65, "learning_rate": 4.891119221411192e-05, "loss": 0.1104, "step": 4654 }, { "epoch": 0.65, "learning_rate": 4.891072431218417e-05, "loss": 0.1043, "step": 4656 }, { "epoch": 0.65, "learning_rate": 4.891025641025641e-05, "loss": 0.0848, "step": 4658 }, { "epoch": 0.65, "learning_rate": 4.890978850832866e-05, "loss": 0.1208, "step": 4660 }, { "epoch": 0.65, "learning_rate": 4.89093206064009e-05, "loss": 0.0921, "step": 4662 }, { "epoch": 0.65, "learning_rate": 4.8908852704473146e-05, "loss": 0.1036, "step": 4664 }, { "epoch": 0.65, "learning_rate": 4.8908384802545385e-05, "loss": 0.1074, "step": 4666 }, { "epoch": 0.66, "learning_rate": 4.890791690061764e-05, "loss": 0.0866, "step": 4668 }, { "epoch": 0.66, "learning_rate": 4.8907448998689877e-05, "loss": 0.077, "step": 4670 }, { "epoch": 0.66, "learning_rate": 4.890698109676212e-05, "loss": 0.1032, "step": 4672 }, { "epoch": 0.66, "learning_rate": 4.890651319483436e-05, "loss": 0.0909, "step": 4674 }, { "epoch": 0.66, "learning_rate": 4.890604529290661e-05, "loss": 0.0973, "step": 4676 }, { "epoch": 0.66, "learning_rate": 4.8905577390978853e-05, "loss": 0.1134, "step": 4678 }, { "epoch": 0.66, "learning_rate": 4.89051094890511e-05, "loss": 0.0955, "step": 4680 }, { "epoch": 0.66, "learning_rate": 4.890464158712334e-05, "loss": 0.1156, "step": 4682 }, { "epoch": 0.66, "learning_rate": 4.8904173685195584e-05, "loss": 0.1031, "step": 4684 }, { "epoch": 0.66, "learning_rate": 4.890370578326783e-05, "loss": 0.1189, "step": 4686 }, { "epoch": 0.66, "learning_rate": 4.8903237881340076e-05, "loss": 0.0778, "step": 4688 }, { "epoch": 0.66, "learning_rate": 4.8902769979412315e-05, "loss": 0.0846, "step": 4690 }, { "epoch": 0.66, "learning_rate": 4.890230207748456e-05, "loss": 0.1086, "step": 4692 }, { "epoch": 0.66, "learning_rate": 4.890183417555681e-05, "loss": 0.1226, "step": 4694 }, { "epoch": 0.66, "learning_rate": 4.890136627362905e-05, "loss": 0.1065, "step": 4696 }, { "epoch": 0.66, "learning_rate": 4.890089837170129e-05, "loss": 0.0925, "step": 4698 }, { "epoch": 0.66, "learning_rate": 4.890043046977354e-05, "loss": 0.0848, "step": 4700 }, { "epoch": 0.66, "learning_rate": 4.889996256784578e-05, "loss": 0.1009, "step": 4702 }, { "epoch": 0.66, "learning_rate": 4.889949466591803e-05, "loss": 0.0826, "step": 4704 }, { "epoch": 0.66, "learning_rate": 4.889902676399027e-05, "loss": 0.0922, "step": 4706 }, { "epoch": 0.66, "learning_rate": 4.8898558862062515e-05, "loss": 0.1119, "step": 4708 }, { "epoch": 0.66, "learning_rate": 4.8898090960134754e-05, "loss": 0.0901, "step": 4710 }, { "epoch": 0.66, "learning_rate": 4.889762305820701e-05, "loss": 0.1133, "step": 4712 }, { "epoch": 0.66, "learning_rate": 4.8897155156279246e-05, "loss": 0.085, "step": 4714 }, { "epoch": 0.66, "learning_rate": 4.889668725435149e-05, "loss": 0.1013, "step": 4716 }, { "epoch": 0.66, "learning_rate": 4.889621935242373e-05, "loss": 0.0785, "step": 4718 }, { "epoch": 0.66, "learning_rate": 4.889575145049598e-05, "loss": 0.0972, "step": 4720 }, { "epoch": 0.66, "learning_rate": 4.889528354856822e-05, "loss": 0.0839, "step": 4722 }, { "epoch": 0.66, "learning_rate": 4.889481564664047e-05, "loss": 0.0845, "step": 4724 }, { "epoch": 0.66, "learning_rate": 4.889434774471271e-05, "loss": 0.112, "step": 4726 }, { "epoch": 0.66, "learning_rate": 4.8893879842784954e-05, "loss": 0.0875, "step": 4728 }, { "epoch": 0.66, "learning_rate": 4.88934119408572e-05, "loss": 0.0809, "step": 4730 }, { "epoch": 0.66, "learning_rate": 4.8892944038929446e-05, "loss": 0.1014, "step": 4732 }, { "epoch": 0.66, "learning_rate": 4.8892476137001685e-05, "loss": 0.1008, "step": 4734 }, { "epoch": 0.66, "learning_rate": 4.889200823507393e-05, "loss": 0.0968, "step": 4736 }, { "epoch": 0.67, "learning_rate": 4.8891540333146177e-05, "loss": 0.104, "step": 4738 }, { "epoch": 0.67, "learning_rate": 4.889107243121842e-05, "loss": 0.0895, "step": 4740 }, { "epoch": 0.67, "learning_rate": 4.889060452929066e-05, "loss": 0.1038, "step": 4742 }, { "epoch": 0.67, "learning_rate": 4.889013662736291e-05, "loss": 0.0948, "step": 4744 }, { "epoch": 0.67, "learning_rate": 4.8889668725435153e-05, "loss": 0.0859, "step": 4746 }, { "epoch": 0.67, "learning_rate": 4.88892008235074e-05, "loss": 0.0925, "step": 4748 }, { "epoch": 0.67, "learning_rate": 4.888873292157964e-05, "loss": 0.0894, "step": 4750 }, { "epoch": 0.67, "learning_rate": 4.8888265019651884e-05, "loss": 0.1084, "step": 4752 }, { "epoch": 0.67, "learning_rate": 4.8887797117724124e-05, "loss": 0.1075, "step": 4754 }, { "epoch": 0.67, "learning_rate": 4.8887329215796376e-05, "loss": 0.0874, "step": 4756 }, { "epoch": 0.67, "learning_rate": 4.8886861313868615e-05, "loss": 0.1107, "step": 4758 }, { "epoch": 0.67, "learning_rate": 4.888639341194086e-05, "loss": 0.1032, "step": 4760 }, { "epoch": 0.67, "learning_rate": 4.88859255100131e-05, "loss": 0.1123, "step": 4762 }, { "epoch": 0.67, "learning_rate": 4.888545760808535e-05, "loss": 0.075, "step": 4764 }, { "epoch": 0.67, "learning_rate": 4.888498970615759e-05, "loss": 0.0872, "step": 4766 }, { "epoch": 0.67, "learning_rate": 4.888452180422984e-05, "loss": 0.1139, "step": 4768 }, { "epoch": 0.67, "learning_rate": 4.888405390230208e-05, "loss": 0.077, "step": 4770 }, { "epoch": 0.67, "learning_rate": 4.888358600037432e-05, "loss": 0.1046, "step": 4772 }, { "epoch": 0.67, "learning_rate": 4.888311809844657e-05, "loss": 0.0873, "step": 4774 }, { "epoch": 0.67, "learning_rate": 4.8882650196518815e-05, "loss": 0.0995, "step": 4776 }, { "epoch": 0.67, "learning_rate": 4.8882182294591054e-05, "loss": 0.0791, "step": 4778 }, { "epoch": 0.67, "learning_rate": 4.88817143926633e-05, "loss": 0.0981, "step": 4780 }, { "epoch": 0.67, "learning_rate": 4.8881246490735546e-05, "loss": 0.0895, "step": 4782 }, { "epoch": 0.67, "learning_rate": 4.888077858880779e-05, "loss": 0.0972, "step": 4784 }, { "epoch": 0.67, "learning_rate": 4.888031068688003e-05, "loss": 0.1236, "step": 4786 }, { "epoch": 0.67, "learning_rate": 4.887984278495228e-05, "loss": 0.097, "step": 4788 }, { "epoch": 0.67, "learning_rate": 4.887937488302452e-05, "loss": 0.1093, "step": 4790 }, { "epoch": 0.67, "learning_rate": 4.887890698109677e-05, "loss": 0.0803, "step": 4792 }, { "epoch": 0.67, "learning_rate": 4.887843907916901e-05, "loss": 0.0826, "step": 4794 }, { "epoch": 0.67, "learning_rate": 4.8877971177241254e-05, "loss": 0.1125, "step": 4796 }, { "epoch": 0.67, "learning_rate": 4.88775032753135e-05, "loss": 0.105, "step": 4798 }, { "epoch": 0.67, "learning_rate": 4.8877035373385746e-05, "loss": 0.0959, "step": 4800 }, { "epoch": 0.67, "learning_rate": 4.8876567471457985e-05, "loss": 0.0936, "step": 4802 }, { "epoch": 0.67, "learning_rate": 4.887609956953023e-05, "loss": 0.1086, "step": 4804 }, { "epoch": 0.67, "learning_rate": 4.887563166760247e-05, "loss": 0.0829, "step": 4806 }, { "epoch": 0.67, "learning_rate": 4.887516376567472e-05, "loss": 0.1008, "step": 4808 }, { "epoch": 0.68, "learning_rate": 4.887469586374696e-05, "loss": 0.0936, "step": 4810 }, { "epoch": 0.68, "learning_rate": 4.88742279618192e-05, "loss": 0.0756, "step": 4812 }, { "epoch": 0.68, "learning_rate": 4.887376005989145e-05, "loss": 0.0609, "step": 4814 }, { "epoch": 0.68, "learning_rate": 4.887329215796369e-05, "loss": 0.1086, "step": 4816 }, { "epoch": 0.68, "learning_rate": 4.887282425603594e-05, "loss": 0.102, "step": 4818 }, { "epoch": 0.68, "learning_rate": 4.887235635410818e-05, "loss": 0.0932, "step": 4820 }, { "epoch": 0.68, "learning_rate": 4.8871888452180424e-05, "loss": 0.0812, "step": 4822 }, { "epoch": 0.68, "learning_rate": 4.887142055025267e-05, "loss": 0.0932, "step": 4824 }, { "epoch": 0.68, "learning_rate": 4.8870952648324915e-05, "loss": 0.0684, "step": 4826 }, { "epoch": 0.68, "learning_rate": 4.8870484746397155e-05, "loss": 0.0867, "step": 4828 }, { "epoch": 0.68, "learning_rate": 4.88700168444694e-05, "loss": 0.1134, "step": 4830 }, { "epoch": 0.68, "learning_rate": 4.8869548942541646e-05, "loss": 0.0709, "step": 4832 }, { "epoch": 0.68, "learning_rate": 4.886908104061389e-05, "loss": 0.0993, "step": 4834 }, { "epoch": 0.68, "learning_rate": 4.886861313868613e-05, "loss": 0.0925, "step": 4836 }, { "epoch": 0.68, "learning_rate": 4.886814523675838e-05, "loss": 0.074, "step": 4838 }, { "epoch": 0.68, "learning_rate": 4.8867677334830616e-05, "loss": 0.0871, "step": 4840 }, { "epoch": 0.68, "learning_rate": 4.886720943290287e-05, "loss": 0.1135, "step": 4842 }, { "epoch": 0.68, "learning_rate": 4.886674153097511e-05, "loss": 0.0792, "step": 4844 }, { "epoch": 0.68, "learning_rate": 4.8866273629047354e-05, "loss": 0.098, "step": 4846 }, { "epoch": 0.68, "learning_rate": 4.886580572711959e-05, "loss": 0.0822, "step": 4848 }, { "epoch": 0.68, "learning_rate": 4.886533782519184e-05, "loss": 0.1011, "step": 4850 }, { "epoch": 0.68, "learning_rate": 4.8864869923264085e-05, "loss": 0.0857, "step": 4852 }, { "epoch": 0.68, "learning_rate": 4.886440202133633e-05, "loss": 0.1509, "step": 4854 }, { "epoch": 0.68, "learning_rate": 4.886393411940857e-05, "loss": 0.098, "step": 4856 }, { "epoch": 0.68, "learning_rate": 4.8863466217480816e-05, "loss": 0.0951, "step": 4858 }, { "epoch": 0.68, "learning_rate": 4.886299831555306e-05, "loss": 0.0924, "step": 4860 }, { "epoch": 0.68, "learning_rate": 4.886253041362531e-05, "loss": 0.0884, "step": 4862 }, { "epoch": 0.68, "learning_rate": 4.886206251169755e-05, "loss": 0.1034, "step": 4864 }, { "epoch": 0.68, "learning_rate": 4.886159460976979e-05, "loss": 0.0821, "step": 4866 }, { "epoch": 0.68, "learning_rate": 4.886112670784204e-05, "loss": 0.1115, "step": 4868 }, { "epoch": 0.68, "learning_rate": 4.8860658805914285e-05, "loss": 0.0928, "step": 4870 }, { "epoch": 0.68, "learning_rate": 4.8860190903986524e-05, "loss": 0.116, "step": 4872 }, { "epoch": 0.68, "learning_rate": 4.885972300205877e-05, "loss": 0.1248, "step": 4874 }, { "epoch": 0.68, "learning_rate": 4.8859255100131016e-05, "loss": 0.0932, "step": 4876 }, { "epoch": 0.68, "learning_rate": 4.885878719820326e-05, "loss": 0.0847, "step": 4878 }, { "epoch": 0.69, "learning_rate": 4.88583192962755e-05, "loss": 0.1006, "step": 4880 }, { "epoch": 0.69, "learning_rate": 4.885785139434775e-05, "loss": 0.0924, "step": 4882 }, { "epoch": 0.69, "learning_rate": 4.8857383492419986e-05, "loss": 0.1081, "step": 4884 }, { "epoch": 0.69, "learning_rate": 4.885691559049224e-05, "loss": 0.1089, "step": 4886 }, { "epoch": 0.69, "learning_rate": 4.885644768856448e-05, "loss": 0.0778, "step": 4888 }, { "epoch": 0.69, "learning_rate": 4.8855979786636724e-05, "loss": 0.0876, "step": 4890 }, { "epoch": 0.69, "learning_rate": 4.885551188470896e-05, "loss": 0.0956, "step": 4892 }, { "epoch": 0.69, "learning_rate": 4.8855043982781215e-05, "loss": 0.0968, "step": 4894 }, { "epoch": 0.69, "learning_rate": 4.8854576080853455e-05, "loss": 0.1055, "step": 4896 }, { "epoch": 0.69, "learning_rate": 4.88541081789257e-05, "loss": 0.0718, "step": 4898 }, { "epoch": 0.69, "learning_rate": 4.885364027699794e-05, "loss": 0.1171, "step": 4900 }, { "epoch": 0.69, "learning_rate": 4.8853172375070186e-05, "loss": 0.0821, "step": 4902 }, { "epoch": 0.69, "learning_rate": 4.885270447314243e-05, "loss": 0.083, "step": 4904 }, { "epoch": 0.69, "learning_rate": 4.885223657121468e-05, "loss": 0.1034, "step": 4906 }, { "epoch": 0.69, "learning_rate": 4.8851768669286916e-05, "loss": 0.1082, "step": 4908 }, { "epoch": 0.69, "learning_rate": 4.885130076735916e-05, "loss": 0.0854, "step": 4910 }, { "epoch": 0.69, "learning_rate": 4.885083286543141e-05, "loss": 0.0951, "step": 4912 }, { "epoch": 0.69, "learning_rate": 4.8850364963503654e-05, "loss": 0.1106, "step": 4914 }, { "epoch": 0.69, "learning_rate": 4.884989706157589e-05, "loss": 0.1112, "step": 4916 }, { "epoch": 0.69, "learning_rate": 4.884942915964814e-05, "loss": 0.0852, "step": 4918 }, { "epoch": 0.69, "learning_rate": 4.8848961257720385e-05, "loss": 0.0912, "step": 4920 }, { "epoch": 0.69, "learning_rate": 4.884849335579263e-05, "loss": 0.0921, "step": 4922 }, { "epoch": 0.69, "learning_rate": 4.884802545386487e-05, "loss": 0.0897, "step": 4924 }, { "epoch": 0.69, "learning_rate": 4.8847557551937116e-05, "loss": 0.1344, "step": 4926 }, { "epoch": 0.69, "learning_rate": 4.884708965000936e-05, "loss": 0.0962, "step": 4928 }, { "epoch": 0.69, "learning_rate": 4.884662174808161e-05, "loss": 0.1081, "step": 4930 }, { "epoch": 0.69, "learning_rate": 4.884615384615385e-05, "loss": 0.0939, "step": 4932 }, { "epoch": 0.69, "learning_rate": 4.884568594422609e-05, "loss": 0.097, "step": 4934 }, { "epoch": 0.69, "learning_rate": 4.884521804229833e-05, "loss": 0.0804, "step": 4936 }, { "epoch": 0.69, "learning_rate": 4.8844750140370585e-05, "loss": 0.1433, "step": 4938 }, { "epoch": 0.69, "learning_rate": 4.8844282238442824e-05, "loss": 0.0986, "step": 4940 }, { "epoch": 0.69, "learning_rate": 4.884381433651507e-05, "loss": 0.0981, "step": 4942 }, { "epoch": 0.69, "learning_rate": 4.884334643458731e-05, "loss": 0.0859, "step": 4944 }, { "epoch": 0.69, "learning_rate": 4.884287853265956e-05, "loss": 0.1135, "step": 4946 }, { "epoch": 0.69, "learning_rate": 4.88424106307318e-05, "loss": 0.1019, "step": 4948 }, { "epoch": 0.69, "learning_rate": 4.884194272880405e-05, "loss": 0.1109, "step": 4950 }, { "epoch": 0.7, "learning_rate": 4.8841474826876286e-05, "loss": 0.1208, "step": 4952 }, { "epoch": 0.7, "learning_rate": 4.884100692494853e-05, "loss": 0.0752, "step": 4954 }, { "epoch": 0.7, "learning_rate": 4.884053902302078e-05, "loss": 0.1325, "step": 4956 }, { "epoch": 0.7, "learning_rate": 4.8840071121093024e-05, "loss": 0.111, "step": 4958 }, { "epoch": 0.7, "learning_rate": 4.883960321916526e-05, "loss": 0.087, "step": 4960 }, { "epoch": 0.7, "learning_rate": 4.883913531723751e-05, "loss": 0.0767, "step": 4962 }, { "epoch": 0.7, "learning_rate": 4.8838667415309755e-05, "loss": 0.0867, "step": 4964 }, { "epoch": 0.7, "learning_rate": 4.8838199513382e-05, "loss": 0.0836, "step": 4966 }, { "epoch": 0.7, "learning_rate": 4.883773161145424e-05, "loss": 0.084, "step": 4968 }, { "epoch": 0.7, "learning_rate": 4.8837263709526486e-05, "loss": 0.1198, "step": 4970 }, { "epoch": 0.7, "learning_rate": 4.883679580759873e-05, "loss": 0.0794, "step": 4972 }, { "epoch": 0.7, "learning_rate": 4.883632790567098e-05, "loss": 0.0856, "step": 4974 }, { "epoch": 0.7, "learning_rate": 4.8835860003743216e-05, "loss": 0.0822, "step": 4976 }, { "epoch": 0.7, "learning_rate": 4.883539210181546e-05, "loss": 0.1034, "step": 4978 }, { "epoch": 0.7, "learning_rate": 4.883492419988771e-05, "loss": 0.1047, "step": 4980 }, { "epoch": 0.7, "learning_rate": 4.8834456297959954e-05, "loss": 0.1017, "step": 4982 }, { "epoch": 0.7, "learning_rate": 4.883398839603219e-05, "loss": 0.1045, "step": 4984 }, { "epoch": 0.7, "learning_rate": 4.883352049410444e-05, "loss": 0.0984, "step": 4986 }, { "epoch": 0.7, "learning_rate": 4.883305259217668e-05, "loss": 0.0999, "step": 4988 }, { "epoch": 0.7, "learning_rate": 4.883258469024893e-05, "loss": 0.0997, "step": 4990 }, { "epoch": 0.7, "learning_rate": 4.883211678832117e-05, "loss": 0.0962, "step": 4992 }, { "epoch": 0.7, "learning_rate": 4.8831648886393416e-05, "loss": 0.0899, "step": 4994 }, { "epoch": 0.7, "learning_rate": 4.8831180984465655e-05, "loss": 0.0797, "step": 4996 }, { "epoch": 0.7, "learning_rate": 4.88307130825379e-05, "loss": 0.0936, "step": 4998 }, { "epoch": 0.7, "learning_rate": 4.883024518061015e-05, "loss": 0.0877, "step": 5000 }, { "epoch": 0.7, "eval_gen_len": 30.7586, "eval_loss": 1.0278133153915405, "eval_meteor": 0.0429, "eval_runtime": 14.877, "eval_samples_per_second": 3.899, "eval_steps_per_second": 0.538, "step": 5000 }, { "epoch": 0.7, "learning_rate": 4.882977727868239e-05, "loss": 0.1066, "step": 5002 }, { "epoch": 0.7, "learning_rate": 4.882930937675463e-05, "loss": 0.0858, "step": 5004 }, { "epoch": 0.7, "learning_rate": 4.882884147482688e-05, "loss": 0.092, "step": 5006 }, { "epoch": 0.7, "learning_rate": 4.8828373572899124e-05, "loss": 0.09, "step": 5008 }, { "epoch": 0.7, "learning_rate": 4.882790567097137e-05, "loss": 0.1196, "step": 5010 }, { "epoch": 0.7, "learning_rate": 4.882743776904361e-05, "loss": 0.1112, "step": 5012 }, { "epoch": 0.7, "learning_rate": 4.8826969867115855e-05, "loss": 0.102, "step": 5014 }, { "epoch": 0.7, "learning_rate": 4.88265019651881e-05, "loss": 0.0905, "step": 5016 }, { "epoch": 0.7, "learning_rate": 4.882603406326035e-05, "loss": 0.1077, "step": 5018 }, { "epoch": 0.7, "learning_rate": 4.8825566161332586e-05, "loss": 0.1116, "step": 5020 }, { "epoch": 0.7, "learning_rate": 4.882509825940483e-05, "loss": 0.1122, "step": 5022 }, { "epoch": 0.71, "learning_rate": 4.882463035747708e-05, "loss": 0.0991, "step": 5024 }, { "epoch": 0.71, "learning_rate": 4.8824162455549324e-05, "loss": 0.0988, "step": 5026 }, { "epoch": 0.71, "learning_rate": 4.882369455362156e-05, "loss": 0.0812, "step": 5028 }, { "epoch": 0.71, "learning_rate": 4.882322665169381e-05, "loss": 0.0862, "step": 5030 }, { "epoch": 0.71, "learning_rate": 4.882275874976605e-05, "loss": 0.09, "step": 5032 }, { "epoch": 0.71, "learning_rate": 4.88222908478383e-05, "loss": 0.0951, "step": 5034 }, { "epoch": 0.71, "learning_rate": 4.882182294591054e-05, "loss": 0.0941, "step": 5036 }, { "epoch": 0.71, "learning_rate": 4.8821355043982786e-05, "loss": 0.1444, "step": 5038 }, { "epoch": 0.71, "learning_rate": 4.8820887142055025e-05, "loss": 0.1041, "step": 5040 }, { "epoch": 0.71, "learning_rate": 4.882041924012728e-05, "loss": 0.1071, "step": 5042 }, { "epoch": 0.71, "learning_rate": 4.8819951338199517e-05, "loss": 0.0689, "step": 5044 }, { "epoch": 0.71, "learning_rate": 4.881948343627176e-05, "loss": 0.0874, "step": 5046 }, { "epoch": 0.71, "learning_rate": 4.8819015534344e-05, "loss": 0.1425, "step": 5048 }, { "epoch": 0.71, "learning_rate": 4.881854763241625e-05, "loss": 0.0879, "step": 5050 }, { "epoch": 0.71, "learning_rate": 4.881807973048849e-05, "loss": 0.0951, "step": 5052 }, { "epoch": 0.71, "learning_rate": 4.881761182856074e-05, "loss": 0.0918, "step": 5054 }, { "epoch": 0.71, "learning_rate": 4.881714392663298e-05, "loss": 0.0966, "step": 5056 }, { "epoch": 0.71, "learning_rate": 4.8816676024705224e-05, "loss": 0.0977, "step": 5058 }, { "epoch": 0.71, "learning_rate": 4.881620812277747e-05, "loss": 0.0908, "step": 5060 }, { "epoch": 0.71, "learning_rate": 4.8815740220849716e-05, "loss": 0.0909, "step": 5062 }, { "epoch": 0.71, "learning_rate": 4.8815272318921955e-05, "loss": 0.0929, "step": 5064 }, { "epoch": 0.71, "learning_rate": 4.8814804416994194e-05, "loss": 0.0875, "step": 5066 }, { "epoch": 0.71, "learning_rate": 4.881433651506645e-05, "loss": 0.0916, "step": 5068 }, { "epoch": 0.71, "learning_rate": 4.8813868613138686e-05, "loss": 0.1227, "step": 5070 }, { "epoch": 0.71, "learning_rate": 4.881340071121093e-05, "loss": 0.1054, "step": 5072 }, { "epoch": 0.71, "learning_rate": 4.881293280928317e-05, "loss": 0.0916, "step": 5074 }, { "epoch": 0.71, "learning_rate": 4.8812464907355424e-05, "loss": 0.0883, "step": 5076 }, { "epoch": 0.71, "learning_rate": 4.881199700542766e-05, "loss": 0.098, "step": 5078 }, { "epoch": 0.71, "learning_rate": 4.881152910349991e-05, "loss": 0.0925, "step": 5080 }, { "epoch": 0.71, "learning_rate": 4.881106120157215e-05, "loss": 0.0824, "step": 5082 }, { "epoch": 0.71, "learning_rate": 4.8810593299644394e-05, "loss": 0.0913, "step": 5084 }, { "epoch": 0.71, "learning_rate": 4.881012539771664e-05, "loss": 0.078, "step": 5086 }, { "epoch": 0.71, "learning_rate": 4.8809657495788886e-05, "loss": 0.0972, "step": 5088 }, { "epoch": 0.71, "learning_rate": 4.8809189593861125e-05, "loss": 0.0802, "step": 5090 }, { "epoch": 0.71, "learning_rate": 4.880872169193337e-05, "loss": 0.0854, "step": 5092 }, { "epoch": 0.72, "learning_rate": 4.880825379000562e-05, "loss": 0.0963, "step": 5094 }, { "epoch": 0.72, "learning_rate": 4.880778588807786e-05, "loss": 0.1212, "step": 5096 }, { "epoch": 0.72, "learning_rate": 4.88073179861501e-05, "loss": 0.0881, "step": 5098 }, { "epoch": 0.72, "learning_rate": 4.880685008422235e-05, "loss": 0.1133, "step": 5100 }, { "epoch": 0.72, "learning_rate": 4.8806382182294594e-05, "loss": 0.0829, "step": 5102 }, { "epoch": 0.72, "learning_rate": 4.880591428036684e-05, "loss": 0.0888, "step": 5104 }, { "epoch": 0.72, "learning_rate": 4.880544637843908e-05, "loss": 0.1003, "step": 5106 }, { "epoch": 0.72, "learning_rate": 4.8804978476511325e-05, "loss": 0.0716, "step": 5108 }, { "epoch": 0.72, "learning_rate": 4.880451057458357e-05, "loss": 0.1152, "step": 5110 }, { "epoch": 0.72, "learning_rate": 4.8804042672655817e-05, "loss": 0.082, "step": 5112 }, { "epoch": 0.72, "learning_rate": 4.8803574770728056e-05, "loss": 0.0875, "step": 5114 }, { "epoch": 0.72, "learning_rate": 4.88031068688003e-05, "loss": 0.1069, "step": 5116 }, { "epoch": 0.72, "learning_rate": 4.880263896687254e-05, "loss": 0.0879, "step": 5118 }, { "epoch": 0.72, "learning_rate": 4.8802171064944793e-05, "loss": 0.0848, "step": 5120 }, { "epoch": 0.72, "learning_rate": 4.880170316301703e-05, "loss": 0.0749, "step": 5122 }, { "epoch": 0.72, "learning_rate": 4.880123526108928e-05, "loss": 0.0772, "step": 5124 }, { "epoch": 0.72, "learning_rate": 4.880076735916152e-05, "loss": 0.0929, "step": 5126 }, { "epoch": 0.72, "learning_rate": 4.8800299457233763e-05, "loss": 0.1225, "step": 5128 }, { "epoch": 0.72, "learning_rate": 4.879983155530601e-05, "loss": 0.0864, "step": 5130 }, { "epoch": 0.72, "learning_rate": 4.8799363653378255e-05, "loss": 0.099, "step": 5132 }, { "epoch": 0.72, "learning_rate": 4.8798895751450494e-05, "loss": 0.0949, "step": 5134 }, { "epoch": 0.72, "learning_rate": 4.879842784952274e-05, "loss": 0.0723, "step": 5136 }, { "epoch": 0.72, "learning_rate": 4.8797959947594986e-05, "loss": 0.1157, "step": 5138 }, { "epoch": 0.72, "learning_rate": 4.879749204566723e-05, "loss": 0.0795, "step": 5140 }, { "epoch": 0.72, "learning_rate": 4.879702414373947e-05, "loss": 0.111, "step": 5142 }, { "epoch": 0.72, "learning_rate": 4.879655624181172e-05, "loss": 0.1169, "step": 5144 }, { "epoch": 0.72, "learning_rate": 4.879608833988396e-05, "loss": 0.0893, "step": 5146 }, { "epoch": 0.72, "learning_rate": 4.879562043795621e-05, "loss": 0.1175, "step": 5148 }, { "epoch": 0.72, "learning_rate": 4.879515253602845e-05, "loss": 0.1116, "step": 5150 }, { "epoch": 0.72, "learning_rate": 4.8794684634100694e-05, "loss": 0.1105, "step": 5152 }, { "epoch": 0.72, "learning_rate": 4.879421673217294e-05, "loss": 0.1006, "step": 5154 }, { "epoch": 0.72, "learning_rate": 4.8793748830245186e-05, "loss": 0.0852, "step": 5156 }, { "epoch": 0.72, "learning_rate": 4.8793280928317425e-05, "loss": 0.0972, "step": 5158 }, { "epoch": 0.72, "learning_rate": 4.879281302638967e-05, "loss": 0.1039, "step": 5160 }, { "epoch": 0.72, "learning_rate": 4.879234512446191e-05, "loss": 0.1021, "step": 5162 }, { "epoch": 0.72, "learning_rate": 4.879187722253416e-05, "loss": 0.1008, "step": 5164 }, { "epoch": 0.73, "learning_rate": 4.87914093206064e-05, "loss": 0.0823, "step": 5166 }, { "epoch": 0.73, "learning_rate": 4.879094141867865e-05, "loss": 0.0925, "step": 5168 }, { "epoch": 0.73, "learning_rate": 4.879047351675089e-05, "loss": 0.1156, "step": 5170 }, { "epoch": 0.73, "learning_rate": 4.879000561482314e-05, "loss": 0.1213, "step": 5172 }, { "epoch": 0.73, "learning_rate": 4.878953771289538e-05, "loss": 0.1397, "step": 5174 }, { "epoch": 0.73, "learning_rate": 4.8789069810967625e-05, "loss": 0.0847, "step": 5176 }, { "epoch": 0.73, "learning_rate": 4.8788601909039864e-05, "loss": 0.0956, "step": 5178 }, { "epoch": 0.73, "learning_rate": 4.878813400711211e-05, "loss": 0.1024, "step": 5180 }, { "epoch": 0.73, "learning_rate": 4.8787666105184356e-05, "loss": 0.0907, "step": 5182 }, { "epoch": 0.73, "learning_rate": 4.87871982032566e-05, "loss": 0.0927, "step": 5184 }, { "epoch": 0.73, "learning_rate": 4.878673030132884e-05, "loss": 0.0882, "step": 5186 }, { "epoch": 0.73, "learning_rate": 4.878626239940109e-05, "loss": 0.0587, "step": 5188 }, { "epoch": 0.73, "learning_rate": 4.878579449747333e-05, "loss": 0.0883, "step": 5190 }, { "epoch": 0.73, "learning_rate": 4.878532659554558e-05, "loss": 0.0874, "step": 5192 }, { "epoch": 0.73, "learning_rate": 4.878485869361782e-05, "loss": 0.0828, "step": 5194 }, { "epoch": 0.73, "learning_rate": 4.8784390791690064e-05, "loss": 0.064, "step": 5196 }, { "epoch": 0.73, "learning_rate": 4.878392288976231e-05, "loss": 0.0796, "step": 5198 }, { "epoch": 0.73, "learning_rate": 4.8783454987834555e-05, "loss": 0.0894, "step": 5200 }, { "epoch": 0.73, "learning_rate": 4.8782987085906794e-05, "loss": 0.0972, "step": 5202 }, { "epoch": 0.73, "learning_rate": 4.878251918397904e-05, "loss": 0.1182, "step": 5204 }, { "epoch": 0.73, "learning_rate": 4.8782051282051286e-05, "loss": 0.0831, "step": 5206 }, { "epoch": 0.73, "learning_rate": 4.878158338012353e-05, "loss": 0.096, "step": 5208 }, { "epoch": 0.73, "learning_rate": 4.878111547819577e-05, "loss": 0.0795, "step": 5210 }, { "epoch": 0.73, "learning_rate": 4.878064757626802e-05, "loss": 0.1084, "step": 5212 }, { "epoch": 0.73, "learning_rate": 4.8780179674340256e-05, "loss": 0.0898, "step": 5214 }, { "epoch": 0.73, "learning_rate": 4.877971177241251e-05, "loss": 0.095, "step": 5216 }, { "epoch": 0.73, "learning_rate": 4.877924387048475e-05, "loss": 0.0821, "step": 5218 }, { "epoch": 0.73, "learning_rate": 4.8778775968556994e-05, "loss": 0.1048, "step": 5220 }, { "epoch": 0.73, "learning_rate": 4.877830806662923e-05, "loss": 0.0875, "step": 5222 }, { "epoch": 0.73, "learning_rate": 4.8777840164701486e-05, "loss": 0.0855, "step": 5224 }, { "epoch": 0.73, "learning_rate": 4.8777372262773725e-05, "loss": 0.0792, "step": 5226 }, { "epoch": 0.73, "learning_rate": 4.877690436084597e-05, "loss": 0.1225, "step": 5228 }, { "epoch": 0.73, "learning_rate": 4.877643645891821e-05, "loss": 0.1503, "step": 5230 }, { "epoch": 0.73, "learning_rate": 4.8775968556990456e-05, "loss": 0.0773, "step": 5232 }, { "epoch": 0.73, "learning_rate": 4.87755006550627e-05, "loss": 0.1163, "step": 5234 }, { "epoch": 0.73, "learning_rate": 4.877503275313495e-05, "loss": 0.0889, "step": 5236 }, { "epoch": 0.74, "learning_rate": 4.877456485120719e-05, "loss": 0.1115, "step": 5238 }, { "epoch": 0.74, "learning_rate": 4.877409694927943e-05, "loss": 0.1228, "step": 5240 }, { "epoch": 0.74, "learning_rate": 4.877362904735168e-05, "loss": 0.0851, "step": 5242 }, { "epoch": 0.74, "learning_rate": 4.8773161145423925e-05, "loss": 0.105, "step": 5244 }, { "epoch": 0.74, "learning_rate": 4.8772693243496164e-05, "loss": 0.0942, "step": 5246 }, { "epoch": 0.74, "learning_rate": 4.877222534156841e-05, "loss": 0.0972, "step": 5248 }, { "epoch": 0.74, "learning_rate": 4.8771757439640656e-05, "loss": 0.1146, "step": 5250 }, { "epoch": 0.74, "learning_rate": 4.87712895377129e-05, "loss": 0.1119, "step": 5252 }, { "epoch": 0.74, "learning_rate": 4.877082163578514e-05, "loss": 0.1169, "step": 5254 }, { "epoch": 0.74, "learning_rate": 4.877035373385739e-05, "loss": 0.1061, "step": 5256 }, { "epoch": 0.74, "learning_rate": 4.876988583192963e-05, "loss": 0.0853, "step": 5258 }, { "epoch": 0.74, "learning_rate": 4.876941793000188e-05, "loss": 0.1577, "step": 5260 }, { "epoch": 0.74, "learning_rate": 4.876895002807412e-05, "loss": 0.0861, "step": 5262 }, { "epoch": 0.74, "learning_rate": 4.8768482126146364e-05, "loss": 0.0989, "step": 5264 }, { "epoch": 0.74, "learning_rate": 4.87680142242186e-05, "loss": 0.1048, "step": 5266 }, { "epoch": 0.74, "learning_rate": 4.8767546322290855e-05, "loss": 0.088, "step": 5268 }, { "epoch": 0.74, "learning_rate": 4.8767078420363094e-05, "loss": 0.1005, "step": 5270 }, { "epoch": 0.74, "learning_rate": 4.876661051843534e-05, "loss": 0.0915, "step": 5272 }, { "epoch": 0.74, "learning_rate": 4.876614261650758e-05, "loss": 0.0972, "step": 5274 }, { "epoch": 0.74, "learning_rate": 4.8765674714579825e-05, "loss": 0.0944, "step": 5276 }, { "epoch": 0.74, "learning_rate": 4.876520681265207e-05, "loss": 0.0973, "step": 5278 }, { "epoch": 0.74, "learning_rate": 4.876473891072432e-05, "loss": 0.0953, "step": 5280 }, { "epoch": 0.74, "learning_rate": 4.8764271008796556e-05, "loss": 0.0862, "step": 5282 }, { "epoch": 0.74, "learning_rate": 4.87638031068688e-05, "loss": 0.098, "step": 5284 }, { "epoch": 0.74, "learning_rate": 4.876333520494105e-05, "loss": 0.1076, "step": 5286 }, { "epoch": 0.74, "learning_rate": 4.8762867303013294e-05, "loss": 0.0939, "step": 5288 }, { "epoch": 0.74, "learning_rate": 4.876239940108553e-05, "loss": 0.0821, "step": 5290 }, { "epoch": 0.74, "learning_rate": 4.876193149915778e-05, "loss": 0.0927, "step": 5292 }, { "epoch": 0.74, "learning_rate": 4.8761463597230025e-05, "loss": 0.0995, "step": 5294 }, { "epoch": 0.74, "learning_rate": 4.876099569530227e-05, "loss": 0.0986, "step": 5296 }, { "epoch": 0.74, "learning_rate": 4.876052779337451e-05, "loss": 0.0788, "step": 5298 }, { "epoch": 0.74, "learning_rate": 4.8760059891446756e-05, "loss": 0.0971, "step": 5300 }, { "epoch": 0.74, "learning_rate": 4.8759591989519e-05, "loss": 0.0936, "step": 5302 }, { "epoch": 0.74, "learning_rate": 4.875912408759125e-05, "loss": 0.1128, "step": 5304 }, { "epoch": 0.74, "learning_rate": 4.875865618566349e-05, "loss": 0.1094, "step": 5306 }, { "epoch": 0.75, "learning_rate": 4.875818828373573e-05, "loss": 0.1193, "step": 5308 }, { "epoch": 0.75, "learning_rate": 4.875772038180797e-05, "loss": 0.0843, "step": 5310 }, { "epoch": 0.75, "learning_rate": 4.8757252479880225e-05, "loss": 0.0942, "step": 5312 }, { "epoch": 0.75, "learning_rate": 4.8756784577952464e-05, "loss": 0.1054, "step": 5314 }, { "epoch": 0.75, "learning_rate": 4.875631667602471e-05, "loss": 0.1116, "step": 5316 }, { "epoch": 0.75, "learning_rate": 4.875584877409695e-05, "loss": 0.0969, "step": 5318 }, { "epoch": 0.75, "learning_rate": 4.8755380872169195e-05, "loss": 0.099, "step": 5320 }, { "epoch": 0.75, "learning_rate": 4.875491297024144e-05, "loss": 0.1027, "step": 5322 }, { "epoch": 0.75, "learning_rate": 4.875444506831368e-05, "loss": 0.0683, "step": 5324 }, { "epoch": 0.75, "learning_rate": 4.8753977166385926e-05, "loss": 0.1208, "step": 5326 }, { "epoch": 0.75, "learning_rate": 4.875350926445817e-05, "loss": 0.0801, "step": 5328 }, { "epoch": 0.75, "learning_rate": 4.875304136253042e-05, "loss": 0.0926, "step": 5330 }, { "epoch": 0.75, "learning_rate": 4.875257346060266e-05, "loss": 0.0755, "step": 5332 }, { "epoch": 0.75, "learning_rate": 4.87521055586749e-05, "loss": 0.0812, "step": 5334 }, { "epoch": 0.75, "learning_rate": 4.875163765674715e-05, "loss": 0.1073, "step": 5336 }, { "epoch": 0.75, "learning_rate": 4.8751169754819395e-05, "loss": 0.1058, "step": 5338 }, { "epoch": 0.75, "learning_rate": 4.8750701852891634e-05, "loss": 0.0831, "step": 5340 }, { "epoch": 0.75, "learning_rate": 4.875023395096388e-05, "loss": 0.1217, "step": 5342 }, { "epoch": 0.75, "learning_rate": 4.874976604903612e-05, "loss": 0.0912, "step": 5344 }, { "epoch": 0.75, "learning_rate": 4.874929814710837e-05, "loss": 0.0806, "step": 5346 }, { "epoch": 0.75, "learning_rate": 4.874883024518061e-05, "loss": 0.0736, "step": 5348 }, { "epoch": 0.75, "learning_rate": 4.8748362343252856e-05, "loss": 0.1044, "step": 5350 }, { "epoch": 0.75, "learning_rate": 4.8747894441325096e-05, "loss": 0.0607, "step": 5352 }, { "epoch": 0.75, "learning_rate": 4.874742653939735e-05, "loss": 0.0997, "step": 5354 }, { "epoch": 0.75, "learning_rate": 4.874695863746959e-05, "loss": 0.1129, "step": 5356 }, { "epoch": 0.75, "learning_rate": 4.874649073554183e-05, "loss": 0.1079, "step": 5358 }, { "epoch": 0.75, "learning_rate": 4.874602283361407e-05, "loss": 0.1107, "step": 5360 }, { "epoch": 0.75, "learning_rate": 4.874555493168632e-05, "loss": 0.1014, "step": 5362 }, { "epoch": 0.75, "learning_rate": 4.8745087029758564e-05, "loss": 0.0788, "step": 5364 }, { "epoch": 0.75, "learning_rate": 4.874461912783081e-05, "loss": 0.0861, "step": 5366 }, { "epoch": 0.75, "learning_rate": 4.874415122590305e-05, "loss": 0.0941, "step": 5368 }, { "epoch": 0.75, "learning_rate": 4.8743683323975295e-05, "loss": 0.1025, "step": 5370 }, { "epoch": 0.75, "learning_rate": 4.874321542204754e-05, "loss": 0.0818, "step": 5372 }, { "epoch": 0.75, "learning_rate": 4.874274752011979e-05, "loss": 0.0861, "step": 5374 }, { "epoch": 0.75, "learning_rate": 4.8742279618192026e-05, "loss": 0.0949, "step": 5376 }, { "epoch": 0.75, "learning_rate": 4.874181171626427e-05, "loss": 0.0917, "step": 5378 }, { "epoch": 0.76, "learning_rate": 4.874134381433652e-05, "loss": 0.0911, "step": 5380 }, { "epoch": 0.76, "learning_rate": 4.8740875912408764e-05, "loss": 0.1092, "step": 5382 }, { "epoch": 0.76, "learning_rate": 4.8740408010481e-05, "loss": 0.0843, "step": 5384 }, { "epoch": 0.76, "learning_rate": 4.873994010855325e-05, "loss": 0.1067, "step": 5386 }, { "epoch": 0.76, "learning_rate": 4.8739472206625495e-05, "loss": 0.1009, "step": 5388 }, { "epoch": 0.76, "learning_rate": 4.873900430469774e-05, "loss": 0.0771, "step": 5390 }, { "epoch": 0.76, "learning_rate": 4.873853640276998e-05, "loss": 0.0855, "step": 5392 }, { "epoch": 0.76, "learning_rate": 4.8738068500842226e-05, "loss": 0.0999, "step": 5394 }, { "epoch": 0.76, "learning_rate": 4.8737600598914465e-05, "loss": 0.0723, "step": 5396 }, { "epoch": 0.76, "learning_rate": 4.873713269698672e-05, "loss": 0.0921, "step": 5398 }, { "epoch": 0.76, "learning_rate": 4.873666479505896e-05, "loss": 0.1074, "step": 5400 }, { "epoch": 0.76, "learning_rate": 4.87361968931312e-05, "loss": 0.1037, "step": 5402 }, { "epoch": 0.76, "learning_rate": 4.873572899120344e-05, "loss": 0.1177, "step": 5404 }, { "epoch": 0.76, "learning_rate": 4.873526108927569e-05, "loss": 0.0802, "step": 5406 }, { "epoch": 0.76, "learning_rate": 4.8734793187347934e-05, "loss": 0.1014, "step": 5408 }, { "epoch": 0.76, "learning_rate": 4.873432528542018e-05, "loss": 0.1004, "step": 5410 }, { "epoch": 0.76, "learning_rate": 4.873385738349242e-05, "loss": 0.0965, "step": 5412 }, { "epoch": 0.76, "learning_rate": 4.8733389481564665e-05, "loss": 0.1153, "step": 5414 }, { "epoch": 0.76, "learning_rate": 4.873292157963691e-05, "loss": 0.0954, "step": 5416 }, { "epoch": 0.76, "learning_rate": 4.8732453677709156e-05, "loss": 0.1108, "step": 5418 }, { "epoch": 0.76, "learning_rate": 4.8731985775781396e-05, "loss": 0.0833, "step": 5420 }, { "epoch": 0.76, "learning_rate": 4.873151787385364e-05, "loss": 0.092, "step": 5422 }, { "epoch": 0.76, "learning_rate": 4.873104997192589e-05, "loss": 0.0987, "step": 5424 }, { "epoch": 0.76, "learning_rate": 4.873058206999813e-05, "loss": 0.1232, "step": 5426 }, { "epoch": 0.76, "learning_rate": 4.873011416807037e-05, "loss": 0.0961, "step": 5428 }, { "epoch": 0.76, "learning_rate": 4.872964626614262e-05, "loss": 0.0977, "step": 5430 }, { "epoch": 0.76, "learning_rate": 4.8729178364214864e-05, "loss": 0.1228, "step": 5432 }, { "epoch": 0.76, "learning_rate": 4.872871046228711e-05, "loss": 0.0832, "step": 5434 }, { "epoch": 0.76, "learning_rate": 4.872824256035935e-05, "loss": 0.0934, "step": 5436 }, { "epoch": 0.76, "learning_rate": 4.8727774658431595e-05, "loss": 0.1066, "step": 5438 }, { "epoch": 0.76, "learning_rate": 4.8727306756503834e-05, "loss": 0.0877, "step": 5440 }, { "epoch": 0.76, "learning_rate": 4.872683885457609e-05, "loss": 0.0959, "step": 5442 }, { "epoch": 0.76, "learning_rate": 4.8726370952648326e-05, "loss": 0.1042, "step": 5444 }, { "epoch": 0.76, "learning_rate": 4.872590305072057e-05, "loss": 0.1263, "step": 5446 }, { "epoch": 0.76, "learning_rate": 4.872543514879281e-05, "loss": 0.1156, "step": 5448 }, { "epoch": 0.77, "learning_rate": 4.8724967246865064e-05, "loss": 0.102, "step": 5450 }, { "epoch": 0.77, "learning_rate": 4.87244993449373e-05, "loss": 0.0916, "step": 5452 }, { "epoch": 0.77, "learning_rate": 4.872403144300955e-05, "loss": 0.1195, "step": 5454 }, { "epoch": 0.77, "learning_rate": 4.872356354108179e-05, "loss": 0.0908, "step": 5456 }, { "epoch": 0.77, "learning_rate": 4.8723095639154034e-05, "loss": 0.0879, "step": 5458 }, { "epoch": 0.77, "learning_rate": 4.872262773722628e-05, "loss": 0.0976, "step": 5460 }, { "epoch": 0.77, "learning_rate": 4.8722159835298526e-05, "loss": 0.0783, "step": 5462 }, { "epoch": 0.77, "learning_rate": 4.8721691933370765e-05, "loss": 0.1068, "step": 5464 }, { "epoch": 0.77, "learning_rate": 4.872122403144301e-05, "loss": 0.0795, "step": 5466 }, { "epoch": 0.77, "learning_rate": 4.872075612951526e-05, "loss": 0.0846, "step": 5468 }, { "epoch": 0.77, "learning_rate": 4.87202882275875e-05, "loss": 0.0909, "step": 5470 }, { "epoch": 0.77, "learning_rate": 4.871982032565974e-05, "loss": 0.0914, "step": 5472 }, { "epoch": 0.77, "learning_rate": 4.871935242373199e-05, "loss": 0.144, "step": 5474 }, { "epoch": 0.77, "learning_rate": 4.8718884521804234e-05, "loss": 0.0967, "step": 5476 }, { "epoch": 0.77, "learning_rate": 4.871841661987648e-05, "loss": 0.077, "step": 5478 }, { "epoch": 0.77, "learning_rate": 4.871794871794872e-05, "loss": 0.0871, "step": 5480 }, { "epoch": 0.77, "learning_rate": 4.8717480816020965e-05, "loss": 0.1024, "step": 5482 }, { "epoch": 0.77, "learning_rate": 4.871701291409321e-05, "loss": 0.1155, "step": 5484 }, { "epoch": 0.77, "learning_rate": 4.8716545012165456e-05, "loss": 0.1163, "step": 5486 }, { "epoch": 0.77, "learning_rate": 4.8716077110237696e-05, "loss": 0.1136, "step": 5488 }, { "epoch": 0.77, "learning_rate": 4.871560920830994e-05, "loss": 0.1093, "step": 5490 }, { "epoch": 0.77, "learning_rate": 4.871514130638218e-05, "loss": 0.0954, "step": 5492 }, { "epoch": 0.77, "learning_rate": 4.871467340445443e-05, "loss": 0.0912, "step": 5494 }, { "epoch": 0.77, "learning_rate": 4.871420550252667e-05, "loss": 0.1163, "step": 5496 }, { "epoch": 0.77, "learning_rate": 4.871373760059892e-05, "loss": 0.1029, "step": 5498 }, { "epoch": 0.77, "learning_rate": 4.871326969867116e-05, "loss": 0.0773, "step": 5500 }, { "epoch": 0.77, "learning_rate": 4.871280179674341e-05, "loss": 0.0908, "step": 5502 }, { "epoch": 0.77, "learning_rate": 4.871233389481565e-05, "loss": 0.0809, "step": 5504 }, { "epoch": 0.77, "learning_rate": 4.8711865992887895e-05, "loss": 0.1013, "step": 5506 }, { "epoch": 0.77, "learning_rate": 4.8711398090960134e-05, "loss": 0.0955, "step": 5508 }, { "epoch": 0.77, "learning_rate": 4.871093018903238e-05, "loss": 0.0844, "step": 5510 }, { "epoch": 0.77, "learning_rate": 4.8710462287104626e-05, "loss": 0.1063, "step": 5512 }, { "epoch": 0.77, "learning_rate": 4.870999438517687e-05, "loss": 0.0918, "step": 5514 }, { "epoch": 0.77, "learning_rate": 4.870952648324911e-05, "loss": 0.0901, "step": 5516 }, { "epoch": 0.77, "learning_rate": 4.870905858132136e-05, "loss": 0.0831, "step": 5518 }, { "epoch": 0.77, "learning_rate": 4.87085906793936e-05, "loss": 0.0877, "step": 5520 }, { "epoch": 0.78, "learning_rate": 4.870812277746585e-05, "loss": 0.1052, "step": 5522 }, { "epoch": 0.78, "learning_rate": 4.870765487553809e-05, "loss": 0.0827, "step": 5524 }, { "epoch": 0.78, "learning_rate": 4.8707186973610334e-05, "loss": 0.1171, "step": 5526 }, { "epoch": 0.78, "learning_rate": 4.870671907168258e-05, "loss": 0.0911, "step": 5528 }, { "epoch": 0.78, "learning_rate": 4.8706251169754826e-05, "loss": 0.129, "step": 5530 }, { "epoch": 0.78, "learning_rate": 4.8705783267827065e-05, "loss": 0.0777, "step": 5532 }, { "epoch": 0.78, "learning_rate": 4.870531536589931e-05, "loss": 0.0703, "step": 5534 }, { "epoch": 0.78, "learning_rate": 4.870484746397156e-05, "loss": 0.1071, "step": 5536 }, { "epoch": 0.78, "learning_rate": 4.87043795620438e-05, "loss": 0.0731, "step": 5538 }, { "epoch": 0.78, "learning_rate": 4.870391166011604e-05, "loss": 0.0921, "step": 5540 }, { "epoch": 0.78, "learning_rate": 4.870344375818829e-05, "loss": 0.0842, "step": 5542 }, { "epoch": 0.78, "learning_rate": 4.870297585626053e-05, "loss": 0.1026, "step": 5544 }, { "epoch": 0.78, "learning_rate": 4.870250795433278e-05, "loss": 0.0941, "step": 5546 }, { "epoch": 0.78, "learning_rate": 4.870204005240502e-05, "loss": 0.0942, "step": 5548 }, { "epoch": 0.78, "learning_rate": 4.8701572150477265e-05, "loss": 0.0914, "step": 5550 }, { "epoch": 0.78, "learning_rate": 4.8701104248549504e-05, "loss": 0.1367, "step": 5552 }, { "epoch": 0.78, "learning_rate": 4.870063634662175e-05, "loss": 0.0841, "step": 5554 }, { "epoch": 0.78, "learning_rate": 4.8700168444693996e-05, "loss": 0.1086, "step": 5556 }, { "epoch": 0.78, "learning_rate": 4.869970054276624e-05, "loss": 0.086, "step": 5558 }, { "epoch": 0.78, "learning_rate": 4.869923264083848e-05, "loss": 0.1065, "step": 5560 }, { "epoch": 0.78, "learning_rate": 4.8698764738910727e-05, "loss": 0.0951, "step": 5562 }, { "epoch": 0.78, "learning_rate": 4.869829683698297e-05, "loss": 0.0806, "step": 5564 }, { "epoch": 0.78, "learning_rate": 4.869782893505522e-05, "loss": 0.1128, "step": 5566 }, { "epoch": 0.78, "learning_rate": 4.869736103312746e-05, "loss": 0.1105, "step": 5568 }, { "epoch": 0.78, "learning_rate": 4.8696893131199703e-05, "loss": 0.0936, "step": 5570 }, { "epoch": 0.78, "learning_rate": 4.869642522927195e-05, "loss": 0.0906, "step": 5572 }, { "epoch": 0.78, "learning_rate": 4.869595732734419e-05, "loss": 0.0979, "step": 5574 }, { "epoch": 0.78, "learning_rate": 4.8695489425416434e-05, "loss": 0.1293, "step": 5576 }, { "epoch": 0.78, "learning_rate": 4.8695021523488674e-05, "loss": 0.0945, "step": 5578 }, { "epoch": 0.78, "learning_rate": 4.8694553621560926e-05, "loss": 0.1071, "step": 5580 }, { "epoch": 0.78, "learning_rate": 4.8694085719633165e-05, "loss": 0.0819, "step": 5582 }, { "epoch": 0.78, "learning_rate": 4.869361781770541e-05, "loss": 0.09, "step": 5584 }, { "epoch": 0.78, "learning_rate": 4.869314991577765e-05, "loss": 0.094, "step": 5586 }, { "epoch": 0.78, "learning_rate": 4.8692682013849896e-05, "loss": 0.0905, "step": 5588 }, { "epoch": 0.78, "learning_rate": 4.869221411192214e-05, "loss": 0.1133, "step": 5590 }, { "epoch": 0.78, "learning_rate": 4.869174620999439e-05, "loss": 0.1095, "step": 5592 }, { "epoch": 0.79, "learning_rate": 4.869127830806663e-05, "loss": 0.1052, "step": 5594 }, { "epoch": 0.79, "learning_rate": 4.869081040613887e-05, "loss": 0.0891, "step": 5596 }, { "epoch": 0.79, "learning_rate": 4.869034250421112e-05, "loss": 0.1034, "step": 5598 }, { "epoch": 0.79, "learning_rate": 4.8689874602283365e-05, "loss": 0.0836, "step": 5600 }, { "epoch": 0.79, "learning_rate": 4.8689406700355604e-05, "loss": 0.0989, "step": 5602 }, { "epoch": 0.79, "learning_rate": 4.868893879842785e-05, "loss": 0.0892, "step": 5604 }, { "epoch": 0.79, "learning_rate": 4.8688470896500096e-05, "loss": 0.0902, "step": 5606 }, { "epoch": 0.79, "learning_rate": 4.868800299457234e-05, "loss": 0.1037, "step": 5608 }, { "epoch": 0.79, "learning_rate": 4.868753509264458e-05, "loss": 0.1106, "step": 5610 }, { "epoch": 0.79, "learning_rate": 4.868706719071683e-05, "loss": 0.1128, "step": 5612 }, { "epoch": 0.79, "learning_rate": 4.868659928878907e-05, "loss": 0.1263, "step": 5614 }, { "epoch": 0.79, "learning_rate": 4.868613138686132e-05, "loss": 0.1297, "step": 5616 }, { "epoch": 0.79, "learning_rate": 4.868566348493356e-05, "loss": 0.116, "step": 5618 }, { "epoch": 0.79, "learning_rate": 4.8685195583005804e-05, "loss": 0.1371, "step": 5620 }, { "epoch": 0.79, "learning_rate": 4.868472768107804e-05, "loss": 0.1183, "step": 5622 }, { "epoch": 0.79, "learning_rate": 4.8684259779150296e-05, "loss": 0.0773, "step": 5624 }, { "epoch": 0.79, "learning_rate": 4.8683791877222535e-05, "loss": 0.1158, "step": 5626 }, { "epoch": 0.79, "learning_rate": 4.868332397529478e-05, "loss": 0.0743, "step": 5628 }, { "epoch": 0.79, "learning_rate": 4.868285607336702e-05, "loss": 0.0896, "step": 5630 }, { "epoch": 0.79, "learning_rate": 4.868238817143927e-05, "loss": 0.1097, "step": 5632 }, { "epoch": 0.79, "learning_rate": 4.868192026951151e-05, "loss": 0.0834, "step": 5634 }, { "epoch": 0.79, "learning_rate": 4.868145236758376e-05, "loss": 0.0808, "step": 5636 }, { "epoch": 0.79, "learning_rate": 4.8680984465656e-05, "loss": 0.0934, "step": 5638 }, { "epoch": 0.79, "learning_rate": 4.868051656372824e-05, "loss": 0.1005, "step": 5640 }, { "epoch": 0.79, "learning_rate": 4.868004866180049e-05, "loss": 0.1217, "step": 5642 }, { "epoch": 0.79, "learning_rate": 4.8679580759872734e-05, "loss": 0.0884, "step": 5644 }, { "epoch": 0.79, "learning_rate": 4.8679112857944974e-05, "loss": 0.1019, "step": 5646 }, { "epoch": 0.79, "learning_rate": 4.867864495601722e-05, "loss": 0.0945, "step": 5648 }, { "epoch": 0.79, "learning_rate": 4.8678177054089465e-05, "loss": 0.093, "step": 5650 }, { "epoch": 0.79, "learning_rate": 4.867770915216171e-05, "loss": 0.1392, "step": 5652 }, { "epoch": 0.79, "learning_rate": 4.867724125023395e-05, "loss": 0.0792, "step": 5654 }, { "epoch": 0.79, "learning_rate": 4.8676773348306196e-05, "loss": 0.1005, "step": 5656 }, { "epoch": 0.79, "learning_rate": 4.867630544637844e-05, "loss": 0.1035, "step": 5658 }, { "epoch": 0.79, "learning_rate": 4.867583754445069e-05, "loss": 0.085, "step": 5660 }, { "epoch": 0.79, "learning_rate": 4.867536964252293e-05, "loss": 0.1051, "step": 5662 }, { "epoch": 0.8, "learning_rate": 4.867490174059517e-05, "loss": 0.0851, "step": 5664 }, { "epoch": 0.8, "learning_rate": 4.867443383866742e-05, "loss": 0.0912, "step": 5666 }, { "epoch": 0.8, "learning_rate": 4.8673965936739665e-05, "loss": 0.0964, "step": 5668 }, { "epoch": 0.8, "learning_rate": 4.8673498034811904e-05, "loss": 0.0948, "step": 5670 }, { "epoch": 0.8, "learning_rate": 4.867303013288415e-05, "loss": 0.1255, "step": 5672 }, { "epoch": 0.8, "learning_rate": 4.867256223095639e-05, "loss": 0.0945, "step": 5674 }, { "epoch": 0.8, "learning_rate": 4.867209432902864e-05, "loss": 0.1198, "step": 5676 }, { "epoch": 0.8, "learning_rate": 4.867162642710088e-05, "loss": 0.0898, "step": 5678 }, { "epoch": 0.8, "learning_rate": 4.867115852517313e-05, "loss": 0.0981, "step": 5680 }, { "epoch": 0.8, "learning_rate": 4.8670690623245366e-05, "loss": 0.0937, "step": 5682 }, { "epoch": 0.8, "learning_rate": 4.867022272131762e-05, "loss": 0.0942, "step": 5684 }, { "epoch": 0.8, "learning_rate": 4.866975481938986e-05, "loss": 0.0784, "step": 5686 }, { "epoch": 0.8, "learning_rate": 4.8669286917462104e-05, "loss": 0.132, "step": 5688 }, { "epoch": 0.8, "learning_rate": 4.866881901553434e-05, "loss": 0.1304, "step": 5690 }, { "epoch": 0.8, "learning_rate": 4.866835111360659e-05, "loss": 0.0966, "step": 5692 }, { "epoch": 0.8, "learning_rate": 4.8667883211678835e-05, "loss": 0.1298, "step": 5694 }, { "epoch": 0.8, "learning_rate": 4.866741530975108e-05, "loss": 0.0615, "step": 5696 }, { "epoch": 0.8, "learning_rate": 4.866694740782332e-05, "loss": 0.075, "step": 5698 }, { "epoch": 0.8, "learning_rate": 4.8666479505895566e-05, "loss": 0.11, "step": 5700 }, { "epoch": 0.8, "learning_rate": 4.866601160396781e-05, "loss": 0.0993, "step": 5702 }, { "epoch": 0.8, "learning_rate": 4.866554370204006e-05, "loss": 0.0881, "step": 5704 }, { "epoch": 0.8, "learning_rate": 4.86650758001123e-05, "loss": 0.0967, "step": 5706 }, { "epoch": 0.8, "learning_rate": 4.866460789818454e-05, "loss": 0.1145, "step": 5708 }, { "epoch": 0.8, "learning_rate": 4.866413999625679e-05, "loss": 0.1109, "step": 5710 }, { "epoch": 0.8, "learning_rate": 4.8663672094329034e-05, "loss": 0.1026, "step": 5712 }, { "epoch": 0.8, "learning_rate": 4.8663204192401274e-05, "loss": 0.1205, "step": 5714 }, { "epoch": 0.8, "learning_rate": 4.866273629047352e-05, "loss": 0.0781, "step": 5716 }, { "epoch": 0.8, "learning_rate": 4.866226838854576e-05, "loss": 0.1263, "step": 5718 }, { "epoch": 0.8, "learning_rate": 4.866180048661801e-05, "loss": 0.0782, "step": 5720 }, { "epoch": 0.8, "learning_rate": 4.866133258469025e-05, "loss": 0.092, "step": 5722 }, { "epoch": 0.8, "learning_rate": 4.8660864682762496e-05, "loss": 0.084, "step": 5724 }, { "epoch": 0.8, "learning_rate": 4.8660396780834735e-05, "loss": 0.0845, "step": 5726 }, { "epoch": 0.8, "learning_rate": 4.865992887890699e-05, "loss": 0.1016, "step": 5728 }, { "epoch": 0.8, "learning_rate": 4.865946097697923e-05, "loss": 0.1108, "step": 5730 }, { "epoch": 0.8, "learning_rate": 4.865899307505147e-05, "loss": 0.1022, "step": 5732 }, { "epoch": 0.8, "learning_rate": 4.865852517312371e-05, "loss": 0.0797, "step": 5734 }, { "epoch": 0.81, "learning_rate": 4.865805727119596e-05, "loss": 0.1102, "step": 5736 }, { "epoch": 0.81, "learning_rate": 4.8657589369268204e-05, "loss": 0.1081, "step": 5738 }, { "epoch": 0.81, "learning_rate": 4.865712146734045e-05, "loss": 0.1162, "step": 5740 }, { "epoch": 0.81, "learning_rate": 4.865665356541269e-05, "loss": 0.1079, "step": 5742 }, { "epoch": 0.81, "learning_rate": 4.8656185663484935e-05, "loss": 0.0622, "step": 5744 }, { "epoch": 0.81, "learning_rate": 4.865571776155718e-05, "loss": 0.0808, "step": 5746 }, { "epoch": 0.81, "learning_rate": 4.865524985962943e-05, "loss": 0.0969, "step": 5748 }, { "epoch": 0.81, "learning_rate": 4.8654781957701666e-05, "loss": 0.1134, "step": 5750 }, { "epoch": 0.81, "learning_rate": 4.865431405577391e-05, "loss": 0.0911, "step": 5752 }, { "epoch": 0.81, "learning_rate": 4.865384615384616e-05, "loss": 0.1119, "step": 5754 }, { "epoch": 0.81, "learning_rate": 4.8653378251918404e-05, "loss": 0.1267, "step": 5756 }, { "epoch": 0.81, "learning_rate": 4.865291034999064e-05, "loss": 0.1022, "step": 5758 }, { "epoch": 0.81, "learning_rate": 4.865244244806289e-05, "loss": 0.0947, "step": 5760 }, { "epoch": 0.81, "learning_rate": 4.8651974546135135e-05, "loss": 0.0856, "step": 5762 }, { "epoch": 0.81, "learning_rate": 4.865150664420738e-05, "loss": 0.1083, "step": 5764 }, { "epoch": 0.81, "learning_rate": 4.865103874227962e-05, "loss": 0.1219, "step": 5766 }, { "epoch": 0.81, "learning_rate": 4.8650570840351866e-05, "loss": 0.1151, "step": 5768 }, { "epoch": 0.81, "learning_rate": 4.8650102938424105e-05, "loss": 0.1197, "step": 5770 }, { "epoch": 0.81, "learning_rate": 4.864963503649636e-05, "loss": 0.0811, "step": 5772 }, { "epoch": 0.81, "learning_rate": 4.86491671345686e-05, "loss": 0.093, "step": 5774 }, { "epoch": 0.81, "learning_rate": 4.864869923264084e-05, "loss": 0.1262, "step": 5776 }, { "epoch": 0.81, "learning_rate": 4.864823133071308e-05, "loss": 0.1196, "step": 5778 }, { "epoch": 0.81, "learning_rate": 4.8647763428785334e-05, "loss": 0.0947, "step": 5780 }, { "epoch": 0.81, "learning_rate": 4.8647295526857574e-05, "loss": 0.1022, "step": 5782 }, { "epoch": 0.81, "learning_rate": 4.864682762492982e-05, "loss": 0.1471, "step": 5784 }, { "epoch": 0.81, "learning_rate": 4.864635972300206e-05, "loss": 0.0992, "step": 5786 }, { "epoch": 0.81, "learning_rate": 4.8645891821074305e-05, "loss": 0.0914, "step": 5788 }, { "epoch": 0.81, "learning_rate": 4.864542391914655e-05, "loss": 0.1023, "step": 5790 }, { "epoch": 0.81, "learning_rate": 4.8644956017218796e-05, "loss": 0.1051, "step": 5792 }, { "epoch": 0.81, "learning_rate": 4.8644488115291036e-05, "loss": 0.1129, "step": 5794 }, { "epoch": 0.81, "learning_rate": 4.864402021336328e-05, "loss": 0.0977, "step": 5796 }, { "epoch": 0.81, "learning_rate": 4.864355231143553e-05, "loss": 0.0916, "step": 5798 }, { "epoch": 0.81, "learning_rate": 4.864308440950777e-05, "loss": 0.0867, "step": 5800 }, { "epoch": 0.81, "learning_rate": 4.864261650758001e-05, "loss": 0.0936, "step": 5802 }, { "epoch": 0.81, "learning_rate": 4.864214860565226e-05, "loss": 0.1163, "step": 5804 }, { "epoch": 0.81, "learning_rate": 4.8641680703724504e-05, "loss": 0.104, "step": 5806 }, { "epoch": 0.82, "learning_rate": 4.864121280179675e-05, "loss": 0.1155, "step": 5808 }, { "epoch": 0.82, "learning_rate": 4.864074489986899e-05, "loss": 0.098, "step": 5810 }, { "epoch": 0.82, "learning_rate": 4.8640276997941235e-05, "loss": 0.0928, "step": 5812 }, { "epoch": 0.82, "learning_rate": 4.863980909601348e-05, "loss": 0.0958, "step": 5814 }, { "epoch": 0.82, "learning_rate": 4.863934119408573e-05, "loss": 0.0983, "step": 5816 }, { "epoch": 0.82, "learning_rate": 4.8638873292157966e-05, "loss": 0.0974, "step": 5818 }, { "epoch": 0.82, "learning_rate": 4.863840539023021e-05, "loss": 0.1004, "step": 5820 }, { "epoch": 0.82, "learning_rate": 4.863793748830245e-05, "loss": 0.14, "step": 5822 }, { "epoch": 0.82, "learning_rate": 4.8637469586374704e-05, "loss": 0.0913, "step": 5824 }, { "epoch": 0.82, "learning_rate": 4.863700168444694e-05, "loss": 0.0883, "step": 5826 }, { "epoch": 0.82, "learning_rate": 4.863653378251918e-05, "loss": 0.1027, "step": 5828 }, { "epoch": 0.82, "learning_rate": 4.863606588059143e-05, "loss": 0.0953, "step": 5830 }, { "epoch": 0.82, "learning_rate": 4.8635597978663674e-05, "loss": 0.0811, "step": 5832 }, { "epoch": 0.82, "learning_rate": 4.863513007673592e-05, "loss": 0.1174, "step": 5834 }, { "epoch": 0.82, "learning_rate": 4.863466217480816e-05, "loss": 0.1065, "step": 5836 }, { "epoch": 0.82, "learning_rate": 4.8634194272880405e-05, "loss": 0.1064, "step": 5838 }, { "epoch": 0.82, "learning_rate": 4.863372637095265e-05, "loss": 0.0954, "step": 5840 }, { "epoch": 0.82, "learning_rate": 4.86332584690249e-05, "loss": 0.1126, "step": 5842 }, { "epoch": 0.82, "learning_rate": 4.8632790567097136e-05, "loss": 0.0907, "step": 5844 }, { "epoch": 0.82, "learning_rate": 4.863232266516938e-05, "loss": 0.111, "step": 5846 }, { "epoch": 0.82, "learning_rate": 4.863185476324163e-05, "loss": 0.092, "step": 5848 }, { "epoch": 0.82, "learning_rate": 4.8631386861313874e-05, "loss": 0.0969, "step": 5850 }, { "epoch": 0.82, "learning_rate": 4.863091895938611e-05, "loss": 0.1048, "step": 5852 }, { "epoch": 0.82, "learning_rate": 4.863045105745836e-05, "loss": 0.098, "step": 5854 }, { "epoch": 0.82, "learning_rate": 4.86299831555306e-05, "loss": 0.1183, "step": 5856 }, { "epoch": 0.82, "learning_rate": 4.862951525360285e-05, "loss": 0.0794, "step": 5858 }, { "epoch": 0.82, "learning_rate": 4.862904735167509e-05, "loss": 0.1104, "step": 5860 }, { "epoch": 0.82, "learning_rate": 4.8628579449747336e-05, "loss": 0.1069, "step": 5862 }, { "epoch": 0.82, "learning_rate": 4.8628111547819575e-05, "loss": 0.1049, "step": 5864 }, { "epoch": 0.82, "learning_rate": 4.862764364589182e-05, "loss": 0.0684, "step": 5866 }, { "epoch": 0.82, "learning_rate": 4.8627175743964066e-05, "loss": 0.1266, "step": 5868 }, { "epoch": 0.82, "learning_rate": 4.862670784203631e-05, "loss": 0.11, "step": 5870 }, { "epoch": 0.82, "learning_rate": 4.862623994010855e-05, "loss": 0.1415, "step": 5872 }, { "epoch": 0.82, "learning_rate": 4.86257720381808e-05, "loss": 0.0985, "step": 5874 }, { "epoch": 0.82, "learning_rate": 4.862530413625304e-05, "loss": 0.0979, "step": 5876 }, { "epoch": 0.83, "learning_rate": 4.862483623432529e-05, "loss": 0.0997, "step": 5878 }, { "epoch": 0.83, "learning_rate": 4.862436833239753e-05, "loss": 0.0902, "step": 5880 }, { "epoch": 0.83, "learning_rate": 4.8623900430469774e-05, "loss": 0.0743, "step": 5882 }, { "epoch": 0.83, "learning_rate": 4.862343252854202e-05, "loss": 0.1087, "step": 5884 }, { "epoch": 0.83, "learning_rate": 4.8622964626614266e-05, "loss": 0.1133, "step": 5886 }, { "epoch": 0.83, "learning_rate": 4.8622496724686505e-05, "loss": 0.089, "step": 5888 }, { "epoch": 0.83, "learning_rate": 4.862202882275875e-05, "loss": 0.0954, "step": 5890 }, { "epoch": 0.83, "learning_rate": 4.8621560920831e-05, "loss": 0.097, "step": 5892 }, { "epoch": 0.83, "learning_rate": 4.862109301890324e-05, "loss": 0.1184, "step": 5894 }, { "epoch": 0.83, "learning_rate": 4.862062511697548e-05, "loss": 0.0975, "step": 5896 }, { "epoch": 0.83, "learning_rate": 4.862015721504773e-05, "loss": 0.1071, "step": 5898 }, { "epoch": 0.83, "learning_rate": 4.861968931311997e-05, "loss": 0.069, "step": 5900 }, { "epoch": 0.83, "learning_rate": 4.861922141119222e-05, "loss": 0.1376, "step": 5902 }, { "epoch": 0.83, "learning_rate": 4.861875350926446e-05, "loss": 0.1246, "step": 5904 }, { "epoch": 0.83, "learning_rate": 4.8618285607336705e-05, "loss": 0.1001, "step": 5906 }, { "epoch": 0.83, "learning_rate": 4.8617817705408944e-05, "loss": 0.1003, "step": 5908 }, { "epoch": 0.83, "learning_rate": 4.86173498034812e-05, "loss": 0.1234, "step": 5910 }, { "epoch": 0.83, "learning_rate": 4.8616881901553436e-05, "loss": 0.0929, "step": 5912 }, { "epoch": 0.83, "learning_rate": 4.861641399962568e-05, "loss": 0.0917, "step": 5914 }, { "epoch": 0.83, "learning_rate": 4.861594609769792e-05, "loss": 0.0919, "step": 5916 }, { "epoch": 0.83, "learning_rate": 4.861547819577017e-05, "loss": 0.0958, "step": 5918 }, { "epoch": 0.83, "learning_rate": 4.861501029384241e-05, "loss": 0.1047, "step": 5920 }, { "epoch": 0.83, "learning_rate": 4.861454239191466e-05, "loss": 0.0816, "step": 5922 }, { "epoch": 0.83, "learning_rate": 4.86140744899869e-05, "loss": 0.1162, "step": 5924 }, { "epoch": 0.83, "learning_rate": 4.8613606588059144e-05, "loss": 0.0917, "step": 5926 }, { "epoch": 0.83, "learning_rate": 4.861313868613139e-05, "loss": 0.1072, "step": 5928 }, { "epoch": 0.83, "learning_rate": 4.8612670784203636e-05, "loss": 0.1359, "step": 5930 }, { "epoch": 0.83, "learning_rate": 4.8612202882275875e-05, "loss": 0.1258, "step": 5932 }, { "epoch": 0.83, "learning_rate": 4.861173498034812e-05, "loss": 0.1104, "step": 5934 }, { "epoch": 0.83, "learning_rate": 4.8611267078420366e-05, "loss": 0.0921, "step": 5936 }, { "epoch": 0.83, "learning_rate": 4.861079917649261e-05, "loss": 0.0894, "step": 5938 }, { "epoch": 0.83, "learning_rate": 4.861033127456485e-05, "loss": 0.086, "step": 5940 }, { "epoch": 0.83, "learning_rate": 4.86098633726371e-05, "loss": 0.1098, "step": 5942 }, { "epoch": 0.83, "learning_rate": 4.860939547070934e-05, "loss": 0.0945, "step": 5944 }, { "epoch": 0.83, "learning_rate": 4.860892756878159e-05, "loss": 0.083, "step": 5946 }, { "epoch": 0.83, "learning_rate": 4.860845966685383e-05, "loss": 0.0899, "step": 5948 }, { "epoch": 0.84, "learning_rate": 4.8607991764926074e-05, "loss": 0.1089, "step": 5950 }, { "epoch": 0.84, "learning_rate": 4.8607523862998313e-05, "loss": 0.0862, "step": 5952 }, { "epoch": 0.84, "learning_rate": 4.8607055961070566e-05, "loss": 0.1048, "step": 5954 }, { "epoch": 0.84, "learning_rate": 4.8606588059142805e-05, "loss": 0.1208, "step": 5956 }, { "epoch": 0.84, "learning_rate": 4.860612015721505e-05, "loss": 0.1038, "step": 5958 }, { "epoch": 0.84, "learning_rate": 4.860565225528729e-05, "loss": 0.0831, "step": 5960 }, { "epoch": 0.84, "learning_rate": 4.860518435335954e-05, "loss": 0.0993, "step": 5962 }, { "epoch": 0.84, "learning_rate": 4.860471645143178e-05, "loss": 0.1206, "step": 5964 }, { "epoch": 0.84, "learning_rate": 4.860424854950403e-05, "loss": 0.0931, "step": 5966 }, { "epoch": 0.84, "learning_rate": 4.860378064757627e-05, "loss": 0.0725, "step": 5968 }, { "epoch": 0.84, "learning_rate": 4.860331274564851e-05, "loss": 0.0996, "step": 5970 }, { "epoch": 0.84, "learning_rate": 4.860284484372076e-05, "loss": 0.0704, "step": 5972 }, { "epoch": 0.84, "learning_rate": 4.8602376941793005e-05, "loss": 0.0899, "step": 5974 }, { "epoch": 0.84, "learning_rate": 4.8601909039865244e-05, "loss": 0.0883, "step": 5976 }, { "epoch": 0.84, "learning_rate": 4.860144113793749e-05, "loss": 0.0819, "step": 5978 }, { "epoch": 0.84, "learning_rate": 4.8600973236009736e-05, "loss": 0.0999, "step": 5980 }, { "epoch": 0.84, "learning_rate": 4.860050533408198e-05, "loss": 0.0846, "step": 5982 }, { "epoch": 0.84, "learning_rate": 4.860003743215422e-05, "loss": 0.0805, "step": 5984 }, { "epoch": 0.84, "learning_rate": 4.859956953022647e-05, "loss": 0.1264, "step": 5986 }, { "epoch": 0.84, "learning_rate": 4.859910162829871e-05, "loss": 0.1262, "step": 5988 }, { "epoch": 0.84, "learning_rate": 4.859863372637096e-05, "loss": 0.1123, "step": 5990 }, { "epoch": 0.84, "learning_rate": 4.85981658244432e-05, "loss": 0.106, "step": 5992 }, { "epoch": 0.84, "learning_rate": 4.8597697922515444e-05, "loss": 0.0991, "step": 5994 }, { "epoch": 0.84, "learning_rate": 4.859723002058769e-05, "loss": 0.0839, "step": 5996 }, { "epoch": 0.84, "learning_rate": 4.8596762118659936e-05, "loss": 0.0766, "step": 5998 }, { "epoch": 0.84, "learning_rate": 4.8596294216732175e-05, "loss": 0.1062, "step": 6000 }, { "epoch": 0.84, "eval_gen_len": 29.3793, "eval_loss": 1.022807002067566, "eval_meteor": 0.0513, "eval_runtime": 14.225, "eval_samples_per_second": 4.077, "eval_steps_per_second": 0.562, "step": 6000 }, { "epoch": 0.84, "learning_rate": 4.859582631480442e-05, "loss": 0.0607, "step": 6002 }, { "epoch": 0.84, "learning_rate": 4.859535841287666e-05, "loss": 0.0952, "step": 6004 }, { "epoch": 0.84, "learning_rate": 4.859489051094891e-05, "loss": 0.1087, "step": 6006 }, { "epoch": 0.84, "learning_rate": 4.859442260902115e-05, "loss": 0.1181, "step": 6008 }, { "epoch": 0.84, "learning_rate": 4.85939547070934e-05, "loss": 0.0786, "step": 6010 }, { "epoch": 0.84, "learning_rate": 4.8593486805165637e-05, "loss": 0.0841, "step": 6012 }, { "epoch": 0.84, "learning_rate": 4.859301890323788e-05, "loss": 0.1031, "step": 6014 }, { "epoch": 0.84, "learning_rate": 4.859255100131013e-05, "loss": 0.092, "step": 6016 }, { "epoch": 0.84, "learning_rate": 4.8592083099382374e-05, "loss": 0.1029, "step": 6018 }, { "epoch": 0.85, "learning_rate": 4.8591615197454613e-05, "loss": 0.0929, "step": 6020 }, { "epoch": 0.85, "learning_rate": 4.859114729552686e-05, "loss": 0.0853, "step": 6022 }, { "epoch": 0.85, "learning_rate": 4.8590679393599105e-05, "loss": 0.0855, "step": 6024 }, { "epoch": 0.85, "learning_rate": 4.859021149167135e-05, "loss": 0.0849, "step": 6026 }, { "epoch": 0.85, "learning_rate": 4.858974358974359e-05, "loss": 0.0936, "step": 6028 }, { "epoch": 0.85, "learning_rate": 4.8589275687815836e-05, "loss": 0.1235, "step": 6030 }, { "epoch": 0.85, "learning_rate": 4.858880778588808e-05, "loss": 0.0925, "step": 6032 }, { "epoch": 0.85, "learning_rate": 4.858833988396033e-05, "loss": 0.1205, "step": 6034 }, { "epoch": 0.85, "learning_rate": 4.858787198203257e-05, "loss": 0.1217, "step": 6036 }, { "epoch": 0.85, "learning_rate": 4.858740408010481e-05, "loss": 0.0957, "step": 6038 }, { "epoch": 0.85, "learning_rate": 4.858693617817706e-05, "loss": 0.0844, "step": 6040 }, { "epoch": 0.85, "learning_rate": 4.8586468276249305e-05, "loss": 0.0872, "step": 6042 }, { "epoch": 0.85, "learning_rate": 4.8586000374321544e-05, "loss": 0.1021, "step": 6044 }, { "epoch": 0.85, "learning_rate": 4.858553247239379e-05, "loss": 0.1088, "step": 6046 }, { "epoch": 0.85, "learning_rate": 4.858506457046603e-05, "loss": 0.0879, "step": 6048 }, { "epoch": 0.85, "learning_rate": 4.858459666853828e-05, "loss": 0.1001, "step": 6050 }, { "epoch": 0.85, "learning_rate": 4.858412876661052e-05, "loss": 0.1154, "step": 6052 }, { "epoch": 0.85, "learning_rate": 4.858366086468277e-05, "loss": 0.0824, "step": 6054 }, { "epoch": 0.85, "learning_rate": 4.8583192962755006e-05, "loss": 0.0981, "step": 6056 }, { "epoch": 0.85, "learning_rate": 4.858272506082726e-05, "loss": 0.0853, "step": 6058 }, { "epoch": 0.85, "learning_rate": 4.85822571588995e-05, "loss": 0.0906, "step": 6060 }, { "epoch": 0.85, "learning_rate": 4.8581789256971744e-05, "loss": 0.1102, "step": 6062 }, { "epoch": 0.85, "learning_rate": 4.858132135504398e-05, "loss": 0.0784, "step": 6064 }, { "epoch": 0.85, "learning_rate": 4.858085345311623e-05, "loss": 0.0725, "step": 6066 }, { "epoch": 0.85, "learning_rate": 4.8580385551188475e-05, "loss": 0.1081, "step": 6068 }, { "epoch": 0.85, "learning_rate": 4.857991764926072e-05, "loss": 0.1199, "step": 6070 }, { "epoch": 0.85, "learning_rate": 4.857944974733296e-05, "loss": 0.1477, "step": 6072 }, { "epoch": 0.85, "learning_rate": 4.8578981845405206e-05, "loss": 0.1329, "step": 6074 }, { "epoch": 0.85, "learning_rate": 4.857851394347745e-05, "loss": 0.0915, "step": 6076 }, { "epoch": 0.85, "learning_rate": 4.857804604154969e-05, "loss": 0.0836, "step": 6078 }, { "epoch": 0.85, "learning_rate": 4.857757813962194e-05, "loss": 0.1161, "step": 6080 }, { "epoch": 0.85, "learning_rate": 4.8577110237694176e-05, "loss": 0.0971, "step": 6082 }, { "epoch": 0.85, "learning_rate": 4.857664233576643e-05, "loss": 0.1114, "step": 6084 }, { "epoch": 0.85, "learning_rate": 4.857617443383867e-05, "loss": 0.11, "step": 6086 }, { "epoch": 0.85, "learning_rate": 4.8575706531910913e-05, "loss": 0.1008, "step": 6088 }, { "epoch": 0.85, "learning_rate": 4.857523862998315e-05, "loss": 0.1019, "step": 6090 }, { "epoch": 0.86, "learning_rate": 4.8574770728055405e-05, "loss": 0.1183, "step": 6092 }, { "epoch": 0.86, "learning_rate": 4.8574302826127644e-05, "loss": 0.1048, "step": 6094 }, { "epoch": 0.86, "learning_rate": 4.857383492419989e-05, "loss": 0.0978, "step": 6096 }, { "epoch": 0.86, "learning_rate": 4.857336702227213e-05, "loss": 0.1271, "step": 6098 }, { "epoch": 0.86, "learning_rate": 4.8572899120344375e-05, "loss": 0.1231, "step": 6100 }, { "epoch": 0.86, "learning_rate": 4.857243121841662e-05, "loss": 0.075, "step": 6102 }, { "epoch": 0.86, "learning_rate": 4.857196331648887e-05, "loss": 0.0971, "step": 6104 }, { "epoch": 0.86, "learning_rate": 4.8571495414561106e-05, "loss": 0.0998, "step": 6106 }, { "epoch": 0.86, "learning_rate": 4.857102751263335e-05, "loss": 0.1032, "step": 6108 }, { "epoch": 0.86, "learning_rate": 4.85705596107056e-05, "loss": 0.0914, "step": 6110 }, { "epoch": 0.86, "learning_rate": 4.8570091708777844e-05, "loss": 0.1036, "step": 6112 }, { "epoch": 0.86, "learning_rate": 4.856962380685008e-05, "loss": 0.1222, "step": 6114 }, { "epoch": 0.86, "learning_rate": 4.856915590492233e-05, "loss": 0.1505, "step": 6116 }, { "epoch": 0.86, "learning_rate": 4.8568688002994575e-05, "loss": 0.1288, "step": 6118 }, { "epoch": 0.86, "learning_rate": 4.856822010106682e-05, "loss": 0.0904, "step": 6120 }, { "epoch": 0.86, "learning_rate": 4.856775219913906e-05, "loss": 0.0823, "step": 6122 }, { "epoch": 0.86, "learning_rate": 4.8567284297211306e-05, "loss": 0.1094, "step": 6124 }, { "epoch": 0.86, "learning_rate": 4.856681639528355e-05, "loss": 0.0855, "step": 6126 }, { "epoch": 0.86, "learning_rate": 4.85663484933558e-05, "loss": 0.1334, "step": 6128 }, { "epoch": 0.86, "learning_rate": 4.856588059142804e-05, "loss": 0.1046, "step": 6130 }, { "epoch": 0.86, "learning_rate": 4.856541268950028e-05, "loss": 0.0929, "step": 6132 }, { "epoch": 0.86, "learning_rate": 4.856494478757252e-05, "loss": 0.1016, "step": 6134 }, { "epoch": 0.86, "learning_rate": 4.8564476885644775e-05, "loss": 0.0838, "step": 6136 }, { "epoch": 0.86, "learning_rate": 4.8564008983717014e-05, "loss": 0.0816, "step": 6138 }, { "epoch": 0.86, "learning_rate": 4.856354108178926e-05, "loss": 0.11, "step": 6140 }, { "epoch": 0.86, "learning_rate": 4.85630731798615e-05, "loss": 0.1227, "step": 6142 }, { "epoch": 0.86, "learning_rate": 4.8562605277933745e-05, "loss": 0.0896, "step": 6144 }, { "epoch": 0.86, "learning_rate": 4.856213737600599e-05, "loss": 0.1126, "step": 6146 }, { "epoch": 0.86, "learning_rate": 4.856166947407824e-05, "loss": 0.0829, "step": 6148 }, { "epoch": 0.86, "learning_rate": 4.8561201572150476e-05, "loss": 0.1106, "step": 6150 }, { "epoch": 0.86, "learning_rate": 4.856073367022272e-05, "loss": 0.1107, "step": 6152 }, { "epoch": 0.86, "learning_rate": 4.856026576829497e-05, "loss": 0.1127, "step": 6154 }, { "epoch": 0.86, "learning_rate": 4.8559797866367214e-05, "loss": 0.0964, "step": 6156 }, { "epoch": 0.86, "learning_rate": 4.855932996443945e-05, "loss": 0.1157, "step": 6158 }, { "epoch": 0.86, "learning_rate": 4.85588620625117e-05, "loss": 0.0899, "step": 6160 }, { "epoch": 0.86, "learning_rate": 4.8558394160583944e-05, "loss": 0.119, "step": 6162 }, { "epoch": 0.87, "learning_rate": 4.855792625865619e-05, "loss": 0.0888, "step": 6164 }, { "epoch": 0.87, "learning_rate": 4.855745835672843e-05, "loss": 0.1033, "step": 6166 }, { "epoch": 0.87, "learning_rate": 4.8556990454800675e-05, "loss": 0.1036, "step": 6168 }, { "epoch": 0.87, "learning_rate": 4.855652255287292e-05, "loss": 0.1108, "step": 6170 }, { "epoch": 0.87, "learning_rate": 4.855605465094517e-05, "loss": 0.0931, "step": 6172 }, { "epoch": 0.87, "learning_rate": 4.8555586749017406e-05, "loss": 0.119, "step": 6174 }, { "epoch": 0.87, "learning_rate": 4.855511884708965e-05, "loss": 0.096, "step": 6176 }, { "epoch": 0.87, "learning_rate": 4.855465094516189e-05, "loss": 0.1296, "step": 6178 }, { "epoch": 0.87, "learning_rate": 4.8554183043234144e-05, "loss": 0.1038, "step": 6180 }, { "epoch": 0.87, "learning_rate": 4.855371514130638e-05, "loss": 0.1, "step": 6182 }, { "epoch": 0.87, "learning_rate": 4.855324723937863e-05, "loss": 0.0973, "step": 6184 }, { "epoch": 0.87, "learning_rate": 4.855277933745087e-05, "loss": 0.0748, "step": 6186 }, { "epoch": 0.87, "learning_rate": 4.855231143552312e-05, "loss": 0.0792, "step": 6188 }, { "epoch": 0.87, "learning_rate": 4.855184353359536e-05, "loss": 0.0941, "step": 6190 }, { "epoch": 0.87, "learning_rate": 4.8551375631667606e-05, "loss": 0.0827, "step": 6192 }, { "epoch": 0.87, "learning_rate": 4.8550907729739845e-05, "loss": 0.1037, "step": 6194 }, { "epoch": 0.87, "learning_rate": 4.855043982781209e-05, "loss": 0.105, "step": 6196 }, { "epoch": 0.87, "learning_rate": 4.854997192588434e-05, "loss": 0.1088, "step": 6198 }, { "epoch": 0.87, "learning_rate": 4.854950402395658e-05, "loss": 0.1072, "step": 6200 }, { "epoch": 0.87, "learning_rate": 4.854903612202882e-05, "loss": 0.1163, "step": 6202 }, { "epoch": 0.87, "learning_rate": 4.854856822010107e-05, "loss": 0.0877, "step": 6204 }, { "epoch": 0.87, "learning_rate": 4.8548100318173314e-05, "loss": 0.114, "step": 6206 }, { "epoch": 0.87, "learning_rate": 4.854763241624556e-05, "loss": 0.1165, "step": 6208 }, { "epoch": 0.87, "learning_rate": 4.85471645143178e-05, "loss": 0.1088, "step": 6210 }, { "epoch": 0.87, "learning_rate": 4.8546696612390045e-05, "loss": 0.0901, "step": 6212 }, { "epoch": 0.87, "learning_rate": 4.854622871046229e-05, "loss": 0.0794, "step": 6214 }, { "epoch": 0.87, "learning_rate": 4.854576080853454e-05, "loss": 0.1076, "step": 6216 }, { "epoch": 0.87, "learning_rate": 4.8545292906606776e-05, "loss": 0.1255, "step": 6218 }, { "epoch": 0.87, "learning_rate": 4.854482500467902e-05, "loss": 0.0943, "step": 6220 }, { "epoch": 0.87, "learning_rate": 4.854435710275127e-05, "loss": 0.1077, "step": 6222 }, { "epoch": 0.87, "learning_rate": 4.8543889200823514e-05, "loss": 0.1009, "step": 6224 }, { "epoch": 0.87, "learning_rate": 4.854342129889575e-05, "loss": 0.1059, "step": 6226 }, { "epoch": 0.87, "learning_rate": 4.8542953396968e-05, "loss": 0.0911, "step": 6228 }, { "epoch": 0.87, "learning_rate": 4.854248549504024e-05, "loss": 0.183, "step": 6230 }, { "epoch": 0.87, "learning_rate": 4.854201759311249e-05, "loss": 0.1078, "step": 6232 }, { "epoch": 0.88, "learning_rate": 4.854154969118473e-05, "loss": 0.0954, "step": 6234 }, { "epoch": 0.88, "learning_rate": 4.8541081789256975e-05, "loss": 0.1076, "step": 6236 }, { "epoch": 0.88, "learning_rate": 4.8540613887329215e-05, "loss": 0.0982, "step": 6238 }, { "epoch": 0.88, "learning_rate": 4.854014598540147e-05, "loss": 0.1291, "step": 6240 }, { "epoch": 0.88, "learning_rate": 4.8539678083473706e-05, "loss": 0.0829, "step": 6242 }, { "epoch": 0.88, "learning_rate": 4.853921018154595e-05, "loss": 0.0662, "step": 6244 }, { "epoch": 0.88, "learning_rate": 4.853874227961819e-05, "loss": 0.0739, "step": 6246 }, { "epoch": 0.88, "learning_rate": 4.853827437769044e-05, "loss": 0.0794, "step": 6248 }, { "epoch": 0.88, "learning_rate": 4.853780647576268e-05, "loss": 0.1045, "step": 6250 }, { "epoch": 0.88, "learning_rate": 4.853733857383493e-05, "loss": 0.0904, "step": 6252 }, { "epoch": 0.88, "learning_rate": 4.853687067190717e-05, "loss": 0.077, "step": 6254 }, { "epoch": 0.88, "learning_rate": 4.8536402769979414e-05, "loss": 0.0991, "step": 6256 }, { "epoch": 0.88, "learning_rate": 4.853593486805166e-05, "loss": 0.1022, "step": 6258 }, { "epoch": 0.88, "learning_rate": 4.8535466966123906e-05, "loss": 0.106, "step": 6260 }, { "epoch": 0.88, "learning_rate": 4.8534999064196145e-05, "loss": 0.1008, "step": 6262 }, { "epoch": 0.88, "learning_rate": 4.853453116226839e-05, "loss": 0.0948, "step": 6264 }, { "epoch": 0.88, "learning_rate": 4.853406326034064e-05, "loss": 0.0963, "step": 6266 }, { "epoch": 0.88, "learning_rate": 4.853359535841288e-05, "loss": 0.0881, "step": 6268 }, { "epoch": 0.88, "learning_rate": 4.853312745648512e-05, "loss": 0.0933, "step": 6270 }, { "epoch": 0.88, "learning_rate": 4.853265955455737e-05, "loss": 0.0912, "step": 6272 }, { "epoch": 0.88, "learning_rate": 4.8532191652629614e-05, "loss": 0.1104, "step": 6274 }, { "epoch": 0.88, "learning_rate": 4.853172375070186e-05, "loss": 0.11, "step": 6276 }, { "epoch": 0.88, "learning_rate": 4.85312558487741e-05, "loss": 0.0975, "step": 6278 }, { "epoch": 0.88, "learning_rate": 4.8530787946846345e-05, "loss": 0.1034, "step": 6280 }, { "epoch": 0.88, "learning_rate": 4.8530320044918584e-05, "loss": 0.0773, "step": 6282 }, { "epoch": 0.88, "learning_rate": 4.852985214299084e-05, "loss": 0.1249, "step": 6284 }, { "epoch": 0.88, "learning_rate": 4.8529384241063076e-05, "loss": 0.1355, "step": 6286 }, { "epoch": 0.88, "learning_rate": 4.852891633913532e-05, "loss": 0.0809, "step": 6288 }, { "epoch": 0.88, "learning_rate": 4.852844843720756e-05, "loss": 0.1087, "step": 6290 }, { "epoch": 0.88, "learning_rate": 4.852798053527981e-05, "loss": 0.0923, "step": 6292 }, { "epoch": 0.88, "learning_rate": 4.852751263335205e-05, "loss": 0.1128, "step": 6294 }, { "epoch": 0.88, "learning_rate": 4.85270447314243e-05, "loss": 0.0799, "step": 6296 }, { "epoch": 0.88, "learning_rate": 4.852657682949654e-05, "loss": 0.0949, "step": 6298 }, { "epoch": 0.88, "learning_rate": 4.8526108927568784e-05, "loss": 0.1028, "step": 6300 }, { "epoch": 0.88, "learning_rate": 4.852564102564103e-05, "loss": 0.1062, "step": 6302 }, { "epoch": 0.88, "learning_rate": 4.8525173123713275e-05, "loss": 0.0819, "step": 6304 }, { "epoch": 0.89, "learning_rate": 4.8524705221785515e-05, "loss": 0.0895, "step": 6306 }, { "epoch": 0.89, "learning_rate": 4.852423731985776e-05, "loss": 0.0985, "step": 6308 }, { "epoch": 0.89, "learning_rate": 4.8523769417930006e-05, "loss": 0.0971, "step": 6310 }, { "epoch": 0.89, "learning_rate": 4.852330151600225e-05, "loss": 0.1381, "step": 6312 }, { "epoch": 0.89, "learning_rate": 4.852283361407449e-05, "loss": 0.0967, "step": 6314 }, { "epoch": 0.89, "learning_rate": 4.852236571214674e-05, "loss": 0.1021, "step": 6316 }, { "epoch": 0.89, "learning_rate": 4.852189781021898e-05, "loss": 0.0963, "step": 6318 }, { "epoch": 0.89, "learning_rate": 4.852142990829123e-05, "loss": 0.1066, "step": 6320 }, { "epoch": 0.89, "learning_rate": 4.852096200636347e-05, "loss": 0.0817, "step": 6322 }, { "epoch": 0.89, "learning_rate": 4.8520494104435714e-05, "loss": 0.1203, "step": 6324 }, { "epoch": 0.89, "learning_rate": 4.8520026202507953e-05, "loss": 0.0768, "step": 6326 }, { "epoch": 0.89, "learning_rate": 4.8519558300580206e-05, "loss": 0.0735, "step": 6328 }, { "epoch": 0.89, "learning_rate": 4.8519090398652445e-05, "loss": 0.1131, "step": 6330 }, { "epoch": 0.89, "learning_rate": 4.8518622496724684e-05, "loss": 0.0946, "step": 6332 }, { "epoch": 0.89, "learning_rate": 4.851815459479693e-05, "loss": 0.1166, "step": 6334 }, { "epoch": 0.89, "learning_rate": 4.8517686692869176e-05, "loss": 0.0827, "step": 6336 }, { "epoch": 0.89, "learning_rate": 4.851721879094142e-05, "loss": 0.0983, "step": 6338 }, { "epoch": 0.89, "learning_rate": 4.851675088901366e-05, "loss": 0.0972, "step": 6340 }, { "epoch": 0.89, "learning_rate": 4.851628298708591e-05, "loss": 0.0934, "step": 6342 }, { "epoch": 0.89, "learning_rate": 4.851581508515815e-05, "loss": 0.1255, "step": 6344 }, { "epoch": 0.89, "learning_rate": 4.85153471832304e-05, "loss": 0.0948, "step": 6346 }, { "epoch": 0.89, "learning_rate": 4.851487928130264e-05, "loss": 0.0938, "step": 6348 }, { "epoch": 0.89, "learning_rate": 4.8514411379374884e-05, "loss": 0.0958, "step": 6350 }, { "epoch": 0.89, "learning_rate": 4.851394347744713e-05, "loss": 0.1154, "step": 6352 }, { "epoch": 0.89, "learning_rate": 4.8513475575519376e-05, "loss": 0.1278, "step": 6354 }, { "epoch": 0.89, "learning_rate": 4.8513007673591615e-05, "loss": 0.1073, "step": 6356 }, { "epoch": 0.89, "learning_rate": 4.851253977166386e-05, "loss": 0.109, "step": 6358 }, { "epoch": 0.89, "learning_rate": 4.85120718697361e-05, "loss": 0.102, "step": 6360 }, { "epoch": 0.89, "learning_rate": 4.851160396780835e-05, "loss": 0.0921, "step": 6362 }, { "epoch": 0.89, "learning_rate": 4.851113606588059e-05, "loss": 0.0813, "step": 6364 }, { "epoch": 0.89, "learning_rate": 4.851066816395284e-05, "loss": 0.1047, "step": 6366 }, { "epoch": 0.89, "learning_rate": 4.851020026202508e-05, "loss": 0.1126, "step": 6368 }, { "epoch": 0.89, "learning_rate": 4.850973236009733e-05, "loss": 0.0892, "step": 6370 }, { "epoch": 0.89, "learning_rate": 4.850926445816957e-05, "loss": 0.1046, "step": 6372 }, { "epoch": 0.89, "learning_rate": 4.8508796556241815e-05, "loss": 0.1093, "step": 6374 }, { "epoch": 0.9, "learning_rate": 4.8508328654314054e-05, "loss": 0.1002, "step": 6376 }, { "epoch": 0.9, "learning_rate": 4.85078607523863e-05, "loss": 0.1065, "step": 6378 }, { "epoch": 0.9, "learning_rate": 4.8507392850458546e-05, "loss": 0.1215, "step": 6380 }, { "epoch": 0.9, "learning_rate": 4.850692494853079e-05, "loss": 0.0916, "step": 6382 }, { "epoch": 0.9, "learning_rate": 4.850645704660303e-05, "loss": 0.0933, "step": 6384 }, { "epoch": 0.9, "learning_rate": 4.8505989144675277e-05, "loss": 0.1038, "step": 6386 }, { "epoch": 0.9, "learning_rate": 4.850552124274752e-05, "loss": 0.0875, "step": 6388 }, { "epoch": 0.9, "learning_rate": 4.850505334081977e-05, "loss": 0.1225, "step": 6390 }, { "epoch": 0.9, "learning_rate": 4.850458543889201e-05, "loss": 0.0956, "step": 6392 }, { "epoch": 0.9, "learning_rate": 4.8504117536964253e-05, "loss": 0.1178, "step": 6394 }, { "epoch": 0.9, "learning_rate": 4.85036496350365e-05, "loss": 0.1011, "step": 6396 }, { "epoch": 0.9, "learning_rate": 4.8503181733108745e-05, "loss": 0.0821, "step": 6398 }, { "epoch": 0.9, "learning_rate": 4.8502713831180984e-05, "loss": 0.1073, "step": 6400 }, { "epoch": 0.9, "learning_rate": 4.850224592925323e-05, "loss": 0.0952, "step": 6402 }, { "epoch": 0.9, "learning_rate": 4.8501778027325476e-05, "loss": 0.1331, "step": 6404 }, { "epoch": 0.9, "learning_rate": 4.850131012539772e-05, "loss": 0.0767, "step": 6406 }, { "epoch": 0.9, "learning_rate": 4.850084222346996e-05, "loss": 0.1026, "step": 6408 }, { "epoch": 0.9, "learning_rate": 4.850037432154221e-05, "loss": 0.0974, "step": 6410 }, { "epoch": 0.9, "learning_rate": 4.8499906419614446e-05, "loss": 0.1189, "step": 6412 }, { "epoch": 0.9, "learning_rate": 4.84994385176867e-05, "loss": 0.1029, "step": 6414 }, { "epoch": 0.9, "learning_rate": 4.849897061575894e-05, "loss": 0.0899, "step": 6416 }, { "epoch": 0.9, "learning_rate": 4.8498502713831184e-05, "loss": 0.0949, "step": 6418 }, { "epoch": 0.9, "learning_rate": 4.849803481190342e-05, "loss": 0.0998, "step": 6420 }, { "epoch": 0.9, "learning_rate": 4.849756690997567e-05, "loss": 0.0758, "step": 6422 }, { "epoch": 0.9, "learning_rate": 4.8497099008047915e-05, "loss": 0.1262, "step": 6424 }, { "epoch": 0.9, "learning_rate": 4.849663110612016e-05, "loss": 0.0781, "step": 6426 }, { "epoch": 0.9, "learning_rate": 4.84961632041924e-05, "loss": 0.0878, "step": 6428 }, { "epoch": 0.9, "learning_rate": 4.8495695302264646e-05, "loss": 0.0719, "step": 6430 }, { "epoch": 0.9, "learning_rate": 4.849522740033689e-05, "loss": 0.0843, "step": 6432 }, { "epoch": 0.9, "learning_rate": 4.849475949840914e-05, "loss": 0.0928, "step": 6434 }, { "epoch": 0.9, "learning_rate": 4.849429159648138e-05, "loss": 0.1238, "step": 6436 }, { "epoch": 0.9, "learning_rate": 4.849382369455362e-05, "loss": 0.113, "step": 6438 }, { "epoch": 0.9, "learning_rate": 4.849335579262587e-05, "loss": 0.0953, "step": 6440 }, { "epoch": 0.9, "learning_rate": 4.8492887890698115e-05, "loss": 0.1073, "step": 6442 }, { "epoch": 0.9, "learning_rate": 4.8492419988770354e-05, "loss": 0.1078, "step": 6444 }, { "epoch": 0.9, "learning_rate": 4.84919520868426e-05, "loss": 0.0983, "step": 6446 }, { "epoch": 0.91, "learning_rate": 4.8491484184914846e-05, "loss": 0.087, "step": 6448 }, { "epoch": 0.91, "learning_rate": 4.849101628298709e-05, "loss": 0.1162, "step": 6450 }, { "epoch": 0.91, "learning_rate": 4.849054838105933e-05, "loss": 0.1125, "step": 6452 }, { "epoch": 0.91, "learning_rate": 4.8490080479131577e-05, "loss": 0.1088, "step": 6454 }, { "epoch": 0.91, "learning_rate": 4.8489612577203816e-05, "loss": 0.1248, "step": 6456 }, { "epoch": 0.91, "learning_rate": 4.848914467527607e-05, "loss": 0.0978, "step": 6458 }, { "epoch": 0.91, "learning_rate": 4.848867677334831e-05, "loss": 0.0851, "step": 6460 }, { "epoch": 0.91, "learning_rate": 4.8488208871420553e-05, "loss": 0.1121, "step": 6462 }, { "epoch": 0.91, "learning_rate": 4.848774096949279e-05, "loss": 0.0838, "step": 6464 }, { "epoch": 0.91, "learning_rate": 4.8487273067565045e-05, "loss": 0.1132, "step": 6466 }, { "epoch": 0.91, "learning_rate": 4.8486805165637284e-05, "loss": 0.1201, "step": 6468 }, { "epoch": 0.91, "learning_rate": 4.848633726370953e-05, "loss": 0.0927, "step": 6470 }, { "epoch": 0.91, "learning_rate": 4.848586936178177e-05, "loss": 0.1031, "step": 6472 }, { "epoch": 0.91, "learning_rate": 4.8485401459854015e-05, "loss": 0.1185, "step": 6474 }, { "epoch": 0.91, "learning_rate": 4.848493355792626e-05, "loss": 0.1226, "step": 6476 }, { "epoch": 0.91, "learning_rate": 4.848446565599851e-05, "loss": 0.1114, "step": 6478 }, { "epoch": 0.91, "learning_rate": 4.8483997754070746e-05, "loss": 0.1083, "step": 6480 }, { "epoch": 0.91, "learning_rate": 4.848352985214299e-05, "loss": 0.1019, "step": 6482 }, { "epoch": 0.91, "learning_rate": 4.848306195021524e-05, "loss": 0.077, "step": 6484 }, { "epoch": 0.91, "learning_rate": 4.8482594048287484e-05, "loss": 0.1112, "step": 6486 }, { "epoch": 0.91, "learning_rate": 4.848212614635972e-05, "loss": 0.0828, "step": 6488 }, { "epoch": 0.91, "learning_rate": 4.848165824443197e-05, "loss": 0.1204, "step": 6490 }, { "epoch": 0.91, "learning_rate": 4.8481190342504215e-05, "loss": 0.1105, "step": 6492 }, { "epoch": 0.91, "learning_rate": 4.848072244057646e-05, "loss": 0.0913, "step": 6494 }, { "epoch": 0.91, "learning_rate": 4.84802545386487e-05, "loss": 0.1125, "step": 6496 }, { "epoch": 0.91, "learning_rate": 4.8479786636720946e-05, "loss": 0.1119, "step": 6498 }, { "epoch": 0.91, "learning_rate": 4.847931873479319e-05, "loss": 0.106, "step": 6500 }, { "epoch": 0.91, "learning_rate": 4.847885083286544e-05, "loss": 0.0962, "step": 6502 }, { "epoch": 0.91, "learning_rate": 4.847838293093768e-05, "loss": 0.1283, "step": 6504 }, { "epoch": 0.91, "learning_rate": 4.847791502900992e-05, "loss": 0.0884, "step": 6506 }, { "epoch": 0.91, "learning_rate": 4.847744712708216e-05, "loss": 0.0893, "step": 6508 }, { "epoch": 0.91, "learning_rate": 4.8476979225154415e-05, "loss": 0.1104, "step": 6510 }, { "epoch": 0.91, "learning_rate": 4.8476511323226654e-05, "loss": 0.0902, "step": 6512 }, { "epoch": 0.91, "learning_rate": 4.84760434212989e-05, "loss": 0.1052, "step": 6514 }, { "epoch": 0.91, "learning_rate": 4.847557551937114e-05, "loss": 0.0942, "step": 6516 }, { "epoch": 0.91, "learning_rate": 4.847510761744339e-05, "loss": 0.1106, "step": 6518 }, { "epoch": 0.92, "learning_rate": 4.847463971551563e-05, "loss": 0.0741, "step": 6520 }, { "epoch": 0.92, "learning_rate": 4.8474171813587877e-05, "loss": 0.0991, "step": 6522 }, { "epoch": 0.92, "learning_rate": 4.8473703911660116e-05, "loss": 0.1039, "step": 6524 }, { "epoch": 0.92, "learning_rate": 4.847323600973236e-05, "loss": 0.1115, "step": 6526 }, { "epoch": 0.92, "learning_rate": 4.847276810780461e-05, "loss": 0.1064, "step": 6528 }, { "epoch": 0.92, "learning_rate": 4.8472300205876853e-05, "loss": 0.1291, "step": 6530 }, { "epoch": 0.92, "learning_rate": 4.847183230394909e-05, "loss": 0.1032, "step": 6532 }, { "epoch": 0.92, "learning_rate": 4.847136440202134e-05, "loss": 0.1156, "step": 6534 }, { "epoch": 0.92, "learning_rate": 4.8470896500093584e-05, "loss": 0.1356, "step": 6536 }, { "epoch": 0.92, "learning_rate": 4.847042859816583e-05, "loss": 0.1375, "step": 6538 }, { "epoch": 0.92, "learning_rate": 4.846996069623807e-05, "loss": 0.0808, "step": 6540 }, { "epoch": 0.92, "learning_rate": 4.8469492794310315e-05, "loss": 0.1261, "step": 6542 }, { "epoch": 0.92, "learning_rate": 4.846902489238256e-05, "loss": 0.0996, "step": 6544 }, { "epoch": 0.92, "learning_rate": 4.846855699045481e-05, "loss": 0.1103, "step": 6546 }, { "epoch": 0.92, "learning_rate": 4.8468089088527046e-05, "loss": 0.1061, "step": 6548 }, { "epoch": 0.92, "learning_rate": 4.846762118659929e-05, "loss": 0.0861, "step": 6550 }, { "epoch": 0.92, "learning_rate": 4.846715328467154e-05, "loss": 0.0728, "step": 6552 }, { "epoch": 0.92, "learning_rate": 4.8466685382743784e-05, "loss": 0.1171, "step": 6554 }, { "epoch": 0.92, "learning_rate": 4.846621748081602e-05, "loss": 0.092, "step": 6556 }, { "epoch": 0.92, "learning_rate": 4.846574957888827e-05, "loss": 0.1062, "step": 6558 }, { "epoch": 0.92, "learning_rate": 4.846528167696051e-05, "loss": 0.0762, "step": 6560 }, { "epoch": 0.92, "learning_rate": 4.846481377503276e-05, "loss": 0.0735, "step": 6562 }, { "epoch": 0.92, "learning_rate": 4.8464345873105e-05, "loss": 0.0945, "step": 6564 }, { "epoch": 0.92, "learning_rate": 4.8463877971177246e-05, "loss": 0.1233, "step": 6566 }, { "epoch": 0.92, "learning_rate": 4.8463410069249485e-05, "loss": 0.1, "step": 6568 }, { "epoch": 0.92, "learning_rate": 4.846294216732173e-05, "loss": 0.0977, "step": 6570 }, { "epoch": 0.92, "learning_rate": 4.846247426539398e-05, "loss": 0.0987, "step": 6572 }, { "epoch": 0.92, "learning_rate": 4.846200636346622e-05, "loss": 0.1063, "step": 6574 }, { "epoch": 0.92, "learning_rate": 4.846153846153846e-05, "loss": 0.0971, "step": 6576 }, { "epoch": 0.92, "learning_rate": 4.846107055961071e-05, "loss": 0.1042, "step": 6578 }, { "epoch": 0.92, "learning_rate": 4.8460602657682954e-05, "loss": 0.1063, "step": 6580 }, { "epoch": 0.92, "learning_rate": 4.84601347557552e-05, "loss": 0.1105, "step": 6582 }, { "epoch": 0.92, "learning_rate": 4.845966685382744e-05, "loss": 0.1092, "step": 6584 }, { "epoch": 0.92, "learning_rate": 4.8459198951899685e-05, "loss": 0.0794, "step": 6586 }, { "epoch": 0.92, "learning_rate": 4.845873104997193e-05, "loss": 0.1062, "step": 6588 }, { "epoch": 0.93, "learning_rate": 4.845826314804417e-05, "loss": 0.1065, "step": 6590 }, { "epoch": 0.93, "learning_rate": 4.8457795246116416e-05, "loss": 0.1123, "step": 6592 }, { "epoch": 0.93, "learning_rate": 4.8457327344188655e-05, "loss": 0.1104, "step": 6594 }, { "epoch": 0.93, "learning_rate": 4.845685944226091e-05, "loss": 0.0636, "step": 6596 }, { "epoch": 0.93, "learning_rate": 4.845639154033315e-05, "loss": 0.0963, "step": 6598 }, { "epoch": 0.93, "learning_rate": 4.845592363840539e-05, "loss": 0.1153, "step": 6600 }, { "epoch": 0.93, "learning_rate": 4.845545573647763e-05, "loss": 0.1124, "step": 6602 }, { "epoch": 0.93, "learning_rate": 4.845498783454988e-05, "loss": 0.167, "step": 6604 }, { "epoch": 0.93, "learning_rate": 4.8454519932622124e-05, "loss": 0.0995, "step": 6606 }, { "epoch": 0.93, "learning_rate": 4.845405203069437e-05, "loss": 0.1159, "step": 6608 }, { "epoch": 0.93, "learning_rate": 4.845358412876661e-05, "loss": 0.1221, "step": 6610 }, { "epoch": 0.93, "learning_rate": 4.8453116226838855e-05, "loss": 0.0812, "step": 6612 }, { "epoch": 0.93, "learning_rate": 4.84526483249111e-05, "loss": 0.0912, "step": 6614 }, { "epoch": 0.93, "learning_rate": 4.8452180422983346e-05, "loss": 0.125, "step": 6616 }, { "epoch": 0.93, "learning_rate": 4.8451712521055585e-05, "loss": 0.1155, "step": 6618 }, { "epoch": 0.93, "learning_rate": 4.845124461912783e-05, "loss": 0.0926, "step": 6620 }, { "epoch": 0.93, "learning_rate": 4.845077671720008e-05, "loss": 0.1366, "step": 6622 }, { "epoch": 0.93, "learning_rate": 4.845030881527232e-05, "loss": 0.1135, "step": 6624 }, { "epoch": 0.93, "learning_rate": 4.844984091334456e-05, "loss": 0.1239, "step": 6626 }, { "epoch": 0.93, "learning_rate": 4.844937301141681e-05, "loss": 0.1278, "step": 6628 }, { "epoch": 0.93, "learning_rate": 4.8448905109489054e-05, "loss": 0.0896, "step": 6630 }, { "epoch": 0.93, "learning_rate": 4.84484372075613e-05, "loss": 0.0938, "step": 6632 }, { "epoch": 0.93, "learning_rate": 4.844796930563354e-05, "loss": 0.0911, "step": 6634 }, { "epoch": 0.93, "learning_rate": 4.8447501403705785e-05, "loss": 0.1133, "step": 6636 }, { "epoch": 0.93, "learning_rate": 4.8447033501778024e-05, "loss": 0.1028, "step": 6638 }, { "epoch": 0.93, "learning_rate": 4.844656559985028e-05, "loss": 0.1258, "step": 6640 }, { "epoch": 0.93, "learning_rate": 4.8446097697922516e-05, "loss": 0.1117, "step": 6642 }, { "epoch": 0.93, "learning_rate": 4.844562979599476e-05, "loss": 0.0975, "step": 6644 }, { "epoch": 0.93, "learning_rate": 4.8445161894067e-05, "loss": 0.1121, "step": 6646 }, { "epoch": 0.93, "learning_rate": 4.8444693992139254e-05, "loss": 0.1198, "step": 6648 }, { "epoch": 0.93, "learning_rate": 4.844422609021149e-05, "loss": 0.1173, "step": 6650 }, { "epoch": 0.93, "learning_rate": 4.844375818828374e-05, "loss": 0.1062, "step": 6652 }, { "epoch": 0.93, "learning_rate": 4.844329028635598e-05, "loss": 0.0994, "step": 6654 }, { "epoch": 0.93, "learning_rate": 4.8442822384428224e-05, "loss": 0.1192, "step": 6656 }, { "epoch": 0.93, "learning_rate": 4.844235448250047e-05, "loss": 0.0775, "step": 6658 }, { "epoch": 0.93, "learning_rate": 4.8441886580572716e-05, "loss": 0.1088, "step": 6660 }, { "epoch": 0.94, "learning_rate": 4.8441418678644955e-05, "loss": 0.1271, "step": 6662 }, { "epoch": 0.94, "learning_rate": 4.84409507767172e-05, "loss": 0.0869, "step": 6664 }, { "epoch": 0.94, "learning_rate": 4.844048287478945e-05, "loss": 0.1133, "step": 6666 }, { "epoch": 0.94, "learning_rate": 4.844001497286169e-05, "loss": 0.0873, "step": 6668 }, { "epoch": 0.94, "learning_rate": 4.843954707093393e-05, "loss": 0.0897, "step": 6670 }, { "epoch": 0.94, "learning_rate": 4.843907916900618e-05, "loss": 0.0942, "step": 6672 }, { "epoch": 0.94, "learning_rate": 4.8438611267078424e-05, "loss": 0.1154, "step": 6674 }, { "epoch": 0.94, "learning_rate": 4.843814336515067e-05, "loss": 0.106, "step": 6676 }, { "epoch": 0.94, "learning_rate": 4.843767546322291e-05, "loss": 0.1179, "step": 6678 }, { "epoch": 0.94, "learning_rate": 4.8437207561295155e-05, "loss": 0.1076, "step": 6680 }, { "epoch": 0.94, "learning_rate": 4.84367396593674e-05, "loss": 0.1158, "step": 6682 }, { "epoch": 0.94, "learning_rate": 4.8436271757439646e-05, "loss": 0.1141, "step": 6684 }, { "epoch": 0.94, "learning_rate": 4.8435803855511885e-05, "loss": 0.1287, "step": 6686 }, { "epoch": 0.94, "learning_rate": 4.843533595358413e-05, "loss": 0.0818, "step": 6688 }, { "epoch": 0.94, "learning_rate": 4.843486805165637e-05, "loss": 0.1097, "step": 6690 }, { "epoch": 0.94, "learning_rate": 4.843440014972862e-05, "loss": 0.0919, "step": 6692 }, { "epoch": 0.94, "learning_rate": 4.843393224780086e-05, "loss": 0.0963, "step": 6694 }, { "epoch": 0.94, "learning_rate": 4.843346434587311e-05, "loss": 0.1151, "step": 6696 }, { "epoch": 0.94, "learning_rate": 4.843299644394535e-05, "loss": 0.1007, "step": 6698 }, { "epoch": 0.94, "learning_rate": 4.843252854201759e-05, "loss": 0.1192, "step": 6700 }, { "epoch": 0.94, "learning_rate": 4.843206064008984e-05, "loss": 0.0704, "step": 6702 }, { "epoch": 0.94, "learning_rate": 4.8431592738162085e-05, "loss": 0.1191, "step": 6704 }, { "epoch": 0.94, "learning_rate": 4.8431124836234324e-05, "loss": 0.0758, "step": 6706 }, { "epoch": 0.94, "learning_rate": 4.843065693430657e-05, "loss": 0.12, "step": 6708 }, { "epoch": 0.94, "learning_rate": 4.8430189032378816e-05, "loss": 0.1071, "step": 6710 }, { "epoch": 0.94, "learning_rate": 4.842972113045106e-05, "loss": 0.1074, "step": 6712 }, { "epoch": 0.94, "learning_rate": 4.84292532285233e-05, "loss": 0.0955, "step": 6714 }, { "epoch": 0.94, "learning_rate": 4.842878532659555e-05, "loss": 0.0992, "step": 6716 }, { "epoch": 0.94, "learning_rate": 4.842831742466779e-05, "loss": 0.1008, "step": 6718 }, { "epoch": 0.94, "learning_rate": 4.842784952274004e-05, "loss": 0.1133, "step": 6720 }, { "epoch": 0.94, "learning_rate": 4.842738162081228e-05, "loss": 0.1024, "step": 6722 }, { "epoch": 0.94, "learning_rate": 4.8426913718884524e-05, "loss": 0.0731, "step": 6724 }, { "epoch": 0.94, "learning_rate": 4.842644581695677e-05, "loss": 0.0925, "step": 6726 }, { "epoch": 0.94, "learning_rate": 4.8425977915029016e-05, "loss": 0.0857, "step": 6728 }, { "epoch": 0.94, "learning_rate": 4.8425510013101255e-05, "loss": 0.0843, "step": 6730 }, { "epoch": 0.94, "learning_rate": 4.84250421111735e-05, "loss": 0.106, "step": 6732 }, { "epoch": 0.95, "learning_rate": 4.842457420924574e-05, "loss": 0.1088, "step": 6734 }, { "epoch": 0.95, "learning_rate": 4.842410630731799e-05, "loss": 0.097, "step": 6736 }, { "epoch": 0.95, "learning_rate": 4.842363840539023e-05, "loss": 0.1064, "step": 6738 }, { "epoch": 0.95, "learning_rate": 4.842317050346248e-05, "loss": 0.1006, "step": 6740 }, { "epoch": 0.95, "learning_rate": 4.842270260153472e-05, "loss": 0.1257, "step": 6742 }, { "epoch": 0.95, "learning_rate": 4.842223469960697e-05, "loss": 0.1, "step": 6744 }, { "epoch": 0.95, "learning_rate": 4.842176679767921e-05, "loss": 0.1103, "step": 6746 }, { "epoch": 0.95, "learning_rate": 4.8421298895751455e-05, "loss": 0.108, "step": 6748 }, { "epoch": 0.95, "learning_rate": 4.8420830993823694e-05, "loss": 0.0737, "step": 6750 }, { "epoch": 0.95, "learning_rate": 4.842036309189594e-05, "loss": 0.1007, "step": 6752 }, { "epoch": 0.95, "learning_rate": 4.8419895189968186e-05, "loss": 0.0944, "step": 6754 }, { "epoch": 0.95, "learning_rate": 4.841942728804043e-05, "loss": 0.1202, "step": 6756 }, { "epoch": 0.95, "learning_rate": 4.841895938611267e-05, "loss": 0.0799, "step": 6758 }, { "epoch": 0.95, "learning_rate": 4.8418491484184916e-05, "loss": 0.0949, "step": 6760 }, { "epoch": 0.95, "learning_rate": 4.841802358225716e-05, "loss": 0.1105, "step": 6762 }, { "epoch": 0.95, "learning_rate": 4.841755568032941e-05, "loss": 0.0744, "step": 6764 }, { "epoch": 0.95, "learning_rate": 4.841708777840165e-05, "loss": 0.1234, "step": 6766 }, { "epoch": 0.95, "learning_rate": 4.841661987647389e-05, "loss": 0.092, "step": 6768 }, { "epoch": 0.95, "learning_rate": 4.841615197454614e-05, "loss": 0.0982, "step": 6770 }, { "epoch": 0.95, "learning_rate": 4.8415684072618385e-05, "loss": 0.1087, "step": 6772 }, { "epoch": 0.95, "learning_rate": 4.8415216170690624e-05, "loss": 0.0769, "step": 6774 }, { "epoch": 0.95, "learning_rate": 4.841474826876287e-05, "loss": 0.0998, "step": 6776 }, { "epoch": 0.95, "learning_rate": 4.8414280366835116e-05, "loss": 0.097, "step": 6778 }, { "epoch": 0.95, "learning_rate": 4.841381246490736e-05, "loss": 0.1062, "step": 6780 }, { "epoch": 0.95, "learning_rate": 4.84133445629796e-05, "loss": 0.1102, "step": 6782 }, { "epoch": 0.95, "learning_rate": 4.841287666105185e-05, "loss": 0.0784, "step": 6784 }, { "epoch": 0.95, "learning_rate": 4.8412408759124086e-05, "loss": 0.0953, "step": 6786 }, { "epoch": 0.95, "learning_rate": 4.841194085719634e-05, "loss": 0.0952, "step": 6788 }, { "epoch": 0.95, "learning_rate": 4.841147295526858e-05, "loss": 0.1155, "step": 6790 }, { "epoch": 0.95, "learning_rate": 4.8411005053340824e-05, "loss": 0.0963, "step": 6792 }, { "epoch": 0.95, "learning_rate": 4.841053715141306e-05, "loss": 0.1182, "step": 6794 }, { "epoch": 0.95, "learning_rate": 4.8410069249485316e-05, "loss": 0.0928, "step": 6796 }, { "epoch": 0.95, "learning_rate": 4.8409601347557555e-05, "loss": 0.097, "step": 6798 }, { "epoch": 0.95, "learning_rate": 4.84091334456298e-05, "loss": 0.0953, "step": 6800 }, { "epoch": 0.95, "learning_rate": 4.840866554370204e-05, "loss": 0.0926, "step": 6802 }, { "epoch": 0.96, "learning_rate": 4.8408197641774286e-05, "loss": 0.074, "step": 6804 }, { "epoch": 0.96, "learning_rate": 4.840772973984653e-05, "loss": 0.0794, "step": 6806 }, { "epoch": 0.96, "learning_rate": 4.840726183791878e-05, "loss": 0.1061, "step": 6808 }, { "epoch": 0.96, "learning_rate": 4.840679393599102e-05, "loss": 0.108, "step": 6810 }, { "epoch": 0.96, "learning_rate": 4.840632603406326e-05, "loss": 0.0953, "step": 6812 }, { "epoch": 0.96, "learning_rate": 4.840585813213551e-05, "loss": 0.0895, "step": 6814 }, { "epoch": 0.96, "learning_rate": 4.8405390230207755e-05, "loss": 0.097, "step": 6816 }, { "epoch": 0.96, "learning_rate": 4.8404922328279994e-05, "loss": 0.0926, "step": 6818 }, { "epoch": 0.96, "learning_rate": 4.840445442635224e-05, "loss": 0.1324, "step": 6820 }, { "epoch": 0.96, "learning_rate": 4.8403986524424486e-05, "loss": 0.0832, "step": 6822 }, { "epoch": 0.96, "learning_rate": 4.840351862249673e-05, "loss": 0.097, "step": 6824 }, { "epoch": 0.96, "learning_rate": 4.840305072056897e-05, "loss": 0.1154, "step": 6826 }, { "epoch": 0.96, "learning_rate": 4.8402582818641216e-05, "loss": 0.1211, "step": 6828 }, { "epoch": 0.96, "learning_rate": 4.840211491671346e-05, "loss": 0.105, "step": 6830 }, { "epoch": 0.96, "learning_rate": 4.840164701478571e-05, "loss": 0.0973, "step": 6832 }, { "epoch": 0.96, "learning_rate": 4.840117911285795e-05, "loss": 0.0933, "step": 6834 }, { "epoch": 0.96, "learning_rate": 4.840071121093019e-05, "loss": 0.0872, "step": 6836 }, { "epoch": 0.96, "learning_rate": 4.840024330900243e-05, "loss": 0.1034, "step": 6838 }, { "epoch": 0.96, "learning_rate": 4.839977540707468e-05, "loss": 0.0869, "step": 6840 }, { "epoch": 0.96, "learning_rate": 4.8399307505146924e-05, "loss": 0.0798, "step": 6842 }, { "epoch": 0.96, "learning_rate": 4.8398839603219163e-05, "loss": 0.0998, "step": 6844 }, { "epoch": 0.96, "learning_rate": 4.839837170129141e-05, "loss": 0.0968, "step": 6846 }, { "epoch": 0.96, "learning_rate": 4.8397903799363655e-05, "loss": 0.1173, "step": 6848 }, { "epoch": 0.96, "learning_rate": 4.83974358974359e-05, "loss": 0.0919, "step": 6850 }, { "epoch": 0.96, "learning_rate": 4.839696799550814e-05, "loss": 0.1159, "step": 6852 }, { "epoch": 0.96, "learning_rate": 4.8396500093580386e-05, "loss": 0.0714, "step": 6854 }, { "epoch": 0.96, "learning_rate": 4.839603219165263e-05, "loss": 0.0924, "step": 6856 }, { "epoch": 0.96, "learning_rate": 4.839556428972488e-05, "loss": 0.1054, "step": 6858 }, { "epoch": 0.96, "learning_rate": 4.839509638779712e-05, "loss": 0.1118, "step": 6860 }, { "epoch": 0.96, "learning_rate": 4.839462848586936e-05, "loss": 0.0879, "step": 6862 }, { "epoch": 0.96, "learning_rate": 4.839416058394161e-05, "loss": 0.1111, "step": 6864 }, { "epoch": 0.96, "learning_rate": 4.8393692682013855e-05, "loss": 0.0892, "step": 6866 }, { "epoch": 0.96, "learning_rate": 4.8393224780086094e-05, "loss": 0.1077, "step": 6868 }, { "epoch": 0.96, "learning_rate": 4.839275687815834e-05, "loss": 0.1028, "step": 6870 }, { "epoch": 0.96, "learning_rate": 4.839228897623058e-05, "loss": 0.1059, "step": 6872 }, { "epoch": 0.96, "learning_rate": 4.839182107430283e-05, "loss": 0.0976, "step": 6874 }, { "epoch": 0.97, "learning_rate": 4.839135317237507e-05, "loss": 0.0977, "step": 6876 }, { "epoch": 0.97, "learning_rate": 4.839088527044732e-05, "loss": 0.0917, "step": 6878 }, { "epoch": 0.97, "learning_rate": 4.8390417368519556e-05, "loss": 0.0875, "step": 6880 }, { "epoch": 0.97, "learning_rate": 4.83899494665918e-05, "loss": 0.1239, "step": 6882 }, { "epoch": 0.97, "learning_rate": 4.838948156466405e-05, "loss": 0.0962, "step": 6884 }, { "epoch": 0.97, "learning_rate": 4.8389013662736294e-05, "loss": 0.105, "step": 6886 }, { "epoch": 0.97, "learning_rate": 4.838854576080853e-05, "loss": 0.102, "step": 6888 }, { "epoch": 0.97, "learning_rate": 4.838807785888078e-05, "loss": 0.0977, "step": 6890 }, { "epoch": 0.97, "learning_rate": 4.8387609956953025e-05, "loss": 0.1037, "step": 6892 }, { "epoch": 0.97, "learning_rate": 4.838714205502527e-05, "loss": 0.0885, "step": 6894 }, { "epoch": 0.97, "learning_rate": 4.838667415309751e-05, "loss": 0.0917, "step": 6896 }, { "epoch": 0.97, "learning_rate": 4.8386206251169756e-05, "loss": 0.1001, "step": 6898 }, { "epoch": 0.97, "learning_rate": 4.8385738349242e-05, "loss": 0.1313, "step": 6900 }, { "epoch": 0.97, "learning_rate": 4.838527044731425e-05, "loss": 0.1169, "step": 6902 }, { "epoch": 0.97, "learning_rate": 4.8384802545386487e-05, "loss": 0.1195, "step": 6904 }, { "epoch": 0.97, "learning_rate": 4.838433464345873e-05, "loss": 0.0921, "step": 6906 }, { "epoch": 0.97, "learning_rate": 4.838386674153098e-05, "loss": 0.1008, "step": 6908 }, { "epoch": 0.97, "learning_rate": 4.8383398839603224e-05, "loss": 0.0865, "step": 6910 }, { "epoch": 0.97, "learning_rate": 4.8382930937675463e-05, "loss": 0.0884, "step": 6912 }, { "epoch": 0.97, "learning_rate": 4.838246303574771e-05, "loss": 0.1269, "step": 6914 }, { "epoch": 0.97, "learning_rate": 4.838199513381995e-05, "loss": 0.0834, "step": 6916 }, { "epoch": 0.97, "learning_rate": 4.83815272318922e-05, "loss": 0.0979, "step": 6918 }, { "epoch": 0.97, "learning_rate": 4.838105932996444e-05, "loss": 0.0706, "step": 6920 }, { "epoch": 0.97, "learning_rate": 4.8380591428036686e-05, "loss": 0.1109, "step": 6922 }, { "epoch": 0.97, "learning_rate": 4.8380123526108925e-05, "loss": 0.0822, "step": 6924 }, { "epoch": 0.97, "learning_rate": 4.837965562418118e-05, "loss": 0.103, "step": 6926 }, { "epoch": 0.97, "learning_rate": 4.837918772225342e-05, "loss": 0.1162, "step": 6928 }, { "epoch": 0.97, "learning_rate": 4.837871982032566e-05, "loss": 0.0904, "step": 6930 }, { "epoch": 0.97, "learning_rate": 4.83782519183979e-05, "loss": 0.0865, "step": 6932 }, { "epoch": 0.97, "learning_rate": 4.837778401647015e-05, "loss": 0.1013, "step": 6934 }, { "epoch": 0.97, "learning_rate": 4.8377316114542394e-05, "loss": 0.0997, "step": 6936 }, { "epoch": 0.97, "learning_rate": 4.837684821261464e-05, "loss": 0.096, "step": 6938 }, { "epoch": 0.97, "learning_rate": 4.837638031068688e-05, "loss": 0.1282, "step": 6940 }, { "epoch": 0.97, "learning_rate": 4.8375912408759125e-05, "loss": 0.081, "step": 6942 }, { "epoch": 0.97, "learning_rate": 4.837544450683137e-05, "loss": 0.0763, "step": 6944 }, { "epoch": 0.98, "learning_rate": 4.837497660490362e-05, "loss": 0.0851, "step": 6946 }, { "epoch": 0.98, "learning_rate": 4.8374508702975856e-05, "loss": 0.1228, "step": 6948 }, { "epoch": 0.98, "learning_rate": 4.83740408010481e-05, "loss": 0.1094, "step": 6950 }, { "epoch": 0.98, "learning_rate": 4.837357289912035e-05, "loss": 0.1124, "step": 6952 }, { "epoch": 0.98, "learning_rate": 4.8373104997192594e-05, "loss": 0.0812, "step": 6954 }, { "epoch": 0.98, "learning_rate": 4.837263709526483e-05, "loss": 0.0816, "step": 6956 }, { "epoch": 0.98, "learning_rate": 4.837216919333708e-05, "loss": 0.1111, "step": 6958 }, { "epoch": 0.98, "learning_rate": 4.8371701291409325e-05, "loss": 0.1044, "step": 6960 }, { "epoch": 0.98, "learning_rate": 4.837123338948157e-05, "loss": 0.1142, "step": 6962 }, { "epoch": 0.98, "learning_rate": 4.837076548755381e-05, "loss": 0.1034, "step": 6964 }, { "epoch": 0.98, "learning_rate": 4.8370297585626056e-05, "loss": 0.1176, "step": 6966 }, { "epoch": 0.98, "learning_rate": 4.8369829683698295e-05, "loss": 0.0962, "step": 6968 }, { "epoch": 0.98, "learning_rate": 4.836936178177055e-05, "loss": 0.0932, "step": 6970 }, { "epoch": 0.98, "learning_rate": 4.836889387984279e-05, "loss": 0.1057, "step": 6972 }, { "epoch": 0.98, "learning_rate": 4.836842597791503e-05, "loss": 0.1024, "step": 6974 }, { "epoch": 0.98, "learning_rate": 4.836795807598727e-05, "loss": 0.1027, "step": 6976 }, { "epoch": 0.98, "learning_rate": 4.8367490174059524e-05, "loss": 0.1228, "step": 6978 }, { "epoch": 0.98, "learning_rate": 4.8367022272131763e-05, "loss": 0.0971, "step": 6980 }, { "epoch": 0.98, "learning_rate": 4.836655437020401e-05, "loss": 0.1094, "step": 6982 }, { "epoch": 0.98, "learning_rate": 4.836608646827625e-05, "loss": 0.0743, "step": 6984 }, { "epoch": 0.98, "learning_rate": 4.8365618566348494e-05, "loss": 0.0925, "step": 6986 }, { "epoch": 0.98, "learning_rate": 4.836515066442074e-05, "loss": 0.1026, "step": 6988 }, { "epoch": 0.98, "learning_rate": 4.8364682762492986e-05, "loss": 0.1369, "step": 6990 }, { "epoch": 0.98, "learning_rate": 4.8364214860565225e-05, "loss": 0.1028, "step": 6992 }, { "epoch": 0.98, "learning_rate": 4.836374695863747e-05, "loss": 0.1015, "step": 6994 }, { "epoch": 0.98, "learning_rate": 4.836327905670972e-05, "loss": 0.104, "step": 6996 }, { "epoch": 0.98, "learning_rate": 4.836281115478196e-05, "loss": 0.0902, "step": 6998 }, { "epoch": 0.98, "learning_rate": 4.83623432528542e-05, "loss": 0.1163, "step": 7000 }, { "epoch": 0.98, "eval_gen_len": 29.0862, "eval_loss": 1.0126045942306519, "eval_meteor": 0.0443, "eval_runtime": 14.844, "eval_samples_per_second": 3.907, "eval_steps_per_second": 0.539, "step": 7000 }, { "epoch": 0.98, "learning_rate": 4.836187535092645e-05, "loss": 0.1137, "step": 7002 }, { "epoch": 0.98, "learning_rate": 4.8361407448998694e-05, "loss": 0.0969, "step": 7004 }, { "epoch": 0.98, "learning_rate": 4.836093954707094e-05, "loss": 0.0938, "step": 7006 }, { "epoch": 0.98, "learning_rate": 4.836047164514318e-05, "loss": 0.0878, "step": 7008 }, { "epoch": 0.98, "learning_rate": 4.8360003743215425e-05, "loss": 0.0865, "step": 7010 }, { "epoch": 0.98, "learning_rate": 4.8359535841287664e-05, "loss": 0.1052, "step": 7012 }, { "epoch": 0.98, "learning_rate": 4.835906793935992e-05, "loss": 0.0923, "step": 7014 }, { "epoch": 0.98, "learning_rate": 4.8358600037432156e-05, "loss": 0.1033, "step": 7016 }, { "epoch": 0.99, "learning_rate": 4.83581321355044e-05, "loss": 0.099, "step": 7018 }, { "epoch": 0.99, "learning_rate": 4.835766423357664e-05, "loss": 0.119, "step": 7020 }, { "epoch": 0.99, "learning_rate": 4.8357196331648894e-05, "loss": 0.1074, "step": 7022 }, { "epoch": 0.99, "learning_rate": 4.835672842972113e-05, "loss": 0.1074, "step": 7024 }, { "epoch": 0.99, "learning_rate": 4.835626052779338e-05, "loss": 0.104, "step": 7026 }, { "epoch": 0.99, "learning_rate": 4.835579262586562e-05, "loss": 0.0993, "step": 7028 }, { "epoch": 0.99, "learning_rate": 4.8355324723937864e-05, "loss": 0.085, "step": 7030 }, { "epoch": 0.99, "learning_rate": 4.835485682201011e-05, "loss": 0.0896, "step": 7032 }, { "epoch": 0.99, "learning_rate": 4.8354388920082356e-05, "loss": 0.0996, "step": 7034 }, { "epoch": 0.99, "learning_rate": 4.8353921018154595e-05, "loss": 0.1204, "step": 7036 }, { "epoch": 0.99, "learning_rate": 4.835345311622684e-05, "loss": 0.0781, "step": 7038 }, { "epoch": 0.99, "learning_rate": 4.835298521429909e-05, "loss": 0.1396, "step": 7040 }, { "epoch": 0.99, "learning_rate": 4.835251731237133e-05, "loss": 0.1755, "step": 7042 }, { "epoch": 0.99, "learning_rate": 4.835204941044357e-05, "loss": 0.0937, "step": 7044 }, { "epoch": 0.99, "learning_rate": 4.835158150851582e-05, "loss": 0.0942, "step": 7046 }, { "epoch": 0.99, "learning_rate": 4.8351113606588064e-05, "loss": 0.1169, "step": 7048 }, { "epoch": 0.99, "learning_rate": 4.835064570466031e-05, "loss": 0.0962, "step": 7050 }, { "epoch": 0.99, "learning_rate": 4.835017780273255e-05, "loss": 0.1254, "step": 7052 }, { "epoch": 0.99, "learning_rate": 4.8349709900804794e-05, "loss": 0.1053, "step": 7054 }, { "epoch": 0.99, "learning_rate": 4.834924199887704e-05, "loss": 0.1188, "step": 7056 }, { "epoch": 0.99, "learning_rate": 4.8348774096949286e-05, "loss": 0.1038, "step": 7058 }, { "epoch": 0.99, "learning_rate": 4.8348306195021525e-05, "loss": 0.1068, "step": 7060 }, { "epoch": 0.99, "learning_rate": 4.834783829309377e-05, "loss": 0.1112, "step": 7062 }, { "epoch": 0.99, "learning_rate": 4.834737039116601e-05, "loss": 0.1352, "step": 7064 }, { "epoch": 0.99, "learning_rate": 4.834690248923826e-05, "loss": 0.1129, "step": 7066 }, { "epoch": 0.99, "learning_rate": 4.83464345873105e-05, "loss": 0.1124, "step": 7068 }, { "epoch": 0.99, "learning_rate": 4.834596668538275e-05, "loss": 0.1069, "step": 7070 }, { "epoch": 0.99, "learning_rate": 4.834549878345499e-05, "loss": 0.1167, "step": 7072 }, { "epoch": 0.99, "learning_rate": 4.834503088152724e-05, "loss": 0.0909, "step": 7074 }, { "epoch": 0.99, "learning_rate": 4.834456297959948e-05, "loss": 0.1275, "step": 7076 }, { "epoch": 0.99, "learning_rate": 4.8344095077671725e-05, "loss": 0.096, "step": 7078 }, { "epoch": 0.99, "learning_rate": 4.8343627175743964e-05, "loss": 0.1105, "step": 7080 }, { "epoch": 0.99, "learning_rate": 4.834315927381621e-05, "loss": 0.1029, "step": 7082 }, { "epoch": 0.99, "learning_rate": 4.8342691371888456e-05, "loss": 0.0912, "step": 7084 }, { "epoch": 0.99, "learning_rate": 4.83422234699607e-05, "loss": 0.0942, "step": 7086 }, { "epoch": 0.99, "learning_rate": 4.834175556803294e-05, "loss": 0.0853, "step": 7088 }, { "epoch": 1.0, "learning_rate": 4.834128766610519e-05, "loss": 0.0912, "step": 7090 }, { "epoch": 1.0, "learning_rate": 4.834081976417743e-05, "loss": 0.0714, "step": 7092 }, { "epoch": 1.0, "learning_rate": 4.834035186224967e-05, "loss": 0.0945, "step": 7094 }, { "epoch": 1.0, "learning_rate": 4.833988396032192e-05, "loss": 0.1026, "step": 7096 }, { "epoch": 1.0, "learning_rate": 4.833941605839416e-05, "loss": 0.1034, "step": 7098 }, { "epoch": 1.0, "learning_rate": 4.833894815646641e-05, "loss": 0.0926, "step": 7100 }, { "epoch": 1.0, "learning_rate": 4.833848025453865e-05, "loss": 0.085, "step": 7102 }, { "epoch": 1.0, "learning_rate": 4.8338012352610895e-05, "loss": 0.1048, "step": 7104 }, { "epoch": 1.0, "learning_rate": 4.8337544450683134e-05, "loss": 0.1001, "step": 7106 }, { "epoch": 1.0, "learning_rate": 4.833707654875539e-05, "loss": 0.0964, "step": 7108 }, { "epoch": 1.0, "learning_rate": 4.8336608646827626e-05, "loss": 0.1004, "step": 7110 }, { "epoch": 1.0, "learning_rate": 4.833614074489987e-05, "loss": 0.0852, "step": 7112 }, { "epoch": 1.0, "learning_rate": 4.833567284297211e-05, "loss": 0.0964, "step": 7114 }, { "epoch": 1.0, "learning_rate": 4.833520494104436e-05, "loss": 0.0953, "step": 7116 }, { "epoch": 1.0, "learning_rate": 4.83347370391166e-05, "loss": 0.1235, "step": 7118 }, { "epoch": 1.0, "learning_rate": 4.833426913718885e-05, "loss": 0.0937, "step": 7120 }, { "epoch": 1.0, "learning_rate": 4.833380123526109e-05, "loss": 0.1229, "step": 7122 }, { "epoch": 1.0, "learning_rate": 4.8333567284297214e-05, "loss": 0.1498, "step": 7124 }, { "epoch": 1.0, "learning_rate": 4.833309938236946e-05, "loss": 0.0874, "step": 7126 }, { "epoch": 1.0, "learning_rate": 4.8332631480441706e-05, "loss": 0.0612, "step": 7128 }, { "epoch": 1.0, "learning_rate": 4.8332163578513945e-05, "loss": 0.0723, "step": 7130 }, { "epoch": 1.0, "learning_rate": 4.833169567658619e-05, "loss": 0.0633, "step": 7132 }, { "epoch": 1.0, "learning_rate": 4.833122777465843e-05, "loss": 0.068, "step": 7134 }, { "epoch": 1.0, "learning_rate": 4.833075987273068e-05, "loss": 0.0644, "step": 7136 }, { "epoch": 1.0, "learning_rate": 4.833029197080292e-05, "loss": 0.059, "step": 7138 }, { "epoch": 1.0, "learning_rate": 4.832982406887517e-05, "loss": 0.0491, "step": 7140 }, { "epoch": 1.0, "learning_rate": 4.832935616694741e-05, "loss": 0.0526, "step": 7142 }, { "epoch": 1.0, "learning_rate": 4.832888826501965e-05, "loss": 0.0508, "step": 7144 }, { "epoch": 1.0, "learning_rate": 4.83284203630919e-05, "loss": 0.0626, "step": 7146 }, { "epoch": 1.0, "learning_rate": 4.8327952461164145e-05, "loss": 0.0754, "step": 7148 }, { "epoch": 1.0, "learning_rate": 4.8327484559236384e-05, "loss": 0.0685, "step": 7150 }, { "epoch": 1.0, "learning_rate": 4.832701665730863e-05, "loss": 0.0698, "step": 7152 }, { "epoch": 1.0, "learning_rate": 4.8326548755380876e-05, "loss": 0.0732, "step": 7154 }, { "epoch": 1.0, "learning_rate": 4.832608085345312e-05, "loss": 0.0628, "step": 7156 }, { "epoch": 1.0, "learning_rate": 4.832561295152536e-05, "loss": 0.075, "step": 7158 }, { "epoch": 1.01, "learning_rate": 4.8325145049597607e-05, "loss": 0.0582, "step": 7160 }, { "epoch": 1.01, "learning_rate": 4.832467714766985e-05, "loss": 0.063, "step": 7162 }, { "epoch": 1.01, "learning_rate": 4.83242092457421e-05, "loss": 0.0574, "step": 7164 }, { "epoch": 1.01, "learning_rate": 4.832374134381434e-05, "loss": 0.057, "step": 7166 }, { "epoch": 1.01, "learning_rate": 4.8323273441886583e-05, "loss": 0.0757, "step": 7168 }, { "epoch": 1.01, "learning_rate": 4.832280553995883e-05, "loss": 0.0547, "step": 7170 }, { "epoch": 1.01, "learning_rate": 4.8322337638031075e-05, "loss": 0.0701, "step": 7172 }, { "epoch": 1.01, "learning_rate": 4.8321869736103314e-05, "loss": 0.0587, "step": 7174 }, { "epoch": 1.01, "learning_rate": 4.832140183417556e-05, "loss": 0.0661, "step": 7176 }, { "epoch": 1.01, "learning_rate": 4.83209339322478e-05, "loss": 0.0653, "step": 7178 }, { "epoch": 1.01, "learning_rate": 4.832046603032005e-05, "loss": 0.0517, "step": 7180 }, { "epoch": 1.01, "learning_rate": 4.831999812839229e-05, "loss": 0.0517, "step": 7182 }, { "epoch": 1.01, "learning_rate": 4.831953022646454e-05, "loss": 0.0689, "step": 7184 }, { "epoch": 1.01, "learning_rate": 4.8319062324536776e-05, "loss": 0.0664, "step": 7186 }, { "epoch": 1.01, "learning_rate": 4.831859442260903e-05, "loss": 0.0978, "step": 7188 }, { "epoch": 1.01, "learning_rate": 4.831812652068127e-05, "loss": 0.0796, "step": 7190 }, { "epoch": 1.01, "learning_rate": 4.8317658618753514e-05, "loss": 0.0552, "step": 7192 }, { "epoch": 1.01, "learning_rate": 4.831719071682575e-05, "loss": 0.0539, "step": 7194 }, { "epoch": 1.01, "learning_rate": 4.8316722814898e-05, "loss": 0.0512, "step": 7196 }, { "epoch": 1.01, "learning_rate": 4.8316254912970245e-05, "loss": 0.0468, "step": 7198 }, { "epoch": 1.01, "learning_rate": 4.831578701104249e-05, "loss": 0.0708, "step": 7200 }, { "epoch": 1.01, "learning_rate": 4.831531910911473e-05, "loss": 0.0638, "step": 7202 }, { "epoch": 1.01, "learning_rate": 4.8314851207186976e-05, "loss": 0.0721, "step": 7204 }, { "epoch": 1.01, "learning_rate": 4.831438330525922e-05, "loss": 0.1117, "step": 7206 }, { "epoch": 1.01, "learning_rate": 4.831391540333147e-05, "loss": 0.0617, "step": 7208 }, { "epoch": 1.01, "learning_rate": 4.831344750140371e-05, "loss": 0.0466, "step": 7210 }, { "epoch": 1.01, "learning_rate": 4.831297959947595e-05, "loss": 0.064, "step": 7212 }, { "epoch": 1.01, "learning_rate": 4.83125116975482e-05, "loss": 0.0715, "step": 7214 }, { "epoch": 1.01, "learning_rate": 4.8312043795620445e-05, "loss": 0.0638, "step": 7216 }, { "epoch": 1.01, "learning_rate": 4.8311575893692684e-05, "loss": 0.0467, "step": 7218 }, { "epoch": 1.01, "learning_rate": 4.831110799176492e-05, "loss": 0.0772, "step": 7220 }, { "epoch": 1.01, "learning_rate": 4.8310640089837176e-05, "loss": 0.0694, "step": 7222 }, { "epoch": 1.01, "learning_rate": 4.8310172187909415e-05, "loss": 0.0674, "step": 7224 }, { "epoch": 1.01, "learning_rate": 4.830970428598166e-05, "loss": 0.0755, "step": 7226 }, { "epoch": 1.01, "learning_rate": 4.83092363840539e-05, "loss": 0.0662, "step": 7228 }, { "epoch": 1.01, "learning_rate": 4.8308768482126146e-05, "loss": 0.0614, "step": 7230 }, { "epoch": 1.02, "learning_rate": 4.830830058019839e-05, "loss": 0.0579, "step": 7232 }, { "epoch": 1.02, "learning_rate": 4.830783267827064e-05, "loss": 0.0823, "step": 7234 }, { "epoch": 1.02, "learning_rate": 4.830736477634288e-05, "loss": 0.0653, "step": 7236 }, { "epoch": 1.02, "learning_rate": 4.830689687441512e-05, "loss": 0.0713, "step": 7238 }, { "epoch": 1.02, "learning_rate": 4.830642897248737e-05, "loss": 0.0644, "step": 7240 }, { "epoch": 1.02, "learning_rate": 4.8305961070559614e-05, "loss": 0.0711, "step": 7242 }, { "epoch": 1.02, "learning_rate": 4.8305493168631854e-05, "loss": 0.0618, "step": 7244 }, { "epoch": 1.02, "learning_rate": 4.83050252667041e-05, "loss": 0.0537, "step": 7246 }, { "epoch": 1.02, "learning_rate": 4.8304557364776345e-05, "loss": 0.0449, "step": 7248 }, { "epoch": 1.02, "learning_rate": 4.830408946284859e-05, "loss": 0.0807, "step": 7250 }, { "epoch": 1.02, "learning_rate": 4.830362156092083e-05, "loss": 0.0597, "step": 7252 }, { "epoch": 1.02, "learning_rate": 4.8303153658993076e-05, "loss": 0.067, "step": 7254 }, { "epoch": 1.02, "learning_rate": 4.830268575706532e-05, "loss": 0.0769, "step": 7256 }, { "epoch": 1.02, "learning_rate": 4.830221785513757e-05, "loss": 0.0526, "step": 7258 }, { "epoch": 1.02, "learning_rate": 4.830174995320981e-05, "loss": 0.0624, "step": 7260 }, { "epoch": 1.02, "learning_rate": 4.830128205128205e-05, "loss": 0.0553, "step": 7262 }, { "epoch": 1.02, "learning_rate": 4.830081414935429e-05, "loss": 0.0978, "step": 7264 }, { "epoch": 1.02, "learning_rate": 4.8300346247426545e-05, "loss": 0.0651, "step": 7266 }, { "epoch": 1.02, "learning_rate": 4.8299878345498784e-05, "loss": 0.0584, "step": 7268 }, { "epoch": 1.02, "learning_rate": 4.829941044357103e-05, "loss": 0.066, "step": 7270 }, { "epoch": 1.02, "learning_rate": 4.829894254164327e-05, "loss": 0.0597, "step": 7272 }, { "epoch": 1.02, "learning_rate": 4.829847463971552e-05, "loss": 0.0579, "step": 7274 }, { "epoch": 1.02, "learning_rate": 4.829800673778776e-05, "loss": 0.0657, "step": 7276 }, { "epoch": 1.02, "learning_rate": 4.829753883586001e-05, "loss": 0.0558, "step": 7278 }, { "epoch": 1.02, "learning_rate": 4.8297070933932246e-05, "loss": 0.0477, "step": 7280 }, { "epoch": 1.02, "learning_rate": 4.829660303200449e-05, "loss": 0.0681, "step": 7282 }, { "epoch": 1.02, "learning_rate": 4.829613513007674e-05, "loss": 0.0579, "step": 7284 }, { "epoch": 1.02, "learning_rate": 4.8295667228148984e-05, "loss": 0.0725, "step": 7286 }, { "epoch": 1.02, "learning_rate": 4.829519932622122e-05, "loss": 0.0675, "step": 7288 }, { "epoch": 1.02, "learning_rate": 4.829473142429347e-05, "loss": 0.0747, "step": 7290 }, { "epoch": 1.02, "learning_rate": 4.8294263522365715e-05, "loss": 0.061, "step": 7292 }, { "epoch": 1.02, "learning_rate": 4.829379562043796e-05, "loss": 0.0767, "step": 7294 }, { "epoch": 1.02, "learning_rate": 4.82933277185102e-05, "loss": 0.0563, "step": 7296 }, { "epoch": 1.02, "learning_rate": 4.8292859816582446e-05, "loss": 0.0656, "step": 7298 }, { "epoch": 1.02, "learning_rate": 4.829239191465469e-05, "loss": 0.0674, "step": 7300 }, { "epoch": 1.02, "learning_rate": 4.829192401272694e-05, "loss": 0.0565, "step": 7302 }, { "epoch": 1.03, "learning_rate": 4.829145611079918e-05, "loss": 0.0848, "step": 7304 }, { "epoch": 1.03, "learning_rate": 4.829098820887142e-05, "loss": 0.0647, "step": 7306 }, { "epoch": 1.03, "learning_rate": 4.829052030694366e-05, "loss": 0.1279, "step": 7308 }, { "epoch": 1.03, "learning_rate": 4.8290052405015914e-05, "loss": 0.1035, "step": 7310 }, { "epoch": 1.03, "learning_rate": 4.8289584503088154e-05, "loss": 0.0651, "step": 7312 }, { "epoch": 1.03, "learning_rate": 4.82891166011604e-05, "loss": 0.0533, "step": 7314 }, { "epoch": 1.03, "learning_rate": 4.828864869923264e-05, "loss": 0.0539, "step": 7316 }, { "epoch": 1.03, "learning_rate": 4.828818079730489e-05, "loss": 0.0557, "step": 7318 }, { "epoch": 1.03, "learning_rate": 4.828771289537713e-05, "loss": 0.0614, "step": 7320 }, { "epoch": 1.03, "learning_rate": 4.8287244993449376e-05, "loss": 0.0697, "step": 7322 }, { "epoch": 1.03, "learning_rate": 4.8286777091521616e-05, "loss": 0.0604, "step": 7324 }, { "epoch": 1.03, "learning_rate": 4.828630918959386e-05, "loss": 0.0769, "step": 7326 }, { "epoch": 1.03, "learning_rate": 4.828584128766611e-05, "loss": 0.0574, "step": 7328 }, { "epoch": 1.03, "learning_rate": 4.828537338573835e-05, "loss": 0.0731, "step": 7330 }, { "epoch": 1.03, "learning_rate": 4.828490548381059e-05, "loss": 0.0723, "step": 7332 }, { "epoch": 1.03, "learning_rate": 4.828443758188284e-05, "loss": 0.0887, "step": 7334 }, { "epoch": 1.03, "learning_rate": 4.8283969679955084e-05, "loss": 0.0518, "step": 7336 }, { "epoch": 1.03, "learning_rate": 4.828350177802733e-05, "loss": 0.0574, "step": 7338 }, { "epoch": 1.03, "learning_rate": 4.828303387609957e-05, "loss": 0.0467, "step": 7340 }, { "epoch": 1.03, "learning_rate": 4.8282565974171815e-05, "loss": 0.0675, "step": 7342 }, { "epoch": 1.03, "learning_rate": 4.828209807224406e-05, "loss": 0.0528, "step": 7344 }, { "epoch": 1.03, "learning_rate": 4.828163017031631e-05, "loss": 0.0507, "step": 7346 }, { "epoch": 1.03, "learning_rate": 4.8281162268388546e-05, "loss": 0.0674, "step": 7348 }, { "epoch": 1.03, "learning_rate": 4.828069436646079e-05, "loss": 0.069, "step": 7350 }, { "epoch": 1.03, "learning_rate": 4.828022646453304e-05, "loss": 0.0562, "step": 7352 }, { "epoch": 1.03, "learning_rate": 4.8279758562605284e-05, "loss": 0.0706, "step": 7354 }, { "epoch": 1.03, "learning_rate": 4.827929066067752e-05, "loss": 0.0788, "step": 7356 }, { "epoch": 1.03, "learning_rate": 4.827882275874977e-05, "loss": 0.057, "step": 7358 }, { "epoch": 1.03, "learning_rate": 4.827835485682201e-05, "loss": 0.0555, "step": 7360 }, { "epoch": 1.03, "learning_rate": 4.827788695489426e-05, "loss": 0.0682, "step": 7362 }, { "epoch": 1.03, "learning_rate": 4.82774190529665e-05, "loss": 0.0704, "step": 7364 }, { "epoch": 1.03, "learning_rate": 4.8276951151038746e-05, "loss": 0.0572, "step": 7366 }, { "epoch": 1.03, "learning_rate": 4.8276483249110985e-05, "loss": 0.0653, "step": 7368 }, { "epoch": 1.03, "learning_rate": 4.827601534718324e-05, "loss": 0.0529, "step": 7370 }, { "epoch": 1.03, "learning_rate": 4.827554744525548e-05, "loss": 0.0839, "step": 7372 }, { "epoch": 1.04, "learning_rate": 4.827507954332772e-05, "loss": 0.054, "step": 7374 }, { "epoch": 1.04, "learning_rate": 4.827461164139996e-05, "loss": 0.0571, "step": 7376 }, { "epoch": 1.04, "learning_rate": 4.827414373947221e-05, "loss": 0.0714, "step": 7378 }, { "epoch": 1.04, "learning_rate": 4.8273675837544454e-05, "loss": 0.0643, "step": 7380 }, { "epoch": 1.04, "learning_rate": 4.82732079356167e-05, "loss": 0.0683, "step": 7382 }, { "epoch": 1.04, "learning_rate": 4.827274003368894e-05, "loss": 0.0637, "step": 7384 }, { "epoch": 1.04, "learning_rate": 4.8272272131761185e-05, "loss": 0.0618, "step": 7386 }, { "epoch": 1.04, "learning_rate": 4.827180422983343e-05, "loss": 0.0454, "step": 7388 }, { "epoch": 1.04, "learning_rate": 4.8271336327905676e-05, "loss": 0.0771, "step": 7390 }, { "epoch": 1.04, "learning_rate": 4.8270868425977916e-05, "loss": 0.0571, "step": 7392 }, { "epoch": 1.04, "learning_rate": 4.827040052405016e-05, "loss": 0.0552, "step": 7394 }, { "epoch": 1.04, "learning_rate": 4.826993262212241e-05, "loss": 0.0479, "step": 7396 }, { "epoch": 1.04, "learning_rate": 4.826946472019465e-05, "loss": 0.0548, "step": 7398 }, { "epoch": 1.04, "learning_rate": 4.826899681826689e-05, "loss": 0.0895, "step": 7400 }, { "epoch": 1.04, "learning_rate": 4.826852891633914e-05, "loss": 0.061, "step": 7402 }, { "epoch": 1.04, "learning_rate": 4.8268061014411384e-05, "loss": 0.0498, "step": 7404 }, { "epoch": 1.04, "learning_rate": 4.826759311248363e-05, "loss": 0.0818, "step": 7406 }, { "epoch": 1.04, "learning_rate": 4.826712521055587e-05, "loss": 0.0608, "step": 7408 }, { "epoch": 1.04, "learning_rate": 4.8266657308628115e-05, "loss": 0.0578, "step": 7410 }, { "epoch": 1.04, "learning_rate": 4.8266189406700354e-05, "loss": 0.0586, "step": 7412 }, { "epoch": 1.04, "learning_rate": 4.826572150477261e-05, "loss": 0.0822, "step": 7414 }, { "epoch": 1.04, "learning_rate": 4.8265253602844846e-05, "loss": 0.0717, "step": 7416 }, { "epoch": 1.04, "learning_rate": 4.826478570091709e-05, "loss": 0.0689, "step": 7418 }, { "epoch": 1.04, "learning_rate": 4.826431779898933e-05, "loss": 0.0631, "step": 7420 }, { "epoch": 1.04, "learning_rate": 4.826384989706158e-05, "loss": 0.0611, "step": 7422 }, { "epoch": 1.04, "learning_rate": 4.826338199513382e-05, "loss": 0.0666, "step": 7424 }, { "epoch": 1.04, "learning_rate": 4.826291409320607e-05, "loss": 0.0727, "step": 7426 }, { "epoch": 1.04, "learning_rate": 4.826244619127831e-05, "loss": 0.0587, "step": 7428 }, { "epoch": 1.04, "learning_rate": 4.8261978289350554e-05, "loss": 0.0682, "step": 7430 }, { "epoch": 1.04, "learning_rate": 4.82615103874228e-05, "loss": 0.0822, "step": 7432 }, { "epoch": 1.04, "learning_rate": 4.8261042485495046e-05, "loss": 0.0738, "step": 7434 }, { "epoch": 1.04, "learning_rate": 4.8260574583567285e-05, "loss": 0.0635, "step": 7436 }, { "epoch": 1.04, "learning_rate": 4.826010668163953e-05, "loss": 0.0888, "step": 7438 }, { "epoch": 1.04, "learning_rate": 4.825963877971178e-05, "loss": 0.0693, "step": 7440 }, { "epoch": 1.04, "learning_rate": 4.825917087778402e-05, "loss": 0.0491, "step": 7442 }, { "epoch": 1.04, "learning_rate": 4.825870297585626e-05, "loss": 0.0827, "step": 7444 }, { "epoch": 1.05, "learning_rate": 4.825823507392851e-05, "loss": 0.0647, "step": 7446 }, { "epoch": 1.05, "learning_rate": 4.8257767172000754e-05, "loss": 0.0726, "step": 7448 }, { "epoch": 1.05, "learning_rate": 4.8257299270073e-05, "loss": 0.0706, "step": 7450 }, { "epoch": 1.05, "learning_rate": 4.825683136814524e-05, "loss": 0.0626, "step": 7452 }, { "epoch": 1.05, "learning_rate": 4.8256363466217485e-05, "loss": 0.0643, "step": 7454 }, { "epoch": 1.05, "learning_rate": 4.8255895564289724e-05, "loss": 0.077, "step": 7456 }, { "epoch": 1.05, "learning_rate": 4.8255427662361976e-05, "loss": 0.0779, "step": 7458 }, { "epoch": 1.05, "learning_rate": 4.8254959760434216e-05, "loss": 0.0888, "step": 7460 }, { "epoch": 1.05, "learning_rate": 4.825449185850646e-05, "loss": 0.0717, "step": 7462 }, { "epoch": 1.05, "learning_rate": 4.82540239565787e-05, "loss": 0.0695, "step": 7464 }, { "epoch": 1.05, "learning_rate": 4.825355605465095e-05, "loss": 0.0626, "step": 7466 }, { "epoch": 1.05, "learning_rate": 4.825308815272319e-05, "loss": 0.0669, "step": 7468 }, { "epoch": 1.05, "learning_rate": 4.825262025079544e-05, "loss": 0.0651, "step": 7470 }, { "epoch": 1.05, "learning_rate": 4.825215234886768e-05, "loss": 0.0729, "step": 7472 }, { "epoch": 1.05, "learning_rate": 4.825168444693992e-05, "loss": 0.0771, "step": 7474 }, { "epoch": 1.05, "learning_rate": 4.825121654501217e-05, "loss": 0.06, "step": 7476 }, { "epoch": 1.05, "learning_rate": 4.825074864308441e-05, "loss": 0.09, "step": 7478 }, { "epoch": 1.05, "learning_rate": 4.8250280741156654e-05, "loss": 0.0725, "step": 7480 }, { "epoch": 1.05, "learning_rate": 4.82498128392289e-05, "loss": 0.0557, "step": 7482 }, { "epoch": 1.05, "learning_rate": 4.8249344937301146e-05, "loss": 0.0807, "step": 7484 }, { "epoch": 1.05, "learning_rate": 4.8248877035373385e-05, "loss": 0.0546, "step": 7486 }, { "epoch": 1.05, "learning_rate": 4.824840913344563e-05, "loss": 0.0702, "step": 7488 }, { "epoch": 1.05, "learning_rate": 4.824794123151787e-05, "loss": 0.056, "step": 7490 }, { "epoch": 1.05, "learning_rate": 4.824747332959012e-05, "loss": 0.0478, "step": 7492 }, { "epoch": 1.05, "learning_rate": 4.824700542766236e-05, "loss": 0.1009, "step": 7494 }, { "epoch": 1.05, "learning_rate": 4.824653752573461e-05, "loss": 0.0793, "step": 7496 }, { "epoch": 1.05, "learning_rate": 4.824606962380685e-05, "loss": 0.0483, "step": 7498 }, { "epoch": 1.05, "learning_rate": 4.82456017218791e-05, "loss": 0.0788, "step": 7500 }, { "epoch": 1.05, "learning_rate": 4.824513381995134e-05, "loss": 0.057, "step": 7502 }, { "epoch": 1.05, "learning_rate": 4.8244665918023585e-05, "loss": 0.0492, "step": 7504 }, { "epoch": 1.05, "learning_rate": 4.8244198016095824e-05, "loss": 0.0532, "step": 7506 }, { "epoch": 1.05, "learning_rate": 4.824373011416807e-05, "loss": 0.0536, "step": 7508 }, { "epoch": 1.05, "learning_rate": 4.8243262212240316e-05, "loss": 0.0543, "step": 7510 }, { "epoch": 1.05, "learning_rate": 4.824279431031256e-05, "loss": 0.062, "step": 7512 }, { "epoch": 1.05, "learning_rate": 4.82423264083848e-05, "loss": 0.0596, "step": 7514 }, { "epoch": 1.06, "learning_rate": 4.824185850645705e-05, "loss": 0.062, "step": 7516 }, { "epoch": 1.06, "learning_rate": 4.824139060452929e-05, "loss": 0.0675, "step": 7518 }, { "epoch": 1.06, "learning_rate": 4.824092270260154e-05, "loss": 0.086, "step": 7520 }, { "epoch": 1.06, "learning_rate": 4.824045480067378e-05, "loss": 0.066, "step": 7522 }, { "epoch": 1.06, "learning_rate": 4.8239986898746024e-05, "loss": 0.0588, "step": 7524 }, { "epoch": 1.06, "learning_rate": 4.823951899681827e-05, "loss": 0.0607, "step": 7526 }, { "epoch": 1.06, "learning_rate": 4.8239051094890516e-05, "loss": 0.0704, "step": 7528 }, { "epoch": 1.06, "learning_rate": 4.8238583192962755e-05, "loss": 0.0591, "step": 7530 }, { "epoch": 1.06, "learning_rate": 4.8238115291035e-05, "loss": 0.06, "step": 7532 }, { "epoch": 1.06, "learning_rate": 4.8237647389107247e-05, "loss": 0.0442, "step": 7534 }, { "epoch": 1.06, "learning_rate": 4.823717948717949e-05, "loss": 0.0712, "step": 7536 }, { "epoch": 1.06, "learning_rate": 4.823671158525173e-05, "loss": 0.0796, "step": 7538 }, { "epoch": 1.06, "learning_rate": 4.823624368332398e-05, "loss": 0.069, "step": 7540 }, { "epoch": 1.06, "learning_rate": 4.8235775781396217e-05, "loss": 0.0548, "step": 7542 }, { "epoch": 1.06, "learning_rate": 4.823530787946847e-05, "loss": 0.0547, "step": 7544 }, { "epoch": 1.06, "learning_rate": 4.823483997754071e-05, "loss": 0.0631, "step": 7546 }, { "epoch": 1.06, "learning_rate": 4.8234372075612954e-05, "loss": 0.0475, "step": 7548 }, { "epoch": 1.06, "learning_rate": 4.8233904173685193e-05, "loss": 0.0955, "step": 7550 }, { "epoch": 1.06, "learning_rate": 4.8233436271757446e-05, "loss": 0.0594, "step": 7552 }, { "epoch": 1.06, "learning_rate": 4.8232968369829685e-05, "loss": 0.0649, "step": 7554 }, { "epoch": 1.06, "learning_rate": 4.823250046790193e-05, "loss": 0.0786, "step": 7556 }, { "epoch": 1.06, "learning_rate": 4.823203256597417e-05, "loss": 0.0765, "step": 7558 }, { "epoch": 1.06, "learning_rate": 4.8231564664046416e-05, "loss": 0.0757, "step": 7560 }, { "epoch": 1.06, "learning_rate": 4.823109676211866e-05, "loss": 0.0754, "step": 7562 }, { "epoch": 1.06, "learning_rate": 4.823062886019091e-05, "loss": 0.0841, "step": 7564 }, { "epoch": 1.06, "learning_rate": 4.823016095826315e-05, "loss": 0.0519, "step": 7566 }, { "epoch": 1.06, "learning_rate": 4.822969305633539e-05, "loss": 0.0816, "step": 7568 }, { "epoch": 1.06, "learning_rate": 4.822922515440764e-05, "loss": 0.0679, "step": 7570 }, { "epoch": 1.06, "learning_rate": 4.8228757252479885e-05, "loss": 0.0718, "step": 7572 }, { "epoch": 1.06, "learning_rate": 4.8228289350552124e-05, "loss": 0.0536, "step": 7574 }, { "epoch": 1.06, "learning_rate": 4.822782144862437e-05, "loss": 0.0623, "step": 7576 }, { "epoch": 1.06, "learning_rate": 4.8227353546696616e-05, "loss": 0.0431, "step": 7578 }, { "epoch": 1.06, "learning_rate": 4.822688564476886e-05, "loss": 0.0596, "step": 7580 }, { "epoch": 1.06, "learning_rate": 4.82264177428411e-05, "loss": 0.0538, "step": 7582 }, { "epoch": 1.06, "learning_rate": 4.822594984091335e-05, "loss": 0.0893, "step": 7584 }, { "epoch": 1.06, "learning_rate": 4.822548193898559e-05, "loss": 0.0555, "step": 7586 }, { "epoch": 1.07, "learning_rate": 4.822501403705784e-05, "loss": 0.0734, "step": 7588 }, { "epoch": 1.07, "learning_rate": 4.822454613513008e-05, "loss": 0.0567, "step": 7590 }, { "epoch": 1.07, "learning_rate": 4.8224078233202324e-05, "loss": 0.064, "step": 7592 }, { "epoch": 1.07, "learning_rate": 4.822361033127456e-05, "loss": 0.065, "step": 7594 }, { "epoch": 1.07, "learning_rate": 4.8223142429346816e-05, "loss": 0.0534, "step": 7596 }, { "epoch": 1.07, "learning_rate": 4.8222674527419055e-05, "loss": 0.0747, "step": 7598 }, { "epoch": 1.07, "learning_rate": 4.82222066254913e-05, "loss": 0.0647, "step": 7600 }, { "epoch": 1.07, "learning_rate": 4.822173872356354e-05, "loss": 0.0856, "step": 7602 }, { "epoch": 1.07, "learning_rate": 4.8221270821635786e-05, "loss": 0.0707, "step": 7604 }, { "epoch": 1.07, "learning_rate": 4.822080291970803e-05, "loss": 0.0771, "step": 7606 }, { "epoch": 1.07, "learning_rate": 4.822033501778028e-05, "loss": 0.0515, "step": 7608 }, { "epoch": 1.07, "learning_rate": 4.821986711585252e-05, "loss": 0.0551, "step": 7610 }, { "epoch": 1.07, "learning_rate": 4.821939921392476e-05, "loss": 0.0631, "step": 7612 }, { "epoch": 1.07, "learning_rate": 4.821893131199701e-05, "loss": 0.0606, "step": 7614 }, { "epoch": 1.07, "learning_rate": 4.8218463410069254e-05, "loss": 0.0536, "step": 7616 }, { "epoch": 1.07, "learning_rate": 4.8217995508141493e-05, "loss": 0.0663, "step": 7618 }, { "epoch": 1.07, "learning_rate": 4.821752760621374e-05, "loss": 0.0762, "step": 7620 }, { "epoch": 1.07, "learning_rate": 4.8217059704285985e-05, "loss": 0.0706, "step": 7622 }, { "epoch": 1.07, "learning_rate": 4.821659180235823e-05, "loss": 0.0515, "step": 7624 }, { "epoch": 1.07, "learning_rate": 4.821612390043047e-05, "loss": 0.0671, "step": 7626 }, { "epoch": 1.07, "learning_rate": 4.8215655998502716e-05, "loss": 0.0682, "step": 7628 }, { "epoch": 1.07, "learning_rate": 4.821518809657496e-05, "loss": 0.0613, "step": 7630 }, { "epoch": 1.07, "learning_rate": 4.821472019464721e-05, "loss": 0.0691, "step": 7632 }, { "epoch": 1.07, "learning_rate": 4.821425229271945e-05, "loss": 0.0641, "step": 7634 }, { "epoch": 1.07, "learning_rate": 4.821378439079169e-05, "loss": 0.0649, "step": 7636 }, { "epoch": 1.07, "learning_rate": 4.821331648886393e-05, "loss": 0.0827, "step": 7638 }, { "epoch": 1.07, "learning_rate": 4.8212848586936185e-05, "loss": 0.0626, "step": 7640 }, { "epoch": 1.07, "learning_rate": 4.8212380685008424e-05, "loss": 0.0616, "step": 7642 }, { "epoch": 1.07, "learning_rate": 4.821191278308067e-05, "loss": 0.0842, "step": 7644 }, { "epoch": 1.07, "learning_rate": 4.821144488115291e-05, "loss": 0.0791, "step": 7646 }, { "epoch": 1.07, "learning_rate": 4.821097697922516e-05, "loss": 0.0669, "step": 7648 }, { "epoch": 1.07, "learning_rate": 4.82105090772974e-05, "loss": 0.0816, "step": 7650 }, { "epoch": 1.07, "learning_rate": 4.821004117536965e-05, "loss": 0.0793, "step": 7652 }, { "epoch": 1.07, "learning_rate": 4.8209573273441886e-05, "loss": 0.0845, "step": 7654 }, { "epoch": 1.07, "learning_rate": 4.820910537151413e-05, "loss": 0.0449, "step": 7656 }, { "epoch": 1.07, "learning_rate": 4.820863746958638e-05, "loss": 0.0511, "step": 7658 }, { "epoch": 1.08, "learning_rate": 4.8208169567658624e-05, "loss": 0.0566, "step": 7660 }, { "epoch": 1.08, "learning_rate": 4.820770166573086e-05, "loss": 0.0652, "step": 7662 }, { "epoch": 1.08, "learning_rate": 4.820723376380311e-05, "loss": 0.0824, "step": 7664 }, { "epoch": 1.08, "learning_rate": 4.8206765861875355e-05, "loss": 0.0643, "step": 7666 }, { "epoch": 1.08, "learning_rate": 4.82062979599476e-05, "loss": 0.0708, "step": 7668 }, { "epoch": 1.08, "learning_rate": 4.820583005801984e-05, "loss": 0.0625, "step": 7670 }, { "epoch": 1.08, "learning_rate": 4.8205362156092086e-05, "loss": 0.079, "step": 7672 }, { "epoch": 1.08, "learning_rate": 4.820489425416433e-05, "loss": 0.082, "step": 7674 }, { "epoch": 1.08, "learning_rate": 4.820442635223658e-05, "loss": 0.05, "step": 7676 }, { "epoch": 1.08, "learning_rate": 4.820395845030882e-05, "loss": 0.0489, "step": 7678 }, { "epoch": 1.08, "learning_rate": 4.820349054838106e-05, "loss": 0.0706, "step": 7680 }, { "epoch": 1.08, "learning_rate": 4.820302264645331e-05, "loss": 0.0685, "step": 7682 }, { "epoch": 1.08, "learning_rate": 4.8202554744525554e-05, "loss": 0.0767, "step": 7684 }, { "epoch": 1.08, "learning_rate": 4.8202086842597794e-05, "loss": 0.0713, "step": 7686 }, { "epoch": 1.08, "learning_rate": 4.820161894067004e-05, "loss": 0.0667, "step": 7688 }, { "epoch": 1.08, "learning_rate": 4.820115103874228e-05, "loss": 0.0843, "step": 7690 }, { "epoch": 1.08, "learning_rate": 4.820068313681453e-05, "loss": 0.0671, "step": 7692 }, { "epoch": 1.08, "learning_rate": 4.820021523488677e-05, "loss": 0.0672, "step": 7694 }, { "epoch": 1.08, "learning_rate": 4.8199747332959016e-05, "loss": 0.0553, "step": 7696 }, { "epoch": 1.08, "learning_rate": 4.8199279431031255e-05, "loss": 0.0708, "step": 7698 }, { "epoch": 1.08, "learning_rate": 4.81988115291035e-05, "loss": 0.0711, "step": 7700 }, { "epoch": 1.08, "learning_rate": 4.819834362717575e-05, "loss": 0.0792, "step": 7702 }, { "epoch": 1.08, "learning_rate": 4.819787572524799e-05, "loss": 0.0559, "step": 7704 }, { "epoch": 1.08, "learning_rate": 4.819740782332023e-05, "loss": 0.0718, "step": 7706 }, { "epoch": 1.08, "learning_rate": 4.819693992139248e-05, "loss": 0.0542, "step": 7708 }, { "epoch": 1.08, "learning_rate": 4.8196472019464724e-05, "loss": 0.0631, "step": 7710 }, { "epoch": 1.08, "learning_rate": 4.819600411753697e-05, "loss": 0.0861, "step": 7712 }, { "epoch": 1.08, "learning_rate": 4.819553621560921e-05, "loss": 0.0579, "step": 7714 }, { "epoch": 1.08, "learning_rate": 4.8195068313681455e-05, "loss": 0.0659, "step": 7716 }, { "epoch": 1.08, "learning_rate": 4.81946004117537e-05, "loss": 0.08, "step": 7718 }, { "epoch": 1.08, "learning_rate": 4.819413250982595e-05, "loss": 0.0688, "step": 7720 }, { "epoch": 1.08, "learning_rate": 4.8193664607898186e-05, "loss": 0.0613, "step": 7722 }, { "epoch": 1.08, "learning_rate": 4.819319670597043e-05, "loss": 0.0559, "step": 7724 }, { "epoch": 1.08, "learning_rate": 4.819272880404268e-05, "loss": 0.0671, "step": 7726 }, { "epoch": 1.08, "learning_rate": 4.819226090211492e-05, "loss": 0.0881, "step": 7728 }, { "epoch": 1.09, "learning_rate": 4.819179300018716e-05, "loss": 0.0615, "step": 7730 }, { "epoch": 1.09, "learning_rate": 4.81913250982594e-05, "loss": 0.0512, "step": 7732 }, { "epoch": 1.09, "learning_rate": 4.819085719633165e-05, "loss": 0.0774, "step": 7734 }, { "epoch": 1.09, "learning_rate": 4.8190389294403894e-05, "loss": 0.0809, "step": 7736 }, { "epoch": 1.09, "learning_rate": 4.818992139247614e-05, "loss": 0.0522, "step": 7738 }, { "epoch": 1.09, "learning_rate": 4.818945349054838e-05, "loss": 0.0971, "step": 7740 }, { "epoch": 1.09, "learning_rate": 4.8188985588620625e-05, "loss": 0.0542, "step": 7742 }, { "epoch": 1.09, "learning_rate": 4.818851768669287e-05, "loss": 0.0455, "step": 7744 }, { "epoch": 1.09, "learning_rate": 4.818804978476512e-05, "loss": 0.0589, "step": 7746 }, { "epoch": 1.09, "learning_rate": 4.8187581882837356e-05, "loss": 0.0563, "step": 7748 }, { "epoch": 1.09, "learning_rate": 4.81871139809096e-05, "loss": 0.0941, "step": 7750 }, { "epoch": 1.09, "learning_rate": 4.818664607898185e-05, "loss": 0.0742, "step": 7752 }, { "epoch": 1.09, "learning_rate": 4.8186178177054094e-05, "loss": 0.0508, "step": 7754 }, { "epoch": 1.09, "learning_rate": 4.818571027512633e-05, "loss": 0.0669, "step": 7756 }, { "epoch": 1.09, "learning_rate": 4.818524237319858e-05, "loss": 0.0881, "step": 7758 }, { "epoch": 1.09, "learning_rate": 4.8184774471270824e-05, "loss": 0.0674, "step": 7760 }, { "epoch": 1.09, "learning_rate": 4.818430656934307e-05, "loss": 0.0609, "step": 7762 }, { "epoch": 1.09, "learning_rate": 4.818383866741531e-05, "loss": 0.0733, "step": 7764 }, { "epoch": 1.09, "learning_rate": 4.8183370765487555e-05, "loss": 0.0565, "step": 7766 }, { "epoch": 1.09, "learning_rate": 4.8182902863559795e-05, "loss": 0.0612, "step": 7768 }, { "epoch": 1.09, "learning_rate": 4.818243496163205e-05, "loss": 0.0705, "step": 7770 }, { "epoch": 1.09, "learning_rate": 4.8181967059704286e-05, "loss": 0.0761, "step": 7772 }, { "epoch": 1.09, "learning_rate": 4.818149915777653e-05, "loss": 0.0654, "step": 7774 }, { "epoch": 1.09, "learning_rate": 4.818103125584877e-05, "loss": 0.0732, "step": 7776 }, { "epoch": 1.09, "learning_rate": 4.8180563353921024e-05, "loss": 0.0572, "step": 7778 }, { "epoch": 1.09, "learning_rate": 4.818009545199326e-05, "loss": 0.0724, "step": 7780 }, { "epoch": 1.09, "learning_rate": 4.817962755006551e-05, "loss": 0.0695, "step": 7782 }, { "epoch": 1.09, "learning_rate": 4.817915964813775e-05, "loss": 0.0701, "step": 7784 }, { "epoch": 1.09, "learning_rate": 4.8178691746209994e-05, "loss": 0.0616, "step": 7786 }, { "epoch": 1.09, "learning_rate": 4.817822384428224e-05, "loss": 0.07, "step": 7788 }, { "epoch": 1.09, "learning_rate": 4.8177755942354486e-05, "loss": 0.057, "step": 7790 }, { "epoch": 1.09, "learning_rate": 4.8177288040426725e-05, "loss": 0.0552, "step": 7792 }, { "epoch": 1.09, "learning_rate": 4.817682013849897e-05, "loss": 0.0599, "step": 7794 }, { "epoch": 1.09, "learning_rate": 4.817635223657122e-05, "loss": 0.0666, "step": 7796 }, { "epoch": 1.09, "learning_rate": 4.817588433464346e-05, "loss": 0.0762, "step": 7798 }, { "epoch": 1.09, "learning_rate": 4.81754164327157e-05, "loss": 0.078, "step": 7800 }, { "epoch": 1.1, "learning_rate": 4.817494853078795e-05, "loss": 0.0919, "step": 7802 }, { "epoch": 1.1, "learning_rate": 4.8174480628860194e-05, "loss": 0.0589, "step": 7804 }, { "epoch": 1.1, "learning_rate": 4.817401272693244e-05, "loss": 0.0794, "step": 7806 }, { "epoch": 1.1, "learning_rate": 4.817354482500468e-05, "loss": 0.0589, "step": 7808 }, { "epoch": 1.1, "learning_rate": 4.8173076923076925e-05, "loss": 0.06, "step": 7810 }, { "epoch": 1.1, "learning_rate": 4.817260902114917e-05, "loss": 0.0764, "step": 7812 }, { "epoch": 1.1, "learning_rate": 4.817214111922142e-05, "loss": 0.0708, "step": 7814 }, { "epoch": 1.1, "learning_rate": 4.8171673217293656e-05, "loss": 0.0765, "step": 7816 }, { "epoch": 1.1, "learning_rate": 4.81712053153659e-05, "loss": 0.0632, "step": 7818 }, { "epoch": 1.1, "learning_rate": 4.817073741343814e-05, "loss": 0.0682, "step": 7820 }, { "epoch": 1.1, "learning_rate": 4.8170269511510394e-05, "loss": 0.0869, "step": 7822 }, { "epoch": 1.1, "learning_rate": 4.816980160958263e-05, "loss": 0.0638, "step": 7824 }, { "epoch": 1.1, "learning_rate": 4.816933370765488e-05, "loss": 0.0583, "step": 7826 }, { "epoch": 1.1, "learning_rate": 4.816886580572712e-05, "loss": 0.0606, "step": 7828 }, { "epoch": 1.1, "learning_rate": 4.816839790379937e-05, "loss": 0.068, "step": 7830 }, { "epoch": 1.1, "learning_rate": 4.816793000187161e-05, "loss": 0.0684, "step": 7832 }, { "epoch": 1.1, "learning_rate": 4.8167462099943855e-05, "loss": 0.07, "step": 7834 }, { "epoch": 1.1, "learning_rate": 4.8166994198016095e-05, "loss": 0.0608, "step": 7836 }, { "epoch": 1.1, "learning_rate": 4.816652629608834e-05, "loss": 0.0557, "step": 7838 }, { "epoch": 1.1, "learning_rate": 4.8166058394160586e-05, "loss": 0.071, "step": 7840 }, { "epoch": 1.1, "learning_rate": 4.816559049223283e-05, "loss": 0.0844, "step": 7842 }, { "epoch": 1.1, "learning_rate": 4.816512259030507e-05, "loss": 0.0665, "step": 7844 }, { "epoch": 1.1, "learning_rate": 4.816465468837732e-05, "loss": 0.0669, "step": 7846 }, { "epoch": 1.1, "learning_rate": 4.816418678644956e-05, "loss": 0.0813, "step": 7848 }, { "epoch": 1.1, "learning_rate": 4.816371888452181e-05, "loss": 0.06, "step": 7850 }, { "epoch": 1.1, "learning_rate": 4.816325098259405e-05, "loss": 0.0722, "step": 7852 }, { "epoch": 1.1, "learning_rate": 4.8162783080666294e-05, "loss": 0.062, "step": 7854 }, { "epoch": 1.1, "learning_rate": 4.816231517873854e-05, "loss": 0.0721, "step": 7856 }, { "epoch": 1.1, "learning_rate": 4.8161847276810786e-05, "loss": 0.0635, "step": 7858 }, { "epoch": 1.1, "learning_rate": 4.8161379374883025e-05, "loss": 0.0516, "step": 7860 }, { "epoch": 1.1, "learning_rate": 4.816091147295527e-05, "loss": 0.0586, "step": 7862 }, { "epoch": 1.1, "learning_rate": 4.816044357102752e-05, "loss": 0.0866, "step": 7864 }, { "epoch": 1.1, "learning_rate": 4.815997566909976e-05, "loss": 0.1013, "step": 7866 }, { "epoch": 1.1, "learning_rate": 4.8159507767172e-05, "loss": 0.0496, "step": 7868 }, { "epoch": 1.1, "learning_rate": 4.815903986524425e-05, "loss": 0.056, "step": 7870 }, { "epoch": 1.1, "learning_rate": 4.815857196331649e-05, "loss": 0.0675, "step": 7872 }, { "epoch": 1.11, "learning_rate": 4.815810406138874e-05, "loss": 0.0529, "step": 7874 }, { "epoch": 1.11, "learning_rate": 4.815763615946098e-05, "loss": 0.0555, "step": 7876 }, { "epoch": 1.11, "learning_rate": 4.8157168257533225e-05, "loss": 0.054, "step": 7878 }, { "epoch": 1.11, "learning_rate": 4.8156700355605464e-05, "loss": 0.0417, "step": 7880 }, { "epoch": 1.11, "learning_rate": 4.815623245367771e-05, "loss": 0.0638, "step": 7882 }, { "epoch": 1.11, "learning_rate": 4.8155764551749956e-05, "loss": 0.0669, "step": 7884 }, { "epoch": 1.11, "learning_rate": 4.81552966498222e-05, "loss": 0.0656, "step": 7886 }, { "epoch": 1.11, "learning_rate": 4.815482874789444e-05, "loss": 0.0671, "step": 7888 }, { "epoch": 1.11, "learning_rate": 4.815436084596669e-05, "loss": 0.0731, "step": 7890 }, { "epoch": 1.11, "learning_rate": 4.815389294403893e-05, "loss": 0.0734, "step": 7892 }, { "epoch": 1.11, "learning_rate": 4.815342504211118e-05, "loss": 0.0656, "step": 7894 }, { "epoch": 1.11, "learning_rate": 4.815295714018342e-05, "loss": 0.0782, "step": 7896 }, { "epoch": 1.11, "learning_rate": 4.8152489238255664e-05, "loss": 0.0714, "step": 7898 }, { "epoch": 1.11, "learning_rate": 4.815202133632791e-05, "loss": 0.0672, "step": 7900 }, { "epoch": 1.11, "learning_rate": 4.8151553434400155e-05, "loss": 0.0649, "step": 7902 }, { "epoch": 1.11, "learning_rate": 4.8151085532472395e-05, "loss": 0.0899, "step": 7904 }, { "epoch": 1.11, "learning_rate": 4.815061763054464e-05, "loss": 0.0662, "step": 7906 }, { "epoch": 1.11, "learning_rate": 4.8150149728616886e-05, "loss": 0.0721, "step": 7908 }, { "epoch": 1.11, "learning_rate": 4.814968182668913e-05, "loss": 0.049, "step": 7910 }, { "epoch": 1.11, "learning_rate": 4.814921392476137e-05, "loss": 0.0706, "step": 7912 }, { "epoch": 1.11, "learning_rate": 4.814874602283362e-05, "loss": 0.0792, "step": 7914 }, { "epoch": 1.11, "learning_rate": 4.8148278120905857e-05, "loss": 0.0566, "step": 7916 }, { "epoch": 1.11, "learning_rate": 4.814781021897811e-05, "loss": 0.0589, "step": 7918 }, { "epoch": 1.11, "learning_rate": 4.814734231705035e-05, "loss": 0.064, "step": 7920 }, { "epoch": 1.11, "learning_rate": 4.8146874415122594e-05, "loss": 0.0704, "step": 7922 }, { "epoch": 1.11, "learning_rate": 4.8146406513194833e-05, "loss": 0.0591, "step": 7924 }, { "epoch": 1.11, "learning_rate": 4.8145938611267086e-05, "loss": 0.076, "step": 7926 }, { "epoch": 1.11, "learning_rate": 4.8145470709339325e-05, "loss": 0.0813, "step": 7928 }, { "epoch": 1.11, "learning_rate": 4.814500280741157e-05, "loss": 0.0558, "step": 7930 }, { "epoch": 1.11, "learning_rate": 4.814453490548381e-05, "loss": 0.0759, "step": 7932 }, { "epoch": 1.11, "learning_rate": 4.8144067003556056e-05, "loss": 0.0678, "step": 7934 }, { "epoch": 1.11, "learning_rate": 4.81435991016283e-05, "loss": 0.0739, "step": 7936 }, { "epoch": 1.11, "learning_rate": 4.814313119970055e-05, "loss": 0.0725, "step": 7938 }, { "epoch": 1.11, "learning_rate": 4.814266329777279e-05, "loss": 0.0775, "step": 7940 }, { "epoch": 1.11, "learning_rate": 4.814219539584503e-05, "loss": 0.0732, "step": 7942 }, { "epoch": 1.12, "learning_rate": 4.814172749391728e-05, "loss": 0.0624, "step": 7944 }, { "epoch": 1.12, "learning_rate": 4.8141259591989525e-05, "loss": 0.0711, "step": 7946 }, { "epoch": 1.12, "learning_rate": 4.8140791690061764e-05, "loss": 0.0646, "step": 7948 }, { "epoch": 1.12, "learning_rate": 4.814032378813401e-05, "loss": 0.0741, "step": 7950 }, { "epoch": 1.12, "learning_rate": 4.8139855886206256e-05, "loss": 0.059, "step": 7952 }, { "epoch": 1.12, "learning_rate": 4.81393879842785e-05, "loss": 0.0857, "step": 7954 }, { "epoch": 1.12, "learning_rate": 4.813892008235074e-05, "loss": 0.0581, "step": 7956 }, { "epoch": 1.12, "learning_rate": 4.813845218042299e-05, "loss": 0.0606, "step": 7958 }, { "epoch": 1.12, "learning_rate": 4.813798427849523e-05, "loss": 0.0702, "step": 7960 }, { "epoch": 1.12, "learning_rate": 4.813751637656748e-05, "loss": 0.0659, "step": 7962 }, { "epoch": 1.12, "learning_rate": 4.813704847463972e-05, "loss": 0.0766, "step": 7964 }, { "epoch": 1.12, "learning_rate": 4.8136580572711964e-05, "loss": 0.0675, "step": 7966 }, { "epoch": 1.12, "learning_rate": 4.81361126707842e-05, "loss": 0.0975, "step": 7968 }, { "epoch": 1.12, "learning_rate": 4.8135644768856455e-05, "loss": 0.0676, "step": 7970 }, { "epoch": 1.12, "learning_rate": 4.8135176866928695e-05, "loss": 0.0702, "step": 7972 }, { "epoch": 1.12, "learning_rate": 4.813470896500094e-05, "loss": 0.0593, "step": 7974 }, { "epoch": 1.12, "learning_rate": 4.813424106307318e-05, "loss": 0.0601, "step": 7976 }, { "epoch": 1.12, "learning_rate": 4.8133773161145426e-05, "loss": 0.067, "step": 7978 }, { "epoch": 1.12, "learning_rate": 4.813330525921767e-05, "loss": 0.0551, "step": 7980 }, { "epoch": 1.12, "learning_rate": 4.813283735728991e-05, "loss": 0.0796, "step": 7982 }, { "epoch": 1.12, "learning_rate": 4.8132369455362157e-05, "loss": 0.082, "step": 7984 }, { "epoch": 1.12, "learning_rate": 4.81319015534344e-05, "loss": 0.066, "step": 7986 }, { "epoch": 1.12, "learning_rate": 4.813143365150665e-05, "loss": 0.0799, "step": 7988 }, { "epoch": 1.12, "learning_rate": 4.813096574957889e-05, "loss": 0.071, "step": 7990 }, { "epoch": 1.12, "learning_rate": 4.8130497847651133e-05, "loss": 0.0782, "step": 7992 }, { "epoch": 1.12, "learning_rate": 4.813002994572338e-05, "loss": 0.0746, "step": 7994 }, { "epoch": 1.12, "learning_rate": 4.8129562043795625e-05, "loss": 0.088, "step": 7996 }, { "epoch": 1.12, "learning_rate": 4.8129094141867864e-05, "loss": 0.0548, "step": 7998 }, { "epoch": 1.12, "learning_rate": 4.812862623994011e-05, "loss": 0.0478, "step": 8000 }, { "epoch": 1.12, "eval_gen_len": 30.9483, "eval_loss": 1.0468480587005615, "eval_meteor": 0.0439, "eval_runtime": 14.548, "eval_samples_per_second": 3.987, "eval_steps_per_second": 0.55, "step": 8000 }, { "epoch": 1.12, "learning_rate": 4.812815833801235e-05, "loss": 0.05, "step": 8002 }, { "epoch": 1.12, "learning_rate": 4.81276904360846e-05, "loss": 0.0737, "step": 8004 }, { "epoch": 1.12, "learning_rate": 4.812722253415684e-05, "loss": 0.0783, "step": 8006 }, { "epoch": 1.12, "learning_rate": 4.812675463222909e-05, "loss": 0.0702, "step": 8008 }, { "epoch": 1.12, "learning_rate": 4.8126286730301326e-05, "loss": 0.0799, "step": 8010 }, { "epoch": 1.12, "learning_rate": 4.812581882837357e-05, "loss": 0.0586, "step": 8012 }, { "epoch": 1.12, "learning_rate": 4.812535092644582e-05, "loss": 0.0582, "step": 8014 }, { "epoch": 1.13, "learning_rate": 4.8124883024518064e-05, "loss": 0.0588, "step": 8016 }, { "epoch": 1.13, "learning_rate": 4.81244151225903e-05, "loss": 0.0802, "step": 8018 }, { "epoch": 1.13, "learning_rate": 4.812394722066255e-05, "loss": 0.0737, "step": 8020 }, { "epoch": 1.13, "learning_rate": 4.8123479318734795e-05, "loss": 0.0783, "step": 8022 }, { "epoch": 1.13, "learning_rate": 4.812301141680704e-05, "loss": 0.0774, "step": 8024 }, { "epoch": 1.13, "learning_rate": 4.812254351487928e-05, "loss": 0.0643, "step": 8026 }, { "epoch": 1.13, "learning_rate": 4.8122075612951526e-05, "loss": 0.0802, "step": 8028 }, { "epoch": 1.13, "learning_rate": 4.812160771102377e-05, "loss": 0.0561, "step": 8030 }, { "epoch": 1.13, "learning_rate": 4.812113980909602e-05, "loss": 0.0615, "step": 8032 }, { "epoch": 1.13, "learning_rate": 4.812067190716826e-05, "loss": 0.0594, "step": 8034 }, { "epoch": 1.13, "learning_rate": 4.81202040052405e-05, "loss": 0.0687, "step": 8036 }, { "epoch": 1.13, "learning_rate": 4.811973610331275e-05, "loss": 0.0634, "step": 8038 }, { "epoch": 1.13, "learning_rate": 4.8119268201384995e-05, "loss": 0.0599, "step": 8040 }, { "epoch": 1.13, "learning_rate": 4.8118800299457234e-05, "loss": 0.0585, "step": 8042 }, { "epoch": 1.13, "learning_rate": 4.811833239752948e-05, "loss": 0.0434, "step": 8044 }, { "epoch": 1.13, "learning_rate": 4.811786449560172e-05, "loss": 0.0796, "step": 8046 }, { "epoch": 1.13, "learning_rate": 4.811739659367397e-05, "loss": 0.0748, "step": 8048 }, { "epoch": 1.13, "learning_rate": 4.811692869174621e-05, "loss": 0.0529, "step": 8050 }, { "epoch": 1.13, "learning_rate": 4.8116460789818457e-05, "loss": 0.0687, "step": 8052 }, { "epoch": 1.13, "learning_rate": 4.8115992887890696e-05, "loss": 0.0586, "step": 8054 }, { "epoch": 1.13, "learning_rate": 4.811552498596295e-05, "loss": 0.0777, "step": 8056 }, { "epoch": 1.13, "learning_rate": 4.811505708403519e-05, "loss": 0.0652, "step": 8058 }, { "epoch": 1.13, "learning_rate": 4.8114589182107433e-05, "loss": 0.0725, "step": 8060 }, { "epoch": 1.13, "learning_rate": 4.811412128017967e-05, "loss": 0.0585, "step": 8062 }, { "epoch": 1.13, "learning_rate": 4.811365337825192e-05, "loss": 0.0635, "step": 8064 }, { "epoch": 1.13, "learning_rate": 4.8113185476324164e-05, "loss": 0.0741, "step": 8066 }, { "epoch": 1.13, "learning_rate": 4.811271757439641e-05, "loss": 0.0816, "step": 8068 }, { "epoch": 1.13, "learning_rate": 4.811224967246865e-05, "loss": 0.0608, "step": 8070 }, { "epoch": 1.13, "learning_rate": 4.8111781770540895e-05, "loss": 0.0733, "step": 8072 }, { "epoch": 1.13, "learning_rate": 4.811131386861314e-05, "loss": 0.0923, "step": 8074 }, { "epoch": 1.13, "learning_rate": 4.811084596668539e-05, "loss": 0.0607, "step": 8076 }, { "epoch": 1.13, "learning_rate": 4.8110378064757626e-05, "loss": 0.0649, "step": 8078 }, { "epoch": 1.13, "learning_rate": 4.810991016282987e-05, "loss": 0.0739, "step": 8080 }, { "epoch": 1.13, "learning_rate": 4.810944226090212e-05, "loss": 0.0613, "step": 8082 }, { "epoch": 1.13, "learning_rate": 4.8108974358974364e-05, "loss": 0.0989, "step": 8084 }, { "epoch": 1.14, "learning_rate": 4.81085064570466e-05, "loss": 0.0552, "step": 8086 }, { "epoch": 1.14, "learning_rate": 4.810803855511885e-05, "loss": 0.0609, "step": 8088 }, { "epoch": 1.14, "learning_rate": 4.8107570653191095e-05, "loss": 0.0698, "step": 8090 }, { "epoch": 1.14, "learning_rate": 4.810710275126334e-05, "loss": 0.0753, "step": 8092 }, { "epoch": 1.14, "learning_rate": 4.810663484933558e-05, "loss": 0.1152, "step": 8094 }, { "epoch": 1.14, "learning_rate": 4.8106166947407826e-05, "loss": 0.0786, "step": 8096 }, { "epoch": 1.14, "learning_rate": 4.8105699045480065e-05, "loss": 0.0664, "step": 8098 }, { "epoch": 1.14, "learning_rate": 4.810523114355232e-05, "loss": 0.0641, "step": 8100 }, { "epoch": 1.14, "learning_rate": 4.810476324162456e-05, "loss": 0.0837, "step": 8102 }, { "epoch": 1.14, "learning_rate": 4.81042953396968e-05, "loss": 0.0699, "step": 8104 }, { "epoch": 1.14, "learning_rate": 4.810382743776904e-05, "loss": 0.0884, "step": 8106 }, { "epoch": 1.14, "learning_rate": 4.8103359535841295e-05, "loss": 0.072, "step": 8108 }, { "epoch": 1.14, "learning_rate": 4.8102891633913534e-05, "loss": 0.0612, "step": 8110 }, { "epoch": 1.14, "learning_rate": 4.810242373198578e-05, "loss": 0.0544, "step": 8112 }, { "epoch": 1.14, "learning_rate": 4.810195583005802e-05, "loss": 0.0678, "step": 8114 }, { "epoch": 1.14, "learning_rate": 4.8101487928130265e-05, "loss": 0.07, "step": 8116 }, { "epoch": 1.14, "learning_rate": 4.810102002620251e-05, "loss": 0.0607, "step": 8118 }, { "epoch": 1.14, "learning_rate": 4.8100552124274757e-05, "loss": 0.0729, "step": 8120 }, { "epoch": 1.14, "learning_rate": 4.8100084222346996e-05, "loss": 0.0656, "step": 8122 }, { "epoch": 1.14, "learning_rate": 4.809961632041924e-05, "loss": 0.0468, "step": 8124 }, { "epoch": 1.14, "learning_rate": 4.809914841849149e-05, "loss": 0.0738, "step": 8126 }, { "epoch": 1.14, "learning_rate": 4.8098680516563733e-05, "loss": 0.0672, "step": 8128 }, { "epoch": 1.14, "learning_rate": 4.809821261463597e-05, "loss": 0.0643, "step": 8130 }, { "epoch": 1.14, "learning_rate": 4.809774471270822e-05, "loss": 0.0657, "step": 8132 }, { "epoch": 1.14, "learning_rate": 4.8097276810780464e-05, "loss": 0.0779, "step": 8134 }, { "epoch": 1.14, "learning_rate": 4.809680890885271e-05, "loss": 0.0668, "step": 8136 }, { "epoch": 1.14, "learning_rate": 4.809634100692495e-05, "loss": 0.067, "step": 8138 }, { "epoch": 1.14, "learning_rate": 4.8095873104997195e-05, "loss": 0.0589, "step": 8140 }, { "epoch": 1.14, "learning_rate": 4.809540520306944e-05, "loss": 0.0877, "step": 8142 }, { "epoch": 1.14, "learning_rate": 4.809493730114169e-05, "loss": 0.0608, "step": 8144 }, { "epoch": 1.14, "learning_rate": 4.8094469399213926e-05, "loss": 0.0513, "step": 8146 }, { "epoch": 1.14, "learning_rate": 4.809400149728617e-05, "loss": 0.0618, "step": 8148 }, { "epoch": 1.14, "learning_rate": 4.809353359535841e-05, "loss": 0.0819, "step": 8150 }, { "epoch": 1.14, "learning_rate": 4.8093065693430664e-05, "loss": 0.1021, "step": 8152 }, { "epoch": 1.14, "learning_rate": 4.80925977915029e-05, "loss": 0.0664, "step": 8154 }, { "epoch": 1.14, "learning_rate": 4.809212988957515e-05, "loss": 0.074, "step": 8156 }, { "epoch": 1.15, "learning_rate": 4.809166198764739e-05, "loss": 0.0618, "step": 8158 }, { "epoch": 1.15, "learning_rate": 4.8091194085719634e-05, "loss": 0.0829, "step": 8160 }, { "epoch": 1.15, "learning_rate": 4.809072618379188e-05, "loss": 0.0654, "step": 8162 }, { "epoch": 1.15, "learning_rate": 4.8090258281864126e-05, "loss": 0.0744, "step": 8164 }, { "epoch": 1.15, "learning_rate": 4.8089790379936365e-05, "loss": 0.083, "step": 8166 }, { "epoch": 1.15, "learning_rate": 4.808932247800861e-05, "loss": 0.0602, "step": 8168 }, { "epoch": 1.15, "learning_rate": 4.808885457608086e-05, "loss": 0.0708, "step": 8170 }, { "epoch": 1.15, "learning_rate": 4.80883866741531e-05, "loss": 0.0772, "step": 8172 }, { "epoch": 1.15, "learning_rate": 4.808791877222534e-05, "loss": 0.0678, "step": 8174 }, { "epoch": 1.15, "learning_rate": 4.808745087029759e-05, "loss": 0.0593, "step": 8176 }, { "epoch": 1.15, "learning_rate": 4.8086982968369834e-05, "loss": 0.0697, "step": 8178 }, { "epoch": 1.15, "learning_rate": 4.808651506644208e-05, "loss": 0.0583, "step": 8180 }, { "epoch": 1.15, "learning_rate": 4.808604716451432e-05, "loss": 0.1039, "step": 8182 }, { "epoch": 1.15, "learning_rate": 4.8085579262586565e-05, "loss": 0.0736, "step": 8184 }, { "epoch": 1.15, "learning_rate": 4.808511136065881e-05, "loss": 0.0623, "step": 8186 }, { "epoch": 1.15, "learning_rate": 4.8084643458731057e-05, "loss": 0.0537, "step": 8188 }, { "epoch": 1.15, "learning_rate": 4.8084175556803296e-05, "loss": 0.0584, "step": 8190 }, { "epoch": 1.15, "learning_rate": 4.808370765487554e-05, "loss": 0.0542, "step": 8192 }, { "epoch": 1.15, "learning_rate": 4.808323975294778e-05, "loss": 0.0765, "step": 8194 }, { "epoch": 1.15, "learning_rate": 4.8082771851020033e-05, "loss": 0.0796, "step": 8196 }, { "epoch": 1.15, "learning_rate": 4.808230394909227e-05, "loss": 0.0519, "step": 8198 }, { "epoch": 1.15, "learning_rate": 4.808183604716452e-05, "loss": 0.0654, "step": 8200 }, { "epoch": 1.15, "learning_rate": 4.808136814523676e-05, "loss": 0.0686, "step": 8202 }, { "epoch": 1.15, "learning_rate": 4.808090024330901e-05, "loss": 0.0627, "step": 8204 }, { "epoch": 1.15, "learning_rate": 4.808043234138125e-05, "loss": 0.0727, "step": 8206 }, { "epoch": 1.15, "learning_rate": 4.8079964439453495e-05, "loss": 0.0523, "step": 8208 }, { "epoch": 1.15, "learning_rate": 4.8079496537525735e-05, "loss": 0.0563, "step": 8210 }, { "epoch": 1.15, "learning_rate": 4.807902863559798e-05, "loss": 0.0895, "step": 8212 }, { "epoch": 1.15, "learning_rate": 4.8078560733670226e-05, "loss": 0.0592, "step": 8214 }, { "epoch": 1.15, "learning_rate": 4.807809283174247e-05, "loss": 0.0438, "step": 8216 }, { "epoch": 1.15, "learning_rate": 4.807762492981471e-05, "loss": 0.0556, "step": 8218 }, { "epoch": 1.15, "learning_rate": 4.807715702788696e-05, "loss": 0.063, "step": 8220 }, { "epoch": 1.15, "learning_rate": 4.80766891259592e-05, "loss": 0.0833, "step": 8222 }, { "epoch": 1.15, "learning_rate": 4.807622122403145e-05, "loss": 0.0723, "step": 8224 }, { "epoch": 1.15, "learning_rate": 4.807575332210369e-05, "loss": 0.051, "step": 8226 }, { "epoch": 1.15, "learning_rate": 4.8075285420175934e-05, "loss": 0.0813, "step": 8228 }, { "epoch": 1.16, "learning_rate": 4.807481751824818e-05, "loss": 0.0658, "step": 8230 }, { "epoch": 1.16, "learning_rate": 4.807434961632042e-05, "loss": 0.0587, "step": 8232 }, { "epoch": 1.16, "learning_rate": 4.8073881714392665e-05, "loss": 0.0742, "step": 8234 }, { "epoch": 1.16, "learning_rate": 4.8073413812464904e-05, "loss": 0.0613, "step": 8236 }, { "epoch": 1.16, "learning_rate": 4.807294591053716e-05, "loss": 0.0605, "step": 8238 }, { "epoch": 1.16, "learning_rate": 4.8072478008609396e-05, "loss": 0.0686, "step": 8240 }, { "epoch": 1.16, "learning_rate": 4.807201010668164e-05, "loss": 0.0761, "step": 8242 }, { "epoch": 1.16, "learning_rate": 4.807154220475388e-05, "loss": 0.0635, "step": 8244 }, { "epoch": 1.16, "learning_rate": 4.807107430282613e-05, "loss": 0.0915, "step": 8246 }, { "epoch": 1.16, "learning_rate": 4.807060640089837e-05, "loss": 0.0766, "step": 8248 }, { "epoch": 1.16, "learning_rate": 4.807013849897062e-05, "loss": 0.0665, "step": 8250 }, { "epoch": 1.16, "learning_rate": 4.806967059704286e-05, "loss": 0.0415, "step": 8252 }, { "epoch": 1.16, "learning_rate": 4.8069202695115104e-05, "loss": 0.0925, "step": 8254 }, { "epoch": 1.16, "learning_rate": 4.806873479318735e-05, "loss": 0.0664, "step": 8256 }, { "epoch": 1.16, "learning_rate": 4.8068266891259596e-05, "loss": 0.0654, "step": 8258 }, { "epoch": 1.16, "learning_rate": 4.8067798989331835e-05, "loss": 0.0634, "step": 8260 }, { "epoch": 1.16, "learning_rate": 4.806733108740408e-05, "loss": 0.0652, "step": 8262 }, { "epoch": 1.16, "learning_rate": 4.806686318547633e-05, "loss": 0.0618, "step": 8264 }, { "epoch": 1.16, "learning_rate": 4.806639528354857e-05, "loss": 0.0875, "step": 8266 }, { "epoch": 1.16, "learning_rate": 4.806592738162081e-05, "loss": 0.063, "step": 8268 }, { "epoch": 1.16, "learning_rate": 4.806545947969306e-05, "loss": 0.0749, "step": 8270 }, { "epoch": 1.16, "learning_rate": 4.8064991577765304e-05, "loss": 0.0755, "step": 8272 }, { "epoch": 1.16, "learning_rate": 4.806452367583755e-05, "loss": 0.0548, "step": 8274 }, { "epoch": 1.16, "learning_rate": 4.806405577390979e-05, "loss": 0.0591, "step": 8276 }, { "epoch": 1.16, "learning_rate": 4.8063587871982035e-05, "loss": 0.0816, "step": 8278 }, { "epoch": 1.16, "learning_rate": 4.8063119970054274e-05, "loss": 0.0765, "step": 8280 }, { "epoch": 1.16, "learning_rate": 4.8062652068126526e-05, "loss": 0.0692, "step": 8282 }, { "epoch": 1.16, "learning_rate": 4.8062184166198766e-05, "loss": 0.0758, "step": 8284 }, { "epoch": 1.16, "learning_rate": 4.806171626427101e-05, "loss": 0.0773, "step": 8286 }, { "epoch": 1.16, "learning_rate": 4.806124836234325e-05, "loss": 0.0591, "step": 8288 }, { "epoch": 1.16, "learning_rate": 4.80607804604155e-05, "loss": 0.0695, "step": 8290 }, { "epoch": 1.16, "learning_rate": 4.806031255848774e-05, "loss": 0.0794, "step": 8292 }, { "epoch": 1.16, "learning_rate": 4.805984465655999e-05, "loss": 0.0875, "step": 8294 }, { "epoch": 1.16, "learning_rate": 4.805937675463223e-05, "loss": 0.0798, "step": 8296 }, { "epoch": 1.16, "learning_rate": 4.805890885270447e-05, "loss": 0.0538, "step": 8298 }, { "epoch": 1.17, "learning_rate": 4.805844095077672e-05, "loss": 0.0534, "step": 8300 }, { "epoch": 1.17, "learning_rate": 4.8057973048848965e-05, "loss": 0.064, "step": 8302 }, { "epoch": 1.17, "learning_rate": 4.8057505146921204e-05, "loss": 0.0612, "step": 8304 }, { "epoch": 1.17, "learning_rate": 4.805703724499345e-05, "loss": 0.0589, "step": 8306 }, { "epoch": 1.17, "learning_rate": 4.8056569343065696e-05, "loss": 0.0721, "step": 8308 }, { "epoch": 1.17, "learning_rate": 4.805610144113794e-05, "loss": 0.0675, "step": 8310 }, { "epoch": 1.17, "learning_rate": 4.805563353921018e-05, "loss": 0.0714, "step": 8312 }, { "epoch": 1.17, "learning_rate": 4.805516563728243e-05, "loss": 0.069, "step": 8314 }, { "epoch": 1.17, "learning_rate": 4.805469773535467e-05, "loss": 0.09, "step": 8316 }, { "epoch": 1.17, "learning_rate": 4.805422983342692e-05, "loss": 0.0664, "step": 8318 }, { "epoch": 1.17, "learning_rate": 4.805376193149916e-05, "loss": 0.0713, "step": 8320 }, { "epoch": 1.17, "learning_rate": 4.8053294029571404e-05, "loss": 0.0632, "step": 8322 }, { "epoch": 1.17, "learning_rate": 4.805282612764364e-05, "loss": 0.0608, "step": 8324 }, { "epoch": 1.17, "learning_rate": 4.8052358225715896e-05, "loss": 0.0491, "step": 8326 }, { "epoch": 1.17, "learning_rate": 4.8051890323788135e-05, "loss": 0.0648, "step": 8328 }, { "epoch": 1.17, "learning_rate": 4.805142242186038e-05, "loss": 0.0589, "step": 8330 }, { "epoch": 1.17, "learning_rate": 4.805095451993262e-05, "loss": 0.0694, "step": 8332 }, { "epoch": 1.17, "learning_rate": 4.805048661800487e-05, "loss": 0.0937, "step": 8334 }, { "epoch": 1.17, "learning_rate": 4.805001871607711e-05, "loss": 0.0812, "step": 8336 }, { "epoch": 1.17, "learning_rate": 4.804955081414936e-05, "loss": 0.0723, "step": 8338 }, { "epoch": 1.17, "learning_rate": 4.80490829122216e-05, "loss": 0.0675, "step": 8340 }, { "epoch": 1.17, "learning_rate": 4.804861501029384e-05, "loss": 0.0632, "step": 8342 }, { "epoch": 1.17, "learning_rate": 4.804814710836609e-05, "loss": 0.0716, "step": 8344 }, { "epoch": 1.17, "learning_rate": 4.8047679206438335e-05, "loss": 0.0603, "step": 8346 }, { "epoch": 1.17, "learning_rate": 4.8047211304510574e-05, "loss": 0.0534, "step": 8348 }, { "epoch": 1.17, "learning_rate": 4.804674340258282e-05, "loss": 0.0696, "step": 8350 }, { "epoch": 1.17, "learning_rate": 4.8046275500655066e-05, "loss": 0.058, "step": 8352 }, { "epoch": 1.17, "learning_rate": 4.804580759872731e-05, "loss": 0.0739, "step": 8354 }, { "epoch": 1.17, "learning_rate": 4.804533969679955e-05, "loss": 0.0697, "step": 8356 }, { "epoch": 1.17, "learning_rate": 4.8044871794871796e-05, "loss": 0.0582, "step": 8358 }, { "epoch": 1.17, "learning_rate": 4.804440389294404e-05, "loss": 0.0533, "step": 8360 }, { "epoch": 1.17, "learning_rate": 4.804393599101629e-05, "loss": 0.086, "step": 8362 }, { "epoch": 1.17, "learning_rate": 4.804346808908853e-05, "loss": 0.0734, "step": 8364 }, { "epoch": 1.17, "learning_rate": 4.804300018716077e-05, "loss": 0.0668, "step": 8366 }, { "epoch": 1.17, "learning_rate": 4.804253228523302e-05, "loss": 0.0731, "step": 8368 }, { "epoch": 1.17, "learning_rate": 4.8042064383305265e-05, "loss": 0.0651, "step": 8370 }, { "epoch": 1.18, "learning_rate": 4.8041596481377504e-05, "loss": 0.0637, "step": 8372 }, { "epoch": 1.18, "learning_rate": 4.804112857944975e-05, "loss": 0.0726, "step": 8374 }, { "epoch": 1.18, "learning_rate": 4.804066067752199e-05, "loss": 0.0624, "step": 8376 }, { "epoch": 1.18, "learning_rate": 4.804019277559424e-05, "loss": 0.0543, "step": 8378 }, { "epoch": 1.18, "learning_rate": 4.803972487366648e-05, "loss": 0.0877, "step": 8380 }, { "epoch": 1.18, "learning_rate": 4.803925697173873e-05, "loss": 0.0859, "step": 8382 }, { "epoch": 1.18, "learning_rate": 4.8038789069810966e-05, "loss": 0.0646, "step": 8384 }, { "epoch": 1.18, "learning_rate": 4.803832116788322e-05, "loss": 0.0769, "step": 8386 }, { "epoch": 1.18, "learning_rate": 4.803785326595546e-05, "loss": 0.0606, "step": 8388 }, { "epoch": 1.18, "learning_rate": 4.8037385364027704e-05, "loss": 0.0608, "step": 8390 }, { "epoch": 1.18, "learning_rate": 4.803691746209994e-05, "loss": 0.0569, "step": 8392 }, { "epoch": 1.18, "learning_rate": 4.803644956017219e-05, "loss": 0.0498, "step": 8394 }, { "epoch": 1.18, "learning_rate": 4.8035981658244435e-05, "loss": 0.0683, "step": 8396 }, { "epoch": 1.18, "learning_rate": 4.803551375631668e-05, "loss": 0.0722, "step": 8398 }, { "epoch": 1.18, "learning_rate": 4.803504585438892e-05, "loss": 0.0843, "step": 8400 }, { "epoch": 1.18, "learning_rate": 4.8034577952461166e-05, "loss": 0.096, "step": 8402 }, { "epoch": 1.18, "learning_rate": 4.803411005053341e-05, "loss": 0.072, "step": 8404 }, { "epoch": 1.18, "learning_rate": 4.803364214860566e-05, "loss": 0.0681, "step": 8406 }, { "epoch": 1.18, "learning_rate": 4.80331742466779e-05, "loss": 0.0835, "step": 8408 }, { "epoch": 1.18, "learning_rate": 4.803270634475014e-05, "loss": 0.0779, "step": 8410 }, { "epoch": 1.18, "learning_rate": 4.803223844282239e-05, "loss": 0.087, "step": 8412 }, { "epoch": 1.18, "learning_rate": 4.8031770540894635e-05, "loss": 0.0525, "step": 8414 }, { "epoch": 1.18, "learning_rate": 4.8031302638966874e-05, "loss": 0.0683, "step": 8416 }, { "epoch": 1.18, "learning_rate": 4.803083473703912e-05, "loss": 0.0614, "step": 8418 }, { "epoch": 1.18, "learning_rate": 4.8030366835111366e-05, "loss": 0.0683, "step": 8420 }, { "epoch": 1.18, "learning_rate": 4.802989893318361e-05, "loss": 0.0632, "step": 8422 }, { "epoch": 1.18, "learning_rate": 4.802943103125585e-05, "loss": 0.0556, "step": 8424 }, { "epoch": 1.18, "learning_rate": 4.8028963129328097e-05, "loss": 0.0691, "step": 8426 }, { "epoch": 1.18, "learning_rate": 4.8028495227400336e-05, "loss": 0.0858, "step": 8428 }, { "epoch": 1.18, "learning_rate": 4.802802732547259e-05, "loss": 0.0655, "step": 8430 }, { "epoch": 1.18, "learning_rate": 4.802755942354483e-05, "loss": 0.0626, "step": 8432 }, { "epoch": 1.18, "learning_rate": 4.802709152161707e-05, "loss": 0.0795, "step": 8434 }, { "epoch": 1.18, "learning_rate": 4.802662361968931e-05, "loss": 0.0672, "step": 8436 }, { "epoch": 1.18, "learning_rate": 4.802615571776156e-05, "loss": 0.0803, "step": 8438 }, { "epoch": 1.18, "learning_rate": 4.8025687815833804e-05, "loss": 0.0785, "step": 8440 }, { "epoch": 1.19, "learning_rate": 4.802521991390605e-05, "loss": 0.0689, "step": 8442 }, { "epoch": 1.19, "learning_rate": 4.802475201197829e-05, "loss": 0.0563, "step": 8444 }, { "epoch": 1.19, "learning_rate": 4.8024284110050535e-05, "loss": 0.0754, "step": 8446 }, { "epoch": 1.19, "learning_rate": 4.802381620812278e-05, "loss": 0.0804, "step": 8448 }, { "epoch": 1.19, "learning_rate": 4.802334830619503e-05, "loss": 0.0577, "step": 8450 }, { "epoch": 1.19, "learning_rate": 4.8022880404267266e-05, "loss": 0.0665, "step": 8452 }, { "epoch": 1.19, "learning_rate": 4.802241250233951e-05, "loss": 0.062, "step": 8454 }, { "epoch": 1.19, "learning_rate": 4.802194460041176e-05, "loss": 0.0794, "step": 8456 }, { "epoch": 1.19, "learning_rate": 4.8021476698484004e-05, "loss": 0.0532, "step": 8458 }, { "epoch": 1.19, "learning_rate": 4.802100879655624e-05, "loss": 0.0724, "step": 8460 }, { "epoch": 1.19, "learning_rate": 4.802054089462849e-05, "loss": 0.0757, "step": 8462 }, { "epoch": 1.19, "learning_rate": 4.8020072992700735e-05, "loss": 0.0747, "step": 8464 }, { "epoch": 1.19, "learning_rate": 4.801960509077298e-05, "loss": 0.0794, "step": 8466 }, { "epoch": 1.19, "learning_rate": 4.801913718884522e-05, "loss": 0.0628, "step": 8468 }, { "epoch": 1.19, "learning_rate": 4.8018669286917466e-05, "loss": 0.0595, "step": 8470 }, { "epoch": 1.19, "learning_rate": 4.8018201384989705e-05, "loss": 0.0721, "step": 8472 }, { "epoch": 1.19, "learning_rate": 4.801773348306196e-05, "loss": 0.0644, "step": 8474 }, { "epoch": 1.19, "learning_rate": 4.80172655811342e-05, "loss": 0.0698, "step": 8476 }, { "epoch": 1.19, "learning_rate": 4.801679767920644e-05, "loss": 0.0772, "step": 8478 }, { "epoch": 1.19, "learning_rate": 4.801632977727868e-05, "loss": 0.0635, "step": 8480 }, { "epoch": 1.19, "learning_rate": 4.8015861875350935e-05, "loss": 0.0813, "step": 8482 }, { "epoch": 1.19, "learning_rate": 4.8015393973423174e-05, "loss": 0.0753, "step": 8484 }, { "epoch": 1.19, "learning_rate": 4.801492607149541e-05, "loss": 0.0805, "step": 8486 }, { "epoch": 1.19, "learning_rate": 4.801445816956766e-05, "loss": 0.0754, "step": 8488 }, { "epoch": 1.19, "learning_rate": 4.8013990267639905e-05, "loss": 0.0578, "step": 8490 }, { "epoch": 1.19, "learning_rate": 4.801352236571215e-05, "loss": 0.0782, "step": 8492 }, { "epoch": 1.19, "learning_rate": 4.801305446378439e-05, "loss": 0.0611, "step": 8494 }, { "epoch": 1.19, "learning_rate": 4.8012586561856636e-05, "loss": 0.0837, "step": 8496 }, { "epoch": 1.19, "learning_rate": 4.801211865992888e-05, "loss": 0.0929, "step": 8498 }, { "epoch": 1.19, "learning_rate": 4.801165075800113e-05, "loss": 0.0574, "step": 8500 }, { "epoch": 1.19, "learning_rate": 4.801118285607337e-05, "loss": 0.0785, "step": 8502 }, { "epoch": 1.19, "learning_rate": 4.801071495414561e-05, "loss": 0.0776, "step": 8504 }, { "epoch": 1.19, "learning_rate": 4.801024705221785e-05, "loss": 0.0673, "step": 8506 }, { "epoch": 1.19, "learning_rate": 4.8009779150290104e-05, "loss": 0.0857, "step": 8508 }, { "epoch": 1.19, "learning_rate": 4.8009311248362343e-05, "loss": 0.0825, "step": 8510 }, { "epoch": 1.19, "learning_rate": 4.800884334643459e-05, "loss": 0.0576, "step": 8512 }, { "epoch": 1.2, "learning_rate": 4.800837544450683e-05, "loss": 0.0612, "step": 8514 }, { "epoch": 1.2, "learning_rate": 4.800790754257908e-05, "loss": 0.0561, "step": 8516 }, { "epoch": 1.2, "learning_rate": 4.800743964065132e-05, "loss": 0.0654, "step": 8518 }, { "epoch": 1.2, "learning_rate": 4.8006971738723566e-05, "loss": 0.0605, "step": 8520 }, { "epoch": 1.2, "learning_rate": 4.8006503836795805e-05, "loss": 0.0577, "step": 8522 }, { "epoch": 1.2, "learning_rate": 4.800603593486805e-05, "loss": 0.0814, "step": 8524 }, { "epoch": 1.2, "learning_rate": 4.80055680329403e-05, "loss": 0.0724, "step": 8526 }, { "epoch": 1.2, "learning_rate": 4.800510013101254e-05, "loss": 0.0654, "step": 8528 }, { "epoch": 1.2, "learning_rate": 4.800463222908478e-05, "loss": 0.0666, "step": 8530 }, { "epoch": 1.2, "learning_rate": 4.800416432715703e-05, "loss": 0.0558, "step": 8532 }, { "epoch": 1.2, "learning_rate": 4.8003696425229274e-05, "loss": 0.0732, "step": 8534 }, { "epoch": 1.2, "learning_rate": 4.800322852330152e-05, "loss": 0.0567, "step": 8536 }, { "epoch": 1.2, "learning_rate": 4.800276062137376e-05, "loss": 0.0474, "step": 8538 }, { "epoch": 1.2, "learning_rate": 4.8002292719446005e-05, "loss": 0.0519, "step": 8540 }, { "epoch": 1.2, "learning_rate": 4.800182481751825e-05, "loss": 0.0582, "step": 8542 }, { "epoch": 1.2, "learning_rate": 4.80013569155905e-05, "loss": 0.0762, "step": 8544 }, { "epoch": 1.2, "learning_rate": 4.8000889013662736e-05, "loss": 0.0652, "step": 8546 }, { "epoch": 1.2, "learning_rate": 4.800042111173498e-05, "loss": 0.0581, "step": 8548 }, { "epoch": 1.2, "learning_rate": 4.799995320980723e-05, "loss": 0.0561, "step": 8550 }, { "epoch": 1.2, "learning_rate": 4.7999485307879474e-05, "loss": 0.0744, "step": 8552 }, { "epoch": 1.2, "learning_rate": 4.799901740595171e-05, "loss": 0.0756, "step": 8554 }, { "epoch": 1.2, "learning_rate": 4.799854950402396e-05, "loss": 0.0574, "step": 8556 }, { "epoch": 1.2, "learning_rate": 4.79980816020962e-05, "loss": 0.0673, "step": 8558 }, { "epoch": 1.2, "learning_rate": 4.799761370016845e-05, "loss": 0.112, "step": 8560 }, { "epoch": 1.2, "learning_rate": 4.799714579824069e-05, "loss": 0.0745, "step": 8562 }, { "epoch": 1.2, "learning_rate": 4.7996677896312936e-05, "loss": 0.0649, "step": 8564 }, { "epoch": 1.2, "learning_rate": 4.7996209994385175e-05, "loss": 0.0651, "step": 8566 }, { "epoch": 1.2, "learning_rate": 4.799574209245743e-05, "loss": 0.0765, "step": 8568 }, { "epoch": 1.2, "learning_rate": 4.799527419052967e-05, "loss": 0.0595, "step": 8570 }, { "epoch": 1.2, "learning_rate": 4.799480628860191e-05, "loss": 0.0711, "step": 8572 }, { "epoch": 1.2, "learning_rate": 4.799433838667415e-05, "loss": 0.0622, "step": 8574 }, { "epoch": 1.2, "learning_rate": 4.79938704847464e-05, "loss": 0.0687, "step": 8576 }, { "epoch": 1.2, "learning_rate": 4.7993402582818644e-05, "loss": 0.0675, "step": 8578 }, { "epoch": 1.2, "learning_rate": 4.799293468089089e-05, "loss": 0.0708, "step": 8580 }, { "epoch": 1.2, "learning_rate": 4.799246677896313e-05, "loss": 0.0816, "step": 8582 }, { "epoch": 1.2, "learning_rate": 4.7991998877035374e-05, "loss": 0.0767, "step": 8584 }, { "epoch": 1.21, "learning_rate": 4.799153097510762e-05, "loss": 0.0521, "step": 8586 }, { "epoch": 1.21, "learning_rate": 4.7991063073179866e-05, "loss": 0.0607, "step": 8588 }, { "epoch": 1.21, "learning_rate": 4.7990595171252105e-05, "loss": 0.0895, "step": 8590 }, { "epoch": 1.21, "learning_rate": 4.799012726932435e-05, "loss": 0.0655, "step": 8592 }, { "epoch": 1.21, "learning_rate": 4.79896593673966e-05, "loss": 0.068, "step": 8594 }, { "epoch": 1.21, "learning_rate": 4.798919146546884e-05, "loss": 0.0752, "step": 8596 }, { "epoch": 1.21, "learning_rate": 4.798872356354108e-05, "loss": 0.0816, "step": 8598 }, { "epoch": 1.21, "learning_rate": 4.798825566161333e-05, "loss": 0.0827, "step": 8600 }, { "epoch": 1.21, "learning_rate": 4.798778775968557e-05, "loss": 0.0787, "step": 8602 }, { "epoch": 1.21, "learning_rate": 4.798731985775782e-05, "loss": 0.0548, "step": 8604 }, { "epoch": 1.21, "learning_rate": 4.798685195583006e-05, "loss": 0.0545, "step": 8606 }, { "epoch": 1.21, "learning_rate": 4.7986384053902305e-05, "loss": 0.073, "step": 8608 }, { "epoch": 1.21, "learning_rate": 4.7985916151974544e-05, "loss": 0.0662, "step": 8610 }, { "epoch": 1.21, "learning_rate": 4.79854482500468e-05, "loss": 0.1088, "step": 8612 }, { "epoch": 1.21, "learning_rate": 4.7984980348119036e-05, "loss": 0.0798, "step": 8614 }, { "epoch": 1.21, "learning_rate": 4.798451244619128e-05, "loss": 0.0667, "step": 8616 }, { "epoch": 1.21, "learning_rate": 4.798404454426352e-05, "loss": 0.0683, "step": 8618 }, { "epoch": 1.21, "learning_rate": 4.798357664233577e-05, "loss": 0.0882, "step": 8620 }, { "epoch": 1.21, "learning_rate": 4.798310874040801e-05, "loss": 0.0718, "step": 8622 }, { "epoch": 1.21, "learning_rate": 4.798264083848026e-05, "loss": 0.0746, "step": 8624 }, { "epoch": 1.21, "learning_rate": 4.79821729365525e-05, "loss": 0.0832, "step": 8626 }, { "epoch": 1.21, "learning_rate": 4.7981705034624744e-05, "loss": 0.0532, "step": 8628 }, { "epoch": 1.21, "learning_rate": 4.798123713269699e-05, "loss": 0.0666, "step": 8630 }, { "epoch": 1.21, "learning_rate": 4.7980769230769236e-05, "loss": 0.0945, "step": 8632 }, { "epoch": 1.21, "learning_rate": 4.7980301328841475e-05, "loss": 0.0786, "step": 8634 }, { "epoch": 1.21, "learning_rate": 4.797983342691372e-05, "loss": 0.0674, "step": 8636 }, { "epoch": 1.21, "learning_rate": 4.797936552498597e-05, "loss": 0.0609, "step": 8638 }, { "epoch": 1.21, "learning_rate": 4.797889762305821e-05, "loss": 0.072, "step": 8640 }, { "epoch": 1.21, "learning_rate": 4.797842972113045e-05, "loss": 0.0711, "step": 8642 }, { "epoch": 1.21, "learning_rate": 4.79779618192027e-05, "loss": 0.0527, "step": 8644 }, { "epoch": 1.21, "learning_rate": 4.7977493917274944e-05, "loss": 0.0668, "step": 8646 }, { "epoch": 1.21, "learning_rate": 4.797702601534719e-05, "loss": 0.0771, "step": 8648 }, { "epoch": 1.21, "learning_rate": 4.797655811341943e-05, "loss": 0.0657, "step": 8650 }, { "epoch": 1.21, "learning_rate": 4.7976090211491674e-05, "loss": 0.0763, "step": 8652 }, { "epoch": 1.21, "learning_rate": 4.7975622309563914e-05, "loss": 0.0674, "step": 8654 }, { "epoch": 1.22, "learning_rate": 4.7975154407636166e-05, "loss": 0.0775, "step": 8656 }, { "epoch": 1.22, "learning_rate": 4.7974686505708405e-05, "loss": 0.0635, "step": 8658 }, { "epoch": 1.22, "learning_rate": 4.797421860378065e-05, "loss": 0.0639, "step": 8660 }, { "epoch": 1.22, "learning_rate": 4.797375070185289e-05, "loss": 0.0586, "step": 8662 }, { "epoch": 1.22, "learning_rate": 4.797328279992514e-05, "loss": 0.0707, "step": 8664 }, { "epoch": 1.22, "learning_rate": 4.797281489799738e-05, "loss": 0.0516, "step": 8666 }, { "epoch": 1.22, "learning_rate": 4.797234699606963e-05, "loss": 0.0704, "step": 8668 }, { "epoch": 1.22, "learning_rate": 4.797187909414187e-05, "loss": 0.0894, "step": 8670 }, { "epoch": 1.22, "learning_rate": 4.797141119221411e-05, "loss": 0.0601, "step": 8672 }, { "epoch": 1.22, "learning_rate": 4.797094329028636e-05, "loss": 0.0699, "step": 8674 }, { "epoch": 1.22, "learning_rate": 4.7970475388358605e-05, "loss": 0.0617, "step": 8676 }, { "epoch": 1.22, "learning_rate": 4.7970007486430844e-05, "loss": 0.0697, "step": 8678 }, { "epoch": 1.22, "learning_rate": 4.796953958450309e-05, "loss": 0.0829, "step": 8680 }, { "epoch": 1.22, "learning_rate": 4.7969071682575336e-05, "loss": 0.0682, "step": 8682 }, { "epoch": 1.22, "learning_rate": 4.796860378064758e-05, "loss": 0.0778, "step": 8684 }, { "epoch": 1.22, "learning_rate": 4.796813587871982e-05, "loss": 0.0773, "step": 8686 }, { "epoch": 1.22, "learning_rate": 4.796766797679207e-05, "loss": 0.0738, "step": 8688 }, { "epoch": 1.22, "learning_rate": 4.796720007486431e-05, "loss": 0.0564, "step": 8690 }, { "epoch": 1.22, "learning_rate": 4.796673217293656e-05, "loss": 0.0794, "step": 8692 }, { "epoch": 1.22, "learning_rate": 4.79662642710088e-05, "loss": 0.0731, "step": 8694 }, { "epoch": 1.22, "learning_rate": 4.7965796369081044e-05, "loss": 0.0756, "step": 8696 }, { "epoch": 1.22, "learning_rate": 4.796532846715329e-05, "loss": 0.0585, "step": 8698 }, { "epoch": 1.22, "learning_rate": 4.7964860565225536e-05, "loss": 0.065, "step": 8700 }, { "epoch": 1.22, "learning_rate": 4.7964392663297775e-05, "loss": 0.0476, "step": 8702 }, { "epoch": 1.22, "learning_rate": 4.796392476137002e-05, "loss": 0.0625, "step": 8704 }, { "epoch": 1.22, "learning_rate": 4.796345685944226e-05, "loss": 0.0746, "step": 8706 }, { "epoch": 1.22, "learning_rate": 4.796298895751451e-05, "loss": 0.0774, "step": 8708 }, { "epoch": 1.22, "learning_rate": 4.796252105558675e-05, "loss": 0.0473, "step": 8710 }, { "epoch": 1.22, "learning_rate": 4.7962053153659e-05, "loss": 0.0834, "step": 8712 }, { "epoch": 1.22, "learning_rate": 4.796158525173124e-05, "loss": 0.0597, "step": 8714 }, { "epoch": 1.22, "learning_rate": 4.796111734980348e-05, "loss": 0.0669, "step": 8716 }, { "epoch": 1.22, "learning_rate": 4.796064944787573e-05, "loss": 0.0567, "step": 8718 }, { "epoch": 1.22, "learning_rate": 4.7960181545947974e-05, "loss": 0.0691, "step": 8720 }, { "epoch": 1.22, "learning_rate": 4.7959713644020214e-05, "loss": 0.0713, "step": 8722 }, { "epoch": 1.22, "learning_rate": 4.795924574209246e-05, "loss": 0.0685, "step": 8724 }, { "epoch": 1.22, "learning_rate": 4.7958777840164705e-05, "loss": 0.0747, "step": 8726 }, { "epoch": 1.23, "learning_rate": 4.795830993823695e-05, "loss": 0.0593, "step": 8728 }, { "epoch": 1.23, "learning_rate": 4.795784203630919e-05, "loss": 0.0783, "step": 8730 }, { "epoch": 1.23, "learning_rate": 4.7957374134381436e-05, "loss": 0.0683, "step": 8732 }, { "epoch": 1.23, "learning_rate": 4.795690623245368e-05, "loss": 0.0755, "step": 8734 }, { "epoch": 1.23, "learning_rate": 4.795643833052593e-05, "loss": 0.0863, "step": 8736 }, { "epoch": 1.23, "learning_rate": 4.795597042859817e-05, "loss": 0.0483, "step": 8738 }, { "epoch": 1.23, "learning_rate": 4.7955502526670407e-05, "loss": 0.0703, "step": 8740 }, { "epoch": 1.23, "learning_rate": 4.795503462474266e-05, "loss": 0.0563, "step": 8742 }, { "epoch": 1.23, "learning_rate": 4.79545667228149e-05, "loss": 0.0479, "step": 8744 }, { "epoch": 1.23, "learning_rate": 4.7954098820887144e-05, "loss": 0.0641, "step": 8746 }, { "epoch": 1.23, "learning_rate": 4.795363091895938e-05, "loss": 0.0904, "step": 8748 }, { "epoch": 1.23, "learning_rate": 4.795316301703163e-05, "loss": 0.089, "step": 8750 }, { "epoch": 1.23, "learning_rate": 4.7952695115103875e-05, "loss": 0.0574, "step": 8752 }, { "epoch": 1.23, "learning_rate": 4.795222721317612e-05, "loss": 0.0807, "step": 8754 }, { "epoch": 1.23, "learning_rate": 4.795175931124836e-05, "loss": 0.0497, "step": 8756 }, { "epoch": 1.23, "learning_rate": 4.7951291409320606e-05, "loss": 0.1108, "step": 8758 }, { "epoch": 1.23, "learning_rate": 4.795082350739285e-05, "loss": 0.0649, "step": 8760 }, { "epoch": 1.23, "learning_rate": 4.79503556054651e-05, "loss": 0.0619, "step": 8762 }, { "epoch": 1.23, "learning_rate": 4.794988770353734e-05, "loss": 0.0562, "step": 8764 }, { "epoch": 1.23, "learning_rate": 4.794941980160958e-05, "loss": 0.0662, "step": 8766 }, { "epoch": 1.23, "learning_rate": 4.794895189968183e-05, "loss": 0.0562, "step": 8768 }, { "epoch": 1.23, "learning_rate": 4.7948483997754075e-05, "loss": 0.08, "step": 8770 }, { "epoch": 1.23, "learning_rate": 4.7948016095826314e-05, "loss": 0.0883, "step": 8772 }, { "epoch": 1.23, "learning_rate": 4.794754819389856e-05, "loss": 0.0709, "step": 8774 }, { "epoch": 1.23, "learning_rate": 4.7947080291970806e-05, "loss": 0.0658, "step": 8776 }, { "epoch": 1.23, "learning_rate": 4.794661239004305e-05, "loss": 0.0704, "step": 8778 }, { "epoch": 1.23, "learning_rate": 4.794614448811529e-05, "loss": 0.0966, "step": 8780 }, { "epoch": 1.23, "learning_rate": 4.794567658618754e-05, "loss": 0.081, "step": 8782 }, { "epoch": 1.23, "learning_rate": 4.7945208684259776e-05, "loss": 0.0678, "step": 8784 }, { "epoch": 1.23, "learning_rate": 4.794474078233203e-05, "loss": 0.074, "step": 8786 }, { "epoch": 1.23, "learning_rate": 4.794427288040427e-05, "loss": 0.0727, "step": 8788 }, { "epoch": 1.23, "learning_rate": 4.7943804978476514e-05, "loss": 0.0591, "step": 8790 }, { "epoch": 1.23, "learning_rate": 4.794333707654875e-05, "loss": 0.095, "step": 8792 }, { "epoch": 1.23, "learning_rate": 4.7942869174621005e-05, "loss": 0.064, "step": 8794 }, { "epoch": 1.23, "learning_rate": 4.7942401272693245e-05, "loss": 0.0724, "step": 8796 }, { "epoch": 1.23, "learning_rate": 4.794193337076549e-05, "loss": 0.0756, "step": 8798 }, { "epoch": 1.24, "learning_rate": 4.794146546883773e-05, "loss": 0.0755, "step": 8800 }, { "epoch": 1.24, "learning_rate": 4.7940997566909976e-05, "loss": 0.08, "step": 8802 }, { "epoch": 1.24, "learning_rate": 4.794052966498222e-05, "loss": 0.074, "step": 8804 }, { "epoch": 1.24, "learning_rate": 4.794006176305447e-05, "loss": 0.0783, "step": 8806 }, { "epoch": 1.24, "learning_rate": 4.7939593861126707e-05, "loss": 0.06, "step": 8808 }, { "epoch": 1.24, "learning_rate": 4.793912595919895e-05, "loss": 0.0711, "step": 8810 }, { "epoch": 1.24, "learning_rate": 4.79386580572712e-05, "loss": 0.0625, "step": 8812 }, { "epoch": 1.24, "learning_rate": 4.7938190155343444e-05, "loss": 0.0793, "step": 8814 }, { "epoch": 1.24, "learning_rate": 4.7937722253415683e-05, "loss": 0.0832, "step": 8816 }, { "epoch": 1.24, "learning_rate": 4.793725435148793e-05, "loss": 0.0637, "step": 8818 }, { "epoch": 1.24, "learning_rate": 4.7936786449560175e-05, "loss": 0.0923, "step": 8820 }, { "epoch": 1.24, "learning_rate": 4.793631854763242e-05, "loss": 0.0939, "step": 8822 }, { "epoch": 1.24, "learning_rate": 4.793585064570466e-05, "loss": 0.0704, "step": 8824 }, { "epoch": 1.24, "learning_rate": 4.7935382743776906e-05, "loss": 0.0715, "step": 8826 }, { "epoch": 1.24, "learning_rate": 4.793491484184915e-05, "loss": 0.0708, "step": 8828 }, { "epoch": 1.24, "learning_rate": 4.79344469399214e-05, "loss": 0.0725, "step": 8830 }, { "epoch": 1.24, "learning_rate": 4.793397903799364e-05, "loss": 0.0567, "step": 8832 }, { "epoch": 1.24, "learning_rate": 4.793351113606588e-05, "loss": 0.087, "step": 8834 }, { "epoch": 1.24, "learning_rate": 4.793304323413812e-05, "loss": 0.074, "step": 8836 }, { "epoch": 1.24, "learning_rate": 4.7932575332210375e-05, "loss": 0.0677, "step": 8838 }, { "epoch": 1.24, "learning_rate": 4.7932107430282614e-05, "loss": 0.0722, "step": 8840 }, { "epoch": 1.24, "learning_rate": 4.793163952835486e-05, "loss": 0.0724, "step": 8842 }, { "epoch": 1.24, "learning_rate": 4.79311716264271e-05, "loss": 0.0814, "step": 8844 }, { "epoch": 1.24, "learning_rate": 4.793070372449935e-05, "loss": 0.0862, "step": 8846 }, { "epoch": 1.24, "learning_rate": 4.793023582257159e-05, "loss": 0.0586, "step": 8848 }, { "epoch": 1.24, "learning_rate": 4.792976792064384e-05, "loss": 0.0689, "step": 8850 }, { "epoch": 1.24, "learning_rate": 4.7929300018716076e-05, "loss": 0.0714, "step": 8852 }, { "epoch": 1.24, "learning_rate": 4.792883211678832e-05, "loss": 0.0742, "step": 8854 }, { "epoch": 1.24, "learning_rate": 4.792836421486057e-05, "loss": 0.0689, "step": 8856 }, { "epoch": 1.24, "learning_rate": 4.7927896312932814e-05, "loss": 0.0753, "step": 8858 }, { "epoch": 1.24, "learning_rate": 4.792742841100505e-05, "loss": 0.0691, "step": 8860 }, { "epoch": 1.24, "learning_rate": 4.79269605090773e-05, "loss": 0.065, "step": 8862 }, { "epoch": 1.24, "learning_rate": 4.7926492607149545e-05, "loss": 0.0636, "step": 8864 }, { "epoch": 1.24, "learning_rate": 4.792602470522179e-05, "loss": 0.0698, "step": 8866 }, { "epoch": 1.24, "learning_rate": 4.792555680329403e-05, "loss": 0.0668, "step": 8868 }, { "epoch": 1.25, "learning_rate": 4.7925088901366276e-05, "loss": 0.0519, "step": 8870 }, { "epoch": 1.25, "learning_rate": 4.792462099943852e-05, "loss": 0.0669, "step": 8872 }, { "epoch": 1.25, "learning_rate": 4.792415309751077e-05, "loss": 0.0521, "step": 8874 }, { "epoch": 1.25, "learning_rate": 4.7923685195583007e-05, "loss": 0.0697, "step": 8876 }, { "epoch": 1.25, "learning_rate": 4.792321729365525e-05, "loss": 0.0792, "step": 8878 }, { "epoch": 1.25, "learning_rate": 4.79227493917275e-05, "loss": 0.1011, "step": 8880 }, { "epoch": 1.25, "learning_rate": 4.7922281489799744e-05, "loss": 0.078, "step": 8882 }, { "epoch": 1.25, "learning_rate": 4.7921813587871983e-05, "loss": 0.0777, "step": 8884 }, { "epoch": 1.25, "learning_rate": 4.792134568594423e-05, "loss": 0.0759, "step": 8886 }, { "epoch": 1.25, "learning_rate": 4.792087778401647e-05, "loss": 0.0692, "step": 8888 }, { "epoch": 1.25, "learning_rate": 4.792040988208872e-05, "loss": 0.1084, "step": 8890 }, { "epoch": 1.25, "learning_rate": 4.791994198016096e-05, "loss": 0.07, "step": 8892 }, { "epoch": 1.25, "learning_rate": 4.7919474078233206e-05, "loss": 0.0706, "step": 8894 }, { "epoch": 1.25, "learning_rate": 4.7919006176305445e-05, "loss": 0.0777, "step": 8896 }, { "epoch": 1.25, "learning_rate": 4.791853827437769e-05, "loss": 0.0781, "step": 8898 }, { "epoch": 1.25, "learning_rate": 4.791807037244994e-05, "loss": 0.0655, "step": 8900 }, { "epoch": 1.25, "learning_rate": 4.791760247052218e-05, "loss": 0.0752, "step": 8902 }, { "epoch": 1.25, "learning_rate": 4.791713456859442e-05, "loss": 0.0952, "step": 8904 }, { "epoch": 1.25, "learning_rate": 4.791666666666667e-05, "loss": 0.0797, "step": 8906 }, { "epoch": 1.25, "learning_rate": 4.7916198764738914e-05, "loss": 0.0725, "step": 8908 }, { "epoch": 1.25, "learning_rate": 4.791573086281116e-05, "loss": 0.0733, "step": 8910 }, { "epoch": 1.25, "learning_rate": 4.79152629608834e-05, "loss": 0.0604, "step": 8912 }, { "epoch": 1.25, "learning_rate": 4.7914795058955645e-05, "loss": 0.0751, "step": 8914 }, { "epoch": 1.25, "learning_rate": 4.791432715702789e-05, "loss": 0.0783, "step": 8916 }, { "epoch": 1.25, "learning_rate": 4.791385925510014e-05, "loss": 0.0816, "step": 8918 }, { "epoch": 1.25, "learning_rate": 4.7913391353172376e-05, "loss": 0.063, "step": 8920 }, { "epoch": 1.25, "learning_rate": 4.791292345124462e-05, "loss": 0.0801, "step": 8922 }, { "epoch": 1.25, "learning_rate": 4.791245554931687e-05, "loss": 0.0831, "step": 8924 }, { "epoch": 1.25, "learning_rate": 4.7911987647389114e-05, "loss": 0.085, "step": 8926 }, { "epoch": 1.25, "learning_rate": 4.791151974546135e-05, "loss": 0.0568, "step": 8928 }, { "epoch": 1.25, "learning_rate": 4.79110518435336e-05, "loss": 0.0749, "step": 8930 }, { "epoch": 1.25, "learning_rate": 4.791058394160584e-05, "loss": 0.0779, "step": 8932 }, { "epoch": 1.25, "learning_rate": 4.791011603967809e-05, "loss": 0.0623, "step": 8934 }, { "epoch": 1.25, "learning_rate": 4.790964813775033e-05, "loss": 0.0537, "step": 8936 }, { "epoch": 1.25, "learning_rate": 4.7909180235822576e-05, "loss": 0.0671, "step": 8938 }, { "epoch": 1.25, "learning_rate": 4.7908712333894815e-05, "loss": 0.0853, "step": 8940 }, { "epoch": 1.26, "learning_rate": 4.790824443196707e-05, "loss": 0.0844, "step": 8942 }, { "epoch": 1.26, "learning_rate": 4.7907776530039307e-05, "loss": 0.0767, "step": 8944 }, { "epoch": 1.26, "learning_rate": 4.790730862811155e-05, "loss": 0.0777, "step": 8946 }, { "epoch": 1.26, "learning_rate": 4.790684072618379e-05, "loss": 0.0656, "step": 8948 }, { "epoch": 1.26, "learning_rate": 4.790637282425604e-05, "loss": 0.0875, "step": 8950 }, { "epoch": 1.26, "learning_rate": 4.7905904922328283e-05, "loss": 0.0703, "step": 8952 }, { "epoch": 1.26, "learning_rate": 4.790543702040053e-05, "loss": 0.0772, "step": 8954 }, { "epoch": 1.26, "learning_rate": 4.790496911847277e-05, "loss": 0.082, "step": 8956 }, { "epoch": 1.26, "learning_rate": 4.7904501216545014e-05, "loss": 0.0669, "step": 8958 }, { "epoch": 1.26, "learning_rate": 4.790403331461726e-05, "loss": 0.0686, "step": 8960 }, { "epoch": 1.26, "learning_rate": 4.7903565412689506e-05, "loss": 0.061, "step": 8962 }, { "epoch": 1.26, "learning_rate": 4.7903097510761745e-05, "loss": 0.0885, "step": 8964 }, { "epoch": 1.26, "learning_rate": 4.790262960883399e-05, "loss": 0.0773, "step": 8966 }, { "epoch": 1.26, "learning_rate": 4.790216170690624e-05, "loss": 0.0892, "step": 8968 }, { "epoch": 1.26, "learning_rate": 4.790169380497848e-05, "loss": 0.083, "step": 8970 }, { "epoch": 1.26, "learning_rate": 4.790122590305072e-05, "loss": 0.0689, "step": 8972 }, { "epoch": 1.26, "learning_rate": 4.790075800112297e-05, "loss": 0.0794, "step": 8974 }, { "epoch": 1.26, "learning_rate": 4.7900290099195214e-05, "loss": 0.0811, "step": 8976 }, { "epoch": 1.26, "learning_rate": 4.789982219726746e-05, "loss": 0.0799, "step": 8978 }, { "epoch": 1.26, "learning_rate": 4.78993542953397e-05, "loss": 0.0745, "step": 8980 }, { "epoch": 1.26, "learning_rate": 4.7898886393411945e-05, "loss": 0.0651, "step": 8982 }, { "epoch": 1.26, "learning_rate": 4.7898418491484184e-05, "loss": 0.0807, "step": 8984 }, { "epoch": 1.26, "learning_rate": 4.789795058955644e-05, "loss": 0.0698, "step": 8986 }, { "epoch": 1.26, "learning_rate": 4.7897482687628676e-05, "loss": 0.0688, "step": 8988 }, { "epoch": 1.26, "learning_rate": 4.789701478570092e-05, "loss": 0.0598, "step": 8990 }, { "epoch": 1.26, "learning_rate": 4.789654688377316e-05, "loss": 0.0674, "step": 8992 }, { "epoch": 1.26, "learning_rate": 4.789607898184541e-05, "loss": 0.0773, "step": 8994 }, { "epoch": 1.26, "learning_rate": 4.789561107991765e-05, "loss": 0.0767, "step": 8996 }, { "epoch": 1.26, "learning_rate": 4.789514317798989e-05, "loss": 0.0646, "step": 8998 }, { "epoch": 1.26, "learning_rate": 4.789467527606214e-05, "loss": 0.0806, "step": 9000 }, { "epoch": 1.26, "eval_gen_len": 30.1724, "eval_loss": 1.0504204034805298, "eval_meteor": 0.0494, "eval_runtime": 14.954, "eval_samples_per_second": 3.879, "eval_steps_per_second": 0.535, "step": 9000 }, { "epoch": 1.26, "learning_rate": 4.7894207374134384e-05, "loss": 0.0568, "step": 9002 }, { "epoch": 1.26, "learning_rate": 4.789373947220663e-05, "loss": 0.0617, "step": 9004 }, { "epoch": 1.26, "learning_rate": 4.789327157027887e-05, "loss": 0.0897, "step": 9006 }, { "epoch": 1.26, "learning_rate": 4.7892803668351115e-05, "loss": 0.0402, "step": 9008 }, { "epoch": 1.26, "learning_rate": 4.789233576642336e-05, "loss": 0.0854, "step": 9010 }, { "epoch": 1.27, "learning_rate": 4.7891867864495607e-05, "loss": 0.0905, "step": 9012 }, { "epoch": 1.27, "learning_rate": 4.7891399962567846e-05, "loss": 0.0734, "step": 9014 }, { "epoch": 1.27, "learning_rate": 4.789093206064009e-05, "loss": 0.0816, "step": 9016 }, { "epoch": 1.27, "learning_rate": 4.789046415871233e-05, "loss": 0.0621, "step": 9018 }, { "epoch": 1.27, "learning_rate": 4.7889996256784583e-05, "loss": 0.0782, "step": 9020 }, { "epoch": 1.27, "learning_rate": 4.788952835485682e-05, "loss": 0.0572, "step": 9022 }, { "epoch": 1.27, "learning_rate": 4.788906045292907e-05, "loss": 0.0701, "step": 9024 }, { "epoch": 1.27, "learning_rate": 4.788859255100131e-05, "loss": 0.0729, "step": 9026 }, { "epoch": 1.27, "learning_rate": 4.7888124649073554e-05, "loss": 0.0794, "step": 9028 }, { "epoch": 1.27, "learning_rate": 4.78876567471458e-05, "loss": 0.0732, "step": 9030 }, { "epoch": 1.27, "learning_rate": 4.7887188845218045e-05, "loss": 0.1045, "step": 9032 }, { "epoch": 1.27, "learning_rate": 4.7886720943290285e-05, "loss": 0.0823, "step": 9034 }, { "epoch": 1.27, "learning_rate": 4.788625304136253e-05, "loss": 0.0742, "step": 9036 }, { "epoch": 1.27, "learning_rate": 4.7885785139434776e-05, "loss": 0.0572, "step": 9038 }, { "epoch": 1.27, "learning_rate": 4.788531723750702e-05, "loss": 0.0746, "step": 9040 }, { "epoch": 1.27, "learning_rate": 4.788484933557926e-05, "loss": 0.0704, "step": 9042 }, { "epoch": 1.27, "learning_rate": 4.788438143365151e-05, "loss": 0.1047, "step": 9044 }, { "epoch": 1.27, "learning_rate": 4.788391353172375e-05, "loss": 0.0751, "step": 9046 }, { "epoch": 1.27, "learning_rate": 4.7883445629796e-05, "loss": 0.0787, "step": 9048 }, { "epoch": 1.27, "learning_rate": 4.788297772786824e-05, "loss": 0.0724, "step": 9050 }, { "epoch": 1.27, "learning_rate": 4.7882509825940484e-05, "loss": 0.0762, "step": 9052 }, { "epoch": 1.27, "learning_rate": 4.788204192401273e-05, "loss": 0.074, "step": 9054 }, { "epoch": 1.27, "learning_rate": 4.7881574022084976e-05, "loss": 0.0829, "step": 9056 }, { "epoch": 1.27, "learning_rate": 4.7881106120157215e-05, "loss": 0.0561, "step": 9058 }, { "epoch": 1.27, "learning_rate": 4.788063821822946e-05, "loss": 0.0567, "step": 9060 }, { "epoch": 1.27, "learning_rate": 4.78801703163017e-05, "loss": 0.0665, "step": 9062 }, { "epoch": 1.27, "learning_rate": 4.787970241437395e-05, "loss": 0.0696, "step": 9064 }, { "epoch": 1.27, "learning_rate": 4.787923451244619e-05, "loss": 0.0614, "step": 9066 }, { "epoch": 1.27, "learning_rate": 4.787876661051844e-05, "loss": 0.0622, "step": 9068 }, { "epoch": 1.27, "learning_rate": 4.787829870859068e-05, "loss": 0.0754, "step": 9070 }, { "epoch": 1.27, "learning_rate": 4.787783080666293e-05, "loss": 0.0658, "step": 9072 }, { "epoch": 1.27, "learning_rate": 4.787736290473517e-05, "loss": 0.0959, "step": 9074 }, { "epoch": 1.27, "learning_rate": 4.7876895002807415e-05, "loss": 0.0837, "step": 9076 }, { "epoch": 1.27, "learning_rate": 4.7876427100879654e-05, "loss": 0.0675, "step": 9078 }, { "epoch": 1.27, "learning_rate": 4.78759591989519e-05, "loss": 0.0754, "step": 9080 }, { "epoch": 1.27, "learning_rate": 4.7875491297024146e-05, "loss": 0.0684, "step": 9082 }, { "epoch": 1.28, "learning_rate": 4.787502339509639e-05, "loss": 0.1071, "step": 9084 }, { "epoch": 1.28, "learning_rate": 4.787455549316863e-05, "loss": 0.0761, "step": 9086 }, { "epoch": 1.28, "learning_rate": 4.787408759124088e-05, "loss": 0.0935, "step": 9088 }, { "epoch": 1.28, "learning_rate": 4.787361968931312e-05, "loss": 0.0645, "step": 9090 }, { "epoch": 1.28, "learning_rate": 4.787315178738537e-05, "loss": 0.0616, "step": 9092 }, { "epoch": 1.28, "learning_rate": 4.787268388545761e-05, "loss": 0.0762, "step": 9094 }, { "epoch": 1.28, "learning_rate": 4.7872215983529854e-05, "loss": 0.0685, "step": 9096 }, { "epoch": 1.28, "learning_rate": 4.78717480816021e-05, "loss": 0.0776, "step": 9098 }, { "epoch": 1.28, "learning_rate": 4.7871280179674345e-05, "loss": 0.0725, "step": 9100 }, { "epoch": 1.28, "learning_rate": 4.7870812277746585e-05, "loss": 0.0693, "step": 9102 }, { "epoch": 1.28, "learning_rate": 4.787034437581883e-05, "loss": 0.0788, "step": 9104 }, { "epoch": 1.28, "learning_rate": 4.7869876473891076e-05, "loss": 0.0564, "step": 9106 }, { "epoch": 1.28, "learning_rate": 4.786940857196332e-05, "loss": 0.0688, "step": 9108 }, { "epoch": 1.28, "learning_rate": 4.786894067003556e-05, "loss": 0.0705, "step": 9110 }, { "epoch": 1.28, "learning_rate": 4.786847276810781e-05, "loss": 0.0418, "step": 9112 }, { "epoch": 1.28, "learning_rate": 4.7868004866180046e-05, "loss": 0.0612, "step": 9114 }, { "epoch": 1.28, "learning_rate": 4.78675369642523e-05, "loss": 0.0784, "step": 9116 }, { "epoch": 1.28, "learning_rate": 4.786706906232454e-05, "loss": 0.0913, "step": 9118 }, { "epoch": 1.28, "learning_rate": 4.7866601160396784e-05, "loss": 0.0921, "step": 9120 }, { "epoch": 1.28, "learning_rate": 4.786613325846902e-05, "loss": 0.089, "step": 9122 }, { "epoch": 1.28, "learning_rate": 4.7865665356541276e-05, "loss": 0.0623, "step": 9124 }, { "epoch": 1.28, "learning_rate": 4.7865197454613515e-05, "loss": 0.0994, "step": 9126 }, { "epoch": 1.28, "learning_rate": 4.786472955268576e-05, "loss": 0.0848, "step": 9128 }, { "epoch": 1.28, "learning_rate": 4.7864261650758e-05, "loss": 0.0723, "step": 9130 }, { "epoch": 1.28, "learning_rate": 4.7863793748830246e-05, "loss": 0.0634, "step": 9132 }, { "epoch": 1.28, "learning_rate": 4.786332584690249e-05, "loss": 0.0634, "step": 9134 }, { "epoch": 1.28, "learning_rate": 4.786285794497474e-05, "loss": 0.0679, "step": 9136 }, { "epoch": 1.28, "learning_rate": 4.786239004304698e-05, "loss": 0.0653, "step": 9138 }, { "epoch": 1.28, "learning_rate": 4.786192214111922e-05, "loss": 0.0844, "step": 9140 }, { "epoch": 1.28, "learning_rate": 4.786145423919147e-05, "loss": 0.0672, "step": 9142 }, { "epoch": 1.28, "learning_rate": 4.7860986337263715e-05, "loss": 0.0579, "step": 9144 }, { "epoch": 1.28, "learning_rate": 4.7860518435335954e-05, "loss": 0.0592, "step": 9146 }, { "epoch": 1.28, "learning_rate": 4.78600505334082e-05, "loss": 0.0571, "step": 9148 }, { "epoch": 1.28, "learning_rate": 4.7859582631480446e-05, "loss": 0.069, "step": 9150 }, { "epoch": 1.28, "learning_rate": 4.785911472955269e-05, "loss": 0.0774, "step": 9152 }, { "epoch": 1.28, "learning_rate": 4.785864682762493e-05, "loss": 0.1049, "step": 9154 }, { "epoch": 1.29, "learning_rate": 4.785817892569718e-05, "loss": 0.0677, "step": 9156 }, { "epoch": 1.29, "learning_rate": 4.785771102376942e-05, "loss": 0.0734, "step": 9158 }, { "epoch": 1.29, "learning_rate": 4.785724312184167e-05, "loss": 0.1023, "step": 9160 }, { "epoch": 1.29, "learning_rate": 4.785677521991391e-05, "loss": 0.0664, "step": 9162 }, { "epoch": 1.29, "learning_rate": 4.7856307317986154e-05, "loss": 0.0645, "step": 9164 }, { "epoch": 1.29, "learning_rate": 4.785583941605839e-05, "loss": 0.0904, "step": 9166 }, { "epoch": 1.29, "learning_rate": 4.7855371514130645e-05, "loss": 0.0822, "step": 9168 }, { "epoch": 1.29, "learning_rate": 4.7854903612202885e-05, "loss": 0.0716, "step": 9170 }, { "epoch": 1.29, "learning_rate": 4.785443571027513e-05, "loss": 0.0632, "step": 9172 }, { "epoch": 1.29, "learning_rate": 4.785396780834737e-05, "loss": 0.0616, "step": 9174 }, { "epoch": 1.29, "learning_rate": 4.7853499906419615e-05, "loss": 0.0643, "step": 9176 }, { "epoch": 1.29, "learning_rate": 4.785303200449186e-05, "loss": 0.0805, "step": 9178 }, { "epoch": 1.29, "learning_rate": 4.785256410256411e-05, "loss": 0.0551, "step": 9180 }, { "epoch": 1.29, "learning_rate": 4.7852096200636346e-05, "loss": 0.0611, "step": 9182 }, { "epoch": 1.29, "learning_rate": 4.785162829870859e-05, "loss": 0.0653, "step": 9184 }, { "epoch": 1.29, "learning_rate": 4.785116039678084e-05, "loss": 0.0741, "step": 9186 }, { "epoch": 1.29, "learning_rate": 4.7850692494853084e-05, "loss": 0.062, "step": 9188 }, { "epoch": 1.29, "learning_rate": 4.785022459292532e-05, "loss": 0.0667, "step": 9190 }, { "epoch": 1.29, "learning_rate": 4.784975669099757e-05, "loss": 0.0588, "step": 9192 }, { "epoch": 1.29, "learning_rate": 4.7849288789069815e-05, "loss": 0.0786, "step": 9194 }, { "epoch": 1.29, "learning_rate": 4.784882088714206e-05, "loss": 0.1006, "step": 9196 }, { "epoch": 1.29, "learning_rate": 4.78483529852143e-05, "loss": 0.0753, "step": 9198 }, { "epoch": 1.29, "learning_rate": 4.7847885083286546e-05, "loss": 0.0867, "step": 9200 }, { "epoch": 1.29, "learning_rate": 4.784741718135879e-05, "loss": 0.0679, "step": 9202 }, { "epoch": 1.29, "learning_rate": 4.784694927943104e-05, "loss": 0.0838, "step": 9204 }, { "epoch": 1.29, "learning_rate": 4.784648137750328e-05, "loss": 0.0921, "step": 9206 }, { "epoch": 1.29, "learning_rate": 4.784601347557552e-05, "loss": 0.0794, "step": 9208 }, { "epoch": 1.29, "learning_rate": 4.784554557364776e-05, "loss": 0.0599, "step": 9210 }, { "epoch": 1.29, "learning_rate": 4.7845077671720015e-05, "loss": 0.0683, "step": 9212 }, { "epoch": 1.29, "learning_rate": 4.7844609769792254e-05, "loss": 0.0855, "step": 9214 }, { "epoch": 1.29, "learning_rate": 4.78441418678645e-05, "loss": 0.0653, "step": 9216 }, { "epoch": 1.29, "learning_rate": 4.784367396593674e-05, "loss": 0.0585, "step": 9218 }, { "epoch": 1.29, "learning_rate": 4.784320606400899e-05, "loss": 0.081, "step": 9220 }, { "epoch": 1.29, "learning_rate": 4.784273816208123e-05, "loss": 0.0627, "step": 9222 }, { "epoch": 1.29, "learning_rate": 4.784227026015348e-05, "loss": 0.0702, "step": 9224 }, { "epoch": 1.3, "learning_rate": 4.7841802358225716e-05, "loss": 0.0584, "step": 9226 }, { "epoch": 1.3, "learning_rate": 4.784133445629796e-05, "loss": 0.1109, "step": 9228 }, { "epoch": 1.3, "learning_rate": 4.784086655437021e-05, "loss": 0.0746, "step": 9230 }, { "epoch": 1.3, "learning_rate": 4.7840398652442454e-05, "loss": 0.078, "step": 9232 }, { "epoch": 1.3, "learning_rate": 4.783993075051469e-05, "loss": 0.0719, "step": 9234 }, { "epoch": 1.3, "learning_rate": 4.783946284858694e-05, "loss": 0.059, "step": 9236 }, { "epoch": 1.3, "learning_rate": 4.7838994946659185e-05, "loss": 0.0641, "step": 9238 }, { "epoch": 1.3, "learning_rate": 4.783852704473143e-05, "loss": 0.0597, "step": 9240 }, { "epoch": 1.3, "learning_rate": 4.783805914280367e-05, "loss": 0.0759, "step": 9242 }, { "epoch": 1.3, "learning_rate": 4.783759124087591e-05, "loss": 0.0763, "step": 9244 }, { "epoch": 1.3, "learning_rate": 4.783712333894816e-05, "loss": 0.0799, "step": 9246 }, { "epoch": 1.3, "learning_rate": 4.78366554370204e-05, "loss": 0.0796, "step": 9248 }, { "epoch": 1.3, "learning_rate": 4.7836187535092646e-05, "loss": 0.0719, "step": 9250 }, { "epoch": 1.3, "learning_rate": 4.7835719633164886e-05, "loss": 0.0486, "step": 9252 }, { "epoch": 1.3, "learning_rate": 4.783525173123714e-05, "loss": 0.0624, "step": 9254 }, { "epoch": 1.3, "learning_rate": 4.783478382930938e-05, "loss": 0.073, "step": 9256 }, { "epoch": 1.3, "learning_rate": 4.783431592738162e-05, "loss": 0.057, "step": 9258 }, { "epoch": 1.3, "learning_rate": 4.783384802545386e-05, "loss": 0.073, "step": 9260 }, { "epoch": 1.3, "learning_rate": 4.783338012352611e-05, "loss": 0.0743, "step": 9262 }, { "epoch": 1.3, "learning_rate": 4.7832912221598354e-05, "loss": 0.0986, "step": 9264 }, { "epoch": 1.3, "learning_rate": 4.78324443196706e-05, "loss": 0.0742, "step": 9266 }, { "epoch": 1.3, "learning_rate": 4.783197641774284e-05, "loss": 0.0651, "step": 9268 }, { "epoch": 1.3, "learning_rate": 4.7831508515815085e-05, "loss": 0.08, "step": 9270 }, { "epoch": 1.3, "learning_rate": 4.783104061388733e-05, "loss": 0.0776, "step": 9272 }, { "epoch": 1.3, "learning_rate": 4.783057271195958e-05, "loss": 0.0654, "step": 9274 }, { "epoch": 1.3, "learning_rate": 4.7830104810031816e-05, "loss": 0.0775, "step": 9276 }, { "epoch": 1.3, "learning_rate": 4.782963690810406e-05, "loss": 0.0777, "step": 9278 }, { "epoch": 1.3, "learning_rate": 4.782916900617631e-05, "loss": 0.0784, "step": 9280 }, { "epoch": 1.3, "learning_rate": 4.7828701104248554e-05, "loss": 0.0645, "step": 9282 }, { "epoch": 1.3, "learning_rate": 4.782823320232079e-05, "loss": 0.0714, "step": 9284 }, { "epoch": 1.3, "learning_rate": 4.782776530039304e-05, "loss": 0.0904, "step": 9286 }, { "epoch": 1.3, "learning_rate": 4.7827297398465285e-05, "loss": 0.0768, "step": 9288 }, { "epoch": 1.3, "learning_rate": 4.782682949653753e-05, "loss": 0.0812, "step": 9290 }, { "epoch": 1.3, "learning_rate": 4.782636159460977e-05, "loss": 0.093, "step": 9292 }, { "epoch": 1.3, "learning_rate": 4.7825893692682016e-05, "loss": 0.1154, "step": 9294 }, { "epoch": 1.3, "learning_rate": 4.7825425790754255e-05, "loss": 0.0621, "step": 9296 }, { "epoch": 1.31, "learning_rate": 4.782495788882651e-05, "loss": 0.0908, "step": 9298 }, { "epoch": 1.31, "learning_rate": 4.782448998689875e-05, "loss": 0.0811, "step": 9300 }, { "epoch": 1.31, "learning_rate": 4.782402208497099e-05, "loss": 0.092, "step": 9302 }, { "epoch": 1.31, "learning_rate": 4.782355418304323e-05, "loss": 0.0684, "step": 9304 }, { "epoch": 1.31, "learning_rate": 4.782308628111548e-05, "loss": 0.0721, "step": 9306 }, { "epoch": 1.31, "learning_rate": 4.7822618379187724e-05, "loss": 0.0758, "step": 9308 }, { "epoch": 1.31, "learning_rate": 4.782215047725997e-05, "loss": 0.0707, "step": 9310 }, { "epoch": 1.31, "learning_rate": 4.782168257533221e-05, "loss": 0.0724, "step": 9312 }, { "epoch": 1.31, "learning_rate": 4.7821214673404455e-05, "loss": 0.0972, "step": 9314 }, { "epoch": 1.31, "learning_rate": 4.78207467714767e-05, "loss": 0.0708, "step": 9316 }, { "epoch": 1.31, "learning_rate": 4.7820278869548946e-05, "loss": 0.076, "step": 9318 }, { "epoch": 1.31, "learning_rate": 4.7819810967621186e-05, "loss": 0.0801, "step": 9320 }, { "epoch": 1.31, "learning_rate": 4.781934306569343e-05, "loss": 0.0643, "step": 9322 }, { "epoch": 1.31, "learning_rate": 4.781887516376568e-05, "loss": 0.0598, "step": 9324 }, { "epoch": 1.31, "learning_rate": 4.781840726183792e-05, "loss": 0.0849, "step": 9326 }, { "epoch": 1.31, "learning_rate": 4.781793935991016e-05, "loss": 0.0593, "step": 9328 }, { "epoch": 1.31, "learning_rate": 4.781747145798241e-05, "loss": 0.0835, "step": 9330 }, { "epoch": 1.31, "learning_rate": 4.7817003556054654e-05, "loss": 0.0569, "step": 9332 }, { "epoch": 1.31, "learning_rate": 4.78165356541269e-05, "loss": 0.0801, "step": 9334 }, { "epoch": 1.31, "learning_rate": 4.781606775219914e-05, "loss": 0.0816, "step": 9336 }, { "epoch": 1.31, "learning_rate": 4.7815599850271385e-05, "loss": 0.0702, "step": 9338 }, { "epoch": 1.31, "learning_rate": 4.7815131948343624e-05, "loss": 0.0764, "step": 9340 }, { "epoch": 1.31, "learning_rate": 4.781466404641588e-05, "loss": 0.0907, "step": 9342 }, { "epoch": 1.31, "learning_rate": 4.7814196144488116e-05, "loss": 0.0728, "step": 9344 }, { "epoch": 1.31, "learning_rate": 4.781372824256036e-05, "loss": 0.0776, "step": 9346 }, { "epoch": 1.31, "learning_rate": 4.78132603406326e-05, "loss": 0.0794, "step": 9348 }, { "epoch": 1.31, "learning_rate": 4.7812792438704854e-05, "loss": 0.0685, "step": 9350 }, { "epoch": 1.31, "learning_rate": 4.781232453677709e-05, "loss": 0.0847, "step": 9352 }, { "epoch": 1.31, "learning_rate": 4.781185663484934e-05, "loss": 0.0935, "step": 9354 }, { "epoch": 1.31, "learning_rate": 4.781138873292158e-05, "loss": 0.0531, "step": 9356 }, { "epoch": 1.31, "learning_rate": 4.7810920830993824e-05, "loss": 0.0529, "step": 9358 }, { "epoch": 1.31, "learning_rate": 4.781045292906607e-05, "loss": 0.0577, "step": 9360 }, { "epoch": 1.31, "learning_rate": 4.7809985027138316e-05, "loss": 0.0856, "step": 9362 }, { "epoch": 1.31, "learning_rate": 4.7809517125210555e-05, "loss": 0.0855, "step": 9364 }, { "epoch": 1.31, "learning_rate": 4.78090492232828e-05, "loss": 0.0776, "step": 9366 }, { "epoch": 1.31, "learning_rate": 4.780858132135505e-05, "loss": 0.0795, "step": 9368 }, { "epoch": 1.32, "learning_rate": 4.780811341942729e-05, "loss": 0.0794, "step": 9370 }, { "epoch": 1.32, "learning_rate": 4.780764551749953e-05, "loss": 0.0785, "step": 9372 }, { "epoch": 1.32, "learning_rate": 4.780717761557178e-05, "loss": 0.0669, "step": 9374 }, { "epoch": 1.32, "learning_rate": 4.7806709713644024e-05, "loss": 0.0855, "step": 9376 }, { "epoch": 1.32, "learning_rate": 4.780624181171627e-05, "loss": 0.0793, "step": 9378 }, { "epoch": 1.32, "learning_rate": 4.780577390978851e-05, "loss": 0.0725, "step": 9380 }, { "epoch": 1.32, "learning_rate": 4.7805306007860755e-05, "loss": 0.0715, "step": 9382 }, { "epoch": 1.32, "learning_rate": 4.7804838105933e-05, "loss": 0.0715, "step": 9384 }, { "epoch": 1.32, "learning_rate": 4.7804370204005247e-05, "loss": 0.0826, "step": 9386 }, { "epoch": 1.32, "learning_rate": 4.7803902302077486e-05, "loss": 0.0786, "step": 9388 }, { "epoch": 1.32, "learning_rate": 4.780343440014973e-05, "loss": 0.0659, "step": 9390 }, { "epoch": 1.32, "learning_rate": 4.780296649822197e-05, "loss": 0.0815, "step": 9392 }, { "epoch": 1.32, "learning_rate": 4.780249859629422e-05, "loss": 0.0662, "step": 9394 }, { "epoch": 1.32, "learning_rate": 4.780203069436646e-05, "loss": 0.0941, "step": 9396 }, { "epoch": 1.32, "learning_rate": 4.780156279243871e-05, "loss": 0.0717, "step": 9398 }, { "epoch": 1.32, "learning_rate": 4.780109489051095e-05, "loss": 0.0826, "step": 9400 }, { "epoch": 1.32, "learning_rate": 4.78006269885832e-05, "loss": 0.0819, "step": 9402 }, { "epoch": 1.32, "learning_rate": 4.780015908665544e-05, "loss": 0.0614, "step": 9404 }, { "epoch": 1.32, "learning_rate": 4.7799691184727685e-05, "loss": 0.0575, "step": 9406 }, { "epoch": 1.32, "learning_rate": 4.7799223282799924e-05, "loss": 0.0707, "step": 9408 }, { "epoch": 1.32, "learning_rate": 4.779875538087217e-05, "loss": 0.0745, "step": 9410 }, { "epoch": 1.32, "learning_rate": 4.7798287478944416e-05, "loss": 0.1053, "step": 9412 }, { "epoch": 1.32, "learning_rate": 4.779781957701666e-05, "loss": 0.0779, "step": 9414 }, { "epoch": 1.32, "learning_rate": 4.77973516750889e-05, "loss": 0.0562, "step": 9416 }, { "epoch": 1.32, "learning_rate": 4.779688377316115e-05, "loss": 0.0793, "step": 9418 }, { "epoch": 1.32, "learning_rate": 4.779641587123339e-05, "loss": 0.0771, "step": 9420 }, { "epoch": 1.32, "learning_rate": 4.779594796930564e-05, "loss": 0.0901, "step": 9422 }, { "epoch": 1.32, "learning_rate": 4.779548006737788e-05, "loss": 0.0701, "step": 9424 }, { "epoch": 1.32, "learning_rate": 4.7795012165450124e-05, "loss": 0.0783, "step": 9426 }, { "epoch": 1.32, "learning_rate": 4.779454426352237e-05, "loss": 0.0646, "step": 9428 }, { "epoch": 1.32, "learning_rate": 4.7794076361594616e-05, "loss": 0.0641, "step": 9430 }, { "epoch": 1.32, "learning_rate": 4.7793608459666855e-05, "loss": 0.0943, "step": 9432 }, { "epoch": 1.32, "learning_rate": 4.77931405577391e-05, "loss": 0.0689, "step": 9434 }, { "epoch": 1.32, "learning_rate": 4.779267265581135e-05, "loss": 0.0821, "step": 9436 }, { "epoch": 1.32, "learning_rate": 4.779220475388359e-05, "loss": 0.0845, "step": 9438 }, { "epoch": 1.33, "learning_rate": 4.779173685195583e-05, "loss": 0.059, "step": 9440 }, { "epoch": 1.33, "learning_rate": 4.779126895002808e-05, "loss": 0.0603, "step": 9442 }, { "epoch": 1.33, "learning_rate": 4.779080104810032e-05, "loss": 0.0869, "step": 9444 }, { "epoch": 1.33, "learning_rate": 4.779033314617257e-05, "loss": 0.1109, "step": 9446 }, { "epoch": 1.33, "learning_rate": 4.778986524424481e-05, "loss": 0.0692, "step": 9448 }, { "epoch": 1.33, "learning_rate": 4.7789397342317055e-05, "loss": 0.0742, "step": 9450 }, { "epoch": 1.33, "learning_rate": 4.7788929440389294e-05, "loss": 0.0545, "step": 9452 }, { "epoch": 1.33, "learning_rate": 4.778846153846154e-05, "loss": 0.0703, "step": 9454 }, { "epoch": 1.33, "learning_rate": 4.7787993636533786e-05, "loss": 0.0618, "step": 9456 }, { "epoch": 1.33, "learning_rate": 4.778752573460603e-05, "loss": 0.0836, "step": 9458 }, { "epoch": 1.33, "learning_rate": 4.778705783267827e-05, "loss": 0.0541, "step": 9460 }, { "epoch": 1.33, "learning_rate": 4.778658993075052e-05, "loss": 0.0667, "step": 9462 }, { "epoch": 1.33, "learning_rate": 4.778612202882276e-05, "loss": 0.0661, "step": 9464 }, { "epoch": 1.33, "learning_rate": 4.778565412689501e-05, "loss": 0.0839, "step": 9466 }, { "epoch": 1.33, "learning_rate": 4.778518622496725e-05, "loss": 0.0702, "step": 9468 }, { "epoch": 1.33, "learning_rate": 4.7784718323039493e-05, "loss": 0.0738, "step": 9470 }, { "epoch": 1.33, "learning_rate": 4.778425042111174e-05, "loss": 0.0702, "step": 9472 }, { "epoch": 1.33, "learning_rate": 4.7783782519183985e-05, "loss": 0.0947, "step": 9474 }, { "epoch": 1.33, "learning_rate": 4.7783314617256224e-05, "loss": 0.089, "step": 9476 }, { "epoch": 1.33, "learning_rate": 4.778284671532847e-05, "loss": 0.0641, "step": 9478 }, { "epoch": 1.33, "learning_rate": 4.7782378813400716e-05, "loss": 0.0864, "step": 9480 }, { "epoch": 1.33, "learning_rate": 4.778191091147296e-05, "loss": 0.0784, "step": 9482 }, { "epoch": 1.33, "learning_rate": 4.77814430095452e-05, "loss": 0.0937, "step": 9484 }, { "epoch": 1.33, "learning_rate": 4.778097510761745e-05, "loss": 0.0642, "step": 9486 }, { "epoch": 1.33, "learning_rate": 4.7780507205689686e-05, "loss": 0.0696, "step": 9488 }, { "epoch": 1.33, "learning_rate": 4.778003930376194e-05, "loss": 0.0641, "step": 9490 }, { "epoch": 1.33, "learning_rate": 4.777957140183418e-05, "loss": 0.0722, "step": 9492 }, { "epoch": 1.33, "learning_rate": 4.7779103499906424e-05, "loss": 0.0727, "step": 9494 }, { "epoch": 1.33, "learning_rate": 4.777863559797866e-05, "loss": 0.0869, "step": 9496 }, { "epoch": 1.33, "learning_rate": 4.777816769605091e-05, "loss": 0.0742, "step": 9498 }, { "epoch": 1.33, "learning_rate": 4.7777699794123155e-05, "loss": 0.0686, "step": 9500 }, { "epoch": 1.33, "learning_rate": 4.7777231892195394e-05, "loss": 0.0714, "step": 9502 }, { "epoch": 1.33, "learning_rate": 4.777676399026764e-05, "loss": 0.0534, "step": 9504 }, { "epoch": 1.33, "learning_rate": 4.7776296088339886e-05, "loss": 0.0602, "step": 9506 }, { "epoch": 1.33, "learning_rate": 4.777582818641213e-05, "loss": 0.0547, "step": 9508 }, { "epoch": 1.33, "learning_rate": 4.777536028448437e-05, "loss": 0.0715, "step": 9510 }, { "epoch": 1.34, "learning_rate": 4.777489238255662e-05, "loss": 0.0819, "step": 9512 }, { "epoch": 1.34, "learning_rate": 4.777442448062886e-05, "loss": 0.0779, "step": 9514 }, { "epoch": 1.34, "learning_rate": 4.777395657870111e-05, "loss": 0.0715, "step": 9516 }, { "epoch": 1.34, "learning_rate": 4.777348867677335e-05, "loss": 0.0691, "step": 9518 }, { "epoch": 1.34, "learning_rate": 4.7773020774845594e-05, "loss": 0.0671, "step": 9520 }, { "epoch": 1.34, "learning_rate": 4.777255287291783e-05, "loss": 0.0839, "step": 9522 }, { "epoch": 1.34, "learning_rate": 4.7772084970990086e-05, "loss": 0.0706, "step": 9524 }, { "epoch": 1.34, "learning_rate": 4.7771617069062325e-05, "loss": 0.1025, "step": 9526 }, { "epoch": 1.34, "learning_rate": 4.777114916713457e-05, "loss": 0.0597, "step": 9528 }, { "epoch": 1.34, "learning_rate": 4.777068126520681e-05, "loss": 0.0777, "step": 9530 }, { "epoch": 1.34, "learning_rate": 4.777021336327906e-05, "loss": 0.0769, "step": 9532 }, { "epoch": 1.34, "learning_rate": 4.77697454613513e-05, "loss": 0.0754, "step": 9534 }, { "epoch": 1.34, "learning_rate": 4.776927755942355e-05, "loss": 0.0726, "step": 9536 }, { "epoch": 1.34, "learning_rate": 4.776880965749579e-05, "loss": 0.0891, "step": 9538 }, { "epoch": 1.34, "learning_rate": 4.776834175556803e-05, "loss": 0.0779, "step": 9540 }, { "epoch": 1.34, "learning_rate": 4.776787385364028e-05, "loss": 0.0538, "step": 9542 }, { "epoch": 1.34, "learning_rate": 4.7767405951712524e-05, "loss": 0.0767, "step": 9544 }, { "epoch": 1.34, "learning_rate": 4.7766938049784764e-05, "loss": 0.0645, "step": 9546 }, { "epoch": 1.34, "learning_rate": 4.776647014785701e-05, "loss": 0.0704, "step": 9548 }, { "epoch": 1.34, "learning_rate": 4.7766002245929255e-05, "loss": 0.0526, "step": 9550 }, { "epoch": 1.34, "learning_rate": 4.77655343440015e-05, "loss": 0.0731, "step": 9552 }, { "epoch": 1.34, "learning_rate": 4.776506644207374e-05, "loss": 0.0616, "step": 9554 }, { "epoch": 1.34, "learning_rate": 4.7764598540145986e-05, "loss": 0.0945, "step": 9556 }, { "epoch": 1.34, "learning_rate": 4.776413063821823e-05, "loss": 0.0987, "step": 9558 }, { "epoch": 1.34, "learning_rate": 4.776366273629048e-05, "loss": 0.0715, "step": 9560 }, { "epoch": 1.34, "learning_rate": 4.776319483436272e-05, "loss": 0.0913, "step": 9562 }, { "epoch": 1.34, "learning_rate": 4.776272693243496e-05, "loss": 0.0794, "step": 9564 }, { "epoch": 1.34, "learning_rate": 4.776225903050721e-05, "loss": 0.0623, "step": 9566 }, { "epoch": 1.34, "learning_rate": 4.7761791128579455e-05, "loss": 0.05, "step": 9568 }, { "epoch": 1.34, "learning_rate": 4.7761323226651694e-05, "loss": 0.0791, "step": 9570 }, { "epoch": 1.34, "learning_rate": 4.776085532472394e-05, "loss": 0.0791, "step": 9572 }, { "epoch": 1.34, "learning_rate": 4.776038742279618e-05, "loss": 0.0999, "step": 9574 }, { "epoch": 1.34, "learning_rate": 4.775991952086843e-05, "loss": 0.0884, "step": 9576 }, { "epoch": 1.34, "learning_rate": 4.775945161894067e-05, "loss": 0.0693, "step": 9578 }, { "epoch": 1.34, "learning_rate": 4.775898371701292e-05, "loss": 0.0907, "step": 9580 }, { "epoch": 1.35, "learning_rate": 4.7758515815085156e-05, "loss": 0.0747, "step": 9582 }, { "epoch": 1.35, "learning_rate": 4.775804791315741e-05, "loss": 0.0725, "step": 9584 }, { "epoch": 1.35, "learning_rate": 4.775758001122965e-05, "loss": 0.0684, "step": 9586 }, { "epoch": 1.35, "learning_rate": 4.7757112109301894e-05, "loss": 0.065, "step": 9588 }, { "epoch": 1.35, "learning_rate": 4.775664420737413e-05, "loss": 0.0754, "step": 9590 }, { "epoch": 1.35, "learning_rate": 4.775617630544638e-05, "loss": 0.0679, "step": 9592 }, { "epoch": 1.35, "learning_rate": 4.7755708403518625e-05, "loss": 0.0682, "step": 9594 }, { "epoch": 1.35, "learning_rate": 4.775524050159087e-05, "loss": 0.0815, "step": 9596 }, { "epoch": 1.35, "learning_rate": 4.775477259966311e-05, "loss": 0.0898, "step": 9598 }, { "epoch": 1.35, "learning_rate": 4.7754304697735356e-05, "loss": 0.0839, "step": 9600 }, { "epoch": 1.35, "learning_rate": 4.77538367958076e-05, "loss": 0.083, "step": 9602 }, { "epoch": 1.35, "learning_rate": 4.775336889387985e-05, "loss": 0.0856, "step": 9604 }, { "epoch": 1.35, "learning_rate": 4.775290099195209e-05, "loss": 0.0677, "step": 9606 }, { "epoch": 1.35, "learning_rate": 4.775243309002433e-05, "loss": 0.0781, "step": 9608 }, { "epoch": 1.35, "learning_rate": 4.775196518809658e-05, "loss": 0.0991, "step": 9610 }, { "epoch": 1.35, "learning_rate": 4.7751497286168824e-05, "loss": 0.0913, "step": 9612 }, { "epoch": 1.35, "learning_rate": 4.7751029384241064e-05, "loss": 0.0608, "step": 9614 }, { "epoch": 1.35, "learning_rate": 4.775056148231331e-05, "loss": 0.0653, "step": 9616 }, { "epoch": 1.35, "learning_rate": 4.775009358038555e-05, "loss": 0.0609, "step": 9618 }, { "epoch": 1.35, "learning_rate": 4.77496256784578e-05, "loss": 0.0785, "step": 9620 }, { "epoch": 1.35, "learning_rate": 4.774915777653004e-05, "loss": 0.0784, "step": 9622 }, { "epoch": 1.35, "learning_rate": 4.7748689874602286e-05, "loss": 0.0833, "step": 9624 }, { "epoch": 1.35, "learning_rate": 4.7748221972674526e-05, "loss": 0.0717, "step": 9626 }, { "epoch": 1.35, "learning_rate": 4.774775407074678e-05, "loss": 0.0682, "step": 9628 }, { "epoch": 1.35, "learning_rate": 4.774728616881902e-05, "loss": 0.0794, "step": 9630 }, { "epoch": 1.35, "learning_rate": 4.774681826689126e-05, "loss": 0.0642, "step": 9632 }, { "epoch": 1.35, "learning_rate": 4.77463503649635e-05, "loss": 0.0813, "step": 9634 }, { "epoch": 1.35, "learning_rate": 4.774588246303575e-05, "loss": 0.063, "step": 9636 }, { "epoch": 1.35, "learning_rate": 4.7745414561107994e-05, "loss": 0.072, "step": 9638 }, { "epoch": 1.35, "learning_rate": 4.774494665918024e-05, "loss": 0.072, "step": 9640 }, { "epoch": 1.35, "learning_rate": 4.774447875725248e-05, "loss": 0.0679, "step": 9642 }, { "epoch": 1.35, "learning_rate": 4.7744010855324725e-05, "loss": 0.0886, "step": 9644 }, { "epoch": 1.35, "learning_rate": 4.774354295339697e-05, "loss": 0.0753, "step": 9646 }, { "epoch": 1.35, "learning_rate": 4.774307505146922e-05, "loss": 0.073, "step": 9648 }, { "epoch": 1.35, "learning_rate": 4.7742607149541456e-05, "loss": 0.0789, "step": 9650 }, { "epoch": 1.35, "learning_rate": 4.77421392476137e-05, "loss": 0.0915, "step": 9652 }, { "epoch": 1.36, "learning_rate": 4.774167134568595e-05, "loss": 0.0621, "step": 9654 }, { "epoch": 1.36, "learning_rate": 4.7741203443758194e-05, "loss": 0.0681, "step": 9656 }, { "epoch": 1.36, "learning_rate": 4.774073554183043e-05, "loss": 0.1235, "step": 9658 }, { "epoch": 1.36, "learning_rate": 4.774026763990268e-05, "loss": 0.0724, "step": 9660 }, { "epoch": 1.36, "learning_rate": 4.7739799737974925e-05, "loss": 0.0849, "step": 9662 }, { "epoch": 1.36, "learning_rate": 4.773933183604717e-05, "loss": 0.1005, "step": 9664 }, { "epoch": 1.36, "learning_rate": 4.773886393411941e-05, "loss": 0.0999, "step": 9666 }, { "epoch": 1.36, "learning_rate": 4.7738396032191656e-05, "loss": 0.0581, "step": 9668 }, { "epoch": 1.36, "learning_rate": 4.7737928130263895e-05, "loss": 0.0877, "step": 9670 }, { "epoch": 1.36, "learning_rate": 4.773746022833615e-05, "loss": 0.0776, "step": 9672 }, { "epoch": 1.36, "learning_rate": 4.773699232640839e-05, "loss": 0.0849, "step": 9674 }, { "epoch": 1.36, "learning_rate": 4.773652442448063e-05, "loss": 0.0882, "step": 9676 }, { "epoch": 1.36, "learning_rate": 4.773605652255287e-05, "loss": 0.074, "step": 9678 }, { "epoch": 1.36, "learning_rate": 4.7735588620625125e-05, "loss": 0.0653, "step": 9680 }, { "epoch": 1.36, "learning_rate": 4.7735120718697364e-05, "loss": 0.0706, "step": 9682 }, { "epoch": 1.36, "learning_rate": 4.773465281676961e-05, "loss": 0.0886, "step": 9684 }, { "epoch": 1.36, "learning_rate": 4.773418491484185e-05, "loss": 0.0674, "step": 9686 }, { "epoch": 1.36, "learning_rate": 4.7733717012914095e-05, "loss": 0.0827, "step": 9688 }, { "epoch": 1.36, "learning_rate": 4.773324911098634e-05, "loss": 0.0712, "step": 9690 }, { "epoch": 1.36, "learning_rate": 4.7732781209058586e-05, "loss": 0.064, "step": 9692 }, { "epoch": 1.36, "learning_rate": 4.7732313307130826e-05, "loss": 0.0863, "step": 9694 }, { "epoch": 1.36, "learning_rate": 4.773184540520307e-05, "loss": 0.0711, "step": 9696 }, { "epoch": 1.36, "learning_rate": 4.773137750327532e-05, "loss": 0.0988, "step": 9698 }, { "epoch": 1.36, "learning_rate": 4.773090960134756e-05, "loss": 0.0715, "step": 9700 }, { "epoch": 1.36, "learning_rate": 4.77304416994198e-05, "loss": 0.069, "step": 9702 }, { "epoch": 1.36, "learning_rate": 4.772997379749205e-05, "loss": 0.0843, "step": 9704 }, { "epoch": 1.36, "learning_rate": 4.7729505895564294e-05, "loss": 0.0505, "step": 9706 }, { "epoch": 1.36, "learning_rate": 4.772903799363654e-05, "loss": 0.0832, "step": 9708 }, { "epoch": 1.36, "learning_rate": 4.772857009170878e-05, "loss": 0.0674, "step": 9710 }, { "epoch": 1.36, "learning_rate": 4.7728102189781025e-05, "loss": 0.0918, "step": 9712 }, { "epoch": 1.36, "learning_rate": 4.772763428785327e-05, "loss": 0.062, "step": 9714 }, { "epoch": 1.36, "learning_rate": 4.772716638592552e-05, "loss": 0.0787, "step": 9716 }, { "epoch": 1.36, "learning_rate": 4.7726698483997756e-05, "loss": 0.1003, "step": 9718 }, { "epoch": 1.36, "learning_rate": 4.772623058207e-05, "loss": 0.0892, "step": 9720 }, { "epoch": 1.36, "learning_rate": 4.772576268014224e-05, "loss": 0.0901, "step": 9722 }, { "epoch": 1.36, "learning_rate": 4.7725294778214494e-05, "loss": 0.0626, "step": 9724 }, { "epoch": 1.37, "learning_rate": 4.772482687628673e-05, "loss": 0.0543, "step": 9726 }, { "epoch": 1.37, "learning_rate": 4.772435897435898e-05, "loss": 0.0589, "step": 9728 }, { "epoch": 1.37, "learning_rate": 4.772389107243122e-05, "loss": 0.0743, "step": 9730 }, { "epoch": 1.37, "learning_rate": 4.7723423170503464e-05, "loss": 0.0698, "step": 9732 }, { "epoch": 1.37, "learning_rate": 4.772295526857571e-05, "loss": 0.0566, "step": 9734 }, { "epoch": 1.37, "learning_rate": 4.7722487366647956e-05, "loss": 0.0756, "step": 9736 }, { "epoch": 1.37, "learning_rate": 4.7722019464720195e-05, "loss": 0.0818, "step": 9738 }, { "epoch": 1.37, "learning_rate": 4.772155156279244e-05, "loss": 0.08, "step": 9740 }, { "epoch": 1.37, "learning_rate": 4.772108366086469e-05, "loss": 0.0814, "step": 9742 }, { "epoch": 1.37, "learning_rate": 4.772061575893693e-05, "loss": 0.0714, "step": 9744 }, { "epoch": 1.37, "learning_rate": 4.772014785700917e-05, "loss": 0.0875, "step": 9746 }, { "epoch": 1.37, "learning_rate": 4.771967995508142e-05, "loss": 0.0761, "step": 9748 }, { "epoch": 1.37, "learning_rate": 4.7719212053153664e-05, "loss": 0.104, "step": 9750 }, { "epoch": 1.37, "learning_rate": 4.77187441512259e-05, "loss": 0.0747, "step": 9752 }, { "epoch": 1.37, "learning_rate": 4.771827624929815e-05, "loss": 0.0648, "step": 9754 }, { "epoch": 1.37, "learning_rate": 4.771780834737039e-05, "loss": 0.0821, "step": 9756 }, { "epoch": 1.37, "learning_rate": 4.771734044544264e-05, "loss": 0.075, "step": 9758 }, { "epoch": 1.37, "learning_rate": 4.771687254351488e-05, "loss": 0.0703, "step": 9760 }, { "epoch": 1.37, "learning_rate": 4.7716404641587126e-05, "loss": 0.0719, "step": 9762 }, { "epoch": 1.37, "learning_rate": 4.7715936739659365e-05, "loss": 0.0866, "step": 9764 }, { "epoch": 1.37, "learning_rate": 4.771546883773161e-05, "loss": 0.0786, "step": 9766 }, { "epoch": 1.37, "learning_rate": 4.7715000935803857e-05, "loss": 0.0608, "step": 9768 }, { "epoch": 1.37, "learning_rate": 4.77145330338761e-05, "loss": 0.0894, "step": 9770 }, { "epoch": 1.37, "learning_rate": 4.771406513194834e-05, "loss": 0.0729, "step": 9772 }, { "epoch": 1.37, "learning_rate": 4.771359723002059e-05, "loss": 0.0702, "step": 9774 }, { "epoch": 1.37, "learning_rate": 4.7713129328092833e-05, "loss": 0.0739, "step": 9776 }, { "epoch": 1.37, "learning_rate": 4.771266142616508e-05, "loss": 0.0684, "step": 9778 }, { "epoch": 1.37, "learning_rate": 4.771219352423732e-05, "loss": 0.0607, "step": 9780 }, { "epoch": 1.37, "learning_rate": 4.7711725622309564e-05, "loss": 0.0645, "step": 9782 }, { "epoch": 1.37, "learning_rate": 4.771125772038181e-05, "loss": 0.0874, "step": 9784 }, { "epoch": 1.37, "learning_rate": 4.7710789818454056e-05, "loss": 0.0736, "step": 9786 }, { "epoch": 1.37, "learning_rate": 4.7710321916526295e-05, "loss": 0.0786, "step": 9788 }, { "epoch": 1.37, "learning_rate": 4.770985401459854e-05, "loss": 0.0883, "step": 9790 }, { "epoch": 1.37, "learning_rate": 4.770938611267079e-05, "loss": 0.066, "step": 9792 }, { "epoch": 1.37, "learning_rate": 4.770891821074303e-05, "loss": 0.0771, "step": 9794 }, { "epoch": 1.38, "learning_rate": 4.770845030881527e-05, "loss": 0.0729, "step": 9796 }, { "epoch": 1.38, "learning_rate": 4.770798240688752e-05, "loss": 0.0639, "step": 9798 }, { "epoch": 1.38, "learning_rate": 4.770751450495976e-05, "loss": 0.0947, "step": 9800 }, { "epoch": 1.38, "learning_rate": 4.770704660303201e-05, "loss": 0.0579, "step": 9802 }, { "epoch": 1.38, "learning_rate": 4.770657870110425e-05, "loss": 0.0829, "step": 9804 }, { "epoch": 1.38, "learning_rate": 4.7706110799176495e-05, "loss": 0.0607, "step": 9806 }, { "epoch": 1.38, "learning_rate": 4.7705642897248734e-05, "loss": 0.086, "step": 9808 }, { "epoch": 1.38, "learning_rate": 4.770517499532099e-05, "loss": 0.0956, "step": 9810 }, { "epoch": 1.38, "learning_rate": 4.7704707093393226e-05, "loss": 0.0797, "step": 9812 }, { "epoch": 1.38, "learning_rate": 4.770423919146547e-05, "loss": 0.0675, "step": 9814 }, { "epoch": 1.38, "learning_rate": 4.770377128953771e-05, "loss": 0.0789, "step": 9816 }, { "epoch": 1.38, "learning_rate": 4.770330338760996e-05, "loss": 0.0852, "step": 9818 }, { "epoch": 1.38, "learning_rate": 4.77028354856822e-05, "loss": 0.0734, "step": 9820 }, { "epoch": 1.38, "learning_rate": 4.770236758375445e-05, "loss": 0.0697, "step": 9822 }, { "epoch": 1.38, "learning_rate": 4.770189968182669e-05, "loss": 0.0735, "step": 9824 }, { "epoch": 1.38, "learning_rate": 4.7701431779898934e-05, "loss": 0.075, "step": 9826 }, { "epoch": 1.38, "learning_rate": 4.770096387797118e-05, "loss": 0.0738, "step": 9828 }, { "epoch": 1.38, "learning_rate": 4.7700495976043426e-05, "loss": 0.0763, "step": 9830 }, { "epoch": 1.38, "learning_rate": 4.7700028074115665e-05, "loss": 0.0636, "step": 9832 }, { "epoch": 1.38, "learning_rate": 4.769956017218791e-05, "loss": 0.0753, "step": 9834 }, { "epoch": 1.38, "learning_rate": 4.7699092270260157e-05, "loss": 0.0515, "step": 9836 }, { "epoch": 1.38, "learning_rate": 4.76986243683324e-05, "loss": 0.0864, "step": 9838 }, { "epoch": 1.38, "learning_rate": 4.769815646640464e-05, "loss": 0.082, "step": 9840 }, { "epoch": 1.38, "learning_rate": 4.769768856447689e-05, "loss": 0.103, "step": 9842 }, { "epoch": 1.38, "learning_rate": 4.7697220662549133e-05, "loss": 0.0795, "step": 9844 }, { "epoch": 1.38, "learning_rate": 4.769675276062138e-05, "loss": 0.0762, "step": 9846 }, { "epoch": 1.38, "learning_rate": 4.769628485869362e-05, "loss": 0.0741, "step": 9848 }, { "epoch": 1.38, "learning_rate": 4.7695816956765864e-05, "loss": 0.0859, "step": 9850 }, { "epoch": 1.38, "learning_rate": 4.7695349054838104e-05, "loss": 0.0911, "step": 9852 }, { "epoch": 1.38, "learning_rate": 4.7694881152910356e-05, "loss": 0.0538, "step": 9854 }, { "epoch": 1.38, "learning_rate": 4.7694413250982595e-05, "loss": 0.0864, "step": 9856 }, { "epoch": 1.38, "learning_rate": 4.769394534905484e-05, "loss": 0.0714, "step": 9858 }, { "epoch": 1.38, "learning_rate": 4.769347744712708e-05, "loss": 0.076, "step": 9860 }, { "epoch": 1.38, "learning_rate": 4.769300954519933e-05, "loss": 0.0867, "step": 9862 }, { "epoch": 1.38, "learning_rate": 4.769254164327157e-05, "loss": 0.0533, "step": 9864 }, { "epoch": 1.38, "learning_rate": 4.769207374134382e-05, "loss": 0.0657, "step": 9866 }, { "epoch": 1.39, "learning_rate": 4.769160583941606e-05, "loss": 0.0729, "step": 9868 }, { "epoch": 1.39, "learning_rate": 4.76911379374883e-05, "loss": 0.0832, "step": 9870 }, { "epoch": 1.39, "learning_rate": 4.769067003556055e-05, "loss": 0.0678, "step": 9872 }, { "epoch": 1.39, "learning_rate": 4.7690202133632795e-05, "loss": 0.0666, "step": 9874 }, { "epoch": 1.39, "learning_rate": 4.7689734231705034e-05, "loss": 0.0718, "step": 9876 }, { "epoch": 1.39, "learning_rate": 4.768926632977728e-05, "loss": 0.112, "step": 9878 }, { "epoch": 1.39, "learning_rate": 4.7688798427849526e-05, "loss": 0.0844, "step": 9880 }, { "epoch": 1.39, "learning_rate": 4.768833052592177e-05, "loss": 0.0779, "step": 9882 }, { "epoch": 1.39, "learning_rate": 4.768786262399401e-05, "loss": 0.0621, "step": 9884 }, { "epoch": 1.39, "learning_rate": 4.768739472206626e-05, "loss": 0.0749, "step": 9886 }, { "epoch": 1.39, "learning_rate": 4.76869268201385e-05, "loss": 0.0599, "step": 9888 }, { "epoch": 1.39, "learning_rate": 4.768645891821075e-05, "loss": 0.0632, "step": 9890 }, { "epoch": 1.39, "learning_rate": 4.768599101628299e-05, "loss": 0.0714, "step": 9892 }, { "epoch": 1.39, "learning_rate": 4.7685523114355234e-05, "loss": 0.0759, "step": 9894 }, { "epoch": 1.39, "learning_rate": 4.768505521242748e-05, "loss": 0.0895, "step": 9896 }, { "epoch": 1.39, "learning_rate": 4.7684587310499726e-05, "loss": 0.0576, "step": 9898 }, { "epoch": 1.39, "learning_rate": 4.7684119408571965e-05, "loss": 0.0895, "step": 9900 }, { "epoch": 1.39, "learning_rate": 4.768365150664421e-05, "loss": 0.1024, "step": 9902 }, { "epoch": 1.39, "learning_rate": 4.768318360471645e-05, "loss": 0.0746, "step": 9904 }, { "epoch": 1.39, "learning_rate": 4.76827157027887e-05, "loss": 0.064, "step": 9906 }, { "epoch": 1.39, "learning_rate": 4.768224780086094e-05, "loss": 0.0898, "step": 9908 }, { "epoch": 1.39, "learning_rate": 4.768177989893319e-05, "loss": 0.0732, "step": 9910 }, { "epoch": 1.39, "learning_rate": 4.768131199700543e-05, "loss": 0.0689, "step": 9912 }, { "epoch": 1.39, "learning_rate": 4.768084409507767e-05, "loss": 0.0677, "step": 9914 }, { "epoch": 1.39, "learning_rate": 4.768037619314992e-05, "loss": 0.0953, "step": 9916 }, { "epoch": 1.39, "learning_rate": 4.7679908291222164e-05, "loss": 0.0761, "step": 9918 }, { "epoch": 1.39, "learning_rate": 4.7679440389294404e-05, "loss": 0.0828, "step": 9920 }, { "epoch": 1.39, "learning_rate": 4.767897248736665e-05, "loss": 0.0798, "step": 9922 }, { "epoch": 1.39, "learning_rate": 4.7678504585438895e-05, "loss": 0.0787, "step": 9924 }, { "epoch": 1.39, "learning_rate": 4.767803668351114e-05, "loss": 0.0637, "step": 9926 }, { "epoch": 1.39, "learning_rate": 4.767756878158338e-05, "loss": 0.0898, "step": 9928 }, { "epoch": 1.39, "learning_rate": 4.7677100879655626e-05, "loss": 0.0719, "step": 9930 }, { "epoch": 1.39, "learning_rate": 4.767663297772787e-05, "loss": 0.0693, "step": 9932 }, { "epoch": 1.39, "learning_rate": 4.767616507580012e-05, "loss": 0.0773, "step": 9934 }, { "epoch": 1.39, "learning_rate": 4.767569717387236e-05, "loss": 0.0691, "step": 9936 }, { "epoch": 1.4, "learning_rate": 4.76752292719446e-05, "loss": 0.0797, "step": 9938 }, { "epoch": 1.4, "learning_rate": 4.767476137001685e-05, "loss": 0.0809, "step": 9940 }, { "epoch": 1.4, "learning_rate": 4.7674293468089095e-05, "loss": 0.0595, "step": 9942 }, { "epoch": 1.4, "learning_rate": 4.7673825566161334e-05, "loss": 0.0704, "step": 9944 }, { "epoch": 1.4, "learning_rate": 4.767335766423358e-05, "loss": 0.0709, "step": 9946 }, { "epoch": 1.4, "learning_rate": 4.767288976230582e-05, "loss": 0.0863, "step": 9948 }, { "epoch": 1.4, "learning_rate": 4.767242186037807e-05, "loss": 0.0692, "step": 9950 }, { "epoch": 1.4, "learning_rate": 4.767195395845031e-05, "loss": 0.0904, "step": 9952 }, { "epoch": 1.4, "learning_rate": 4.767148605652256e-05, "loss": 0.0807, "step": 9954 }, { "epoch": 1.4, "learning_rate": 4.7671018154594796e-05, "loss": 0.0644, "step": 9956 }, { "epoch": 1.4, "learning_rate": 4.767055025266705e-05, "loss": 0.0687, "step": 9958 }, { "epoch": 1.4, "learning_rate": 4.767008235073929e-05, "loss": 0.0609, "step": 9960 }, { "epoch": 1.4, "learning_rate": 4.7669614448811534e-05, "loss": 0.0771, "step": 9962 }, { "epoch": 1.4, "learning_rate": 4.766914654688377e-05, "loss": 0.0836, "step": 9964 }, { "epoch": 1.4, "learning_rate": 4.766867864495602e-05, "loss": 0.08, "step": 9966 }, { "epoch": 1.4, "learning_rate": 4.7668210743028265e-05, "loss": 0.0753, "step": 9968 }, { "epoch": 1.4, "learning_rate": 4.766774284110051e-05, "loss": 0.0781, "step": 9970 }, { "epoch": 1.4, "learning_rate": 4.766727493917275e-05, "loss": 0.0704, "step": 9972 }, { "epoch": 1.4, "learning_rate": 4.7666807037244996e-05, "loss": 0.0707, "step": 9974 }, { "epoch": 1.4, "learning_rate": 4.766633913531724e-05, "loss": 0.0661, "step": 9976 }, { "epoch": 1.4, "learning_rate": 4.766587123338949e-05, "loss": 0.0683, "step": 9978 }, { "epoch": 1.4, "learning_rate": 4.766540333146173e-05, "loss": 0.0688, "step": 9980 }, { "epoch": 1.4, "learning_rate": 4.766493542953397e-05, "loss": 0.0902, "step": 9982 }, { "epoch": 1.4, "learning_rate": 4.766446752760622e-05, "loss": 0.0783, "step": 9984 }, { "epoch": 1.4, "learning_rate": 4.7663999625678464e-05, "loss": 0.0778, "step": 9986 }, { "epoch": 1.4, "learning_rate": 4.7663531723750704e-05, "loss": 0.0921, "step": 9988 }, { "epoch": 1.4, "learning_rate": 4.766306382182295e-05, "loss": 0.066, "step": 9990 }, { "epoch": 1.4, "learning_rate": 4.7662595919895195e-05, "loss": 0.0598, "step": 9992 }, { "epoch": 1.4, "learning_rate": 4.766212801796744e-05, "loss": 0.0784, "step": 9994 }, { "epoch": 1.4, "learning_rate": 4.766166011603968e-05, "loss": 0.0627, "step": 9996 }, { "epoch": 1.4, "learning_rate": 4.7661192214111926e-05, "loss": 0.0974, "step": 9998 }, { "epoch": 1.4, "learning_rate": 4.7660724312184165e-05, "loss": 0.09, "step": 10000 }, { "epoch": 1.4, "eval_gen_len": 28.5517, "eval_loss": 1.054301142692566, "eval_meteor": 0.0451, "eval_runtime": 13.332, "eval_samples_per_second": 4.35, "eval_steps_per_second": 0.6, "step": 10000 }, { "epoch": 1.4, "learning_rate": 4.766025641025642e-05, "loss": 0.0818, "step": 10002 }, { "epoch": 1.4, "learning_rate": 4.765978850832866e-05, "loss": 0.0792, "step": 10004 }, { "epoch": 1.4, "learning_rate": 4.7659320606400896e-05, "loss": 0.0772, "step": 10006 }, { "epoch": 1.4, "learning_rate": 4.765885270447314e-05, "loss": 0.0575, "step": 10008 }, { "epoch": 1.41, "learning_rate": 4.765838480254539e-05, "loss": 0.0836, "step": 10010 }, { "epoch": 1.41, "learning_rate": 4.7657916900617634e-05, "loss": 0.0748, "step": 10012 }, { "epoch": 1.41, "learning_rate": 4.765744899868987e-05, "loss": 0.0925, "step": 10014 }, { "epoch": 1.41, "learning_rate": 4.765698109676212e-05, "loss": 0.0833, "step": 10016 }, { "epoch": 1.41, "learning_rate": 4.7656513194834365e-05, "loss": 0.0727, "step": 10018 }, { "epoch": 1.41, "learning_rate": 4.765604529290661e-05, "loss": 0.0711, "step": 10020 }, { "epoch": 1.41, "learning_rate": 4.765557739097885e-05, "loss": 0.075, "step": 10022 }, { "epoch": 1.41, "learning_rate": 4.7655109489051096e-05, "loss": 0.0952, "step": 10024 }, { "epoch": 1.41, "learning_rate": 4.765464158712334e-05, "loss": 0.0643, "step": 10026 }, { "epoch": 1.41, "learning_rate": 4.765417368519559e-05, "loss": 0.0896, "step": 10028 }, { "epoch": 1.41, "learning_rate": 4.765370578326783e-05, "loss": 0.0653, "step": 10030 }, { "epoch": 1.41, "learning_rate": 4.765323788134007e-05, "loss": 0.0816, "step": 10032 }, { "epoch": 1.41, "learning_rate": 4.765276997941231e-05, "loss": 0.0766, "step": 10034 }, { "epoch": 1.41, "learning_rate": 4.7652302077484565e-05, "loss": 0.0978, "step": 10036 }, { "epoch": 1.41, "learning_rate": 4.7651834175556804e-05, "loss": 0.0816, "step": 10038 }, { "epoch": 1.41, "learning_rate": 4.765136627362905e-05, "loss": 0.0778, "step": 10040 }, { "epoch": 1.41, "learning_rate": 4.765089837170129e-05, "loss": 0.0833, "step": 10042 }, { "epoch": 1.41, "learning_rate": 4.7650430469773535e-05, "loss": 0.0851, "step": 10044 }, { "epoch": 1.41, "learning_rate": 4.764996256784578e-05, "loss": 0.063, "step": 10046 }, { "epoch": 1.41, "learning_rate": 4.764949466591803e-05, "loss": 0.0694, "step": 10048 }, { "epoch": 1.41, "learning_rate": 4.7649026763990266e-05, "loss": 0.0671, "step": 10050 }, { "epoch": 1.41, "learning_rate": 4.764855886206251e-05, "loss": 0.0687, "step": 10052 }, { "epoch": 1.41, "learning_rate": 4.764809096013476e-05, "loss": 0.0728, "step": 10054 }, { "epoch": 1.41, "learning_rate": 4.7647623058207004e-05, "loss": 0.0609, "step": 10056 }, { "epoch": 1.41, "learning_rate": 4.764715515627924e-05, "loss": 0.0868, "step": 10058 }, { "epoch": 1.41, "learning_rate": 4.764668725435149e-05, "loss": 0.0629, "step": 10060 }, { "epoch": 1.41, "learning_rate": 4.7646219352423735e-05, "loss": 0.0795, "step": 10062 }, { "epoch": 1.41, "learning_rate": 4.764575145049598e-05, "loss": 0.0821, "step": 10064 }, { "epoch": 1.41, "learning_rate": 4.764528354856822e-05, "loss": 0.0911, "step": 10066 }, { "epoch": 1.41, "learning_rate": 4.7644815646640465e-05, "loss": 0.0839, "step": 10068 }, { "epoch": 1.41, "learning_rate": 4.764434774471271e-05, "loss": 0.0989, "step": 10070 }, { "epoch": 1.41, "learning_rate": 4.764387984278496e-05, "loss": 0.0642, "step": 10072 }, { "epoch": 1.41, "learning_rate": 4.7643411940857196e-05, "loss": 0.074, "step": 10074 }, { "epoch": 1.41, "learning_rate": 4.764294403892944e-05, "loss": 0.0802, "step": 10076 }, { "epoch": 1.41, "learning_rate": 4.764247613700168e-05, "loss": 0.066, "step": 10078 }, { "epoch": 1.41, "learning_rate": 4.7642008235073934e-05, "loss": 0.063, "step": 10080 }, { "epoch": 1.42, "learning_rate": 4.764154033314617e-05, "loss": 0.0642, "step": 10082 }, { "epoch": 1.42, "learning_rate": 4.764107243121842e-05, "loss": 0.0953, "step": 10084 }, { "epoch": 1.42, "learning_rate": 4.764060452929066e-05, "loss": 0.0785, "step": 10086 }, { "epoch": 1.42, "learning_rate": 4.764013662736291e-05, "loss": 0.0715, "step": 10088 }, { "epoch": 1.42, "learning_rate": 4.763966872543515e-05, "loss": 0.0878, "step": 10090 }, { "epoch": 1.42, "learning_rate": 4.7639200823507396e-05, "loss": 0.0906, "step": 10092 }, { "epoch": 1.42, "learning_rate": 4.7638732921579635e-05, "loss": 0.0669, "step": 10094 }, { "epoch": 1.42, "learning_rate": 4.763826501965188e-05, "loss": 0.0847, "step": 10096 }, { "epoch": 1.42, "learning_rate": 4.763779711772413e-05, "loss": 0.1002, "step": 10098 }, { "epoch": 1.42, "learning_rate": 4.763732921579637e-05, "loss": 0.0795, "step": 10100 }, { "epoch": 1.42, "learning_rate": 4.763686131386861e-05, "loss": 0.0743, "step": 10102 }, { "epoch": 1.42, "learning_rate": 4.763639341194086e-05, "loss": 0.0795, "step": 10104 }, { "epoch": 1.42, "learning_rate": 4.7635925510013104e-05, "loss": 0.0786, "step": 10106 }, { "epoch": 1.42, "learning_rate": 4.763545760808535e-05, "loss": 0.0795, "step": 10108 }, { "epoch": 1.42, "learning_rate": 4.763498970615759e-05, "loss": 0.0643, "step": 10110 }, { "epoch": 1.42, "learning_rate": 4.7634521804229835e-05, "loss": 0.0751, "step": 10112 }, { "epoch": 1.42, "learning_rate": 4.763405390230208e-05, "loss": 0.0759, "step": 10114 }, { "epoch": 1.42, "learning_rate": 4.763358600037433e-05, "loss": 0.0764, "step": 10116 }, { "epoch": 1.42, "learning_rate": 4.7633118098446566e-05, "loss": 0.0866, "step": 10118 }, { "epoch": 1.42, "learning_rate": 4.763265019651881e-05, "loss": 0.0631, "step": 10120 }, { "epoch": 1.42, "learning_rate": 4.763218229459106e-05, "loss": 0.0834, "step": 10122 }, { "epoch": 1.42, "learning_rate": 4.7631714392663304e-05, "loss": 0.0667, "step": 10124 }, { "epoch": 1.42, "learning_rate": 4.763124649073554e-05, "loss": 0.0964, "step": 10126 }, { "epoch": 1.42, "learning_rate": 4.763077858880779e-05, "loss": 0.0615, "step": 10128 }, { "epoch": 1.42, "learning_rate": 4.763031068688003e-05, "loss": 0.0611, "step": 10130 }, { "epoch": 1.42, "learning_rate": 4.762984278495228e-05, "loss": 0.0695, "step": 10132 }, { "epoch": 1.42, "learning_rate": 4.762937488302452e-05, "loss": 0.0709, "step": 10134 }, { "epoch": 1.42, "learning_rate": 4.7628906981096766e-05, "loss": 0.0801, "step": 10136 }, { "epoch": 1.42, "learning_rate": 4.7628439079169005e-05, "loss": 0.0692, "step": 10138 }, { "epoch": 1.42, "learning_rate": 4.762797117724126e-05, "loss": 0.0708, "step": 10140 }, { "epoch": 1.42, "learning_rate": 4.7627503275313496e-05, "loss": 0.0976, "step": 10142 }, { "epoch": 1.42, "learning_rate": 4.762703537338574e-05, "loss": 0.0699, "step": 10144 }, { "epoch": 1.42, "learning_rate": 4.762656747145798e-05, "loss": 0.0636, "step": 10146 }, { "epoch": 1.42, "learning_rate": 4.762609956953023e-05, "loss": 0.0588, "step": 10148 }, { "epoch": 1.42, "learning_rate": 4.762563166760247e-05, "loss": 0.059, "step": 10150 }, { "epoch": 1.43, "learning_rate": 4.762516376567472e-05, "loss": 0.0797, "step": 10152 }, { "epoch": 1.43, "learning_rate": 4.762469586374696e-05, "loss": 0.0753, "step": 10154 }, { "epoch": 1.43, "learning_rate": 4.7624227961819204e-05, "loss": 0.0878, "step": 10156 }, { "epoch": 1.43, "learning_rate": 4.762376005989145e-05, "loss": 0.0826, "step": 10158 }, { "epoch": 1.43, "learning_rate": 4.7623292157963696e-05, "loss": 0.0902, "step": 10160 }, { "epoch": 1.43, "learning_rate": 4.7622824256035935e-05, "loss": 0.06, "step": 10162 }, { "epoch": 1.43, "learning_rate": 4.762235635410818e-05, "loss": 0.0776, "step": 10164 }, { "epoch": 1.43, "learning_rate": 4.762188845218043e-05, "loss": 0.0643, "step": 10166 }, { "epoch": 1.43, "learning_rate": 4.762142055025267e-05, "loss": 0.0829, "step": 10168 }, { "epoch": 1.43, "learning_rate": 4.762095264832491e-05, "loss": 0.0756, "step": 10170 }, { "epoch": 1.43, "learning_rate": 4.762048474639716e-05, "loss": 0.0951, "step": 10172 }, { "epoch": 1.43, "learning_rate": 4.7620016844469404e-05, "loss": 0.076, "step": 10174 }, { "epoch": 1.43, "learning_rate": 4.761954894254165e-05, "loss": 0.0738, "step": 10176 }, { "epoch": 1.43, "learning_rate": 4.761908104061389e-05, "loss": 0.0772, "step": 10178 }, { "epoch": 1.43, "learning_rate": 4.7618613138686135e-05, "loss": 0.0677, "step": 10180 }, { "epoch": 1.43, "learning_rate": 4.7618145236758374e-05, "loss": 0.0833, "step": 10182 }, { "epoch": 1.43, "learning_rate": 4.761767733483063e-05, "loss": 0.0759, "step": 10184 }, { "epoch": 1.43, "learning_rate": 4.7617209432902866e-05, "loss": 0.0751, "step": 10186 }, { "epoch": 1.43, "learning_rate": 4.761674153097511e-05, "loss": 0.0665, "step": 10188 }, { "epoch": 1.43, "learning_rate": 4.761627362904735e-05, "loss": 0.0852, "step": 10190 }, { "epoch": 1.43, "learning_rate": 4.76158057271196e-05, "loss": 0.0953, "step": 10192 }, { "epoch": 1.43, "learning_rate": 4.761533782519184e-05, "loss": 0.0823, "step": 10194 }, { "epoch": 1.43, "learning_rate": 4.761486992326409e-05, "loss": 0.0812, "step": 10196 }, { "epoch": 1.43, "learning_rate": 4.761440202133633e-05, "loss": 0.0773, "step": 10198 }, { "epoch": 1.43, "learning_rate": 4.7613934119408574e-05, "loss": 0.0887, "step": 10200 }, { "epoch": 1.43, "learning_rate": 4.761346621748082e-05, "loss": 0.0602, "step": 10202 }, { "epoch": 1.43, "learning_rate": 4.7612998315553066e-05, "loss": 0.0776, "step": 10204 }, { "epoch": 1.43, "learning_rate": 4.7612530413625305e-05, "loss": 0.1012, "step": 10206 }, { "epoch": 1.43, "learning_rate": 4.761206251169755e-05, "loss": 0.098, "step": 10208 }, { "epoch": 1.43, "learning_rate": 4.7611594609769796e-05, "loss": 0.0564, "step": 10210 }, { "epoch": 1.43, "learning_rate": 4.761112670784204e-05, "loss": 0.115, "step": 10212 }, { "epoch": 1.43, "learning_rate": 4.761065880591428e-05, "loss": 0.0707, "step": 10214 }, { "epoch": 1.43, "learning_rate": 4.761019090398653e-05, "loss": 0.0586, "step": 10216 }, { "epoch": 1.43, "learning_rate": 4.760972300205877e-05, "loss": 0.0837, "step": 10218 }, { "epoch": 1.43, "learning_rate": 4.760925510013102e-05, "loss": 0.057, "step": 10220 }, { "epoch": 1.43, "learning_rate": 4.760878719820326e-05, "loss": 0.0619, "step": 10222 }, { "epoch": 1.44, "learning_rate": 4.7608319296275504e-05, "loss": 0.0778, "step": 10224 }, { "epoch": 1.44, "learning_rate": 4.7607851394347743e-05, "loss": 0.0855, "step": 10226 }, { "epoch": 1.44, "learning_rate": 4.7607383492419996e-05, "loss": 0.0645, "step": 10228 }, { "epoch": 1.44, "learning_rate": 4.7606915590492235e-05, "loss": 0.0837, "step": 10230 }, { "epoch": 1.44, "learning_rate": 4.760644768856448e-05, "loss": 0.0745, "step": 10232 }, { "epoch": 1.44, "learning_rate": 4.760597978663672e-05, "loss": 0.0639, "step": 10234 }, { "epoch": 1.44, "learning_rate": 4.760551188470897e-05, "loss": 0.0886, "step": 10236 }, { "epoch": 1.44, "learning_rate": 4.760504398278121e-05, "loss": 0.0946, "step": 10238 }, { "epoch": 1.44, "learning_rate": 4.760457608085346e-05, "loss": 0.0819, "step": 10240 }, { "epoch": 1.44, "learning_rate": 4.76041081789257e-05, "loss": 0.0925, "step": 10242 }, { "epoch": 1.44, "learning_rate": 4.760364027699794e-05, "loss": 0.0561, "step": 10244 }, { "epoch": 1.44, "learning_rate": 4.760317237507019e-05, "loss": 0.0709, "step": 10246 }, { "epoch": 1.44, "learning_rate": 4.7602704473142435e-05, "loss": 0.064, "step": 10248 }, { "epoch": 1.44, "learning_rate": 4.7602236571214674e-05, "loss": 0.047, "step": 10250 }, { "epoch": 1.44, "learning_rate": 4.760176866928692e-05, "loss": 0.0794, "step": 10252 }, { "epoch": 1.44, "learning_rate": 4.7601300767359166e-05, "loss": 0.0786, "step": 10254 }, { "epoch": 1.44, "learning_rate": 4.760083286543141e-05, "loss": 0.0785, "step": 10256 }, { "epoch": 1.44, "learning_rate": 4.760036496350365e-05, "loss": 0.0695, "step": 10258 }, { "epoch": 1.44, "learning_rate": 4.759989706157589e-05, "loss": 0.0832, "step": 10260 }, { "epoch": 1.44, "learning_rate": 4.759942915964814e-05, "loss": 0.0776, "step": 10262 }, { "epoch": 1.44, "learning_rate": 4.759896125772038e-05, "loss": 0.0844, "step": 10264 }, { "epoch": 1.44, "learning_rate": 4.759849335579263e-05, "loss": 0.0635, "step": 10266 }, { "epoch": 1.44, "learning_rate": 4.759802545386487e-05, "loss": 0.0904, "step": 10268 }, { "epoch": 1.44, "learning_rate": 4.759755755193712e-05, "loss": 0.0815, "step": 10270 }, { "epoch": 1.44, "learning_rate": 4.759708965000936e-05, "loss": 0.0622, "step": 10272 }, { "epoch": 1.44, "learning_rate": 4.7596621748081605e-05, "loss": 0.0852, "step": 10274 }, { "epoch": 1.44, "learning_rate": 4.7596153846153844e-05, "loss": 0.0782, "step": 10276 }, { "epoch": 1.44, "learning_rate": 4.759568594422609e-05, "loss": 0.0733, "step": 10278 }, { "epoch": 1.44, "learning_rate": 4.7595218042298336e-05, "loss": 0.0831, "step": 10280 }, { "epoch": 1.44, "learning_rate": 4.759475014037058e-05, "loss": 0.069, "step": 10282 }, { "epoch": 1.44, "learning_rate": 4.759428223844282e-05, "loss": 0.0582, "step": 10284 }, { "epoch": 1.44, "learning_rate": 4.7593814336515067e-05, "loss": 0.0581, "step": 10286 }, { "epoch": 1.44, "learning_rate": 4.759334643458731e-05, "loss": 0.0904, "step": 10288 }, { "epoch": 1.44, "learning_rate": 4.759287853265956e-05, "loss": 0.0674, "step": 10290 }, { "epoch": 1.44, "learning_rate": 4.75924106307318e-05, "loss": 0.068, "step": 10292 }, { "epoch": 1.44, "learning_rate": 4.7591942728804043e-05, "loss": 0.0824, "step": 10294 }, { "epoch": 1.45, "learning_rate": 4.759147482687629e-05, "loss": 0.0638, "step": 10296 }, { "epoch": 1.45, "learning_rate": 4.7591006924948535e-05, "loss": 0.0851, "step": 10298 }, { "epoch": 1.45, "learning_rate": 4.7590539023020774e-05, "loss": 0.0628, "step": 10300 }, { "epoch": 1.45, "learning_rate": 4.759007112109302e-05, "loss": 0.0814, "step": 10302 }, { "epoch": 1.45, "learning_rate": 4.7589603219165266e-05, "loss": 0.0811, "step": 10304 }, { "epoch": 1.45, "learning_rate": 4.758913531723751e-05, "loss": 0.0766, "step": 10306 }, { "epoch": 1.45, "learning_rate": 4.758866741530975e-05, "loss": 0.0863, "step": 10308 }, { "epoch": 1.45, "learning_rate": 4.7588199513382e-05, "loss": 0.072, "step": 10310 }, { "epoch": 1.45, "learning_rate": 4.7587731611454236e-05, "loss": 0.065, "step": 10312 }, { "epoch": 1.45, "learning_rate": 4.758726370952649e-05, "loss": 0.0783, "step": 10314 }, { "epoch": 1.45, "learning_rate": 4.758679580759873e-05, "loss": 0.0976, "step": 10316 }, { "epoch": 1.45, "learning_rate": 4.7586327905670974e-05, "loss": 0.0812, "step": 10318 }, { "epoch": 1.45, "learning_rate": 4.758586000374321e-05, "loss": 0.0647, "step": 10320 }, { "epoch": 1.45, "learning_rate": 4.758539210181546e-05, "loss": 0.0655, "step": 10322 }, { "epoch": 1.45, "learning_rate": 4.7584924199887705e-05, "loss": 0.0794, "step": 10324 }, { "epoch": 1.45, "learning_rate": 4.758445629795995e-05, "loss": 0.0911, "step": 10326 }, { "epoch": 1.45, "learning_rate": 4.758398839603219e-05, "loss": 0.0695, "step": 10328 }, { "epoch": 1.45, "learning_rate": 4.7583520494104436e-05, "loss": 0.0937, "step": 10330 }, { "epoch": 1.45, "learning_rate": 4.758305259217668e-05, "loss": 0.0858, "step": 10332 }, { "epoch": 1.45, "learning_rate": 4.758258469024893e-05, "loss": 0.086, "step": 10334 }, { "epoch": 1.45, "learning_rate": 4.758211678832117e-05, "loss": 0.0736, "step": 10336 }, { "epoch": 1.45, "learning_rate": 4.758164888639341e-05, "loss": 0.0772, "step": 10338 }, { "epoch": 1.45, "learning_rate": 4.758118098446566e-05, "loss": 0.069, "step": 10340 }, { "epoch": 1.45, "learning_rate": 4.7580713082537905e-05, "loss": 0.0619, "step": 10342 }, { "epoch": 1.45, "learning_rate": 4.7580245180610144e-05, "loss": 0.0741, "step": 10344 }, { "epoch": 1.45, "learning_rate": 4.757977727868239e-05, "loss": 0.067, "step": 10346 }, { "epoch": 1.45, "learning_rate": 4.7579309376754636e-05, "loss": 0.1078, "step": 10348 }, { "epoch": 1.45, "learning_rate": 4.757884147482688e-05, "loss": 0.0737, "step": 10350 }, { "epoch": 1.45, "learning_rate": 4.757837357289912e-05, "loss": 0.0616, "step": 10352 }, { "epoch": 1.45, "learning_rate": 4.7577905670971367e-05, "loss": 0.0721, "step": 10354 }, { "epoch": 1.45, "learning_rate": 4.7577437769043606e-05, "loss": 0.0865, "step": 10356 }, { "epoch": 1.45, "learning_rate": 4.757696986711586e-05, "loss": 0.0837, "step": 10358 }, { "epoch": 1.45, "learning_rate": 4.75765019651881e-05, "loss": 0.0855, "step": 10360 }, { "epoch": 1.45, "learning_rate": 4.7576034063260343e-05, "loss": 0.0708, "step": 10362 }, { "epoch": 1.45, "learning_rate": 4.757556616133258e-05, "loss": 0.087, "step": 10364 }, { "epoch": 1.46, "learning_rate": 4.7575098259404835e-05, "loss": 0.0723, "step": 10366 }, { "epoch": 1.46, "learning_rate": 4.7574630357477074e-05, "loss": 0.0713, "step": 10368 }, { "epoch": 1.46, "learning_rate": 4.757416245554932e-05, "loss": 0.0699, "step": 10370 }, { "epoch": 1.46, "learning_rate": 4.757369455362156e-05, "loss": 0.0911, "step": 10372 }, { "epoch": 1.46, "learning_rate": 4.7573226651693805e-05, "loss": 0.0783, "step": 10374 }, { "epoch": 1.46, "learning_rate": 4.757275874976605e-05, "loss": 0.0519, "step": 10376 }, { "epoch": 1.46, "learning_rate": 4.75722908478383e-05, "loss": 0.0662, "step": 10378 }, { "epoch": 1.46, "learning_rate": 4.7571822945910536e-05, "loss": 0.0599, "step": 10380 }, { "epoch": 1.46, "learning_rate": 4.757135504398278e-05, "loss": 0.067, "step": 10382 }, { "epoch": 1.46, "learning_rate": 4.757088714205503e-05, "loss": 0.0655, "step": 10384 }, { "epoch": 1.46, "learning_rate": 4.7570419240127274e-05, "loss": 0.1293, "step": 10386 }, { "epoch": 1.46, "learning_rate": 4.756995133819951e-05, "loss": 0.0776, "step": 10388 }, { "epoch": 1.46, "learning_rate": 4.756948343627176e-05, "loss": 0.0816, "step": 10390 }, { "epoch": 1.46, "learning_rate": 4.7569015534344005e-05, "loss": 0.0835, "step": 10392 }, { "epoch": 1.46, "learning_rate": 4.756854763241625e-05, "loss": 0.0476, "step": 10394 }, { "epoch": 1.46, "learning_rate": 4.756807973048849e-05, "loss": 0.0549, "step": 10396 }, { "epoch": 1.46, "learning_rate": 4.7567611828560736e-05, "loss": 0.1, "step": 10398 }, { "epoch": 1.46, "learning_rate": 4.756714392663298e-05, "loss": 0.0943, "step": 10400 }, { "epoch": 1.46, "learning_rate": 4.756667602470523e-05, "loss": 0.0853, "step": 10402 }, { "epoch": 1.46, "learning_rate": 4.756620812277747e-05, "loss": 0.0702, "step": 10404 }, { "epoch": 1.46, "learning_rate": 4.756574022084971e-05, "loss": 0.0667, "step": 10406 }, { "epoch": 1.46, "learning_rate": 4.756527231892195e-05, "loss": 0.0767, "step": 10408 }, { "epoch": 1.46, "learning_rate": 4.7564804416994205e-05, "loss": 0.0769, "step": 10410 }, { "epoch": 1.46, "learning_rate": 4.7564336515066444e-05, "loss": 0.0684, "step": 10412 }, { "epoch": 1.46, "learning_rate": 4.756386861313869e-05, "loss": 0.0733, "step": 10414 }, { "epoch": 1.46, "learning_rate": 4.756340071121093e-05, "loss": 0.0682, "step": 10416 }, { "epoch": 1.46, "learning_rate": 4.756293280928318e-05, "loss": 0.0649, "step": 10418 }, { "epoch": 1.46, "learning_rate": 4.756246490735542e-05, "loss": 0.0797, "step": 10420 }, { "epoch": 1.46, "learning_rate": 4.756199700542767e-05, "loss": 0.0649, "step": 10422 }, { "epoch": 1.46, "learning_rate": 4.7561529103499906e-05, "loss": 0.0745, "step": 10424 }, { "epoch": 1.46, "learning_rate": 4.756106120157215e-05, "loss": 0.0737, "step": 10426 }, { "epoch": 1.46, "learning_rate": 4.75605932996444e-05, "loss": 0.0702, "step": 10428 }, { "epoch": 1.46, "learning_rate": 4.7560125397716643e-05, "loss": 0.0813, "step": 10430 }, { "epoch": 1.46, "learning_rate": 4.755965749578888e-05, "loss": 0.071, "step": 10432 }, { "epoch": 1.46, "learning_rate": 4.755918959386113e-05, "loss": 0.0971, "step": 10434 }, { "epoch": 1.46, "learning_rate": 4.7558721691933374e-05, "loss": 0.0881, "step": 10436 }, { "epoch": 1.47, "learning_rate": 4.755825379000562e-05, "loss": 0.0977, "step": 10438 }, { "epoch": 1.47, "learning_rate": 4.755778588807786e-05, "loss": 0.0615, "step": 10440 }, { "epoch": 1.47, "learning_rate": 4.7557317986150105e-05, "loss": 0.0871, "step": 10442 }, { "epoch": 1.47, "learning_rate": 4.755685008422235e-05, "loss": 0.094, "step": 10444 }, { "epoch": 1.47, "learning_rate": 4.75563821822946e-05, "loss": 0.0777, "step": 10446 }, { "epoch": 1.47, "learning_rate": 4.7555914280366836e-05, "loss": 0.0763, "step": 10448 }, { "epoch": 1.47, "learning_rate": 4.755544637843908e-05, "loss": 0.0814, "step": 10450 }, { "epoch": 1.47, "learning_rate": 4.755497847651133e-05, "loss": 0.1032, "step": 10452 }, { "epoch": 1.47, "learning_rate": 4.7554510574583574e-05, "loss": 0.0849, "step": 10454 }, { "epoch": 1.47, "learning_rate": 4.755404267265581e-05, "loss": 0.0768, "step": 10456 }, { "epoch": 1.47, "learning_rate": 4.755357477072806e-05, "loss": 0.08, "step": 10458 }, { "epoch": 1.47, "learning_rate": 4.75531068688003e-05, "loss": 0.0926, "step": 10460 }, { "epoch": 1.47, "learning_rate": 4.755263896687255e-05, "loss": 0.0698, "step": 10462 }, { "epoch": 1.47, "learning_rate": 4.755217106494479e-05, "loss": 0.077, "step": 10464 }, { "epoch": 1.47, "learning_rate": 4.7551703163017036e-05, "loss": 0.0722, "step": 10466 }, { "epoch": 1.47, "learning_rate": 4.7551235261089275e-05, "loss": 0.0538, "step": 10468 }, { "epoch": 1.47, "learning_rate": 4.755076735916152e-05, "loss": 0.1016, "step": 10470 }, { "epoch": 1.47, "learning_rate": 4.755029945723377e-05, "loss": 0.0763, "step": 10472 }, { "epoch": 1.47, "learning_rate": 4.754983155530601e-05, "loss": 0.0851, "step": 10474 }, { "epoch": 1.47, "learning_rate": 4.754936365337825e-05, "loss": 0.081, "step": 10476 }, { "epoch": 1.47, "learning_rate": 4.75488957514505e-05, "loss": 0.0741, "step": 10478 }, { "epoch": 1.47, "learning_rate": 4.7548427849522744e-05, "loss": 0.0784, "step": 10480 }, { "epoch": 1.47, "learning_rate": 4.754795994759499e-05, "loss": 0.0804, "step": 10482 }, { "epoch": 1.47, "learning_rate": 4.754749204566723e-05, "loss": 0.0797, "step": 10484 }, { "epoch": 1.47, "learning_rate": 4.7547024143739475e-05, "loss": 0.0931, "step": 10486 }, { "epoch": 1.47, "learning_rate": 4.754655624181172e-05, "loss": 0.0706, "step": 10488 }, { "epoch": 1.47, "learning_rate": 4.754608833988397e-05, "loss": 0.072, "step": 10490 }, { "epoch": 1.47, "learning_rate": 4.7545620437956206e-05, "loss": 0.0801, "step": 10492 }, { "epoch": 1.47, "learning_rate": 4.754515253602845e-05, "loss": 0.074, "step": 10494 }, { "epoch": 1.47, "learning_rate": 4.75446846341007e-05, "loss": 0.0835, "step": 10496 }, { "epoch": 1.47, "learning_rate": 4.7544216732172944e-05, "loss": 0.0612, "step": 10498 }, { "epoch": 1.47, "learning_rate": 4.754374883024518e-05, "loss": 0.0785, "step": 10500 }, { "epoch": 1.47, "learning_rate": 4.754328092831743e-05, "loss": 0.0584, "step": 10502 }, { "epoch": 1.47, "learning_rate": 4.754281302638967e-05, "loss": 0.0711, "step": 10504 }, { "epoch": 1.47, "learning_rate": 4.754234512446192e-05, "loss": 0.079, "step": 10506 }, { "epoch": 1.48, "learning_rate": 4.754187722253416e-05, "loss": 0.0791, "step": 10508 }, { "epoch": 1.48, "learning_rate": 4.75414093206064e-05, "loss": 0.0831, "step": 10510 }, { "epoch": 1.48, "learning_rate": 4.7540941418678645e-05, "loss": 0.0661, "step": 10512 }, { "epoch": 1.48, "learning_rate": 4.754047351675089e-05, "loss": 0.0787, "step": 10514 }, { "epoch": 1.48, "learning_rate": 4.7540005614823136e-05, "loss": 0.0951, "step": 10516 }, { "epoch": 1.48, "learning_rate": 4.7539537712895376e-05, "loss": 0.0659, "step": 10518 }, { "epoch": 1.48, "learning_rate": 4.753906981096762e-05, "loss": 0.0759, "step": 10520 }, { "epoch": 1.48, "learning_rate": 4.753860190903987e-05, "loss": 0.073, "step": 10522 }, { "epoch": 1.48, "learning_rate": 4.753813400711211e-05, "loss": 0.0576, "step": 10524 }, { "epoch": 1.48, "learning_rate": 4.753766610518435e-05, "loss": 0.0819, "step": 10526 }, { "epoch": 1.48, "learning_rate": 4.75371982032566e-05, "loss": 0.0805, "step": 10528 }, { "epoch": 1.48, "learning_rate": 4.7536730301328844e-05, "loss": 0.0822, "step": 10530 }, { "epoch": 1.48, "learning_rate": 4.753626239940109e-05, "loss": 0.0725, "step": 10532 }, { "epoch": 1.48, "learning_rate": 4.753579449747333e-05, "loss": 0.0829, "step": 10534 }, { "epoch": 1.48, "learning_rate": 4.7535326595545575e-05, "loss": 0.0851, "step": 10536 }, { "epoch": 1.48, "learning_rate": 4.7534858693617814e-05, "loss": 0.0814, "step": 10538 }, { "epoch": 1.48, "learning_rate": 4.753439079169007e-05, "loss": 0.0964, "step": 10540 }, { "epoch": 1.48, "learning_rate": 4.7533922889762306e-05, "loss": 0.0887, "step": 10542 }, { "epoch": 1.48, "learning_rate": 4.753345498783455e-05, "loss": 0.0728, "step": 10544 }, { "epoch": 1.48, "learning_rate": 4.753298708590679e-05, "loss": 0.0643, "step": 10546 }, { "epoch": 1.48, "learning_rate": 4.7532519183979044e-05, "loss": 0.0664, "step": 10548 }, { "epoch": 1.48, "learning_rate": 4.753205128205128e-05, "loss": 0.0892, "step": 10550 }, { "epoch": 1.48, "learning_rate": 4.753158338012353e-05, "loss": 0.0759, "step": 10552 }, { "epoch": 1.48, "learning_rate": 4.753111547819577e-05, "loss": 0.0717, "step": 10554 }, { "epoch": 1.48, "learning_rate": 4.7530647576268014e-05, "loss": 0.0964, "step": 10556 }, { "epoch": 1.48, "learning_rate": 4.753017967434026e-05, "loss": 0.0866, "step": 10558 }, { "epoch": 1.48, "learning_rate": 4.7529711772412506e-05, "loss": 0.076, "step": 10560 }, { "epoch": 1.48, "learning_rate": 4.7529243870484745e-05, "loss": 0.0838, "step": 10562 }, { "epoch": 1.48, "learning_rate": 4.752877596855699e-05, "loss": 0.0642, "step": 10564 }, { "epoch": 1.48, "learning_rate": 4.752830806662924e-05, "loss": 0.0777, "step": 10566 }, { "epoch": 1.48, "learning_rate": 4.752784016470148e-05, "loss": 0.0674, "step": 10568 }, { "epoch": 1.48, "learning_rate": 4.752737226277372e-05, "loss": 0.0672, "step": 10570 }, { "epoch": 1.48, "learning_rate": 4.752690436084597e-05, "loss": 0.0698, "step": 10572 }, { "epoch": 1.48, "learning_rate": 4.7526436458918214e-05, "loss": 0.0635, "step": 10574 }, { "epoch": 1.48, "learning_rate": 4.752596855699046e-05, "loss": 0.0889, "step": 10576 }, { "epoch": 1.48, "learning_rate": 4.75255006550627e-05, "loss": 0.0894, "step": 10578 }, { "epoch": 1.49, "learning_rate": 4.7525032753134945e-05, "loss": 0.0573, "step": 10580 }, { "epoch": 1.49, "learning_rate": 4.752456485120719e-05, "loss": 0.0707, "step": 10582 }, { "epoch": 1.49, "learning_rate": 4.7524096949279436e-05, "loss": 0.0701, "step": 10584 }, { "epoch": 1.49, "learning_rate": 4.7523629047351676e-05, "loss": 0.066, "step": 10586 }, { "epoch": 1.49, "learning_rate": 4.752316114542392e-05, "loss": 0.0738, "step": 10588 }, { "epoch": 1.49, "learning_rate": 4.752269324349616e-05, "loss": 0.073, "step": 10590 }, { "epoch": 1.49, "learning_rate": 4.752222534156841e-05, "loss": 0.0956, "step": 10592 }, { "epoch": 1.49, "learning_rate": 4.752175743964065e-05, "loss": 0.0718, "step": 10594 }, { "epoch": 1.49, "learning_rate": 4.75212895377129e-05, "loss": 0.0701, "step": 10596 }, { "epoch": 1.49, "learning_rate": 4.752082163578514e-05, "loss": 0.0723, "step": 10598 }, { "epoch": 1.49, "learning_rate": 4.752035373385738e-05, "loss": 0.0728, "step": 10600 }, { "epoch": 1.49, "learning_rate": 4.751988583192963e-05, "loss": 0.0656, "step": 10602 }, { "epoch": 1.49, "learning_rate": 4.7519417930001875e-05, "loss": 0.0809, "step": 10604 }, { "epoch": 1.49, "learning_rate": 4.7518950028074114e-05, "loss": 0.0736, "step": 10606 }, { "epoch": 1.49, "learning_rate": 4.751848212614636e-05, "loss": 0.0861, "step": 10608 }, { "epoch": 1.49, "learning_rate": 4.7518014224218606e-05, "loss": 0.0679, "step": 10610 }, { "epoch": 1.49, "learning_rate": 4.751754632229085e-05, "loss": 0.0859, "step": 10612 }, { "epoch": 1.49, "learning_rate": 4.751707842036309e-05, "loss": 0.0857, "step": 10614 }, { "epoch": 1.49, "learning_rate": 4.751661051843534e-05, "loss": 0.0977, "step": 10616 }, { "epoch": 1.49, "learning_rate": 4.751614261650758e-05, "loss": 0.0763, "step": 10618 }, { "epoch": 1.49, "learning_rate": 4.751567471457983e-05, "loss": 0.0703, "step": 10620 }, { "epoch": 1.49, "learning_rate": 4.751520681265207e-05, "loss": 0.0713, "step": 10622 }, { "epoch": 1.49, "learning_rate": 4.7514738910724314e-05, "loss": 0.0648, "step": 10624 }, { "epoch": 1.49, "learning_rate": 4.751427100879656e-05, "loss": 0.0707, "step": 10626 }, { "epoch": 1.49, "learning_rate": 4.7513803106868806e-05, "loss": 0.077, "step": 10628 }, { "epoch": 1.49, "learning_rate": 4.7513335204941045e-05, "loss": 0.102, "step": 10630 }, { "epoch": 1.49, "learning_rate": 4.751286730301329e-05, "loss": 0.067, "step": 10632 }, { "epoch": 1.49, "learning_rate": 4.751239940108553e-05, "loss": 0.0642, "step": 10634 }, { "epoch": 1.49, "learning_rate": 4.751193149915778e-05, "loss": 0.0691, "step": 10636 }, { "epoch": 1.49, "learning_rate": 4.751146359723002e-05, "loss": 0.0809, "step": 10638 }, { "epoch": 1.49, "learning_rate": 4.751099569530227e-05, "loss": 0.0847, "step": 10640 }, { "epoch": 1.49, "learning_rate": 4.751052779337451e-05, "loss": 0.069, "step": 10642 }, { "epoch": 1.49, "learning_rate": 4.751005989144676e-05, "loss": 0.0711, "step": 10644 }, { "epoch": 1.49, "learning_rate": 4.7509591989519e-05, "loss": 0.0763, "step": 10646 }, { "epoch": 1.49, "learning_rate": 4.7509124087591245e-05, "loss": 0.0765, "step": 10648 }, { "epoch": 1.49, "learning_rate": 4.7508656185663484e-05, "loss": 0.1077, "step": 10650 }, { "epoch": 1.5, "learning_rate": 4.750818828373573e-05, "loss": 0.0587, "step": 10652 }, { "epoch": 1.5, "learning_rate": 4.7507720381807976e-05, "loss": 0.0733, "step": 10654 }, { "epoch": 1.5, "learning_rate": 4.750725247988022e-05, "loss": 0.0625, "step": 10656 }, { "epoch": 1.5, "learning_rate": 4.750678457795246e-05, "loss": 0.0723, "step": 10658 }, { "epoch": 1.5, "learning_rate": 4.7506316676024707e-05, "loss": 0.0725, "step": 10660 }, { "epoch": 1.5, "learning_rate": 4.750584877409695e-05, "loss": 0.0743, "step": 10662 }, { "epoch": 1.5, "learning_rate": 4.75053808721692e-05, "loss": 0.0622, "step": 10664 }, { "epoch": 1.5, "learning_rate": 4.750491297024144e-05, "loss": 0.1071, "step": 10666 }, { "epoch": 1.5, "learning_rate": 4.7504445068313683e-05, "loss": 0.0714, "step": 10668 }, { "epoch": 1.5, "learning_rate": 4.750397716638593e-05, "loss": 0.05, "step": 10670 }, { "epoch": 1.5, "learning_rate": 4.7503509264458175e-05, "loss": 0.0744, "step": 10672 }, { "epoch": 1.5, "learning_rate": 4.7503041362530414e-05, "loss": 0.0863, "step": 10674 }, { "epoch": 1.5, "learning_rate": 4.750257346060266e-05, "loss": 0.1183, "step": 10676 }, { "epoch": 1.5, "learning_rate": 4.7502105558674906e-05, "loss": 0.1107, "step": 10678 }, { "epoch": 1.5, "learning_rate": 4.750163765674715e-05, "loss": 0.0695, "step": 10680 }, { "epoch": 1.5, "learning_rate": 4.750116975481939e-05, "loss": 0.0695, "step": 10682 }, { "epoch": 1.5, "learning_rate": 4.750070185289164e-05, "loss": 0.0654, "step": 10684 }, { "epoch": 1.5, "learning_rate": 4.7500233950963876e-05, "loss": 0.0953, "step": 10686 }, { "epoch": 1.5, "learning_rate": 4.749976604903613e-05, "loss": 0.0743, "step": 10688 }, { "epoch": 1.5, "learning_rate": 4.749929814710837e-05, "loss": 0.073, "step": 10690 }, { "epoch": 1.5, "learning_rate": 4.7498830245180614e-05, "loss": 0.0664, "step": 10692 }, { "epoch": 1.5, "learning_rate": 4.749836234325285e-05, "loss": 0.0779, "step": 10694 }, { "epoch": 1.5, "learning_rate": 4.7497894441325106e-05, "loss": 0.0899, "step": 10696 }, { "epoch": 1.5, "learning_rate": 4.7497426539397345e-05, "loss": 0.0873, "step": 10698 }, { "epoch": 1.5, "learning_rate": 4.749695863746959e-05, "loss": 0.0767, "step": 10700 }, { "epoch": 1.5, "learning_rate": 4.749649073554183e-05, "loss": 0.0691, "step": 10702 }, { "epoch": 1.5, "learning_rate": 4.7496022833614076e-05, "loss": 0.0822, "step": 10704 }, { "epoch": 1.5, "learning_rate": 4.749555493168632e-05, "loss": 0.0739, "step": 10706 }, { "epoch": 1.5, "learning_rate": 4.749508702975857e-05, "loss": 0.0483, "step": 10708 }, { "epoch": 1.5, "learning_rate": 4.749461912783081e-05, "loss": 0.0882, "step": 10710 }, { "epoch": 1.5, "learning_rate": 4.749415122590305e-05, "loss": 0.0757, "step": 10712 }, { "epoch": 1.5, "learning_rate": 4.74936833239753e-05, "loss": 0.0726, "step": 10714 }, { "epoch": 1.5, "learning_rate": 4.7493215422047545e-05, "loss": 0.0577, "step": 10716 }, { "epoch": 1.5, "learning_rate": 4.7492747520119784e-05, "loss": 0.0701, "step": 10718 }, { "epoch": 1.5, "learning_rate": 4.749227961819203e-05, "loss": 0.1074, "step": 10720 }, { "epoch": 1.51, "learning_rate": 4.7491811716264276e-05, "loss": 0.0624, "step": 10722 }, { "epoch": 1.51, "learning_rate": 4.749134381433652e-05, "loss": 0.0882, "step": 10724 }, { "epoch": 1.51, "learning_rate": 4.749087591240876e-05, "loss": 0.0645, "step": 10726 }, { "epoch": 1.51, "learning_rate": 4.7490408010481007e-05, "loss": 0.0711, "step": 10728 }, { "epoch": 1.51, "learning_rate": 4.748994010855325e-05, "loss": 0.0796, "step": 10730 }, { "epoch": 1.51, "learning_rate": 4.74894722066255e-05, "loss": 0.0767, "step": 10732 }, { "epoch": 1.51, "learning_rate": 4.748900430469774e-05, "loss": 0.0687, "step": 10734 }, { "epoch": 1.51, "learning_rate": 4.7488536402769983e-05, "loss": 0.084, "step": 10736 }, { "epoch": 1.51, "learning_rate": 4.748806850084222e-05, "loss": 0.067, "step": 10738 }, { "epoch": 1.51, "learning_rate": 4.7487600598914475e-05, "loss": 0.0952, "step": 10740 }, { "epoch": 1.51, "learning_rate": 4.7487132696986714e-05, "loss": 0.0873, "step": 10742 }, { "epoch": 1.51, "learning_rate": 4.748666479505896e-05, "loss": 0.0961, "step": 10744 }, { "epoch": 1.51, "learning_rate": 4.74861968931312e-05, "loss": 0.0729, "step": 10746 }, { "epoch": 1.51, "learning_rate": 4.7485728991203445e-05, "loss": 0.0772, "step": 10748 }, { "epoch": 1.51, "learning_rate": 4.748526108927569e-05, "loss": 0.0884, "step": 10750 }, { "epoch": 1.51, "learning_rate": 4.748479318734794e-05, "loss": 0.0868, "step": 10752 }, { "epoch": 1.51, "learning_rate": 4.7484325285420176e-05, "loss": 0.0791, "step": 10754 }, { "epoch": 1.51, "learning_rate": 4.748385738349242e-05, "loss": 0.0874, "step": 10756 }, { "epoch": 1.51, "learning_rate": 4.748338948156467e-05, "loss": 0.0987, "step": 10758 }, { "epoch": 1.51, "learning_rate": 4.7482921579636914e-05, "loss": 0.0765, "step": 10760 }, { "epoch": 1.51, "learning_rate": 4.748245367770915e-05, "loss": 0.0673, "step": 10762 }, { "epoch": 1.51, "learning_rate": 4.74819857757814e-05, "loss": 0.0719, "step": 10764 }, { "epoch": 1.51, "learning_rate": 4.7481517873853645e-05, "loss": 0.0771, "step": 10766 }, { "epoch": 1.51, "learning_rate": 4.7481049971925884e-05, "loss": 0.0686, "step": 10768 }, { "epoch": 1.51, "learning_rate": 4.748058206999813e-05, "loss": 0.0705, "step": 10770 }, { "epoch": 1.51, "learning_rate": 4.748011416807037e-05, "loss": 0.0792, "step": 10772 }, { "epoch": 1.51, "learning_rate": 4.747964626614262e-05, "loss": 0.072, "step": 10774 }, { "epoch": 1.51, "learning_rate": 4.747917836421486e-05, "loss": 0.06, "step": 10776 }, { "epoch": 1.51, "learning_rate": 4.747871046228711e-05, "loss": 0.1011, "step": 10778 }, { "epoch": 1.51, "learning_rate": 4.7478242560359346e-05, "loss": 0.0713, "step": 10780 }, { "epoch": 1.51, "learning_rate": 4.747777465843159e-05, "loss": 0.0661, "step": 10782 }, { "epoch": 1.51, "learning_rate": 4.747730675650384e-05, "loss": 0.0876, "step": 10784 }, { "epoch": 1.51, "learning_rate": 4.7476838854576084e-05, "loss": 0.0917, "step": 10786 }, { "epoch": 1.51, "learning_rate": 4.747637095264832e-05, "loss": 0.1038, "step": 10788 }, { "epoch": 1.51, "learning_rate": 4.747590305072057e-05, "loss": 0.0869, "step": 10790 }, { "epoch": 1.51, "learning_rate": 4.7475435148792815e-05, "loss": 0.0682, "step": 10792 }, { "epoch": 1.52, "learning_rate": 4.747496724686506e-05, "loss": 0.1075, "step": 10794 }, { "epoch": 1.52, "learning_rate": 4.74744993449373e-05, "loss": 0.0597, "step": 10796 }, { "epoch": 1.52, "learning_rate": 4.7474031443009546e-05, "loss": 0.0841, "step": 10798 }, { "epoch": 1.52, "learning_rate": 4.747356354108179e-05, "loss": 0.0933, "step": 10800 }, { "epoch": 1.52, "learning_rate": 4.747309563915404e-05, "loss": 0.0673, "step": 10802 }, { "epoch": 1.52, "learning_rate": 4.747262773722628e-05, "loss": 0.0675, "step": 10804 }, { "epoch": 1.52, "learning_rate": 4.747215983529852e-05, "loss": 0.0756, "step": 10806 }, { "epoch": 1.52, "learning_rate": 4.747169193337077e-05, "loss": 0.07, "step": 10808 }, { "epoch": 1.52, "learning_rate": 4.7471224031443014e-05, "loss": 0.0655, "step": 10810 }, { "epoch": 1.52, "learning_rate": 4.7470756129515254e-05, "loss": 0.074, "step": 10812 }, { "epoch": 1.52, "learning_rate": 4.74702882275875e-05, "loss": 0.0651, "step": 10814 }, { "epoch": 1.52, "learning_rate": 4.746982032565974e-05, "loss": 0.0483, "step": 10816 }, { "epoch": 1.52, "learning_rate": 4.746935242373199e-05, "loss": 0.0795, "step": 10818 }, { "epoch": 1.52, "learning_rate": 4.746888452180423e-05, "loss": 0.0669, "step": 10820 }, { "epoch": 1.52, "learning_rate": 4.7468416619876476e-05, "loss": 0.0668, "step": 10822 }, { "epoch": 1.52, "learning_rate": 4.7467948717948715e-05, "loss": 0.0602, "step": 10824 }, { "epoch": 1.52, "learning_rate": 4.746748081602097e-05, "loss": 0.0764, "step": 10826 }, { "epoch": 1.52, "learning_rate": 4.746701291409321e-05, "loss": 0.0816, "step": 10828 }, { "epoch": 1.52, "learning_rate": 4.746654501216545e-05, "loss": 0.1023, "step": 10830 }, { "epoch": 1.52, "learning_rate": 4.746607711023769e-05, "loss": 0.0804, "step": 10832 }, { "epoch": 1.52, "learning_rate": 4.746560920830994e-05, "loss": 0.1046, "step": 10834 }, { "epoch": 1.52, "learning_rate": 4.7465141306382184e-05, "loss": 0.0669, "step": 10836 }, { "epoch": 1.52, "learning_rate": 4.746467340445443e-05, "loss": 0.0675, "step": 10838 }, { "epoch": 1.52, "learning_rate": 4.746420550252667e-05, "loss": 0.1118, "step": 10840 }, { "epoch": 1.52, "learning_rate": 4.7463737600598915e-05, "loss": 0.0717, "step": 10842 }, { "epoch": 1.52, "learning_rate": 4.746326969867116e-05, "loss": 0.0678, "step": 10844 }, { "epoch": 1.52, "learning_rate": 4.746280179674341e-05, "loss": 0.0864, "step": 10846 }, { "epoch": 1.52, "learning_rate": 4.7462333894815646e-05, "loss": 0.0957, "step": 10848 }, { "epoch": 1.52, "learning_rate": 4.746186599288789e-05, "loss": 0.0651, "step": 10850 }, { "epoch": 1.52, "learning_rate": 4.746139809096014e-05, "loss": 0.0623, "step": 10852 }, { "epoch": 1.52, "learning_rate": 4.7460930189032384e-05, "loss": 0.0774, "step": 10854 }, { "epoch": 1.52, "learning_rate": 4.746046228710462e-05, "loss": 0.0969, "step": 10856 }, { "epoch": 1.52, "learning_rate": 4.745999438517687e-05, "loss": 0.0752, "step": 10858 }, { "epoch": 1.52, "learning_rate": 4.7459526483249115e-05, "loss": 0.0795, "step": 10860 }, { "epoch": 1.52, "learning_rate": 4.745905858132136e-05, "loss": 0.0802, "step": 10862 }, { "epoch": 1.52, "learning_rate": 4.74585906793936e-05, "loss": 0.0815, "step": 10864 }, { "epoch": 1.53, "learning_rate": 4.7458122777465846e-05, "loss": 0.0612, "step": 10866 }, { "epoch": 1.53, "learning_rate": 4.7457654875538085e-05, "loss": 0.0575, "step": 10868 }, { "epoch": 1.53, "learning_rate": 4.745718697361034e-05, "loss": 0.0899, "step": 10870 }, { "epoch": 1.53, "learning_rate": 4.745671907168258e-05, "loss": 0.082, "step": 10872 }, { "epoch": 1.53, "learning_rate": 4.745625116975482e-05, "loss": 0.084, "step": 10874 }, { "epoch": 1.53, "learning_rate": 4.745578326782706e-05, "loss": 0.0872, "step": 10876 }, { "epoch": 1.53, "learning_rate": 4.7455315365899314e-05, "loss": 0.0752, "step": 10878 }, { "epoch": 1.53, "learning_rate": 4.7454847463971554e-05, "loss": 0.0753, "step": 10880 }, { "epoch": 1.53, "learning_rate": 4.74543795620438e-05, "loss": 0.0819, "step": 10882 }, { "epoch": 1.53, "learning_rate": 4.745391166011604e-05, "loss": 0.0843, "step": 10884 }, { "epoch": 1.53, "learning_rate": 4.7453443758188284e-05, "loss": 0.098, "step": 10886 }, { "epoch": 1.53, "learning_rate": 4.745297585626053e-05, "loss": 0.0839, "step": 10888 }, { "epoch": 1.53, "learning_rate": 4.7452507954332776e-05, "loss": 0.0642, "step": 10890 }, { "epoch": 1.53, "learning_rate": 4.7452040052405015e-05, "loss": 0.0771, "step": 10892 }, { "epoch": 1.53, "learning_rate": 4.745157215047726e-05, "loss": 0.08, "step": 10894 }, { "epoch": 1.53, "learning_rate": 4.745110424854951e-05, "loss": 0.0784, "step": 10896 }, { "epoch": 1.53, "learning_rate": 4.745063634662175e-05, "loss": 0.0784, "step": 10898 }, { "epoch": 1.53, "learning_rate": 4.745016844469399e-05, "loss": 0.0807, "step": 10900 }, { "epoch": 1.53, "learning_rate": 4.744970054276624e-05, "loss": 0.0517, "step": 10902 }, { "epoch": 1.53, "learning_rate": 4.7449232640838484e-05, "loss": 0.0802, "step": 10904 }, { "epoch": 1.53, "learning_rate": 4.744876473891073e-05, "loss": 0.0746, "step": 10906 }, { "epoch": 1.53, "learning_rate": 4.744829683698297e-05, "loss": 0.0678, "step": 10908 }, { "epoch": 1.53, "learning_rate": 4.7447828935055215e-05, "loss": 0.0581, "step": 10910 }, { "epoch": 1.53, "learning_rate": 4.7447361033127454e-05, "loss": 0.0775, "step": 10912 }, { "epoch": 1.53, "learning_rate": 4.744689313119971e-05, "loss": 0.0733, "step": 10914 }, { "epoch": 1.53, "learning_rate": 4.7446425229271946e-05, "loss": 0.0898, "step": 10916 }, { "epoch": 1.53, "learning_rate": 4.744595732734419e-05, "loss": 0.1295, "step": 10918 }, { "epoch": 1.53, "learning_rate": 4.744548942541643e-05, "loss": 0.0819, "step": 10920 }, { "epoch": 1.53, "learning_rate": 4.7445021523488684e-05, "loss": 0.0753, "step": 10922 }, { "epoch": 1.53, "learning_rate": 4.744455362156092e-05, "loss": 0.0874, "step": 10924 }, { "epoch": 1.53, "learning_rate": 4.744408571963317e-05, "loss": 0.0692, "step": 10926 }, { "epoch": 1.53, "learning_rate": 4.744361781770541e-05, "loss": 0.1035, "step": 10928 }, { "epoch": 1.53, "learning_rate": 4.7443149915777654e-05, "loss": 0.0888, "step": 10930 }, { "epoch": 1.53, "learning_rate": 4.74426820138499e-05, "loss": 0.0741, "step": 10932 }, { "epoch": 1.53, "learning_rate": 4.7442214111922146e-05, "loss": 0.0764, "step": 10934 }, { "epoch": 1.54, "learning_rate": 4.7441746209994385e-05, "loss": 0.0679, "step": 10936 }, { "epoch": 1.54, "learning_rate": 4.744127830806663e-05, "loss": 0.0854, "step": 10938 }, { "epoch": 1.54, "learning_rate": 4.744081040613888e-05, "loss": 0.0817, "step": 10940 }, { "epoch": 1.54, "learning_rate": 4.744034250421112e-05, "loss": 0.0852, "step": 10942 }, { "epoch": 1.54, "learning_rate": 4.743987460228336e-05, "loss": 0.0856, "step": 10944 }, { "epoch": 1.54, "learning_rate": 4.743940670035561e-05, "loss": 0.0787, "step": 10946 }, { "epoch": 1.54, "learning_rate": 4.7438938798427854e-05, "loss": 0.0854, "step": 10948 }, { "epoch": 1.54, "learning_rate": 4.74384708965001e-05, "loss": 0.0775, "step": 10950 }, { "epoch": 1.54, "learning_rate": 4.743800299457234e-05, "loss": 0.0794, "step": 10952 }, { "epoch": 1.54, "learning_rate": 4.7437535092644585e-05, "loss": 0.0756, "step": 10954 }, { "epoch": 1.54, "learning_rate": 4.743706719071683e-05, "loss": 0.085, "step": 10956 }, { "epoch": 1.54, "learning_rate": 4.7436599288789076e-05, "loss": 0.0747, "step": 10958 }, { "epoch": 1.54, "learning_rate": 4.7436131386861315e-05, "loss": 0.0997, "step": 10960 }, { "epoch": 1.54, "learning_rate": 4.743566348493356e-05, "loss": 0.0884, "step": 10962 }, { "epoch": 1.54, "learning_rate": 4.74351955830058e-05, "loss": 0.1031, "step": 10964 }, { "epoch": 1.54, "learning_rate": 4.743472768107805e-05, "loss": 0.0583, "step": 10966 }, { "epoch": 1.54, "learning_rate": 4.743425977915029e-05, "loss": 0.0806, "step": 10968 }, { "epoch": 1.54, "learning_rate": 4.743379187722254e-05, "loss": 0.0795, "step": 10970 }, { "epoch": 1.54, "learning_rate": 4.743332397529478e-05, "loss": 0.0618, "step": 10972 }, { "epoch": 1.54, "learning_rate": 4.743285607336703e-05, "loss": 0.0768, "step": 10974 }, { "epoch": 1.54, "learning_rate": 4.743238817143927e-05, "loss": 0.0965, "step": 10976 }, { "epoch": 1.54, "learning_rate": 4.7431920269511515e-05, "loss": 0.0713, "step": 10978 }, { "epoch": 1.54, "learning_rate": 4.7431452367583754e-05, "loss": 0.072, "step": 10980 }, { "epoch": 1.54, "learning_rate": 4.7430984465656e-05, "loss": 0.0749, "step": 10982 }, { "epoch": 1.54, "learning_rate": 4.7430516563728246e-05, "loss": 0.0773, "step": 10984 }, { "epoch": 1.54, "learning_rate": 4.743004866180049e-05, "loss": 0.0777, "step": 10986 }, { "epoch": 1.54, "learning_rate": 4.742958075987273e-05, "loss": 0.0919, "step": 10988 }, { "epoch": 1.54, "learning_rate": 4.742911285794498e-05, "loss": 0.0824, "step": 10990 }, { "epoch": 1.54, "learning_rate": 4.742864495601722e-05, "loss": 0.0676, "step": 10992 }, { "epoch": 1.54, "learning_rate": 4.742817705408947e-05, "loss": 0.0821, "step": 10994 }, { "epoch": 1.54, "learning_rate": 4.742770915216171e-05, "loss": 0.0648, "step": 10996 }, { "epoch": 1.54, "learning_rate": 4.7427241250233954e-05, "loss": 0.0847, "step": 10998 }, { "epoch": 1.54, "learning_rate": 4.74267733483062e-05, "loss": 0.0878, "step": 11000 }, { "epoch": 1.54, "eval_gen_len": 29.569, "eval_loss": 1.0419230461120605, "eval_meteor": 0.0426, "eval_runtime": 14.397, "eval_samples_per_second": 4.029, "eval_steps_per_second": 0.556, "step": 11000 }, { "epoch": 1.54, "learning_rate": 4.7426305446378446e-05, "loss": 0.0881, "step": 11002 }, { "epoch": 1.54, "learning_rate": 4.7425837544450685e-05, "loss": 0.0838, "step": 11004 }, { "epoch": 1.54, "learning_rate": 4.742536964252293e-05, "loss": 0.0721, "step": 11006 }, { "epoch": 1.55, "learning_rate": 4.742490174059518e-05, "loss": 0.1109, "step": 11008 }, { "epoch": 1.55, "learning_rate": 4.742443383866742e-05, "loss": 0.1078, "step": 11010 }, { "epoch": 1.55, "learning_rate": 4.742396593673966e-05, "loss": 0.0749, "step": 11012 }, { "epoch": 1.55, "learning_rate": 4.742349803481191e-05, "loss": 0.0613, "step": 11014 }, { "epoch": 1.55, "learning_rate": 4.742303013288415e-05, "loss": 0.0906, "step": 11016 }, { "epoch": 1.55, "learning_rate": 4.742256223095639e-05, "loss": 0.0931, "step": 11018 }, { "epoch": 1.55, "learning_rate": 4.742209432902864e-05, "loss": 0.0662, "step": 11020 }, { "epoch": 1.55, "learning_rate": 4.742162642710088e-05, "loss": 0.0639, "step": 11022 }, { "epoch": 1.55, "learning_rate": 4.7421158525173124e-05, "loss": 0.0959, "step": 11024 }, { "epoch": 1.55, "learning_rate": 4.742069062324537e-05, "loss": 0.096, "step": 11026 }, { "epoch": 1.55, "learning_rate": 4.7420222721317615e-05, "loss": 0.0694, "step": 11028 }, { "epoch": 1.55, "learning_rate": 4.7419754819389855e-05, "loss": 0.0707, "step": 11030 }, { "epoch": 1.55, "learning_rate": 4.74192869174621e-05, "loss": 0.068, "step": 11032 }, { "epoch": 1.55, "learning_rate": 4.7418819015534346e-05, "loss": 0.075, "step": 11034 }, { "epoch": 1.55, "learning_rate": 4.741835111360659e-05, "loss": 0.0659, "step": 11036 }, { "epoch": 1.55, "learning_rate": 4.741788321167883e-05, "loss": 0.0663, "step": 11038 }, { "epoch": 1.55, "learning_rate": 4.741741530975108e-05, "loss": 0.1099, "step": 11040 }, { "epoch": 1.55, "learning_rate": 4.741694740782332e-05, "loss": 0.0676, "step": 11042 }, { "epoch": 1.55, "learning_rate": 4.741647950589557e-05, "loss": 0.0628, "step": 11044 }, { "epoch": 1.55, "learning_rate": 4.741601160396781e-05, "loss": 0.0762, "step": 11046 }, { "epoch": 1.55, "learning_rate": 4.7415543702040054e-05, "loss": 0.1015, "step": 11048 }, { "epoch": 1.55, "learning_rate": 4.7415075800112293e-05, "loss": 0.0709, "step": 11050 }, { "epoch": 1.55, "learning_rate": 4.7414607898184546e-05, "loss": 0.0794, "step": 11052 }, { "epoch": 1.55, "learning_rate": 4.7414139996256785e-05, "loss": 0.097, "step": 11054 }, { "epoch": 1.55, "learning_rate": 4.741367209432903e-05, "loss": 0.0749, "step": 11056 }, { "epoch": 1.55, "learning_rate": 4.741320419240127e-05, "loss": 0.0902, "step": 11058 }, { "epoch": 1.55, "learning_rate": 4.7412736290473516e-05, "loss": 0.0622, "step": 11060 }, { "epoch": 1.55, "learning_rate": 4.741226838854576e-05, "loss": 0.0613, "step": 11062 }, { "epoch": 1.55, "learning_rate": 4.741180048661801e-05, "loss": 0.0799, "step": 11064 }, { "epoch": 1.55, "learning_rate": 4.741133258469025e-05, "loss": 0.0979, "step": 11066 }, { "epoch": 1.55, "learning_rate": 4.741086468276249e-05, "loss": 0.0748, "step": 11068 }, { "epoch": 1.55, "learning_rate": 4.741039678083474e-05, "loss": 0.0949, "step": 11070 }, { "epoch": 1.55, "learning_rate": 4.7409928878906985e-05, "loss": 0.0605, "step": 11072 }, { "epoch": 1.55, "learning_rate": 4.7409460976979224e-05, "loss": 0.0771, "step": 11074 }, { "epoch": 1.55, "learning_rate": 4.740899307505147e-05, "loss": 0.0686, "step": 11076 }, { "epoch": 1.56, "learning_rate": 4.7408525173123716e-05, "loss": 0.0721, "step": 11078 }, { "epoch": 1.56, "learning_rate": 4.740805727119596e-05, "loss": 0.0767, "step": 11080 }, { "epoch": 1.56, "learning_rate": 4.74075893692682e-05, "loss": 0.0871, "step": 11082 }, { "epoch": 1.56, "learning_rate": 4.740712146734045e-05, "loss": 0.0645, "step": 11084 }, { "epoch": 1.56, "learning_rate": 4.740665356541269e-05, "loss": 0.0693, "step": 11086 }, { "epoch": 1.56, "learning_rate": 4.740618566348494e-05, "loss": 0.0795, "step": 11088 }, { "epoch": 1.56, "learning_rate": 4.740571776155718e-05, "loss": 0.0766, "step": 11090 }, { "epoch": 1.56, "learning_rate": 4.7405249859629424e-05, "loss": 0.0742, "step": 11092 }, { "epoch": 1.56, "learning_rate": 4.740478195770166e-05, "loss": 0.0835, "step": 11094 }, { "epoch": 1.56, "learning_rate": 4.7404314055773916e-05, "loss": 0.1059, "step": 11096 }, { "epoch": 1.56, "learning_rate": 4.7403846153846155e-05, "loss": 0.0674, "step": 11098 }, { "epoch": 1.56, "learning_rate": 4.74033782519184e-05, "loss": 0.1083, "step": 11100 }, { "epoch": 1.56, "learning_rate": 4.740291034999064e-05, "loss": 0.0774, "step": 11102 }, { "epoch": 1.56, "learning_rate": 4.740244244806289e-05, "loss": 0.0647, "step": 11104 }, { "epoch": 1.56, "learning_rate": 4.740197454613513e-05, "loss": 0.0657, "step": 11106 }, { "epoch": 1.56, "learning_rate": 4.740150664420738e-05, "loss": 0.0769, "step": 11108 }, { "epoch": 1.56, "learning_rate": 4.7401038742279617e-05, "loss": 0.0954, "step": 11110 }, { "epoch": 1.56, "learning_rate": 4.740057084035186e-05, "loss": 0.0713, "step": 11112 }, { "epoch": 1.56, "learning_rate": 4.740010293842411e-05, "loss": 0.1011, "step": 11114 }, { "epoch": 1.56, "learning_rate": 4.7399635036496354e-05, "loss": 0.0694, "step": 11116 }, { "epoch": 1.56, "learning_rate": 4.7399167134568593e-05, "loss": 0.0856, "step": 11118 }, { "epoch": 1.56, "learning_rate": 4.739869923264084e-05, "loss": 0.0795, "step": 11120 }, { "epoch": 1.56, "learning_rate": 4.7398231330713085e-05, "loss": 0.075, "step": 11122 }, { "epoch": 1.56, "learning_rate": 4.739776342878533e-05, "loss": 0.0653, "step": 11124 }, { "epoch": 1.56, "learning_rate": 4.739729552685757e-05, "loss": 0.079, "step": 11126 }, { "epoch": 1.56, "learning_rate": 4.7396827624929816e-05, "loss": 0.0973, "step": 11128 }, { "epoch": 1.56, "learning_rate": 4.739635972300206e-05, "loss": 0.0547, "step": 11130 }, { "epoch": 1.56, "learning_rate": 4.739589182107431e-05, "loss": 0.1045, "step": 11132 }, { "epoch": 1.56, "learning_rate": 4.739542391914655e-05, "loss": 0.0797, "step": 11134 }, { "epoch": 1.56, "learning_rate": 4.739495601721879e-05, "loss": 0.0819, "step": 11136 }, { "epoch": 1.56, "learning_rate": 4.739448811529104e-05, "loss": 0.0838, "step": 11138 }, { "epoch": 1.56, "learning_rate": 4.7394020213363285e-05, "loss": 0.0873, "step": 11140 }, { "epoch": 1.56, "learning_rate": 4.7393552311435524e-05, "loss": 0.0934, "step": 11142 }, { "epoch": 1.56, "learning_rate": 4.739308440950777e-05, "loss": 0.071, "step": 11144 }, { "epoch": 1.56, "learning_rate": 4.739261650758001e-05, "loss": 0.0773, "step": 11146 }, { "epoch": 1.56, "learning_rate": 4.739214860565226e-05, "loss": 0.0948, "step": 11148 }, { "epoch": 1.57, "learning_rate": 4.73916807037245e-05, "loss": 0.0941, "step": 11150 }, { "epoch": 1.57, "learning_rate": 4.739121280179675e-05, "loss": 0.082, "step": 11152 }, { "epoch": 1.57, "learning_rate": 4.7390744899868986e-05, "loss": 0.0725, "step": 11154 }, { "epoch": 1.57, "learning_rate": 4.739027699794124e-05, "loss": 0.0982, "step": 11156 }, { "epoch": 1.57, "learning_rate": 4.738980909601348e-05, "loss": 0.0918, "step": 11158 }, { "epoch": 1.57, "learning_rate": 4.7389341194085724e-05, "loss": 0.0903, "step": 11160 }, { "epoch": 1.57, "learning_rate": 4.738887329215796e-05, "loss": 0.0882, "step": 11162 }, { "epoch": 1.57, "learning_rate": 4.738840539023021e-05, "loss": 0.0734, "step": 11164 }, { "epoch": 1.57, "learning_rate": 4.7387937488302455e-05, "loss": 0.0719, "step": 11166 }, { "epoch": 1.57, "learning_rate": 4.73874695863747e-05, "loss": 0.088, "step": 11168 }, { "epoch": 1.57, "learning_rate": 4.738700168444694e-05, "loss": 0.073, "step": 11170 }, { "epoch": 1.57, "learning_rate": 4.7386533782519186e-05, "loss": 0.0931, "step": 11172 }, { "epoch": 1.57, "learning_rate": 4.738606588059143e-05, "loss": 0.1425, "step": 11174 }, { "epoch": 1.57, "learning_rate": 4.738559797866368e-05, "loss": 0.064, "step": 11176 }, { "epoch": 1.57, "learning_rate": 4.7385130076735917e-05, "loss": 0.0759, "step": 11178 }, { "epoch": 1.57, "learning_rate": 4.738466217480816e-05, "loss": 0.1033, "step": 11180 }, { "epoch": 1.57, "learning_rate": 4.738419427288041e-05, "loss": 0.0938, "step": 11182 }, { "epoch": 1.57, "learning_rate": 4.7383726370952654e-05, "loss": 0.0601, "step": 11184 }, { "epoch": 1.57, "learning_rate": 4.7383258469024893e-05, "loss": 0.0816, "step": 11186 }, { "epoch": 1.57, "learning_rate": 4.738279056709714e-05, "loss": 0.0874, "step": 11188 }, { "epoch": 1.57, "learning_rate": 4.7382322665169385e-05, "loss": 0.0709, "step": 11190 }, { "epoch": 1.57, "learning_rate": 4.738185476324163e-05, "loss": 0.0896, "step": 11192 }, { "epoch": 1.57, "learning_rate": 4.738138686131387e-05, "loss": 0.0777, "step": 11194 }, { "epoch": 1.57, "learning_rate": 4.7380918959386116e-05, "loss": 0.0628, "step": 11196 }, { "epoch": 1.57, "learning_rate": 4.7380451057458355e-05, "loss": 0.1021, "step": 11198 }, { "epoch": 1.57, "learning_rate": 4.737998315553061e-05, "loss": 0.073, "step": 11200 }, { "epoch": 1.57, "learning_rate": 4.737951525360285e-05, "loss": 0.0818, "step": 11202 }, { "epoch": 1.57, "learning_rate": 4.737904735167509e-05, "loss": 0.0749, "step": 11204 }, { "epoch": 1.57, "learning_rate": 4.737857944974733e-05, "loss": 0.0763, "step": 11206 }, { "epoch": 1.57, "learning_rate": 4.737811154781958e-05, "loss": 0.069, "step": 11208 }, { "epoch": 1.57, "learning_rate": 4.7377643645891824e-05, "loss": 0.0722, "step": 11210 }, { "epoch": 1.57, "learning_rate": 4.737717574396407e-05, "loss": 0.0952, "step": 11212 }, { "epoch": 1.57, "learning_rate": 4.737670784203631e-05, "loss": 0.0932, "step": 11214 }, { "epoch": 1.57, "learning_rate": 4.7376239940108555e-05, "loss": 0.0806, "step": 11216 }, { "epoch": 1.57, "learning_rate": 4.73757720381808e-05, "loss": 0.0695, "step": 11218 }, { "epoch": 1.57, "learning_rate": 4.737530413625305e-05, "loss": 0.0598, "step": 11220 }, { "epoch": 1.58, "learning_rate": 4.7374836234325286e-05, "loss": 0.1067, "step": 11222 }, { "epoch": 1.58, "learning_rate": 4.737436833239753e-05, "loss": 0.0689, "step": 11224 }, { "epoch": 1.58, "learning_rate": 4.737390043046978e-05, "loss": 0.1037, "step": 11226 }, { "epoch": 1.58, "learning_rate": 4.7373432528542024e-05, "loss": 0.0744, "step": 11228 }, { "epoch": 1.58, "learning_rate": 4.737296462661426e-05, "loss": 0.0758, "step": 11230 }, { "epoch": 1.58, "learning_rate": 4.737249672468651e-05, "loss": 0.0667, "step": 11232 }, { "epoch": 1.58, "learning_rate": 4.7372028822758755e-05, "loss": 0.0683, "step": 11234 }, { "epoch": 1.58, "learning_rate": 4.7371560920831e-05, "loss": 0.0885, "step": 11236 }, { "epoch": 1.58, "learning_rate": 4.737109301890324e-05, "loss": 0.0687, "step": 11238 }, { "epoch": 1.58, "learning_rate": 4.7370625116975486e-05, "loss": 0.0814, "step": 11240 }, { "epoch": 1.58, "learning_rate": 4.7370157215047725e-05, "loss": 0.0836, "step": 11242 }, { "epoch": 1.58, "learning_rate": 4.736968931311998e-05, "loss": 0.0732, "step": 11244 }, { "epoch": 1.58, "learning_rate": 4.7369221411192217e-05, "loss": 0.0917, "step": 11246 }, { "epoch": 1.58, "learning_rate": 4.736875350926446e-05, "loss": 0.0698, "step": 11248 }, { "epoch": 1.58, "learning_rate": 4.73682856073367e-05, "loss": 0.0857, "step": 11250 }, { "epoch": 1.58, "learning_rate": 4.7367817705408954e-05, "loss": 0.0953, "step": 11252 }, { "epoch": 1.58, "learning_rate": 4.7367349803481193e-05, "loss": 0.0474, "step": 11254 }, { "epoch": 1.58, "learning_rate": 4.736688190155344e-05, "loss": 0.0712, "step": 11256 }, { "epoch": 1.58, "learning_rate": 4.736641399962568e-05, "loss": 0.0865, "step": 11258 }, { "epoch": 1.58, "learning_rate": 4.7365946097697924e-05, "loss": 0.0683, "step": 11260 }, { "epoch": 1.58, "learning_rate": 4.736547819577017e-05, "loss": 0.0531, "step": 11262 }, { "epoch": 1.58, "learning_rate": 4.7365010293842416e-05, "loss": 0.078, "step": 11264 }, { "epoch": 1.58, "learning_rate": 4.7364542391914655e-05, "loss": 0.1029, "step": 11266 }, { "epoch": 1.58, "learning_rate": 4.73640744899869e-05, "loss": 0.0774, "step": 11268 }, { "epoch": 1.58, "learning_rate": 4.736360658805915e-05, "loss": 0.0738, "step": 11270 }, { "epoch": 1.58, "learning_rate": 4.7363138686131386e-05, "loss": 0.084, "step": 11272 }, { "epoch": 1.58, "learning_rate": 4.736267078420363e-05, "loss": 0.0607, "step": 11274 }, { "epoch": 1.58, "learning_rate": 4.736220288227587e-05, "loss": 0.0743, "step": 11276 }, { "epoch": 1.58, "learning_rate": 4.7361734980348124e-05, "loss": 0.0675, "step": 11278 }, { "epoch": 1.58, "learning_rate": 4.736126707842036e-05, "loss": 0.0873, "step": 11280 }, { "epoch": 1.58, "learning_rate": 4.736079917649261e-05, "loss": 0.0808, "step": 11282 }, { "epoch": 1.58, "learning_rate": 4.736033127456485e-05, "loss": 0.0797, "step": 11284 }, { "epoch": 1.58, "learning_rate": 4.73598633726371e-05, "loss": 0.0756, "step": 11286 }, { "epoch": 1.58, "learning_rate": 4.735939547070934e-05, "loss": 0.0828, "step": 11288 }, { "epoch": 1.58, "learning_rate": 4.7358927568781586e-05, "loss": 0.0776, "step": 11290 }, { "epoch": 1.59, "learning_rate": 4.7358459666853825e-05, "loss": 0.1002, "step": 11292 }, { "epoch": 1.59, "learning_rate": 4.735799176492607e-05, "loss": 0.0764, "step": 11294 }, { "epoch": 1.59, "learning_rate": 4.735752386299832e-05, "loss": 0.0853, "step": 11296 }, { "epoch": 1.59, "learning_rate": 4.735705596107056e-05, "loss": 0.0628, "step": 11298 }, { "epoch": 1.59, "learning_rate": 4.73565880591428e-05, "loss": 0.0834, "step": 11300 }, { "epoch": 1.59, "learning_rate": 4.735612015721505e-05, "loss": 0.0747, "step": 11302 }, { "epoch": 1.59, "learning_rate": 4.7355652255287294e-05, "loss": 0.0772, "step": 11304 }, { "epoch": 1.59, "learning_rate": 4.735518435335954e-05, "loss": 0.0777, "step": 11306 }, { "epoch": 1.59, "learning_rate": 4.735471645143178e-05, "loss": 0.0768, "step": 11308 }, { "epoch": 1.59, "learning_rate": 4.7354248549504025e-05, "loss": 0.0784, "step": 11310 }, { "epoch": 1.59, "learning_rate": 4.735378064757627e-05, "loss": 0.0723, "step": 11312 }, { "epoch": 1.59, "learning_rate": 4.735331274564852e-05, "loss": 0.0832, "step": 11314 }, { "epoch": 1.59, "learning_rate": 4.7352844843720756e-05, "loss": 0.086, "step": 11316 }, { "epoch": 1.59, "learning_rate": 4.7352376941793e-05, "loss": 0.087, "step": 11318 }, { "epoch": 1.59, "learning_rate": 4.735190903986525e-05, "loss": 0.0731, "step": 11320 }, { "epoch": 1.59, "learning_rate": 4.7351441137937493e-05, "loss": 0.0677, "step": 11322 }, { "epoch": 1.59, "learning_rate": 4.735097323600973e-05, "loss": 0.0751, "step": 11324 }, { "epoch": 1.59, "learning_rate": 4.735050533408198e-05, "loss": 0.071, "step": 11326 }, { "epoch": 1.59, "learning_rate": 4.735003743215422e-05, "loss": 0.0663, "step": 11328 }, { "epoch": 1.59, "learning_rate": 4.734956953022647e-05, "loss": 0.0683, "step": 11330 }, { "epoch": 1.59, "learning_rate": 4.734910162829871e-05, "loss": 0.0889, "step": 11332 }, { "epoch": 1.59, "learning_rate": 4.7348633726370955e-05, "loss": 0.0515, "step": 11334 }, { "epoch": 1.59, "learning_rate": 4.7348165824443195e-05, "loss": 0.0791, "step": 11336 }, { "epoch": 1.59, "learning_rate": 4.734769792251544e-05, "loss": 0.0804, "step": 11338 }, { "epoch": 1.59, "learning_rate": 4.7347230020587686e-05, "loss": 0.0884, "step": 11340 }, { "epoch": 1.59, "learning_rate": 4.734676211865993e-05, "loss": 0.0555, "step": 11342 }, { "epoch": 1.59, "learning_rate": 4.734629421673217e-05, "loss": 0.0682, "step": 11344 }, { "epoch": 1.59, "learning_rate": 4.734582631480442e-05, "loss": 0.0737, "step": 11346 }, { "epoch": 1.59, "learning_rate": 4.734535841287666e-05, "loss": 0.0816, "step": 11348 }, { "epoch": 1.59, "learning_rate": 4.734489051094891e-05, "loss": 0.1033, "step": 11350 }, { "epoch": 1.59, "learning_rate": 4.734442260902115e-05, "loss": 0.0915, "step": 11352 }, { "epoch": 1.59, "learning_rate": 4.7343954707093394e-05, "loss": 0.0765, "step": 11354 }, { "epoch": 1.59, "learning_rate": 4.734348680516564e-05, "loss": 0.0822, "step": 11356 }, { "epoch": 1.59, "learning_rate": 4.7343018903237886e-05, "loss": 0.0814, "step": 11358 }, { "epoch": 1.59, "learning_rate": 4.7342551001310125e-05, "loss": 0.0828, "step": 11360 }, { "epoch": 1.59, "learning_rate": 4.734208309938237e-05, "loss": 0.0481, "step": 11362 }, { "epoch": 1.6, "learning_rate": 4.734161519745462e-05, "loss": 0.0683, "step": 11364 }, { "epoch": 1.6, "learning_rate": 4.734114729552686e-05, "loss": 0.0599, "step": 11366 }, { "epoch": 1.6, "learning_rate": 4.73406793935991e-05, "loss": 0.0598, "step": 11368 }, { "epoch": 1.6, "learning_rate": 4.734021149167135e-05, "loss": 0.0633, "step": 11370 }, { "epoch": 1.6, "learning_rate": 4.733974358974359e-05, "loss": 0.0766, "step": 11372 }, { "epoch": 1.6, "learning_rate": 4.733927568781584e-05, "loss": 0.0828, "step": 11374 }, { "epoch": 1.6, "learning_rate": 4.733880778588808e-05, "loss": 0.0787, "step": 11376 }, { "epoch": 1.6, "learning_rate": 4.7338339883960325e-05, "loss": 0.074, "step": 11378 }, { "epoch": 1.6, "learning_rate": 4.7337871982032564e-05, "loss": 0.0945, "step": 11380 }, { "epoch": 1.6, "learning_rate": 4.733740408010482e-05, "loss": 0.0493, "step": 11382 }, { "epoch": 1.6, "learning_rate": 4.7336936178177056e-05, "loss": 0.0778, "step": 11384 }, { "epoch": 1.6, "learning_rate": 4.73364682762493e-05, "loss": 0.0715, "step": 11386 }, { "epoch": 1.6, "learning_rate": 4.733600037432154e-05, "loss": 0.0661, "step": 11388 }, { "epoch": 1.6, "learning_rate": 4.733553247239379e-05, "loss": 0.0892, "step": 11390 }, { "epoch": 1.6, "learning_rate": 4.733506457046603e-05, "loss": 0.0931, "step": 11392 }, { "epoch": 1.6, "learning_rate": 4.733459666853828e-05, "loss": 0.0753, "step": 11394 }, { "epoch": 1.6, "learning_rate": 4.733412876661052e-05, "loss": 0.0888, "step": 11396 }, { "epoch": 1.6, "learning_rate": 4.7333660864682764e-05, "loss": 0.0854, "step": 11398 }, { "epoch": 1.6, "learning_rate": 4.733319296275501e-05, "loss": 0.0715, "step": 11400 }, { "epoch": 1.6, "learning_rate": 4.7332725060827255e-05, "loss": 0.089, "step": 11402 }, { "epoch": 1.6, "learning_rate": 4.7332257158899495e-05, "loss": 0.0884, "step": 11404 }, { "epoch": 1.6, "learning_rate": 4.733178925697174e-05, "loss": 0.0781, "step": 11406 }, { "epoch": 1.6, "learning_rate": 4.7331321355043986e-05, "loss": 0.0712, "step": 11408 }, { "epoch": 1.6, "learning_rate": 4.733085345311623e-05, "loss": 0.064, "step": 11410 }, { "epoch": 1.6, "learning_rate": 4.733038555118847e-05, "loss": 0.0692, "step": 11412 }, { "epoch": 1.6, "learning_rate": 4.732991764926072e-05, "loss": 0.085, "step": 11414 }, { "epoch": 1.6, "learning_rate": 4.732944974733296e-05, "loss": 0.0743, "step": 11416 }, { "epoch": 1.6, "learning_rate": 4.732898184540521e-05, "loss": 0.0652, "step": 11418 }, { "epoch": 1.6, "learning_rate": 4.732851394347745e-05, "loss": 0.0898, "step": 11420 }, { "epoch": 1.6, "learning_rate": 4.7328046041549694e-05, "loss": 0.1076, "step": 11422 }, { "epoch": 1.6, "learning_rate": 4.732757813962193e-05, "loss": 0.073, "step": 11424 }, { "epoch": 1.6, "learning_rate": 4.7327110237694186e-05, "loss": 0.0987, "step": 11426 }, { "epoch": 1.6, "learning_rate": 4.7326642335766425e-05, "loss": 0.0615, "step": 11428 }, { "epoch": 1.6, "learning_rate": 4.732617443383867e-05, "loss": 0.0756, "step": 11430 }, { "epoch": 1.6, "learning_rate": 4.732570653191091e-05, "loss": 0.0616, "step": 11432 }, { "epoch": 1.6, "learning_rate": 4.732523862998316e-05, "loss": 0.0834, "step": 11434 }, { "epoch": 1.61, "learning_rate": 4.73247707280554e-05, "loss": 0.0648, "step": 11436 }, { "epoch": 1.61, "learning_rate": 4.732430282612765e-05, "loss": 0.079, "step": 11438 }, { "epoch": 1.61, "learning_rate": 4.732383492419989e-05, "loss": 0.0773, "step": 11440 }, { "epoch": 1.61, "learning_rate": 4.732336702227213e-05, "loss": 0.0984, "step": 11442 }, { "epoch": 1.61, "learning_rate": 4.732289912034438e-05, "loss": 0.0939, "step": 11444 }, { "epoch": 1.61, "learning_rate": 4.7322431218416625e-05, "loss": 0.0867, "step": 11446 }, { "epoch": 1.61, "learning_rate": 4.7321963316488864e-05, "loss": 0.0737, "step": 11448 }, { "epoch": 1.61, "learning_rate": 4.732149541456111e-05, "loss": 0.062, "step": 11450 }, { "epoch": 1.61, "learning_rate": 4.7321027512633356e-05, "loss": 0.0977, "step": 11452 }, { "epoch": 1.61, "learning_rate": 4.73205596107056e-05, "loss": 0.0768, "step": 11454 }, { "epoch": 1.61, "learning_rate": 4.732009170877784e-05, "loss": 0.0847, "step": 11456 }, { "epoch": 1.61, "learning_rate": 4.731962380685009e-05, "loss": 0.0885, "step": 11458 }, { "epoch": 1.61, "learning_rate": 4.731915590492233e-05, "loss": 0.1158, "step": 11460 }, { "epoch": 1.61, "learning_rate": 4.731868800299458e-05, "loss": 0.0986, "step": 11462 }, { "epoch": 1.61, "learning_rate": 4.731822010106682e-05, "loss": 0.088, "step": 11464 }, { "epoch": 1.61, "learning_rate": 4.7317752199139064e-05, "loss": 0.0747, "step": 11466 }, { "epoch": 1.61, "learning_rate": 4.731728429721131e-05, "loss": 0.0672, "step": 11468 }, { "epoch": 1.61, "learning_rate": 4.7316816395283555e-05, "loss": 0.0751, "step": 11470 }, { "epoch": 1.61, "learning_rate": 4.7316348493355795e-05, "loss": 0.0903, "step": 11472 }, { "epoch": 1.61, "learning_rate": 4.731588059142804e-05, "loss": 0.0736, "step": 11474 }, { "epoch": 1.61, "learning_rate": 4.731541268950028e-05, "loss": 0.0677, "step": 11476 }, { "epoch": 1.61, "learning_rate": 4.731494478757253e-05, "loss": 0.0791, "step": 11478 }, { "epoch": 1.61, "learning_rate": 4.731447688564477e-05, "loss": 0.0899, "step": 11480 }, { "epoch": 1.61, "learning_rate": 4.731400898371702e-05, "loss": 0.0824, "step": 11482 }, { "epoch": 1.61, "learning_rate": 4.7313541081789256e-05, "loss": 0.0991, "step": 11484 }, { "epoch": 1.61, "learning_rate": 4.73130731798615e-05, "loss": 0.0627, "step": 11486 }, { "epoch": 1.61, "learning_rate": 4.731260527793375e-05, "loss": 0.1129, "step": 11488 }, { "epoch": 1.61, "learning_rate": 4.7312137376005994e-05, "loss": 0.0945, "step": 11490 }, { "epoch": 1.61, "learning_rate": 4.731166947407823e-05, "loss": 0.0911, "step": 11492 }, { "epoch": 1.61, "learning_rate": 4.731120157215048e-05, "loss": 0.0818, "step": 11494 }, { "epoch": 1.61, "learning_rate": 4.7310733670222725e-05, "loss": 0.0887, "step": 11496 }, { "epoch": 1.61, "learning_rate": 4.731026576829497e-05, "loss": 0.0907, "step": 11498 }, { "epoch": 1.61, "learning_rate": 4.730979786636721e-05, "loss": 0.0902, "step": 11500 }, { "epoch": 1.61, "learning_rate": 4.7309329964439456e-05, "loss": 0.081, "step": 11502 }, { "epoch": 1.61, "learning_rate": 4.73088620625117e-05, "loss": 0.0923, "step": 11504 }, { "epoch": 1.62, "learning_rate": 4.730839416058395e-05, "loss": 0.0881, "step": 11506 }, { "epoch": 1.62, "learning_rate": 4.730792625865619e-05, "loss": 0.1113, "step": 11508 }, { "epoch": 1.62, "learning_rate": 4.730745835672843e-05, "loss": 0.0689, "step": 11510 }, { "epoch": 1.62, "learning_rate": 4.730699045480068e-05, "loss": 0.0657, "step": 11512 }, { "epoch": 1.62, "learning_rate": 4.7306522552872925e-05, "loss": 0.0717, "step": 11514 }, { "epoch": 1.62, "learning_rate": 4.7306054650945164e-05, "loss": 0.1176, "step": 11516 }, { "epoch": 1.62, "learning_rate": 4.730558674901741e-05, "loss": 0.0772, "step": 11518 }, { "epoch": 1.62, "learning_rate": 4.730511884708965e-05, "loss": 0.0606, "step": 11520 }, { "epoch": 1.62, "learning_rate": 4.73046509451619e-05, "loss": 0.0775, "step": 11522 }, { "epoch": 1.62, "learning_rate": 4.730418304323414e-05, "loss": 0.0849, "step": 11524 }, { "epoch": 1.62, "learning_rate": 4.730371514130638e-05, "loss": 0.0774, "step": 11526 }, { "epoch": 1.62, "learning_rate": 4.7303247239378626e-05, "loss": 0.0619, "step": 11528 }, { "epoch": 1.62, "learning_rate": 4.730277933745087e-05, "loss": 0.0864, "step": 11530 }, { "epoch": 1.62, "learning_rate": 4.730231143552312e-05, "loss": 0.0747, "step": 11532 }, { "epoch": 1.62, "learning_rate": 4.730184353359536e-05, "loss": 0.0905, "step": 11534 }, { "epoch": 1.62, "learning_rate": 4.73013756316676e-05, "loss": 0.0724, "step": 11536 }, { "epoch": 1.62, "learning_rate": 4.730090772973985e-05, "loss": 0.0693, "step": 11538 }, { "epoch": 1.62, "learning_rate": 4.7300439827812095e-05, "loss": 0.0869, "step": 11540 }, { "epoch": 1.62, "learning_rate": 4.7299971925884334e-05, "loss": 0.069, "step": 11542 }, { "epoch": 1.62, "learning_rate": 4.729950402395658e-05, "loss": 0.068, "step": 11544 }, { "epoch": 1.62, "learning_rate": 4.7299036122028826e-05, "loss": 0.0752, "step": 11546 }, { "epoch": 1.62, "learning_rate": 4.729856822010107e-05, "loss": 0.0795, "step": 11548 }, { "epoch": 1.62, "learning_rate": 4.729810031817331e-05, "loss": 0.071, "step": 11550 }, { "epoch": 1.62, "learning_rate": 4.7297632416245557e-05, "loss": 0.0766, "step": 11552 }, { "epoch": 1.62, "learning_rate": 4.7297164514317796e-05, "loss": 0.0614, "step": 11554 }, { "epoch": 1.62, "learning_rate": 4.729669661239005e-05, "loss": 0.0749, "step": 11556 }, { "epoch": 1.62, "learning_rate": 4.729622871046229e-05, "loss": 0.0758, "step": 11558 }, { "epoch": 1.62, "learning_rate": 4.729576080853453e-05, "loss": 0.0642, "step": 11560 }, { "epoch": 1.62, "learning_rate": 4.729529290660677e-05, "loss": 0.0569, "step": 11562 }, { "epoch": 1.62, "learning_rate": 4.7294825004679025e-05, "loss": 0.0919, "step": 11564 }, { "epoch": 1.62, "learning_rate": 4.7294357102751264e-05, "loss": 0.0871, "step": 11566 }, { "epoch": 1.62, "learning_rate": 4.729388920082351e-05, "loss": 0.0754, "step": 11568 }, { "epoch": 1.62, "learning_rate": 4.729342129889575e-05, "loss": 0.091, "step": 11570 }, { "epoch": 1.62, "learning_rate": 4.7292953396967995e-05, "loss": 0.0681, "step": 11572 }, { "epoch": 1.62, "learning_rate": 4.729248549504024e-05, "loss": 0.055, "step": 11574 }, { "epoch": 1.62, "learning_rate": 4.729201759311249e-05, "loss": 0.0793, "step": 11576 }, { "epoch": 1.63, "learning_rate": 4.7291549691184726e-05, "loss": 0.0748, "step": 11578 }, { "epoch": 1.63, "learning_rate": 4.729108178925697e-05, "loss": 0.0647, "step": 11580 }, { "epoch": 1.63, "learning_rate": 4.729061388732922e-05, "loss": 0.1023, "step": 11582 }, { "epoch": 1.63, "learning_rate": 4.7290145985401464e-05, "loss": 0.0651, "step": 11584 }, { "epoch": 1.63, "learning_rate": 4.72896780834737e-05, "loss": 0.0554, "step": 11586 }, { "epoch": 1.63, "learning_rate": 4.728921018154595e-05, "loss": 0.0867, "step": 11588 }, { "epoch": 1.63, "learning_rate": 4.7288742279618195e-05, "loss": 0.0707, "step": 11590 }, { "epoch": 1.63, "learning_rate": 4.728827437769044e-05, "loss": 0.0832, "step": 11592 }, { "epoch": 1.63, "learning_rate": 4.728780647576268e-05, "loss": 0.0965, "step": 11594 }, { "epoch": 1.63, "learning_rate": 4.7287338573834926e-05, "loss": 0.08, "step": 11596 }, { "epoch": 1.63, "learning_rate": 4.728687067190717e-05, "loss": 0.0704, "step": 11598 }, { "epoch": 1.63, "learning_rate": 4.728640276997942e-05, "loss": 0.0839, "step": 11600 }, { "epoch": 1.63, "learning_rate": 4.728593486805166e-05, "loss": 0.0665, "step": 11602 }, { "epoch": 1.63, "learning_rate": 4.72854669661239e-05, "loss": 0.0895, "step": 11604 }, { "epoch": 1.63, "learning_rate": 4.728499906419614e-05, "loss": 0.0749, "step": 11606 }, { "epoch": 1.63, "learning_rate": 4.7284531162268395e-05, "loss": 0.0692, "step": 11608 }, { "epoch": 1.63, "learning_rate": 4.7284063260340634e-05, "loss": 0.0591, "step": 11610 }, { "epoch": 1.63, "learning_rate": 4.728359535841288e-05, "loss": 0.0833, "step": 11612 }, { "epoch": 1.63, "learning_rate": 4.728312745648512e-05, "loss": 0.0784, "step": 11614 }, { "epoch": 1.63, "learning_rate": 4.7282659554557365e-05, "loss": 0.0692, "step": 11616 }, { "epoch": 1.63, "learning_rate": 4.728219165262961e-05, "loss": 0.0848, "step": 11618 }, { "epoch": 1.63, "learning_rate": 4.7281723750701857e-05, "loss": 0.0873, "step": 11620 }, { "epoch": 1.63, "learning_rate": 4.7281255848774096e-05, "loss": 0.0795, "step": 11622 }, { "epoch": 1.63, "learning_rate": 4.728078794684634e-05, "loss": 0.0778, "step": 11624 }, { "epoch": 1.63, "learning_rate": 4.728032004491859e-05, "loss": 0.078, "step": 11626 }, { "epoch": 1.63, "learning_rate": 4.7279852142990833e-05, "loss": 0.0792, "step": 11628 }, { "epoch": 1.63, "learning_rate": 4.727938424106307e-05, "loss": 0.098, "step": 11630 }, { "epoch": 1.63, "learning_rate": 4.727891633913532e-05, "loss": 0.0812, "step": 11632 }, { "epoch": 1.63, "learning_rate": 4.7278448437207564e-05, "loss": 0.0792, "step": 11634 }, { "epoch": 1.63, "learning_rate": 4.727798053527981e-05, "loss": 0.0957, "step": 11636 }, { "epoch": 1.63, "learning_rate": 4.727751263335205e-05, "loss": 0.0758, "step": 11638 }, { "epoch": 1.63, "learning_rate": 4.7277044731424295e-05, "loss": 0.0885, "step": 11640 }, { "epoch": 1.63, "learning_rate": 4.727657682949654e-05, "loss": 0.0937, "step": 11642 }, { "epoch": 1.63, "learning_rate": 4.727610892756879e-05, "loss": 0.0795, "step": 11644 }, { "epoch": 1.63, "learning_rate": 4.7275641025641026e-05, "loss": 0.0655, "step": 11646 }, { "epoch": 1.64, "learning_rate": 4.727517312371327e-05, "loss": 0.0676, "step": 11648 }, { "epoch": 1.64, "learning_rate": 4.727470522178551e-05, "loss": 0.064, "step": 11650 }, { "epoch": 1.64, "learning_rate": 4.7274237319857764e-05, "loss": 0.0745, "step": 11652 }, { "epoch": 1.64, "learning_rate": 4.727376941793e-05, "loss": 0.0913, "step": 11654 }, { "epoch": 1.64, "learning_rate": 4.727330151600225e-05, "loss": 0.0831, "step": 11656 }, { "epoch": 1.64, "learning_rate": 4.727283361407449e-05, "loss": 0.0773, "step": 11658 }, { "epoch": 1.64, "learning_rate": 4.727236571214674e-05, "loss": 0.0705, "step": 11660 }, { "epoch": 1.64, "learning_rate": 4.727189781021898e-05, "loss": 0.092, "step": 11662 }, { "epoch": 1.64, "learning_rate": 4.7271429908291226e-05, "loss": 0.0741, "step": 11664 }, { "epoch": 1.64, "learning_rate": 4.7270962006363465e-05, "loss": 0.0664, "step": 11666 }, { "epoch": 1.64, "learning_rate": 4.727049410443571e-05, "loss": 0.0743, "step": 11668 }, { "epoch": 1.64, "learning_rate": 4.727002620250796e-05, "loss": 0.0884, "step": 11670 }, { "epoch": 1.64, "learning_rate": 4.72695583005802e-05, "loss": 0.0837, "step": 11672 }, { "epoch": 1.64, "learning_rate": 4.726909039865244e-05, "loss": 0.0726, "step": 11674 }, { "epoch": 1.64, "learning_rate": 4.726862249672469e-05, "loss": 0.0882, "step": 11676 }, { "epoch": 1.64, "learning_rate": 4.7268154594796934e-05, "loss": 0.0856, "step": 11678 }, { "epoch": 1.64, "learning_rate": 4.726768669286918e-05, "loss": 0.0879, "step": 11680 }, { "epoch": 1.64, "learning_rate": 4.726721879094142e-05, "loss": 0.0768, "step": 11682 }, { "epoch": 1.64, "learning_rate": 4.7266750889013665e-05, "loss": 0.0636, "step": 11684 }, { "epoch": 1.64, "learning_rate": 4.726628298708591e-05, "loss": 0.0611, "step": 11686 }, { "epoch": 1.64, "learning_rate": 4.7265815085158157e-05, "loss": 0.0699, "step": 11688 }, { "epoch": 1.64, "learning_rate": 4.7265347183230396e-05, "loss": 0.0718, "step": 11690 }, { "epoch": 1.64, "learning_rate": 4.726487928130264e-05, "loss": 0.0774, "step": 11692 }, { "epoch": 1.64, "learning_rate": 4.726441137937489e-05, "loss": 0.0655, "step": 11694 }, { "epoch": 1.64, "learning_rate": 4.7263943477447133e-05, "loss": 0.0833, "step": 11696 }, { "epoch": 1.64, "learning_rate": 4.726347557551937e-05, "loss": 0.0958, "step": 11698 }, { "epoch": 1.64, "learning_rate": 4.726300767359162e-05, "loss": 0.0883, "step": 11700 }, { "epoch": 1.64, "learning_rate": 4.726253977166386e-05, "loss": 0.0853, "step": 11702 }, { "epoch": 1.64, "learning_rate": 4.726207186973611e-05, "loss": 0.0585, "step": 11704 }, { "epoch": 1.64, "learning_rate": 4.726160396780835e-05, "loss": 0.0812, "step": 11706 }, { "epoch": 1.64, "learning_rate": 4.7261136065880595e-05, "loss": 0.082, "step": 11708 }, { "epoch": 1.64, "learning_rate": 4.7260668163952834e-05, "loss": 0.0982, "step": 11710 }, { "epoch": 1.64, "learning_rate": 4.726020026202509e-05, "loss": 0.0671, "step": 11712 }, { "epoch": 1.64, "learning_rate": 4.7259732360097326e-05, "loss": 0.0791, "step": 11714 }, { "epoch": 1.64, "learning_rate": 4.725926445816957e-05, "loss": 0.1029, "step": 11716 }, { "epoch": 1.64, "learning_rate": 4.725879655624181e-05, "loss": 0.0689, "step": 11718 }, { "epoch": 1.65, "learning_rate": 4.725832865431406e-05, "loss": 0.0912, "step": 11720 }, { "epoch": 1.65, "learning_rate": 4.72578607523863e-05, "loss": 0.072, "step": 11722 }, { "epoch": 1.65, "learning_rate": 4.725739285045855e-05, "loss": 0.0864, "step": 11724 }, { "epoch": 1.65, "learning_rate": 4.725692494853079e-05, "loss": 0.0821, "step": 11726 }, { "epoch": 1.65, "learning_rate": 4.7256457046603034e-05, "loss": 0.0626, "step": 11728 }, { "epoch": 1.65, "learning_rate": 4.725598914467528e-05, "loss": 0.099, "step": 11730 }, { "epoch": 1.65, "learning_rate": 4.7255521242747526e-05, "loss": 0.0964, "step": 11732 }, { "epoch": 1.65, "learning_rate": 4.7255053340819765e-05, "loss": 0.0935, "step": 11734 }, { "epoch": 1.65, "learning_rate": 4.725458543889201e-05, "loss": 0.0822, "step": 11736 }, { "epoch": 1.65, "learning_rate": 4.725411753696426e-05, "loss": 0.0706, "step": 11738 }, { "epoch": 1.65, "learning_rate": 4.72536496350365e-05, "loss": 0.065, "step": 11740 }, { "epoch": 1.65, "learning_rate": 4.725318173310874e-05, "loss": 0.1036, "step": 11742 }, { "epoch": 1.65, "learning_rate": 4.725271383118099e-05, "loss": 0.0634, "step": 11744 }, { "epoch": 1.65, "learning_rate": 4.7252245929253234e-05, "loss": 0.0652, "step": 11746 }, { "epoch": 1.65, "learning_rate": 4.725177802732548e-05, "loss": 0.0994, "step": 11748 }, { "epoch": 1.65, "learning_rate": 4.725131012539772e-05, "loss": 0.0934, "step": 11750 }, { "epoch": 1.65, "learning_rate": 4.7250842223469965e-05, "loss": 0.0844, "step": 11752 }, { "epoch": 1.65, "learning_rate": 4.7250374321542204e-05, "loss": 0.0772, "step": 11754 }, { "epoch": 1.65, "learning_rate": 4.7249906419614457e-05, "loss": 0.1009, "step": 11756 }, { "epoch": 1.65, "learning_rate": 4.7249438517686696e-05, "loss": 0.086, "step": 11758 }, { "epoch": 1.65, "learning_rate": 4.724897061575894e-05, "loss": 0.0648, "step": 11760 }, { "epoch": 1.65, "learning_rate": 4.724850271383118e-05, "loss": 0.0779, "step": 11762 }, { "epoch": 1.65, "learning_rate": 4.724803481190343e-05, "loss": 0.0707, "step": 11764 }, { "epoch": 1.65, "learning_rate": 4.724756690997567e-05, "loss": 0.076, "step": 11766 }, { "epoch": 1.65, "learning_rate": 4.724709900804792e-05, "loss": 0.0667, "step": 11768 }, { "epoch": 1.65, "learning_rate": 4.724663110612016e-05, "loss": 0.0736, "step": 11770 }, { "epoch": 1.65, "learning_rate": 4.7246163204192404e-05, "loss": 0.0739, "step": 11772 }, { "epoch": 1.65, "learning_rate": 4.724569530226465e-05, "loss": 0.0869, "step": 11774 }, { "epoch": 1.65, "learning_rate": 4.724522740033689e-05, "loss": 0.0745, "step": 11776 }, { "epoch": 1.65, "learning_rate": 4.7244759498409134e-05, "loss": 0.0821, "step": 11778 }, { "epoch": 1.65, "learning_rate": 4.724429159648138e-05, "loss": 0.0757, "step": 11780 }, { "epoch": 1.65, "learning_rate": 4.7243823694553626e-05, "loss": 0.0797, "step": 11782 }, { "epoch": 1.65, "learning_rate": 4.7243355792625865e-05, "loss": 0.0739, "step": 11784 }, { "epoch": 1.65, "learning_rate": 4.724288789069811e-05, "loss": 0.0636, "step": 11786 }, { "epoch": 1.65, "learning_rate": 4.724241998877035e-05, "loss": 0.0643, "step": 11788 }, { "epoch": 1.65, "learning_rate": 4.72419520868426e-05, "loss": 0.1214, "step": 11790 }, { "epoch": 1.66, "learning_rate": 4.724148418491484e-05, "loss": 0.0793, "step": 11792 }, { "epoch": 1.66, "learning_rate": 4.724101628298709e-05, "loss": 0.0764, "step": 11794 }, { "epoch": 1.66, "learning_rate": 4.724054838105933e-05, "loss": 0.0554, "step": 11796 }, { "epoch": 1.66, "learning_rate": 4.724008047913157e-05, "loss": 0.0658, "step": 11798 }, { "epoch": 1.66, "learning_rate": 4.723961257720382e-05, "loss": 0.0616, "step": 11800 }, { "epoch": 1.66, "learning_rate": 4.7239144675276065e-05, "loss": 0.0725, "step": 11802 }, { "epoch": 1.66, "learning_rate": 4.7238676773348304e-05, "loss": 0.0815, "step": 11804 }, { "epoch": 1.66, "learning_rate": 4.723820887142055e-05, "loss": 0.0898, "step": 11806 }, { "epoch": 1.66, "learning_rate": 4.7237740969492796e-05, "loss": 0.0748, "step": 11808 }, { "epoch": 1.66, "learning_rate": 4.723727306756504e-05, "loss": 0.0658, "step": 11810 }, { "epoch": 1.66, "learning_rate": 4.723680516563728e-05, "loss": 0.0806, "step": 11812 }, { "epoch": 1.66, "learning_rate": 4.723633726370953e-05, "loss": 0.0626, "step": 11814 }, { "epoch": 1.66, "learning_rate": 4.723586936178177e-05, "loss": 0.0744, "step": 11816 }, { "epoch": 1.66, "learning_rate": 4.723540145985402e-05, "loss": 0.0747, "step": 11818 }, { "epoch": 1.66, "learning_rate": 4.723493355792626e-05, "loss": 0.0838, "step": 11820 }, { "epoch": 1.66, "learning_rate": 4.7234465655998504e-05, "loss": 0.0588, "step": 11822 }, { "epoch": 1.66, "learning_rate": 4.723399775407075e-05, "loss": 0.0908, "step": 11824 }, { "epoch": 1.66, "learning_rate": 4.7233529852142996e-05, "loss": 0.1086, "step": 11826 }, { "epoch": 1.66, "learning_rate": 4.7233061950215235e-05, "loss": 0.0839, "step": 11828 }, { "epoch": 1.66, "learning_rate": 4.723259404828748e-05, "loss": 0.0733, "step": 11830 }, { "epoch": 1.66, "learning_rate": 4.723212614635972e-05, "loss": 0.0764, "step": 11832 }, { "epoch": 1.66, "learning_rate": 4.723165824443197e-05, "loss": 0.085, "step": 11834 }, { "epoch": 1.66, "learning_rate": 4.723119034250421e-05, "loss": 0.1012, "step": 11836 }, { "epoch": 1.66, "learning_rate": 4.723072244057646e-05, "loss": 0.0772, "step": 11838 }, { "epoch": 1.66, "learning_rate": 4.72302545386487e-05, "loss": 0.0957, "step": 11840 }, { "epoch": 1.66, "learning_rate": 4.722978663672095e-05, "loss": 0.0894, "step": 11842 }, { "epoch": 1.66, "learning_rate": 4.722931873479319e-05, "loss": 0.0849, "step": 11844 }, { "epoch": 1.66, "learning_rate": 4.7228850832865435e-05, "loss": 0.0983, "step": 11846 }, { "epoch": 1.66, "learning_rate": 4.7228382930937674e-05, "loss": 0.0646, "step": 11848 }, { "epoch": 1.66, "learning_rate": 4.722791502900992e-05, "loss": 0.0889, "step": 11850 }, { "epoch": 1.66, "learning_rate": 4.7227447127082165e-05, "loss": 0.0802, "step": 11852 }, { "epoch": 1.66, "learning_rate": 4.722697922515441e-05, "loss": 0.0982, "step": 11854 }, { "epoch": 1.66, "learning_rate": 4.722651132322665e-05, "loss": 0.0758, "step": 11856 }, { "epoch": 1.66, "learning_rate": 4.7226043421298896e-05, "loss": 0.0803, "step": 11858 }, { "epoch": 1.66, "learning_rate": 4.722557551937114e-05, "loss": 0.0862, "step": 11860 }, { "epoch": 1.67, "learning_rate": 4.722510761744339e-05, "loss": 0.0828, "step": 11862 }, { "epoch": 1.67, "learning_rate": 4.722463971551563e-05, "loss": 0.0986, "step": 11864 }, { "epoch": 1.67, "learning_rate": 4.722417181358787e-05, "loss": 0.0898, "step": 11866 }, { "epoch": 1.67, "learning_rate": 4.722370391166012e-05, "loss": 0.0875, "step": 11868 }, { "epoch": 1.67, "learning_rate": 4.7223236009732365e-05, "loss": 0.0923, "step": 11870 }, { "epoch": 1.67, "learning_rate": 4.7222768107804604e-05, "loss": 0.0614, "step": 11872 }, { "epoch": 1.67, "learning_rate": 4.722230020587685e-05, "loss": 0.0825, "step": 11874 }, { "epoch": 1.67, "learning_rate": 4.7221832303949096e-05, "loss": 0.0857, "step": 11876 }, { "epoch": 1.67, "learning_rate": 4.722136440202134e-05, "loss": 0.0788, "step": 11878 }, { "epoch": 1.67, "learning_rate": 4.722089650009358e-05, "loss": 0.0658, "step": 11880 }, { "epoch": 1.67, "learning_rate": 4.722042859816583e-05, "loss": 0.0642, "step": 11882 }, { "epoch": 1.67, "learning_rate": 4.7219960696238066e-05, "loss": 0.0727, "step": 11884 }, { "epoch": 1.67, "learning_rate": 4.721949279431032e-05, "loss": 0.071, "step": 11886 }, { "epoch": 1.67, "learning_rate": 4.721902489238256e-05, "loss": 0.1086, "step": 11888 }, { "epoch": 1.67, "learning_rate": 4.7218556990454804e-05, "loss": 0.0937, "step": 11890 }, { "epoch": 1.67, "learning_rate": 4.721808908852704e-05, "loss": 0.0744, "step": 11892 }, { "epoch": 1.67, "learning_rate": 4.7217621186599296e-05, "loss": 0.0981, "step": 11894 }, { "epoch": 1.67, "learning_rate": 4.7217153284671535e-05, "loss": 0.0722, "step": 11896 }, { "epoch": 1.67, "learning_rate": 4.721668538274378e-05, "loss": 0.091, "step": 11898 }, { "epoch": 1.67, "learning_rate": 4.721621748081602e-05, "loss": 0.0834, "step": 11900 }, { "epoch": 1.67, "learning_rate": 4.7215749578888266e-05, "loss": 0.0942, "step": 11902 }, { "epoch": 1.67, "learning_rate": 4.721528167696051e-05, "loss": 0.0841, "step": 11904 }, { "epoch": 1.67, "learning_rate": 4.721481377503276e-05, "loss": 0.0716, "step": 11906 }, { "epoch": 1.67, "learning_rate": 4.7214345873105e-05, "loss": 0.0618, "step": 11908 }, { "epoch": 1.67, "learning_rate": 4.721387797117724e-05, "loss": 0.0806, "step": 11910 }, { "epoch": 1.67, "learning_rate": 4.721341006924949e-05, "loss": 0.0829, "step": 11912 }, { "epoch": 1.67, "learning_rate": 4.7212942167321735e-05, "loss": 0.0685, "step": 11914 }, { "epoch": 1.67, "learning_rate": 4.7212474265393974e-05, "loss": 0.067, "step": 11916 }, { "epoch": 1.67, "learning_rate": 4.721200636346622e-05, "loss": 0.0713, "step": 11918 }, { "epoch": 1.67, "learning_rate": 4.7211538461538465e-05, "loss": 0.0798, "step": 11920 }, { "epoch": 1.67, "learning_rate": 4.721107055961071e-05, "loss": 0.0572, "step": 11922 }, { "epoch": 1.67, "learning_rate": 4.721060265768295e-05, "loss": 0.0808, "step": 11924 }, { "epoch": 1.67, "learning_rate": 4.7210134755755196e-05, "loss": 0.076, "step": 11926 }, { "epoch": 1.67, "learning_rate": 4.7209666853827436e-05, "loss": 0.0811, "step": 11928 }, { "epoch": 1.67, "learning_rate": 4.720919895189969e-05, "loss": 0.0944, "step": 11930 }, { "epoch": 1.67, "learning_rate": 4.720873104997193e-05, "loss": 0.066, "step": 11932 }, { "epoch": 1.68, "learning_rate": 4.720826314804417e-05, "loss": 0.0877, "step": 11934 }, { "epoch": 1.68, "learning_rate": 4.720779524611641e-05, "loss": 0.0935, "step": 11936 }, { "epoch": 1.68, "learning_rate": 4.7207327344188665e-05, "loss": 0.078, "step": 11938 }, { "epoch": 1.68, "learning_rate": 4.7206859442260904e-05, "loss": 0.081, "step": 11940 }, { "epoch": 1.68, "learning_rate": 4.720639154033315e-05, "loss": 0.0885, "step": 11942 }, { "epoch": 1.68, "learning_rate": 4.720592363840539e-05, "loss": 0.0958, "step": 11944 }, { "epoch": 1.68, "learning_rate": 4.7205455736477635e-05, "loss": 0.0868, "step": 11946 }, { "epoch": 1.68, "learning_rate": 4.720498783454988e-05, "loss": 0.0872, "step": 11948 }, { "epoch": 1.68, "learning_rate": 4.720451993262213e-05, "loss": 0.0728, "step": 11950 }, { "epoch": 1.68, "learning_rate": 4.7204052030694366e-05, "loss": 0.0711, "step": 11952 }, { "epoch": 1.68, "learning_rate": 4.720358412876661e-05, "loss": 0.0688, "step": 11954 }, { "epoch": 1.68, "learning_rate": 4.720311622683886e-05, "loss": 0.0639, "step": 11956 }, { "epoch": 1.68, "learning_rate": 4.7202648324911104e-05, "loss": 0.0766, "step": 11958 }, { "epoch": 1.68, "learning_rate": 4.720218042298334e-05, "loss": 0.0897, "step": 11960 }, { "epoch": 1.68, "learning_rate": 4.720171252105559e-05, "loss": 0.1089, "step": 11962 }, { "epoch": 1.68, "learning_rate": 4.7201244619127835e-05, "loss": 0.0947, "step": 11964 }, { "epoch": 1.68, "learning_rate": 4.720077671720008e-05, "loss": 0.0831, "step": 11966 }, { "epoch": 1.68, "learning_rate": 4.720030881527232e-05, "loss": 0.0735, "step": 11968 }, { "epoch": 1.68, "learning_rate": 4.7199840913344566e-05, "loss": 0.1122, "step": 11970 }, { "epoch": 1.68, "learning_rate": 4.719937301141681e-05, "loss": 0.0753, "step": 11972 }, { "epoch": 1.68, "learning_rate": 4.719890510948906e-05, "loss": 0.0854, "step": 11974 }, { "epoch": 1.68, "learning_rate": 4.71984372075613e-05, "loss": 0.0752, "step": 11976 }, { "epoch": 1.68, "learning_rate": 4.719796930563354e-05, "loss": 0.0778, "step": 11978 }, { "epoch": 1.68, "learning_rate": 4.719750140370578e-05, "loss": 0.0834, "step": 11980 }, { "epoch": 1.68, "learning_rate": 4.7197033501778035e-05, "loss": 0.0861, "step": 11982 }, { "epoch": 1.68, "learning_rate": 4.7196565599850274e-05, "loss": 0.0943, "step": 11984 }, { "epoch": 1.68, "learning_rate": 4.719609769792252e-05, "loss": 0.0579, "step": 11986 }, { "epoch": 1.68, "learning_rate": 4.719562979599476e-05, "loss": 0.0936, "step": 11988 }, { "epoch": 1.68, "learning_rate": 4.719516189406701e-05, "loss": 0.0587, "step": 11990 }, { "epoch": 1.68, "learning_rate": 4.719469399213925e-05, "loss": 0.085, "step": 11992 }, { "epoch": 1.68, "learning_rate": 4.7194226090211496e-05, "loss": 0.0986, "step": 11994 }, { "epoch": 1.68, "learning_rate": 4.7193758188283736e-05, "loss": 0.0844, "step": 11996 }, { "epoch": 1.68, "learning_rate": 4.719329028635598e-05, "loss": 0.0631, "step": 11998 }, { "epoch": 1.68, "learning_rate": 4.719282238442823e-05, "loss": 0.0657, "step": 12000 }, { "epoch": 1.68, "eval_gen_len": 28.4483, "eval_loss": 1.0475629568099976, "eval_meteor": 0.0375, "eval_runtime": 14.051, "eval_samples_per_second": 4.128, "eval_steps_per_second": 0.569, "step": 12000 }, { "epoch": 1.68, "learning_rate": 4.719235448250047e-05, "loss": 0.0606, "step": 12002 }, { "epoch": 1.69, "learning_rate": 4.719188658057271e-05, "loss": 0.0709, "step": 12004 }, { "epoch": 1.69, "learning_rate": 4.719141867864496e-05, "loss": 0.071, "step": 12006 }, { "epoch": 1.69, "learning_rate": 4.7190950776717204e-05, "loss": 0.0771, "step": 12008 }, { "epoch": 1.69, "learning_rate": 4.719048287478945e-05, "loss": 0.0841, "step": 12010 }, { "epoch": 1.69, "learning_rate": 4.719001497286169e-05, "loss": 0.0795, "step": 12012 }, { "epoch": 1.69, "learning_rate": 4.7189547070933935e-05, "loss": 0.0765, "step": 12014 }, { "epoch": 1.69, "learning_rate": 4.718907916900618e-05, "loss": 0.0778, "step": 12016 }, { "epoch": 1.69, "learning_rate": 4.718861126707843e-05, "loss": 0.0827, "step": 12018 }, { "epoch": 1.69, "learning_rate": 4.7188143365150666e-05, "loss": 0.0841, "step": 12020 }, { "epoch": 1.69, "learning_rate": 4.718767546322291e-05, "loss": 0.0687, "step": 12022 }, { "epoch": 1.69, "learning_rate": 4.718720756129516e-05, "loss": 0.0862, "step": 12024 }, { "epoch": 1.69, "learning_rate": 4.7186739659367404e-05, "loss": 0.0736, "step": 12026 }, { "epoch": 1.69, "learning_rate": 4.718627175743964e-05, "loss": 0.0894, "step": 12028 }, { "epoch": 1.69, "learning_rate": 4.718580385551188e-05, "loss": 0.0997, "step": 12030 }, { "epoch": 1.69, "learning_rate": 4.718533595358413e-05, "loss": 0.0545, "step": 12032 }, { "epoch": 1.69, "learning_rate": 4.7184868051656374e-05, "loss": 0.0907, "step": 12034 }, { "epoch": 1.69, "learning_rate": 4.718440014972862e-05, "loss": 0.098, "step": 12036 }, { "epoch": 1.69, "learning_rate": 4.718393224780086e-05, "loss": 0.0891, "step": 12038 }, { "epoch": 1.69, "learning_rate": 4.7183464345873105e-05, "loss": 0.0955, "step": 12040 }, { "epoch": 1.69, "learning_rate": 4.718299644394535e-05, "loss": 0.0897, "step": 12042 }, { "epoch": 1.69, "learning_rate": 4.71825285420176e-05, "loss": 0.0927, "step": 12044 }, { "epoch": 1.69, "learning_rate": 4.7182060640089836e-05, "loss": 0.0754, "step": 12046 }, { "epoch": 1.69, "learning_rate": 4.718159273816208e-05, "loss": 0.0868, "step": 12048 }, { "epoch": 1.69, "learning_rate": 4.718112483623433e-05, "loss": 0.0834, "step": 12050 }, { "epoch": 1.69, "learning_rate": 4.7180656934306574e-05, "loss": 0.0663, "step": 12052 }, { "epoch": 1.69, "learning_rate": 4.718018903237881e-05, "loss": 0.0725, "step": 12054 }, { "epoch": 1.69, "learning_rate": 4.717972113045106e-05, "loss": 0.0634, "step": 12056 }, { "epoch": 1.69, "learning_rate": 4.7179253228523305e-05, "loss": 0.0943, "step": 12058 }, { "epoch": 1.69, "learning_rate": 4.717878532659555e-05, "loss": 0.0955, "step": 12060 }, { "epoch": 1.69, "learning_rate": 4.717831742466779e-05, "loss": 0.0678, "step": 12062 }, { "epoch": 1.69, "learning_rate": 4.7177849522740036e-05, "loss": 0.0826, "step": 12064 }, { "epoch": 1.69, "learning_rate": 4.7177381620812275e-05, "loss": 0.0751, "step": 12066 }, { "epoch": 1.69, "learning_rate": 4.717691371888453e-05, "loss": 0.0981, "step": 12068 }, { "epoch": 1.69, "learning_rate": 4.7176445816956767e-05, "loss": 0.0855, "step": 12070 }, { "epoch": 1.69, "learning_rate": 4.717597791502901e-05, "loss": 0.0867, "step": 12072 }, { "epoch": 1.69, "learning_rate": 4.717551001310125e-05, "loss": 0.094, "step": 12074 }, { "epoch": 1.7, "learning_rate": 4.71750421111735e-05, "loss": 0.0927, "step": 12076 }, { "epoch": 1.7, "learning_rate": 4.7174574209245743e-05, "loss": 0.086, "step": 12078 }, { "epoch": 1.7, "learning_rate": 4.717410630731799e-05, "loss": 0.0841, "step": 12080 }, { "epoch": 1.7, "learning_rate": 4.717363840539023e-05, "loss": 0.0949, "step": 12082 }, { "epoch": 1.7, "learning_rate": 4.7173170503462474e-05, "loss": 0.0747, "step": 12084 }, { "epoch": 1.7, "learning_rate": 4.717270260153472e-05, "loss": 0.0669, "step": 12086 }, { "epoch": 1.7, "learning_rate": 4.7172234699606966e-05, "loss": 0.0851, "step": 12088 }, { "epoch": 1.7, "learning_rate": 4.7171766797679205e-05, "loss": 0.072, "step": 12090 }, { "epoch": 1.7, "learning_rate": 4.717129889575145e-05, "loss": 0.0783, "step": 12092 }, { "epoch": 1.7, "learning_rate": 4.71708309938237e-05, "loss": 0.0915, "step": 12094 }, { "epoch": 1.7, "learning_rate": 4.717036309189594e-05, "loss": 0.0717, "step": 12096 }, { "epoch": 1.7, "learning_rate": 4.716989518996818e-05, "loss": 0.1175, "step": 12098 }, { "epoch": 1.7, "learning_rate": 4.716942728804043e-05, "loss": 0.101, "step": 12100 }, { "epoch": 1.7, "learning_rate": 4.7168959386112674e-05, "loss": 0.0834, "step": 12102 }, { "epoch": 1.7, "learning_rate": 4.716849148418492e-05, "loss": 0.0789, "step": 12104 }, { "epoch": 1.7, "learning_rate": 4.716802358225716e-05, "loss": 0.0702, "step": 12106 }, { "epoch": 1.7, "learning_rate": 4.7167555680329405e-05, "loss": 0.1083, "step": 12108 }, { "epoch": 1.7, "learning_rate": 4.7167087778401644e-05, "loss": 0.086, "step": 12110 }, { "epoch": 1.7, "learning_rate": 4.71666198764739e-05, "loss": 0.0903, "step": 12112 }, { "epoch": 1.7, "learning_rate": 4.7166151974546136e-05, "loss": 0.0627, "step": 12114 }, { "epoch": 1.7, "learning_rate": 4.716568407261838e-05, "loss": 0.0804, "step": 12116 }, { "epoch": 1.7, "learning_rate": 4.716521617069062e-05, "loss": 0.0791, "step": 12118 }, { "epoch": 1.7, "learning_rate": 4.7164748268762874e-05, "loss": 0.0845, "step": 12120 }, { "epoch": 1.7, "learning_rate": 4.716428036683511e-05, "loss": 0.0908, "step": 12122 }, { "epoch": 1.7, "learning_rate": 4.716381246490736e-05, "loss": 0.0899, "step": 12124 }, { "epoch": 1.7, "learning_rate": 4.71633445629796e-05, "loss": 0.0888, "step": 12126 }, { "epoch": 1.7, "learning_rate": 4.7162876661051844e-05, "loss": 0.0692, "step": 12128 }, { "epoch": 1.7, "learning_rate": 4.716240875912409e-05, "loss": 0.0769, "step": 12130 }, { "epoch": 1.7, "learning_rate": 4.7161940857196336e-05, "loss": 0.1018, "step": 12132 }, { "epoch": 1.7, "learning_rate": 4.7161472955268575e-05, "loss": 0.0935, "step": 12134 }, { "epoch": 1.7, "learning_rate": 4.716100505334082e-05, "loss": 0.0946, "step": 12136 }, { "epoch": 1.7, "learning_rate": 4.7160537151413067e-05, "loss": 0.0889, "step": 12138 }, { "epoch": 1.7, "learning_rate": 4.716006924948531e-05, "loss": 0.107, "step": 12140 }, { "epoch": 1.7, "learning_rate": 4.715960134755755e-05, "loss": 0.0577, "step": 12142 }, { "epoch": 1.7, "learning_rate": 4.71591334456298e-05, "loss": 0.0901, "step": 12144 }, { "epoch": 1.7, "learning_rate": 4.7158665543702043e-05, "loss": 0.0711, "step": 12146 }, { "epoch": 1.71, "learning_rate": 4.715819764177429e-05, "loss": 0.0928, "step": 12148 }, { "epoch": 1.71, "learning_rate": 4.715772973984653e-05, "loss": 0.0766, "step": 12150 }, { "epoch": 1.71, "learning_rate": 4.7157261837918774e-05, "loss": 0.0681, "step": 12152 }, { "epoch": 1.71, "learning_rate": 4.715679393599102e-05, "loss": 0.0737, "step": 12154 }, { "epoch": 1.71, "learning_rate": 4.7156326034063266e-05, "loss": 0.095, "step": 12156 }, { "epoch": 1.71, "learning_rate": 4.7155858132135505e-05, "loss": 0.0668, "step": 12158 }, { "epoch": 1.71, "learning_rate": 4.715539023020775e-05, "loss": 0.0609, "step": 12160 }, { "epoch": 1.71, "learning_rate": 4.715492232827999e-05, "loss": 0.0812, "step": 12162 }, { "epoch": 1.71, "learning_rate": 4.715445442635224e-05, "loss": 0.0752, "step": 12164 }, { "epoch": 1.71, "learning_rate": 4.715398652442448e-05, "loss": 0.0787, "step": 12166 }, { "epoch": 1.71, "learning_rate": 4.715351862249673e-05, "loss": 0.0929, "step": 12168 }, { "epoch": 1.71, "learning_rate": 4.715305072056897e-05, "loss": 0.0748, "step": 12170 }, { "epoch": 1.71, "learning_rate": 4.715258281864122e-05, "loss": 0.0749, "step": 12172 }, { "epoch": 1.71, "learning_rate": 4.715211491671346e-05, "loss": 0.0839, "step": 12174 }, { "epoch": 1.71, "learning_rate": 4.7151647014785705e-05, "loss": 0.0936, "step": 12176 }, { "epoch": 1.71, "learning_rate": 4.7151179112857944e-05, "loss": 0.0704, "step": 12178 }, { "epoch": 1.71, "learning_rate": 4.715071121093019e-05, "loss": 0.0778, "step": 12180 }, { "epoch": 1.71, "learning_rate": 4.7150243309002436e-05, "loss": 0.109, "step": 12182 }, { "epoch": 1.71, "learning_rate": 4.714977540707468e-05, "loss": 0.0578, "step": 12184 }, { "epoch": 1.71, "learning_rate": 4.714930750514692e-05, "loss": 0.0732, "step": 12186 }, { "epoch": 1.71, "learning_rate": 4.714883960321917e-05, "loss": 0.0868, "step": 12188 }, { "epoch": 1.71, "learning_rate": 4.714837170129141e-05, "loss": 0.0722, "step": 12190 }, { "epoch": 1.71, "learning_rate": 4.714790379936366e-05, "loss": 0.0947, "step": 12192 }, { "epoch": 1.71, "learning_rate": 4.71474358974359e-05, "loss": 0.0965, "step": 12194 }, { "epoch": 1.71, "learning_rate": 4.7146967995508144e-05, "loss": 0.0617, "step": 12196 }, { "epoch": 1.71, "learning_rate": 4.714650009358039e-05, "loss": 0.0687, "step": 12198 }, { "epoch": 1.71, "learning_rate": 4.7146032191652636e-05, "loss": 0.0615, "step": 12200 }, { "epoch": 1.71, "learning_rate": 4.7145564289724875e-05, "loss": 0.0919, "step": 12202 }, { "epoch": 1.71, "learning_rate": 4.714509638779712e-05, "loss": 0.0799, "step": 12204 }, { "epoch": 1.71, "learning_rate": 4.714462848586936e-05, "loss": 0.0799, "step": 12206 }, { "epoch": 1.71, "learning_rate": 4.714416058394161e-05, "loss": 0.1178, "step": 12208 }, { "epoch": 1.71, "learning_rate": 4.714369268201385e-05, "loss": 0.0729, "step": 12210 }, { "epoch": 1.71, "learning_rate": 4.71432247800861e-05, "loss": 0.0889, "step": 12212 }, { "epoch": 1.71, "learning_rate": 4.714275687815834e-05, "loss": 0.0741, "step": 12214 }, { "epoch": 1.71, "learning_rate": 4.714228897623059e-05, "loss": 0.0819, "step": 12216 }, { "epoch": 1.72, "learning_rate": 4.714182107430283e-05, "loss": 0.0892, "step": 12218 }, { "epoch": 1.72, "learning_rate": 4.7141353172375074e-05, "loss": 0.0782, "step": 12220 }, { "epoch": 1.72, "learning_rate": 4.7140885270447314e-05, "loss": 0.1023, "step": 12222 }, { "epoch": 1.72, "learning_rate": 4.714041736851956e-05, "loss": 0.0975, "step": 12224 }, { "epoch": 1.72, "learning_rate": 4.7139949466591805e-05, "loss": 0.0752, "step": 12226 }, { "epoch": 1.72, "learning_rate": 4.713948156466405e-05, "loss": 0.0733, "step": 12228 }, { "epoch": 1.72, "learning_rate": 4.713901366273629e-05, "loss": 0.0735, "step": 12230 }, { "epoch": 1.72, "learning_rate": 4.7138545760808536e-05, "loss": 0.0606, "step": 12232 }, { "epoch": 1.72, "learning_rate": 4.713807785888078e-05, "loss": 0.0787, "step": 12234 }, { "epoch": 1.72, "learning_rate": 4.713760995695303e-05, "loss": 0.0747, "step": 12236 }, { "epoch": 1.72, "learning_rate": 4.713714205502527e-05, "loss": 0.077, "step": 12238 }, { "epoch": 1.72, "learning_rate": 4.713667415309751e-05, "loss": 0.0584, "step": 12240 }, { "epoch": 1.72, "learning_rate": 4.713620625116976e-05, "loss": 0.082, "step": 12242 }, { "epoch": 1.72, "learning_rate": 4.7135738349242005e-05, "loss": 0.0814, "step": 12244 }, { "epoch": 1.72, "learning_rate": 4.7135270447314244e-05, "loss": 0.0826, "step": 12246 }, { "epoch": 1.72, "learning_rate": 4.713480254538649e-05, "loss": 0.0768, "step": 12248 }, { "epoch": 1.72, "learning_rate": 4.7134334643458736e-05, "loss": 0.0752, "step": 12250 }, { "epoch": 1.72, "learning_rate": 4.713386674153098e-05, "loss": 0.0939, "step": 12252 }, { "epoch": 1.72, "learning_rate": 4.713339883960322e-05, "loss": 0.0845, "step": 12254 }, { "epoch": 1.72, "learning_rate": 4.713293093767547e-05, "loss": 0.1071, "step": 12256 }, { "epoch": 1.72, "learning_rate": 4.7132463035747706e-05, "loss": 0.0772, "step": 12258 }, { "epoch": 1.72, "learning_rate": 4.713199513381996e-05, "loss": 0.0782, "step": 12260 }, { "epoch": 1.72, "learning_rate": 4.71315272318922e-05, "loss": 0.0823, "step": 12262 }, { "epoch": 1.72, "learning_rate": 4.7131059329964444e-05, "loss": 0.0587, "step": 12264 }, { "epoch": 1.72, "learning_rate": 4.713059142803668e-05, "loss": 0.0788, "step": 12266 }, { "epoch": 1.72, "learning_rate": 4.7130123526108936e-05, "loss": 0.0772, "step": 12268 }, { "epoch": 1.72, "learning_rate": 4.7129655624181175e-05, "loss": 0.0952, "step": 12270 }, { "epoch": 1.72, "learning_rate": 4.712918772225342e-05, "loss": 0.0765, "step": 12272 }, { "epoch": 1.72, "learning_rate": 4.712871982032566e-05, "loss": 0.0812, "step": 12274 }, { "epoch": 1.72, "learning_rate": 4.7128251918397906e-05, "loss": 0.088, "step": 12276 }, { "epoch": 1.72, "learning_rate": 4.712778401647015e-05, "loss": 0.0745, "step": 12278 }, { "epoch": 1.72, "learning_rate": 4.71273161145424e-05, "loss": 0.1049, "step": 12280 }, { "epoch": 1.72, "learning_rate": 4.712684821261464e-05, "loss": 0.0813, "step": 12282 }, { "epoch": 1.72, "learning_rate": 4.712638031068688e-05, "loss": 0.1017, "step": 12284 }, { "epoch": 1.72, "learning_rate": 4.712591240875913e-05, "loss": 0.074, "step": 12286 }, { "epoch": 1.72, "learning_rate": 4.712544450683137e-05, "loss": 0.085, "step": 12288 }, { "epoch": 1.73, "learning_rate": 4.7124976604903614e-05, "loss": 0.1091, "step": 12290 }, { "epoch": 1.73, "learning_rate": 4.712450870297585e-05, "loss": 0.0696, "step": 12292 }, { "epoch": 1.73, "learning_rate": 4.7124040801048105e-05, "loss": 0.056, "step": 12294 }, { "epoch": 1.73, "learning_rate": 4.7123572899120345e-05, "loss": 0.0998, "step": 12296 }, { "epoch": 1.73, "learning_rate": 4.712310499719259e-05, "loss": 0.0656, "step": 12298 }, { "epoch": 1.73, "learning_rate": 4.712263709526483e-05, "loss": 0.0775, "step": 12300 }, { "epoch": 1.73, "learning_rate": 4.712216919333708e-05, "loss": 0.0605, "step": 12302 }, { "epoch": 1.73, "learning_rate": 4.712170129140932e-05, "loss": 0.1085, "step": 12304 }, { "epoch": 1.73, "learning_rate": 4.712123338948157e-05, "loss": 0.0705, "step": 12306 }, { "epoch": 1.73, "learning_rate": 4.7120765487553806e-05, "loss": 0.0794, "step": 12308 }, { "epoch": 1.73, "learning_rate": 4.712029758562605e-05, "loss": 0.0838, "step": 12310 }, { "epoch": 1.73, "learning_rate": 4.71198296836983e-05, "loss": 0.0954, "step": 12312 }, { "epoch": 1.73, "learning_rate": 4.7119361781770544e-05, "loss": 0.0668, "step": 12314 }, { "epoch": 1.73, "learning_rate": 4.711889387984278e-05, "loss": 0.0723, "step": 12316 }, { "epoch": 1.73, "learning_rate": 4.711842597791503e-05, "loss": 0.0811, "step": 12318 }, { "epoch": 1.73, "learning_rate": 4.7117958075987275e-05, "loss": 0.0921, "step": 12320 }, { "epoch": 1.73, "learning_rate": 4.711749017405952e-05, "loss": 0.1172, "step": 12322 }, { "epoch": 1.73, "learning_rate": 4.711702227213176e-05, "loss": 0.0791, "step": 12324 }, { "epoch": 1.73, "learning_rate": 4.7116554370204006e-05, "loss": 0.0773, "step": 12326 }, { "epoch": 1.73, "learning_rate": 4.711608646827625e-05, "loss": 0.0753, "step": 12328 }, { "epoch": 1.73, "learning_rate": 4.71156185663485e-05, "loss": 0.0813, "step": 12330 }, { "epoch": 1.73, "learning_rate": 4.711515066442074e-05, "loss": 0.0911, "step": 12332 }, { "epoch": 1.73, "learning_rate": 4.711468276249298e-05, "loss": 0.1135, "step": 12334 }, { "epoch": 1.73, "learning_rate": 4.711421486056523e-05, "loss": 0.0854, "step": 12336 }, { "epoch": 1.73, "learning_rate": 4.7113746958637475e-05, "loss": 0.0765, "step": 12338 }, { "epoch": 1.73, "learning_rate": 4.7113279056709714e-05, "loss": 0.0631, "step": 12340 }, { "epoch": 1.73, "learning_rate": 4.711281115478196e-05, "loss": 0.0707, "step": 12342 }, { "epoch": 1.73, "learning_rate": 4.71123432528542e-05, "loss": 0.0709, "step": 12344 }, { "epoch": 1.73, "learning_rate": 4.711187535092645e-05, "loss": 0.1139, "step": 12346 }, { "epoch": 1.73, "learning_rate": 4.711140744899869e-05, "loss": 0.0931, "step": 12348 }, { "epoch": 1.73, "learning_rate": 4.711093954707094e-05, "loss": 0.0723, "step": 12350 }, { "epoch": 1.73, "learning_rate": 4.7110471645143176e-05, "loss": 0.082, "step": 12352 }, { "epoch": 1.73, "learning_rate": 4.711000374321542e-05, "loss": 0.0655, "step": 12354 }, { "epoch": 1.73, "learning_rate": 4.710953584128767e-05, "loss": 0.0797, "step": 12356 }, { "epoch": 1.73, "learning_rate": 4.7109067939359914e-05, "loss": 0.0938, "step": 12358 }, { "epoch": 1.73, "learning_rate": 4.710860003743215e-05, "loss": 0.0988, "step": 12360 }, { "epoch": 1.74, "learning_rate": 4.71081321355044e-05, "loss": 0.0772, "step": 12362 }, { "epoch": 1.74, "learning_rate": 4.7107664233576645e-05, "loss": 0.0752, "step": 12364 }, { "epoch": 1.74, "learning_rate": 4.710719633164889e-05, "loss": 0.0749, "step": 12366 }, { "epoch": 1.74, "learning_rate": 4.710672842972113e-05, "loss": 0.087, "step": 12368 }, { "epoch": 1.74, "learning_rate": 4.7106260527793376e-05, "loss": 0.0851, "step": 12370 }, { "epoch": 1.74, "learning_rate": 4.710579262586562e-05, "loss": 0.1129, "step": 12372 }, { "epoch": 1.74, "learning_rate": 4.710532472393787e-05, "loss": 0.0716, "step": 12374 }, { "epoch": 1.74, "learning_rate": 4.7104856822010106e-05, "loss": 0.0682, "step": 12376 }, { "epoch": 1.74, "learning_rate": 4.710438892008235e-05, "loss": 0.0663, "step": 12378 }, { "epoch": 1.74, "learning_rate": 4.71039210181546e-05, "loss": 0.0867, "step": 12380 }, { "epoch": 1.74, "learning_rate": 4.7103453116226844e-05, "loss": 0.0715, "step": 12382 }, { "epoch": 1.74, "learning_rate": 4.710298521429908e-05, "loss": 0.0789, "step": 12384 }, { "epoch": 1.74, "learning_rate": 4.710251731237133e-05, "loss": 0.0809, "step": 12386 }, { "epoch": 1.74, "learning_rate": 4.710204941044357e-05, "loss": 0.0786, "step": 12388 }, { "epoch": 1.74, "learning_rate": 4.710158150851582e-05, "loss": 0.0854, "step": 12390 }, { "epoch": 1.74, "learning_rate": 4.710111360658806e-05, "loss": 0.1066, "step": 12392 }, { "epoch": 1.74, "learning_rate": 4.7100645704660306e-05, "loss": 0.0754, "step": 12394 }, { "epoch": 1.74, "learning_rate": 4.7100177802732545e-05, "loss": 0.0738, "step": 12396 }, { "epoch": 1.74, "learning_rate": 4.70997099008048e-05, "loss": 0.0855, "step": 12398 }, { "epoch": 1.74, "learning_rate": 4.709924199887704e-05, "loss": 0.0652, "step": 12400 }, { "epoch": 1.74, "learning_rate": 4.709877409694928e-05, "loss": 0.0815, "step": 12402 }, { "epoch": 1.74, "learning_rate": 4.709830619502152e-05, "loss": 0.0983, "step": 12404 }, { "epoch": 1.74, "learning_rate": 4.709783829309377e-05, "loss": 0.0887, "step": 12406 }, { "epoch": 1.74, "learning_rate": 4.7097370391166014e-05, "loss": 0.0949, "step": 12408 }, { "epoch": 1.74, "learning_rate": 4.709690248923826e-05, "loss": 0.0763, "step": 12410 }, { "epoch": 1.74, "learning_rate": 4.70964345873105e-05, "loss": 0.0612, "step": 12412 }, { "epoch": 1.74, "learning_rate": 4.7095966685382745e-05, "loss": 0.0726, "step": 12414 }, { "epoch": 1.74, "learning_rate": 4.709549878345499e-05, "loss": 0.0978, "step": 12416 }, { "epoch": 1.74, "learning_rate": 4.709503088152724e-05, "loss": 0.0781, "step": 12418 }, { "epoch": 1.74, "learning_rate": 4.7094562979599476e-05, "loss": 0.1019, "step": 12420 }, { "epoch": 1.74, "learning_rate": 4.709409507767172e-05, "loss": 0.074, "step": 12422 }, { "epoch": 1.74, "learning_rate": 4.709362717574397e-05, "loss": 0.0949, "step": 12424 }, { "epoch": 1.74, "learning_rate": 4.7093159273816214e-05, "loss": 0.0798, "step": 12426 }, { "epoch": 1.74, "learning_rate": 4.709269137188845e-05, "loss": 0.0883, "step": 12428 }, { "epoch": 1.74, "learning_rate": 4.70922234699607e-05, "loss": 0.0895, "step": 12430 }, { "epoch": 1.75, "learning_rate": 4.7091755568032945e-05, "loss": 0.0651, "step": 12432 }, { "epoch": 1.75, "learning_rate": 4.709128766610519e-05, "loss": 0.0896, "step": 12434 }, { "epoch": 1.75, "learning_rate": 4.709081976417743e-05, "loss": 0.0778, "step": 12436 }, { "epoch": 1.75, "learning_rate": 4.7090351862249676e-05, "loss": 0.0979, "step": 12438 }, { "epoch": 1.75, "learning_rate": 4.7089883960321915e-05, "loss": 0.0776, "step": 12440 }, { "epoch": 1.75, "learning_rate": 4.708941605839417e-05, "loss": 0.0702, "step": 12442 }, { "epoch": 1.75, "learning_rate": 4.7088948156466407e-05, "loss": 0.0927, "step": 12444 }, { "epoch": 1.75, "learning_rate": 4.708848025453865e-05, "loss": 0.1335, "step": 12446 }, { "epoch": 1.75, "learning_rate": 4.708801235261089e-05, "loss": 0.0757, "step": 12448 }, { "epoch": 1.75, "learning_rate": 4.7087544450683144e-05, "loss": 0.0841, "step": 12450 }, { "epoch": 1.75, "learning_rate": 4.708707654875538e-05, "loss": 0.1079, "step": 12452 }, { "epoch": 1.75, "learning_rate": 4.708660864682763e-05, "loss": 0.0901, "step": 12454 }, { "epoch": 1.75, "learning_rate": 4.708614074489987e-05, "loss": 0.0774, "step": 12456 }, { "epoch": 1.75, "learning_rate": 4.7085672842972114e-05, "loss": 0.0933, "step": 12458 }, { "epoch": 1.75, "learning_rate": 4.708520494104436e-05, "loss": 0.0999, "step": 12460 }, { "epoch": 1.75, "learning_rate": 4.7084737039116606e-05, "loss": 0.0647, "step": 12462 }, { "epoch": 1.75, "learning_rate": 4.7084269137188845e-05, "loss": 0.1042, "step": 12464 }, { "epoch": 1.75, "learning_rate": 4.708380123526109e-05, "loss": 0.0931, "step": 12466 }, { "epoch": 1.75, "learning_rate": 4.708333333333334e-05, "loss": 0.087, "step": 12468 }, { "epoch": 1.75, "learning_rate": 4.708286543140558e-05, "loss": 0.0654, "step": 12470 }, { "epoch": 1.75, "learning_rate": 4.708239752947782e-05, "loss": 0.1034, "step": 12472 }, { "epoch": 1.75, "learning_rate": 4.708192962755007e-05, "loss": 0.0683, "step": 12474 }, { "epoch": 1.75, "learning_rate": 4.7081461725622314e-05, "loss": 0.0623, "step": 12476 }, { "epoch": 1.75, "learning_rate": 4.708099382369456e-05, "loss": 0.0858, "step": 12478 }, { "epoch": 1.75, "learning_rate": 4.70805259217668e-05, "loss": 0.0856, "step": 12480 }, { "epoch": 1.75, "learning_rate": 4.7080058019839045e-05, "loss": 0.0915, "step": 12482 }, { "epoch": 1.75, "learning_rate": 4.707959011791129e-05, "loss": 0.0699, "step": 12484 }, { "epoch": 1.75, "learning_rate": 4.707912221598354e-05, "loss": 0.0971, "step": 12486 }, { "epoch": 1.75, "learning_rate": 4.7078654314055776e-05, "loss": 0.0924, "step": 12488 }, { "epoch": 1.75, "learning_rate": 4.707818641212802e-05, "loss": 0.079, "step": 12490 }, { "epoch": 1.75, "learning_rate": 4.707771851020026e-05, "loss": 0.1105, "step": 12492 }, { "epoch": 1.75, "learning_rate": 4.7077250608272514e-05, "loss": 0.0796, "step": 12494 }, { "epoch": 1.75, "learning_rate": 4.707678270634475e-05, "loss": 0.0816, "step": 12496 }, { "epoch": 1.75, "learning_rate": 4.7076314804417e-05, "loss": 0.0782, "step": 12498 }, { "epoch": 1.75, "learning_rate": 4.707584690248924e-05, "loss": 0.0678, "step": 12500 }, { "epoch": 1.75, "learning_rate": 4.7075379000561484e-05, "loss": 0.0701, "step": 12502 }, { "epoch": 1.76, "learning_rate": 4.707491109863373e-05, "loss": 0.0743, "step": 12504 }, { "epoch": 1.76, "learning_rate": 4.7074443196705976e-05, "loss": 0.0765, "step": 12506 }, { "epoch": 1.76, "learning_rate": 4.7073975294778215e-05, "loss": 0.0664, "step": 12508 }, { "epoch": 1.76, "learning_rate": 4.707350739285046e-05, "loss": 0.0884, "step": 12510 }, { "epoch": 1.76, "learning_rate": 4.7073039490922707e-05, "loss": 0.0774, "step": 12512 }, { "epoch": 1.76, "learning_rate": 4.707257158899495e-05, "loss": 0.0648, "step": 12514 }, { "epoch": 1.76, "learning_rate": 4.707210368706719e-05, "loss": 0.0909, "step": 12516 }, { "epoch": 1.76, "learning_rate": 4.707163578513944e-05, "loss": 0.1022, "step": 12518 }, { "epoch": 1.76, "learning_rate": 4.7071167883211683e-05, "loss": 0.0714, "step": 12520 }, { "epoch": 1.76, "learning_rate": 4.707069998128393e-05, "loss": 0.0633, "step": 12522 }, { "epoch": 1.76, "learning_rate": 4.707023207935617e-05, "loss": 0.0935, "step": 12524 }, { "epoch": 1.76, "learning_rate": 4.7069764177428414e-05, "loss": 0.0655, "step": 12526 }, { "epoch": 1.76, "learning_rate": 4.706929627550066e-05, "loss": 0.0806, "step": 12528 }, { "epoch": 1.76, "learning_rate": 4.7068828373572906e-05, "loss": 0.0893, "step": 12530 }, { "epoch": 1.76, "learning_rate": 4.7068360471645145e-05, "loss": 0.079, "step": 12532 }, { "epoch": 1.76, "learning_rate": 4.706789256971739e-05, "loss": 0.0798, "step": 12534 }, { "epoch": 1.76, "learning_rate": 4.706742466778963e-05, "loss": 0.0623, "step": 12536 }, { "epoch": 1.76, "learning_rate": 4.7066956765861876e-05, "loss": 0.0825, "step": 12538 }, { "epoch": 1.76, "learning_rate": 4.706648886393412e-05, "loss": 0.0769, "step": 12540 }, { "epoch": 1.76, "learning_rate": 4.706602096200636e-05, "loss": 0.0712, "step": 12542 }, { "epoch": 1.76, "learning_rate": 4.706555306007861e-05, "loss": 0.0721, "step": 12544 }, { "epoch": 1.76, "learning_rate": 4.706508515815085e-05, "loss": 0.0735, "step": 12546 }, { "epoch": 1.76, "learning_rate": 4.70646172562231e-05, "loss": 0.0798, "step": 12548 }, { "epoch": 1.76, "learning_rate": 4.706414935429534e-05, "loss": 0.11, "step": 12550 }, { "epoch": 1.76, "learning_rate": 4.7063681452367584e-05, "loss": 0.085, "step": 12552 }, { "epoch": 1.76, "learning_rate": 4.706321355043983e-05, "loss": 0.0641, "step": 12554 }, { "epoch": 1.76, "learning_rate": 4.7062745648512076e-05, "loss": 0.0903, "step": 12556 }, { "epoch": 1.76, "learning_rate": 4.7062277746584315e-05, "loss": 0.0814, "step": 12558 }, { "epoch": 1.76, "learning_rate": 4.706180984465656e-05, "loss": 0.075, "step": 12560 }, { "epoch": 1.76, "learning_rate": 4.706134194272881e-05, "loss": 0.0837, "step": 12562 }, { "epoch": 1.76, "learning_rate": 4.706087404080105e-05, "loss": 0.0807, "step": 12564 }, { "epoch": 1.76, "learning_rate": 4.706040613887329e-05, "loss": 0.102, "step": 12566 }, { "epoch": 1.76, "learning_rate": 4.705993823694554e-05, "loss": 0.0906, "step": 12568 }, { "epoch": 1.76, "learning_rate": 4.705947033501778e-05, "loss": 0.081, "step": 12570 }, { "epoch": 1.76, "learning_rate": 4.705900243309003e-05, "loss": 0.0921, "step": 12572 }, { "epoch": 1.77, "learning_rate": 4.705853453116227e-05, "loss": 0.093, "step": 12574 }, { "epoch": 1.77, "learning_rate": 4.7058066629234515e-05, "loss": 0.0976, "step": 12576 }, { "epoch": 1.77, "learning_rate": 4.7057598727306754e-05, "loss": 0.0803, "step": 12578 }, { "epoch": 1.77, "learning_rate": 4.7057130825379007e-05, "loss": 0.0962, "step": 12580 }, { "epoch": 1.77, "learning_rate": 4.7056662923451246e-05, "loss": 0.0884, "step": 12582 }, { "epoch": 1.77, "learning_rate": 4.705619502152349e-05, "loss": 0.0813, "step": 12584 }, { "epoch": 1.77, "learning_rate": 4.705572711959573e-05, "loss": 0.07, "step": 12586 }, { "epoch": 1.77, "learning_rate": 4.705525921766798e-05, "loss": 0.0996, "step": 12588 }, { "epoch": 1.77, "learning_rate": 4.705479131574022e-05, "loss": 0.0847, "step": 12590 }, { "epoch": 1.77, "learning_rate": 4.705432341381247e-05, "loss": 0.0983, "step": 12592 }, { "epoch": 1.77, "learning_rate": 4.705385551188471e-05, "loss": 0.0751, "step": 12594 }, { "epoch": 1.77, "learning_rate": 4.7053387609956954e-05, "loss": 0.081, "step": 12596 }, { "epoch": 1.77, "learning_rate": 4.70529197080292e-05, "loss": 0.0744, "step": 12598 }, { "epoch": 1.77, "learning_rate": 4.7052451806101445e-05, "loss": 0.0707, "step": 12600 }, { "epoch": 1.77, "learning_rate": 4.7051983904173684e-05, "loss": 0.0723, "step": 12602 }, { "epoch": 1.77, "learning_rate": 4.705151600224593e-05, "loss": 0.0709, "step": 12604 }, { "epoch": 1.77, "learning_rate": 4.7051048100318176e-05, "loss": 0.0801, "step": 12606 }, { "epoch": 1.77, "learning_rate": 4.705058019839042e-05, "loss": 0.0836, "step": 12608 }, { "epoch": 1.77, "learning_rate": 4.705011229646266e-05, "loss": 0.0852, "step": 12610 }, { "epoch": 1.77, "learning_rate": 4.704964439453491e-05, "loss": 0.0622, "step": 12612 }, { "epoch": 1.77, "learning_rate": 4.704917649260715e-05, "loss": 0.0797, "step": 12614 }, { "epoch": 1.77, "learning_rate": 4.70487085906794e-05, "loss": 0.084, "step": 12616 }, { "epoch": 1.77, "learning_rate": 4.704824068875164e-05, "loss": 0.0826, "step": 12618 }, { "epoch": 1.77, "learning_rate": 4.7047772786823884e-05, "loss": 0.1171, "step": 12620 }, { "epoch": 1.77, "learning_rate": 4.704730488489612e-05, "loss": 0.0766, "step": 12622 }, { "epoch": 1.77, "learning_rate": 4.7046836982968376e-05, "loss": 0.0831, "step": 12624 }, { "epoch": 1.77, "learning_rate": 4.7046369081040615e-05, "loss": 0.098, "step": 12626 }, { "epoch": 1.77, "learning_rate": 4.704590117911286e-05, "loss": 0.0922, "step": 12628 }, { "epoch": 1.77, "learning_rate": 4.70454332771851e-05, "loss": 0.0845, "step": 12630 }, { "epoch": 1.77, "learning_rate": 4.7044965375257346e-05, "loss": 0.0798, "step": 12632 }, { "epoch": 1.77, "learning_rate": 4.704449747332959e-05, "loss": 0.0753, "step": 12634 }, { "epoch": 1.77, "learning_rate": 4.704402957140184e-05, "loss": 0.0997, "step": 12636 }, { "epoch": 1.77, "learning_rate": 4.704356166947408e-05, "loss": 0.0875, "step": 12638 }, { "epoch": 1.77, "learning_rate": 4.704309376754632e-05, "loss": 0.0841, "step": 12640 }, { "epoch": 1.77, "learning_rate": 4.704262586561857e-05, "loss": 0.0661, "step": 12642 }, { "epoch": 1.77, "learning_rate": 4.7042157963690815e-05, "loss": 0.1243, "step": 12644 }, { "epoch": 1.78, "learning_rate": 4.7041690061763054e-05, "loss": 0.0647, "step": 12646 }, { "epoch": 1.78, "learning_rate": 4.70412221598353e-05, "loss": 0.0703, "step": 12648 }, { "epoch": 1.78, "learning_rate": 4.7040754257907546e-05, "loss": 0.0813, "step": 12650 }, { "epoch": 1.78, "learning_rate": 4.704028635597979e-05, "loss": 0.0737, "step": 12652 }, { "epoch": 1.78, "learning_rate": 4.703981845405203e-05, "loss": 0.0834, "step": 12654 }, { "epoch": 1.78, "learning_rate": 4.703935055212428e-05, "loss": 0.0771, "step": 12656 }, { "epoch": 1.78, "learning_rate": 4.703888265019652e-05, "loss": 0.0748, "step": 12658 }, { "epoch": 1.78, "learning_rate": 4.703841474826877e-05, "loss": 0.0708, "step": 12660 }, { "epoch": 1.78, "learning_rate": 4.703794684634101e-05, "loss": 0.0635, "step": 12662 }, { "epoch": 1.78, "learning_rate": 4.7037478944413254e-05, "loss": 0.0806, "step": 12664 }, { "epoch": 1.78, "learning_rate": 4.703701104248549e-05, "loss": 0.0738, "step": 12666 }, { "epoch": 1.78, "learning_rate": 4.7036543140557745e-05, "loss": 0.0633, "step": 12668 }, { "epoch": 1.78, "learning_rate": 4.7036075238629984e-05, "loss": 0.1057, "step": 12670 }, { "epoch": 1.78, "learning_rate": 4.703560733670223e-05, "loss": 0.0974, "step": 12672 }, { "epoch": 1.78, "learning_rate": 4.703513943477447e-05, "loss": 0.0805, "step": 12674 }, { "epoch": 1.78, "learning_rate": 4.703467153284672e-05, "loss": 0.1314, "step": 12676 }, { "epoch": 1.78, "learning_rate": 4.703420363091896e-05, "loss": 0.068, "step": 12678 }, { "epoch": 1.78, "learning_rate": 4.703373572899121e-05, "loss": 0.1, "step": 12680 }, { "epoch": 1.78, "learning_rate": 4.7033267827063446e-05, "loss": 0.1086, "step": 12682 }, { "epoch": 1.78, "learning_rate": 4.703279992513569e-05, "loss": 0.0704, "step": 12684 }, { "epoch": 1.78, "learning_rate": 4.703233202320794e-05, "loss": 0.0832, "step": 12686 }, { "epoch": 1.78, "learning_rate": 4.7031864121280184e-05, "loss": 0.0761, "step": 12688 }, { "epoch": 1.78, "learning_rate": 4.703139621935242e-05, "loss": 0.0811, "step": 12690 }, { "epoch": 1.78, "learning_rate": 4.703092831742467e-05, "loss": 0.0842, "step": 12692 }, { "epoch": 1.78, "learning_rate": 4.7030460415496915e-05, "loss": 0.0915, "step": 12694 }, { "epoch": 1.78, "learning_rate": 4.702999251356916e-05, "loss": 0.0922, "step": 12696 }, { "epoch": 1.78, "learning_rate": 4.70295246116414e-05, "loss": 0.0962, "step": 12698 }, { "epoch": 1.78, "learning_rate": 4.7029056709713646e-05, "loss": 0.0808, "step": 12700 }, { "epoch": 1.78, "learning_rate": 4.702858880778589e-05, "loss": 0.0769, "step": 12702 }, { "epoch": 1.78, "learning_rate": 4.702812090585814e-05, "loss": 0.0768, "step": 12704 }, { "epoch": 1.78, "learning_rate": 4.702765300393038e-05, "loss": 0.1067, "step": 12706 }, { "epoch": 1.78, "learning_rate": 4.702718510200262e-05, "loss": 0.0711, "step": 12708 }, { "epoch": 1.78, "learning_rate": 4.702671720007487e-05, "loss": 0.0673, "step": 12710 }, { "epoch": 1.78, "learning_rate": 4.7026249298147115e-05, "loss": 0.087, "step": 12712 }, { "epoch": 1.78, "learning_rate": 4.7025781396219354e-05, "loss": 0.0766, "step": 12714 }, { "epoch": 1.78, "learning_rate": 4.70253134942916e-05, "loss": 0.079, "step": 12716 }, { "epoch": 1.79, "learning_rate": 4.702484559236384e-05, "loss": 0.0754, "step": 12718 }, { "epoch": 1.79, "learning_rate": 4.702437769043609e-05, "loss": 0.073, "step": 12720 }, { "epoch": 1.79, "learning_rate": 4.702390978850833e-05, "loss": 0.066, "step": 12722 }, { "epoch": 1.79, "learning_rate": 4.702344188658058e-05, "loss": 0.0853, "step": 12724 }, { "epoch": 1.79, "learning_rate": 4.7022973984652816e-05, "loss": 0.0668, "step": 12726 }, { "epoch": 1.79, "learning_rate": 4.702250608272507e-05, "loss": 0.0669, "step": 12728 }, { "epoch": 1.79, "learning_rate": 4.702203818079731e-05, "loss": 0.0992, "step": 12730 }, { "epoch": 1.79, "learning_rate": 4.7021570278869554e-05, "loss": 0.0826, "step": 12732 }, { "epoch": 1.79, "learning_rate": 4.702110237694179e-05, "loss": 0.0781, "step": 12734 }, { "epoch": 1.79, "learning_rate": 4.702063447501404e-05, "loss": 0.0865, "step": 12736 }, { "epoch": 1.79, "learning_rate": 4.7020166573086284e-05, "loss": 0.0927, "step": 12738 }, { "epoch": 1.79, "learning_rate": 4.701969867115853e-05, "loss": 0.0905, "step": 12740 }, { "epoch": 1.79, "learning_rate": 4.701923076923077e-05, "loss": 0.0795, "step": 12742 }, { "epoch": 1.79, "learning_rate": 4.7018762867303015e-05, "loss": 0.0623, "step": 12744 }, { "epoch": 1.79, "learning_rate": 4.701829496537526e-05, "loss": 0.0874, "step": 12746 }, { "epoch": 1.79, "learning_rate": 4.701782706344751e-05, "loss": 0.0702, "step": 12748 }, { "epoch": 1.79, "learning_rate": 4.7017359161519746e-05, "loss": 0.0783, "step": 12750 }, { "epoch": 1.79, "learning_rate": 4.701689125959199e-05, "loss": 0.0527, "step": 12752 }, { "epoch": 1.79, "learning_rate": 4.701642335766424e-05, "loss": 0.0763, "step": 12754 }, { "epoch": 1.79, "learning_rate": 4.7015955455736484e-05, "loss": 0.0902, "step": 12756 }, { "epoch": 1.79, "learning_rate": 4.701548755380872e-05, "loss": 0.0715, "step": 12758 }, { "epoch": 1.79, "learning_rate": 4.701501965188097e-05, "loss": 0.0658, "step": 12760 }, { "epoch": 1.79, "learning_rate": 4.7014551749953215e-05, "loss": 0.0704, "step": 12762 }, { "epoch": 1.79, "learning_rate": 4.701408384802546e-05, "loss": 0.0947, "step": 12764 }, { "epoch": 1.79, "learning_rate": 4.70136159460977e-05, "loss": 0.0937, "step": 12766 }, { "epoch": 1.79, "learning_rate": 4.7013148044169946e-05, "loss": 0.0731, "step": 12768 }, { "epoch": 1.79, "learning_rate": 4.7012680142242185e-05, "loss": 0.0846, "step": 12770 }, { "epoch": 1.79, "learning_rate": 4.701221224031444e-05, "loss": 0.0867, "step": 12772 }, { "epoch": 1.79, "learning_rate": 4.701174433838668e-05, "loss": 0.0654, "step": 12774 }, { "epoch": 1.79, "learning_rate": 4.701127643645892e-05, "loss": 0.086, "step": 12776 }, { "epoch": 1.79, "learning_rate": 4.701080853453116e-05, "loss": 0.0658, "step": 12778 }, { "epoch": 1.79, "learning_rate": 4.701034063260341e-05, "loss": 0.0805, "step": 12780 }, { "epoch": 1.79, "learning_rate": 4.7009872730675654e-05, "loss": 0.0665, "step": 12782 }, { "epoch": 1.79, "learning_rate": 4.70094048287479e-05, "loss": 0.0894, "step": 12784 }, { "epoch": 1.79, "learning_rate": 4.700893692682014e-05, "loss": 0.0769, "step": 12786 }, { "epoch": 1.8, "learning_rate": 4.7008469024892385e-05, "loss": 0.0898, "step": 12788 }, { "epoch": 1.8, "learning_rate": 4.700800112296463e-05, "loss": 0.0977, "step": 12790 }, { "epoch": 1.8, "learning_rate": 4.700753322103687e-05, "loss": 0.0745, "step": 12792 }, { "epoch": 1.8, "learning_rate": 4.7007065319109116e-05, "loss": 0.0907, "step": 12794 }, { "epoch": 1.8, "learning_rate": 4.700659741718136e-05, "loss": 0.0701, "step": 12796 }, { "epoch": 1.8, "learning_rate": 4.700612951525361e-05, "loss": 0.0849, "step": 12798 }, { "epoch": 1.8, "learning_rate": 4.700566161332585e-05, "loss": 0.1005, "step": 12800 }, { "epoch": 1.8, "learning_rate": 4.700519371139809e-05, "loss": 0.0653, "step": 12802 }, { "epoch": 1.8, "learning_rate": 4.700472580947033e-05, "loss": 0.0792, "step": 12804 }, { "epoch": 1.8, "learning_rate": 4.7004257907542585e-05, "loss": 0.1003, "step": 12806 }, { "epoch": 1.8, "learning_rate": 4.7003790005614824e-05, "loss": 0.1019, "step": 12808 }, { "epoch": 1.8, "learning_rate": 4.700332210368707e-05, "loss": 0.1073, "step": 12810 }, { "epoch": 1.8, "learning_rate": 4.700285420175931e-05, "loss": 0.0973, "step": 12812 }, { "epoch": 1.8, "learning_rate": 4.7002386299831555e-05, "loss": 0.1095, "step": 12814 }, { "epoch": 1.8, "learning_rate": 4.70019183979038e-05, "loss": 0.0908, "step": 12816 }, { "epoch": 1.8, "learning_rate": 4.7001450495976046e-05, "loss": 0.1097, "step": 12818 }, { "epoch": 1.8, "learning_rate": 4.7000982594048286e-05, "loss": 0.088, "step": 12820 }, { "epoch": 1.8, "learning_rate": 4.700051469212053e-05, "loss": 0.0861, "step": 12822 }, { "epoch": 1.8, "learning_rate": 4.700004679019278e-05, "loss": 0.0737, "step": 12824 }, { "epoch": 1.8, "learning_rate": 4.699957888826502e-05, "loss": 0.082, "step": 12826 }, { "epoch": 1.8, "learning_rate": 4.699911098633726e-05, "loss": 0.0848, "step": 12828 }, { "epoch": 1.8, "learning_rate": 4.699864308440951e-05, "loss": 0.1033, "step": 12830 }, { "epoch": 1.8, "learning_rate": 4.6998175182481754e-05, "loss": 0.091, "step": 12832 }, { "epoch": 1.8, "learning_rate": 4.6997707280554e-05, "loss": 0.0813, "step": 12834 }, { "epoch": 1.8, "learning_rate": 4.699723937862624e-05, "loss": 0.0833, "step": 12836 }, { "epoch": 1.8, "learning_rate": 4.6996771476698485e-05, "loss": 0.0817, "step": 12838 }, { "epoch": 1.8, "learning_rate": 4.699630357477073e-05, "loss": 0.07, "step": 12840 }, { "epoch": 1.8, "learning_rate": 4.699583567284298e-05, "loss": 0.0913, "step": 12842 }, { "epoch": 1.8, "learning_rate": 4.6995367770915216e-05, "loss": 0.0844, "step": 12844 }, { "epoch": 1.8, "learning_rate": 4.699489986898746e-05, "loss": 0.0987, "step": 12846 }, { "epoch": 1.8, "learning_rate": 4.69944319670597e-05, "loss": 0.0743, "step": 12848 }, { "epoch": 1.8, "learning_rate": 4.6993964065131954e-05, "loss": 0.1001, "step": 12850 }, { "epoch": 1.8, "learning_rate": 4.699349616320419e-05, "loss": 0.0953, "step": 12852 }, { "epoch": 1.8, "learning_rate": 4.699302826127644e-05, "loss": 0.0662, "step": 12854 }, { "epoch": 1.8, "learning_rate": 4.699256035934868e-05, "loss": 0.0551, "step": 12856 }, { "epoch": 1.8, "learning_rate": 4.699209245742093e-05, "loss": 0.101, "step": 12858 }, { "epoch": 1.81, "learning_rate": 4.699162455549317e-05, "loss": 0.083, "step": 12860 }, { "epoch": 1.81, "learning_rate": 4.6991156653565416e-05, "loss": 0.077, "step": 12862 }, { "epoch": 1.81, "learning_rate": 4.6990688751637655e-05, "loss": 0.0892, "step": 12864 }, { "epoch": 1.81, "learning_rate": 4.69902208497099e-05, "loss": 0.0972, "step": 12866 }, { "epoch": 1.81, "learning_rate": 4.698975294778215e-05, "loss": 0.0699, "step": 12868 }, { "epoch": 1.81, "learning_rate": 4.698928504585439e-05, "loss": 0.0878, "step": 12870 }, { "epoch": 1.81, "learning_rate": 4.698881714392663e-05, "loss": 0.0886, "step": 12872 }, { "epoch": 1.81, "learning_rate": 4.698834924199888e-05, "loss": 0.0696, "step": 12874 }, { "epoch": 1.81, "learning_rate": 4.6987881340071124e-05, "loss": 0.0991, "step": 12876 }, { "epoch": 1.81, "learning_rate": 4.698741343814337e-05, "loss": 0.0653, "step": 12878 }, { "epoch": 1.81, "learning_rate": 4.698694553621561e-05, "loss": 0.074, "step": 12880 }, { "epoch": 1.81, "learning_rate": 4.6986477634287855e-05, "loss": 0.0856, "step": 12882 }, { "epoch": 1.81, "learning_rate": 4.69860097323601e-05, "loss": 0.0951, "step": 12884 }, { "epoch": 1.81, "learning_rate": 4.6985541830432346e-05, "loss": 0.0908, "step": 12886 }, { "epoch": 1.81, "learning_rate": 4.6985073928504586e-05, "loss": 0.0951, "step": 12888 }, { "epoch": 1.81, "learning_rate": 4.698460602657683e-05, "loss": 0.0747, "step": 12890 }, { "epoch": 1.81, "learning_rate": 4.698413812464908e-05, "loss": 0.0823, "step": 12892 }, { "epoch": 1.81, "learning_rate": 4.698367022272132e-05, "loss": 0.0784, "step": 12894 }, { "epoch": 1.81, "learning_rate": 4.698320232079356e-05, "loss": 0.0788, "step": 12896 }, { "epoch": 1.81, "learning_rate": 4.698273441886581e-05, "loss": 0.0754, "step": 12898 }, { "epoch": 1.81, "learning_rate": 4.698226651693805e-05, "loss": 0.08, "step": 12900 }, { "epoch": 1.81, "learning_rate": 4.69817986150103e-05, "loss": 0.1191, "step": 12902 }, { "epoch": 1.81, "learning_rate": 4.698133071308254e-05, "loss": 0.0671, "step": 12904 }, { "epoch": 1.81, "learning_rate": 4.6980862811154785e-05, "loss": 0.0759, "step": 12906 }, { "epoch": 1.81, "learning_rate": 4.6980394909227024e-05, "loss": 0.0832, "step": 12908 }, { "epoch": 1.81, "learning_rate": 4.697992700729927e-05, "loss": 0.0873, "step": 12910 }, { "epoch": 1.81, "learning_rate": 4.6979459105371516e-05, "loss": 0.0718, "step": 12912 }, { "epoch": 1.81, "learning_rate": 4.697899120344376e-05, "loss": 0.0744, "step": 12914 }, { "epoch": 1.81, "learning_rate": 4.6978523301516e-05, "loss": 0.0865, "step": 12916 }, { "epoch": 1.81, "learning_rate": 4.697805539958825e-05, "loss": 0.0906, "step": 12918 }, { "epoch": 1.81, "learning_rate": 4.697758749766049e-05, "loss": 0.1134, "step": 12920 }, { "epoch": 1.81, "learning_rate": 4.697711959573274e-05, "loss": 0.0586, "step": 12922 }, { "epoch": 1.81, "learning_rate": 4.697665169380498e-05, "loss": 0.0672, "step": 12924 }, { "epoch": 1.81, "learning_rate": 4.6976183791877224e-05, "loss": 0.0727, "step": 12926 }, { "epoch": 1.81, "learning_rate": 4.697571588994947e-05, "loss": 0.0823, "step": 12928 }, { "epoch": 1.81, "learning_rate": 4.6975247988021716e-05, "loss": 0.0665, "step": 12930 }, { "epoch": 1.82, "learning_rate": 4.6974780086093955e-05, "loss": 0.0841, "step": 12932 }, { "epoch": 1.82, "learning_rate": 4.69743121841662e-05, "loss": 0.073, "step": 12934 }, { "epoch": 1.82, "learning_rate": 4.697384428223845e-05, "loss": 0.0805, "step": 12936 }, { "epoch": 1.82, "learning_rate": 4.697337638031069e-05, "loss": 0.0971, "step": 12938 }, { "epoch": 1.82, "learning_rate": 4.697290847838293e-05, "loss": 0.0782, "step": 12940 }, { "epoch": 1.82, "learning_rate": 4.697244057645518e-05, "loss": 0.0666, "step": 12942 }, { "epoch": 1.82, "learning_rate": 4.697197267452742e-05, "loss": 0.0874, "step": 12944 }, { "epoch": 1.82, "learning_rate": 4.697150477259967e-05, "loss": 0.087, "step": 12946 }, { "epoch": 1.82, "learning_rate": 4.697103687067191e-05, "loss": 0.067, "step": 12948 }, { "epoch": 1.82, "learning_rate": 4.6970568968744155e-05, "loss": 0.0841, "step": 12950 }, { "epoch": 1.82, "learning_rate": 4.6970101066816394e-05, "loss": 0.0915, "step": 12952 }, { "epoch": 1.82, "learning_rate": 4.6969633164888646e-05, "loss": 0.0671, "step": 12954 }, { "epoch": 1.82, "learning_rate": 4.6969165262960886e-05, "loss": 0.0947, "step": 12956 }, { "epoch": 1.82, "learning_rate": 4.696869736103313e-05, "loss": 0.0843, "step": 12958 }, { "epoch": 1.82, "learning_rate": 4.696822945910537e-05, "loss": 0.0736, "step": 12960 }, { "epoch": 1.82, "learning_rate": 4.6967761557177617e-05, "loss": 0.0629, "step": 12962 }, { "epoch": 1.82, "learning_rate": 4.696729365524986e-05, "loss": 0.119, "step": 12964 }, { "epoch": 1.82, "learning_rate": 4.696682575332211e-05, "loss": 0.0911, "step": 12966 }, { "epoch": 1.82, "learning_rate": 4.696635785139435e-05, "loss": 0.0837, "step": 12968 }, { "epoch": 1.82, "learning_rate": 4.6965889949466593e-05, "loss": 0.0801, "step": 12970 }, { "epoch": 1.82, "learning_rate": 4.696542204753884e-05, "loss": 0.07, "step": 12972 }, { "epoch": 1.82, "learning_rate": 4.6964954145611085e-05, "loss": 0.0721, "step": 12974 }, { "epoch": 1.82, "learning_rate": 4.6964486243683324e-05, "loss": 0.0727, "step": 12976 }, { "epoch": 1.82, "learning_rate": 4.696401834175557e-05, "loss": 0.092, "step": 12978 }, { "epoch": 1.82, "learning_rate": 4.6963550439827816e-05, "loss": 0.0906, "step": 12980 }, { "epoch": 1.82, "learning_rate": 4.696308253790006e-05, "loss": 0.0842, "step": 12982 }, { "epoch": 1.82, "learning_rate": 4.69626146359723e-05, "loss": 0.0721, "step": 12984 }, { "epoch": 1.82, "learning_rate": 4.696214673404455e-05, "loss": 0.0861, "step": 12986 }, { "epoch": 1.82, "learning_rate": 4.696167883211679e-05, "loss": 0.0657, "step": 12988 }, { "epoch": 1.82, "learning_rate": 4.696121093018904e-05, "loss": 0.0798, "step": 12990 }, { "epoch": 1.82, "learning_rate": 4.696074302826128e-05, "loss": 0.1028, "step": 12992 }, { "epoch": 1.82, "learning_rate": 4.6960275126333524e-05, "loss": 0.0893, "step": 12994 }, { "epoch": 1.82, "learning_rate": 4.695980722440576e-05, "loss": 0.0804, "step": 12996 }, { "epoch": 1.82, "learning_rate": 4.6959339322478016e-05, "loss": 0.071, "step": 12998 }, { "epoch": 1.82, "learning_rate": 4.6958871420550255e-05, "loss": 0.0995, "step": 13000 }, { "epoch": 1.82, "eval_gen_len": 29.2069, "eval_loss": 1.0517786741256714, "eval_meteor": 0.0455, "eval_runtime": 14.382, "eval_samples_per_second": 4.033, "eval_steps_per_second": 0.556, "step": 13000 }, { "epoch": 1.83, "learning_rate": 4.69584035186225e-05, "loss": 0.0864, "step": 13002 }, { "epoch": 1.83, "learning_rate": 4.695793561669474e-05, "loss": 0.062, "step": 13004 }, { "epoch": 1.83, "learning_rate": 4.695746771476699e-05, "loss": 0.0822, "step": 13006 }, { "epoch": 1.83, "learning_rate": 4.695699981283923e-05, "loss": 0.071, "step": 13008 }, { "epoch": 1.83, "learning_rate": 4.695653191091148e-05, "loss": 0.1013, "step": 13010 }, { "epoch": 1.83, "learning_rate": 4.695606400898372e-05, "loss": 0.0641, "step": 13012 }, { "epoch": 1.83, "learning_rate": 4.695559610705596e-05, "loss": 0.0776, "step": 13014 }, { "epoch": 1.83, "learning_rate": 4.695512820512821e-05, "loss": 0.081, "step": 13016 }, { "epoch": 1.83, "learning_rate": 4.6954660303200455e-05, "loss": 0.0809, "step": 13018 }, { "epoch": 1.83, "learning_rate": 4.6954192401272694e-05, "loss": 0.0897, "step": 13020 }, { "epoch": 1.83, "learning_rate": 4.695372449934494e-05, "loss": 0.0887, "step": 13022 }, { "epoch": 1.83, "learning_rate": 4.6953256597417186e-05, "loss": 0.0946, "step": 13024 }, { "epoch": 1.83, "learning_rate": 4.695278869548943e-05, "loss": 0.0753, "step": 13026 }, { "epoch": 1.83, "learning_rate": 4.695232079356167e-05, "loss": 0.079, "step": 13028 }, { "epoch": 1.83, "learning_rate": 4.6951852891633917e-05, "loss": 0.0981, "step": 13030 }, { "epoch": 1.83, "learning_rate": 4.695138498970616e-05, "loss": 0.1006, "step": 13032 }, { "epoch": 1.83, "learning_rate": 4.695091708777841e-05, "loss": 0.0621, "step": 13034 }, { "epoch": 1.83, "learning_rate": 4.695044918585065e-05, "loss": 0.0791, "step": 13036 }, { "epoch": 1.83, "learning_rate": 4.6949981283922893e-05, "loss": 0.1089, "step": 13038 }, { "epoch": 1.83, "learning_rate": 4.694951338199514e-05, "loss": 0.0858, "step": 13040 }, { "epoch": 1.83, "learning_rate": 4.694904548006738e-05, "loss": 0.0971, "step": 13042 }, { "epoch": 1.83, "learning_rate": 4.6948577578139624e-05, "loss": 0.0803, "step": 13044 }, { "epoch": 1.83, "learning_rate": 4.6948109676211864e-05, "loss": 0.0635, "step": 13046 }, { "epoch": 1.83, "learning_rate": 4.694764177428411e-05, "loss": 0.0969, "step": 13048 }, { "epoch": 1.83, "learning_rate": 4.6947173872356355e-05, "loss": 0.0941, "step": 13050 }, { "epoch": 1.83, "learning_rate": 4.69467059704286e-05, "loss": 0.0975, "step": 13052 }, { "epoch": 1.83, "learning_rate": 4.694623806850084e-05, "loss": 0.0821, "step": 13054 }, { "epoch": 1.83, "learning_rate": 4.6945770166573086e-05, "loss": 0.0905, "step": 13056 }, { "epoch": 1.83, "learning_rate": 4.694530226464533e-05, "loss": 0.0847, "step": 13058 }, { "epoch": 1.83, "learning_rate": 4.694483436271758e-05, "loss": 0.0875, "step": 13060 }, { "epoch": 1.83, "learning_rate": 4.694436646078982e-05, "loss": 0.0798, "step": 13062 }, { "epoch": 1.83, "learning_rate": 4.694389855886206e-05, "loss": 0.0898, "step": 13064 }, { "epoch": 1.83, "learning_rate": 4.694343065693431e-05, "loss": 0.0901, "step": 13066 }, { "epoch": 1.83, "learning_rate": 4.6942962755006555e-05, "loss": 0.1074, "step": 13068 }, { "epoch": 1.83, "learning_rate": 4.6942494853078794e-05, "loss": 0.0895, "step": 13070 }, { "epoch": 1.83, "learning_rate": 4.694202695115104e-05, "loss": 0.0819, "step": 13072 }, { "epoch": 1.84, "learning_rate": 4.6941559049223286e-05, "loss": 0.1017, "step": 13074 }, { "epoch": 1.84, "learning_rate": 4.694109114729553e-05, "loss": 0.0857, "step": 13076 }, { "epoch": 1.84, "learning_rate": 4.694062324536777e-05, "loss": 0.0768, "step": 13078 }, { "epoch": 1.84, "learning_rate": 4.694015534344002e-05, "loss": 0.0742, "step": 13080 }, { "epoch": 1.84, "learning_rate": 4.6939687441512256e-05, "loss": 0.1015, "step": 13082 }, { "epoch": 1.84, "learning_rate": 4.693921953958451e-05, "loss": 0.0989, "step": 13084 }, { "epoch": 1.84, "learning_rate": 4.693875163765675e-05, "loss": 0.0934, "step": 13086 }, { "epoch": 1.84, "learning_rate": 4.6938283735728994e-05, "loss": 0.0767, "step": 13088 }, { "epoch": 1.84, "learning_rate": 4.693781583380123e-05, "loss": 0.0917, "step": 13090 }, { "epoch": 1.84, "learning_rate": 4.693734793187348e-05, "loss": 0.0758, "step": 13092 }, { "epoch": 1.84, "learning_rate": 4.6936880029945725e-05, "loss": 0.1066, "step": 13094 }, { "epoch": 1.84, "learning_rate": 4.693641212801797e-05, "loss": 0.0982, "step": 13096 }, { "epoch": 1.84, "learning_rate": 4.693594422609021e-05, "loss": 0.0868, "step": 13098 }, { "epoch": 1.84, "learning_rate": 4.6935476324162456e-05, "loss": 0.0634, "step": 13100 }, { "epoch": 1.84, "learning_rate": 4.69350084222347e-05, "loss": 0.0647, "step": 13102 }, { "epoch": 1.84, "learning_rate": 4.693454052030695e-05, "loss": 0.075, "step": 13104 }, { "epoch": 1.84, "learning_rate": 4.693407261837919e-05, "loss": 0.08, "step": 13106 }, { "epoch": 1.84, "learning_rate": 4.693360471645143e-05, "loss": 0.0741, "step": 13108 }, { "epoch": 1.84, "learning_rate": 4.693313681452368e-05, "loss": 0.0889, "step": 13110 }, { "epoch": 1.84, "learning_rate": 4.6932668912595924e-05, "loss": 0.0747, "step": 13112 }, { "epoch": 1.84, "learning_rate": 4.6932201010668164e-05, "loss": 0.071, "step": 13114 }, { "epoch": 1.84, "learning_rate": 4.693173310874041e-05, "loss": 0.0843, "step": 13116 }, { "epoch": 1.84, "learning_rate": 4.6931265206812655e-05, "loss": 0.0856, "step": 13118 }, { "epoch": 1.84, "learning_rate": 4.69307973048849e-05, "loss": 0.1067, "step": 13120 }, { "epoch": 1.84, "learning_rate": 4.693032940295714e-05, "loss": 0.0665, "step": 13122 }, { "epoch": 1.84, "learning_rate": 4.6929861501029386e-05, "loss": 0.0728, "step": 13124 }, { "epoch": 1.84, "learning_rate": 4.6929393599101625e-05, "loss": 0.0911, "step": 13126 }, { "epoch": 1.84, "learning_rate": 4.692892569717388e-05, "loss": 0.1013, "step": 13128 }, { "epoch": 1.84, "learning_rate": 4.692845779524612e-05, "loss": 0.0608, "step": 13130 }, { "epoch": 1.84, "learning_rate": 4.692798989331836e-05, "loss": 0.0821, "step": 13132 }, { "epoch": 1.84, "learning_rate": 4.69275219913906e-05, "loss": 0.1001, "step": 13134 }, { "epoch": 1.84, "learning_rate": 4.6927054089462855e-05, "loss": 0.0867, "step": 13136 }, { "epoch": 1.84, "learning_rate": 4.6926586187535094e-05, "loss": 0.0828, "step": 13138 }, { "epoch": 1.84, "learning_rate": 4.692611828560734e-05, "loss": 0.0871, "step": 13140 }, { "epoch": 1.84, "learning_rate": 4.692565038367958e-05, "loss": 0.095, "step": 13142 }, { "epoch": 1.85, "learning_rate": 4.6925182481751825e-05, "loss": 0.0667, "step": 13144 }, { "epoch": 1.85, "learning_rate": 4.692471457982407e-05, "loss": 0.0967, "step": 13146 }, { "epoch": 1.85, "learning_rate": 4.692424667789632e-05, "loss": 0.0768, "step": 13148 }, { "epoch": 1.85, "learning_rate": 4.6923778775968556e-05, "loss": 0.0924, "step": 13150 }, { "epoch": 1.85, "learning_rate": 4.69233108740408e-05, "loss": 0.0602, "step": 13152 }, { "epoch": 1.85, "learning_rate": 4.692284297211305e-05, "loss": 0.098, "step": 13154 }, { "epoch": 1.85, "learning_rate": 4.6922375070185294e-05, "loss": 0.0871, "step": 13156 }, { "epoch": 1.85, "learning_rate": 4.692190716825753e-05, "loss": 0.0651, "step": 13158 }, { "epoch": 1.85, "learning_rate": 4.692167321729366e-05, "loss": 0.169, "step": 13160 }, { "epoch": 1.85, "learning_rate": 4.69212053153659e-05, "loss": 0.0942, "step": 13162 }, { "epoch": 1.85, "learning_rate": 4.692073741343815e-05, "loss": 0.0786, "step": 13164 }, { "epoch": 1.85, "learning_rate": 4.692026951151039e-05, "loss": 0.0576, "step": 13166 }, { "epoch": 1.85, "learning_rate": 4.6919801609582636e-05, "loss": 0.0804, "step": 13168 }, { "epoch": 1.85, "learning_rate": 4.6919333707654875e-05, "loss": 0.0876, "step": 13170 }, { "epoch": 1.85, "learning_rate": 4.691886580572712e-05, "loss": 0.0728, "step": 13172 }, { "epoch": 1.85, "learning_rate": 4.691839790379937e-05, "loss": 0.0686, "step": 13174 }, { "epoch": 1.85, "learning_rate": 4.6917930001871606e-05, "loss": 0.0839, "step": 13176 }, { "epoch": 1.85, "learning_rate": 4.691746209994385e-05, "loss": 0.0708, "step": 13178 }, { "epoch": 1.85, "learning_rate": 4.69169941980161e-05, "loss": 0.0886, "step": 13180 }, { "epoch": 1.85, "learning_rate": 4.6916526296088344e-05, "loss": 0.0977, "step": 13182 }, { "epoch": 1.85, "learning_rate": 4.691605839416058e-05, "loss": 0.0758, "step": 13184 }, { "epoch": 1.85, "learning_rate": 4.691559049223283e-05, "loss": 0.0708, "step": 13186 }, { "epoch": 1.85, "learning_rate": 4.6915122590305075e-05, "loss": 0.0796, "step": 13188 }, { "epoch": 1.85, "learning_rate": 4.691465468837732e-05, "loss": 0.0808, "step": 13190 }, { "epoch": 1.85, "learning_rate": 4.691418678644956e-05, "loss": 0.0826, "step": 13192 }, { "epoch": 1.85, "learning_rate": 4.6913718884521806e-05, "loss": 0.0899, "step": 13194 }, { "epoch": 1.85, "learning_rate": 4.6913250982594045e-05, "loss": 0.0659, "step": 13196 }, { "epoch": 1.85, "learning_rate": 4.69127830806663e-05, "loss": 0.0826, "step": 13198 }, { "epoch": 1.85, "learning_rate": 4.691231517873854e-05, "loss": 0.0948, "step": 13200 }, { "epoch": 1.85, "learning_rate": 4.691184727681078e-05, "loss": 0.074, "step": 13202 }, { "epoch": 1.85, "learning_rate": 4.691137937488302e-05, "loss": 0.0961, "step": 13204 }, { "epoch": 1.85, "learning_rate": 4.691091147295527e-05, "loss": 0.0726, "step": 13206 }, { "epoch": 1.85, "learning_rate": 4.6910443571027514e-05, "loss": 0.0844, "step": 13208 }, { "epoch": 1.85, "learning_rate": 4.690997566909976e-05, "loss": 0.0673, "step": 13210 }, { "epoch": 1.85, "learning_rate": 4.6909507767172e-05, "loss": 0.0706, "step": 13212 }, { "epoch": 1.85, "learning_rate": 4.6909039865244245e-05, "loss": 0.1078, "step": 13214 }, { "epoch": 1.86, "learning_rate": 4.690857196331649e-05, "loss": 0.0711, "step": 13216 }, { "epoch": 1.86, "learning_rate": 4.6908104061388737e-05, "loss": 0.0799, "step": 13218 }, { "epoch": 1.86, "learning_rate": 4.6907636159460976e-05, "loss": 0.071, "step": 13220 }, { "epoch": 1.86, "learning_rate": 4.690716825753322e-05, "loss": 0.0848, "step": 13222 }, { "epoch": 1.86, "learning_rate": 4.690670035560547e-05, "loss": 0.0854, "step": 13224 }, { "epoch": 1.86, "learning_rate": 4.6906232453677713e-05, "loss": 0.0851, "step": 13226 }, { "epoch": 1.86, "learning_rate": 4.690576455174995e-05, "loss": 0.0686, "step": 13228 }, { "epoch": 1.86, "learning_rate": 4.69052966498222e-05, "loss": 0.1077, "step": 13230 }, { "epoch": 1.86, "learning_rate": 4.6904828747894444e-05, "loss": 0.0749, "step": 13232 }, { "epoch": 1.86, "learning_rate": 4.690436084596669e-05, "loss": 0.0885, "step": 13234 }, { "epoch": 1.86, "learning_rate": 4.690389294403893e-05, "loss": 0.0927, "step": 13236 }, { "epoch": 1.86, "learning_rate": 4.6903425042111175e-05, "loss": 0.0764, "step": 13238 }, { "epoch": 1.86, "learning_rate": 4.6902957140183414e-05, "loss": 0.0975, "step": 13240 }, { "epoch": 1.86, "learning_rate": 4.690248923825567e-05, "loss": 0.0649, "step": 13242 }, { "epoch": 1.86, "learning_rate": 4.6902021336327906e-05, "loss": 0.0905, "step": 13244 }, { "epoch": 1.86, "learning_rate": 4.690155343440015e-05, "loss": 0.0837, "step": 13246 }, { "epoch": 1.86, "learning_rate": 4.690108553247239e-05, "loss": 0.0855, "step": 13248 }, { "epoch": 1.86, "learning_rate": 4.6900617630544644e-05, "loss": 0.0862, "step": 13250 }, { "epoch": 1.86, "learning_rate": 4.690014972861688e-05, "loss": 0.0878, "step": 13252 }, { "epoch": 1.86, "learning_rate": 4.689968182668913e-05, "loss": 0.0909, "step": 13254 }, { "epoch": 1.86, "learning_rate": 4.689921392476137e-05, "loss": 0.0998, "step": 13256 }, { "epoch": 1.86, "learning_rate": 4.6898746022833614e-05, "loss": 0.0955, "step": 13258 }, { "epoch": 1.86, "learning_rate": 4.689827812090586e-05, "loss": 0.1101, "step": 13260 }, { "epoch": 1.86, "learning_rate": 4.6897810218978106e-05, "loss": 0.0848, "step": 13262 }, { "epoch": 1.86, "learning_rate": 4.6897342317050345e-05, "loss": 0.095, "step": 13264 }, { "epoch": 1.86, "learning_rate": 4.689687441512259e-05, "loss": 0.086, "step": 13266 }, { "epoch": 1.86, "learning_rate": 4.689640651319484e-05, "loss": 0.096, "step": 13268 }, { "epoch": 1.86, "learning_rate": 4.689593861126708e-05, "loss": 0.0887, "step": 13270 }, { "epoch": 1.86, "learning_rate": 4.689547070933932e-05, "loss": 0.0871, "step": 13272 }, { "epoch": 1.86, "learning_rate": 4.689500280741157e-05, "loss": 0.1057, "step": 13274 }, { "epoch": 1.86, "learning_rate": 4.6894534905483814e-05, "loss": 0.0844, "step": 13276 }, { "epoch": 1.86, "learning_rate": 4.689406700355606e-05, "loss": 0.0959, "step": 13278 }, { "epoch": 1.86, "learning_rate": 4.68935991016283e-05, "loss": 0.0793, "step": 13280 }, { "epoch": 1.86, "learning_rate": 4.6893131199700545e-05, "loss": 0.0894, "step": 13282 }, { "epoch": 1.86, "learning_rate": 4.689266329777279e-05, "loss": 0.1122, "step": 13284 }, { "epoch": 1.86, "learning_rate": 4.6892195395845037e-05, "loss": 0.0787, "step": 13286 }, { "epoch": 1.87, "learning_rate": 4.6891727493917276e-05, "loss": 0.0738, "step": 13288 }, { "epoch": 1.87, "learning_rate": 4.689125959198952e-05, "loss": 0.1069, "step": 13290 }, { "epoch": 1.87, "learning_rate": 4.689079169006176e-05, "loss": 0.0706, "step": 13292 }, { "epoch": 1.87, "learning_rate": 4.6890323788134013e-05, "loss": 0.0879, "step": 13294 }, { "epoch": 1.87, "learning_rate": 4.688985588620625e-05, "loss": 0.0904, "step": 13296 }, { "epoch": 1.87, "learning_rate": 4.68893879842785e-05, "loss": 0.1037, "step": 13298 }, { "epoch": 1.87, "learning_rate": 4.688892008235074e-05, "loss": 0.0918, "step": 13300 }, { "epoch": 1.87, "learning_rate": 4.688845218042299e-05, "loss": 0.1026, "step": 13302 }, { "epoch": 1.87, "learning_rate": 4.688798427849523e-05, "loss": 0.0706, "step": 13304 }, { "epoch": 1.87, "learning_rate": 4.6887516376567475e-05, "loss": 0.0817, "step": 13306 }, { "epoch": 1.87, "learning_rate": 4.6887048474639714e-05, "loss": 0.0742, "step": 13308 }, { "epoch": 1.87, "learning_rate": 4.688658057271196e-05, "loss": 0.0702, "step": 13310 }, { "epoch": 1.87, "learning_rate": 4.6886112670784206e-05, "loss": 0.1123, "step": 13312 }, { "epoch": 1.87, "learning_rate": 4.688564476885645e-05, "loss": 0.1076, "step": 13314 }, { "epoch": 1.87, "learning_rate": 4.688517686692869e-05, "loss": 0.108, "step": 13316 }, { "epoch": 1.87, "learning_rate": 4.688470896500094e-05, "loss": 0.1129, "step": 13318 }, { "epoch": 1.87, "learning_rate": 4.688424106307318e-05, "loss": 0.0735, "step": 13320 }, { "epoch": 1.87, "learning_rate": 4.688377316114543e-05, "loss": 0.0797, "step": 13322 }, { "epoch": 1.87, "learning_rate": 4.688330525921767e-05, "loss": 0.0802, "step": 13324 }, { "epoch": 1.87, "learning_rate": 4.6882837357289914e-05, "loss": 0.0914, "step": 13326 }, { "epoch": 1.87, "learning_rate": 4.688236945536216e-05, "loss": 0.0757, "step": 13328 }, { "epoch": 1.87, "learning_rate": 4.6881901553434406e-05, "loss": 0.0616, "step": 13330 }, { "epoch": 1.87, "learning_rate": 4.6881433651506645e-05, "loss": 0.0916, "step": 13332 }, { "epoch": 1.87, "learning_rate": 4.688096574957889e-05, "loss": 0.0731, "step": 13334 }, { "epoch": 1.87, "learning_rate": 4.688049784765114e-05, "loss": 0.0945, "step": 13336 }, { "epoch": 1.87, "learning_rate": 4.688002994572338e-05, "loss": 0.1139, "step": 13338 }, { "epoch": 1.87, "learning_rate": 4.687956204379562e-05, "loss": 0.0869, "step": 13340 }, { "epoch": 1.87, "learning_rate": 4.687909414186787e-05, "loss": 0.0807, "step": 13342 }, { "epoch": 1.87, "learning_rate": 4.687862623994011e-05, "loss": 0.0698, "step": 13344 }, { "epoch": 1.87, "learning_rate": 4.687815833801236e-05, "loss": 0.1004, "step": 13346 }, { "epoch": 1.87, "learning_rate": 4.68776904360846e-05, "loss": 0.0944, "step": 13348 }, { "epoch": 1.87, "learning_rate": 4.6877222534156845e-05, "loss": 0.0938, "step": 13350 }, { "epoch": 1.87, "learning_rate": 4.6876754632229084e-05, "loss": 0.0711, "step": 13352 }, { "epoch": 1.87, "learning_rate": 4.687628673030133e-05, "loss": 0.0859, "step": 13354 }, { "epoch": 1.87, "learning_rate": 4.6875818828373576e-05, "loss": 0.0783, "step": 13356 }, { "epoch": 1.88, "learning_rate": 4.687535092644582e-05, "loss": 0.076, "step": 13358 }, { "epoch": 1.88, "learning_rate": 4.687488302451806e-05, "loss": 0.0679, "step": 13360 }, { "epoch": 1.88, "learning_rate": 4.687441512259031e-05, "loss": 0.0665, "step": 13362 }, { "epoch": 1.88, "learning_rate": 4.687394722066255e-05, "loss": 0.0869, "step": 13364 }, { "epoch": 1.88, "learning_rate": 4.68734793187348e-05, "loss": 0.0705, "step": 13366 }, { "epoch": 1.88, "learning_rate": 4.687301141680704e-05, "loss": 0.1087, "step": 13368 }, { "epoch": 1.88, "learning_rate": 4.6872543514879284e-05, "loss": 0.0877, "step": 13370 }, { "epoch": 1.88, "learning_rate": 4.687207561295153e-05, "loss": 0.0842, "step": 13372 }, { "epoch": 1.88, "learning_rate": 4.6871607711023775e-05, "loss": 0.0827, "step": 13374 }, { "epoch": 1.88, "learning_rate": 4.6871139809096015e-05, "loss": 0.0682, "step": 13376 }, { "epoch": 1.88, "learning_rate": 4.687067190716826e-05, "loss": 0.0734, "step": 13378 }, { "epoch": 1.88, "learning_rate": 4.6870204005240506e-05, "loss": 0.1011, "step": 13380 }, { "epoch": 1.88, "learning_rate": 4.686973610331275e-05, "loss": 0.0867, "step": 13382 }, { "epoch": 1.88, "learning_rate": 4.686926820138499e-05, "loss": 0.0709, "step": 13384 }, { "epoch": 1.88, "learning_rate": 4.686880029945724e-05, "loss": 0.0679, "step": 13386 }, { "epoch": 1.88, "learning_rate": 4.6868332397529476e-05, "loss": 0.0864, "step": 13388 }, { "epoch": 1.88, "learning_rate": 4.686786449560173e-05, "loss": 0.0858, "step": 13390 }, { "epoch": 1.88, "learning_rate": 4.686739659367397e-05, "loss": 0.0901, "step": 13392 }, { "epoch": 1.88, "learning_rate": 4.6866928691746214e-05, "loss": 0.0751, "step": 13394 }, { "epoch": 1.88, "learning_rate": 4.686646078981845e-05, "loss": 0.0931, "step": 13396 }, { "epoch": 1.88, "learning_rate": 4.6865992887890706e-05, "loss": 0.1245, "step": 13398 }, { "epoch": 1.88, "learning_rate": 4.6865524985962945e-05, "loss": 0.0741, "step": 13400 }, { "epoch": 1.88, "learning_rate": 4.686505708403519e-05, "loss": 0.0923, "step": 13402 }, { "epoch": 1.88, "learning_rate": 4.686458918210743e-05, "loss": 0.0938, "step": 13404 }, { "epoch": 1.88, "learning_rate": 4.6864121280179676e-05, "loss": 0.0802, "step": 13406 }, { "epoch": 1.88, "learning_rate": 4.686365337825192e-05, "loss": 0.096, "step": 13408 }, { "epoch": 1.88, "learning_rate": 4.686318547632417e-05, "loss": 0.1033, "step": 13410 }, { "epoch": 1.88, "learning_rate": 4.686271757439641e-05, "loss": 0.0859, "step": 13412 }, { "epoch": 1.88, "learning_rate": 4.686224967246865e-05, "loss": 0.1024, "step": 13414 }, { "epoch": 1.88, "learning_rate": 4.68617817705409e-05, "loss": 0.0806, "step": 13416 }, { "epoch": 1.88, "learning_rate": 4.6861313868613145e-05, "loss": 0.1089, "step": 13418 }, { "epoch": 1.88, "learning_rate": 4.6860845966685384e-05, "loss": 0.1144, "step": 13420 }, { "epoch": 1.88, "learning_rate": 4.686037806475763e-05, "loss": 0.089, "step": 13422 }, { "epoch": 1.88, "learning_rate": 4.6859910162829876e-05, "loss": 0.1023, "step": 13424 }, { "epoch": 1.88, "learning_rate": 4.6859442260902115e-05, "loss": 0.0765, "step": 13426 }, { "epoch": 1.88, "learning_rate": 4.685897435897436e-05, "loss": 0.0894, "step": 13428 }, { "epoch": 1.89, "learning_rate": 4.68585064570466e-05, "loss": 0.0809, "step": 13430 }, { "epoch": 1.89, "learning_rate": 4.685803855511885e-05, "loss": 0.0723, "step": 13432 }, { "epoch": 1.89, "learning_rate": 4.685757065319109e-05, "loss": 0.0896, "step": 13434 }, { "epoch": 1.89, "learning_rate": 4.685710275126334e-05, "loss": 0.0995, "step": 13436 }, { "epoch": 1.89, "learning_rate": 4.685663484933558e-05, "loss": 0.0846, "step": 13438 }, { "epoch": 1.89, "learning_rate": 4.685616694740782e-05, "loss": 0.074, "step": 13440 }, { "epoch": 1.89, "learning_rate": 4.685569904548007e-05, "loss": 0.1063, "step": 13442 }, { "epoch": 1.89, "learning_rate": 4.6855231143552315e-05, "loss": 0.0902, "step": 13444 }, { "epoch": 1.89, "learning_rate": 4.6854763241624554e-05, "loss": 0.0906, "step": 13446 }, { "epoch": 1.89, "learning_rate": 4.68542953396968e-05, "loss": 0.0996, "step": 13448 }, { "epoch": 1.89, "learning_rate": 4.6853827437769045e-05, "loss": 0.1113, "step": 13450 }, { "epoch": 1.89, "learning_rate": 4.685335953584129e-05, "loss": 0.0751, "step": 13452 }, { "epoch": 1.89, "learning_rate": 4.685289163391353e-05, "loss": 0.0791, "step": 13454 }, { "epoch": 1.89, "learning_rate": 4.6852423731985776e-05, "loss": 0.0825, "step": 13456 }, { "epoch": 1.89, "learning_rate": 4.685195583005802e-05, "loss": 0.0831, "step": 13458 }, { "epoch": 1.89, "learning_rate": 4.685148792813027e-05, "loss": 0.0823, "step": 13460 }, { "epoch": 1.89, "learning_rate": 4.685102002620251e-05, "loss": 0.0809, "step": 13462 }, { "epoch": 1.89, "learning_rate": 4.685055212427475e-05, "loss": 0.0869, "step": 13464 }, { "epoch": 1.89, "learning_rate": 4.6850084222347e-05, "loss": 0.0849, "step": 13466 }, { "epoch": 1.89, "learning_rate": 4.6849616320419245e-05, "loss": 0.079, "step": 13468 }, { "epoch": 1.89, "learning_rate": 4.6849148418491484e-05, "loss": 0.0844, "step": 13470 }, { "epoch": 1.89, "learning_rate": 4.684868051656373e-05, "loss": 0.0985, "step": 13472 }, { "epoch": 1.89, "learning_rate": 4.684821261463597e-05, "loss": 0.0731, "step": 13474 }, { "epoch": 1.89, "learning_rate": 4.684774471270822e-05, "loss": 0.0709, "step": 13476 }, { "epoch": 1.89, "learning_rate": 4.684727681078046e-05, "loss": 0.0901, "step": 13478 }, { "epoch": 1.89, "learning_rate": 4.684680890885271e-05, "loss": 0.0801, "step": 13480 }, { "epoch": 1.89, "learning_rate": 4.6846341006924946e-05, "loss": 0.0704, "step": 13482 }, { "epoch": 1.89, "learning_rate": 4.68458731049972e-05, "loss": 0.0871, "step": 13484 }, { "epoch": 1.89, "learning_rate": 4.684540520306944e-05, "loss": 0.1005, "step": 13486 }, { "epoch": 1.89, "learning_rate": 4.6844937301141684e-05, "loss": 0.0727, "step": 13488 }, { "epoch": 1.89, "learning_rate": 4.684446939921392e-05, "loss": 0.0639, "step": 13490 }, { "epoch": 1.89, "learning_rate": 4.684400149728617e-05, "loss": 0.0865, "step": 13492 }, { "epoch": 1.89, "learning_rate": 4.6843533595358415e-05, "loss": 0.069, "step": 13494 }, { "epoch": 1.89, "learning_rate": 4.684306569343066e-05, "loss": 0.0564, "step": 13496 }, { "epoch": 1.89, "learning_rate": 4.68425977915029e-05, "loss": 0.0919, "step": 13498 }, { "epoch": 1.9, "learning_rate": 4.6842129889575146e-05, "loss": 0.0692, "step": 13500 }, { "epoch": 1.9, "learning_rate": 4.684166198764739e-05, "loss": 0.0836, "step": 13502 }, { "epoch": 1.9, "learning_rate": 4.684119408571964e-05, "loss": 0.0906, "step": 13504 }, { "epoch": 1.9, "learning_rate": 4.684072618379188e-05, "loss": 0.0736, "step": 13506 }, { "epoch": 1.9, "learning_rate": 4.684025828186412e-05, "loss": 0.0942, "step": 13508 }, { "epoch": 1.9, "learning_rate": 4.683979037993637e-05, "loss": 0.0831, "step": 13510 }, { "epoch": 1.9, "learning_rate": 4.6839322478008615e-05, "loss": 0.0843, "step": 13512 }, { "epoch": 1.9, "learning_rate": 4.6838854576080854e-05, "loss": 0.0816, "step": 13514 }, { "epoch": 1.9, "learning_rate": 4.68383866741531e-05, "loss": 0.0848, "step": 13516 }, { "epoch": 1.9, "learning_rate": 4.683791877222534e-05, "loss": 0.0976, "step": 13518 }, { "epoch": 1.9, "learning_rate": 4.683745087029759e-05, "loss": 0.0681, "step": 13520 }, { "epoch": 1.9, "learning_rate": 4.683698296836983e-05, "loss": 0.0999, "step": 13522 }, { "epoch": 1.9, "learning_rate": 4.6836515066442076e-05, "loss": 0.0753, "step": 13524 }, { "epoch": 1.9, "learning_rate": 4.6836047164514316e-05, "loss": 0.0841, "step": 13526 }, { "epoch": 1.9, "learning_rate": 4.683557926258657e-05, "loss": 0.0818, "step": 13528 }, { "epoch": 1.9, "learning_rate": 4.683511136065881e-05, "loss": 0.0804, "step": 13530 }, { "epoch": 1.9, "learning_rate": 4.683464345873105e-05, "loss": 0.0679, "step": 13532 }, { "epoch": 1.9, "learning_rate": 4.683417555680329e-05, "loss": 0.091, "step": 13534 }, { "epoch": 1.9, "learning_rate": 4.683370765487554e-05, "loss": 0.0876, "step": 13536 }, { "epoch": 1.9, "learning_rate": 4.6833239752947784e-05, "loss": 0.0914, "step": 13538 }, { "epoch": 1.9, "learning_rate": 4.683277185102003e-05, "loss": 0.1029, "step": 13540 }, { "epoch": 1.9, "learning_rate": 4.683230394909227e-05, "loss": 0.0933, "step": 13542 }, { "epoch": 1.9, "learning_rate": 4.6831836047164515e-05, "loss": 0.0913, "step": 13544 }, { "epoch": 1.9, "learning_rate": 4.683136814523676e-05, "loss": 0.1064, "step": 13546 }, { "epoch": 1.9, "learning_rate": 4.683090024330901e-05, "loss": 0.0847, "step": 13548 }, { "epoch": 1.9, "learning_rate": 4.6830432341381246e-05, "loss": 0.101, "step": 13550 }, { "epoch": 1.9, "learning_rate": 4.682996443945349e-05, "loss": 0.0917, "step": 13552 }, { "epoch": 1.9, "learning_rate": 4.682949653752574e-05, "loss": 0.0954, "step": 13554 }, { "epoch": 1.9, "learning_rate": 4.6829028635597984e-05, "loss": 0.0735, "step": 13556 }, { "epoch": 1.9, "learning_rate": 4.682856073367022e-05, "loss": 0.0715, "step": 13558 }, { "epoch": 1.9, "learning_rate": 4.682809283174247e-05, "loss": 0.0936, "step": 13560 }, { "epoch": 1.9, "learning_rate": 4.6827624929814715e-05, "loss": 0.0892, "step": 13562 }, { "epoch": 1.9, "learning_rate": 4.682715702788696e-05, "loss": 0.085, "step": 13564 }, { "epoch": 1.9, "learning_rate": 4.68266891259592e-05, "loss": 0.0854, "step": 13566 }, { "epoch": 1.9, "learning_rate": 4.6826221224031446e-05, "loss": 0.0997, "step": 13568 }, { "epoch": 1.9, "learning_rate": 4.6825753322103685e-05, "loss": 0.0778, "step": 13570 }, { "epoch": 1.91, "learning_rate": 4.682528542017594e-05, "loss": 0.0742, "step": 13572 }, { "epoch": 1.91, "learning_rate": 4.682481751824818e-05, "loss": 0.0959, "step": 13574 }, { "epoch": 1.91, "learning_rate": 4.682434961632042e-05, "loss": 0.0921, "step": 13576 }, { "epoch": 1.91, "learning_rate": 4.682388171439266e-05, "loss": 0.0991, "step": 13578 }, { "epoch": 1.91, "learning_rate": 4.6823413812464915e-05, "loss": 0.0825, "step": 13580 }, { "epoch": 1.91, "learning_rate": 4.6822945910537154e-05, "loss": 0.0795, "step": 13582 }, { "epoch": 1.91, "learning_rate": 4.68224780086094e-05, "loss": 0.0881, "step": 13584 }, { "epoch": 1.91, "learning_rate": 4.682201010668164e-05, "loss": 0.0804, "step": 13586 }, { "epoch": 1.91, "learning_rate": 4.6821542204753885e-05, "loss": 0.0766, "step": 13588 }, { "epoch": 1.91, "learning_rate": 4.682107430282613e-05, "loss": 0.0833, "step": 13590 }, { "epoch": 1.91, "learning_rate": 4.6820606400898376e-05, "loss": 0.0696, "step": 13592 }, { "epoch": 1.91, "learning_rate": 4.6820138498970616e-05, "loss": 0.0769, "step": 13594 }, { "epoch": 1.91, "learning_rate": 4.681967059704286e-05, "loss": 0.0775, "step": 13596 }, { "epoch": 1.91, "learning_rate": 4.681920269511511e-05, "loss": 0.0818, "step": 13598 }, { "epoch": 1.91, "learning_rate": 4.681873479318735e-05, "loss": 0.0834, "step": 13600 }, { "epoch": 1.91, "learning_rate": 4.681826689125959e-05, "loss": 0.1017, "step": 13602 }, { "epoch": 1.91, "learning_rate": 4.681779898933184e-05, "loss": 0.077, "step": 13604 }, { "epoch": 1.91, "learning_rate": 4.6817331087404084e-05, "loss": 0.0925, "step": 13606 }, { "epoch": 1.91, "learning_rate": 4.681686318547633e-05, "loss": 0.0859, "step": 13608 }, { "epoch": 1.91, "learning_rate": 4.681639528354857e-05, "loss": 0.0888, "step": 13610 }, { "epoch": 1.91, "learning_rate": 4.6815927381620815e-05, "loss": 0.1129, "step": 13612 }, { "epoch": 1.91, "learning_rate": 4.681545947969306e-05, "loss": 0.0808, "step": 13614 }, { "epoch": 1.91, "learning_rate": 4.681499157776531e-05, "loss": 0.0833, "step": 13616 }, { "epoch": 1.91, "learning_rate": 4.6814523675837546e-05, "loss": 0.0898, "step": 13618 }, { "epoch": 1.91, "learning_rate": 4.681405577390979e-05, "loss": 0.0736, "step": 13620 }, { "epoch": 1.91, "learning_rate": 4.681358787198203e-05, "loss": 0.0834, "step": 13622 }, { "epoch": 1.91, "learning_rate": 4.6813119970054284e-05, "loss": 0.0705, "step": 13624 }, { "epoch": 1.91, "learning_rate": 4.681265206812652e-05, "loss": 0.0742, "step": 13626 }, { "epoch": 1.91, "learning_rate": 4.681241811716264e-05, "loss": 0.0926, "step": 13628 }, { "epoch": 1.91, "learning_rate": 4.681195021523489e-05, "loss": 0.0784, "step": 13630 }, { "epoch": 1.91, "learning_rate": 4.6811482313307134e-05, "loss": 0.0995, "step": 13632 }, { "epoch": 1.91, "learning_rate": 4.681101441137938e-05, "loss": 0.0833, "step": 13634 }, { "epoch": 1.91, "learning_rate": 4.681054650945162e-05, "loss": 0.0907, "step": 13636 }, { "epoch": 1.91, "learning_rate": 4.6810078607523865e-05, "loss": 0.0694, "step": 13638 }, { "epoch": 1.91, "learning_rate": 4.6809610705596105e-05, "loss": 0.07, "step": 13640 }, { "epoch": 1.91, "learning_rate": 4.680914280366836e-05, "loss": 0.0885, "step": 13642 }, { "epoch": 1.92, "learning_rate": 4.6808674901740596e-05, "loss": 0.0748, "step": 13644 }, { "epoch": 1.92, "learning_rate": 4.680820699981284e-05, "loss": 0.07, "step": 13646 }, { "epoch": 1.92, "learning_rate": 4.680773909788508e-05, "loss": 0.0625, "step": 13648 }, { "epoch": 1.92, "learning_rate": 4.680727119595733e-05, "loss": 0.0632, "step": 13650 }, { "epoch": 1.92, "learning_rate": 4.680680329402957e-05, "loss": 0.0726, "step": 13652 }, { "epoch": 1.92, "learning_rate": 4.680633539210182e-05, "loss": 0.0738, "step": 13654 }, { "epoch": 1.92, "learning_rate": 4.680586749017406e-05, "loss": 0.0956, "step": 13656 }, { "epoch": 1.92, "learning_rate": 4.6805399588246304e-05, "loss": 0.0864, "step": 13658 }, { "epoch": 1.92, "learning_rate": 4.680493168631855e-05, "loss": 0.0802, "step": 13660 }, { "epoch": 1.92, "learning_rate": 4.6804463784390796e-05, "loss": 0.0783, "step": 13662 }, { "epoch": 1.92, "learning_rate": 4.6803995882463035e-05, "loss": 0.0808, "step": 13664 }, { "epoch": 1.92, "learning_rate": 4.680352798053528e-05, "loss": 0.083, "step": 13666 }, { "epoch": 1.92, "learning_rate": 4.680306007860753e-05, "loss": 0.0921, "step": 13668 }, { "epoch": 1.92, "learning_rate": 4.680259217667977e-05, "loss": 0.0798, "step": 13670 }, { "epoch": 1.92, "learning_rate": 4.680212427475201e-05, "loss": 0.0709, "step": 13672 }, { "epoch": 1.92, "learning_rate": 4.680165637282426e-05, "loss": 0.0752, "step": 13674 }, { "epoch": 1.92, "learning_rate": 4.6801188470896504e-05, "loss": 0.0701, "step": 13676 }, { "epoch": 1.92, "learning_rate": 4.680072056896875e-05, "loss": 0.1258, "step": 13678 }, { "epoch": 1.92, "learning_rate": 4.680025266704099e-05, "loss": 0.0913, "step": 13680 }, { "epoch": 1.92, "learning_rate": 4.6799784765113235e-05, "loss": 0.0848, "step": 13682 }, { "epoch": 1.92, "learning_rate": 4.6799316863185474e-05, "loss": 0.0862, "step": 13684 }, { "epoch": 1.92, "learning_rate": 4.679884896125773e-05, "loss": 0.1008, "step": 13686 }, { "epoch": 1.92, "learning_rate": 4.6798381059329966e-05, "loss": 0.0804, "step": 13688 }, { "epoch": 1.92, "learning_rate": 4.679791315740221e-05, "loss": 0.0801, "step": 13690 }, { "epoch": 1.92, "learning_rate": 4.679744525547445e-05, "loss": 0.0761, "step": 13692 }, { "epoch": 1.92, "learning_rate": 4.6796977353546704e-05, "loss": 0.1032, "step": 13694 }, { "epoch": 1.92, "learning_rate": 4.679650945161894e-05, "loss": 0.078, "step": 13696 }, { "epoch": 1.92, "learning_rate": 4.679604154969119e-05, "loss": 0.0774, "step": 13698 }, { "epoch": 1.92, "learning_rate": 4.679557364776343e-05, "loss": 0.0831, "step": 13700 }, { "epoch": 1.92, "learning_rate": 4.6795105745835674e-05, "loss": 0.0748, "step": 13702 }, { "epoch": 1.92, "learning_rate": 4.679463784390792e-05, "loss": 0.0989, "step": 13704 }, { "epoch": 1.92, "learning_rate": 4.6794169941980165e-05, "loss": 0.0964, "step": 13706 }, { "epoch": 1.92, "learning_rate": 4.6793702040052405e-05, "loss": 0.0724, "step": 13708 }, { "epoch": 1.92, "learning_rate": 4.679323413812465e-05, "loss": 0.0925, "step": 13710 }, { "epoch": 1.92, "learning_rate": 4.6792766236196896e-05, "loss": 0.0866, "step": 13712 }, { "epoch": 1.93, "learning_rate": 4.679229833426914e-05, "loss": 0.0824, "step": 13714 }, { "epoch": 1.93, "learning_rate": 4.679183043234138e-05, "loss": 0.0734, "step": 13716 }, { "epoch": 1.93, "learning_rate": 4.679136253041363e-05, "loss": 0.0826, "step": 13718 }, { "epoch": 1.93, "learning_rate": 4.679089462848587e-05, "loss": 0.0684, "step": 13720 }, { "epoch": 1.93, "learning_rate": 4.679042672655812e-05, "loss": 0.0783, "step": 13722 }, { "epoch": 1.93, "learning_rate": 4.678995882463036e-05, "loss": 0.0761, "step": 13724 }, { "epoch": 1.93, "learning_rate": 4.6789490922702604e-05, "loss": 0.0723, "step": 13726 }, { "epoch": 1.93, "learning_rate": 4.678902302077485e-05, "loss": 0.093, "step": 13728 }, { "epoch": 1.93, "learning_rate": 4.6788555118847096e-05, "loss": 0.0994, "step": 13730 }, { "epoch": 1.93, "learning_rate": 4.6788087216919335e-05, "loss": 0.0745, "step": 13732 }, { "epoch": 1.93, "learning_rate": 4.678761931499158e-05, "loss": 0.0751, "step": 13734 }, { "epoch": 1.93, "learning_rate": 4.678715141306382e-05, "loss": 0.0986, "step": 13736 }, { "epoch": 1.93, "learning_rate": 4.678668351113607e-05, "loss": 0.0797, "step": 13738 }, { "epoch": 1.93, "learning_rate": 4.678621560920831e-05, "loss": 0.0839, "step": 13740 }, { "epoch": 1.93, "learning_rate": 4.678574770728056e-05, "loss": 0.0674, "step": 13742 }, { "epoch": 1.93, "learning_rate": 4.67852798053528e-05, "loss": 0.0608, "step": 13744 }, { "epoch": 1.93, "learning_rate": 4.678481190342505e-05, "loss": 0.0822, "step": 13746 }, { "epoch": 1.93, "learning_rate": 4.678434400149729e-05, "loss": 0.0805, "step": 13748 }, { "epoch": 1.93, "learning_rate": 4.6783876099569535e-05, "loss": 0.129, "step": 13750 }, { "epoch": 1.93, "learning_rate": 4.6783408197641774e-05, "loss": 0.089, "step": 13752 }, { "epoch": 1.93, "learning_rate": 4.678294029571402e-05, "loss": 0.0838, "step": 13754 }, { "epoch": 1.93, "learning_rate": 4.6782472393786266e-05, "loss": 0.0747, "step": 13756 }, { "epoch": 1.93, "learning_rate": 4.678200449185851e-05, "loss": 0.082, "step": 13758 }, { "epoch": 1.93, "learning_rate": 4.678153658993075e-05, "loss": 0.0776, "step": 13760 }, { "epoch": 1.93, "learning_rate": 4.6781068688003e-05, "loss": 0.0814, "step": 13762 }, { "epoch": 1.93, "learning_rate": 4.678060078607524e-05, "loss": 0.0779, "step": 13764 }, { "epoch": 1.93, "learning_rate": 4.678013288414749e-05, "loss": 0.0998, "step": 13766 }, { "epoch": 1.93, "learning_rate": 4.677966498221973e-05, "loss": 0.0682, "step": 13768 }, { "epoch": 1.93, "learning_rate": 4.6779197080291974e-05, "loss": 0.0751, "step": 13770 }, { "epoch": 1.93, "learning_rate": 4.677872917836422e-05, "loss": 0.0841, "step": 13772 }, { "epoch": 1.93, "learning_rate": 4.6778261276436465e-05, "loss": 0.0675, "step": 13774 }, { "epoch": 1.93, "learning_rate": 4.6777793374508705e-05, "loss": 0.0807, "step": 13776 }, { "epoch": 1.93, "learning_rate": 4.677732547258095e-05, "loss": 0.0729, "step": 13778 }, { "epoch": 1.93, "learning_rate": 4.6776857570653196e-05, "loss": 0.0751, "step": 13780 }, { "epoch": 1.93, "learning_rate": 4.677638966872544e-05, "loss": 0.0849, "step": 13782 }, { "epoch": 1.93, "learning_rate": 4.677592176679768e-05, "loss": 0.099, "step": 13784 }, { "epoch": 1.94, "learning_rate": 4.677545386486993e-05, "loss": 0.0918, "step": 13786 }, { "epoch": 1.94, "learning_rate": 4.6774985962942167e-05, "loss": 0.1082, "step": 13788 }, { "epoch": 1.94, "learning_rate": 4.677451806101442e-05, "loss": 0.071, "step": 13790 }, { "epoch": 1.94, "learning_rate": 4.677405015908666e-05, "loss": 0.1029, "step": 13792 }, { "epoch": 1.94, "learning_rate": 4.6773582257158904e-05, "loss": 0.1171, "step": 13794 }, { "epoch": 1.94, "learning_rate": 4.6773114355231143e-05, "loss": 0.0791, "step": 13796 }, { "epoch": 1.94, "learning_rate": 4.677264645330339e-05, "loss": 0.0978, "step": 13798 }, { "epoch": 1.94, "learning_rate": 4.6772178551375635e-05, "loss": 0.0806, "step": 13800 }, { "epoch": 1.94, "learning_rate": 4.677171064944788e-05, "loss": 0.0735, "step": 13802 }, { "epoch": 1.94, "learning_rate": 4.677124274752012e-05, "loss": 0.0793, "step": 13804 }, { "epoch": 1.94, "learning_rate": 4.6770774845592366e-05, "loss": 0.0637, "step": 13806 }, { "epoch": 1.94, "learning_rate": 4.677030694366461e-05, "loss": 0.0803, "step": 13808 }, { "epoch": 1.94, "learning_rate": 4.676983904173685e-05, "loss": 0.0876, "step": 13810 }, { "epoch": 1.94, "learning_rate": 4.67693711398091e-05, "loss": 0.081, "step": 13812 }, { "epoch": 1.94, "learning_rate": 4.6768903237881336e-05, "loss": 0.0778, "step": 13814 }, { "epoch": 1.94, "learning_rate": 4.676843533595359e-05, "loss": 0.0915, "step": 13816 }, { "epoch": 1.94, "learning_rate": 4.676796743402583e-05, "loss": 0.1146, "step": 13818 }, { "epoch": 1.94, "learning_rate": 4.6767499532098074e-05, "loss": 0.0943, "step": 13820 }, { "epoch": 1.94, "learning_rate": 4.676703163017031e-05, "loss": 0.0681, "step": 13822 }, { "epoch": 1.94, "learning_rate": 4.6766563728242566e-05, "loss": 0.0717, "step": 13824 }, { "epoch": 1.94, "learning_rate": 4.6766095826314805e-05, "loss": 0.0849, "step": 13826 }, { "epoch": 1.94, "learning_rate": 4.676562792438705e-05, "loss": 0.0759, "step": 13828 }, { "epoch": 1.94, "learning_rate": 4.676516002245929e-05, "loss": 0.0588, "step": 13830 }, { "epoch": 1.94, "learning_rate": 4.6764692120531536e-05, "loss": 0.0837, "step": 13832 }, { "epoch": 1.94, "learning_rate": 4.676422421860378e-05, "loss": 0.0741, "step": 13834 }, { "epoch": 1.94, "learning_rate": 4.676375631667603e-05, "loss": 0.0802, "step": 13836 }, { "epoch": 1.94, "learning_rate": 4.676328841474827e-05, "loss": 0.0932, "step": 13838 }, { "epoch": 1.94, "learning_rate": 4.676282051282051e-05, "loss": 0.0816, "step": 13840 }, { "epoch": 1.94, "learning_rate": 4.676235261089276e-05, "loss": 0.0683, "step": 13842 }, { "epoch": 1.94, "learning_rate": 4.6761884708965005e-05, "loss": 0.0657, "step": 13844 }, { "epoch": 1.94, "learning_rate": 4.6761416807037244e-05, "loss": 0.0708, "step": 13846 }, { "epoch": 1.94, "learning_rate": 4.676094890510949e-05, "loss": 0.0979, "step": 13848 }, { "epoch": 1.94, "learning_rate": 4.6760481003181736e-05, "loss": 0.075, "step": 13850 }, { "epoch": 1.94, "learning_rate": 4.676001310125398e-05, "loss": 0.0686, "step": 13852 }, { "epoch": 1.94, "learning_rate": 4.675954519932622e-05, "loss": 0.0878, "step": 13854 }, { "epoch": 1.94, "learning_rate": 4.6759077297398467e-05, "loss": 0.0896, "step": 13856 }, { "epoch": 1.95, "learning_rate": 4.675860939547071e-05, "loss": 0.0891, "step": 13858 }, { "epoch": 1.95, "learning_rate": 4.675814149354296e-05, "loss": 0.0731, "step": 13860 }, { "epoch": 1.95, "learning_rate": 4.67576735916152e-05, "loss": 0.0842, "step": 13862 }, { "epoch": 1.95, "learning_rate": 4.6757205689687443e-05, "loss": 0.0897, "step": 13864 }, { "epoch": 1.95, "learning_rate": 4.675673778775968e-05, "loss": 0.116, "step": 13866 }, { "epoch": 1.95, "learning_rate": 4.6756269885831935e-05, "loss": 0.0838, "step": 13868 }, { "epoch": 1.95, "learning_rate": 4.6755801983904174e-05, "loss": 0.0742, "step": 13870 }, { "epoch": 1.95, "learning_rate": 4.675533408197642e-05, "loss": 0.0804, "step": 13872 }, { "epoch": 1.95, "learning_rate": 4.675486618004866e-05, "loss": 0.0811, "step": 13874 }, { "epoch": 1.95, "learning_rate": 4.675439827812091e-05, "loss": 0.0847, "step": 13876 }, { "epoch": 1.95, "learning_rate": 4.675393037619315e-05, "loss": 0.1092, "step": 13878 }, { "epoch": 1.95, "learning_rate": 4.67534624742654e-05, "loss": 0.0723, "step": 13880 }, { "epoch": 1.95, "learning_rate": 4.6752994572337636e-05, "loss": 0.0644, "step": 13882 }, { "epoch": 1.95, "learning_rate": 4.675252667040988e-05, "loss": 0.0887, "step": 13884 }, { "epoch": 1.95, "learning_rate": 4.675205876848213e-05, "loss": 0.1023, "step": 13886 }, { "epoch": 1.95, "learning_rate": 4.6751590866554374e-05, "loss": 0.075, "step": 13888 }, { "epoch": 1.95, "learning_rate": 4.675112296462661e-05, "loss": 0.0983, "step": 13890 }, { "epoch": 1.95, "learning_rate": 4.675065506269886e-05, "loss": 0.1074, "step": 13892 }, { "epoch": 1.95, "learning_rate": 4.6750187160771105e-05, "loss": 0.0783, "step": 13894 }, { "epoch": 1.95, "learning_rate": 4.674971925884335e-05, "loss": 0.0913, "step": 13896 }, { "epoch": 1.95, "learning_rate": 4.674925135691559e-05, "loss": 0.0891, "step": 13898 }, { "epoch": 1.95, "learning_rate": 4.6748783454987836e-05, "loss": 0.087, "step": 13900 }, { "epoch": 1.95, "learning_rate": 4.674831555306008e-05, "loss": 0.0637, "step": 13902 }, { "epoch": 1.95, "learning_rate": 4.674784765113233e-05, "loss": 0.0705, "step": 13904 }, { "epoch": 1.95, "learning_rate": 4.674737974920457e-05, "loss": 0.0833, "step": 13906 }, { "epoch": 1.95, "learning_rate": 4.674691184727681e-05, "loss": 0.0829, "step": 13908 }, { "epoch": 1.95, "learning_rate": 4.674644394534906e-05, "loss": 0.0869, "step": 13910 }, { "epoch": 1.95, "learning_rate": 4.6745976043421305e-05, "loss": 0.0823, "step": 13912 }, { "epoch": 1.95, "learning_rate": 4.6745508141493544e-05, "loss": 0.0874, "step": 13914 }, { "epoch": 1.95, "learning_rate": 4.674504023956579e-05, "loss": 0.0743, "step": 13916 }, { "epoch": 1.95, "learning_rate": 4.674457233763803e-05, "loss": 0.1075, "step": 13918 }, { "epoch": 1.95, "learning_rate": 4.674410443571028e-05, "loss": 0.0799, "step": 13920 }, { "epoch": 1.95, "learning_rate": 4.674363653378252e-05, "loss": 0.1071, "step": 13922 }, { "epoch": 1.95, "learning_rate": 4.6743168631854767e-05, "loss": 0.0778, "step": 13924 }, { "epoch": 1.95, "learning_rate": 4.6742700729927006e-05, "loss": 0.0834, "step": 13926 }, { "epoch": 1.96, "learning_rate": 4.674223282799925e-05, "loss": 0.0907, "step": 13928 }, { "epoch": 1.96, "learning_rate": 4.67417649260715e-05, "loss": 0.0761, "step": 13930 }, { "epoch": 1.96, "learning_rate": 4.6741297024143743e-05, "loss": 0.1059, "step": 13932 }, { "epoch": 1.96, "learning_rate": 4.674082912221598e-05, "loss": 0.0793, "step": 13934 }, { "epoch": 1.96, "learning_rate": 4.674036122028823e-05, "loss": 0.0738, "step": 13936 }, { "epoch": 1.96, "learning_rate": 4.6739893318360474e-05, "loss": 0.1102, "step": 13938 }, { "epoch": 1.96, "learning_rate": 4.673942541643272e-05, "loss": 0.0918, "step": 13940 }, { "epoch": 1.96, "learning_rate": 4.673895751450496e-05, "loss": 0.0736, "step": 13942 }, { "epoch": 1.96, "learning_rate": 4.6738489612577205e-05, "loss": 0.0583, "step": 13944 }, { "epoch": 1.96, "learning_rate": 4.673802171064945e-05, "loss": 0.114, "step": 13946 }, { "epoch": 1.96, "learning_rate": 4.67375538087217e-05, "loss": 0.1063, "step": 13948 }, { "epoch": 1.96, "learning_rate": 4.6737085906793936e-05, "loss": 0.0842, "step": 13950 }, { "epoch": 1.96, "learning_rate": 4.673661800486618e-05, "loss": 0.074, "step": 13952 }, { "epoch": 1.96, "learning_rate": 4.673615010293843e-05, "loss": 0.0677, "step": 13954 }, { "epoch": 1.96, "learning_rate": 4.6735682201010674e-05, "loss": 0.0717, "step": 13956 }, { "epoch": 1.96, "learning_rate": 4.673521429908291e-05, "loss": 0.0879, "step": 13958 }, { "epoch": 1.96, "learning_rate": 4.673474639715516e-05, "loss": 0.069, "step": 13960 }, { "epoch": 1.96, "learning_rate": 4.67342784952274e-05, "loss": 0.0787, "step": 13962 }, { "epoch": 1.96, "learning_rate": 4.673381059329965e-05, "loss": 0.0953, "step": 13964 }, { "epoch": 1.96, "learning_rate": 4.673334269137189e-05, "loss": 0.0925, "step": 13966 }, { "epoch": 1.96, "learning_rate": 4.6732874789444136e-05, "loss": 0.0702, "step": 13968 }, { "epoch": 1.96, "learning_rate": 4.6732406887516375e-05, "loss": 0.1242, "step": 13970 }, { "epoch": 1.96, "learning_rate": 4.673193898558863e-05, "loss": 0.0616, "step": 13972 }, { "epoch": 1.96, "learning_rate": 4.673147108366087e-05, "loss": 0.1254, "step": 13974 }, { "epoch": 1.96, "learning_rate": 4.673100318173311e-05, "loss": 0.1014, "step": 13976 }, { "epoch": 1.96, "learning_rate": 4.673053527980535e-05, "loss": 0.0924, "step": 13978 }, { "epoch": 1.96, "learning_rate": 4.67300673778776e-05, "loss": 0.0804, "step": 13980 }, { "epoch": 1.96, "learning_rate": 4.6729599475949844e-05, "loss": 0.0965, "step": 13982 }, { "epoch": 1.96, "learning_rate": 4.672913157402209e-05, "loss": 0.0666, "step": 13984 }, { "epoch": 1.96, "learning_rate": 4.672866367209433e-05, "loss": 0.0786, "step": 13986 }, { "epoch": 1.96, "learning_rate": 4.6728195770166575e-05, "loss": 0.0736, "step": 13988 }, { "epoch": 1.96, "learning_rate": 4.672772786823882e-05, "loss": 0.0724, "step": 13990 }, { "epoch": 1.96, "learning_rate": 4.6727259966311067e-05, "loss": 0.069, "step": 13992 }, { "epoch": 1.96, "learning_rate": 4.6726792064383306e-05, "loss": 0.0954, "step": 13994 }, { "epoch": 1.96, "learning_rate": 4.672632416245555e-05, "loss": 0.0802, "step": 13996 }, { "epoch": 1.96, "learning_rate": 4.67258562605278e-05, "loss": 0.0789, "step": 13998 }, { "epoch": 1.97, "learning_rate": 4.6725388358600043e-05, "loss": 0.1104, "step": 14000 }, { "epoch": 1.97, "eval_gen_len": 28.2931, "eval_loss": 1.0474748611450195, "eval_meteor": 0.048, "eval_runtime": 13.517, "eval_samples_per_second": 4.291, "eval_steps_per_second": 0.592, "step": 14000 }, { "epoch": 1.97, "learning_rate": 4.672492045667228e-05, "loss": 0.119, "step": 14002 }, { "epoch": 1.97, "learning_rate": 4.672445255474453e-05, "loss": 0.0843, "step": 14004 }, { "epoch": 1.97, "learning_rate": 4.6723984652816774e-05, "loss": 0.0611, "step": 14006 }, { "epoch": 1.97, "learning_rate": 4.672351675088902e-05, "loss": 0.0763, "step": 14008 }, { "epoch": 1.97, "learning_rate": 4.672304884896126e-05, "loss": 0.0992, "step": 14010 }, { "epoch": 1.97, "learning_rate": 4.6722580947033505e-05, "loss": 0.1185, "step": 14012 }, { "epoch": 1.97, "learning_rate": 4.6722113045105745e-05, "loss": 0.0689, "step": 14014 }, { "epoch": 1.97, "learning_rate": 4.6721645143178e-05, "loss": 0.0783, "step": 14016 }, { "epoch": 1.97, "learning_rate": 4.6721177241250236e-05, "loss": 0.0736, "step": 14018 }, { "epoch": 1.97, "learning_rate": 4.672070933932248e-05, "loss": 0.0734, "step": 14020 }, { "epoch": 1.97, "learning_rate": 4.672024143739472e-05, "loss": 0.08, "step": 14022 }, { "epoch": 1.97, "learning_rate": 4.6719773535466974e-05, "loss": 0.0764, "step": 14024 }, { "epoch": 1.97, "learning_rate": 4.671930563353921e-05, "loss": 0.0828, "step": 14026 }, { "epoch": 1.97, "learning_rate": 4.671883773161146e-05, "loss": 0.0766, "step": 14028 }, { "epoch": 1.97, "learning_rate": 4.67183698296837e-05, "loss": 0.0948, "step": 14030 }, { "epoch": 1.97, "learning_rate": 4.6717901927755944e-05, "loss": 0.1003, "step": 14032 }, { "epoch": 1.97, "learning_rate": 4.671743402582819e-05, "loss": 0.0863, "step": 14034 }, { "epoch": 1.97, "learning_rate": 4.6716966123900436e-05, "loss": 0.0965, "step": 14036 }, { "epoch": 1.97, "learning_rate": 4.6716498221972675e-05, "loss": 0.0851, "step": 14038 }, { "epoch": 1.97, "learning_rate": 4.671603032004492e-05, "loss": 0.1, "step": 14040 }, { "epoch": 1.97, "learning_rate": 4.671556241811717e-05, "loss": 0.096, "step": 14042 }, { "epoch": 1.97, "learning_rate": 4.671509451618941e-05, "loss": 0.0889, "step": 14044 }, { "epoch": 1.97, "learning_rate": 4.671462661426165e-05, "loss": 0.0792, "step": 14046 }, { "epoch": 1.97, "learning_rate": 4.67141587123339e-05, "loss": 0.082, "step": 14048 }, { "epoch": 1.97, "learning_rate": 4.6713690810406144e-05, "loss": 0.0832, "step": 14050 }, { "epoch": 1.97, "learning_rate": 4.671322290847839e-05, "loss": 0.0758, "step": 14052 }, { "epoch": 1.97, "learning_rate": 4.671275500655063e-05, "loss": 0.0826, "step": 14054 }, { "epoch": 1.97, "learning_rate": 4.6712287104622875e-05, "loss": 0.0842, "step": 14056 }, { "epoch": 1.97, "learning_rate": 4.671181920269512e-05, "loss": 0.099, "step": 14058 }, { "epoch": 1.97, "learning_rate": 4.671135130076736e-05, "loss": 0.0892, "step": 14060 }, { "epoch": 1.97, "learning_rate": 4.6710883398839606e-05, "loss": 0.1079, "step": 14062 }, { "epoch": 1.97, "learning_rate": 4.6710415496911845e-05, "loss": 0.0814, "step": 14064 }, { "epoch": 1.97, "learning_rate": 4.670994759498409e-05, "loss": 0.1106, "step": 14066 }, { "epoch": 1.97, "learning_rate": 4.670947969305634e-05, "loss": 0.0782, "step": 14068 }, { "epoch": 1.98, "learning_rate": 4.670901179112858e-05, "loss": 0.1078, "step": 14070 }, { "epoch": 1.98, "learning_rate": 4.670854388920082e-05, "loss": 0.0754, "step": 14072 }, { "epoch": 1.98, "learning_rate": 4.670807598727307e-05, "loss": 0.0854, "step": 14074 }, { "epoch": 1.98, "learning_rate": 4.6707608085345314e-05, "loss": 0.0843, "step": 14076 }, { "epoch": 1.98, "learning_rate": 4.670714018341756e-05, "loss": 0.0886, "step": 14078 }, { "epoch": 1.98, "learning_rate": 4.67066722814898e-05, "loss": 0.0833, "step": 14080 }, { "epoch": 1.98, "learning_rate": 4.6706204379562045e-05, "loss": 0.0828, "step": 14082 }, { "epoch": 1.98, "learning_rate": 4.670573647763429e-05, "loss": 0.079, "step": 14084 }, { "epoch": 1.98, "learning_rate": 4.6705268575706536e-05, "loss": 0.1032, "step": 14086 }, { "epoch": 1.98, "learning_rate": 4.6704800673778775e-05, "loss": 0.0608, "step": 14088 }, { "epoch": 1.98, "learning_rate": 4.670433277185102e-05, "loss": 0.0956, "step": 14090 }, { "epoch": 1.98, "learning_rate": 4.670386486992327e-05, "loss": 0.1089, "step": 14092 }, { "epoch": 1.98, "learning_rate": 4.670339696799551e-05, "loss": 0.0756, "step": 14094 }, { "epoch": 1.98, "learning_rate": 4.670292906606775e-05, "loss": 0.0905, "step": 14096 }, { "epoch": 1.98, "learning_rate": 4.670246116414e-05, "loss": 0.1066, "step": 14098 }, { "epoch": 1.98, "learning_rate": 4.670199326221224e-05, "loss": 0.096, "step": 14100 }, { "epoch": 1.98, "learning_rate": 4.670152536028449e-05, "loss": 0.0998, "step": 14102 }, { "epoch": 1.98, "learning_rate": 4.670105745835673e-05, "loss": 0.1179, "step": 14104 }, { "epoch": 1.98, "learning_rate": 4.6700589556428975e-05, "loss": 0.1025, "step": 14106 }, { "epoch": 1.98, "learning_rate": 4.6700121654501214e-05, "loss": 0.0868, "step": 14108 }, { "epoch": 1.98, "learning_rate": 4.669965375257346e-05, "loss": 0.071, "step": 14110 }, { "epoch": 1.98, "learning_rate": 4.6699185850645706e-05, "loss": 0.1024, "step": 14112 }, { "epoch": 1.98, "learning_rate": 4.669871794871795e-05, "loss": 0.0889, "step": 14114 }, { "epoch": 1.98, "learning_rate": 4.669825004679019e-05, "loss": 0.084, "step": 14116 }, { "epoch": 1.98, "learning_rate": 4.669778214486244e-05, "loss": 0.1139, "step": 14118 }, { "epoch": 1.98, "learning_rate": 4.669731424293468e-05, "loss": 0.0789, "step": 14120 }, { "epoch": 1.98, "learning_rate": 4.669684634100693e-05, "loss": 0.1202, "step": 14122 }, { "epoch": 1.98, "learning_rate": 4.669637843907917e-05, "loss": 0.0674, "step": 14124 }, { "epoch": 1.98, "learning_rate": 4.6695910537151414e-05, "loss": 0.0751, "step": 14126 }, { "epoch": 1.98, "learning_rate": 4.669544263522366e-05, "loss": 0.0863, "step": 14128 }, { "epoch": 1.98, "learning_rate": 4.6694974733295906e-05, "loss": 0.0698, "step": 14130 }, { "epoch": 1.98, "learning_rate": 4.6694506831368145e-05, "loss": 0.0945, "step": 14132 }, { "epoch": 1.98, "learning_rate": 4.669403892944039e-05, "loss": 0.1053, "step": 14134 }, { "epoch": 1.98, "learning_rate": 4.669357102751264e-05, "loss": 0.1102, "step": 14136 }, { "epoch": 1.98, "learning_rate": 4.669310312558488e-05, "loss": 0.0887, "step": 14138 }, { "epoch": 1.98, "learning_rate": 4.669263522365712e-05, "loss": 0.096, "step": 14140 }, { "epoch": 1.99, "learning_rate": 4.669216732172937e-05, "loss": 0.0923, "step": 14142 }, { "epoch": 1.99, "learning_rate": 4.669169941980161e-05, "loss": 0.0653, "step": 14144 }, { "epoch": 1.99, "learning_rate": 4.669123151787386e-05, "loss": 0.0936, "step": 14146 }, { "epoch": 1.99, "learning_rate": 4.66907636159461e-05, "loss": 0.1005, "step": 14148 }, { "epoch": 1.99, "learning_rate": 4.6690295714018345e-05, "loss": 0.0783, "step": 14150 }, { "epoch": 1.99, "learning_rate": 4.6689827812090584e-05, "loss": 0.0802, "step": 14152 }, { "epoch": 1.99, "learning_rate": 4.6689359910162836e-05, "loss": 0.0849, "step": 14154 }, { "epoch": 1.99, "learning_rate": 4.6688892008235076e-05, "loss": 0.0584, "step": 14156 }, { "epoch": 1.99, "learning_rate": 4.668842410630732e-05, "loss": 0.0892, "step": 14158 }, { "epoch": 1.99, "learning_rate": 4.668795620437956e-05, "loss": 0.0815, "step": 14160 }, { "epoch": 1.99, "learning_rate": 4.6687488302451806e-05, "loss": 0.0966, "step": 14162 }, { "epoch": 1.99, "learning_rate": 4.668702040052405e-05, "loss": 0.0969, "step": 14164 }, { "epoch": 1.99, "learning_rate": 4.66865524985963e-05, "loss": 0.0947, "step": 14166 }, { "epoch": 1.99, "learning_rate": 4.668608459666854e-05, "loss": 0.0803, "step": 14168 }, { "epoch": 1.99, "learning_rate": 4.668561669474078e-05, "loss": 0.0776, "step": 14170 }, { "epoch": 1.99, "learning_rate": 4.668514879281303e-05, "loss": 0.0828, "step": 14172 }, { "epoch": 1.99, "learning_rate": 4.6684680890885275e-05, "loss": 0.0849, "step": 14174 }, { "epoch": 1.99, "learning_rate": 4.6684212988957514e-05, "loss": 0.0927, "step": 14176 }, { "epoch": 1.99, "learning_rate": 4.668374508702976e-05, "loss": 0.1052, "step": 14178 }, { "epoch": 1.99, "learning_rate": 4.6683277185102006e-05, "loss": 0.1121, "step": 14180 }, { "epoch": 1.99, "learning_rate": 4.668280928317425e-05, "loss": 0.0738, "step": 14182 }, { "epoch": 1.99, "learning_rate": 4.668234138124649e-05, "loss": 0.0724, "step": 14184 }, { "epoch": 1.99, "learning_rate": 4.668187347931874e-05, "loss": 0.0853, "step": 14186 }, { "epoch": 1.99, "learning_rate": 4.668140557739098e-05, "loss": 0.086, "step": 14188 }, { "epoch": 1.99, "learning_rate": 4.668093767546323e-05, "loss": 0.0838, "step": 14190 }, { "epoch": 1.99, "learning_rate": 4.668046977353547e-05, "loss": 0.064, "step": 14192 }, { "epoch": 1.99, "learning_rate": 4.6680001871607714e-05, "loss": 0.0782, "step": 14194 }, { "epoch": 1.99, "learning_rate": 4.667953396967995e-05, "loss": 0.0698, "step": 14196 }, { "epoch": 1.99, "learning_rate": 4.6679066067752206e-05, "loss": 0.099, "step": 14198 }, { "epoch": 1.99, "learning_rate": 4.6678598165824445e-05, "loss": 0.089, "step": 14200 }, { "epoch": 1.99, "learning_rate": 4.667813026389669e-05, "loss": 0.0976, "step": 14202 }, { "epoch": 1.99, "learning_rate": 4.667766236196893e-05, "loss": 0.0739, "step": 14204 }, { "epoch": 1.99, "learning_rate": 4.6677194460041176e-05, "loss": 0.0998, "step": 14206 }, { "epoch": 1.99, "learning_rate": 4.667672655811342e-05, "loss": 0.0815, "step": 14208 }, { "epoch": 1.99, "learning_rate": 4.667625865618567e-05, "loss": 0.0717, "step": 14210 }, { "epoch": 1.99, "learning_rate": 4.667579075425791e-05, "loss": 0.0927, "step": 14212 }, { "epoch": 2.0, "learning_rate": 4.667532285233015e-05, "loss": 0.0788, "step": 14214 }, { "epoch": 2.0, "learning_rate": 4.66748549504024e-05, "loss": 0.0717, "step": 14216 }, { "epoch": 2.0, "learning_rate": 4.6674387048474645e-05, "loss": 0.0824, "step": 14218 }, { "epoch": 2.0, "learning_rate": 4.6673919146546884e-05, "loss": 0.0815, "step": 14220 }, { "epoch": 2.0, "learning_rate": 4.667345124461913e-05, "loss": 0.0733, "step": 14222 }, { "epoch": 2.0, "learning_rate": 4.6672983342691376e-05, "loss": 0.068, "step": 14224 }, { "epoch": 2.0, "learning_rate": 4.667251544076362e-05, "loss": 0.0959, "step": 14226 }, { "epoch": 2.0, "learning_rate": 4.667204753883586e-05, "loss": 0.0813, "step": 14228 }, { "epoch": 2.0, "learning_rate": 4.6671579636908106e-05, "loss": 0.1064, "step": 14230 }, { "epoch": 2.0, "learning_rate": 4.667111173498035e-05, "loss": 0.0949, "step": 14232 }, { "epoch": 2.0, "learning_rate": 4.66706438330526e-05, "loss": 0.104, "step": 14234 }, { "epoch": 2.0, "learning_rate": 4.667017593112484e-05, "loss": 0.0782, "step": 14236 }, { "epoch": 2.0, "learning_rate": 4.666970802919708e-05, "loss": 0.0857, "step": 14238 }, { "epoch": 2.0, "learning_rate": 4.666924012726932e-05, "loss": 0.0854, "step": 14240 }, { "epoch": 2.0, "learning_rate": 4.6668772225341575e-05, "loss": 0.1081, "step": 14242 }, { "epoch": 2.0, "learning_rate": 4.6668304323413814e-05, "loss": 0.0633, "step": 14244 }, { "epoch": 2.0, "learning_rate": 4.666783642148606e-05, "loss": 0.0887, "step": 14246 }, { "epoch": 2.0, "learning_rate": 4.666760247052218e-05, "loss": 0.1334, "step": 14248 }, { "epoch": 2.0, "learning_rate": 4.6667134568594426e-05, "loss": 0.0486, "step": 14250 }, { "epoch": 2.0, "learning_rate": 4.666666666666667e-05, "loss": 0.0616, "step": 14252 }, { "epoch": 2.0, "learning_rate": 4.666619876473891e-05, "loss": 0.0566, "step": 14254 }, { "epoch": 2.0, "learning_rate": 4.666573086281116e-05, "loss": 0.0476, "step": 14256 }, { "epoch": 2.0, "learning_rate": 4.6665262960883396e-05, "loss": 0.0517, "step": 14258 }, { "epoch": 2.0, "learning_rate": 4.666479505895565e-05, "loss": 0.0507, "step": 14260 }, { "epoch": 2.0, "learning_rate": 4.666432715702789e-05, "loss": 0.0484, "step": 14262 }, { "epoch": 2.0, "learning_rate": 4.6663859255100134e-05, "loss": 0.0614, "step": 14264 }, { "epoch": 2.0, "learning_rate": 4.666339135317237e-05, "loss": 0.0486, "step": 14266 }, { "epoch": 2.0, "learning_rate": 4.6662923451244625e-05, "loss": 0.0598, "step": 14268 }, { "epoch": 2.0, "learning_rate": 4.6662455549316864e-05, "loss": 0.0537, "step": 14270 }, { "epoch": 2.0, "learning_rate": 4.666198764738911e-05, "loss": 0.0677, "step": 14272 }, { "epoch": 2.0, "learning_rate": 4.666151974546135e-05, "loss": 0.0623, "step": 14274 }, { "epoch": 2.0, "learning_rate": 4.6661051843533595e-05, "loss": 0.0255, "step": 14276 }, { "epoch": 2.0, "learning_rate": 4.666058394160584e-05, "loss": 0.0463, "step": 14278 }, { "epoch": 2.0, "learning_rate": 4.666011603967809e-05, "loss": 0.0572, "step": 14280 }, { "epoch": 2.0, "learning_rate": 4.6659648137750326e-05, "loss": 0.0471, "step": 14282 }, { "epoch": 2.01, "learning_rate": 4.665918023582257e-05, "loss": 0.0744, "step": 14284 }, { "epoch": 2.01, "learning_rate": 4.665871233389482e-05, "loss": 0.0569, "step": 14286 }, { "epoch": 2.01, "learning_rate": 4.6658244431967064e-05, "loss": 0.0458, "step": 14288 }, { "epoch": 2.01, "learning_rate": 4.66577765300393e-05, "loss": 0.0442, "step": 14290 }, { "epoch": 2.01, "learning_rate": 4.665730862811155e-05, "loss": 0.0391, "step": 14292 }, { "epoch": 2.01, "learning_rate": 4.6656840726183795e-05, "loss": 0.0603, "step": 14294 }, { "epoch": 2.01, "learning_rate": 4.665637282425604e-05, "loss": 0.0438, "step": 14296 }, { "epoch": 2.01, "learning_rate": 4.665590492232828e-05, "loss": 0.0394, "step": 14298 }, { "epoch": 2.01, "learning_rate": 4.6655437020400526e-05, "loss": 0.0503, "step": 14300 }, { "epoch": 2.01, "learning_rate": 4.665496911847277e-05, "loss": 0.0505, "step": 14302 }, { "epoch": 2.01, "learning_rate": 4.665450121654502e-05, "loss": 0.0406, "step": 14304 }, { "epoch": 2.01, "learning_rate": 4.665403331461726e-05, "loss": 0.0445, "step": 14306 }, { "epoch": 2.01, "learning_rate": 4.66535654126895e-05, "loss": 0.0477, "step": 14308 }, { "epoch": 2.01, "learning_rate": 4.665309751076174e-05, "loss": 0.0571, "step": 14310 }, { "epoch": 2.01, "learning_rate": 4.6652629608833995e-05, "loss": 0.0366, "step": 14312 }, { "epoch": 2.01, "learning_rate": 4.6652161706906234e-05, "loss": 0.0402, "step": 14314 }, { "epoch": 2.01, "learning_rate": 4.665169380497848e-05, "loss": 0.0381, "step": 14316 }, { "epoch": 2.01, "learning_rate": 4.665122590305072e-05, "loss": 0.0498, "step": 14318 }, { "epoch": 2.01, "learning_rate": 4.665075800112297e-05, "loss": 0.0558, "step": 14320 }, { "epoch": 2.01, "learning_rate": 4.665029009919521e-05, "loss": 0.0462, "step": 14322 }, { "epoch": 2.01, "learning_rate": 4.664982219726746e-05, "loss": 0.0783, "step": 14324 }, { "epoch": 2.01, "learning_rate": 4.6649354295339696e-05, "loss": 0.061, "step": 14326 }, { "epoch": 2.01, "learning_rate": 4.664888639341194e-05, "loss": 0.0652, "step": 14328 }, { "epoch": 2.01, "learning_rate": 4.664841849148419e-05, "loss": 0.0561, "step": 14330 }, { "epoch": 2.01, "learning_rate": 4.6647950589556434e-05, "loss": 0.0597, "step": 14332 }, { "epoch": 2.01, "learning_rate": 4.664748268762867e-05, "loss": 0.0651, "step": 14334 }, { "epoch": 2.01, "learning_rate": 4.664701478570092e-05, "loss": 0.0486, "step": 14336 }, { "epoch": 2.01, "learning_rate": 4.6646546883773165e-05, "loss": 0.0443, "step": 14338 }, { "epoch": 2.01, "learning_rate": 4.664607898184541e-05, "loss": 0.0463, "step": 14340 }, { "epoch": 2.01, "learning_rate": 4.664561107991765e-05, "loss": 0.0457, "step": 14342 }, { "epoch": 2.01, "learning_rate": 4.6645143177989895e-05, "loss": 0.0446, "step": 14344 }, { "epoch": 2.01, "learning_rate": 4.664467527606214e-05, "loss": 0.0688, "step": 14346 }, { "epoch": 2.01, "learning_rate": 4.664420737413439e-05, "loss": 0.04, "step": 14348 }, { "epoch": 2.01, "learning_rate": 4.6643739472206626e-05, "loss": 0.048, "step": 14350 }, { "epoch": 2.01, "learning_rate": 4.664327157027887e-05, "loss": 0.0438, "step": 14352 }, { "epoch": 2.01, "learning_rate": 4.664280366835112e-05, "loss": 0.0598, "step": 14354 }, { "epoch": 2.02, "learning_rate": 4.6642335766423364e-05, "loss": 0.051, "step": 14356 }, { "epoch": 2.02, "learning_rate": 4.66418678644956e-05, "loss": 0.0522, "step": 14358 }, { "epoch": 2.02, "learning_rate": 4.664139996256785e-05, "loss": 0.0475, "step": 14360 }, { "epoch": 2.02, "learning_rate": 4.664093206064009e-05, "loss": 0.0378, "step": 14362 }, { "epoch": 2.02, "learning_rate": 4.664046415871234e-05, "loss": 0.0474, "step": 14364 }, { "epoch": 2.02, "learning_rate": 4.663999625678458e-05, "loss": 0.0579, "step": 14366 }, { "epoch": 2.02, "learning_rate": 4.6639528354856826e-05, "loss": 0.0463, "step": 14368 }, { "epoch": 2.02, "learning_rate": 4.6639060452929065e-05, "loss": 0.0532, "step": 14370 }, { "epoch": 2.02, "learning_rate": 4.663859255100131e-05, "loss": 0.0422, "step": 14372 }, { "epoch": 2.02, "learning_rate": 4.663812464907356e-05, "loss": 0.0569, "step": 14374 }, { "epoch": 2.02, "learning_rate": 4.66376567471458e-05, "loss": 0.0525, "step": 14376 }, { "epoch": 2.02, "learning_rate": 4.663718884521804e-05, "loss": 0.0692, "step": 14378 }, { "epoch": 2.02, "learning_rate": 4.663672094329029e-05, "loss": 0.0638, "step": 14380 }, { "epoch": 2.02, "learning_rate": 4.6636253041362534e-05, "loss": 0.0563, "step": 14382 }, { "epoch": 2.02, "learning_rate": 4.663578513943478e-05, "loss": 0.0384, "step": 14384 }, { "epoch": 2.02, "learning_rate": 4.663531723750702e-05, "loss": 0.0473, "step": 14386 }, { "epoch": 2.02, "learning_rate": 4.6634849335579265e-05, "loss": 0.0254, "step": 14388 }, { "epoch": 2.02, "learning_rate": 4.663438143365151e-05, "loss": 0.0498, "step": 14390 }, { "epoch": 2.02, "learning_rate": 4.663391353172376e-05, "loss": 0.0498, "step": 14392 }, { "epoch": 2.02, "learning_rate": 4.6633445629795996e-05, "loss": 0.0393, "step": 14394 }, { "epoch": 2.02, "learning_rate": 4.663297772786824e-05, "loss": 0.0416, "step": 14396 }, { "epoch": 2.02, "learning_rate": 4.663250982594049e-05, "loss": 0.0513, "step": 14398 }, { "epoch": 2.02, "learning_rate": 4.6632041924012734e-05, "loss": 0.0553, "step": 14400 }, { "epoch": 2.02, "learning_rate": 4.663157402208497e-05, "loss": 0.0621, "step": 14402 }, { "epoch": 2.02, "learning_rate": 4.663110612015722e-05, "loss": 0.0484, "step": 14404 }, { "epoch": 2.02, "learning_rate": 4.663063821822946e-05, "loss": 0.0625, "step": 14406 }, { "epoch": 2.02, "learning_rate": 4.663017031630171e-05, "loss": 0.0455, "step": 14408 }, { "epoch": 2.02, "learning_rate": 4.662970241437395e-05, "loss": 0.0517, "step": 14410 }, { "epoch": 2.02, "learning_rate": 4.6629234512446195e-05, "loss": 0.053, "step": 14412 }, { "epoch": 2.02, "learning_rate": 4.6628766610518435e-05, "loss": 0.0519, "step": 14414 }, { "epoch": 2.02, "learning_rate": 4.662829870859069e-05, "loss": 0.0597, "step": 14416 }, { "epoch": 2.02, "learning_rate": 4.6627830806662926e-05, "loss": 0.0446, "step": 14418 }, { "epoch": 2.02, "learning_rate": 4.662736290473517e-05, "loss": 0.036, "step": 14420 }, { "epoch": 2.02, "learning_rate": 4.662689500280741e-05, "loss": 0.0692, "step": 14422 }, { "epoch": 2.02, "learning_rate": 4.662642710087966e-05, "loss": 0.0571, "step": 14424 }, { "epoch": 2.02, "learning_rate": 4.66259591989519e-05, "loss": 0.064, "step": 14426 }, { "epoch": 2.03, "learning_rate": 4.662549129702415e-05, "loss": 0.0612, "step": 14428 }, { "epoch": 2.03, "learning_rate": 4.662502339509639e-05, "loss": 0.0411, "step": 14430 }, { "epoch": 2.03, "learning_rate": 4.6624555493168634e-05, "loss": 0.0502, "step": 14432 }, { "epoch": 2.03, "learning_rate": 4.662408759124088e-05, "loss": 0.0466, "step": 14434 }, { "epoch": 2.03, "learning_rate": 4.6623619689313126e-05, "loss": 0.0592, "step": 14436 }, { "epoch": 2.03, "learning_rate": 4.6623151787385365e-05, "loss": 0.0625, "step": 14438 }, { "epoch": 2.03, "learning_rate": 4.6622683885457604e-05, "loss": 0.0395, "step": 14440 }, { "epoch": 2.03, "learning_rate": 4.662221598352986e-05, "loss": 0.0376, "step": 14442 }, { "epoch": 2.03, "learning_rate": 4.6621748081602096e-05, "loss": 0.0531, "step": 14444 }, { "epoch": 2.03, "learning_rate": 4.662128017967434e-05, "loss": 0.0522, "step": 14446 }, { "epoch": 2.03, "learning_rate": 4.662081227774658e-05, "loss": 0.0555, "step": 14448 }, { "epoch": 2.03, "learning_rate": 4.6620344375818834e-05, "loss": 0.0436, "step": 14450 }, { "epoch": 2.03, "learning_rate": 4.661987647389107e-05, "loss": 0.036, "step": 14452 }, { "epoch": 2.03, "learning_rate": 4.661940857196332e-05, "loss": 0.0492, "step": 14454 }, { "epoch": 2.03, "learning_rate": 4.661894067003556e-05, "loss": 0.0557, "step": 14456 }, { "epoch": 2.03, "learning_rate": 4.6618472768107804e-05, "loss": 0.0332, "step": 14458 }, { "epoch": 2.03, "learning_rate": 4.661800486618005e-05, "loss": 0.0464, "step": 14460 }, { "epoch": 2.03, "learning_rate": 4.6617536964252296e-05, "loss": 0.0492, "step": 14462 }, { "epoch": 2.03, "learning_rate": 4.6617069062324535e-05, "loss": 0.0456, "step": 14464 }, { "epoch": 2.03, "learning_rate": 4.661660116039678e-05, "loss": 0.0545, "step": 14466 }, { "epoch": 2.03, "learning_rate": 4.661613325846903e-05, "loss": 0.0428, "step": 14468 }, { "epoch": 2.03, "learning_rate": 4.661566535654127e-05, "loss": 0.0435, "step": 14470 }, { "epoch": 2.03, "learning_rate": 4.661519745461351e-05, "loss": 0.0409, "step": 14472 }, { "epoch": 2.03, "learning_rate": 4.661472955268576e-05, "loss": 0.0642, "step": 14474 }, { "epoch": 2.03, "learning_rate": 4.6614261650758004e-05, "loss": 0.066, "step": 14476 }, { "epoch": 2.03, "learning_rate": 4.661379374883025e-05, "loss": 0.064, "step": 14478 }, { "epoch": 2.03, "learning_rate": 4.661332584690249e-05, "loss": 0.0546, "step": 14480 }, { "epoch": 2.03, "learning_rate": 4.6612857944974735e-05, "loss": 0.0944, "step": 14482 }, { "epoch": 2.03, "learning_rate": 4.661239004304698e-05, "loss": 0.0726, "step": 14484 }, { "epoch": 2.03, "learning_rate": 4.6611922141119226e-05, "loss": 0.0468, "step": 14486 }, { "epoch": 2.03, "learning_rate": 4.6611454239191466e-05, "loss": 0.0404, "step": 14488 }, { "epoch": 2.03, "learning_rate": 4.661098633726371e-05, "loss": 0.056, "step": 14490 }, { "epoch": 2.03, "learning_rate": 4.661051843533595e-05, "loss": 0.0486, "step": 14492 }, { "epoch": 2.03, "learning_rate": 4.66100505334082e-05, "loss": 0.0692, "step": 14494 }, { "epoch": 2.03, "learning_rate": 4.660958263148044e-05, "loss": 0.0455, "step": 14496 }, { "epoch": 2.04, "learning_rate": 4.660911472955269e-05, "loss": 0.042, "step": 14498 }, { "epoch": 2.04, "learning_rate": 4.660864682762493e-05, "loss": 0.0303, "step": 14500 }, { "epoch": 2.04, "learning_rate": 4.6608178925697173e-05, "loss": 0.0518, "step": 14502 }, { "epoch": 2.04, "learning_rate": 4.660771102376942e-05, "loss": 0.0586, "step": 14504 }, { "epoch": 2.04, "learning_rate": 4.6607243121841665e-05, "loss": 0.0516, "step": 14506 }, { "epoch": 2.04, "learning_rate": 4.6606775219913904e-05, "loss": 0.0457, "step": 14508 }, { "epoch": 2.04, "learning_rate": 4.660630731798615e-05, "loss": 0.07, "step": 14510 }, { "epoch": 2.04, "learning_rate": 4.6605839416058396e-05, "loss": 0.0537, "step": 14512 }, { "epoch": 2.04, "learning_rate": 4.660537151413064e-05, "loss": 0.041, "step": 14514 }, { "epoch": 2.04, "learning_rate": 4.660490361220288e-05, "loss": 0.0478, "step": 14516 }, { "epoch": 2.04, "learning_rate": 4.660443571027513e-05, "loss": 0.0361, "step": 14518 }, { "epoch": 2.04, "learning_rate": 4.660396780834737e-05, "loss": 0.0689, "step": 14520 }, { "epoch": 2.04, "learning_rate": 4.660349990641962e-05, "loss": 0.0567, "step": 14522 }, { "epoch": 2.04, "learning_rate": 4.660303200449186e-05, "loss": 0.0406, "step": 14524 }, { "epoch": 2.04, "learning_rate": 4.6602564102564104e-05, "loss": 0.0564, "step": 14526 }, { "epoch": 2.04, "learning_rate": 4.660209620063635e-05, "loss": 0.0621, "step": 14528 }, { "epoch": 2.04, "learning_rate": 4.6601628298708596e-05, "loss": 0.063, "step": 14530 }, { "epoch": 2.04, "learning_rate": 4.6601160396780835e-05, "loss": 0.058, "step": 14532 }, { "epoch": 2.04, "learning_rate": 4.660069249485308e-05, "loss": 0.0404, "step": 14534 }, { "epoch": 2.04, "learning_rate": 4.660022459292532e-05, "loss": 0.0621, "step": 14536 }, { "epoch": 2.04, "learning_rate": 4.659975669099757e-05, "loss": 0.0534, "step": 14538 }, { "epoch": 2.04, "learning_rate": 4.659928878906981e-05, "loss": 0.07, "step": 14540 }, { "epoch": 2.04, "learning_rate": 4.659882088714206e-05, "loss": 0.043, "step": 14542 }, { "epoch": 2.04, "learning_rate": 4.65983529852143e-05, "loss": 0.0656, "step": 14544 }, { "epoch": 2.04, "learning_rate": 4.659788508328655e-05, "loss": 0.05, "step": 14546 }, { "epoch": 2.04, "learning_rate": 4.659741718135879e-05, "loss": 0.063, "step": 14548 }, { "epoch": 2.04, "learning_rate": 4.6596949279431035e-05, "loss": 0.0544, "step": 14550 }, { "epoch": 2.04, "learning_rate": 4.6596481377503274e-05, "loss": 0.0418, "step": 14552 }, { "epoch": 2.04, "learning_rate": 4.659601347557552e-05, "loss": 0.0418, "step": 14554 }, { "epoch": 2.04, "learning_rate": 4.6595545573647766e-05, "loss": 0.0735, "step": 14556 }, { "epoch": 2.04, "learning_rate": 4.659507767172001e-05, "loss": 0.0715, "step": 14558 }, { "epoch": 2.04, "learning_rate": 4.659460976979225e-05, "loss": 0.0477, "step": 14560 }, { "epoch": 2.04, "learning_rate": 4.6594141867864497e-05, "loss": 0.0454, "step": 14562 }, { "epoch": 2.04, "learning_rate": 4.659367396593674e-05, "loss": 0.0515, "step": 14564 }, { "epoch": 2.04, "learning_rate": 4.659320606400899e-05, "loss": 0.0504, "step": 14566 }, { "epoch": 2.04, "learning_rate": 4.659273816208123e-05, "loss": 0.0641, "step": 14568 }, { "epoch": 2.05, "learning_rate": 4.6592270260153473e-05, "loss": 0.0577, "step": 14570 }, { "epoch": 2.05, "learning_rate": 4.659180235822572e-05, "loss": 0.0744, "step": 14572 }, { "epoch": 2.05, "learning_rate": 4.6591334456297965e-05, "loss": 0.0584, "step": 14574 }, { "epoch": 2.05, "learning_rate": 4.6590866554370204e-05, "loss": 0.0404, "step": 14576 }, { "epoch": 2.05, "learning_rate": 4.659039865244245e-05, "loss": 0.0616, "step": 14578 }, { "epoch": 2.05, "learning_rate": 4.6589930750514696e-05, "loss": 0.0401, "step": 14580 }, { "epoch": 2.05, "learning_rate": 4.658946284858694e-05, "loss": 0.0437, "step": 14582 }, { "epoch": 2.05, "learning_rate": 4.658899494665918e-05, "loss": 0.0641, "step": 14584 }, { "epoch": 2.05, "learning_rate": 4.658852704473143e-05, "loss": 0.0548, "step": 14586 }, { "epoch": 2.05, "learning_rate": 4.6588059142803666e-05, "loss": 0.0503, "step": 14588 }, { "epoch": 2.05, "learning_rate": 4.658759124087592e-05, "loss": 0.0467, "step": 14590 }, { "epoch": 2.05, "learning_rate": 4.658712333894816e-05, "loss": 0.0517, "step": 14592 }, { "epoch": 2.05, "learning_rate": 4.6586655437020404e-05, "loss": 0.0374, "step": 14594 }, { "epoch": 2.05, "learning_rate": 4.658618753509264e-05, "loss": 0.0573, "step": 14596 }, { "epoch": 2.05, "learning_rate": 4.6585719633164896e-05, "loss": 0.0446, "step": 14598 }, { "epoch": 2.05, "learning_rate": 4.6585251731237135e-05, "loss": 0.0477, "step": 14600 }, { "epoch": 2.05, "learning_rate": 4.658478382930938e-05, "loss": 0.056, "step": 14602 }, { "epoch": 2.05, "learning_rate": 4.658431592738162e-05, "loss": 0.0613, "step": 14604 }, { "epoch": 2.05, "learning_rate": 4.6583848025453866e-05, "loss": 0.0651, "step": 14606 }, { "epoch": 2.05, "learning_rate": 4.658338012352611e-05, "loss": 0.0466, "step": 14608 }, { "epoch": 2.05, "learning_rate": 4.658291222159836e-05, "loss": 0.0451, "step": 14610 }, { "epoch": 2.05, "learning_rate": 4.65824443196706e-05, "loss": 0.0697, "step": 14612 }, { "epoch": 2.05, "learning_rate": 4.658197641774284e-05, "loss": 0.0467, "step": 14614 }, { "epoch": 2.05, "learning_rate": 4.658150851581509e-05, "loss": 0.0579, "step": 14616 }, { "epoch": 2.05, "learning_rate": 4.6581040613887335e-05, "loss": 0.0528, "step": 14618 }, { "epoch": 2.05, "learning_rate": 4.6580572711959574e-05, "loss": 0.0398, "step": 14620 }, { "epoch": 2.05, "learning_rate": 4.658010481003182e-05, "loss": 0.0467, "step": 14622 }, { "epoch": 2.05, "learning_rate": 4.6579636908104066e-05, "loss": 0.0417, "step": 14624 }, { "epoch": 2.05, "learning_rate": 4.657916900617631e-05, "loss": 0.054, "step": 14626 }, { "epoch": 2.05, "learning_rate": 4.657870110424855e-05, "loss": 0.0376, "step": 14628 }, { "epoch": 2.05, "learning_rate": 4.6578233202320797e-05, "loss": 0.0616, "step": 14630 }, { "epoch": 2.05, "learning_rate": 4.657776530039304e-05, "loss": 0.0564, "step": 14632 }, { "epoch": 2.05, "learning_rate": 4.657729739846529e-05, "loss": 0.06, "step": 14634 }, { "epoch": 2.05, "learning_rate": 4.657682949653753e-05, "loss": 0.0604, "step": 14636 }, { "epoch": 2.05, "learning_rate": 4.6576361594609773e-05, "loss": 0.0494, "step": 14638 }, { "epoch": 2.06, "learning_rate": 4.657589369268201e-05, "loss": 0.0466, "step": 14640 }, { "epoch": 2.06, "learning_rate": 4.6575425790754265e-05, "loss": 0.0515, "step": 14642 }, { "epoch": 2.06, "learning_rate": 4.6574957888826504e-05, "loss": 0.0423, "step": 14644 }, { "epoch": 2.06, "learning_rate": 4.657448998689875e-05, "loss": 0.0596, "step": 14646 }, { "epoch": 2.06, "learning_rate": 4.657402208497099e-05, "loss": 0.0368, "step": 14648 }, { "epoch": 2.06, "learning_rate": 4.6573554183043235e-05, "loss": 0.0494, "step": 14650 }, { "epoch": 2.06, "learning_rate": 4.657308628111548e-05, "loss": 0.0445, "step": 14652 }, { "epoch": 2.06, "learning_rate": 4.657261837918773e-05, "loss": 0.0517, "step": 14654 }, { "epoch": 2.06, "learning_rate": 4.6572150477259966e-05, "loss": 0.0482, "step": 14656 }, { "epoch": 2.06, "learning_rate": 4.657168257533221e-05, "loss": 0.0476, "step": 14658 }, { "epoch": 2.06, "learning_rate": 4.657121467340446e-05, "loss": 0.0343, "step": 14660 }, { "epoch": 2.06, "learning_rate": 4.6570746771476704e-05, "loss": 0.0455, "step": 14662 }, { "epoch": 2.06, "learning_rate": 4.657027886954894e-05, "loss": 0.0417, "step": 14664 }, { "epoch": 2.06, "learning_rate": 4.656981096762119e-05, "loss": 0.0496, "step": 14666 }, { "epoch": 2.06, "learning_rate": 4.6569343065693435e-05, "loss": 0.0366, "step": 14668 }, { "epoch": 2.06, "learning_rate": 4.656887516376568e-05, "loss": 0.0536, "step": 14670 }, { "epoch": 2.06, "learning_rate": 4.656840726183792e-05, "loss": 0.0497, "step": 14672 }, { "epoch": 2.06, "learning_rate": 4.6567939359910166e-05, "loss": 0.0493, "step": 14674 }, { "epoch": 2.06, "learning_rate": 4.656747145798241e-05, "loss": 0.0546, "step": 14676 }, { "epoch": 2.06, "learning_rate": 4.656700355605466e-05, "loss": 0.0519, "step": 14678 }, { "epoch": 2.06, "learning_rate": 4.65665356541269e-05, "loss": 0.0568, "step": 14680 }, { "epoch": 2.06, "learning_rate": 4.656606775219914e-05, "loss": 0.0414, "step": 14682 }, { "epoch": 2.06, "learning_rate": 4.656559985027138e-05, "loss": 0.0476, "step": 14684 }, { "epoch": 2.06, "learning_rate": 4.6565131948343635e-05, "loss": 0.0621, "step": 14686 }, { "epoch": 2.06, "learning_rate": 4.6564664046415874e-05, "loss": 0.0392, "step": 14688 }, { "epoch": 2.06, "learning_rate": 4.656419614448812e-05, "loss": 0.0544, "step": 14690 }, { "epoch": 2.06, "learning_rate": 4.656372824256036e-05, "loss": 0.0524, "step": 14692 }, { "epoch": 2.06, "learning_rate": 4.6563260340632605e-05, "loss": 0.0526, "step": 14694 }, { "epoch": 2.06, "learning_rate": 4.656279243870485e-05, "loss": 0.0513, "step": 14696 }, { "epoch": 2.06, "learning_rate": 4.656232453677709e-05, "loss": 0.0559, "step": 14698 }, { "epoch": 2.06, "learning_rate": 4.6561856634849336e-05, "loss": 0.0438, "step": 14700 }, { "epoch": 2.06, "learning_rate": 4.656138873292158e-05, "loss": 0.055, "step": 14702 }, { "epoch": 2.06, "learning_rate": 4.656092083099383e-05, "loss": 0.0487, "step": 14704 }, { "epoch": 2.06, "learning_rate": 4.656045292906607e-05, "loss": 0.0491, "step": 14706 }, { "epoch": 2.06, "learning_rate": 4.655998502713831e-05, "loss": 0.0372, "step": 14708 }, { "epoch": 2.06, "learning_rate": 4.655951712521056e-05, "loss": 0.056, "step": 14710 }, { "epoch": 2.07, "learning_rate": 4.6559049223282804e-05, "loss": 0.054, "step": 14712 }, { "epoch": 2.07, "learning_rate": 4.6558581321355044e-05, "loss": 0.0396, "step": 14714 }, { "epoch": 2.07, "learning_rate": 4.655811341942729e-05, "loss": 0.0446, "step": 14716 }, { "epoch": 2.07, "learning_rate": 4.655764551749953e-05, "loss": 0.05, "step": 14718 }, { "epoch": 2.07, "learning_rate": 4.655717761557178e-05, "loss": 0.0512, "step": 14720 }, { "epoch": 2.07, "learning_rate": 4.655670971364402e-05, "loss": 0.0376, "step": 14722 }, { "epoch": 2.07, "learning_rate": 4.6556241811716266e-05, "loss": 0.0605, "step": 14724 }, { "epoch": 2.07, "learning_rate": 4.6555773909788505e-05, "loss": 0.0378, "step": 14726 }, { "epoch": 2.07, "learning_rate": 4.655530600786076e-05, "loss": 0.0732, "step": 14728 }, { "epoch": 2.07, "learning_rate": 4.6554838105933e-05, "loss": 0.0635, "step": 14730 }, { "epoch": 2.07, "learning_rate": 4.655437020400524e-05, "loss": 0.0476, "step": 14732 }, { "epoch": 2.07, "learning_rate": 4.655390230207748e-05, "loss": 0.0382, "step": 14734 }, { "epoch": 2.07, "learning_rate": 4.655343440014973e-05, "loss": 0.0371, "step": 14736 }, { "epoch": 2.07, "learning_rate": 4.6552966498221974e-05, "loss": 0.0559, "step": 14738 }, { "epoch": 2.07, "learning_rate": 4.655249859629422e-05, "loss": 0.0363, "step": 14740 }, { "epoch": 2.07, "learning_rate": 4.655203069436646e-05, "loss": 0.0508, "step": 14742 }, { "epoch": 2.07, "learning_rate": 4.6551562792438705e-05, "loss": 0.0466, "step": 14744 }, { "epoch": 2.07, "learning_rate": 4.655109489051095e-05, "loss": 0.0489, "step": 14746 }, { "epoch": 2.07, "learning_rate": 4.65506269885832e-05, "loss": 0.066, "step": 14748 }, { "epoch": 2.07, "learning_rate": 4.6550159086655436e-05, "loss": 0.0509, "step": 14750 }, { "epoch": 2.07, "learning_rate": 4.654969118472768e-05, "loss": 0.0368, "step": 14752 }, { "epoch": 2.07, "learning_rate": 4.654922328279993e-05, "loss": 0.0315, "step": 14754 }, { "epoch": 2.07, "learning_rate": 4.6548755380872174e-05, "loss": 0.0598, "step": 14756 }, { "epoch": 2.07, "learning_rate": 4.654828747894441e-05, "loss": 0.0678, "step": 14758 }, { "epoch": 2.07, "learning_rate": 4.654781957701666e-05, "loss": 0.0628, "step": 14760 }, { "epoch": 2.07, "learning_rate": 4.6547351675088905e-05, "loss": 0.0318, "step": 14762 }, { "epoch": 2.07, "learning_rate": 4.654688377316115e-05, "loss": 0.0581, "step": 14764 }, { "epoch": 2.07, "learning_rate": 4.654641587123339e-05, "loss": 0.0387, "step": 14766 }, { "epoch": 2.07, "learning_rate": 4.6545947969305636e-05, "loss": 0.0604, "step": 14768 }, { "epoch": 2.07, "learning_rate": 4.6545480067377875e-05, "loss": 0.0608, "step": 14770 }, { "epoch": 2.07, "learning_rate": 4.654501216545013e-05, "loss": 0.0671, "step": 14772 }, { "epoch": 2.07, "learning_rate": 4.654454426352237e-05, "loss": 0.0544, "step": 14774 }, { "epoch": 2.07, "learning_rate": 4.654407636159461e-05, "loss": 0.0495, "step": 14776 }, { "epoch": 2.07, "learning_rate": 4.654360845966685e-05, "loss": 0.0484, "step": 14778 }, { "epoch": 2.07, "learning_rate": 4.6543140557739104e-05, "loss": 0.0563, "step": 14780 }, { "epoch": 2.07, "learning_rate": 4.6542672655811344e-05, "loss": 0.0578, "step": 14782 }, { "epoch": 2.08, "learning_rate": 4.654220475388359e-05, "loss": 0.0636, "step": 14784 }, { "epoch": 2.08, "learning_rate": 4.654173685195583e-05, "loss": 0.0592, "step": 14786 }, { "epoch": 2.08, "learning_rate": 4.6541268950028075e-05, "loss": 0.0511, "step": 14788 }, { "epoch": 2.08, "learning_rate": 4.654080104810032e-05, "loss": 0.0665, "step": 14790 }, { "epoch": 2.08, "learning_rate": 4.6540333146172566e-05, "loss": 0.0539, "step": 14792 }, { "epoch": 2.08, "learning_rate": 4.6539865244244806e-05, "loss": 0.059, "step": 14794 }, { "epoch": 2.08, "learning_rate": 4.653939734231705e-05, "loss": 0.0649, "step": 14796 }, { "epoch": 2.08, "learning_rate": 4.65389294403893e-05, "loss": 0.0448, "step": 14798 }, { "epoch": 2.08, "learning_rate": 4.653846153846154e-05, "loss": 0.0811, "step": 14800 }, { "epoch": 2.08, "learning_rate": 4.653799363653378e-05, "loss": 0.0506, "step": 14802 }, { "epoch": 2.08, "learning_rate": 4.653752573460603e-05, "loss": 0.0577, "step": 14804 }, { "epoch": 2.08, "learning_rate": 4.6537057832678274e-05, "loss": 0.0445, "step": 14806 }, { "epoch": 2.08, "learning_rate": 4.653658993075052e-05, "loss": 0.04, "step": 14808 }, { "epoch": 2.08, "learning_rate": 4.653612202882276e-05, "loss": 0.0642, "step": 14810 }, { "epoch": 2.08, "learning_rate": 4.6535654126895005e-05, "loss": 0.0449, "step": 14812 }, { "epoch": 2.08, "learning_rate": 4.6535186224967244e-05, "loss": 0.0496, "step": 14814 }, { "epoch": 2.08, "learning_rate": 4.65347183230395e-05, "loss": 0.0708, "step": 14816 }, { "epoch": 2.08, "learning_rate": 4.6534250421111736e-05, "loss": 0.0607, "step": 14818 }, { "epoch": 2.08, "learning_rate": 4.653378251918398e-05, "loss": 0.0564, "step": 14820 }, { "epoch": 2.08, "learning_rate": 4.653331461725622e-05, "loss": 0.0459, "step": 14822 }, { "epoch": 2.08, "learning_rate": 4.6532846715328474e-05, "loss": 0.0708, "step": 14824 }, { "epoch": 2.08, "learning_rate": 4.653237881340071e-05, "loss": 0.0561, "step": 14826 }, { "epoch": 2.08, "learning_rate": 4.653191091147296e-05, "loss": 0.0717, "step": 14828 }, { "epoch": 2.08, "learning_rate": 4.65314430095452e-05, "loss": 0.0797, "step": 14830 }, { "epoch": 2.08, "learning_rate": 4.6530975107617444e-05, "loss": 0.0392, "step": 14832 }, { "epoch": 2.08, "learning_rate": 4.653050720568969e-05, "loss": 0.0628, "step": 14834 }, { "epoch": 2.08, "learning_rate": 4.6530039303761936e-05, "loss": 0.0623, "step": 14836 }, { "epoch": 2.08, "learning_rate": 4.6529571401834175e-05, "loss": 0.0539, "step": 14838 }, { "epoch": 2.08, "learning_rate": 4.652910349990642e-05, "loss": 0.0444, "step": 14840 }, { "epoch": 2.08, "learning_rate": 4.652863559797867e-05, "loss": 0.0597, "step": 14842 }, { "epoch": 2.08, "learning_rate": 4.652816769605091e-05, "loss": 0.046, "step": 14844 }, { "epoch": 2.08, "learning_rate": 4.652769979412315e-05, "loss": 0.0736, "step": 14846 }, { "epoch": 2.08, "learning_rate": 4.65272318921954e-05, "loss": 0.0652, "step": 14848 }, { "epoch": 2.08, "learning_rate": 4.6526763990267644e-05, "loss": 0.0419, "step": 14850 }, { "epoch": 2.08, "learning_rate": 4.652629608833989e-05, "loss": 0.0417, "step": 14852 }, { "epoch": 2.09, "learning_rate": 4.652582818641213e-05, "loss": 0.0475, "step": 14854 }, { "epoch": 2.09, "learning_rate": 4.6525360284484375e-05, "loss": 0.0626, "step": 14856 }, { "epoch": 2.09, "learning_rate": 4.652489238255662e-05, "loss": 0.0506, "step": 14858 }, { "epoch": 2.09, "learning_rate": 4.6524424480628866e-05, "loss": 0.0565, "step": 14860 }, { "epoch": 2.09, "learning_rate": 4.6523956578701106e-05, "loss": 0.0566, "step": 14862 }, { "epoch": 2.09, "learning_rate": 4.652348867677335e-05, "loss": 0.0614, "step": 14864 }, { "epoch": 2.09, "learning_rate": 4.652302077484559e-05, "loss": 0.049, "step": 14866 }, { "epoch": 2.09, "learning_rate": 4.652255287291784e-05, "loss": 0.0626, "step": 14868 }, { "epoch": 2.09, "learning_rate": 4.652208497099008e-05, "loss": 0.0448, "step": 14870 }, { "epoch": 2.09, "learning_rate": 4.652161706906233e-05, "loss": 0.0537, "step": 14872 }, { "epoch": 2.09, "learning_rate": 4.652114916713457e-05, "loss": 0.0431, "step": 14874 }, { "epoch": 2.09, "learning_rate": 4.652068126520682e-05, "loss": 0.047, "step": 14876 }, { "epoch": 2.09, "learning_rate": 4.652021336327906e-05, "loss": 0.0668, "step": 14878 }, { "epoch": 2.09, "learning_rate": 4.6519745461351305e-05, "loss": 0.0505, "step": 14880 }, { "epoch": 2.09, "learning_rate": 4.6519277559423544e-05, "loss": 0.0652, "step": 14882 }, { "epoch": 2.09, "learning_rate": 4.651880965749579e-05, "loss": 0.0646, "step": 14884 }, { "epoch": 2.09, "learning_rate": 4.6518341755568036e-05, "loss": 0.0597, "step": 14886 }, { "epoch": 2.09, "learning_rate": 4.651787385364028e-05, "loss": 0.0604, "step": 14888 }, { "epoch": 2.09, "learning_rate": 4.651740595171252e-05, "loss": 0.0598, "step": 14890 }, { "epoch": 2.09, "learning_rate": 4.651693804978477e-05, "loss": 0.0547, "step": 14892 }, { "epoch": 2.09, "learning_rate": 4.651647014785701e-05, "loss": 0.0596, "step": 14894 }, { "epoch": 2.09, "learning_rate": 4.651600224592926e-05, "loss": 0.0751, "step": 14896 }, { "epoch": 2.09, "learning_rate": 4.65155343440015e-05, "loss": 0.0511, "step": 14898 }, { "epoch": 2.09, "learning_rate": 4.6515066442073744e-05, "loss": 0.0748, "step": 14900 }, { "epoch": 2.09, "learning_rate": 4.651459854014599e-05, "loss": 0.0514, "step": 14902 }, { "epoch": 2.09, "learning_rate": 4.6514130638218236e-05, "loss": 0.0444, "step": 14904 }, { "epoch": 2.09, "learning_rate": 4.6513662736290475e-05, "loss": 0.0424, "step": 14906 }, { "epoch": 2.09, "learning_rate": 4.651319483436272e-05, "loss": 0.0598, "step": 14908 }, { "epoch": 2.09, "learning_rate": 4.651272693243497e-05, "loss": 0.0619, "step": 14910 }, { "epoch": 2.09, "learning_rate": 4.651225903050721e-05, "loss": 0.0885, "step": 14912 }, { "epoch": 2.09, "learning_rate": 4.651179112857945e-05, "loss": 0.0641, "step": 14914 }, { "epoch": 2.09, "learning_rate": 4.65113232266517e-05, "loss": 0.0525, "step": 14916 }, { "epoch": 2.09, "learning_rate": 4.651085532472394e-05, "loss": 0.0531, "step": 14918 }, { "epoch": 2.09, "learning_rate": 4.651038742279619e-05, "loss": 0.0573, "step": 14920 }, { "epoch": 2.09, "learning_rate": 4.650991952086843e-05, "loss": 0.0528, "step": 14922 }, { "epoch": 2.09, "learning_rate": 4.6509451618940675e-05, "loss": 0.0504, "step": 14924 }, { "epoch": 2.1, "learning_rate": 4.6508983717012914e-05, "loss": 0.0463, "step": 14926 }, { "epoch": 2.1, "learning_rate": 4.650851581508516e-05, "loss": 0.0617, "step": 14928 }, { "epoch": 2.1, "learning_rate": 4.6508047913157406e-05, "loss": 0.0479, "step": 14930 }, { "epoch": 2.1, "learning_rate": 4.650758001122965e-05, "loss": 0.0539, "step": 14932 }, { "epoch": 2.1, "learning_rate": 4.650711210930189e-05, "loss": 0.0714, "step": 14934 }, { "epoch": 2.1, "learning_rate": 4.6506644207374137e-05, "loss": 0.0437, "step": 14936 }, { "epoch": 2.1, "learning_rate": 4.650617630544638e-05, "loss": 0.0589, "step": 14938 }, { "epoch": 2.1, "learning_rate": 4.650570840351863e-05, "loss": 0.0713, "step": 14940 }, { "epoch": 2.1, "learning_rate": 4.650524050159087e-05, "loss": 0.055, "step": 14942 }, { "epoch": 2.1, "learning_rate": 4.650477259966311e-05, "loss": 0.053, "step": 14944 }, { "epoch": 2.1, "learning_rate": 4.650430469773536e-05, "loss": 0.0679, "step": 14946 }, { "epoch": 2.1, "learning_rate": 4.65038367958076e-05, "loss": 0.066, "step": 14948 }, { "epoch": 2.1, "learning_rate": 4.6503368893879844e-05, "loss": 0.0706, "step": 14950 }, { "epoch": 2.1, "learning_rate": 4.6502900991952083e-05, "loss": 0.0536, "step": 14952 }, { "epoch": 2.1, "learning_rate": 4.6502433090024336e-05, "loss": 0.0565, "step": 14954 }, { "epoch": 2.1, "learning_rate": 4.6501965188096575e-05, "loss": 0.0618, "step": 14956 }, { "epoch": 2.1, "learning_rate": 4.650149728616882e-05, "loss": 0.0537, "step": 14958 }, { "epoch": 2.1, "learning_rate": 4.650102938424106e-05, "loss": 0.0347, "step": 14960 }, { "epoch": 2.1, "learning_rate": 4.6500561482313306e-05, "loss": 0.0448, "step": 14962 }, { "epoch": 2.1, "learning_rate": 4.650009358038555e-05, "loss": 0.0494, "step": 14964 }, { "epoch": 2.1, "learning_rate": 4.64996256784578e-05, "loss": 0.0313, "step": 14966 }, { "epoch": 2.1, "learning_rate": 4.649915777653004e-05, "loss": 0.0549, "step": 14968 }, { "epoch": 2.1, "learning_rate": 4.649868987460228e-05, "loss": 0.0697, "step": 14970 }, { "epoch": 2.1, "learning_rate": 4.649822197267453e-05, "loss": 0.0573, "step": 14972 }, { "epoch": 2.1, "learning_rate": 4.6497754070746775e-05, "loss": 0.057, "step": 14974 }, { "epoch": 2.1, "learning_rate": 4.6497286168819014e-05, "loss": 0.0701, "step": 14976 }, { "epoch": 2.1, "learning_rate": 4.649681826689126e-05, "loss": 0.0424, "step": 14978 }, { "epoch": 2.1, "learning_rate": 4.6496350364963506e-05, "loss": 0.0565, "step": 14980 }, { "epoch": 2.1, "learning_rate": 4.649588246303575e-05, "loss": 0.0474, "step": 14982 }, { "epoch": 2.1, "learning_rate": 4.649541456110799e-05, "loss": 0.0531, "step": 14984 }, { "epoch": 2.1, "learning_rate": 4.649494665918024e-05, "loss": 0.0607, "step": 14986 }, { "epoch": 2.1, "learning_rate": 4.649447875725248e-05, "loss": 0.0822, "step": 14988 }, { "epoch": 2.1, "learning_rate": 4.649401085532473e-05, "loss": 0.0507, "step": 14990 }, { "epoch": 2.1, "learning_rate": 4.649354295339697e-05, "loss": 0.0385, "step": 14992 }, { "epoch": 2.1, "learning_rate": 4.6493075051469214e-05, "loss": 0.0401, "step": 14994 }, { "epoch": 2.1, "learning_rate": 4.649260714954145e-05, "loss": 0.0452, "step": 14996 }, { "epoch": 2.11, "learning_rate": 4.6492139247613706e-05, "loss": 0.0527, "step": 14998 }, { "epoch": 2.11, "learning_rate": 4.6491671345685945e-05, "loss": 0.0805, "step": 15000 }, { "epoch": 2.11, "eval_gen_len": 30.6379, "eval_loss": 1.070984959602356, "eval_meteor": 0.0449, "eval_runtime": 14.582, "eval_samples_per_second": 3.978, "eval_steps_per_second": 0.549, "step": 15000 }, { "epoch": 2.11, "learning_rate": 4.649120344375819e-05, "loss": 0.0477, "step": 15002 }, { "epoch": 2.11, "learning_rate": 4.649073554183043e-05, "loss": 0.0534, "step": 15004 }, { "epoch": 2.11, "learning_rate": 4.649026763990268e-05, "loss": 0.0488, "step": 15006 }, { "epoch": 2.11, "learning_rate": 4.648979973797492e-05, "loss": 0.0484, "step": 15008 }, { "epoch": 2.11, "learning_rate": 4.648933183604717e-05, "loss": 0.0495, "step": 15010 }, { "epoch": 2.11, "learning_rate": 4.648886393411941e-05, "loss": 0.0635, "step": 15012 }, { "epoch": 2.11, "learning_rate": 4.648839603219165e-05, "loss": 0.0501, "step": 15014 }, { "epoch": 2.11, "learning_rate": 4.64879281302639e-05, "loss": 0.0803, "step": 15016 }, { "epoch": 2.11, "learning_rate": 4.6487460228336144e-05, "loss": 0.0396, "step": 15018 }, { "epoch": 2.11, "learning_rate": 4.6486992326408383e-05, "loss": 0.0516, "step": 15020 }, { "epoch": 2.11, "learning_rate": 4.648652442448063e-05, "loss": 0.0522, "step": 15022 }, { "epoch": 2.11, "learning_rate": 4.6486056522552875e-05, "loss": 0.0452, "step": 15024 }, { "epoch": 2.11, "learning_rate": 4.648558862062512e-05, "loss": 0.0603, "step": 15026 }, { "epoch": 2.11, "learning_rate": 4.648512071869736e-05, "loss": 0.0586, "step": 15028 }, { "epoch": 2.11, "learning_rate": 4.6484652816769606e-05, "loss": 0.0389, "step": 15030 }, { "epoch": 2.11, "learning_rate": 4.648418491484185e-05, "loss": 0.0535, "step": 15032 }, { "epoch": 2.11, "learning_rate": 4.64837170129141e-05, "loss": 0.0582, "step": 15034 }, { "epoch": 2.11, "learning_rate": 4.648324911098634e-05, "loss": 0.0479, "step": 15036 }, { "epoch": 2.11, "learning_rate": 4.648278120905858e-05, "loss": 0.057, "step": 15038 }, { "epoch": 2.11, "learning_rate": 4.648231330713083e-05, "loss": 0.0658, "step": 15040 }, { "epoch": 2.11, "learning_rate": 4.6481845405203075e-05, "loss": 0.0594, "step": 15042 }, { "epoch": 2.11, "learning_rate": 4.6481377503275314e-05, "loss": 0.0462, "step": 15044 }, { "epoch": 2.11, "learning_rate": 4.648090960134756e-05, "loss": 0.0506, "step": 15046 }, { "epoch": 2.11, "learning_rate": 4.64804416994198e-05, "loss": 0.0614, "step": 15048 }, { "epoch": 2.11, "learning_rate": 4.647997379749205e-05, "loss": 0.0835, "step": 15050 }, { "epoch": 2.11, "learning_rate": 4.647950589556429e-05, "loss": 0.0583, "step": 15052 }, { "epoch": 2.11, "learning_rate": 4.647903799363654e-05, "loss": 0.0607, "step": 15054 }, { "epoch": 2.11, "learning_rate": 4.6478570091708776e-05, "loss": 0.0574, "step": 15056 }, { "epoch": 2.11, "learning_rate": 4.647810218978103e-05, "loss": 0.0558, "step": 15058 }, { "epoch": 2.11, "learning_rate": 4.647763428785327e-05, "loss": 0.0432, "step": 15060 }, { "epoch": 2.11, "learning_rate": 4.6477166385925514e-05, "loss": 0.0598, "step": 15062 }, { "epoch": 2.11, "learning_rate": 4.647669848399775e-05, "loss": 0.0699, "step": 15064 }, { "epoch": 2.11, "learning_rate": 4.647623058207e-05, "loss": 0.051, "step": 15066 }, { "epoch": 2.12, "learning_rate": 4.6475762680142245e-05, "loss": 0.0436, "step": 15068 }, { "epoch": 2.12, "learning_rate": 4.647529477821449e-05, "loss": 0.0566, "step": 15070 }, { "epoch": 2.12, "learning_rate": 4.647482687628673e-05, "loss": 0.0457, "step": 15072 }, { "epoch": 2.12, "learning_rate": 4.6474358974358976e-05, "loss": 0.0543, "step": 15074 }, { "epoch": 2.12, "learning_rate": 4.647389107243122e-05, "loss": 0.0551, "step": 15076 }, { "epoch": 2.12, "learning_rate": 4.647342317050347e-05, "loss": 0.0436, "step": 15078 }, { "epoch": 2.12, "learning_rate": 4.647295526857571e-05, "loss": 0.0405, "step": 15080 }, { "epoch": 2.12, "learning_rate": 4.647248736664795e-05, "loss": 0.0448, "step": 15082 }, { "epoch": 2.12, "learning_rate": 4.64720194647202e-05, "loss": 0.0623, "step": 15084 }, { "epoch": 2.12, "learning_rate": 4.6471551562792444e-05, "loss": 0.0591, "step": 15086 }, { "epoch": 2.12, "learning_rate": 4.6471083660864684e-05, "loss": 0.0362, "step": 15088 }, { "epoch": 2.12, "learning_rate": 4.647061575893693e-05, "loss": 0.0531, "step": 15090 }, { "epoch": 2.12, "learning_rate": 4.6470147857009175e-05, "loss": 0.0579, "step": 15092 }, { "epoch": 2.12, "learning_rate": 4.646967995508142e-05, "loss": 0.045, "step": 15094 }, { "epoch": 2.12, "learning_rate": 4.646921205315366e-05, "loss": 0.0718, "step": 15096 }, { "epoch": 2.12, "learning_rate": 4.6468744151225906e-05, "loss": 0.0662, "step": 15098 }, { "epoch": 2.12, "learning_rate": 4.6468276249298145e-05, "loss": 0.0498, "step": 15100 }, { "epoch": 2.12, "learning_rate": 4.64678083473704e-05, "loss": 0.0631, "step": 15102 }, { "epoch": 2.12, "learning_rate": 4.646734044544264e-05, "loss": 0.0572, "step": 15104 }, { "epoch": 2.12, "learning_rate": 4.646687254351488e-05, "loss": 0.0628, "step": 15106 }, { "epoch": 2.12, "learning_rate": 4.646640464158712e-05, "loss": 0.045, "step": 15108 }, { "epoch": 2.12, "learning_rate": 4.646593673965937e-05, "loss": 0.0361, "step": 15110 }, { "epoch": 2.12, "learning_rate": 4.6465468837731614e-05, "loss": 0.0522, "step": 15112 }, { "epoch": 2.12, "learning_rate": 4.646500093580386e-05, "loss": 0.0517, "step": 15114 }, { "epoch": 2.12, "learning_rate": 4.64645330338761e-05, "loss": 0.0408, "step": 15116 }, { "epoch": 2.12, "learning_rate": 4.6464065131948345e-05, "loss": 0.0477, "step": 15118 }, { "epoch": 2.12, "learning_rate": 4.646359723002059e-05, "loss": 0.0487, "step": 15120 }, { "epoch": 2.12, "learning_rate": 4.646312932809284e-05, "loss": 0.0528, "step": 15122 }, { "epoch": 2.12, "learning_rate": 4.6462661426165076e-05, "loss": 0.0603, "step": 15124 }, { "epoch": 2.12, "learning_rate": 4.646219352423732e-05, "loss": 0.0583, "step": 15126 }, { "epoch": 2.12, "learning_rate": 4.646172562230957e-05, "loss": 0.0488, "step": 15128 }, { "epoch": 2.12, "learning_rate": 4.6461257720381814e-05, "loss": 0.06, "step": 15130 }, { "epoch": 2.12, "learning_rate": 4.646078981845405e-05, "loss": 0.0546, "step": 15132 }, { "epoch": 2.12, "learning_rate": 4.64603219165263e-05, "loss": 0.0397, "step": 15134 }, { "epoch": 2.12, "learning_rate": 4.6459854014598545e-05, "loss": 0.063, "step": 15136 }, { "epoch": 2.12, "learning_rate": 4.645938611267079e-05, "loss": 0.0425, "step": 15138 }, { "epoch": 2.13, "learning_rate": 4.645891821074303e-05, "loss": 0.0743, "step": 15140 }, { "epoch": 2.13, "learning_rate": 4.6458450308815276e-05, "loss": 0.0632, "step": 15142 }, { "epoch": 2.13, "learning_rate": 4.6457982406887515e-05, "loss": 0.054, "step": 15144 }, { "epoch": 2.13, "learning_rate": 4.645751450495977e-05, "loss": 0.0592, "step": 15146 }, { "epoch": 2.13, "learning_rate": 4.645704660303201e-05, "loss": 0.0652, "step": 15148 }, { "epoch": 2.13, "learning_rate": 4.645657870110425e-05, "loss": 0.0531, "step": 15150 }, { "epoch": 2.13, "learning_rate": 4.645611079917649e-05, "loss": 0.0572, "step": 15152 }, { "epoch": 2.13, "learning_rate": 4.6455642897248744e-05, "loss": 0.0478, "step": 15154 }, { "epoch": 2.13, "learning_rate": 4.6455174995320984e-05, "loss": 0.0781, "step": 15156 }, { "epoch": 2.13, "learning_rate": 4.645470709339323e-05, "loss": 0.0587, "step": 15158 }, { "epoch": 2.13, "learning_rate": 4.645423919146547e-05, "loss": 0.0478, "step": 15160 }, { "epoch": 2.13, "learning_rate": 4.6453771289537714e-05, "loss": 0.0492, "step": 15162 }, { "epoch": 2.13, "learning_rate": 4.645330338760996e-05, "loss": 0.0515, "step": 15164 }, { "epoch": 2.13, "learning_rate": 4.6452835485682206e-05, "loss": 0.0323, "step": 15166 }, { "epoch": 2.13, "learning_rate": 4.6452367583754445e-05, "loss": 0.0474, "step": 15168 }, { "epoch": 2.13, "learning_rate": 4.645189968182669e-05, "loss": 0.0621, "step": 15170 }, { "epoch": 2.13, "learning_rate": 4.645143177989894e-05, "loss": 0.0607, "step": 15172 }, { "epoch": 2.13, "learning_rate": 4.645096387797118e-05, "loss": 0.062, "step": 15174 }, { "epoch": 2.13, "learning_rate": 4.645049597604342e-05, "loss": 0.0483, "step": 15176 }, { "epoch": 2.13, "learning_rate": 4.645002807411567e-05, "loss": 0.0559, "step": 15178 }, { "epoch": 2.13, "learning_rate": 4.6449560172187914e-05, "loss": 0.0626, "step": 15180 }, { "epoch": 2.13, "learning_rate": 4.644909227026016e-05, "loss": 0.0545, "step": 15182 }, { "epoch": 2.13, "learning_rate": 4.64486243683324e-05, "loss": 0.0888, "step": 15184 }, { "epoch": 2.13, "learning_rate": 4.6448156466404645e-05, "loss": 0.1247, "step": 15186 }, { "epoch": 2.13, "learning_rate": 4.644768856447689e-05, "loss": 0.046, "step": 15188 }, { "epoch": 2.13, "learning_rate": 4.644722066254914e-05, "loss": 0.0655, "step": 15190 }, { "epoch": 2.13, "learning_rate": 4.6446752760621376e-05, "loss": 0.062, "step": 15192 }, { "epoch": 2.13, "learning_rate": 4.644628485869362e-05, "loss": 0.047, "step": 15194 }, { "epoch": 2.13, "learning_rate": 4.644581695676586e-05, "loss": 0.0581, "step": 15196 }, { "epoch": 2.13, "learning_rate": 4.644534905483811e-05, "loss": 0.058, "step": 15198 }, { "epoch": 2.13, "learning_rate": 4.644488115291035e-05, "loss": 0.0498, "step": 15200 }, { "epoch": 2.13, "learning_rate": 4.644441325098259e-05, "loss": 0.0443, "step": 15202 }, { "epoch": 2.13, "learning_rate": 4.644394534905484e-05, "loss": 0.0454, "step": 15204 }, { "epoch": 2.13, "learning_rate": 4.6443477447127084e-05, "loss": 0.0568, "step": 15206 }, { "epoch": 2.13, "learning_rate": 4.644300954519933e-05, "loss": 0.053, "step": 15208 }, { "epoch": 2.14, "learning_rate": 4.644254164327157e-05, "loss": 0.059, "step": 15210 }, { "epoch": 2.14, "learning_rate": 4.6442073741343815e-05, "loss": 0.062, "step": 15212 }, { "epoch": 2.14, "learning_rate": 4.644160583941606e-05, "loss": 0.0483, "step": 15214 }, { "epoch": 2.14, "learning_rate": 4.644113793748831e-05, "loss": 0.0564, "step": 15216 }, { "epoch": 2.14, "learning_rate": 4.6440670035560546e-05, "loss": 0.0625, "step": 15218 }, { "epoch": 2.14, "learning_rate": 4.644020213363279e-05, "loss": 0.0437, "step": 15220 }, { "epoch": 2.14, "learning_rate": 4.643973423170504e-05, "loss": 0.0526, "step": 15222 }, { "epoch": 2.14, "learning_rate": 4.6439266329777284e-05, "loss": 0.0499, "step": 15224 }, { "epoch": 2.14, "learning_rate": 4.643879842784952e-05, "loss": 0.0658, "step": 15226 }, { "epoch": 2.14, "learning_rate": 4.643833052592177e-05, "loss": 0.0556, "step": 15228 }, { "epoch": 2.14, "learning_rate": 4.643786262399401e-05, "loss": 0.0673, "step": 15230 }, { "epoch": 2.14, "learning_rate": 4.643739472206626e-05, "loss": 0.0553, "step": 15232 }, { "epoch": 2.14, "learning_rate": 4.64369268201385e-05, "loss": 0.0614, "step": 15234 }, { "epoch": 2.14, "learning_rate": 4.6436458918210745e-05, "loss": 0.0884, "step": 15236 }, { "epoch": 2.14, "learning_rate": 4.6435991016282985e-05, "loss": 0.0455, "step": 15238 }, { "epoch": 2.14, "learning_rate": 4.643552311435523e-05, "loss": 0.0463, "step": 15240 }, { "epoch": 2.14, "learning_rate": 4.6435055212427476e-05, "loss": 0.0636, "step": 15242 }, { "epoch": 2.14, "learning_rate": 4.643458731049972e-05, "loss": 0.0556, "step": 15244 }, { "epoch": 2.14, "learning_rate": 4.643411940857196e-05, "loss": 0.0569, "step": 15246 }, { "epoch": 2.14, "learning_rate": 4.643365150664421e-05, "loss": 0.0563, "step": 15248 }, { "epoch": 2.14, "learning_rate": 4.643318360471645e-05, "loss": 0.059, "step": 15250 }, { "epoch": 2.14, "learning_rate": 4.64327157027887e-05, "loss": 0.0442, "step": 15252 }, { "epoch": 2.14, "learning_rate": 4.643224780086094e-05, "loss": 0.0612, "step": 15254 }, { "epoch": 2.14, "learning_rate": 4.6431779898933184e-05, "loss": 0.0571, "step": 15256 }, { "epoch": 2.14, "learning_rate": 4.643131199700543e-05, "loss": 0.0552, "step": 15258 }, { "epoch": 2.14, "learning_rate": 4.6430844095077676e-05, "loss": 0.0851, "step": 15260 }, { "epoch": 2.14, "learning_rate": 4.6430376193149915e-05, "loss": 0.0524, "step": 15262 }, { "epoch": 2.14, "learning_rate": 4.642990829122216e-05, "loss": 0.0518, "step": 15264 }, { "epoch": 2.14, "learning_rate": 4.642944038929441e-05, "loss": 0.0576, "step": 15266 }, { "epoch": 2.14, "learning_rate": 4.642897248736665e-05, "loss": 0.039, "step": 15268 }, { "epoch": 2.14, "learning_rate": 4.642850458543889e-05, "loss": 0.046, "step": 15270 }, { "epoch": 2.14, "learning_rate": 4.642803668351114e-05, "loss": 0.0452, "step": 15272 }, { "epoch": 2.14, "learning_rate": 4.642756878158338e-05, "loss": 0.0541, "step": 15274 }, { "epoch": 2.14, "learning_rate": 4.642710087965563e-05, "loss": 0.094, "step": 15276 }, { "epoch": 2.14, "learning_rate": 4.642663297772787e-05, "loss": 0.054, "step": 15278 }, { "epoch": 2.14, "learning_rate": 4.6426165075800115e-05, "loss": 0.0606, "step": 15280 }, { "epoch": 2.15, "learning_rate": 4.6425697173872354e-05, "loss": 0.0469, "step": 15282 }, { "epoch": 2.15, "learning_rate": 4.642522927194461e-05, "loss": 0.0518, "step": 15284 }, { "epoch": 2.15, "learning_rate": 4.6424761370016846e-05, "loss": 0.0563, "step": 15286 }, { "epoch": 2.15, "learning_rate": 4.642429346808909e-05, "loss": 0.0486, "step": 15288 }, { "epoch": 2.15, "learning_rate": 4.642382556616133e-05, "loss": 0.0513, "step": 15290 }, { "epoch": 2.15, "learning_rate": 4.642335766423358e-05, "loss": 0.0571, "step": 15292 }, { "epoch": 2.15, "learning_rate": 4.642288976230582e-05, "loss": 0.0572, "step": 15294 }, { "epoch": 2.15, "learning_rate": 4.642242186037807e-05, "loss": 0.0496, "step": 15296 }, { "epoch": 2.15, "learning_rate": 4.642195395845031e-05, "loss": 0.0754, "step": 15298 }, { "epoch": 2.15, "learning_rate": 4.6421486056522554e-05, "loss": 0.0485, "step": 15300 }, { "epoch": 2.15, "learning_rate": 4.64210181545948e-05, "loss": 0.0543, "step": 15302 }, { "epoch": 2.15, "learning_rate": 4.6420550252667045e-05, "loss": 0.0426, "step": 15304 }, { "epoch": 2.15, "learning_rate": 4.6420082350739285e-05, "loss": 0.066, "step": 15306 }, { "epoch": 2.15, "learning_rate": 4.641961444881153e-05, "loss": 0.0548, "step": 15308 }, { "epoch": 2.15, "learning_rate": 4.6419146546883776e-05, "loss": 0.0523, "step": 15310 }, { "epoch": 2.15, "learning_rate": 4.641867864495602e-05, "loss": 0.0636, "step": 15312 }, { "epoch": 2.15, "learning_rate": 4.641821074302826e-05, "loss": 0.058, "step": 15314 }, { "epoch": 2.15, "learning_rate": 4.641774284110051e-05, "loss": 0.0528, "step": 15316 }, { "epoch": 2.15, "learning_rate": 4.641727493917275e-05, "loss": 0.0574, "step": 15318 }, { "epoch": 2.15, "learning_rate": 4.6416807037245e-05, "loss": 0.0475, "step": 15320 }, { "epoch": 2.15, "learning_rate": 4.641633913531724e-05, "loss": 0.0413, "step": 15322 }, { "epoch": 2.15, "learning_rate": 4.6415871233389484e-05, "loss": 0.0584, "step": 15324 }, { "epoch": 2.15, "learning_rate": 4.6415403331461723e-05, "loss": 0.0588, "step": 15326 }, { "epoch": 2.15, "learning_rate": 4.6414935429533976e-05, "loss": 0.05, "step": 15328 }, { "epoch": 2.15, "learning_rate": 4.6414467527606215e-05, "loss": 0.0552, "step": 15330 }, { "epoch": 2.15, "learning_rate": 4.641399962567846e-05, "loss": 0.0602, "step": 15332 }, { "epoch": 2.15, "learning_rate": 4.64135317237507e-05, "loss": 0.0541, "step": 15334 }, { "epoch": 2.15, "learning_rate": 4.641306382182295e-05, "loss": 0.0666, "step": 15336 }, { "epoch": 2.15, "learning_rate": 4.641259591989519e-05, "loss": 0.0538, "step": 15338 }, { "epoch": 2.15, "learning_rate": 4.641212801796744e-05, "loss": 0.0529, "step": 15340 }, { "epoch": 2.15, "learning_rate": 4.641166011603968e-05, "loss": 0.0535, "step": 15342 }, { "epoch": 2.15, "learning_rate": 4.641119221411192e-05, "loss": 0.0572, "step": 15344 }, { "epoch": 2.15, "learning_rate": 4.641072431218417e-05, "loss": 0.0448, "step": 15346 }, { "epoch": 2.15, "learning_rate": 4.6410256410256415e-05, "loss": 0.0659, "step": 15348 }, { "epoch": 2.15, "learning_rate": 4.6409788508328654e-05, "loss": 0.0585, "step": 15350 }, { "epoch": 2.15, "learning_rate": 4.64093206064009e-05, "loss": 0.0567, "step": 15352 }, { "epoch": 2.16, "learning_rate": 4.6408852704473146e-05, "loss": 0.0545, "step": 15354 }, { "epoch": 2.16, "learning_rate": 4.640838480254539e-05, "loss": 0.0571, "step": 15356 }, { "epoch": 2.16, "learning_rate": 4.640791690061763e-05, "loss": 0.0446, "step": 15358 }, { "epoch": 2.16, "learning_rate": 4.640744899868988e-05, "loss": 0.0552, "step": 15360 }, { "epoch": 2.16, "learning_rate": 4.640698109676212e-05, "loss": 0.0586, "step": 15362 }, { "epoch": 2.16, "learning_rate": 4.640651319483437e-05, "loss": 0.0646, "step": 15364 }, { "epoch": 2.16, "learning_rate": 4.640604529290661e-05, "loss": 0.0577, "step": 15366 }, { "epoch": 2.16, "learning_rate": 4.6405577390978854e-05, "loss": 0.0524, "step": 15368 }, { "epoch": 2.16, "learning_rate": 4.64051094890511e-05, "loss": 0.0343, "step": 15370 }, { "epoch": 2.16, "learning_rate": 4.6404641587123345e-05, "loss": 0.0588, "step": 15372 }, { "epoch": 2.16, "learning_rate": 4.6404173685195585e-05, "loss": 0.0514, "step": 15374 }, { "epoch": 2.16, "learning_rate": 4.640370578326783e-05, "loss": 0.0534, "step": 15376 }, { "epoch": 2.16, "learning_rate": 4.640323788134007e-05, "loss": 0.0449, "step": 15378 }, { "epoch": 2.16, "learning_rate": 4.640276997941232e-05, "loss": 0.0473, "step": 15380 }, { "epoch": 2.16, "learning_rate": 4.640230207748456e-05, "loss": 0.0454, "step": 15382 }, { "epoch": 2.16, "learning_rate": 4.640183417555681e-05, "loss": 0.0465, "step": 15384 }, { "epoch": 2.16, "learning_rate": 4.6401366273629047e-05, "loss": 0.0691, "step": 15386 }, { "epoch": 2.16, "learning_rate": 4.640089837170129e-05, "loss": 0.0444, "step": 15388 }, { "epoch": 2.16, "learning_rate": 4.640043046977354e-05, "loss": 0.0527, "step": 15390 }, { "epoch": 2.16, "learning_rate": 4.6399962567845784e-05, "loss": 0.047, "step": 15392 }, { "epoch": 2.16, "learning_rate": 4.6399494665918023e-05, "loss": 0.0557, "step": 15394 }, { "epoch": 2.16, "learning_rate": 4.639902676399027e-05, "loss": 0.0504, "step": 15396 }, { "epoch": 2.16, "learning_rate": 4.6398558862062515e-05, "loss": 0.0716, "step": 15398 }, { "epoch": 2.16, "learning_rate": 4.639809096013476e-05, "loss": 0.0585, "step": 15400 }, { "epoch": 2.16, "learning_rate": 4.6397623058207e-05, "loss": 0.0546, "step": 15402 }, { "epoch": 2.16, "learning_rate": 4.6397155156279246e-05, "loss": 0.0537, "step": 15404 }, { "epoch": 2.16, "learning_rate": 4.639668725435149e-05, "loss": 0.0489, "step": 15406 }, { "epoch": 2.16, "learning_rate": 4.639621935242374e-05, "loss": 0.0468, "step": 15408 }, { "epoch": 2.16, "learning_rate": 4.639575145049598e-05, "loss": 0.0536, "step": 15410 }, { "epoch": 2.16, "learning_rate": 4.639528354856822e-05, "loss": 0.0616, "step": 15412 }, { "epoch": 2.16, "learning_rate": 4.639481564664047e-05, "loss": 0.0497, "step": 15414 }, { "epoch": 2.16, "learning_rate": 4.6394347744712715e-05, "loss": 0.0816, "step": 15416 }, { "epoch": 2.16, "learning_rate": 4.6393879842784954e-05, "loss": 0.0655, "step": 15418 }, { "epoch": 2.16, "learning_rate": 4.63934119408572e-05, "loss": 0.0619, "step": 15420 }, { "epoch": 2.16, "learning_rate": 4.639294403892944e-05, "loss": 0.0545, "step": 15422 }, { "epoch": 2.17, "learning_rate": 4.639247613700169e-05, "loss": 0.0582, "step": 15424 }, { "epoch": 2.17, "learning_rate": 4.639200823507393e-05, "loss": 0.0548, "step": 15426 }, { "epoch": 2.17, "learning_rate": 4.639154033314618e-05, "loss": 0.057, "step": 15428 }, { "epoch": 2.17, "learning_rate": 4.6391072431218416e-05, "loss": 0.0554, "step": 15430 }, { "epoch": 2.17, "learning_rate": 4.639060452929067e-05, "loss": 0.0669, "step": 15432 }, { "epoch": 2.17, "learning_rate": 4.639013662736291e-05, "loss": 0.0555, "step": 15434 }, { "epoch": 2.17, "learning_rate": 4.6389668725435154e-05, "loss": 0.0639, "step": 15436 }, { "epoch": 2.17, "learning_rate": 4.638920082350739e-05, "loss": 0.0716, "step": 15438 }, { "epoch": 2.17, "learning_rate": 4.638873292157964e-05, "loss": 0.0643, "step": 15440 }, { "epoch": 2.17, "learning_rate": 4.6388265019651885e-05, "loss": 0.0448, "step": 15442 }, { "epoch": 2.17, "learning_rate": 4.638779711772413e-05, "loss": 0.0432, "step": 15444 }, { "epoch": 2.17, "learning_rate": 4.638732921579637e-05, "loss": 0.0594, "step": 15446 }, { "epoch": 2.17, "learning_rate": 4.6386861313868616e-05, "loss": 0.0545, "step": 15448 }, { "epoch": 2.17, "learning_rate": 4.638639341194086e-05, "loss": 0.0535, "step": 15450 }, { "epoch": 2.17, "learning_rate": 4.63859255100131e-05, "loss": 0.0547, "step": 15452 }, { "epoch": 2.17, "learning_rate": 4.6385457608085347e-05, "loss": 0.0533, "step": 15454 }, { "epoch": 2.17, "learning_rate": 4.6384989706157586e-05, "loss": 0.0608, "step": 15456 }, { "epoch": 2.17, "learning_rate": 4.638452180422984e-05, "loss": 0.0604, "step": 15458 }, { "epoch": 2.17, "learning_rate": 4.638405390230208e-05, "loss": 0.0497, "step": 15460 }, { "epoch": 2.17, "learning_rate": 4.6383586000374323e-05, "loss": 0.0541, "step": 15462 }, { "epoch": 2.17, "learning_rate": 4.638311809844656e-05, "loss": 0.0518, "step": 15464 }, { "epoch": 2.17, "learning_rate": 4.6382650196518815e-05, "loss": 0.05, "step": 15466 }, { "epoch": 2.17, "learning_rate": 4.6382182294591054e-05, "loss": 0.0651, "step": 15468 }, { "epoch": 2.17, "learning_rate": 4.63817143926633e-05, "loss": 0.0633, "step": 15470 }, { "epoch": 2.17, "learning_rate": 4.638124649073554e-05, "loss": 0.0395, "step": 15472 }, { "epoch": 2.17, "learning_rate": 4.6380778588807785e-05, "loss": 0.0576, "step": 15474 }, { "epoch": 2.17, "learning_rate": 4.638031068688003e-05, "loss": 0.0611, "step": 15476 }, { "epoch": 2.17, "learning_rate": 4.637984278495228e-05, "loss": 0.0513, "step": 15478 }, { "epoch": 2.17, "learning_rate": 4.6379374883024516e-05, "loss": 0.0625, "step": 15480 }, { "epoch": 2.17, "learning_rate": 4.637890698109676e-05, "loss": 0.0531, "step": 15482 }, { "epoch": 2.17, "learning_rate": 4.637843907916901e-05, "loss": 0.063, "step": 15484 }, { "epoch": 2.17, "learning_rate": 4.6377971177241254e-05, "loss": 0.0545, "step": 15486 }, { "epoch": 2.17, "learning_rate": 4.637750327531349e-05, "loss": 0.0325, "step": 15488 }, { "epoch": 2.17, "learning_rate": 4.637703537338574e-05, "loss": 0.056, "step": 15490 }, { "epoch": 2.17, "learning_rate": 4.6376567471457985e-05, "loss": 0.04, "step": 15492 }, { "epoch": 2.17, "learning_rate": 4.637609956953023e-05, "loss": 0.0766, "step": 15494 }, { "epoch": 2.18, "learning_rate": 4.637563166760247e-05, "loss": 0.0583, "step": 15496 }, { "epoch": 2.18, "learning_rate": 4.6375163765674716e-05, "loss": 0.0625, "step": 15498 }, { "epoch": 2.18, "learning_rate": 4.637469586374696e-05, "loss": 0.0655, "step": 15500 }, { "epoch": 2.18, "learning_rate": 4.637422796181921e-05, "loss": 0.039, "step": 15502 }, { "epoch": 2.18, "learning_rate": 4.637376005989145e-05, "loss": 0.0538, "step": 15504 }, { "epoch": 2.18, "learning_rate": 4.637329215796369e-05, "loss": 0.0585, "step": 15506 }, { "epoch": 2.18, "learning_rate": 4.637282425603593e-05, "loss": 0.0564, "step": 15508 }, { "epoch": 2.18, "learning_rate": 4.6372356354108185e-05, "loss": 0.0359, "step": 15510 }, { "epoch": 2.18, "learning_rate": 4.6371888452180424e-05, "loss": 0.0466, "step": 15512 }, { "epoch": 2.18, "learning_rate": 4.637142055025267e-05, "loss": 0.0471, "step": 15514 }, { "epoch": 2.18, "learning_rate": 4.637095264832491e-05, "loss": 0.0497, "step": 15516 }, { "epoch": 2.18, "learning_rate": 4.6370484746397155e-05, "loss": 0.0497, "step": 15518 }, { "epoch": 2.18, "learning_rate": 4.63700168444694e-05, "loss": 0.0579, "step": 15520 }, { "epoch": 2.18, "learning_rate": 4.6369548942541647e-05, "loss": 0.0632, "step": 15522 }, { "epoch": 2.18, "learning_rate": 4.6369081040613886e-05, "loss": 0.0546, "step": 15524 }, { "epoch": 2.18, "learning_rate": 4.636861313868613e-05, "loss": 0.0536, "step": 15526 }, { "epoch": 2.18, "learning_rate": 4.636814523675838e-05, "loss": 0.0689, "step": 15528 }, { "epoch": 2.18, "learning_rate": 4.6367677334830623e-05, "loss": 0.0513, "step": 15530 }, { "epoch": 2.18, "learning_rate": 4.636720943290286e-05, "loss": 0.0614, "step": 15532 }, { "epoch": 2.18, "learning_rate": 4.636674153097511e-05, "loss": 0.0648, "step": 15534 }, { "epoch": 2.18, "learning_rate": 4.6366273629047354e-05, "loss": 0.0535, "step": 15536 }, { "epoch": 2.18, "learning_rate": 4.63658057271196e-05, "loss": 0.0775, "step": 15538 }, { "epoch": 2.18, "learning_rate": 4.636533782519184e-05, "loss": 0.0444, "step": 15540 }, { "epoch": 2.18, "learning_rate": 4.6364869923264085e-05, "loss": 0.0726, "step": 15542 }, { "epoch": 2.18, "learning_rate": 4.636440202133633e-05, "loss": 0.0636, "step": 15544 }, { "epoch": 2.18, "learning_rate": 4.636393411940858e-05, "loss": 0.0536, "step": 15546 }, { "epoch": 2.18, "learning_rate": 4.6363466217480816e-05, "loss": 0.0899, "step": 15548 }, { "epoch": 2.18, "learning_rate": 4.636299831555306e-05, "loss": 0.0589, "step": 15550 }, { "epoch": 2.18, "learning_rate": 4.63625304136253e-05, "loss": 0.0591, "step": 15552 }, { "epoch": 2.18, "learning_rate": 4.6362062511697554e-05, "loss": 0.0833, "step": 15554 }, { "epoch": 2.18, "learning_rate": 4.636159460976979e-05, "loss": 0.056, "step": 15556 }, { "epoch": 2.18, "learning_rate": 4.636112670784204e-05, "loss": 0.0591, "step": 15558 }, { "epoch": 2.18, "learning_rate": 4.636065880591428e-05, "loss": 0.0595, "step": 15560 }, { "epoch": 2.18, "learning_rate": 4.636019090398653e-05, "loss": 0.0524, "step": 15562 }, { "epoch": 2.18, "learning_rate": 4.635972300205877e-05, "loss": 0.0499, "step": 15564 }, { "epoch": 2.19, "learning_rate": 4.6359255100131016e-05, "loss": 0.0536, "step": 15566 }, { "epoch": 2.19, "learning_rate": 4.6358787198203255e-05, "loss": 0.0428, "step": 15568 }, { "epoch": 2.19, "learning_rate": 4.63583192962755e-05, "loss": 0.0692, "step": 15570 }, { "epoch": 2.19, "learning_rate": 4.635785139434775e-05, "loss": 0.0556, "step": 15572 }, { "epoch": 2.19, "learning_rate": 4.635738349241999e-05, "loss": 0.0433, "step": 15574 }, { "epoch": 2.19, "learning_rate": 4.635691559049223e-05, "loss": 0.0503, "step": 15576 }, { "epoch": 2.19, "learning_rate": 4.635644768856448e-05, "loss": 0.049, "step": 15578 }, { "epoch": 2.19, "learning_rate": 4.6355979786636724e-05, "loss": 0.0619, "step": 15580 }, { "epoch": 2.19, "learning_rate": 4.635551188470897e-05, "loss": 0.0455, "step": 15582 }, { "epoch": 2.19, "learning_rate": 4.635504398278121e-05, "loss": 0.0665, "step": 15584 }, { "epoch": 2.19, "learning_rate": 4.6354576080853455e-05, "loss": 0.0644, "step": 15586 }, { "epoch": 2.19, "learning_rate": 4.63541081789257e-05, "loss": 0.0636, "step": 15588 }, { "epoch": 2.19, "learning_rate": 4.6353640276997947e-05, "loss": 0.0647, "step": 15590 }, { "epoch": 2.19, "learning_rate": 4.6353172375070186e-05, "loss": 0.063, "step": 15592 }, { "epoch": 2.19, "learning_rate": 4.635270447314243e-05, "loss": 0.1058, "step": 15594 }, { "epoch": 2.19, "learning_rate": 4.635223657121468e-05, "loss": 0.0463, "step": 15596 }, { "epoch": 2.19, "learning_rate": 4.6351768669286923e-05, "loss": 0.0686, "step": 15598 }, { "epoch": 2.19, "learning_rate": 4.635130076735916e-05, "loss": 0.0621, "step": 15600 }, { "epoch": 2.19, "learning_rate": 4.635083286543141e-05, "loss": 0.0481, "step": 15602 }, { "epoch": 2.19, "learning_rate": 4.635036496350365e-05, "loss": 0.0616, "step": 15604 }, { "epoch": 2.19, "learning_rate": 4.63498970615759e-05, "loss": 0.0559, "step": 15606 }, { "epoch": 2.19, "learning_rate": 4.634942915964814e-05, "loss": 0.0528, "step": 15608 }, { "epoch": 2.19, "learning_rate": 4.6348961257720385e-05, "loss": 0.0511, "step": 15610 }, { "epoch": 2.19, "learning_rate": 4.6348493355792625e-05, "loss": 0.0385, "step": 15612 }, { "epoch": 2.19, "learning_rate": 4.634802545386488e-05, "loss": 0.0599, "step": 15614 }, { "epoch": 2.19, "learning_rate": 4.6347557551937116e-05, "loss": 0.0457, "step": 15616 }, { "epoch": 2.19, "learning_rate": 4.634708965000936e-05, "loss": 0.0578, "step": 15618 }, { "epoch": 2.19, "learning_rate": 4.63466217480816e-05, "loss": 0.05, "step": 15620 }, { "epoch": 2.19, "learning_rate": 4.634615384615385e-05, "loss": 0.0419, "step": 15622 }, { "epoch": 2.19, "learning_rate": 4.634568594422609e-05, "loss": 0.082, "step": 15624 }, { "epoch": 2.19, "learning_rate": 4.634521804229834e-05, "loss": 0.0633, "step": 15626 }, { "epoch": 2.19, "learning_rate": 4.634475014037058e-05, "loss": 0.0507, "step": 15628 }, { "epoch": 2.19, "learning_rate": 4.6344282238442824e-05, "loss": 0.0467, "step": 15630 }, { "epoch": 2.19, "learning_rate": 4.634381433651507e-05, "loss": 0.0654, "step": 15632 }, { "epoch": 2.19, "learning_rate": 4.6343346434587316e-05, "loss": 0.0531, "step": 15634 }, { "epoch": 2.19, "learning_rate": 4.6342878532659555e-05, "loss": 0.0542, "step": 15636 }, { "epoch": 2.2, "learning_rate": 4.63424106307318e-05, "loss": 0.0747, "step": 15638 }, { "epoch": 2.2, "learning_rate": 4.634194272880405e-05, "loss": 0.0491, "step": 15640 }, { "epoch": 2.2, "learning_rate": 4.634147482687629e-05, "loss": 0.0477, "step": 15642 }, { "epoch": 2.2, "learning_rate": 4.634100692494853e-05, "loss": 0.0781, "step": 15644 }, { "epoch": 2.2, "learning_rate": 4.634053902302078e-05, "loss": 0.0875, "step": 15646 }, { "epoch": 2.2, "learning_rate": 4.6340071121093024e-05, "loss": 0.0636, "step": 15648 }, { "epoch": 2.2, "learning_rate": 4.633960321916527e-05, "loss": 0.0407, "step": 15650 }, { "epoch": 2.2, "learning_rate": 4.633913531723751e-05, "loss": 0.0512, "step": 15652 }, { "epoch": 2.2, "learning_rate": 4.6338667415309755e-05, "loss": 0.0644, "step": 15654 }, { "epoch": 2.2, "learning_rate": 4.6338199513381994e-05, "loss": 0.0408, "step": 15656 }, { "epoch": 2.2, "learning_rate": 4.633773161145425e-05, "loss": 0.0575, "step": 15658 }, { "epoch": 2.2, "learning_rate": 4.6337263709526486e-05, "loss": 0.0611, "step": 15660 }, { "epoch": 2.2, "learning_rate": 4.633679580759873e-05, "loss": 0.0643, "step": 15662 }, { "epoch": 2.2, "learning_rate": 4.633632790567097e-05, "loss": 0.1125, "step": 15664 }, { "epoch": 2.2, "learning_rate": 4.633586000374322e-05, "loss": 0.0515, "step": 15666 }, { "epoch": 2.2, "learning_rate": 4.633539210181546e-05, "loss": 0.0465, "step": 15668 }, { "epoch": 2.2, "learning_rate": 4.633492419988771e-05, "loss": 0.073, "step": 15670 }, { "epoch": 2.2, "learning_rate": 4.633445629795995e-05, "loss": 0.0571, "step": 15672 }, { "epoch": 2.2, "learning_rate": 4.6333988396032194e-05, "loss": 0.0672, "step": 15674 }, { "epoch": 2.2, "learning_rate": 4.633352049410444e-05, "loss": 0.0739, "step": 15676 }, { "epoch": 2.2, "learning_rate": 4.6333052592176685e-05, "loss": 0.0513, "step": 15678 }, { "epoch": 2.2, "learning_rate": 4.6332584690248925e-05, "loss": 0.0375, "step": 15680 }, { "epoch": 2.2, "learning_rate": 4.633211678832117e-05, "loss": 0.0816, "step": 15682 }, { "epoch": 2.2, "learning_rate": 4.6331648886393416e-05, "loss": 0.0483, "step": 15684 }, { "epoch": 2.2, "learning_rate": 4.633118098446566e-05, "loss": 0.0672, "step": 15686 }, { "epoch": 2.2, "learning_rate": 4.63307130825379e-05, "loss": 0.0518, "step": 15688 }, { "epoch": 2.2, "learning_rate": 4.633024518061015e-05, "loss": 0.0551, "step": 15690 }, { "epoch": 2.2, "learning_rate": 4.632977727868239e-05, "loss": 0.053, "step": 15692 }, { "epoch": 2.2, "learning_rate": 4.632930937675464e-05, "loss": 0.0707, "step": 15694 }, { "epoch": 2.2, "learning_rate": 4.632884147482688e-05, "loss": 0.0538, "step": 15696 }, { "epoch": 2.2, "learning_rate": 4.6328373572899124e-05, "loss": 0.0524, "step": 15698 }, { "epoch": 2.2, "learning_rate": 4.632790567097136e-05, "loss": 0.0526, "step": 15700 }, { "epoch": 2.2, "learning_rate": 4.6327437769043616e-05, "loss": 0.0603, "step": 15702 }, { "epoch": 2.2, "learning_rate": 4.6326969867115855e-05, "loss": 0.0508, "step": 15704 }, { "epoch": 2.2, "learning_rate": 4.6326501965188094e-05, "loss": 0.0504, "step": 15706 }, { "epoch": 2.2, "learning_rate": 4.632603406326034e-05, "loss": 0.0759, "step": 15708 }, { "epoch": 2.21, "learning_rate": 4.6325566161332586e-05, "loss": 0.0701, "step": 15710 }, { "epoch": 2.21, "learning_rate": 4.632509825940483e-05, "loss": 0.0525, "step": 15712 }, { "epoch": 2.21, "learning_rate": 4.632463035747707e-05, "loss": 0.0752, "step": 15714 }, { "epoch": 2.21, "learning_rate": 4.632416245554932e-05, "loss": 0.0466, "step": 15716 }, { "epoch": 2.21, "learning_rate": 4.632369455362156e-05, "loss": 0.0748, "step": 15718 }, { "epoch": 2.21, "learning_rate": 4.632322665169381e-05, "loss": 0.0522, "step": 15720 }, { "epoch": 2.21, "learning_rate": 4.632275874976605e-05, "loss": 0.0591, "step": 15722 }, { "epoch": 2.21, "learning_rate": 4.6322290847838294e-05, "loss": 0.0554, "step": 15724 }, { "epoch": 2.21, "learning_rate": 4.632182294591054e-05, "loss": 0.069, "step": 15726 }, { "epoch": 2.21, "learning_rate": 4.6321355043982786e-05, "loss": 0.043, "step": 15728 }, { "epoch": 2.21, "learning_rate": 4.6320887142055025e-05, "loss": 0.0566, "step": 15730 }, { "epoch": 2.21, "learning_rate": 4.632041924012727e-05, "loss": 0.0616, "step": 15732 }, { "epoch": 2.21, "learning_rate": 4.631995133819951e-05, "loss": 0.0504, "step": 15734 }, { "epoch": 2.21, "learning_rate": 4.631948343627176e-05, "loss": 0.038, "step": 15736 }, { "epoch": 2.21, "learning_rate": 4.6319015534344e-05, "loss": 0.0668, "step": 15738 }, { "epoch": 2.21, "learning_rate": 4.631854763241625e-05, "loss": 0.0513, "step": 15740 }, { "epoch": 2.21, "learning_rate": 4.631807973048849e-05, "loss": 0.0499, "step": 15742 }, { "epoch": 2.21, "learning_rate": 4.631761182856074e-05, "loss": 0.0645, "step": 15744 }, { "epoch": 2.21, "learning_rate": 4.631714392663298e-05, "loss": 0.0597, "step": 15746 }, { "epoch": 2.21, "learning_rate": 4.6316676024705225e-05, "loss": 0.0603, "step": 15748 }, { "epoch": 2.21, "learning_rate": 4.6316208122777464e-05, "loss": 0.0607, "step": 15750 }, { "epoch": 2.21, "learning_rate": 4.631574022084971e-05, "loss": 0.072, "step": 15752 }, { "epoch": 2.21, "learning_rate": 4.6315272318921956e-05, "loss": 0.0732, "step": 15754 }, { "epoch": 2.21, "learning_rate": 4.63148044169942e-05, "loss": 0.0526, "step": 15756 }, { "epoch": 2.21, "learning_rate": 4.631433651506644e-05, "loss": 0.06, "step": 15758 }, { "epoch": 2.21, "learning_rate": 4.6313868613138686e-05, "loss": 0.0551, "step": 15760 }, { "epoch": 2.21, "learning_rate": 4.631340071121093e-05, "loss": 0.0413, "step": 15762 }, { "epoch": 2.21, "learning_rate": 4.631293280928318e-05, "loss": 0.0723, "step": 15764 }, { "epoch": 2.21, "learning_rate": 4.631246490735542e-05, "loss": 0.0541, "step": 15766 }, { "epoch": 2.21, "learning_rate": 4.631199700542766e-05, "loss": 0.0587, "step": 15768 }, { "epoch": 2.21, "learning_rate": 4.631152910349991e-05, "loss": 0.0835, "step": 15770 }, { "epoch": 2.21, "learning_rate": 4.6311061201572155e-05, "loss": 0.0397, "step": 15772 }, { "epoch": 2.21, "learning_rate": 4.6310593299644394e-05, "loss": 0.073, "step": 15774 }, { "epoch": 2.21, "learning_rate": 4.631012539771664e-05, "loss": 0.066, "step": 15776 }, { "epoch": 2.21, "learning_rate": 4.6309657495788886e-05, "loss": 0.0558, "step": 15778 }, { "epoch": 2.22, "learning_rate": 4.630918959386113e-05, "loss": 0.0481, "step": 15780 }, { "epoch": 2.22, "learning_rate": 4.630872169193337e-05, "loss": 0.0479, "step": 15782 }, { "epoch": 2.22, "learning_rate": 4.630825379000562e-05, "loss": 0.0566, "step": 15784 }, { "epoch": 2.22, "learning_rate": 4.6307785888077856e-05, "loss": 0.0748, "step": 15786 }, { "epoch": 2.22, "learning_rate": 4.630731798615011e-05, "loss": 0.0521, "step": 15788 }, { "epoch": 2.22, "learning_rate": 4.630685008422235e-05, "loss": 0.07, "step": 15790 }, { "epoch": 2.22, "learning_rate": 4.6306382182294594e-05, "loss": 0.0689, "step": 15792 }, { "epoch": 2.22, "learning_rate": 4.630591428036683e-05, "loss": 0.0412, "step": 15794 }, { "epoch": 2.22, "learning_rate": 4.6305446378439086e-05, "loss": 0.0472, "step": 15796 }, { "epoch": 2.22, "learning_rate": 4.6304978476511325e-05, "loss": 0.0605, "step": 15798 }, { "epoch": 2.22, "learning_rate": 4.630451057458357e-05, "loss": 0.0562, "step": 15800 }, { "epoch": 2.22, "learning_rate": 4.630404267265581e-05, "loss": 0.0456, "step": 15802 }, { "epoch": 2.22, "learning_rate": 4.6303574770728056e-05, "loss": 0.0515, "step": 15804 }, { "epoch": 2.22, "learning_rate": 4.63031068688003e-05, "loss": 0.0508, "step": 15806 }, { "epoch": 2.22, "learning_rate": 4.630263896687255e-05, "loss": 0.0717, "step": 15808 }, { "epoch": 2.22, "learning_rate": 4.630217106494479e-05, "loss": 0.0532, "step": 15810 }, { "epoch": 2.22, "learning_rate": 4.630170316301703e-05, "loss": 0.0498, "step": 15812 }, { "epoch": 2.22, "learning_rate": 4.630123526108928e-05, "loss": 0.0589, "step": 15814 }, { "epoch": 2.22, "learning_rate": 4.6300767359161525e-05, "loss": 0.0525, "step": 15816 }, { "epoch": 2.22, "learning_rate": 4.6300299457233764e-05, "loss": 0.0576, "step": 15818 }, { "epoch": 2.22, "learning_rate": 4.629983155530601e-05, "loss": 0.0634, "step": 15820 }, { "epoch": 2.22, "learning_rate": 4.6299363653378256e-05, "loss": 0.0351, "step": 15822 }, { "epoch": 2.22, "learning_rate": 4.62988957514505e-05, "loss": 0.0523, "step": 15824 }, { "epoch": 2.22, "learning_rate": 4.629842784952274e-05, "loss": 0.0544, "step": 15826 }, { "epoch": 2.22, "learning_rate": 4.6297959947594986e-05, "loss": 0.0492, "step": 15828 }, { "epoch": 2.22, "learning_rate": 4.6297492045667226e-05, "loss": 0.0475, "step": 15830 }, { "epoch": 2.22, "learning_rate": 4.629702414373948e-05, "loss": 0.043, "step": 15832 }, { "epoch": 2.22, "learning_rate": 4.629655624181172e-05, "loss": 0.0636, "step": 15834 }, { "epoch": 2.22, "learning_rate": 4.629608833988396e-05, "loss": 0.0614, "step": 15836 }, { "epoch": 2.22, "learning_rate": 4.62956204379562e-05, "loss": 0.0643, "step": 15838 }, { "epoch": 2.22, "learning_rate": 4.6295152536028455e-05, "loss": 0.0549, "step": 15840 }, { "epoch": 2.22, "learning_rate": 4.6294684634100694e-05, "loss": 0.0648, "step": 15842 }, { "epoch": 2.22, "learning_rate": 4.629421673217294e-05, "loss": 0.0555, "step": 15844 }, { "epoch": 2.22, "learning_rate": 4.629374883024518e-05, "loss": 0.047, "step": 15846 }, { "epoch": 2.22, "learning_rate": 4.6293280928317425e-05, "loss": 0.0622, "step": 15848 }, { "epoch": 2.22, "learning_rate": 4.629281302638967e-05, "loss": 0.0447, "step": 15850 }, { "epoch": 2.23, "learning_rate": 4.629234512446192e-05, "loss": 0.0678, "step": 15852 }, { "epoch": 2.23, "learning_rate": 4.6291877222534156e-05, "loss": 0.0686, "step": 15854 }, { "epoch": 2.23, "learning_rate": 4.62914093206064e-05, "loss": 0.0524, "step": 15856 }, { "epoch": 2.23, "learning_rate": 4.629094141867865e-05, "loss": 0.0439, "step": 15858 }, { "epoch": 2.23, "learning_rate": 4.6290473516750894e-05, "loss": 0.0538, "step": 15860 }, { "epoch": 2.23, "learning_rate": 4.629000561482313e-05, "loss": 0.0602, "step": 15862 }, { "epoch": 2.23, "learning_rate": 4.628953771289538e-05, "loss": 0.05, "step": 15864 }, { "epoch": 2.23, "learning_rate": 4.6289069810967625e-05, "loss": 0.0514, "step": 15866 }, { "epoch": 2.23, "learning_rate": 4.628860190903987e-05, "loss": 0.0504, "step": 15868 }, { "epoch": 2.23, "learning_rate": 4.628813400711211e-05, "loss": 0.0541, "step": 15870 }, { "epoch": 2.23, "learning_rate": 4.6287666105184356e-05, "loss": 0.0529, "step": 15872 }, { "epoch": 2.23, "learning_rate": 4.62871982032566e-05, "loss": 0.0441, "step": 15874 }, { "epoch": 2.23, "learning_rate": 4.628673030132885e-05, "loss": 0.0595, "step": 15876 }, { "epoch": 2.23, "learning_rate": 4.628626239940109e-05, "loss": 0.0677, "step": 15878 }, { "epoch": 2.23, "learning_rate": 4.628579449747333e-05, "loss": 0.0418, "step": 15880 }, { "epoch": 2.23, "learning_rate": 4.628532659554557e-05, "loss": 0.0485, "step": 15882 }, { "epoch": 2.23, "learning_rate": 4.6284858693617825e-05, "loss": 0.0765, "step": 15884 }, { "epoch": 2.23, "learning_rate": 4.6284390791690064e-05, "loss": 0.0502, "step": 15886 }, { "epoch": 2.23, "learning_rate": 4.628392288976231e-05, "loss": 0.0586, "step": 15888 }, { "epoch": 2.23, "learning_rate": 4.628345498783455e-05, "loss": 0.0559, "step": 15890 }, { "epoch": 2.23, "learning_rate": 4.62829870859068e-05, "loss": 0.0506, "step": 15892 }, { "epoch": 2.23, "learning_rate": 4.628251918397904e-05, "loss": 0.0451, "step": 15894 }, { "epoch": 2.23, "learning_rate": 4.6282051282051287e-05, "loss": 0.0502, "step": 15896 }, { "epoch": 2.23, "learning_rate": 4.6281583380123526e-05, "loss": 0.0514, "step": 15898 }, { "epoch": 2.23, "learning_rate": 4.628111547819577e-05, "loss": 0.0448, "step": 15900 }, { "epoch": 2.23, "learning_rate": 4.628064757626802e-05, "loss": 0.0621, "step": 15902 }, { "epoch": 2.23, "learning_rate": 4.628017967434026e-05, "loss": 0.0698, "step": 15904 }, { "epoch": 2.23, "learning_rate": 4.62797117724125e-05, "loss": 0.0967, "step": 15906 }, { "epoch": 2.23, "learning_rate": 4.627924387048475e-05, "loss": 0.0529, "step": 15908 }, { "epoch": 2.23, "learning_rate": 4.6278775968556994e-05, "loss": 0.0491, "step": 15910 }, { "epoch": 2.23, "learning_rate": 4.627830806662924e-05, "loss": 0.0508, "step": 15912 }, { "epoch": 2.23, "learning_rate": 4.627784016470148e-05, "loss": 0.0655, "step": 15914 }, { "epoch": 2.23, "learning_rate": 4.6277372262773725e-05, "loss": 0.0729, "step": 15916 }, { "epoch": 2.23, "learning_rate": 4.627690436084597e-05, "loss": 0.0731, "step": 15918 }, { "epoch": 2.23, "learning_rate": 4.627643645891822e-05, "loss": 0.0545, "step": 15920 }, { "epoch": 2.23, "learning_rate": 4.6275968556990456e-05, "loss": 0.0717, "step": 15922 }, { "epoch": 2.24, "learning_rate": 4.62755006550627e-05, "loss": 0.0521, "step": 15924 }, { "epoch": 2.24, "learning_rate": 4.627503275313495e-05, "loss": 0.0446, "step": 15926 }, { "epoch": 2.24, "learning_rate": 4.6274564851207194e-05, "loss": 0.0545, "step": 15928 }, { "epoch": 2.24, "learning_rate": 4.627409694927943e-05, "loss": 0.0442, "step": 15930 }, { "epoch": 2.24, "learning_rate": 4.627362904735168e-05, "loss": 0.0688, "step": 15932 }, { "epoch": 2.24, "learning_rate": 4.627316114542392e-05, "loss": 0.0469, "step": 15934 }, { "epoch": 2.24, "learning_rate": 4.627269324349617e-05, "loss": 0.0658, "step": 15936 }, { "epoch": 2.24, "learning_rate": 4.627222534156841e-05, "loss": 0.0646, "step": 15938 }, { "epoch": 2.24, "learning_rate": 4.6271757439640656e-05, "loss": 0.0568, "step": 15940 }, { "epoch": 2.24, "learning_rate": 4.6271289537712895e-05, "loss": 0.0526, "step": 15942 }, { "epoch": 2.24, "learning_rate": 4.627082163578514e-05, "loss": 0.0592, "step": 15944 }, { "epoch": 2.24, "learning_rate": 4.627035373385739e-05, "loss": 0.0532, "step": 15946 }, { "epoch": 2.24, "learning_rate": 4.626988583192963e-05, "loss": 0.0567, "step": 15948 }, { "epoch": 2.24, "learning_rate": 4.626941793000187e-05, "loss": 0.0497, "step": 15950 }, { "epoch": 2.24, "learning_rate": 4.626895002807412e-05, "loss": 0.0551, "step": 15952 }, { "epoch": 2.24, "learning_rate": 4.6268482126146364e-05, "loss": 0.061, "step": 15954 }, { "epoch": 2.24, "learning_rate": 4.626801422421861e-05, "loss": 0.0532, "step": 15956 }, { "epoch": 2.24, "learning_rate": 4.626754632229085e-05, "loss": 0.0593, "step": 15958 }, { "epoch": 2.24, "learning_rate": 4.6267078420363095e-05, "loss": 0.0664, "step": 15960 }, { "epoch": 2.24, "learning_rate": 4.626661051843534e-05, "loss": 0.0697, "step": 15962 }, { "epoch": 2.24, "learning_rate": 4.626614261650758e-05, "loss": 0.0644, "step": 15964 }, { "epoch": 2.24, "learning_rate": 4.6265674714579826e-05, "loss": 0.0635, "step": 15966 }, { "epoch": 2.24, "learning_rate": 4.6265206812652065e-05, "loss": 0.0528, "step": 15968 }, { "epoch": 2.24, "learning_rate": 4.626473891072432e-05, "loss": 0.0927, "step": 15970 }, { "epoch": 2.24, "learning_rate": 4.626427100879656e-05, "loss": 0.0519, "step": 15972 }, { "epoch": 2.24, "learning_rate": 4.62638031068688e-05, "loss": 0.0556, "step": 15974 }, { "epoch": 2.24, "learning_rate": 4.626333520494104e-05, "loss": 0.0586, "step": 15976 }, { "epoch": 2.24, "learning_rate": 4.626286730301329e-05, "loss": 0.063, "step": 15978 }, { "epoch": 2.24, "learning_rate": 4.6262399401085533e-05, "loss": 0.0455, "step": 15980 }, { "epoch": 2.24, "learning_rate": 4.626193149915778e-05, "loss": 0.0621, "step": 15982 }, { "epoch": 2.24, "learning_rate": 4.626146359723002e-05, "loss": 0.0717, "step": 15984 }, { "epoch": 2.24, "learning_rate": 4.6260995695302264e-05, "loss": 0.0523, "step": 15986 }, { "epoch": 2.24, "learning_rate": 4.626052779337451e-05, "loss": 0.0531, "step": 15988 }, { "epoch": 2.24, "learning_rate": 4.6260059891446756e-05, "loss": 0.0528, "step": 15990 }, { "epoch": 2.24, "learning_rate": 4.6259591989518995e-05, "loss": 0.0591, "step": 15992 }, { "epoch": 2.25, "learning_rate": 4.625912408759124e-05, "loss": 0.049, "step": 15994 }, { "epoch": 2.25, "learning_rate": 4.625865618566349e-05, "loss": 0.0585, "step": 15996 }, { "epoch": 2.25, "learning_rate": 4.625818828373573e-05, "loss": 0.0465, "step": 15998 }, { "epoch": 2.25, "learning_rate": 4.625772038180797e-05, "loss": 0.0579, "step": 16000 }, { "epoch": 2.25, "eval_gen_len": 31.2759, "eval_loss": 1.0764782428741455, "eval_meteor": 0.0444, "eval_runtime": 16.106, "eval_samples_per_second": 3.601, "eval_steps_per_second": 0.497, "step": 16000 }, { "epoch": 2.25, "learning_rate": 4.625725247988022e-05, "loss": 0.0719, "step": 16002 }, { "epoch": 2.25, "learning_rate": 4.6256784577952464e-05, "loss": 0.0528, "step": 16004 }, { "epoch": 2.25, "learning_rate": 4.625631667602471e-05, "loss": 0.0561, "step": 16006 }, { "epoch": 2.25, "learning_rate": 4.625584877409695e-05, "loss": 0.076, "step": 16008 }, { "epoch": 2.25, "learning_rate": 4.6255380872169195e-05, "loss": 0.078, "step": 16010 }, { "epoch": 2.25, "learning_rate": 4.6254912970241434e-05, "loss": 0.059, "step": 16012 }, { "epoch": 2.25, "learning_rate": 4.625444506831369e-05, "loss": 0.0592, "step": 16014 }, { "epoch": 2.25, "learning_rate": 4.6253977166385926e-05, "loss": 0.0463, "step": 16016 }, { "epoch": 2.25, "learning_rate": 4.625350926445817e-05, "loss": 0.0571, "step": 16018 }, { "epoch": 2.25, "learning_rate": 4.625304136253041e-05, "loss": 0.0479, "step": 16020 }, { "epoch": 2.25, "learning_rate": 4.6252573460602664e-05, "loss": 0.0529, "step": 16022 }, { "epoch": 2.25, "learning_rate": 4.62521055586749e-05, "loss": 0.0784, "step": 16024 }, { "epoch": 2.25, "learning_rate": 4.625163765674715e-05, "loss": 0.0507, "step": 16026 }, { "epoch": 2.25, "learning_rate": 4.625116975481939e-05, "loss": 0.0442, "step": 16028 }, { "epoch": 2.25, "learning_rate": 4.6250701852891634e-05, "loss": 0.0547, "step": 16030 }, { "epoch": 2.25, "learning_rate": 4.625023395096388e-05, "loss": 0.0469, "step": 16032 }, { "epoch": 2.25, "learning_rate": 4.6249766049036126e-05, "loss": 0.0338, "step": 16034 }, { "epoch": 2.25, "learning_rate": 4.6249298147108365e-05, "loss": 0.0775, "step": 16036 }, { "epoch": 2.25, "learning_rate": 4.624883024518061e-05, "loss": 0.0377, "step": 16038 }, { "epoch": 2.25, "learning_rate": 4.624836234325286e-05, "loss": 0.062, "step": 16040 }, { "epoch": 2.25, "learning_rate": 4.62478944413251e-05, "loss": 0.0591, "step": 16042 }, { "epoch": 2.25, "learning_rate": 4.624742653939734e-05, "loss": 0.0792, "step": 16044 }, { "epoch": 2.25, "learning_rate": 4.624695863746959e-05, "loss": 0.0659, "step": 16046 }, { "epoch": 2.25, "learning_rate": 4.6246490735541834e-05, "loss": 0.0681, "step": 16048 }, { "epoch": 2.25, "learning_rate": 4.624602283361408e-05, "loss": 0.0465, "step": 16050 }, { "epoch": 2.25, "learning_rate": 4.624555493168632e-05, "loss": 0.0468, "step": 16052 }, { "epoch": 2.25, "learning_rate": 4.6245087029758564e-05, "loss": 0.0424, "step": 16054 }, { "epoch": 2.25, "learning_rate": 4.624461912783081e-05, "loss": 0.0625, "step": 16056 }, { "epoch": 2.25, "learning_rate": 4.6244151225903056e-05, "loss": 0.0727, "step": 16058 }, { "epoch": 2.25, "learning_rate": 4.6243683323975295e-05, "loss": 0.0527, "step": 16060 }, { "epoch": 2.25, "learning_rate": 4.624321542204754e-05, "loss": 0.0547, "step": 16062 }, { "epoch": 2.25, "learning_rate": 4.624274752011978e-05, "loss": 0.0608, "step": 16064 }, { "epoch": 2.26, "learning_rate": 4.624227961819203e-05, "loss": 0.0504, "step": 16066 }, { "epoch": 2.26, "learning_rate": 4.624181171626427e-05, "loss": 0.0585, "step": 16068 }, { "epoch": 2.26, "learning_rate": 4.624134381433652e-05, "loss": 0.0524, "step": 16070 }, { "epoch": 2.26, "learning_rate": 4.624087591240876e-05, "loss": 0.0604, "step": 16072 }, { "epoch": 2.26, "learning_rate": 4.624040801048101e-05, "loss": 0.0621, "step": 16074 }, { "epoch": 2.26, "learning_rate": 4.623994010855325e-05, "loss": 0.0873, "step": 16076 }, { "epoch": 2.26, "learning_rate": 4.6239472206625495e-05, "loss": 0.0553, "step": 16078 }, { "epoch": 2.26, "learning_rate": 4.6239004304697734e-05, "loss": 0.0554, "step": 16080 }, { "epoch": 2.26, "learning_rate": 4.623853640276998e-05, "loss": 0.0522, "step": 16082 }, { "epoch": 2.26, "learning_rate": 4.6238068500842226e-05, "loss": 0.0606, "step": 16084 }, { "epoch": 2.26, "learning_rate": 4.623760059891447e-05, "loss": 0.0513, "step": 16086 }, { "epoch": 2.26, "learning_rate": 4.623713269698671e-05, "loss": 0.0511, "step": 16088 }, { "epoch": 2.26, "learning_rate": 4.623666479505896e-05, "loss": 0.0557, "step": 16090 }, { "epoch": 2.26, "learning_rate": 4.62361968931312e-05, "loss": 0.0704, "step": 16092 }, { "epoch": 2.26, "learning_rate": 4.623572899120345e-05, "loss": 0.049, "step": 16094 }, { "epoch": 2.26, "learning_rate": 4.623526108927569e-05, "loss": 0.0575, "step": 16096 }, { "epoch": 2.26, "learning_rate": 4.6234793187347934e-05, "loss": 0.0567, "step": 16098 }, { "epoch": 2.26, "learning_rate": 4.623432528542018e-05, "loss": 0.0604, "step": 16100 }, { "epoch": 2.26, "learning_rate": 4.6233857383492426e-05, "loss": 0.0472, "step": 16102 }, { "epoch": 2.26, "learning_rate": 4.6233389481564665e-05, "loss": 0.0522, "step": 16104 }, { "epoch": 2.26, "learning_rate": 4.623292157963691e-05, "loss": 0.063, "step": 16106 }, { "epoch": 2.26, "learning_rate": 4.623245367770915e-05, "loss": 0.0545, "step": 16108 }, { "epoch": 2.26, "learning_rate": 4.62319857757814e-05, "loss": 0.0519, "step": 16110 }, { "epoch": 2.26, "learning_rate": 4.623151787385364e-05, "loss": 0.0685, "step": 16112 }, { "epoch": 2.26, "learning_rate": 4.623104997192589e-05, "loss": 0.0856, "step": 16114 }, { "epoch": 2.26, "learning_rate": 4.623058206999813e-05, "loss": 0.069, "step": 16116 }, { "epoch": 2.26, "learning_rate": 4.623011416807038e-05, "loss": 0.0548, "step": 16118 }, { "epoch": 2.26, "learning_rate": 4.622964626614262e-05, "loss": 0.0451, "step": 16120 }, { "epoch": 2.26, "learning_rate": 4.6229178364214864e-05, "loss": 0.0634, "step": 16122 }, { "epoch": 2.26, "learning_rate": 4.6228710462287104e-05, "loss": 0.0577, "step": 16124 }, { "epoch": 2.26, "learning_rate": 4.622824256035935e-05, "loss": 0.0692, "step": 16126 }, { "epoch": 2.26, "learning_rate": 4.6227774658431595e-05, "loss": 0.0533, "step": 16128 }, { "epoch": 2.26, "learning_rate": 4.622730675650384e-05, "loss": 0.0734, "step": 16130 }, { "epoch": 2.26, "learning_rate": 4.622683885457608e-05, "loss": 0.0567, "step": 16132 }, { "epoch": 2.26, "learning_rate": 4.6226370952648326e-05, "loss": 0.0465, "step": 16134 }, { "epoch": 2.27, "learning_rate": 4.622590305072057e-05, "loss": 0.0433, "step": 16136 }, { "epoch": 2.27, "learning_rate": 4.622543514879282e-05, "loss": 0.0711, "step": 16138 }, { "epoch": 2.27, "learning_rate": 4.622496724686506e-05, "loss": 0.0676, "step": 16140 }, { "epoch": 2.27, "learning_rate": 4.62244993449373e-05, "loss": 0.0466, "step": 16142 }, { "epoch": 2.27, "learning_rate": 4.622403144300955e-05, "loss": 0.0457, "step": 16144 }, { "epoch": 2.27, "learning_rate": 4.6223563541081795e-05, "loss": 0.076, "step": 16146 }, { "epoch": 2.27, "learning_rate": 4.6223095639154034e-05, "loss": 0.049, "step": 16148 }, { "epoch": 2.27, "learning_rate": 4.622262773722628e-05, "loss": 0.066, "step": 16150 }, { "epoch": 2.27, "learning_rate": 4.6222159835298526e-05, "loss": 0.0499, "step": 16152 }, { "epoch": 2.27, "learning_rate": 4.622169193337077e-05, "loss": 0.0615, "step": 16154 }, { "epoch": 2.27, "learning_rate": 4.622122403144301e-05, "loss": 0.0845, "step": 16156 }, { "epoch": 2.27, "learning_rate": 4.622075612951526e-05, "loss": 0.0564, "step": 16158 }, { "epoch": 2.27, "learning_rate": 4.6220288227587496e-05, "loss": 0.0744, "step": 16160 }, { "epoch": 2.27, "learning_rate": 4.621982032565975e-05, "loss": 0.0741, "step": 16162 }, { "epoch": 2.27, "learning_rate": 4.621935242373199e-05, "loss": 0.0589, "step": 16164 }, { "epoch": 2.27, "learning_rate": 4.6218884521804234e-05, "loss": 0.048, "step": 16166 }, { "epoch": 2.27, "learning_rate": 4.621841661987647e-05, "loss": 0.0606, "step": 16168 }, { "epoch": 2.27, "learning_rate": 4.6217948717948726e-05, "loss": 0.0448, "step": 16170 }, { "epoch": 2.27, "learning_rate": 4.6217480816020965e-05, "loss": 0.0572, "step": 16172 }, { "epoch": 2.27, "learning_rate": 4.621701291409321e-05, "loss": 0.0815, "step": 16174 }, { "epoch": 2.27, "learning_rate": 4.621654501216545e-05, "loss": 0.055, "step": 16176 }, { "epoch": 2.27, "learning_rate": 4.6216077110237696e-05, "loss": 0.0532, "step": 16178 }, { "epoch": 2.27, "learning_rate": 4.621560920830994e-05, "loss": 0.0919, "step": 16180 }, { "epoch": 2.27, "learning_rate": 4.621514130638219e-05, "loss": 0.0571, "step": 16182 }, { "epoch": 2.27, "learning_rate": 4.621467340445443e-05, "loss": 0.0619, "step": 16184 }, { "epoch": 2.27, "learning_rate": 4.621420550252667e-05, "loss": 0.0578, "step": 16186 }, { "epoch": 2.27, "learning_rate": 4.621373760059892e-05, "loss": 0.0743, "step": 16188 }, { "epoch": 2.27, "learning_rate": 4.6213269698671165e-05, "loss": 0.0552, "step": 16190 }, { "epoch": 2.27, "learning_rate": 4.6212801796743404e-05, "loss": 0.0691, "step": 16192 }, { "epoch": 2.27, "learning_rate": 4.621233389481565e-05, "loss": 0.0478, "step": 16194 }, { "epoch": 2.27, "learning_rate": 4.6211865992887895e-05, "loss": 0.0586, "step": 16196 }, { "epoch": 2.27, "learning_rate": 4.621139809096014e-05, "loss": 0.076, "step": 16198 }, { "epoch": 2.27, "learning_rate": 4.621093018903238e-05, "loss": 0.0608, "step": 16200 }, { "epoch": 2.27, "learning_rate": 4.6210462287104626e-05, "loss": 0.0567, "step": 16202 }, { "epoch": 2.27, "learning_rate": 4.620999438517687e-05, "loss": 0.0479, "step": 16204 }, { "epoch": 2.27, "learning_rate": 4.620952648324912e-05, "loss": 0.0485, "step": 16206 }, { "epoch": 2.28, "learning_rate": 4.620905858132136e-05, "loss": 0.0612, "step": 16208 }, { "epoch": 2.28, "learning_rate": 4.62085906793936e-05, "loss": 0.0601, "step": 16210 }, { "epoch": 2.28, "learning_rate": 4.620812277746584e-05, "loss": 0.0503, "step": 16212 }, { "epoch": 2.28, "learning_rate": 4.620765487553809e-05, "loss": 0.0633, "step": 16214 }, { "epoch": 2.28, "learning_rate": 4.6207186973610334e-05, "loss": 0.0617, "step": 16216 }, { "epoch": 2.28, "learning_rate": 4.6206719071682573e-05, "loss": 0.0533, "step": 16218 }, { "epoch": 2.28, "learning_rate": 4.620625116975482e-05, "loss": 0.0569, "step": 16220 }, { "epoch": 2.28, "learning_rate": 4.6205783267827065e-05, "loss": 0.0582, "step": 16222 }, { "epoch": 2.28, "learning_rate": 4.620531536589931e-05, "loss": 0.0469, "step": 16224 }, { "epoch": 2.28, "learning_rate": 4.620484746397155e-05, "loss": 0.1163, "step": 16226 }, { "epoch": 2.28, "learning_rate": 4.6204379562043796e-05, "loss": 0.0874, "step": 16228 }, { "epoch": 2.28, "learning_rate": 4.620391166011604e-05, "loss": 0.0459, "step": 16230 }, { "epoch": 2.28, "learning_rate": 4.620344375818829e-05, "loss": 0.0586, "step": 16232 }, { "epoch": 2.28, "learning_rate": 4.620297585626053e-05, "loss": 0.0801, "step": 16234 }, { "epoch": 2.28, "learning_rate": 4.620250795433277e-05, "loss": 0.0812, "step": 16236 }, { "epoch": 2.28, "learning_rate": 4.620204005240502e-05, "loss": 0.0583, "step": 16238 }, { "epoch": 2.28, "learning_rate": 4.6201572150477265e-05, "loss": 0.0785, "step": 16240 }, { "epoch": 2.28, "learning_rate": 4.6201104248549504e-05, "loss": 0.0425, "step": 16242 }, { "epoch": 2.28, "learning_rate": 4.620063634662175e-05, "loss": 0.0945, "step": 16244 }, { "epoch": 2.28, "learning_rate": 4.620016844469399e-05, "loss": 0.0518, "step": 16246 }, { "epoch": 2.28, "learning_rate": 4.619970054276624e-05, "loss": 0.0438, "step": 16248 }, { "epoch": 2.28, "learning_rate": 4.619923264083848e-05, "loss": 0.0681, "step": 16250 }, { "epoch": 2.28, "learning_rate": 4.619876473891073e-05, "loss": 0.0466, "step": 16252 }, { "epoch": 2.28, "learning_rate": 4.6198296836982966e-05, "loss": 0.0776, "step": 16254 }, { "epoch": 2.28, "learning_rate": 4.619782893505521e-05, "loss": 0.0648, "step": 16256 }, { "epoch": 2.28, "learning_rate": 4.619736103312746e-05, "loss": 0.0503, "step": 16258 }, { "epoch": 2.28, "learning_rate": 4.6196893131199704e-05, "loss": 0.0909, "step": 16260 }, { "epoch": 2.28, "learning_rate": 4.619642522927194e-05, "loss": 0.0644, "step": 16262 }, { "epoch": 2.28, "learning_rate": 4.619595732734419e-05, "loss": 0.0481, "step": 16264 }, { "epoch": 2.28, "learning_rate": 4.6195489425416435e-05, "loss": 0.0545, "step": 16266 }, { "epoch": 2.28, "learning_rate": 4.619502152348868e-05, "loss": 0.0611, "step": 16268 }, { "epoch": 2.28, "learning_rate": 4.619455362156092e-05, "loss": 0.0729, "step": 16270 }, { "epoch": 2.28, "learning_rate": 4.6194085719633166e-05, "loss": 0.0636, "step": 16272 }, { "epoch": 2.28, "learning_rate": 4.619361781770541e-05, "loss": 0.0756, "step": 16274 }, { "epoch": 2.28, "learning_rate": 4.619314991577766e-05, "loss": 0.0516, "step": 16276 }, { "epoch": 2.28, "learning_rate": 4.6192682013849897e-05, "loss": 0.0693, "step": 16278 }, { "epoch": 2.29, "learning_rate": 4.619221411192214e-05, "loss": 0.0627, "step": 16280 }, { "epoch": 2.29, "learning_rate": 4.619174620999439e-05, "loss": 0.062, "step": 16282 }, { "epoch": 2.29, "learning_rate": 4.6191278308066634e-05, "loss": 0.0543, "step": 16284 }, { "epoch": 2.29, "learning_rate": 4.6190810406138873e-05, "loss": 0.0464, "step": 16286 }, { "epoch": 2.29, "learning_rate": 4.619034250421112e-05, "loss": 0.0653, "step": 16288 }, { "epoch": 2.29, "learning_rate": 4.618987460228336e-05, "loss": 0.0498, "step": 16290 }, { "epoch": 2.29, "learning_rate": 4.618940670035561e-05, "loss": 0.062, "step": 16292 }, { "epoch": 2.29, "learning_rate": 4.618893879842785e-05, "loss": 0.0462, "step": 16294 }, { "epoch": 2.29, "learning_rate": 4.6188470896500096e-05, "loss": 0.0664, "step": 16296 }, { "epoch": 2.29, "learning_rate": 4.6188002994572335e-05, "loss": 0.0627, "step": 16298 }, { "epoch": 2.29, "learning_rate": 4.618753509264459e-05, "loss": 0.0553, "step": 16300 }, { "epoch": 2.29, "learning_rate": 4.618706719071683e-05, "loss": 0.0562, "step": 16302 }, { "epoch": 2.29, "learning_rate": 4.618659928878907e-05, "loss": 0.0546, "step": 16304 }, { "epoch": 2.29, "learning_rate": 4.618613138686131e-05, "loss": 0.0595, "step": 16306 }, { "epoch": 2.29, "learning_rate": 4.618566348493356e-05, "loss": 0.0518, "step": 16308 }, { "epoch": 2.29, "learning_rate": 4.6185195583005804e-05, "loss": 0.0709, "step": 16310 }, { "epoch": 2.29, "learning_rate": 4.618472768107805e-05, "loss": 0.055, "step": 16312 }, { "epoch": 2.29, "learning_rate": 4.618425977915029e-05, "loss": 0.0624, "step": 16314 }, { "epoch": 2.29, "learning_rate": 4.6183791877222535e-05, "loss": 0.0775, "step": 16316 }, { "epoch": 2.29, "learning_rate": 4.618332397529478e-05, "loss": 0.0514, "step": 16318 }, { "epoch": 2.29, "learning_rate": 4.618285607336703e-05, "loss": 0.0788, "step": 16320 }, { "epoch": 2.29, "learning_rate": 4.6182388171439266e-05, "loss": 0.0521, "step": 16322 }, { "epoch": 2.29, "learning_rate": 4.618192026951151e-05, "loss": 0.0568, "step": 16324 }, { "epoch": 2.29, "learning_rate": 4.618145236758376e-05, "loss": 0.0472, "step": 16326 }, { "epoch": 2.29, "learning_rate": 4.6180984465656004e-05, "loss": 0.0519, "step": 16328 }, { "epoch": 2.29, "learning_rate": 4.618051656372824e-05, "loss": 0.0589, "step": 16330 }, { "epoch": 2.29, "learning_rate": 4.618004866180049e-05, "loss": 0.0736, "step": 16332 }, { "epoch": 2.29, "learning_rate": 4.6179580759872735e-05, "loss": 0.0629, "step": 16334 }, { "epoch": 2.29, "learning_rate": 4.617911285794498e-05, "loss": 0.0486, "step": 16336 }, { "epoch": 2.29, "learning_rate": 4.617864495601722e-05, "loss": 0.0601, "step": 16338 }, { "epoch": 2.29, "learning_rate": 4.6178177054089466e-05, "loss": 0.0581, "step": 16340 }, { "epoch": 2.29, "learning_rate": 4.6177709152161705e-05, "loss": 0.0396, "step": 16342 }, { "epoch": 2.29, "learning_rate": 4.617724125023396e-05, "loss": 0.0938, "step": 16344 }, { "epoch": 2.29, "learning_rate": 4.6176773348306197e-05, "loss": 0.0623, "step": 16346 }, { "epoch": 2.29, "learning_rate": 4.617630544637844e-05, "loss": 0.0791, "step": 16348 }, { "epoch": 2.3, "learning_rate": 4.617583754445068e-05, "loss": 0.057, "step": 16350 }, { "epoch": 2.3, "learning_rate": 4.6175369642522934e-05, "loss": 0.0463, "step": 16352 }, { "epoch": 2.3, "learning_rate": 4.6174901740595173e-05, "loss": 0.046, "step": 16354 }, { "epoch": 2.3, "learning_rate": 4.617443383866742e-05, "loss": 0.0841, "step": 16356 }, { "epoch": 2.3, "learning_rate": 4.617396593673966e-05, "loss": 0.0467, "step": 16358 }, { "epoch": 2.3, "learning_rate": 4.6173498034811904e-05, "loss": 0.0535, "step": 16360 }, { "epoch": 2.3, "learning_rate": 4.617303013288415e-05, "loss": 0.047, "step": 16362 }, { "epoch": 2.3, "learning_rate": 4.6172562230956396e-05, "loss": 0.1096, "step": 16364 }, { "epoch": 2.3, "learning_rate": 4.6172094329028635e-05, "loss": 0.068, "step": 16366 }, { "epoch": 2.3, "learning_rate": 4.617162642710088e-05, "loss": 0.0582, "step": 16368 }, { "epoch": 2.3, "learning_rate": 4.617115852517313e-05, "loss": 0.0677, "step": 16370 }, { "epoch": 2.3, "learning_rate": 4.617069062324537e-05, "loss": 0.0565, "step": 16372 }, { "epoch": 2.3, "learning_rate": 4.617022272131761e-05, "loss": 0.07, "step": 16374 }, { "epoch": 2.3, "learning_rate": 4.616975481938986e-05, "loss": 0.0644, "step": 16376 }, { "epoch": 2.3, "learning_rate": 4.6169286917462104e-05, "loss": 0.0604, "step": 16378 }, { "epoch": 2.3, "learning_rate": 4.616881901553435e-05, "loss": 0.0556, "step": 16380 }, { "epoch": 2.3, "learning_rate": 4.616835111360659e-05, "loss": 0.0456, "step": 16382 }, { "epoch": 2.3, "learning_rate": 4.6167883211678835e-05, "loss": 0.0548, "step": 16384 }, { "epoch": 2.3, "learning_rate": 4.616741530975108e-05, "loss": 0.0578, "step": 16386 }, { "epoch": 2.3, "learning_rate": 4.616694740782333e-05, "loss": 0.0583, "step": 16388 }, { "epoch": 2.3, "learning_rate": 4.6166479505895566e-05, "loss": 0.0528, "step": 16390 }, { "epoch": 2.3, "learning_rate": 4.616601160396781e-05, "loss": 0.064, "step": 16392 }, { "epoch": 2.3, "learning_rate": 4.616554370204005e-05, "loss": 0.058, "step": 16394 }, { "epoch": 2.3, "learning_rate": 4.6165075800112304e-05, "loss": 0.0615, "step": 16396 }, { "epoch": 2.3, "learning_rate": 4.616460789818454e-05, "loss": 0.0498, "step": 16398 }, { "epoch": 2.3, "learning_rate": 4.616413999625679e-05, "loss": 0.0625, "step": 16400 }, { "epoch": 2.3, "learning_rate": 4.616367209432903e-05, "loss": 0.0654, "step": 16402 }, { "epoch": 2.3, "learning_rate": 4.6163204192401274e-05, "loss": 0.0408, "step": 16404 }, { "epoch": 2.3, "learning_rate": 4.616273629047352e-05, "loss": 0.057, "step": 16406 }, { "epoch": 2.3, "learning_rate": 4.6162268388545766e-05, "loss": 0.0513, "step": 16408 }, { "epoch": 2.3, "learning_rate": 4.6161800486618005e-05, "loss": 0.0735, "step": 16410 }, { "epoch": 2.3, "learning_rate": 4.616133258469025e-05, "loss": 0.0664, "step": 16412 }, { "epoch": 2.3, "learning_rate": 4.6160864682762497e-05, "loss": 0.0707, "step": 16414 }, { "epoch": 2.3, "learning_rate": 4.616039678083474e-05, "loss": 0.0516, "step": 16416 }, { "epoch": 2.3, "learning_rate": 4.615992887890698e-05, "loss": 0.0514, "step": 16418 }, { "epoch": 2.3, "learning_rate": 4.615946097697923e-05, "loss": 0.0582, "step": 16420 }, { "epoch": 2.31, "learning_rate": 4.6158993075051473e-05, "loss": 0.0472, "step": 16422 }, { "epoch": 2.31, "learning_rate": 4.615852517312372e-05, "loss": 0.0581, "step": 16424 }, { "epoch": 2.31, "learning_rate": 4.615805727119596e-05, "loss": 0.0539, "step": 16426 }, { "epoch": 2.31, "learning_rate": 4.6157589369268204e-05, "loss": 0.0751, "step": 16428 }, { "epoch": 2.31, "learning_rate": 4.615712146734045e-05, "loss": 0.0493, "step": 16430 }, { "epoch": 2.31, "learning_rate": 4.6156653565412696e-05, "loss": 0.0724, "step": 16432 }, { "epoch": 2.31, "learning_rate": 4.6156185663484935e-05, "loss": 0.0493, "step": 16434 }, { "epoch": 2.31, "learning_rate": 4.615571776155718e-05, "loss": 0.0519, "step": 16436 }, { "epoch": 2.31, "learning_rate": 4.615524985962942e-05, "loss": 0.0593, "step": 16438 }, { "epoch": 2.31, "learning_rate": 4.615478195770167e-05, "loss": 0.048, "step": 16440 }, { "epoch": 2.31, "learning_rate": 4.615431405577391e-05, "loss": 0.0508, "step": 16442 }, { "epoch": 2.31, "learning_rate": 4.615384615384616e-05, "loss": 0.046, "step": 16444 }, { "epoch": 2.31, "learning_rate": 4.61533782519184e-05, "loss": 0.0685, "step": 16446 }, { "epoch": 2.31, "learning_rate": 4.615291034999065e-05, "loss": 0.0537, "step": 16448 }, { "epoch": 2.31, "learning_rate": 4.615244244806289e-05, "loss": 0.0448, "step": 16450 }, { "epoch": 2.31, "learning_rate": 4.6151974546135135e-05, "loss": 0.0671, "step": 16452 }, { "epoch": 2.31, "learning_rate": 4.6151506644207374e-05, "loss": 0.0448, "step": 16454 }, { "epoch": 2.31, "learning_rate": 4.615103874227962e-05, "loss": 0.0539, "step": 16456 }, { "epoch": 2.31, "learning_rate": 4.6150570840351866e-05, "loss": 0.0512, "step": 16458 }, { "epoch": 2.31, "learning_rate": 4.615010293842411e-05, "loss": 0.0685, "step": 16460 }, { "epoch": 2.31, "learning_rate": 4.614963503649635e-05, "loss": 0.0577, "step": 16462 }, { "epoch": 2.31, "learning_rate": 4.61491671345686e-05, "loss": 0.0531, "step": 16464 }, { "epoch": 2.31, "learning_rate": 4.614869923264084e-05, "loss": 0.0841, "step": 16466 }, { "epoch": 2.31, "learning_rate": 4.614823133071308e-05, "loss": 0.0471, "step": 16468 }, { "epoch": 2.31, "learning_rate": 4.614776342878533e-05, "loss": 0.0877, "step": 16470 }, { "epoch": 2.31, "learning_rate": 4.614729552685757e-05, "loss": 0.07, "step": 16472 }, { "epoch": 2.31, "learning_rate": 4.614682762492982e-05, "loss": 0.0508, "step": 16474 }, { "epoch": 2.31, "learning_rate": 4.614635972300206e-05, "loss": 0.0608, "step": 16476 }, { "epoch": 2.31, "learning_rate": 4.6145891821074305e-05, "loss": 0.0594, "step": 16478 }, { "epoch": 2.31, "learning_rate": 4.6145423919146544e-05, "loss": 0.0865, "step": 16480 }, { "epoch": 2.31, "learning_rate": 4.6144956017218797e-05, "loss": 0.0721, "step": 16482 }, { "epoch": 2.31, "learning_rate": 4.6144488115291036e-05, "loss": 0.0509, "step": 16484 }, { "epoch": 2.31, "learning_rate": 4.614402021336328e-05, "loss": 0.0556, "step": 16486 }, { "epoch": 2.31, "learning_rate": 4.614355231143552e-05, "loss": 0.0462, "step": 16488 }, { "epoch": 2.31, "learning_rate": 4.614308440950777e-05, "loss": 0.0593, "step": 16490 }, { "epoch": 2.31, "learning_rate": 4.614261650758001e-05, "loss": 0.0748, "step": 16492 }, { "epoch": 2.32, "learning_rate": 4.614214860565226e-05, "loss": 0.0551, "step": 16494 }, { "epoch": 2.32, "learning_rate": 4.61416807037245e-05, "loss": 0.0499, "step": 16496 }, { "epoch": 2.32, "learning_rate": 4.6141212801796744e-05, "loss": 0.0581, "step": 16498 }, { "epoch": 2.32, "learning_rate": 4.614074489986899e-05, "loss": 0.0746, "step": 16500 }, { "epoch": 2.32, "learning_rate": 4.6140276997941235e-05, "loss": 0.0484, "step": 16502 }, { "epoch": 2.32, "learning_rate": 4.6139809096013475e-05, "loss": 0.0661, "step": 16504 }, { "epoch": 2.32, "learning_rate": 4.613934119408572e-05, "loss": 0.0562, "step": 16506 }, { "epoch": 2.32, "learning_rate": 4.6138873292157966e-05, "loss": 0.0686, "step": 16508 }, { "epoch": 2.32, "learning_rate": 4.613840539023021e-05, "loss": 0.0752, "step": 16510 }, { "epoch": 2.32, "learning_rate": 4.613793748830245e-05, "loss": 0.0491, "step": 16512 }, { "epoch": 2.32, "learning_rate": 4.61374695863747e-05, "loss": 0.0613, "step": 16514 }, { "epoch": 2.32, "learning_rate": 4.613700168444694e-05, "loss": 0.0764, "step": 16516 }, { "epoch": 2.32, "learning_rate": 4.613653378251919e-05, "loss": 0.0659, "step": 16518 }, { "epoch": 2.32, "learning_rate": 4.613606588059143e-05, "loss": 0.0626, "step": 16520 }, { "epoch": 2.32, "learning_rate": 4.6135597978663674e-05, "loss": 0.0582, "step": 16522 }, { "epoch": 2.32, "learning_rate": 4.613513007673591e-05, "loss": 0.0557, "step": 16524 }, { "epoch": 2.32, "learning_rate": 4.6134662174808166e-05, "loss": 0.0657, "step": 16526 }, { "epoch": 2.32, "learning_rate": 4.6134194272880405e-05, "loss": 0.0618, "step": 16528 }, { "epoch": 2.32, "learning_rate": 4.613372637095265e-05, "loss": 0.072, "step": 16530 }, { "epoch": 2.32, "learning_rate": 4.613325846902489e-05, "loss": 0.062, "step": 16532 }, { "epoch": 2.32, "learning_rate": 4.6132790567097136e-05, "loss": 0.0533, "step": 16534 }, { "epoch": 2.32, "learning_rate": 4.613232266516938e-05, "loss": 0.0631, "step": 16536 }, { "epoch": 2.32, "learning_rate": 4.613185476324163e-05, "loss": 0.0842, "step": 16538 }, { "epoch": 2.32, "learning_rate": 4.613138686131387e-05, "loss": 0.0708, "step": 16540 }, { "epoch": 2.32, "learning_rate": 4.613091895938611e-05, "loss": 0.0484, "step": 16542 }, { "epoch": 2.32, "learning_rate": 4.613045105745836e-05, "loss": 0.0605, "step": 16544 }, { "epoch": 2.32, "learning_rate": 4.6129983155530605e-05, "loss": 0.0594, "step": 16546 }, { "epoch": 2.32, "learning_rate": 4.6129515253602844e-05, "loss": 0.0636, "step": 16548 }, { "epoch": 2.32, "learning_rate": 4.612904735167509e-05, "loss": 0.0584, "step": 16550 }, { "epoch": 2.32, "learning_rate": 4.6128579449747336e-05, "loss": 0.0532, "step": 16552 }, { "epoch": 2.32, "learning_rate": 4.612811154781958e-05, "loss": 0.073, "step": 16554 }, { "epoch": 2.32, "learning_rate": 4.612764364589182e-05, "loss": 0.0531, "step": 16556 }, { "epoch": 2.32, "learning_rate": 4.612717574396407e-05, "loss": 0.0558, "step": 16558 }, { "epoch": 2.32, "learning_rate": 4.612670784203631e-05, "loss": 0.0779, "step": 16560 }, { "epoch": 2.32, "learning_rate": 4.612623994010856e-05, "loss": 0.0613, "step": 16562 }, { "epoch": 2.33, "learning_rate": 4.61257720381808e-05, "loss": 0.0509, "step": 16564 }, { "epoch": 2.33, "learning_rate": 4.6125304136253044e-05, "loss": 0.0541, "step": 16566 }, { "epoch": 2.33, "learning_rate": 4.612483623432528e-05, "loss": 0.0821, "step": 16568 }, { "epoch": 2.33, "learning_rate": 4.6124368332397535e-05, "loss": 0.0678, "step": 16570 }, { "epoch": 2.33, "learning_rate": 4.6123900430469775e-05, "loss": 0.0801, "step": 16572 }, { "epoch": 2.33, "learning_rate": 4.612343252854202e-05, "loss": 0.0473, "step": 16574 }, { "epoch": 2.33, "learning_rate": 4.612296462661426e-05, "loss": 0.0618, "step": 16576 }, { "epoch": 2.33, "learning_rate": 4.612249672468651e-05, "loss": 0.0674, "step": 16578 }, { "epoch": 2.33, "learning_rate": 4.612202882275875e-05, "loss": 0.0641, "step": 16580 }, { "epoch": 2.33, "learning_rate": 4.6121560920831e-05, "loss": 0.0386, "step": 16582 }, { "epoch": 2.33, "learning_rate": 4.6121093018903236e-05, "loss": 0.0587, "step": 16584 }, { "epoch": 2.33, "learning_rate": 4.612062511697548e-05, "loss": 0.0815, "step": 16586 }, { "epoch": 2.33, "learning_rate": 4.612015721504773e-05, "loss": 0.0677, "step": 16588 }, { "epoch": 2.33, "learning_rate": 4.6119689313119974e-05, "loss": 0.0587, "step": 16590 }, { "epoch": 2.33, "learning_rate": 4.611922141119221e-05, "loss": 0.0756, "step": 16592 }, { "epoch": 2.33, "learning_rate": 4.611875350926446e-05, "loss": 0.052, "step": 16594 }, { "epoch": 2.33, "learning_rate": 4.6118285607336705e-05, "loss": 0.0793, "step": 16596 }, { "epoch": 2.33, "learning_rate": 4.611781770540895e-05, "loss": 0.0521, "step": 16598 }, { "epoch": 2.33, "learning_rate": 4.611734980348119e-05, "loss": 0.0568, "step": 16600 }, { "epoch": 2.33, "learning_rate": 4.6116881901553436e-05, "loss": 0.0561, "step": 16602 }, { "epoch": 2.33, "learning_rate": 4.611641399962568e-05, "loss": 0.0714, "step": 16604 }, { "epoch": 2.33, "learning_rate": 4.611594609769793e-05, "loss": 0.0629, "step": 16606 }, { "epoch": 2.33, "learning_rate": 4.611547819577017e-05, "loss": 0.0693, "step": 16608 }, { "epoch": 2.33, "learning_rate": 4.611501029384241e-05, "loss": 0.0908, "step": 16610 }, { "epoch": 2.33, "learning_rate": 4.611454239191466e-05, "loss": 0.0742, "step": 16612 }, { "epoch": 2.33, "learning_rate": 4.6114074489986905e-05, "loss": 0.0683, "step": 16614 }, { "epoch": 2.33, "learning_rate": 4.6113606588059144e-05, "loss": 0.0545, "step": 16616 }, { "epoch": 2.33, "learning_rate": 4.611313868613139e-05, "loss": 0.0542, "step": 16618 }, { "epoch": 2.33, "learning_rate": 4.611267078420363e-05, "loss": 0.074, "step": 16620 }, { "epoch": 2.33, "learning_rate": 4.611220288227588e-05, "loss": 0.051, "step": 16622 }, { "epoch": 2.33, "learning_rate": 4.611173498034812e-05, "loss": 0.0808, "step": 16624 }, { "epoch": 2.33, "learning_rate": 4.611126707842037e-05, "loss": 0.0537, "step": 16626 }, { "epoch": 2.33, "learning_rate": 4.6110799176492606e-05, "loss": 0.0457, "step": 16628 }, { "epoch": 2.33, "learning_rate": 4.611033127456486e-05, "loss": 0.087, "step": 16630 }, { "epoch": 2.33, "learning_rate": 4.61098633726371e-05, "loss": 0.0687, "step": 16632 }, { "epoch": 2.33, "learning_rate": 4.6109395470709344e-05, "loss": 0.056, "step": 16634 }, { "epoch": 2.34, "learning_rate": 4.610892756878158e-05, "loss": 0.0518, "step": 16636 }, { "epoch": 2.34, "learning_rate": 4.610845966685383e-05, "loss": 0.0531, "step": 16638 }, { "epoch": 2.34, "learning_rate": 4.6107991764926075e-05, "loss": 0.0789, "step": 16640 }, { "epoch": 2.34, "learning_rate": 4.610752386299832e-05, "loss": 0.0628, "step": 16642 }, { "epoch": 2.34, "learning_rate": 4.610705596107056e-05, "loss": 0.0588, "step": 16644 }, { "epoch": 2.34, "learning_rate": 4.6106588059142806e-05, "loss": 0.0638, "step": 16646 }, { "epoch": 2.34, "learning_rate": 4.610612015721505e-05, "loss": 0.061, "step": 16648 }, { "epoch": 2.34, "learning_rate": 4.61056522552873e-05, "loss": 0.0556, "step": 16650 }, { "epoch": 2.34, "learning_rate": 4.6105184353359536e-05, "loss": 0.0462, "step": 16652 }, { "epoch": 2.34, "learning_rate": 4.610471645143178e-05, "loss": 0.0714, "step": 16654 }, { "epoch": 2.34, "learning_rate": 4.610424854950403e-05, "loss": 0.0501, "step": 16656 }, { "epoch": 2.34, "learning_rate": 4.6103780647576274e-05, "loss": 0.0516, "step": 16658 }, { "epoch": 2.34, "learning_rate": 4.610331274564851e-05, "loss": 0.0599, "step": 16660 }, { "epoch": 2.34, "learning_rate": 4.610284484372076e-05, "loss": 0.0821, "step": 16662 }, { "epoch": 2.34, "learning_rate": 4.6102376941793005e-05, "loss": 0.0811, "step": 16664 }, { "epoch": 2.34, "learning_rate": 4.610190903986525e-05, "loss": 0.0708, "step": 16666 }, { "epoch": 2.34, "learning_rate": 4.610144113793749e-05, "loss": 0.0741, "step": 16668 }, { "epoch": 2.34, "learning_rate": 4.6100973236009736e-05, "loss": 0.0713, "step": 16670 }, { "epoch": 2.34, "learning_rate": 4.6100505334081975e-05, "loss": 0.0618, "step": 16672 }, { "epoch": 2.34, "learning_rate": 4.610003743215423e-05, "loss": 0.0671, "step": 16674 }, { "epoch": 2.34, "learning_rate": 4.609956953022647e-05, "loss": 0.0601, "step": 16676 }, { "epoch": 2.34, "learning_rate": 4.609910162829871e-05, "loss": 0.0528, "step": 16678 }, { "epoch": 2.34, "learning_rate": 4.609863372637095e-05, "loss": 0.0526, "step": 16680 }, { "epoch": 2.34, "learning_rate": 4.60981658244432e-05, "loss": 0.0576, "step": 16682 }, { "epoch": 2.34, "learning_rate": 4.6097697922515444e-05, "loss": 0.0426, "step": 16684 }, { "epoch": 2.34, "learning_rate": 4.609723002058769e-05, "loss": 0.0726, "step": 16686 }, { "epoch": 2.34, "learning_rate": 4.609676211865993e-05, "loss": 0.0705, "step": 16688 }, { "epoch": 2.34, "learning_rate": 4.6096294216732175e-05, "loss": 0.0621, "step": 16690 }, { "epoch": 2.34, "learning_rate": 4.609582631480442e-05, "loss": 0.0509, "step": 16692 }, { "epoch": 2.34, "learning_rate": 4.609535841287667e-05, "loss": 0.0782, "step": 16694 }, { "epoch": 2.34, "learning_rate": 4.6094890510948906e-05, "loss": 0.0638, "step": 16696 }, { "epoch": 2.34, "learning_rate": 4.609442260902115e-05, "loss": 0.0521, "step": 16698 }, { "epoch": 2.34, "learning_rate": 4.60939547070934e-05, "loss": 0.0451, "step": 16700 }, { "epoch": 2.34, "learning_rate": 4.6093486805165644e-05, "loss": 0.0591, "step": 16702 }, { "epoch": 2.34, "learning_rate": 4.609301890323788e-05, "loss": 0.0667, "step": 16704 }, { "epoch": 2.35, "learning_rate": 4.609255100131013e-05, "loss": 0.0561, "step": 16706 }, { "epoch": 2.35, "learning_rate": 4.6092083099382375e-05, "loss": 0.0688, "step": 16708 }, { "epoch": 2.35, "learning_rate": 4.609161519745462e-05, "loss": 0.0598, "step": 16710 }, { "epoch": 2.35, "learning_rate": 4.609114729552686e-05, "loss": 0.0753, "step": 16712 }, { "epoch": 2.35, "learning_rate": 4.6090679393599106e-05, "loss": 0.0469, "step": 16714 }, { "epoch": 2.35, "learning_rate": 4.6090211491671345e-05, "loss": 0.0704, "step": 16716 }, { "epoch": 2.35, "learning_rate": 4.608974358974359e-05, "loss": 0.077, "step": 16718 }, { "epoch": 2.35, "learning_rate": 4.6089275687815836e-05, "loss": 0.0525, "step": 16720 }, { "epoch": 2.35, "learning_rate": 4.6088807785888076e-05, "loss": 0.0562, "step": 16722 }, { "epoch": 2.35, "learning_rate": 4.608833988396032e-05, "loss": 0.052, "step": 16724 }, { "epoch": 2.35, "learning_rate": 4.608787198203257e-05, "loss": 0.0595, "step": 16726 }, { "epoch": 2.35, "learning_rate": 4.608740408010481e-05, "loss": 0.0414, "step": 16728 }, { "epoch": 2.35, "learning_rate": 4.608693617817705e-05, "loss": 0.0572, "step": 16730 }, { "epoch": 2.35, "learning_rate": 4.60864682762493e-05, "loss": 0.0627, "step": 16732 }, { "epoch": 2.35, "learning_rate": 4.6086000374321544e-05, "loss": 0.0548, "step": 16734 }, { "epoch": 2.35, "learning_rate": 4.608553247239379e-05, "loss": 0.0753, "step": 16736 }, { "epoch": 2.35, "learning_rate": 4.608506457046603e-05, "loss": 0.0451, "step": 16738 }, { "epoch": 2.35, "learning_rate": 4.6084596668538275e-05, "loss": 0.0617, "step": 16740 }, { "epoch": 2.35, "learning_rate": 4.608412876661052e-05, "loss": 0.053, "step": 16742 }, { "epoch": 2.35, "learning_rate": 4.608366086468277e-05, "loss": 0.0546, "step": 16744 }, { "epoch": 2.35, "learning_rate": 4.6083192962755006e-05, "loss": 0.0679, "step": 16746 }, { "epoch": 2.35, "learning_rate": 4.608272506082725e-05, "loss": 0.0787, "step": 16748 }, { "epoch": 2.35, "learning_rate": 4.608225715889949e-05, "loss": 0.0687, "step": 16750 }, { "epoch": 2.35, "learning_rate": 4.6081789256971744e-05, "loss": 0.0536, "step": 16752 }, { "epoch": 2.35, "learning_rate": 4.608132135504398e-05, "loss": 0.0475, "step": 16754 }, { "epoch": 2.35, "learning_rate": 4.608085345311623e-05, "loss": 0.0521, "step": 16756 }, { "epoch": 2.35, "learning_rate": 4.608038555118847e-05, "loss": 0.0566, "step": 16758 }, { "epoch": 2.35, "learning_rate": 4.607991764926072e-05, "loss": 0.0481, "step": 16760 }, { "epoch": 2.35, "learning_rate": 4.607944974733296e-05, "loss": 0.0376, "step": 16762 }, { "epoch": 2.35, "learning_rate": 4.6078981845405206e-05, "loss": 0.0459, "step": 16764 }, { "epoch": 2.35, "learning_rate": 4.6078513943477445e-05, "loss": 0.0415, "step": 16766 }, { "epoch": 2.35, "learning_rate": 4.607804604154969e-05, "loss": 0.0703, "step": 16768 }, { "epoch": 2.35, "learning_rate": 4.607757813962194e-05, "loss": 0.0706, "step": 16770 }, { "epoch": 2.35, "learning_rate": 4.607711023769418e-05, "loss": 0.064, "step": 16772 }, { "epoch": 2.35, "learning_rate": 4.607664233576642e-05, "loss": 0.0482, "step": 16774 }, { "epoch": 2.35, "learning_rate": 4.607617443383867e-05, "loss": 0.0616, "step": 16776 }, { "epoch": 2.36, "learning_rate": 4.6075706531910914e-05, "loss": 0.0534, "step": 16778 }, { "epoch": 2.36, "learning_rate": 4.607523862998316e-05, "loss": 0.0702, "step": 16780 }, { "epoch": 2.36, "learning_rate": 4.60747707280554e-05, "loss": 0.0701, "step": 16782 }, { "epoch": 2.36, "learning_rate": 4.6074302826127645e-05, "loss": 0.0727, "step": 16784 }, { "epoch": 2.36, "learning_rate": 4.607383492419989e-05, "loss": 0.0622, "step": 16786 }, { "epoch": 2.36, "learning_rate": 4.6073367022272137e-05, "loss": 0.0635, "step": 16788 }, { "epoch": 2.36, "learning_rate": 4.6072899120344376e-05, "loss": 0.0579, "step": 16790 }, { "epoch": 2.36, "learning_rate": 4.607243121841662e-05, "loss": 0.0479, "step": 16792 }, { "epoch": 2.36, "learning_rate": 4.607196331648887e-05, "loss": 0.0494, "step": 16794 }, { "epoch": 2.36, "learning_rate": 4.607149541456111e-05, "loss": 0.0831, "step": 16796 }, { "epoch": 2.36, "learning_rate": 4.607102751263335e-05, "loss": 0.0533, "step": 16798 }, { "epoch": 2.36, "learning_rate": 4.60705596107056e-05, "loss": 0.0537, "step": 16800 }, { "epoch": 2.36, "learning_rate": 4.607009170877784e-05, "loss": 0.0819, "step": 16802 }, { "epoch": 2.36, "learning_rate": 4.606962380685009e-05, "loss": 0.0561, "step": 16804 }, { "epoch": 2.36, "learning_rate": 4.606915590492233e-05, "loss": 0.06, "step": 16806 }, { "epoch": 2.36, "learning_rate": 4.6068688002994575e-05, "loss": 0.0597, "step": 16808 }, { "epoch": 2.36, "learning_rate": 4.6068220101066814e-05, "loss": 0.0701, "step": 16810 }, { "epoch": 2.36, "learning_rate": 4.606775219913906e-05, "loss": 0.0679, "step": 16812 }, { "epoch": 2.36, "learning_rate": 4.6067284297211306e-05, "loss": 0.0591, "step": 16814 }, { "epoch": 2.36, "learning_rate": 4.606681639528355e-05, "loss": 0.0587, "step": 16816 }, { "epoch": 2.36, "learning_rate": 4.606634849335579e-05, "loss": 0.0487, "step": 16818 }, { "epoch": 2.36, "learning_rate": 4.606588059142804e-05, "loss": 0.0621, "step": 16820 }, { "epoch": 2.36, "learning_rate": 4.606541268950028e-05, "loss": 0.0735, "step": 16822 }, { "epoch": 2.36, "learning_rate": 4.606494478757253e-05, "loss": 0.0654, "step": 16824 }, { "epoch": 2.36, "learning_rate": 4.606447688564477e-05, "loss": 0.0682, "step": 16826 }, { "epoch": 2.36, "learning_rate": 4.6064008983717014e-05, "loss": 0.0346, "step": 16828 }, { "epoch": 2.36, "learning_rate": 4.606354108178926e-05, "loss": 0.0667, "step": 16830 }, { "epoch": 2.36, "learning_rate": 4.6063073179861506e-05, "loss": 0.0453, "step": 16832 }, { "epoch": 2.36, "learning_rate": 4.6062605277933745e-05, "loss": 0.0623, "step": 16834 }, { "epoch": 2.36, "learning_rate": 4.606213737600599e-05, "loss": 0.0471, "step": 16836 }, { "epoch": 2.36, "learning_rate": 4.606166947407824e-05, "loss": 0.0709, "step": 16838 }, { "epoch": 2.36, "learning_rate": 4.606120157215048e-05, "loss": 0.0435, "step": 16840 }, { "epoch": 2.36, "learning_rate": 4.606073367022272e-05, "loss": 0.0719, "step": 16842 }, { "epoch": 2.36, "learning_rate": 4.606026576829497e-05, "loss": 0.0587, "step": 16844 }, { "epoch": 2.36, "learning_rate": 4.605979786636721e-05, "loss": 0.0595, "step": 16846 }, { "epoch": 2.36, "learning_rate": 4.605932996443946e-05, "loss": 0.0608, "step": 16848 }, { "epoch": 2.37, "learning_rate": 4.60588620625117e-05, "loss": 0.0538, "step": 16850 }, { "epoch": 2.37, "learning_rate": 4.6058394160583945e-05, "loss": 0.0636, "step": 16852 }, { "epoch": 2.37, "learning_rate": 4.6057926258656184e-05, "loss": 0.0599, "step": 16854 }, { "epoch": 2.37, "learning_rate": 4.6057458356728437e-05, "loss": 0.0517, "step": 16856 }, { "epoch": 2.37, "learning_rate": 4.6056990454800676e-05, "loss": 0.0577, "step": 16858 }, { "epoch": 2.37, "learning_rate": 4.605652255287292e-05, "loss": 0.0617, "step": 16860 }, { "epoch": 2.37, "learning_rate": 4.605605465094516e-05, "loss": 0.0459, "step": 16862 }, { "epoch": 2.37, "learning_rate": 4.605558674901741e-05, "loss": 0.0593, "step": 16864 }, { "epoch": 2.37, "learning_rate": 4.605511884708965e-05, "loss": 0.0528, "step": 16866 }, { "epoch": 2.37, "learning_rate": 4.60546509451619e-05, "loss": 0.069, "step": 16868 }, { "epoch": 2.37, "learning_rate": 4.605418304323414e-05, "loss": 0.0485, "step": 16870 }, { "epoch": 2.37, "learning_rate": 4.6053715141306383e-05, "loss": 0.0633, "step": 16872 }, { "epoch": 2.37, "learning_rate": 4.605324723937863e-05, "loss": 0.0693, "step": 16874 }, { "epoch": 2.37, "learning_rate": 4.6052779337450875e-05, "loss": 0.0531, "step": 16876 }, { "epoch": 2.37, "learning_rate": 4.6052311435523114e-05, "loss": 0.0652, "step": 16878 }, { "epoch": 2.37, "learning_rate": 4.605184353359536e-05, "loss": 0.0575, "step": 16880 }, { "epoch": 2.37, "learning_rate": 4.6051375631667606e-05, "loss": 0.0489, "step": 16882 }, { "epoch": 2.37, "learning_rate": 4.605090772973985e-05, "loss": 0.0633, "step": 16884 }, { "epoch": 2.37, "learning_rate": 4.605043982781209e-05, "loss": 0.0673, "step": 16886 }, { "epoch": 2.37, "learning_rate": 4.604997192588434e-05, "loss": 0.0693, "step": 16888 }, { "epoch": 2.37, "learning_rate": 4.604950402395658e-05, "loss": 0.0557, "step": 16890 }, { "epoch": 2.37, "learning_rate": 4.604903612202883e-05, "loss": 0.07, "step": 16892 }, { "epoch": 2.37, "learning_rate": 4.604856822010107e-05, "loss": 0.0546, "step": 16894 }, { "epoch": 2.37, "learning_rate": 4.6048100318173314e-05, "loss": 0.0549, "step": 16896 }, { "epoch": 2.37, "learning_rate": 4.604763241624555e-05, "loss": 0.0663, "step": 16898 }, { "epoch": 2.37, "learning_rate": 4.6047164514317806e-05, "loss": 0.0624, "step": 16900 }, { "epoch": 2.37, "learning_rate": 4.6046696612390045e-05, "loss": 0.043, "step": 16902 }, { "epoch": 2.37, "learning_rate": 4.604622871046229e-05, "loss": 0.0709, "step": 16904 }, { "epoch": 2.37, "learning_rate": 4.604576080853453e-05, "loss": 0.0474, "step": 16906 }, { "epoch": 2.37, "learning_rate": 4.604529290660678e-05, "loss": 0.0657, "step": 16908 }, { "epoch": 2.37, "learning_rate": 4.604482500467902e-05, "loss": 0.0666, "step": 16910 }, { "epoch": 2.37, "learning_rate": 4.604435710275127e-05, "loss": 0.0794, "step": 16912 }, { "epoch": 2.37, "learning_rate": 4.604388920082351e-05, "loss": 0.0522, "step": 16914 }, { "epoch": 2.37, "learning_rate": 4.604342129889575e-05, "loss": 0.0546, "step": 16916 }, { "epoch": 2.37, "learning_rate": 4.6042953396968e-05, "loss": 0.0876, "step": 16918 }, { "epoch": 2.38, "learning_rate": 4.6042485495040245e-05, "loss": 0.0542, "step": 16920 }, { "epoch": 2.38, "learning_rate": 4.6042017593112484e-05, "loss": 0.0445, "step": 16922 }, { "epoch": 2.38, "learning_rate": 4.604154969118473e-05, "loss": 0.0845, "step": 16924 }, { "epoch": 2.38, "learning_rate": 4.6041081789256976e-05, "loss": 0.0591, "step": 16926 }, { "epoch": 2.38, "learning_rate": 4.604061388732922e-05, "loss": 0.0515, "step": 16928 }, { "epoch": 2.38, "learning_rate": 4.604014598540146e-05, "loss": 0.0716, "step": 16930 }, { "epoch": 2.38, "learning_rate": 4.603967808347371e-05, "loss": 0.045, "step": 16932 }, { "epoch": 2.38, "learning_rate": 4.603921018154595e-05, "loss": 0.0721, "step": 16934 }, { "epoch": 2.38, "learning_rate": 4.60387422796182e-05, "loss": 0.0594, "step": 16936 }, { "epoch": 2.38, "learning_rate": 4.603827437769044e-05, "loss": 0.0614, "step": 16938 }, { "epoch": 2.38, "learning_rate": 4.6037806475762684e-05, "loss": 0.0675, "step": 16940 }, { "epoch": 2.38, "learning_rate": 4.603733857383493e-05, "loss": 0.0688, "step": 16942 }, { "epoch": 2.38, "learning_rate": 4.6036870671907175e-05, "loss": 0.0542, "step": 16944 }, { "epoch": 2.38, "learning_rate": 4.6036402769979414e-05, "loss": 0.0424, "step": 16946 }, { "epoch": 2.38, "learning_rate": 4.603593486805166e-05, "loss": 0.0735, "step": 16948 }, { "epoch": 2.38, "learning_rate": 4.60354669661239e-05, "loss": 0.0488, "step": 16950 }, { "epoch": 2.38, "learning_rate": 4.603499906419615e-05, "loss": 0.0603, "step": 16952 }, { "epoch": 2.38, "learning_rate": 4.603453116226839e-05, "loss": 0.0782, "step": 16954 }, { "epoch": 2.38, "learning_rate": 4.603406326034064e-05, "loss": 0.0582, "step": 16956 }, { "epoch": 2.38, "learning_rate": 4.6033595358412876e-05, "loss": 0.064, "step": 16958 }, { "epoch": 2.38, "learning_rate": 4.603312745648512e-05, "loss": 0.0792, "step": 16960 }, { "epoch": 2.38, "learning_rate": 4.603265955455737e-05, "loss": 0.0692, "step": 16962 }, { "epoch": 2.38, "learning_rate": 4.6032191652629614e-05, "loss": 0.0634, "step": 16964 }, { "epoch": 2.38, "learning_rate": 4.603172375070185e-05, "loss": 0.0441, "step": 16966 }, { "epoch": 2.38, "learning_rate": 4.60312558487741e-05, "loss": 0.0729, "step": 16968 }, { "epoch": 2.38, "learning_rate": 4.6030787946846345e-05, "loss": 0.0599, "step": 16970 }, { "epoch": 2.38, "learning_rate": 4.6030320044918584e-05, "loss": 0.0608, "step": 16972 }, { "epoch": 2.38, "learning_rate": 4.602985214299083e-05, "loss": 0.0811, "step": 16974 }, { "epoch": 2.38, "learning_rate": 4.6029384241063076e-05, "loss": 0.0585, "step": 16976 }, { "epoch": 2.38, "learning_rate": 4.602891633913532e-05, "loss": 0.0783, "step": 16978 }, { "epoch": 2.38, "learning_rate": 4.602844843720756e-05, "loss": 0.066, "step": 16980 }, { "epoch": 2.38, "learning_rate": 4.602798053527981e-05, "loss": 0.0548, "step": 16982 }, { "epoch": 2.38, "learning_rate": 4.6027512633352046e-05, "loss": 0.0649, "step": 16984 }, { "epoch": 2.38, "learning_rate": 4.60270447314243e-05, "loss": 0.0574, "step": 16986 }, { "epoch": 2.38, "learning_rate": 4.602657682949654e-05, "loss": 0.0691, "step": 16988 }, { "epoch": 2.38, "learning_rate": 4.6026108927568784e-05, "loss": 0.0485, "step": 16990 }, { "epoch": 2.39, "learning_rate": 4.602564102564102e-05, "loss": 0.0437, "step": 16992 }, { "epoch": 2.39, "learning_rate": 4.602517312371327e-05, "loss": 0.0648, "step": 16994 }, { "epoch": 2.39, "learning_rate": 4.6024705221785515e-05, "loss": 0.062, "step": 16996 }, { "epoch": 2.39, "learning_rate": 4.602423731985776e-05, "loss": 0.0586, "step": 16998 }, { "epoch": 2.39, "learning_rate": 4.602376941793e-05, "loss": 0.0713, "step": 17000 }, { "epoch": 2.39, "eval_gen_len": 29.3103, "eval_loss": 1.077703595161438, "eval_meteor": 0.0453, "eval_runtime": 16.122, "eval_samples_per_second": 3.598, "eval_steps_per_second": 0.496, "step": 17000 }, { "epoch": 2.39, "learning_rate": 4.6023301516002246e-05, "loss": 0.0617, "step": 17002 }, { "epoch": 2.39, "learning_rate": 4.602283361407449e-05, "loss": 0.0574, "step": 17004 }, { "epoch": 2.39, "learning_rate": 4.602236571214674e-05, "loss": 0.0454, "step": 17006 }, { "epoch": 2.39, "learning_rate": 4.602189781021898e-05, "loss": 0.0754, "step": 17008 }, { "epoch": 2.39, "learning_rate": 4.602142990829122e-05, "loss": 0.076, "step": 17010 }, { "epoch": 2.39, "learning_rate": 4.602096200636347e-05, "loss": 0.0908, "step": 17012 }, { "epoch": 2.39, "learning_rate": 4.6020494104435714e-05, "loss": 0.0524, "step": 17014 }, { "epoch": 2.39, "learning_rate": 4.6020026202507954e-05, "loss": 0.0693, "step": 17016 }, { "epoch": 2.39, "learning_rate": 4.60195583005802e-05, "loss": 0.0546, "step": 17018 }, { "epoch": 2.39, "learning_rate": 4.6019090398652445e-05, "loss": 0.0899, "step": 17020 }, { "epoch": 2.39, "learning_rate": 4.601862249672469e-05, "loss": 0.0628, "step": 17022 }, { "epoch": 2.39, "learning_rate": 4.601815459479693e-05, "loss": 0.0701, "step": 17024 }, { "epoch": 2.39, "learning_rate": 4.6017686692869176e-05, "loss": 0.0752, "step": 17026 }, { "epoch": 2.39, "learning_rate": 4.6017218790941416e-05, "loss": 0.0449, "step": 17028 }, { "epoch": 2.39, "learning_rate": 4.601675088901367e-05, "loss": 0.066, "step": 17030 }, { "epoch": 2.39, "learning_rate": 4.601628298708591e-05, "loss": 0.0833, "step": 17032 }, { "epoch": 2.39, "learning_rate": 4.601581508515815e-05, "loss": 0.0474, "step": 17034 }, { "epoch": 2.39, "learning_rate": 4.601534718323039e-05, "loss": 0.0568, "step": 17036 }, { "epoch": 2.39, "learning_rate": 4.6014879281302645e-05, "loss": 0.0506, "step": 17038 }, { "epoch": 2.39, "learning_rate": 4.6014411379374884e-05, "loss": 0.061, "step": 17040 }, { "epoch": 2.39, "learning_rate": 4.601394347744713e-05, "loss": 0.0601, "step": 17042 }, { "epoch": 2.39, "learning_rate": 4.601347557551937e-05, "loss": 0.0619, "step": 17044 }, { "epoch": 2.39, "learning_rate": 4.6013007673591615e-05, "loss": 0.0704, "step": 17046 }, { "epoch": 2.39, "learning_rate": 4.601253977166386e-05, "loss": 0.0464, "step": 17048 }, { "epoch": 2.39, "learning_rate": 4.601207186973611e-05, "loss": 0.065, "step": 17050 }, { "epoch": 2.39, "learning_rate": 4.6011603967808346e-05, "loss": 0.0587, "step": 17052 }, { "epoch": 2.39, "learning_rate": 4.601113606588059e-05, "loss": 0.0758, "step": 17054 }, { "epoch": 2.39, "learning_rate": 4.601066816395284e-05, "loss": 0.0538, "step": 17056 }, { "epoch": 2.39, "learning_rate": 4.6010200262025084e-05, "loss": 0.0675, "step": 17058 }, { "epoch": 2.39, "learning_rate": 4.600973236009732e-05, "loss": 0.0759, "step": 17060 }, { "epoch": 2.4, "learning_rate": 4.600926445816957e-05, "loss": 0.0533, "step": 17062 }, { "epoch": 2.4, "learning_rate": 4.6008796556241815e-05, "loss": 0.0525, "step": 17064 }, { "epoch": 2.4, "learning_rate": 4.600832865431406e-05, "loss": 0.0541, "step": 17066 }, { "epoch": 2.4, "learning_rate": 4.60078607523863e-05, "loss": 0.0637, "step": 17068 }, { "epoch": 2.4, "learning_rate": 4.6007392850458546e-05, "loss": 0.0586, "step": 17070 }, { "epoch": 2.4, "learning_rate": 4.600692494853079e-05, "loss": 0.072, "step": 17072 }, { "epoch": 2.4, "learning_rate": 4.600645704660304e-05, "loss": 0.0754, "step": 17074 }, { "epoch": 2.4, "learning_rate": 4.600598914467528e-05, "loss": 0.0752, "step": 17076 }, { "epoch": 2.4, "learning_rate": 4.600552124274752e-05, "loss": 0.0705, "step": 17078 }, { "epoch": 2.4, "learning_rate": 4.600505334081976e-05, "loss": 0.0503, "step": 17080 }, { "epoch": 2.4, "learning_rate": 4.6004585438892014e-05, "loss": 0.0637, "step": 17082 }, { "epoch": 2.4, "learning_rate": 4.6004117536964254e-05, "loss": 0.0715, "step": 17084 }, { "epoch": 2.4, "learning_rate": 4.60036496350365e-05, "loss": 0.057, "step": 17086 }, { "epoch": 2.4, "learning_rate": 4.600318173310874e-05, "loss": 0.0567, "step": 17088 }, { "epoch": 2.4, "learning_rate": 4.600271383118099e-05, "loss": 0.0731, "step": 17090 }, { "epoch": 2.4, "learning_rate": 4.600224592925323e-05, "loss": 0.0605, "step": 17092 }, { "epoch": 2.4, "learning_rate": 4.6001778027325476e-05, "loss": 0.0607, "step": 17094 }, { "epoch": 2.4, "learning_rate": 4.6001310125397716e-05, "loss": 0.0753, "step": 17096 }, { "epoch": 2.4, "learning_rate": 4.600084222346996e-05, "loss": 0.0527, "step": 17098 }, { "epoch": 2.4, "learning_rate": 4.600037432154221e-05, "loss": 0.0481, "step": 17100 }, { "epoch": 2.4, "learning_rate": 4.599990641961445e-05, "loss": 0.0617, "step": 17102 }, { "epoch": 2.4, "learning_rate": 4.599943851768669e-05, "loss": 0.0761, "step": 17104 }, { "epoch": 2.4, "learning_rate": 4.599897061575894e-05, "loss": 0.0574, "step": 17106 }, { "epoch": 2.4, "learning_rate": 4.5998502713831184e-05, "loss": 0.0524, "step": 17108 }, { "epoch": 2.4, "learning_rate": 4.599803481190343e-05, "loss": 0.0573, "step": 17110 }, { "epoch": 2.4, "learning_rate": 4.599756690997567e-05, "loss": 0.0556, "step": 17112 }, { "epoch": 2.4, "learning_rate": 4.5997099008047915e-05, "loss": 0.0389, "step": 17114 }, { "epoch": 2.4, "learning_rate": 4.599663110612016e-05, "loss": 0.0475, "step": 17116 }, { "epoch": 2.4, "learning_rate": 4.599616320419241e-05, "loss": 0.0517, "step": 17118 }, { "epoch": 2.4, "learning_rate": 4.5995695302264646e-05, "loss": 0.0611, "step": 17120 }, { "epoch": 2.4, "learning_rate": 4.599522740033689e-05, "loss": 0.054, "step": 17122 }, { "epoch": 2.4, "learning_rate": 4.599475949840913e-05, "loss": 0.0561, "step": 17124 }, { "epoch": 2.4, "learning_rate": 4.5994291596481384e-05, "loss": 0.072, "step": 17126 }, { "epoch": 2.4, "learning_rate": 4.599382369455362e-05, "loss": 0.054, "step": 17128 }, { "epoch": 2.4, "learning_rate": 4.599335579262587e-05, "loss": 0.0766, "step": 17130 }, { "epoch": 2.4, "learning_rate": 4.599288789069811e-05, "loss": 0.0794, "step": 17132 }, { "epoch": 2.41, "learning_rate": 4.599241998877036e-05, "loss": 0.0664, "step": 17134 }, { "epoch": 2.41, "learning_rate": 4.59919520868426e-05, "loss": 0.067, "step": 17136 }, { "epoch": 2.41, "learning_rate": 4.5991484184914846e-05, "loss": 0.0645, "step": 17138 }, { "epoch": 2.41, "learning_rate": 4.5991016282987085e-05, "loss": 0.0591, "step": 17140 }, { "epoch": 2.41, "learning_rate": 4.599054838105933e-05, "loss": 0.0638, "step": 17142 }, { "epoch": 2.41, "learning_rate": 4.599008047913158e-05, "loss": 0.0713, "step": 17144 }, { "epoch": 2.41, "learning_rate": 4.598961257720382e-05, "loss": 0.061, "step": 17146 }, { "epoch": 2.41, "learning_rate": 4.598914467527606e-05, "loss": 0.0592, "step": 17148 }, { "epoch": 2.41, "learning_rate": 4.598867677334831e-05, "loss": 0.0666, "step": 17150 }, { "epoch": 2.41, "learning_rate": 4.5988208871420554e-05, "loss": 0.0607, "step": 17152 }, { "epoch": 2.41, "learning_rate": 4.59877409694928e-05, "loss": 0.0546, "step": 17154 }, { "epoch": 2.41, "learning_rate": 4.598727306756504e-05, "loss": 0.0609, "step": 17156 }, { "epoch": 2.41, "learning_rate": 4.5986805165637285e-05, "loss": 0.0678, "step": 17158 }, { "epoch": 2.41, "learning_rate": 4.598633726370953e-05, "loss": 0.0629, "step": 17160 }, { "epoch": 2.41, "learning_rate": 4.5985869361781776e-05, "loss": 0.0774, "step": 17162 }, { "epoch": 2.41, "learning_rate": 4.5985401459854016e-05, "loss": 0.0653, "step": 17164 }, { "epoch": 2.41, "learning_rate": 4.598493355792626e-05, "loss": 0.0508, "step": 17166 }, { "epoch": 2.41, "learning_rate": 4.598446565599851e-05, "loss": 0.0537, "step": 17168 }, { "epoch": 2.41, "learning_rate": 4.598399775407075e-05, "loss": 0.0452, "step": 17170 }, { "epoch": 2.41, "learning_rate": 4.598352985214299e-05, "loss": 0.0739, "step": 17172 }, { "epoch": 2.41, "learning_rate": 4.598306195021524e-05, "loss": 0.0569, "step": 17174 }, { "epoch": 2.41, "learning_rate": 4.598259404828748e-05, "loss": 0.0488, "step": 17176 }, { "epoch": 2.41, "learning_rate": 4.598212614635973e-05, "loss": 0.0352, "step": 17178 }, { "epoch": 2.41, "learning_rate": 4.598165824443197e-05, "loss": 0.0631, "step": 17180 }, { "epoch": 2.41, "learning_rate": 4.5981190342504215e-05, "loss": 0.0624, "step": 17182 }, { "epoch": 2.41, "learning_rate": 4.5980722440576454e-05, "loss": 0.0418, "step": 17184 }, { "epoch": 2.41, "learning_rate": 4.598025453864871e-05, "loss": 0.0597, "step": 17186 }, { "epoch": 2.41, "learning_rate": 4.5979786636720946e-05, "loss": 0.0612, "step": 17188 }, { "epoch": 2.41, "learning_rate": 4.597931873479319e-05, "loss": 0.0543, "step": 17190 }, { "epoch": 2.41, "learning_rate": 4.597885083286543e-05, "loss": 0.0512, "step": 17192 }, { "epoch": 2.41, "learning_rate": 4.597838293093768e-05, "loss": 0.0516, "step": 17194 }, { "epoch": 2.41, "learning_rate": 4.597791502900992e-05, "loss": 0.0556, "step": 17196 }, { "epoch": 2.41, "learning_rate": 4.597744712708217e-05, "loss": 0.0564, "step": 17198 }, { "epoch": 2.41, "learning_rate": 4.597697922515441e-05, "loss": 0.044, "step": 17200 }, { "epoch": 2.41, "learning_rate": 4.5976511323226654e-05, "loss": 0.078, "step": 17202 }, { "epoch": 2.41, "learning_rate": 4.59760434212989e-05, "loss": 0.0606, "step": 17204 }, { "epoch": 2.42, "learning_rate": 4.5975575519371146e-05, "loss": 0.0552, "step": 17206 }, { "epoch": 2.42, "learning_rate": 4.5975107617443385e-05, "loss": 0.0616, "step": 17208 }, { "epoch": 2.42, "learning_rate": 4.597463971551563e-05, "loss": 0.0567, "step": 17210 }, { "epoch": 2.42, "learning_rate": 4.597417181358788e-05, "loss": 0.0598, "step": 17212 }, { "epoch": 2.42, "learning_rate": 4.597370391166012e-05, "loss": 0.0723, "step": 17214 }, { "epoch": 2.42, "learning_rate": 4.597323600973236e-05, "loss": 0.0473, "step": 17216 }, { "epoch": 2.42, "learning_rate": 4.597276810780461e-05, "loss": 0.0779, "step": 17218 }, { "epoch": 2.42, "learning_rate": 4.5972300205876854e-05, "loss": 0.0689, "step": 17220 }, { "epoch": 2.42, "learning_rate": 4.59718323039491e-05, "loss": 0.0804, "step": 17222 }, { "epoch": 2.42, "learning_rate": 4.597136440202134e-05, "loss": 0.0717, "step": 17224 }, { "epoch": 2.42, "learning_rate": 4.597089650009358e-05, "loss": 0.0595, "step": 17226 }, { "epoch": 2.42, "learning_rate": 4.5970428598165824e-05, "loss": 0.055, "step": 17228 }, { "epoch": 2.42, "learning_rate": 4.596996069623807e-05, "loss": 0.0618, "step": 17230 }, { "epoch": 2.42, "learning_rate": 4.5969492794310316e-05, "loss": 0.0858, "step": 17232 }, { "epoch": 2.42, "learning_rate": 4.5969024892382555e-05, "loss": 0.0511, "step": 17234 }, { "epoch": 2.42, "learning_rate": 4.59685569904548e-05, "loss": 0.0913, "step": 17236 }, { "epoch": 2.42, "learning_rate": 4.5968089088527047e-05, "loss": 0.0728, "step": 17238 }, { "epoch": 2.42, "learning_rate": 4.596762118659929e-05, "loss": 0.0665, "step": 17240 }, { "epoch": 2.42, "learning_rate": 4.596715328467153e-05, "loss": 0.0573, "step": 17242 }, { "epoch": 2.42, "learning_rate": 4.596668538274378e-05, "loss": 0.0545, "step": 17244 }, { "epoch": 2.42, "learning_rate": 4.5966217480816023e-05, "loss": 0.0544, "step": 17246 }, { "epoch": 2.42, "learning_rate": 4.596574957888827e-05, "loss": 0.062, "step": 17248 }, { "epoch": 2.42, "learning_rate": 4.596528167696051e-05, "loss": 0.0514, "step": 17250 }, { "epoch": 2.42, "learning_rate": 4.5964813775032754e-05, "loss": 0.061, "step": 17252 }, { "epoch": 2.42, "learning_rate": 4.5964345873105e-05, "loss": 0.0646, "step": 17254 }, { "epoch": 2.42, "learning_rate": 4.5963877971177246e-05, "loss": 0.0504, "step": 17256 }, { "epoch": 2.42, "learning_rate": 4.5963410069249485e-05, "loss": 0.0726, "step": 17258 }, { "epoch": 2.42, "learning_rate": 4.596294216732173e-05, "loss": 0.054, "step": 17260 }, { "epoch": 2.42, "learning_rate": 4.596247426539397e-05, "loss": 0.0761, "step": 17262 }, { "epoch": 2.42, "learning_rate": 4.596200636346622e-05, "loss": 0.0594, "step": 17264 }, { "epoch": 2.42, "learning_rate": 4.596153846153846e-05, "loss": 0.0682, "step": 17266 }, { "epoch": 2.42, "learning_rate": 4.596107055961071e-05, "loss": 0.0634, "step": 17268 }, { "epoch": 2.42, "learning_rate": 4.596060265768295e-05, "loss": 0.0604, "step": 17270 }, { "epoch": 2.42, "learning_rate": 4.596013475575519e-05, "loss": 0.0801, "step": 17272 }, { "epoch": 2.42, "learning_rate": 4.595966685382744e-05, "loss": 0.0758, "step": 17274 }, { "epoch": 2.43, "learning_rate": 4.5959198951899685e-05, "loss": 0.0713, "step": 17276 }, { "epoch": 2.43, "learning_rate": 4.5958731049971924e-05, "loss": 0.0481, "step": 17278 }, { "epoch": 2.43, "learning_rate": 4.595826314804417e-05, "loss": 0.0451, "step": 17280 }, { "epoch": 2.43, "learning_rate": 4.5957795246116416e-05, "loss": 0.0611, "step": 17282 }, { "epoch": 2.43, "learning_rate": 4.595732734418866e-05, "loss": 0.0627, "step": 17284 }, { "epoch": 2.43, "learning_rate": 4.59568594422609e-05, "loss": 0.0542, "step": 17286 }, { "epoch": 2.43, "learning_rate": 4.595639154033315e-05, "loss": 0.054, "step": 17288 }, { "epoch": 2.43, "learning_rate": 4.595592363840539e-05, "loss": 0.0746, "step": 17290 }, { "epoch": 2.43, "learning_rate": 4.595545573647764e-05, "loss": 0.0646, "step": 17292 }, { "epoch": 2.43, "learning_rate": 4.595498783454988e-05, "loss": 0.0576, "step": 17294 }, { "epoch": 2.43, "learning_rate": 4.5954519932622124e-05, "loss": 0.0434, "step": 17296 }, { "epoch": 2.43, "learning_rate": 4.595405203069437e-05, "loss": 0.0592, "step": 17298 }, { "epoch": 2.43, "learning_rate": 4.5953584128766616e-05, "loss": 0.0621, "step": 17300 }, { "epoch": 2.43, "learning_rate": 4.5953116226838855e-05, "loss": 0.0599, "step": 17302 }, { "epoch": 2.43, "learning_rate": 4.59526483249111e-05, "loss": 0.0931, "step": 17304 }, { "epoch": 2.43, "learning_rate": 4.595218042298334e-05, "loss": 0.0755, "step": 17306 }, { "epoch": 2.43, "learning_rate": 4.595171252105559e-05, "loss": 0.0653, "step": 17308 }, { "epoch": 2.43, "learning_rate": 4.595124461912783e-05, "loss": 0.0642, "step": 17310 }, { "epoch": 2.43, "learning_rate": 4.595077671720008e-05, "loss": 0.0703, "step": 17312 }, { "epoch": 2.43, "learning_rate": 4.595030881527232e-05, "loss": 0.0717, "step": 17314 }, { "epoch": 2.43, "learning_rate": 4.594984091334457e-05, "loss": 0.0565, "step": 17316 }, { "epoch": 2.43, "learning_rate": 4.594937301141681e-05, "loss": 0.066, "step": 17318 }, { "epoch": 2.43, "learning_rate": 4.5948905109489054e-05, "loss": 0.0815, "step": 17320 }, { "epoch": 2.43, "learning_rate": 4.5948437207561294e-05, "loss": 0.0583, "step": 17322 }, { "epoch": 2.43, "learning_rate": 4.594796930563354e-05, "loss": 0.0635, "step": 17324 }, { "epoch": 2.43, "learning_rate": 4.5947501403705785e-05, "loss": 0.0643, "step": 17326 }, { "epoch": 2.43, "learning_rate": 4.594703350177803e-05, "loss": 0.0534, "step": 17328 }, { "epoch": 2.43, "learning_rate": 4.594656559985027e-05, "loss": 0.0778, "step": 17330 }, { "epoch": 2.43, "learning_rate": 4.5946097697922516e-05, "loss": 0.0661, "step": 17332 }, { "epoch": 2.43, "learning_rate": 4.594562979599476e-05, "loss": 0.0731, "step": 17334 }, { "epoch": 2.43, "learning_rate": 4.594516189406701e-05, "loss": 0.0527, "step": 17336 }, { "epoch": 2.43, "learning_rate": 4.594469399213925e-05, "loss": 0.0644, "step": 17338 }, { "epoch": 2.43, "learning_rate": 4.594422609021149e-05, "loss": 0.0626, "step": 17340 }, { "epoch": 2.43, "learning_rate": 4.594375818828374e-05, "loss": 0.0721, "step": 17342 }, { "epoch": 2.43, "learning_rate": 4.5943290286355985e-05, "loss": 0.0726, "step": 17344 }, { "epoch": 2.43, "learning_rate": 4.5942822384428224e-05, "loss": 0.0613, "step": 17346 }, { "epoch": 2.44, "learning_rate": 4.594235448250047e-05, "loss": 0.0407, "step": 17348 }, { "epoch": 2.44, "learning_rate": 4.5941886580572716e-05, "loss": 0.0511, "step": 17350 }, { "epoch": 2.44, "learning_rate": 4.594141867864496e-05, "loss": 0.0644, "step": 17352 }, { "epoch": 2.44, "learning_rate": 4.59409507767172e-05, "loss": 0.0679, "step": 17354 }, { "epoch": 2.44, "learning_rate": 4.594048287478945e-05, "loss": 0.0597, "step": 17356 }, { "epoch": 2.44, "learning_rate": 4.5940014972861686e-05, "loss": 0.0555, "step": 17358 }, { "epoch": 2.44, "learning_rate": 4.593954707093394e-05, "loss": 0.0446, "step": 17360 }, { "epoch": 2.44, "learning_rate": 4.593907916900618e-05, "loss": 0.0539, "step": 17362 }, { "epoch": 2.44, "learning_rate": 4.5938611267078424e-05, "loss": 0.0805, "step": 17364 }, { "epoch": 2.44, "learning_rate": 4.593814336515066e-05, "loss": 0.0582, "step": 17366 }, { "epoch": 2.44, "learning_rate": 4.5937675463222916e-05, "loss": 0.0748, "step": 17368 }, { "epoch": 2.44, "learning_rate": 4.5937207561295155e-05, "loss": 0.0731, "step": 17370 }, { "epoch": 2.44, "learning_rate": 4.59367396593674e-05, "loss": 0.0769, "step": 17372 }, { "epoch": 2.44, "learning_rate": 4.593627175743964e-05, "loss": 0.0725, "step": 17374 }, { "epoch": 2.44, "learning_rate": 4.5935803855511886e-05, "loss": 0.0608, "step": 17376 }, { "epoch": 2.44, "learning_rate": 4.593533595358413e-05, "loss": 0.0682, "step": 17378 }, { "epoch": 2.44, "learning_rate": 4.593486805165638e-05, "loss": 0.0711, "step": 17380 }, { "epoch": 2.44, "learning_rate": 4.593440014972862e-05, "loss": 0.05, "step": 17382 }, { "epoch": 2.44, "learning_rate": 4.593393224780086e-05, "loss": 0.0613, "step": 17384 }, { "epoch": 2.44, "learning_rate": 4.593346434587311e-05, "loss": 0.0728, "step": 17386 }, { "epoch": 2.44, "learning_rate": 4.5932996443945354e-05, "loss": 0.0447, "step": 17388 }, { "epoch": 2.44, "learning_rate": 4.5932528542017594e-05, "loss": 0.0402, "step": 17390 }, { "epoch": 2.44, "learning_rate": 4.593206064008984e-05, "loss": 0.0498, "step": 17392 }, { "epoch": 2.44, "learning_rate": 4.5931592738162085e-05, "loss": 0.0553, "step": 17394 }, { "epoch": 2.44, "learning_rate": 4.593112483623433e-05, "loss": 0.0556, "step": 17396 }, { "epoch": 2.44, "learning_rate": 4.593065693430657e-05, "loss": 0.0628, "step": 17398 }, { "epoch": 2.44, "learning_rate": 4.5930189032378816e-05, "loss": 0.0647, "step": 17400 }, { "epoch": 2.44, "learning_rate": 4.5929721130451055e-05, "loss": 0.0459, "step": 17402 }, { "epoch": 2.44, "learning_rate": 4.592925322852331e-05, "loss": 0.0442, "step": 17404 }, { "epoch": 2.44, "learning_rate": 4.592878532659555e-05, "loss": 0.0455, "step": 17406 }, { "epoch": 2.44, "learning_rate": 4.592831742466779e-05, "loss": 0.0435, "step": 17408 }, { "epoch": 2.44, "learning_rate": 4.592784952274003e-05, "loss": 0.0642, "step": 17410 }, { "epoch": 2.44, "learning_rate": 4.5927381620812285e-05, "loss": 0.0517, "step": 17412 }, { "epoch": 2.44, "learning_rate": 4.5926913718884524e-05, "loss": 0.0332, "step": 17414 }, { "epoch": 2.44, "learning_rate": 4.592644581695677e-05, "loss": 0.0744, "step": 17416 }, { "epoch": 2.44, "learning_rate": 4.592597791502901e-05, "loss": 0.0637, "step": 17418 }, { "epoch": 2.45, "learning_rate": 4.5925510013101255e-05, "loss": 0.0749, "step": 17420 }, { "epoch": 2.45, "learning_rate": 4.59250421111735e-05, "loss": 0.0784, "step": 17422 }, { "epoch": 2.45, "learning_rate": 4.592457420924575e-05, "loss": 0.0613, "step": 17424 }, { "epoch": 2.45, "learning_rate": 4.5924106307317986e-05, "loss": 0.0633, "step": 17426 }, { "epoch": 2.45, "learning_rate": 4.592363840539023e-05, "loss": 0.0738, "step": 17428 }, { "epoch": 2.45, "learning_rate": 4.592317050346248e-05, "loss": 0.0553, "step": 17430 }, { "epoch": 2.45, "learning_rate": 4.5922702601534724e-05, "loss": 0.0489, "step": 17432 }, { "epoch": 2.45, "learning_rate": 4.592223469960696e-05, "loss": 0.0674, "step": 17434 }, { "epoch": 2.45, "learning_rate": 4.592176679767921e-05, "loss": 0.0708, "step": 17436 }, { "epoch": 2.45, "learning_rate": 4.5921298895751455e-05, "loss": 0.0416, "step": 17438 }, { "epoch": 2.45, "learning_rate": 4.59208309938237e-05, "loss": 0.0772, "step": 17440 }, { "epoch": 2.45, "learning_rate": 4.592036309189594e-05, "loss": 0.0666, "step": 17442 }, { "epoch": 2.45, "learning_rate": 4.5919895189968186e-05, "loss": 0.0476, "step": 17444 }, { "epoch": 2.45, "learning_rate": 4.591942728804043e-05, "loss": 0.0762, "step": 17446 }, { "epoch": 2.45, "learning_rate": 4.591895938611268e-05, "loss": 0.0708, "step": 17448 }, { "epoch": 2.45, "learning_rate": 4.591849148418492e-05, "loss": 0.0836, "step": 17450 }, { "epoch": 2.45, "learning_rate": 4.591802358225716e-05, "loss": 0.0539, "step": 17452 }, { "epoch": 2.45, "learning_rate": 4.59175556803294e-05, "loss": 0.0663, "step": 17454 }, { "epoch": 2.45, "learning_rate": 4.5917087778401654e-05, "loss": 0.0752, "step": 17456 }, { "epoch": 2.45, "learning_rate": 4.5916619876473894e-05, "loss": 0.0744, "step": 17458 }, { "epoch": 2.45, "learning_rate": 4.591615197454614e-05, "loss": 0.0574, "step": 17460 }, { "epoch": 2.45, "learning_rate": 4.591568407261838e-05, "loss": 0.0496, "step": 17462 }, { "epoch": 2.45, "learning_rate": 4.591521617069063e-05, "loss": 0.0725, "step": 17464 }, { "epoch": 2.45, "learning_rate": 4.591474826876287e-05, "loss": 0.0516, "step": 17466 }, { "epoch": 2.45, "learning_rate": 4.5914280366835116e-05, "loss": 0.0559, "step": 17468 }, { "epoch": 2.45, "learning_rate": 4.5913812464907355e-05, "loss": 0.0705, "step": 17470 }, { "epoch": 2.45, "learning_rate": 4.59133445629796e-05, "loss": 0.0542, "step": 17472 }, { "epoch": 2.45, "learning_rate": 4.591287666105185e-05, "loss": 0.088, "step": 17474 }, { "epoch": 2.45, "learning_rate": 4.591240875912409e-05, "loss": 0.0651, "step": 17476 }, { "epoch": 2.45, "learning_rate": 4.591194085719633e-05, "loss": 0.0428, "step": 17478 }, { "epoch": 2.45, "learning_rate": 4.591147295526858e-05, "loss": 0.0498, "step": 17480 }, { "epoch": 2.45, "learning_rate": 4.5911005053340824e-05, "loss": 0.0568, "step": 17482 }, { "epoch": 2.45, "learning_rate": 4.591053715141306e-05, "loss": 0.0718, "step": 17484 }, { "epoch": 2.45, "learning_rate": 4.591006924948531e-05, "loss": 0.0672, "step": 17486 }, { "epoch": 2.45, "learning_rate": 4.590960134755755e-05, "loss": 0.0563, "step": 17488 }, { "epoch": 2.46, "learning_rate": 4.59091334456298e-05, "loss": 0.0661, "step": 17490 }, { "epoch": 2.46, "learning_rate": 4.590866554370204e-05, "loss": 0.0686, "step": 17492 }, { "epoch": 2.46, "learning_rate": 4.5908197641774286e-05, "loss": 0.0484, "step": 17494 }, { "epoch": 2.46, "learning_rate": 4.5907729739846525e-05, "loss": 0.0558, "step": 17496 }, { "epoch": 2.46, "learning_rate": 4.590726183791878e-05, "loss": 0.0628, "step": 17498 }, { "epoch": 2.46, "learning_rate": 4.590679393599102e-05, "loss": 0.0566, "step": 17500 }, { "epoch": 2.46, "learning_rate": 4.590632603406326e-05, "loss": 0.0517, "step": 17502 }, { "epoch": 2.46, "learning_rate": 4.59058581321355e-05, "loss": 0.0612, "step": 17504 }, { "epoch": 2.46, "learning_rate": 4.590539023020775e-05, "loss": 0.0634, "step": 17506 }, { "epoch": 2.46, "learning_rate": 4.5904922328279994e-05, "loss": 0.049, "step": 17508 }, { "epoch": 2.46, "learning_rate": 4.590445442635224e-05, "loss": 0.0839, "step": 17510 }, { "epoch": 2.46, "learning_rate": 4.590398652442448e-05, "loss": 0.0552, "step": 17512 }, { "epoch": 2.46, "learning_rate": 4.5903518622496725e-05, "loss": 0.0704, "step": 17514 }, { "epoch": 2.46, "learning_rate": 4.590305072056897e-05, "loss": 0.0565, "step": 17516 }, { "epoch": 2.46, "learning_rate": 4.590258281864122e-05, "loss": 0.0716, "step": 17518 }, { "epoch": 2.46, "learning_rate": 4.5902114916713456e-05, "loss": 0.0609, "step": 17520 }, { "epoch": 2.46, "learning_rate": 4.59016470147857e-05, "loss": 0.0788, "step": 17522 }, { "epoch": 2.46, "learning_rate": 4.590117911285795e-05, "loss": 0.058, "step": 17524 }, { "epoch": 2.46, "learning_rate": 4.5900711210930194e-05, "loss": 0.0572, "step": 17526 }, { "epoch": 2.46, "learning_rate": 4.590024330900243e-05, "loss": 0.0633, "step": 17528 }, { "epoch": 2.46, "learning_rate": 4.589977540707468e-05, "loss": 0.086, "step": 17530 }, { "epoch": 2.46, "learning_rate": 4.5899307505146925e-05, "loss": 0.0699, "step": 17532 }, { "epoch": 2.46, "learning_rate": 4.589883960321917e-05, "loss": 0.1082, "step": 17534 }, { "epoch": 2.46, "learning_rate": 4.589837170129141e-05, "loss": 0.0465, "step": 17536 }, { "epoch": 2.46, "learning_rate": 4.5897903799363655e-05, "loss": 0.0599, "step": 17538 }, { "epoch": 2.46, "learning_rate": 4.5897435897435895e-05, "loss": 0.0774, "step": 17540 }, { "epoch": 2.46, "learning_rate": 4.589696799550815e-05, "loss": 0.0616, "step": 17542 }, { "epoch": 2.46, "learning_rate": 4.5896500093580386e-05, "loss": 0.0889, "step": 17544 }, { "epoch": 2.46, "learning_rate": 4.589603219165263e-05, "loss": 0.0605, "step": 17546 }, { "epoch": 2.46, "learning_rate": 4.589556428972487e-05, "loss": 0.0632, "step": 17548 }, { "epoch": 2.46, "learning_rate": 4.589509638779712e-05, "loss": 0.0577, "step": 17550 }, { "epoch": 2.46, "learning_rate": 4.589462848586936e-05, "loss": 0.0548, "step": 17552 }, { "epoch": 2.46, "learning_rate": 4.589416058394161e-05, "loss": 0.0549, "step": 17554 }, { "epoch": 2.46, "learning_rate": 4.589369268201385e-05, "loss": 0.0402, "step": 17556 }, { "epoch": 2.46, "learning_rate": 4.5893224780086094e-05, "loss": 0.0613, "step": 17558 }, { "epoch": 2.46, "learning_rate": 4.589275687815834e-05, "loss": 0.076, "step": 17560 }, { "epoch": 2.47, "learning_rate": 4.5892288976230586e-05, "loss": 0.0689, "step": 17562 }, { "epoch": 2.47, "learning_rate": 4.5891821074302825e-05, "loss": 0.0634, "step": 17564 }, { "epoch": 2.47, "learning_rate": 4.589135317237507e-05, "loss": 0.0766, "step": 17566 }, { "epoch": 2.47, "learning_rate": 4.589088527044732e-05, "loss": 0.0475, "step": 17568 }, { "epoch": 2.47, "learning_rate": 4.589041736851956e-05, "loss": 0.0632, "step": 17570 }, { "epoch": 2.47, "learning_rate": 4.58899494665918e-05, "loss": 0.0821, "step": 17572 }, { "epoch": 2.47, "learning_rate": 4.588948156466405e-05, "loss": 0.053, "step": 17574 }, { "epoch": 2.47, "learning_rate": 4.5889013662736294e-05, "loss": 0.0832, "step": 17576 }, { "epoch": 2.47, "learning_rate": 4.588854576080854e-05, "loss": 0.0941, "step": 17578 }, { "epoch": 2.47, "learning_rate": 4.588807785888078e-05, "loss": 0.0644, "step": 17580 }, { "epoch": 2.47, "learning_rate": 4.5887609956953025e-05, "loss": 0.0533, "step": 17582 }, { "epoch": 2.47, "learning_rate": 4.5887142055025264e-05, "loss": 0.0699, "step": 17584 }, { "epoch": 2.47, "learning_rate": 4.588667415309752e-05, "loss": 0.064, "step": 17586 }, { "epoch": 2.47, "learning_rate": 4.5886206251169756e-05, "loss": 0.0677, "step": 17588 }, { "epoch": 2.47, "learning_rate": 4.5885738349242e-05, "loss": 0.0468, "step": 17590 }, { "epoch": 2.47, "learning_rate": 4.588527044731424e-05, "loss": 0.0669, "step": 17592 }, { "epoch": 2.47, "learning_rate": 4.5884802545386494e-05, "loss": 0.0693, "step": 17594 }, { "epoch": 2.47, "learning_rate": 4.588433464345873e-05, "loss": 0.0475, "step": 17596 }, { "epoch": 2.47, "learning_rate": 4.588386674153098e-05, "loss": 0.0922, "step": 17598 }, { "epoch": 2.47, "learning_rate": 4.588339883960322e-05, "loss": 0.0714, "step": 17600 }, { "epoch": 2.47, "learning_rate": 4.5882930937675464e-05, "loss": 0.0506, "step": 17602 }, { "epoch": 2.47, "learning_rate": 4.588246303574771e-05, "loss": 0.056, "step": 17604 }, { "epoch": 2.47, "learning_rate": 4.5881995133819956e-05, "loss": 0.0527, "step": 17606 }, { "epoch": 2.47, "learning_rate": 4.5881527231892195e-05, "loss": 0.0761, "step": 17608 }, { "epoch": 2.47, "learning_rate": 4.588105932996444e-05, "loss": 0.0497, "step": 17610 }, { "epoch": 2.47, "learning_rate": 4.5880591428036686e-05, "loss": 0.0687, "step": 17612 }, { "epoch": 2.47, "learning_rate": 4.588012352610893e-05, "loss": 0.0524, "step": 17614 }, { "epoch": 2.47, "learning_rate": 4.587965562418117e-05, "loss": 0.0633, "step": 17616 }, { "epoch": 2.47, "learning_rate": 4.587918772225342e-05, "loss": 0.0666, "step": 17618 }, { "epoch": 2.47, "learning_rate": 4.587871982032566e-05, "loss": 0.0597, "step": 17620 }, { "epoch": 2.47, "learning_rate": 4.587825191839791e-05, "loss": 0.0489, "step": 17622 }, { "epoch": 2.47, "learning_rate": 4.587778401647015e-05, "loss": 0.0527, "step": 17624 }, { "epoch": 2.47, "learning_rate": 4.5877316114542394e-05, "loss": 0.0649, "step": 17626 }, { "epoch": 2.47, "learning_rate": 4.587684821261464e-05, "loss": 0.0676, "step": 17628 }, { "epoch": 2.47, "learning_rate": 4.5876380310686886e-05, "loss": 0.0773, "step": 17630 }, { "epoch": 2.48, "learning_rate": 4.5875912408759125e-05, "loss": 0.0742, "step": 17632 }, { "epoch": 2.48, "learning_rate": 4.587544450683137e-05, "loss": 0.0586, "step": 17634 }, { "epoch": 2.48, "learning_rate": 4.587497660490361e-05, "loss": 0.0665, "step": 17636 }, { "epoch": 2.48, "learning_rate": 4.587450870297586e-05, "loss": 0.061, "step": 17638 }, { "epoch": 2.48, "learning_rate": 4.58740408010481e-05, "loss": 0.0697, "step": 17640 }, { "epoch": 2.48, "learning_rate": 4.587357289912035e-05, "loss": 0.0759, "step": 17642 }, { "epoch": 2.48, "learning_rate": 4.587310499719259e-05, "loss": 0.078, "step": 17644 }, { "epoch": 2.48, "learning_rate": 4.587263709526484e-05, "loss": 0.0636, "step": 17646 }, { "epoch": 2.48, "learning_rate": 4.587216919333708e-05, "loss": 0.0553, "step": 17648 }, { "epoch": 2.48, "learning_rate": 4.5871701291409325e-05, "loss": 0.0655, "step": 17650 }, { "epoch": 2.48, "learning_rate": 4.5871233389481564e-05, "loss": 0.0632, "step": 17652 }, { "epoch": 2.48, "learning_rate": 4.587076548755381e-05, "loss": 0.0642, "step": 17654 }, { "epoch": 2.48, "learning_rate": 4.5870297585626056e-05, "loss": 0.0557, "step": 17656 }, { "epoch": 2.48, "learning_rate": 4.58698296836983e-05, "loss": 0.0645, "step": 17658 }, { "epoch": 2.48, "learning_rate": 4.586936178177054e-05, "loss": 0.0706, "step": 17660 }, { "epoch": 2.48, "learning_rate": 4.586889387984279e-05, "loss": 0.0511, "step": 17662 }, { "epoch": 2.48, "learning_rate": 4.586842597791503e-05, "loss": 0.059, "step": 17664 }, { "epoch": 2.48, "learning_rate": 4.586795807598728e-05, "loss": 0.0614, "step": 17666 }, { "epoch": 2.48, "learning_rate": 4.586749017405952e-05, "loss": 0.046, "step": 17668 }, { "epoch": 2.48, "learning_rate": 4.5867022272131764e-05, "loss": 0.0649, "step": 17670 }, { "epoch": 2.48, "learning_rate": 4.586655437020401e-05, "loss": 0.0633, "step": 17672 }, { "epoch": 2.48, "learning_rate": 4.5866086468276256e-05, "loss": 0.0494, "step": 17674 }, { "epoch": 2.48, "learning_rate": 4.5865618566348495e-05, "loss": 0.0504, "step": 17676 }, { "epoch": 2.48, "learning_rate": 4.586515066442074e-05, "loss": 0.0646, "step": 17678 }, { "epoch": 2.48, "learning_rate": 4.5864682762492986e-05, "loss": 0.0582, "step": 17680 }, { "epoch": 2.48, "learning_rate": 4.586421486056523e-05, "loss": 0.0574, "step": 17682 }, { "epoch": 2.48, "learning_rate": 4.586374695863747e-05, "loss": 0.0904, "step": 17684 }, { "epoch": 2.48, "learning_rate": 4.586327905670972e-05, "loss": 0.0734, "step": 17686 }, { "epoch": 2.48, "learning_rate": 4.5862811154781957e-05, "loss": 0.0562, "step": 17688 }, { "epoch": 2.48, "learning_rate": 4.586234325285421e-05, "loss": 0.0643, "step": 17690 }, { "epoch": 2.48, "learning_rate": 4.586187535092645e-05, "loss": 0.0668, "step": 17692 }, { "epoch": 2.48, "learning_rate": 4.5861407448998694e-05, "loss": 0.0445, "step": 17694 }, { "epoch": 2.48, "learning_rate": 4.5860939547070933e-05, "loss": 0.071, "step": 17696 }, { "epoch": 2.48, "learning_rate": 4.586047164514318e-05, "loss": 0.0678, "step": 17698 }, { "epoch": 2.48, "learning_rate": 4.5860003743215425e-05, "loss": 0.0446, "step": 17700 }, { "epoch": 2.48, "learning_rate": 4.585953584128767e-05, "loss": 0.0642, "step": 17702 }, { "epoch": 2.49, "learning_rate": 4.585906793935991e-05, "loss": 0.0502, "step": 17704 }, { "epoch": 2.49, "learning_rate": 4.5858600037432156e-05, "loss": 0.0481, "step": 17706 }, { "epoch": 2.49, "learning_rate": 4.58581321355044e-05, "loss": 0.0736, "step": 17708 }, { "epoch": 2.49, "learning_rate": 4.585766423357665e-05, "loss": 0.0674, "step": 17710 }, { "epoch": 2.49, "learning_rate": 4.585719633164889e-05, "loss": 0.0467, "step": 17712 }, { "epoch": 2.49, "learning_rate": 4.585672842972113e-05, "loss": 0.0636, "step": 17714 }, { "epoch": 2.49, "learning_rate": 4.585626052779338e-05, "loss": 0.0785, "step": 17716 }, { "epoch": 2.49, "learning_rate": 4.5855792625865625e-05, "loss": 0.0584, "step": 17718 }, { "epoch": 2.49, "learning_rate": 4.5855324723937864e-05, "loss": 0.0388, "step": 17720 }, { "epoch": 2.49, "learning_rate": 4.585485682201011e-05, "loss": 0.0692, "step": 17722 }, { "epoch": 2.49, "learning_rate": 4.5854388920082356e-05, "loss": 0.0619, "step": 17724 }, { "epoch": 2.49, "learning_rate": 4.58539210181546e-05, "loss": 0.0624, "step": 17726 }, { "epoch": 2.49, "learning_rate": 4.585345311622684e-05, "loss": 0.049, "step": 17728 }, { "epoch": 2.49, "learning_rate": 4.585298521429908e-05, "loss": 0.0452, "step": 17730 }, { "epoch": 2.49, "learning_rate": 4.5852517312371326e-05, "loss": 0.0644, "step": 17732 }, { "epoch": 2.49, "learning_rate": 4.585204941044357e-05, "loss": 0.0575, "step": 17734 }, { "epoch": 2.49, "learning_rate": 4.585158150851582e-05, "loss": 0.0643, "step": 17736 }, { "epoch": 2.49, "learning_rate": 4.585111360658806e-05, "loss": 0.0657, "step": 17738 }, { "epoch": 2.49, "learning_rate": 4.58506457046603e-05, "loss": 0.0603, "step": 17740 }, { "epoch": 2.49, "learning_rate": 4.585017780273255e-05, "loss": 0.0641, "step": 17742 }, { "epoch": 2.49, "learning_rate": 4.5849709900804795e-05, "loss": 0.0647, "step": 17744 }, { "epoch": 2.49, "learning_rate": 4.5849241998877034e-05, "loss": 0.045, "step": 17746 }, { "epoch": 2.49, "learning_rate": 4.584877409694928e-05, "loss": 0.0628, "step": 17748 }, { "epoch": 2.49, "learning_rate": 4.5848306195021526e-05, "loss": 0.0619, "step": 17750 }, { "epoch": 2.49, "learning_rate": 4.584783829309377e-05, "loss": 0.0657, "step": 17752 }, { "epoch": 2.49, "learning_rate": 4.584737039116601e-05, "loss": 0.0645, "step": 17754 }, { "epoch": 2.49, "learning_rate": 4.5846902489238257e-05, "loss": 0.0772, "step": 17756 }, { "epoch": 2.49, "learning_rate": 4.58464345873105e-05, "loss": 0.0545, "step": 17758 }, { "epoch": 2.49, "learning_rate": 4.584596668538275e-05, "loss": 0.0629, "step": 17760 }, { "epoch": 2.49, "learning_rate": 4.584549878345499e-05, "loss": 0.088, "step": 17762 }, { "epoch": 2.49, "learning_rate": 4.5845030881527233e-05, "loss": 0.0606, "step": 17764 }, { "epoch": 2.49, "learning_rate": 4.584456297959947e-05, "loss": 0.0589, "step": 17766 }, { "epoch": 2.49, "learning_rate": 4.5844095077671725e-05, "loss": 0.057, "step": 17768 }, { "epoch": 2.49, "learning_rate": 4.5843627175743964e-05, "loss": 0.0668, "step": 17770 }, { "epoch": 2.49, "learning_rate": 4.584315927381621e-05, "loss": 0.0618, "step": 17772 }, { "epoch": 2.49, "learning_rate": 4.584269137188845e-05, "loss": 0.0636, "step": 17774 }, { "epoch": 2.5, "learning_rate": 4.58422234699607e-05, "loss": 0.0617, "step": 17776 }, { "epoch": 2.5, "learning_rate": 4.584175556803294e-05, "loss": 0.0615, "step": 17778 }, { "epoch": 2.5, "learning_rate": 4.584128766610519e-05, "loss": 0.0678, "step": 17780 }, { "epoch": 2.5, "learning_rate": 4.5840819764177426e-05, "loss": 0.0594, "step": 17782 }, { "epoch": 2.5, "learning_rate": 4.584035186224967e-05, "loss": 0.051, "step": 17784 }, { "epoch": 2.5, "learning_rate": 4.583988396032192e-05, "loss": 0.0807, "step": 17786 }, { "epoch": 2.5, "learning_rate": 4.5839416058394164e-05, "loss": 0.0593, "step": 17788 }, { "epoch": 2.5, "learning_rate": 4.58389481564664e-05, "loss": 0.0637, "step": 17790 }, { "epoch": 2.5, "learning_rate": 4.583848025453865e-05, "loss": 0.0318, "step": 17792 }, { "epoch": 2.5, "learning_rate": 4.5838012352610895e-05, "loss": 0.0564, "step": 17794 }, { "epoch": 2.5, "learning_rate": 4.583754445068314e-05, "loss": 0.0575, "step": 17796 }, { "epoch": 2.5, "learning_rate": 4.583707654875538e-05, "loss": 0.078, "step": 17798 }, { "epoch": 2.5, "learning_rate": 4.5836608646827626e-05, "loss": 0.0557, "step": 17800 }, { "epoch": 2.5, "learning_rate": 4.583614074489987e-05, "loss": 0.0446, "step": 17802 }, { "epoch": 2.5, "learning_rate": 4.583567284297212e-05, "loss": 0.0582, "step": 17804 }, { "epoch": 2.5, "learning_rate": 4.583520494104436e-05, "loss": 0.0591, "step": 17806 }, { "epoch": 2.5, "learning_rate": 4.58347370391166e-05, "loss": 0.0552, "step": 17808 }, { "epoch": 2.5, "learning_rate": 4.583426913718885e-05, "loss": 0.0943, "step": 17810 }, { "epoch": 2.5, "learning_rate": 4.5833801235261095e-05, "loss": 0.0671, "step": 17812 }, { "epoch": 2.5, "learning_rate": 4.5833333333333334e-05, "loss": 0.0914, "step": 17814 }, { "epoch": 2.5, "learning_rate": 4.583286543140558e-05, "loss": 0.0672, "step": 17816 }, { "epoch": 2.5, "learning_rate": 4.583239752947782e-05, "loss": 0.0605, "step": 17818 }, { "epoch": 2.5, "learning_rate": 4.583192962755007e-05, "loss": 0.0664, "step": 17820 }, { "epoch": 2.5, "learning_rate": 4.583146172562231e-05, "loss": 0.05, "step": 17822 }, { "epoch": 2.5, "learning_rate": 4.583099382369456e-05, "loss": 0.0583, "step": 17824 }, { "epoch": 2.5, "learning_rate": 4.5830525921766796e-05, "loss": 0.0592, "step": 17826 }, { "epoch": 2.5, "learning_rate": 4.583005801983904e-05, "loss": 0.0468, "step": 17828 }, { "epoch": 2.5, "learning_rate": 4.582959011791129e-05, "loss": 0.0662, "step": 17830 }, { "epoch": 2.5, "learning_rate": 4.5829122215983533e-05, "loss": 0.0739, "step": 17832 }, { "epoch": 2.5, "learning_rate": 4.582865431405577e-05, "loss": 0.0741, "step": 17834 }, { "epoch": 2.5, "learning_rate": 4.582818641212802e-05, "loss": 0.0662, "step": 17836 }, { "epoch": 2.5, "learning_rate": 4.5827718510200264e-05, "loss": 0.0741, "step": 17838 }, { "epoch": 2.5, "learning_rate": 4.582725060827251e-05, "loss": 0.085, "step": 17840 }, { "epoch": 2.5, "learning_rate": 4.582678270634475e-05, "loss": 0.0626, "step": 17842 }, { "epoch": 2.5, "learning_rate": 4.5826314804416995e-05, "loss": 0.0599, "step": 17844 }, { "epoch": 2.51, "learning_rate": 4.582584690248924e-05, "loss": 0.0551, "step": 17846 }, { "epoch": 2.51, "learning_rate": 4.582537900056149e-05, "loss": 0.058, "step": 17848 }, { "epoch": 2.51, "learning_rate": 4.5824911098633726e-05, "loss": 0.0534, "step": 17850 }, { "epoch": 2.51, "learning_rate": 4.582444319670597e-05, "loss": 0.05, "step": 17852 }, { "epoch": 2.51, "learning_rate": 4.582397529477822e-05, "loss": 0.0659, "step": 17854 }, { "epoch": 2.51, "learning_rate": 4.5823507392850464e-05, "loss": 0.0593, "step": 17856 }, { "epoch": 2.51, "learning_rate": 4.58230394909227e-05, "loss": 0.0534, "step": 17858 }, { "epoch": 2.51, "learning_rate": 4.582257158899495e-05, "loss": 0.072, "step": 17860 }, { "epoch": 2.51, "learning_rate": 4.582210368706719e-05, "loss": 0.0592, "step": 17862 }, { "epoch": 2.51, "learning_rate": 4.582163578513944e-05, "loss": 0.0706, "step": 17864 }, { "epoch": 2.51, "learning_rate": 4.582116788321168e-05, "loss": 0.0623, "step": 17866 }, { "epoch": 2.51, "learning_rate": 4.5820699981283926e-05, "loss": 0.0639, "step": 17868 }, { "epoch": 2.51, "learning_rate": 4.5820232079356165e-05, "loss": 0.0638, "step": 17870 }, { "epoch": 2.51, "learning_rate": 4.581976417742842e-05, "loss": 0.0713, "step": 17872 }, { "epoch": 2.51, "learning_rate": 4.581929627550066e-05, "loss": 0.0725, "step": 17874 }, { "epoch": 2.51, "learning_rate": 4.58188283735729e-05, "loss": 0.0629, "step": 17876 }, { "epoch": 2.51, "learning_rate": 4.581836047164514e-05, "loss": 0.0673, "step": 17878 }, { "epoch": 2.51, "learning_rate": 4.581789256971739e-05, "loss": 0.063, "step": 17880 }, { "epoch": 2.51, "learning_rate": 4.5817424667789634e-05, "loss": 0.0706, "step": 17882 }, { "epoch": 2.51, "learning_rate": 4.581695676586188e-05, "loss": 0.0729, "step": 17884 }, { "epoch": 2.51, "learning_rate": 4.581648886393412e-05, "loss": 0.0563, "step": 17886 }, { "epoch": 2.51, "learning_rate": 4.5816020962006365e-05, "loss": 0.058, "step": 17888 }, { "epoch": 2.51, "learning_rate": 4.581555306007861e-05, "loss": 0.0682, "step": 17890 }, { "epoch": 2.51, "learning_rate": 4.581508515815086e-05, "loss": 0.0483, "step": 17892 }, { "epoch": 2.51, "learning_rate": 4.5814617256223096e-05, "loss": 0.0651, "step": 17894 }, { "epoch": 2.51, "learning_rate": 4.581414935429534e-05, "loss": 0.0691, "step": 17896 }, { "epoch": 2.51, "learning_rate": 4.581368145236759e-05, "loss": 0.0664, "step": 17898 }, { "epoch": 2.51, "learning_rate": 4.5813213550439834e-05, "loss": 0.0722, "step": 17900 }, { "epoch": 2.51, "learning_rate": 4.581274564851207e-05, "loss": 0.0618, "step": 17902 }, { "epoch": 2.51, "learning_rate": 4.581227774658432e-05, "loss": 0.0641, "step": 17904 }, { "epoch": 2.51, "learning_rate": 4.5811809844656564e-05, "loss": 0.0549, "step": 17906 }, { "epoch": 2.51, "learning_rate": 4.581134194272881e-05, "loss": 0.0581, "step": 17908 }, { "epoch": 2.51, "learning_rate": 4.581087404080105e-05, "loss": 0.0466, "step": 17910 }, { "epoch": 2.51, "learning_rate": 4.5810406138873295e-05, "loss": 0.0632, "step": 17912 }, { "epoch": 2.51, "learning_rate": 4.5809938236945535e-05, "loss": 0.0846, "step": 17914 }, { "epoch": 2.51, "learning_rate": 4.580947033501779e-05, "loss": 0.0557, "step": 17916 }, { "epoch": 2.52, "learning_rate": 4.5809002433090026e-05, "loss": 0.0645, "step": 17918 }, { "epoch": 2.52, "learning_rate": 4.580853453116227e-05, "loss": 0.0547, "step": 17920 }, { "epoch": 2.52, "learning_rate": 4.580806662923451e-05, "loss": 0.083, "step": 17922 }, { "epoch": 2.52, "learning_rate": 4.5807598727306764e-05, "loss": 0.0525, "step": 17924 }, { "epoch": 2.52, "learning_rate": 4.5807130825379e-05, "loss": 0.0499, "step": 17926 }, { "epoch": 2.52, "learning_rate": 4.580666292345125e-05, "loss": 0.0718, "step": 17928 }, { "epoch": 2.52, "learning_rate": 4.580619502152349e-05, "loss": 0.0704, "step": 17930 }, { "epoch": 2.52, "learning_rate": 4.5805727119595734e-05, "loss": 0.055, "step": 17932 }, { "epoch": 2.52, "learning_rate": 4.580525921766798e-05, "loss": 0.1309, "step": 17934 }, { "epoch": 2.52, "learning_rate": 4.5804791315740226e-05, "loss": 0.099, "step": 17936 }, { "epoch": 2.52, "learning_rate": 4.5804323413812465e-05, "loss": 0.0566, "step": 17938 }, { "epoch": 2.52, "learning_rate": 4.580385551188471e-05, "loss": 0.0975, "step": 17940 }, { "epoch": 2.52, "learning_rate": 4.580338760995696e-05, "loss": 0.0441, "step": 17942 }, { "epoch": 2.52, "learning_rate": 4.58029197080292e-05, "loss": 0.0631, "step": 17944 }, { "epoch": 2.52, "learning_rate": 4.580245180610144e-05, "loss": 0.0603, "step": 17946 }, { "epoch": 2.52, "learning_rate": 4.580198390417369e-05, "loss": 0.0821, "step": 17948 }, { "epoch": 2.52, "learning_rate": 4.5801516002245934e-05, "loss": 0.0696, "step": 17950 }, { "epoch": 2.52, "learning_rate": 4.580104810031818e-05, "loss": 0.0596, "step": 17952 }, { "epoch": 2.52, "learning_rate": 4.580058019839042e-05, "loss": 0.0837, "step": 17954 }, { "epoch": 2.52, "learning_rate": 4.5800112296462665e-05, "loss": 0.0728, "step": 17956 }, { "epoch": 2.52, "learning_rate": 4.579964439453491e-05, "loss": 0.0573, "step": 17958 }, { "epoch": 2.52, "learning_rate": 4.579917649260716e-05, "loss": 0.0813, "step": 17960 }, { "epoch": 2.52, "learning_rate": 4.5798708590679396e-05, "loss": 0.0614, "step": 17962 }, { "epoch": 2.52, "learning_rate": 4.579824068875164e-05, "loss": 0.0495, "step": 17964 }, { "epoch": 2.52, "learning_rate": 4.579777278682388e-05, "loss": 0.0763, "step": 17966 }, { "epoch": 2.52, "learning_rate": 4.5797304884896134e-05, "loss": 0.0801, "step": 17968 }, { "epoch": 2.52, "learning_rate": 4.579683698296837e-05, "loss": 0.0909, "step": 17970 }, { "epoch": 2.52, "learning_rate": 4.579636908104062e-05, "loss": 0.0803, "step": 17972 }, { "epoch": 2.52, "learning_rate": 4.579590117911286e-05, "loss": 0.0755, "step": 17974 }, { "epoch": 2.52, "learning_rate": 4.5795433277185104e-05, "loss": 0.0455, "step": 17976 }, { "epoch": 2.52, "learning_rate": 4.579496537525735e-05, "loss": 0.0625, "step": 17978 }, { "epoch": 2.52, "learning_rate": 4.5794497473329595e-05, "loss": 0.0573, "step": 17980 }, { "epoch": 2.52, "learning_rate": 4.5794029571401835e-05, "loss": 0.0725, "step": 17982 }, { "epoch": 2.52, "learning_rate": 4.579356166947408e-05, "loss": 0.0406, "step": 17984 }, { "epoch": 2.52, "learning_rate": 4.5793093767546326e-05, "loss": 0.0562, "step": 17986 }, { "epoch": 2.52, "learning_rate": 4.5792625865618566e-05, "loss": 0.0727, "step": 17988 }, { "epoch": 2.53, "learning_rate": 4.579215796369081e-05, "loss": 0.074, "step": 17990 }, { "epoch": 2.53, "learning_rate": 4.579169006176306e-05, "loss": 0.0805, "step": 17992 }, { "epoch": 2.53, "learning_rate": 4.57912221598353e-05, "loss": 0.0646, "step": 17994 }, { "epoch": 2.53, "learning_rate": 4.579075425790754e-05, "loss": 0.0542, "step": 17996 }, { "epoch": 2.53, "learning_rate": 4.579028635597979e-05, "loss": 0.0552, "step": 17998 }, { "epoch": 2.53, "learning_rate": 4.578981845405203e-05, "loss": 0.0685, "step": 18000 }, { "epoch": 2.53, "eval_gen_len": 29.9828, "eval_loss": 1.0630050897598267, "eval_meteor": 0.0517, "eval_runtime": 14.711, "eval_samples_per_second": 3.943, "eval_steps_per_second": 0.544, "step": 18000 }, { "epoch": 2.53, "learning_rate": 4.578935055212428e-05, "loss": 0.0574, "step": 18002 }, { "epoch": 2.53, "learning_rate": 4.578888265019652e-05, "loss": 0.0585, "step": 18004 }, { "epoch": 2.53, "learning_rate": 4.5788414748268765e-05, "loss": 0.0534, "step": 18006 }, { "epoch": 2.53, "learning_rate": 4.5787946846341004e-05, "loss": 0.0853, "step": 18008 }, { "epoch": 2.53, "learning_rate": 4.578747894441325e-05, "loss": 0.0485, "step": 18010 }, { "epoch": 2.53, "learning_rate": 4.5787011042485496e-05, "loss": 0.0588, "step": 18012 }, { "epoch": 2.53, "learning_rate": 4.578654314055774e-05, "loss": 0.0653, "step": 18014 }, { "epoch": 2.53, "learning_rate": 4.578607523862998e-05, "loss": 0.0611, "step": 18016 }, { "epoch": 2.53, "learning_rate": 4.578560733670223e-05, "loss": 0.0585, "step": 18018 }, { "epoch": 2.53, "learning_rate": 4.578513943477447e-05, "loss": 0.0494, "step": 18020 }, { "epoch": 2.53, "learning_rate": 4.578467153284672e-05, "loss": 0.0579, "step": 18022 }, { "epoch": 2.53, "learning_rate": 4.578420363091896e-05, "loss": 0.0788, "step": 18024 }, { "epoch": 2.53, "learning_rate": 4.5783735728991204e-05, "loss": 0.0835, "step": 18026 }, { "epoch": 2.53, "learning_rate": 4.578326782706345e-05, "loss": 0.0799, "step": 18028 }, { "epoch": 2.53, "learning_rate": 4.5782799925135696e-05, "loss": 0.0779, "step": 18030 }, { "epoch": 2.53, "learning_rate": 4.5782332023207935e-05, "loss": 0.0508, "step": 18032 }, { "epoch": 2.53, "learning_rate": 4.578186412128018e-05, "loss": 0.0542, "step": 18034 }, { "epoch": 2.53, "learning_rate": 4.578139621935243e-05, "loss": 0.0572, "step": 18036 }, { "epoch": 2.53, "learning_rate": 4.578092831742467e-05, "loss": 0.0846, "step": 18038 }, { "epoch": 2.53, "learning_rate": 4.578046041549691e-05, "loss": 0.1055, "step": 18040 }, { "epoch": 2.53, "learning_rate": 4.577999251356916e-05, "loss": 0.0624, "step": 18042 }, { "epoch": 2.53, "learning_rate": 4.57795246116414e-05, "loss": 0.0574, "step": 18044 }, { "epoch": 2.53, "learning_rate": 4.577905670971365e-05, "loss": 0.0669, "step": 18046 }, { "epoch": 2.53, "learning_rate": 4.577858880778589e-05, "loss": 0.0626, "step": 18048 }, { "epoch": 2.53, "learning_rate": 4.5778120905858135e-05, "loss": 0.0663, "step": 18050 }, { "epoch": 2.53, "learning_rate": 4.5777653003930374e-05, "loss": 0.061, "step": 18052 }, { "epoch": 2.53, "learning_rate": 4.5777185102002626e-05, "loss": 0.0967, "step": 18054 }, { "epoch": 2.53, "learning_rate": 4.5776717200074866e-05, "loss": 0.076, "step": 18056 }, { "epoch": 2.53, "learning_rate": 4.577624929814711e-05, "loss": 0.0503, "step": 18058 }, { "epoch": 2.54, "learning_rate": 4.577578139621935e-05, "loss": 0.0614, "step": 18060 }, { "epoch": 2.54, "learning_rate": 4.5775313494291597e-05, "loss": 0.0655, "step": 18062 }, { "epoch": 2.54, "learning_rate": 4.577484559236384e-05, "loss": 0.0796, "step": 18064 }, { "epoch": 2.54, "learning_rate": 4.577437769043609e-05, "loss": 0.0709, "step": 18066 }, { "epoch": 2.54, "learning_rate": 4.577390978850833e-05, "loss": 0.0553, "step": 18068 }, { "epoch": 2.54, "learning_rate": 4.577344188658057e-05, "loss": 0.064, "step": 18070 }, { "epoch": 2.54, "learning_rate": 4.577297398465282e-05, "loss": 0.0773, "step": 18072 }, { "epoch": 2.54, "learning_rate": 4.5772506082725065e-05, "loss": 0.0511, "step": 18074 }, { "epoch": 2.54, "learning_rate": 4.5772038180797304e-05, "loss": 0.0555, "step": 18076 }, { "epoch": 2.54, "learning_rate": 4.577157027886955e-05, "loss": 0.0628, "step": 18078 }, { "epoch": 2.54, "learning_rate": 4.5771102376941796e-05, "loss": 0.0613, "step": 18080 }, { "epoch": 2.54, "learning_rate": 4.577063447501404e-05, "loss": 0.0681, "step": 18082 }, { "epoch": 2.54, "learning_rate": 4.577016657308628e-05, "loss": 0.0565, "step": 18084 }, { "epoch": 2.54, "learning_rate": 4.576969867115853e-05, "loss": 0.0705, "step": 18086 }, { "epoch": 2.54, "learning_rate": 4.576923076923077e-05, "loss": 0.0486, "step": 18088 }, { "epoch": 2.54, "learning_rate": 4.576876286730302e-05, "loss": 0.0643, "step": 18090 }, { "epoch": 2.54, "learning_rate": 4.576829496537526e-05, "loss": 0.0604, "step": 18092 }, { "epoch": 2.54, "learning_rate": 4.5767827063447504e-05, "loss": 0.0679, "step": 18094 }, { "epoch": 2.54, "learning_rate": 4.576735916151974e-05, "loss": 0.0723, "step": 18096 }, { "epoch": 2.54, "learning_rate": 4.5766891259591996e-05, "loss": 0.0516, "step": 18098 }, { "epoch": 2.54, "learning_rate": 4.5766423357664235e-05, "loss": 0.0815, "step": 18100 }, { "epoch": 2.54, "learning_rate": 4.576595545573648e-05, "loss": 0.0627, "step": 18102 }, { "epoch": 2.54, "learning_rate": 4.576548755380872e-05, "loss": 0.0669, "step": 18104 }, { "epoch": 2.54, "learning_rate": 4.5765019651880966e-05, "loss": 0.0467, "step": 18106 }, { "epoch": 2.54, "learning_rate": 4.576455174995321e-05, "loss": 0.0538, "step": 18108 }, { "epoch": 2.54, "learning_rate": 4.576408384802546e-05, "loss": 0.0598, "step": 18110 }, { "epoch": 2.54, "learning_rate": 4.57636159460977e-05, "loss": 0.0831, "step": 18112 }, { "epoch": 2.54, "learning_rate": 4.576314804416994e-05, "loss": 0.0829, "step": 18114 }, { "epoch": 2.54, "learning_rate": 4.576268014224219e-05, "loss": 0.0795, "step": 18116 }, { "epoch": 2.54, "learning_rate": 4.5762212240314435e-05, "loss": 0.0695, "step": 18118 }, { "epoch": 2.54, "learning_rate": 4.5761744338386674e-05, "loss": 0.0852, "step": 18120 }, { "epoch": 2.54, "learning_rate": 4.576127643645892e-05, "loss": 0.0471, "step": 18122 }, { "epoch": 2.54, "learning_rate": 4.5760808534531166e-05, "loss": 0.0622, "step": 18124 }, { "epoch": 2.54, "learning_rate": 4.576034063260341e-05, "loss": 0.0434, "step": 18126 }, { "epoch": 2.54, "learning_rate": 4.575987273067565e-05, "loss": 0.0489, "step": 18128 }, { "epoch": 2.54, "learning_rate": 4.5759404828747897e-05, "loss": 0.0932, "step": 18130 }, { "epoch": 2.55, "learning_rate": 4.575893692682014e-05, "loss": 0.0676, "step": 18132 }, { "epoch": 2.55, "learning_rate": 4.575846902489239e-05, "loss": 0.0738, "step": 18134 }, { "epoch": 2.55, "learning_rate": 4.575800112296463e-05, "loss": 0.0719, "step": 18136 }, { "epoch": 2.55, "learning_rate": 4.5757533221036873e-05, "loss": 0.0558, "step": 18138 }, { "epoch": 2.55, "learning_rate": 4.575706531910911e-05, "loss": 0.0841, "step": 18140 }, { "epoch": 2.55, "learning_rate": 4.5756597417181365e-05, "loss": 0.0758, "step": 18142 }, { "epoch": 2.55, "learning_rate": 4.5756129515253604e-05, "loss": 0.0637, "step": 18144 }, { "epoch": 2.55, "learning_rate": 4.575566161332585e-05, "loss": 0.0691, "step": 18146 }, { "epoch": 2.55, "learning_rate": 4.575519371139809e-05, "loss": 0.0624, "step": 18148 }, { "epoch": 2.55, "learning_rate": 4.575472580947034e-05, "loss": 0.0743, "step": 18150 }, { "epoch": 2.55, "learning_rate": 4.575425790754258e-05, "loss": 0.0611, "step": 18152 }, { "epoch": 2.55, "learning_rate": 4.575379000561483e-05, "loss": 0.0608, "step": 18154 }, { "epoch": 2.55, "learning_rate": 4.5753322103687066e-05, "loss": 0.0768, "step": 18156 }, { "epoch": 2.55, "learning_rate": 4.575285420175931e-05, "loss": 0.0476, "step": 18158 }, { "epoch": 2.55, "learning_rate": 4.575238629983156e-05, "loss": 0.0837, "step": 18160 }, { "epoch": 2.55, "learning_rate": 4.5751918397903804e-05, "loss": 0.0747, "step": 18162 }, { "epoch": 2.55, "learning_rate": 4.575145049597604e-05, "loss": 0.0707, "step": 18164 }, { "epoch": 2.55, "learning_rate": 4.575098259404829e-05, "loss": 0.0619, "step": 18166 }, { "epoch": 2.55, "learning_rate": 4.5750514692120535e-05, "loss": 0.0645, "step": 18168 }, { "epoch": 2.55, "learning_rate": 4.575004679019278e-05, "loss": 0.0577, "step": 18170 }, { "epoch": 2.55, "learning_rate": 4.574957888826502e-05, "loss": 0.0545, "step": 18172 }, { "epoch": 2.55, "learning_rate": 4.5749110986337266e-05, "loss": 0.0641, "step": 18174 }, { "epoch": 2.55, "learning_rate": 4.574864308440951e-05, "loss": 0.0938, "step": 18176 }, { "epoch": 2.55, "learning_rate": 4.574817518248176e-05, "loss": 0.0725, "step": 18178 }, { "epoch": 2.55, "learning_rate": 4.5747707280554e-05, "loss": 0.075, "step": 18180 }, { "epoch": 2.55, "learning_rate": 4.574723937862624e-05, "loss": 0.0707, "step": 18182 }, { "epoch": 2.55, "learning_rate": 4.574677147669849e-05, "loss": 0.0726, "step": 18184 }, { "epoch": 2.55, "learning_rate": 4.5746303574770735e-05, "loss": 0.0506, "step": 18186 }, { "epoch": 2.55, "learning_rate": 4.5745835672842974e-05, "loss": 0.0632, "step": 18188 }, { "epoch": 2.55, "learning_rate": 4.574536777091522e-05, "loss": 0.0544, "step": 18190 }, { "epoch": 2.55, "learning_rate": 4.574489986898746e-05, "loss": 0.0564, "step": 18192 }, { "epoch": 2.55, "learning_rate": 4.574443196705971e-05, "loss": 0.0785, "step": 18194 }, { "epoch": 2.55, "learning_rate": 4.574396406513195e-05, "loss": 0.074, "step": 18196 }, { "epoch": 2.55, "learning_rate": 4.5743496163204197e-05, "loss": 0.0464, "step": 18198 }, { "epoch": 2.55, "learning_rate": 4.5743028261276436e-05, "loss": 0.0614, "step": 18200 }, { "epoch": 2.56, "learning_rate": 4.574256035934869e-05, "loss": 0.0843, "step": 18202 }, { "epoch": 2.56, "learning_rate": 4.574209245742093e-05, "loss": 0.0725, "step": 18204 }, { "epoch": 2.56, "learning_rate": 4.5741624555493173e-05, "loss": 0.0684, "step": 18206 }, { "epoch": 2.56, "learning_rate": 4.574115665356541e-05, "loss": 0.0989, "step": 18208 }, { "epoch": 2.56, "learning_rate": 4.574068875163766e-05, "loss": 0.0952, "step": 18210 }, { "epoch": 2.56, "learning_rate": 4.5740220849709904e-05, "loss": 0.0607, "step": 18212 }, { "epoch": 2.56, "learning_rate": 4.573975294778215e-05, "loss": 0.0572, "step": 18214 }, { "epoch": 2.56, "learning_rate": 4.573928504585439e-05, "loss": 0.0673, "step": 18216 }, { "epoch": 2.56, "learning_rate": 4.5738817143926635e-05, "loss": 0.0595, "step": 18218 }, { "epoch": 2.56, "learning_rate": 4.573834924199888e-05, "loss": 0.0681, "step": 18220 }, { "epoch": 2.56, "learning_rate": 4.573788134007113e-05, "loss": 0.0632, "step": 18222 }, { "epoch": 2.56, "learning_rate": 4.5737413438143366e-05, "loss": 0.0569, "step": 18224 }, { "epoch": 2.56, "learning_rate": 4.573694553621561e-05, "loss": 0.0609, "step": 18226 }, { "epoch": 2.56, "learning_rate": 4.573647763428786e-05, "loss": 0.052, "step": 18228 }, { "epoch": 2.56, "learning_rate": 4.5736009732360104e-05, "loss": 0.0567, "step": 18230 }, { "epoch": 2.56, "learning_rate": 4.573554183043234e-05, "loss": 0.0725, "step": 18232 }, { "epoch": 2.56, "learning_rate": 4.573507392850459e-05, "loss": 0.0439, "step": 18234 }, { "epoch": 2.56, "learning_rate": 4.5734606026576835e-05, "loss": 0.0471, "step": 18236 }, { "epoch": 2.56, "learning_rate": 4.5734138124649074e-05, "loss": 0.0728, "step": 18238 }, { "epoch": 2.56, "learning_rate": 4.573367022272132e-05, "loss": 0.0785, "step": 18240 }, { "epoch": 2.56, "learning_rate": 4.573320232079356e-05, "loss": 0.0747, "step": 18242 }, { "epoch": 2.56, "learning_rate": 4.5732734418865805e-05, "loss": 0.0579, "step": 18244 }, { "epoch": 2.56, "learning_rate": 4.573226651693805e-05, "loss": 0.0665, "step": 18246 }, { "epoch": 2.56, "learning_rate": 4.57317986150103e-05, "loss": 0.0481, "step": 18248 }, { "epoch": 2.56, "learning_rate": 4.5731330713082536e-05, "loss": 0.0711, "step": 18250 }, { "epoch": 2.56, "learning_rate": 4.573086281115478e-05, "loss": 0.0555, "step": 18252 }, { "epoch": 2.56, "learning_rate": 4.573039490922703e-05, "loss": 0.0842, "step": 18254 }, { "epoch": 2.56, "learning_rate": 4.5729927007299274e-05, "loss": 0.0768, "step": 18256 }, { "epoch": 2.56, "learning_rate": 4.572945910537151e-05, "loss": 0.0584, "step": 18258 }, { "epoch": 2.56, "learning_rate": 4.572899120344376e-05, "loss": 0.0669, "step": 18260 }, { "epoch": 2.56, "learning_rate": 4.5728523301516005e-05, "loss": 0.0967, "step": 18262 }, { "epoch": 2.56, "learning_rate": 4.572805539958825e-05, "loss": 0.0714, "step": 18264 }, { "epoch": 2.56, "learning_rate": 4.572758749766049e-05, "loss": 0.0476, "step": 18266 }, { "epoch": 2.56, "learning_rate": 4.5727119595732736e-05, "loss": 0.0612, "step": 18268 }, { "epoch": 2.56, "learning_rate": 4.572665169380498e-05, "loss": 0.0443, "step": 18270 }, { "epoch": 2.56, "learning_rate": 4.572618379187723e-05, "loss": 0.0566, "step": 18272 }, { "epoch": 2.57, "learning_rate": 4.572571588994947e-05, "loss": 0.0596, "step": 18274 }, { "epoch": 2.57, "learning_rate": 4.572524798802171e-05, "loss": 0.0519, "step": 18276 }, { "epoch": 2.57, "learning_rate": 4.572478008609395e-05, "loss": 0.0782, "step": 18278 }, { "epoch": 2.57, "learning_rate": 4.5724312184166204e-05, "loss": 0.0565, "step": 18280 }, { "epoch": 2.57, "learning_rate": 4.5723844282238444e-05, "loss": 0.0611, "step": 18282 }, { "epoch": 2.57, "learning_rate": 4.572337638031069e-05, "loss": 0.0419, "step": 18284 }, { "epoch": 2.57, "learning_rate": 4.572290847838293e-05, "loss": 0.0639, "step": 18286 }, { "epoch": 2.57, "learning_rate": 4.5722440576455174e-05, "loss": 0.0831, "step": 18288 }, { "epoch": 2.57, "learning_rate": 4.572197267452742e-05, "loss": 0.0537, "step": 18290 }, { "epoch": 2.57, "learning_rate": 4.5721504772599666e-05, "loss": 0.076, "step": 18292 }, { "epoch": 2.57, "learning_rate": 4.5721036870671905e-05, "loss": 0.0811, "step": 18294 }, { "epoch": 2.57, "learning_rate": 4.572056896874415e-05, "loss": 0.0534, "step": 18296 }, { "epoch": 2.57, "learning_rate": 4.57201010668164e-05, "loss": 0.0726, "step": 18298 }, { "epoch": 2.57, "learning_rate": 4.571963316488864e-05, "loss": 0.0908, "step": 18300 }, { "epoch": 2.57, "learning_rate": 4.571916526296088e-05, "loss": 0.0847, "step": 18302 }, { "epoch": 2.57, "learning_rate": 4.571869736103313e-05, "loss": 0.065, "step": 18304 }, { "epoch": 2.57, "learning_rate": 4.5718229459105374e-05, "loss": 0.0583, "step": 18306 }, { "epoch": 2.57, "learning_rate": 4.571776155717762e-05, "loss": 0.059, "step": 18308 }, { "epoch": 2.57, "learning_rate": 4.571729365524986e-05, "loss": 0.0935, "step": 18310 }, { "epoch": 2.57, "learning_rate": 4.5716825753322105e-05, "loss": 0.0507, "step": 18312 }, { "epoch": 2.57, "learning_rate": 4.571635785139435e-05, "loss": 0.0744, "step": 18314 }, { "epoch": 2.57, "learning_rate": 4.57158899494666e-05, "loss": 0.0534, "step": 18316 }, { "epoch": 2.57, "learning_rate": 4.5715422047538836e-05, "loss": 0.0578, "step": 18318 }, { "epoch": 2.57, "learning_rate": 4.571495414561108e-05, "loss": 0.0678, "step": 18320 }, { "epoch": 2.57, "learning_rate": 4.571448624368332e-05, "loss": 0.0631, "step": 18322 }, { "epoch": 2.57, "learning_rate": 4.5714018341755574e-05, "loss": 0.0579, "step": 18324 }, { "epoch": 2.57, "learning_rate": 4.571355043982781e-05, "loss": 0.0568, "step": 18326 }, { "epoch": 2.57, "learning_rate": 4.571308253790006e-05, "loss": 0.0613, "step": 18328 }, { "epoch": 2.57, "learning_rate": 4.57126146359723e-05, "loss": 0.0549, "step": 18330 }, { "epoch": 2.57, "learning_rate": 4.571214673404455e-05, "loss": 0.0559, "step": 18332 }, { "epoch": 2.57, "learning_rate": 4.571167883211679e-05, "loss": 0.0579, "step": 18334 }, { "epoch": 2.57, "learning_rate": 4.5711210930189036e-05, "loss": 0.0581, "step": 18336 }, { "epoch": 2.57, "learning_rate": 4.5710743028261275e-05, "loss": 0.0759, "step": 18338 }, { "epoch": 2.57, "learning_rate": 4.571027512633352e-05, "loss": 0.0619, "step": 18340 }, { "epoch": 2.57, "learning_rate": 4.570980722440577e-05, "loss": 0.0932, "step": 18342 }, { "epoch": 2.57, "learning_rate": 4.570933932247801e-05, "loss": 0.0834, "step": 18344 }, { "epoch": 2.58, "learning_rate": 4.570887142055025e-05, "loss": 0.0916, "step": 18346 }, { "epoch": 2.58, "learning_rate": 4.57084035186225e-05, "loss": 0.0794, "step": 18348 }, { "epoch": 2.58, "learning_rate": 4.5707935616694744e-05, "loss": 0.0707, "step": 18350 }, { "epoch": 2.58, "learning_rate": 4.570746771476699e-05, "loss": 0.0715, "step": 18352 }, { "epoch": 2.58, "learning_rate": 4.570699981283923e-05, "loss": 0.0645, "step": 18354 }, { "epoch": 2.58, "learning_rate": 4.5706531910911475e-05, "loss": 0.0676, "step": 18356 }, { "epoch": 2.58, "learning_rate": 4.570606400898372e-05, "loss": 0.0562, "step": 18358 }, { "epoch": 2.58, "learning_rate": 4.5705596107055966e-05, "loss": 0.0768, "step": 18360 }, { "epoch": 2.58, "learning_rate": 4.5705128205128205e-05, "loss": 0.0724, "step": 18362 }, { "epoch": 2.58, "learning_rate": 4.570466030320045e-05, "loss": 0.0937, "step": 18364 }, { "epoch": 2.58, "learning_rate": 4.57041924012727e-05, "loss": 0.0621, "step": 18366 }, { "epoch": 2.58, "learning_rate": 4.570372449934494e-05, "loss": 0.0561, "step": 18368 }, { "epoch": 2.58, "learning_rate": 4.570325659741718e-05, "loss": 0.0608, "step": 18370 }, { "epoch": 2.58, "learning_rate": 4.570278869548943e-05, "loss": 0.0528, "step": 18372 }, { "epoch": 2.58, "learning_rate": 4.570232079356167e-05, "loss": 0.0754, "step": 18374 }, { "epoch": 2.58, "learning_rate": 4.570185289163392e-05, "loss": 0.0526, "step": 18376 }, { "epoch": 2.58, "learning_rate": 4.570138498970616e-05, "loss": 0.0686, "step": 18378 }, { "epoch": 2.58, "learning_rate": 4.5700917087778405e-05, "loss": 0.0589, "step": 18380 }, { "epoch": 2.58, "learning_rate": 4.5700449185850644e-05, "loss": 0.0836, "step": 18382 }, { "epoch": 2.58, "learning_rate": 4.56999812839229e-05, "loss": 0.0691, "step": 18384 }, { "epoch": 2.58, "learning_rate": 4.5699513381995136e-05, "loss": 0.0577, "step": 18386 }, { "epoch": 2.58, "learning_rate": 4.569904548006738e-05, "loss": 0.0598, "step": 18388 }, { "epoch": 2.58, "learning_rate": 4.569857757813962e-05, "loss": 0.0531, "step": 18390 }, { "epoch": 2.58, "learning_rate": 4.569810967621187e-05, "loss": 0.0687, "step": 18392 }, { "epoch": 2.58, "learning_rate": 4.569764177428411e-05, "loss": 0.0573, "step": 18394 }, { "epoch": 2.58, "learning_rate": 4.569717387235636e-05, "loss": 0.0716, "step": 18396 }, { "epoch": 2.58, "learning_rate": 4.56967059704286e-05, "loss": 0.0508, "step": 18398 }, { "epoch": 2.58, "learning_rate": 4.5696238068500844e-05, "loss": 0.0861, "step": 18400 }, { "epoch": 2.58, "learning_rate": 4.569577016657309e-05, "loss": 0.0575, "step": 18402 }, { "epoch": 2.58, "learning_rate": 4.5695302264645336e-05, "loss": 0.0549, "step": 18404 }, { "epoch": 2.58, "learning_rate": 4.5694834362717575e-05, "loss": 0.0513, "step": 18406 }, { "epoch": 2.58, "learning_rate": 4.569436646078982e-05, "loss": 0.066, "step": 18408 }, { "epoch": 2.58, "learning_rate": 4.569389855886207e-05, "loss": 0.0694, "step": 18410 }, { "epoch": 2.58, "learning_rate": 4.569343065693431e-05, "loss": 0.0617, "step": 18412 }, { "epoch": 2.58, "learning_rate": 4.569296275500655e-05, "loss": 0.046, "step": 18414 }, { "epoch": 2.59, "learning_rate": 4.56924948530788e-05, "loss": 0.0726, "step": 18416 }, { "epoch": 2.59, "learning_rate": 4.569202695115104e-05, "loss": 0.0718, "step": 18418 }, { "epoch": 2.59, "learning_rate": 4.569155904922329e-05, "loss": 0.0521, "step": 18420 }, { "epoch": 2.59, "learning_rate": 4.569109114729553e-05, "loss": 0.07, "step": 18422 }, { "epoch": 2.59, "learning_rate": 4.5690623245367775e-05, "loss": 0.0855, "step": 18424 }, { "epoch": 2.59, "learning_rate": 4.5690155343440014e-05, "loss": 0.0811, "step": 18426 }, { "epoch": 2.59, "learning_rate": 4.5689687441512266e-05, "loss": 0.0491, "step": 18428 }, { "epoch": 2.59, "learning_rate": 4.5689219539584505e-05, "loss": 0.0697, "step": 18430 }, { "epoch": 2.59, "learning_rate": 4.568875163765675e-05, "loss": 0.0671, "step": 18432 }, { "epoch": 2.59, "learning_rate": 4.568828373572899e-05, "loss": 0.0611, "step": 18434 }, { "epoch": 2.59, "learning_rate": 4.5687815833801236e-05, "loss": 0.0682, "step": 18436 }, { "epoch": 2.59, "learning_rate": 4.568734793187348e-05, "loss": 0.0622, "step": 18438 }, { "epoch": 2.59, "learning_rate": 4.568688002994573e-05, "loss": 0.0688, "step": 18440 }, { "epoch": 2.59, "learning_rate": 4.568641212801797e-05, "loss": 0.0682, "step": 18442 }, { "epoch": 2.59, "learning_rate": 4.568594422609021e-05, "loss": 0.0585, "step": 18444 }, { "epoch": 2.59, "learning_rate": 4.568547632416246e-05, "loss": 0.0631, "step": 18446 }, { "epoch": 2.59, "learning_rate": 4.5685008422234705e-05, "loss": 0.0519, "step": 18448 }, { "epoch": 2.59, "learning_rate": 4.5684540520306944e-05, "loss": 0.0781, "step": 18450 }, { "epoch": 2.59, "learning_rate": 4.568407261837919e-05, "loss": 0.0722, "step": 18452 }, { "epoch": 2.59, "learning_rate": 4.5683604716451436e-05, "loss": 0.0735, "step": 18454 }, { "epoch": 2.59, "learning_rate": 4.568313681452368e-05, "loss": 0.0465, "step": 18456 }, { "epoch": 2.59, "learning_rate": 4.568266891259592e-05, "loss": 0.0464, "step": 18458 }, { "epoch": 2.59, "learning_rate": 4.568220101066817e-05, "loss": 0.0834, "step": 18460 }, { "epoch": 2.59, "learning_rate": 4.568173310874041e-05, "loss": 0.0868, "step": 18462 }, { "epoch": 2.59, "learning_rate": 4.568126520681266e-05, "loss": 0.0675, "step": 18464 }, { "epoch": 2.59, "learning_rate": 4.56807973048849e-05, "loss": 0.0623, "step": 18466 }, { "epoch": 2.59, "learning_rate": 4.5680329402957144e-05, "loss": 0.0811, "step": 18468 }, { "epoch": 2.59, "learning_rate": 4.567986150102938e-05, "loss": 0.1039, "step": 18470 }, { "epoch": 2.59, "learning_rate": 4.5679393599101636e-05, "loss": 0.0557, "step": 18472 }, { "epoch": 2.59, "learning_rate": 4.5678925697173875e-05, "loss": 0.048, "step": 18474 }, { "epoch": 2.59, "learning_rate": 4.567845779524612e-05, "loss": 0.0504, "step": 18476 }, { "epoch": 2.59, "learning_rate": 4.567798989331836e-05, "loss": 0.0646, "step": 18478 }, { "epoch": 2.59, "learning_rate": 4.567752199139061e-05, "loss": 0.0745, "step": 18480 }, { "epoch": 2.59, "learning_rate": 4.567705408946285e-05, "loss": 0.0639, "step": 18482 }, { "epoch": 2.59, "learning_rate": 4.56765861875351e-05, "loss": 0.0683, "step": 18484 }, { "epoch": 2.59, "learning_rate": 4.567611828560734e-05, "loss": 0.0596, "step": 18486 }, { "epoch": 2.6, "learning_rate": 4.567565038367958e-05, "loss": 0.0624, "step": 18488 }, { "epoch": 2.6, "learning_rate": 4.567518248175183e-05, "loss": 0.0536, "step": 18490 }, { "epoch": 2.6, "learning_rate": 4.567471457982407e-05, "loss": 0.0704, "step": 18492 }, { "epoch": 2.6, "learning_rate": 4.5674246677896314e-05, "loss": 0.0923, "step": 18494 }, { "epoch": 2.6, "learning_rate": 4.567377877596856e-05, "loss": 0.0783, "step": 18496 }, { "epoch": 2.6, "learning_rate": 4.5673310874040806e-05, "loss": 0.0792, "step": 18498 }, { "epoch": 2.6, "learning_rate": 4.5672842972113045e-05, "loss": 0.045, "step": 18500 }, { "epoch": 2.6, "learning_rate": 4.567237507018529e-05, "loss": 0.0592, "step": 18502 }, { "epoch": 2.6, "learning_rate": 4.567190716825753e-05, "loss": 0.0582, "step": 18504 }, { "epoch": 2.6, "learning_rate": 4.567143926632978e-05, "loss": 0.0808, "step": 18506 }, { "epoch": 2.6, "learning_rate": 4.567097136440202e-05, "loss": 0.0711, "step": 18508 }, { "epoch": 2.6, "learning_rate": 4.567050346247427e-05, "loss": 0.058, "step": 18510 }, { "epoch": 2.6, "learning_rate": 4.5670035560546507e-05, "loss": 0.0584, "step": 18512 }, { "epoch": 2.6, "learning_rate": 4.566956765861876e-05, "loss": 0.0872, "step": 18514 }, { "epoch": 2.6, "learning_rate": 4.5669099756691e-05, "loss": 0.0625, "step": 18516 }, { "epoch": 2.6, "learning_rate": 4.5668631854763244e-05, "loss": 0.0688, "step": 18518 }, { "epoch": 2.6, "learning_rate": 4.5668163952835483e-05, "loss": 0.0666, "step": 18520 }, { "epoch": 2.6, "learning_rate": 4.566769605090773e-05, "loss": 0.0567, "step": 18522 }, { "epoch": 2.6, "learning_rate": 4.5667228148979975e-05, "loss": 0.0653, "step": 18524 }, { "epoch": 2.6, "learning_rate": 4.566676024705222e-05, "loss": 0.0905, "step": 18526 }, { "epoch": 2.6, "learning_rate": 4.566629234512446e-05, "loss": 0.096, "step": 18528 }, { "epoch": 2.6, "learning_rate": 4.5665824443196706e-05, "loss": 0.0797, "step": 18530 }, { "epoch": 2.6, "learning_rate": 4.566535654126895e-05, "loss": 0.0705, "step": 18532 }, { "epoch": 2.6, "learning_rate": 4.56648886393412e-05, "loss": 0.0588, "step": 18534 }, { "epoch": 2.6, "learning_rate": 4.566442073741344e-05, "loss": 0.0743, "step": 18536 }, { "epoch": 2.6, "learning_rate": 4.566395283548568e-05, "loss": 0.0661, "step": 18538 }, { "epoch": 2.6, "learning_rate": 4.566348493355793e-05, "loss": 0.0802, "step": 18540 }, { "epoch": 2.6, "learning_rate": 4.5663017031630175e-05, "loss": 0.0562, "step": 18542 }, { "epoch": 2.6, "learning_rate": 4.5662549129702414e-05, "loss": 0.0646, "step": 18544 }, { "epoch": 2.6, "learning_rate": 4.566208122777466e-05, "loss": 0.0774, "step": 18546 }, { "epoch": 2.6, "learning_rate": 4.5661613325846906e-05, "loss": 0.0669, "step": 18548 }, { "epoch": 2.6, "learning_rate": 4.566114542391915e-05, "loss": 0.0566, "step": 18550 }, { "epoch": 2.6, "learning_rate": 4.566067752199139e-05, "loss": 0.0428, "step": 18552 }, { "epoch": 2.6, "learning_rate": 4.566020962006364e-05, "loss": 0.0669, "step": 18554 }, { "epoch": 2.6, "learning_rate": 4.5659741718135876e-05, "loss": 0.0795, "step": 18556 }, { "epoch": 2.6, "learning_rate": 4.565927381620813e-05, "loss": 0.0556, "step": 18558 }, { "epoch": 2.61, "learning_rate": 4.565880591428037e-05, "loss": 0.0486, "step": 18560 }, { "epoch": 2.61, "learning_rate": 4.5658338012352614e-05, "loss": 0.056, "step": 18562 }, { "epoch": 2.61, "learning_rate": 4.565787011042485e-05, "loss": 0.0688, "step": 18564 }, { "epoch": 2.61, "learning_rate": 4.56574022084971e-05, "loss": 0.0698, "step": 18566 }, { "epoch": 2.61, "learning_rate": 4.5656934306569345e-05, "loss": 0.0619, "step": 18568 }, { "epoch": 2.61, "learning_rate": 4.565646640464159e-05, "loss": 0.0527, "step": 18570 }, { "epoch": 2.61, "learning_rate": 4.565599850271383e-05, "loss": 0.0389, "step": 18572 }, { "epoch": 2.61, "learning_rate": 4.5655530600786076e-05, "loss": 0.0677, "step": 18574 }, { "epoch": 2.61, "learning_rate": 4.565506269885832e-05, "loss": 0.0628, "step": 18576 }, { "epoch": 2.61, "learning_rate": 4.565459479693057e-05, "loss": 0.0577, "step": 18578 }, { "epoch": 2.61, "learning_rate": 4.5654126895002807e-05, "loss": 0.0564, "step": 18580 }, { "epoch": 2.61, "learning_rate": 4.565365899307505e-05, "loss": 0.0572, "step": 18582 }, { "epoch": 2.61, "learning_rate": 4.56531910911473e-05, "loss": 0.0669, "step": 18584 }, { "epoch": 2.61, "learning_rate": 4.5652723189219544e-05, "loss": 0.0569, "step": 18586 }, { "epoch": 2.61, "learning_rate": 4.5652255287291783e-05, "loss": 0.0608, "step": 18588 }, { "epoch": 2.61, "learning_rate": 4.565178738536403e-05, "loss": 0.0616, "step": 18590 }, { "epoch": 2.61, "learning_rate": 4.5651319483436275e-05, "loss": 0.101, "step": 18592 }, { "epoch": 2.61, "learning_rate": 4.565085158150852e-05, "loss": 0.0599, "step": 18594 }, { "epoch": 2.61, "learning_rate": 4.565038367958076e-05, "loss": 0.0807, "step": 18596 }, { "epoch": 2.61, "learning_rate": 4.5649915777653006e-05, "loss": 0.051, "step": 18598 }, { "epoch": 2.61, "learning_rate": 4.5649447875725245e-05, "loss": 0.076, "step": 18600 }, { "epoch": 2.61, "learning_rate": 4.56489799737975e-05, "loss": 0.0665, "step": 18602 }, { "epoch": 2.61, "learning_rate": 4.564851207186974e-05, "loss": 0.0898, "step": 18604 }, { "epoch": 2.61, "learning_rate": 4.564804416994198e-05, "loss": 0.0615, "step": 18606 }, { "epoch": 2.61, "learning_rate": 4.564757626801422e-05, "loss": 0.0567, "step": 18608 }, { "epoch": 2.61, "learning_rate": 4.5647108366086475e-05, "loss": 0.0735, "step": 18610 }, { "epoch": 2.61, "learning_rate": 4.5646640464158714e-05, "loss": 0.0742, "step": 18612 }, { "epoch": 2.61, "learning_rate": 4.564617256223096e-05, "loss": 0.0531, "step": 18614 }, { "epoch": 2.61, "learning_rate": 4.56457046603032e-05, "loss": 0.0808, "step": 18616 }, { "epoch": 2.61, "learning_rate": 4.5645236758375445e-05, "loss": 0.0787, "step": 18618 }, { "epoch": 2.61, "learning_rate": 4.564476885644769e-05, "loss": 0.0624, "step": 18620 }, { "epoch": 2.61, "learning_rate": 4.564430095451994e-05, "loss": 0.0635, "step": 18622 }, { "epoch": 2.61, "learning_rate": 4.5643833052592176e-05, "loss": 0.0672, "step": 18624 }, { "epoch": 2.61, "learning_rate": 4.564336515066442e-05, "loss": 0.0847, "step": 18626 }, { "epoch": 2.61, "learning_rate": 4.564289724873667e-05, "loss": 0.0717, "step": 18628 }, { "epoch": 2.62, "learning_rate": 4.5642429346808914e-05, "loss": 0.0541, "step": 18630 }, { "epoch": 2.62, "learning_rate": 4.564196144488115e-05, "loss": 0.0768, "step": 18632 }, { "epoch": 2.62, "learning_rate": 4.56414935429534e-05, "loss": 0.0713, "step": 18634 }, { "epoch": 2.62, "learning_rate": 4.5641025641025645e-05, "loss": 0.066, "step": 18636 }, { "epoch": 2.62, "learning_rate": 4.564055773909789e-05, "loss": 0.0893, "step": 18638 }, { "epoch": 2.62, "learning_rate": 4.564008983717013e-05, "loss": 0.0778, "step": 18640 }, { "epoch": 2.62, "learning_rate": 4.5639621935242376e-05, "loss": 0.0615, "step": 18642 }, { "epoch": 2.62, "learning_rate": 4.563915403331462e-05, "loss": 0.0626, "step": 18644 }, { "epoch": 2.62, "learning_rate": 4.563868613138687e-05, "loss": 0.0516, "step": 18646 }, { "epoch": 2.62, "learning_rate": 4.5638218229459107e-05, "loss": 0.0506, "step": 18648 }, { "epoch": 2.62, "learning_rate": 4.563775032753135e-05, "loss": 0.0568, "step": 18650 }, { "epoch": 2.62, "learning_rate": 4.563728242560359e-05, "loss": 0.057, "step": 18652 }, { "epoch": 2.62, "learning_rate": 4.5636814523675844e-05, "loss": 0.062, "step": 18654 }, { "epoch": 2.62, "learning_rate": 4.5636346621748083e-05, "loss": 0.0646, "step": 18656 }, { "epoch": 2.62, "learning_rate": 4.563587871982033e-05, "loss": 0.0736, "step": 18658 }, { "epoch": 2.62, "learning_rate": 4.563541081789257e-05, "loss": 0.0544, "step": 18660 }, { "epoch": 2.62, "learning_rate": 4.563494291596482e-05, "loss": 0.072, "step": 18662 }, { "epoch": 2.62, "learning_rate": 4.563447501403706e-05, "loss": 0.038, "step": 18664 }, { "epoch": 2.62, "learning_rate": 4.5634007112109306e-05, "loss": 0.0732, "step": 18666 }, { "epoch": 2.62, "learning_rate": 4.5633539210181545e-05, "loss": 0.0632, "step": 18668 }, { "epoch": 2.62, "learning_rate": 4.563307130825379e-05, "loss": 0.0554, "step": 18670 }, { "epoch": 2.62, "learning_rate": 4.563260340632604e-05, "loss": 0.0718, "step": 18672 }, { "epoch": 2.62, "learning_rate": 4.563213550439828e-05, "loss": 0.0615, "step": 18674 }, { "epoch": 2.62, "learning_rate": 4.563166760247052e-05, "loss": 0.0532, "step": 18676 }, { "epoch": 2.62, "learning_rate": 4.563119970054277e-05, "loss": 0.0494, "step": 18678 }, { "epoch": 2.62, "learning_rate": 4.5630731798615014e-05, "loss": 0.0624, "step": 18680 }, { "epoch": 2.62, "learning_rate": 4.563026389668726e-05, "loss": 0.0544, "step": 18682 }, { "epoch": 2.62, "learning_rate": 4.56297959947595e-05, "loss": 0.0613, "step": 18684 }, { "epoch": 2.62, "learning_rate": 4.5629328092831745e-05, "loss": 0.0713, "step": 18686 }, { "epoch": 2.62, "learning_rate": 4.562886019090399e-05, "loss": 0.0632, "step": 18688 }, { "epoch": 2.62, "learning_rate": 4.562839228897624e-05, "loss": 0.0607, "step": 18690 }, { "epoch": 2.62, "learning_rate": 4.5627924387048476e-05, "loss": 0.068, "step": 18692 }, { "epoch": 2.62, "learning_rate": 4.562745648512072e-05, "loss": 0.0653, "step": 18694 }, { "epoch": 2.62, "learning_rate": 4.562698858319297e-05, "loss": 0.0796, "step": 18696 }, { "epoch": 2.62, "learning_rate": 4.5626520681265214e-05, "loss": 0.0606, "step": 18698 }, { "epoch": 2.62, "learning_rate": 4.562605277933745e-05, "loss": 0.0678, "step": 18700 }, { "epoch": 2.63, "learning_rate": 4.56255848774097e-05, "loss": 0.0536, "step": 18702 }, { "epoch": 2.63, "learning_rate": 4.562511697548194e-05, "loss": 0.0632, "step": 18704 }, { "epoch": 2.63, "learning_rate": 4.562464907355419e-05, "loss": 0.0571, "step": 18706 }, { "epoch": 2.63, "learning_rate": 4.562418117162643e-05, "loss": 0.0728, "step": 18708 }, { "epoch": 2.63, "learning_rate": 4.5623713269698676e-05, "loss": 0.0751, "step": 18710 }, { "epoch": 2.63, "learning_rate": 4.5623245367770915e-05, "loss": 0.073, "step": 18712 }, { "epoch": 2.63, "learning_rate": 4.562277746584316e-05, "loss": 0.0533, "step": 18714 }, { "epoch": 2.63, "learning_rate": 4.5622309563915407e-05, "loss": 0.0686, "step": 18716 }, { "epoch": 2.63, "learning_rate": 4.562184166198765e-05, "loss": 0.0733, "step": 18718 }, { "epoch": 2.63, "learning_rate": 4.562137376005989e-05, "loss": 0.0781, "step": 18720 }, { "epoch": 2.63, "learning_rate": 4.562090585813214e-05, "loss": 0.0726, "step": 18722 }, { "epoch": 2.63, "learning_rate": 4.5620437956204383e-05, "loss": 0.072, "step": 18724 }, { "epoch": 2.63, "learning_rate": 4.561997005427663e-05, "loss": 0.056, "step": 18726 }, { "epoch": 2.63, "learning_rate": 4.561950215234887e-05, "loss": 0.0599, "step": 18728 }, { "epoch": 2.63, "learning_rate": 4.5619034250421114e-05, "loss": 0.0684, "step": 18730 }, { "epoch": 2.63, "learning_rate": 4.561856634849336e-05, "loss": 0.0667, "step": 18732 }, { "epoch": 2.63, "learning_rate": 4.5618098446565606e-05, "loss": 0.083, "step": 18734 }, { "epoch": 2.63, "learning_rate": 4.5617630544637845e-05, "loss": 0.0546, "step": 18736 }, { "epoch": 2.63, "learning_rate": 4.561716264271009e-05, "loss": 0.0572, "step": 18738 }, { "epoch": 2.63, "learning_rate": 4.561669474078234e-05, "loss": 0.0632, "step": 18740 }, { "epoch": 2.63, "learning_rate": 4.561622683885458e-05, "loss": 0.0497, "step": 18742 }, { "epoch": 2.63, "learning_rate": 4.561575893692682e-05, "loss": 0.0471, "step": 18744 }, { "epoch": 2.63, "learning_rate": 4.561529103499906e-05, "loss": 0.062, "step": 18746 }, { "epoch": 2.63, "learning_rate": 4.561482313307131e-05, "loss": 0.0707, "step": 18748 }, { "epoch": 2.63, "learning_rate": 4.561435523114355e-05, "loss": 0.0727, "step": 18750 }, { "epoch": 2.63, "learning_rate": 4.56138873292158e-05, "loss": 0.0546, "step": 18752 }, { "epoch": 2.63, "learning_rate": 4.561341942728804e-05, "loss": 0.0725, "step": 18754 }, { "epoch": 2.63, "learning_rate": 4.5612951525360284e-05, "loss": 0.0531, "step": 18756 }, { "epoch": 2.63, "learning_rate": 4.561248362343253e-05, "loss": 0.0847, "step": 18758 }, { "epoch": 2.63, "learning_rate": 4.5612015721504776e-05, "loss": 0.0603, "step": 18760 }, { "epoch": 2.63, "learning_rate": 4.5611547819577015e-05, "loss": 0.0646, "step": 18762 }, { "epoch": 2.63, "learning_rate": 4.561107991764926e-05, "loss": 0.0671, "step": 18764 }, { "epoch": 2.63, "learning_rate": 4.561061201572151e-05, "loss": 0.0626, "step": 18766 }, { "epoch": 2.63, "learning_rate": 4.561014411379375e-05, "loss": 0.0754, "step": 18768 }, { "epoch": 2.63, "learning_rate": 4.560967621186599e-05, "loss": 0.0583, "step": 18770 }, { "epoch": 2.64, "learning_rate": 4.560920830993824e-05, "loss": 0.0808, "step": 18772 }, { "epoch": 2.64, "learning_rate": 4.5608740408010484e-05, "loss": 0.0559, "step": 18774 }, { "epoch": 2.64, "learning_rate": 4.560827250608273e-05, "loss": 0.0659, "step": 18776 }, { "epoch": 2.64, "learning_rate": 4.560780460415497e-05, "loss": 0.0571, "step": 18778 }, { "epoch": 2.64, "learning_rate": 4.5607336702227215e-05, "loss": 0.0578, "step": 18780 }, { "epoch": 2.64, "learning_rate": 4.5606868800299454e-05, "loss": 0.0604, "step": 18782 }, { "epoch": 2.64, "learning_rate": 4.560640089837171e-05, "loss": 0.0682, "step": 18784 }, { "epoch": 2.64, "learning_rate": 4.5605932996443946e-05, "loss": 0.0456, "step": 18786 }, { "epoch": 2.64, "learning_rate": 4.560546509451619e-05, "loss": 0.0641, "step": 18788 }, { "epoch": 2.64, "learning_rate": 4.560499719258843e-05, "loss": 0.0702, "step": 18790 }, { "epoch": 2.64, "learning_rate": 4.5604529290660683e-05, "loss": 0.0541, "step": 18792 }, { "epoch": 2.64, "learning_rate": 4.560406138873292e-05, "loss": 0.0545, "step": 18794 }, { "epoch": 2.64, "learning_rate": 4.560359348680517e-05, "loss": 0.0776, "step": 18796 }, { "epoch": 2.64, "learning_rate": 4.560312558487741e-05, "loss": 0.0748, "step": 18798 }, { "epoch": 2.64, "learning_rate": 4.5602657682949654e-05, "loss": 0.062, "step": 18800 }, { "epoch": 2.64, "learning_rate": 4.56021897810219e-05, "loss": 0.0597, "step": 18802 }, { "epoch": 2.64, "learning_rate": 4.5601721879094145e-05, "loss": 0.0576, "step": 18804 }, { "epoch": 2.64, "learning_rate": 4.5601253977166385e-05, "loss": 0.0621, "step": 18806 }, { "epoch": 2.64, "learning_rate": 4.560078607523863e-05, "loss": 0.065, "step": 18808 }, { "epoch": 2.64, "learning_rate": 4.5600318173310876e-05, "loss": 0.0754, "step": 18810 }, { "epoch": 2.64, "learning_rate": 4.559985027138312e-05, "loss": 0.068, "step": 18812 }, { "epoch": 2.64, "learning_rate": 4.559938236945536e-05, "loss": 0.0525, "step": 18814 }, { "epoch": 2.64, "learning_rate": 4.559891446752761e-05, "loss": 0.0633, "step": 18816 }, { "epoch": 2.64, "learning_rate": 4.559844656559985e-05, "loss": 0.0751, "step": 18818 }, { "epoch": 2.64, "learning_rate": 4.55979786636721e-05, "loss": 0.0747, "step": 18820 }, { "epoch": 2.64, "learning_rate": 4.559751076174434e-05, "loss": 0.0561, "step": 18822 }, { "epoch": 2.64, "learning_rate": 4.5597042859816584e-05, "loss": 0.0641, "step": 18824 }, { "epoch": 2.64, "learning_rate": 4.559657495788883e-05, "loss": 0.0682, "step": 18826 }, { "epoch": 2.64, "learning_rate": 4.5596107055961076e-05, "loss": 0.0755, "step": 18828 }, { "epoch": 2.64, "learning_rate": 4.5595639154033315e-05, "loss": 0.0675, "step": 18830 }, { "epoch": 2.64, "learning_rate": 4.559517125210556e-05, "loss": 0.0602, "step": 18832 }, { "epoch": 2.64, "learning_rate": 4.55947033501778e-05, "loss": 0.0609, "step": 18834 }, { "epoch": 2.64, "learning_rate": 4.559423544825005e-05, "loss": 0.0635, "step": 18836 }, { "epoch": 2.64, "learning_rate": 4.559376754632229e-05, "loss": 0.0697, "step": 18838 }, { "epoch": 2.64, "learning_rate": 4.559329964439454e-05, "loss": 0.0515, "step": 18840 }, { "epoch": 2.64, "learning_rate": 4.559283174246678e-05, "loss": 0.0562, "step": 18842 }, { "epoch": 2.65, "learning_rate": 4.559236384053902e-05, "loss": 0.0694, "step": 18844 }, { "epoch": 2.65, "learning_rate": 4.559189593861127e-05, "loss": 0.0796, "step": 18846 }, { "epoch": 2.65, "learning_rate": 4.5591428036683515e-05, "loss": 0.0685, "step": 18848 }, { "epoch": 2.65, "learning_rate": 4.5590960134755754e-05, "loss": 0.0648, "step": 18850 }, { "epoch": 2.65, "learning_rate": 4.5590492232828e-05, "loss": 0.0689, "step": 18852 }, { "epoch": 2.65, "learning_rate": 4.5590024330900246e-05, "loss": 0.0586, "step": 18854 }, { "epoch": 2.65, "learning_rate": 4.558955642897249e-05, "loss": 0.0408, "step": 18856 }, { "epoch": 2.65, "learning_rate": 4.558908852704473e-05, "loss": 0.0653, "step": 18858 }, { "epoch": 2.65, "learning_rate": 4.558862062511698e-05, "loss": 0.0431, "step": 18860 }, { "epoch": 2.65, "learning_rate": 4.558815272318922e-05, "loss": 0.0681, "step": 18862 }, { "epoch": 2.65, "learning_rate": 4.558768482126147e-05, "loss": 0.0815, "step": 18864 }, { "epoch": 2.65, "learning_rate": 4.558721691933371e-05, "loss": 0.0746, "step": 18866 }, { "epoch": 2.65, "learning_rate": 4.5586749017405954e-05, "loss": 0.0648, "step": 18868 }, { "epoch": 2.65, "learning_rate": 4.55862811154782e-05, "loss": 0.0511, "step": 18870 }, { "epoch": 2.65, "learning_rate": 4.5585813213550445e-05, "loss": 0.0745, "step": 18872 }, { "epoch": 2.65, "learning_rate": 4.5585345311622685e-05, "loss": 0.0765, "step": 18874 }, { "epoch": 2.65, "learning_rate": 4.558487740969493e-05, "loss": 0.0779, "step": 18876 }, { "epoch": 2.65, "learning_rate": 4.558440950776717e-05, "loss": 0.0693, "step": 18878 }, { "epoch": 2.65, "learning_rate": 4.558394160583942e-05, "loss": 0.0779, "step": 18880 }, { "epoch": 2.65, "learning_rate": 4.558347370391166e-05, "loss": 0.0706, "step": 18882 }, { "epoch": 2.65, "learning_rate": 4.558300580198391e-05, "loss": 0.0539, "step": 18884 }, { "epoch": 2.65, "learning_rate": 4.5582537900056146e-05, "loss": 0.0695, "step": 18886 }, { "epoch": 2.65, "learning_rate": 4.55820699981284e-05, "loss": 0.0581, "step": 18888 }, { "epoch": 2.65, "learning_rate": 4.558160209620064e-05, "loss": 0.0647, "step": 18890 }, { "epoch": 2.65, "learning_rate": 4.5581134194272884e-05, "loss": 0.0665, "step": 18892 }, { "epoch": 2.65, "learning_rate": 4.558066629234512e-05, "loss": 0.063, "step": 18894 }, { "epoch": 2.65, "learning_rate": 4.558019839041737e-05, "loss": 0.06, "step": 18896 }, { "epoch": 2.65, "learning_rate": 4.5579730488489615e-05, "loss": 0.0868, "step": 18898 }, { "epoch": 2.65, "learning_rate": 4.557926258656186e-05, "loss": 0.059, "step": 18900 }, { "epoch": 2.65, "learning_rate": 4.55787946846341e-05, "loss": 0.0526, "step": 18902 }, { "epoch": 2.65, "learning_rate": 4.5578326782706346e-05, "loss": 0.0748, "step": 18904 }, { "epoch": 2.65, "learning_rate": 4.557785888077859e-05, "loss": 0.069, "step": 18906 }, { "epoch": 2.65, "learning_rate": 4.557739097885084e-05, "loss": 0.0612, "step": 18908 }, { "epoch": 2.65, "learning_rate": 4.557692307692308e-05, "loss": 0.049, "step": 18910 }, { "epoch": 2.65, "learning_rate": 4.557645517499532e-05, "loss": 0.0793, "step": 18912 }, { "epoch": 2.65, "learning_rate": 4.557598727306757e-05, "loss": 0.0985, "step": 18914 }, { "epoch": 2.66, "learning_rate": 4.5575519371139815e-05, "loss": 0.0623, "step": 18916 }, { "epoch": 2.66, "learning_rate": 4.5575051469212054e-05, "loss": 0.0652, "step": 18918 }, { "epoch": 2.66, "learning_rate": 4.55745835672843e-05, "loss": 0.0655, "step": 18920 }, { "epoch": 2.66, "learning_rate": 4.5574115665356546e-05, "loss": 0.0721, "step": 18922 }, { "epoch": 2.66, "learning_rate": 4.557364776342879e-05, "loss": 0.0596, "step": 18924 }, { "epoch": 2.66, "learning_rate": 4.557317986150103e-05, "loss": 0.0819, "step": 18926 }, { "epoch": 2.66, "learning_rate": 4.557271195957328e-05, "loss": 0.0507, "step": 18928 }, { "epoch": 2.66, "learning_rate": 4.5572244057645516e-05, "loss": 0.0519, "step": 18930 }, { "epoch": 2.66, "learning_rate": 4.557177615571777e-05, "loss": 0.0694, "step": 18932 }, { "epoch": 2.66, "learning_rate": 4.557130825379001e-05, "loss": 0.0723, "step": 18934 }, { "epoch": 2.66, "learning_rate": 4.5570840351862254e-05, "loss": 0.0775, "step": 18936 }, { "epoch": 2.66, "learning_rate": 4.557037244993449e-05, "loss": 0.0823, "step": 18938 }, { "epoch": 2.66, "learning_rate": 4.5569904548006745e-05, "loss": 0.063, "step": 18940 }, { "epoch": 2.66, "learning_rate": 4.5569436646078985e-05, "loss": 0.0498, "step": 18942 }, { "epoch": 2.66, "learning_rate": 4.556896874415123e-05, "loss": 0.0713, "step": 18944 }, { "epoch": 2.66, "learning_rate": 4.556850084222347e-05, "loss": 0.0698, "step": 18946 }, { "epoch": 2.66, "learning_rate": 4.5568032940295716e-05, "loss": 0.065, "step": 18948 }, { "epoch": 2.66, "learning_rate": 4.556756503836796e-05, "loss": 0.0562, "step": 18950 }, { "epoch": 2.66, "learning_rate": 4.556709713644021e-05, "loss": 0.0549, "step": 18952 }, { "epoch": 2.66, "learning_rate": 4.5566629234512447e-05, "loss": 0.058, "step": 18954 }, { "epoch": 2.66, "learning_rate": 4.556616133258469e-05, "loss": 0.0707, "step": 18956 }, { "epoch": 2.66, "learning_rate": 4.556569343065694e-05, "loss": 0.0784, "step": 18958 }, { "epoch": 2.66, "learning_rate": 4.5565225528729184e-05, "loss": 0.057, "step": 18960 }, { "epoch": 2.66, "learning_rate": 4.556475762680142e-05, "loss": 0.0569, "step": 18962 }, { "epoch": 2.66, "learning_rate": 4.556428972487367e-05, "loss": 0.0696, "step": 18964 }, { "epoch": 2.66, "learning_rate": 4.5563821822945915e-05, "loss": 0.0619, "step": 18966 }, { "epoch": 2.66, "learning_rate": 4.556335392101816e-05, "loss": 0.0745, "step": 18968 }, { "epoch": 2.66, "learning_rate": 4.55628860190904e-05, "loss": 0.0691, "step": 18970 }, { "epoch": 2.66, "learning_rate": 4.5562418117162646e-05, "loss": 0.084, "step": 18972 }, { "epoch": 2.66, "learning_rate": 4.556195021523489e-05, "loss": 0.0603, "step": 18974 }, { "epoch": 2.66, "learning_rate": 4.556148231330714e-05, "loss": 0.0585, "step": 18976 }, { "epoch": 2.66, "learning_rate": 4.556101441137938e-05, "loss": 0.0505, "step": 18978 }, { "epoch": 2.66, "learning_rate": 4.556054650945162e-05, "loss": 0.0765, "step": 18980 }, { "epoch": 2.66, "learning_rate": 4.556007860752386e-05, "loss": 0.0823, "step": 18982 }, { "epoch": 2.66, "learning_rate": 4.5559610705596115e-05, "loss": 0.0715, "step": 18984 }, { "epoch": 2.67, "learning_rate": 4.5559142803668354e-05, "loss": 0.0713, "step": 18986 }, { "epoch": 2.67, "learning_rate": 4.55586749017406e-05, "loss": 0.078, "step": 18988 }, { "epoch": 2.67, "learning_rate": 4.555820699981284e-05, "loss": 0.0643, "step": 18990 }, { "epoch": 2.67, "learning_rate": 4.5557739097885085e-05, "loss": 0.057, "step": 18992 }, { "epoch": 2.67, "learning_rate": 4.555727119595733e-05, "loss": 0.067, "step": 18994 }, { "epoch": 2.67, "learning_rate": 4.555680329402957e-05, "loss": 0.0474, "step": 18996 }, { "epoch": 2.67, "learning_rate": 4.5556335392101816e-05, "loss": 0.0659, "step": 18998 }, { "epoch": 2.67, "learning_rate": 4.555586749017406e-05, "loss": 0.0504, "step": 19000 }, { "epoch": 2.67, "eval_gen_len": 30.431, "eval_loss": 1.0705924034118652, "eval_meteor": 0.0441, "eval_runtime": 16.562, "eval_samples_per_second": 3.502, "eval_steps_per_second": 0.483, "step": 19000 }, { "epoch": 2.67, "learning_rate": 4.555539958824631e-05, "loss": 0.0504, "step": 19002 }, { "epoch": 2.67, "learning_rate": 4.555493168631855e-05, "loss": 0.0731, "step": 19004 }, { "epoch": 2.67, "learning_rate": 4.555446378439079e-05, "loss": 0.0599, "step": 19006 }, { "epoch": 2.67, "learning_rate": 4.555399588246303e-05, "loss": 0.0748, "step": 19008 }, { "epoch": 2.67, "learning_rate": 4.5553527980535285e-05, "loss": 0.06, "step": 19010 }, { "epoch": 2.67, "learning_rate": 4.5553060078607524e-05, "loss": 0.0765, "step": 19012 }, { "epoch": 2.67, "learning_rate": 4.555259217667977e-05, "loss": 0.069, "step": 19014 }, { "epoch": 2.67, "learning_rate": 4.555212427475201e-05, "loss": 0.0649, "step": 19016 }, { "epoch": 2.67, "learning_rate": 4.555165637282426e-05, "loss": 0.0666, "step": 19018 }, { "epoch": 2.67, "learning_rate": 4.55511884708965e-05, "loss": 0.0585, "step": 19020 }, { "epoch": 2.67, "learning_rate": 4.5550720568968747e-05, "loss": 0.0764, "step": 19022 }, { "epoch": 2.67, "learning_rate": 4.5550252667040986e-05, "loss": 0.0548, "step": 19024 }, { "epoch": 2.67, "learning_rate": 4.554978476511323e-05, "loss": 0.0594, "step": 19026 }, { "epoch": 2.67, "learning_rate": 4.554931686318548e-05, "loss": 0.0696, "step": 19028 }, { "epoch": 2.67, "learning_rate": 4.5548848961257723e-05, "loss": 0.0481, "step": 19030 }, { "epoch": 2.67, "learning_rate": 4.554838105932996e-05, "loss": 0.0692, "step": 19032 }, { "epoch": 2.67, "learning_rate": 4.554791315740221e-05, "loss": 0.0682, "step": 19034 }, { "epoch": 2.67, "learning_rate": 4.5547445255474454e-05, "loss": 0.0632, "step": 19036 }, { "epoch": 2.67, "learning_rate": 4.55469773535467e-05, "loss": 0.0829, "step": 19038 }, { "epoch": 2.67, "learning_rate": 4.554650945161894e-05, "loss": 0.0775, "step": 19040 }, { "epoch": 2.67, "learning_rate": 4.5546041549691185e-05, "loss": 0.0518, "step": 19042 }, { "epoch": 2.67, "learning_rate": 4.554557364776343e-05, "loss": 0.0752, "step": 19044 }, { "epoch": 2.67, "learning_rate": 4.554510574583568e-05, "loss": 0.0581, "step": 19046 }, { "epoch": 2.67, "learning_rate": 4.5544637843907916e-05, "loss": 0.0826, "step": 19048 }, { "epoch": 2.67, "learning_rate": 4.554416994198016e-05, "loss": 0.0852, "step": 19050 }, { "epoch": 2.67, "learning_rate": 4.554370204005241e-05, "loss": 0.0631, "step": 19052 }, { "epoch": 2.67, "learning_rate": 4.5543234138124654e-05, "loss": 0.0701, "step": 19054 }, { "epoch": 2.67, "learning_rate": 4.554276623619689e-05, "loss": 0.0688, "step": 19056 }, { "epoch": 2.68, "learning_rate": 4.554229833426914e-05, "loss": 0.0678, "step": 19058 }, { "epoch": 2.68, "learning_rate": 4.554183043234138e-05, "loss": 0.0652, "step": 19060 }, { "epoch": 2.68, "learning_rate": 4.554136253041363e-05, "loss": 0.0785, "step": 19062 }, { "epoch": 2.68, "learning_rate": 4.554089462848587e-05, "loss": 0.0489, "step": 19064 }, { "epoch": 2.68, "learning_rate": 4.5540426726558116e-05, "loss": 0.0755, "step": 19066 }, { "epoch": 2.68, "learning_rate": 4.5539958824630355e-05, "loss": 0.0735, "step": 19068 }, { "epoch": 2.68, "learning_rate": 4.553949092270261e-05, "loss": 0.044, "step": 19070 }, { "epoch": 2.68, "learning_rate": 4.553902302077485e-05, "loss": 0.0729, "step": 19072 }, { "epoch": 2.68, "learning_rate": 4.553855511884709e-05, "loss": 0.0615, "step": 19074 }, { "epoch": 2.68, "learning_rate": 4.553808721691933e-05, "loss": 0.0526, "step": 19076 }, { "epoch": 2.68, "learning_rate": 4.553761931499158e-05, "loss": 0.0603, "step": 19078 }, { "epoch": 2.68, "learning_rate": 4.5537151413063824e-05, "loss": 0.087, "step": 19080 }, { "epoch": 2.68, "learning_rate": 4.553668351113607e-05, "loss": 0.0739, "step": 19082 }, { "epoch": 2.68, "learning_rate": 4.553621560920831e-05, "loss": 0.0643, "step": 19084 }, { "epoch": 2.68, "learning_rate": 4.5535747707280555e-05, "loss": 0.0575, "step": 19086 }, { "epoch": 2.68, "learning_rate": 4.55352798053528e-05, "loss": 0.0624, "step": 19088 }, { "epoch": 2.68, "learning_rate": 4.5534811903425047e-05, "loss": 0.0632, "step": 19090 }, { "epoch": 2.68, "learning_rate": 4.5534344001497286e-05, "loss": 0.0607, "step": 19092 }, { "epoch": 2.68, "learning_rate": 4.553387609956953e-05, "loss": 0.0689, "step": 19094 }, { "epoch": 2.68, "learning_rate": 4.553340819764178e-05, "loss": 0.0604, "step": 19096 }, { "epoch": 2.68, "learning_rate": 4.5532940295714023e-05, "loss": 0.0714, "step": 19098 }, { "epoch": 2.68, "learning_rate": 4.553247239378626e-05, "loss": 0.0531, "step": 19100 }, { "epoch": 2.68, "learning_rate": 4.553200449185851e-05, "loss": 0.0674, "step": 19102 }, { "epoch": 2.68, "learning_rate": 4.5531536589930754e-05, "loss": 0.052, "step": 19104 }, { "epoch": 2.68, "learning_rate": 4.5531068688003e-05, "loss": 0.0647, "step": 19106 }, { "epoch": 2.68, "learning_rate": 4.553060078607524e-05, "loss": 0.0566, "step": 19108 }, { "epoch": 2.68, "learning_rate": 4.5530132884147485e-05, "loss": 0.0649, "step": 19110 }, { "epoch": 2.68, "learning_rate": 4.5529664982219724e-05, "loss": 0.0778, "step": 19112 }, { "epoch": 2.68, "learning_rate": 4.552919708029198e-05, "loss": 0.0608, "step": 19114 }, { "epoch": 2.68, "learning_rate": 4.5528729178364216e-05, "loss": 0.0413, "step": 19116 }, { "epoch": 2.68, "learning_rate": 4.552826127643646e-05, "loss": 0.0594, "step": 19118 }, { "epoch": 2.68, "learning_rate": 4.55277933745087e-05, "loss": 0.0597, "step": 19120 }, { "epoch": 2.68, "learning_rate": 4.552732547258095e-05, "loss": 0.0633, "step": 19122 }, { "epoch": 2.68, "learning_rate": 4.552685757065319e-05, "loss": 0.05, "step": 19124 }, { "epoch": 2.68, "learning_rate": 4.552638966872544e-05, "loss": 0.073, "step": 19126 }, { "epoch": 2.69, "learning_rate": 4.552592176679768e-05, "loss": 0.0781, "step": 19128 }, { "epoch": 2.69, "learning_rate": 4.5525453864869924e-05, "loss": 0.0681, "step": 19130 }, { "epoch": 2.69, "learning_rate": 4.552498596294217e-05, "loss": 0.0609, "step": 19132 }, { "epoch": 2.69, "learning_rate": 4.5524518061014416e-05, "loss": 0.0785, "step": 19134 }, { "epoch": 2.69, "learning_rate": 4.5524050159086655e-05, "loss": 0.061, "step": 19136 }, { "epoch": 2.69, "learning_rate": 4.55235822571589e-05, "loss": 0.0505, "step": 19138 }, { "epoch": 2.69, "learning_rate": 4.552311435523115e-05, "loss": 0.0731, "step": 19140 }, { "epoch": 2.69, "learning_rate": 4.552264645330339e-05, "loss": 0.0712, "step": 19142 }, { "epoch": 2.69, "learning_rate": 4.552217855137563e-05, "loss": 0.0726, "step": 19144 }, { "epoch": 2.69, "learning_rate": 4.552171064944788e-05, "loss": 0.0575, "step": 19146 }, { "epoch": 2.69, "learning_rate": 4.5521242747520124e-05, "loss": 0.0604, "step": 19148 }, { "epoch": 2.69, "learning_rate": 4.552077484559237e-05, "loss": 0.0475, "step": 19150 }, { "epoch": 2.69, "learning_rate": 4.552030694366461e-05, "loss": 0.0734, "step": 19152 }, { "epoch": 2.69, "learning_rate": 4.5519839041736855e-05, "loss": 0.0659, "step": 19154 }, { "epoch": 2.69, "learning_rate": 4.5519371139809094e-05, "loss": 0.0629, "step": 19156 }, { "epoch": 2.69, "learning_rate": 4.5518903237881347e-05, "loss": 0.0676, "step": 19158 }, { "epoch": 2.69, "learning_rate": 4.5518435335953586e-05, "loss": 0.0714, "step": 19160 }, { "epoch": 2.69, "learning_rate": 4.551796743402583e-05, "loss": 0.0646, "step": 19162 }, { "epoch": 2.69, "learning_rate": 4.551749953209807e-05, "loss": 0.0534, "step": 19164 }, { "epoch": 2.69, "learning_rate": 4.5517031630170323e-05, "loss": 0.0719, "step": 19166 }, { "epoch": 2.69, "learning_rate": 4.551656372824256e-05, "loss": 0.0642, "step": 19168 }, { "epoch": 2.69, "learning_rate": 4.551609582631481e-05, "loss": 0.066, "step": 19170 }, { "epoch": 2.69, "learning_rate": 4.551562792438705e-05, "loss": 0.0562, "step": 19172 }, { "epoch": 2.69, "learning_rate": 4.5515160022459294e-05, "loss": 0.0592, "step": 19174 }, { "epoch": 2.69, "learning_rate": 4.551469212053154e-05, "loss": 0.0606, "step": 19176 }, { "epoch": 2.69, "learning_rate": 4.5514224218603785e-05, "loss": 0.0603, "step": 19178 }, { "epoch": 2.69, "learning_rate": 4.5513756316676024e-05, "loss": 0.068, "step": 19180 }, { "epoch": 2.69, "learning_rate": 4.551328841474827e-05, "loss": 0.0748, "step": 19182 }, { "epoch": 2.69, "learning_rate": 4.5512820512820516e-05, "loss": 0.0691, "step": 19184 }, { "epoch": 2.69, "learning_rate": 4.551235261089276e-05, "loss": 0.0742, "step": 19186 }, { "epoch": 2.69, "learning_rate": 4.5511884708965e-05, "loss": 0.0575, "step": 19188 }, { "epoch": 2.69, "learning_rate": 4.551141680703725e-05, "loss": 0.0545, "step": 19190 }, { "epoch": 2.69, "learning_rate": 4.551094890510949e-05, "loss": 0.0569, "step": 19192 }, { "epoch": 2.69, "learning_rate": 4.551048100318174e-05, "loss": 0.0844, "step": 19194 }, { "epoch": 2.69, "learning_rate": 4.551001310125398e-05, "loss": 0.0789, "step": 19196 }, { "epoch": 2.69, "learning_rate": 4.5509545199326224e-05, "loss": 0.0648, "step": 19198 }, { "epoch": 2.7, "learning_rate": 4.550907729739847e-05, "loss": 0.0764, "step": 19200 }, { "epoch": 2.7, "learning_rate": 4.5508609395470716e-05, "loss": 0.0917, "step": 19202 }, { "epoch": 2.7, "learning_rate": 4.5508141493542955e-05, "loss": 0.0543, "step": 19204 }, { "epoch": 2.7, "learning_rate": 4.55076735916152e-05, "loss": 0.0609, "step": 19206 }, { "epoch": 2.7, "learning_rate": 4.550720568968744e-05, "loss": 0.0483, "step": 19208 }, { "epoch": 2.7, "learning_rate": 4.550673778775969e-05, "loss": 0.0676, "step": 19210 }, { "epoch": 2.7, "learning_rate": 4.550626988583193e-05, "loss": 0.0832, "step": 19212 }, { "epoch": 2.7, "learning_rate": 4.550580198390418e-05, "loss": 0.0605, "step": 19214 }, { "epoch": 2.7, "learning_rate": 4.550533408197642e-05, "loss": 0.068, "step": 19216 }, { "epoch": 2.7, "learning_rate": 4.550486618004867e-05, "loss": 0.066, "step": 19218 }, { "epoch": 2.7, "learning_rate": 4.550439827812091e-05, "loss": 0.0515, "step": 19220 }, { "epoch": 2.7, "learning_rate": 4.5503930376193155e-05, "loss": 0.086, "step": 19222 }, { "epoch": 2.7, "learning_rate": 4.5503462474265394e-05, "loss": 0.0766, "step": 19224 }, { "epoch": 2.7, "learning_rate": 4.550299457233764e-05, "loss": 0.0666, "step": 19226 }, { "epoch": 2.7, "learning_rate": 4.5502526670409886e-05, "loss": 0.0722, "step": 19228 }, { "epoch": 2.7, "learning_rate": 4.550205876848213e-05, "loss": 0.0682, "step": 19230 }, { "epoch": 2.7, "learning_rate": 4.550159086655437e-05, "loss": 0.0736, "step": 19232 }, { "epoch": 2.7, "learning_rate": 4.550112296462662e-05, "loss": 0.0439, "step": 19234 }, { "epoch": 2.7, "learning_rate": 4.550065506269886e-05, "loss": 0.0654, "step": 19236 }, { "epoch": 2.7, "learning_rate": 4.550018716077111e-05, "loss": 0.0754, "step": 19238 }, { "epoch": 2.7, "learning_rate": 4.549971925884335e-05, "loss": 0.0553, "step": 19240 }, { "epoch": 2.7, "learning_rate": 4.5499251356915594e-05, "loss": 0.0719, "step": 19242 }, { "epoch": 2.7, "learning_rate": 4.549878345498784e-05, "loss": 0.0593, "step": 19244 }, { "epoch": 2.7, "learning_rate": 4.5498315553060085e-05, "loss": 0.0577, "step": 19246 }, { "epoch": 2.7, "learning_rate": 4.5497847651132324e-05, "loss": 0.0555, "step": 19248 }, { "epoch": 2.7, "learning_rate": 4.5497379749204564e-05, "loss": 0.0575, "step": 19250 }, { "epoch": 2.7, "learning_rate": 4.5496911847276816e-05, "loss": 0.0824, "step": 19252 }, { "epoch": 2.7, "learning_rate": 4.5496443945349055e-05, "loss": 0.0802, "step": 19254 }, { "epoch": 2.7, "learning_rate": 4.54959760434213e-05, "loss": 0.0755, "step": 19256 }, { "epoch": 2.7, "learning_rate": 4.549550814149354e-05, "loss": 0.0592, "step": 19258 }, { "epoch": 2.7, "learning_rate": 4.5495040239565786e-05, "loss": 0.0548, "step": 19260 }, { "epoch": 2.7, "learning_rate": 4.549457233763803e-05, "loss": 0.0653, "step": 19262 }, { "epoch": 2.7, "learning_rate": 4.549410443571028e-05, "loss": 0.055, "step": 19264 }, { "epoch": 2.7, "learning_rate": 4.549363653378252e-05, "loss": 0.0724, "step": 19266 }, { "epoch": 2.7, "learning_rate": 4.549316863185476e-05, "loss": 0.0656, "step": 19268 }, { "epoch": 2.7, "learning_rate": 4.549270072992701e-05, "loss": 0.0491, "step": 19270 }, { "epoch": 2.71, "learning_rate": 4.5492232827999255e-05, "loss": 0.0565, "step": 19272 }, { "epoch": 2.71, "learning_rate": 4.5491764926071494e-05, "loss": 0.0735, "step": 19274 }, { "epoch": 2.71, "learning_rate": 4.549129702414374e-05, "loss": 0.0564, "step": 19276 }, { "epoch": 2.71, "learning_rate": 4.5490829122215986e-05, "loss": 0.074, "step": 19278 }, { "epoch": 2.71, "learning_rate": 4.549036122028823e-05, "loss": 0.068, "step": 19280 }, { "epoch": 2.71, "learning_rate": 4.548989331836047e-05, "loss": 0.067, "step": 19282 }, { "epoch": 2.71, "learning_rate": 4.548942541643272e-05, "loss": 0.0689, "step": 19284 }, { "epoch": 2.71, "learning_rate": 4.548895751450496e-05, "loss": 0.0556, "step": 19286 }, { "epoch": 2.71, "learning_rate": 4.548848961257721e-05, "loss": 0.0582, "step": 19288 }, { "epoch": 2.71, "learning_rate": 4.548802171064945e-05, "loss": 0.0523, "step": 19290 }, { "epoch": 2.71, "learning_rate": 4.5487553808721694e-05, "loss": 0.0598, "step": 19292 }, { "epoch": 2.71, "learning_rate": 4.548708590679393e-05, "loss": 0.0523, "step": 19294 }, { "epoch": 2.71, "learning_rate": 4.5486618004866186e-05, "loss": 0.0419, "step": 19296 }, { "epoch": 2.71, "learning_rate": 4.5486150102938425e-05, "loss": 0.0432, "step": 19298 }, { "epoch": 2.71, "learning_rate": 4.548568220101067e-05, "loss": 0.059, "step": 19300 }, { "epoch": 2.71, "learning_rate": 4.548521429908291e-05, "loss": 0.077, "step": 19302 }, { "epoch": 2.71, "learning_rate": 4.5484746397155156e-05, "loss": 0.0592, "step": 19304 }, { "epoch": 2.71, "learning_rate": 4.54842784952274e-05, "loss": 0.0649, "step": 19306 }, { "epoch": 2.71, "learning_rate": 4.548381059329965e-05, "loss": 0.0603, "step": 19308 }, { "epoch": 2.71, "learning_rate": 4.548334269137189e-05, "loss": 0.074, "step": 19310 }, { "epoch": 2.71, "learning_rate": 4.548287478944413e-05, "loss": 0.0663, "step": 19312 }, { "epoch": 2.71, "learning_rate": 4.548240688751638e-05, "loss": 0.0613, "step": 19314 }, { "epoch": 2.71, "learning_rate": 4.5481938985588625e-05, "loss": 0.0723, "step": 19316 }, { "epoch": 2.71, "learning_rate": 4.5481471083660864e-05, "loss": 0.0665, "step": 19318 }, { "epoch": 2.71, "learning_rate": 4.548100318173311e-05, "loss": 0.08, "step": 19320 }, { "epoch": 2.71, "learning_rate": 4.5480535279805355e-05, "loss": 0.0713, "step": 19322 }, { "epoch": 2.71, "learning_rate": 4.54800673778776e-05, "loss": 0.0742, "step": 19324 }, { "epoch": 2.71, "learning_rate": 4.547959947594984e-05, "loss": 0.0652, "step": 19326 }, { "epoch": 2.71, "learning_rate": 4.5479131574022086e-05, "loss": 0.0818, "step": 19328 }, { "epoch": 2.71, "learning_rate": 4.547866367209433e-05, "loss": 0.0762, "step": 19330 }, { "epoch": 2.71, "learning_rate": 4.547819577016658e-05, "loss": 0.0653, "step": 19332 }, { "epoch": 2.71, "learning_rate": 4.547772786823882e-05, "loss": 0.0663, "step": 19334 }, { "epoch": 2.71, "learning_rate": 4.547725996631106e-05, "loss": 0.0596, "step": 19336 }, { "epoch": 2.71, "learning_rate": 4.54767920643833e-05, "loss": 0.0731, "step": 19338 }, { "epoch": 2.71, "learning_rate": 4.5476324162455555e-05, "loss": 0.0628, "step": 19340 }, { "epoch": 2.72, "learning_rate": 4.5475856260527794e-05, "loss": 0.0415, "step": 19342 }, { "epoch": 2.72, "learning_rate": 4.547538835860004e-05, "loss": 0.0531, "step": 19344 }, { "epoch": 2.72, "learning_rate": 4.547492045667228e-05, "loss": 0.088, "step": 19346 }, { "epoch": 2.72, "learning_rate": 4.547445255474453e-05, "loss": 0.084, "step": 19348 }, { "epoch": 2.72, "learning_rate": 4.547398465281677e-05, "loss": 0.0515, "step": 19350 }, { "epoch": 2.72, "learning_rate": 4.547351675088902e-05, "loss": 0.0747, "step": 19352 }, { "epoch": 2.72, "learning_rate": 4.5473048848961256e-05, "loss": 0.0598, "step": 19354 }, { "epoch": 2.72, "learning_rate": 4.54725809470335e-05, "loss": 0.0594, "step": 19356 }, { "epoch": 2.72, "learning_rate": 4.547211304510575e-05, "loss": 0.0726, "step": 19358 }, { "epoch": 2.72, "learning_rate": 4.5471645143177994e-05, "loss": 0.0763, "step": 19360 }, { "epoch": 2.72, "learning_rate": 4.547117724125023e-05, "loss": 0.0562, "step": 19362 }, { "epoch": 2.72, "learning_rate": 4.547070933932248e-05, "loss": 0.0957, "step": 19364 }, { "epoch": 2.72, "learning_rate": 4.5470241437394725e-05, "loss": 0.0545, "step": 19366 }, { "epoch": 2.72, "learning_rate": 4.546977353546697e-05, "loss": 0.0679, "step": 19368 }, { "epoch": 2.72, "learning_rate": 4.546930563353921e-05, "loss": 0.0555, "step": 19370 }, { "epoch": 2.72, "learning_rate": 4.5468837731611456e-05, "loss": 0.0656, "step": 19372 }, { "epoch": 2.72, "learning_rate": 4.54683698296837e-05, "loss": 0.0852, "step": 19374 }, { "epoch": 2.72, "learning_rate": 4.546790192775595e-05, "loss": 0.0689, "step": 19376 }, { "epoch": 2.72, "learning_rate": 4.546743402582819e-05, "loss": 0.068, "step": 19378 }, { "epoch": 2.72, "learning_rate": 4.546696612390043e-05, "loss": 0.0604, "step": 19380 }, { "epoch": 2.72, "learning_rate": 4.546649822197268e-05, "loss": 0.0525, "step": 19382 }, { "epoch": 2.72, "learning_rate": 4.5466030320044925e-05, "loss": 0.0712, "step": 19384 }, { "epoch": 2.72, "learning_rate": 4.5465562418117164e-05, "loss": 0.0672, "step": 19386 }, { "epoch": 2.72, "learning_rate": 4.546509451618941e-05, "loss": 0.0593, "step": 19388 }, { "epoch": 2.72, "learning_rate": 4.546462661426165e-05, "loss": 0.0772, "step": 19390 }, { "epoch": 2.72, "learning_rate": 4.54641587123339e-05, "loss": 0.0518, "step": 19392 }, { "epoch": 2.72, "learning_rate": 4.546369081040614e-05, "loss": 0.0641, "step": 19394 }, { "epoch": 2.72, "learning_rate": 4.5463222908478386e-05, "loss": 0.0677, "step": 19396 }, { "epoch": 2.72, "learning_rate": 4.5462755006550626e-05, "loss": 0.0702, "step": 19398 }, { "epoch": 2.72, "learning_rate": 4.546228710462287e-05, "loss": 0.067, "step": 19400 }, { "epoch": 2.72, "learning_rate": 4.546181920269512e-05, "loss": 0.0771, "step": 19402 }, { "epoch": 2.72, "learning_rate": 4.546135130076736e-05, "loss": 0.0658, "step": 19404 }, { "epoch": 2.72, "learning_rate": 4.54608833988396e-05, "loss": 0.0665, "step": 19406 }, { "epoch": 2.72, "learning_rate": 4.546041549691185e-05, "loss": 0.0538, "step": 19408 }, { "epoch": 2.72, "learning_rate": 4.5459947594984094e-05, "loss": 0.0468, "step": 19410 }, { "epoch": 2.72, "learning_rate": 4.545947969305634e-05, "loss": 0.0793, "step": 19412 }, { "epoch": 2.73, "learning_rate": 4.545901179112858e-05, "loss": 0.0563, "step": 19414 }, { "epoch": 2.73, "learning_rate": 4.5458543889200825e-05, "loss": 0.0623, "step": 19416 }, { "epoch": 2.73, "learning_rate": 4.545807598727307e-05, "loss": 0.0568, "step": 19418 }, { "epoch": 2.73, "learning_rate": 4.545760808534532e-05, "loss": 0.0633, "step": 19420 }, { "epoch": 2.73, "learning_rate": 4.5457140183417556e-05, "loss": 0.0618, "step": 19422 }, { "epoch": 2.73, "learning_rate": 4.54566722814898e-05, "loss": 0.0648, "step": 19424 }, { "epoch": 2.73, "learning_rate": 4.545620437956205e-05, "loss": 0.0549, "step": 19426 }, { "epoch": 2.73, "learning_rate": 4.5455736477634294e-05, "loss": 0.0495, "step": 19428 }, { "epoch": 2.73, "learning_rate": 4.545526857570653e-05, "loss": 0.0602, "step": 19430 }, { "epoch": 2.73, "learning_rate": 4.545480067377878e-05, "loss": 0.0654, "step": 19432 }, { "epoch": 2.73, "learning_rate": 4.545433277185102e-05, "loss": 0.0697, "step": 19434 }, { "epoch": 2.73, "learning_rate": 4.545386486992327e-05, "loss": 0.0556, "step": 19436 }, { "epoch": 2.73, "learning_rate": 4.545339696799551e-05, "loss": 0.0901, "step": 19438 }, { "epoch": 2.73, "learning_rate": 4.5452929066067756e-05, "loss": 0.0668, "step": 19440 }, { "epoch": 2.73, "learning_rate": 4.5452461164139995e-05, "loss": 0.053, "step": 19442 }, { "epoch": 2.73, "learning_rate": 4.545199326221225e-05, "loss": 0.0725, "step": 19444 }, { "epoch": 2.73, "learning_rate": 4.545152536028449e-05, "loss": 0.0628, "step": 19446 }, { "epoch": 2.73, "learning_rate": 4.545105745835673e-05, "loss": 0.0901, "step": 19448 }, { "epoch": 2.73, "learning_rate": 4.545058955642897e-05, "loss": 0.0553, "step": 19450 }, { "epoch": 2.73, "learning_rate": 4.545012165450122e-05, "loss": 0.0812, "step": 19452 }, { "epoch": 2.73, "learning_rate": 4.5449653752573464e-05, "loss": 0.0603, "step": 19454 }, { "epoch": 2.73, "learning_rate": 4.544918585064571e-05, "loss": 0.0691, "step": 19456 }, { "epoch": 2.73, "learning_rate": 4.544871794871795e-05, "loss": 0.0566, "step": 19458 }, { "epoch": 2.73, "learning_rate": 4.5448250046790195e-05, "loss": 0.0724, "step": 19460 }, { "epoch": 2.73, "learning_rate": 4.544778214486244e-05, "loss": 0.0753, "step": 19462 }, { "epoch": 2.73, "learning_rate": 4.5447314242934686e-05, "loss": 0.0713, "step": 19464 }, { "epoch": 2.73, "learning_rate": 4.5446846341006926e-05, "loss": 0.0683, "step": 19466 }, { "epoch": 2.73, "learning_rate": 4.544637843907917e-05, "loss": 0.0846, "step": 19468 }, { "epoch": 2.73, "learning_rate": 4.544591053715142e-05, "loss": 0.0595, "step": 19470 }, { "epoch": 2.73, "learning_rate": 4.544544263522366e-05, "loss": 0.0471, "step": 19472 }, { "epoch": 2.73, "learning_rate": 4.54449747332959e-05, "loss": 0.0723, "step": 19474 }, { "epoch": 2.73, "learning_rate": 4.544450683136815e-05, "loss": 0.0627, "step": 19476 }, { "epoch": 2.73, "learning_rate": 4.5444038929440394e-05, "loss": 0.0688, "step": 19478 }, { "epoch": 2.73, "learning_rate": 4.544357102751264e-05, "loss": 0.069, "step": 19480 }, { "epoch": 2.73, "learning_rate": 4.544310312558488e-05, "loss": 0.0623, "step": 19482 }, { "epoch": 2.73, "learning_rate": 4.5442635223657125e-05, "loss": 0.0502, "step": 19484 }, { "epoch": 2.74, "learning_rate": 4.5442167321729364e-05, "loss": 0.0588, "step": 19486 }, { "epoch": 2.74, "learning_rate": 4.544169941980162e-05, "loss": 0.0726, "step": 19488 }, { "epoch": 2.74, "learning_rate": 4.5441231517873856e-05, "loss": 0.0762, "step": 19490 }, { "epoch": 2.74, "learning_rate": 4.54407636159461e-05, "loss": 0.0669, "step": 19492 }, { "epoch": 2.74, "learning_rate": 4.544029571401834e-05, "loss": 0.0543, "step": 19494 }, { "epoch": 2.74, "learning_rate": 4.5439827812090594e-05, "loss": 0.0528, "step": 19496 }, { "epoch": 2.74, "learning_rate": 4.543935991016283e-05, "loss": 0.0633, "step": 19498 }, { "epoch": 2.74, "learning_rate": 4.543889200823508e-05, "loss": 0.0552, "step": 19500 }, { "epoch": 2.74, "learning_rate": 4.543842410630732e-05, "loss": 0.0763, "step": 19502 }, { "epoch": 2.74, "learning_rate": 4.5437956204379564e-05, "loss": 0.0545, "step": 19504 }, { "epoch": 2.74, "learning_rate": 4.543748830245181e-05, "loss": 0.0695, "step": 19506 }, { "epoch": 2.74, "learning_rate": 4.543702040052405e-05, "loss": 0.069, "step": 19508 }, { "epoch": 2.74, "learning_rate": 4.5436552498596295e-05, "loss": 0.0894, "step": 19510 }, { "epoch": 2.74, "learning_rate": 4.543608459666854e-05, "loss": 0.0437, "step": 19512 }, { "epoch": 2.74, "learning_rate": 4.543561669474079e-05, "loss": 0.0792, "step": 19514 }, { "epoch": 2.74, "learning_rate": 4.5435148792813026e-05, "loss": 0.0734, "step": 19516 }, { "epoch": 2.74, "learning_rate": 4.543468089088527e-05, "loss": 0.0431, "step": 19518 }, { "epoch": 2.74, "learning_rate": 4.543421298895751e-05, "loss": 0.0727, "step": 19520 }, { "epoch": 2.74, "learning_rate": 4.5433745087029764e-05, "loss": 0.0816, "step": 19522 }, { "epoch": 2.74, "learning_rate": 4.5433277185102e-05, "loss": 0.0608, "step": 19524 }, { "epoch": 2.74, "learning_rate": 4.543280928317425e-05, "loss": 0.0633, "step": 19526 }, { "epoch": 2.74, "learning_rate": 4.543234138124649e-05, "loss": 0.0703, "step": 19528 }, { "epoch": 2.74, "learning_rate": 4.543187347931874e-05, "loss": 0.0581, "step": 19530 }, { "epoch": 2.74, "learning_rate": 4.543140557739098e-05, "loss": 0.0641, "step": 19532 }, { "epoch": 2.74, "learning_rate": 4.5430937675463226e-05, "loss": 0.0694, "step": 19534 }, { "epoch": 2.74, "learning_rate": 4.5430469773535465e-05, "loss": 0.0527, "step": 19536 }, { "epoch": 2.74, "learning_rate": 4.543000187160771e-05, "loss": 0.0524, "step": 19538 }, { "epoch": 2.74, "learning_rate": 4.5429533969679957e-05, "loss": 0.0767, "step": 19540 }, { "epoch": 2.74, "learning_rate": 4.54290660677522e-05, "loss": 0.0624, "step": 19542 }, { "epoch": 2.74, "learning_rate": 4.542859816582444e-05, "loss": 0.0769, "step": 19544 }, { "epoch": 2.74, "learning_rate": 4.542813026389669e-05, "loss": 0.0615, "step": 19546 }, { "epoch": 2.74, "learning_rate": 4.5427662361968933e-05, "loss": 0.0767, "step": 19548 }, { "epoch": 2.74, "learning_rate": 4.542719446004118e-05, "loss": 0.0764, "step": 19550 }, { "epoch": 2.74, "learning_rate": 4.542672655811342e-05, "loss": 0.0618, "step": 19552 }, { "epoch": 2.74, "learning_rate": 4.5426258656185664e-05, "loss": 0.0635, "step": 19554 }, { "epoch": 2.75, "learning_rate": 4.542579075425791e-05, "loss": 0.078, "step": 19556 }, { "epoch": 2.75, "learning_rate": 4.5425322852330156e-05, "loss": 0.0589, "step": 19558 }, { "epoch": 2.75, "learning_rate": 4.5424854950402395e-05, "loss": 0.0724, "step": 19560 }, { "epoch": 2.75, "learning_rate": 4.542438704847464e-05, "loss": 0.0552, "step": 19562 }, { "epoch": 2.75, "learning_rate": 4.542391914654689e-05, "loss": 0.0413, "step": 19564 }, { "epoch": 2.75, "learning_rate": 4.542345124461913e-05, "loss": 0.0676, "step": 19566 }, { "epoch": 2.75, "learning_rate": 4.542298334269137e-05, "loss": 0.0735, "step": 19568 }, { "epoch": 2.75, "learning_rate": 4.542251544076362e-05, "loss": 0.0909, "step": 19570 }, { "epoch": 2.75, "learning_rate": 4.542204753883586e-05, "loss": 0.0715, "step": 19572 }, { "epoch": 2.75, "learning_rate": 4.542157963690811e-05, "loss": 0.0639, "step": 19574 }, { "epoch": 2.75, "learning_rate": 4.542111173498035e-05, "loss": 0.0744, "step": 19576 }, { "epoch": 2.75, "learning_rate": 4.5420643833052595e-05, "loss": 0.0783, "step": 19578 }, { "epoch": 2.75, "learning_rate": 4.5420175931124834e-05, "loss": 0.0552, "step": 19580 }, { "epoch": 2.75, "learning_rate": 4.541970802919708e-05, "loss": 0.0751, "step": 19582 }, { "epoch": 2.75, "learning_rate": 4.5419240127269326e-05, "loss": 0.0764, "step": 19584 }, { "epoch": 2.75, "learning_rate": 4.541877222534157e-05, "loss": 0.0739, "step": 19586 }, { "epoch": 2.75, "learning_rate": 4.541830432341381e-05, "loss": 0.0482, "step": 19588 }, { "epoch": 2.75, "learning_rate": 4.541783642148606e-05, "loss": 0.0758, "step": 19590 }, { "epoch": 2.75, "learning_rate": 4.54173685195583e-05, "loss": 0.0751, "step": 19592 }, { "epoch": 2.75, "learning_rate": 4.541690061763055e-05, "loss": 0.0566, "step": 19594 }, { "epoch": 2.75, "learning_rate": 4.541643271570279e-05, "loss": 0.0504, "step": 19596 }, { "epoch": 2.75, "learning_rate": 4.5415964813775034e-05, "loss": 0.0535, "step": 19598 }, { "epoch": 2.75, "learning_rate": 4.541549691184728e-05, "loss": 0.0662, "step": 19600 }, { "epoch": 2.75, "learning_rate": 4.5415029009919526e-05, "loss": 0.0714, "step": 19602 }, { "epoch": 2.75, "learning_rate": 4.5414561107991765e-05, "loss": 0.0624, "step": 19604 }, { "epoch": 2.75, "learning_rate": 4.541409320606401e-05, "loss": 0.0571, "step": 19606 }, { "epoch": 2.75, "learning_rate": 4.5413625304136257e-05, "loss": 0.054, "step": 19608 }, { "epoch": 2.75, "learning_rate": 4.54131574022085e-05, "loss": 0.0696, "step": 19610 }, { "epoch": 2.75, "learning_rate": 4.541268950028074e-05, "loss": 0.0785, "step": 19612 }, { "epoch": 2.75, "learning_rate": 4.541222159835299e-05, "loss": 0.0435, "step": 19614 }, { "epoch": 2.75, "learning_rate": 4.541175369642523e-05, "loss": 0.0757, "step": 19616 }, { "epoch": 2.75, "learning_rate": 4.541128579449748e-05, "loss": 0.0575, "step": 19618 }, { "epoch": 2.75, "learning_rate": 4.541081789256972e-05, "loss": 0.089, "step": 19620 }, { "epoch": 2.75, "learning_rate": 4.5410349990641964e-05, "loss": 0.0588, "step": 19622 }, { "epoch": 2.75, "learning_rate": 4.5409882088714204e-05, "loss": 0.0677, "step": 19624 }, { "epoch": 2.75, "learning_rate": 4.5409414186786456e-05, "loss": 0.0845, "step": 19626 }, { "epoch": 2.76, "learning_rate": 4.5408946284858695e-05, "loss": 0.0549, "step": 19628 }, { "epoch": 2.76, "learning_rate": 4.540847838293094e-05, "loss": 0.0841, "step": 19630 }, { "epoch": 2.76, "learning_rate": 4.540801048100318e-05, "loss": 0.0744, "step": 19632 }, { "epoch": 2.76, "learning_rate": 4.5407542579075426e-05, "loss": 0.0634, "step": 19634 }, { "epoch": 2.76, "learning_rate": 4.540707467714767e-05, "loss": 0.0807, "step": 19636 }, { "epoch": 2.76, "learning_rate": 4.540660677521992e-05, "loss": 0.0731, "step": 19638 }, { "epoch": 2.76, "learning_rate": 4.540613887329216e-05, "loss": 0.0739, "step": 19640 }, { "epoch": 2.76, "learning_rate": 4.54056709713644e-05, "loss": 0.0634, "step": 19642 }, { "epoch": 2.76, "learning_rate": 4.540520306943665e-05, "loss": 0.0718, "step": 19644 }, { "epoch": 2.76, "learning_rate": 4.5404735167508895e-05, "loss": 0.0679, "step": 19646 }, { "epoch": 2.76, "learning_rate": 4.5404267265581134e-05, "loss": 0.065, "step": 19648 }, { "epoch": 2.76, "learning_rate": 4.540379936365338e-05, "loss": 0.0652, "step": 19650 }, { "epoch": 2.76, "learning_rate": 4.5403331461725626e-05, "loss": 0.0818, "step": 19652 }, { "epoch": 2.76, "learning_rate": 4.540286355979787e-05, "loss": 0.0758, "step": 19654 }, { "epoch": 2.76, "learning_rate": 4.540239565787011e-05, "loss": 0.0519, "step": 19656 }, { "epoch": 2.76, "learning_rate": 4.540192775594236e-05, "loss": 0.061, "step": 19658 }, { "epoch": 2.76, "learning_rate": 4.54014598540146e-05, "loss": 0.0553, "step": 19660 }, { "epoch": 2.76, "learning_rate": 4.540099195208685e-05, "loss": 0.0802, "step": 19662 }, { "epoch": 2.76, "learning_rate": 4.540052405015909e-05, "loss": 0.0617, "step": 19664 }, { "epoch": 2.76, "learning_rate": 4.5400056148231334e-05, "loss": 0.0555, "step": 19666 }, { "epoch": 2.76, "learning_rate": 4.539958824630357e-05, "loss": 0.0882, "step": 19668 }, { "epoch": 2.76, "learning_rate": 4.5399120344375826e-05, "loss": 0.0765, "step": 19670 }, { "epoch": 2.76, "learning_rate": 4.5398652442448065e-05, "loss": 0.0644, "step": 19672 }, { "epoch": 2.76, "learning_rate": 4.539818454052031e-05, "loss": 0.0619, "step": 19674 }, { "epoch": 2.76, "learning_rate": 4.539771663859255e-05, "loss": 0.0749, "step": 19676 }, { "epoch": 2.76, "learning_rate": 4.53972487366648e-05, "loss": 0.0661, "step": 19678 }, { "epoch": 2.76, "learning_rate": 4.539678083473704e-05, "loss": 0.0654, "step": 19680 }, { "epoch": 2.76, "learning_rate": 4.539631293280929e-05, "loss": 0.0752, "step": 19682 }, { "epoch": 2.76, "learning_rate": 4.539584503088153e-05, "loss": 0.0627, "step": 19684 }, { "epoch": 2.76, "learning_rate": 4.539537712895377e-05, "loss": 0.0608, "step": 19686 }, { "epoch": 2.76, "learning_rate": 4.539490922702602e-05, "loss": 0.0715, "step": 19688 }, { "epoch": 2.76, "learning_rate": 4.5394441325098264e-05, "loss": 0.0614, "step": 19690 }, { "epoch": 2.76, "learning_rate": 4.5393973423170504e-05, "loss": 0.071, "step": 19692 }, { "epoch": 2.76, "learning_rate": 4.539350552124275e-05, "loss": 0.0462, "step": 19694 }, { "epoch": 2.76, "learning_rate": 4.5393037619314995e-05, "loss": 0.0656, "step": 19696 }, { "epoch": 2.77, "learning_rate": 4.539256971738724e-05, "loss": 0.0698, "step": 19698 }, { "epoch": 2.77, "learning_rate": 4.539210181545948e-05, "loss": 0.0709, "step": 19700 }, { "epoch": 2.77, "learning_rate": 4.5391633913531726e-05, "loss": 0.0644, "step": 19702 }, { "epoch": 2.77, "learning_rate": 4.539116601160397e-05, "loss": 0.0823, "step": 19704 }, { "epoch": 2.77, "learning_rate": 4.539069810967622e-05, "loss": 0.0657, "step": 19706 }, { "epoch": 2.77, "learning_rate": 4.539023020774846e-05, "loss": 0.0682, "step": 19708 }, { "epoch": 2.77, "learning_rate": 4.53897623058207e-05, "loss": 0.0574, "step": 19710 }, { "epoch": 2.77, "learning_rate": 4.538929440389294e-05, "loss": 0.0621, "step": 19712 }, { "epoch": 2.77, "learning_rate": 4.5388826501965195e-05, "loss": 0.0777, "step": 19714 }, { "epoch": 2.77, "learning_rate": 4.5388358600037434e-05, "loss": 0.0759, "step": 19716 }, { "epoch": 2.77, "learning_rate": 4.538789069810968e-05, "loss": 0.0735, "step": 19718 }, { "epoch": 2.77, "learning_rate": 4.538742279618192e-05, "loss": 0.0611, "step": 19720 }, { "epoch": 2.77, "learning_rate": 4.538695489425417e-05, "loss": 0.0881, "step": 19722 }, { "epoch": 2.77, "learning_rate": 4.538648699232641e-05, "loss": 0.0563, "step": 19724 }, { "epoch": 2.77, "learning_rate": 4.538601909039866e-05, "loss": 0.0561, "step": 19726 }, { "epoch": 2.77, "learning_rate": 4.5385551188470896e-05, "loss": 0.0569, "step": 19728 }, { "epoch": 2.77, "learning_rate": 4.538508328654314e-05, "loss": 0.077, "step": 19730 }, { "epoch": 2.77, "learning_rate": 4.538461538461539e-05, "loss": 0.0643, "step": 19732 }, { "epoch": 2.77, "learning_rate": 4.5384147482687634e-05, "loss": 0.0555, "step": 19734 }, { "epoch": 2.77, "learning_rate": 4.538367958075987e-05, "loss": 0.0595, "step": 19736 }, { "epoch": 2.77, "learning_rate": 4.538321167883212e-05, "loss": 0.0582, "step": 19738 }, { "epoch": 2.77, "learning_rate": 4.5382743776904365e-05, "loss": 0.0724, "step": 19740 }, { "epoch": 2.77, "learning_rate": 4.538227587497661e-05, "loss": 0.0678, "step": 19742 }, { "epoch": 2.77, "learning_rate": 4.538180797304885e-05, "loss": 0.0564, "step": 19744 }, { "epoch": 2.77, "learning_rate": 4.5381340071121096e-05, "loss": 0.0886, "step": 19746 }, { "epoch": 2.77, "learning_rate": 4.538087216919334e-05, "loss": 0.0591, "step": 19748 }, { "epoch": 2.77, "learning_rate": 4.538040426726559e-05, "loss": 0.0716, "step": 19750 }, { "epoch": 2.77, "learning_rate": 4.537993636533783e-05, "loss": 0.0814, "step": 19752 }, { "epoch": 2.77, "learning_rate": 4.537946846341007e-05, "loss": 0.0917, "step": 19754 }, { "epoch": 2.77, "learning_rate": 4.537900056148232e-05, "loss": 0.0526, "step": 19756 }, { "epoch": 2.77, "learning_rate": 4.537853265955456e-05, "loss": 0.067, "step": 19758 }, { "epoch": 2.77, "learning_rate": 4.5378064757626804e-05, "loss": 0.0629, "step": 19760 }, { "epoch": 2.77, "learning_rate": 4.537759685569904e-05, "loss": 0.0868, "step": 19762 }, { "epoch": 2.77, "learning_rate": 4.537712895377129e-05, "loss": 0.077, "step": 19764 }, { "epoch": 2.77, "learning_rate": 4.5376661051843535e-05, "loss": 0.071, "step": 19766 }, { "epoch": 2.77, "learning_rate": 4.537619314991578e-05, "loss": 0.0748, "step": 19768 }, { "epoch": 2.78, "learning_rate": 4.537572524798802e-05, "loss": 0.0775, "step": 19770 }, { "epoch": 2.78, "learning_rate": 4.5375257346060266e-05, "loss": 0.0652, "step": 19772 }, { "epoch": 2.78, "learning_rate": 4.537478944413251e-05, "loss": 0.07, "step": 19774 }, { "epoch": 2.78, "learning_rate": 4.537432154220476e-05, "loss": 0.0767, "step": 19776 }, { "epoch": 2.78, "learning_rate": 4.5373853640276996e-05, "loss": 0.0756, "step": 19778 }, { "epoch": 2.78, "learning_rate": 4.537338573834924e-05, "loss": 0.0569, "step": 19780 }, { "epoch": 2.78, "learning_rate": 4.537291783642149e-05, "loss": 0.0516, "step": 19782 }, { "epoch": 2.78, "learning_rate": 4.5372449934493734e-05, "loss": 0.0617, "step": 19784 }, { "epoch": 2.78, "learning_rate": 4.537198203256597e-05, "loss": 0.0691, "step": 19786 }, { "epoch": 2.78, "learning_rate": 4.537151413063822e-05, "loss": 0.0813, "step": 19788 }, { "epoch": 2.78, "learning_rate": 4.5371046228710465e-05, "loss": 0.0938, "step": 19790 }, { "epoch": 2.78, "learning_rate": 4.537057832678271e-05, "loss": 0.0675, "step": 19792 }, { "epoch": 2.78, "learning_rate": 4.537011042485495e-05, "loss": 0.0616, "step": 19794 }, { "epoch": 2.78, "learning_rate": 4.5369642522927196e-05, "loss": 0.0643, "step": 19796 }, { "epoch": 2.78, "learning_rate": 4.5369174620999435e-05, "loss": 0.0681, "step": 19798 }, { "epoch": 2.78, "learning_rate": 4.536870671907169e-05, "loss": 0.0562, "step": 19800 }, { "epoch": 2.78, "learning_rate": 4.536823881714393e-05, "loss": 0.0901, "step": 19802 }, { "epoch": 2.78, "learning_rate": 4.536777091521617e-05, "loss": 0.0784, "step": 19804 }, { "epoch": 2.78, "learning_rate": 4.536730301328841e-05, "loss": 0.0787, "step": 19806 }, { "epoch": 2.78, "learning_rate": 4.5366835111360665e-05, "loss": 0.0533, "step": 19808 }, { "epoch": 2.78, "learning_rate": 4.5366367209432904e-05, "loss": 0.0456, "step": 19810 }, { "epoch": 2.78, "learning_rate": 4.536589930750515e-05, "loss": 0.0672, "step": 19812 }, { "epoch": 2.78, "learning_rate": 4.536543140557739e-05, "loss": 0.0406, "step": 19814 }, { "epoch": 2.78, "learning_rate": 4.5364963503649635e-05, "loss": 0.0633, "step": 19816 }, { "epoch": 2.78, "learning_rate": 4.536449560172188e-05, "loss": 0.0619, "step": 19818 }, { "epoch": 2.78, "learning_rate": 4.536402769979413e-05, "loss": 0.0761, "step": 19820 }, { "epoch": 2.78, "learning_rate": 4.5363559797866366e-05, "loss": 0.0581, "step": 19822 }, { "epoch": 2.78, "learning_rate": 4.536309189593861e-05, "loss": 0.0584, "step": 19824 }, { "epoch": 2.78, "learning_rate": 4.536262399401086e-05, "loss": 0.064, "step": 19826 }, { "epoch": 2.78, "learning_rate": 4.5362156092083104e-05, "loss": 0.0677, "step": 19828 }, { "epoch": 2.78, "learning_rate": 4.536168819015534e-05, "loss": 0.0465, "step": 19830 }, { "epoch": 2.78, "learning_rate": 4.536122028822759e-05, "loss": 0.0571, "step": 19832 }, { "epoch": 2.78, "learning_rate": 4.5360752386299835e-05, "loss": 0.0684, "step": 19834 }, { "epoch": 2.78, "learning_rate": 4.536028448437208e-05, "loss": 0.0612, "step": 19836 }, { "epoch": 2.78, "learning_rate": 4.535981658244432e-05, "loss": 0.054, "step": 19838 }, { "epoch": 2.78, "learning_rate": 4.5359348680516566e-05, "loss": 0.0587, "step": 19840 }, { "epoch": 2.79, "learning_rate": 4.535888077858881e-05, "loss": 0.0657, "step": 19842 }, { "epoch": 2.79, "learning_rate": 4.535841287666106e-05, "loss": 0.0591, "step": 19844 }, { "epoch": 2.79, "learning_rate": 4.5357944974733296e-05, "loss": 0.0682, "step": 19846 }, { "epoch": 2.79, "learning_rate": 4.535747707280554e-05, "loss": 0.0813, "step": 19848 }, { "epoch": 2.79, "learning_rate": 4.535700917087778e-05, "loss": 0.0878, "step": 19850 }, { "epoch": 2.79, "learning_rate": 4.5356541268950034e-05, "loss": 0.0768, "step": 19852 }, { "epoch": 2.79, "learning_rate": 4.535607336702227e-05, "loss": 0.0755, "step": 19854 }, { "epoch": 2.79, "learning_rate": 4.535560546509452e-05, "loss": 0.0582, "step": 19856 }, { "epoch": 2.79, "learning_rate": 4.535513756316676e-05, "loss": 0.064, "step": 19858 }, { "epoch": 2.79, "learning_rate": 4.5354669661239004e-05, "loss": 0.0593, "step": 19860 }, { "epoch": 2.79, "learning_rate": 4.535420175931125e-05, "loss": 0.0766, "step": 19862 }, { "epoch": 2.79, "learning_rate": 4.5353733857383496e-05, "loss": 0.088, "step": 19864 }, { "epoch": 2.79, "learning_rate": 4.5353265955455735e-05, "loss": 0.079, "step": 19866 }, { "epoch": 2.79, "learning_rate": 4.535279805352798e-05, "loss": 0.0584, "step": 19868 }, { "epoch": 2.79, "learning_rate": 4.535233015160023e-05, "loss": 0.0563, "step": 19870 }, { "epoch": 2.79, "learning_rate": 4.535186224967247e-05, "loss": 0.0555, "step": 19872 }, { "epoch": 2.79, "learning_rate": 4.535139434774471e-05, "loss": 0.0936, "step": 19874 }, { "epoch": 2.79, "learning_rate": 4.535092644581696e-05, "loss": 0.0635, "step": 19876 }, { "epoch": 2.79, "learning_rate": 4.5350458543889204e-05, "loss": 0.0664, "step": 19878 }, { "epoch": 2.79, "learning_rate": 4.534999064196145e-05, "loss": 0.0554, "step": 19880 }, { "epoch": 2.79, "learning_rate": 4.534952274003369e-05, "loss": 0.0917, "step": 19882 }, { "epoch": 2.79, "learning_rate": 4.5349054838105935e-05, "loss": 0.0565, "step": 19884 }, { "epoch": 2.79, "learning_rate": 4.534858693617818e-05, "loss": 0.0716, "step": 19886 }, { "epoch": 2.79, "learning_rate": 4.534811903425043e-05, "loss": 0.0707, "step": 19888 }, { "epoch": 2.79, "learning_rate": 4.5347651132322666e-05, "loss": 0.0745, "step": 19890 }, { "epoch": 2.79, "learning_rate": 4.534718323039491e-05, "loss": 0.0768, "step": 19892 }, { "epoch": 2.79, "learning_rate": 4.534671532846715e-05, "loss": 0.0776, "step": 19894 }, { "epoch": 2.79, "learning_rate": 4.5346247426539404e-05, "loss": 0.0834, "step": 19896 }, { "epoch": 2.79, "learning_rate": 4.534577952461164e-05, "loss": 0.0666, "step": 19898 }, { "epoch": 2.79, "learning_rate": 4.534531162268389e-05, "loss": 0.0813, "step": 19900 }, { "epoch": 2.79, "learning_rate": 4.534484372075613e-05, "loss": 0.077, "step": 19902 }, { "epoch": 2.79, "learning_rate": 4.534437581882838e-05, "loss": 0.0506, "step": 19904 }, { "epoch": 2.79, "learning_rate": 4.534390791690062e-05, "loss": 0.0766, "step": 19906 }, { "epoch": 2.79, "learning_rate": 4.5343440014972866e-05, "loss": 0.0815, "step": 19908 }, { "epoch": 2.79, "learning_rate": 4.5342972113045105e-05, "loss": 0.0618, "step": 19910 }, { "epoch": 2.8, "learning_rate": 4.534250421111735e-05, "loss": 0.0726, "step": 19912 }, { "epoch": 2.8, "learning_rate": 4.5342036309189597e-05, "loss": 0.0688, "step": 19914 }, { "epoch": 2.8, "learning_rate": 4.534156840726184e-05, "loss": 0.0624, "step": 19916 }, { "epoch": 2.8, "learning_rate": 4.534110050533408e-05, "loss": 0.0857, "step": 19918 }, { "epoch": 2.8, "learning_rate": 4.534063260340633e-05, "loss": 0.083, "step": 19920 }, { "epoch": 2.8, "learning_rate": 4.534016470147857e-05, "loss": 0.063, "step": 19922 }, { "epoch": 2.8, "learning_rate": 4.533969679955082e-05, "loss": 0.0587, "step": 19924 }, { "epoch": 2.8, "learning_rate": 4.533922889762306e-05, "loss": 0.0435, "step": 19926 }, { "epoch": 2.8, "learning_rate": 4.5338760995695304e-05, "loss": 0.0605, "step": 19928 }, { "epoch": 2.8, "learning_rate": 4.533829309376755e-05, "loss": 0.0856, "step": 19930 }, { "epoch": 2.8, "learning_rate": 4.5337825191839796e-05, "loss": 0.0854, "step": 19932 }, { "epoch": 2.8, "learning_rate": 4.5337357289912035e-05, "loss": 0.0674, "step": 19934 }, { "epoch": 2.8, "learning_rate": 4.533688938798428e-05, "loss": 0.0453, "step": 19936 }, { "epoch": 2.8, "learning_rate": 4.533642148605653e-05, "loss": 0.0656, "step": 19938 }, { "epoch": 2.8, "learning_rate": 4.533595358412877e-05, "loss": 0.077, "step": 19940 }, { "epoch": 2.8, "learning_rate": 4.533548568220101e-05, "loss": 0.0655, "step": 19942 }, { "epoch": 2.8, "learning_rate": 4.533501778027326e-05, "loss": 0.0688, "step": 19944 }, { "epoch": 2.8, "learning_rate": 4.53345498783455e-05, "loss": 0.0745, "step": 19946 }, { "epoch": 2.8, "learning_rate": 4.533408197641775e-05, "loss": 0.0589, "step": 19948 }, { "epoch": 2.8, "learning_rate": 4.533361407448999e-05, "loss": 0.0559, "step": 19950 }, { "epoch": 2.8, "learning_rate": 4.5333146172562235e-05, "loss": 0.0798, "step": 19952 }, { "epoch": 2.8, "learning_rate": 4.5332678270634474e-05, "loss": 0.0677, "step": 19954 }, { "epoch": 2.8, "learning_rate": 4.533221036870673e-05, "loss": 0.0581, "step": 19956 }, { "epoch": 2.8, "learning_rate": 4.5331742466778966e-05, "loss": 0.073, "step": 19958 }, { "epoch": 2.8, "learning_rate": 4.533127456485121e-05, "loss": 0.0769, "step": 19960 }, { "epoch": 2.8, "learning_rate": 4.533080666292345e-05, "loss": 0.0616, "step": 19962 }, { "epoch": 2.8, "learning_rate": 4.53303387609957e-05, "loss": 0.069, "step": 19964 }, { "epoch": 2.8, "learning_rate": 4.532987085906794e-05, "loss": 0.0505, "step": 19966 }, { "epoch": 2.8, "learning_rate": 4.532940295714019e-05, "loss": 0.0569, "step": 19968 }, { "epoch": 2.8, "learning_rate": 4.532893505521243e-05, "loss": 0.0925, "step": 19970 }, { "epoch": 2.8, "learning_rate": 4.5328467153284674e-05, "loss": 0.0643, "step": 19972 }, { "epoch": 2.8, "learning_rate": 4.532799925135692e-05, "loss": 0.0617, "step": 19974 }, { "epoch": 2.8, "learning_rate": 4.5327531349429166e-05, "loss": 0.0697, "step": 19976 }, { "epoch": 2.8, "learning_rate": 4.5327063447501405e-05, "loss": 0.0583, "step": 19978 }, { "epoch": 2.8, "learning_rate": 4.532659554557365e-05, "loss": 0.0478, "step": 19980 }, { "epoch": 2.8, "learning_rate": 4.5326127643645897e-05, "loss": 0.0592, "step": 19982 }, { "epoch": 2.81, "learning_rate": 4.532565974171814e-05, "loss": 0.0671, "step": 19984 }, { "epoch": 2.81, "learning_rate": 4.532519183979038e-05, "loss": 0.0581, "step": 19986 }, { "epoch": 2.81, "learning_rate": 4.532472393786263e-05, "loss": 0.0655, "step": 19988 }, { "epoch": 2.81, "learning_rate": 4.5324256035934873e-05, "loss": 0.0543, "step": 19990 }, { "epoch": 2.81, "learning_rate": 4.532378813400712e-05, "loss": 0.0843, "step": 19992 }, { "epoch": 2.81, "learning_rate": 4.532332023207936e-05, "loss": 0.0496, "step": 19994 }, { "epoch": 2.81, "learning_rate": 4.5322852330151604e-05, "loss": 0.0775, "step": 19996 }, { "epoch": 2.81, "learning_rate": 4.5322384428223843e-05, "loss": 0.071, "step": 19998 }, { "epoch": 2.81, "learning_rate": 4.5321916526296096e-05, "loss": 0.0646, "step": 20000 }, { "epoch": 2.81, "eval_gen_len": 30.9655, "eval_loss": 1.0668187141418457, "eval_meteor": 0.0542, "eval_runtime": 15.772, "eval_samples_per_second": 3.677, "eval_steps_per_second": 0.507, "step": 20000 } ], "max_steps": 213720, "num_train_epochs": 30, "total_flos": 2.878106005251529e+19, "trial_name": null, "trial_params": null }