pythia-exp / trainer_state.json
TinyPixel's picture
Upload folder using huggingface_hub
6fddacd
raw
history blame
111 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.995418914791815,
"eval_steps": 500,
"global_step": 1839,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 7.142857142857143e-07,
"loss": 1.611,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.7568,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 2.1428571428571427e-06,
"loss": 1.658,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.8564,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 3.5714285714285718e-06,
"loss": 2.0213,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 4.2857142857142855e-06,
"loss": 1.9062,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 5e-06,
"loss": 1.828,
"step": 14
},
{
"epoch": 0.03,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.9038,
"step": 16
},
{
"epoch": 0.03,
"learning_rate": 6.4285714285714295e-06,
"loss": 1.9794,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 7.1428571428571436e-06,
"loss": 1.9665,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 7.857142857142858e-06,
"loss": 1.9494,
"step": 22
},
{
"epoch": 0.04,
"learning_rate": 8.571428571428571e-06,
"loss": 1.962,
"step": 24
},
{
"epoch": 0.04,
"learning_rate": 9.285714285714288e-06,
"loss": 2.195,
"step": 26
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 2.0743,
"step": 28
},
{
"epoch": 0.05,
"learning_rate": 1.0714285714285714e-05,
"loss": 2.2212,
"step": 30
},
{
"epoch": 0.05,
"learning_rate": 1.1428571428571429e-05,
"loss": 2.1635,
"step": 32
},
{
"epoch": 0.06,
"learning_rate": 1.2142857142857142e-05,
"loss": 2.3554,
"step": 34
},
{
"epoch": 0.06,
"learning_rate": 1.2857142857142859e-05,
"loss": 2.2266,
"step": 36
},
{
"epoch": 0.06,
"learning_rate": 1.3571428571428574e-05,
"loss": 2.2897,
"step": 38
},
{
"epoch": 0.07,
"learning_rate": 1.4285714285714287e-05,
"loss": 2.385,
"step": 40
},
{
"epoch": 0.07,
"learning_rate": 1.5000000000000002e-05,
"loss": 2.404,
"step": 42
},
{
"epoch": 0.07,
"learning_rate": 1.5714285714285715e-05,
"loss": 2.5732,
"step": 44
},
{
"epoch": 0.07,
"learning_rate": 1.642857142857143e-05,
"loss": 2.7419,
"step": 46
},
{
"epoch": 0.08,
"learning_rate": 1.7142857142857142e-05,
"loss": 2.7752,
"step": 48
},
{
"epoch": 0.08,
"learning_rate": 1.785714285714286e-05,
"loss": 2.9747,
"step": 50
},
{
"epoch": 0.08,
"learning_rate": 1.8571428571428575e-05,
"loss": 1.7419,
"step": 52
},
{
"epoch": 0.09,
"learning_rate": 1.928571428571429e-05,
"loss": 1.5881,
"step": 54
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 1.7405,
"step": 56
},
{
"epoch": 0.09,
"learning_rate": 1.9999937909292965e-05,
"loss": 1.7546,
"step": 58
},
{
"epoch": 0.1,
"learning_rate": 1.9999751637942895e-05,
"loss": 1.8288,
"step": 60
},
{
"epoch": 0.1,
"learning_rate": 1.9999441188262945e-05,
"loss": 1.8816,
"step": 62
},
{
"epoch": 0.1,
"learning_rate": 1.999900656410832e-05,
"loss": 1.7845,
"step": 64
},
{
"epoch": 0.11,
"learning_rate": 1.9998447770876235e-05,
"loss": 1.8064,
"step": 66
},
{
"epoch": 0.11,
"learning_rate": 1.999776481550588e-05,
"loss": 1.7793,
"step": 68
},
{
"epoch": 0.11,
"learning_rate": 1.9996957706478283e-05,
"loss": 1.8418,
"step": 70
},
{
"epoch": 0.12,
"learning_rate": 1.999602645381624e-05,
"loss": 1.769,
"step": 72
},
{
"epoch": 0.12,
"learning_rate": 1.999497106908417e-05,
"loss": 1.9599,
"step": 74
},
{
"epoch": 0.12,
"learning_rate": 1.9993791565388e-05,
"loss": 1.822,
"step": 76
},
{
"epoch": 0.13,
"learning_rate": 1.999248795737497e-05,
"loss": 1.8957,
"step": 78
},
{
"epoch": 0.13,
"learning_rate": 1.9991060261233468e-05,
"loss": 1.9517,
"step": 80
},
{
"epoch": 0.13,
"learning_rate": 1.998950849469283e-05,
"loss": 1.9511,
"step": 82
},
{
"epoch": 0.14,
"learning_rate": 1.9987832677023104e-05,
"loss": 2.0987,
"step": 84
},
{
"epoch": 0.14,
"learning_rate": 1.998603282903484e-05,
"loss": 2.0514,
"step": 86
},
{
"epoch": 0.14,
"learning_rate": 1.99841089730788e-05,
"loss": 2.1349,
"step": 88
},
{
"epoch": 0.15,
"learning_rate": 1.99820611330457e-05,
"loss": 2.1021,
"step": 90
},
{
"epoch": 0.15,
"learning_rate": 1.997988933436591e-05,
"loss": 2.2552,
"step": 92
},
{
"epoch": 0.15,
"learning_rate": 1.9977593604009126e-05,
"loss": 2.1949,
"step": 94
},
{
"epoch": 0.16,
"learning_rate": 1.997517397048406e-05,
"loss": 2.4357,
"step": 96
},
{
"epoch": 0.16,
"learning_rate": 1.9972630463838062e-05,
"loss": 2.1899,
"step": 98
},
{
"epoch": 0.16,
"learning_rate": 1.9969963115656754e-05,
"loss": 2.1754,
"step": 100
},
{
"epoch": 0.17,
"learning_rate": 1.9967171959063644e-05,
"loss": 1.664,
"step": 102
},
{
"epoch": 0.17,
"learning_rate": 1.996425702871971e-05,
"loss": 1.5388,
"step": 104
},
{
"epoch": 0.17,
"learning_rate": 1.9961218360822968e-05,
"loss": 1.6347,
"step": 106
},
{
"epoch": 0.18,
"learning_rate": 1.9958055993108026e-05,
"loss": 1.6809,
"step": 108
},
{
"epoch": 0.18,
"learning_rate": 1.9954769964845614e-05,
"loss": 1.7729,
"step": 110
},
{
"epoch": 0.18,
"learning_rate": 1.9951360316842097e-05,
"loss": 1.7805,
"step": 112
},
{
"epoch": 0.19,
"learning_rate": 1.994782709143896e-05,
"loss": 1.6005,
"step": 114
},
{
"epoch": 0.19,
"learning_rate": 1.9944170332512303e-05,
"loss": 1.7232,
"step": 116
},
{
"epoch": 0.19,
"learning_rate": 1.9940390085472274e-05,
"loss": 1.7918,
"step": 118
},
{
"epoch": 0.2,
"learning_rate": 1.9936486397262514e-05,
"loss": 1.7841,
"step": 120
},
{
"epoch": 0.2,
"learning_rate": 1.9932459316359573e-05,
"loss": 1.6909,
"step": 122
},
{
"epoch": 0.2,
"learning_rate": 1.9928308892772315e-05,
"loss": 1.7525,
"step": 124
},
{
"epoch": 0.21,
"learning_rate": 1.9924035178041284e-05,
"loss": 1.8016,
"step": 126
},
{
"epoch": 0.21,
"learning_rate": 1.9919638225238077e-05,
"loss": 1.8685,
"step": 128
},
{
"epoch": 0.21,
"learning_rate": 1.9915118088964674e-05,
"loss": 1.9582,
"step": 130
},
{
"epoch": 0.22,
"learning_rate": 1.9910474825352767e-05,
"loss": 1.818,
"step": 132
},
{
"epoch": 0.22,
"learning_rate": 1.9905708492063055e-05,
"loss": 2.0729,
"step": 134
},
{
"epoch": 0.22,
"learning_rate": 1.990081914828455e-05,
"loss": 2.0539,
"step": 136
},
{
"epoch": 0.22,
"learning_rate": 1.98958068547338e-05,
"loss": 2.1301,
"step": 138
},
{
"epoch": 0.23,
"learning_rate": 1.9890671673654186e-05,
"loss": 2.1811,
"step": 140
},
{
"epoch": 0.23,
"learning_rate": 1.988541366881511e-05,
"loss": 2.204,
"step": 142
},
{
"epoch": 0.23,
"learning_rate": 1.9880032905511218e-05,
"loss": 2.2893,
"step": 144
},
{
"epoch": 0.24,
"learning_rate": 1.9874529450561592e-05,
"loss": 2.1792,
"step": 146
},
{
"epoch": 0.24,
"learning_rate": 1.986890337230891e-05,
"loss": 2.4459,
"step": 148
},
{
"epoch": 0.24,
"learning_rate": 1.9863154740618613e-05,
"loss": 2.3752,
"step": 150
},
{
"epoch": 0.25,
"learning_rate": 1.9857283626878016e-05,
"loss": 1.5311,
"step": 152
},
{
"epoch": 0.25,
"learning_rate": 1.9851290103995446e-05,
"loss": 1.657,
"step": 154
},
{
"epoch": 0.25,
"learning_rate": 1.9845174246399313e-05,
"loss": 1.6497,
"step": 156
},
{
"epoch": 0.26,
"learning_rate": 1.9838936130037202e-05,
"loss": 1.5877,
"step": 158
},
{
"epoch": 0.26,
"learning_rate": 1.9832575832374926e-05,
"loss": 1.6494,
"step": 160
},
{
"epoch": 0.26,
"learning_rate": 1.982609343239556e-05,
"loss": 1.8271,
"step": 162
},
{
"epoch": 0.27,
"learning_rate": 1.981948901059846e-05,
"loss": 1.8217,
"step": 164
},
{
"epoch": 0.27,
"learning_rate": 1.9812762648998278e-05,
"loss": 1.8643,
"step": 166
},
{
"epoch": 0.27,
"learning_rate": 1.9805914431123915e-05,
"loss": 1.7973,
"step": 168
},
{
"epoch": 0.28,
"learning_rate": 1.9798944442017514e-05,
"loss": 1.7682,
"step": 170
},
{
"epoch": 0.28,
"learning_rate": 1.9791852768233385e-05,
"loss": 1.7038,
"step": 172
},
{
"epoch": 0.28,
"learning_rate": 1.9784639497836934e-05,
"loss": 1.711,
"step": 174
},
{
"epoch": 0.29,
"learning_rate": 1.9777304720403574e-05,
"loss": 1.8575,
"step": 176
},
{
"epoch": 0.29,
"learning_rate": 1.9769848527017608e-05,
"loss": 1.8222,
"step": 178
},
{
"epoch": 0.29,
"learning_rate": 1.9762271010271097e-05,
"loss": 1.959,
"step": 180
},
{
"epoch": 0.3,
"learning_rate": 1.975457226426272e-05,
"loss": 1.8217,
"step": 182
},
{
"epoch": 0.3,
"learning_rate": 1.974675238459659e-05,
"loss": 2.0096,
"step": 184
},
{
"epoch": 0.3,
"learning_rate": 1.9738811468381086e-05,
"loss": 1.9606,
"step": 186
},
{
"epoch": 0.31,
"learning_rate": 1.9730749614227617e-05,
"loss": 2.0809,
"step": 188
},
{
"epoch": 0.31,
"learning_rate": 1.9722566922249436e-05,
"loss": 2.1243,
"step": 190
},
{
"epoch": 0.31,
"learning_rate": 1.971426349406037e-05,
"loss": 1.9635,
"step": 192
},
{
"epoch": 0.32,
"learning_rate": 1.9705839432773555e-05,
"loss": 2.0439,
"step": 194
},
{
"epoch": 0.32,
"learning_rate": 1.9697294843000185e-05,
"loss": 2.2575,
"step": 196
},
{
"epoch": 0.32,
"learning_rate": 1.968862983084818e-05,
"loss": 2.2378,
"step": 198
},
{
"epoch": 0.33,
"learning_rate": 1.9679844503920888e-05,
"loss": 1.859,
"step": 200
},
{
"epoch": 0.33,
"learning_rate": 1.967093897131574e-05,
"loss": 1.7287,
"step": 202
},
{
"epoch": 0.33,
"learning_rate": 1.9661913343622894e-05,
"loss": 1.6376,
"step": 204
},
{
"epoch": 0.34,
"learning_rate": 1.965276773292388e-05,
"loss": 1.7034,
"step": 206
},
{
"epoch": 0.34,
"learning_rate": 1.964350225279018e-05,
"loss": 1.6158,
"step": 208
},
{
"epoch": 0.34,
"learning_rate": 1.963411701828184e-05,
"loss": 1.6504,
"step": 210
},
{
"epoch": 0.35,
"learning_rate": 1.9624612145946024e-05,
"loss": 1.7069,
"step": 212
},
{
"epoch": 0.35,
"learning_rate": 1.9614987753815584e-05,
"loss": 1.7062,
"step": 214
},
{
"epoch": 0.35,
"learning_rate": 1.9605243961407583e-05,
"loss": 1.8162,
"step": 216
},
{
"epoch": 0.36,
"learning_rate": 1.9595380889721813e-05,
"loss": 1.6725,
"step": 218
},
{
"epoch": 0.36,
"learning_rate": 1.9585398661239287e-05,
"loss": 1.8527,
"step": 220
},
{
"epoch": 0.36,
"learning_rate": 1.9575297399920742e-05,
"loss": 1.6978,
"step": 222
},
{
"epoch": 0.36,
"learning_rate": 1.9565077231205057e-05,
"loss": 1.7312,
"step": 224
},
{
"epoch": 0.37,
"learning_rate": 1.9554738282007736e-05,
"loss": 1.8961,
"step": 226
},
{
"epoch": 0.37,
"learning_rate": 1.9544280680719316e-05,
"loss": 1.7643,
"step": 228
},
{
"epoch": 0.37,
"learning_rate": 1.953370455720377e-05,
"loss": 1.8736,
"step": 230
},
{
"epoch": 0.38,
"learning_rate": 1.9523010042796885e-05,
"loss": 2.0634,
"step": 232
},
{
"epoch": 0.38,
"learning_rate": 1.9512197270304664e-05,
"loss": 1.886,
"step": 234
},
{
"epoch": 0.38,
"learning_rate": 1.9501266374001637e-05,
"loss": 1.9808,
"step": 236
},
{
"epoch": 0.39,
"learning_rate": 1.9490217489629226e-05,
"loss": 2.1238,
"step": 238
},
{
"epoch": 0.39,
"learning_rate": 1.947905075439404e-05,
"loss": 2.2318,
"step": 240
},
{
"epoch": 0.39,
"learning_rate": 1.9467766306966167e-05,
"loss": 2.2005,
"step": 242
},
{
"epoch": 0.4,
"learning_rate": 1.945636428747748e-05,
"loss": 2.334,
"step": 244
},
{
"epoch": 0.4,
"learning_rate": 1.9444844837519867e-05,
"loss": 2.1858,
"step": 246
},
{
"epoch": 0.4,
"learning_rate": 1.9433208100143486e-05,
"loss": 2.1054,
"step": 248
},
{
"epoch": 0.41,
"learning_rate": 1.942145421985499e-05,
"loss": 2.2089,
"step": 250
},
{
"epoch": 0.41,
"learning_rate": 1.9409583342615718e-05,
"loss": 1.6846,
"step": 252
},
{
"epoch": 0.41,
"learning_rate": 1.939759561583991e-05,
"loss": 1.5922,
"step": 254
},
{
"epoch": 0.42,
"learning_rate": 1.9385491188392855e-05,
"loss": 1.6762,
"step": 256
},
{
"epoch": 0.42,
"learning_rate": 1.9373270210589034e-05,
"loss": 1.7761,
"step": 258
},
{
"epoch": 0.42,
"learning_rate": 1.9360932834190285e-05,
"loss": 1.5138,
"step": 260
},
{
"epoch": 0.43,
"learning_rate": 1.934847921240389e-05,
"loss": 1.6493,
"step": 262
},
{
"epoch": 0.43,
"learning_rate": 1.933590949988069e-05,
"loss": 1.7205,
"step": 264
},
{
"epoch": 0.43,
"learning_rate": 1.9323223852713148e-05,
"loss": 1.839,
"step": 266
},
{
"epoch": 0.44,
"learning_rate": 1.9310422428433426e-05,
"loss": 1.7112,
"step": 268
},
{
"epoch": 0.44,
"learning_rate": 1.929750538601142e-05,
"loss": 1.7417,
"step": 270
},
{
"epoch": 0.44,
"learning_rate": 1.9284472885852787e-05,
"loss": 1.6419,
"step": 272
},
{
"epoch": 0.45,
"learning_rate": 1.9271325089796963e-05,
"loss": 1.7585,
"step": 274
},
{
"epoch": 0.45,
"learning_rate": 1.9258062161115134e-05,
"loss": 1.744,
"step": 276
},
{
"epoch": 0.45,
"learning_rate": 1.924468426450823e-05,
"loss": 1.8246,
"step": 278
},
{
"epoch": 0.46,
"learning_rate": 1.923119156610485e-05,
"loss": 2.0436,
"step": 280
},
{
"epoch": 0.46,
"learning_rate": 1.9217584233459245e-05,
"loss": 1.8373,
"step": 282
},
{
"epoch": 0.46,
"learning_rate": 1.9203862435549187e-05,
"loss": 1.8646,
"step": 284
},
{
"epoch": 0.47,
"learning_rate": 1.9190026342773904e-05,
"loss": 1.9658,
"step": 286
},
{
"epoch": 0.47,
"learning_rate": 1.917607612695196e-05,
"loss": 2.0826,
"step": 288
},
{
"epoch": 0.47,
"learning_rate": 1.9162011961319095e-05,
"loss": 2.1591,
"step": 290
},
{
"epoch": 0.48,
"learning_rate": 1.914783402052612e-05,
"loss": 2.154,
"step": 292
},
{
"epoch": 0.48,
"learning_rate": 1.9133542480636693e-05,
"loss": 2.0487,
"step": 294
},
{
"epoch": 0.48,
"learning_rate": 1.911913751912519e-05,
"loss": 2.1138,
"step": 296
},
{
"epoch": 0.49,
"learning_rate": 1.910461931487446e-05,
"loss": 2.2284,
"step": 298
},
{
"epoch": 0.49,
"learning_rate": 1.9089988048173612e-05,
"loss": 2.076,
"step": 300
},
{
"epoch": 0.49,
"learning_rate": 1.9075243900715786e-05,
"loss": 1.6895,
"step": 302
},
{
"epoch": 0.5,
"learning_rate": 1.9060387055595887e-05,
"loss": 1.6211,
"step": 304
},
{
"epoch": 0.5,
"learning_rate": 1.9045417697308322e-05,
"loss": 1.5078,
"step": 306
},
{
"epoch": 0.5,
"learning_rate": 1.90303360117447e-05,
"loss": 1.561,
"step": 308
},
{
"epoch": 0.5,
"learning_rate": 1.9015142186191525e-05,
"loss": 1.6423,
"step": 310
},
{
"epoch": 0.51,
"learning_rate": 1.8999836409327868e-05,
"loss": 1.677,
"step": 312
},
{
"epoch": 0.51,
"learning_rate": 1.898441887122303e-05,
"loss": 1.823,
"step": 314
},
{
"epoch": 0.51,
"learning_rate": 1.8968889763334183e-05,
"loss": 1.6767,
"step": 316
},
{
"epoch": 0.52,
"learning_rate": 1.8953249278503983e-05,
"loss": 1.6872,
"step": 318
},
{
"epoch": 0.52,
"learning_rate": 1.893749761095818e-05,
"loss": 1.7334,
"step": 320
},
{
"epoch": 0.52,
"learning_rate": 1.8921634956303214e-05,
"loss": 1.7819,
"step": 322
},
{
"epoch": 0.53,
"learning_rate": 1.890566151152377e-05,
"loss": 1.7185,
"step": 324
},
{
"epoch": 0.53,
"learning_rate": 1.888957747498035e-05,
"loss": 1.8429,
"step": 326
},
{
"epoch": 0.53,
"learning_rate": 1.8873383046406785e-05,
"loss": 1.8522,
"step": 328
},
{
"epoch": 0.54,
"learning_rate": 1.8857078426907783e-05,
"loss": 1.7076,
"step": 330
},
{
"epoch": 0.54,
"learning_rate": 1.8840663818956413e-05,
"loss": 1.8622,
"step": 332
},
{
"epoch": 0.54,
"learning_rate": 1.8824139426391603e-05,
"loss": 1.9649,
"step": 334
},
{
"epoch": 0.55,
"learning_rate": 1.880750545441559e-05,
"loss": 2.0755,
"step": 336
},
{
"epoch": 0.55,
"learning_rate": 1.8790762109591395e-05,
"loss": 2.0182,
"step": 338
},
{
"epoch": 0.55,
"learning_rate": 1.877390959984024e-05,
"loss": 2.0193,
"step": 340
},
{
"epoch": 0.56,
"learning_rate": 1.8756948134438973e-05,
"loss": 2.0283,
"step": 342
},
{
"epoch": 0.56,
"learning_rate": 1.873987792401747e-05,
"loss": 2.0209,
"step": 344
},
{
"epoch": 0.56,
"learning_rate": 1.8722699180556027e-05,
"loss": 2.2715,
"step": 346
},
{
"epoch": 0.57,
"learning_rate": 1.870541211738269e-05,
"loss": 2.1141,
"step": 348
},
{
"epoch": 0.57,
"learning_rate": 1.8688016949170672e-05,
"loss": 2.0019,
"step": 350
},
{
"epoch": 0.57,
"learning_rate": 1.8670513891935623e-05,
"loss": 1.633,
"step": 352
},
{
"epoch": 0.58,
"learning_rate": 1.8652903163032985e-05,
"loss": 1.7309,
"step": 354
},
{
"epoch": 0.58,
"learning_rate": 1.8635184981155282e-05,
"loss": 1.6854,
"step": 356
},
{
"epoch": 0.58,
"learning_rate": 1.8617359566329397e-05,
"loss": 1.4624,
"step": 358
},
{
"epoch": 0.59,
"learning_rate": 1.8599427139913855e-05,
"loss": 1.7174,
"step": 360
},
{
"epoch": 0.59,
"learning_rate": 1.8581387924596065e-05,
"loss": 1.7547,
"step": 362
},
{
"epoch": 0.59,
"learning_rate": 1.8563242144389546e-05,
"loss": 1.7919,
"step": 364
},
{
"epoch": 0.6,
"learning_rate": 1.854499002463117e-05,
"loss": 1.8004,
"step": 366
},
{
"epoch": 0.6,
"learning_rate": 1.852663179197834e-05,
"loss": 1.7906,
"step": 368
},
{
"epoch": 0.6,
"learning_rate": 1.850816767440619e-05,
"loss": 1.8483,
"step": 370
},
{
"epoch": 0.61,
"learning_rate": 1.8489597901204728e-05,
"loss": 1.7136,
"step": 372
},
{
"epoch": 0.61,
"learning_rate": 1.8470922702976034e-05,
"loss": 1.726,
"step": 374
},
{
"epoch": 0.61,
"learning_rate": 1.8452142311631364e-05,
"loss": 1.8759,
"step": 376
},
{
"epoch": 0.62,
"learning_rate": 1.8433256960388265e-05,
"loss": 1.8467,
"step": 378
},
{
"epoch": 0.62,
"learning_rate": 1.8414266883767704e-05,
"loss": 1.8555,
"step": 380
},
{
"epoch": 0.62,
"learning_rate": 1.8395172317591137e-05,
"loss": 1.9375,
"step": 382
},
{
"epoch": 0.63,
"learning_rate": 1.8375973498977586e-05,
"loss": 2.0685,
"step": 384
},
{
"epoch": 0.63,
"learning_rate": 1.8356670666340695e-05,
"loss": 1.967,
"step": 386
},
{
"epoch": 0.63,
"learning_rate": 1.833726405938577e-05,
"loss": 1.9947,
"step": 388
},
{
"epoch": 0.64,
"learning_rate": 1.83177539191068e-05,
"loss": 1.943,
"step": 390
},
{
"epoch": 0.64,
"learning_rate": 1.829814048778347e-05,
"loss": 2.0449,
"step": 392
},
{
"epoch": 0.64,
"learning_rate": 1.8278424008978136e-05,
"loss": 2.2705,
"step": 394
},
{
"epoch": 0.65,
"learning_rate": 1.8258604727532827e-05,
"loss": 2.2501,
"step": 396
},
{
"epoch": 0.65,
"learning_rate": 1.823868288956618e-05,
"loss": 2.0302,
"step": 398
},
{
"epoch": 0.65,
"learning_rate": 1.8218658742470393e-05,
"loss": 2.3523,
"step": 400
},
{
"epoch": 0.65,
"learning_rate": 1.819853253490816e-05,
"loss": 1.6453,
"step": 402
},
{
"epoch": 0.66,
"learning_rate": 1.817830451680957e-05,
"loss": 1.6131,
"step": 404
},
{
"epoch": 0.66,
"learning_rate": 1.8157974939369015e-05,
"loss": 1.6462,
"step": 406
},
{
"epoch": 0.66,
"learning_rate": 1.813754405504206e-05,
"loss": 1.5778,
"step": 408
},
{
"epoch": 0.67,
"learning_rate": 1.8117012117542317e-05,
"loss": 1.7071,
"step": 410
},
{
"epoch": 0.67,
"learning_rate": 1.809637938183829e-05,
"loss": 1.5606,
"step": 412
},
{
"epoch": 0.67,
"learning_rate": 1.8075646104150205e-05,
"loss": 1.8436,
"step": 414
},
{
"epoch": 0.68,
"learning_rate": 1.805481254194684e-05,
"loss": 1.8131,
"step": 416
},
{
"epoch": 0.68,
"learning_rate": 1.8033878953942315e-05,
"loss": 1.6493,
"step": 418
},
{
"epoch": 0.68,
"learning_rate": 1.8012845600092886e-05,
"loss": 1.737,
"step": 420
},
{
"epoch": 0.69,
"learning_rate": 1.7991712741593715e-05,
"loss": 1.721,
"step": 422
},
{
"epoch": 0.69,
"learning_rate": 1.7970480640875626e-05,
"loss": 1.7574,
"step": 424
},
{
"epoch": 0.69,
"learning_rate": 1.7949149561601853e-05,
"loss": 1.8218,
"step": 426
},
{
"epoch": 0.7,
"learning_rate": 1.7927719768664747e-05,
"loss": 1.8906,
"step": 428
},
{
"epoch": 0.7,
"learning_rate": 1.7906191528182516e-05,
"loss": 1.7087,
"step": 430
},
{
"epoch": 0.7,
"learning_rate": 1.788456510749589e-05,
"loss": 1.7493,
"step": 432
},
{
"epoch": 0.71,
"learning_rate": 1.7862840775164814e-05,
"loss": 1.7121,
"step": 434
},
{
"epoch": 0.71,
"learning_rate": 1.784101880096513e-05,
"loss": 2.0566,
"step": 436
},
{
"epoch": 0.71,
"learning_rate": 1.7819099455885195e-05,
"loss": 1.8518,
"step": 438
},
{
"epoch": 0.72,
"learning_rate": 1.779708301212253e-05,
"loss": 2.1761,
"step": 440
},
{
"epoch": 0.72,
"learning_rate": 1.7774969743080453e-05,
"loss": 2.1634,
"step": 442
},
{
"epoch": 0.72,
"learning_rate": 1.775275992336466e-05,
"loss": 2.1009,
"step": 444
},
{
"epoch": 0.73,
"learning_rate": 1.7730453828779842e-05,
"loss": 2.1546,
"step": 446
},
{
"epoch": 0.73,
"learning_rate": 1.7708051736326232e-05,
"loss": 2.2202,
"step": 448
},
{
"epoch": 0.73,
"learning_rate": 1.768555392419618e-05,
"loss": 1.9977,
"step": 450
},
{
"epoch": 0.74,
"learning_rate": 1.7662960671770694e-05,
"loss": 1.471,
"step": 452
},
{
"epoch": 0.74,
"learning_rate": 1.764027225961599e-05,
"loss": 1.5104,
"step": 454
},
{
"epoch": 0.74,
"learning_rate": 1.7617488969479968e-05,
"loss": 1.6735,
"step": 456
},
{
"epoch": 0.75,
"learning_rate": 1.759461108428875e-05,
"loss": 1.6133,
"step": 458
},
{
"epoch": 0.75,
"learning_rate": 1.757163888814315e-05,
"loss": 1.6513,
"step": 460
},
{
"epoch": 0.75,
"learning_rate": 1.7548572666315148e-05,
"loss": 1.7925,
"step": 462
},
{
"epoch": 0.76,
"learning_rate": 1.7525412705244348e-05,
"loss": 1.685,
"step": 464
},
{
"epoch": 0.76,
"learning_rate": 1.750215929253442e-05,
"loss": 1.6181,
"step": 466
},
{
"epoch": 0.76,
"learning_rate": 1.7478812716949533e-05,
"loss": 1.6964,
"step": 468
},
{
"epoch": 0.77,
"learning_rate": 1.7455373268410763e-05,
"loss": 1.7479,
"step": 470
},
{
"epoch": 0.77,
"learning_rate": 1.74318412379925e-05,
"loss": 1.7461,
"step": 472
},
{
"epoch": 0.77,
"learning_rate": 1.740821691791882e-05,
"loss": 1.7439,
"step": 474
},
{
"epoch": 0.78,
"learning_rate": 1.738450060155987e-05,
"loss": 1.6585,
"step": 476
},
{
"epoch": 0.78,
"learning_rate": 1.736069258342823e-05,
"loss": 1.708,
"step": 478
},
{
"epoch": 0.78,
"learning_rate": 1.7336793159175228e-05,
"loss": 1.9081,
"step": 480
},
{
"epoch": 0.79,
"learning_rate": 1.731280262558729e-05,
"loss": 2.0349,
"step": 482
},
{
"epoch": 0.79,
"learning_rate": 1.7288721280582263e-05,
"loss": 1.956,
"step": 484
},
{
"epoch": 0.79,
"learning_rate": 1.726454942320569e-05,
"loss": 1.8301,
"step": 486
},
{
"epoch": 0.79,
"learning_rate": 1.7240287353627117e-05,
"loss": 2.0108,
"step": 488
},
{
"epoch": 0.8,
"learning_rate": 1.721593537313635e-05,
"loss": 1.9919,
"step": 490
},
{
"epoch": 0.8,
"learning_rate": 1.719149378413973e-05,
"loss": 2.0082,
"step": 492
},
{
"epoch": 0.8,
"learning_rate": 1.7166962890156366e-05,
"loss": 2.1452,
"step": 494
},
{
"epoch": 0.81,
"learning_rate": 1.7142342995814364e-05,
"loss": 2.1019,
"step": 496
},
{
"epoch": 0.81,
"learning_rate": 1.711763440684706e-05,
"loss": 2.1886,
"step": 498
},
{
"epoch": 0.81,
"learning_rate": 1.70928374300892e-05,
"loss": 2.0478,
"step": 500
},
{
"epoch": 0.82,
"learning_rate": 1.706795237347315e-05,
"loss": 1.5012,
"step": 502
},
{
"epoch": 0.82,
"learning_rate": 1.7042979546025063e-05,
"loss": 1.492,
"step": 504
},
{
"epoch": 0.82,
"learning_rate": 1.701791925786104e-05,
"loss": 1.7338,
"step": 506
},
{
"epoch": 0.83,
"learning_rate": 1.6992771820183282e-05,
"loss": 1.5319,
"step": 508
},
{
"epoch": 0.83,
"learning_rate": 1.696753754527623e-05,
"loss": 1.8472,
"step": 510
},
{
"epoch": 0.83,
"learning_rate": 1.6942216746502675e-05,
"loss": 1.6705,
"step": 512
},
{
"epoch": 0.84,
"learning_rate": 1.6916809738299875e-05,
"loss": 1.7004,
"step": 514
},
{
"epoch": 0.84,
"learning_rate": 1.6891316836175658e-05,
"loss": 1.7979,
"step": 516
},
{
"epoch": 0.84,
"learning_rate": 1.686573835670448e-05,
"loss": 1.707,
"step": 518
},
{
"epoch": 0.85,
"learning_rate": 1.684007461752352e-05,
"loss": 1.7927,
"step": 520
},
{
"epoch": 0.85,
"learning_rate": 1.6814325937328717e-05,
"loss": 1.7515,
"step": 522
},
{
"epoch": 0.85,
"learning_rate": 1.6788492635870827e-05,
"loss": 1.6683,
"step": 524
},
{
"epoch": 0.86,
"learning_rate": 1.6762575033951438e-05,
"loss": 1.7289,
"step": 526
},
{
"epoch": 0.86,
"learning_rate": 1.6736573453418998e-05,
"loss": 1.8645,
"step": 528
},
{
"epoch": 0.86,
"learning_rate": 1.6710488217164803e-05,
"loss": 1.943,
"step": 530
},
{
"epoch": 0.87,
"learning_rate": 1.6684319649119018e-05,
"loss": 2.0221,
"step": 532
},
{
"epoch": 0.87,
"learning_rate": 1.665806807424661e-05,
"loss": 1.9465,
"step": 534
},
{
"epoch": 0.87,
"learning_rate": 1.6631733818543352e-05,
"loss": 1.9296,
"step": 536
},
{
"epoch": 0.88,
"learning_rate": 1.6605317209031758e-05,
"loss": 2.1713,
"step": 538
},
{
"epoch": 0.88,
"learning_rate": 1.657881857375702e-05,
"loss": 2.0983,
"step": 540
},
{
"epoch": 0.88,
"learning_rate": 1.6552238241782934e-05,
"loss": 2.0322,
"step": 542
},
{
"epoch": 0.89,
"learning_rate": 1.652557654318782e-05,
"loss": 2.1081,
"step": 544
},
{
"epoch": 0.89,
"learning_rate": 1.649883380906043e-05,
"loss": 2.2136,
"step": 546
},
{
"epoch": 0.89,
"learning_rate": 1.6472010371495808e-05,
"loss": 2.1303,
"step": 548
},
{
"epoch": 0.9,
"learning_rate": 1.6445106563591206e-05,
"loss": 1.9853,
"step": 550
},
{
"epoch": 0.9,
"learning_rate": 1.6418122719441903e-05,
"loss": 1.5762,
"step": 552
},
{
"epoch": 0.9,
"learning_rate": 1.63910591741371e-05,
"loss": 1.5862,
"step": 554
},
{
"epoch": 0.91,
"learning_rate": 1.636391626375572e-05,
"loss": 1.6162,
"step": 556
},
{
"epoch": 0.91,
"learning_rate": 1.6336694325362275e-05,
"loss": 1.5129,
"step": 558
},
{
"epoch": 0.91,
"learning_rate": 1.6309393697002637e-05,
"loss": 1.7277,
"step": 560
},
{
"epoch": 0.92,
"learning_rate": 1.6282014717699872e-05,
"loss": 1.7149,
"step": 562
},
{
"epoch": 0.92,
"learning_rate": 1.6254557727450016e-05,
"loss": 1.6317,
"step": 564
},
{
"epoch": 0.92,
"learning_rate": 1.6227023067217857e-05,
"loss": 1.7377,
"step": 566
},
{
"epoch": 0.93,
"learning_rate": 1.61994110789327e-05,
"loss": 1.8907,
"step": 568
},
{
"epoch": 0.93,
"learning_rate": 1.6171722105484116e-05,
"loss": 1.7045,
"step": 570
},
{
"epoch": 0.93,
"learning_rate": 1.61439564907177e-05,
"loss": 1.7665,
"step": 572
},
{
"epoch": 0.93,
"learning_rate": 1.611611457943077e-05,
"loss": 1.7919,
"step": 574
},
{
"epoch": 0.94,
"learning_rate": 1.6088196717368133e-05,
"loss": 1.8146,
"step": 576
},
{
"epoch": 0.94,
"learning_rate": 1.6060203251217742e-05,
"loss": 1.7641,
"step": 578
},
{
"epoch": 0.94,
"learning_rate": 1.6032134528606413e-05,
"loss": 1.8167,
"step": 580
},
{
"epoch": 0.95,
"learning_rate": 1.600399089809552e-05,
"loss": 1.9476,
"step": 582
},
{
"epoch": 0.95,
"learning_rate": 1.5975772709176638e-05,
"loss": 1.9411,
"step": 584
},
{
"epoch": 0.95,
"learning_rate": 1.5947480312267235e-05,
"loss": 1.9764,
"step": 586
},
{
"epoch": 0.96,
"learning_rate": 1.5919114058706295e-05,
"loss": 2.1555,
"step": 588
},
{
"epoch": 0.96,
"learning_rate": 1.589067430074996e-05,
"loss": 2.1399,
"step": 590
},
{
"epoch": 0.96,
"learning_rate": 1.5862161391567172e-05,
"loss": 2.2399,
"step": 592
},
{
"epoch": 0.97,
"learning_rate": 1.583357568523527e-05,
"loss": 2.2355,
"step": 594
},
{
"epoch": 0.97,
"learning_rate": 1.580491753673559e-05,
"loss": 2.0916,
"step": 596
},
{
"epoch": 0.97,
"learning_rate": 1.5776187301949083e-05,
"loss": 2.0709,
"step": 598
},
{
"epoch": 0.98,
"learning_rate": 1.574738533765186e-05,
"loss": 1.9886,
"step": 600
},
{
"epoch": 0.98,
"learning_rate": 1.571851200151079e-05,
"loss": 1.5965,
"step": 602
},
{
"epoch": 0.98,
"learning_rate": 1.5689567652079037e-05,
"loss": 1.673,
"step": 604
},
{
"epoch": 0.99,
"learning_rate": 1.5660552648791638e-05,
"loss": 1.8633,
"step": 606
},
{
"epoch": 0.99,
"learning_rate": 1.5631467351960994e-05,
"loss": 1.9587,
"step": 608
},
{
"epoch": 0.99,
"learning_rate": 1.5602312122772443e-05,
"loss": 1.7769,
"step": 610
},
{
"epoch": 1.0,
"learning_rate": 1.557308732327974e-05,
"loss": 2.2116,
"step": 612
},
{
"epoch": 1.0,
"learning_rate": 1.5543793316400575e-05,
"loss": 2.0675,
"step": 614
},
{
"epoch": 1.0,
"learning_rate": 1.5514430465912072e-05,
"loss": 1.5471,
"step": 616
},
{
"epoch": 1.01,
"learning_rate": 1.548499913644626e-05,
"loss": 1.6333,
"step": 618
},
{
"epoch": 1.01,
"learning_rate": 1.5455499693485547e-05,
"loss": 1.5103,
"step": 620
},
{
"epoch": 1.01,
"learning_rate": 1.5425932503358194e-05,
"loss": 1.6023,
"step": 622
},
{
"epoch": 1.02,
"learning_rate": 1.539629793323374e-05,
"loss": 1.729,
"step": 624
},
{
"epoch": 1.02,
"learning_rate": 1.536659635111848e-05,
"loss": 1.5536,
"step": 626
},
{
"epoch": 1.02,
"learning_rate": 1.5336828125850846e-05,
"loss": 1.7814,
"step": 628
},
{
"epoch": 1.03,
"learning_rate": 1.5306993627096873e-05,
"loss": 1.5771,
"step": 630
},
{
"epoch": 1.03,
"learning_rate": 1.527709322534559e-05,
"loss": 1.689,
"step": 632
},
{
"epoch": 1.03,
"learning_rate": 1.5247127291904415e-05,
"loss": 1.7706,
"step": 634
},
{
"epoch": 1.04,
"learning_rate": 1.5217096198894545e-05,
"loss": 1.7094,
"step": 636
},
{
"epoch": 1.04,
"learning_rate": 1.5187000319246337e-05,
"loss": 1.6026,
"step": 638
},
{
"epoch": 1.04,
"learning_rate": 1.5156840026694677e-05,
"loss": 1.5624,
"step": 640
},
{
"epoch": 1.05,
"learning_rate": 1.5126615695774352e-05,
"loss": 1.9486,
"step": 642
},
{
"epoch": 1.05,
"learning_rate": 1.5096327701815373e-05,
"loss": 1.8097,
"step": 644
},
{
"epoch": 1.05,
"learning_rate": 1.5065976420938331e-05,
"loss": 1.7679,
"step": 646
},
{
"epoch": 1.06,
"learning_rate": 1.5035562230049725e-05,
"loss": 1.8091,
"step": 648
},
{
"epoch": 1.06,
"learning_rate": 1.500508550683728e-05,
"loss": 1.9126,
"step": 650
},
{
"epoch": 1.06,
"learning_rate": 1.4974546629765247e-05,
"loss": 2.1112,
"step": 652
},
{
"epoch": 1.07,
"learning_rate": 1.494394597806973e-05,
"loss": 2.1882,
"step": 654
},
{
"epoch": 1.07,
"learning_rate": 1.4913283931753943e-05,
"loss": 1.9756,
"step": 656
},
{
"epoch": 1.07,
"learning_rate": 1.4882560871583514e-05,
"loss": 2.0534,
"step": 658
},
{
"epoch": 1.08,
"learning_rate": 1.4851777179081752e-05,
"loss": 2.14,
"step": 660
},
{
"epoch": 1.08,
"learning_rate": 1.4820933236524897e-05,
"loss": 2.1471,
"step": 662
},
{
"epoch": 1.08,
"learning_rate": 1.4790029426937394e-05,
"loss": 2.1118,
"step": 664
},
{
"epoch": 1.08,
"learning_rate": 1.4759066134087117e-05,
"loss": 1.529,
"step": 666
},
{
"epoch": 1.09,
"learning_rate": 1.472804374248062e-05,
"loss": 1.5823,
"step": 668
},
{
"epoch": 1.09,
"learning_rate": 1.4696962637358347e-05,
"loss": 1.6219,
"step": 670
},
{
"epoch": 1.09,
"learning_rate": 1.4665823204689856e-05,
"loss": 1.6208,
"step": 672
},
{
"epoch": 1.1,
"learning_rate": 1.463462583116902e-05,
"loss": 1.5877,
"step": 674
},
{
"epoch": 1.1,
"learning_rate": 1.4603370904209244e-05,
"loss": 1.6907,
"step": 676
},
{
"epoch": 1.1,
"learning_rate": 1.4572058811938625e-05,
"loss": 1.7026,
"step": 678
},
{
"epoch": 1.11,
"learning_rate": 1.4540689943195157e-05,
"loss": 1.7639,
"step": 680
},
{
"epoch": 1.11,
"learning_rate": 1.4509264687521883e-05,
"loss": 1.611,
"step": 682
},
{
"epoch": 1.11,
"learning_rate": 1.4477783435162071e-05,
"loss": 1.6413,
"step": 684
},
{
"epoch": 1.12,
"learning_rate": 1.444624657705437e-05,
"loss": 1.7311,
"step": 686
},
{
"epoch": 1.12,
"learning_rate": 1.441465450482794e-05,
"loss": 1.7851,
"step": 688
},
{
"epoch": 1.12,
"learning_rate": 1.4383007610797603e-05,
"loss": 1.8824,
"step": 690
},
{
"epoch": 1.13,
"learning_rate": 1.4351306287958963e-05,
"loss": 1.851,
"step": 692
},
{
"epoch": 1.13,
"learning_rate": 1.431955092998353e-05,
"loss": 1.8574,
"step": 694
},
{
"epoch": 1.13,
"learning_rate": 1.428774193121383e-05,
"loss": 1.7716,
"step": 696
},
{
"epoch": 1.14,
"learning_rate": 1.4255879686658508e-05,
"loss": 1.8968,
"step": 698
},
{
"epoch": 1.14,
"learning_rate": 1.4223964591987423e-05,
"loss": 2.0786,
"step": 700
},
{
"epoch": 1.14,
"learning_rate": 1.419199704352673e-05,
"loss": 1.8951,
"step": 702
},
{
"epoch": 1.15,
"learning_rate": 1.4159977438253971e-05,
"loss": 1.985,
"step": 704
},
{
"epoch": 1.15,
"learning_rate": 1.412790617379313e-05,
"loss": 2.0482,
"step": 706
},
{
"epoch": 1.15,
"learning_rate": 1.4095783648409703e-05,
"loss": 2.1777,
"step": 708
},
{
"epoch": 1.16,
"learning_rate": 1.4063610261005753e-05,
"loss": 1.9897,
"step": 710
},
{
"epoch": 1.16,
"learning_rate": 1.4031386411114958e-05,
"loss": 2.1258,
"step": 712
},
{
"epoch": 1.16,
"learning_rate": 1.3999112498897638e-05,
"loss": 2.1569,
"step": 714
},
{
"epoch": 1.17,
"learning_rate": 1.39667889251358e-05,
"loss": 1.6332,
"step": 716
},
{
"epoch": 1.17,
"learning_rate": 1.3934416091228158e-05,
"loss": 1.4974,
"step": 718
},
{
"epoch": 1.17,
"learning_rate": 1.3901994399185134e-05,
"loss": 1.629,
"step": 720
},
{
"epoch": 1.18,
"learning_rate": 1.3869524251623889e-05,
"loss": 1.6409,
"step": 722
},
{
"epoch": 1.18,
"learning_rate": 1.3837006051763306e-05,
"loss": 1.7071,
"step": 724
},
{
"epoch": 1.18,
"learning_rate": 1.3804440203418991e-05,
"loss": 1.7401,
"step": 726
},
{
"epoch": 1.19,
"learning_rate": 1.377182711099825e-05,
"loss": 1.6579,
"step": 728
},
{
"epoch": 1.19,
"learning_rate": 1.373916717949508e-05,
"loss": 1.7399,
"step": 730
},
{
"epoch": 1.19,
"learning_rate": 1.3706460814485127e-05,
"loss": 1.7,
"step": 732
},
{
"epoch": 1.2,
"learning_rate": 1.3673708422120658e-05,
"loss": 1.7109,
"step": 734
},
{
"epoch": 1.2,
"learning_rate": 1.364091040912551e-05,
"loss": 1.6352,
"step": 736
},
{
"epoch": 1.2,
"learning_rate": 1.3608067182790046e-05,
"loss": 1.5825,
"step": 738
},
{
"epoch": 1.21,
"learning_rate": 1.35751791509661e-05,
"loss": 1.7866,
"step": 740
},
{
"epoch": 1.21,
"learning_rate": 1.3542246722061897e-05,
"loss": 1.8663,
"step": 742
},
{
"epoch": 1.21,
"learning_rate": 1.3509270305036999e-05,
"loss": 1.7922,
"step": 744
},
{
"epoch": 1.22,
"learning_rate": 1.3476250309397214e-05,
"loss": 1.8688,
"step": 746
},
{
"epoch": 1.22,
"learning_rate": 1.3443187145189517e-05,
"loss": 1.98,
"step": 748
},
{
"epoch": 1.22,
"learning_rate": 1.341008122299696e-05,
"loss": 2.1442,
"step": 750
},
{
"epoch": 1.22,
"learning_rate": 1.3376932953933562e-05,
"loss": 2.0696,
"step": 752
},
{
"epoch": 1.23,
"learning_rate": 1.3343742749639214e-05,
"loss": 2.0473,
"step": 754
},
{
"epoch": 1.23,
"learning_rate": 1.3310511022274572e-05,
"loss": 2.0118,
"step": 756
},
{
"epoch": 1.23,
"learning_rate": 1.3277238184515924e-05,
"loss": 2.0139,
"step": 758
},
{
"epoch": 1.24,
"learning_rate": 1.3243924649550073e-05,
"loss": 2.0929,
"step": 760
},
{
"epoch": 1.24,
"learning_rate": 1.3210570831069207e-05,
"loss": 2.2703,
"step": 762
},
{
"epoch": 1.24,
"learning_rate": 1.317717714326576e-05,
"loss": 2.0147,
"step": 764
},
{
"epoch": 1.25,
"learning_rate": 1.314374400082727e-05,
"loss": 1.5511,
"step": 766
},
{
"epoch": 1.25,
"learning_rate": 1.3110271818931226e-05,
"loss": 1.554,
"step": 768
},
{
"epoch": 1.25,
"learning_rate": 1.3076761013239921e-05,
"loss": 1.6176,
"step": 770
},
{
"epoch": 1.26,
"learning_rate": 1.3043211999895273e-05,
"loss": 1.6345,
"step": 772
},
{
"epoch": 1.26,
"learning_rate": 1.3009625195513678e-05,
"loss": 1.7802,
"step": 774
},
{
"epoch": 1.26,
"learning_rate": 1.2976001017180817e-05,
"loss": 1.7645,
"step": 776
},
{
"epoch": 1.27,
"learning_rate": 1.2942339882446497e-05,
"loss": 1.7157,
"step": 778
},
{
"epoch": 1.27,
"learning_rate": 1.2908642209319446e-05,
"loss": 1.8843,
"step": 780
},
{
"epoch": 1.27,
"learning_rate": 1.2874908416262137e-05,
"loss": 1.7453,
"step": 782
},
{
"epoch": 1.28,
"learning_rate": 1.2841138922185577e-05,
"loss": 1.8738,
"step": 784
},
{
"epoch": 1.28,
"learning_rate": 1.2807334146444124e-05,
"loss": 1.6823,
"step": 786
},
{
"epoch": 1.28,
"learning_rate": 1.277349450883026e-05,
"loss": 1.7135,
"step": 788
},
{
"epoch": 1.29,
"learning_rate": 1.273962042956939e-05,
"loss": 1.7416,
"step": 790
},
{
"epoch": 1.29,
"learning_rate": 1.2705712329314623e-05,
"loss": 1.6427,
"step": 792
},
{
"epoch": 1.29,
"learning_rate": 1.2671770629141542e-05,
"loss": 1.7839,
"step": 794
},
{
"epoch": 1.3,
"learning_rate": 1.263779575054298e-05,
"loss": 1.8602,
"step": 796
},
{
"epoch": 1.3,
"learning_rate": 1.260378811542378e-05,
"loss": 1.9448,
"step": 798
},
{
"epoch": 1.3,
"learning_rate": 1.2569748146095569e-05,
"loss": 1.9093,
"step": 800
},
{
"epoch": 1.31,
"learning_rate": 1.2535676265271495e-05,
"loss": 1.9411,
"step": 802
},
{
"epoch": 1.31,
"learning_rate": 1.2501572896060994e-05,
"loss": 2.0018,
"step": 804
},
{
"epoch": 1.31,
"learning_rate": 1.246743846196453e-05,
"loss": 2.1838,
"step": 806
},
{
"epoch": 1.32,
"learning_rate": 1.2433273386868327e-05,
"loss": 2.3012,
"step": 808
},
{
"epoch": 1.32,
"learning_rate": 1.2399078095039124e-05,
"loss": 2.0348,
"step": 810
},
{
"epoch": 1.32,
"learning_rate": 1.2364853011118887e-05,
"loss": 2.0092,
"step": 812
},
{
"epoch": 1.33,
"learning_rate": 1.2330598560119547e-05,
"loss": 1.8901,
"step": 814
},
{
"epoch": 1.33,
"learning_rate": 1.2296315167417728e-05,
"loss": 1.5123,
"step": 816
},
{
"epoch": 1.33,
"learning_rate": 1.226200325874944e-05,
"loss": 1.5888,
"step": 818
},
{
"epoch": 1.34,
"learning_rate": 1.2227663260204818e-05,
"loss": 1.504,
"step": 820
},
{
"epoch": 1.34,
"learning_rate": 1.2193295598222825e-05,
"loss": 1.5482,
"step": 822
},
{
"epoch": 1.34,
"learning_rate": 1.2158900699585943e-05,
"loss": 1.6201,
"step": 824
},
{
"epoch": 1.35,
"learning_rate": 1.2124478991414885e-05,
"loss": 1.6641,
"step": 826
},
{
"epoch": 1.35,
"learning_rate": 1.2090030901163297e-05,
"loss": 1.7015,
"step": 828
},
{
"epoch": 1.35,
"learning_rate": 1.2055556856612429e-05,
"loss": 1.7003,
"step": 830
},
{
"epoch": 1.36,
"learning_rate": 1.2021057285865845e-05,
"loss": 1.6527,
"step": 832
},
{
"epoch": 1.36,
"learning_rate": 1.198653261734409e-05,
"loss": 1.7511,
"step": 834
},
{
"epoch": 1.36,
"learning_rate": 1.1951983279779382e-05,
"loss": 1.6885,
"step": 836
},
{
"epoch": 1.36,
"learning_rate": 1.1917409702210283e-05,
"loss": 1.7034,
"step": 838
},
{
"epoch": 1.37,
"learning_rate": 1.1882812313976362e-05,
"loss": 1.6924,
"step": 840
},
{
"epoch": 1.37,
"learning_rate": 1.1848191544712883e-05,
"loss": 1.7866,
"step": 842
},
{
"epoch": 1.37,
"learning_rate": 1.1813547824345453e-05,
"loss": 1.7805,
"step": 844
},
{
"epoch": 1.38,
"learning_rate": 1.1778881583084689e-05,
"loss": 1.868,
"step": 846
},
{
"epoch": 1.38,
"learning_rate": 1.1744193251420877e-05,
"loss": 1.9408,
"step": 848
},
{
"epoch": 1.38,
"learning_rate": 1.170948326011863e-05,
"loss": 1.9899,
"step": 850
},
{
"epoch": 1.39,
"learning_rate": 1.1674752040211521e-05,
"loss": 2.0072,
"step": 852
},
{
"epoch": 1.39,
"learning_rate": 1.1640000022996755e-05,
"loss": 1.9553,
"step": 854
},
{
"epoch": 1.39,
"learning_rate": 1.1605227640029795e-05,
"loss": 2.1992,
"step": 856
},
{
"epoch": 1.4,
"learning_rate": 1.1570435323119006e-05,
"loss": 2.0417,
"step": 858
},
{
"epoch": 1.4,
"learning_rate": 1.1535623504320308e-05,
"loss": 2.2629,
"step": 860
},
{
"epoch": 1.4,
"learning_rate": 1.1500792615931781e-05,
"loss": 2.0293,
"step": 862
},
{
"epoch": 1.41,
"learning_rate": 1.1465943090488326e-05,
"loss": 2.0421,
"step": 864
},
{
"epoch": 1.41,
"learning_rate": 1.1431075360756277e-05,
"loss": 1.5993,
"step": 866
},
{
"epoch": 1.41,
"learning_rate": 1.1396189859728032e-05,
"loss": 1.5874,
"step": 868
},
{
"epoch": 1.42,
"learning_rate": 1.1361287020616675e-05,
"loss": 1.6249,
"step": 870
},
{
"epoch": 1.42,
"learning_rate": 1.1326367276850605e-05,
"loss": 1.5156,
"step": 872
},
{
"epoch": 1.42,
"learning_rate": 1.1291431062068128e-05,
"loss": 1.7634,
"step": 874
},
{
"epoch": 1.43,
"learning_rate": 1.1256478810112105e-05,
"loss": 1.6592,
"step": 876
},
{
"epoch": 1.43,
"learning_rate": 1.1221510955024542e-05,
"loss": 1.6603,
"step": 878
},
{
"epoch": 1.43,
"learning_rate": 1.1186527931041205e-05,
"loss": 1.6741,
"step": 880
},
{
"epoch": 1.44,
"learning_rate": 1.1151530172586238e-05,
"loss": 1.8044,
"step": 882
},
{
"epoch": 1.44,
"learning_rate": 1.1116518114266757e-05,
"loss": 1.7933,
"step": 884
},
{
"epoch": 1.44,
"learning_rate": 1.1081492190867445e-05,
"loss": 1.7928,
"step": 886
},
{
"epoch": 1.45,
"learning_rate": 1.1046452837345174e-05,
"loss": 1.6921,
"step": 888
},
{
"epoch": 1.45,
"learning_rate": 1.1011400488823594e-05,
"loss": 1.8955,
"step": 890
},
{
"epoch": 1.45,
"learning_rate": 1.097633558058772e-05,
"loss": 1.6873,
"step": 892
},
{
"epoch": 1.46,
"learning_rate": 1.094125854807855e-05,
"loss": 1.701,
"step": 894
},
{
"epoch": 1.46,
"learning_rate": 1.0906169826887625e-05,
"loss": 1.9367,
"step": 896
},
{
"epoch": 1.46,
"learning_rate": 1.0871069852751653e-05,
"loss": 2.0128,
"step": 898
},
{
"epoch": 1.47,
"learning_rate": 1.083595906154707e-05,
"loss": 1.8403,
"step": 900
},
{
"epoch": 1.47,
"learning_rate": 1.080083788928465e-05,
"loss": 1.8517,
"step": 902
},
{
"epoch": 1.47,
"learning_rate": 1.0765706772104076e-05,
"loss": 1.9253,
"step": 904
},
{
"epoch": 1.48,
"learning_rate": 1.0730566146268533e-05,
"loss": 2.1538,
"step": 906
},
{
"epoch": 1.48,
"learning_rate": 1.0695416448159274e-05,
"loss": 2.1409,
"step": 908
},
{
"epoch": 1.48,
"learning_rate": 1.0660258114270227e-05,
"loss": 2.2853,
"step": 910
},
{
"epoch": 1.49,
"learning_rate": 1.062509158120255e-05,
"loss": 2.0466,
"step": 912
},
{
"epoch": 1.49,
"learning_rate": 1.0589917285659222e-05,
"loss": 1.807,
"step": 914
},
{
"epoch": 1.49,
"learning_rate": 1.0554735664439623e-05,
"loss": 1.795,
"step": 916
},
{
"epoch": 1.5,
"learning_rate": 1.0519547154434105e-05,
"loss": 1.5502,
"step": 918
},
{
"epoch": 1.5,
"learning_rate": 1.048435219261855e-05,
"loss": 1.6967,
"step": 920
},
{
"epoch": 1.5,
"learning_rate": 1.0449151216048976e-05,
"loss": 1.6531,
"step": 922
},
{
"epoch": 1.51,
"learning_rate": 1.0413944661856084e-05,
"loss": 1.6669,
"step": 924
},
{
"epoch": 1.51,
"learning_rate": 1.037873296723985e-05,
"loss": 1.8114,
"step": 926
},
{
"epoch": 1.51,
"learning_rate": 1.0343516569464076e-05,
"loss": 1.6277,
"step": 928
},
{
"epoch": 1.51,
"learning_rate": 1.0308295905850963e-05,
"loss": 1.6846,
"step": 930
},
{
"epoch": 1.52,
"learning_rate": 1.0273071413775695e-05,
"loss": 1.647,
"step": 932
},
{
"epoch": 1.52,
"learning_rate": 1.0237843530660996e-05,
"loss": 1.8631,
"step": 934
},
{
"epoch": 1.52,
"learning_rate": 1.02026126939717e-05,
"loss": 1.6792,
"step": 936
},
{
"epoch": 1.53,
"learning_rate": 1.0167379341209323e-05,
"loss": 1.7413,
"step": 938
},
{
"epoch": 1.53,
"learning_rate": 1.0132143909906617e-05,
"loss": 1.6664,
"step": 940
},
{
"epoch": 1.53,
"learning_rate": 1.009690683762215e-05,
"loss": 1.7221,
"step": 942
},
{
"epoch": 1.54,
"learning_rate": 1.0061668561934869e-05,
"loss": 1.8029,
"step": 944
},
{
"epoch": 1.54,
"learning_rate": 1.0026429520438665e-05,
"loss": 1.9112,
"step": 946
},
{
"epoch": 1.54,
"learning_rate": 9.991190150736941e-06,
"loss": 2.031,
"step": 948
},
{
"epoch": 1.55,
"learning_rate": 9.955950890437174e-06,
"loss": 1.9326,
"step": 950
},
{
"epoch": 1.55,
"learning_rate": 9.920712177145475e-06,
"loss": 1.8636,
"step": 952
},
{
"epoch": 1.55,
"learning_rate": 9.88547444846117e-06,
"loss": 2.1533,
"step": 954
},
{
"epoch": 1.56,
"learning_rate": 9.850238141971363e-06,
"loss": 1.9499,
"step": 956
},
{
"epoch": 1.56,
"learning_rate": 9.815003695245482e-06,
"loss": 2.0289,
"step": 958
},
{
"epoch": 1.56,
"learning_rate": 9.779771545829878e-06,
"loss": 1.9591,
"step": 960
},
{
"epoch": 1.57,
"learning_rate": 9.744542131242359e-06,
"loss": 2.0738,
"step": 962
},
{
"epoch": 1.57,
"learning_rate": 9.709315888966773e-06,
"loss": 1.7359,
"step": 964
},
{
"epoch": 1.57,
"learning_rate": 9.67409325644759e-06,
"loss": 1.6213,
"step": 966
},
{
"epoch": 1.58,
"learning_rate": 9.638874671084428e-06,
"loss": 1.6477,
"step": 968
},
{
"epoch": 1.58,
"learning_rate": 9.603660570226673e-06,
"loss": 1.5659,
"step": 970
},
{
"epoch": 1.58,
"learning_rate": 9.568451391167995e-06,
"loss": 1.5428,
"step": 972
},
{
"epoch": 1.59,
"learning_rate": 9.533247571140971e-06,
"loss": 1.5364,
"step": 974
},
{
"epoch": 1.59,
"learning_rate": 9.498049547311611e-06,
"loss": 1.6608,
"step": 976
},
{
"epoch": 1.59,
"learning_rate": 9.46285775677395e-06,
"loss": 1.6773,
"step": 978
},
{
"epoch": 1.6,
"learning_rate": 9.427672636544624e-06,
"loss": 1.7897,
"step": 980
},
{
"epoch": 1.6,
"learning_rate": 9.392494623557425e-06,
"loss": 1.72,
"step": 982
},
{
"epoch": 1.6,
"learning_rate": 9.3573241546579e-06,
"loss": 1.5833,
"step": 984
},
{
"epoch": 1.61,
"learning_rate": 9.322161666597903e-06,
"loss": 1.7148,
"step": 986
},
{
"epoch": 1.61,
"learning_rate": 9.287007596030178e-06,
"loss": 1.6607,
"step": 988
},
{
"epoch": 1.61,
"learning_rate": 9.251862379502952e-06,
"loss": 1.8589,
"step": 990
},
{
"epoch": 1.62,
"learning_rate": 9.216726453454486e-06,
"loss": 1.9797,
"step": 992
},
{
"epoch": 1.62,
"learning_rate": 9.181600254207685e-06,
"loss": 1.7569,
"step": 994
},
{
"epoch": 1.62,
"learning_rate": 9.146484217964656e-06,
"loss": 1.7409,
"step": 996
},
{
"epoch": 1.63,
"learning_rate": 9.111378780801298e-06,
"loss": 1.843,
"step": 998
},
{
"epoch": 1.63,
"learning_rate": 9.076284378661904e-06,
"loss": 1.9225,
"step": 1000
},
{
"epoch": 1.63,
"learning_rate": 9.04120144735371e-06,
"loss": 1.9379,
"step": 1002
},
{
"epoch": 1.64,
"learning_rate": 9.00613042254153e-06,
"loss": 2.0141,
"step": 1004
},
{
"epoch": 1.64,
"learning_rate": 8.971071739742301e-06,
"loss": 2.096,
"step": 1006
},
{
"epoch": 1.64,
"learning_rate": 8.936025834319707e-06,
"loss": 2.203,
"step": 1008
},
{
"epoch": 1.65,
"learning_rate": 8.900993141478759e-06,
"loss": 2.1307,
"step": 1010
},
{
"epoch": 1.65,
"learning_rate": 8.865974096260383e-06,
"loss": 1.9568,
"step": 1012
},
{
"epoch": 1.65,
"learning_rate": 8.830969133536046e-06,
"loss": 1.9975,
"step": 1014
},
{
"epoch": 1.65,
"learning_rate": 8.795978688002313e-06,
"loss": 1.7385,
"step": 1016
},
{
"epoch": 1.66,
"learning_rate": 8.761003194175494e-06,
"loss": 1.5292,
"step": 1018
},
{
"epoch": 1.66,
"learning_rate": 8.726043086386215e-06,
"loss": 1.6952,
"step": 1020
},
{
"epoch": 1.66,
"learning_rate": 8.691098798774033e-06,
"loss": 1.5524,
"step": 1022
},
{
"epoch": 1.67,
"learning_rate": 8.656170765282062e-06,
"loss": 1.5926,
"step": 1024
},
{
"epoch": 1.67,
"learning_rate": 8.621259419651552e-06,
"loss": 1.707,
"step": 1026
},
{
"epoch": 1.67,
"learning_rate": 8.586365195416532e-06,
"loss": 1.6256,
"step": 1028
},
{
"epoch": 1.68,
"learning_rate": 8.55148852589842e-06,
"loss": 1.6525,
"step": 1030
},
{
"epoch": 1.68,
"learning_rate": 8.516629844200618e-06,
"loss": 1.6297,
"step": 1032
},
{
"epoch": 1.68,
"learning_rate": 8.481789583203176e-06,
"loss": 1.7687,
"step": 1034
},
{
"epoch": 1.69,
"learning_rate": 8.446968175557373e-06,
"loss": 1.6962,
"step": 1036
},
{
"epoch": 1.69,
"learning_rate": 8.412166053680377e-06,
"loss": 1.7744,
"step": 1038
},
{
"epoch": 1.69,
"learning_rate": 8.377383649749862e-06,
"loss": 1.7246,
"step": 1040
},
{
"epoch": 1.7,
"learning_rate": 8.342621395698634e-06,
"loss": 1.9096,
"step": 1042
},
{
"epoch": 1.7,
"learning_rate": 8.307879723209281e-06,
"loss": 1.815,
"step": 1044
},
{
"epoch": 1.7,
"learning_rate": 8.2731590637088e-06,
"loss": 1.7844,
"step": 1046
},
{
"epoch": 1.71,
"learning_rate": 8.238459848363262e-06,
"loss": 1.9144,
"step": 1048
},
{
"epoch": 1.71,
"learning_rate": 8.203782508072417e-06,
"loss": 1.9194,
"step": 1050
},
{
"epoch": 1.71,
"learning_rate": 8.169127473464387e-06,
"loss": 1.9421,
"step": 1052
},
{
"epoch": 1.72,
"learning_rate": 8.134495174890295e-06,
"loss": 2.1128,
"step": 1054
},
{
"epoch": 1.72,
"learning_rate": 8.099886042418914e-06,
"loss": 2.0412,
"step": 1056
},
{
"epoch": 1.72,
"learning_rate": 8.065300505831353e-06,
"loss": 2.0167,
"step": 1058
},
{
"epoch": 1.73,
"learning_rate": 8.030738994615687e-06,
"loss": 2.101,
"step": 1060
},
{
"epoch": 1.73,
"learning_rate": 7.996201937961659e-06,
"loss": 2.1433,
"step": 1062
},
{
"epoch": 1.73,
"learning_rate": 7.96168976475532e-06,
"loss": 1.8789,
"step": 1064
},
{
"epoch": 1.74,
"learning_rate": 7.92720290357371e-06,
"loss": 1.5519,
"step": 1066
},
{
"epoch": 1.74,
"learning_rate": 7.89274178267956e-06,
"loss": 1.5451,
"step": 1068
},
{
"epoch": 1.74,
"learning_rate": 7.858306830015929e-06,
"loss": 1.4921,
"step": 1070
},
{
"epoch": 1.75,
"learning_rate": 7.823898473200935e-06,
"loss": 1.6422,
"step": 1072
},
{
"epoch": 1.75,
"learning_rate": 7.789517139522424e-06,
"loss": 1.7339,
"step": 1074
},
{
"epoch": 1.75,
"learning_rate": 7.755163255932654e-06,
"loss": 1.7226,
"step": 1076
},
{
"epoch": 1.76,
"learning_rate": 7.720837249043008e-06,
"loss": 1.6448,
"step": 1078
},
{
"epoch": 1.76,
"learning_rate": 7.686539545118694e-06,
"loss": 1.5942,
"step": 1080
},
{
"epoch": 1.76,
"learning_rate": 7.652270570073447e-06,
"loss": 1.6787,
"step": 1082
},
{
"epoch": 1.77,
"learning_rate": 7.618030749464255e-06,
"loss": 1.8133,
"step": 1084
},
{
"epoch": 1.77,
"learning_rate": 7.5838205084860415e-06,
"loss": 1.7191,
"step": 1086
},
{
"epoch": 1.77,
"learning_rate": 7.5496402719664245e-06,
"loss": 1.7413,
"step": 1088
},
{
"epoch": 1.78,
"learning_rate": 7.5154904643604045e-06,
"loss": 1.7457,
"step": 1090
},
{
"epoch": 1.78,
"learning_rate": 7.481371509745128e-06,
"loss": 1.7851,
"step": 1092
},
{
"epoch": 1.78,
"learning_rate": 7.447283831814603e-06,
"loss": 1.8332,
"step": 1094
},
{
"epoch": 1.79,
"learning_rate": 7.413227853874425e-06,
"loss": 1.8734,
"step": 1096
},
{
"epoch": 1.79,
"learning_rate": 7.379203998836551e-06,
"loss": 1.9721,
"step": 1098
},
{
"epoch": 1.79,
"learning_rate": 7.345212689214014e-06,
"loss": 1.8037,
"step": 1100
},
{
"epoch": 1.79,
"learning_rate": 7.311254347115717e-06,
"loss": 2.178,
"step": 1102
},
{
"epoch": 1.8,
"learning_rate": 7.277329394241141e-06,
"loss": 2.1488,
"step": 1104
},
{
"epoch": 1.8,
"learning_rate": 7.243438251875157e-06,
"loss": 2.1007,
"step": 1106
},
{
"epoch": 1.8,
"learning_rate": 7.209581340882763e-06,
"loss": 2.2559,
"step": 1108
},
{
"epoch": 1.81,
"learning_rate": 7.175759081703863e-06,
"loss": 2.1401,
"step": 1110
},
{
"epoch": 1.81,
"learning_rate": 7.14197189434806e-06,
"loss": 2.0104,
"step": 1112
},
{
"epoch": 1.81,
"learning_rate": 7.108220198389415e-06,
"loss": 1.7817,
"step": 1114
},
{
"epoch": 1.82,
"learning_rate": 7.074504412961267e-06,
"loss": 1.5753,
"step": 1116
},
{
"epoch": 1.82,
"learning_rate": 7.040824956751012e-06,
"loss": 1.6232,
"step": 1118
},
{
"epoch": 1.82,
"learning_rate": 7.007182247994893e-06,
"loss": 1.6046,
"step": 1120
},
{
"epoch": 1.83,
"learning_rate": 6.973576704472829e-06,
"loss": 1.6074,
"step": 1122
},
{
"epoch": 1.83,
"learning_rate": 6.9400087435032025e-06,
"loss": 1.5879,
"step": 1124
},
{
"epoch": 1.83,
"learning_rate": 6.906478781937708e-06,
"loss": 1.5187,
"step": 1126
},
{
"epoch": 1.84,
"learning_rate": 6.872987236156151e-06,
"loss": 1.7352,
"step": 1128
},
{
"epoch": 1.84,
"learning_rate": 6.839534522061278e-06,
"loss": 1.7378,
"step": 1130
},
{
"epoch": 1.84,
"learning_rate": 6.806121055073626e-06,
"loss": 1.7823,
"step": 1132
},
{
"epoch": 1.85,
"learning_rate": 6.772747250126349e-06,
"loss": 1.6373,
"step": 1134
},
{
"epoch": 1.85,
"learning_rate": 6.739413521660079e-06,
"loss": 1.7084,
"step": 1136
},
{
"epoch": 1.85,
"learning_rate": 6.706120283617773e-06,
"loss": 1.7693,
"step": 1138
},
{
"epoch": 1.86,
"learning_rate": 6.6728679494395624e-06,
"loss": 1.9544,
"step": 1140
},
{
"epoch": 1.86,
"learning_rate": 6.639656932057641e-06,
"loss": 1.7807,
"step": 1142
},
{
"epoch": 1.86,
"learning_rate": 6.6064876438911154e-06,
"loss": 1.7377,
"step": 1144
},
{
"epoch": 1.87,
"learning_rate": 6.573360496840899e-06,
"loss": 2.0219,
"step": 1146
},
{
"epoch": 1.87,
"learning_rate": 6.540275902284583e-06,
"loss": 1.974,
"step": 1148
},
{
"epoch": 1.87,
"learning_rate": 6.507234271071345e-06,
"loss": 1.8626,
"step": 1150
},
{
"epoch": 1.88,
"learning_rate": 6.474236013516837e-06,
"loss": 1.9131,
"step": 1152
},
{
"epoch": 1.88,
"learning_rate": 6.441281539398082e-06,
"loss": 2.1233,
"step": 1154
},
{
"epoch": 1.88,
"learning_rate": 6.408371257948405e-06,
"loss": 2.0239,
"step": 1156
},
{
"epoch": 1.89,
"learning_rate": 6.375505577852326e-06,
"loss": 2.1019,
"step": 1158
},
{
"epoch": 1.89,
"learning_rate": 6.342684907240513e-06,
"loss": 1.9495,
"step": 1160
},
{
"epoch": 1.89,
"learning_rate": 6.3099096536847e-06,
"loss": 2.0837,
"step": 1162
},
{
"epoch": 1.9,
"learning_rate": 6.277180224192613e-06,
"loss": 1.9115,
"step": 1164
},
{
"epoch": 1.9,
"learning_rate": 6.24449702520294e-06,
"loss": 1.5175,
"step": 1166
},
{
"epoch": 1.9,
"learning_rate": 6.211860462580261e-06,
"loss": 1.5625,
"step": 1168
},
{
"epoch": 1.91,
"learning_rate": 6.179270941610031e-06,
"loss": 1.4726,
"step": 1170
},
{
"epoch": 1.91,
"learning_rate": 6.146728866993535e-06,
"loss": 1.7119,
"step": 1172
},
{
"epoch": 1.91,
"learning_rate": 6.114234642842847e-06,
"loss": 1.8043,
"step": 1174
},
{
"epoch": 1.92,
"learning_rate": 6.081788672675845e-06,
"loss": 1.677,
"step": 1176
},
{
"epoch": 1.92,
"learning_rate": 6.049391359411171e-06,
"loss": 1.6549,
"step": 1178
},
{
"epoch": 1.92,
"learning_rate": 6.017043105363241e-06,
"loss": 1.8129,
"step": 1180
},
{
"epoch": 1.93,
"learning_rate": 5.9847443122372525e-06,
"loss": 1.7374,
"step": 1182
},
{
"epoch": 1.93,
"learning_rate": 5.952495381124181e-06,
"loss": 1.6513,
"step": 1184
},
{
"epoch": 1.93,
"learning_rate": 5.920296712495819e-06,
"loss": 1.6845,
"step": 1186
},
{
"epoch": 1.94,
"learning_rate": 5.888148706199782e-06,
"loss": 1.6281,
"step": 1188
},
{
"epoch": 1.94,
"learning_rate": 5.856051761454562e-06,
"loss": 1.7846,
"step": 1190
},
{
"epoch": 1.94,
"learning_rate": 5.8240062768445486e-06,
"loss": 1.7218,
"step": 1192
},
{
"epoch": 1.94,
"learning_rate": 5.792012650315112e-06,
"loss": 1.8823,
"step": 1194
},
{
"epoch": 1.95,
"learning_rate": 5.760071279167629e-06,
"loss": 1.7295,
"step": 1196
},
{
"epoch": 1.95,
"learning_rate": 5.728182560054556e-06,
"loss": 1.8422,
"step": 1198
},
{
"epoch": 1.95,
"learning_rate": 5.6963468889745265e-06,
"loss": 1.8647,
"step": 1200
},
{
"epoch": 1.96,
"learning_rate": 5.6645646612673934e-06,
"loss": 1.9415,
"step": 1202
},
{
"epoch": 1.96,
"learning_rate": 5.6328362716093645e-06,
"loss": 2.1005,
"step": 1204
},
{
"epoch": 1.96,
"learning_rate": 5.60116211400807e-06,
"loss": 1.9825,
"step": 1206
},
{
"epoch": 1.97,
"learning_rate": 5.569542581797667e-06,
"loss": 2.136,
"step": 1208
},
{
"epoch": 1.97,
"learning_rate": 5.537978067633993e-06,
"loss": 2.1115,
"step": 1210
},
{
"epoch": 1.97,
"learning_rate": 5.506468963489631e-06,
"loss": 2.0926,
"step": 1212
},
{
"epoch": 1.98,
"learning_rate": 5.475015660649106e-06,
"loss": 2.0519,
"step": 1214
},
{
"epoch": 1.98,
"learning_rate": 5.443618549703979e-06,
"loss": 1.4451,
"step": 1216
},
{
"epoch": 1.98,
"learning_rate": 5.412278020548003e-06,
"loss": 1.6499,
"step": 1218
},
{
"epoch": 1.99,
"learning_rate": 5.380994462372315e-06,
"loss": 1.7575,
"step": 1220
},
{
"epoch": 1.99,
"learning_rate": 5.349768263660551e-06,
"loss": 1.768,
"step": 1222
},
{
"epoch": 1.99,
"learning_rate": 5.318599812184071e-06,
"loss": 1.9914,
"step": 1224
},
{
"epoch": 2.0,
"learning_rate": 5.287489494997111e-06,
"loss": 2.0054,
"step": 1226
},
{
"epoch": 2.0,
"learning_rate": 5.256437698431986e-06,
"loss": 1.9918,
"step": 1228
},
{
"epoch": 2.0,
"learning_rate": 5.225444808094305e-06,
"loss": 1.6525,
"step": 1230
},
{
"epoch": 2.01,
"learning_rate": 5.1945112088581485e-06,
"loss": 1.5217,
"step": 1232
},
{
"epoch": 2.01,
"learning_rate": 5.1636372848613395e-06,
"loss": 1.5475,
"step": 1234
},
{
"epoch": 2.01,
"learning_rate": 5.132823419500626e-06,
"loss": 1.6077,
"step": 1236
},
{
"epoch": 2.02,
"learning_rate": 5.102069995426946e-06,
"loss": 1.6792,
"step": 1238
},
{
"epoch": 2.02,
"learning_rate": 5.071377394540673e-06,
"loss": 1.5353,
"step": 1240
},
{
"epoch": 2.02,
"learning_rate": 5.040745997986853e-06,
"loss": 1.6658,
"step": 1242
},
{
"epoch": 2.03,
"learning_rate": 5.010176186150515e-06,
"loss": 1.8029,
"step": 1244
},
{
"epoch": 2.03,
"learning_rate": 4.979668338651891e-06,
"loss": 1.5903,
"step": 1246
},
{
"epoch": 2.03,
"learning_rate": 4.9492228343417545e-06,
"loss": 1.6274,
"step": 1248
},
{
"epoch": 2.04,
"learning_rate": 4.918840051296686e-06,
"loss": 1.574,
"step": 1250
},
{
"epoch": 2.04,
"learning_rate": 4.888520366814369e-06,
"loss": 1.5849,
"step": 1252
},
{
"epoch": 2.04,
"learning_rate": 4.858264157408948e-06,
"loss": 1.6375,
"step": 1254
},
{
"epoch": 2.05,
"learning_rate": 4.828071798806294e-06,
"loss": 1.7622,
"step": 1256
},
{
"epoch": 2.05,
"learning_rate": 4.797943665939398e-06,
"loss": 1.623,
"step": 1258
},
{
"epoch": 2.05,
"learning_rate": 4.767880132943671e-06,
"loss": 1.8606,
"step": 1260
},
{
"epoch": 2.06,
"learning_rate": 4.737881573152317e-06,
"loss": 1.882,
"step": 1262
},
{
"epoch": 2.06,
"learning_rate": 4.707948359091694e-06,
"loss": 1.8452,
"step": 1264
},
{
"epoch": 2.06,
"learning_rate": 4.678080862476679e-06,
"loss": 1.909,
"step": 1266
},
{
"epoch": 2.07,
"learning_rate": 4.648279454206079e-06,
"loss": 2.0825,
"step": 1268
},
{
"epoch": 2.07,
"learning_rate": 4.618544504357992e-06,
"loss": 1.9816,
"step": 1270
},
{
"epoch": 2.07,
"learning_rate": 4.588876382185231e-06,
"loss": 2.1092,
"step": 1272
},
{
"epoch": 2.08,
"learning_rate": 4.559275456110736e-06,
"loss": 2.0248,
"step": 1274
},
{
"epoch": 2.08,
"learning_rate": 4.529742093722978e-06,
"loss": 2.1858,
"step": 1276
},
{
"epoch": 2.08,
"learning_rate": 4.5002766617714444e-06,
"loss": 1.8722,
"step": 1278
},
{
"epoch": 2.08,
"learning_rate": 4.470879526162028e-06,
"loss": 1.5387,
"step": 1280
},
{
"epoch": 2.09,
"learning_rate": 4.441551051952518e-06,
"loss": 1.6771,
"step": 1282
},
{
"epoch": 2.09,
"learning_rate": 4.412291603348055e-06,
"loss": 1.6703,
"step": 1284
},
{
"epoch": 2.09,
"learning_rate": 4.383101543696603e-06,
"loss": 1.6778,
"step": 1286
},
{
"epoch": 2.1,
"learning_rate": 4.353981235484461e-06,
"loss": 1.56,
"step": 1288
},
{
"epoch": 2.1,
"learning_rate": 4.324931040331722e-06,
"loss": 1.5857,
"step": 1290
},
{
"epoch": 2.1,
"learning_rate": 4.295951318987826e-06,
"loss": 1.6695,
"step": 1292
},
{
"epoch": 2.11,
"learning_rate": 4.267042431327049e-06,
"loss": 1.6566,
"step": 1294
},
{
"epoch": 2.11,
"learning_rate": 4.238204736344045e-06,
"loss": 1.6524,
"step": 1296
},
{
"epoch": 2.11,
"learning_rate": 4.209438592149392e-06,
"loss": 1.7873,
"step": 1298
},
{
"epoch": 2.12,
"learning_rate": 4.180744355965126e-06,
"loss": 1.7025,
"step": 1300
},
{
"epoch": 2.12,
"learning_rate": 4.152122384120339e-06,
"loss": 1.6507,
"step": 1302
},
{
"epoch": 2.12,
"learning_rate": 4.123573032046723e-06,
"loss": 1.9534,
"step": 1304
},
{
"epoch": 2.13,
"learning_rate": 4.0950966542741675e-06,
"loss": 1.7546,
"step": 1306
},
{
"epoch": 2.13,
"learning_rate": 4.066693604426364e-06,
"loss": 1.7409,
"step": 1308
},
{
"epoch": 2.13,
"learning_rate": 4.038364235216389e-06,
"loss": 1.9443,
"step": 1310
},
{
"epoch": 2.14,
"learning_rate": 4.010108898442368e-06,
"loss": 1.764,
"step": 1312
},
{
"epoch": 2.14,
"learning_rate": 3.981927944983063e-06,
"loss": 1.8742,
"step": 1314
},
{
"epoch": 2.14,
"learning_rate": 3.953821724793539e-06,
"loss": 1.9115,
"step": 1316
},
{
"epoch": 2.15,
"learning_rate": 3.92579058690082e-06,
"loss": 1.9487,
"step": 1318
},
{
"epoch": 2.15,
"learning_rate": 3.897834879399526e-06,
"loss": 2.1663,
"step": 1320
},
{
"epoch": 2.15,
"learning_rate": 3.869954949447596e-06,
"loss": 2.0637,
"step": 1322
},
{
"epoch": 2.16,
"learning_rate": 3.842151143261943e-06,
"loss": 2.1487,
"step": 1324
},
{
"epoch": 2.16,
"learning_rate": 3.8144238061141593e-06,
"loss": 2.0735,
"step": 1326
},
{
"epoch": 2.16,
"learning_rate": 3.7867732823262417e-06,
"loss": 1.9353,
"step": 1328
},
{
"epoch": 2.17,
"learning_rate": 3.7591999152663027e-06,
"loss": 1.5337,
"step": 1330
},
{
"epoch": 2.17,
"learning_rate": 3.7317040473443157e-06,
"loss": 1.6132,
"step": 1332
},
{
"epoch": 2.17,
"learning_rate": 3.7042860200078545e-06,
"loss": 1.4843,
"step": 1334
},
{
"epoch": 2.18,
"learning_rate": 3.6769461737378597e-06,
"loss": 1.5181,
"step": 1336
},
{
"epoch": 2.18,
"learning_rate": 3.6496848480444104e-06,
"loss": 1.7481,
"step": 1338
},
{
"epoch": 2.18,
"learning_rate": 3.6225023814625025e-06,
"loss": 1.7613,
"step": 1340
},
{
"epoch": 2.19,
"learning_rate": 3.5953991115478526e-06,
"loss": 1.6056,
"step": 1342
},
{
"epoch": 2.19,
"learning_rate": 3.568375374872691e-06,
"loss": 1.6225,
"step": 1344
},
{
"epoch": 2.19,
"learning_rate": 3.54143150702161e-06,
"loss": 1.6991,
"step": 1346
},
{
"epoch": 2.2,
"learning_rate": 3.5145678425873663e-06,
"loss": 1.6515,
"step": 1348
},
{
"epoch": 2.2,
"learning_rate": 3.4877847151667452e-06,
"loss": 1.6193,
"step": 1350
},
{
"epoch": 2.2,
"learning_rate": 3.461082457356414e-06,
"loss": 1.591,
"step": 1352
},
{
"epoch": 2.21,
"learning_rate": 3.4344614007487742e-06,
"loss": 1.708,
"step": 1354
},
{
"epoch": 2.21,
"learning_rate": 3.407921875927883e-06,
"loss": 1.7268,
"step": 1356
},
{
"epoch": 2.21,
"learning_rate": 3.3814642124653086e-06,
"loss": 1.9632,
"step": 1358
},
{
"epoch": 2.22,
"learning_rate": 3.355088738916056e-06,
"loss": 1.7395,
"step": 1360
},
{
"epoch": 2.22,
"learning_rate": 3.328795782814487e-06,
"loss": 1.8749,
"step": 1362
},
{
"epoch": 2.22,
"learning_rate": 3.3025856706702477e-06,
"loss": 1.802,
"step": 1364
},
{
"epoch": 2.22,
"learning_rate": 3.276458727964218e-06,
"loss": 2.1502,
"step": 1366
},
{
"epoch": 2.23,
"learning_rate": 3.2504152791444656e-06,
"loss": 1.9775,
"step": 1368
},
{
"epoch": 2.23,
"learning_rate": 3.224455647622221e-06,
"loss": 2.1675,
"step": 1370
},
{
"epoch": 2.23,
"learning_rate": 3.1985801557678606e-06,
"loss": 2.0203,
"step": 1372
},
{
"epoch": 2.24,
"learning_rate": 3.1727891249069e-06,
"loss": 2.0424,
"step": 1374
},
{
"epoch": 2.24,
"learning_rate": 3.147082875316008e-06,
"loss": 2.1101,
"step": 1376
},
{
"epoch": 2.24,
"learning_rate": 3.121461726219026e-06,
"loss": 1.9402,
"step": 1378
},
{
"epoch": 2.25,
"learning_rate": 3.095925995783008e-06,
"loss": 1.5451,
"step": 1380
},
{
"epoch": 2.25,
"learning_rate": 3.0704760011142653e-06,
"loss": 1.5736,
"step": 1382
},
{
"epoch": 2.25,
"learning_rate": 3.0451120582544303e-06,
"loss": 1.5506,
"step": 1384
},
{
"epoch": 2.26,
"learning_rate": 3.0198344821765346e-06,
"loss": 1.5911,
"step": 1386
},
{
"epoch": 2.26,
"learning_rate": 2.994643586781083e-06,
"loss": 1.6752,
"step": 1388
},
{
"epoch": 2.26,
"learning_rate": 2.9695396848921875e-06,
"loss": 1.6838,
"step": 1390
},
{
"epoch": 2.27,
"learning_rate": 2.944523088253648e-06,
"loss": 1.7638,
"step": 1392
},
{
"epoch": 2.27,
"learning_rate": 2.9195941075250976e-06,
"loss": 1.836,
"step": 1394
},
{
"epoch": 2.27,
"learning_rate": 2.8947530522781455e-06,
"loss": 1.6376,
"step": 1396
},
{
"epoch": 2.28,
"learning_rate": 2.870000230992528e-06,
"loss": 1.7293,
"step": 1398
},
{
"epoch": 2.28,
"learning_rate": 2.8453359510522803e-06,
"loss": 1.9062,
"step": 1400
},
{
"epoch": 2.28,
"learning_rate": 2.8207605187419194e-06,
"loss": 1.8306,
"step": 1402
},
{
"epoch": 2.29,
"learning_rate": 2.7962742392426377e-06,
"loss": 1.7754,
"step": 1404
},
{
"epoch": 2.29,
"learning_rate": 2.7718774166285157e-06,
"loss": 1.8018,
"step": 1406
},
{
"epoch": 2.29,
"learning_rate": 2.7475703538627485e-06,
"loss": 1.7589,
"step": 1408
},
{
"epoch": 2.3,
"learning_rate": 2.723353352793878e-06,
"loss": 1.7515,
"step": 1410
},
{
"epoch": 2.3,
"learning_rate": 2.6992267141520467e-06,
"loss": 2.0015,
"step": 1412
},
{
"epoch": 2.3,
"learning_rate": 2.675190737545267e-06,
"loss": 2.0211,
"step": 1414
},
{
"epoch": 2.31,
"learning_rate": 2.6512457214556943e-06,
"loss": 1.9215,
"step": 1416
},
{
"epoch": 2.31,
"learning_rate": 2.6273919632359247e-06,
"loss": 1.9639,
"step": 1418
},
{
"epoch": 2.31,
"learning_rate": 2.6036297591052993e-06,
"loss": 2.0678,
"step": 1420
},
{
"epoch": 2.32,
"learning_rate": 2.5799594041462318e-06,
"loss": 2.1676,
"step": 1422
},
{
"epoch": 2.32,
"learning_rate": 2.5563811923005355e-06,
"loss": 2.0718,
"step": 1424
},
{
"epoch": 2.32,
"learning_rate": 2.532895416365779e-06,
"loss": 2.0191,
"step": 1426
},
{
"epoch": 2.33,
"learning_rate": 2.5095023679916508e-06,
"loss": 1.6374,
"step": 1428
},
{
"epoch": 2.33,
"learning_rate": 2.4862023376763324e-06,
"loss": 1.4945,
"step": 1430
},
{
"epoch": 2.33,
"learning_rate": 2.462995614762894e-06,
"loss": 1.5496,
"step": 1432
},
{
"epoch": 2.34,
"learning_rate": 2.4398824874357042e-06,
"loss": 1.6265,
"step": 1434
},
{
"epoch": 2.34,
"learning_rate": 2.416863242716845e-06,
"loss": 1.6466,
"step": 1436
},
{
"epoch": 2.34,
"learning_rate": 2.393938166462554e-06,
"loss": 1.6862,
"step": 1438
},
{
"epoch": 2.35,
"learning_rate": 2.3711075433596675e-06,
"loss": 1.5553,
"step": 1440
},
{
"epoch": 2.35,
"learning_rate": 2.348371656922095e-06,
"loss": 1.6354,
"step": 1442
},
{
"epoch": 2.35,
"learning_rate": 2.3257307894872873e-06,
"loss": 1.7651,
"step": 1444
},
{
"epoch": 2.36,
"learning_rate": 2.303185222212737e-06,
"loss": 1.7357,
"step": 1446
},
{
"epoch": 2.36,
"learning_rate": 2.2807352350724874e-06,
"loss": 1.7449,
"step": 1448
},
{
"epoch": 2.36,
"learning_rate": 2.2583811068536556e-06,
"loss": 1.6902,
"step": 1450
},
{
"epoch": 2.37,
"learning_rate": 2.236123115152963e-06,
"loss": 1.7208,
"step": 1452
},
{
"epoch": 2.37,
"learning_rate": 2.2139615363733015e-06,
"loss": 1.86,
"step": 1454
},
{
"epoch": 2.37,
"learning_rate": 2.1918966457202873e-06,
"loss": 1.7434,
"step": 1456
},
{
"epoch": 2.37,
"learning_rate": 2.1699287171988546e-06,
"loss": 1.8137,
"step": 1458
},
{
"epoch": 2.38,
"learning_rate": 2.1480580236098448e-06,
"loss": 1.9533,
"step": 1460
},
{
"epoch": 2.38,
"learning_rate": 2.126284836546626e-06,
"loss": 1.981,
"step": 1462
},
{
"epoch": 2.38,
"learning_rate": 2.1046094263917115e-06,
"loss": 1.9583,
"step": 1464
},
{
"epoch": 2.39,
"learning_rate": 2.0830320623134105e-06,
"loss": 1.9106,
"step": 1466
},
{
"epoch": 2.39,
"learning_rate": 2.0615530122624816e-06,
"loss": 1.945,
"step": 1468
},
{
"epoch": 2.39,
"learning_rate": 2.040172542968806e-06,
"loss": 2.1462,
"step": 1470
},
{
"epoch": 2.4,
"learning_rate": 2.0188909199380734e-06,
"loss": 2.1872,
"step": 1472
},
{
"epoch": 2.4,
"learning_rate": 1.9977084074484888e-06,
"loss": 1.9743,
"step": 1474
},
{
"epoch": 2.4,
"learning_rate": 1.9766252685474887e-06,
"loss": 2.101,
"step": 1476
},
{
"epoch": 2.41,
"learning_rate": 1.9556417650484716e-06,
"loss": 1.8562,
"step": 1478
},
{
"epoch": 2.41,
"learning_rate": 1.934758157527552e-06,
"loss": 1.5153,
"step": 1480
},
{
"epoch": 2.41,
"learning_rate": 1.913974705320322e-06,
"loss": 1.6029,
"step": 1482
},
{
"epoch": 2.42,
"learning_rate": 1.8932916665186275e-06,
"loss": 1.6023,
"step": 1484
},
{
"epoch": 2.42,
"learning_rate": 1.8727092979673722e-06,
"loss": 1.5499,
"step": 1486
},
{
"epoch": 2.42,
"learning_rate": 1.852227855261317e-06,
"loss": 1.7338,
"step": 1488
},
{
"epoch": 2.43,
"learning_rate": 1.8318475927419143e-06,
"loss": 1.6786,
"step": 1490
},
{
"epoch": 2.43,
"learning_rate": 1.811568763494147e-06,
"loss": 1.7163,
"step": 1492
},
{
"epoch": 2.43,
"learning_rate": 1.7913916193433822e-06,
"loss": 1.7118,
"step": 1494
},
{
"epoch": 2.44,
"learning_rate": 1.7713164108522518e-06,
"loss": 1.764,
"step": 1496
},
{
"epoch": 2.44,
"learning_rate": 1.7513433873175313e-06,
"loss": 1.5847,
"step": 1498
},
{
"epoch": 2.44,
"learning_rate": 1.731472796767053e-06,
"loss": 1.7989,
"step": 1500
},
{
"epoch": 2.45,
"learning_rate": 1.7117048859566188e-06,
"loss": 1.647,
"step": 1502
},
{
"epoch": 2.45,
"learning_rate": 1.6920399003669418e-06,
"loss": 1.8616,
"step": 1504
},
{
"epoch": 2.45,
"learning_rate": 1.6724780842005928e-06,
"loss": 1.7615,
"step": 1506
},
{
"epoch": 2.46,
"learning_rate": 1.6530196803789711e-06,
"loss": 1.8888,
"step": 1508
},
{
"epoch": 2.46,
"learning_rate": 1.6336649305392883e-06,
"loss": 1.7978,
"step": 1510
},
{
"epoch": 2.46,
"learning_rate": 1.614414075031564e-06,
"loss": 1.9045,
"step": 1512
},
{
"epoch": 2.47,
"learning_rate": 1.5952673529156426e-06,
"loss": 1.9761,
"step": 1514
},
{
"epoch": 2.47,
"learning_rate": 1.5762250019582293e-06,
"loss": 2.0572,
"step": 1516
},
{
"epoch": 2.47,
"learning_rate": 1.5572872586299304e-06,
"loss": 2.0228,
"step": 1518
},
{
"epoch": 2.48,
"learning_rate": 1.5384543581023192e-06,
"loss": 2.1219,
"step": 1520
},
{
"epoch": 2.48,
"learning_rate": 1.5197265342450184e-06,
"loss": 2.0404,
"step": 1522
},
{
"epoch": 2.48,
"learning_rate": 1.5011040196227932e-06,
"loss": 2.0621,
"step": 1524
},
{
"epoch": 2.49,
"learning_rate": 1.482587045492664e-06,
"loss": 1.9169,
"step": 1526
},
{
"epoch": 2.49,
"learning_rate": 1.464175841801032e-06,
"loss": 1.7185,
"step": 1528
},
{
"epoch": 2.49,
"learning_rate": 1.4458706371808306e-06,
"loss": 1.6368,
"step": 1530
},
{
"epoch": 2.5,
"learning_rate": 1.4276716589486784e-06,
"loss": 1.5826,
"step": 1532
},
{
"epoch": 2.5,
"learning_rate": 1.4095791331020592e-06,
"loss": 1.5946,
"step": 1534
},
{
"epoch": 2.5,
"learning_rate": 1.3915932843165192e-06,
"loss": 1.7053,
"step": 1536
},
{
"epoch": 2.51,
"learning_rate": 1.3737143359428706e-06,
"loss": 1.6754,
"step": 1538
},
{
"epoch": 2.51,
"learning_rate": 1.3559425100044233e-06,
"loss": 1.6797,
"step": 1540
},
{
"epoch": 2.51,
"learning_rate": 1.3382780271942253e-06,
"loss": 1.788,
"step": 1542
},
{
"epoch": 2.51,
"learning_rate": 1.3207211068723203e-06,
"loss": 1.6312,
"step": 1544
},
{
"epoch": 2.52,
"learning_rate": 1.30327196706303e-06,
"loss": 1.8689,
"step": 1546
},
{
"epoch": 2.52,
"learning_rate": 1.2859308244522384e-06,
"loss": 1.7198,
"step": 1548
},
{
"epoch": 2.52,
"learning_rate": 1.268697894384706e-06,
"loss": 1.63,
"step": 1550
},
{
"epoch": 2.53,
"learning_rate": 1.251573390861397e-06,
"loss": 1.6771,
"step": 1552
},
{
"epoch": 2.53,
"learning_rate": 1.234557526536817e-06,
"loss": 1.7827,
"step": 1554
},
{
"epoch": 2.53,
"learning_rate": 1.2176505127163752e-06,
"loss": 1.8489,
"step": 1556
},
{
"epoch": 2.54,
"learning_rate": 1.2008525593537601e-06,
"loss": 1.7908,
"step": 1558
},
{
"epoch": 2.54,
"learning_rate": 1.1841638750483308e-06,
"loss": 1.9689,
"step": 1560
},
{
"epoch": 2.54,
"learning_rate": 1.1675846670425317e-06,
"loss": 1.966,
"step": 1562
},
{
"epoch": 2.55,
"learning_rate": 1.151115141219309e-06,
"loss": 1.8851,
"step": 1564
},
{
"epoch": 2.55,
"learning_rate": 1.134755502099566e-06,
"loss": 1.8687,
"step": 1566
},
{
"epoch": 2.55,
"learning_rate": 1.1185059528396125e-06,
"loss": 1.9745,
"step": 1568
},
{
"epoch": 2.56,
"learning_rate": 1.1023666952286504e-06,
"loss": 2.0723,
"step": 1570
},
{
"epoch": 2.56,
"learning_rate": 1.086337929686261e-06,
"loss": 2.0465,
"step": 1572
},
{
"epoch": 2.56,
"learning_rate": 1.0704198552599265e-06,
"loss": 2.1794,
"step": 1574
},
{
"epoch": 2.57,
"learning_rate": 1.0546126696225412e-06,
"loss": 2.138,
"step": 1576
},
{
"epoch": 2.57,
"learning_rate": 1.038916569069972e-06,
"loss": 2.0238,
"step": 1578
},
{
"epoch": 2.57,
"learning_rate": 1.0233317485186167e-06,
"loss": 1.7425,
"step": 1580
},
{
"epoch": 2.58,
"learning_rate": 1.007858401502979e-06,
"loss": 1.434,
"step": 1582
},
{
"epoch": 2.58,
"learning_rate": 9.924967201732726e-07,
"loss": 1.625,
"step": 1584
},
{
"epoch": 2.58,
"learning_rate": 9.772468952930259e-07,
"loss": 1.5343,
"step": 1586
},
{
"epoch": 2.59,
"learning_rate": 9.621091162367225e-07,
"loss": 1.7319,
"step": 1588
},
{
"epoch": 2.59,
"learning_rate": 9.470835709874437e-07,
"loss": 1.808,
"step": 1590
},
{
"epoch": 2.59,
"learning_rate": 9.321704461345326e-07,
"loss": 1.657,
"step": 1592
},
{
"epoch": 2.6,
"learning_rate": 9.173699268712855e-07,
"loss": 1.7956,
"step": 1594
},
{
"epoch": 2.6,
"learning_rate": 9.026821969926414e-07,
"loss": 1.6254,
"step": 1596
},
{
"epoch": 2.6,
"learning_rate": 8.881074388929078e-07,
"loss": 1.6897,
"step": 1598
},
{
"epoch": 2.61,
"learning_rate": 8.736458335634912e-07,
"loss": 1.7652,
"step": 1600
},
{
"epoch": 2.61,
"learning_rate": 8.592975605906528e-07,
"loss": 1.6281,
"step": 1602
},
{
"epoch": 2.61,
"learning_rate": 8.450627981532733e-07,
"loss": 1.6301,
"step": 1604
},
{
"epoch": 2.62,
"learning_rate": 8.309417230206462e-07,
"loss": 1.866,
"step": 1606
},
{
"epoch": 2.62,
"learning_rate": 8.169345105502824e-07,
"loss": 1.8391,
"step": 1608
},
{
"epoch": 2.62,
"learning_rate": 8.030413346857236e-07,
"loss": 1.7652,
"step": 1610
},
{
"epoch": 2.63,
"learning_rate": 7.892623679543932e-07,
"loss": 1.8361,
"step": 1612
},
{
"epoch": 2.63,
"learning_rate": 7.75597781465448e-07,
"loss": 2.0671,
"step": 1614
},
{
"epoch": 2.63,
"learning_rate": 7.620477449076546e-07,
"loss": 1.9406,
"step": 1616
},
{
"epoch": 2.64,
"learning_rate": 7.486124265472871e-07,
"loss": 2.0041,
"step": 1618
},
{
"epoch": 2.64,
"learning_rate": 7.35291993226025e-07,
"loss": 1.9721,
"step": 1620
},
{
"epoch": 2.64,
"learning_rate": 7.22086610358893e-07,
"loss": 2.0757,
"step": 1622
},
{
"epoch": 2.65,
"learning_rate": 7.089964419322049e-07,
"loss": 2.1608,
"step": 1624
},
{
"epoch": 2.65,
"learning_rate": 6.960216505015216e-07,
"loss": 2.1365,
"step": 1626
},
{
"epoch": 2.65,
"learning_rate": 6.831623971896384e-07,
"loss": 1.8488,
"step": 1628
},
{
"epoch": 2.65,
"learning_rate": 6.704188416845814e-07,
"loss": 1.5193,
"step": 1630
},
{
"epoch": 2.66,
"learning_rate": 6.577911422376238e-07,
"loss": 1.5308,
"step": 1632
},
{
"epoch": 2.66,
"learning_rate": 6.452794556613251e-07,
"loss": 1.5771,
"step": 1634
},
{
"epoch": 2.66,
"learning_rate": 6.328839373275775e-07,
"loss": 1.5246,
"step": 1636
},
{
"epoch": 2.67,
"learning_rate": 6.20604741165679e-07,
"loss": 1.5939,
"step": 1638
},
{
"epoch": 2.67,
"learning_rate": 6.084420196604246e-07,
"loss": 1.635,
"step": 1640
},
{
"epoch": 2.67,
"learning_rate": 5.963959238502137e-07,
"loss": 1.5927,
"step": 1642
},
{
"epoch": 2.68,
"learning_rate": 5.844666033251634e-07,
"loss": 1.7587,
"step": 1644
},
{
"epoch": 2.68,
"learning_rate": 5.726542062252626e-07,
"loss": 1.7935,
"step": 1646
},
{
"epoch": 2.68,
"learning_rate": 5.609588792385301e-07,
"loss": 1.7692,
"step": 1648
},
{
"epoch": 2.69,
"learning_rate": 5.493807675991891e-07,
"loss": 1.778,
"step": 1650
},
{
"epoch": 2.69,
"learning_rate": 5.37920015085871e-07,
"loss": 1.6862,
"step": 1652
},
{
"epoch": 2.69,
"learning_rate": 5.265767640198171e-07,
"loss": 1.8197,
"step": 1654
},
{
"epoch": 2.7,
"learning_rate": 5.15351155263124e-07,
"loss": 1.8365,
"step": 1656
},
{
"epoch": 2.7,
"learning_rate": 5.042433282169901e-07,
"loss": 1.7107,
"step": 1658
},
{
"epoch": 2.7,
"learning_rate": 4.93253420819979e-07,
"loss": 1.8031,
"step": 1660
},
{
"epoch": 2.71,
"learning_rate": 4.823815695463208e-07,
"loss": 1.865,
"step": 1662
},
{
"epoch": 2.71,
"learning_rate": 4.7162790940419643e-07,
"loss": 1.9976,
"step": 1664
},
{
"epoch": 2.71,
"learning_rate": 4.6099257393407926e-07,
"loss": 1.9547,
"step": 1666
},
{
"epoch": 2.72,
"learning_rate": 4.50475695207071e-07,
"loss": 2.1711,
"step": 1668
},
{
"epoch": 2.72,
"learning_rate": 4.400774038232569e-07,
"loss": 1.9739,
"step": 1670
},
{
"epoch": 2.72,
"learning_rate": 4.297978289100924e-07,
"loss": 2.0658,
"step": 1672
},
{
"epoch": 2.73,
"learning_rate": 4.1963709812078756e-07,
"loss": 2.1135,
"step": 1674
},
{
"epoch": 2.73,
"learning_rate": 4.0959533763274106e-07,
"loss": 1.9309,
"step": 1676
},
{
"epoch": 2.73,
"learning_rate": 3.9967267214594874e-07,
"loss": 1.8501,
"step": 1678
},
{
"epoch": 2.74,
"learning_rate": 3.8986922488147614e-07,
"loss": 1.4997,
"step": 1680
},
{
"epoch": 2.74,
"learning_rate": 3.801851175799176e-07,
"loss": 1.7059,
"step": 1682
},
{
"epoch": 2.74,
"learning_rate": 3.7062047049988505e-07,
"loss": 1.6233,
"step": 1684
},
{
"epoch": 2.75,
"learning_rate": 3.611754024165226e-07,
"loss": 1.5627,
"step": 1686
},
{
"epoch": 2.75,
"learning_rate": 3.5185003062001676e-07,
"loss": 1.5997,
"step": 1688
},
{
"epoch": 2.75,
"learning_rate": 3.426444709141563e-07,
"loss": 1.7561,
"step": 1690
},
{
"epoch": 2.76,
"learning_rate": 3.335588376148824e-07,
"loss": 1.6633,
"step": 1692
},
{
"epoch": 2.76,
"learning_rate": 3.24593243548873e-07,
"loss": 1.6728,
"step": 1694
},
{
"epoch": 2.76,
"learning_rate": 3.1574780005214633e-07,
"loss": 1.6645,
"step": 1696
},
{
"epoch": 2.77,
"learning_rate": 3.070226169686663e-07,
"loss": 1.7514,
"step": 1698
},
{
"epoch": 2.77,
"learning_rate": 2.984178026489937e-07,
"loss": 1.6465,
"step": 1700
},
{
"epoch": 2.77,
"learning_rate": 2.8993346394892706e-07,
"loss": 1.7203,
"step": 1702
},
{
"epoch": 2.78,
"learning_rate": 2.8156970622818416e-07,
"loss": 1.6954,
"step": 1704
},
{
"epoch": 2.78,
"learning_rate": 2.733266333490947e-07,
"loss": 1.6774,
"step": 1706
},
{
"epoch": 2.78,
"learning_rate": 2.6520434767529856e-07,
"loss": 1.8674,
"step": 1708
},
{
"epoch": 2.79,
"learning_rate": 2.57202950070492e-07,
"loss": 1.903,
"step": 1710
},
{
"epoch": 2.79,
"learning_rate": 2.4932253989715883e-07,
"loss": 1.9868,
"step": 1712
},
{
"epoch": 2.79,
"learning_rate": 2.415632150153469e-07,
"loss": 1.9943,
"step": 1714
},
{
"epoch": 2.8,
"learning_rate": 2.3392507178145229e-07,
"loss": 1.8895,
"step": 1716
},
{
"epoch": 2.8,
"learning_rate": 2.264082050470151e-07,
"loss": 2.0532,
"step": 1718
},
{
"epoch": 2.8,
"learning_rate": 2.1901270815755215e-07,
"loss": 2.0355,
"step": 1720
},
{
"epoch": 2.8,
"learning_rate": 2.1173867295138816e-07,
"loss": 2.0338,
"step": 1722
},
{
"epoch": 2.81,
"learning_rate": 2.0458618975851996e-07,
"loss": 2.0172,
"step": 1724
},
{
"epoch": 2.81,
"learning_rate": 1.9755534739949733e-07,
"loss": 1.9194,
"step": 1726
},
{
"epoch": 2.81,
"learning_rate": 1.9064623318431175e-07,
"loss": 1.9948,
"step": 1728
},
{
"epoch": 2.82,
"learning_rate": 1.8385893291132494e-07,
"loss": 1.7778,
"step": 1730
},
{
"epoch": 2.82,
"learning_rate": 1.7719353086618763e-07,
"loss": 1.4658,
"step": 1732
},
{
"epoch": 2.82,
"learning_rate": 1.7065010982080688e-07,
"loss": 1.6131,
"step": 1734
},
{
"epoch": 2.83,
"learning_rate": 1.6422875103230928e-07,
"loss": 1.5365,
"step": 1736
},
{
"epoch": 2.83,
"learning_rate": 1.5792953424203727e-07,
"loss": 1.6563,
"step": 1738
},
{
"epoch": 2.83,
"learning_rate": 1.5175253767455656e-07,
"loss": 1.6242,
"step": 1740
},
{
"epoch": 2.84,
"learning_rate": 1.4569783803668135e-07,
"loss": 1.5964,
"step": 1742
},
{
"epoch": 2.84,
"learning_rate": 1.3976551051653187e-07,
"loss": 1.5942,
"step": 1744
},
{
"epoch": 2.84,
"learning_rate": 1.3395562878258606e-07,
"loss": 1.5532,
"step": 1746
},
{
"epoch": 2.85,
"learning_rate": 1.2826826498277823e-07,
"loss": 1.6633,
"step": 1748
},
{
"epoch": 2.85,
"learning_rate": 1.227034897435986e-07,
"loss": 1.7018,
"step": 1750
},
{
"epoch": 2.85,
"learning_rate": 1.172613721692084e-07,
"loss": 1.6879,
"step": 1752
},
{
"epoch": 2.86,
"learning_rate": 1.1194197984059852e-07,
"loss": 1.7878,
"step": 1754
},
{
"epoch": 2.86,
"learning_rate": 1.0674537881473102e-07,
"loss": 1.8444,
"step": 1756
},
{
"epoch": 2.86,
"learning_rate": 1.0167163362373333e-07,
"loss": 1.8548,
"step": 1758
},
{
"epoch": 2.87,
"learning_rate": 9.672080727409217e-08,
"loss": 1.7185,
"step": 1760
},
{
"epoch": 2.87,
"learning_rate": 9.189296124586744e-08,
"loss": 2.0053,
"step": 1762
},
{
"epoch": 2.87,
"learning_rate": 8.718815549193516e-08,
"loss": 1.9904,
"step": 1764
},
{
"epoch": 2.88,
"learning_rate": 8.260644843723686e-08,
"loss": 1.9012,
"step": 1766
},
{
"epoch": 2.88,
"learning_rate": 7.814789697806025e-08,
"loss": 2.1227,
"step": 1768
},
{
"epoch": 2.88,
"learning_rate": 7.381255648132746e-08,
"loss": 2.1608,
"step": 1770
},
{
"epoch": 2.89,
"learning_rate": 6.960048078390902e-08,
"loss": 2.1626,
"step": 1772
},
{
"epoch": 2.89,
"learning_rate": 6.551172219195767e-08,
"loss": 2.1162,
"step": 1774
},
{
"epoch": 2.89,
"learning_rate": 6.154633148025446e-08,
"loss": 2.094,
"step": 1776
},
{
"epoch": 2.9,
"learning_rate": 5.770435789158369e-08,
"loss": 1.8872,
"step": 1778
},
{
"epoch": 2.9,
"learning_rate": 5.398584913611449e-08,
"loss": 1.6439,
"step": 1780
},
{
"epoch": 2.9,
"learning_rate": 5.039085139081579e-08,
"loss": 1.484,
"step": 1782
},
{
"epoch": 2.91,
"learning_rate": 4.691940929887895e-08,
"loss": 1.5765,
"step": 1784
},
{
"epoch": 2.91,
"learning_rate": 4.357156596916046e-08,
"loss": 1.5862,
"step": 1786
},
{
"epoch": 2.91,
"learning_rate": 4.0347362975654556e-08,
"loss": 1.5702,
"step": 1788
},
{
"epoch": 2.92,
"learning_rate": 3.724684035696813e-08,
"loss": 1.6757,
"step": 1790
},
{
"epoch": 2.92,
"learning_rate": 3.427003661582995e-08,
"loss": 1.7416,
"step": 1792
},
{
"epoch": 2.92,
"learning_rate": 3.1416988718609986e-08,
"loss": 1.7207,
"step": 1794
},
{
"epoch": 2.93,
"learning_rate": 2.8687732094859755e-08,
"loss": 1.6749,
"step": 1796
},
{
"epoch": 2.93,
"learning_rate": 2.6082300636876e-08,
"loss": 1.6948,
"step": 1798
},
{
"epoch": 2.93,
"learning_rate": 2.360072669927327e-08,
"loss": 1.6434,
"step": 1800
},
{
"epoch": 2.94,
"learning_rate": 2.1243041098587548e-08,
"loss": 1.7158,
"step": 1802
},
{
"epoch": 2.94,
"learning_rate": 1.9009273112893245e-08,
"loss": 1.6931,
"step": 1804
},
{
"epoch": 2.94,
"learning_rate": 1.689945048143571e-08,
"loss": 1.7745,
"step": 1806
},
{
"epoch": 2.94,
"learning_rate": 1.491359940429149e-08,
"loss": 1.8563,
"step": 1808
},
{
"epoch": 2.95,
"learning_rate": 1.3051744542039724e-08,
"loss": 1.8418,
"step": 1810
},
{
"epoch": 2.95,
"learning_rate": 1.1313909015457925e-08,
"loss": 1.8236,
"step": 1812
},
{
"epoch": 2.95,
"learning_rate": 9.70011440523222e-09,
"loss": 1.7733,
"step": 1814
},
{
"epoch": 2.96,
"learning_rate": 8.210380751694225e-09,
"loss": 1.9181,
"step": 1816
},
{
"epoch": 2.96,
"learning_rate": 6.844726554565695e-09,
"loss": 2.1189,
"step": 1818
},
{
"epoch": 2.96,
"learning_rate": 5.603168772733147e-09,
"loss": 1.9684,
"step": 1820
},
{
"epoch": 2.97,
"learning_rate": 4.4857228240380305e-09,
"loss": 2.0925,
"step": 1822
},
{
"epoch": 2.97,
"learning_rate": 3.4924025850813225e-09,
"loss": 1.8737,
"step": 1824
},
{
"epoch": 2.97,
"learning_rate": 2.6232203910547817e-09,
"loss": 2.1593,
"step": 1826
},
{
"epoch": 2.98,
"learning_rate": 1.87818703558551e-09,
"loss": 1.9364,
"step": 1828
},
{
"epoch": 2.98,
"learning_rate": 1.2573117706027317e-09,
"loss": 1.5308,
"step": 1830
},
{
"epoch": 2.98,
"learning_rate": 7.606023062234346e-10,
"loss": 1.6164,
"step": 1832
},
{
"epoch": 2.99,
"learning_rate": 3.880648106557861e-10,
"loss": 1.739,
"step": 1834
},
{
"epoch": 2.99,
"learning_rate": 1.3970391012363415e-10,
"loss": 1.7537,
"step": 1836
},
{
"epoch": 2.99,
"learning_rate": 1.552268880766761e-11,
"loss": 1.8015,
"step": 1838
}
],
"logging_steps": 2,
"max_steps": 1839,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 6.817382226043699e+16,
"trial_name": null,
"trial_params": null
}