{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 16.402405686167306,
  "global_step": 120000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 5e-05,
      "loss": 3.1294,
      "step": 500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.999658259859204e-05,
      "loss": 3.1422,
      "step": 1000
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.999316519718406e-05,
      "loss": 3.1479,
      "step": 1500
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.998974779577609e-05,
      "loss": 3.1412,
      "step": 2000
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.998633039436813e-05,
      "loss": 3.1387,
      "step": 2500
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.9982912992960154e-05,
      "loss": 3.1344,
      "step": 3000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.997949559155218e-05,
      "loss": 3.1353,
      "step": 3500
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.9976078190144217e-05,
      "loss": 3.1283,
      "step": 4000
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.9972660788736244e-05,
      "loss": 3.1291,
      "step": 4500
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.996924338732828e-05,
      "loss": 3.1226,
      "step": 5000
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.9965825985920306e-05,
      "loss": 3.1173,
      "step": 5500
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.996240858451234e-05,
      "loss": 3.1126,
      "step": 6000
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.995899118310437e-05,
      "loss": 3.1117,
      "step": 6500
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.99555737816964e-05,
      "loss": 3.1037,
      "step": 7000
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.995215638028843e-05,
      "loss": 3.0981,
      "step": 7500
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.994873897888046e-05,
      "loss": 3.0682,
      "step": 8000
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.994532157747249e-05,
      "loss": 3.0697,
      "step": 8500
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.994190417606453e-05,
      "loss": 3.0673,
      "step": 9000
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.993848677465655e-05,
      "loss": 3.0703,
      "step": 9500
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.993506937324858e-05,
      "loss": 3.0624,
      "step": 10000
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.993165197184062e-05,
      "loss": 3.0618,
      "step": 10500
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.9928234570432645e-05,
      "loss": 3.0612,
      "step": 11000
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.992481716902467e-05,
      "loss": 3.0651,
      "step": 11500
    },
    {
      "epoch": 1.64,
      "learning_rate": 4.992139976761671e-05,
      "loss": 3.0555,
      "step": 12000
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.9917982366208735e-05,
      "loss": 3.0609,
      "step": 12500
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.991456496480077e-05,
      "loss": 3.0564,
      "step": 13000
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.99111475633928e-05,
      "loss": 3.0581,
      "step": 13500
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.990773016198483e-05,
      "loss": 3.054,
      "step": 14000
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.990431276057686e-05,
      "loss": 3.0559,
      "step": 14500
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.990089535916889e-05,
      "loss": 3.0273,
      "step": 15000
    },
    {
      "epoch": 2.12,
      "learning_rate": 4.989747795776092e-05,
      "loss": 3.0221,
      "step": 15500
    },
    {
      "epoch": 2.19,
      "learning_rate": 4.989406055635295e-05,
      "loss": 3.0218,
      "step": 16000
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.989064315494498e-05,
      "loss": 3.0245,
      "step": 16500
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.988722575353702e-05,
      "loss": 3.0237,
      "step": 17000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.988380835212904e-05,
      "loss": 3.0224,
      "step": 17500
    },
    {
      "epoch": 2.46,
      "learning_rate": 4.988039095072107e-05,
      "loss": 3.0183,
      "step": 18000
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.987697354931311e-05,
      "loss": 3.0113,
      "step": 18500
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.9873556147905135e-05,
      "loss": 3.0211,
      "step": 19000
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.987013874649716e-05,
      "loss": 3.0253,
      "step": 19500
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.98667213450892e-05,
      "loss": 3.0143,
      "step": 20000
    },
    {
      "epoch": 2.8,
      "learning_rate": 4.9863303943681225e-05,
      "loss": 3.0192,
      "step": 20500
    },
    {
      "epoch": 2.87,
      "learning_rate": 4.985988654227326e-05,
      "loss": 3.012,
      "step": 21000
    },
    {
      "epoch": 2.94,
      "learning_rate": 4.985646914086529e-05,
      "loss": 3.016,
      "step": 21500
    },
    {
      "epoch": 3.01,
      "learning_rate": 4.985305173945732e-05,
      "loss": 3.0087,
      "step": 22000
    },
    {
      "epoch": 3.08,
      "learning_rate": 4.984963433804935e-05,
      "loss": 2.9676,
      "step": 22500
    },
    {
      "epoch": 3.14,
      "learning_rate": 4.9846216936641384e-05,
      "loss": 2.9644,
      "step": 23000
    },
    {
      "epoch": 3.21,
      "learning_rate": 4.984279953523341e-05,
      "loss": 2.9714,
      "step": 23500
    },
    {
      "epoch": 3.28,
      "learning_rate": 4.983938213382544e-05,
      "loss": 2.9816,
      "step": 24000
    },
    {
      "epoch": 3.35,
      "learning_rate": 4.9835964732417474e-05,
      "loss": 2.9759,
      "step": 24500
    },
    {
      "epoch": 3.42,
      "learning_rate": 4.983254733100951e-05,
      "loss": 2.977,
      "step": 25000
    },
    {
      "epoch": 3.49,
      "learning_rate": 4.982912992960153e-05,
      "loss": 2.9841,
      "step": 25500
    },
    {
      "epoch": 3.55,
      "learning_rate": 4.982571252819356e-05,
      "loss": 2.9731,
      "step": 26000
    },
    {
      "epoch": 3.62,
      "learning_rate": 4.98222951267856e-05,
      "loss": 2.9716,
      "step": 26500
    },
    {
      "epoch": 3.69,
      "learning_rate": 4.9818877725377625e-05,
      "loss": 2.9762,
      "step": 27000
    },
    {
      "epoch": 3.76,
      "learning_rate": 4.981546032396965e-05,
      "loss": 2.9774,
      "step": 27500
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.981204292256169e-05,
      "loss": 2.9743,
      "step": 28000
    },
    {
      "epoch": 3.9,
      "learning_rate": 4.9808625521153715e-05,
      "loss": 2.9737,
      "step": 28500
    },
    {
      "epoch": 3.96,
      "learning_rate": 4.980520811974575e-05,
      "loss": 2.9701,
      "step": 29000
    },
    {
      "epoch": 4.03,
      "learning_rate": 4.980179071833778e-05,
      "loss": 2.9589,
      "step": 29500
    },
    {
      "epoch": 4.1,
      "learning_rate": 4.9798373316929805e-05,
      "loss": 2.926,
      "step": 30000
    },
    {
      "epoch": 4.17,
      "learning_rate": 4.979495591552184e-05,
      "loss": 2.9327,
      "step": 30500
    },
    {
      "epoch": 4.24,
      "learning_rate": 4.9791538514113874e-05,
      "loss": 2.9401,
      "step": 31000
    },
    {
      "epoch": 4.31,
      "learning_rate": 4.97881211127059e-05,
      "loss": 2.9345,
      "step": 31500
    },
    {
      "epoch": 4.37,
      "learning_rate": 4.978470371129793e-05,
      "loss": 2.9348,
      "step": 32000
    },
    {
      "epoch": 4.44,
      "learning_rate": 4.9781286309889964e-05,
      "loss": 2.9386,
      "step": 32500
    },
    {
      "epoch": 4.51,
      "learning_rate": 4.977786890848199e-05,
      "loss": 2.9312,
      "step": 33000
    },
    {
      "epoch": 4.58,
      "learning_rate": 4.977445150707402e-05,
      "loss": 2.9361,
      "step": 33500
    },
    {
      "epoch": 4.65,
      "learning_rate": 4.9771034105666054e-05,
      "loss": 2.9316,
      "step": 34000
    },
    {
      "epoch": 4.72,
      "learning_rate": 4.976761670425809e-05,
      "loss": 2.9397,
      "step": 34500
    },
    {
      "epoch": 4.78,
      "learning_rate": 4.9764199302850116e-05,
      "loss": 2.9377,
      "step": 35000
    },
    {
      "epoch": 4.85,
      "learning_rate": 4.9760781901442144e-05,
      "loss": 2.9378,
      "step": 35500
    },
    {
      "epoch": 4.92,
      "learning_rate": 4.975736450003418e-05,
      "loss": 2.9306,
      "step": 36000
    },
    {
      "epoch": 4.99,
      "learning_rate": 4.9753947098626206e-05,
      "loss": 2.9339,
      "step": 36500
    },
    {
      "epoch": 5.06,
      "learning_rate": 4.975052969721824e-05,
      "loss": 2.8964,
      "step": 37000
    },
    {
      "epoch": 5.13,
      "learning_rate": 4.974711229581027e-05,
      "loss": 2.8929,
      "step": 37500
    },
    {
      "epoch": 5.19,
      "learning_rate": 4.9743694894402296e-05,
      "loss": 2.901,
      "step": 38000
    },
    {
      "epoch": 5.26,
      "learning_rate": 4.974027749299433e-05,
      "loss": 2.8934,
      "step": 38500
    },
    {
      "epoch": 5.33,
      "learning_rate": 4.9736860091586364e-05,
      "loss": 2.9019,
      "step": 39000
    },
    {
      "epoch": 5.4,
      "learning_rate": 4.973344269017839e-05,
      "loss": 2.9037,
      "step": 39500
    },
    {
      "epoch": 5.47,
      "learning_rate": 4.973002528877042e-05,
      "loss": 2.8915,
      "step": 40000
    },
    {
      "epoch": 5.54,
      "learning_rate": 4.9726607887362454e-05,
      "loss": 2.899,
      "step": 40500
    },
    {
      "epoch": 5.6,
      "learning_rate": 4.972319048595448e-05,
      "loss": 2.9026,
      "step": 41000
    },
    {
      "epoch": 5.67,
      "learning_rate": 4.971977308454651e-05,
      "loss": 2.9062,
      "step": 41500
    },
    {
      "epoch": 5.74,
      "learning_rate": 4.9716355683138544e-05,
      "loss": 2.8974,
      "step": 42000
    },
    {
      "epoch": 5.81,
      "learning_rate": 4.971293828173058e-05,
      "loss": 2.8969,
      "step": 42500
    },
    {
      "epoch": 5.88,
      "learning_rate": 4.9709520880322606e-05,
      "loss": 2.8972,
      "step": 43000
    },
    {
      "epoch": 5.95,
      "learning_rate": 4.9706103478914634e-05,
      "loss": 2.8994,
      "step": 43500
    },
    {
      "epoch": 6.01,
      "learning_rate": 4.970268607750667e-05,
      "loss": 2.8925,
      "step": 44000
    },
    {
      "epoch": 6.08,
      "learning_rate": 4.9699268676098696e-05,
      "loss": 2.8637,
      "step": 44500
    },
    {
      "epoch": 6.15,
      "learning_rate": 4.969585127469073e-05,
      "loss": 2.859,
      "step": 45000
    },
    {
      "epoch": 6.22,
      "learning_rate": 4.969243387328276e-05,
      "loss": 2.8624,
      "step": 45500
    },
    {
      "epoch": 6.29,
      "learning_rate": 4.9689016471874786e-05,
      "loss": 2.8595,
      "step": 46000
    },
    {
      "epoch": 6.36,
      "learning_rate": 4.968559907046682e-05,
      "loss": 2.8632,
      "step": 46500
    },
    {
      "epoch": 6.42,
      "learning_rate": 4.9682181669058855e-05,
      "loss": 2.8609,
      "step": 47000
    },
    {
      "epoch": 6.49,
      "learning_rate": 4.9678764267650876e-05,
      "loss": 2.8702,
      "step": 47500
    },
    {
      "epoch": 6.56,
      "learning_rate": 4.967534686624291e-05,
      "loss": 2.8714,
      "step": 48000
    },
    {
      "epoch": 6.63,
      "learning_rate": 4.9671929464834945e-05,
      "loss": 2.8636,
      "step": 48500
    },
    {
      "epoch": 6.7,
      "learning_rate": 4.966851206342697e-05,
      "loss": 2.8666,
      "step": 49000
    },
    {
      "epoch": 6.77,
      "learning_rate": 4.9665094662019e-05,
      "loss": 2.8701,
      "step": 49500
    },
    {
      "epoch": 6.83,
      "learning_rate": 4.9661677260611034e-05,
      "loss": 2.87,
      "step": 50000
    },
    {
      "epoch": 6.9,
      "learning_rate": 4.965825985920307e-05,
      "loss": 2.8608,
      "step": 50500
    },
    {
      "epoch": 6.97,
      "learning_rate": 4.96548424577951e-05,
      "loss": 2.867,
      "step": 51000
    },
    {
      "epoch": 7.04,
      "learning_rate": 4.9651425056387124e-05,
      "loss": 2.8444,
      "step": 51500
    },
    {
      "epoch": 7.11,
      "learning_rate": 4.964800765497916e-05,
      "loss": 2.8337,
      "step": 52000
    },
    {
      "epoch": 7.18,
      "learning_rate": 4.9644590253571186e-05,
      "loss": 2.8337,
      "step": 52500
    },
    {
      "epoch": 7.24,
      "learning_rate": 4.964117285216322e-05,
      "loss": 2.8319,
      "step": 53000
    },
    {
      "epoch": 7.31,
      "learning_rate": 4.963775545075525e-05,
      "loss": 2.8317,
      "step": 53500
    },
    {
      "epoch": 7.38,
      "learning_rate": 4.9634338049347276e-05,
      "loss": 2.8342,
      "step": 54000
    },
    {
      "epoch": 7.45,
      "learning_rate": 4.963092064793931e-05,
      "loss": 2.8273,
      "step": 54500
    },
    {
      "epoch": 7.52,
      "learning_rate": 4.962750324653134e-05,
      "loss": 2.8347,
      "step": 55000
    },
    {
      "epoch": 7.59,
      "learning_rate": 4.9624085845123366e-05,
      "loss": 2.8383,
      "step": 55500
    },
    {
      "epoch": 7.65,
      "learning_rate": 4.96206684437154e-05,
      "loss": 2.8313,
      "step": 56000
    },
    {
      "epoch": 7.72,
      "learning_rate": 4.9617251042307435e-05,
      "loss": 2.8387,
      "step": 56500
    },
    {
      "epoch": 7.79,
      "learning_rate": 4.961383364089946e-05,
      "loss": 2.834,
      "step": 57000
    },
    {
      "epoch": 7.86,
      "learning_rate": 4.961041623949149e-05,
      "loss": 2.8377,
      "step": 57500
    },
    {
      "epoch": 7.93,
      "learning_rate": 4.9606998838083525e-05,
      "loss": 2.8365,
      "step": 58000
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.960358143667555e-05,
      "loss": 2.8338,
      "step": 58500
    },
    {
      "epoch": 8.06,
      "learning_rate": 4.960016403526758e-05,
      "loss": 2.7898,
      "step": 59000
    },
    {
      "epoch": 8.13,
      "learning_rate": 4.9596746633859615e-05,
      "loss": 2.7955,
      "step": 59500
    },
    {
      "epoch": 8.2,
      "learning_rate": 4.959332923245165e-05,
      "loss": 2.7901,
      "step": 60000
    },
    {
      "epoch": 8.27,
      "learning_rate": 4.958991183104368e-05,
      "loss": 2.7974,
      "step": 60500
    },
    {
      "epoch": 8.34,
      "learning_rate": 4.9586494429635705e-05,
      "loss": 2.8055,
      "step": 61000
    },
    {
      "epoch": 8.41,
      "learning_rate": 4.958307702822774e-05,
      "loss": 2.8088,
      "step": 61500
    },
    {
      "epoch": 8.47,
      "learning_rate": 4.957965962681977e-05,
      "loss": 2.8061,
      "step": 62000
    },
    {
      "epoch": 8.54,
      "learning_rate": 4.95762422254118e-05,
      "loss": 2.8005,
      "step": 62500
    },
    {
      "epoch": 8.61,
      "learning_rate": 4.957282482400383e-05,
      "loss": 2.8056,
      "step": 63000
    },
    {
      "epoch": 8.68,
      "learning_rate": 4.9569407422595856e-05,
      "loss": 2.8096,
      "step": 63500
    },
    {
      "epoch": 8.75,
      "learning_rate": 4.956599002118789e-05,
      "loss": 2.8077,
      "step": 64000
    },
    {
      "epoch": 8.82,
      "learning_rate": 4.9562572619779925e-05,
      "loss": 2.8107,
      "step": 64500
    },
    {
      "epoch": 8.88,
      "learning_rate": 4.955915521837195e-05,
      "loss": 2.8123,
      "step": 65000
    },
    {
      "epoch": 8.95,
      "learning_rate": 4.955573781696398e-05,
      "loss": 2.8121,
      "step": 65500
    },
    {
      "epoch": 9.02,
      "learning_rate": 4.9552320415556015e-05,
      "loss": 2.7953,
      "step": 66000
    },
    {
      "epoch": 9.09,
      "learning_rate": 4.954890301414804e-05,
      "loss": 2.7705,
      "step": 66500
    },
    {
      "epoch": 9.16,
      "learning_rate": 4.954548561274007e-05,
      "loss": 2.7682,
      "step": 67000
    },
    {
      "epoch": 9.23,
      "learning_rate": 4.9542068211332105e-05,
      "loss": 2.7656,
      "step": 67500
    },
    {
      "epoch": 9.29,
      "learning_rate": 4.953865080992414e-05,
      "loss": 2.7742,
      "step": 68000
    },
    {
      "epoch": 9.36,
      "learning_rate": 4.953523340851617e-05,
      "loss": 2.7677,
      "step": 68500
    },
    {
      "epoch": 9.43,
      "learning_rate": 4.9531816007108195e-05,
      "loss": 2.7772,
      "step": 69000
    },
    {
      "epoch": 9.5,
      "learning_rate": 4.952839860570023e-05,
      "loss": 2.7756,
      "step": 69500
    },
    {
      "epoch": 9.57,
      "learning_rate": 4.952498120429226e-05,
      "loss": 2.7735,
      "step": 70000
    },
    {
      "epoch": 9.64,
      "learning_rate": 4.952156380288429e-05,
      "loss": 2.7794,
      "step": 70500
    },
    {
      "epoch": 9.7,
      "learning_rate": 4.951814640147632e-05,
      "loss": 2.7803,
      "step": 71000
    },
    {
      "epoch": 9.77,
      "learning_rate": 4.951472900006835e-05,
      "loss": 2.7819,
      "step": 71500
    },
    {
      "epoch": 9.84,
      "learning_rate": 4.951131159866038e-05,
      "loss": 2.7798,
      "step": 72000
    },
    {
      "epoch": 9.91,
      "learning_rate": 4.9507894197252416e-05,
      "loss": 2.7858,
      "step": 72500
    },
    {
      "epoch": 9.98,
      "learning_rate": 4.950447679584444e-05,
      "loss": 2.7831,
      "step": 73000
    },
    {
      "epoch": 10.05,
      "learning_rate": 4.950105939443647e-05,
      "loss": 2.7485,
      "step": 73500
    },
    {
      "epoch": 10.11,
      "learning_rate": 4.9497641993028506e-05,
      "loss": 2.7361,
      "step": 74000
    },
    {
      "epoch": 10.18,
      "learning_rate": 4.949422459162053e-05,
      "loss": 2.739,
      "step": 74500
    },
    {
      "epoch": 10.25,
      "learning_rate": 4.949080719021256e-05,
      "loss": 2.7434,
      "step": 75000
    },
    {
      "epoch": 10.32,
      "learning_rate": 4.9487389788804595e-05,
      "loss": 2.7427,
      "step": 75500
    },
    {
      "epoch": 10.39,
      "learning_rate": 4.948397238739663e-05,
      "loss": 2.7424,
      "step": 76000
    },
    {
      "epoch": 10.46,
      "learning_rate": 4.948055498598866e-05,
      "loss": 2.756,
      "step": 76500
    },
    {
      "epoch": 10.52,
      "learning_rate": 4.9477137584580685e-05,
      "loss": 2.7434,
      "step": 77000
    },
    {
      "epoch": 10.59,
      "learning_rate": 4.947372018317272e-05,
      "loss": 2.7512,
      "step": 77500
    },
    {
      "epoch": 10.66,
      "learning_rate": 4.947030278176475e-05,
      "loss": 2.7527,
      "step": 78000
    },
    {
      "epoch": 10.73,
      "learning_rate": 4.946688538035678e-05,
      "loss": 2.7605,
      "step": 78500
    },
    {
      "epoch": 10.8,
      "learning_rate": 4.946346797894881e-05,
      "loss": 2.7624,
      "step": 79000
    },
    {
      "epoch": 10.87,
      "learning_rate": 4.946005057754084e-05,
      "loss": 2.7563,
      "step": 79500
    },
    {
      "epoch": 10.93,
      "learning_rate": 4.945663317613287e-05,
      "loss": 2.7531,
      "step": 80000
    },
    {
      "epoch": 11.0,
      "learning_rate": 4.9453215774724906e-05,
      "loss": 2.7539,
      "step": 80500
    },
    {
      "epoch": 11.07,
      "learning_rate": 4.944979837331693e-05,
      "loss": 2.7114,
      "step": 81000
    },
    {
      "epoch": 11.14,
      "learning_rate": 4.944638097190896e-05,
      "loss": 2.7128,
      "step": 81500
    },
    {
      "epoch": 11.21,
      "learning_rate": 4.9442963570500996e-05,
      "loss": 2.7177,
      "step": 82000
    },
    {
      "epoch": 11.28,
      "learning_rate": 4.9439546169093024e-05,
      "loss": 2.7212,
      "step": 82500
    },
    {
      "epoch": 11.34,
      "learning_rate": 4.943612876768505e-05,
      "loss": 2.7224,
      "step": 83000
    },
    {
      "epoch": 11.41,
      "learning_rate": 4.9432711366277086e-05,
      "loss": 2.7234,
      "step": 83500
    },
    {
      "epoch": 11.48,
      "learning_rate": 4.9429293964869113e-05,
      "loss": 2.7233,
      "step": 84000
    },
    {
      "epoch": 11.55,
      "learning_rate": 4.942587656346115e-05,
      "loss": 2.7297,
      "step": 84500
    },
    {
      "epoch": 11.62,
      "learning_rate": 4.9422459162053176e-05,
      "loss": 2.7211,
      "step": 85000
    },
    {
      "epoch": 11.69,
      "learning_rate": 4.941904176064521e-05,
      "loss": 2.7282,
      "step": 85500
    },
    {
      "epoch": 11.76,
      "learning_rate": 4.941562435923724e-05,
      "loss": 2.7272,
      "step": 86000
    },
    {
      "epoch": 11.82,
      "learning_rate": 4.941220695782927e-05,
      "loss": 2.7325,
      "step": 86500
    },
    {
      "epoch": 11.89,
      "learning_rate": 4.94087895564213e-05,
      "loss": 2.7275,
      "step": 87000
    },
    {
      "epoch": 11.96,
      "learning_rate": 4.940537215501333e-05,
      "loss": 2.7303,
      "step": 87500
    },
    {
      "epoch": 12.03,
      "learning_rate": 4.940195475360536e-05,
      "loss": 2.7091,
      "step": 88000
    },
    {
      "epoch": 12.1,
      "learning_rate": 4.9398537352197397e-05,
      "loss": 2.6863,
      "step": 88500
    },
    {
      "epoch": 12.17,
      "learning_rate": 4.939511995078942e-05,
      "loss": 2.6867,
      "step": 89000
    },
    {
      "epoch": 12.23,
      "learning_rate": 4.939170254938145e-05,
      "loss": 2.6934,
      "step": 89500
    },
    {
      "epoch": 12.3,
      "learning_rate": 4.9388285147973486e-05,
      "loss": 2.6936,
      "step": 90000
    },
    {
      "epoch": 12.37,
      "learning_rate": 4.9384867746565514e-05,
      "loss": 2.6953,
      "step": 90500
    },
    {
      "epoch": 12.44,
      "learning_rate": 4.938145034515754e-05,
      "loss": 2.6973,
      "step": 91000
    },
    {
      "epoch": 12.51,
      "learning_rate": 4.9378032943749576e-05,
      "loss": 2.7022,
      "step": 91500
    },
    {
      "epoch": 12.58,
      "learning_rate": 4.9374615542341604e-05,
      "loss": 2.6977,
      "step": 92000
    },
    {
      "epoch": 12.64,
      "learning_rate": 4.937119814093364e-05,
      "loss": 2.6996,
      "step": 92500
    },
    {
      "epoch": 12.71,
      "learning_rate": 4.9367780739525666e-05,
      "loss": 2.7008,
      "step": 93000
    },
    {
      "epoch": 12.78,
      "learning_rate": 4.93643633381177e-05,
      "loss": 2.7091,
      "step": 93500
    },
    {
      "epoch": 12.85,
      "learning_rate": 4.936094593670973e-05,
      "loss": 2.7057,
      "step": 94000
    },
    {
      "epoch": 12.92,
      "learning_rate": 4.935752853530176e-05,
      "loss": 2.7084,
      "step": 94500
    },
    {
      "epoch": 12.99,
      "learning_rate": 4.935411113389379e-05,
      "loss": 2.7114,
      "step": 95000
    },
    {
      "epoch": 13.05,
      "learning_rate": 4.935069373248582e-05,
      "loss": 2.6727,
      "step": 95500
    },
    {
      "epoch": 13.12,
      "learning_rate": 4.934727633107785e-05,
      "loss": 2.6586,
      "step": 96000
    },
    {
      "epoch": 13.19,
      "learning_rate": 4.934385892966989e-05,
      "loss": 2.6631,
      "step": 96500
    },
    {
      "epoch": 13.26,
      "learning_rate": 4.934044152826191e-05,
      "loss": 2.6632,
      "step": 97000
    },
    {
      "epoch": 13.33,
      "learning_rate": 4.933702412685394e-05,
      "loss": 2.6727,
      "step": 97500
    },
    {
      "epoch": 13.4,
      "learning_rate": 4.933360672544598e-05,
      "loss": 2.6671,
      "step": 98000
    },
    {
      "epoch": 13.46,
      "learning_rate": 4.9330189324038004e-05,
      "loss": 2.686,
      "step": 98500
    },
    {
      "epoch": 13.53,
      "learning_rate": 4.932677192263003e-05,
      "loss": 2.6796,
      "step": 99000
    },
    {
      "epoch": 13.6,
      "learning_rate": 4.9323354521222067e-05,
      "loss": 2.6745,
      "step": 99500
    },
    {
      "epoch": 13.67,
      "learning_rate": 4.9319937119814094e-05,
      "loss": 2.6825,
      "step": 100000
    },
    {
      "epoch": 13.74,
      "learning_rate": 4.931651971840613e-05,
      "loss": 2.6857,
      "step": 100500
    },
    {
      "epoch": 13.81,
      "learning_rate": 4.9313102316998156e-05,
      "loss": 2.6812,
      "step": 101000
    },
    {
      "epoch": 13.87,
      "learning_rate": 4.9309684915590184e-05,
      "loss": 2.6805,
      "step": 101500
    },
    {
      "epoch": 13.94,
      "learning_rate": 4.930626751418222e-05,
      "loss": 2.6857,
      "step": 102000
    },
    {
      "epoch": 14.01,
      "learning_rate": 4.930285011277425e-05,
      "loss": 2.6768,
      "step": 102500
    },
    {
      "epoch": 14.08,
      "learning_rate": 4.929943271136628e-05,
      "loss": 2.6435,
      "step": 103000
    },
    {
      "epoch": 14.15,
      "learning_rate": 4.929601530995831e-05,
      "loss": 2.6508,
      "step": 103500
    },
    {
      "epoch": 14.22,
      "learning_rate": 4.929259790855034e-05,
      "loss": 2.6444,
      "step": 104000
    },
    {
      "epoch": 14.28,
      "learning_rate": 4.928918050714238e-05,
      "loss": 2.6396,
      "step": 104500
    },
    {
      "epoch": 14.35,
      "learning_rate": 4.92857631057344e-05,
      "loss": 2.6452,
      "step": 105000
    },
    {
      "epoch": 14.42,
      "learning_rate": 4.928234570432643e-05,
      "loss": 2.6535,
      "step": 105500
    },
    {
      "epoch": 14.49,
      "learning_rate": 4.927892830291847e-05,
      "loss": 2.6482,
      "step": 106000
    },
    {
      "epoch": 14.56,
      "learning_rate": 4.9275510901510495e-05,
      "loss": 2.6567,
      "step": 106500
    },
    {
      "epoch": 14.63,
      "learning_rate": 4.927209350010252e-05,
      "loss": 2.6589,
      "step": 107000
    },
    {
      "epoch": 14.69,
      "learning_rate": 4.926867609869456e-05,
      "loss": 2.6564,
      "step": 107500
    },
    {
      "epoch": 14.76,
      "learning_rate": 4.9265258697286585e-05,
      "loss": 2.6579,
      "step": 108000
    },
    {
      "epoch": 14.83,
      "learning_rate": 4.926184129587861e-05,
      "loss": 2.658,
      "step": 108500
    },
    {
      "epoch": 14.9,
      "learning_rate": 4.925842389447065e-05,
      "loss": 2.6658,
      "step": 109000
    },
    {
      "epoch": 14.97,
      "learning_rate": 4.9255006493062674e-05,
      "loss": 2.6535,
      "step": 109500
    },
    {
      "epoch": 15.04,
      "learning_rate": 4.925158909165471e-05,
      "loss": 2.6404,
      "step": 110000
    },
    {
      "epoch": 15.1,
      "learning_rate": 4.9248171690246737e-05,
      "loss": 2.6185,
      "step": 110500
    },
    {
      "epoch": 15.17,
      "learning_rate": 4.924475428883877e-05,
      "loss": 2.6158,
      "step": 111000
    },
    {
      "epoch": 15.24,
      "learning_rate": 4.92413368874308e-05,
      "loss": 2.621,
      "step": 111500
    },
    {
      "epoch": 15.31,
      "learning_rate": 4.923791948602283e-05,
      "loss": 2.6296,
      "step": 112000
    },
    {
      "epoch": 15.38,
      "learning_rate": 4.923450208461486e-05,
      "loss": 2.6284,
      "step": 112500
    },
    {
      "epoch": 15.45,
      "learning_rate": 4.923108468320689e-05,
      "loss": 2.6252,
      "step": 113000
    },
    {
      "epoch": 15.51,
      "learning_rate": 4.922766728179892e-05,
      "loss": 2.6355,
      "step": 113500
    },
    {
      "epoch": 15.58,
      "learning_rate": 4.922424988039096e-05,
      "loss": 2.6337,
      "step": 114000
    },
    {
      "epoch": 15.65,
      "learning_rate": 4.922083247898298e-05,
      "loss": 2.6337,
      "step": 114500
    },
    {
      "epoch": 15.72,
      "learning_rate": 4.921741507757501e-05,
      "loss": 2.6354,
      "step": 115000
    },
    {
      "epoch": 15.79,
      "learning_rate": 4.921399767616705e-05,
      "loss": 2.6371,
      "step": 115500
    },
    {
      "epoch": 15.86,
      "learning_rate": 4.9210580274759075e-05,
      "loss": 2.6427,
      "step": 116000
    },
    {
      "epoch": 15.92,
      "learning_rate": 4.92071628733511e-05,
      "loss": 2.639,
      "step": 116500
    },
    {
      "epoch": 15.99,
      "learning_rate": 4.920374547194314e-05,
      "loss": 2.6426,
      "step": 117000
    },
    {
      "epoch": 16.06,
      "learning_rate": 4.9200328070535165e-05,
      "loss": 2.5977,
      "step": 117500
    },
    {
      "epoch": 16.13,
      "learning_rate": 4.91969106691272e-05,
      "loss": 2.5961,
      "step": 118000
    },
    {
      "epoch": 16.2,
      "learning_rate": 4.919349326771923e-05,
      "loss": 2.6007,
      "step": 118500
    },
    {
      "epoch": 16.27,
      "learning_rate": 4.919007586631126e-05,
      "loss": 2.6015,
      "step": 119000
    },
    {
      "epoch": 16.33,
      "learning_rate": 4.918665846490329e-05,
      "loss": 2.6064,
      "step": 119500
    },
    {
      "epoch": 16.4,
      "learning_rate": 4.9183241063495324e-05,
      "loss": 2.6125,
      "step": 120000
    }
  ],
  "max_steps": 7316000,
  "num_train_epochs": 1000,
  "total_flos": 241553911465377792,
  "trial_name": null,
  "trial_params": null
}