{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 41090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 4.9391579459722564e-05, "loss": 1.7982, "step": 500 }, { "epoch": 0.24, "learning_rate": 4.878315891944512e-05, "loss": 1.2914, "step": 1000 }, { "epoch": 0.37, "learning_rate": 4.817473837916768e-05, "loss": 1.1456, "step": 1500 }, { "epoch": 0.49, "learning_rate": 4.756631783889024e-05, "loss": 1.0345, "step": 2000 }, { "epoch": 0.61, "learning_rate": 4.6957897298612804e-05, "loss": 0.9665, "step": 2500 }, { "epoch": 0.73, "learning_rate": 4.6349476758335365e-05, "loss": 0.9272, "step": 3000 }, { "epoch": 0.85, "learning_rate": 4.574105621805793e-05, "loss": 0.8571, "step": 3500 }, { "epoch": 0.97, "learning_rate": 4.513263567778048e-05, "loss": 0.8139, "step": 4000 }, { "epoch": 1.1, "learning_rate": 4.4524215137503043e-05, "loss": 0.6986, "step": 4500 }, { "epoch": 1.22, "learning_rate": 4.3915794597225605e-05, "loss": 0.6375, "step": 5000 }, { "epoch": 1.34, "learning_rate": 4.330737405694817e-05, "loss": 0.6407, "step": 5500 }, { "epoch": 1.46, "learning_rate": 4.269895351667073e-05, "loss": 0.6139, "step": 6000 }, { "epoch": 1.58, "learning_rate": 4.209053297639329e-05, "loss": 0.5973, "step": 6500 }, { "epoch": 1.7, "learning_rate": 4.1482112436115845e-05, "loss": 0.6094, "step": 7000 }, { "epoch": 1.83, "learning_rate": 4.0873691895838406e-05, "loss": 0.5824, "step": 7500 }, { "epoch": 1.95, "learning_rate": 4.026527135556097e-05, "loss": 0.5459, "step": 8000 }, { "epoch": 2.07, "learning_rate": 3.965685081528352e-05, "loss": 0.4764, "step": 8500 }, { "epoch": 2.19, "learning_rate": 3.9048430275006084e-05, "loss": 0.427, "step": 9000 }, { "epoch": 2.31, "learning_rate": 3.8440009734728646e-05, "loss": 0.4192, "step": 9500 }, { "epoch": 2.43, "learning_rate": 3.783158919445121e-05, "loss": 0.4278, "step": 10000 }, { "epoch": 2.56, "learning_rate": 3.722316865417377e-05, "loss": 0.4157, "step": 10500 }, { "epoch": 2.68, "learning_rate": 3.6614748113896324e-05, "loss": 0.4156, "step": 11000 }, { "epoch": 2.8, "learning_rate": 3.6006327573618886e-05, "loss": 0.4137, "step": 11500 }, { "epoch": 2.92, "learning_rate": 3.539790703334145e-05, "loss": 0.3731, "step": 12000 }, { "epoch": 3.04, "learning_rate": 3.4789486493064e-05, "loss": 0.3516, "step": 12500 }, { "epoch": 3.16, "learning_rate": 3.418106595278657e-05, "loss": 0.3098, "step": 13000 }, { "epoch": 3.29, "learning_rate": 3.357264541250913e-05, "loss": 0.3051, "step": 13500 }, { "epoch": 3.41, "learning_rate": 3.296422487223169e-05, "loss": 0.3029, "step": 14000 }, { "epoch": 3.53, "learning_rate": 3.235580433195425e-05, "loss": 0.3107, "step": 14500 }, { "epoch": 3.65, "learning_rate": 3.174738379167681e-05, "loss": 0.2969, "step": 15000 }, { "epoch": 3.77, "learning_rate": 3.1138963251399365e-05, "loss": 0.2886, "step": 15500 }, { "epoch": 3.89, "learning_rate": 3.053054271112193e-05, "loss": 0.2887, "step": 16000 }, { "epoch": 4.02, "learning_rate": 2.9922122170844492e-05, "loss": 0.2803, "step": 16500 }, { "epoch": 4.14, "learning_rate": 2.9313701630567047e-05, "loss": 0.2256, "step": 17000 }, { "epoch": 4.26, "learning_rate": 2.8705281090289608e-05, "loss": 0.2209, "step": 17500 }, { "epoch": 4.38, "learning_rate": 2.8096860550012173e-05, "loss": 0.2197, "step": 18000 }, { "epoch": 4.5, "learning_rate": 2.7488440009734728e-05, "loss": 0.2305, "step": 18500 }, { "epoch": 4.62, "learning_rate": 2.688001946945729e-05, "loss": 0.2252, "step": 19000 }, { "epoch": 4.75, "learning_rate": 2.627159892917985e-05, "loss": 0.207, "step": 19500 }, { "epoch": 4.87, "learning_rate": 2.566317838890241e-05, "loss": 0.2187, "step": 20000 }, { "epoch": 4.99, "learning_rate": 2.505475784862497e-05, "loss": 0.207, "step": 20500 }, { "epoch": 5.11, "learning_rate": 2.444633730834753e-05, "loss": 0.1685, "step": 21000 }, { "epoch": 5.23, "learning_rate": 2.383791676807009e-05, "loss": 0.1772, "step": 21500 }, { "epoch": 5.35, "learning_rate": 2.3229496227792653e-05, "loss": 0.1728, "step": 22000 }, { "epoch": 5.48, "learning_rate": 2.262107568751521e-05, "loss": 0.1678, "step": 22500 }, { "epoch": 5.6, "learning_rate": 2.201265514723777e-05, "loss": 0.1642, "step": 23000 }, { "epoch": 5.72, "learning_rate": 2.1404234606960334e-05, "loss": 0.1664, "step": 23500 }, { "epoch": 5.84, "learning_rate": 2.0795814066682892e-05, "loss": 0.1575, "step": 24000 }, { "epoch": 5.96, "learning_rate": 2.018739352640545e-05, "loss": 0.1599, "step": 24500 }, { "epoch": 6.08, "learning_rate": 1.9578972986128012e-05, "loss": 0.1249, "step": 25000 }, { "epoch": 6.21, "learning_rate": 1.8970552445850574e-05, "loss": 0.1266, "step": 25500 }, { "epoch": 6.33, "learning_rate": 1.8362131905573132e-05, "loss": 0.1304, "step": 26000 }, { "epoch": 6.45, "learning_rate": 1.7753711365295694e-05, "loss": 0.1161, "step": 26500 }, { "epoch": 6.57, "learning_rate": 1.7145290825018255e-05, "loss": 0.1253, "step": 27000 }, { "epoch": 6.69, "learning_rate": 1.6536870284740814e-05, "loss": 0.1215, "step": 27500 }, { "epoch": 6.81, "learning_rate": 1.5928449744463375e-05, "loss": 0.1158, "step": 28000 }, { "epoch": 6.94, "learning_rate": 1.5320029204185933e-05, "loss": 0.1219, "step": 28500 }, { "epoch": 7.06, "learning_rate": 1.4711608663908493e-05, "loss": 0.1039, "step": 29000 }, { "epoch": 7.18, "learning_rate": 1.4103188123631053e-05, "loss": 0.081, "step": 29500 }, { "epoch": 7.3, "learning_rate": 1.3494767583353615e-05, "loss": 0.0922, "step": 30000 }, { "epoch": 7.42, "learning_rate": 1.2886347043076175e-05, "loss": 0.0844, "step": 30500 }, { "epoch": 7.54, "learning_rate": 1.2277926502798735e-05, "loss": 0.0759, "step": 31000 }, { "epoch": 7.67, "learning_rate": 1.1669505962521295e-05, "loss": 0.0882, "step": 31500 }, { "epoch": 7.79, "learning_rate": 1.1061085422243855e-05, "loss": 0.0856, "step": 32000 }, { "epoch": 7.91, "learning_rate": 1.0452664881966416e-05, "loss": 0.0881, "step": 32500 }, { "epoch": 8.03, "learning_rate": 9.844244341688976e-06, "loss": 0.0708, "step": 33000 }, { "epoch": 8.15, "learning_rate": 9.235823801411536e-06, "loss": 0.0538, "step": 33500 }, { "epoch": 8.27, "learning_rate": 8.627403261134098e-06, "loss": 0.0554, "step": 34000 }, { "epoch": 8.4, "learning_rate": 8.018982720856656e-06, "loss": 0.06, "step": 34500 }, { "epoch": 8.52, "learning_rate": 7.410562180579217e-06, "loss": 0.0514, "step": 35000 }, { "epoch": 8.64, "learning_rate": 6.802141640301777e-06, "loss": 0.0536, "step": 35500 }, { "epoch": 8.76, "learning_rate": 6.193721100024337e-06, "loss": 0.0583, "step": 36000 }, { "epoch": 8.88, "learning_rate": 5.585300559746897e-06, "loss": 0.052, "step": 36500 }, { "epoch": 9.0, "learning_rate": 4.976880019469458e-06, "loss": 0.0481, "step": 37000 }, { "epoch": 9.13, "learning_rate": 4.368459479192018e-06, "loss": 0.0378, "step": 37500 }, { "epoch": 9.25, "learning_rate": 3.760038938914578e-06, "loss": 0.0296, "step": 38000 }, { "epoch": 9.37, "learning_rate": 3.151618398637138e-06, "loss": 0.037, "step": 38500 }, { "epoch": 9.49, "learning_rate": 2.543197858359698e-06, "loss": 0.0339, "step": 39000 }, { "epoch": 9.61, "learning_rate": 1.9347773180822585e-06, "loss": 0.0369, "step": 39500 }, { "epoch": 9.73, "learning_rate": 1.3263567778048189e-06, "loss": 0.0344, "step": 40000 }, { "epoch": 9.86, "learning_rate": 7.17936237527379e-07, "loss": 0.0301, "step": 40500 }, { "epoch": 9.98, "learning_rate": 1.0951569724993917e-07, "loss": 0.0325, "step": 41000 }, { "epoch": 10.0, "step": 41090, "total_flos": 1.1825796643443088e+17, "train_loss": 0.30929526851019973, "train_runtime": 55900.09, "train_samples_per_second": 11.761, "train_steps_per_second": 0.735 } ], "max_steps": 41090, "num_train_epochs": 10, "total_flos": 1.1825796643443088e+17, "trial_name": null, "trial_params": null }