{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9175784099197664, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 5e-05, "loss": 3.2675, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.896600215071553e-05, "loss": 2.9196, "step": 1000 }, { "epoch": 0.18, "learning_rate": 4.793200430143105e-05, "loss": 2.8585, "step": 1500 }, { "epoch": 0.24, "learning_rate": 4.689800645214658e-05, "loss": 2.8352, "step": 2000 }, { "epoch": 0.3, "learning_rate": 4.586400860286211e-05, "loss": 2.8187, "step": 2500 }, { "epoch": 0.36, "learning_rate": 4.483001075357763e-05, "loss": 2.8061, "step": 3000 }, { "epoch": 0.43, "learning_rate": 4.379601290429316e-05, "loss": 2.8031, "step": 3500 }, { "epoch": 0.49, "learning_rate": 4.276201505500869e-05, "loss": 2.7748, "step": 4000 }, { "epoch": 0.55, "learning_rate": 4.1728017205724215e-05, "loss": 2.7805, "step": 4500 }, { "epoch": 0.61, "learning_rate": 4.069401935643974e-05, "loss": 2.7626, "step": 5000 }, { "epoch": 0.67, "learning_rate": 3.9660021507155265e-05, "loss": 2.7667, "step": 5500 }, { "epoch": 0.73, "learning_rate": 3.8626023657870794e-05, "loss": 2.7543, "step": 6000 }, { "epoch": 0.79, "learning_rate": 3.7592025808586315e-05, "loss": 2.7481, "step": 6500 }, { "epoch": 0.85, "learning_rate": 3.6558027959301844e-05, "loss": 2.756, "step": 7000 }, { "epoch": 0.91, "learning_rate": 3.552403011001738e-05, "loss": 2.7437, "step": 7500 }, { "epoch": 0.97, "learning_rate": 3.44900322607329e-05, "loss": 2.7375, "step": 8000 }, { "epoch": 1.03, "learning_rate": 3.345603441144843e-05, "loss": 2.7159, "step": 8500 }, { "epoch": 1.09, "learning_rate": 3.242203656216396e-05, "loss": 2.697, "step": 9000 }, { "epoch": 1.15, "learning_rate": 3.138803871287948e-05, "loss": 2.697, "step": 9500 }, { "epoch": 1.22, "learning_rate": 3.0354040863595007e-05, "loss": 2.6973, "step": 10000 }, { "epoch": 1.28, "learning_rate": 2.932004301431053e-05, "loss": 2.6924, "step": 10500 }, { "epoch": 1.34, "learning_rate": 2.828604516502606e-05, "loss": 2.6973, "step": 11000 }, { "epoch": 1.4, "learning_rate": 2.7252047315741585e-05, "loss": 2.6975, "step": 11500 }, { "epoch": 1.46, "learning_rate": 2.6218049466457113e-05, "loss": 2.6978, "step": 12000 }, { "epoch": 1.52, "learning_rate": 2.5184051617172638e-05, "loss": 2.6867, "step": 12500 }, { "epoch": 1.58, "learning_rate": 2.4150053767888163e-05, "loss": 2.6771, "step": 13000 }, { "epoch": 1.64, "learning_rate": 2.311605591860369e-05, "loss": 2.6919, "step": 13500 }, { "epoch": 1.7, "learning_rate": 2.2082058069319216e-05, "loss": 2.6826, "step": 14000 }, { "epoch": 1.76, "learning_rate": 2.1048060220034745e-05, "loss": 2.6865, "step": 14500 }, { "epoch": 1.82, "learning_rate": 2.001406237075027e-05, "loss": 2.6724, "step": 15000 }, { "epoch": 1.88, "learning_rate": 1.8980064521465795e-05, "loss": 2.6768, "step": 15500 }, { "epoch": 1.95, "learning_rate": 1.7946066672181323e-05, "loss": 2.68, "step": 16000 }, { "epoch": 2.01, "learning_rate": 1.6912068822896848e-05, "loss": 2.6798, "step": 16500 }, { "epoch": 2.07, "learning_rate": 1.5878070973612376e-05, "loss": 2.6573, "step": 17000 }, { "epoch": 2.13, "learning_rate": 1.4844073124327901e-05, "loss": 2.6498, "step": 17500 }, { "epoch": 2.19, "learning_rate": 1.381007527504343e-05, "loss": 2.648, "step": 18000 }, { "epoch": 2.25, "learning_rate": 1.2776077425758956e-05, "loss": 2.6586, "step": 18500 }, { "epoch": 2.31, "learning_rate": 
1.1742079576474481e-05, "loss": 2.6553, "step": 19000 }, { "epoch": 2.37, "learning_rate": 1.070808172719001e-05, "loss": 2.6484, "step": 19500 }, { "epoch": 2.43, "learning_rate": 9.674083877905534e-06, "loss": 2.656, "step": 20000 }, { "epoch": 2.49, "learning_rate": 8.640086028621061e-06, "loss": 2.6524, "step": 20500 }, { "epoch": 2.55, "learning_rate": 7.6060881793365875e-06, "loss": 2.6489, "step": 21000 }, { "epoch": 2.61, "learning_rate": 6.572090330052113e-06, "loss": 2.6452, "step": 21500 }, { "epoch": 2.67, "learning_rate": 5.538092480767641e-06, "loss": 2.6397, "step": 22000 }, { "epoch": 2.74, "learning_rate": 4.5040946314831665e-06, "loss": 2.6359, "step": 22500 }, { "epoch": 2.8, "learning_rate": 3.470096782198693e-06, "loss": 2.6491, "step": 23000 }, { "epoch": 2.86, "learning_rate": 2.43609893291422e-06, "loss": 2.6369, "step": 23500 }, { "epoch": 2.92, "learning_rate": 1.402101083629746e-06, "loss": 2.6461, "step": 24000 } ], "max_steps": 24678, "num_train_epochs": 3, "total_flos": 48313248320323584, "trial_name": null, "trial_params": null }
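The object above is the trainer state written by a Hugging Face Transformers Trainer checkpoint: the run was saved at global_step 24000 of max_steps 24678 (about 2.92 of 3 epochs), with loss and learning rate logged every 500 steps. Below is a minimal sketch of how this log could be inspected, assuming the object is saved as trainer_state.json in the working directory; the file name, the output image name, and the 500-step warmup used for the reference learning-rate curve are assumptions inferred from the logged values, not fields recorded in the file.

# Sketch (not part of the original file): load the trainer state, plot the
# training loss, and compare the logged learning rates against an inferred
# linear warmup/decay schedule.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:   # assumed file name
    state = json.load(f)

history = state["log_history"]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

# The logged rates match a linear schedule: warmup to a peak of 5e-05 over the
# first ~500 steps, then linear decay toward 0 at max_steps=24678. The warmup
# length is inferred from the values, not stored in the file.
peak, warmup, max_steps = 5e-05, 500, state["max_steps"]
reference = [peak * (max_steps - s) / (max_steps - warmup) for s in steps]

fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, losses)
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs, label="logged")
ax_lr.plot(steps, reference, linestyle="--", label="linear schedule (inferred)")
ax_lr.set_xlabel("step")
ax_lr.set_ylabel("learning rate")
ax_lr.legend()
fig.tight_layout()
fig.savefig("trainer_state_curves.png")   # assumed output name

Reading the saved state directly, rather than re-parsing console output, keeps the step and epoch bookkeeping consistent with what the Trainer itself recorded at the checkpoint.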