{ "best_metric": 1.818081021308899, "best_model_checkpoint": "math/checkpoint-17040", "epoch": 5.0, "global_step": 17040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 4.853286384976526e-05, "loss": 2.4525, "step": 500 }, { "epoch": 0.29, "learning_rate": 4.706572769953052e-05, "loss": 2.3303, "step": 1000 }, { "epoch": 0.44, "learning_rate": 4.559859154929578e-05, "loss": 2.2788, "step": 1500 }, { "epoch": 0.59, "learning_rate": 4.413145539906103e-05, "loss": 2.2405, "step": 2000 }, { "epoch": 0.73, "learning_rate": 4.26643192488263e-05, "loss": 2.2089, "step": 2500 }, { "epoch": 0.88, "learning_rate": 4.119718309859155e-05, "loss": 2.1796, "step": 3000 }, { "epoch": 1.0, "eval_loss": 1.94922935962677, "eval_runtime": 38.2996, "eval_samples_per_second": 79.113, "eval_steps_per_second": 9.896, "step": 3408 }, { "epoch": 1.03, "learning_rate": 3.973004694835681e-05, "loss": 2.1575, "step": 3500 }, { "epoch": 1.17, "learning_rate": 3.826291079812207e-05, "loss": 2.1208, "step": 4000 }, { "epoch": 1.32, "learning_rate": 3.679577464788733e-05, "loss": 2.0958, "step": 4500 }, { "epoch": 1.47, "learning_rate": 3.532863849765258e-05, "loss": 2.0928, "step": 5000 }, { "epoch": 1.61, "learning_rate": 3.386150234741784e-05, "loss": 2.0898, "step": 5500 }, { "epoch": 1.76, "learning_rate": 3.23943661971831e-05, "loss": 2.0751, "step": 6000 }, { "epoch": 1.91, "learning_rate": 3.092723004694836e-05, "loss": 2.0669, "step": 6500 }, { "epoch": 2.0, "eval_loss": 1.8773746490478516, "eval_runtime": 38.3673, "eval_samples_per_second": 78.973, "eval_steps_per_second": 9.878, "step": 6816 }, { "epoch": 2.05, "learning_rate": 2.9460093896713615e-05, "loss": 2.0433, "step": 7000 }, { "epoch": 2.2, "learning_rate": 2.7992957746478874e-05, "loss": 2.0275, "step": 7500 }, { "epoch": 2.35, "learning_rate": 2.6525821596244134e-05, "loss": 2.0168, "step": 8000 }, { "epoch": 2.49, "learning_rate": 2.505868544600939e-05, "loss": 2.0208, "step": 8500 }, { "epoch": 2.64, "learning_rate": 2.359154929577465e-05, "loss": 2.0111, "step": 9000 }, { "epoch": 2.79, "learning_rate": 2.2124413145539908e-05, "loss": 2.0058, "step": 9500 }, { "epoch": 2.93, "learning_rate": 2.0657276995305167e-05, "loss": 2.0019, "step": 10000 }, { "epoch": 3.0, "eval_loss": 1.8432753086090088, "eval_runtime": 38.3414, "eval_samples_per_second": 79.027, "eval_steps_per_second": 9.885, "step": 10224 }, { "epoch": 3.08, "learning_rate": 1.9190140845070423e-05, "loss": 1.9746, "step": 10500 }, { "epoch": 3.23, "learning_rate": 1.7723004694835683e-05, "loss": 1.9727, "step": 11000 }, { "epoch": 3.37, "learning_rate": 1.625586854460094e-05, "loss": 1.9677, "step": 11500 }, { "epoch": 3.52, "learning_rate": 1.4788732394366198e-05, "loss": 1.9785, "step": 12000 }, { "epoch": 3.67, "learning_rate": 1.3321596244131457e-05, "loss": 1.9694, "step": 12500 }, { "epoch": 3.81, "learning_rate": 1.1854460093896715e-05, "loss": 1.9653, "step": 13000 }, { "epoch": 3.96, "learning_rate": 1.0387323943661972e-05, "loss": 1.9702, "step": 13500 }, { "epoch": 4.0, "eval_loss": 1.8244620561599731, "eval_runtime": 38.37, "eval_samples_per_second": 78.968, "eval_steps_per_second": 9.878, "step": 13632 }, { "epoch": 4.11, "learning_rate": 8.92018779342723e-06, "loss": 1.9433, "step": 14000 }, { "epoch": 4.25, "learning_rate": 7.453051643192488e-06, "loss": 1.9495, "step": 14500 }, { "epoch": 4.4, "learning_rate": 5.9859154929577465e-06, "loss": 1.9531, "step": 15000 }, { "epoch": 4.55, "learning_rate": 4.518779342723005e-06, "loss": 1.9457, "step": 15500 }, { "epoch": 4.69, "learning_rate": 3.051643192488263e-06, "loss": 1.9329, "step": 16000 }, { "epoch": 4.84, "learning_rate": 1.5845070422535212e-06, "loss": 1.9425, "step": 16500 }, { "epoch": 4.99, "learning_rate": 1.1737089201877935e-07, "loss": 1.9458, "step": 17000 }, { "epoch": 5.0, "eval_loss": 1.818081021308899, "eval_runtime": 38.3351, "eval_samples_per_second": 79.04, "eval_steps_per_second": 9.886, "step": 17040 } ], "max_steps": 17040, "num_train_epochs": 5, "total_flos": 1.312631697641472e+16, "trial_name": null, "trial_params": null }