{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.31412202892914304, "global_step": 105000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.925209040731157e-05, "loss": 0.0716, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.850418081462313e-05, "loss": 0.0741, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.77562712219347e-05, "loss": 0.0753, "step": 15000 }, { "epoch": 0.06, "learning_rate": 4.7008361629246264e-05, "loss": 0.0709, "step": 20000 }, { "epoch": 0.07, "learning_rate": 4.6260452036557824e-05, "loss": 0.0754, "step": 25000 }, { "epoch": 0.07, "eval_loss": 0.07123171538114548, "eval_runtime": 1351.5318, "eval_samples_per_second": 13.741, "eval_steps_per_second": 13.741, "step": 25000 }, { "epoch": 0.09, "learning_rate": 4.551254244386939e-05, "loss": 0.0745, "step": 30000 }, { "epoch": 0.1, "learning_rate": 4.476463285118095e-05, "loss": 0.0671, "step": 35000 }, { "epoch": 0.12, "learning_rate": 4.401672325849251e-05, "loss": 0.0687, "step": 40000 }, { "epoch": 0.13, "learning_rate": 4.326881366580408e-05, "loss": 0.0718, "step": 45000 }, { "epoch": 0.15, "learning_rate": 4.252090407311564e-05, "loss": 0.0692, "step": 50000 }, { "epoch": 0.15, "eval_loss": 0.06770400702953339, "eval_runtime": 1359.9214, "eval_samples_per_second": 13.656, "eval_steps_per_second": 13.656, "step": 50000 }, { "epoch": 0.16, "learning_rate": 4.1772994480427206e-05, "loss": 0.0695, "step": 55000 }, { "epoch": 0.18, "learning_rate": 4.102508488773877e-05, "loss": 0.0696, "step": 60000 }, { "epoch": 0.19, "learning_rate": 4.0277175295050333e-05, "loss": 0.0689, "step": 65000 }, { "epoch": 0.21, "learning_rate": 3.95292657023619e-05, "loss": 0.0679, "step": 70000 }, { "epoch": 0.22, "learning_rate": 3.878135610967347e-05, "loss": 0.071, "step": 75000 }, { "epoch": 0.22, "eval_loss": 0.05809599161148071, "eval_runtime": 2705.4449, "eval_samples_per_second": 6.864, "eval_steps_per_second": 6.864, "step": 75000 }, { "epoch": 0.24, "learning_rate": 3.803344651698503e-05, "loss": 0.0712, "step": 80000 }, { "epoch": 0.25, "learning_rate": 3.7285536924296595e-05, "loss": 0.0683, "step": 85000 }, { "epoch": 0.27, "learning_rate": 3.653762733160816e-05, "loss": 0.0673, "step": 90000 }, { "epoch": 0.28, "learning_rate": 3.578971773891972e-05, "loss": 0.0685, "step": 95000 }, { "epoch": 0.3, "learning_rate": 3.504180814623129e-05, "loss": 0.0676, "step": 100000 }, { "epoch": 0.3, "eval_loss": 0.06122186779975891, "eval_runtime": 2772.9648, "eval_samples_per_second": 6.697, "eval_steps_per_second": 6.697, "step": 100000 }, { "epoch": 0.31, "learning_rate": 3.429389855354285e-05, "loss": 0.0672, "step": 105000 } ], "max_steps": 334265, "num_train_epochs": 1, "total_flos": 6.792868897849498e+16, "trial_name": null, "trial_params": null }