{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 8630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.58, "learning_rate": 9.710312862108923e-06, "loss": 3.2151, "step": 500 }, { "epoch": 1.0, "eval_loss": 2.4692916870117188, "eval_runtime": 45.2268, "eval_samples_per_second": 40.883, "eval_steps_per_second": 5.13, "step": 863 }, { "epoch": 1.16, "learning_rate": 9.420625724217845e-06, "loss": 2.5031, "step": 1000 }, { "epoch": 1.74, "learning_rate": 9.130938586326767e-06, "loss": 2.3986, "step": 1500 }, { "epoch": 2.0, "eval_loss": 2.3642208576202393, "eval_runtime": 44.3366, "eval_samples_per_second": 41.704, "eval_steps_per_second": 5.233, "step": 1726 }, { "epoch": 2.32, "learning_rate": 8.84125144843569e-06, "loss": 2.3399, "step": 2000 }, { "epoch": 2.9, "learning_rate": 8.551564310544612e-06, "loss": 2.3188, "step": 2500 }, { "epoch": 3.0, "eval_loss": 2.3149166107177734, "eval_runtime": 44.9069, "eval_samples_per_second": 41.174, "eval_steps_per_second": 5.166, "step": 2589 }, { "epoch": 3.48, "learning_rate": 8.261877172653534e-06, "loss": 2.2853, "step": 3000 }, { "epoch": 4.0, "eval_loss": 2.284034013748169, "eval_runtime": 44.6174, "eval_samples_per_second": 41.441, "eval_steps_per_second": 5.2, "step": 3452 }, { "epoch": 4.06, "learning_rate": 7.972190034762456e-06, "loss": 2.2634, "step": 3500 }, { "epoch": 4.63, "learning_rate": 7.68250289687138e-06, "loss": 2.2415, "step": 4000 }, { "epoch": 5.0, "eval_loss": 2.2624311447143555, "eval_runtime": 45.3808, "eval_samples_per_second": 40.744, "eval_steps_per_second": 5.112, "step": 4315 }, { "epoch": 5.21, "learning_rate": 7.392815758980302e-06, "loss": 2.2406, "step": 4500 }, { "epoch": 5.79, "learning_rate": 7.103128621089225e-06, "loss": 2.2182, "step": 5000 }, { "epoch": 6.0, "eval_loss": 2.2461531162261963, "eval_runtime": 51.6238, "eval_samples_per_second": 35.817, "eval_steps_per_second": 4.494, "step": 5178 }, { "epoch": 6.37, "learning_rate": 6.813441483198147e-06, "loss": 2.2067, "step": 5500 }, { "epoch": 6.95, "learning_rate": 6.523754345307068e-06, "loss": 2.1992, "step": 6000 }, { "epoch": 7.0, "eval_loss": 2.2332844734191895, "eval_runtime": 44.2392, "eval_samples_per_second": 41.795, "eval_steps_per_second": 5.244, "step": 6041 }, { "epoch": 7.53, "learning_rate": 6.234067207415992e-06, "loss": 2.1873, "step": 6500 }, { "epoch": 8.0, "eval_loss": 2.223007917404175, "eval_runtime": 46.3144, "eval_samples_per_second": 39.923, "eval_steps_per_second": 5.009, "step": 6904 }, { "epoch": 8.11, "learning_rate": 5.944380069524913e-06, "loss": 2.1861, "step": 7000 }, { "epoch": 8.69, "learning_rate": 5.654692931633836e-06, "loss": 2.1706, "step": 7500 }, { "epoch": 9.0, "eval_loss": 2.2148640155792236, "eval_runtime": 44.5375, "eval_samples_per_second": 41.516, "eval_steps_per_second": 5.209, "step": 7767 }, { "epoch": 9.27, "learning_rate": 5.365005793742758e-06, "loss": 2.1684, "step": 8000 }, { "epoch": 9.85, "learning_rate": 5.0753186558516806e-06, "loss": 2.1562, "step": 8500 }, { "epoch": 10.0, "eval_loss": 2.2083637714385986, "eval_runtime": 45.5164, "eval_samples_per_second": 40.623, "eval_steps_per_second": 5.097, "step": 8630 } ], "logging_steps": 500, "max_steps": 17260, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.352774172672e+16, "trial_name": null, "trial_params": null }