{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 17200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.58, "learning_rate": 4.854651162790698e-05, "loss": 3.572, "step": 500 }, { "epoch": 1.0, "eval_loss": 3.3753416538238525, "eval_runtime": 146.7285, "eval_samples_per_second": 163.69, "step": 860 }, { "epoch": 1.16, "learning_rate": 4.709302325581396e-05, "loss": 3.4691, "step": 1000 }, { "epoch": 1.74, "learning_rate": 4.563953488372093e-05, "loss": 3.2751, "step": 1500 }, { "epoch": 2.0, "eval_loss": 3.3520445823669434, "eval_runtime": 147.0097, "eval_samples_per_second": 163.377, "step": 1720 }, { "epoch": 2.33, "learning_rate": 4.418604651162791e-05, "loss": 3.1479, "step": 2000 }, { "epoch": 2.91, "learning_rate": 4.2732558139534885e-05, "loss": 3.0201, "step": 2500 }, { "epoch": 3.0, "eval_loss": 3.3326475620269775, "eval_runtime": 146.9025, "eval_samples_per_second": 163.496, "step": 2580 }, { "epoch": 3.49, "learning_rate": 4.127906976744187e-05, "loss": 2.8872, "step": 3000 }, { "epoch": 4.0, "eval_loss": 3.355100154876709, "eval_runtime": 146.8556, "eval_samples_per_second": 163.548, "step": 3440 }, { "epoch": 4.07, "learning_rate": 3.9825581395348835e-05, "loss": 2.8373, "step": 3500 }, { "epoch": 4.65, "learning_rate": 3.837209302325582e-05, "loss": 2.6775, "step": 4000 }, { "epoch": 5.0, "eval_loss": 3.35534405708313, "eval_runtime": 146.787, "eval_samples_per_second": 163.625, "step": 4300 }, { "epoch": 5.23, "learning_rate": 3.691860465116279e-05, "loss": 2.6133, "step": 4500 }, { "epoch": 5.81, "learning_rate": 3.5465116279069774e-05, "loss": 2.5406, "step": 5000 }, { "epoch": 6.0, "eval_loss": 3.336427927017212, "eval_runtime": 146.5342, "eval_samples_per_second": 163.907, "step": 5160 }, { "epoch": 6.4, "learning_rate": 3.401162790697674e-05, "loss": 2.4318, "step": 5500 }, { "epoch": 6.98, "learning_rate": 3.2558139534883724e-05, "loss": 2.4171, "step": 6000 }, { "epoch": 7.0, "eval_loss": 3.332899808883667, "eval_runtime": 146.7159, "eval_samples_per_second": 163.704, "step": 6020 }, { "epoch": 7.56, "learning_rate": 3.11046511627907e-05, "loss": 2.267, "step": 6500 }, { "epoch": 8.0, "eval_loss": 3.3342535495758057, "eval_runtime": 146.2923, "eval_samples_per_second": 164.178, "step": 6880 }, { "epoch": 8.14, "learning_rate": 2.9651162790697678e-05, "loss": 2.232, "step": 7000 }, { "epoch": 8.72, "learning_rate": 2.8197674418604653e-05, "loss": 2.1545, "step": 7500 }, { "epoch": 9.0, "eval_loss": 3.3147542476654053, "eval_runtime": 147.8917, "eval_samples_per_second": 162.403, "step": 7740 }, { "epoch": 9.3, "learning_rate": 2.674418604651163e-05, "loss": 2.1127, "step": 8000 }, { "epoch": 9.88, "learning_rate": 2.5290697674418607e-05, "loss": 2.064, "step": 8500 }, { "epoch": 10.0, "eval_loss": 3.3158812522888184, "eval_runtime": 147.2862, "eval_samples_per_second": 163.07, "step": 8600 }, { "epoch": 10.47, "learning_rate": 2.3837209302325582e-05, "loss": 1.9663, "step": 9000 }, { "epoch": 11.0, "eval_loss": 3.3465089797973633, "eval_runtime": 148.1203, "eval_samples_per_second": 162.152, "step": 9460 }, { "epoch": 11.05, "learning_rate": 2.238372093023256e-05, "loss": 1.9353, "step": 9500 }, { "epoch": 11.63, "learning_rate": 2.0930232558139536e-05, "loss": 1.8572, "step": 10000 }, { "epoch": 12.0, "eval_loss": 3.2874529361724854, "eval_runtime": 147.3935, "eval_samples_per_second": 162.952, "step": 10320 }, { "epoch": 12.21, "learning_rate": 1.9476744186046514e-05, "loss": 1.8373, "step": 10500 }, { "epoch": 12.79, "learning_rate": 1.802325581395349e-05, "loss": 1.7866, "step": 11000 }, { "epoch": 13.0, "eval_loss": 3.321674108505249, "eval_runtime": 146.9637, "eval_samples_per_second": 163.428, "step": 11180 }, { "epoch": 13.37, "learning_rate": 1.6569767441860464e-05, "loss": 1.7253, "step": 11500 }, { "epoch": 13.95, "learning_rate": 1.5116279069767441e-05, "loss": 1.719, "step": 12000 }, { "epoch": 14.0, "eval_loss": 3.2760589122772217, "eval_runtime": 145.6136, "eval_samples_per_second": 164.943, "step": 12040 }, { "epoch": 14.53, "learning_rate": 1.3662790697674418e-05, "loss": 1.6293, "step": 12500 }, { "epoch": 15.0, "eval_loss": 3.302276611328125, "eval_runtime": 146.2216, "eval_samples_per_second": 164.258, "step": 12900 }, { "epoch": 15.12, "learning_rate": 1.2209302325581395e-05, "loss": 1.6339, "step": 13000 }, { "epoch": 15.7, "learning_rate": 1.0755813953488372e-05, "loss": 1.5656, "step": 13500 }, { "epoch": 16.0, "eval_loss": 3.2689430713653564, "eval_runtime": 145.5127, "eval_samples_per_second": 165.058, "step": 13760 }, { "epoch": 16.28, "learning_rate": 9.302325581395349e-06, "loss": 1.5503, "step": 14000 }, { "epoch": 16.86, "learning_rate": 7.848837209302325e-06, "loss": 1.5344, "step": 14500 }, { "epoch": 17.0, "eval_loss": 3.237142562866211, "eval_runtime": 146.4021, "eval_samples_per_second": 164.055, "step": 14620 }, { "epoch": 17.44, "learning_rate": 6.395348837209303e-06, "loss": 1.4974, "step": 15000 }, { "epoch": 18.0, "eval_loss": 3.209892749786377, "eval_runtime": 147.2158, "eval_samples_per_second": 163.148, "step": 15480 }, { "epoch": 18.02, "learning_rate": 4.941860465116279e-06, "loss": 1.4739, "step": 15500 }, { "epoch": 18.6, "learning_rate": 3.488372093023256e-06, "loss": 1.4574, "step": 16000 }, { "epoch": 19.0, "eval_loss": 3.2351009845733643, "eval_runtime": 146.7384, "eval_samples_per_second": 163.679, "step": 16340 }, { "epoch": 19.19, "learning_rate": 2.0348837209302328e-06, "loss": 1.4283, "step": 16500 }, { "epoch": 19.77, "learning_rate": 5.813953488372093e-07, "loss": 1.4204, "step": 17000 }, { "epoch": 20.0, "eval_loss": 3.2112162113189697, "eval_runtime": 147.2288, "eval_samples_per_second": 163.134, "step": 17200 } ], "max_steps": 17200, "num_train_epochs": 20, "total_flos": 6.8827072512e+16, "trial_name": null, "trial_params": null }