{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.996168582375478, "global_step": 650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.77, "learning_rate": 4.615384615384616e-05, "loss": 9.0082, "step": 50 }, { "epoch": 0.77, "eval_loss": 8.411425590515137, "eval_runtime": 12.7191, "eval_samples_per_second": 45.601, "eval_steps_per_second": 2.909, "step": 50 }, { "epoch": 1.54, "learning_rate": 4.230769230769231e-05, "loss": 8.0104, "step": 100 }, { "epoch": 1.54, "eval_loss": 7.781517505645752, "eval_runtime": 12.722, "eval_samples_per_second": 45.59, "eval_steps_per_second": 2.908, "step": 100 }, { "epoch": 2.31, "learning_rate": 3.846153846153846e-05, "loss": 7.6542, "step": 150 }, { "epoch": 2.31, "eval_loss": 7.551076889038086, "eval_runtime": 12.717, "eval_samples_per_second": 45.608, "eval_steps_per_second": 2.909, "step": 150 }, { "epoch": 3.08, "learning_rate": 3.461538461538462e-05, "loss": 7.4951, "step": 200 }, { "epoch": 3.08, "eval_loss": 7.532299995422363, "eval_runtime": 12.7257, "eval_samples_per_second": 45.577, "eval_steps_per_second": 2.908, "step": 200 }, { "epoch": 3.84, "learning_rate": 3.0769230769230774e-05, "loss": 7.3489, "step": 250 }, { "epoch": 3.84, "eval_loss": 7.389176845550537, "eval_runtime": 12.723, "eval_samples_per_second": 45.587, "eval_steps_per_second": 2.908, "step": 250 }, { "epoch": 4.61, "learning_rate": 2.6923076923076923e-05, "loss": 7.349, "step": 300 }, { "epoch": 4.61, "eval_loss": 7.34345006942749, "eval_runtime": 12.7274, "eval_samples_per_second": 45.571, "eval_steps_per_second": 2.907, "step": 300 }, { "epoch": 5.38, "learning_rate": 2.307692307692308e-05, "loss": 7.2625, "step": 350 }, { "epoch": 5.38, "eval_loss": 7.265749454498291, "eval_runtime": 12.7257, "eval_samples_per_second": 45.577, "eval_steps_per_second": 2.907, "step": 350 }, { "epoch": 6.15, "learning_rate": 1.923076923076923e-05, "loss": 7.1982, "step": 400 }, { "epoch": 6.15, "eval_loss": 7.229588985443115, "eval_runtime": 12.7294, "eval_samples_per_second": 45.564, "eval_steps_per_second": 2.907, "step": 400 }, { "epoch": 6.92, "learning_rate": 1.5384615384615387e-05, "loss": 7.1324, "step": 450 }, { "epoch": 6.92, "eval_loss": 7.157928466796875, "eval_runtime": 12.7187, "eval_samples_per_second": 45.602, "eval_steps_per_second": 2.909, "step": 450 }, { "epoch": 7.69, "learning_rate": 1.153846153846154e-05, "loss": 7.1505, "step": 500 }, { "epoch": 7.69, "eval_loss": 7.151326656341553, "eval_runtime": 12.7324, "eval_samples_per_second": 45.553, "eval_steps_per_second": 2.906, "step": 500 }, { "epoch": 8.46, "learning_rate": 7.692307692307694e-06, "loss": 7.1079, "step": 550 }, { "epoch": 8.46, "eval_loss": 7.1157402992248535, "eval_runtime": 12.7267, "eval_samples_per_second": 45.573, "eval_steps_per_second": 2.907, "step": 550 }, { "epoch": 9.23, "learning_rate": 3.846153846153847e-06, "loss": 7.1039, "step": 600 }, { "epoch": 9.23, "eval_loss": 7.14491605758667, "eval_runtime": 12.7279, "eval_samples_per_second": 45.569, "eval_steps_per_second": 2.907, "step": 600 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 7.046, "step": 650 }, { "epoch": 10.0, "eval_loss": 7.087325096130371, "eval_runtime": 12.7263, "eval_samples_per_second": 45.575, "eval_steps_per_second": 2.907, "step": 650 } ], "max_steps": 650, "num_train_epochs": 10, "total_flos": 1.3722182915328e+16, "trial_name": null, "trial_params": null }