{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 1410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 8e-05, "loss": 3.4965, "step": 94 }, { "epoch": 1.0, "eval_loss": 2.779850721359253, "eval_runtime": 23.1518, "eval_samples_per_second": 8.639, "eval_steps_per_second": 1.08, "step": 94 }, { "epoch": 2.0, "learning_rate": 6e-05, "loss": 3.4526, "step": 188 }, { "epoch": 2.0, "eval_loss": 2.738048791885376, "eval_runtime": 24.2844, "eval_samples_per_second": 8.236, "eval_steps_per_second": 1.029, "step": 188 }, { "epoch": 3.0, "learning_rate": 4e-05, "loss": 3.4012, "step": 282 }, { "epoch": 3.0, "eval_loss": 2.6721441745758057, "eval_runtime": 23.2045, "eval_samples_per_second": 8.619, "eval_steps_per_second": 1.077, "step": 282 }, { "epoch": 4.0, "learning_rate": 2e-05, "loss": 3.2776, "step": 376 }, { "epoch": 4.0, "eval_loss": 2.6650607585906982, "eval_runtime": 24.0682, "eval_samples_per_second": 8.31, "eval_steps_per_second": 1.039, "step": 376 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 3.2164, "step": 470 }, { "epoch": 5.0, "eval_loss": 2.6554956436157227, "eval_runtime": 22.8547, "eval_samples_per_second": 8.751, "eval_steps_per_second": 1.094, "step": 470 }, { "epoch": 6.0, "learning_rate": 4e-05, "loss": 3.2701, "step": 564 }, { "epoch": 6.0, "eval_loss": 2.648871421813965, "eval_runtime": 23.3072, "eval_samples_per_second": 8.581, "eval_steps_per_second": 1.073, "step": 564 }, { "epoch": 7.0, "learning_rate": 3e-05, "loss": 3.1847, "step": 658 }, { "epoch": 7.0, "eval_loss": 2.699300765991211, "eval_runtime": 22.1301, "eval_samples_per_second": 9.037, "eval_steps_per_second": 1.13, "step": 658 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 3.0959, "step": 752 }, { "epoch": 8.0, "eval_loss": 2.636422634124756, "eval_runtime": 21.8919, "eval_samples_per_second": 9.136, "eval_steps_per_second": 1.142, "step": 752 }, { "epoch": 9.0, "learning_rate": 1e-05, "loss": 3.0506, "step": 846 }, { "epoch": 9.0, "eval_loss": 2.6463677883148193, "eval_runtime": 22.9819, "eval_samples_per_second": 8.702, "eval_steps_per_second": 1.088, "step": 846 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 3.0497, "step": 940 }, { "epoch": 10.0, "eval_loss": 2.63041615486145, "eval_runtime": 23.2242, "eval_samples_per_second": 8.612, "eval_steps_per_second": 1.076, "step": 940 }, { "epoch": 11.0, "learning_rate": 2.6666666666666667e-05, "loss": 3.0767, "step": 1034 }, { "epoch": 11.0, "eval_loss": 2.634387254714966, "eval_runtime": 23.8188, "eval_samples_per_second": 8.397, "eval_steps_per_second": 1.05, "step": 1034 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 3.0397, "step": 1128 }, { "epoch": 12.0, "eval_loss": 2.6142303943634033, "eval_runtime": 23.2502, "eval_samples_per_second": 8.602, "eval_steps_per_second": 1.075, "step": 1128 }, { "epoch": 13.0, "learning_rate": 1.3333333333333333e-05, "loss": 2.982, "step": 1222 }, { "epoch": 13.0, "eval_loss": 2.678748369216919, "eval_runtime": 22.8778, "eval_samples_per_second": 8.742, "eval_steps_per_second": 1.093, "step": 1222 }, { "epoch": 14.0, "learning_rate": 6.666666666666667e-06, "loss": 2.883, "step": 1316 }, { "epoch": 14.0, "eval_loss": 2.6491620540618896, "eval_runtime": 22.7109, "eval_samples_per_second": 8.806, "eval_steps_per_second": 1.101, "step": 1316 }, { "epoch": 15.0, "learning_rate": 0.0, "loss": 2.8978, "step": 1410 }, { "epoch": 15.0, "eval_loss": 2.631659507751465, "eval_runtime": 23.5813, "eval_samples_per_second": 8.481, "eval_steps_per_second": 1.06, "step": 1410 }, { "epoch": 15.0, "step": 1410, "total_flos": 3.4352702816256e+17, "train_loss": 0.9919471253740026, "train_runtime": 710.2043, "train_samples_per_second": 15.841, "train_steps_per_second": 1.985 } ], "max_steps": 1410, "num_train_epochs": 15, "total_flos": 3.4352702816256e+17, "trial_name": null, "trial_params": null }