{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9748106591865358, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 4.765037593984963e-05, "loss": 12.2573, "step": 200 }, { "epoch": 0.18, "eval_loss": 2.7165932655334473, "eval_runtime": 138.6905, "eval_samples_per_second": 13.483, "step": 200 }, { "epoch": 0.36, "learning_rate": 4.295112781954887e-05, "loss": 2.6565, "step": 400 }, { "epoch": 0.36, "eval_loss": 2.7028679847717285, "eval_runtime": 135.8765, "eval_samples_per_second": 13.762, "step": 400 }, { "epoch": 0.54, "learning_rate": 3.825187969924812e-05, "loss": 2.6054, "step": 600 }, { "epoch": 0.54, "eval_loss": 2.677929162979126, "eval_runtime": 138.76, "eval_samples_per_second": 13.477, "step": 600 }, { "epoch": 0.72, "learning_rate": 3.355263157894737e-05, "loss": 2.6296, "step": 800 }, { "epoch": 0.72, "eval_loss": 2.667823553085327, "eval_runtime": 138.8745, "eval_samples_per_second": 13.465, "step": 800 }, { "epoch": 0.9, "learning_rate": 2.8853383458646617e-05, "loss": 2.6148, "step": 1000 }, { "epoch": 0.9, "eval_loss": 2.6645262241363525, "eval_runtime": 138.612, "eval_samples_per_second": 13.491, "step": 1000 }, { "epoch": 1.08, "learning_rate": 2.4154135338345866e-05, "loss": 2.5843, "step": 1200 }, { "epoch": 1.08, "eval_loss": 2.6597423553466797, "eval_runtime": 138.5965, "eval_samples_per_second": 13.492, "step": 1200 }, { "epoch": 1.26, "learning_rate": 1.9454887218045115e-05, "loss": 2.5548, "step": 1400 }, { "epoch": 1.26, "eval_loss": 2.653554677963257, "eval_runtime": 138.5895, "eval_samples_per_second": 13.493, "step": 1400 }, { "epoch": 1.44, "learning_rate": 1.4755639097744361e-05, "loss": 2.5588, "step": 1600 }, { "epoch": 1.44, "eval_loss": 2.6523187160491943, "eval_runtime": 138.5665, "eval_samples_per_second": 13.495, "step": 1600 }, { "epoch": 1.62, "learning_rate": 1.005639097744361e-05, "loss": 2.5814, "step": 1800 }, { "epoch": 1.62, "eval_loss": 2.64888858795166, "eval_runtime": 138.6375, "eval_samples_per_second": 13.488, "step": 1800 }, { "epoch": 1.8, "learning_rate": 5.357142857142857e-06, "loss": 2.569, "step": 2000 }, { "epoch": 1.8, "eval_loss": 2.6475982666015625, "eval_runtime": 138.5945, "eval_samples_per_second": 13.493, "step": 2000 }, { "epoch": 1.97, "learning_rate": 6.578947368421053e-07, "loss": 2.5401, "step": 2200 }, { "epoch": 1.97, "eval_loss": 2.6439733505249023, "eval_runtime": 138.082, "eval_samples_per_second": 13.543, "step": 2200 } ], "max_steps": 2228, "num_train_epochs": 2, "total_flos": 2.691387690319872e+16, "trial_name": null, "trial_params": null }