{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.991695501730105, "global_step": 4332, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.00027499999999999996, "loss": 2.3243, "step": 361 }, { "epoch": 1.0, "eval_loss": 2.3360440731048584, "eval_runtime": 81.7171, "eval_samples_per_second": 457.077, "eval_steps_per_second": 57.136, "step": 361 }, { "epoch": 2.0, "learning_rate": 0.00025, "loss": 1.9943, "step": 722 }, { "epoch": 2.0, "eval_loss": 2.3849265575408936, "eval_runtime": 80.8133, "eval_samples_per_second": 462.189, "eval_steps_per_second": 57.775, "step": 722 }, { "epoch": 3.0, "learning_rate": 0.000225, "loss": 1.7331, "step": 1083 }, { "epoch": 3.0, "eval_loss": 2.5010716915130615, "eval_runtime": 80.8638, "eval_samples_per_second": 461.9, "eval_steps_per_second": 57.739, "step": 1083 }, { "epoch": 4.0, "learning_rate": 0.00019993074792243765, "loss": 1.463, "step": 1445 }, { "epoch": 4.0, "eval_loss": 2.7065136432647705, "eval_runtime": 80.5929, "eval_samples_per_second": 463.453, "eval_steps_per_second": 57.933, "step": 1445 }, { "epoch": 5.0, "learning_rate": 0.00017493074792243764, "loss": 1.1969, "step": 1806 }, { "epoch": 5.0, "eval_loss": 2.9511666297912598, "eval_runtime": 80.4169, "eval_samples_per_second": 464.467, "eval_steps_per_second": 58.06, "step": 1806 }, { "epoch": 6.0, "learning_rate": 0.00014993074792243766, "loss": 0.9339, "step": 2167 }, { "epoch": 6.0, "eval_loss": 3.2412941455841064, "eval_runtime": 80.5491, "eval_samples_per_second": 463.705, "eval_steps_per_second": 57.965, "step": 2167 }, { "epoch": 7.0, "learning_rate": 0.00012493074792243767, "loss": 0.6915, "step": 2528 }, { "epoch": 7.0, "eval_loss": 3.56394362449646, "eval_runtime": 79.8139, "eval_samples_per_second": 467.976, "eval_steps_per_second": 58.499, "step": 2528 }, { "epoch": 8.0, "learning_rate": 9.986149584487533e-05, "loss": 0.4811, "step": 2890 }, { "epoch": 8.0, "eval_loss": 3.897730827331543, "eval_runtime": 80.0943, "eval_samples_per_second": 466.338, "eval_steps_per_second": 58.294, "step": 2890 }, { "epoch": 9.0, "learning_rate": 7.486149584487535e-05, "loss": 0.3195, "step": 3251 }, { "epoch": 9.0, "eval_loss": 4.2111005783081055, "eval_runtime": 79.9229, "eval_samples_per_second": 467.338, "eval_steps_per_second": 58.419, "step": 3251 }, { "epoch": 10.0, "learning_rate": 4.986149584487534e-05, "loss": 0.211, "step": 3612 }, { "epoch": 10.0, "eval_loss": 4.46787166595459, "eval_runtime": 80.4105, "eval_samples_per_second": 464.504, "eval_steps_per_second": 58.065, "step": 3612 }, { "epoch": 11.0, "learning_rate": 2.4861495844875343e-05, "loss": 0.1515, "step": 3973 }, { "epoch": 11.0, "eval_loss": 4.6477370262146, "eval_runtime": 80.1153, "eval_samples_per_second": 466.216, "eval_steps_per_second": 58.279, "step": 3973 }, { "epoch": 11.99, "learning_rate": 0.0, "loss": 0.1233, "step": 4332 }, { "epoch": 11.99, "eval_loss": 4.7357659339904785, "eval_runtime": 80.0925, "eval_samples_per_second": 466.348, "eval_steps_per_second": 58.295, "step": 4332 } ], "max_steps": 4332, "num_train_epochs": 12, "total_flos": 1.705389820458624e+17, "trial_name": null, "trial_params": null }