{ "best_metric": 2.3570337295532227, "best_model_checkpoint": "output/grimes/checkpoint-168", "epoch": 8.0, "global_step": 168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 0.00011888735840752609, "loss": 2.8697, "step": 5 }, { "epoch": 0.48, "learning_rate": 7.372648442002871e-05, "loss": 2.7244, "step": 10 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.5533, "step": 15 }, { "epoch": 0.95, "learning_rate": 7.662053209561833e-07, "loss": 2.4558, "step": 20 }, { "epoch": 1.0, "eval_loss": 2.552718162536621, "eval_runtime": 1.4059, "eval_samples_per_second": 22.762, "eval_steps_per_second": 2.845, "step": 21 }, { "epoch": 1.19, "learning_rate": 1.1920020081922749e-05, "loss": 2.436, "step": 25 }, { "epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 2.4972, "step": 30 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.1316, "step": 35 }, { "epoch": 1.9, "learning_rate": 0.00013415229447692924, "loss": 2.3666, "step": 40 }, { "epoch": 2.0, "eval_loss": 2.45241641998291, "eval_runtime": 1.4175, "eval_samples_per_second": 22.575, "eval_steps_per_second": 2.822, "step": 42 }, { "epoch": 2.14, "learning_rate": 0.00013040646433810595, "loss": 2.2004, "step": 45 }, { "epoch": 2.38, "learning_rate": 9.36623942715347e-05, "loss": 2.2352, "step": 50 }, { "epoch": 2.62, "learning_rate": 4.3537605728465284e-05, "loss": 1.9823, "step": 55 }, { "epoch": 2.86, "learning_rate": 6.793535661894062e-06, "loss": 2.0814, "step": 60 }, { "epoch": 3.0, "eval_loss": 2.412614583969116, "eval_runtime": 1.4325, "eval_samples_per_second": 22.338, "eval_steps_per_second": 2.792, "step": 63 }, { "epoch": 3.1, "learning_rate": 3.047705523070765e-06, "loss": 1.9043, "step": 65 }, { "epoch": 3.33, "learning_rate": 3.4300000000000014e-05, "loss": 2.0794, "step": 70 }, { "epoch": 3.57, "learning_rate": 8.386493606940314e-05, "loss": 1.767, "step": 75 }, { "epoch": 3.81, "learning_rate": 0.00012527997991807721, "loss": 2.0631, "step": 80 }, { "epoch": 4.0, "eval_loss": 2.377373456954956, "eval_runtime": 1.4484, "eval_samples_per_second": 22.093, "eval_steps_per_second": 2.762, "step": 84 }, { "epoch": 4.05, "learning_rate": 0.00013643379467904383, "loss": 2.0061, "step": 85 }, { "epoch": 4.29, "learning_rate": 0.00011137140040750914, "loss": 1.6506, "step": 90 }, { "epoch": 4.52, "learning_rate": 6.347351557997137e-05, "loss": 1.9202, "step": 95 }, { "epoch": 4.76, "learning_rate": 1.8312641592473912e-05, "loss": 1.9009, "step": 100 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.7547, "step": 105 }, { "epoch": 5.0, "eval_loss": 2.376068592071533, "eval_runtime": 1.4234, "eval_samples_per_second": 22.481, "eval_steps_per_second": 2.81, "step": 105 }, { "epoch": 5.24, "learning_rate": 1.8312641592473936e-05, "loss": 1.6331, "step": 110 }, { "epoch": 5.48, "learning_rate": 6.347351557997117e-05, "loss": 1.7732, "step": 115 }, { "epoch": 5.71, "learning_rate": 0.00011137140040750908, "loss": 1.7347, "step": 120 }, { "epoch": 5.95, "learning_rate": 0.00013643379467904383, "loss": 1.5963, "step": 125 }, { "epoch": 6.0, "eval_loss": 2.3798959255218506, "eval_runtime": 1.4389, "eval_samples_per_second": 22.239, "eval_steps_per_second": 2.78, "step": 126 }, { "epoch": 6.19, "learning_rate": 0.0001252799799180772, "loss": 1.5787, "step": 130 }, { "epoch": 6.43, "learning_rate": 8.386493606940322e-05, "loss": 1.3787, "step": 135 }, { "epoch": 6.67, "learning_rate": 3.429999999999998e-05, "loss": 1.4709, "step": 140 }, { "epoch": 6.9, "learning_rate": 3.0477055230707115e-06, "loss": 1.7318, "step": 145 }, { "epoch": 7.0, "eval_loss": 2.3730082511901855, "eval_runtime": 1.4477, "eval_samples_per_second": 22.104, "eval_steps_per_second": 2.763, "step": 147 }, { "epoch": 7.14, "learning_rate": 6.793535661894024e-06, "loss": 1.4261, "step": 150 }, { "epoch": 7.38, "learning_rate": 4.353760572846532e-05, "loss": 1.2714, "step": 155 }, { "epoch": 7.62, "learning_rate": 9.366239427153457e-05, "loss": 1.3748, "step": 160 }, { "epoch": 7.86, "learning_rate": 0.00013040646433810593, "loss": 1.5499, "step": 165 }, { "epoch": 8.0, "eval_loss": 2.3570337295532227, "eval_runtime": 1.4478, "eval_samples_per_second": 22.102, "eval_steps_per_second": 2.763, "step": 168 } ], "max_steps": 210, "num_train_epochs": 10, "total_flos": 175588245504000.0, "trial_name": null, "trial_params": null }