{ "best_metric": 1.621703028678894, "best_model_checkpoint": "output/the-the-pigs/checkpoint-132", "epoch": 11.0, "global_step": 132, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "learning_rate": 9.292589525111794e-05, "loss": 2.5049, "step": 5 }, { "epoch": 0.77, "learning_rate": 1.725216267546246e-05, "loss": 2.2501, "step": 10 }, { "epoch": 1.0, "eval_loss": 1.966219425201416, "eval_runtime": 0.848, "eval_samples_per_second": 22.405, "eval_steps_per_second": 3.538, "step": 13 }, { "epoch": 1.15, "learning_rate": 7.857716640189785e-06, "loss": 2.2537, "step": 15 }, { "epoch": 1.54, "learning_rate": 7.686881626551516e-05, "loss": 2.1305, "step": 20 }, { "epoch": 1.92, "learning_rate": 0.00013520660867542716, "loss": 2.0792, "step": 25 }, { "epoch": 2.0, "eval_loss": 1.9108868837356567, "eval_runtime": 0.8428, "eval_samples_per_second": 22.544, "eval_steps_per_second": 3.56, "step": 26 }, { "epoch": 2.31, "learning_rate": 0.00010756924162575734, "loss": 2.1293, "step": 30 }, { "epoch": 2.69, "learning_rate": 2.9630758374242683e-05, "loss": 2.0062, "step": 35 }, { "epoch": 3.0, "eval_loss": 1.830873966217041, "eval_runtime": 0.8441, "eval_samples_per_second": 22.51, "eval_steps_per_second": 3.554, "step": 39 }, { "epoch": 3.08, "learning_rate": 1.9933913245728396e-06, "loss": 1.9324, "step": 40 }, { "epoch": 3.46, "learning_rate": 6.033118373448485e-05, "loss": 1.9227, "step": 45 }, { "epoch": 3.85, "learning_rate": 0.00012934228335981018, "loss": 1.9492, "step": 50 }, { "epoch": 4.0, "eval_loss": 1.8247166872024536, "eval_runtime": 0.8513, "eval_samples_per_second": 22.319, "eval_steps_per_second": 3.524, "step": 52 }, { "epoch": 4.23, "learning_rate": 0.00011994783732453755, "loss": 1.8601, "step": 55 }, { "epoch": 4.62, "learning_rate": 4.42741047488822e-05, "loss": 1.7486, "step": 60 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.8994, "step": 65 }, { "epoch": 5.0, "eval_loss": 1.7911409139633179, "eval_runtime": 0.8376, "eval_samples_per_second": 22.685, "eval_steps_per_second": 3.582, "step": 65 }, { "epoch": 5.38, "learning_rate": 4.4274104748882125e-05, "loss": 1.7558, "step": 70 }, { "epoch": 5.77, "learning_rate": 0.00011994783732453749, "loss": 1.8425, "step": 75 }, { "epoch": 6.0, "eval_loss": 1.780040979385376, "eval_runtime": 0.8465, "eval_samples_per_second": 22.444, "eval_steps_per_second": 3.544, "step": 78 }, { "epoch": 6.15, "learning_rate": 0.00012934228335981018, "loss": 1.72, "step": 80 }, { "epoch": 6.54, "learning_rate": 6.033118373448493e-05, "loss": 1.7991, "step": 85 }, { "epoch": 6.92, "learning_rate": 1.9933913245728244e-06, "loss": 1.7086, "step": 90 }, { "epoch": 7.0, "eval_loss": 1.7639576196670532, "eval_runtime": 0.8412, "eval_samples_per_second": 22.587, "eval_steps_per_second": 3.566, "step": 91 }, { "epoch": 7.31, "learning_rate": 2.963075837424261e-05, "loss": 1.6218, "step": 95 }, { "epoch": 7.69, "learning_rate": 0.00010756924162575728, "loss": 1.6723, "step": 100 }, { "epoch": 8.0, "eval_loss": 1.7437282800674438, "eval_runtime": 0.8437, "eval_samples_per_second": 22.521, "eval_steps_per_second": 3.556, "step": 104 }, { "epoch": 8.08, "learning_rate": 0.0001352066086754272, "loss": 1.7365, "step": 105 }, { "epoch": 8.46, "learning_rate": 7.68688162655152e-05, "loss": 1.6506, "step": 110 }, { "epoch": 8.85, "learning_rate": 7.85771664018977e-06, "loss": 1.5713, "step": 115 }, { "epoch": 9.0, "eval_loss": 1.7347867488861084, "eval_runtime": 0.851, "eval_samples_per_second": 22.327, "eval_steps_per_second": 3.525, "step": 117 }, { "epoch": 10.0, "learning_rate": 0.0001372, "loss": 1.6342, "step": 120 }, { "epoch": 10.0, "eval_loss": 1.6437487602233887, "eval_runtime": 0.9748, "eval_samples_per_second": 22.569, "eval_steps_per_second": 3.078, "step": 120 }, { "epoch": 10.42, "learning_rate": 8.635498649403298e-05, "loss": 1.7365, "step": 125 }, { "epoch": 10.83, "learning_rate": 9.190657300387535e-06, "loss": 1.6443, "step": 130 }, { "epoch": 11.0, "eval_loss": 1.621703028678894, "eval_runtime": 1.0058, "eval_samples_per_second": 21.873, "eval_steps_per_second": 2.983, "step": 132 } ], "max_steps": 1308, "num_train_epochs": 109, "total_flos": 130384723968000.0, "trial_name": null, "trial_params": null }