{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999220394480393, "global_step": 6413, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.610166848588804e-05, "loss": 0.6465, "step": 500 }, { "epoch": 0.08, "eval_gen_len": 43.125025166096236, "eval_loss": 0.09457100182771683, "eval_runtime": 1631.4106, "eval_samples_per_second": 3.045, "eval_steps_per_second": 0.381, "step": 500 }, { "epoch": 0.16, "learning_rate": 4.220333697177608e-05, "loss": 0.0749, "step": 1000 }, { "epoch": 0.16, "eval_gen_len": 40.72941413327965, "eval_loss": 0.08562018722295761, "eval_runtime": 1556.1203, "eval_samples_per_second": 3.192, "eval_steps_per_second": 0.399, "step": 1000 }, { "epoch": 0.23, "learning_rate": 3.830500545766412e-05, "loss": 0.0695, "step": 1500 }, { "epoch": 0.23, "eval_gen_len": 41.34185625125831, "eval_loss": 0.08309376239776611, "eval_runtime": 1494.5878, "eval_samples_per_second": 3.323, "eval_steps_per_second": 0.415, "step": 1500 }, { "epoch": 0.31, "learning_rate": 3.440667394355216e-05, "loss": 0.066, "step": 2000 }, { "epoch": 0.31, "eval_gen_len": 40.56633782967586, "eval_loss": 0.07833520323038101, "eval_runtime": 1580.4677, "eval_samples_per_second": 3.143, "eval_steps_per_second": 0.393, "step": 2000 }, { "epoch": 0.39, "learning_rate": 3.05083424294402e-05, "loss": 0.065, "step": 2500 }, { "epoch": 0.39, "eval_gen_len": 42.21783772901148, "eval_loss": 0.07629594951868057, "eval_runtime": 1515.9865, "eval_samples_per_second": 3.276, "eval_steps_per_second": 0.41, "step": 2500 }, { "epoch": 0.47, "learning_rate": 2.6610010915328243e-05, "loss": 0.0628, "step": 3000 }, { "epoch": 0.47, "eval_gen_len": 40.085363398429635, "eval_loss": 0.07593819499015808, "eval_runtime": 1486.4085, "eval_samples_per_second": 3.342, "eval_steps_per_second": 0.418, "step": 3000 }, { "epoch": 0.55, "learning_rate": 2.271167940121628e-05, "loss": 0.0605, "step": 3500 }, { "epoch": 0.55, "eval_gen_len": 40.57298167908194, "eval_loss": 0.07500995695590973, "eval_runtime": 1478.1922, "eval_samples_per_second": 3.36, "eval_steps_per_second": 0.42, "step": 3500 }, { "epoch": 0.62, "learning_rate": 1.881334788710432e-05, "loss": 0.0604, "step": 4000 }, { "epoch": 0.62, "eval_gen_len": 41.43708475941212, "eval_loss": 0.07322078198194504, "eval_runtime": 1508.2222, "eval_samples_per_second": 3.293, "eval_steps_per_second": 0.412, "step": 4000 }, { "epoch": 0.7, "learning_rate": 1.491501637299236e-05, "loss": 0.0597, "step": 4500 }, { "epoch": 0.7, "eval_gen_len": 42.330380511375076, "eval_loss": 0.0725751593708992, "eval_runtime": 1504.6447, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.413, "step": 4500 }, { "epoch": 0.78, "learning_rate": 1.1016684858880399e-05, "loss": 0.0592, "step": 5000 }, { "epoch": 0.78, "eval_gen_len": 42.24562109925508, "eval_loss": 0.07068128883838654, "eval_runtime": 1517.39, "eval_samples_per_second": 3.273, "eval_steps_per_second": 0.409, "step": 5000 }, { "epoch": 0.86, "learning_rate": 7.11835334476844e-06, "loss": 0.0586, "step": 5500 }, { "epoch": 0.86, "eval_gen_len": 43.367827662572985, "eval_loss": 0.07018809765577316, "eval_runtime": 1537.9058, "eval_samples_per_second": 3.23, "eval_steps_per_second": 0.404, "step": 5500 }, { "epoch": 0.94, "learning_rate": 3.220021830656479e-06, "loss": 0.0577, "step": 6000 }, { "epoch": 0.94, "eval_gen_len": 43.2325347292128, "eval_loss": 0.06985782831907272, "eval_runtime": 1536.9981, "eval_samples_per_second": 3.232, "eval_steps_per_second": 0.404, "step": 6000 }, { "epoch": 1.0, "step": 6413, "total_flos": 4.5543286951870464e+17, "train_loss": 0.108196587787856, "train_runtime": 33928.5621, "train_samples_per_second": 3.024, "train_steps_per_second": 0.189 } ], "max_steps": 6413, "num_train_epochs": 1, "total_flos": 4.5543286951870464e+17, "trial_name": null, "trial_params": null }