{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999220394480393, "global_step": 6413, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.610166848588804e-05, "loss": 1.501, "step": 500 }, { "epoch": 0.08, "eval_gen_len": 38.430038252466275, "eval_loss": 0.11725818365812302, "eval_runtime": 1525.1588, "eval_samples_per_second": 3.257, "eval_steps_per_second": 0.407, "step": 500 }, { "epoch": 0.16, "learning_rate": 4.220333697177608e-05, "loss": 0.0972, "step": 1000 }, { "epoch": 0.16, "eval_gen_len": 38.57358566539158, "eval_loss": 0.10414853692054749, "eval_runtime": 1374.4335, "eval_samples_per_second": 3.614, "eval_steps_per_second": 0.452, "step": 1000 }, { "epoch": 0.23, "learning_rate": 3.830500545766412e-05, "loss": 0.0871, "step": 1500 }, { "epoch": 0.23, "eval_gen_len": 41.80269780551641, "eval_loss": 0.09756787866353989, "eval_runtime": 1416.1217, "eval_samples_per_second": 3.507, "eval_steps_per_second": 0.439, "step": 1500 }, { "epoch": 0.31, "learning_rate": 3.440667394355216e-05, "loss": 0.0821, "step": 2000 }, { "epoch": 0.31, "eval_gen_len": 38.83108516206966, "eval_loss": 0.09272466599941254, "eval_runtime": 1386.2597, "eval_samples_per_second": 3.583, "eval_steps_per_second": 0.448, "step": 2000 }, { "epoch": 0.39, "learning_rate": 3.05083424294402e-05, "loss": 0.0796, "step": 2500 }, { "epoch": 0.39, "eval_gen_len": 42.82927320314073, "eval_loss": 0.09029995650053024, "eval_runtime": 1431.1493, "eval_samples_per_second": 3.471, "eval_steps_per_second": 0.434, "step": 2500 }, { "epoch": 0.47, "learning_rate": 2.6610010915328243e-05, "loss": 0.0766, "step": 3000 }, { "epoch": 0.47, "eval_gen_len": 41.111938796053956, "eval_loss": 0.08799975365400314, "eval_runtime": 1401.8123, "eval_samples_per_second": 3.543, "eval_steps_per_second": 0.443, "step": 3000 }, { "epoch": 0.55, "learning_rate": 2.271167940121628e-05, "loss": 0.0741, "step": 3500 }, { "epoch": 0.55, "eval_gen_len": 40.73605798268573, "eval_loss": 0.08718982338905334, "eval_runtime": 1373.6325, "eval_samples_per_second": 3.616, "eval_steps_per_second": 0.452, "step": 3500 }, { "epoch": 0.62, "learning_rate": 1.881334788710432e-05, "loss": 0.0729, "step": 4000 }, { "epoch": 0.62, "eval_gen_len": 39.60056372055567, "eval_loss": 0.08510363847017288, "eval_runtime": 1369.314, "eval_samples_per_second": 3.627, "eval_steps_per_second": 0.454, "step": 4000 }, { "epoch": 0.7, "learning_rate": 1.491501637299236e-05, "loss": 0.0723, "step": 4500 }, { "epoch": 0.7, "eval_gen_len": 40.53654117173344, "eval_loss": 0.08480597287416458, "eval_runtime": 1394.4001, "eval_samples_per_second": 3.562, "eval_steps_per_second": 0.445, "step": 4500 }, { "epoch": 0.78, "learning_rate": 1.1016684858880399e-05, "loss": 0.0729, "step": 5000 }, { "epoch": 0.78, "eval_gen_len": 41.251258304811756, "eval_loss": 0.08257019519805908, "eval_runtime": 1406.7799, "eval_samples_per_second": 3.531, "eval_steps_per_second": 0.441, "step": 5000 }, { "epoch": 0.86, "learning_rate": 7.11835334476844e-06, "loss": 0.071, "step": 5500 }, { "epoch": 0.86, "eval_gen_len": 41.85141936782766, "eval_loss": 0.08206828683614731, "eval_runtime": 1411.1175, "eval_samples_per_second": 3.52, "eval_steps_per_second": 0.44, "step": 5500 }, { "epoch": 0.94, "learning_rate": 3.220021830656479e-06, "loss": 0.0699, "step": 6000 }, { "epoch": 0.94, "eval_gen_len": 41.36098248439702, "eval_loss": 0.08196299523115158, "eval_runtime": 1408.2137, "eval_samples_per_second": 3.527, "eval_steps_per_second": 0.441, "step": 6000 }, { "epoch": 1.0, "step": 6413, "total_flos": 1.2273483536252928e+17, "train_loss": 0.18819899583558072, "train_runtime": 25349.1542, "train_samples_per_second": 4.048, "train_steps_per_second": 0.253 } ], "max_steps": 6413, "num_train_epochs": 1, "total_flos": 1.2273483536252928e+17, "trial_name": null, "trial_params": null }