{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 26580, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 4.90594431903687e-05, "loss": 3.0406, "step": 500 }, { "epoch": 0.19, "learning_rate": 4.81188863807374e-05, "loss": 2.9246, "step": 1000 }, { "epoch": 0.28, "learning_rate": 4.71783295711061e-05, "loss": 2.8939, "step": 1500 }, { "epoch": 0.38, "learning_rate": 4.6237772761474796e-05, "loss": 2.8394, "step": 2000 }, { "epoch": 0.47, "learning_rate": 4.5297215951843495e-05, "loss": 2.8208, "step": 2500 }, { "epoch": 0.56, "learning_rate": 4.435665914221219e-05, "loss": 2.7974, "step": 3000 }, { "epoch": 0.66, "learning_rate": 4.3416102332580885e-05, "loss": 2.775, "step": 3500 }, { "epoch": 0.75, "learning_rate": 4.247554552294959e-05, "loss": 2.7597, "step": 4000 }, { "epoch": 0.85, "learning_rate": 4.153498871331828e-05, "loss": 2.7433, "step": 4500 }, { "epoch": 0.94, "learning_rate": 4.059443190368699e-05, "loss": 2.7478, "step": 5000 }, { "epoch": 1.03, "learning_rate": 3.965387509405568e-05, "loss": 2.6777, "step": 5500 }, { "epoch": 1.13, "learning_rate": 3.8713318284424384e-05, "loss": 2.5825, "step": 6000 }, { "epoch": 1.22, "learning_rate": 3.7772761474793075e-05, "loss": 2.5918, "step": 6500 }, { "epoch": 1.32, "learning_rate": 3.683220466516178e-05, "loss": 2.5835, "step": 7000 }, { "epoch": 1.41, "learning_rate": 3.589164785553047e-05, "loss": 2.5557, "step": 7500 }, { "epoch": 1.5, "learning_rate": 3.495109104589918e-05, "loss": 2.5774, "step": 8000 }, { "epoch": 1.6, "learning_rate": 3.401053423626787e-05, "loss": 2.5853, "step": 8500 }, { "epoch": 1.69, "learning_rate": 3.3069977426636574e-05, "loss": 2.552, "step": 9000 }, { "epoch": 1.79, "learning_rate": 3.2129420617005266e-05, "loss": 2.5412, "step": 9500 }, { "epoch": 1.88, "learning_rate": 3.118886380737397e-05, "loss": 2.5345, "step": 10000 }, { "epoch": 1.98, "learning_rate": 3.0248306997742666e-05, "loss": 2.5333, "step": 10500 }, { "epoch": 2.07, "learning_rate": 2.9307750188111365e-05, "loss": 2.463, "step": 11000 }, { "epoch": 2.16, "learning_rate": 2.8367193378480063e-05, "loss": 2.4103, "step": 11500 }, { "epoch": 2.26, "learning_rate": 2.742663656884876e-05, "loss": 2.4347, "step": 12000 }, { "epoch": 2.35, "learning_rate": 2.6486079759217457e-05, "loss": 2.4205, "step": 12500 }, { "epoch": 2.45, "learning_rate": 2.5545522949586155e-05, "loss": 2.4301, "step": 13000 }, { "epoch": 2.54, "learning_rate": 2.4604966139954853e-05, "loss": 2.4104, "step": 13500 }, { "epoch": 2.63, "learning_rate": 2.3664409330323552e-05, "loss": 2.42, "step": 14000 }, { "epoch": 2.73, "learning_rate": 2.272385252069225e-05, "loss": 2.4195, "step": 14500 }, { "epoch": 2.82, "learning_rate": 2.178329571106095e-05, "loss": 2.4112, "step": 15000 }, { "epoch": 2.92, "learning_rate": 2.0842738901429647e-05, "loss": 2.3915, "step": 15500 }, { "epoch": 3.01, "learning_rate": 1.9902182091798346e-05, "loss": 2.3849, "step": 16000 }, { "epoch": 3.1, "learning_rate": 1.8961625282167044e-05, "loss": 2.3143, "step": 16500 }, { "epoch": 3.2, "learning_rate": 1.8021068472535743e-05, "loss": 2.304, "step": 17000 }, { "epoch": 3.29, "learning_rate": 1.708051166290444e-05, "loss": 2.3193, "step": 17500 }, { "epoch": 3.39, "learning_rate": 1.613995485327314e-05, "loss": 2.318, "step": 18000 }, { "epoch": 3.48, "learning_rate": 1.5199398043641836e-05, "loss": 2.308, "step": 18500 }, { "epoch": 3.57, "learning_rate": 1.4258841234010533e-05, "loss": 2.3242, "step": 19000 }, { "epoch": 3.67, "learning_rate": 1.3318284424379231e-05, "loss": 2.3143, "step": 19500 }, { "epoch": 3.76, "learning_rate": 1.2377727614747931e-05, "loss": 2.3192, "step": 20000 }, { "epoch": 3.86, "learning_rate": 1.143717080511663e-05, "loss": 2.3153, "step": 20500 }, { "epoch": 3.95, "learning_rate": 1.0496613995485328e-05, "loss": 2.2939, "step": 21000 }, { "epoch": 4.04, "learning_rate": 9.556057185854025e-06, "loss": 2.2786, "step": 21500 }, { "epoch": 4.14, "learning_rate": 8.615500376222724e-06, "loss": 2.2562, "step": 22000 }, { "epoch": 4.23, "learning_rate": 7.674943566591422e-06, "loss": 2.2488, "step": 22500 }, { "epoch": 4.33, "learning_rate": 6.73438675696012e-06, "loss": 2.2515, "step": 23000 }, { "epoch": 4.42, "learning_rate": 5.793829947328819e-06, "loss": 2.2375, "step": 23500 }, { "epoch": 4.51, "learning_rate": 4.853273137697517e-06, "loss": 2.2446, "step": 24000 }, { "epoch": 4.61, "learning_rate": 3.912716328066216e-06, "loss": 2.2226, "step": 24500 }, { "epoch": 4.7, "learning_rate": 2.9721595184349133e-06, "loss": 2.2476, "step": 25000 }, { "epoch": 4.8, "learning_rate": 2.0316027088036117e-06, "loss": 2.2568, "step": 25500 }, { "epoch": 4.89, "learning_rate": 1.0910458991723102e-06, "loss": 2.2529, "step": 26000 }, { "epoch": 4.98, "learning_rate": 1.5048908954100828e-07, "loss": 2.245, "step": 26500 }, { "epoch": 5.0, "step": 26580, "total_flos": 20321956433756160, "train_runtime": 15554.6344, "train_samples_per_second": 1.709 } ], "max_steps": 26580, "num_train_epochs": 5, "total_flos": 20321956433756160, "trial_name": null, "trial_params": null }