{ "best_metric": 1.8946211338043213, "best_model_checkpoint": "output/big-baby-tape/checkpoint-126", "epoch": 1.0, "global_step": 126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.0001366676095727718, "loss": 2.7486, "step": 5 }, { "epoch": 0.08, "learning_rate": 0.00013507870183531476, "loss": 2.6211, "step": 10 }, { "epoch": 0.12, "learning_rate": 0.0001324579391569924, "loss": 2.2625, "step": 15 }, { "epoch": 0.16, "learning_rate": 0.00012884599993319768, "loss": 2.3757, "step": 20 }, { "epoch": 0.2, "learning_rate": 0.00012429894719210777, "loss": 2.3396, "step": 25 }, { "epoch": 0.24, "learning_rate": 0.00011888735840752609, "loss": 2.2627, "step": 30 }, { "epoch": 0.28, "learning_rate": 0.00011269523002449659, "loss": 2.226, "step": 35 }, { "epoch": 0.32, "learning_rate": 0.0001058186737011911, "loss": 2.1916, "step": 40 }, { "epoch": 0.36, "learning_rate": 9.836442450346448e-05, "loss": 2.2987, "step": 45 }, { "epoch": 0.4, "learning_rate": 9.044818420726556e-05, "loss": 2.0771, "step": 50 }, { "epoch": 0.44, "learning_rate": 8.219282542347867e-05, "loss": 2.1651, "step": 55 }, { "epoch": 0.48, "learning_rate": 7.372648442002871e-05, "loss": 2.1705, "step": 60 }, { "epoch": 0.52, "learning_rate": 6.518057224367617e-05, "loss": 2.0684, "step": 65 }, { "epoch": 0.56, "learning_rate": 5.668773501204858e-05, "loss": 2.025, "step": 70 }, { "epoch": 0.6, "learning_rate": 4.837979503541197e-05, "loss": 2.1328, "step": 75 }, { "epoch": 0.63, "learning_rate": 4.0385704725240065e-05, "loss": 2.076, "step": 80 }, { "epoch": 0.67, "learning_rate": 3.282954504816644e-05, "loss": 2.0965, "step": 85 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.157, "step": 90 }, { "epoch": 0.75, "learning_rate": 1.9491534140783314e-05, "loss": 2.0402, "step": 95 }, { "epoch": 0.79, "learning_rate": 1.3916710004507539e-05, "loss": 2.0006, "step": 100 }, { "epoch": 0.83, "learning_rate": 9.190657300387505e-06, "loss": 1.993, "step": 105 }, { "epoch": 0.87, "learning_rate": 5.38673186569003e-06, "loss": 2.0394, "step": 110 }, { "epoch": 0.91, "learning_rate": 2.5639766592291746e-06, "loss": 1.9176, "step": 115 }, { "epoch": 0.95, "learning_rate": 7.662053209561833e-07, "loss": 2.0078, "step": 120 }, { "epoch": 0.99, "learning_rate": 2.132211474400556e-08, "loss": 2.0426, "step": 125 }, { "epoch": 1.0, "eval_loss": 1.8946211338043213, "eval_runtime": 10.3833, "eval_samples_per_second": 22.44, "eval_steps_per_second": 2.889, "step": 126 } ], "max_steps": 126, "num_train_epochs": 1, "total_flos": 130776662016000.0, "trial_name": null, "trial_params": null }