{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 14214, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 4.8241170676797525e-05, "loss": 5.1993, "step": 500 }, { "epoch": 0.21, "learning_rate": 4.648234135359505e-05, "loss": 2.7321, "step": 1000 }, { "epoch": 0.32, "learning_rate": 4.472351203039257e-05, "loss": 2.4547, "step": 1500 }, { "epoch": 0.42, "learning_rate": 4.2964682707190094e-05, "loss": 2.3454, "step": 2000 }, { "epoch": 0.53, "learning_rate": 4.120585338398762e-05, "loss": 2.2375, "step": 2500 }, { "epoch": 0.63, "learning_rate": 3.944702406078514e-05, "loss": 2.1503, "step": 3000 }, { "epoch": 0.74, "learning_rate": 3.768819473758266e-05, "loss": 2.0775, "step": 3500 }, { "epoch": 0.84, "learning_rate": 3.592936541438019e-05, "loss": 2.0802, "step": 4000 }, { "epoch": 0.95, "learning_rate": 3.4170536091177716e-05, "loss": 2.0028, "step": 4500 }, { "epoch": 1.06, "learning_rate": 3.241170676797524e-05, "loss": 1.9862, "step": 5000 }, { "epoch": 1.16, "learning_rate": 3.065287744477276e-05, "loss": 1.95, "step": 5500 }, { "epoch": 1.27, "learning_rate": 2.8894048121570285e-05, "loss": 1.9279, "step": 6000 }, { "epoch": 1.37, "learning_rate": 2.7135218798367808e-05, "loss": 1.8896, "step": 6500 }, { "epoch": 1.48, "learning_rate": 2.537638947516533e-05, "loss": 1.8719, "step": 7000 }, { "epoch": 1.58, "learning_rate": 2.3617560151962857e-05, "loss": 1.8719, "step": 7500 }, { "epoch": 1.69, "learning_rate": 2.185873082876038e-05, "loss": 1.8422, "step": 8000 }, { "epoch": 1.79, "learning_rate": 2.0099901505557903e-05, "loss": 1.8601, "step": 8500 }, { "epoch": 1.9, "learning_rate": 1.8341072182355426e-05, "loss": 1.8463, "step": 9000 }, { "epoch": 2.01, "learning_rate": 1.658224285915295e-05, "loss": 1.8008, "step": 9500 }, { "epoch": 2.11, "learning_rate": 1.4823413535950473e-05, "loss": 1.812, "step": 10000 }, { "epoch": 2.22, "learning_rate": 1.3064584212747996e-05, "loss": 1.7943, "step": 10500 }, { "epoch": 2.32, "learning_rate": 1.130575488954552e-05, "loss": 1.7935, "step": 11000 }, { "epoch": 2.43, "learning_rate": 9.546925566343042e-06, "loss": 1.782, "step": 11500 }, { "epoch": 2.53, "learning_rate": 7.788096243140567e-06, "loss": 1.7745, "step": 12000 }, { "epoch": 2.64, "learning_rate": 6.02926691993809e-06, "loss": 1.7724, "step": 12500 }, { "epoch": 2.74, "learning_rate": 4.270437596735613e-06, "loss": 1.7846, "step": 13000 }, { "epoch": 2.85, "learning_rate": 2.5116082735331366e-06, "loss": 1.7903, "step": 13500 }, { "epoch": 2.95, "learning_rate": 7.5277895033066e-07, "loss": 1.763, "step": 14000 }, { "epoch": 3.0, "step": 14214, "total_flos": 6.011566735491072e+16, "train_loss": 2.0746695444824423, "train_runtime": 9365.9127, "train_samples_per_second": 6.07, "train_steps_per_second": 1.518 } ], "logging_steps": 500, "max_steps": 14214, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 6.011566735491072e+16, "trial_name": null, "trial_params": null }