{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9595959595959593, "eval_steps": 500, "global_step": 196, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.51, "grad_norm": 2.071033477783203, "learning_rate": 0.00019545454545454548, "loss": 1.5121, "step": 25 }, { "epoch": 1.01, "grad_norm": 0.7647247314453125, "learning_rate": 0.00016704545454545455, "loss": 0.4934, "step": 50 }, { "epoch": 1.52, "grad_norm": 0.6377527713775635, "learning_rate": 0.00013863636363636365, "loss": 0.2928, "step": 75 }, { "epoch": 2.02, "grad_norm": 1.0899125337600708, "learning_rate": 0.00011022727272727273, "loss": 0.1976, "step": 100 }, { "epoch": 2.53, "grad_norm": 0.8106021285057068, "learning_rate": 8.181818181818183e-05, "loss": 0.1196, "step": 125 }, { "epoch": 3.03, "grad_norm": 0.38758715987205505, "learning_rate": 5.340909090909091e-05, "loss": 0.1052, "step": 150 }, { "epoch": 3.54, "grad_norm": 0.48243477940559387, "learning_rate": 2.5e-05, "loss": 0.0809, "step": 175 } ], "logging_steps": 25, "max_steps": 196, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 6.856828180758528e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }