{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 69, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3, "grad_norm": 0.621504545211792, "learning_rate": 3e-05, "loss": 2.5074, "step": 7 }, { "epoch": 0.61, "grad_norm": 0.4357547461986542, "learning_rate": 2.6612903225806453e-05, "loss": 2.4362, "step": 14 }, { "epoch": 0.91, "grad_norm": 0.5074889063835144, "learning_rate": 2.3225806451612902e-05, "loss": 2.5027, "step": 21 }, { "epoch": 1.22, "grad_norm": 0.6310410499572754, "learning_rate": 1.9838709677419355e-05, "loss": 2.5137, "step": 28 }, { "epoch": 1.52, "grad_norm": 0.5066408514976501, "learning_rate": 1.6451612903225807e-05, "loss": 2.5501, "step": 35 }, { "epoch": 1.83, "grad_norm": 0.5650488138198853, "learning_rate": 1.3064516129032258e-05, "loss": 2.1844, "step": 42 }, { "epoch": 2.13, "grad_norm": 0.44675177335739136, "learning_rate": 9.67741935483871e-06, "loss": 2.3875, "step": 49 }, { "epoch": 2.43, "grad_norm": 0.44878822565078735, "learning_rate": 6.290322580645162e-06, "loss": 2.4376, "step": 56 }, { "epoch": 2.74, "grad_norm": 0.3949732780456543, "learning_rate": 2.9032258064516128e-06, "loss": 2.3816, "step": 63 } ], "logging_steps": 7, "max_steps": 69, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 148233963700224.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }