{ "best_metric": null, "best_model_checkpoint": null, "epoch": 67.53246753246754, "eval_steps": 500, "global_step": 78000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.16, "learning_rate": 9.892164502164502e-06, "loss": 0.8489, "step": 2500 }, { "epoch": 4.33, "learning_rate": 9.783982683982685e-06, "loss": 0.6312, "step": 5000 }, { "epoch": 6.49, "learning_rate": 9.675757575757577e-06, "loss": 0.5488, "step": 7500 }, { "epoch": 8.66, "learning_rate": 9.56757575757576e-06, "loss": 0.5403, "step": 10000 }, { "epoch": 10.82, "learning_rate": 9.45939393939394e-06, "loss": 0.5208, "step": 12500 }, { "epoch": 12.99, "learning_rate": 9.351212121212121e-06, "loss": 0.4982, "step": 15000 }, { "epoch": 15.15, "learning_rate": 9.243030303030304e-06, "loss": 0.4878, "step": 17500 }, { "epoch": 17.32, "learning_rate": 9.134891774891776e-06, "loss": 0.5034, "step": 20000 }, { "epoch": 19.48, "learning_rate": 9.026666666666666e-06, "loss": 0.4937, "step": 22500 }, { "epoch": 21.65, "learning_rate": 8.91852813852814e-06, "loss": 0.5369, "step": 25000 }, { "epoch": 23.81, "learning_rate": 8.810346320346321e-06, "loss": 0.4645, "step": 27500 }, { "epoch": 25.97, "learning_rate": 8.702121212121212e-06, "loss": 0.4509, "step": 30000 }, { "epoch": 28.14, "learning_rate": 8.593939393939395e-06, "loss": 0.4499, "step": 32500 }, { "epoch": 30.3, "learning_rate": 8.485757575757576e-06, "loss": 0.4875, "step": 35000 }, { "epoch": 32.47, "learning_rate": 8.377575757575759e-06, "loss": 0.4544, "step": 37500 }, { "epoch": 34.63, "learning_rate": 8.26939393939394e-06, "loss": 0.4406, "step": 40000 }, { "epoch": 36.8, "learning_rate": 8.161212121212123e-06, "loss": 0.4369, "step": 42500 }, { "epoch": 38.96, "learning_rate": 8.053030303030304e-06, "loss": 0.4193, "step": 45000 }, { "epoch": 41.13, "learning_rate": 7.944848484848485e-06, "loss": 0.4057, "step": 47500 }, { "epoch": 43.29, "learning_rate": 7.836666666666667e-06, "loss": 0.3984, "step": 50000 }, { "epoch": 45.45, "learning_rate": 7.72848484848485e-06, "loss": 0.3911, "step": 52500 }, { "epoch": 47.62, "learning_rate": 7.620303030303031e-06, "loss": 0.3861, "step": 55000 }, { "epoch": 49.78, "learning_rate": 7.512121212121213e-06, "loss": 0.3833, "step": 57500 }, { "epoch": 51.95, "learning_rate": 7.403939393939395e-06, "loss": 0.3836, "step": 60000 }, { "epoch": 54.11, "learning_rate": 7.295757575757576e-06, "loss": 0.3706, "step": 62500 }, { "epoch": 56.28, "learning_rate": 7.1875757575757576e-06, "loss": 0.3625, "step": 65000 }, { "epoch": 58.44, "learning_rate": 7.07939393939394e-06, "loss": 0.3563, "step": 67500 }, { "epoch": 60.61, "learning_rate": 6.971212121212122e-06, "loss": 0.3522, "step": 70000 }, { "epoch": 62.77, "learning_rate": 6.863030303030304e-06, "loss": 0.347, "step": 72500 }, { "epoch": 64.94, "learning_rate": 6.754891774891775e-06, "loss": 0.3413, "step": 75000 }, { "epoch": 67.1, "learning_rate": 6.646709956709957e-06, "loss": 0.3355, "step": 77500 } ], "logging_steps": 2500, "max_steps": 231000, "num_train_epochs": 200, "save_steps": 1000, "total_flos": 1.863115306612301e+20, "trial_name": null, "trial_params": null }