{ "best_metric": null, "best_model_checkpoint": null, "epoch": 59.653179190751445, "eval_steps": 774, "global_step": 7740, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.965317919075145, "grad_norm": 2.1615042686462402, "learning_rate": 9.002583979328166e-06, "loss": 2.686, "step": 774 }, { "epoch": 5.965317919075145, "eval_accuracy": 0.32318619989852865, "eval_loss": 2.3315887451171875, "eval_runtime": 25.0742, "eval_samples_per_second": 314.426, "eval_steps_per_second": 15.753, "step": 774 }, { "epoch": 11.93063583815029, "grad_norm": 2.2875025272369385, "learning_rate": 8.002583979328166e-06, "loss": 2.5095, "step": 1548 }, { "epoch": 11.93063583815029, "eval_accuracy": 0.3167808219178082, "eval_loss": 2.3506176471710205, "eval_runtime": 25.0863, "eval_samples_per_second": 314.275, "eval_steps_per_second": 15.746, "step": 1548 }, { "epoch": 17.895953757225435, "grad_norm": 3.3261446952819824, "learning_rate": 7.003875968992248e-06, "loss": 2.4304, "step": 2322 }, { "epoch": 17.895953757225435, "eval_accuracy": 0.31033316421444274, "eval_loss": 2.4180381298065186, "eval_runtime": 25.0532, "eval_samples_per_second": 314.69, "eval_steps_per_second": 15.766, "step": 2322 }, { "epoch": 23.86127167630058, "grad_norm": 2.2134969234466553, "learning_rate": 6.003875968992249e-06, "loss": 2.3871, "step": 3096 }, { "epoch": 23.86127167630058, "eval_accuracy": 0.3051750380517504, "eval_loss": 2.4722793102264404, "eval_runtime": 25.1716, "eval_samples_per_second": 313.21, "eval_steps_per_second": 15.692, "step": 3096 }, { "epoch": 29.826589595375722, "grad_norm": 2.5758585929870605, "learning_rate": 5.003875968992249e-06, "loss": 2.3556, "step": 3870 }, { "epoch": 29.826589595375722, "eval_accuracy": 0.3, "eval_loss": 2.5127205848693848, "eval_runtime": 25.002, "eval_samples_per_second": 315.335, "eval_steps_per_second": 15.799, "step": 3870 }, { "epoch": 35.79190751445087, "grad_norm": 2.606534242630005, "learning_rate": 4.005167958656331e-06, "loss": 2.3325, "step": 4644 }, { "epoch": 35.79190751445087, "eval_accuracy": 0.2964865550481989, "eval_loss": 2.5233418941497803, "eval_runtime": 25.6388, "eval_samples_per_second": 307.503, "eval_steps_per_second": 15.406, "step": 4644 }, { "epoch": 41.75722543352601, "grad_norm": 2.7333884239196777, "learning_rate": 3.0051679586563307e-06, "loss": 2.3155, "step": 5418 }, { "epoch": 41.75722543352601, "eval_accuracy": 0.2930165978111184, "eval_loss": 2.5572330951690674, "eval_runtime": 25.7526, "eval_samples_per_second": 306.144, "eval_steps_per_second": 15.338, "step": 5418 }, { "epoch": 47.72254335260116, "grad_norm": 3.1406445503234863, "learning_rate": 2.0064599483204137e-06, "loss": 2.3137, "step": 6192 }, { "epoch": 47.72254335260116, "eval_accuracy": 0.29025558092338916, "eval_loss": 2.5638859272003174, "eval_runtime": 25.0057, "eval_samples_per_second": 315.288, "eval_steps_per_second": 15.796, "step": 6192 }, { "epoch": 53.6878612716763, "grad_norm": 2.6950843334198, "learning_rate": 1.0064599483204135e-06, "loss": 2.2978, "step": 6966 }, { "epoch": 53.6878612716763, "eval_accuracy": 0.2877839788037657, "eval_loss": 2.574859380722046, "eval_runtime": 25.1615, "eval_samples_per_second": 313.336, "eval_steps_per_second": 15.699, "step": 6966 }, { "epoch": 59.653179190751445, "grad_norm": 2.301842212677002, "learning_rate": 6.4599483204134375e-09, "loss": 2.2964, "step": 7740 }, { "epoch": 59.653179190751445, "eval_accuracy": 0.28579401319127345, "eval_loss": 2.5783193111419678, "eval_runtime": 24.745, "eval_samples_per_second": 318.609, "eval_steps_per_second": 15.963, "step": 7740 }, { "epoch": 59.653179190751445, "step": 7740, "total_flos": 9.747443743335875e+17, "train_loss": 2.392465943329094, "train_runtime": 26168.6752, "train_samples_per_second": 142.723, "train_steps_per_second": 0.296 } ], "logging_steps": 774, "max_steps": 7740, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.747443743335875e+17, "train_batch_size": 60, "trial_name": null, "trial_params": null }