{ "best_metric": null, "best_model_checkpoint": null, "epoch": 59.820761762509335, "eval_steps": 1602, "global_step": 16020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.982076176250933, "grad_norm": 1.8657052516937256, "learning_rate": 9.001248439450687e-06, "loss": 1.7286, "step": 1602 }, { "epoch": 5.982076176250933, "eval_accuracy": 0.06538245219347581, "eval_loss": 3.015066623687744, "eval_runtime": 37.6228, "eval_samples_per_second": 189.034, "eval_steps_per_second": 9.462, "step": 1602 }, { "epoch": 11.964152352501866, "grad_norm": 4.766932964324951, "learning_rate": 8.001248439450687e-06, "loss": 1.6207, "step": 3204 }, { "epoch": 11.964152352501866, "eval_accuracy": 0.06650731158605175, "eval_loss": 3.237614393234253, "eval_runtime": 37.2902, "eval_samples_per_second": 190.72, "eval_steps_per_second": 9.547, "step": 3204 }, { "epoch": 17.9462285287528, "grad_norm": 3.993530035018921, "learning_rate": 7.00187265917603e-06, "loss": 1.5399, "step": 4806 }, { "epoch": 17.9462285287528, "eval_accuracy": 0.06847581552305962, "eval_loss": 3.2385711669921875, "eval_runtime": 38.1712, "eval_samples_per_second": 186.319, "eval_steps_per_second": 9.326, "step": 4806 }, { "epoch": 23.928304705003733, "grad_norm": 1.5667791366577148, "learning_rate": 6.002496878901374e-06, "loss": 1.4981, "step": 6408 }, { "epoch": 23.928304705003733, "eval_accuracy": 0.0673158042744657, "eval_loss": 3.354517936706543, "eval_runtime": 37.7648, "eval_samples_per_second": 188.324, "eval_steps_per_second": 9.427, "step": 6408 }, { "epoch": 29.910380881254667, "grad_norm": 2.5913686752319336, "learning_rate": 5.003121098626717e-06, "loss": 1.4774, "step": 8010 }, { "epoch": 29.910380881254667, "eval_accuracy": 0.06766029246344207, "eval_loss": 3.340369939804077, "eval_runtime": 37.3411, "eval_samples_per_second": 190.461, "eval_steps_per_second": 9.534, "step": 8010 }, { "epoch": 35.8924570575056, "grad_norm": 1.792555332183838, "learning_rate": 4.003121098626717e-06, "loss": 1.4648, "step": 9612 }, { "epoch": 35.8924570575056, "eval_accuracy": 0.06699943757030372, "eval_loss": 3.4236271381378174, "eval_runtime": 37.2235, "eval_samples_per_second": 191.062, "eval_steps_per_second": 9.564, "step": 9612 }, { "epoch": 41.87453323375654, "grad_norm": 1.6825398206710815, "learning_rate": 3.003121098626717e-06, "loss": 1.4549, "step": 11214 }, { "epoch": 41.87453323375654, "eval_accuracy": 0.06644705126144947, "eval_loss": 3.4688560962677, "eval_runtime": 37.3243, "eval_samples_per_second": 190.546, "eval_steps_per_second": 9.538, "step": 11214 }, { "epoch": 47.856609410007465, "grad_norm": 1.0186405181884766, "learning_rate": 2.00374531835206e-06, "loss": 1.4528, "step": 12816 }, { "epoch": 47.856609410007465, "eval_accuracy": 0.06592730596175478, "eval_loss": 3.520540237426758, "eval_runtime": 37.6061, "eval_samples_per_second": 189.118, "eval_steps_per_second": 9.467, "step": 12816 }, { "epoch": 53.8386855862584, "grad_norm": 1.1254180669784546, "learning_rate": 1.0043695380774035e-06, "loss": 1.4538, "step": 14418 }, { "epoch": 53.8386855862584, "eval_accuracy": 0.06553868266466692, "eval_loss": 3.4702768325805664, "eval_runtime": 37.4351, "eval_samples_per_second": 189.982, "eval_steps_per_second": 9.51, "step": 14418 }, { "epoch": 59.820761762509335, "grad_norm": 0.6253169775009155, "learning_rate": 4.993757802746567e-09, "loss": 1.4519, "step": 16020 }, { "epoch": 59.820761762509335, "eval_accuracy": 0.06511529808773903, "eval_loss": 3.4956905841827393, "eval_runtime": 38.202, "eval_samples_per_second": 186.168, "eval_steps_per_second": 9.319, "step": 16020 }, { "epoch": 59.820761762509335, "step": 16020, "total_flos": 1.9001138663630822e+18, "train_loss": 1.5142938697233925, "train_runtime": 42053.4844, "train_samples_per_second": 76.397, "train_steps_per_second": 0.381 } ], "logging_steps": 1602, "max_steps": 16020, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9001138663630822e+18, "train_batch_size": 20, "trial_name": null, "trial_params": null }