|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 59.653179190751445, |
|
"eval_steps": 774, |
|
"global_step": 7740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.965317919075145, |
|
"grad_norm": 2.1615042686462402, |
|
"learning_rate": 9.002583979328166e-06, |
|
"loss": 2.686, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 5.965317919075145, |
|
"eval_accuracy": 0.32318619989852865, |
|
"eval_loss": 2.3315887451171875, |
|
"eval_runtime": 25.0742, |
|
"eval_samples_per_second": 314.426, |
|
"eval_steps_per_second": 15.753, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 11.93063583815029, |
|
"grad_norm": 2.2875025272369385, |
|
"learning_rate": 8.002583979328166e-06, |
|
"loss": 2.5095, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 11.93063583815029, |
|
"eval_accuracy": 0.3167808219178082, |
|
"eval_loss": 2.3506176471710205, |
|
"eval_runtime": 25.0863, |
|
"eval_samples_per_second": 314.275, |
|
"eval_steps_per_second": 15.746, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 17.895953757225435, |
|
"grad_norm": 3.3261446952819824, |
|
"learning_rate": 7.003875968992248e-06, |
|
"loss": 2.4304, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 17.895953757225435, |
|
"eval_accuracy": 0.31033316421444274, |
|
"eval_loss": 2.4180381298065186, |
|
"eval_runtime": 25.0532, |
|
"eval_samples_per_second": 314.69, |
|
"eval_steps_per_second": 15.766, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 23.86127167630058, |
|
"grad_norm": 2.2134969234466553, |
|
"learning_rate": 6.003875968992249e-06, |
|
"loss": 2.3871, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 23.86127167630058, |
|
"eval_accuracy": 0.3051750380517504, |
|
"eval_loss": 2.4722793102264404, |
|
"eval_runtime": 25.1716, |
|
"eval_samples_per_second": 313.21, |
|
"eval_steps_per_second": 15.692, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 29.826589595375722, |
|
"grad_norm": 2.5758585929870605, |
|
"learning_rate": 5.003875968992249e-06, |
|
"loss": 2.3556, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 29.826589595375722, |
|
"eval_accuracy": 0.3, |
|
"eval_loss": 2.5127205848693848, |
|
"eval_runtime": 25.002, |
|
"eval_samples_per_second": 315.335, |
|
"eval_steps_per_second": 15.799, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 35.79190751445087, |
|
"grad_norm": 2.606534242630005, |
|
"learning_rate": 4.005167958656331e-06, |
|
"loss": 2.3325, |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 35.79190751445087, |
|
"eval_accuracy": 0.2964865550481989, |
|
"eval_loss": 2.5233418941497803, |
|
"eval_runtime": 25.6388, |
|
"eval_samples_per_second": 307.503, |
|
"eval_steps_per_second": 15.406, |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 41.75722543352601, |
|
"grad_norm": 2.7333884239196777, |
|
"learning_rate": 3.0051679586563307e-06, |
|
"loss": 2.3155, |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 41.75722543352601, |
|
"eval_accuracy": 0.2930165978111184, |
|
"eval_loss": 2.5572330951690674, |
|
"eval_runtime": 25.7526, |
|
"eval_samples_per_second": 306.144, |
|
"eval_steps_per_second": 15.338, |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 47.72254335260116, |
|
"grad_norm": 3.1406445503234863, |
|
"learning_rate": 2.0064599483204137e-06, |
|
"loss": 2.3137, |
|
"step": 6192 |
|
}, |
|
{ |
|
"epoch": 47.72254335260116, |
|
"eval_accuracy": 0.29025558092338916, |
|
"eval_loss": 2.5638859272003174, |
|
"eval_runtime": 25.0057, |
|
"eval_samples_per_second": 315.288, |
|
"eval_steps_per_second": 15.796, |
|
"step": 6192 |
|
}, |
|
{ |
|
"epoch": 53.6878612716763, |
|
"grad_norm": 2.6950843334198, |
|
"learning_rate": 1.0064599483204135e-06, |
|
"loss": 2.2978, |
|
"step": 6966 |
|
}, |
|
{ |
|
"epoch": 53.6878612716763, |
|
"eval_accuracy": 0.2877839788037657, |
|
"eval_loss": 2.574859380722046, |
|
"eval_runtime": 25.1615, |
|
"eval_samples_per_second": 313.336, |
|
"eval_steps_per_second": 15.699, |
|
"step": 6966 |
|
}, |
|
{ |
|
"epoch": 59.653179190751445, |
|
"grad_norm": 2.301842212677002, |
|
"learning_rate": 6.4599483204134375e-09, |
|
"loss": 2.2964, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 59.653179190751445, |
|
"eval_accuracy": 0.28579401319127345, |
|
"eval_loss": 2.5783193111419678, |
|
"eval_runtime": 24.745, |
|
"eval_samples_per_second": 318.609, |
|
"eval_steps_per_second": 15.963, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 59.653179190751445, |
|
"step": 7740, |
|
"total_flos": 9.747443743335875e+17, |
|
"train_loss": 2.392465943329094, |
|
"train_runtime": 26168.6752, |
|
"train_samples_per_second": 142.723, |
|
"train_steps_per_second": 0.296 |
|
} |
|
], |
|
"logging_steps": 774, |
|
"max_steps": 7740, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.747443743335875e+17, |
|
"train_batch_size": 60, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|