|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 3858, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.38880248833592534, |
|
"grad_norm": 0.6042742133140564, |
|
"learning_rate": 1.740798341109383e-05, |
|
"loss": 0.2151, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38880248833592534, |
|
"eval_accuracy": 0.9762738234150136, |
|
"eval_loss": 0.10295161604881287, |
|
"eval_runtime": 26.8587, |
|
"eval_samples_per_second": 95.723, |
|
"eval_steps_per_second": 11.989, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7776049766718507, |
|
"grad_norm": 0.04766521230340004, |
|
"learning_rate": 1.4815966822187664e-05, |
|
"loss": 0.1167, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7776049766718507, |
|
"eval_accuracy": 0.9875534811357448, |
|
"eval_loss": 0.05390123650431633, |
|
"eval_runtime": 26.735, |
|
"eval_samples_per_second": 96.166, |
|
"eval_steps_per_second": 12.044, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.166407465007776, |
|
"grad_norm": 0.008871573023498058, |
|
"learning_rate": 1.2223950233281495e-05, |
|
"loss": 0.0687, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.166407465007776, |
|
"eval_accuracy": 0.9793854531310774, |
|
"eval_loss": 0.12513048946857452, |
|
"eval_runtime": 27.1074, |
|
"eval_samples_per_second": 94.845, |
|
"eval_steps_per_second": 11.879, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5552099533437014, |
|
"grad_norm": 0.0014064661227166653, |
|
"learning_rate": 9.631933644375326e-06, |
|
"loss": 0.0279, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5552099533437014, |
|
"eval_accuracy": 0.9778296382730455, |
|
"eval_loss": 0.148821160197258, |
|
"eval_runtime": 26.879, |
|
"eval_samples_per_second": 95.651, |
|
"eval_steps_per_second": 11.98, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.9440124416796267, |
|
"grad_norm": 0.002627410925924778, |
|
"learning_rate": 7.039917055469155e-06, |
|
"loss": 0.0293, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.9440124416796267, |
|
"eval_accuracy": 0.9914430182808246, |
|
"eval_loss": 0.04895803704857826, |
|
"eval_runtime": 26.8775, |
|
"eval_samples_per_second": 95.656, |
|
"eval_steps_per_second": 11.98, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.332814930015552, |
|
"grad_norm": 0.0008352847071364522, |
|
"learning_rate": 4.447900466562986e-06, |
|
"loss": 0.0119, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.332814930015552, |
|
"eval_accuracy": 0.9766627771295215, |
|
"eval_loss": 0.15932457149028778, |
|
"eval_runtime": 27.4463, |
|
"eval_samples_per_second": 93.674, |
|
"eval_steps_per_second": 11.732, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.721617418351477, |
|
"grad_norm": 0.006800688803195953, |
|
"learning_rate": 1.8558838776568172e-06, |
|
"loss": 0.0045, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.721617418351477, |
|
"eval_accuracy": 0.9797744068455854, |
|
"eval_loss": 0.14682677388191223, |
|
"eval_runtime": 27.6918, |
|
"eval_samples_per_second": 92.843, |
|
"eval_steps_per_second": 11.628, |
|
"step": 3500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3858, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 5863351757603040.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|