|
{ |
|
"best_metric": 0.8666666746139526, |
|
"best_model_checkpoint": "/content/best_model/checkpoint-3000", |
|
"epoch": 20.0, |
|
"global_step": 7020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.8575498575498575e-05, |
|
"loss": 0.5673, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.8437275886535645, |
|
"eval_loss": 0.43583494424819946, |
|
"eval_runtime": 13.3841, |
|
"eval_samples_per_second": 104.228, |
|
"eval_steps_per_second": 3.287, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.7150997150997152e-05, |
|
"loss": 0.2898, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.8523297309875488, |
|
"eval_loss": 0.4844760596752167, |
|
"eval_runtime": 13.3926, |
|
"eval_samples_per_second": 104.162, |
|
"eval_steps_per_second": 3.285, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.5726495726495726e-05, |
|
"loss": 0.1669, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_accuracy": 0.8573476672172546, |
|
"eval_loss": 0.6232547760009766, |
|
"eval_runtime": 13.3901, |
|
"eval_samples_per_second": 104.181, |
|
"eval_steps_per_second": 3.286, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 1.4301994301994305e-05, |
|
"loss": 0.1087, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_accuracy": 0.8573476672172546, |
|
"eval_loss": 0.7262628078460693, |
|
"eval_runtime": 13.3968, |
|
"eval_samples_per_second": 104.129, |
|
"eval_steps_per_second": 3.284, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1.2877492877492879e-05, |
|
"loss": 0.0728, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_accuracy": 0.8637992739677429, |
|
"eval_loss": 0.8840554356575012, |
|
"eval_runtime": 13.3781, |
|
"eval_samples_per_second": 104.275, |
|
"eval_steps_per_second": 3.289, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 1.1452991452991454e-05, |
|
"loss": 0.0512, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_accuracy": 0.8666666746139526, |
|
"eval_loss": 0.9500740170478821, |
|
"eval_runtime": 13.4339, |
|
"eval_samples_per_second": 103.842, |
|
"eval_steps_per_second": 3.275, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1.002849002849003e-05, |
|
"loss": 0.0372, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_accuracy": 0.856630802154541, |
|
"eval_loss": 1.0440385341644287, |
|
"eval_runtime": 13.391, |
|
"eval_samples_per_second": 104.175, |
|
"eval_steps_per_second": 3.286, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 8.603988603988605e-06, |
|
"loss": 0.0262, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"eval_accuracy": 0.8609318733215332, |
|
"eval_loss": 1.0770107507705688, |
|
"eval_runtime": 13.3894, |
|
"eval_samples_per_second": 104.187, |
|
"eval_steps_per_second": 3.286, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 7.17948717948718e-06, |
|
"loss": 0.0243, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"eval_accuracy": 0.8616487383842468, |
|
"eval_loss": 1.0931395292282104, |
|
"eval_runtime": 13.3865, |
|
"eval_samples_per_second": 104.209, |
|
"eval_steps_per_second": 3.287, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 5.7549857549857555e-06, |
|
"loss": 0.023, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"eval_accuracy": 0.8630824089050293, |
|
"eval_loss": 1.1087766885757446, |
|
"eval_runtime": 13.3932, |
|
"eval_samples_per_second": 104.157, |
|
"eval_steps_per_second": 3.285, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 4.330484330484331e-06, |
|
"loss": 0.0163, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"eval_accuracy": 0.8580645322799683, |
|
"eval_loss": 1.1263514757156372, |
|
"eval_runtime": 13.3902, |
|
"eval_samples_per_second": 104.18, |
|
"eval_steps_per_second": 3.286, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 2.9059829059829063e-06, |
|
"loss": 0.0111, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"eval_accuracy": 0.8616487383842468, |
|
"eval_loss": 1.154114842414856, |
|
"eval_runtime": 13.3822, |
|
"eval_samples_per_second": 104.243, |
|
"eval_steps_per_second": 3.288, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.0098, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"eval_accuracy": 0.8630824089050293, |
|
"eval_loss": 1.1541603803634644, |
|
"eval_runtime": 13.3814, |
|
"eval_samples_per_second": 104.25, |
|
"eval_steps_per_second": 3.288, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 5.6980056980056986e-08, |
|
"loss": 0.0074, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"eval_accuracy": 0.8637992739677429, |
|
"eval_loss": 1.1652880907058716, |
|
"eval_runtime": 13.3866, |
|
"eval_samples_per_second": 104.209, |
|
"eval_steps_per_second": 3.287, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 7020, |
|
"total_flos": 3.4632303608448e+16, |
|
"train_loss": 0.10057472327884626, |
|
"train_runtime": 6733.6679, |
|
"train_samples_per_second": 33.361, |
|
"train_steps_per_second": 1.043 |
|
} |
|
], |
|
"max_steps": 7020, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.4632303608448e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|