|
{ |
|
"best_metric": 0.18238767981529236, |
|
"best_model_checkpoint": "./beans_outputs/checkpoint-220", |
|
"epoch": 5.0, |
|
"global_step": 220, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9090909090909094e-05, |
|
"loss": 1.0242, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.898, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7272727272727274e-05, |
|
"loss": 0.7761, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 0.672, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9398496240601504, |
|
"eval_loss": 0.5671981573104858, |
|
"eval_runtime": 2.1561, |
|
"eval_samples_per_second": 61.686, |
|
"eval_steps_per_second": 2.783, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.5454545454545454e-05, |
|
"loss": 0.583, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.5104, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 0.4104, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 0.411, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9699248120300752, |
|
"eval_loss": 0.30274108052253723, |
|
"eval_runtime": 4.0989, |
|
"eval_samples_per_second": 32.448, |
|
"eval_steps_per_second": 1.464, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.181818181818182e-05, |
|
"loss": 0.3473, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.3066, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2993, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.2402, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.181818181818183e-06, |
|
"loss": 0.2542, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9699248120300752, |
|
"eval_loss": 0.20783478021621704, |
|
"eval_runtime": 2.6143, |
|
"eval_samples_per_second": 50.874, |
|
"eval_steps_per_second": 2.295, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 0.257, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 0.2739, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 0.1899, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.1886, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9699248120300752, |
|
"eval_loss": 0.1881999671459198, |
|
"eval_runtime": 4.3782, |
|
"eval_samples_per_second": 30.378, |
|
"eval_steps_per_second": 1.37, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 0.1929, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 0.1909, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 0.1607, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 0.1953, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1931, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9699248120300752, |
|
"eval_loss": 0.18238767981529236, |
|
"eval_runtime": 4.8076, |
|
"eval_samples_per_second": 27.664, |
|
"eval_steps_per_second": 1.248, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 220, |
|
"total_flos": 4.006371770595533e+17, |
|
"train_loss": 0.3897728649052707, |
|
"train_runtime": 301.7547, |
|
"train_samples_per_second": 17.133, |
|
"train_steps_per_second": 0.729 |
|
} |
|
], |
|
"max_steps": 220, |
|
"num_train_epochs": 5, |
|
"total_flos": 4.006371770595533e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|