|
{ |
|
"best_metric": 0.07713755965232849, |
|
"best_model_checkpoint": "/nasty/data/tpid/vizwiz/ViTGPT2_VW/checkpoint-14000", |
|
"epoch": 0.04404233526800956, |
|
"global_step": 14645, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9863839666769097e-05, |
|
"loss": 0.1256, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 0.09279213845729828, |
|
"eval_runtime": 1047.3083, |
|
"eval_samples_per_second": 37.0, |
|
"eval_steps_per_second": 9.25, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9727269623408106e-05, |
|
"loss": 0.0947, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.0897236242890358, |
|
"eval_runtime": 864.4123, |
|
"eval_samples_per_second": 44.828, |
|
"eval_steps_per_second": 11.208, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9590699580047118e-05, |
|
"loss": 0.0889, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.08590871840715408, |
|
"eval_runtime": 857.9482, |
|
"eval_samples_per_second": 45.166, |
|
"eval_steps_per_second": 11.292, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.945412953668613e-05, |
|
"loss": 0.0888, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.08419814705848694, |
|
"eval_runtime": 866.4231, |
|
"eval_samples_per_second": 44.724, |
|
"eval_steps_per_second": 11.182, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.931755949332514e-05, |
|
"loss": 0.0866, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.08309577405452728, |
|
"eval_runtime": 864.2485, |
|
"eval_samples_per_second": 44.837, |
|
"eval_steps_per_second": 11.21, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.918098944996415e-05, |
|
"loss": 0.0852, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.08189540356397629, |
|
"eval_runtime": 865.5776, |
|
"eval_samples_per_second": 44.768, |
|
"eval_steps_per_second": 11.193, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9044419406603162e-05, |
|
"loss": 0.0833, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.08095835894346237, |
|
"eval_runtime": 864.7144, |
|
"eval_samples_per_second": 44.812, |
|
"eval_steps_per_second": 11.204, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8907849363242174e-05, |
|
"loss": 0.0835, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 0.08023638278245926, |
|
"eval_runtime": 866.728, |
|
"eval_samples_per_second": 44.708, |
|
"eval_steps_per_second": 11.178, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8771279319881186e-05, |
|
"loss": 0.081, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 0.07963699847459793, |
|
"eval_runtime": 865.0337, |
|
"eval_samples_per_second": 44.796, |
|
"eval_steps_per_second": 11.2, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.86347092765202e-05, |
|
"loss": 0.0803, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.07886938005685806, |
|
"eval_runtime": 864.2901, |
|
"eval_samples_per_second": 44.834, |
|
"eval_steps_per_second": 11.209, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.849813923315921e-05, |
|
"loss": 0.0814, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.07851768285036087, |
|
"eval_runtime": 863.0619, |
|
"eval_samples_per_second": 44.898, |
|
"eval_steps_per_second": 11.225, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.836156918979822e-05, |
|
"loss": 0.0799, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 0.0779803916811943, |
|
"eval_runtime": 862.8886, |
|
"eval_samples_per_second": 44.907, |
|
"eval_steps_per_second": 11.227, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.822513571648059e-05, |
|
"loss": 0.0786, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 0.07762513309717178, |
|
"eval_runtime": 861.7094, |
|
"eval_samples_per_second": 44.969, |
|
"eval_steps_per_second": 11.243, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.8088565673119603e-05, |
|
"loss": 0.0796, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.07713755965232849, |
|
"eval_runtime": 866.4253, |
|
"eval_samples_per_second": 44.724, |
|
"eval_steps_per_second": 11.182, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"step": 14645, |
|
"total_flos": 7.904602466056929e+18, |
|
"train_loss": 0.0033295607640092873, |
|
"train_runtime": 303.4327, |
|
"train_samples_per_second": 386.099, |
|
"train_steps_per_second": 48.264 |
|
} |
|
], |
|
"max_steps": 14645, |
|
"num_train_epochs": 1, |
|
"total_flos": 7.904602466056929e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|