ViTGPT2_VW / trainer_state.json
gagan3012's picture
End of training
6ab563d
{
"best_metric": 0.07713755965232849,
"best_model_checkpoint": "/nasty/data/tpid/vizwiz/ViTGPT2_VW/checkpoint-14000",
"epoch": 0.04404233526800956,
"global_step": 14645,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 1.9863839666769097e-05,
"loss": 0.1256,
"step": 1000
},
{
"epoch": 0.03,
"eval_loss": 0.09279213845729828,
"eval_runtime": 1047.3083,
"eval_samples_per_second": 37.0,
"eval_steps_per_second": 9.25,
"step": 1000
},
{
"epoch": 0.07,
"learning_rate": 1.9727269623408106e-05,
"loss": 0.0947,
"step": 2000
},
{
"epoch": 0.07,
"eval_loss": 0.0897236242890358,
"eval_runtime": 864.4123,
"eval_samples_per_second": 44.828,
"eval_steps_per_second": 11.208,
"step": 2000
},
{
"epoch": 0.1,
"learning_rate": 1.9590699580047118e-05,
"loss": 0.0889,
"step": 3000
},
{
"epoch": 0.1,
"eval_loss": 0.08590871840715408,
"eval_runtime": 857.9482,
"eval_samples_per_second": 45.166,
"eval_steps_per_second": 11.292,
"step": 3000
},
{
"epoch": 0.14,
"learning_rate": 1.945412953668613e-05,
"loss": 0.0888,
"step": 4000
},
{
"epoch": 0.14,
"eval_loss": 0.08419814705848694,
"eval_runtime": 866.4231,
"eval_samples_per_second": 44.724,
"eval_steps_per_second": 11.182,
"step": 4000
},
{
"epoch": 0.17,
"learning_rate": 1.931755949332514e-05,
"loss": 0.0866,
"step": 5000
},
{
"epoch": 0.17,
"eval_loss": 0.08309577405452728,
"eval_runtime": 864.2485,
"eval_samples_per_second": 44.837,
"eval_steps_per_second": 11.21,
"step": 5000
},
{
"epoch": 0.2,
"learning_rate": 1.918098944996415e-05,
"loss": 0.0852,
"step": 6000
},
{
"epoch": 0.2,
"eval_loss": 0.08189540356397629,
"eval_runtime": 865.5776,
"eval_samples_per_second": 44.768,
"eval_steps_per_second": 11.193,
"step": 6000
},
{
"epoch": 0.24,
"learning_rate": 1.9044419406603162e-05,
"loss": 0.0833,
"step": 7000
},
{
"epoch": 0.24,
"eval_loss": 0.08095835894346237,
"eval_runtime": 864.7144,
"eval_samples_per_second": 44.812,
"eval_steps_per_second": 11.204,
"step": 7000
},
{
"epoch": 0.27,
"learning_rate": 1.8907849363242174e-05,
"loss": 0.0835,
"step": 8000
},
{
"epoch": 0.27,
"eval_loss": 0.08023638278245926,
"eval_runtime": 866.728,
"eval_samples_per_second": 44.708,
"eval_steps_per_second": 11.178,
"step": 8000
},
{
"epoch": 0.31,
"learning_rate": 1.8771279319881186e-05,
"loss": 0.081,
"step": 9000
},
{
"epoch": 0.31,
"eval_loss": 0.07963699847459793,
"eval_runtime": 865.0337,
"eval_samples_per_second": 44.796,
"eval_steps_per_second": 11.2,
"step": 9000
},
{
"epoch": 0.34,
"learning_rate": 1.86347092765202e-05,
"loss": 0.0803,
"step": 10000
},
{
"epoch": 0.34,
"eval_loss": 0.07886938005685806,
"eval_runtime": 864.2901,
"eval_samples_per_second": 44.834,
"eval_steps_per_second": 11.209,
"step": 10000
},
{
"epoch": 0.38,
"learning_rate": 1.849813923315921e-05,
"loss": 0.0814,
"step": 11000
},
{
"epoch": 0.38,
"eval_loss": 0.07851768285036087,
"eval_runtime": 863.0619,
"eval_samples_per_second": 44.898,
"eval_steps_per_second": 11.225,
"step": 11000
},
{
"epoch": 0.41,
"learning_rate": 1.836156918979822e-05,
"loss": 0.0799,
"step": 12000
},
{
"epoch": 0.41,
"eval_loss": 0.0779803916811943,
"eval_runtime": 862.8886,
"eval_samples_per_second": 44.907,
"eval_steps_per_second": 11.227,
"step": 12000
},
{
"epoch": 0.44,
"learning_rate": 1.822513571648059e-05,
"loss": 0.0786,
"step": 13000
},
{
"epoch": 0.44,
"eval_loss": 0.07762513309717178,
"eval_runtime": 861.7094,
"eval_samples_per_second": 44.969,
"eval_steps_per_second": 11.243,
"step": 13000
},
{
"epoch": 0.48,
"learning_rate": 1.8088565673119603e-05,
"loss": 0.0796,
"step": 14000
},
{
"epoch": 0.48,
"eval_loss": 0.07713755965232849,
"eval_runtime": 866.4253,
"eval_samples_per_second": 44.724,
"eval_steps_per_second": 11.182,
"step": 14000
},
{
"epoch": 0.04,
"step": 14645,
"total_flos": 7.904602466056929e+18,
"train_loss": 0.0033295607640092873,
"train_runtime": 303.4327,
"train_samples_per_second": 386.099,
"train_steps_per_second": 48.264
}
],
"max_steps": 14645,
"num_train_epochs": 1,
"total_flos": 7.904602466056929e+18,
"trial_name": null,
"trial_params": null
}