Augusto777's picture
End of training
509071a verified
{
"best_metric": 1.0,
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U3-40A/checkpoint-119",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 280,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.37373737373737376,
"eval_loss": 1.329177737236023,
"eval_runtime": 1.2424,
"eval_samples_per_second": 79.683,
"eval_steps_per_second": 3.22,
"step": 7
},
{
"epoch": 1.71,
"grad_norm": 3.085270404815674,
"learning_rate": 2.1428571428571428e-05,
"loss": 1.3407,
"step": 12
},
{
"epoch": 2.0,
"eval_accuracy": 0.5151515151515151,
"eval_loss": 1.107913851737976,
"eval_runtime": 1.2457,
"eval_samples_per_second": 79.472,
"eval_steps_per_second": 3.211,
"step": 14
},
{
"epoch": 3.0,
"eval_accuracy": 0.6262626262626263,
"eval_loss": 0.8917983770370483,
"eval_runtime": 1.2574,
"eval_samples_per_second": 78.737,
"eval_steps_per_second": 3.181,
"step": 21
},
{
"epoch": 3.43,
"grad_norm": 3.550651788711548,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.9919,
"step": 24
},
{
"epoch": 4.0,
"eval_accuracy": 0.7878787878787878,
"eval_loss": 0.6446935534477234,
"eval_runtime": 1.2679,
"eval_samples_per_second": 78.083,
"eval_steps_per_second": 3.155,
"step": 28
},
{
"epoch": 5.0,
"eval_accuracy": 0.8282828282828283,
"eval_loss": 0.45023515820503235,
"eval_runtime": 1.4064,
"eval_samples_per_second": 70.393,
"eval_steps_per_second": 2.844,
"step": 35
},
{
"epoch": 5.14,
"grad_norm": 3.4338462352752686,
"learning_rate": 4.841269841269841e-05,
"loss": 0.5761,
"step": 36
},
{
"epoch": 6.0,
"eval_accuracy": 0.9191919191919192,
"eval_loss": 0.27204275131225586,
"eval_runtime": 1.4909,
"eval_samples_per_second": 66.403,
"eval_steps_per_second": 2.683,
"step": 42
},
{
"epoch": 6.86,
"grad_norm": 1.6468698978424072,
"learning_rate": 4.603174603174603e-05,
"loss": 0.3111,
"step": 48
},
{
"epoch": 7.0,
"eval_accuracy": 0.9292929292929293,
"eval_loss": 0.23024092614650726,
"eval_runtime": 1.3066,
"eval_samples_per_second": 75.766,
"eval_steps_per_second": 3.061,
"step": 49
},
{
"epoch": 8.0,
"eval_accuracy": 0.9494949494949495,
"eval_loss": 0.16501076519489288,
"eval_runtime": 1.3125,
"eval_samples_per_second": 75.428,
"eval_steps_per_second": 3.048,
"step": 56
},
{
"epoch": 8.57,
"grad_norm": 1.7793350219726562,
"learning_rate": 4.3650793650793655e-05,
"loss": 0.204,
"step": 60
},
{
"epoch": 9.0,
"eval_accuracy": 0.9494949494949495,
"eval_loss": 0.1503186672925949,
"eval_runtime": 1.316,
"eval_samples_per_second": 75.227,
"eval_steps_per_second": 3.039,
"step": 63
},
{
"epoch": 10.0,
"eval_accuracy": 0.9797979797979798,
"eval_loss": 0.08136877417564392,
"eval_runtime": 1.3015,
"eval_samples_per_second": 76.065,
"eval_steps_per_second": 3.073,
"step": 70
},
{
"epoch": 10.29,
"grad_norm": 1.3909555673599243,
"learning_rate": 4.126984126984127e-05,
"loss": 0.1518,
"step": 72
},
{
"epoch": 11.0,
"eval_accuracy": 0.9797979797979798,
"eval_loss": 0.06037978082895279,
"eval_runtime": 1.2829,
"eval_samples_per_second": 77.171,
"eval_steps_per_second": 3.118,
"step": 77
},
{
"epoch": 12.0,
"grad_norm": 1.9186725616455078,
"learning_rate": 3.888888888888889e-05,
"loss": 0.1272,
"step": 84
},
{
"epoch": 12.0,
"eval_accuracy": 0.9494949494949495,
"eval_loss": 0.1265028864145279,
"eval_runtime": 1.2863,
"eval_samples_per_second": 76.965,
"eval_steps_per_second": 3.11,
"step": 84
},
{
"epoch": 13.0,
"eval_accuracy": 0.9797979797979798,
"eval_loss": 0.05176503211259842,
"eval_runtime": 1.3002,
"eval_samples_per_second": 76.143,
"eval_steps_per_second": 3.076,
"step": 91
},
{
"epoch": 13.71,
"grad_norm": 1.4530500173568726,
"learning_rate": 3.650793650793651e-05,
"loss": 0.1379,
"step": 96
},
{
"epoch": 14.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.044787079095840454,
"eval_runtime": 1.303,
"eval_samples_per_second": 75.979,
"eval_steps_per_second": 3.07,
"step": 98
},
{
"epoch": 15.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.03611420467495918,
"eval_runtime": 1.309,
"eval_samples_per_second": 75.628,
"eval_steps_per_second": 3.056,
"step": 105
},
{
"epoch": 15.43,
"grad_norm": 1.3324140310287476,
"learning_rate": 3.412698412698413e-05,
"loss": 0.092,
"step": 108
},
{
"epoch": 16.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.032215822488069534,
"eval_runtime": 1.3002,
"eval_samples_per_second": 76.141,
"eval_steps_per_second": 3.076,
"step": 112
},
{
"epoch": 17.0,
"eval_accuracy": 1.0,
"eval_loss": 0.021263618022203445,
"eval_runtime": 1.307,
"eval_samples_per_second": 75.748,
"eval_steps_per_second": 3.061,
"step": 119
},
{
"epoch": 17.14,
"grad_norm": 2.2976629734039307,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.0762,
"step": 120
},
{
"epoch": 18.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.04688708856701851,
"eval_runtime": 1.315,
"eval_samples_per_second": 75.288,
"eval_steps_per_second": 3.042,
"step": 126
},
{
"epoch": 18.86,
"grad_norm": 2.308321714401245,
"learning_rate": 2.9365079365079366e-05,
"loss": 0.0954,
"step": 132
},
{
"epoch": 19.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.061472997069358826,
"eval_runtime": 1.3031,
"eval_samples_per_second": 75.971,
"eval_steps_per_second": 3.07,
"step": 133
},
{
"epoch": 20.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.03133073449134827,
"eval_runtime": 1.2993,
"eval_samples_per_second": 76.194,
"eval_steps_per_second": 3.079,
"step": 140
},
{
"epoch": 20.57,
"grad_norm": 1.274895191192627,
"learning_rate": 2.6984126984126984e-05,
"loss": 0.0795,
"step": 144
},
{
"epoch": 21.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.0380566343665123,
"eval_runtime": 1.3136,
"eval_samples_per_second": 75.368,
"eval_steps_per_second": 3.045,
"step": 147
},
{
"epoch": 22.0,
"eval_accuracy": 1.0,
"eval_loss": 0.013774486258625984,
"eval_runtime": 1.309,
"eval_samples_per_second": 75.628,
"eval_steps_per_second": 3.056,
"step": 154
},
{
"epoch": 22.29,
"grad_norm": 1.425993800163269,
"learning_rate": 2.4603174603174602e-05,
"loss": 0.077,
"step": 156
},
{
"epoch": 23.0,
"eval_accuracy": 1.0,
"eval_loss": 0.01703532226383686,
"eval_runtime": 1.3122,
"eval_samples_per_second": 75.446,
"eval_steps_per_second": 3.048,
"step": 161
},
{
"epoch": 24.0,
"grad_norm": 0.986053466796875,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.0675,
"step": 168
},
{
"epoch": 24.0,
"eval_accuracy": 1.0,
"eval_loss": 0.010675261728465557,
"eval_runtime": 1.3159,
"eval_samples_per_second": 75.231,
"eval_steps_per_second": 3.04,
"step": 168
},
{
"epoch": 25.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.019328022375702858,
"eval_runtime": 1.3069,
"eval_samples_per_second": 75.752,
"eval_steps_per_second": 3.061,
"step": 175
},
{
"epoch": 25.71,
"grad_norm": 2.7184066772460938,
"learning_rate": 1.984126984126984e-05,
"loss": 0.0659,
"step": 180
},
{
"epoch": 26.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.025486120954155922,
"eval_runtime": 1.3084,
"eval_samples_per_second": 75.667,
"eval_steps_per_second": 3.057,
"step": 182
},
{
"epoch": 27.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.020136240869760513,
"eval_runtime": 1.3096,
"eval_samples_per_second": 75.596,
"eval_steps_per_second": 3.054,
"step": 189
},
{
"epoch": 27.43,
"grad_norm": 2.4670774936676025,
"learning_rate": 1.746031746031746e-05,
"loss": 0.0758,
"step": 192
},
{
"epoch": 28.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.03251149132847786,
"eval_runtime": 1.2895,
"eval_samples_per_second": 76.775,
"eval_steps_per_second": 3.102,
"step": 196
},
{
"epoch": 29.0,
"eval_accuracy": 1.0,
"eval_loss": 0.011012612842023373,
"eval_runtime": 1.3105,
"eval_samples_per_second": 75.542,
"eval_steps_per_second": 3.052,
"step": 203
},
{
"epoch": 29.14,
"grad_norm": 1.395484209060669,
"learning_rate": 1.5079365079365079e-05,
"loss": 0.0589,
"step": 204
},
{
"epoch": 30.0,
"eval_accuracy": 1.0,
"eval_loss": 0.0159281175583601,
"eval_runtime": 1.3316,
"eval_samples_per_second": 74.346,
"eval_steps_per_second": 3.004,
"step": 210
},
{
"epoch": 30.86,
"grad_norm": 1.4966486692428589,
"learning_rate": 1.2698412698412699e-05,
"loss": 0.0521,
"step": 216
},
{
"epoch": 31.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.031857237219810486,
"eval_runtime": 1.2919,
"eval_samples_per_second": 76.629,
"eval_steps_per_second": 3.096,
"step": 217
},
{
"epoch": 32.0,
"eval_accuracy": 0.9797979797979798,
"eval_loss": 0.029438314959406853,
"eval_runtime": 1.3011,
"eval_samples_per_second": 76.091,
"eval_steps_per_second": 3.074,
"step": 224
},
{
"epoch": 32.57,
"grad_norm": 1.3882635831832886,
"learning_rate": 1.0317460317460318e-05,
"loss": 0.0618,
"step": 228
},
{
"epoch": 33.0,
"eval_accuracy": 0.9797979797979798,
"eval_loss": 0.039191748946905136,
"eval_runtime": 1.3063,
"eval_samples_per_second": 75.788,
"eval_steps_per_second": 3.062,
"step": 231
},
{
"epoch": 34.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.026867415755987167,
"eval_runtime": 1.2944,
"eval_samples_per_second": 76.481,
"eval_steps_per_second": 3.09,
"step": 238
},
{
"epoch": 34.29,
"grad_norm": 1.203762412071228,
"learning_rate": 7.936507936507936e-06,
"loss": 0.0422,
"step": 240
},
{
"epoch": 35.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.021003253757953644,
"eval_runtime": 1.3003,
"eval_samples_per_second": 76.137,
"eval_steps_per_second": 3.076,
"step": 245
},
{
"epoch": 36.0,
"grad_norm": 1.9063193798065186,
"learning_rate": 5.555555555555556e-06,
"loss": 0.0551,
"step": 252
},
{
"epoch": 36.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.01777764968574047,
"eval_runtime": 1.456,
"eval_samples_per_second": 67.993,
"eval_steps_per_second": 2.747,
"step": 252
},
{
"epoch": 37.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.01593274623155594,
"eval_runtime": 1.3158,
"eval_samples_per_second": 75.242,
"eval_steps_per_second": 3.04,
"step": 259
},
{
"epoch": 37.71,
"grad_norm": 1.4439316987991333,
"learning_rate": 3.1746031746031746e-06,
"loss": 0.0518,
"step": 264
},
{
"epoch": 38.0,
"eval_accuracy": 0.98989898989899,
"eval_loss": 0.012379774823784828,
"eval_runtime": 1.3454,
"eval_samples_per_second": 73.583,
"eval_steps_per_second": 2.973,
"step": 266
},
{
"epoch": 39.0,
"eval_accuracy": 1.0,
"eval_loss": 0.011175006628036499,
"eval_runtime": 1.4492,
"eval_samples_per_second": 68.315,
"eval_steps_per_second": 2.76,
"step": 273
},
{
"epoch": 39.43,
"grad_norm": 1.1414995193481445,
"learning_rate": 7.936507936507937e-07,
"loss": 0.0313,
"step": 276
},
{
"epoch": 40.0,
"eval_accuracy": 1.0,
"eval_loss": 0.01096320990473032,
"eval_runtime": 1.3095,
"eval_samples_per_second": 75.6,
"eval_steps_per_second": 3.055,
"step": 280
},
{
"epoch": 40.0,
"step": 280,
"total_flos": 2.7494650758139085e+18,
"train_loss": 0.2073148890797581,
"train_runtime": 1492.3317,
"train_samples_per_second": 23.775,
"train_steps_per_second": 0.188
}
],
"logging_steps": 12,
"max_steps": 280,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 2.7494650758139085e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}