Augusto777's picture
End of training
4c3ec2b verified
{
"best_metric": 0.55,
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-42D/checkpoint-15",
"epoch": 37.935483870967744,
"eval_steps": 500,
"global_step": 294,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9,
"eval_accuracy": 0.5166666666666667,
"eval_loss": 1.0970458984375,
"eval_runtime": 1.6221,
"eval_samples_per_second": 36.99,
"eval_steps_per_second": 1.233,
"step": 7
},
{
"epoch": 1.55,
"grad_norm": 5.820129871368408,
"learning_rate": 0.0012000000000000001,
"loss": 1.3527,
"step": 12
},
{
"epoch": 1.94,
"eval_accuracy": 0.55,
"eval_loss": 1.038263201713562,
"eval_runtime": 1.563,
"eval_samples_per_second": 38.388,
"eval_steps_per_second": 1.28,
"step": 15
},
{
"epoch": 2.97,
"eval_accuracy": 0.4166666666666667,
"eval_loss": 1.235134243965149,
"eval_runtime": 2.1089,
"eval_samples_per_second": 28.451,
"eval_steps_per_second": 0.948,
"step": 23
},
{
"epoch": 3.1,
"grad_norm": 5.020723819732666,
"learning_rate": 0.0024000000000000002,
"loss": 1.3013,
"step": 24
},
{
"epoch": 4.0,
"eval_accuracy": 0.3333333333333333,
"eval_loss": 1.3025089502334595,
"eval_runtime": 1.6082,
"eval_samples_per_second": 37.309,
"eval_steps_per_second": 1.244,
"step": 31
},
{
"epoch": 4.65,
"grad_norm": 2.038219451904297,
"learning_rate": 0.002931818181818182,
"loss": 1.3706,
"step": 36
},
{
"epoch": 4.9,
"eval_accuracy": 0.21666666666666667,
"eval_loss": 1.3800110816955566,
"eval_runtime": 1.5927,
"eval_samples_per_second": 37.671,
"eval_steps_per_second": 1.256,
"step": 38
},
{
"epoch": 5.94,
"eval_accuracy": 0.18333333333333332,
"eval_loss": 1.4608864784240723,
"eval_runtime": 1.5994,
"eval_samples_per_second": 37.515,
"eval_steps_per_second": 1.25,
"step": 46
},
{
"epoch": 6.19,
"grad_norm": 1.954506754875183,
"learning_rate": 0.0027954545454545454,
"loss": 1.4415,
"step": 48
},
{
"epoch": 6.97,
"eval_accuracy": 0.43333333333333335,
"eval_loss": 1.3718478679656982,
"eval_runtime": 1.569,
"eval_samples_per_second": 38.242,
"eval_steps_per_second": 1.275,
"step": 54
},
{
"epoch": 7.74,
"grad_norm": 1.276404619216919,
"learning_rate": 0.002659090909090909,
"loss": 1.3602,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": 0.31666666666666665,
"eval_loss": 1.3172897100448608,
"eval_runtime": 1.564,
"eval_samples_per_second": 38.363,
"eval_steps_per_second": 1.279,
"step": 62
},
{
"epoch": 8.9,
"eval_accuracy": 0.4,
"eval_loss": 1.2827069759368896,
"eval_runtime": 1.5647,
"eval_samples_per_second": 38.345,
"eval_steps_per_second": 1.278,
"step": 69
},
{
"epoch": 9.29,
"grad_norm": 1.0918904542922974,
"learning_rate": 0.002522727272727273,
"loss": 1.3079,
"step": 72
},
{
"epoch": 9.94,
"eval_accuracy": 0.31666666666666665,
"eval_loss": 1.3166981935501099,
"eval_runtime": 1.6067,
"eval_samples_per_second": 37.343,
"eval_steps_per_second": 1.245,
"step": 77
},
{
"epoch": 10.84,
"grad_norm": 1.5578159093856812,
"learning_rate": 0.002386363636363636,
"loss": 1.3247,
"step": 84
},
{
"epoch": 10.97,
"eval_accuracy": 0.4,
"eval_loss": 1.257886528968811,
"eval_runtime": 1.5535,
"eval_samples_per_second": 38.623,
"eval_steps_per_second": 1.287,
"step": 85
},
{
"epoch": 12.0,
"eval_accuracy": 0.2,
"eval_loss": 1.3202433586120605,
"eval_runtime": 2.2419,
"eval_samples_per_second": 26.762,
"eval_steps_per_second": 0.892,
"step": 93
},
{
"epoch": 12.39,
"grad_norm": 0.5343012809753418,
"learning_rate": 0.0022500000000000003,
"loss": 1.3102,
"step": 96
},
{
"epoch": 12.9,
"eval_accuracy": 0.45,
"eval_loss": 1.2353752851486206,
"eval_runtime": 1.5721,
"eval_samples_per_second": 38.166,
"eval_steps_per_second": 1.272,
"step": 100
},
{
"epoch": 13.94,
"grad_norm": 1.3499153852462769,
"learning_rate": 0.002113636363636364,
"loss": 1.2807,
"step": 108
},
{
"epoch": 13.94,
"eval_accuracy": 0.25,
"eval_loss": 1.3610022068023682,
"eval_runtime": 1.9584,
"eval_samples_per_second": 30.638,
"eval_steps_per_second": 1.021,
"step": 108
},
{
"epoch": 14.97,
"eval_accuracy": 0.4,
"eval_loss": 1.2803313732147217,
"eval_runtime": 1.596,
"eval_samples_per_second": 37.594,
"eval_steps_per_second": 1.253,
"step": 116
},
{
"epoch": 15.48,
"grad_norm": 1.6496480703353882,
"learning_rate": 0.0019772727272727273,
"loss": 1.2774,
"step": 120
},
{
"epoch": 16.0,
"eval_accuracy": 0.21666666666666667,
"eval_loss": 1.3338415622711182,
"eval_runtime": 1.5818,
"eval_samples_per_second": 37.931,
"eval_steps_per_second": 1.264,
"step": 124
},
{
"epoch": 16.9,
"eval_accuracy": 0.35,
"eval_loss": 1.2548964023590088,
"eval_runtime": 1.5648,
"eval_samples_per_second": 38.344,
"eval_steps_per_second": 1.278,
"step": 131
},
{
"epoch": 17.03,
"grad_norm": 0.824222207069397,
"learning_rate": 0.001840909090909091,
"loss": 1.2596,
"step": 132
},
{
"epoch": 17.94,
"eval_accuracy": 0.36666666666666664,
"eval_loss": 1.2692508697509766,
"eval_runtime": 1.6871,
"eval_samples_per_second": 35.564,
"eval_steps_per_second": 1.185,
"step": 139
},
{
"epoch": 18.58,
"grad_norm": 0.44431018829345703,
"learning_rate": 0.0017045454545454547,
"loss": 1.2413,
"step": 144
},
{
"epoch": 18.97,
"eval_accuracy": 0.21666666666666667,
"eval_loss": 1.3005454540252686,
"eval_runtime": 2.5177,
"eval_samples_per_second": 23.831,
"eval_steps_per_second": 0.794,
"step": 147
},
{
"epoch": 20.0,
"eval_accuracy": 0.43333333333333335,
"eval_loss": 1.229854941368103,
"eval_runtime": 1.7681,
"eval_samples_per_second": 33.934,
"eval_steps_per_second": 1.131,
"step": 155
},
{
"epoch": 20.13,
"grad_norm": 1.6288515329360962,
"learning_rate": 0.0015681818181818182,
"loss": 1.262,
"step": 156
},
{
"epoch": 20.9,
"eval_accuracy": 0.26666666666666666,
"eval_loss": 1.3453844785690308,
"eval_runtime": 1.5884,
"eval_samples_per_second": 37.774,
"eval_steps_per_second": 1.259,
"step": 162
},
{
"epoch": 21.68,
"grad_norm": 1.0566848516464233,
"learning_rate": 0.0014318181818181819,
"loss": 1.2261,
"step": 168
},
{
"epoch": 21.94,
"eval_accuracy": 0.31666666666666665,
"eval_loss": 1.2818458080291748,
"eval_runtime": 1.5729,
"eval_samples_per_second": 38.146,
"eval_steps_per_second": 1.272,
"step": 170
},
{
"epoch": 22.97,
"eval_accuracy": 0.43333333333333335,
"eval_loss": 1.249794840812683,
"eval_runtime": 1.5575,
"eval_samples_per_second": 38.524,
"eval_steps_per_second": 1.284,
"step": 178
},
{
"epoch": 23.23,
"grad_norm": 1.7413015365600586,
"learning_rate": 0.0012954545454545456,
"loss": 1.2405,
"step": 180
},
{
"epoch": 24.0,
"eval_accuracy": 0.31666666666666665,
"eval_loss": 1.3376109600067139,
"eval_runtime": 1.6036,
"eval_samples_per_second": 37.415,
"eval_steps_per_second": 1.247,
"step": 186
},
{
"epoch": 24.77,
"grad_norm": 0.5584876537322998,
"learning_rate": 0.001159090909090909,
"loss": 1.2245,
"step": 192
},
{
"epoch": 24.9,
"eval_accuracy": 0.36666666666666664,
"eval_loss": 1.2595055103302002,
"eval_runtime": 1.5658,
"eval_samples_per_second": 38.32,
"eval_steps_per_second": 1.277,
"step": 193
},
{
"epoch": 25.94,
"eval_accuracy": 0.4,
"eval_loss": 1.331896424293518,
"eval_runtime": 2.0295,
"eval_samples_per_second": 29.564,
"eval_steps_per_second": 0.985,
"step": 201
},
{
"epoch": 26.32,
"grad_norm": 0.9537753462791443,
"learning_rate": 0.0010227272727272726,
"loss": 1.2034,
"step": 204
},
{
"epoch": 26.97,
"eval_accuracy": 0.38333333333333336,
"eval_loss": 1.25283944606781,
"eval_runtime": 1.5793,
"eval_samples_per_second": 37.992,
"eval_steps_per_second": 1.266,
"step": 209
},
{
"epoch": 27.87,
"grad_norm": 1.7752221822738647,
"learning_rate": 0.0008863636363636364,
"loss": 1.1818,
"step": 216
},
{
"epoch": 28.0,
"eval_accuracy": 0.36666666666666664,
"eval_loss": 1.3656209707260132,
"eval_runtime": 1.5691,
"eval_samples_per_second": 38.237,
"eval_steps_per_second": 1.275,
"step": 217
},
{
"epoch": 28.9,
"eval_accuracy": 0.38333333333333336,
"eval_loss": 1.2500847578048706,
"eval_runtime": 1.5809,
"eval_samples_per_second": 37.953,
"eval_steps_per_second": 1.265,
"step": 224
},
{
"epoch": 29.42,
"grad_norm": 1.1072659492492676,
"learning_rate": 0.00075,
"loss": 1.1479,
"step": 228
},
{
"epoch": 29.94,
"eval_accuracy": 0.3,
"eval_loss": 1.324063777923584,
"eval_runtime": 1.5986,
"eval_samples_per_second": 37.533,
"eval_steps_per_second": 1.251,
"step": 232
},
{
"epoch": 30.97,
"grad_norm": 0.8141500353813171,
"learning_rate": 0.0006136363636363637,
"loss": 1.1193,
"step": 240
},
{
"epoch": 30.97,
"eval_accuracy": 0.36666666666666664,
"eval_loss": 1.380292534828186,
"eval_runtime": 1.6105,
"eval_samples_per_second": 37.256,
"eval_steps_per_second": 1.242,
"step": 240
},
{
"epoch": 32.0,
"eval_accuracy": 0.4166666666666667,
"eval_loss": 1.2294162511825562,
"eval_runtime": 1.603,
"eval_samples_per_second": 37.429,
"eval_steps_per_second": 1.248,
"step": 248
},
{
"epoch": 32.52,
"grad_norm": 0.7440662384033203,
"learning_rate": 0.0004772727272727273,
"loss": 1.1071,
"step": 252
},
{
"epoch": 32.9,
"eval_accuracy": 0.5,
"eval_loss": 1.4134150743484497,
"eval_runtime": 1.5689,
"eval_samples_per_second": 38.243,
"eval_steps_per_second": 1.275,
"step": 255
},
{
"epoch": 33.94,
"eval_accuracy": 0.36666666666666664,
"eval_loss": 1.4123319387435913,
"eval_runtime": 1.5844,
"eval_samples_per_second": 37.869,
"eval_steps_per_second": 1.262,
"step": 263
},
{
"epoch": 34.06,
"grad_norm": 1.0041050910949707,
"learning_rate": 0.0003409090909090909,
"loss": 1.0429,
"step": 264
},
{
"epoch": 34.97,
"eval_accuracy": 0.5,
"eval_loss": 1.2183587551116943,
"eval_runtime": 1.607,
"eval_samples_per_second": 37.336,
"eval_steps_per_second": 1.245,
"step": 271
},
{
"epoch": 35.61,
"grad_norm": 1.336283564567566,
"learning_rate": 0.00020454545454545454,
"loss": 1.0528,
"step": 276
},
{
"epoch": 36.0,
"eval_accuracy": 0.45,
"eval_loss": 1.3099627494812012,
"eval_runtime": 2.0818,
"eval_samples_per_second": 28.821,
"eval_steps_per_second": 0.961,
"step": 279
},
{
"epoch": 36.9,
"eval_accuracy": 0.38333333333333336,
"eval_loss": 1.3248744010925293,
"eval_runtime": 1.5674,
"eval_samples_per_second": 38.28,
"eval_steps_per_second": 1.276,
"step": 286
},
{
"epoch": 37.16,
"grad_norm": 1.36141836643219,
"learning_rate": 6.818181818181818e-05,
"loss": 1.0055,
"step": 288
},
{
"epoch": 37.94,
"eval_accuracy": 0.5,
"eval_loss": 1.3050577640533447,
"eval_runtime": 1.5996,
"eval_samples_per_second": 37.51,
"eval_steps_per_second": 1.25,
"step": 294
},
{
"epoch": 37.94,
"step": 294,
"total_flos": 2.864620236542755e+18,
"train_loss": 1.2294491975485873,
"train_runtime": 1673.337,
"train_samples_per_second": 24.447,
"train_steps_per_second": 0.176
}
],
"logging_steps": 12,
"max_steps": 294,
"num_input_tokens_seen": 0,
"num_train_epochs": 42,
"save_steps": 500,
"total_flos": 2.864620236542755e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}