|
{ |
|
"best_metric": 0.7808219178082192, |
|
"best_model_checkpoint": "deit-small-patch16-224-finetuned-piid/checkpoint-307", |
|
"epoch": 19.51219512195122, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.4297, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.1537, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.547945205479452, |
|
"eval_loss": 1.0005042552947998, |
|
"eval_runtime": 1.0515, |
|
"eval_samples_per_second": 208.282, |
|
"eval_steps_per_second": 26.63, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.8255, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7025, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.593607305936073, |
|
"eval_loss": 0.8481215834617615, |
|
"eval_runtime": 1.2994, |
|
"eval_samples_per_second": 168.538, |
|
"eval_steps_per_second": 21.548, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.6057, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.6581, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.7214611872146118, |
|
"eval_loss": 0.6351255774497986, |
|
"eval_runtime": 1.0449, |
|
"eval_samples_per_second": 209.599, |
|
"eval_steps_per_second": 26.798, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.5469, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.5019, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7214611872146118, |
|
"eval_loss": 0.6695594191551208, |
|
"eval_runtime": 1.0755, |
|
"eval_samples_per_second": 203.631, |
|
"eval_steps_per_second": 26.035, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.5202, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.4708, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 0.5861416459083557, |
|
"eval_runtime": 1.2022, |
|
"eval_samples_per_second": 182.173, |
|
"eval_steps_per_second": 23.292, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 0.5432, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.3647, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.776255707762557, |
|
"eval_loss": 0.5583679676055908, |
|
"eval_runtime": 1.0741, |
|
"eval_samples_per_second": 203.885, |
|
"eval_steps_per_second": 26.068, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3456, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.2973, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 0.5783523321151733, |
|
"eval_runtime": 1.4218, |
|
"eval_samples_per_second": 154.026, |
|
"eval_steps_per_second": 19.693, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.3172, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.2827, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 0.5850614309310913, |
|
"eval_runtime": 1.1505, |
|
"eval_samples_per_second": 190.352, |
|
"eval_steps_per_second": 24.337, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 0.2399, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.237, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_accuracy": 0.7625570776255708, |
|
"eval_loss": 0.6790792942047119, |
|
"eval_runtime": 1.21, |
|
"eval_samples_per_second": 180.993, |
|
"eval_steps_per_second": 23.141, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.1989, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.2505, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7625570776255708, |
|
"eval_loss": 0.5550326108932495, |
|
"eval_runtime": 1.0754, |
|
"eval_samples_per_second": 203.65, |
|
"eval_steps_per_second": 26.037, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.2188, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2018, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_accuracy": 0.7625570776255708, |
|
"eval_loss": 0.5445629358291626, |
|
"eval_runtime": 1.0807, |
|
"eval_samples_per_second": 202.648, |
|
"eval_steps_per_second": 25.909, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 0.2028, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1841, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7442922374429224, |
|
"eval_loss": 0.5496687889099121, |
|
"eval_runtime": 1.1278, |
|
"eval_samples_per_second": 194.175, |
|
"eval_steps_per_second": 24.826, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.165, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.1692, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"eval_accuracy": 0.771689497716895, |
|
"eval_loss": 0.5916743278503418, |
|
"eval_runtime": 1.1094, |
|
"eval_samples_per_second": 197.411, |
|
"eval_steps_per_second": 25.24, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.2192, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.1624, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.776255707762557, |
|
"eval_loss": 0.5254391431808472, |
|
"eval_runtime": 1.1036, |
|
"eval_samples_per_second": 198.45, |
|
"eval_steps_per_second": 25.373, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.2206, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.1518, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_accuracy": 0.7808219178082192, |
|
"eval_loss": 0.5295854806900024, |
|
"eval_runtime": 1.059, |
|
"eval_samples_per_second": 206.808, |
|
"eval_steps_per_second": 26.441, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1465, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1275, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7625570776255708, |
|
"eval_loss": 0.585834801197052, |
|
"eval_runtime": 1.2635, |
|
"eval_samples_per_second": 173.334, |
|
"eval_steps_per_second": 22.161, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.1237, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.1107, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_accuracy": 0.776255707762557, |
|
"eval_loss": 0.591891348361969, |
|
"eval_runtime": 1.0639, |
|
"eval_samples_per_second": 205.84, |
|
"eval_steps_per_second": 26.317, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1422, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1192, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.771689497716895, |
|
"eval_loss": 0.6027428507804871, |
|
"eval_runtime": 1.0536, |
|
"eval_samples_per_second": 207.851, |
|
"eval_steps_per_second": 26.575, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 18.05, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.1806, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0842, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_accuracy": 0.771689497716895, |
|
"eval_loss": 0.6435291171073914, |
|
"eval_runtime": 1.0647, |
|
"eval_samples_per_second": 205.689, |
|
"eval_steps_per_second": 26.298, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.1594, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 0.0, |
|
"loss": 0.1472, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 0.6201618313789368, |
|
"eval_runtime": 1.0892, |
|
"eval_samples_per_second": 201.069, |
|
"eval_steps_per_second": 25.707, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"step": 400, |
|
"total_flos": 2.493693637268521e+17, |
|
"train_loss": 0.3432186938822269, |
|
"train_runtime": 208.3396, |
|
"train_samples_per_second": 62.686, |
|
"train_steps_per_second": 1.92 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 400, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 2.493693637268521e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|