|
{ |
|
"best_metric": 0.8166666666666667, |
|
"best_model_checkpoint": "beit-base-patch16-224-dmae-va-U5-42/checkpoint-224", |
|
"epoch": 37.935483870967744, |
|
"eval_steps": 500, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.347098708152771, |
|
"eval_runtime": 1.5986, |
|
"eval_samples_per_second": 37.533, |
|
"eval_steps_per_second": 1.251, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 10.756444931030273, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.6023, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.0873388051986694, |
|
"eval_runtime": 1.6091, |
|
"eval_samples_per_second": 37.288, |
|
"eval_steps_per_second": 1.243, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 9.189943313598633, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.1509, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 0.9947898983955383, |
|
"eval_runtime": 2.0991, |
|
"eval_samples_per_second": 28.584, |
|
"eval_steps_per_second": 0.953, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"grad_norm": 11.76810073852539, |
|
"learning_rate": 5e-05, |
|
"loss": 0.826, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7244278192520142, |
|
"eval_runtime": 1.6205, |
|
"eval_samples_per_second": 37.025, |
|
"eval_steps_per_second": 1.234, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.5740881562232971, |
|
"eval_runtime": 2.1053, |
|
"eval_samples_per_second": 28.5, |
|
"eval_steps_per_second": 0.95, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"grad_norm": 10.483253479003906, |
|
"learning_rate": 4.810606060606061e-05, |
|
"loss": 0.5551, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6568958759307861, |
|
"eval_runtime": 2.0199, |
|
"eval_samples_per_second": 29.704, |
|
"eval_steps_per_second": 0.99, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"grad_norm": 6.183708667755127, |
|
"learning_rate": 4.621212121212121e-05, |
|
"loss": 0.3649, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.6321703195571899, |
|
"eval_runtime": 1.6185, |
|
"eval_samples_per_second": 37.07, |
|
"eval_steps_per_second": 1.236, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"grad_norm": 8.211569786071777, |
|
"learning_rate": 4.431818181818182e-05, |
|
"loss": 0.2592, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.6993545889854431, |
|
"eval_runtime": 1.7914, |
|
"eval_samples_per_second": 33.494, |
|
"eval_steps_per_second": 1.116, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.6589908599853516, |
|
"eval_runtime": 1.6239, |
|
"eval_samples_per_second": 36.949, |
|
"eval_steps_per_second": 1.232, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 6.782934665679932, |
|
"learning_rate": 4.242424242424243e-05, |
|
"loss": 0.1958, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.6845510601997375, |
|
"eval_runtime": 1.8061, |
|
"eval_samples_per_second": 33.22, |
|
"eval_steps_per_second": 1.107, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"grad_norm": 6.670173645019531, |
|
"learning_rate": 4.053030303030303e-05, |
|
"loss": 0.1664, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.7165997624397278, |
|
"eval_runtime": 1.6803, |
|
"eval_samples_per_second": 35.708, |
|
"eval_steps_per_second": 1.19, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"grad_norm": 7.649813175201416, |
|
"learning_rate": 3.8636363636363636e-05, |
|
"loss": 0.1571, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.7842047214508057, |
|
"eval_runtime": 1.6667, |
|
"eval_samples_per_second": 35.999, |
|
"eval_steps_per_second": 1.2, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"grad_norm": 5.415360927581787, |
|
"learning_rate": 3.6742424242424246e-05, |
|
"loss": 0.1174, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8464832901954651, |
|
"eval_runtime": 1.8073, |
|
"eval_samples_per_second": 33.198, |
|
"eval_steps_per_second": 1.107, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.9116414189338684, |
|
"eval_runtime": 1.6421, |
|
"eval_samples_per_second": 36.539, |
|
"eval_steps_per_second": 1.218, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"grad_norm": 3.7090325355529785, |
|
"learning_rate": 3.484848484848485e-05, |
|
"loss": 0.0956, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.9740654230117798, |
|
"eval_runtime": 1.6076, |
|
"eval_samples_per_second": 37.322, |
|
"eval_steps_per_second": 1.244, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"grad_norm": 7.993117809295654, |
|
"learning_rate": 3.295454545454545e-05, |
|
"loss": 0.1252, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7759976983070374, |
|
"eval_runtime": 1.6595, |
|
"eval_samples_per_second": 36.154, |
|
"eval_steps_per_second": 1.205, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"grad_norm": 3.9146523475646973, |
|
"learning_rate": 3.106060606060606e-05, |
|
"loss": 0.0933, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9424102902412415, |
|
"eval_runtime": 2.1274, |
|
"eval_samples_per_second": 28.203, |
|
"eval_steps_per_second": 0.94, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.0444563627243042, |
|
"eval_runtime": 1.6154, |
|
"eval_samples_per_second": 37.143, |
|
"eval_steps_per_second": 1.238, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"grad_norm": 7.39138126373291, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.1455, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.8524726629257202, |
|
"eval_runtime": 1.681, |
|
"eval_samples_per_second": 35.693, |
|
"eval_steps_per_second": 1.19, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"grad_norm": 3.7276432514190674, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.1034, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.822151243686676, |
|
"eval_runtime": 2.0162, |
|
"eval_samples_per_second": 29.759, |
|
"eval_steps_per_second": 0.992, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"grad_norm": 4.372833728790283, |
|
"learning_rate": 2.537878787878788e-05, |
|
"loss": 0.0855, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.8990534543991089, |
|
"eval_runtime": 2.1376, |
|
"eval_samples_per_second": 28.069, |
|
"eval_steps_per_second": 0.936, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"grad_norm": 6.204973220825195, |
|
"learning_rate": 2.3484848484848487e-05, |
|
"loss": 0.0985, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8954617977142334, |
|
"eval_runtime": 1.7159, |
|
"eval_samples_per_second": 34.968, |
|
"eval_steps_per_second": 1.166, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.9603295922279358, |
|
"eval_runtime": 1.628, |
|
"eval_samples_per_second": 36.854, |
|
"eval_steps_per_second": 1.228, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"grad_norm": 2.5669796466827393, |
|
"learning_rate": 2.1590909090909093e-05, |
|
"loss": 0.087, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9932332634925842, |
|
"eval_runtime": 1.6549, |
|
"eval_samples_per_second": 36.255, |
|
"eval_steps_per_second": 1.209, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"grad_norm": 4.33466911315918, |
|
"learning_rate": 1.9696969696969697e-05, |
|
"loss": 0.0832, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 1.0099666118621826, |
|
"eval_runtime": 1.6219, |
|
"eval_samples_per_second": 36.994, |
|
"eval_steps_per_second": 1.233, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"grad_norm": 2.6737489700317383, |
|
"learning_rate": 1.7803030303030303e-05, |
|
"loss": 0.0632, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.9393168091773987, |
|
"eval_runtime": 1.62, |
|
"eval_samples_per_second": 37.037, |
|
"eval_steps_per_second": 1.235, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9061955213546753, |
|
"eval_runtime": 2.0226, |
|
"eval_samples_per_second": 29.665, |
|
"eval_steps_per_second": 0.989, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"grad_norm": 6.029117107391357, |
|
"learning_rate": 1.590909090909091e-05, |
|
"loss": 0.0778, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9339290857315063, |
|
"eval_runtime": 2.1654, |
|
"eval_samples_per_second": 27.708, |
|
"eval_steps_per_second": 0.924, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"grad_norm": 3.781505584716797, |
|
"learning_rate": 1.4015151515151515e-05, |
|
"loss": 0.0627, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 1.003859043121338, |
|
"eval_runtime": 1.6395, |
|
"eval_samples_per_second": 36.596, |
|
"eval_steps_per_second": 1.22, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"grad_norm": 8.317666053771973, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 0.0837, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.0636054277420044, |
|
"eval_runtime": 2.1368, |
|
"eval_samples_per_second": 28.079, |
|
"eval_steps_per_second": 0.936, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"grad_norm": 4.580255508422852, |
|
"learning_rate": 1.0227272727272729e-05, |
|
"loss": 0.0595, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 1.0424180030822754, |
|
"eval_runtime": 1.9745, |
|
"eval_samples_per_second": 30.387, |
|
"eval_steps_per_second": 1.013, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 1.0514304637908936, |
|
"eval_runtime": 1.6234, |
|
"eval_samples_per_second": 36.96, |
|
"eval_steps_per_second": 1.232, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"grad_norm": 3.7219626903533936, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0706, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 1.0638715028762817, |
|
"eval_runtime": 1.6117, |
|
"eval_samples_per_second": 37.228, |
|
"eval_steps_per_second": 1.241, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.55, |
|
"grad_norm": 2.273451566696167, |
|
"learning_rate": 6.43939393939394e-06, |
|
"loss": 0.0565, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 1.0494216680526733, |
|
"eval_runtime": 1.9391, |
|
"eval_samples_per_second": 30.942, |
|
"eval_steps_per_second": 1.031, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"grad_norm": 2.9928078651428223, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.0515, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 1.0627790689468384, |
|
"eval_runtime": 1.5942, |
|
"eval_samples_per_second": 37.636, |
|
"eval_steps_per_second": 1.255, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 1.1089335680007935, |
|
"eval_runtime": 1.74, |
|
"eval_samples_per_second": 34.484, |
|
"eval_steps_per_second": 1.149, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"grad_norm": 6.05413818359375, |
|
"learning_rate": 2.651515151515152e-06, |
|
"loss": 0.0614, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 1.086135983467102, |
|
"eval_runtime": 1.5851, |
|
"eval_samples_per_second": 37.853, |
|
"eval_steps_per_second": 1.262, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.42, |
|
"grad_norm": 3.7136027812957764, |
|
"learning_rate": 7.575757575757576e-07, |
|
"loss": 0.0496, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 1.0713495016098022, |
|
"eval_runtime": 1.6733, |
|
"eval_samples_per_second": 35.857, |
|
"eval_steps_per_second": 1.195, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"step": 294, |
|
"total_flos": 2.8633958865108173e+18, |
|
"train_loss": 0.23703576421656578, |
|
"train_runtime": 1686.3425, |
|
"train_samples_per_second": 24.258, |
|
"train_steps_per_second": 0.174 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 42, |
|
"save_steps": 500, |
|
"total_flos": 2.8633958865108173e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|