|
{ |
|
"best_metric": 0.5666666666666667, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-42C/checkpoint-232", |
|
"epoch": 37.935483870967744, |
|
"eval_steps": 500, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.13333333333333333, |
|
"eval_loss": 1.4545705318450928, |
|
"eval_runtime": 2.7807, |
|
"eval_samples_per_second": 21.578, |
|
"eval_steps_per_second": 0.719, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 5.473387241363525, |
|
"learning_rate": 4e-07, |
|
"loss": 1.5342, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.13333333333333333, |
|
"eval_loss": 1.4379044771194458, |
|
"eval_runtime": 1.5117, |
|
"eval_samples_per_second": 39.69, |
|
"eval_steps_per_second": 1.323, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.16666666666666666, |
|
"eval_loss": 1.411478042602539, |
|
"eval_runtime": 1.4724, |
|
"eval_samples_per_second": 40.749, |
|
"eval_steps_per_second": 1.358, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 6.9020280838012695, |
|
"learning_rate": 8e-07, |
|
"loss": 1.5331, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2, |
|
"eval_loss": 1.3786956071853638, |
|
"eval_runtime": 1.4845, |
|
"eval_samples_per_second": 40.418, |
|
"eval_steps_per_second": 1.347, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 4.692047119140625, |
|
"learning_rate": 9.772727272727273e-07, |
|
"loss": 1.4639, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.2833333333333333, |
|
"eval_loss": 1.351299524307251, |
|
"eval_runtime": 1.5497, |
|
"eval_samples_per_second": 38.718, |
|
"eval_steps_per_second": 1.291, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.3333333333333333, |
|
"eval_loss": 1.3290389776229858, |
|
"eval_runtime": 1.48, |
|
"eval_samples_per_second": 40.54, |
|
"eval_steps_per_second": 1.351, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"grad_norm": 3.2680552005767822, |
|
"learning_rate": 9.318181818181817e-07, |
|
"loss": 1.4056, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.38333333333333336, |
|
"eval_loss": 1.3113869428634644, |
|
"eval_runtime": 1.5135, |
|
"eval_samples_per_second": 39.644, |
|
"eval_steps_per_second": 1.321, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"grad_norm": 2.349689245223999, |
|
"learning_rate": 8.863636363636363e-07, |
|
"loss": 1.3679, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.43333333333333335, |
|
"eval_loss": 1.2940715551376343, |
|
"eval_runtime": 1.9694, |
|
"eval_samples_per_second": 30.465, |
|
"eval_steps_per_second": 1.016, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.2827337980270386, |
|
"eval_runtime": 1.9571, |
|
"eval_samples_per_second": 30.658, |
|
"eval_steps_per_second": 1.022, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"grad_norm": 2.8183329105377197, |
|
"learning_rate": 8.409090909090909e-07, |
|
"loss": 1.3387, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.2678014039993286, |
|
"eval_runtime": 1.5047, |
|
"eval_samples_per_second": 39.874, |
|
"eval_steps_per_second": 1.329, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"grad_norm": 2.4520814418792725, |
|
"learning_rate": 7.954545454545454e-07, |
|
"loss": 1.2992, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.2557296752929688, |
|
"eval_runtime": 1.8046, |
|
"eval_samples_per_second": 33.248, |
|
"eval_steps_per_second": 1.108, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.2453906536102295, |
|
"eval_runtime": 1.4943, |
|
"eval_samples_per_second": 40.152, |
|
"eval_steps_per_second": 1.338, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"grad_norm": 2.357367992401123, |
|
"learning_rate": 7.5e-07, |
|
"loss": 1.2797, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.234529972076416, |
|
"eval_runtime": 1.5563, |
|
"eval_samples_per_second": 38.554, |
|
"eval_steps_per_second": 1.285, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"grad_norm": 2.453970432281494, |
|
"learning_rate": 7.045454545454545e-07, |
|
"loss": 1.2507, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.221469759941101, |
|
"eval_runtime": 1.4885, |
|
"eval_samples_per_second": 40.308, |
|
"eval_steps_per_second": 1.344, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.2108979225158691, |
|
"eval_runtime": 1.4973, |
|
"eval_samples_per_second": 40.072, |
|
"eval_steps_per_second": 1.336, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"grad_norm": 2.9230761528015137, |
|
"learning_rate": 6.59090909090909e-07, |
|
"loss": 1.2337, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.2004987001419067, |
|
"eval_runtime": 1.7915, |
|
"eval_samples_per_second": 33.492, |
|
"eval_steps_per_second": 1.116, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.1903845071792603, |
|
"eval_runtime": 1.4887, |
|
"eval_samples_per_second": 40.304, |
|
"eval_steps_per_second": 1.343, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"grad_norm": 2.8677401542663574, |
|
"learning_rate": 6.136363636363636e-07, |
|
"loss": 1.2076, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 1.1796098947525024, |
|
"eval_runtime": 1.4929, |
|
"eval_samples_per_second": 40.191, |
|
"eval_steps_per_second": 1.34, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"grad_norm": 2.4694573879241943, |
|
"learning_rate": 5.681818181818182e-07, |
|
"loss": 1.1968, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1699223518371582, |
|
"eval_runtime": 1.4962, |
|
"eval_samples_per_second": 40.102, |
|
"eval_steps_per_second": 1.337, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1609662771224976, |
|
"eval_runtime": 2.0442, |
|
"eval_samples_per_second": 29.351, |
|
"eval_steps_per_second": 0.978, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"grad_norm": 2.54495906829834, |
|
"learning_rate": 5.227272727272727e-07, |
|
"loss": 1.171, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1543792486190796, |
|
"eval_runtime": 1.4771, |
|
"eval_samples_per_second": 40.621, |
|
"eval_steps_per_second": 1.354, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"grad_norm": 2.391249418258667, |
|
"learning_rate": 4.772727272727273e-07, |
|
"loss": 1.1572, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1475883722305298, |
|
"eval_runtime": 1.4966, |
|
"eval_samples_per_second": 40.091, |
|
"eval_steps_per_second": 1.336, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1411240100860596, |
|
"eval_runtime": 1.5033, |
|
"eval_samples_per_second": 39.912, |
|
"eval_steps_per_second": 1.33, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"grad_norm": 2.524062395095825, |
|
"learning_rate": 4.318181818181818e-07, |
|
"loss": 1.1383, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.135024905204773, |
|
"eval_runtime": 1.6434, |
|
"eval_samples_per_second": 36.51, |
|
"eval_steps_per_second": 1.217, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"grad_norm": 2.7990708351135254, |
|
"learning_rate": 3.8636363636363636e-07, |
|
"loss": 1.14, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1297781467437744, |
|
"eval_runtime": 1.4951, |
|
"eval_samples_per_second": 40.131, |
|
"eval_steps_per_second": 1.338, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1256134510040283, |
|
"eval_runtime": 1.4962, |
|
"eval_samples_per_second": 40.102, |
|
"eval_steps_per_second": 1.337, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"grad_norm": 2.7583816051483154, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"loss": 1.1114, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1212241649627686, |
|
"eval_runtime": 1.6451, |
|
"eval_samples_per_second": 36.472, |
|
"eval_steps_per_second": 1.216, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"grad_norm": 2.4490418434143066, |
|
"learning_rate": 2.9545454545454545e-07, |
|
"loss": 1.1094, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1173356771469116, |
|
"eval_runtime": 1.5074, |
|
"eval_samples_per_second": 39.803, |
|
"eval_steps_per_second": 1.327, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1143361330032349, |
|
"eval_runtime": 1.5142, |
|
"eval_samples_per_second": 39.625, |
|
"eval_steps_per_second": 1.321, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"grad_norm": 2.93764066696167, |
|
"learning_rate": 2.5e-07, |
|
"loss": 1.0872, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.111220121383667, |
|
"eval_runtime": 1.4968, |
|
"eval_samples_per_second": 40.085, |
|
"eval_steps_per_second": 1.336, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"grad_norm": 2.4049625396728516, |
|
"learning_rate": 2.0454545454545456e-07, |
|
"loss": 1.0941, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.1078248023986816, |
|
"eval_runtime": 1.9337, |
|
"eval_samples_per_second": 31.028, |
|
"eval_steps_per_second": 1.034, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.1053519248962402, |
|
"eval_runtime": 1.483, |
|
"eval_samples_per_second": 40.459, |
|
"eval_steps_per_second": 1.349, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"grad_norm": 2.444944381713867, |
|
"learning_rate": 1.5909090909090907e-07, |
|
"loss": 1.0882, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.1032441854476929, |
|
"eval_runtime": 1.5278, |
|
"eval_samples_per_second": 39.272, |
|
"eval_steps_per_second": 1.309, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.101210355758667, |
|
"eval_runtime": 1.4836, |
|
"eval_samples_per_second": 40.443, |
|
"eval_steps_per_second": 1.348, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.06, |
|
"grad_norm": 2.511270523071289, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"loss": 1.0685, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.099798560142517, |
|
"eval_runtime": 2.009, |
|
"eval_samples_per_second": 29.866, |
|
"eval_steps_per_second": 0.996, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 35.61, |
|
"grad_norm": 2.6111106872558594, |
|
"learning_rate": 6.818181818181817e-08, |
|
"loss": 1.0775, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.0987868309020996, |
|
"eval_runtime": 1.5072, |
|
"eval_samples_per_second": 39.81, |
|
"eval_steps_per_second": 1.327, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.098325252532959, |
|
"eval_runtime": 1.4717, |
|
"eval_samples_per_second": 40.77, |
|
"eval_steps_per_second": 1.359, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"grad_norm": 2.345568895339966, |
|
"learning_rate": 2.2727272727272725e-08, |
|
"loss": 1.0817, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.0981043577194214, |
|
"eval_runtime": 1.576, |
|
"eval_samples_per_second": 38.072, |
|
"eval_steps_per_second": 1.269, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"step": 294, |
|
"total_flos": 2.864620236542755e+18, |
|
"train_loss": 1.2232356217442726, |
|
"train_runtime": 1646.5284, |
|
"train_samples_per_second": 24.845, |
|
"train_steps_per_second": 0.179 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 42, |
|
"save_steps": 500, |
|
"total_flos": 2.864620236542755e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|