|
{ |
|
"best_metric": 0.55, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-42D/checkpoint-15", |
|
"epoch": 37.935483870967744, |
|
"eval_steps": 500, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 1.0970458984375, |
|
"eval_runtime": 1.6221, |
|
"eval_samples_per_second": 36.99, |
|
"eval_steps_per_second": 1.233, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 5.820129871368408, |
|
"learning_rate": 0.0012000000000000001, |
|
"loss": 1.3527, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.038263201713562, |
|
"eval_runtime": 1.563, |
|
"eval_samples_per_second": 38.388, |
|
"eval_steps_per_second": 1.28, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.4166666666666667, |
|
"eval_loss": 1.235134243965149, |
|
"eval_runtime": 2.1089, |
|
"eval_samples_per_second": 28.451, |
|
"eval_steps_per_second": 0.948, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 5.020723819732666, |
|
"learning_rate": 0.0024000000000000002, |
|
"loss": 1.3013, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3333333333333333, |
|
"eval_loss": 1.3025089502334595, |
|
"eval_runtime": 1.6082, |
|
"eval_samples_per_second": 37.309, |
|
"eval_steps_per_second": 1.244, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 2.038219451904297, |
|
"learning_rate": 0.002931818181818182, |
|
"loss": 1.3706, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.21666666666666667, |
|
"eval_loss": 1.3800110816955566, |
|
"eval_runtime": 1.5927, |
|
"eval_samples_per_second": 37.671, |
|
"eval_steps_per_second": 1.256, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.18333333333333332, |
|
"eval_loss": 1.4608864784240723, |
|
"eval_runtime": 1.5994, |
|
"eval_samples_per_second": 37.515, |
|
"eval_steps_per_second": 1.25, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"grad_norm": 1.954506754875183, |
|
"learning_rate": 0.0027954545454545454, |
|
"loss": 1.4415, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.43333333333333335, |
|
"eval_loss": 1.3718478679656982, |
|
"eval_runtime": 1.569, |
|
"eval_samples_per_second": 38.242, |
|
"eval_steps_per_second": 1.275, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"grad_norm": 1.276404619216919, |
|
"learning_rate": 0.002659090909090909, |
|
"loss": 1.3602, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.31666666666666665, |
|
"eval_loss": 1.3172897100448608, |
|
"eval_runtime": 1.564, |
|
"eval_samples_per_second": 38.363, |
|
"eval_steps_per_second": 1.279, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 1.2827069759368896, |
|
"eval_runtime": 1.5647, |
|
"eval_samples_per_second": 38.345, |
|
"eval_steps_per_second": 1.278, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"grad_norm": 1.0918904542922974, |
|
"learning_rate": 0.002522727272727273, |
|
"loss": 1.3079, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.31666666666666665, |
|
"eval_loss": 1.3166981935501099, |
|
"eval_runtime": 1.6067, |
|
"eval_samples_per_second": 37.343, |
|
"eval_steps_per_second": 1.245, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"grad_norm": 1.5578159093856812, |
|
"learning_rate": 0.002386363636363636, |
|
"loss": 1.3247, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 1.257886528968811, |
|
"eval_runtime": 1.5535, |
|
"eval_samples_per_second": 38.623, |
|
"eval_steps_per_second": 1.287, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.2, |
|
"eval_loss": 1.3202433586120605, |
|
"eval_runtime": 2.2419, |
|
"eval_samples_per_second": 26.762, |
|
"eval_steps_per_second": 0.892, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"grad_norm": 0.5343012809753418, |
|
"learning_rate": 0.0022500000000000003, |
|
"loss": 1.3102, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.2353752851486206, |
|
"eval_runtime": 1.5721, |
|
"eval_samples_per_second": 38.166, |
|
"eval_steps_per_second": 1.272, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"grad_norm": 1.3499153852462769, |
|
"learning_rate": 0.002113636363636364, |
|
"loss": 1.2807, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.25, |
|
"eval_loss": 1.3610022068023682, |
|
"eval_runtime": 1.9584, |
|
"eval_samples_per_second": 30.638, |
|
"eval_steps_per_second": 1.021, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 1.2803313732147217, |
|
"eval_runtime": 1.596, |
|
"eval_samples_per_second": 37.594, |
|
"eval_steps_per_second": 1.253, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"grad_norm": 1.6496480703353882, |
|
"learning_rate": 0.0019772727272727273, |
|
"loss": 1.2774, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.21666666666666667, |
|
"eval_loss": 1.3338415622711182, |
|
"eval_runtime": 1.5818, |
|
"eval_samples_per_second": 37.931, |
|
"eval_steps_per_second": 1.264, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.35, |
|
"eval_loss": 1.2548964023590088, |
|
"eval_runtime": 1.5648, |
|
"eval_samples_per_second": 38.344, |
|
"eval_steps_per_second": 1.278, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"grad_norm": 0.824222207069397, |
|
"learning_rate": 0.001840909090909091, |
|
"loss": 1.2596, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.36666666666666664, |
|
"eval_loss": 1.2692508697509766, |
|
"eval_runtime": 1.6871, |
|
"eval_samples_per_second": 35.564, |
|
"eval_steps_per_second": 1.185, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"grad_norm": 0.44431018829345703, |
|
"learning_rate": 0.0017045454545454547, |
|
"loss": 1.2413, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.21666666666666667, |
|
"eval_loss": 1.3005454540252686, |
|
"eval_runtime": 2.5177, |
|
"eval_samples_per_second": 23.831, |
|
"eval_steps_per_second": 0.794, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.43333333333333335, |
|
"eval_loss": 1.229854941368103, |
|
"eval_runtime": 1.7681, |
|
"eval_samples_per_second": 33.934, |
|
"eval_steps_per_second": 1.131, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"grad_norm": 1.6288515329360962, |
|
"learning_rate": 0.0015681818181818182, |
|
"loss": 1.262, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.26666666666666666, |
|
"eval_loss": 1.3453844785690308, |
|
"eval_runtime": 1.5884, |
|
"eval_samples_per_second": 37.774, |
|
"eval_steps_per_second": 1.259, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"grad_norm": 1.0566848516464233, |
|
"learning_rate": 0.0014318181818181819, |
|
"loss": 1.2261, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.31666666666666665, |
|
"eval_loss": 1.2818458080291748, |
|
"eval_runtime": 1.5729, |
|
"eval_samples_per_second": 38.146, |
|
"eval_steps_per_second": 1.272, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.43333333333333335, |
|
"eval_loss": 1.249794840812683, |
|
"eval_runtime": 1.5575, |
|
"eval_samples_per_second": 38.524, |
|
"eval_steps_per_second": 1.284, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"grad_norm": 1.7413015365600586, |
|
"learning_rate": 0.0012954545454545456, |
|
"loss": 1.2405, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.31666666666666665, |
|
"eval_loss": 1.3376109600067139, |
|
"eval_runtime": 1.6036, |
|
"eval_samples_per_second": 37.415, |
|
"eval_steps_per_second": 1.247, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"grad_norm": 0.5584876537322998, |
|
"learning_rate": 0.001159090909090909, |
|
"loss": 1.2245, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.36666666666666664, |
|
"eval_loss": 1.2595055103302002, |
|
"eval_runtime": 1.5658, |
|
"eval_samples_per_second": 38.32, |
|
"eval_steps_per_second": 1.277, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 1.331896424293518, |
|
"eval_runtime": 2.0295, |
|
"eval_samples_per_second": 29.564, |
|
"eval_steps_per_second": 0.985, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"grad_norm": 0.9537753462791443, |
|
"learning_rate": 0.0010227272727272726, |
|
"loss": 1.2034, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.38333333333333336, |
|
"eval_loss": 1.25283944606781, |
|
"eval_runtime": 1.5793, |
|
"eval_samples_per_second": 37.992, |
|
"eval_steps_per_second": 1.266, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"grad_norm": 1.7752221822738647, |
|
"learning_rate": 0.0008863636363636364, |
|
"loss": 1.1818, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.36666666666666664, |
|
"eval_loss": 1.3656209707260132, |
|
"eval_runtime": 1.5691, |
|
"eval_samples_per_second": 38.237, |
|
"eval_steps_per_second": 1.275, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.38333333333333336, |
|
"eval_loss": 1.2500847578048706, |
|
"eval_runtime": 1.5809, |
|
"eval_samples_per_second": 37.953, |
|
"eval_steps_per_second": 1.265, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"grad_norm": 1.1072659492492676, |
|
"learning_rate": 0.00075, |
|
"loss": 1.1479, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.3, |
|
"eval_loss": 1.324063777923584, |
|
"eval_runtime": 1.5986, |
|
"eval_samples_per_second": 37.533, |
|
"eval_steps_per_second": 1.251, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"grad_norm": 0.8141500353813171, |
|
"learning_rate": 0.0006136363636363637, |
|
"loss": 1.1193, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.36666666666666664, |
|
"eval_loss": 1.380292534828186, |
|
"eval_runtime": 1.6105, |
|
"eval_samples_per_second": 37.256, |
|
"eval_steps_per_second": 1.242, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.4166666666666667, |
|
"eval_loss": 1.2294162511825562, |
|
"eval_runtime": 1.603, |
|
"eval_samples_per_second": 37.429, |
|
"eval_steps_per_second": 1.248, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"grad_norm": 0.7440662384033203, |
|
"learning_rate": 0.0004772727272727273, |
|
"loss": 1.1071, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.4134150743484497, |
|
"eval_runtime": 1.5689, |
|
"eval_samples_per_second": 38.243, |
|
"eval_steps_per_second": 1.275, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.36666666666666664, |
|
"eval_loss": 1.4123319387435913, |
|
"eval_runtime": 1.5844, |
|
"eval_samples_per_second": 37.869, |
|
"eval_steps_per_second": 1.262, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.06, |
|
"grad_norm": 1.0041050910949707, |
|
"learning_rate": 0.0003409090909090909, |
|
"loss": 1.0429, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.2183587551116943, |
|
"eval_runtime": 1.607, |
|
"eval_samples_per_second": 37.336, |
|
"eval_steps_per_second": 1.245, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 35.61, |
|
"grad_norm": 1.336283564567566, |
|
"learning_rate": 0.00020454545454545454, |
|
"loss": 1.0528, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.3099627494812012, |
|
"eval_runtime": 2.0818, |
|
"eval_samples_per_second": 28.821, |
|
"eval_steps_per_second": 0.961, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.38333333333333336, |
|
"eval_loss": 1.3248744010925293, |
|
"eval_runtime": 1.5674, |
|
"eval_samples_per_second": 38.28, |
|
"eval_steps_per_second": 1.276, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"grad_norm": 1.36141836643219, |
|
"learning_rate": 6.818181818181818e-05, |
|
"loss": 1.0055, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.3050577640533447, |
|
"eval_runtime": 1.5996, |
|
"eval_samples_per_second": 37.51, |
|
"eval_steps_per_second": 1.25, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"step": 294, |
|
"total_flos": 2.864620236542755e+18, |
|
"train_loss": 1.2294491975485873, |
|
"train_runtime": 1673.337, |
|
"train_samples_per_second": 24.447, |
|
"train_steps_per_second": 0.176 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 42, |
|
"save_steps": 500, |
|
"total_flos": 2.864620236542755e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|