|
{ |
|
"best_metric": 0.8333333333333334, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-42/checkpoint-108", |
|
"epoch": 37.935483870967744, |
|
"eval_steps": 500, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.28582763671875, |
|
"eval_runtime": 1.6082, |
|
"eval_samples_per_second": 37.31, |
|
"eval_steps_per_second": 1.244, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 4.701062202453613, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3455, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.1091300249099731, |
|
"eval_runtime": 2.0547, |
|
"eval_samples_per_second": 29.201, |
|
"eval_steps_per_second": 0.973, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 0.8518259525299072, |
|
"eval_runtime": 1.5248, |
|
"eval_samples_per_second": 39.349, |
|
"eval_steps_per_second": 1.312, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 2.8079495429992676, |
|
"learning_rate": 4e-05, |
|
"loss": 1.0067, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7317402362823486, |
|
"eval_runtime": 1.5389, |
|
"eval_samples_per_second": 38.988, |
|
"eval_steps_per_second": 1.3, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 3.14342999458313, |
|
"learning_rate": 4.886363636363637e-05, |
|
"loss": 0.6085, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.694876492023468, |
|
"eval_runtime": 1.5371, |
|
"eval_samples_per_second": 39.035, |
|
"eval_steps_per_second": 1.301, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6633104085922241, |
|
"eval_runtime": 1.5303, |
|
"eval_samples_per_second": 39.208, |
|
"eval_steps_per_second": 1.307, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"grad_norm": 2.108163833618164, |
|
"learning_rate": 4.659090909090909e-05, |
|
"loss": 0.3389, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.6791020035743713, |
|
"eval_runtime": 1.5402, |
|
"eval_samples_per_second": 38.956, |
|
"eval_steps_per_second": 1.299, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"grad_norm": 1.7717548608779907, |
|
"learning_rate": 4.431818181818182e-05, |
|
"loss": 0.1977, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7010270357131958, |
|
"eval_runtime": 1.6107, |
|
"eval_samples_per_second": 37.252, |
|
"eval_steps_per_second": 1.242, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.697003960609436, |
|
"eval_runtime": 1.5576, |
|
"eval_samples_per_second": 38.522, |
|
"eval_steps_per_second": 1.284, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"grad_norm": 1.6967554092407227, |
|
"learning_rate": 4.204545454545455e-05, |
|
"loss": 0.1496, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.6983956098556519, |
|
"eval_runtime": 1.5413, |
|
"eval_samples_per_second": 38.929, |
|
"eval_steps_per_second": 1.298, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"grad_norm": 2.1696012020111084, |
|
"learning_rate": 3.9772727272727275e-05, |
|
"loss": 0.1194, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.9060508012771606, |
|
"eval_runtime": 1.5628, |
|
"eval_samples_per_second": 38.392, |
|
"eval_steps_per_second": 1.28, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.8720260858535767, |
|
"eval_runtime": 1.5352, |
|
"eval_samples_per_second": 39.084, |
|
"eval_steps_per_second": 1.303, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"grad_norm": 1.1283667087554932, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.109, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.8438921570777893, |
|
"eval_runtime": 1.549, |
|
"eval_samples_per_second": 38.735, |
|
"eval_steps_per_second": 1.291, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"grad_norm": 1.2174146175384521, |
|
"learning_rate": 3.522727272727273e-05, |
|
"loss": 0.0902, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7344614863395691, |
|
"eval_runtime": 1.5138, |
|
"eval_samples_per_second": 39.635, |
|
"eval_steps_per_second": 1.321, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.8419939279556274, |
|
"eval_runtime": 2.094, |
|
"eval_samples_per_second": 28.654, |
|
"eval_steps_per_second": 0.955, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"grad_norm": 2.3597609996795654, |
|
"learning_rate": 3.295454545454545e-05, |
|
"loss": 0.0938, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.799355149269104, |
|
"eval_runtime": 1.6114, |
|
"eval_samples_per_second": 37.235, |
|
"eval_steps_per_second": 1.241, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.834147036075592, |
|
"eval_runtime": 1.5793, |
|
"eval_samples_per_second": 37.991, |
|
"eval_steps_per_second": 1.266, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"grad_norm": 3.3779869079589844, |
|
"learning_rate": 3.068181818181818e-05, |
|
"loss": 0.0862, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7238650918006897, |
|
"eval_runtime": 1.5475, |
|
"eval_samples_per_second": 38.771, |
|
"eval_steps_per_second": 1.292, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"grad_norm": 1.2446733713150024, |
|
"learning_rate": 2.8409090909090912e-05, |
|
"loss": 0.0864, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.8485052585601807, |
|
"eval_runtime": 1.5613, |
|
"eval_samples_per_second": 38.428, |
|
"eval_steps_per_second": 1.281, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8948166370391846, |
|
"eval_runtime": 1.558, |
|
"eval_samples_per_second": 38.512, |
|
"eval_steps_per_second": 1.284, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"grad_norm": 1.548230767250061, |
|
"learning_rate": 2.6136363636363637e-05, |
|
"loss": 0.065, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.8681192994117737, |
|
"eval_runtime": 1.5486, |
|
"eval_samples_per_second": 38.745, |
|
"eval_steps_per_second": 1.292, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"grad_norm": 1.3194923400878906, |
|
"learning_rate": 2.3863636363636365e-05, |
|
"loss": 0.0793, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.8225926756858826, |
|
"eval_runtime": 1.5256, |
|
"eval_samples_per_second": 39.329, |
|
"eval_steps_per_second": 1.311, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7495377063751221, |
|
"eval_runtime": 1.5935, |
|
"eval_samples_per_second": 37.653, |
|
"eval_steps_per_second": 1.255, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"grad_norm": 1.1563166379928589, |
|
"learning_rate": 2.1590909090909093e-05, |
|
"loss": 0.0629, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.8813876509666443, |
|
"eval_runtime": 1.5206, |
|
"eval_samples_per_second": 39.458, |
|
"eval_steps_per_second": 1.315, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"grad_norm": 1.163840413093567, |
|
"learning_rate": 1.9318181818181818e-05, |
|
"loss": 0.0666, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.7739368081092834, |
|
"eval_runtime": 1.5452, |
|
"eval_samples_per_second": 38.83, |
|
"eval_steps_per_second": 1.294, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9246408939361572, |
|
"eval_runtime": 1.5689, |
|
"eval_samples_per_second": 38.244, |
|
"eval_steps_per_second": 1.275, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"grad_norm": 0.9550792574882507, |
|
"learning_rate": 1.7045454545454546e-05, |
|
"loss": 0.0571, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.8077472448348999, |
|
"eval_runtime": 1.5388, |
|
"eval_samples_per_second": 38.992, |
|
"eval_steps_per_second": 1.3, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"grad_norm": 1.725164532661438, |
|
"learning_rate": 1.4772727272727274e-05, |
|
"loss": 0.0519, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.8974602818489075, |
|
"eval_runtime": 1.5489, |
|
"eval_samples_per_second": 38.738, |
|
"eval_steps_per_second": 1.291, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9199196100234985, |
|
"eval_runtime": 1.5274, |
|
"eval_samples_per_second": 39.283, |
|
"eval_steps_per_second": 1.309, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"grad_norm": 0.8022506833076477, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0523, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8512372374534607, |
|
"eval_runtime": 1.5327, |
|
"eval_samples_per_second": 39.145, |
|
"eval_steps_per_second": 1.305, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"grad_norm": 1.4496326446533203, |
|
"learning_rate": 1.0227272727272729e-05, |
|
"loss": 0.0548, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.9376980066299438, |
|
"eval_runtime": 1.5279, |
|
"eval_samples_per_second": 39.268, |
|
"eval_steps_per_second": 1.309, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.8212655782699585, |
|
"eval_runtime": 1.5365, |
|
"eval_samples_per_second": 39.049, |
|
"eval_steps_per_second": 1.302, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"grad_norm": 2.0979461669921875, |
|
"learning_rate": 7.954545454545455e-06, |
|
"loss": 0.0576, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.8384222388267517, |
|
"eval_runtime": 1.5251, |
|
"eval_samples_per_second": 39.342, |
|
"eval_steps_per_second": 1.311, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.866399347782135, |
|
"eval_runtime": 1.5559, |
|
"eval_samples_per_second": 38.564, |
|
"eval_steps_per_second": 1.285, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.06, |
|
"grad_norm": 1.5001689195632935, |
|
"learning_rate": 5.681818181818182e-06, |
|
"loss": 0.0381, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8817654252052307, |
|
"eval_runtime": 1.5241, |
|
"eval_samples_per_second": 39.366, |
|
"eval_steps_per_second": 1.312, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 35.61, |
|
"grad_norm": 0.628817617893219, |
|
"learning_rate": 3.409090909090909e-06, |
|
"loss": 0.0338, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9105542302131653, |
|
"eval_runtime": 1.5534, |
|
"eval_samples_per_second": 38.625, |
|
"eval_steps_per_second": 1.287, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9056997299194336, |
|
"eval_runtime": 1.5384, |
|
"eval_samples_per_second": 39.002, |
|
"eval_steps_per_second": 1.3, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"grad_norm": 0.9884141683578491, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"loss": 0.0443, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9011732339859009, |
|
"eval_runtime": 1.6397, |
|
"eval_samples_per_second": 36.593, |
|
"eval_steps_per_second": 1.22, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"step": 294, |
|
"total_flos": 2.864620236542755e+18, |
|
"train_loss": 0.20059432347818296, |
|
"train_runtime": 1595.666, |
|
"train_samples_per_second": 25.637, |
|
"train_steps_per_second": 0.184 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 42, |
|
"save_steps": 500, |
|
"total_flos": 2.864620236542755e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|