{ "best_metric": 0.8333333333333334, "best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-42/checkpoint-108", "epoch": 37.935483870967744, "eval_steps": 500, "global_step": 294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_accuracy": 0.5, "eval_loss": 1.28582763671875, "eval_runtime": 1.6082, "eval_samples_per_second": 37.31, "eval_steps_per_second": 1.244, "step": 7 }, { "epoch": 1.55, "grad_norm": 4.701062202453613, "learning_rate": 2e-05, "loss": 1.3455, "step": 12 }, { "epoch": 1.94, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.1091300249099731, "eval_runtime": 2.0547, "eval_samples_per_second": 29.201, "eval_steps_per_second": 0.973, "step": 15 }, { "epoch": 2.97, "eval_accuracy": 0.5833333333333334, "eval_loss": 0.8518259525299072, "eval_runtime": 1.5248, "eval_samples_per_second": 39.349, "eval_steps_per_second": 1.312, "step": 23 }, { "epoch": 3.1, "grad_norm": 2.8079495429992676, "learning_rate": 4e-05, "loss": 1.0067, "step": 24 }, { "epoch": 4.0, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7317402362823486, "eval_runtime": 1.5389, "eval_samples_per_second": 38.988, "eval_steps_per_second": 1.3, "step": 31 }, { "epoch": 4.65, "grad_norm": 3.14342999458313, "learning_rate": 4.886363636363637e-05, "loss": 0.6085, "step": 36 }, { "epoch": 4.9, "eval_accuracy": 0.75, "eval_loss": 0.694876492023468, "eval_runtime": 1.5371, "eval_samples_per_second": 39.035, "eval_steps_per_second": 1.301, "step": 38 }, { "epoch": 5.94, "eval_accuracy": 0.75, "eval_loss": 0.6633104085922241, "eval_runtime": 1.5303, "eval_samples_per_second": 39.208, "eval_steps_per_second": 1.307, "step": 46 }, { "epoch": 6.19, "grad_norm": 2.108163833618164, "learning_rate": 4.659090909090909e-05, "loss": 0.3389, "step": 48 }, { "epoch": 6.97, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.6791020035743713, "eval_runtime": 1.5402, "eval_samples_per_second": 38.956, "eval_steps_per_second": 1.299, "step": 54 }, { "epoch": 7.74, "grad_norm": 1.7717548608779907, "learning_rate": 4.431818181818182e-05, "loss": 0.1977, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.7010270357131958, "eval_runtime": 1.6107, "eval_samples_per_second": 37.252, "eval_steps_per_second": 1.242, "step": 62 }, { "epoch": 8.9, "eval_accuracy": 0.75, "eval_loss": 0.697003960609436, "eval_runtime": 1.5576, "eval_samples_per_second": 38.522, "eval_steps_per_second": 1.284, "step": 69 }, { "epoch": 9.29, "grad_norm": 1.6967554092407227, "learning_rate": 4.204545454545455e-05, "loss": 0.1496, "step": 72 }, { "epoch": 9.94, "eval_accuracy": 0.8, "eval_loss": 0.6983956098556519, "eval_runtime": 1.5413, "eval_samples_per_second": 38.929, "eval_steps_per_second": 1.298, "step": 77 }, { "epoch": 10.84, "grad_norm": 2.1696012020111084, "learning_rate": 3.9772727272727275e-05, "loss": 0.1194, "step": 84 }, { "epoch": 10.97, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.9060508012771606, "eval_runtime": 1.5628, "eval_samples_per_second": 38.392, "eval_steps_per_second": 1.28, "step": 85 }, { "epoch": 12.0, "eval_accuracy": 0.75, "eval_loss": 0.8720260858535767, "eval_runtime": 1.5352, "eval_samples_per_second": 39.084, "eval_steps_per_second": 1.303, "step": 93 }, { "epoch": 12.39, "grad_norm": 1.1283667087554932, "learning_rate": 3.7500000000000003e-05, "loss": 0.109, "step": 96 }, { "epoch": 12.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.8438921570777893, "eval_runtime": 1.549, "eval_samples_per_second": 38.735, "eval_steps_per_second": 1.291, "step": 100 }, { "epoch": 13.94, "grad_norm": 1.2174146175384521, "learning_rate": 3.522727272727273e-05, "loss": 0.0902, "step": 108 }, { "epoch": 13.94, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.7344614863395691, "eval_runtime": 1.5138, "eval_samples_per_second": 39.635, "eval_steps_per_second": 1.321, "step": 108 }, { "epoch": 14.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.8419939279556274, "eval_runtime": 2.094, "eval_samples_per_second": 28.654, "eval_steps_per_second": 0.955, "step": 116 }, { "epoch": 15.48, "grad_norm": 2.3597609996795654, "learning_rate": 3.295454545454545e-05, "loss": 0.0938, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.75, "eval_loss": 0.799355149269104, "eval_runtime": 1.6114, "eval_samples_per_second": 37.235, "eval_steps_per_second": 1.241, "step": 124 }, { "epoch": 16.9, "eval_accuracy": 0.8, "eval_loss": 0.834147036075592, "eval_runtime": 1.5793, "eval_samples_per_second": 37.991, "eval_steps_per_second": 1.266, "step": 131 }, { "epoch": 17.03, "grad_norm": 3.3779869079589844, "learning_rate": 3.068181818181818e-05, "loss": 0.0862, "step": 132 }, { "epoch": 17.94, "eval_accuracy": 0.8, "eval_loss": 0.7238650918006897, "eval_runtime": 1.5475, "eval_samples_per_second": 38.771, "eval_steps_per_second": 1.292, "step": 139 }, { "epoch": 18.58, "grad_norm": 1.2446733713150024, "learning_rate": 2.8409090909090912e-05, "loss": 0.0864, "step": 144 }, { "epoch": 18.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.8485052585601807, "eval_runtime": 1.5613, "eval_samples_per_second": 38.428, "eval_steps_per_second": 1.281, "step": 147 }, { "epoch": 20.0, "eval_accuracy": 0.8, "eval_loss": 0.8948166370391846, "eval_runtime": 1.558, "eval_samples_per_second": 38.512, "eval_steps_per_second": 1.284, "step": 155 }, { "epoch": 20.13, "grad_norm": 1.548230767250061, "learning_rate": 2.6136363636363637e-05, "loss": 0.065, "step": 156 }, { "epoch": 20.9, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8681192994117737, "eval_runtime": 1.5486, "eval_samples_per_second": 38.745, "eval_steps_per_second": 1.292, "step": 162 }, { "epoch": 21.68, "grad_norm": 1.3194923400878906, "learning_rate": 2.3863636363636365e-05, "loss": 0.0793, "step": 168 }, { "epoch": 21.94, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8225926756858826, "eval_runtime": 1.5256, "eval_samples_per_second": 39.329, "eval_steps_per_second": 1.311, "step": 170 }, { "epoch": 22.97, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.7495377063751221, "eval_runtime": 1.5935, "eval_samples_per_second": 37.653, "eval_steps_per_second": 1.255, "step": 178 }, { "epoch": 23.23, "grad_norm": 1.1563166379928589, "learning_rate": 2.1590909090909093e-05, "loss": 0.0629, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.8813876509666443, "eval_runtime": 1.5206, "eval_samples_per_second": 39.458, "eval_steps_per_second": 1.315, "step": 186 }, { "epoch": 24.77, "grad_norm": 1.163840413093567, "learning_rate": 1.9318181818181818e-05, "loss": 0.0666, "step": 192 }, { "epoch": 24.9, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.7739368081092834, "eval_runtime": 1.5452, "eval_samples_per_second": 38.83, "eval_steps_per_second": 1.294, "step": 193 }, { "epoch": 25.94, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9246408939361572, "eval_runtime": 1.5689, "eval_samples_per_second": 38.244, "eval_steps_per_second": 1.275, "step": 201 }, { "epoch": 26.32, "grad_norm": 0.9550792574882507, "learning_rate": 1.7045454545454546e-05, "loss": 0.0571, "step": 204 }, { "epoch": 26.97, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.8077472448348999, "eval_runtime": 1.5388, "eval_samples_per_second": 38.992, "eval_steps_per_second": 1.3, "step": 209 }, { "epoch": 27.87, "grad_norm": 1.725164532661438, "learning_rate": 1.4772727272727274e-05, "loss": 0.0519, "step": 216 }, { "epoch": 28.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.8974602818489075, "eval_runtime": 1.5489, "eval_samples_per_second": 38.738, "eval_steps_per_second": 1.291, "step": 217 }, { "epoch": 28.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9199196100234985, "eval_runtime": 1.5274, "eval_samples_per_second": 39.283, "eval_steps_per_second": 1.309, "step": 224 }, { "epoch": 29.42, "grad_norm": 0.8022506833076477, "learning_rate": 1.25e-05, "loss": 0.0523, "step": 228 }, { "epoch": 29.94, "eval_accuracy": 0.8, "eval_loss": 0.8512372374534607, "eval_runtime": 1.5327, "eval_samples_per_second": 39.145, "eval_steps_per_second": 1.305, "step": 232 }, { "epoch": 30.97, "grad_norm": 1.4496326446533203, "learning_rate": 1.0227272727272729e-05, "loss": 0.0548, "step": 240 }, { "epoch": 30.97, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.9376980066299438, "eval_runtime": 1.5279, "eval_samples_per_second": 39.268, "eval_steps_per_second": 1.309, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8212655782699585, "eval_runtime": 1.5365, "eval_samples_per_second": 39.049, "eval_steps_per_second": 1.302, "step": 248 }, { "epoch": 32.52, "grad_norm": 2.0979461669921875, "learning_rate": 7.954545454545455e-06, "loss": 0.0576, "step": 252 }, { "epoch": 32.9, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8384222388267517, "eval_runtime": 1.5251, "eval_samples_per_second": 39.342, "eval_steps_per_second": 1.311, "step": 255 }, { "epoch": 33.94, "eval_accuracy": 0.8, "eval_loss": 0.866399347782135, "eval_runtime": 1.5559, "eval_samples_per_second": 38.564, "eval_steps_per_second": 1.285, "step": 263 }, { "epoch": 34.06, "grad_norm": 1.5001689195632935, "learning_rate": 5.681818181818182e-06, "loss": 0.0381, "step": 264 }, { "epoch": 34.97, "eval_accuracy": 0.8, "eval_loss": 0.8817654252052307, "eval_runtime": 1.5241, "eval_samples_per_second": 39.366, "eval_steps_per_second": 1.312, "step": 271 }, { "epoch": 35.61, "grad_norm": 0.628817617893219, "learning_rate": 3.409090909090909e-06, "loss": 0.0338, "step": 276 }, { "epoch": 36.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9105542302131653, "eval_runtime": 1.5534, "eval_samples_per_second": 38.625, "eval_steps_per_second": 1.287, "step": 279 }, { "epoch": 36.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9056997299194336, "eval_runtime": 1.5384, "eval_samples_per_second": 39.002, "eval_steps_per_second": 1.3, "step": 286 }, { "epoch": 37.16, "grad_norm": 0.9884141683578491, "learning_rate": 1.1363636363636364e-06, "loss": 0.0443, "step": 288 }, { "epoch": 37.94, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9011732339859009, "eval_runtime": 1.6397, "eval_samples_per_second": 36.593, "eval_steps_per_second": 1.22, "step": 294 }, { "epoch": 37.94, "step": 294, "total_flos": 2.864620236542755e+18, "train_loss": 0.20059432347818296, "train_runtime": 1595.666, "train_samples_per_second": 25.637, "train_steps_per_second": 0.184 } ], "logging_steps": 12, "max_steps": 294, "num_input_tokens_seen": 0, "num_train_epochs": 42, "save_steps": 500, "total_flos": 2.864620236542755e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }