{ "best_metric": 0.9908256880733946, "best_model_checkpoint": "vit-base-patch16-224-dmae-va-U/checkpoint-217", "epoch": 36.12903225806452, "eval_steps": 500, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_accuracy": 0.25688073394495414, "eval_loss": 1.4318687915802002, "eval_runtime": 1.4321, "eval_samples_per_second": 76.112, "eval_steps_per_second": 2.793, "step": 7 }, { "epoch": 1.55, "learning_rate": 2.1428571428571428e-05, "loss": 1.3911, "step": 12 }, { "epoch": 1.94, "eval_accuracy": 0.47706422018348627, "eval_loss": 1.213340163230896, "eval_runtime": 1.4871, "eval_samples_per_second": 73.295, "eval_steps_per_second": 2.69, "step": 15 }, { "epoch": 2.97, "eval_accuracy": 0.6055045871559633, "eval_loss": 0.9487143158912659, "eval_runtime": 1.5892, "eval_samples_per_second": 68.589, "eval_steps_per_second": 2.517, "step": 23 }, { "epoch": 3.1, "learning_rate": 4.2857142857142856e-05, "loss": 1.0766, "step": 24 }, { "epoch": 4.0, "eval_accuracy": 0.7155963302752294, "eval_loss": 0.6542130708694458, "eval_runtime": 1.5217, "eval_samples_per_second": 71.63, "eval_steps_per_second": 2.629, "step": 31 }, { "epoch": 4.65, "learning_rate": 4.841269841269841e-05, "loss": 0.6974, "step": 36 }, { "epoch": 4.9, "eval_accuracy": 0.8715596330275229, "eval_loss": 0.4643765389919281, "eval_runtime": 1.561, "eval_samples_per_second": 69.827, "eval_steps_per_second": 2.562, "step": 38 }, { "epoch": 5.94, "eval_accuracy": 0.8715596330275229, "eval_loss": 0.39188772439956665, "eval_runtime": 1.5611, "eval_samples_per_second": 69.823, "eval_steps_per_second": 2.562, "step": 46 }, { "epoch": 6.19, "learning_rate": 4.603174603174603e-05, "loss": 0.421, "step": 48 }, { "epoch": 6.97, "eval_accuracy": 0.8715596330275229, "eval_loss": 0.30943310260772705, "eval_runtime": 1.6488, "eval_samples_per_second": 66.109, "eval_steps_per_second": 2.426, "step": 54 }, { "epoch": 7.74, "learning_rate": 4.3650793650793655e-05, "loss": 0.2513, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.8990825688073395, "eval_loss": 0.2334030568599701, "eval_runtime": 1.6748, "eval_samples_per_second": 65.082, "eval_steps_per_second": 2.388, "step": 62 }, { "epoch": 8.9, "eval_accuracy": 0.9174311926605505, "eval_loss": 0.19148482382297516, "eval_runtime": 1.68, "eval_samples_per_second": 64.88, "eval_steps_per_second": 2.381, "step": 69 }, { "epoch": 9.29, "learning_rate": 4.126984126984127e-05, "loss": 0.1931, "step": 72 }, { "epoch": 9.94, "eval_accuracy": 0.8807339449541285, "eval_loss": 0.24314457178115845, "eval_runtime": 1.7464, "eval_samples_per_second": 62.412, "eval_steps_per_second": 2.29, "step": 77 }, { "epoch": 10.84, "learning_rate": 3.888888888888889e-05, "loss": 0.1757, "step": 84 }, { "epoch": 10.97, "eval_accuracy": 0.944954128440367, "eval_loss": 0.16078265011310577, "eval_runtime": 1.6471, "eval_samples_per_second": 66.177, "eval_steps_per_second": 2.428, "step": 85 }, { "epoch": 12.0, "eval_accuracy": 0.926605504587156, "eval_loss": 0.1423795074224472, "eval_runtime": 1.7337, "eval_samples_per_second": 62.87, "eval_steps_per_second": 2.307, "step": 93 }, { "epoch": 12.39, "learning_rate": 3.650793650793651e-05, "loss": 0.1442, "step": 96 }, { "epoch": 12.9, "eval_accuracy": 0.944954128440367, "eval_loss": 0.1280096173286438, "eval_runtime": 1.6549, "eval_samples_per_second": 65.864, "eval_steps_per_second": 2.417, "step": 100 }, { "epoch": 13.94, "learning_rate": 3.412698412698413e-05, "loss": 0.1085, "step": 108 }, { "epoch": 13.94, "eval_accuracy": 0.9541284403669725, "eval_loss": 0.10545489937067032, "eval_runtime": 1.6964, "eval_samples_per_second": 64.255, "eval_steps_per_second": 2.358, "step": 108 }, { "epoch": 14.97, "eval_accuracy": 0.9541284403669725, "eval_loss": 0.1080455482006073, "eval_runtime": 1.5479, "eval_samples_per_second": 70.417, "eval_steps_per_second": 2.584, "step": 116 }, { "epoch": 15.48, "learning_rate": 3.1746031746031745e-05, "loss": 0.1056, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.963302752293578, "eval_loss": 0.09967872500419617, "eval_runtime": 1.7093, "eval_samples_per_second": 63.77, "eval_steps_per_second": 2.34, "step": 124 }, { "epoch": 16.9, "eval_accuracy": 0.963302752293578, "eval_loss": 0.11852575093507767, "eval_runtime": 1.5272, "eval_samples_per_second": 71.372, "eval_steps_per_second": 2.619, "step": 131 }, { "epoch": 17.03, "learning_rate": 2.9365079365079366e-05, "loss": 0.0926, "step": 132 }, { "epoch": 17.94, "eval_accuracy": 0.963302752293578, "eval_loss": 0.07731892913579941, "eval_runtime": 1.5604, "eval_samples_per_second": 69.856, "eval_steps_per_second": 2.564, "step": 139 }, { "epoch": 18.58, "learning_rate": 2.6984126984126984e-05, "loss": 0.103, "step": 144 }, { "epoch": 18.97, "eval_accuracy": 0.963302752293578, "eval_loss": 0.12785662710666656, "eval_runtime": 1.527, "eval_samples_per_second": 71.384, "eval_steps_per_second": 2.62, "step": 147 }, { "epoch": 20.0, "eval_accuracy": 0.963302752293578, "eval_loss": 0.10433010756969452, "eval_runtime": 1.5327, "eval_samples_per_second": 71.115, "eval_steps_per_second": 2.61, "step": 155 }, { "epoch": 20.13, "learning_rate": 2.4603174603174602e-05, "loss": 0.0938, "step": 156 }, { "epoch": 20.9, "eval_accuracy": 0.981651376146789, "eval_loss": 0.08241702616214752, "eval_runtime": 1.5631, "eval_samples_per_second": 69.735, "eval_steps_per_second": 2.559, "step": 162 }, { "epoch": 21.68, "learning_rate": 2.2222222222222223e-05, "loss": 0.0891, "step": 168 }, { "epoch": 21.94, "eval_accuracy": 0.9541284403669725, "eval_loss": 0.14486828446388245, "eval_runtime": 1.5236, "eval_samples_per_second": 71.54, "eval_steps_per_second": 2.625, "step": 170 }, { "epoch": 22.97, "eval_accuracy": 0.963302752293578, "eval_loss": 0.13658782839775085, "eval_runtime": 1.7185, "eval_samples_per_second": 63.429, "eval_steps_per_second": 2.328, "step": 178 }, { "epoch": 23.23, "learning_rate": 1.984126984126984e-05, "loss": 0.0754, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.9357798165137615, "eval_loss": 0.11480199545621872, "eval_runtime": 1.6552, "eval_samples_per_second": 65.853, "eval_steps_per_second": 2.417, "step": 186 }, { "epoch": 24.77, "learning_rate": 1.746031746031746e-05, "loss": 0.0882, "step": 192 }, { "epoch": 24.9, "eval_accuracy": 0.9357798165137615, "eval_loss": 0.19915136694908142, "eval_runtime": 1.6537, "eval_samples_per_second": 65.913, "eval_steps_per_second": 2.419, "step": 193 }, { "epoch": 25.94, "eval_accuracy": 0.981651376146789, "eval_loss": 0.07425253838300705, "eval_runtime": 1.4966, "eval_samples_per_second": 72.83, "eval_steps_per_second": 2.673, "step": 201 }, { "epoch": 26.32, "learning_rate": 1.5079365079365079e-05, "loss": 0.078, "step": 204 }, { "epoch": 26.97, "eval_accuracy": 0.9724770642201835, "eval_loss": 0.06684111058712006, "eval_runtime": 1.5659, "eval_samples_per_second": 69.61, "eval_steps_per_second": 2.554, "step": 209 }, { "epoch": 27.87, "learning_rate": 1.2698412698412699e-05, "loss": 0.0666, "step": 216 }, { "epoch": 28.0, "eval_accuracy": 0.9908256880733946, "eval_loss": 0.053366996347904205, "eval_runtime": 1.5382, "eval_samples_per_second": 70.862, "eval_steps_per_second": 2.6, "step": 217 }, { "epoch": 28.9, "eval_accuracy": 0.9908256880733946, "eval_loss": 0.049850545823574066, "eval_runtime": 1.5346, "eval_samples_per_second": 71.027, "eval_steps_per_second": 2.607, "step": 224 }, { "epoch": 29.42, "learning_rate": 1.0317460317460318e-05, "loss": 0.0514, "step": 228 }, { "epoch": 29.94, "eval_accuracy": 0.9724770642201835, "eval_loss": 0.043298669159412384, "eval_runtime": 1.5329, "eval_samples_per_second": 71.108, "eval_steps_per_second": 2.609, "step": 232 }, { "epoch": 30.97, "learning_rate": 7.936507936507936e-06, "loss": 0.062, "step": 240 }, { "epoch": 30.97, "eval_accuracy": 0.963302752293578, "eval_loss": 0.08402539044618607, "eval_runtime": 1.7473, "eval_samples_per_second": 62.382, "eval_steps_per_second": 2.289, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.9724770642201835, "eval_loss": 0.05133233219385147, "eval_runtime": 1.6441, "eval_samples_per_second": 66.297, "eval_steps_per_second": 2.433, "step": 248 }, { "epoch": 32.52, "learning_rate": 5.555555555555556e-06, "loss": 0.0712, "step": 252 }, { "epoch": 32.9, "eval_accuracy": 0.981651376146789, "eval_loss": 0.04824218526482582, "eval_runtime": 1.6796, "eval_samples_per_second": 64.896, "eval_steps_per_second": 2.382, "step": 255 }, { "epoch": 33.94, "eval_accuracy": 0.981651376146789, "eval_loss": 0.055325187742710114, "eval_runtime": 1.5821, "eval_samples_per_second": 68.894, "eval_steps_per_second": 2.528, "step": 263 }, { "epoch": 34.06, "learning_rate": 3.1746031746031746e-06, "loss": 0.0703, "step": 264 }, { "epoch": 34.97, "eval_accuracy": 0.9724770642201835, "eval_loss": 0.060162752866744995, "eval_runtime": 1.5164, "eval_samples_per_second": 71.882, "eval_steps_per_second": 2.638, "step": 271 }, { "epoch": 35.61, "learning_rate": 7.936507936507937e-07, "loss": 0.0553, "step": 276 }, { "epoch": 36.0, "eval_accuracy": 0.9724770642201835, "eval_loss": 0.05953967571258545, "eval_runtime": 1.5574, "eval_samples_per_second": 69.99, "eval_steps_per_second": 2.568, "step": 279 }, { "epoch": 36.13, "eval_accuracy": 0.9724770642201835, "eval_loss": 0.05953451991081238, "eval_runtime": 1.6854, "eval_samples_per_second": 64.671, "eval_steps_per_second": 2.373, "step": 280 }, { "epoch": 36.13, "step": 280, "total_flos": 2.738306029282984e+18, "train_loss": 0.23923614642449786, "train_runtime": 1562.4778, "train_samples_per_second": 25.037, "train_steps_per_second": 0.179 } ], "logging_steps": 12, "max_steps": 280, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.738306029282984e+18, "trial_name": null, "trial_params": null }