{ "best_metric": 1.0, "best_model_checkpoint": "vit-base-patch16-224-dmae-va-U3-40A/checkpoint-119", "epoch": 40.0, "eval_steps": 500, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.37373737373737376, "eval_loss": 1.329177737236023, "eval_runtime": 1.2424, "eval_samples_per_second": 79.683, "eval_steps_per_second": 3.22, "step": 7 }, { "epoch": 1.71, "grad_norm": 3.085270404815674, "learning_rate": 2.1428571428571428e-05, "loss": 1.3407, "step": 12 }, { "epoch": 2.0, "eval_accuracy": 0.5151515151515151, "eval_loss": 1.107913851737976, "eval_runtime": 1.2457, "eval_samples_per_second": 79.472, "eval_steps_per_second": 3.211, "step": 14 }, { "epoch": 3.0, "eval_accuracy": 0.6262626262626263, "eval_loss": 0.8917983770370483, "eval_runtime": 1.2574, "eval_samples_per_second": 78.737, "eval_steps_per_second": 3.181, "step": 21 }, { "epoch": 3.43, "grad_norm": 3.550651788711548, "learning_rate": 4.2857142857142856e-05, "loss": 0.9919, "step": 24 }, { "epoch": 4.0, "eval_accuracy": 0.7878787878787878, "eval_loss": 0.6446935534477234, "eval_runtime": 1.2679, "eval_samples_per_second": 78.083, "eval_steps_per_second": 3.155, "step": 28 }, { "epoch": 5.0, "eval_accuracy": 0.8282828282828283, "eval_loss": 0.45023515820503235, "eval_runtime": 1.4064, "eval_samples_per_second": 70.393, "eval_steps_per_second": 2.844, "step": 35 }, { "epoch": 5.14, "grad_norm": 3.4338462352752686, "learning_rate": 4.841269841269841e-05, "loss": 0.5761, "step": 36 }, { "epoch": 6.0, "eval_accuracy": 0.9191919191919192, "eval_loss": 0.27204275131225586, "eval_runtime": 1.4909, "eval_samples_per_second": 66.403, "eval_steps_per_second": 2.683, "step": 42 }, { "epoch": 6.86, "grad_norm": 1.6468698978424072, "learning_rate": 4.603174603174603e-05, "loss": 0.3111, "step": 48 }, { "epoch": 7.0, "eval_accuracy": 0.9292929292929293, "eval_loss": 0.23024092614650726, "eval_runtime": 1.3066, "eval_samples_per_second": 75.766, "eval_steps_per_second": 3.061, "step": 49 }, { "epoch": 8.0, "eval_accuracy": 0.9494949494949495, "eval_loss": 0.16501076519489288, "eval_runtime": 1.3125, "eval_samples_per_second": 75.428, "eval_steps_per_second": 3.048, "step": 56 }, { "epoch": 8.57, "grad_norm": 1.7793350219726562, "learning_rate": 4.3650793650793655e-05, "loss": 0.204, "step": 60 }, { "epoch": 9.0, "eval_accuracy": 0.9494949494949495, "eval_loss": 0.1503186672925949, "eval_runtime": 1.316, "eval_samples_per_second": 75.227, "eval_steps_per_second": 3.039, "step": 63 }, { "epoch": 10.0, "eval_accuracy": 0.9797979797979798, "eval_loss": 0.08136877417564392, "eval_runtime": 1.3015, "eval_samples_per_second": 76.065, "eval_steps_per_second": 3.073, "step": 70 }, { "epoch": 10.29, "grad_norm": 1.3909555673599243, "learning_rate": 4.126984126984127e-05, "loss": 0.1518, "step": 72 }, { "epoch": 11.0, "eval_accuracy": 0.9797979797979798, "eval_loss": 0.06037978082895279, "eval_runtime": 1.2829, "eval_samples_per_second": 77.171, "eval_steps_per_second": 3.118, "step": 77 }, { "epoch": 12.0, "grad_norm": 1.9186725616455078, "learning_rate": 3.888888888888889e-05, "loss": 0.1272, "step": 84 }, { "epoch": 12.0, "eval_accuracy": 0.9494949494949495, "eval_loss": 0.1265028864145279, "eval_runtime": 1.2863, "eval_samples_per_second": 76.965, "eval_steps_per_second": 3.11, "step": 84 }, { "epoch": 13.0, "eval_accuracy": 0.9797979797979798, "eval_loss": 0.05176503211259842, "eval_runtime": 1.3002, "eval_samples_per_second": 76.143, "eval_steps_per_second": 3.076, "step": 91 }, { "epoch": 13.71, "grad_norm": 1.4530500173568726, "learning_rate": 3.650793650793651e-05, "loss": 0.1379, "step": 96 }, { "epoch": 14.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.044787079095840454, "eval_runtime": 1.303, "eval_samples_per_second": 75.979, "eval_steps_per_second": 3.07, "step": 98 }, { "epoch": 15.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.03611420467495918, "eval_runtime": 1.309, "eval_samples_per_second": 75.628, "eval_steps_per_second": 3.056, "step": 105 }, { "epoch": 15.43, "grad_norm": 1.3324140310287476, "learning_rate": 3.412698412698413e-05, "loss": 0.092, "step": 108 }, { "epoch": 16.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.032215822488069534, "eval_runtime": 1.3002, "eval_samples_per_second": 76.141, "eval_steps_per_second": 3.076, "step": 112 }, { "epoch": 17.0, "eval_accuracy": 1.0, "eval_loss": 0.021263618022203445, "eval_runtime": 1.307, "eval_samples_per_second": 75.748, "eval_steps_per_second": 3.061, "step": 119 }, { "epoch": 17.14, "grad_norm": 2.2976629734039307, "learning_rate": 3.1746031746031745e-05, "loss": 0.0762, "step": 120 }, { "epoch": 18.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.04688708856701851, "eval_runtime": 1.315, "eval_samples_per_second": 75.288, "eval_steps_per_second": 3.042, "step": 126 }, { "epoch": 18.86, "grad_norm": 2.308321714401245, "learning_rate": 2.9365079365079366e-05, "loss": 0.0954, "step": 132 }, { "epoch": 19.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.061472997069358826, "eval_runtime": 1.3031, "eval_samples_per_second": 75.971, "eval_steps_per_second": 3.07, "step": 133 }, { "epoch": 20.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.03133073449134827, "eval_runtime": 1.2993, "eval_samples_per_second": 76.194, "eval_steps_per_second": 3.079, "step": 140 }, { "epoch": 20.57, "grad_norm": 1.274895191192627, "learning_rate": 2.6984126984126984e-05, "loss": 0.0795, "step": 144 }, { "epoch": 21.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.0380566343665123, "eval_runtime": 1.3136, "eval_samples_per_second": 75.368, "eval_steps_per_second": 3.045, "step": 147 }, { "epoch": 22.0, "eval_accuracy": 1.0, "eval_loss": 0.013774486258625984, "eval_runtime": 1.309, "eval_samples_per_second": 75.628, "eval_steps_per_second": 3.056, "step": 154 }, { "epoch": 22.29, "grad_norm": 1.425993800163269, "learning_rate": 2.4603174603174602e-05, "loss": 0.077, "step": 156 }, { "epoch": 23.0, "eval_accuracy": 1.0, "eval_loss": 0.01703532226383686, "eval_runtime": 1.3122, "eval_samples_per_second": 75.446, "eval_steps_per_second": 3.048, "step": 161 }, { "epoch": 24.0, "grad_norm": 0.986053466796875, "learning_rate": 2.2222222222222223e-05, "loss": 0.0675, "step": 168 }, { "epoch": 24.0, "eval_accuracy": 1.0, "eval_loss": 0.010675261728465557, "eval_runtime": 1.3159, "eval_samples_per_second": 75.231, "eval_steps_per_second": 3.04, "step": 168 }, { "epoch": 25.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.019328022375702858, "eval_runtime": 1.3069, "eval_samples_per_second": 75.752, "eval_steps_per_second": 3.061, "step": 175 }, { "epoch": 25.71, "grad_norm": 2.7184066772460938, "learning_rate": 1.984126984126984e-05, "loss": 0.0659, "step": 180 }, { "epoch": 26.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.025486120954155922, "eval_runtime": 1.3084, "eval_samples_per_second": 75.667, "eval_steps_per_second": 3.057, "step": 182 }, { "epoch": 27.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.020136240869760513, "eval_runtime": 1.3096, "eval_samples_per_second": 75.596, "eval_steps_per_second": 3.054, "step": 189 }, { "epoch": 27.43, "grad_norm": 2.4670774936676025, "learning_rate": 1.746031746031746e-05, "loss": 0.0758, "step": 192 }, { "epoch": 28.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.03251149132847786, "eval_runtime": 1.2895, "eval_samples_per_second": 76.775, "eval_steps_per_second": 3.102, "step": 196 }, { "epoch": 29.0, "eval_accuracy": 1.0, "eval_loss": 0.011012612842023373, "eval_runtime": 1.3105, "eval_samples_per_second": 75.542, "eval_steps_per_second": 3.052, "step": 203 }, { "epoch": 29.14, "grad_norm": 1.395484209060669, "learning_rate": 1.5079365079365079e-05, "loss": 0.0589, "step": 204 }, { "epoch": 30.0, "eval_accuracy": 1.0, "eval_loss": 0.0159281175583601, "eval_runtime": 1.3316, "eval_samples_per_second": 74.346, "eval_steps_per_second": 3.004, "step": 210 }, { "epoch": 30.86, "grad_norm": 1.4966486692428589, "learning_rate": 1.2698412698412699e-05, "loss": 0.0521, "step": 216 }, { "epoch": 31.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.031857237219810486, "eval_runtime": 1.2919, "eval_samples_per_second": 76.629, "eval_steps_per_second": 3.096, "step": 217 }, { "epoch": 32.0, "eval_accuracy": 0.9797979797979798, "eval_loss": 0.029438314959406853, "eval_runtime": 1.3011, "eval_samples_per_second": 76.091, "eval_steps_per_second": 3.074, "step": 224 }, { "epoch": 32.57, "grad_norm": 1.3882635831832886, "learning_rate": 1.0317460317460318e-05, "loss": 0.0618, "step": 228 }, { "epoch": 33.0, "eval_accuracy": 0.9797979797979798, "eval_loss": 0.039191748946905136, "eval_runtime": 1.3063, "eval_samples_per_second": 75.788, "eval_steps_per_second": 3.062, "step": 231 }, { "epoch": 34.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.026867415755987167, "eval_runtime": 1.2944, "eval_samples_per_second": 76.481, "eval_steps_per_second": 3.09, "step": 238 }, { "epoch": 34.29, "grad_norm": 1.203762412071228, "learning_rate": 7.936507936507936e-06, "loss": 0.0422, "step": 240 }, { "epoch": 35.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.021003253757953644, "eval_runtime": 1.3003, "eval_samples_per_second": 76.137, "eval_steps_per_second": 3.076, "step": 245 }, { "epoch": 36.0, "grad_norm": 1.9063193798065186, "learning_rate": 5.555555555555556e-06, "loss": 0.0551, "step": 252 }, { "epoch": 36.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.01777764968574047, "eval_runtime": 1.456, "eval_samples_per_second": 67.993, "eval_steps_per_second": 2.747, "step": 252 }, { "epoch": 37.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.01593274623155594, "eval_runtime": 1.3158, "eval_samples_per_second": 75.242, "eval_steps_per_second": 3.04, "step": 259 }, { "epoch": 37.71, "grad_norm": 1.4439316987991333, "learning_rate": 3.1746031746031746e-06, "loss": 0.0518, "step": 264 }, { "epoch": 38.0, "eval_accuracy": 0.98989898989899, "eval_loss": 0.012379774823784828, "eval_runtime": 1.3454, "eval_samples_per_second": 73.583, "eval_steps_per_second": 2.973, "step": 266 }, { "epoch": 39.0, "eval_accuracy": 1.0, "eval_loss": 0.011175006628036499, "eval_runtime": 1.4492, "eval_samples_per_second": 68.315, "eval_steps_per_second": 2.76, "step": 273 }, { "epoch": 39.43, "grad_norm": 1.1414995193481445, "learning_rate": 7.936507936507937e-07, "loss": 0.0313, "step": 276 }, { "epoch": 40.0, "eval_accuracy": 1.0, "eval_loss": 0.01096320990473032, "eval_runtime": 1.3095, "eval_samples_per_second": 75.6, "eval_steps_per_second": 3.055, "step": 280 }, { "epoch": 40.0, "step": 280, "total_flos": 2.7494650758139085e+18, "train_loss": 0.2073148890797581, "train_runtime": 1492.3317, "train_samples_per_second": 23.775, "train_steps_per_second": 0.188 } ], "logging_steps": 12, "max_steps": 280, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.7494650758139085e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }