{ "best_metric": 0.55, "best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-42D/checkpoint-15", "epoch": 37.935483870967744, "eval_steps": 500, "global_step": 294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_accuracy": 0.5166666666666667, "eval_loss": 1.0970458984375, "eval_runtime": 1.6221, "eval_samples_per_second": 36.99, "eval_steps_per_second": 1.233, "step": 7 }, { "epoch": 1.55, "grad_norm": 5.820129871368408, "learning_rate": 0.0012000000000000001, "loss": 1.3527, "step": 12 }, { "epoch": 1.94, "eval_accuracy": 0.55, "eval_loss": 1.038263201713562, "eval_runtime": 1.563, "eval_samples_per_second": 38.388, "eval_steps_per_second": 1.28, "step": 15 }, { "epoch": 2.97, "eval_accuracy": 0.4166666666666667, "eval_loss": 1.235134243965149, "eval_runtime": 2.1089, "eval_samples_per_second": 28.451, "eval_steps_per_second": 0.948, "step": 23 }, { "epoch": 3.1, "grad_norm": 5.020723819732666, "learning_rate": 0.0024000000000000002, "loss": 1.3013, "step": 24 }, { "epoch": 4.0, "eval_accuracy": 0.3333333333333333, "eval_loss": 1.3025089502334595, "eval_runtime": 1.6082, "eval_samples_per_second": 37.309, "eval_steps_per_second": 1.244, "step": 31 }, { "epoch": 4.65, "grad_norm": 2.038219451904297, "learning_rate": 0.002931818181818182, "loss": 1.3706, "step": 36 }, { "epoch": 4.9, "eval_accuracy": 0.21666666666666667, "eval_loss": 1.3800110816955566, "eval_runtime": 1.5927, "eval_samples_per_second": 37.671, "eval_steps_per_second": 1.256, "step": 38 }, { "epoch": 5.94, "eval_accuracy": 0.18333333333333332, "eval_loss": 1.4608864784240723, "eval_runtime": 1.5994, "eval_samples_per_second": 37.515, "eval_steps_per_second": 1.25, "step": 46 }, { "epoch": 6.19, "grad_norm": 1.954506754875183, "learning_rate": 0.0027954545454545454, "loss": 1.4415, "step": 48 }, { "epoch": 6.97, "eval_accuracy": 0.43333333333333335, "eval_loss": 1.3718478679656982, "eval_runtime": 1.569, "eval_samples_per_second": 38.242, "eval_steps_per_second": 1.275, "step": 54 }, { "epoch": 7.74, "grad_norm": 1.276404619216919, "learning_rate": 0.002659090909090909, "loss": 1.3602, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.31666666666666665, "eval_loss": 1.3172897100448608, "eval_runtime": 1.564, "eval_samples_per_second": 38.363, "eval_steps_per_second": 1.279, "step": 62 }, { "epoch": 8.9, "eval_accuracy": 0.4, "eval_loss": 1.2827069759368896, "eval_runtime": 1.5647, "eval_samples_per_second": 38.345, "eval_steps_per_second": 1.278, "step": 69 }, { "epoch": 9.29, "grad_norm": 1.0918904542922974, "learning_rate": 0.002522727272727273, "loss": 1.3079, "step": 72 }, { "epoch": 9.94, "eval_accuracy": 0.31666666666666665, "eval_loss": 1.3166981935501099, "eval_runtime": 1.6067, "eval_samples_per_second": 37.343, "eval_steps_per_second": 1.245, "step": 77 }, { "epoch": 10.84, "grad_norm": 1.5578159093856812, "learning_rate": 0.002386363636363636, "loss": 1.3247, "step": 84 }, { "epoch": 10.97, "eval_accuracy": 0.4, "eval_loss": 1.257886528968811, "eval_runtime": 1.5535, "eval_samples_per_second": 38.623, "eval_steps_per_second": 1.287, "step": 85 }, { "epoch": 12.0, "eval_accuracy": 0.2, "eval_loss": 1.3202433586120605, "eval_runtime": 2.2419, "eval_samples_per_second": 26.762, "eval_steps_per_second": 0.892, "step": 93 }, { "epoch": 12.39, "grad_norm": 0.5343012809753418, "learning_rate": 0.0022500000000000003, "loss": 1.3102, "step": 96 }, { "epoch": 12.9, "eval_accuracy": 0.45, "eval_loss": 1.2353752851486206, "eval_runtime": 1.5721, "eval_samples_per_second": 38.166, "eval_steps_per_second": 1.272, "step": 100 }, { "epoch": 13.94, "grad_norm": 1.3499153852462769, "learning_rate": 0.002113636363636364, "loss": 1.2807, "step": 108 }, { "epoch": 13.94, "eval_accuracy": 0.25, "eval_loss": 1.3610022068023682, "eval_runtime": 1.9584, "eval_samples_per_second": 30.638, "eval_steps_per_second": 1.021, "step": 108 }, { "epoch": 14.97, "eval_accuracy": 0.4, "eval_loss": 1.2803313732147217, "eval_runtime": 1.596, "eval_samples_per_second": 37.594, "eval_steps_per_second": 1.253, "step": 116 }, { "epoch": 15.48, "grad_norm": 1.6496480703353882, "learning_rate": 0.0019772727272727273, "loss": 1.2774, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.21666666666666667, "eval_loss": 1.3338415622711182, "eval_runtime": 1.5818, "eval_samples_per_second": 37.931, "eval_steps_per_second": 1.264, "step": 124 }, { "epoch": 16.9, "eval_accuracy": 0.35, "eval_loss": 1.2548964023590088, "eval_runtime": 1.5648, "eval_samples_per_second": 38.344, "eval_steps_per_second": 1.278, "step": 131 }, { "epoch": 17.03, "grad_norm": 0.824222207069397, "learning_rate": 0.001840909090909091, "loss": 1.2596, "step": 132 }, { "epoch": 17.94, "eval_accuracy": 0.36666666666666664, "eval_loss": 1.2692508697509766, "eval_runtime": 1.6871, "eval_samples_per_second": 35.564, "eval_steps_per_second": 1.185, "step": 139 }, { "epoch": 18.58, "grad_norm": 0.44431018829345703, "learning_rate": 0.0017045454545454547, "loss": 1.2413, "step": 144 }, { "epoch": 18.97, "eval_accuracy": 0.21666666666666667, "eval_loss": 1.3005454540252686, "eval_runtime": 2.5177, "eval_samples_per_second": 23.831, "eval_steps_per_second": 0.794, "step": 147 }, { "epoch": 20.0, "eval_accuracy": 0.43333333333333335, "eval_loss": 1.229854941368103, "eval_runtime": 1.7681, "eval_samples_per_second": 33.934, "eval_steps_per_second": 1.131, "step": 155 }, { "epoch": 20.13, "grad_norm": 1.6288515329360962, "learning_rate": 0.0015681818181818182, "loss": 1.262, "step": 156 }, { "epoch": 20.9, "eval_accuracy": 0.26666666666666666, "eval_loss": 1.3453844785690308, "eval_runtime": 1.5884, "eval_samples_per_second": 37.774, "eval_steps_per_second": 1.259, "step": 162 }, { "epoch": 21.68, "grad_norm": 1.0566848516464233, "learning_rate": 0.0014318181818181819, "loss": 1.2261, "step": 168 }, { "epoch": 21.94, "eval_accuracy": 0.31666666666666665, "eval_loss": 1.2818458080291748, "eval_runtime": 1.5729, "eval_samples_per_second": 38.146, "eval_steps_per_second": 1.272, "step": 170 }, { "epoch": 22.97, "eval_accuracy": 0.43333333333333335, "eval_loss": 1.249794840812683, "eval_runtime": 1.5575, "eval_samples_per_second": 38.524, "eval_steps_per_second": 1.284, "step": 178 }, { "epoch": 23.23, "grad_norm": 1.7413015365600586, "learning_rate": 0.0012954545454545456, "loss": 1.2405, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.31666666666666665, "eval_loss": 1.3376109600067139, "eval_runtime": 1.6036, "eval_samples_per_second": 37.415, "eval_steps_per_second": 1.247, "step": 186 }, { "epoch": 24.77, "grad_norm": 0.5584876537322998, "learning_rate": 0.001159090909090909, "loss": 1.2245, "step": 192 }, { "epoch": 24.9, "eval_accuracy": 0.36666666666666664, "eval_loss": 1.2595055103302002, "eval_runtime": 1.5658, "eval_samples_per_second": 38.32, "eval_steps_per_second": 1.277, "step": 193 }, { "epoch": 25.94, "eval_accuracy": 0.4, "eval_loss": 1.331896424293518, "eval_runtime": 2.0295, "eval_samples_per_second": 29.564, "eval_steps_per_second": 0.985, "step": 201 }, { "epoch": 26.32, "grad_norm": 0.9537753462791443, "learning_rate": 0.0010227272727272726, "loss": 1.2034, "step": 204 }, { "epoch": 26.97, "eval_accuracy": 0.38333333333333336, "eval_loss": 1.25283944606781, "eval_runtime": 1.5793, "eval_samples_per_second": 37.992, "eval_steps_per_second": 1.266, "step": 209 }, { "epoch": 27.87, "grad_norm": 1.7752221822738647, "learning_rate": 0.0008863636363636364, "loss": 1.1818, "step": 216 }, { "epoch": 28.0, "eval_accuracy": 0.36666666666666664, "eval_loss": 1.3656209707260132, "eval_runtime": 1.5691, "eval_samples_per_second": 38.237, "eval_steps_per_second": 1.275, "step": 217 }, { "epoch": 28.9, "eval_accuracy": 0.38333333333333336, "eval_loss": 1.2500847578048706, "eval_runtime": 1.5809, "eval_samples_per_second": 37.953, "eval_steps_per_second": 1.265, "step": 224 }, { "epoch": 29.42, "grad_norm": 1.1072659492492676, "learning_rate": 0.00075, "loss": 1.1479, "step": 228 }, { "epoch": 29.94, "eval_accuracy": 0.3, "eval_loss": 1.324063777923584, "eval_runtime": 1.5986, "eval_samples_per_second": 37.533, "eval_steps_per_second": 1.251, "step": 232 }, { "epoch": 30.97, "grad_norm": 0.8141500353813171, "learning_rate": 0.0006136363636363637, "loss": 1.1193, "step": 240 }, { "epoch": 30.97, "eval_accuracy": 0.36666666666666664, "eval_loss": 1.380292534828186, "eval_runtime": 1.6105, "eval_samples_per_second": 37.256, "eval_steps_per_second": 1.242, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.4166666666666667, "eval_loss": 1.2294162511825562, "eval_runtime": 1.603, "eval_samples_per_second": 37.429, "eval_steps_per_second": 1.248, "step": 248 }, { "epoch": 32.52, "grad_norm": 0.7440662384033203, "learning_rate": 0.0004772727272727273, "loss": 1.1071, "step": 252 }, { "epoch": 32.9, "eval_accuracy": 0.5, "eval_loss": 1.4134150743484497, "eval_runtime": 1.5689, "eval_samples_per_second": 38.243, "eval_steps_per_second": 1.275, "step": 255 }, { "epoch": 33.94, "eval_accuracy": 0.36666666666666664, "eval_loss": 1.4123319387435913, "eval_runtime": 1.5844, "eval_samples_per_second": 37.869, "eval_steps_per_second": 1.262, "step": 263 }, { "epoch": 34.06, "grad_norm": 1.0041050910949707, "learning_rate": 0.0003409090909090909, "loss": 1.0429, "step": 264 }, { "epoch": 34.97, "eval_accuracy": 0.5, "eval_loss": 1.2183587551116943, "eval_runtime": 1.607, "eval_samples_per_second": 37.336, "eval_steps_per_second": 1.245, "step": 271 }, { "epoch": 35.61, "grad_norm": 1.336283564567566, "learning_rate": 0.00020454545454545454, "loss": 1.0528, "step": 276 }, { "epoch": 36.0, "eval_accuracy": 0.45, "eval_loss": 1.3099627494812012, "eval_runtime": 2.0818, "eval_samples_per_second": 28.821, "eval_steps_per_second": 0.961, "step": 279 }, { "epoch": 36.9, "eval_accuracy": 0.38333333333333336, "eval_loss": 1.3248744010925293, "eval_runtime": 1.5674, "eval_samples_per_second": 38.28, "eval_steps_per_second": 1.276, "step": 286 }, { "epoch": 37.16, "grad_norm": 1.36141836643219, "learning_rate": 6.818181818181818e-05, "loss": 1.0055, "step": 288 }, { "epoch": 37.94, "eval_accuracy": 0.5, "eval_loss": 1.3050577640533447, "eval_runtime": 1.5996, "eval_samples_per_second": 37.51, "eval_steps_per_second": 1.25, "step": 294 }, { "epoch": 37.94, "step": 294, "total_flos": 2.864620236542755e+18, "train_loss": 1.2294491975485873, "train_runtime": 1673.337, "train_samples_per_second": 24.447, "train_steps_per_second": 0.176 } ], "logging_steps": 12, "max_steps": 294, "num_input_tokens_seen": 0, "num_train_epochs": 42, "save_steps": 500, "total_flos": 2.864620236542755e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }