{ "best_metric": 0.8166666666666667, "best_model_checkpoint": "beit-base-patch16-224-dmae-va-U5-42/checkpoint-224", "epoch": 37.935483870967744, "eval_steps": 500, "global_step": 294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.347098708152771, "eval_runtime": 1.5986, "eval_samples_per_second": 37.533, "eval_steps_per_second": 1.251, "step": 7 }, { "epoch": 1.29, "grad_norm": 10.756444931030273, "learning_rate": 1.6666666666666667e-05, "loss": 1.6023, "step": 10 }, { "epoch": 1.94, "eval_accuracy": 0.5833333333333334, "eval_loss": 1.0873388051986694, "eval_runtime": 1.6091, "eval_samples_per_second": 37.288, "eval_steps_per_second": 1.243, "step": 15 }, { "epoch": 2.58, "grad_norm": 9.189943313598633, "learning_rate": 3.3333333333333335e-05, "loss": 1.1509, "step": 20 }, { "epoch": 2.97, "eval_accuracy": 0.5833333333333334, "eval_loss": 0.9947898983955383, "eval_runtime": 2.0991, "eval_samples_per_second": 28.584, "eval_steps_per_second": 0.953, "step": 23 }, { "epoch": 3.87, "grad_norm": 11.76810073852539, "learning_rate": 5e-05, "loss": 0.826, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7244278192520142, "eval_runtime": 1.6205, "eval_samples_per_second": 37.025, "eval_steps_per_second": 1.234, "step": 31 }, { "epoch": 4.9, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.5740881562232971, "eval_runtime": 2.1053, "eval_samples_per_second": 28.5, "eval_steps_per_second": 0.95, "step": 38 }, { "epoch": 5.16, "grad_norm": 10.483253479003906, "learning_rate": 4.810606060606061e-05, "loss": 0.5551, "step": 40 }, { "epoch": 5.94, "eval_accuracy": 0.75, "eval_loss": 0.6568958759307861, "eval_runtime": 2.0199, "eval_samples_per_second": 29.704, "eval_steps_per_second": 0.99, "step": 46 }, { "epoch": 6.45, "grad_norm": 6.183708667755127, "learning_rate": 4.621212121212121e-05, "loss": 0.3649, "step": 50 }, { "epoch": 6.97, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.6321703195571899, "eval_runtime": 1.6185, "eval_samples_per_second": 37.07, "eval_steps_per_second": 1.236, "step": 54 }, { "epoch": 7.74, "grad_norm": 8.211569786071777, "learning_rate": 4.431818181818182e-05, "loss": 0.2592, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.6993545889854431, "eval_runtime": 1.7914, "eval_samples_per_second": 33.494, "eval_steps_per_second": 1.116, "step": 62 }, { "epoch": 8.9, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.6589908599853516, "eval_runtime": 1.6239, "eval_samples_per_second": 36.949, "eval_steps_per_second": 1.232, "step": 69 }, { "epoch": 9.03, "grad_norm": 6.782934665679932, "learning_rate": 4.242424242424243e-05, "loss": 0.1958, "step": 70 }, { "epoch": 9.94, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.6845510601997375, "eval_runtime": 1.8061, "eval_samples_per_second": 33.22, "eval_steps_per_second": 1.107, "step": 77 }, { "epoch": 10.32, "grad_norm": 6.670173645019531, "learning_rate": 4.053030303030303e-05, "loss": 0.1664, "step": 80 }, { "epoch": 10.97, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7165997624397278, "eval_runtime": 1.6803, "eval_samples_per_second": 35.708, "eval_steps_per_second": 1.19, "step": 85 }, { "epoch": 11.61, "grad_norm": 7.649813175201416, "learning_rate": 3.8636363636363636e-05, "loss": 0.1571, "step": 90 }, { "epoch": 12.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.7842047214508057, "eval_runtime": 1.6667, "eval_samples_per_second": 35.999, "eval_steps_per_second": 1.2, "step": 93 }, { "epoch": 12.9, "grad_norm": 5.415360927581787, "learning_rate": 3.6742424242424246e-05, "loss": 0.1174, "step": 100 }, { "epoch": 12.9, "eval_accuracy": 0.8, "eval_loss": 0.8464832901954651, "eval_runtime": 1.8073, "eval_samples_per_second": 33.198, "eval_steps_per_second": 1.107, "step": 100 }, { "epoch": 13.94, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.9116414189338684, "eval_runtime": 1.6421, "eval_samples_per_second": 36.539, "eval_steps_per_second": 1.218, "step": 108 }, { "epoch": 14.19, "grad_norm": 3.7090325355529785, "learning_rate": 3.484848484848485e-05, "loss": 0.0956, "step": 110 }, { "epoch": 14.97, "eval_accuracy": 0.75, "eval_loss": 0.9740654230117798, "eval_runtime": 1.6076, "eval_samples_per_second": 37.322, "eval_steps_per_second": 1.244, "step": 116 }, { "epoch": 15.48, "grad_norm": 7.993117809295654, "learning_rate": 3.295454545454545e-05, "loss": 0.1252, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.8, "eval_loss": 0.7759976983070374, "eval_runtime": 1.6595, "eval_samples_per_second": 36.154, "eval_steps_per_second": 1.205, "step": 124 }, { "epoch": 16.77, "grad_norm": 3.9146523475646973, "learning_rate": 3.106060606060606e-05, "loss": 0.0933, "step": 130 }, { "epoch": 16.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9424102902412415, "eval_runtime": 2.1274, "eval_samples_per_second": 28.203, "eval_steps_per_second": 0.94, "step": 131 }, { "epoch": 17.94, "eval_accuracy": 0.7333333333333333, "eval_loss": 1.0444563627243042, "eval_runtime": 1.6154, "eval_samples_per_second": 37.143, "eval_steps_per_second": 1.238, "step": 139 }, { "epoch": 18.06, "grad_norm": 7.39138126373291, "learning_rate": 2.916666666666667e-05, "loss": 0.1455, "step": 140 }, { "epoch": 18.97, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.8524726629257202, "eval_runtime": 1.681, "eval_samples_per_second": 35.693, "eval_steps_per_second": 1.19, "step": 147 }, { "epoch": 19.35, "grad_norm": 3.7276432514190674, "learning_rate": 2.7272727272727273e-05, "loss": 0.1034, "step": 150 }, { "epoch": 20.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.822151243686676, "eval_runtime": 2.0162, "eval_samples_per_second": 29.759, "eval_steps_per_second": 0.992, "step": 155 }, { "epoch": 20.65, "grad_norm": 4.372833728790283, "learning_rate": 2.537878787878788e-05, "loss": 0.0855, "step": 160 }, { "epoch": 20.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.8990534543991089, "eval_runtime": 2.1376, "eval_samples_per_second": 28.069, "eval_steps_per_second": 0.936, "step": 162 }, { "epoch": 21.94, "grad_norm": 6.204973220825195, "learning_rate": 2.3484848484848487e-05, "loss": 0.0985, "step": 170 }, { "epoch": 21.94, "eval_accuracy": 0.8, "eval_loss": 0.8954617977142334, "eval_runtime": 1.7159, "eval_samples_per_second": 34.968, "eval_steps_per_second": 1.166, "step": 170 }, { "epoch": 22.97, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.9603295922279358, "eval_runtime": 1.628, "eval_samples_per_second": 36.854, "eval_steps_per_second": 1.228, "step": 178 }, { "epoch": 23.23, "grad_norm": 2.5669796466827393, "learning_rate": 2.1590909090909093e-05, "loss": 0.087, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9932332634925842, "eval_runtime": 1.6549, "eval_samples_per_second": 36.255, "eval_steps_per_second": 1.209, "step": 186 }, { "epoch": 24.52, "grad_norm": 4.33466911315918, "learning_rate": 1.9696969696969697e-05, "loss": 0.0832, "step": 190 }, { "epoch": 24.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 1.0099666118621826, "eval_runtime": 1.6219, "eval_samples_per_second": 36.994, "eval_steps_per_second": 1.233, "step": 193 }, { "epoch": 25.81, "grad_norm": 2.6737489700317383, "learning_rate": 1.7803030303030303e-05, "loss": 0.0632, "step": 200 }, { "epoch": 25.94, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.9393168091773987, "eval_runtime": 1.62, "eval_samples_per_second": 37.037, "eval_steps_per_second": 1.235, "step": 201 }, { "epoch": 26.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9061955213546753, "eval_runtime": 2.0226, "eval_samples_per_second": 29.665, "eval_steps_per_second": 0.989, "step": 209 }, { "epoch": 27.1, "grad_norm": 6.029117107391357, "learning_rate": 1.590909090909091e-05, "loss": 0.0778, "step": 210 }, { "epoch": 28.0, "eval_accuracy": 0.8, "eval_loss": 0.9339290857315063, "eval_runtime": 2.1654, "eval_samples_per_second": 27.708, "eval_steps_per_second": 0.924, "step": 217 }, { "epoch": 28.39, "grad_norm": 3.781505584716797, "learning_rate": 1.4015151515151515e-05, "loss": 0.0627, "step": 220 }, { "epoch": 28.9, "eval_accuracy": 0.8166666666666667, "eval_loss": 1.003859043121338, "eval_runtime": 1.6395, "eval_samples_per_second": 36.596, "eval_steps_per_second": 1.22, "step": 224 }, { "epoch": 29.68, "grad_norm": 8.317666053771973, "learning_rate": 1.2121212121212122e-05, "loss": 0.0837, "step": 230 }, { "epoch": 29.94, "eval_accuracy": 0.7333333333333333, "eval_loss": 1.0636054277420044, "eval_runtime": 2.1368, "eval_samples_per_second": 28.079, "eval_steps_per_second": 0.936, "step": 232 }, { "epoch": 30.97, "grad_norm": 4.580255508422852, "learning_rate": 1.0227272727272729e-05, "loss": 0.0595, "step": 240 }, { "epoch": 30.97, "eval_accuracy": 0.75, "eval_loss": 1.0424180030822754, "eval_runtime": 1.9745, "eval_samples_per_second": 30.387, "eval_steps_per_second": 1.013, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.8, "eval_loss": 1.0514304637908936, "eval_runtime": 1.6234, "eval_samples_per_second": 36.96, "eval_steps_per_second": 1.232, "step": 248 }, { "epoch": 32.26, "grad_norm": 3.7219626903533936, "learning_rate": 8.333333333333334e-06, "loss": 0.0706, "step": 250 }, { "epoch": 32.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 1.0638715028762817, "eval_runtime": 1.6117, "eval_samples_per_second": 37.228, "eval_steps_per_second": 1.241, "step": 255 }, { "epoch": 33.55, "grad_norm": 2.273451566696167, "learning_rate": 6.43939393939394e-06, "loss": 0.0565, "step": 260 }, { "epoch": 33.94, "eval_accuracy": 0.7666666666666667, "eval_loss": 1.0494216680526733, "eval_runtime": 1.9391, "eval_samples_per_second": 30.942, "eval_steps_per_second": 1.031, "step": 263 }, { "epoch": 34.84, "grad_norm": 2.9928078651428223, "learning_rate": 4.5454545454545455e-06, "loss": 0.0515, "step": 270 }, { "epoch": 34.97, "eval_accuracy": 0.7666666666666667, "eval_loss": 1.0627790689468384, "eval_runtime": 1.5942, "eval_samples_per_second": 37.636, "eval_steps_per_second": 1.255, "step": 271 }, { "epoch": 36.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 1.1089335680007935, "eval_runtime": 1.74, "eval_samples_per_second": 34.484, "eval_steps_per_second": 1.149, "step": 279 }, { "epoch": 36.13, "grad_norm": 6.05413818359375, "learning_rate": 2.651515151515152e-06, "loss": 0.0614, "step": 280 }, { "epoch": 36.9, "eval_accuracy": 0.8, "eval_loss": 1.086135983467102, "eval_runtime": 1.5851, "eval_samples_per_second": 37.853, "eval_steps_per_second": 1.262, "step": 286 }, { "epoch": 37.42, "grad_norm": 3.7136027812957764, "learning_rate": 7.575757575757576e-07, "loss": 0.0496, "step": 290 }, { "epoch": 37.94, "eval_accuracy": 0.8, "eval_loss": 1.0713495016098022, "eval_runtime": 1.6733, "eval_samples_per_second": 35.857, "eval_steps_per_second": 1.195, "step": 294 }, { "epoch": 37.94, "step": 294, "total_flos": 2.8633958865108173e+18, "train_loss": 0.23703576421656578, "train_runtime": 1686.3425, "train_samples_per_second": 24.258, "train_steps_per_second": 0.174 } ], "logging_steps": 10, "max_steps": 294, "num_input_tokens_seen": 0, "num_train_epochs": 42, "save_steps": 500, "total_flos": 2.8633958865108173e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }