{ "best_metric": 0.7939698492462312, "best_model_checkpoint": "videomae-base-finetuned-ElderReact-Sadness11/checkpoint-39", "epoch": 9.076315789473684, "eval_steps": 500, "global_step": 380, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 5.057394027709961, "learning_rate": 1.3157894736842106e-05, "loss": 0.5954, "step": 10 }, { "epoch": 0.05, "grad_norm": 2.3400027751922607, "learning_rate": 2.6315789473684212e-05, "loss": 0.5237, "step": 20 }, { "epoch": 0.08, "grad_norm": 1.731216549873352, "learning_rate": 3.9473684210526316e-05, "loss": 0.536, "step": 30 }, { "epoch": 0.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5131362080574036, "eval_runtime": 204.6912, "eval_samples_per_second": 3.889, "eval_steps_per_second": 0.244, "step": 39 }, { "epoch": 1.0, "grad_norm": 1.5283629894256592, "learning_rate": 4.970760233918128e-05, "loss": 0.4652, "step": 40 }, { "epoch": 1.03, "grad_norm": 4.3654375076293945, "learning_rate": 4.824561403508772e-05, "loss": 0.5204, "step": 50 }, { "epoch": 1.06, "grad_norm": 4.845033168792725, "learning_rate": 4.678362573099415e-05, "loss": 0.4353, "step": 60 }, { "epoch": 1.08, "grad_norm": 3.4814226627349854, "learning_rate": 4.5321637426900585e-05, "loss": 0.4818, "step": 70 }, { "epoch": 1.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5103316307067871, "eval_runtime": 217.1913, "eval_samples_per_second": 3.665, "eval_steps_per_second": 0.23, "step": 78 }, { "epoch": 2.01, "grad_norm": 1.3778834342956543, "learning_rate": 4.3859649122807014e-05, "loss": 0.4654, "step": 80 }, { "epoch": 2.03, "grad_norm": 2.985764503479004, "learning_rate": 4.239766081871345e-05, "loss": 0.4797, "step": 90 }, { "epoch": 2.06, "grad_norm": 5.408680438995361, "learning_rate": 4.093567251461988e-05, "loss": 0.5515, "step": 100 }, { "epoch": 2.08, "grad_norm": 1.4513001441955566, "learning_rate": 3.9473684210526316e-05, "loss": 0.4619, "step": 110 }, { "epoch": 2.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5378741025924683, "eval_runtime": 210.4629, "eval_samples_per_second": 3.782, "eval_steps_per_second": 0.238, "step": 117 }, { "epoch": 3.01, "grad_norm": 1.8063580989837646, "learning_rate": 3.8011695906432746e-05, "loss": 0.415, "step": 120 }, { "epoch": 3.03, "grad_norm": 1.633288025856018, "learning_rate": 3.654970760233918e-05, "loss": 0.3969, "step": 130 }, { "epoch": 3.06, "grad_norm": 2.1381514072418213, "learning_rate": 3.508771929824561e-05, "loss": 0.5252, "step": 140 }, { "epoch": 3.09, "grad_norm": 1.0985075235366821, "learning_rate": 3.362573099415205e-05, "loss": 0.4704, "step": 150 }, { "epoch": 3.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5121056437492371, "eval_runtime": 211.8645, "eval_samples_per_second": 3.757, "eval_steps_per_second": 0.236, "step": 156 }, { "epoch": 4.01, "grad_norm": 1.3139609098434448, "learning_rate": 3.216374269005848e-05, "loss": 0.472, "step": 160 }, { "epoch": 4.04, "grad_norm": 1.0382041931152344, "learning_rate": 3.0701754385964913e-05, "loss": 0.476, "step": 170 }, { "epoch": 4.06, "grad_norm": 1.9487144947052002, "learning_rate": 2.9239766081871346e-05, "loss": 0.3877, "step": 180 }, { "epoch": 4.09, "grad_norm": 1.7637165784835815, "learning_rate": 2.777777777777778e-05, "loss": 0.5128, "step": 190 }, { "epoch": 4.1, "eval_accuracy": 0.785175879396985, "eval_loss": 0.5606660842895508, "eval_runtime": 217.2766, "eval_samples_per_second": 3.664, "eval_steps_per_second": 0.23, "step": 195 }, { "epoch": 5.01, "grad_norm": 5.227734088897705, "learning_rate": 2.6315789473684212e-05, "loss": 0.5178, "step": 200 }, { "epoch": 5.04, "grad_norm": 1.3204573392868042, "learning_rate": 2.485380116959064e-05, "loss": 0.4433, "step": 210 }, { "epoch": 5.07, "grad_norm": 4.037812232971191, "learning_rate": 2.3391812865497074e-05, "loss": 0.4521, "step": 220 }, { "epoch": 5.09, "grad_norm": 1.3772491216659546, "learning_rate": 2.1929824561403507e-05, "loss": 0.4691, "step": 230 }, { "epoch": 5.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5311861634254456, "eval_runtime": 237.5745, "eval_samples_per_second": 3.351, "eval_steps_per_second": 0.21, "step": 234 }, { "epoch": 6.02, "grad_norm": 1.5507118701934814, "learning_rate": 2.046783625730994e-05, "loss": 0.3618, "step": 240 }, { "epoch": 6.04, "grad_norm": 2.258527994155884, "learning_rate": 1.9005847953216373e-05, "loss": 0.4646, "step": 250 }, { "epoch": 6.07, "grad_norm": 2.1282131671905518, "learning_rate": 1.7543859649122806e-05, "loss": 0.4335, "step": 260 }, { "epoch": 6.09, "grad_norm": 6.060189723968506, "learning_rate": 1.608187134502924e-05, "loss": 0.4799, "step": 270 }, { "epoch": 6.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5785809755325317, "eval_runtime": 226.9223, "eval_samples_per_second": 3.508, "eval_steps_per_second": 0.22, "step": 273 }, { "epoch": 7.02, "grad_norm": 2.6040494441986084, "learning_rate": 1.4619883040935673e-05, "loss": 0.4185, "step": 280 }, { "epoch": 7.04, "grad_norm": 2.301250457763672, "learning_rate": 1.3157894736842106e-05, "loss": 0.4667, "step": 290 }, { "epoch": 7.07, "grad_norm": 1.555298089981079, "learning_rate": 1.1695906432748537e-05, "loss": 0.3985, "step": 300 }, { "epoch": 7.1, "grad_norm": 4.446948528289795, "learning_rate": 1.023391812865497e-05, "loss": 0.3584, "step": 310 }, { "epoch": 7.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5997191071510315, "eval_runtime": 210.3019, "eval_samples_per_second": 3.785, "eval_steps_per_second": 0.238, "step": 312 }, { "epoch": 8.02, "grad_norm": 3.671874761581421, "learning_rate": 8.771929824561403e-06, "loss": 0.5455, "step": 320 }, { "epoch": 8.05, "grad_norm": 1.981838345527649, "learning_rate": 7.3099415204678366e-06, "loss": 0.3086, "step": 330 }, { "epoch": 8.07, "grad_norm": 1.8950921297073364, "learning_rate": 5.8479532163742686e-06, "loss": 0.5139, "step": 340 }, { "epoch": 8.1, "grad_norm": 3.0630290508270264, "learning_rate": 4.3859649122807014e-06, "loss": 0.3749, "step": 350 }, { "epoch": 8.1, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5853101015090942, "eval_runtime": 215.7709, "eval_samples_per_second": 3.689, "eval_steps_per_second": 0.232, "step": 351 }, { "epoch": 9.02, "grad_norm": 2.4308969974517822, "learning_rate": 2.9239766081871343e-06, "loss": 0.3716, "step": 360 }, { "epoch": 9.05, "grad_norm": 2.7709577083587646, "learning_rate": 1.4619883040935671e-06, "loss": 0.4279, "step": 370 }, { "epoch": 9.08, "grad_norm": 3.444274425506592, "learning_rate": 0.0, "loss": 0.3792, "step": 380 }, { "epoch": 9.08, "eval_accuracy": 0.7939698492462312, "eval_loss": 0.5896389484405518, "eval_runtime": 206.7967, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.242, "step": 380 }, { "epoch": 9.08, "step": 380, "total_flos": 7.475141472240992e+18, "train_loss": 0.4567895299509952, "train_runtime": 3981.2865, "train_samples_per_second": 1.527, "train_steps_per_second": 0.095 }, { "epoch": 9.08, "eval_accuracy": 0.8026666666666666, "eval_loss": 0.5030941367149353, "eval_runtime": 189.9731, "eval_samples_per_second": 3.948, "eval_steps_per_second": 0.247, "step": 380 }, { "epoch": 9.08, "eval_accuracy": 0.8026666666666666, "eval_loss": 0.5030941367149353, "eval_runtime": 191.8522, "eval_samples_per_second": 3.909, "eval_steps_per_second": 0.245, "step": 380 } ], "logging_steps": 10, "max_steps": 380, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 7.475141472240992e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }