{ "best_metric": 0.011764705882352941, "best_model_checkpoint": "videomae-base-finetuned-ElderReact-Sadness12/checkpoint-195", "epoch": 9.076315789473684, "eval_steps": 500, "global_step": 380, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 6.048687934875488, "learning_rate": 1.3157894736842106e-05, "loss": 0.6582, "step": 10 }, { "epoch": 0.05, "grad_norm": 2.6699490547180176, "learning_rate": 2.6315789473684212e-05, "loss": 0.53, "step": 20 }, { "epoch": 0.08, "grad_norm": 1.7039493322372437, "learning_rate": 3.9473684210526316e-05, "loss": 0.5312, "step": 30 }, { "epoch": 0.1, "eval_f1": 0.0, "eval_loss": 0.5029338002204895, "eval_runtime": 200.7405, "eval_samples_per_second": 3.965, "eval_steps_per_second": 0.249, "step": 39 }, { "epoch": 1.0, "grad_norm": 1.7121078968048096, "learning_rate": 4.970760233918128e-05, "loss": 0.463, "step": 40 }, { "epoch": 1.03, "grad_norm": 4.501192092895508, "learning_rate": 4.824561403508772e-05, "loss": 0.5258, "step": 50 }, { "epoch": 1.06, "grad_norm": 4.873466491699219, "learning_rate": 4.678362573099415e-05, "loss": 0.4323, "step": 60 }, { "epoch": 1.08, "grad_norm": 3.1683290004730225, "learning_rate": 4.5321637426900585e-05, "loss": 0.4889, "step": 70 }, { "epoch": 1.1, "eval_f1": 0.0, "eval_loss": 0.5069170594215393, "eval_runtime": 206.7868, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.242, "step": 78 }, { "epoch": 2.01, "grad_norm": 1.351720929145813, "learning_rate": 4.3859649122807014e-05, "loss": 0.4661, "step": 80 }, { "epoch": 2.03, "grad_norm": 2.8010413646698, "learning_rate": 4.239766081871345e-05, "loss": 0.48, "step": 90 }, { "epoch": 2.06, "grad_norm": 5.665361404418945, "learning_rate": 4.093567251461988e-05, "loss": 0.559, "step": 100 }, { "epoch": 2.08, "grad_norm": 1.190671682357788, "learning_rate": 3.9473684210526316e-05, "loss": 0.4707, "step": 110 }, { "epoch": 2.1, "eval_f1": 0.0, "eval_loss": 0.52621990442276, "eval_runtime": 214.3958, "eval_samples_per_second": 3.713, "eval_steps_per_second": 0.233, "step": 117 }, { "epoch": 3.01, "grad_norm": 1.8163896799087524, "learning_rate": 3.8011695906432746e-05, "loss": 0.426, "step": 120 }, { "epoch": 3.03, "grad_norm": 1.4688210487365723, "learning_rate": 3.654970760233918e-05, "loss": 0.4022, "step": 130 }, { "epoch": 3.06, "grad_norm": 2.077988386154175, "learning_rate": 3.508771929824561e-05, "loss": 0.5341, "step": 140 }, { "epoch": 3.09, "grad_norm": 1.402732491493225, "learning_rate": 3.362573099415205e-05, "loss": 0.4758, "step": 150 }, { "epoch": 3.1, "eval_f1": 0.0, "eval_loss": 0.5054653882980347, "eval_runtime": 205.8731, "eval_samples_per_second": 3.866, "eval_steps_per_second": 0.243, "step": 156 }, { "epoch": 4.01, "grad_norm": 1.0033667087554932, "learning_rate": 3.216374269005848e-05, "loss": 0.4712, "step": 160 }, { "epoch": 4.04, "grad_norm": 1.153987169265747, "learning_rate": 3.0701754385964913e-05, "loss": 0.4766, "step": 170 }, { "epoch": 4.06, "grad_norm": 2.2413759231567383, "learning_rate": 2.9239766081871346e-05, "loss": 0.3828, "step": 180 }, { "epoch": 4.09, "grad_norm": 1.6635860204696655, "learning_rate": 2.777777777777778e-05, "loss": 0.5164, "step": 190 }, { "epoch": 4.1, "eval_f1": 0.011764705882352941, "eval_loss": 0.5524570941925049, "eval_runtime": 209.7075, "eval_samples_per_second": 3.796, "eval_steps_per_second": 0.238, "step": 195 }, { "epoch": 5.01, "grad_norm": 5.072777271270752, "learning_rate": 2.6315789473684212e-05, "loss": 0.5209, "step": 200 }, { "epoch": 5.04, "grad_norm": 1.1866474151611328, "learning_rate": 2.485380116959064e-05, "loss": 0.4455, "step": 210 }, { "epoch": 5.07, "grad_norm": 3.4566450119018555, "learning_rate": 2.3391812865497074e-05, "loss": 0.4492, "step": 220 }, { "epoch": 5.09, "grad_norm": 1.305912971496582, "learning_rate": 2.1929824561403507e-05, "loss": 0.4793, "step": 230 }, { "epoch": 5.1, "eval_f1": 0.0, "eval_loss": 0.512092649936676, "eval_runtime": 191.2967, "eval_samples_per_second": 4.161, "eval_steps_per_second": 0.261, "step": 234 }, { "epoch": 6.02, "grad_norm": 1.2563284635543823, "learning_rate": 2.046783625730994e-05, "loss": 0.3722, "step": 240 }, { "epoch": 6.04, "grad_norm": 3.25819993019104, "learning_rate": 1.9005847953216373e-05, "loss": 0.5026, "step": 250 }, { "epoch": 6.07, "grad_norm": 1.9585869312286377, "learning_rate": 1.7543859649122806e-05, "loss": 0.4454, "step": 260 }, { "epoch": 6.09, "grad_norm": 5.147015571594238, "learning_rate": 1.608187134502924e-05, "loss": 0.4872, "step": 270 }, { "epoch": 6.1, "eval_f1": 0.0, "eval_loss": 0.5168163180351257, "eval_runtime": 187.9502, "eval_samples_per_second": 4.235, "eval_steps_per_second": 0.266, "step": 273 }, { "epoch": 7.02, "grad_norm": 3.3504269123077393, "learning_rate": 1.4619883040935673e-05, "loss": 0.4434, "step": 280 }, { "epoch": 7.04, "grad_norm": 2.3411853313446045, "learning_rate": 1.3157894736842106e-05, "loss": 0.4706, "step": 290 }, { "epoch": 7.07, "grad_norm": 1.4593596458435059, "learning_rate": 1.1695906432748537e-05, "loss": 0.4031, "step": 300 }, { "epoch": 7.1, "grad_norm": 3.9099016189575195, "learning_rate": 1.023391812865497e-05, "loss": 0.3846, "step": 310 }, { "epoch": 7.1, "eval_f1": 0.0, "eval_loss": 0.548977792263031, "eval_runtime": 200.6612, "eval_samples_per_second": 3.967, "eval_steps_per_second": 0.249, "step": 312 }, { "epoch": 8.02, "grad_norm": 2.722304582595825, "learning_rate": 8.771929824561403e-06, "loss": 0.5456, "step": 320 }, { "epoch": 8.05, "grad_norm": 1.7937108278274536, "learning_rate": 7.3099415204678366e-06, "loss": 0.3438, "step": 330 }, { "epoch": 8.07, "grad_norm": 1.8574304580688477, "learning_rate": 5.8479532163742686e-06, "loss": 0.5481, "step": 340 }, { "epoch": 8.1, "grad_norm": 3.4769558906555176, "learning_rate": 4.3859649122807014e-06, "loss": 0.3796, "step": 350 }, { "epoch": 8.1, "eval_f1": 0.0, "eval_loss": 0.5376216173171997, "eval_runtime": 192.8973, "eval_samples_per_second": 4.127, "eval_steps_per_second": 0.259, "step": 351 }, { "epoch": 9.02, "grad_norm": 2.116058349609375, "learning_rate": 2.9239766081871343e-06, "loss": 0.416, "step": 360 }, { "epoch": 9.05, "grad_norm": 2.9650416374206543, "learning_rate": 1.4619883040935671e-06, "loss": 0.4537, "step": 370 }, { "epoch": 9.08, "grad_norm": 1.7653827667236328, "learning_rate": 0.0, "loss": 0.3853, "step": 380 }, { "epoch": 9.08, "eval_f1": 0.0, "eval_loss": 0.5240046977996826, "eval_runtime": 200.4262, "eval_samples_per_second": 3.972, "eval_steps_per_second": 0.249, "step": 380 }, { "epoch": 9.08, "step": 380, "total_flos": 7.475141472240992e+18, "train_loss": 0.4675432085990906, "train_runtime": 3759.0695, "train_samples_per_second": 1.617, "train_steps_per_second": 0.101 }, { "epoch": 9.08, "eval_f1": 0.013333333333333334, "eval_loss": 0.5360399484634399, "eval_runtime": 179.5545, "eval_samples_per_second": 4.177, "eval_steps_per_second": 0.262, "step": 380 }, { "epoch": 9.08, "eval_f1": 0.013333333333333334, "eval_loss": 0.5360399484634399, "eval_runtime": 198.4663, "eval_samples_per_second": 3.779, "eval_steps_per_second": 0.237, "step": 380 } ], "logging_steps": 10, "max_steps": 380, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 7.475141472240992e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }