|
{ |
|
"best_metric": 0.507537688442211, |
|
"best_model_checkpoint": "videomae-base-finetuned-ElderReact-Happiness11/checkpoint-156", |
|
"epoch": 9.076315789473684, |
|
"eval_steps": 500, |
|
"global_step": 380, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.6178653240203857, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 0.6756, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.96007776260376, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.6977, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.377594470977783, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.6977, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.4635678391959799, |
|
"eval_loss": 0.7047719359397888, |
|
"eval_runtime": 261.9257, |
|
"eval_samples_per_second": 3.039, |
|
"eval_steps_per_second": 0.191, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.699587345123291, |
|
"learning_rate": 4.970760233918128e-05, |
|
"loss": 0.7053, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 6.8290114402771, |
|
"learning_rate": 4.824561403508772e-05, |
|
"loss": 0.665, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.537061929702759, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.7055, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.5095937252044678, |
|
"learning_rate": 4.5321637426900585e-05, |
|
"loss": 0.7198, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.47110552763819097, |
|
"eval_loss": 0.7133272290229797, |
|
"eval_runtime": 280.3033, |
|
"eval_samples_per_second": 2.84, |
|
"eval_steps_per_second": 0.178, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 2.1461029052734375, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.6286, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 2.1844964027404785, |
|
"learning_rate": 4.239766081871345e-05, |
|
"loss": 0.6547, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 4.2716522216796875, |
|
"learning_rate": 4.093567251461988e-05, |
|
"loss": 0.6401, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 4.427234649658203, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.639, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.40703517587939697, |
|
"eval_loss": 0.8557814359664917, |
|
"eval_runtime": 261.857, |
|
"eval_samples_per_second": 3.04, |
|
"eval_steps_per_second": 0.191, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 2.5250039100646973, |
|
"learning_rate": 3.8011695906432746e-05, |
|
"loss": 0.7011, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 4.220043659210205, |
|
"learning_rate": 3.654970760233918e-05, |
|
"loss": 0.602, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 7.634807109832764, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.6536, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 2.9651682376861572, |
|
"learning_rate": 3.362573099415205e-05, |
|
"loss": 0.664, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.507537688442211, |
|
"eval_loss": 0.6846373081207275, |
|
"eval_runtime": 252.5146, |
|
"eval_samples_per_second": 3.152, |
|
"eval_steps_per_second": 0.198, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 5.29097318649292, |
|
"learning_rate": 3.216374269005848e-05, |
|
"loss": 0.6102, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 4.258277416229248, |
|
"learning_rate": 3.0701754385964913e-05, |
|
"loss": 0.6022, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 2.0270493030548096, |
|
"learning_rate": 2.9239766081871346e-05, |
|
"loss": 0.6251, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 3.647946834564209, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.6217, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.41457286432160806, |
|
"eval_loss": 0.8411366939544678, |
|
"eval_runtime": 254.7722, |
|
"eval_samples_per_second": 3.124, |
|
"eval_steps_per_second": 0.196, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 5.861187934875488, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.6029, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 4.177021026611328, |
|
"learning_rate": 2.485380116959064e-05, |
|
"loss": 0.5714, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 2.6523263454437256, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.6066, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 8.681379318237305, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.5826, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_accuracy": 0.42462311557788945, |
|
"eval_loss": 0.8426756262779236, |
|
"eval_runtime": 246.0667, |
|
"eval_samples_per_second": 3.235, |
|
"eval_steps_per_second": 0.203, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 5.220656871795654, |
|
"learning_rate": 2.046783625730994e-05, |
|
"loss": 0.5107, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 9.55013656616211, |
|
"learning_rate": 1.9005847953216373e-05, |
|
"loss": 0.4856, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 4.11871862411499, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 0.5916, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 4.019587516784668, |
|
"learning_rate": 1.608187134502924e-05, |
|
"loss": 0.5881, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 0.435929648241206, |
|
"eval_loss": 0.8049066662788391, |
|
"eval_runtime": 263.5585, |
|
"eval_samples_per_second": 3.02, |
|
"eval_steps_per_second": 0.19, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 4.318753719329834, |
|
"learning_rate": 1.4619883040935673e-05, |
|
"loss": 0.5706, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 4.975842475891113, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 0.5435, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 4.172918796539307, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 0.575, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 4.347151279449463, |
|
"learning_rate": 1.023391812865497e-05, |
|
"loss": 0.421, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.5050251256281407, |
|
"eval_loss": 0.778738796710968, |
|
"eval_runtime": 244.8077, |
|
"eval_samples_per_second": 3.252, |
|
"eval_steps_per_second": 0.204, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 6.215036392211914, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 0.4506, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 6.92239236831665, |
|
"learning_rate": 7.3099415204678366e-06, |
|
"loss": 0.6033, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 2.6235239505767822, |
|
"learning_rate": 5.8479532163742686e-06, |
|
"loss": 0.4765, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 5.367151260375977, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 0.4674, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.47361809045226133, |
|
"eval_loss": 0.8280923962593079, |
|
"eval_runtime": 266.1328, |
|
"eval_samples_per_second": 2.991, |
|
"eval_steps_per_second": 0.188, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 3.808866500854492, |
|
"learning_rate": 2.9239766081871343e-06, |
|
"loss": 0.5096, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 5.840443134307861, |
|
"learning_rate": 1.4619883040935671e-06, |
|
"loss": 0.4805, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 6.295402526855469, |
|
"learning_rate": 0.0, |
|
"loss": 0.4775, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"eval_accuracy": 0.4434673366834171, |
|
"eval_loss": 0.8615225553512573, |
|
"eval_runtime": 248.7642, |
|
"eval_samples_per_second": 3.2, |
|
"eval_steps_per_second": 0.201, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"step": 380, |
|
"total_flos": 7.475141472240992e+18, |
|
"train_loss": 0.5953672107897306, |
|
"train_runtime": 4834.4844, |
|
"train_samples_per_second": 1.258, |
|
"train_steps_per_second": 0.079 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"eval_accuracy": 0.49866666666666665, |
|
"eval_loss": 0.7278424501419067, |
|
"eval_runtime": 243.6787, |
|
"eval_samples_per_second": 3.078, |
|
"eval_steps_per_second": 0.193, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"eval_accuracy": 0.49866666666666665, |
|
"eval_loss": 0.7278424501419067, |
|
"eval_runtime": 236.8099, |
|
"eval_samples_per_second": 3.167, |
|
"eval_steps_per_second": 0.198, |
|
"step": 380 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 380, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 7.475141472240992e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|