{
  "best_metric": 0.8414901387874361,
  "best_model_checkpoint": "videomae-base-finetuned-ElderReact-anger/checkpoint-231",
  "epoch": 4.189473684210526,
  "eval_steps": 500,
  "global_step": 380,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "grad_norm": 9.803234100341797,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.6756,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.9001967906951904,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.6072,
      "step": 20
    },
    {
      "epoch": 0.08,
      "grad_norm": 5.898644924163818,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.639,
      "step": 30
    },
    {
      "epoch": 0.11,
      "grad_norm": 4.6400651931762695,
      "learning_rate": 4.970760233918128e-05,
      "loss": 0.6533,
      "step": 40
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0733180046081543,
      "learning_rate": 4.824561403508772e-05,
      "loss": 0.5862,
      "step": 50
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.681271553039551,
      "learning_rate": 4.678362573099415e-05,
      "loss": 0.6974,
      "step": 60
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.2247824668884277,
      "learning_rate": 4.5321637426900585e-05,
      "loss": 0.6521,
      "step": 70
    },
    {
      "epoch": 0.2,
      "eval_f1": 0.8396501457725948,
      "eval_loss": 0.5760672092437744,
      "eval_runtime": 1244.145,
      "eval_samples_per_second": 0.64,
      "eval_steps_per_second": 0.08,
      "step": 77
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.428652286529541,
      "learning_rate": 4.3859649122807014e-05,
      "loss": 0.5924,
      "step": 80
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4199066162109375,
      "learning_rate": 4.239766081871345e-05,
      "loss": 0.6198,
      "step": 90
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6612868309020996,
      "learning_rate": 4.093567251461988e-05,
      "loss": 0.7013,
      "step": 100
    },
    {
      "epoch": 1.09,
      "grad_norm": 6.7560296058654785,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.6165,
      "step": 110
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.1539256572723389,
      "learning_rate": 3.8011695906432746e-05,
      "loss": 0.6262,
      "step": 120
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.500359296798706,
      "learning_rate": 3.654970760233918e-05,
      "loss": 0.6867,
      "step": 130
    },
    {
      "epoch": 1.17,
      "grad_norm": 5.01125431060791,
      "learning_rate": 3.508771929824561e-05,
      "loss": 0.6146,
      "step": 140
    },
    {
      "epoch": 1.19,
      "grad_norm": 6.541323661804199,
      "learning_rate": 3.362573099415205e-05,
      "loss": 0.5836,
      "step": 150
    },
    {
      "epoch": 1.2,
      "eval_f1": 0.8396501457725948,
      "eval_loss": 0.5899893045425415,
      "eval_runtime": 1102.8397,
      "eval_samples_per_second": 0.722,
      "eval_steps_per_second": 0.091,
      "step": 154
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.118071436882019,
      "learning_rate": 3.216374269005848e-05,
      "loss": 0.7149,
      "step": 160
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.7673434019088745,
      "learning_rate": 3.0701754385964913e-05,
      "loss": 0.691,
      "step": 170
    },
    {
      "epoch": 2.07,
      "grad_norm": 1.951958179473877,
      "learning_rate": 2.9239766081871346e-05,
      "loss": 0.6627,
      "step": 180
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.83512806892395,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.5765,
      "step": 190
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5325303077697754,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.5841,
      "step": 200
    },
    {
      "epoch": 2.15,
      "grad_norm": 1.832468867301941,
      "learning_rate": 2.485380116959064e-05,
      "loss": 0.5493,
      "step": 210
    },
    {
      "epoch": 2.17,
      "grad_norm": 1.6643192768096924,
      "learning_rate": 2.3391812865497074e-05,
      "loss": 0.4687,
      "step": 220
    },
    {
      "epoch": 2.2,
      "grad_norm": 6.444340705871582,
      "learning_rate": 2.1929824561403507e-05,
      "loss": 0.6733,
      "step": 230
    },
    {
      "epoch": 2.2,
      "eval_f1": 0.8414901387874361,
      "eval_loss": 0.5975301861763,
      "eval_runtime": 1053.4704,
      "eval_samples_per_second": 0.756,
      "eval_steps_per_second": 0.095,
      "step": 231
    },
    {
      "epoch": 3.02,
      "grad_norm": 2.2140614986419678,
      "learning_rate": 2.046783625730994e-05,
      "loss": 0.5454,
      "step": 240
    },
    {
      "epoch": 3.05,
      "grad_norm": 1.7166881561279297,
      "learning_rate": 1.9005847953216373e-05,
      "loss": 0.5886,
      "step": 250
    },
    {
      "epoch": 3.08,
      "grad_norm": 4.783045768737793,
      "learning_rate": 1.7543859649122806e-05,
      "loss": 0.6739,
      "step": 260
    },
    {
      "epoch": 3.1,
      "grad_norm": 1.9788509607315063,
      "learning_rate": 1.608187134502924e-05,
      "loss": 0.5494,
      "step": 270
    },
    {
      "epoch": 3.13,
      "grad_norm": 4.0997090339660645,
      "learning_rate": 1.4619883040935673e-05,
      "loss": 0.6146,
      "step": 280
    },
    {
      "epoch": 3.16,
      "grad_norm": 3.787324905395508,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.6205,
      "step": 290
    },
    {
      "epoch": 3.18,
      "grad_norm": 1.0465867519378662,
      "learning_rate": 1.1695906432748537e-05,
      "loss": 0.6136,
      "step": 300
    },
    {
      "epoch": 3.2,
      "eval_f1": 0.8396501457725948,
      "eval_loss": 0.5905158519744873,
      "eval_runtime": 1159.7833,
      "eval_samples_per_second": 0.686,
      "eval_steps_per_second": 0.086,
      "step": 308
    },
    {
      "epoch": 4.01,
      "grad_norm": 7.15081787109375,
      "learning_rate": 1.023391812865497e-05,
      "loss": 0.6304,
      "step": 310
    },
    {
      "epoch": 4.03,
      "grad_norm": 3.809471368789673,
      "learning_rate": 8.771929824561403e-06,
      "loss": 0.5289,
      "step": 320
    },
    {
      "epoch": 4.06,
      "grad_norm": 6.606677055358887,
      "learning_rate": 7.3099415204678366e-06,
      "loss": 0.4864,
      "step": 330
    },
    {
      "epoch": 4.08,
      "grad_norm": 3.1082494258880615,
      "learning_rate": 5.8479532163742686e-06,
      "loss": 0.7082,
      "step": 340
    },
    {
      "epoch": 4.11,
      "grad_norm": 2.4603655338287354,
      "learning_rate": 4.3859649122807014e-06,
      "loss": 0.5509,
      "step": 350
    },
    {
      "epoch": 4.14,
      "grad_norm": 3.1572580337524414,
      "learning_rate": 2.9239766081871343e-06,
      "loss": 0.5476,
      "step": 360
    },
    {
      "epoch": 4.16,
      "grad_norm": 1.5971115827560425,
      "learning_rate": 1.4619883040935671e-06,
      "loss": 0.5939,
      "step": 370
    },
    {
      "epoch": 4.19,
      "grad_norm": 1.7118576765060425,
      "learning_rate": 0.0,
      "loss": 0.5394,
      "step": 380
    },
    {
      "epoch": 4.19,
      "eval_f1": 0.8350515463917526,
      "eval_loss": 0.5992816686630249,
      "eval_runtime": 652.5135,
      "eval_samples_per_second": 1.22,
      "eval_steps_per_second": 0.153,
      "step": 380
    },
    {
      "epoch": 4.19,
      "step": 380,
      "total_flos": 3.783052093636215e+18,
      "train_loss": 0.6122152554361444,
      "train_runtime": 13314.3168,
      "train_samples_per_second": 0.228,
      "train_steps_per_second": 0.029
    },
    {
      "epoch": 4.19,
      "eval_f1": 0.8670694864048338,
      "eval_loss": 0.6053914427757263,
      "eval_runtime": 599.5418,
      "eval_samples_per_second": 1.251,
      "eval_steps_per_second": 0.157,
      "step": 380
    },
    {
      "epoch": 4.19,
      "eval_f1": 0.8670694864048338,
      "eval_loss": 0.6053914427757263,
      "eval_runtime": 599.0073,
      "eval_samples_per_second": 1.252,
      "eval_steps_per_second": 0.157,
      "step": 380
    }
  ],
  "logging_steps": 10,
  "max_steps": 380,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "total_flos": 3.783052093636215e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}