|
{ |
|
"best_metric": 0.5617977528089888, |
|
"best_model_checkpoint": "videomae-base-finetuned-ElderReact-Happiness/checkpoint-308", |
|
"epoch": 4.189473684210526, |
|
"eval_steps": 500, |
|
"global_step": 380, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.016882419586182, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 0.7013, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.460171699523926, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.6796, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.785501480102539, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.74, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 4.875156879425049, |
|
"learning_rate": 4.970760233918128e-05, |
|
"loss": 0.7044, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 7.292051792144775, |
|
"learning_rate": 4.824561403508772e-05, |
|
"loss": 0.7057, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 7.376715660095215, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.7756, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.814006805419922, |
|
"learning_rate": 4.5321637426900585e-05, |
|
"loss": 0.6723, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_f1": 0.5549845837615622, |
|
"eval_loss": 0.7051836252212524, |
|
"eval_runtime": 1602.3534, |
|
"eval_samples_per_second": 0.497, |
|
"eval_steps_per_second": 0.062, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 3.5856733322143555, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.6769, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.825258255004883, |
|
"learning_rate": 4.239766081871345e-05, |
|
"loss": 0.6709, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.5148415565490723, |
|
"learning_rate": 4.093567251461988e-05, |
|
"loss": 0.727, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 2.4561989307403564, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.6706, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 2.8707118034362793, |
|
"learning_rate": 3.8011695906432746e-05, |
|
"loss": 0.7212, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.9631680250167847, |
|
"learning_rate": 3.654970760233918e-05, |
|
"loss": 0.7098, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 9.788117408752441, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.7359, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 10.18767261505127, |
|
"learning_rate": 3.362573099415205e-05, |
|
"loss": 0.6748, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_f1": 0.006289308176100629, |
|
"eval_loss": 0.6582110524177551, |
|
"eval_runtime": 1572.4124, |
|
"eval_samples_per_second": 0.506, |
|
"eval_steps_per_second": 0.064, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 4.891780853271484, |
|
"learning_rate": 3.216374269005848e-05, |
|
"loss": 0.6535, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 5.340829849243164, |
|
"learning_rate": 3.0701754385964913e-05, |
|
"loss": 0.6206, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 2.49276065826416, |
|
"learning_rate": 2.9239766081871346e-05, |
|
"loss": 0.7199, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 4.326613903045654, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.6836, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 3.0456364154815674, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.6351, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 3.2249398231506348, |
|
"learning_rate": 2.485380116959064e-05, |
|
"loss": 0.6387, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 6.565281867980957, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.7079, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 3.1880271434783936, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.7745, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_f1": 0.5589403973509933, |
|
"eval_loss": 0.6776570677757263, |
|
"eval_runtime": 1484.5114, |
|
"eval_samples_per_second": 0.536, |
|
"eval_steps_per_second": 0.067, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 1.8452378511428833, |
|
"learning_rate": 2.046783625730994e-05, |
|
"loss": 0.661, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 3.8639652729034424, |
|
"learning_rate": 1.9005847953216373e-05, |
|
"loss": 0.6743, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 1.692387342453003, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 0.6429, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 2.1387529373168945, |
|
"learning_rate": 1.608187134502924e-05, |
|
"loss": 0.6625, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"grad_norm": 4.23469352722168, |
|
"learning_rate": 1.4619883040935673e-05, |
|
"loss": 0.7041, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 2.7067341804504395, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 0.6662, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 5.802628040313721, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 0.6286, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_f1": 0.5617977528089888, |
|
"eval_loss": 0.6901108026504517, |
|
"eval_runtime": 1598.6139, |
|
"eval_samples_per_second": 0.498, |
|
"eval_steps_per_second": 0.063, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 6.488805294036865, |
|
"learning_rate": 1.023391812865497e-05, |
|
"loss": 0.5724, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 5.795147895812988, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 0.6883, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 2.8693175315856934, |
|
"learning_rate": 7.3099415204678366e-06, |
|
"loss": 0.6674, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 2.0438976287841797, |
|
"learning_rate": 5.8479532163742686e-06, |
|
"loss": 0.6649, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 2.5664432048797607, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 0.6188, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 6.117301940917969, |
|
"learning_rate": 2.9239766081871343e-06, |
|
"loss": 0.6105, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 5.357626914978027, |
|
"learning_rate": 1.4619883040935671e-06, |
|
"loss": 0.6511, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"grad_norm": 3.1075475215911865, |
|
"learning_rate": 0.0, |
|
"loss": 0.6018, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_f1": 0.552046783625731, |
|
"eval_loss": 0.6821714043617249, |
|
"eval_runtime": 1518.717, |
|
"eval_samples_per_second": 0.524, |
|
"eval_steps_per_second": 0.066, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"step": 380, |
|
"total_flos": 3.783052093636215e+18, |
|
"train_loss": 0.6766992167422646, |
|
"train_runtime": 18323.6613, |
|
"train_samples_per_second": 0.166, |
|
"train_steps_per_second": 0.021 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_f1": 0.48677248677248675, |
|
"eval_loss": 0.7341307997703552, |
|
"eval_runtime": 1329.5369, |
|
"eval_samples_per_second": 0.564, |
|
"eval_steps_per_second": 0.071, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_f1": 0.48677248677248675, |
|
"eval_loss": 0.7341306805610657, |
|
"eval_runtime": 1312.4548, |
|
"eval_samples_per_second": 0.571, |
|
"eval_steps_per_second": 0.072, |
|
"step": 380 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 380, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 3.783052093636215e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|