"best_metric": 0.4796141982078552,
"best_model_checkpoint": "/content/output_dir/checkpoint-268",
"epoch": 10.0,
"global_step": 2680,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"max_steps": 2680,
"num_train_epochs": 10,
"total_flos": 7113018526540800.0,
"trial_name": null,
"trial_params": null
epoch | eval_loss | eval_matthews_correlation | eval_runtime | eval_samples_per_second | eval_steps_per_second | step | learning_rate | loss |
---|---|---|---|---|---|---|---|---|
1 | 0.4796141982078552 | 0.5351033849356494 | 8.8067 | 118.433 | 14.875 | 268 | 0.000018067415730337083 | 0.4913 |
2 | 0.5334435701370239 | 0.5178799252679331 | 8.9439 | 116.616 | 14.647 | 536 | 0.00001605992509363296 | 0.2872 |
3 | 0.5544090270996094 | 0.5649788851042796 | 8.9467 | 116.58 | 14.642 | 804 | 0.000014052434456928841 | 0.1777 |
4 | 0.5754779577255249 | 0.6105374636148787 | 8.8982 | 117.215 | 14.722 | 1072 | 0.000012044943820224718 | 0.1263 |
5 | 0.7263916730880737 | 0.5807606001872874 | 8.9705 | 116.27 | 14.603 | 1340 | 0.000010037453183520601 | 0.0905 |
6 | 0.8121512532234192 | 0.5651092792103851 | 8.9924 | 115.987 | 14.568 | 1608 | 0.00000802996254681648 | 0.0692 |
7 | 0.941014289855957 | 0.5632084517291658 | 8.9583 | 116.428 | 14.623 | 1876 | 0.000006022471910112359 | 0.0413 |
8 | 1.0095174312591553 | 0.5856531698367675 | 9.0029 | 115.851 | 14.551 | 2144 | 0.00000401498127340824 | 0.0327 |
9 | 1.0425965785980225 | 0.5941395545037332 | 8.9217 | 116.906 | 14.683 | 2412 | 0.00000200749063670412 | 0.0202 |
10 | 1.0782166719436646 | 0.5956649094312695 | 8.9472 | 116.572 | 14.641 | 2680 | 0 | 0.0104 |