|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 27.07662582397461, |
|
"eval_runtime": 3.2794, |
|
"eval_samples_per_second": 84.466, |
|
"eval_steps_per_second": 10.673, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.009732905982905982, |
|
"loss": 29.447, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 24.088668823242188, |
|
"eval_runtime": 3.2857, |
|
"eval_samples_per_second": 84.304, |
|
"eval_steps_per_second": 10.652, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 23.964040756225586, |
|
"eval_runtime": 3.2866, |
|
"eval_samples_per_second": 84.282, |
|
"eval_steps_per_second": 10.649, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.009465811965811966, |
|
"loss": 27.7172, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 22.026033401489258, |
|
"eval_runtime": 3.2885, |
|
"eval_samples_per_second": 84.232, |
|
"eval_steps_per_second": 10.643, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.009198717948717948, |
|
"loss": 26.4345, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 22.050241470336914, |
|
"eval_runtime": 3.2777, |
|
"eval_samples_per_second": 84.511, |
|
"eval_steps_per_second": 10.678, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 22.933731079101562, |
|
"eval_runtime": 3.2673, |
|
"eval_samples_per_second": 84.78, |
|
"eval_steps_per_second": 10.712, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.008931623931623932, |
|
"loss": 27.0832, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.285945892333984, |
|
"eval_runtime": 3.2711, |
|
"eval_samples_per_second": 84.68, |
|
"eval_steps_per_second": 10.7, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.470870971679688, |
|
"eval_runtime": 3.2744, |
|
"eval_samples_per_second": 84.596, |
|
"eval_steps_per_second": 10.689, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.008664529914529914, |
|
"loss": 25.6523, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.353946685791016, |
|
"eval_runtime": 3.2711, |
|
"eval_samples_per_second": 84.68, |
|
"eval_steps_per_second": 10.7, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.008397435897435898, |
|
"loss": 25.5288, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.298246383666992, |
|
"eval_runtime": 3.2754, |
|
"eval_samples_per_second": 84.569, |
|
"eval_steps_per_second": 10.686, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 22.059904098510742, |
|
"eval_runtime": 3.2708, |
|
"eval_samples_per_second": 84.69, |
|
"eval_steps_per_second": 10.701, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00813034188034188, |
|
"loss": 25.9846, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 22.10004997253418, |
|
"eval_runtime": 3.2728, |
|
"eval_samples_per_second": 84.637, |
|
"eval_steps_per_second": 10.694, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.007863247863247862, |
|
"loss": 26.609, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 24.113325119018555, |
|
"eval_runtime": 3.2603, |
|
"eval_samples_per_second": 84.961, |
|
"eval_steps_per_second": 10.735, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 22.439191818237305, |
|
"eval_runtime": 3.2721, |
|
"eval_samples_per_second": 84.656, |
|
"eval_steps_per_second": 10.697, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.007596153846153846, |
|
"loss": 26.7751, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 22.051416397094727, |
|
"eval_runtime": 3.2657, |
|
"eval_samples_per_second": 84.82, |
|
"eval_steps_per_second": 10.717, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.441261291503906, |
|
"eval_runtime": 3.2728, |
|
"eval_samples_per_second": 84.636, |
|
"eval_steps_per_second": 10.694, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.007329059829059828, |
|
"loss": 25.8484, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.67586898803711, |
|
"eval_runtime": 3.2717, |
|
"eval_samples_per_second": 84.666, |
|
"eval_steps_per_second": 10.698, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.007061965811965812, |
|
"loss": 25.7937, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.272621154785156, |
|
"eval_runtime": 3.2737, |
|
"eval_samples_per_second": 84.614, |
|
"eval_steps_per_second": 10.691, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.248903274536133, |
|
"eval_runtime": 3.2728, |
|
"eval_samples_per_second": 84.638, |
|
"eval_steps_per_second": 10.694, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.006794871794871795, |
|
"loss": 25.6479, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.18812370300293, |
|
"eval_runtime": 3.2717, |
|
"eval_samples_per_second": 84.665, |
|
"eval_steps_per_second": 10.698, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.006527777777777778, |
|
"loss": 25.6144, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.03541374206543, |
|
"eval_runtime": 3.2731, |
|
"eval_samples_per_second": 84.628, |
|
"eval_steps_per_second": 10.693, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.06875228881836, |
|
"eval_runtime": 3.27, |
|
"eval_samples_per_second": 84.709, |
|
"eval_steps_per_second": 10.703, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.006260683760683761, |
|
"loss": 25.4368, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.21544075012207, |
|
"eval_runtime": 3.2774, |
|
"eval_samples_per_second": 84.519, |
|
"eval_steps_per_second": 10.679, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.234771728515625, |
|
"eval_runtime": 3.2735, |
|
"eval_samples_per_second": 84.62, |
|
"eval_steps_per_second": 10.692, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.005993589743589743, |
|
"loss": 25.5564, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.15097999572754, |
|
"eval_runtime": 3.2785, |
|
"eval_samples_per_second": 84.49, |
|
"eval_steps_per_second": 10.676, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.005726495726495726, |
|
"loss": 25.5495, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.399234771728516, |
|
"eval_runtime": 3.2797, |
|
"eval_samples_per_second": 84.46, |
|
"eval_steps_per_second": 10.672, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.403474807739258, |
|
"eval_runtime": 3.2783, |
|
"eval_samples_per_second": 84.496, |
|
"eval_steps_per_second": 10.676, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.005459401709401709, |
|
"loss": 25.4536, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.964290618896484, |
|
"eval_runtime": 3.2695, |
|
"eval_samples_per_second": 84.721, |
|
"eval_steps_per_second": 10.705, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.005192307692307693, |
|
"loss": 25.3641, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 20.778039932250977, |
|
"eval_runtime": 3.2743, |
|
"eval_samples_per_second": 84.598, |
|
"eval_steps_per_second": 10.689, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.47609519958496, |
|
"eval_runtime": 3.2759, |
|
"eval_samples_per_second": 84.557, |
|
"eval_steps_per_second": 10.684, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.004925213675213675, |
|
"loss": 25.4089, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.10526466369629, |
|
"eval_runtime": 3.2774, |
|
"eval_samples_per_second": 84.519, |
|
"eval_steps_per_second": 10.679, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.15567398071289, |
|
"eval_runtime": 3.2733, |
|
"eval_samples_per_second": 84.624, |
|
"eval_steps_per_second": 10.693, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.004658119658119658, |
|
"loss": 25.6056, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.017988204956055, |
|
"eval_runtime": 3.2716, |
|
"eval_samples_per_second": 84.668, |
|
"eval_steps_per_second": 10.698, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.004391025641025641, |
|
"loss": 25.5078, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.102638244628906, |
|
"eval_runtime": 3.2768, |
|
"eval_samples_per_second": 84.534, |
|
"eval_steps_per_second": 10.681, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.372339248657227, |
|
"eval_runtime": 3.278, |
|
"eval_samples_per_second": 84.503, |
|
"eval_steps_per_second": 10.677, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.004123931623931624, |
|
"loss": 25.6607, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.430931091308594, |
|
"eval_runtime": 3.271, |
|
"eval_samples_per_second": 84.683, |
|
"eval_steps_per_second": 10.7, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.003856837606837607, |
|
"loss": 25.9641, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.40829849243164, |
|
"eval_runtime": 3.2761, |
|
"eval_samples_per_second": 84.553, |
|
"eval_steps_per_second": 10.684, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.287500381469727, |
|
"eval_runtime": 3.2756, |
|
"eval_samples_per_second": 84.564, |
|
"eval_steps_per_second": 10.685, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0035897435897435897, |
|
"loss": 25.6756, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.453834533691406, |
|
"eval_runtime": 3.2765, |
|
"eval_samples_per_second": 84.54, |
|
"eval_steps_per_second": 10.682, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 21.18698501586914, |
|
"eval_runtime": 3.2787, |
|
"eval_samples_per_second": 84.484, |
|
"eval_steps_per_second": 10.675, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0033226495726495727, |
|
"loss": 25.4709, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 21.079587936401367, |
|
"eval_runtime": 3.2743, |
|
"eval_samples_per_second": 84.599, |
|
"eval_steps_per_second": 10.689, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0030555555555555557, |
|
"loss": 25.2913, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.941194534301758, |
|
"eval_runtime": 3.265, |
|
"eval_samples_per_second": 84.839, |
|
"eval_steps_per_second": 10.72, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.893199920654297, |
|
"eval_runtime": 3.2682, |
|
"eval_samples_per_second": 84.756, |
|
"eval_steps_per_second": 10.709, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0027884615384615387, |
|
"loss": 25.1541, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 20.917190551757812, |
|
"eval_runtime": 3.2676, |
|
"eval_samples_per_second": 84.771, |
|
"eval_steps_per_second": 10.711, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0025213675213675213, |
|
"loss": 25.0679, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.67868995666504, |
|
"eval_runtime": 3.2617, |
|
"eval_samples_per_second": 84.924, |
|
"eval_steps_per_second": 10.731, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 20.630779266357422, |
|
"eval_runtime": 3.2725, |
|
"eval_samples_per_second": 84.644, |
|
"eval_steps_per_second": 10.695, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0022542735042735042, |
|
"loss": 24.965, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.524005889892578, |
|
"eval_runtime": 3.262, |
|
"eval_samples_per_second": 84.918, |
|
"eval_steps_per_second": 10.73, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 20.637798309326172, |
|
"eval_runtime": 3.2694, |
|
"eval_samples_per_second": 84.724, |
|
"eval_steps_per_second": 10.705, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0019871794871794872, |
|
"loss": 24.8969, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 20.50297737121582, |
|
"eval_runtime": 3.2754, |
|
"eval_samples_per_second": 84.57, |
|
"eval_steps_per_second": 10.686, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.0017200854700854702, |
|
"loss": 24.8319, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.325740814208984, |
|
"eval_runtime": 3.272, |
|
"eval_samples_per_second": 84.657, |
|
"eval_steps_per_second": 10.697, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.298986434936523, |
|
"eval_runtime": 3.2697, |
|
"eval_samples_per_second": 84.718, |
|
"eval_steps_per_second": 10.704, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.0014529914529914532, |
|
"loss": 24.7301, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 20.36614418029785, |
|
"eval_runtime": 3.272, |
|
"eval_samples_per_second": 84.658, |
|
"eval_steps_per_second": 10.697, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.001185897435897436, |
|
"loss": 24.6644, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.20882225036621, |
|
"eval_runtime": 3.2794, |
|
"eval_samples_per_second": 84.467, |
|
"eval_steps_per_second": 10.673, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.15430450439453, |
|
"eval_runtime": 3.2814, |
|
"eval_samples_per_second": 84.414, |
|
"eval_steps_per_second": 10.666, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0009188034188034189, |
|
"loss": 24.5917, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 20.086036682128906, |
|
"eval_runtime": 3.2838, |
|
"eval_samples_per_second": 84.354, |
|
"eval_steps_per_second": 10.658, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.067216873168945, |
|
"eval_runtime": 3.2742, |
|
"eval_samples_per_second": 84.601, |
|
"eval_steps_per_second": 10.69, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.0006517094017094017, |
|
"loss": 24.5505, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.051782608032227, |
|
"eval_runtime": 3.2808, |
|
"eval_samples_per_second": 84.431, |
|
"eval_steps_per_second": 10.668, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00038461538461538467, |
|
"loss": 24.5065, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 20.003583908081055, |
|
"eval_runtime": 3.2825, |
|
"eval_samples_per_second": 84.387, |
|
"eval_steps_per_second": 10.663, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 19.99390411376953, |
|
"eval_runtime": 3.2846, |
|
"eval_samples_per_second": 84.332, |
|
"eval_steps_per_second": 10.656, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 0.00011752136752136752, |
|
"loss": 24.4773, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 19.989938735961914, |
|
"eval_runtime": 3.2766, |
|
"eval_samples_per_second": 84.539, |
|
"eval_steps_per_second": 10.682, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 7.25686341470208e+16, |
|
"train_loss": 25.62186534914196, |
|
"train_runtime": 3131.9415, |
|
"train_samples_per_second": 47.702, |
|
"train_steps_per_second": 5.977 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 7.25686341470208e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|