|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 9360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.8690470457077026, |
|
"eval_runtime": 4.1588, |
|
"eval_samples_per_second": 66.605, |
|
"eval_steps_per_second": 8.416, |
|
"step": 156 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4729241877256318, |
|
"epoch": 1.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 0.7261884212493896, |
|
"eval_runtime": 4.2514, |
|
"eval_samples_per_second": 65.154, |
|
"eval_steps_per_second": 8.233, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 2.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 0.7646142244338989, |
|
"eval_runtime": 4.264, |
|
"eval_samples_per_second": 64.963, |
|
"eval_steps_per_second": 8.208, |
|
"step": 468 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5270758122743683, |
|
"epoch": 3.0, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0028397435897435895, |
|
"loss": 0.8294, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5884476534296029, |
|
"eval_loss": 0.7043800354003906, |
|
"eval_runtime": 4.2676, |
|
"eval_samples_per_second": 64.907, |
|
"eval_steps_per_second": 8.201, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 4.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5884476534296029, |
|
"eval_loss": 0.7099267840385437, |
|
"eval_runtime": 4.2759, |
|
"eval_samples_per_second": 64.781, |
|
"eval_steps_per_second": 8.185, |
|
"step": 780 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5884476534296029, |
|
"epoch": 5.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.6449177265167236, |
|
"eval_runtime": 4.2742, |
|
"eval_samples_per_second": 64.808, |
|
"eval_steps_per_second": 8.189, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 6.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00267948717948718, |
|
"loss": 0.785, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.7755226492881775, |
|
"eval_runtime": 4.2724, |
|
"eval_samples_per_second": 64.834, |
|
"eval_steps_per_second": 8.192, |
|
"step": 1092 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 7.0, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6606498194945848, |
|
"eval_loss": 0.6442985534667969, |
|
"eval_runtime": 4.2721, |
|
"eval_samples_per_second": 64.84, |
|
"eval_steps_per_second": 8.193, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 7, |
|
"best_eval_accuracy": 0.6606498194945848, |
|
"epoch": 8.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6859205776173285, |
|
"eval_loss": 0.6348857879638672, |
|
"eval_runtime": 4.2715, |
|
"eval_samples_per_second": 64.849, |
|
"eval_steps_per_second": 8.194, |
|
"step": 1404 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 9.0, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0025192307692307693, |
|
"loss": 0.6665, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.9543846845626831, |
|
"eval_runtime": 4.2724, |
|
"eval_samples_per_second": 64.835, |
|
"eval_steps_per_second": 8.192, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 10.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.6008003354072571, |
|
"eval_runtime": 4.2739, |
|
"eval_samples_per_second": 64.811, |
|
"eval_steps_per_second": 8.189, |
|
"step": 1716 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 11.0, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.6502608060836792, |
|
"eval_runtime": 4.2753, |
|
"eval_samples_per_second": 64.791, |
|
"eval_steps_per_second": 8.187, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 12.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.002358974358974359, |
|
"loss": 0.6276, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.626946210861206, |
|
"eval_runtime": 4.2756, |
|
"eval_samples_per_second": 64.787, |
|
"eval_steps_per_second": 8.186, |
|
"step": 2028 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 13.0, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.5787616968154907, |
|
"eval_runtime": 4.2726, |
|
"eval_samples_per_second": 64.832, |
|
"eval_steps_per_second": 8.192, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 14.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.664523720741272, |
|
"eval_runtime": 4.2707, |
|
"eval_samples_per_second": 64.861, |
|
"eval_steps_per_second": 8.195, |
|
"step": 2340 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 15.0, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6425992779783394, |
|
"eval_loss": 0.9684011936187744, |
|
"eval_runtime": 4.273, |
|
"eval_samples_per_second": 64.825, |
|
"eval_steps_per_second": 8.191, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 16.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0021987179487179486, |
|
"loss": 0.587, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.6227187514305115, |
|
"eval_runtime": 4.2714, |
|
"eval_samples_per_second": 64.85, |
|
"eval_steps_per_second": 8.194, |
|
"step": 2652 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 17.0, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.6448877453804016, |
|
"eval_runtime": 4.2744, |
|
"eval_samples_per_second": 64.804, |
|
"eval_steps_per_second": 8.188, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 18.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.6650940179824829, |
|
"eval_runtime": 4.271, |
|
"eval_samples_per_second": 64.857, |
|
"eval_steps_per_second": 8.195, |
|
"step": 2964 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 19.0, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0020384615384615385, |
|
"loss": 0.5287, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 1.132426142692566, |
|
"eval_runtime": 4.2727, |
|
"eval_samples_per_second": 64.83, |
|
"eval_steps_per_second": 8.191, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 20.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.7391165494918823, |
|
"eval_runtime": 4.2755, |
|
"eval_samples_per_second": 64.787, |
|
"eval_steps_per_second": 8.186, |
|
"step": 3276 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 21.0, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 1.0194385051727295, |
|
"eval_runtime": 4.2717, |
|
"eval_samples_per_second": 64.845, |
|
"eval_steps_per_second": 8.193, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 22.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0018782051282051281, |
|
"loss": 0.5035, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.7837710380554199, |
|
"eval_runtime": 4.2729, |
|
"eval_samples_per_second": 64.826, |
|
"eval_steps_per_second": 8.191, |
|
"step": 3588 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 23.0, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.8647497296333313, |
|
"eval_runtime": 4.2733, |
|
"eval_samples_per_second": 64.821, |
|
"eval_steps_per_second": 8.19, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 24.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 1.0973684787750244, |
|
"eval_runtime": 4.2735, |
|
"eval_samples_per_second": 64.817, |
|
"eval_steps_per_second": 8.19, |
|
"step": 3900 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 25.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0017179487179487178, |
|
"loss": 0.4533, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.5861077308654785, |
|
"eval_runtime": 4.2708, |
|
"eval_samples_per_second": 64.86, |
|
"eval_steps_per_second": 8.195, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 26.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.6684616804122925, |
|
"eval_runtime": 4.2664, |
|
"eval_samples_per_second": 64.925, |
|
"eval_steps_per_second": 8.204, |
|
"step": 4212 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 27.0, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.6998275518417358, |
|
"eval_runtime": 4.2768, |
|
"eval_samples_per_second": 64.768, |
|
"eval_steps_per_second": 8.184, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 28.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0015576923076923079, |
|
"loss": 0.4398, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.7596197128295898, |
|
"eval_runtime": 4.2737, |
|
"eval_samples_per_second": 64.815, |
|
"eval_steps_per_second": 8.19, |
|
"step": 4524 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 29.0, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.696681797504425, |
|
"eval_runtime": 4.2817, |
|
"eval_samples_per_second": 64.695, |
|
"eval_steps_per_second": 8.174, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 26, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 30.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.7040970325469971, |
|
"eval_runtime": 4.2717, |
|
"eval_samples_per_second": 64.845, |
|
"eval_steps_per_second": 8.193, |
|
"step": 4836 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 31.0, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.7616851925849915, |
|
"eval_runtime": 4.2802, |
|
"eval_samples_per_second": 64.716, |
|
"eval_steps_per_second": 8.177, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 32.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0013974358974358976, |
|
"loss": 0.3837, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.7990546822547913, |
|
"eval_runtime": 4.2772, |
|
"eval_samples_per_second": 64.762, |
|
"eval_steps_per_second": 8.183, |
|
"step": 5148 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 33.0, |
|
"step": 5148 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.8228901028633118, |
|
"eval_runtime": 4.2769, |
|
"eval_samples_per_second": 64.766, |
|
"eval_steps_per_second": 8.183, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 34.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.7745354175567627, |
|
"eval_runtime": 4.2809, |
|
"eval_samples_per_second": 64.706, |
|
"eval_steps_per_second": 8.176, |
|
"step": 5460 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 35.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0012371794871794872, |
|
"loss": 0.3471, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.778685450553894, |
|
"eval_runtime": 4.2824, |
|
"eval_samples_per_second": 64.684, |
|
"eval_steps_per_second": 8.173, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 36.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.7990731596946716, |
|
"eval_runtime": 4.2833, |
|
"eval_samples_per_second": 64.669, |
|
"eval_steps_per_second": 8.171, |
|
"step": 5772 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 37.0, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 1.0205601453781128, |
|
"eval_runtime": 4.2801, |
|
"eval_samples_per_second": 64.718, |
|
"eval_steps_per_second": 8.177, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 38.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0010769230769230769, |
|
"loss": 0.3303, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.8976525664329529, |
|
"eval_runtime": 4.2827, |
|
"eval_samples_per_second": 64.679, |
|
"eval_steps_per_second": 8.172, |
|
"step": 6084 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 39.0, |
|
"step": 6084 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.7327412366867065, |
|
"eval_runtime": 4.2744, |
|
"eval_samples_per_second": 64.805, |
|
"eval_steps_per_second": 8.188, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 40.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.810184895992279, |
|
"eval_runtime": 4.2816, |
|
"eval_samples_per_second": 64.696, |
|
"eval_steps_per_second": 8.175, |
|
"step": 6396 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 41.0, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0009166666666666668, |
|
"loss": 0.2991, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.7347400784492493, |
|
"eval_runtime": 4.2774, |
|
"eval_samples_per_second": 64.759, |
|
"eval_steps_per_second": 8.182, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 42.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.867670476436615, |
|
"eval_runtime": 4.2777, |
|
"eval_samples_per_second": 64.754, |
|
"eval_steps_per_second": 8.182, |
|
"step": 6708 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 43.0, |
|
"step": 6708 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.9773887991905212, |
|
"eval_runtime": 4.2767, |
|
"eval_samples_per_second": 64.77, |
|
"eval_steps_per_second": 8.184, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 30, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 44.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0007564102564102564, |
|
"loss": 0.275, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7581227436823105, |
|
"eval_loss": 0.8556901216506958, |
|
"eval_runtime": 4.277, |
|
"eval_samples_per_second": 64.765, |
|
"eval_steps_per_second": 8.183, |
|
"step": 7020 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 45.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.9789180755615234, |
|
"eval_runtime": 4.2782, |
|
"eval_samples_per_second": 64.747, |
|
"eval_steps_per_second": 8.181, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 46.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 1.001485824584961, |
|
"eval_runtime": 4.2769, |
|
"eval_samples_per_second": 64.767, |
|
"eval_steps_per_second": 8.184, |
|
"step": 7332 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 47.0, |
|
"step": 7332 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.8449525833129883, |
|
"eval_runtime": 4.2736, |
|
"eval_samples_per_second": 64.816, |
|
"eval_steps_per_second": 8.19, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 48.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0005961538461538461, |
|
"loss": 0.2596, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7581227436823105, |
|
"eval_loss": 0.8222180008888245, |
|
"eval_runtime": 4.278, |
|
"eval_samples_per_second": 64.75, |
|
"eval_steps_per_second": 8.181, |
|
"step": 7644 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 49.0, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.8967640995979309, |
|
"eval_runtime": 4.286, |
|
"eval_samples_per_second": 64.629, |
|
"eval_steps_per_second": 8.166, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 50.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.8584460616111755, |
|
"eval_runtime": 4.2772, |
|
"eval_samples_per_second": 64.762, |
|
"eval_steps_per_second": 8.183, |
|
"step": 7956 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 51.0, |
|
"step": 7956 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00043589743589743596, |
|
"loss": 0.2469, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.9157448410987854, |
|
"eval_runtime": 4.2727, |
|
"eval_samples_per_second": 64.83, |
|
"eval_steps_per_second": 8.191, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 52.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.9731793403625488, |
|
"eval_runtime": 4.2736, |
|
"eval_samples_per_second": 64.817, |
|
"eval_steps_per_second": 8.19, |
|
"step": 8268 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 53.0, |
|
"step": 8268 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 1.0671247243881226, |
|
"eval_runtime": 4.2701, |
|
"eval_samples_per_second": 64.87, |
|
"eval_steps_per_second": 8.197, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 54.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0002756410256410257, |
|
"loss": 0.2303, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.9512353539466858, |
|
"eval_runtime": 4.2682, |
|
"eval_samples_per_second": 64.899, |
|
"eval_steps_per_second": 8.2, |
|
"step": 8580 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 55.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.8708024621009827, |
|
"eval_runtime": 4.2721, |
|
"eval_samples_per_second": 64.839, |
|
"eval_steps_per_second": 8.193, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 56.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.9289500117301941, |
|
"eval_runtime": 4.2714, |
|
"eval_samples_per_second": 64.849, |
|
"eval_steps_per_second": 8.194, |
|
"step": 8892 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 57.0, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 0.2275, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.8866466283798218, |
|
"eval_runtime": 4.2702, |
|
"eval_samples_per_second": 64.868, |
|
"eval_steps_per_second": 8.196, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 58.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.936638593673706, |
|
"eval_runtime": 4.2724, |
|
"eval_samples_per_second": 64.835, |
|
"eval_steps_per_second": 8.192, |
|
"step": 9204 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 59.0, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.9555269479751587, |
|
"eval_runtime": 4.279, |
|
"eval_samples_per_second": 64.735, |
|
"eval_steps_per_second": 8.179, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 44, |
|
"best_eval_accuracy": 0.7581227436823105, |
|
"epoch": 60.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 9360, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.4369181796016856, |
|
"train_runtime": 3671.2013, |
|
"train_samples_per_second": 40.695, |
|
"train_steps_per_second": 2.55 |
|
} |
|
], |
|
"max_steps": 9360, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|