|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5054151624548736, |
|
"eval_loss": 0.3735279142856598, |
|
"eval_runtime": 9.0462, |
|
"eval_samples_per_second": 30.621, |
|
"eval_steps_per_second": 3.869, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5054151624548736, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.002919871794871795, |
|
"loss": 0.5142, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5415162454873647, |
|
"eval_loss": 0.4847595989704132, |
|
"eval_runtime": 9.0779, |
|
"eval_samples_per_second": 30.514, |
|
"eval_steps_per_second": 3.856, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5415162454873647, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.3801901936531067, |
|
"eval_runtime": 9.1102, |
|
"eval_samples_per_second": 30.405, |
|
"eval_steps_per_second": 3.842, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5415162454873647, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0028397435897435895, |
|
"loss": 0.4859, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.4823205769062042, |
|
"eval_runtime": 9.1679, |
|
"eval_samples_per_second": 30.214, |
|
"eval_steps_per_second": 3.818, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5415162454873647, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0027596153846153847, |
|
"loss": 0.4412, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.39017239212989807, |
|
"eval_runtime": 9.088, |
|
"eval_samples_per_second": 30.48, |
|
"eval_steps_per_second": 3.851, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.5415162454873647, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5595667870036101, |
|
"eval_loss": 0.3743537962436676, |
|
"eval_runtime": 9.2011, |
|
"eval_samples_per_second": 30.105, |
|
"eval_steps_per_second": 3.804, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00267948717948718, |
|
"loss": 0.4418, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5487364620938628, |
|
"eval_loss": 0.4612128734588623, |
|
"eval_runtime": 9.1118, |
|
"eval_samples_per_second": 30.4, |
|
"eval_steps_per_second": 3.841, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.45901069045066833, |
|
"eval_runtime": 8.9276, |
|
"eval_samples_per_second": 31.027, |
|
"eval_steps_per_second": 3.92, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.0025993589743589745, |
|
"loss": 0.4467, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.47772425413131714, |
|
"eval_runtime": 8.8284, |
|
"eval_samples_per_second": 31.376, |
|
"eval_steps_per_second": 3.964, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0025192307692307693, |
|
"loss": 0.4177, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.48375451263537905, |
|
"eval_loss": 0.3615993857383728, |
|
"eval_runtime": 8.8201, |
|
"eval_samples_per_second": 31.405, |
|
"eval_steps_per_second": 3.968, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5595667870036101, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.37356361746788025, |
|
"eval_runtime": 9.032, |
|
"eval_samples_per_second": 30.669, |
|
"eval_steps_per_second": 3.875, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.002439102564102564, |
|
"loss": 0.3988, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.3464394211769104, |
|
"eval_runtime": 9.0159, |
|
"eval_samples_per_second": 30.724, |
|
"eval_steps_per_second": 3.882, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.002358974358974359, |
|
"loss": 0.3911, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.3522431254386902, |
|
"eval_runtime": 9.0184, |
|
"eval_samples_per_second": 30.715, |
|
"eval_steps_per_second": 3.881, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 12, |
|
"best_eval_accuracy": 0.628158844765343, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6859205776173285, |
|
"eval_loss": 0.3406241238117218, |
|
"eval_runtime": 9.0292, |
|
"eval_samples_per_second": 30.678, |
|
"eval_steps_per_second": 3.876, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.002278846153846154, |
|
"loss": 0.3893, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.42231810092926025, |
|
"eval_runtime": 9.1033, |
|
"eval_samples_per_second": 30.429, |
|
"eval_steps_per_second": 3.845, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5415162454873647, |
|
"eval_loss": 0.6759101748466492, |
|
"eval_runtime": 9.0957, |
|
"eval_samples_per_second": 30.454, |
|
"eval_steps_per_second": 3.848, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0021987179487179486, |
|
"loss": 0.38, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6823104693140795, |
|
"eval_loss": 0.36305832862854004, |
|
"eval_runtime": 9.0737, |
|
"eval_samples_per_second": 30.528, |
|
"eval_steps_per_second": 3.857, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6859205776173285, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.0021185897435897437, |
|
"loss": 0.3772, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.343391090631485, |
|
"eval_runtime": 9.1219, |
|
"eval_samples_per_second": 30.367, |
|
"eval_steps_per_second": 3.837, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6137184115523465, |
|
"eval_loss": 0.33444103598594666, |
|
"eval_runtime": 9.129, |
|
"eval_samples_per_second": 30.343, |
|
"eval_steps_per_second": 3.834, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 17, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0020384615384615385, |
|
"loss": 0.3639, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.3670320212841034, |
|
"eval_runtime": 9.0881, |
|
"eval_samples_per_second": 30.479, |
|
"eval_steps_per_second": 3.851, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0019583333333333336, |
|
"loss": 0.336, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.3482595980167389, |
|
"eval_runtime": 8.9775, |
|
"eval_samples_per_second": 30.855, |
|
"eval_steps_per_second": 3.899, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7148014440433214, |
|
"eval_loss": 0.34846988320350647, |
|
"eval_runtime": 6.7951, |
|
"eval_samples_per_second": 40.765, |
|
"eval_steps_per_second": 5.151, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 21, |
|
"best_eval_accuracy": 0.7148014440433214, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0018782051282051281, |
|
"loss": 0.3369, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.3540826141834259, |
|
"eval_runtime": 9.1597, |
|
"eval_samples_per_second": 30.241, |
|
"eval_steps_per_second": 3.821, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 22, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.3346291184425354, |
|
"eval_runtime": 9.1464, |
|
"eval_samples_per_second": 30.285, |
|
"eval_steps_per_second": 3.827, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 22, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.001798076923076923, |
|
"loss": 0.3291, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3387109339237213, |
|
"eval_runtime": 9.16, |
|
"eval_samples_per_second": 30.24, |
|
"eval_steps_per_second": 3.821, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0017179487179487178, |
|
"loss": 0.3228, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7220216606498195, |
|
"eval_loss": 0.3492320775985718, |
|
"eval_runtime": 9.1667, |
|
"eval_samples_per_second": 30.218, |
|
"eval_steps_per_second": 3.818, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.333434522151947, |
|
"eval_runtime": 9.1342, |
|
"eval_samples_per_second": 30.326, |
|
"eval_steps_per_second": 3.832, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.7364620938628159, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0016378205128205127, |
|
"loss": 0.3206, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.33880335092544556, |
|
"eval_runtime": 9.1365, |
|
"eval_samples_per_second": 30.318, |
|
"eval_steps_per_second": 3.831, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.740072202166065, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0015576923076923079, |
|
"loss": 0.3189, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.33039894700050354, |
|
"eval_runtime": 9.1312, |
|
"eval_samples_per_second": 30.335, |
|
"eval_steps_per_second": 3.833, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.740072202166065, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3566158413887024, |
|
"eval_runtime": 9.115, |
|
"eval_samples_per_second": 30.389, |
|
"eval_steps_per_second": 3.84, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.740072202166065, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0014775641025641026, |
|
"loss": 0.3148, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.33697280287742615, |
|
"eval_runtime": 9.1237, |
|
"eval_samples_per_second": 30.361, |
|
"eval_steps_per_second": 3.836, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.740072202166065, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.33278366923332214, |
|
"eval_runtime": 9.1556, |
|
"eval_samples_per_second": 30.255, |
|
"eval_steps_per_second": 3.823, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.740072202166065, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0013974358974358976, |
|
"loss": 0.31, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.3422046899795532, |
|
"eval_runtime": 9.1388, |
|
"eval_samples_per_second": 30.31, |
|
"eval_steps_per_second": 3.83, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 32, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0013173076923076923, |
|
"loss": 0.306, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3339076340198517, |
|
"eval_runtime": 9.1306, |
|
"eval_samples_per_second": 30.338, |
|
"eval_steps_per_second": 3.833, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 32, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.3253796398639679, |
|
"eval_runtime": 9.1502, |
|
"eval_samples_per_second": 30.272, |
|
"eval_steps_per_second": 3.825, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 32, |
|
"best_eval_accuracy": 0.7436823104693141, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0012371794871794872, |
|
"loss": 0.3032, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.3329690992832184, |
|
"eval_runtime": 9.1346, |
|
"eval_samples_per_second": 30.324, |
|
"eval_steps_per_second": 3.832, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0011570512820512822, |
|
"loss": 0.3028, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.3718496859073639, |
|
"eval_runtime": 9.1526, |
|
"eval_samples_per_second": 30.265, |
|
"eval_steps_per_second": 3.824, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.32942327857017517, |
|
"eval_runtime": 9.1344, |
|
"eval_samples_per_second": 30.325, |
|
"eval_steps_per_second": 3.832, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0010769230769230769, |
|
"loss": 0.3005, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.3465494215488434, |
|
"eval_runtime": 9.149, |
|
"eval_samples_per_second": 30.277, |
|
"eval_steps_per_second": 3.826, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7292418772563177, |
|
"eval_loss": 0.33344584703445435, |
|
"eval_runtime": 9.134, |
|
"eval_samples_per_second": 30.326, |
|
"eval_steps_per_second": 3.832, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0009967948717948718, |
|
"loss": 0.2965, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7256317689530686, |
|
"eval_loss": 0.32391032576560974, |
|
"eval_runtime": 9.1543, |
|
"eval_samples_per_second": 30.259, |
|
"eval_steps_per_second": 3.823, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0009166666666666668, |
|
"loss": 0.2947, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.33224305510520935, |
|
"eval_runtime": 9.1316, |
|
"eval_samples_per_second": 30.334, |
|
"eval_steps_per_second": 3.833, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.3370007574558258, |
|
"eval_runtime": 9.1457, |
|
"eval_samples_per_second": 30.288, |
|
"eval_steps_per_second": 3.827, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0008365384615384616, |
|
"loss": 0.2909, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7472924187725631, |
|
"eval_loss": 0.3385498523712158, |
|
"eval_runtime": 9.1809, |
|
"eval_samples_per_second": 30.171, |
|
"eval_steps_per_second": 3.812, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0007564102564102564, |
|
"loss": 0.2915, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7328519855595668, |
|
"eval_loss": 0.3364669382572174, |
|
"eval_runtime": 9.1472, |
|
"eval_samples_per_second": 30.283, |
|
"eval_steps_per_second": 3.826, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3435125946998596, |
|
"eval_runtime": 9.1788, |
|
"eval_samples_per_second": 30.178, |
|
"eval_steps_per_second": 3.813, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0006762820512820514, |
|
"loss": 0.29, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.3300550878047943, |
|
"eval_runtime": 9.1805, |
|
"eval_samples_per_second": 30.173, |
|
"eval_steps_per_second": 3.812, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.34428879618644714, |
|
"eval_runtime": 9.1365, |
|
"eval_samples_per_second": 30.318, |
|
"eval_steps_per_second": 3.831, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0005961538461538461, |
|
"loss": 0.2872, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.3392505347728729, |
|
"eval_runtime": 9.1185, |
|
"eval_samples_per_second": 30.378, |
|
"eval_steps_per_second": 3.838, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.000516025641025641, |
|
"loss": 0.2838, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.3291374742984772, |
|
"eval_runtime": 9.1502, |
|
"eval_samples_per_second": 30.273, |
|
"eval_steps_per_second": 3.825, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.33556076884269714, |
|
"eval_runtime": 9.1546, |
|
"eval_samples_per_second": 30.258, |
|
"eval_steps_per_second": 3.823, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00043589743589743596, |
|
"loss": 0.2865, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.3306904137134552, |
|
"eval_runtime": 9.1381, |
|
"eval_samples_per_second": 30.313, |
|
"eval_steps_per_second": 3.83, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.00035576923076923074, |
|
"loss": 0.2823, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.34133222699165344, |
|
"eval_runtime": 9.1664, |
|
"eval_samples_per_second": 30.219, |
|
"eval_steps_per_second": 3.818, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.3353198766708374, |
|
"eval_runtime": 9.1849, |
|
"eval_samples_per_second": 30.158, |
|
"eval_steps_per_second": 3.811, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0002756410256410257, |
|
"loss": 0.28, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.33145207166671753, |
|
"eval_runtime": 9.1935, |
|
"eval_samples_per_second": 30.13, |
|
"eval_steps_per_second": 3.807, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7364620938628159, |
|
"eval_loss": 0.34334462881088257, |
|
"eval_runtime": 9.1494, |
|
"eval_samples_per_second": 30.275, |
|
"eval_steps_per_second": 3.825, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00019551282051282054, |
|
"loss": 0.2832, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.33375221490859985, |
|
"eval_runtime": 9.1538, |
|
"eval_samples_per_second": 30.261, |
|
"eval_steps_per_second": 3.824, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 0.2794, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.3367079198360443, |
|
"eval_runtime": 9.1809, |
|
"eval_samples_per_second": 30.171, |
|
"eval_steps_per_second": 3.812, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.740072202166065, |
|
"eval_loss": 0.3371344208717346, |
|
"eval_runtime": 3.5086, |
|
"eval_samples_per_second": 78.948, |
|
"eval_steps_per_second": 9.975, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 3.5256410256410254e-05, |
|
"loss": 0.2785, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7436823104693141, |
|
"eval_loss": 0.33732497692108154, |
|
"eval_runtime": 9.129, |
|
"eval_samples_per_second": 30.343, |
|
"eval_steps_per_second": 3.834, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 35, |
|
"best_eval_accuracy": 0.7472924187725631, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.33913088435800665, |
|
"train_runtime": 8207.561, |
|
"train_samples_per_second": 18.203, |
|
"train_steps_per_second": 2.281 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|