|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.17852023243904114, |
|
"eval_runtime": 4.2873, |
|
"eval_samples_per_second": 64.61, |
|
"eval_steps_per_second": 8.164, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.002919871794871795, |
|
"loss": 0.2552, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5054151624548736, |
|
"eval_loss": 0.18264690041542053, |
|
"eval_runtime": 4.3896, |
|
"eval_samples_per_second": 63.104, |
|
"eval_steps_per_second": 7.973, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.33281320333480835, |
|
"eval_runtime": 4.433, |
|
"eval_samples_per_second": 62.486, |
|
"eval_steps_per_second": 7.895, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0028397435897435895, |
|
"loss": 0.24, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.2049672156572342, |
|
"eval_runtime": 4.4533, |
|
"eval_samples_per_second": 62.201, |
|
"eval_steps_per_second": 7.859, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0027596153846153847, |
|
"loss": 0.2369, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6064981949458483, |
|
"eval_loss": 0.17499716579914093, |
|
"eval_runtime": 4.4615, |
|
"eval_samples_per_second": 62.087, |
|
"eval_steps_per_second": 7.845, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.6064981949458483, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.47653429602888087, |
|
"eval_loss": 0.17517220973968506, |
|
"eval_runtime": 4.4386, |
|
"eval_samples_per_second": 62.407, |
|
"eval_steps_per_second": 7.885, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.6064981949458483, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00267948717948718, |
|
"loss": 0.2199, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.592057761732852, |
|
"eval_loss": 0.17986096441745758, |
|
"eval_runtime": 4.441, |
|
"eval_samples_per_second": 62.373, |
|
"eval_steps_per_second": 7.881, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.6064981949458483, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.18959033489227295, |
|
"eval_runtime": 4.4419, |
|
"eval_samples_per_second": 62.361, |
|
"eval_steps_per_second": 7.88, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.6064981949458483, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.0025993589743589745, |
|
"loss": 0.1955, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.1726665496826172, |
|
"eval_runtime": 4.4446, |
|
"eval_samples_per_second": 62.322, |
|
"eval_steps_per_second": 7.875, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0025192307692307693, |
|
"loss": 0.185, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5667870036101083, |
|
"eval_loss": 0.1733800321817398, |
|
"eval_runtime": 4.4393, |
|
"eval_samples_per_second": 62.397, |
|
"eval_steps_per_second": 7.884, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5812274368231047, |
|
"eval_loss": 0.1781184822320938, |
|
"eval_runtime": 4.4403, |
|
"eval_samples_per_second": 62.383, |
|
"eval_steps_per_second": 7.882, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.002439102564102564, |
|
"loss": 0.184, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 0.17108257114887238, |
|
"eval_runtime": 4.4449, |
|
"eval_samples_per_second": 62.319, |
|
"eval_steps_per_second": 7.874, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.631768953068592, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.002358974358974359, |
|
"loss": 0.1819, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.49097472924187724, |
|
"eval_loss": 0.17825525999069214, |
|
"eval_runtime": 4.4419, |
|
"eval_samples_per_second": 62.361, |
|
"eval_steps_per_second": 7.88, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.631768953068592, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.1702543944120407, |
|
"eval_runtime": 4.4448, |
|
"eval_samples_per_second": 62.32, |
|
"eval_steps_per_second": 7.874, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.6534296028880866, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.002278846153846154, |
|
"loss": 0.1793, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.16970041394233704, |
|
"eval_runtime": 4.4427, |
|
"eval_samples_per_second": 62.35, |
|
"eval_steps_per_second": 7.878, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 0.1709691882133484, |
|
"eval_runtime": 4.4418, |
|
"eval_samples_per_second": 62.362, |
|
"eval_steps_per_second": 7.88, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0021987179487179486, |
|
"loss": 0.179, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.17275716364383698, |
|
"eval_runtime": 4.4437, |
|
"eval_samples_per_second": 62.335, |
|
"eval_steps_per_second": 7.876, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.0021185897435897437, |
|
"loss": 0.1784, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.1711890995502472, |
|
"eval_runtime": 4.4513, |
|
"eval_samples_per_second": 62.229, |
|
"eval_steps_per_second": 7.863, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6064981949458483, |
|
"eval_loss": 0.1725841462612152, |
|
"eval_runtime": 4.4464, |
|
"eval_samples_per_second": 62.297, |
|
"eval_steps_per_second": 7.872, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0020384615384615385, |
|
"loss": 0.1778, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6678700361010831, |
|
"eval_loss": 0.17198872566223145, |
|
"eval_runtime": 4.4507, |
|
"eval_samples_per_second": 62.237, |
|
"eval_steps_per_second": 7.864, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0019583333333333336, |
|
"loss": 0.1761, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6606498194945848, |
|
"eval_loss": 0.17243142426013947, |
|
"eval_runtime": 4.4385, |
|
"eval_samples_per_second": 62.408, |
|
"eval_steps_per_second": 7.886, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.17924970388412476, |
|
"eval_runtime": 4.4358, |
|
"eval_samples_per_second": 62.447, |
|
"eval_steps_per_second": 7.89, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0018782051282051281, |
|
"loss": 0.1761, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.1700066775083542, |
|
"eval_runtime": 4.4501, |
|
"eval_samples_per_second": 62.246, |
|
"eval_steps_per_second": 7.865, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6678700361010831, |
|
"eval_loss": 0.16977772116661072, |
|
"eval_runtime": 4.4334, |
|
"eval_samples_per_second": 62.48, |
|
"eval_steps_per_second": 7.895, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.6931407942238267, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.001798076923076923, |
|
"loss": 0.1748, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.16966477036476135, |
|
"eval_runtime": 4.4363, |
|
"eval_samples_per_second": 62.44, |
|
"eval_steps_per_second": 7.89, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0017179487179487178, |
|
"loss": 0.1744, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6859205776173285, |
|
"eval_loss": 0.17290985584259033, |
|
"eval_runtime": 4.4401, |
|
"eval_samples_per_second": 62.387, |
|
"eval_steps_per_second": 7.883, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.1702173352241516, |
|
"eval_runtime": 4.4312, |
|
"eval_samples_per_second": 62.511, |
|
"eval_steps_per_second": 7.898, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0016378205128205127, |
|
"loss": 0.1736, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.1707705557346344, |
|
"eval_runtime": 4.4336, |
|
"eval_samples_per_second": 62.477, |
|
"eval_steps_per_second": 7.894, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0015576923076923079, |
|
"loss": 0.1723, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.16975490748882294, |
|
"eval_runtime": 4.4304, |
|
"eval_samples_per_second": 62.523, |
|
"eval_steps_per_second": 7.9, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.17993663251399994, |
|
"eval_runtime": 4.437, |
|
"eval_samples_per_second": 62.429, |
|
"eval_steps_per_second": 7.888, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0014775641025641026, |
|
"loss": 0.1735, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.17266085743904114, |
|
"eval_runtime": 4.4358, |
|
"eval_samples_per_second": 62.447, |
|
"eval_steps_per_second": 7.89, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.17320023477077484, |
|
"eval_runtime": 4.4326, |
|
"eval_samples_per_second": 62.491, |
|
"eval_steps_per_second": 7.896, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0013974358974358976, |
|
"loss": 0.1722, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.1702079027891159, |
|
"eval_runtime": 4.4347, |
|
"eval_samples_per_second": 62.463, |
|
"eval_steps_per_second": 7.892, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0013173076923076923, |
|
"loss": 0.1709, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.17065663635730743, |
|
"eval_runtime": 4.4365, |
|
"eval_samples_per_second": 62.437, |
|
"eval_steps_per_second": 7.889, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6967509025270758, |
|
"eval_loss": 0.17139047384262085, |
|
"eval_runtime": 4.4388, |
|
"eval_samples_per_second": 62.405, |
|
"eval_steps_per_second": 7.885, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0012371794871794872, |
|
"loss": 0.1697, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.17118841409683228, |
|
"eval_runtime": 4.4402, |
|
"eval_samples_per_second": 62.385, |
|
"eval_steps_per_second": 7.883, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0011570512820512822, |
|
"loss": 0.1696, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.17884960770606995, |
|
"eval_runtime": 4.4456, |
|
"eval_samples_per_second": 62.309, |
|
"eval_steps_per_second": 7.873, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.17026345431804657, |
|
"eval_runtime": 4.4452, |
|
"eval_samples_per_second": 62.315, |
|
"eval_steps_per_second": 7.874, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0010769230769230769, |
|
"loss": 0.1697, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.17346355319023132, |
|
"eval_runtime": 4.4459, |
|
"eval_samples_per_second": 62.305, |
|
"eval_steps_per_second": 7.872, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6787003610108303, |
|
"eval_loss": 0.17404569685459137, |
|
"eval_runtime": 4.4468, |
|
"eval_samples_per_second": 62.292, |
|
"eval_steps_per_second": 7.871, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0009967948717948718, |
|
"loss": 0.1683, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.1709529161453247, |
|
"eval_runtime": 4.4491, |
|
"eval_samples_per_second": 62.26, |
|
"eval_steps_per_second": 7.867, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.6967509025270758, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0009166666666666668, |
|
"loss": 0.1688, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.17236575484275818, |
|
"eval_runtime": 4.4523, |
|
"eval_samples_per_second": 62.215, |
|
"eval_steps_per_second": 7.861, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.17182697355747223, |
|
"eval_runtime": 4.4464, |
|
"eval_samples_per_second": 62.298, |
|
"eval_steps_per_second": 7.872, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0008365384615384616, |
|
"loss": 0.1679, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.17357730865478516, |
|
"eval_runtime": 4.4496, |
|
"eval_samples_per_second": 62.253, |
|
"eval_steps_per_second": 7.866, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0007564102564102564, |
|
"loss": 0.1681, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.17195428907871246, |
|
"eval_runtime": 4.4504, |
|
"eval_samples_per_second": 62.242, |
|
"eval_steps_per_second": 7.865, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.17170363664627075, |
|
"eval_runtime": 4.4496, |
|
"eval_samples_per_second": 62.253, |
|
"eval_steps_per_second": 7.866, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.0006762820512820514, |
|
"loss": 0.1664, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.17101722955703735, |
|
"eval_runtime": 4.454, |
|
"eval_samples_per_second": 62.192, |
|
"eval_steps_per_second": 7.858, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6895306859205776, |
|
"eval_loss": 0.17659461498260498, |
|
"eval_runtime": 4.4486, |
|
"eval_samples_per_second": 62.267, |
|
"eval_steps_per_second": 7.868, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0005961538461538461, |
|
"loss": 0.1662, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.1728653609752655, |
|
"eval_runtime": 4.4498, |
|
"eval_samples_per_second": 62.25, |
|
"eval_steps_per_second": 7.865, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.000516025641025641, |
|
"loss": 0.1655, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.17041344940662384, |
|
"eval_runtime": 4.4544, |
|
"eval_samples_per_second": 62.185, |
|
"eval_steps_per_second": 7.857, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 41, |
|
"best_eval_accuracy": 0.7075812274368231, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7184115523465704, |
|
"eval_loss": 0.1710704118013382, |
|
"eval_runtime": 4.4496, |
|
"eval_samples_per_second": 62.252, |
|
"eval_steps_per_second": 7.866, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00043589743589743596, |
|
"loss": 0.1665, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.1708567589521408, |
|
"eval_runtime": 4.4539, |
|
"eval_samples_per_second": 62.192, |
|
"eval_steps_per_second": 7.858, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.00035576923076923074, |
|
"loss": 0.1651, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.1710604727268219, |
|
"eval_runtime": 4.4518, |
|
"eval_samples_per_second": 62.222, |
|
"eval_steps_per_second": 7.862, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.703971119133574, |
|
"eval_loss": 0.173600971698761, |
|
"eval_runtime": 4.4493, |
|
"eval_samples_per_second": 62.257, |
|
"eval_steps_per_second": 7.866, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.0002756410256410257, |
|
"loss": 0.1646, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.17123006284236908, |
|
"eval_runtime": 4.452, |
|
"eval_samples_per_second": 62.219, |
|
"eval_steps_per_second": 7.862, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.17398422956466675, |
|
"eval_runtime": 4.4571, |
|
"eval_samples_per_second": 62.149, |
|
"eval_steps_per_second": 7.853, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00019551282051282054, |
|
"loss": 0.1647, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.17230337858200073, |
|
"eval_runtime": 4.446, |
|
"eval_samples_per_second": 62.303, |
|
"eval_steps_per_second": 7.872, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 0.1642, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7003610108303249, |
|
"eval_loss": 0.1714901179075241, |
|
"eval_runtime": 4.4636, |
|
"eval_samples_per_second": 62.057, |
|
"eval_steps_per_second": 7.841, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7075812274368231, |
|
"eval_loss": 0.17266467213630676, |
|
"eval_runtime": 4.446, |
|
"eval_samples_per_second": 62.303, |
|
"eval_steps_per_second": 7.872, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 3.5256410256410254e-05, |
|
"loss": 0.1643, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7111913357400722, |
|
"eval_loss": 0.1723625361919403, |
|
"eval_runtime": 4.4235, |
|
"eval_samples_per_second": 62.62, |
|
"eval_steps_per_second": 7.912, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 50, |
|
"best_eval_accuracy": 0.7184115523465704, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.17944894468682443, |
|
"train_runtime": 4049.329, |
|
"train_samples_per_second": 36.895, |
|
"train_steps_per_second": 4.623 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|