|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5415162454873647, |
|
"eval_loss": 0.3589429557323456, |
|
"eval_runtime": 4.3667, |
|
"eval_samples_per_second": 63.434, |
|
"eval_steps_per_second": 8.015, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.5415162454873647, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0019465811965811966, |
|
"loss": 0.4381, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.555956678700361, |
|
"eval_loss": 0.35848623514175415, |
|
"eval_runtime": 4.4826, |
|
"eval_samples_per_second": 61.794, |
|
"eval_steps_per_second": 7.808, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.555956678700361, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 0.48240402340888977, |
|
"eval_runtime": 4.5185, |
|
"eval_samples_per_second": 61.304, |
|
"eval_steps_per_second": 7.746, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.555956678700361, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0018931623931623931, |
|
"loss": 0.4251, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5740072202166066, |
|
"eval_loss": 0.34972989559173584, |
|
"eval_runtime": 4.5352, |
|
"eval_samples_per_second": 61.078, |
|
"eval_steps_per_second": 7.717, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0018397435897435897, |
|
"loss": 0.4013, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 0.551459550857544, |
|
"eval_runtime": 4.52, |
|
"eval_samples_per_second": 61.283, |
|
"eval_steps_per_second": 7.743, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5342960288808665, |
|
"eval_loss": 0.5299800038337708, |
|
"eval_runtime": 4.5138, |
|
"eval_samples_per_second": 61.368, |
|
"eval_steps_per_second": 7.754, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0017863247863247865, |
|
"loss": 0.4064, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4981949458483754, |
|
"eval_loss": 0.35154926776885986, |
|
"eval_runtime": 4.5182, |
|
"eval_samples_per_second": 61.307, |
|
"eval_steps_per_second": 7.746, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5703971119133574, |
|
"eval_loss": 0.3455522358417511, |
|
"eval_runtime": 4.5195, |
|
"eval_samples_per_second": 61.29, |
|
"eval_steps_per_second": 7.744, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.001732905982905983, |
|
"loss": 0.4121, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.35217946767807007, |
|
"eval_runtime": 4.5238, |
|
"eval_samples_per_second": 61.232, |
|
"eval_steps_per_second": 7.737, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0016794871794871796, |
|
"loss": 0.4048, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.3436826169490814, |
|
"eval_runtime": 4.5226, |
|
"eval_samples_per_second": 61.247, |
|
"eval_steps_per_second": 7.739, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5667870036101083, |
|
"eval_loss": 0.3483211398124695, |
|
"eval_runtime": 4.5176, |
|
"eval_samples_per_second": 61.315, |
|
"eval_steps_per_second": 7.747, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.0016260683760683761, |
|
"loss": 0.4035, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4657039711191336, |
|
"eval_loss": 0.39523008465766907, |
|
"eval_runtime": 4.5292, |
|
"eval_samples_per_second": 61.158, |
|
"eval_steps_per_second": 7.728, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.0015726495726495727, |
|
"loss": 0.3797, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.48014440433212996, |
|
"eval_loss": 0.3535395562648773, |
|
"eval_runtime": 4.5227, |
|
"eval_samples_per_second": 61.246, |
|
"eval_steps_per_second": 7.739, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.5740072202166066, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.3442744314670563, |
|
"eval_runtime": 4.5323, |
|
"eval_samples_per_second": 61.117, |
|
"eval_steps_per_second": 7.722, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.5992779783393501, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.0015192307692307692, |
|
"loss": 0.3657, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5379061371841155, |
|
"eval_loss": 0.34309083223342896, |
|
"eval_runtime": 4.5324, |
|
"eval_samples_per_second": 61.116, |
|
"eval_steps_per_second": 7.722, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.5992779783393501, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5992779783393501, |
|
"eval_loss": 0.34777817130088806, |
|
"eval_runtime": 4.5274, |
|
"eval_samples_per_second": 61.184, |
|
"eval_steps_per_second": 7.731, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.5992779783393501, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.0014658119658119658, |
|
"loss": 0.3615, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6173285198555957, |
|
"eval_loss": 0.34750139713287354, |
|
"eval_runtime": 4.5218, |
|
"eval_samples_per_second": 61.259, |
|
"eval_steps_per_second": 7.74, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.0014123931623931626, |
|
"loss": 0.3573, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.35394176840782166, |
|
"eval_runtime": 4.5276, |
|
"eval_samples_per_second": 61.181, |
|
"eval_steps_per_second": 7.73, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.33835408091545105, |
|
"eval_runtime": 4.5292, |
|
"eval_samples_per_second": 61.159, |
|
"eval_steps_per_second": 7.728, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.6173285198555957, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0013589743589743591, |
|
"loss": 0.3552, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.3483423590660095, |
|
"eval_runtime": 4.5342, |
|
"eval_samples_per_second": 61.092, |
|
"eval_steps_per_second": 7.719, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0013055555555555557, |
|
"loss": 0.3545, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6173285198555957, |
|
"eval_loss": 0.33585211634635925, |
|
"eval_runtime": 4.5322, |
|
"eval_samples_per_second": 61.119, |
|
"eval_steps_per_second": 7.723, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5740072202166066, |
|
"eval_loss": 0.384427011013031, |
|
"eval_runtime": 4.5292, |
|
"eval_samples_per_second": 61.158, |
|
"eval_steps_per_second": 7.728, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.6245487364620939, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.0012521367521367522, |
|
"loss": 0.349, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.34361839294433594, |
|
"eval_runtime": 4.5281, |
|
"eval_samples_per_second": 61.173, |
|
"eval_steps_per_second": 7.729, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 22, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6209386281588448, |
|
"eval_loss": 0.3422470688819885, |
|
"eval_runtime": 4.5291, |
|
"eval_samples_per_second": 61.16, |
|
"eval_steps_per_second": 7.728, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 22, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0011987179487179488, |
|
"loss": 0.351, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.631768953068592, |
|
"eval_loss": 0.3495335876941681, |
|
"eval_runtime": 4.5369, |
|
"eval_samples_per_second": 61.055, |
|
"eval_steps_per_second": 7.715, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 22, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.0011452991452991453, |
|
"loss": 0.3471, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.3498461842536926, |
|
"eval_runtime": 4.5294, |
|
"eval_samples_per_second": 61.156, |
|
"eval_steps_per_second": 7.727, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 22, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.3315870463848114, |
|
"eval_runtime": 4.532, |
|
"eval_samples_per_second": 61.121, |
|
"eval_steps_per_second": 7.723, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 22, |
|
"best_eval_accuracy": 0.6498194945848376, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.001091880341880342, |
|
"loss": 0.3468, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6750902527075813, |
|
"eval_loss": 0.33216673135757446, |
|
"eval_runtime": 4.5307, |
|
"eval_samples_per_second": 61.139, |
|
"eval_steps_per_second": 7.725, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.0010384615384615387, |
|
"loss": 0.3459, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.3354048728942871, |
|
"eval_runtime": 4.5404, |
|
"eval_samples_per_second": 61.008, |
|
"eval_steps_per_second": 7.709, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.33531084656715393, |
|
"eval_runtime": 4.5418, |
|
"eval_samples_per_second": 60.99, |
|
"eval_steps_per_second": 7.706, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0009850427350427352, |
|
"loss": 0.344, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.33834579586982727, |
|
"eval_runtime": 4.5312, |
|
"eval_samples_per_second": 61.131, |
|
"eval_steps_per_second": 7.724, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.3328704535961151, |
|
"eval_runtime": 4.5306, |
|
"eval_samples_per_second": 61.14, |
|
"eval_steps_per_second": 7.725, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.0009316239316239317, |
|
"loss": 0.3435, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.3411121070384979, |
|
"eval_runtime": 4.5294, |
|
"eval_samples_per_second": 61.156, |
|
"eval_steps_per_second": 7.727, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.0008782051282051282, |
|
"loss": 0.3408, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.34143632650375366, |
|
"eval_runtime": 4.5307, |
|
"eval_samples_per_second": 61.139, |
|
"eval_steps_per_second": 7.725, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6534296028880866, |
|
"eval_loss": 0.3319098949432373, |
|
"eval_runtime": 4.532, |
|
"eval_samples_per_second": 61.121, |
|
"eval_steps_per_second": 7.723, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0008247863247863248, |
|
"loss": 0.3401, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.3347095251083374, |
|
"eval_runtime": 4.5326, |
|
"eval_samples_per_second": 61.113, |
|
"eval_steps_per_second": 7.722, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0007713675213675214, |
|
"loss": 0.3406, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6137184115523465, |
|
"eval_loss": 0.3381596505641937, |
|
"eval_runtime": 4.5336, |
|
"eval_samples_per_second": 61.099, |
|
"eval_steps_per_second": 7.72, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.3355455696582794, |
|
"eval_runtime": 4.5352, |
|
"eval_samples_per_second": 61.077, |
|
"eval_steps_per_second": 7.717, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.000717948717948718, |
|
"loss": 0.3378, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6245487364620939, |
|
"eval_loss": 0.34160640835762024, |
|
"eval_runtime": 4.5305, |
|
"eval_samples_per_second": 61.141, |
|
"eval_steps_per_second": 7.725, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6209386281588448, |
|
"eval_loss": 0.3422325551509857, |
|
"eval_runtime": 4.5339, |
|
"eval_samples_per_second": 61.095, |
|
"eval_steps_per_second": 7.72, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.0006645299145299145, |
|
"loss": 0.3386, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.3388216197490692, |
|
"eval_runtime": 4.5387, |
|
"eval_samples_per_second": 61.03, |
|
"eval_steps_per_second": 7.711, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0006111111111111112, |
|
"loss": 0.3362, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.3329535722732544, |
|
"eval_runtime": 4.5307, |
|
"eval_samples_per_second": 61.138, |
|
"eval_steps_per_second": 7.725, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.3393040895462036, |
|
"eval_runtime": 4.5298, |
|
"eval_samples_per_second": 61.15, |
|
"eval_steps_per_second": 7.727, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.0005576923076923078, |
|
"loss": 0.3373, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.33396920561790466, |
|
"eval_runtime": 4.537, |
|
"eval_samples_per_second": 61.054, |
|
"eval_steps_per_second": 7.714, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.0005042735042735043, |
|
"loss": 0.3337, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.33175528049468994, |
|
"eval_runtime": 4.5329, |
|
"eval_samples_per_second": 61.108, |
|
"eval_steps_per_second": 7.721, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6353790613718412, |
|
"eval_loss": 0.33228200674057007, |
|
"eval_runtime": 4.5321, |
|
"eval_samples_per_second": 61.119, |
|
"eval_steps_per_second": 7.723, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.00045085470085470087, |
|
"loss": 0.3332, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 0.33014196157455444, |
|
"eval_runtime": 4.5347, |
|
"eval_samples_per_second": 61.085, |
|
"eval_steps_per_second": 7.718, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.628158844765343, |
|
"eval_loss": 0.3421645164489746, |
|
"eval_runtime": 4.5363, |
|
"eval_samples_per_second": 61.064, |
|
"eval_steps_per_second": 7.716, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 0.0003974358974358974, |
|
"loss": 0.3315, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.3348065912723541, |
|
"eval_runtime": 4.533, |
|
"eval_samples_per_second": 61.107, |
|
"eval_steps_per_second": 7.721, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 0.00034401709401709403, |
|
"loss": 0.33, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.3366377651691437, |
|
"eval_runtime": 4.5288, |
|
"eval_samples_per_second": 61.165, |
|
"eval_steps_per_second": 7.728, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.3308314085006714, |
|
"eval_runtime": 4.5289, |
|
"eval_samples_per_second": 61.163, |
|
"eval_steps_per_second": 7.728, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 0.00029059829059829064, |
|
"loss": 0.331, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6606498194945848, |
|
"eval_loss": 0.329833447933197, |
|
"eval_runtime": 4.5318, |
|
"eval_samples_per_second": 61.123, |
|
"eval_steps_per_second": 7.723, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 0.00023717948717948717, |
|
"loss": 0.3295, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.337655633687973, |
|
"eval_runtime": 4.5297, |
|
"eval_samples_per_second": 61.153, |
|
"eval_steps_per_second": 7.727, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.343928724527359, |
|
"eval_runtime": 4.524, |
|
"eval_samples_per_second": 61.229, |
|
"eval_steps_per_second": 7.737, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 0.00018376068376068378, |
|
"loss": 0.3282, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.33256685733795166, |
|
"eval_runtime": 4.5262, |
|
"eval_samples_per_second": 61.199, |
|
"eval_steps_per_second": 7.733, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.33564698696136475, |
|
"eval_runtime": 4.5262, |
|
"eval_samples_per_second": 61.2, |
|
"eval_steps_per_second": 7.733, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 0.00013034188034188036, |
|
"loss": 0.3291, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6570397111913358, |
|
"eval_loss": 0.3309127986431122, |
|
"eval_runtime": 4.5232, |
|
"eval_samples_per_second": 61.24, |
|
"eval_steps_per_second": 7.738, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 0.3278, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.33329811692237854, |
|
"eval_runtime": 4.5357, |
|
"eval_samples_per_second": 61.072, |
|
"eval_steps_per_second": 7.717, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.33244961500167847, |
|
"eval_runtime": 4.5017, |
|
"eval_samples_per_second": 61.532, |
|
"eval_steps_per_second": 7.775, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.3292, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6498194945848376, |
|
"eval_loss": 0.33346447348594666, |
|
"eval_runtime": 4.4869, |
|
"eval_samples_per_second": 61.735, |
|
"eval_steps_per_second": 7.8, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 27, |
|
"best_eval_accuracy": 0.6750902527075813, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 0.3555507358322796, |
|
"train_runtime": 4126.6509, |
|
"train_samples_per_second": 36.204, |
|
"train_steps_per_second": 4.536 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|