|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 18720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4693140794223827, |
|
"eval_loss": 33.111061096191406, |
|
"eval_runtime": 7.0298, |
|
"eval_samples_per_second": 39.404, |
|
"eval_steps_per_second": 4.979, |
|
"step": 312 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4693140794223827, |
|
"epoch": 1.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00048664529914529915, |
|
"loss": 33.5632, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 29.4329833984375, |
|
"eval_runtime": 7.0194, |
|
"eval_samples_per_second": 39.462, |
|
"eval_steps_per_second": 4.986, |
|
"step": 624 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.4729241877256318, |
|
"epoch": 2.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 28.657472610473633, |
|
"eval_runtime": 7.0397, |
|
"eval_samples_per_second": 39.348, |
|
"eval_steps_per_second": 4.972, |
|
"step": 936 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.4729241877256318, |
|
"epoch": 3.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.0004732905982905983, |
|
"loss": 29.5796, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.49458483754512633, |
|
"eval_loss": 27.559425354003906, |
|
"eval_runtime": 7.0574, |
|
"eval_samples_per_second": 39.249, |
|
"eval_steps_per_second": 4.959, |
|
"step": 1248 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.49458483754512633, |
|
"epoch": 4.0, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.0004599358974358974, |
|
"loss": 27.7947, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 24.001134872436523, |
|
"eval_runtime": 7.0529, |
|
"eval_samples_per_second": 39.274, |
|
"eval_steps_per_second": 4.962, |
|
"step": 1560 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.49458483754512633, |
|
"epoch": 5.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5306859205776173, |
|
"eval_loss": 21.849742889404297, |
|
"eval_runtime": 6.9198, |
|
"eval_samples_per_second": 40.03, |
|
"eval_steps_per_second": 5.058, |
|
"step": 1872 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 6.0, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0004465811965811966, |
|
"loss": 24.4291, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 18.93817138671875, |
|
"eval_runtime": 6.9705, |
|
"eval_samples_per_second": 39.739, |
|
"eval_steps_per_second": 5.021, |
|
"step": 2184 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 7.0, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 17.022798538208008, |
|
"eval_runtime": 7.0475, |
|
"eval_samples_per_second": 39.305, |
|
"eval_steps_per_second": 4.966, |
|
"step": 2496 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 8.0, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.00043322649572649576, |
|
"loss": 21.7331, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 16.219053268432617, |
|
"eval_runtime": 7.0492, |
|
"eval_samples_per_second": 39.295, |
|
"eval_steps_per_second": 4.965, |
|
"step": 2808 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 9.0, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.0004198717948717949, |
|
"loss": 20.2434, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 15.6640043258667, |
|
"eval_runtime": 7.009, |
|
"eval_samples_per_second": 39.521, |
|
"eval_steps_per_second": 4.994, |
|
"step": 3120 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 10.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 15.320855140686035, |
|
"eval_runtime": 6.928, |
|
"eval_samples_per_second": 39.983, |
|
"eval_steps_per_second": 5.052, |
|
"step": 3432 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 11.0, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 0.00040651709401709403, |
|
"loss": 19.5791, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 15.036731719970703, |
|
"eval_runtime": 7.0537, |
|
"eval_samples_per_second": 39.27, |
|
"eval_steps_per_second": 4.962, |
|
"step": 3744 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 12.0, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 0.00039316239316239317, |
|
"loss": 19.1759, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 14.785924911499023, |
|
"eval_runtime": 6.9672, |
|
"eval_samples_per_second": 39.758, |
|
"eval_steps_per_second": 5.024, |
|
"step": 4056 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 13.0, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 14.568937301635742, |
|
"eval_runtime": 7.0271, |
|
"eval_samples_per_second": 39.419, |
|
"eval_steps_per_second": 4.981, |
|
"step": 4368 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 14.0, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.0003798076923076923, |
|
"loss": 18.9129, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 14.419888496398926, |
|
"eval_runtime": 7.0292, |
|
"eval_samples_per_second": 39.407, |
|
"eval_steps_per_second": 4.979, |
|
"step": 4680 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 15.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 14.306955337524414, |
|
"eval_runtime": 6.9501, |
|
"eval_samples_per_second": 39.856, |
|
"eval_steps_per_second": 5.036, |
|
"step": 4992 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 16.0, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 0.00036645299145299145, |
|
"loss": 18.725, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 14.20071029663086, |
|
"eval_runtime": 7.0305, |
|
"eval_samples_per_second": 39.4, |
|
"eval_steps_per_second": 4.978, |
|
"step": 5304 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 17.0, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 0.00035309829059829064, |
|
"loss": 18.5733, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 14.099628448486328, |
|
"eval_runtime": 7.0315, |
|
"eval_samples_per_second": 39.394, |
|
"eval_steps_per_second": 4.978, |
|
"step": 5616 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 18.0, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 14.055991172790527, |
|
"eval_runtime": 7.0244, |
|
"eval_samples_per_second": 39.434, |
|
"eval_steps_per_second": 4.983, |
|
"step": 5928 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 19.0, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0003397435897435898, |
|
"loss": 18.4591, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.947591781616211, |
|
"eval_runtime": 6.9888, |
|
"eval_samples_per_second": 39.635, |
|
"eval_steps_per_second": 5.008, |
|
"step": 6240 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 20.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.83, |
|
"learning_rate": 0.0003263888888888889, |
|
"loss": 18.3533, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.853177070617676, |
|
"eval_runtime": 7.0232, |
|
"eval_samples_per_second": 39.44, |
|
"eval_steps_per_second": 4.983, |
|
"step": 6552 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 21.0, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.809125900268555, |
|
"eval_runtime": 6.9984, |
|
"eval_samples_per_second": 39.58, |
|
"eval_steps_per_second": 5.001, |
|
"step": 6864 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 22.0, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 0.00031303418803418806, |
|
"loss": 18.2596, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.72776985168457, |
|
"eval_runtime": 7.0025, |
|
"eval_samples_per_second": 39.558, |
|
"eval_steps_per_second": 4.998, |
|
"step": 7176 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 23.0, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.661612510681152, |
|
"eval_runtime": 7.0007, |
|
"eval_samples_per_second": 39.567, |
|
"eval_steps_per_second": 4.999, |
|
"step": 7488 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 24.0, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 0.0002996794871794872, |
|
"loss": 18.1857, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.58198356628418, |
|
"eval_runtime": 7.0185, |
|
"eval_samples_per_second": 39.467, |
|
"eval_steps_per_second": 4.987, |
|
"step": 7800 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 25.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 0.00028632478632478634, |
|
"loss": 18.1091, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.565810203552246, |
|
"eval_runtime": 6.9956, |
|
"eval_samples_per_second": 39.597, |
|
"eval_steps_per_second": 5.003, |
|
"step": 8112 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 26.0, |
|
"step": 8112 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.495006561279297, |
|
"eval_runtime": 7.0216, |
|
"eval_samples_per_second": 39.45, |
|
"eval_steps_per_second": 4.985, |
|
"step": 8424 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 27.0, |
|
"step": 8424 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 0.0002729700854700855, |
|
"loss": 18.0388, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.410882949829102, |
|
"eval_runtime": 7.0403, |
|
"eval_samples_per_second": 39.345, |
|
"eval_steps_per_second": 4.971, |
|
"step": 8736 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 28.0, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 0.00025961538461538467, |
|
"loss": 17.9676, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.357146263122559, |
|
"eval_runtime": 7.0135, |
|
"eval_samples_per_second": 39.495, |
|
"eval_steps_per_second": 4.99, |
|
"step": 9048 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 29.0, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.309640884399414, |
|
"eval_runtime": 6.9911, |
|
"eval_samples_per_second": 39.622, |
|
"eval_steps_per_second": 5.006, |
|
"step": 9360 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 30.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0002462606837606838, |
|
"loss": 17.9109, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.26890754699707, |
|
"eval_runtime": 7.0321, |
|
"eval_samples_per_second": 39.391, |
|
"eval_steps_per_second": 4.977, |
|
"step": 9672 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 31.0, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.219923973083496, |
|
"eval_runtime": 7.0259, |
|
"eval_samples_per_second": 39.425, |
|
"eval_steps_per_second": 4.982, |
|
"step": 9984 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 32.0, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 0.00023290598290598292, |
|
"loss": 17.8555, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.170161247253418, |
|
"eval_runtime": 7.0381, |
|
"eval_samples_per_second": 39.357, |
|
"eval_steps_per_second": 4.973, |
|
"step": 10296 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 33.0, |
|
"step": 10296 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 0.00021955128205128206, |
|
"loss": 17.7959, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.131486892700195, |
|
"eval_runtime": 7.0195, |
|
"eval_samples_per_second": 39.462, |
|
"eval_steps_per_second": 4.986, |
|
"step": 10608 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 34.0, |
|
"step": 10608 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.097670555114746, |
|
"eval_runtime": 7.0588, |
|
"eval_samples_per_second": 39.242, |
|
"eval_steps_per_second": 4.958, |
|
"step": 10920 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 35.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 0.0002061965811965812, |
|
"loss": 17.7567, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.071810722351074, |
|
"eval_runtime": 7.0557, |
|
"eval_samples_per_second": 39.259, |
|
"eval_steps_per_second": 4.961, |
|
"step": 11232 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 36.0, |
|
"step": 11232 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.00019284188034188036, |
|
"loss": 17.718, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 13.024420738220215, |
|
"eval_runtime": 7.0253, |
|
"eval_samples_per_second": 39.429, |
|
"eval_steps_per_second": 4.982, |
|
"step": 11544 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 37.0, |
|
"step": 11544 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 13.006111145019531, |
|
"eval_runtime": 7.0291, |
|
"eval_samples_per_second": 39.408, |
|
"eval_steps_per_second": 4.979, |
|
"step": 11856 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 38.0, |
|
"step": 11856 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0001794871794871795, |
|
"loss": 17.6743, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.977677345275879, |
|
"eval_runtime": 7.0704, |
|
"eval_samples_per_second": 39.177, |
|
"eval_steps_per_second": 4.95, |
|
"step": 12168 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 39.0, |
|
"step": 12168 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.9545259475708, |
|
"eval_runtime": 7.0367, |
|
"eval_samples_per_second": 39.365, |
|
"eval_steps_per_second": 4.974, |
|
"step": 12480 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 40.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 40.06, |
|
"learning_rate": 0.00016613247863247864, |
|
"loss": 17.6411, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.93622875213623, |
|
"eval_runtime": 7.0219, |
|
"eval_samples_per_second": 39.448, |
|
"eval_steps_per_second": 4.984, |
|
"step": 12792 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 41.0, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 0.0001527777777777778, |
|
"loss": 17.6197, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.95643138885498, |
|
"eval_runtime": 7.0397, |
|
"eval_samples_per_second": 39.348, |
|
"eval_steps_per_second": 4.972, |
|
"step": 13104 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 42.0, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.893377304077148, |
|
"eval_runtime": 7.0406, |
|
"eval_samples_per_second": 39.343, |
|
"eval_steps_per_second": 4.971, |
|
"step": 13416 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 43.0, |
|
"step": 13416 |
|
}, |
|
{ |
|
"epoch": 43.27, |
|
"learning_rate": 0.00013942307692307694, |
|
"loss": 17.598, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.88237476348877, |
|
"eval_runtime": 7.0375, |
|
"eval_samples_per_second": 39.36, |
|
"eval_steps_per_second": 4.973, |
|
"step": 13728 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 44.0, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 0.00012606837606837608, |
|
"loss": 17.5669, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.892475128173828, |
|
"eval_runtime": 7.0533, |
|
"eval_samples_per_second": 39.272, |
|
"eval_steps_per_second": 4.962, |
|
"step": 14040 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 45.0, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.856684684753418, |
|
"eval_runtime": 7.0329, |
|
"eval_samples_per_second": 39.386, |
|
"eval_steps_per_second": 4.977, |
|
"step": 14352 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 46.0, |
|
"step": 14352 |
|
}, |
|
{ |
|
"epoch": 46.47, |
|
"learning_rate": 0.00011271367521367522, |
|
"loss": 17.5513, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.852547645568848, |
|
"eval_runtime": 7.0648, |
|
"eval_samples_per_second": 39.208, |
|
"eval_steps_per_second": 4.954, |
|
"step": 14664 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 47.0, |
|
"step": 14664 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5270758122743683, |
|
"eval_loss": 12.82678508758545, |
|
"eval_runtime": 7.0826, |
|
"eval_samples_per_second": 39.11, |
|
"eval_steps_per_second": 4.942, |
|
"step": 14976 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 48.0, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 48.08, |
|
"learning_rate": 9.935897435897436e-05, |
|
"loss": 17.5412, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.810016632080078, |
|
"eval_runtime": 7.0391, |
|
"eval_samples_per_second": 39.352, |
|
"eval_steps_per_second": 4.972, |
|
"step": 15288 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 49.0, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"learning_rate": 8.600427350427351e-05, |
|
"loss": 17.5282, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.805554389953613, |
|
"eval_runtime": 7.0839, |
|
"eval_samples_per_second": 39.103, |
|
"eval_steps_per_second": 4.941, |
|
"step": 15600 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 50.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.799531936645508, |
|
"eval_runtime": 7.0532, |
|
"eval_samples_per_second": 39.273, |
|
"eval_steps_per_second": 4.962, |
|
"step": 15912 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 51.0, |
|
"step": 15912 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 7.264957264957266e-05, |
|
"loss": 17.51, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.799637794494629, |
|
"eval_runtime": 7.0595, |
|
"eval_samples_per_second": 39.238, |
|
"eval_steps_per_second": 4.958, |
|
"step": 16224 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 52.0, |
|
"step": 16224 |
|
}, |
|
{ |
|
"epoch": 52.88, |
|
"learning_rate": 5.929487179487179e-05, |
|
"loss": 17.5032, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.77934455871582, |
|
"eval_runtime": 7.037, |
|
"eval_samples_per_second": 39.363, |
|
"eval_steps_per_second": 4.974, |
|
"step": 16536 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 53.0, |
|
"step": 16536 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.773187637329102, |
|
"eval_runtime": 7.0038, |
|
"eval_samples_per_second": 39.55, |
|
"eval_steps_per_second": 4.997, |
|
"step": 16848 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 54.0, |
|
"step": 16848 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 4.5940170940170944e-05, |
|
"loss": 17.4893, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.768193244934082, |
|
"eval_runtime": 4.4858, |
|
"eval_samples_per_second": 61.751, |
|
"eval_steps_per_second": 7.802, |
|
"step": 17160 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 55.0, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.762487411499023, |
|
"eval_runtime": 3.356, |
|
"eval_samples_per_second": 82.538, |
|
"eval_steps_per_second": 10.429, |
|
"step": 17472 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 56.0, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 3.258547008547009e-05, |
|
"loss": 17.4874, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.764068603515625, |
|
"eval_runtime": 3.3645, |
|
"eval_samples_per_second": 82.331, |
|
"eval_steps_per_second": 10.403, |
|
"step": 17784 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 57.0, |
|
"step": 17784 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 17.4805, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.757027626037598, |
|
"eval_runtime": 3.3549, |
|
"eval_samples_per_second": 82.566, |
|
"eval_steps_per_second": 10.432, |
|
"step": 18096 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 58.0, |
|
"step": 18096 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.756416320800781, |
|
"eval_runtime": 3.3544, |
|
"eval_samples_per_second": 82.578, |
|
"eval_steps_per_second": 10.434, |
|
"step": 18408 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 59.0, |
|
"step": 18408 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 5.876068376068376e-06, |
|
"loss": 17.4784, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.4729241877256318, |
|
"eval_loss": 12.755927085876465, |
|
"eval_runtime": 3.3587, |
|
"eval_samples_per_second": 82.474, |
|
"eval_steps_per_second": 10.421, |
|
"step": 18720 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.5306859205776173, |
|
"epoch": 60.0, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 18720, |
|
"total_flos": 6.96152728406016e+16, |
|
"train_loss": 19.286396724342282, |
|
"train_runtime": 6149.9047, |
|
"train_samples_per_second": 24.293, |
|
"train_steps_per_second": 3.044 |
|
} |
|
], |
|
"max_steps": 18720, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.96152728406016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|