|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6077527403831482, |
|
"eval_runtime": 2.8957, |
|
"eval_samples_per_second": 34.534, |
|
"eval_steps_per_second": 4.489, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5962929725646973, |
|
"eval_runtime": 2.9223, |
|
"eval_samples_per_second": 34.22, |
|
"eval_steps_per_second": 4.449, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6125411987304688, |
|
"eval_runtime": 2.9502, |
|
"eval_samples_per_second": 33.896, |
|
"eval_steps_per_second": 4.407, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6042141914367676, |
|
"eval_runtime": 2.9786, |
|
"eval_samples_per_second": 33.573, |
|
"eval_steps_per_second": 4.364, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6065436005592346, |
|
"eval_runtime": 2.9992, |
|
"eval_samples_per_second": 33.342, |
|
"eval_steps_per_second": 4.334, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6019887328147888, |
|
"eval_runtime": 3.0098, |
|
"eval_samples_per_second": 33.225, |
|
"eval_steps_per_second": 4.319, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.598693311214447, |
|
"eval_runtime": 3.016, |
|
"eval_samples_per_second": 33.157, |
|
"eval_steps_per_second": 4.31, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6016180515289307, |
|
"eval_runtime": 3.0167, |
|
"eval_samples_per_second": 33.149, |
|
"eval_steps_per_second": 4.309, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6065845489501953, |
|
"eval_runtime": 3.0194, |
|
"eval_samples_per_second": 33.119, |
|
"eval_steps_per_second": 4.305, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6111646294593811, |
|
"eval_runtime": 3.0214, |
|
"eval_samples_per_second": 33.097, |
|
"eval_steps_per_second": 4.303, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.608534574508667, |
|
"eval_runtime": 3.0216, |
|
"eval_samples_per_second": 33.095, |
|
"eval_steps_per_second": 4.302, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5975815057754517, |
|
"eval_runtime": 3.0222, |
|
"eval_samples_per_second": 33.089, |
|
"eval_steps_per_second": 4.302, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6074051856994629, |
|
"eval_runtime": 3.0229, |
|
"eval_samples_per_second": 33.081, |
|
"eval_steps_per_second": 4.3, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6060487627983093, |
|
"eval_runtime": 3.0206, |
|
"eval_samples_per_second": 33.106, |
|
"eval_steps_per_second": 4.304, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6253820061683655, |
|
"eval_runtime": 3.022, |
|
"eval_samples_per_second": 33.09, |
|
"eval_steps_per_second": 4.302, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6030685305595398, |
|
"eval_runtime": 3.022, |
|
"eval_samples_per_second": 33.091, |
|
"eval_steps_per_second": 4.302, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.6011382937431335, |
|
"eval_runtime": 3.0277, |
|
"eval_samples_per_second": 33.028, |
|
"eval_steps_per_second": 4.294, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6062633991241455, |
|
"eval_runtime": 3.0243, |
|
"eval_samples_per_second": 33.066, |
|
"eval_steps_per_second": 4.299, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6030519008636475, |
|
"eval_runtime": 3.0228, |
|
"eval_samples_per_second": 33.082, |
|
"eval_steps_per_second": 4.301, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00075, |
|
"loss": 0.6484, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6012805700302124, |
|
"eval_runtime": 3.023, |
|
"eval_samples_per_second": 33.08, |
|
"eval_steps_per_second": 4.3, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6041200160980225, |
|
"eval_runtime": 3.023, |
|
"eval_samples_per_second": 33.08, |
|
"eval_steps_per_second": 4.3, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.603689968585968, |
|
"eval_runtime": 3.0216, |
|
"eval_samples_per_second": 33.095, |
|
"eval_steps_per_second": 4.302, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6045952439308167, |
|
"eval_runtime": 3.0228, |
|
"eval_samples_per_second": 33.082, |
|
"eval_steps_per_second": 4.301, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.607201099395752, |
|
"eval_runtime": 3.0236, |
|
"eval_samples_per_second": 33.073, |
|
"eval_steps_per_second": 4.299, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.598048985004425, |
|
"eval_runtime": 3.0235, |
|
"eval_samples_per_second": 33.075, |
|
"eval_steps_per_second": 4.3, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6038631796836853, |
|
"eval_runtime": 3.0246, |
|
"eval_samples_per_second": 33.062, |
|
"eval_steps_per_second": 4.298, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6025145053863525, |
|
"eval_runtime": 3.0238, |
|
"eval_samples_per_second": 33.071, |
|
"eval_steps_per_second": 4.299, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6062153577804565, |
|
"eval_runtime": 3.0238, |
|
"eval_samples_per_second": 33.071, |
|
"eval_steps_per_second": 4.299, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6056156754493713, |
|
"eval_runtime": 3.0237, |
|
"eval_samples_per_second": 33.072, |
|
"eval_steps_per_second": 4.299, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.6091020703315735, |
|
"eval_runtime": 3.0256, |
|
"eval_samples_per_second": 33.051, |
|
"eval_steps_per_second": 4.297, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.603661060333252, |
|
"eval_runtime": 3.0236, |
|
"eval_samples_per_second": 33.073, |
|
"eval_steps_per_second": 4.3, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.6037007570266724, |
|
"eval_runtime": 3.0228, |
|
"eval_samples_per_second": 33.082, |
|
"eval_steps_per_second": 4.301, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6175075769424438, |
|
"eval_runtime": 3.0227, |
|
"eval_samples_per_second": 33.083, |
|
"eval_steps_per_second": 4.301, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.6089429259300232, |
|
"eval_runtime": 3.0249, |
|
"eval_samples_per_second": 33.058, |
|
"eval_steps_per_second": 4.298, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6076489090919495, |
|
"eval_runtime": 3.0243, |
|
"eval_samples_per_second": 33.065, |
|
"eval_steps_per_second": 4.299, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6072561740875244, |
|
"eval_runtime": 3.0248, |
|
"eval_samples_per_second": 33.06, |
|
"eval_steps_per_second": 4.298, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6059485673904419, |
|
"eval_runtime": 3.0219, |
|
"eval_samples_per_second": 33.091, |
|
"eval_steps_per_second": 4.302, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.6108887195587158, |
|
"eval_runtime": 3.0227, |
|
"eval_samples_per_second": 33.083, |
|
"eval_steps_per_second": 4.301, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6089987754821777, |
|
"eval_runtime": 3.0258, |
|
"eval_samples_per_second": 33.049, |
|
"eval_steps_per_second": 4.296, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.6362, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.608008086681366, |
|
"eval_runtime": 3.0289, |
|
"eval_samples_per_second": 33.016, |
|
"eval_steps_per_second": 4.292, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5993980169296265, |
|
"eval_runtime": 3.0219, |
|
"eval_samples_per_second": 33.092, |
|
"eval_steps_per_second": 4.302, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6034057140350342, |
|
"eval_runtime": 3.0231, |
|
"eval_samples_per_second": 33.079, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.6113384366035461, |
|
"eval_runtime": 3.0245, |
|
"eval_samples_per_second": 33.063, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6130579113960266, |
|
"eval_runtime": 3.0267, |
|
"eval_samples_per_second": 33.04, |
|
"eval_steps_per_second": 4.295, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.6150393486022949, |
|
"eval_runtime": 3.0244, |
|
"eval_samples_per_second": 33.065, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.6114516258239746, |
|
"eval_runtime": 3.0243, |
|
"eval_samples_per_second": 33.066, |
|
"eval_steps_per_second": 4.299, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6054720878601074, |
|
"eval_runtime": 3.0238, |
|
"eval_samples_per_second": 33.071, |
|
"eval_steps_per_second": 4.299, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6033183336257935, |
|
"eval_runtime": 3.0267, |
|
"eval_samples_per_second": 33.039, |
|
"eval_steps_per_second": 4.295, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6046847701072693, |
|
"eval_runtime": 3.0229, |
|
"eval_samples_per_second": 33.081, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6036849617958069, |
|
"eval_runtime": 3.0227, |
|
"eval_samples_per_second": 33.083, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.6010092496871948, |
|
"eval_runtime": 3.0225, |
|
"eval_samples_per_second": 33.086, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5987973213195801, |
|
"eval_runtime": 3.0233, |
|
"eval_samples_per_second": 33.077, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5991275906562805, |
|
"eval_runtime": 3.0229, |
|
"eval_samples_per_second": 33.081, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6018882989883423, |
|
"eval_runtime": 3.0234, |
|
"eval_samples_per_second": 33.076, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6002119183540344, |
|
"eval_runtime": 3.0225, |
|
"eval_samples_per_second": 33.085, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6006402373313904, |
|
"eval_runtime": 3.0226, |
|
"eval_samples_per_second": 33.084, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5991740226745605, |
|
"eval_runtime": 3.0248, |
|
"eval_samples_per_second": 33.06, |
|
"eval_steps_per_second": 4.298, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5991753935813904, |
|
"eval_runtime": 3.0228, |
|
"eval_samples_per_second": 33.082, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5992391109466553, |
|
"eval_runtime": 3.0225, |
|
"eval_samples_per_second": 33.085, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00025, |
|
"loss": 0.6341, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6026336550712585, |
|
"eval_runtime": 3.0215, |
|
"eval_samples_per_second": 33.096, |
|
"eval_steps_per_second": 4.302, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6022050976753235, |
|
"eval_runtime": 3.0236, |
|
"eval_samples_per_second": 33.074, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6025804281234741, |
|
"eval_runtime": 3.0231, |
|
"eval_samples_per_second": 33.078, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6036398410797119, |
|
"eval_runtime": 3.0233, |
|
"eval_samples_per_second": 33.077, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6039140224456787, |
|
"eval_runtime": 3.0225, |
|
"eval_samples_per_second": 33.085, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6040653586387634, |
|
"eval_runtime": 3.0224, |
|
"eval_samples_per_second": 33.086, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6034075021743774, |
|
"eval_runtime": 3.0223, |
|
"eval_samples_per_second": 33.088, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6048703789710999, |
|
"eval_runtime": 3.0235, |
|
"eval_samples_per_second": 33.074, |
|
"eval_steps_per_second": 4.3, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6027147769927979, |
|
"eval_runtime": 3.0195, |
|
"eval_samples_per_second": 33.118, |
|
"eval_steps_per_second": 4.305, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6057494878768921, |
|
"eval_runtime": 3.024, |
|
"eval_samples_per_second": 33.069, |
|
"eval_steps_per_second": 4.299, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6056165099143982, |
|
"eval_runtime": 3.021, |
|
"eval_samples_per_second": 33.102, |
|
"eval_steps_per_second": 4.303, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6048298478126526, |
|
"eval_runtime": 3.0253, |
|
"eval_samples_per_second": 33.054, |
|
"eval_steps_per_second": 4.297, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6018526554107666, |
|
"eval_runtime": 3.0215, |
|
"eval_samples_per_second": 33.096, |
|
"eval_steps_per_second": 4.302, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6020650267601013, |
|
"eval_runtime": 3.0323, |
|
"eval_samples_per_second": 32.978, |
|
"eval_steps_per_second": 4.287, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6018134951591492, |
|
"eval_runtime": 3.0272, |
|
"eval_samples_per_second": 33.034, |
|
"eval_steps_per_second": 4.294, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6026907563209534, |
|
"eval_runtime": 3.021, |
|
"eval_samples_per_second": 33.102, |
|
"eval_steps_per_second": 4.303, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6025264263153076, |
|
"eval_runtime": 3.0229, |
|
"eval_samples_per_second": 33.081, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6021308898925781, |
|
"eval_runtime": 3.0228, |
|
"eval_samples_per_second": 33.082, |
|
"eval_steps_per_second": 4.301, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6022512316703796, |
|
"eval_runtime": 3.0219, |
|
"eval_samples_per_second": 33.092, |
|
"eval_steps_per_second": 4.302, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6024240255355835, |
|
"eval_runtime": 3.0204, |
|
"eval_samples_per_second": 33.109, |
|
"eval_steps_per_second": 4.304, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.626, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6023900508880615, |
|
"eval_runtime": 3.0377, |
|
"eval_samples_per_second": 32.92, |
|
"eval_steps_per_second": 4.28, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.6361599731445312, |
|
"train_runtime": 1616.4107, |
|
"train_samples_per_second": 19.797, |
|
"train_steps_per_second": 1.237 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|