{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.65, "eval_loss": 0.6077527403831482, "eval_runtime": 2.8957, "eval_samples_per_second": 34.534, "eval_steps_per_second": 4.489, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.65, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.66, "eval_loss": 0.5962929725646973, "eval_runtime": 2.9223, "eval_samples_per_second": 34.22, "eval_steps_per_second": 4.449, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.65, "eval_loss": 0.6125411987304688, "eval_runtime": 2.9502, "eval_samples_per_second": 33.896, "eval_steps_per_second": 4.407, "step": 75 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.66, "eval_loss": 0.6042141914367676, "eval_runtime": 2.9786, "eval_samples_per_second": 33.573, "eval_steps_per_second": 4.364, "step": 100 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.66, "eval_loss": 0.6065436005592346, "eval_runtime": 2.9992, "eval_samples_per_second": 33.342, "eval_steps_per_second": 4.334, "step": 125 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.65, "eval_loss": 0.6019887328147888, "eval_runtime": 3.0098, "eval_samples_per_second": 33.225, "eval_steps_per_second": 4.319, "step": 150 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.65, "eval_loss": 0.598693311214447, "eval_runtime": 3.016, "eval_samples_per_second": 33.157, "eval_steps_per_second": 4.31, "step": 175 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.66, "eval_loss": 0.6016180515289307, "eval_runtime": 3.0167, "eval_samples_per_second": 33.149, "eval_steps_per_second": 4.309, "step": 200 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.66, "eval_loss": 0.6065845489501953, "eval_runtime": 3.0194, "eval_samples_per_second": 33.119, "eval_steps_per_second": 4.305, "step": 225 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.66, "eval_loss": 0.6111646294593811, "eval_runtime": 3.0214, "eval_samples_per_second": 33.097, "eval_steps_per_second": 4.303, "step": 250 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.66, "eval_loss": 0.608534574508667, "eval_runtime": 3.0216, "eval_samples_per_second": 33.095, "eval_steps_per_second": 4.302, "step": 275 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.66, "eval_loss": 0.5975815057754517, "eval_runtime": 3.0222, "eval_samples_per_second": 33.089, "eval_steps_per_second": 4.302, "step": 300 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.66, "eval_loss": 0.6074051856994629, "eval_runtime": 3.0229, "eval_samples_per_second": 33.081, "eval_steps_per_second": 4.3, "step": 325 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.65, "eval_loss": 0.6060487627983093, "eval_runtime": 3.0206, "eval_samples_per_second": 33.106, "eval_steps_per_second": 4.304, "step": 350 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.65, "eval_loss": 0.6253820061683655, "eval_runtime": 3.022, "eval_samples_per_second": 33.09, "eval_steps_per_second": 4.302, "step": 375 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.66, "eval_loss": 0.6030685305595398, "eval_runtime": 3.022, "eval_samples_per_second": 33.091, "eval_steps_per_second": 4.302, "step": 400 }, { "best_epoch": 1, "best_eval_accuracy": 0.66, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.67, "eval_loss": 0.6011382937431335, "eval_runtime": 3.0277, "eval_samples_per_second": 33.028, "eval_steps_per_second": 4.294, "step": 425 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.66, "eval_loss": 0.6062633991241455, "eval_runtime": 3.0243, "eval_samples_per_second": 33.066, "eval_steps_per_second": 4.299, "step": 450 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.65, "eval_loss": 0.6030519008636475, "eval_runtime": 3.0228, "eval_samples_per_second": 33.082, "eval_steps_per_second": 4.301, "step": 475 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.00075, "loss": 0.6484, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.65, "eval_loss": 0.6012805700302124, "eval_runtime": 3.023, "eval_samples_per_second": 33.08, "eval_steps_per_second": 4.3, "step": 500 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.65, "eval_loss": 0.6041200160980225, "eval_runtime": 3.023, "eval_samples_per_second": 33.08, "eval_steps_per_second": 4.3, "step": 525 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.65, "eval_loss": 0.603689968585968, "eval_runtime": 3.0216, "eval_samples_per_second": 33.095, "eval_steps_per_second": 4.302, "step": 550 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.65, "eval_loss": 0.6045952439308167, "eval_runtime": 3.0228, "eval_samples_per_second": 33.082, "eval_steps_per_second": 4.301, "step": 575 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.66, "eval_loss": 0.607201099395752, "eval_runtime": 3.0236, "eval_samples_per_second": 33.073, "eval_steps_per_second": 4.299, "step": 600 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.66, "eval_loss": 0.598048985004425, "eval_runtime": 3.0235, "eval_samples_per_second": 33.075, "eval_steps_per_second": 4.3, "step": 625 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.64, "eval_loss": 0.6038631796836853, "eval_runtime": 3.0246, "eval_samples_per_second": 33.062, "eval_steps_per_second": 4.298, "step": 650 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.65, "eval_loss": 0.6025145053863525, "eval_runtime": 3.0238, "eval_samples_per_second": 33.071, "eval_steps_per_second": 4.299, "step": 675 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.65, "eval_loss": 0.6062153577804565, "eval_runtime": 3.0238, "eval_samples_per_second": 33.071, "eval_steps_per_second": 4.299, "step": 700 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.64, "eval_loss": 0.6056156754493713, "eval_runtime": 3.0237, "eval_samples_per_second": 33.072, "eval_steps_per_second": 4.299, "step": 725 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.61, "eval_loss": 0.6091020703315735, "eval_runtime": 3.0256, "eval_samples_per_second": 33.051, "eval_steps_per_second": 4.297, "step": 750 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.65, "eval_loss": 0.603661060333252, "eval_runtime": 3.0236, "eval_samples_per_second": 33.073, "eval_steps_per_second": 4.3, "step": 775 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.63, "eval_loss": 0.6037007570266724, "eval_runtime": 3.0228, "eval_samples_per_second": 33.082, "eval_steps_per_second": 4.301, "step": 800 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.64, "eval_loss": 0.6175075769424438, "eval_runtime": 3.0227, "eval_samples_per_second": 33.083, "eval_steps_per_second": 4.301, "step": 825 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.62, "eval_loss": 0.6089429259300232, "eval_runtime": 3.0249, "eval_samples_per_second": 33.058, "eval_steps_per_second": 4.298, "step": 850 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.64, "eval_loss": 0.6076489090919495, "eval_runtime": 3.0243, "eval_samples_per_second": 33.065, "eval_steps_per_second": 4.299, "step": 875 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.64, "eval_loss": 0.6072561740875244, "eval_runtime": 3.0248, "eval_samples_per_second": 33.06, "eval_steps_per_second": 4.298, "step": 900 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.64, "eval_loss": 0.6059485673904419, "eval_runtime": 3.0219, "eval_samples_per_second": 33.091, "eval_steps_per_second": 4.302, "step": 925 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.63, "eval_loss": 0.6108887195587158, "eval_runtime": 3.0227, "eval_samples_per_second": 33.083, "eval_steps_per_second": 4.301, "step": 950 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.64, "eval_loss": 0.6089987754821777, "eval_runtime": 3.0258, "eval_samples_per_second": 33.049, "eval_steps_per_second": 4.296, "step": 975 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.0005, "loss": 0.6362, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.64, "eval_loss": 0.608008086681366, "eval_runtime": 3.0289, "eval_samples_per_second": 33.016, "eval_steps_per_second": 4.292, "step": 1000 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.64, "eval_loss": 0.5993980169296265, "eval_runtime": 3.0219, "eval_samples_per_second": 33.092, "eval_steps_per_second": 4.302, "step": 1025 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.64, "eval_loss": 0.6034057140350342, "eval_runtime": 3.0231, "eval_samples_per_second": 33.079, "eval_steps_per_second": 4.3, "step": 1050 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.6, "eval_loss": 0.6113384366035461, "eval_runtime": 3.0245, "eval_samples_per_second": 33.063, "eval_steps_per_second": 4.298, "step": 1075 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.64, "eval_loss": 0.6130579113960266, "eval_runtime": 3.0267, "eval_samples_per_second": 33.04, "eval_steps_per_second": 4.295, "step": 1100 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.61, "eval_loss": 0.6150393486022949, "eval_runtime": 3.0244, "eval_samples_per_second": 33.065, "eval_steps_per_second": 4.298, "step": 1125 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.63, "eval_loss": 0.6114516258239746, "eval_runtime": 3.0243, "eval_samples_per_second": 33.066, "eval_steps_per_second": 4.299, "step": 1150 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.64, "eval_loss": 0.6054720878601074, "eval_runtime": 3.0238, "eval_samples_per_second": 33.071, "eval_steps_per_second": 4.299, "step": 1175 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.64, "eval_loss": 0.6033183336257935, "eval_runtime": 3.0267, "eval_samples_per_second": 33.039, "eval_steps_per_second": 4.295, "step": 1200 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.64, "eval_loss": 0.6046847701072693, "eval_runtime": 3.0229, "eval_samples_per_second": 33.081, "eval_steps_per_second": 4.301, "step": 1225 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.64, "eval_loss": 0.6036849617958069, "eval_runtime": 3.0227, "eval_samples_per_second": 33.083, "eval_steps_per_second": 4.301, "step": 1250 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.63, "eval_loss": 0.6010092496871948, "eval_runtime": 3.0225, "eval_samples_per_second": 33.086, "eval_steps_per_second": 4.301, "step": 1275 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.64, "eval_loss": 0.5987973213195801, "eval_runtime": 3.0233, "eval_samples_per_second": 33.077, "eval_steps_per_second": 4.3, "step": 1300 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.64, "eval_loss": 0.5991275906562805, "eval_runtime": 3.0229, "eval_samples_per_second": 33.081, "eval_steps_per_second": 4.3, "step": 1325 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.64, "eval_loss": 0.6018882989883423, "eval_runtime": 3.0234, "eval_samples_per_second": 33.076, "eval_steps_per_second": 4.3, "step": 1350 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.64, "eval_loss": 0.6002119183540344, "eval_runtime": 3.0225, "eval_samples_per_second": 33.085, "eval_steps_per_second": 4.301, "step": 1375 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.64, "eval_loss": 0.6006402373313904, "eval_runtime": 3.0226, "eval_samples_per_second": 33.084, "eval_steps_per_second": 4.301, "step": 1400 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.63, "eval_loss": 0.5991740226745605, "eval_runtime": 3.0248, "eval_samples_per_second": 33.06, "eval_steps_per_second": 4.298, "step": 1425 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.63, "eval_loss": 0.5991753935813904, "eval_runtime": 3.0228, "eval_samples_per_second": 33.082, "eval_steps_per_second": 4.301, "step": 1450 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.64, "eval_loss": 0.5992391109466553, "eval_runtime": 3.0225, "eval_samples_per_second": 33.085, "eval_steps_per_second": 4.301, "step": 1475 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.00025, "loss": 0.6341, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.64, "eval_loss": 0.6026336550712585, "eval_runtime": 3.0215, "eval_samples_per_second": 33.096, "eval_steps_per_second": 4.302, "step": 1500 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.64, "eval_loss": 0.6022050976753235, "eval_runtime": 3.0236, "eval_samples_per_second": 33.074, "eval_steps_per_second": 4.3, "step": 1525 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.64, "eval_loss": 0.6025804281234741, "eval_runtime": 3.0231, "eval_samples_per_second": 33.078, "eval_steps_per_second": 4.3, "step": 1550 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.64, "eval_loss": 0.6036398410797119, "eval_runtime": 3.0233, "eval_samples_per_second": 33.077, "eval_steps_per_second": 4.3, "step": 1575 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.64, "eval_loss": 0.6039140224456787, "eval_runtime": 3.0225, "eval_samples_per_second": 33.085, "eval_steps_per_second": 4.301, "step": 1600 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.64, "eval_loss": 0.6040653586387634, "eval_runtime": 3.0224, "eval_samples_per_second": 33.086, "eval_steps_per_second": 4.301, "step": 1625 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.64, "eval_loss": 0.6034075021743774, "eval_runtime": 3.0223, "eval_samples_per_second": 33.088, "eval_steps_per_second": 4.301, "step": 1650 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.64, "eval_loss": 0.6048703789710999, "eval_runtime": 3.0235, "eval_samples_per_second": 33.074, "eval_steps_per_second": 4.3, "step": 1675 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.64, "eval_loss": 0.6027147769927979, "eval_runtime": 3.0195, "eval_samples_per_second": 33.118, "eval_steps_per_second": 4.305, "step": 1700 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.64, "eval_loss": 0.6057494878768921, "eval_runtime": 3.024, "eval_samples_per_second": 33.069, "eval_steps_per_second": 4.299, "step": 1725 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.64, "eval_loss": 0.6056165099143982, "eval_runtime": 3.021, "eval_samples_per_second": 33.102, "eval_steps_per_second": 4.303, "step": 1750 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.64, "eval_loss": 0.6048298478126526, "eval_runtime": 3.0253, "eval_samples_per_second": 33.054, "eval_steps_per_second": 4.297, "step": 1775 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.64, "eval_loss": 0.6018526554107666, "eval_runtime": 3.0215, "eval_samples_per_second": 33.096, "eval_steps_per_second": 4.302, "step": 1800 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.64, "eval_loss": 0.6020650267601013, "eval_runtime": 3.0323, "eval_samples_per_second": 32.978, "eval_steps_per_second": 4.287, "step": 1825 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.64, "eval_loss": 0.6018134951591492, "eval_runtime": 3.0272, "eval_samples_per_second": 33.034, "eval_steps_per_second": 4.294, "step": 1850 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.64, "eval_loss": 0.6026907563209534, "eval_runtime": 3.021, "eval_samples_per_second": 33.102, "eval_steps_per_second": 4.303, "step": 1875 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.64, "eval_loss": 0.6025264263153076, "eval_runtime": 3.0229, "eval_samples_per_second": 33.081, "eval_steps_per_second": 4.301, "step": 1900 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.64, "eval_loss": 0.6021308898925781, "eval_runtime": 3.0228, "eval_samples_per_second": 33.082, "eval_steps_per_second": 4.301, "step": 1925 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.64, "eval_loss": 0.6022512316703796, "eval_runtime": 3.0219, "eval_samples_per_second": 33.092, "eval_steps_per_second": 4.302, "step": 1950 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.64, "eval_loss": 0.6024240255355835, "eval_runtime": 3.0204, "eval_samples_per_second": 33.109, "eval_steps_per_second": 4.304, "step": 1975 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.626, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.64, "eval_loss": 0.6023900508880615, "eval_runtime": 3.0377, "eval_samples_per_second": 32.92, "eval_steps_per_second": 4.28, "step": 2000 }, { "best_epoch": 16, "best_eval_accuracy": 0.67, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.6361599731445312, "train_runtime": 1616.4107, "train_samples_per_second": 19.797, "train_steps_per_second": 1.237 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }