{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.49, "eval_loss": 0.5651425123214722, "eval_runtime": 2.9028, "eval_samples_per_second": 34.45, "eval_steps_per_second": 4.478, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.49, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.6, "eval_loss": 0.5092301368713379, "eval_runtime": 2.9462, "eval_samples_per_second": 33.943, "eval_steps_per_second": 4.413, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.6, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.63, "eval_loss": 0.5062299370765686, "eval_runtime": 2.9899, "eval_samples_per_second": 33.446, "eval_steps_per_second": 4.348, "step": 75 }, { "best_epoch": 2, "best_eval_accuracy": 0.63, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.64, "eval_loss": 0.4843701422214508, "eval_runtime": 3.0147, "eval_samples_per_second": 33.171, "eval_steps_per_second": 4.312, "step": 100 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.58, "eval_loss": 0.441400945186615, "eval_runtime": 3.0319, "eval_samples_per_second": 32.982, "eval_steps_per_second": 4.288, "step": 125 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.58, "eval_loss": 0.4265769124031067, "eval_runtime": 3.0555, "eval_samples_per_second": 32.728, "eval_steps_per_second": 4.255, "step": 150 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.62, "eval_loss": 0.4179702401161194, "eval_runtime": 3.0661, "eval_samples_per_second": 32.615, "eval_steps_per_second": 4.24, "step": 175 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.64, "eval_loss": 0.46249979734420776, "eval_runtime": 3.0777, "eval_samples_per_second": 32.492, "eval_steps_per_second": 4.224, "step": 200 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.61, "eval_loss": 0.41327133774757385, "eval_runtime": 3.0835, "eval_samples_per_second": 32.43, "eval_steps_per_second": 4.216, "step": 225 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.63, "eval_loss": 0.42823413014411926, "eval_runtime": 3.0882, "eval_samples_per_second": 32.382, "eval_steps_per_second": 4.21, "step": 250 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.57, "eval_loss": 0.41956833004951477, "eval_runtime": 3.0926, "eval_samples_per_second": 32.335, "eval_steps_per_second": 4.204, "step": 275 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.6, "eval_loss": 0.40657806396484375, "eval_runtime": 3.092, "eval_samples_per_second": 32.342, "eval_steps_per_second": 4.204, "step": 300 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.62, "eval_loss": 0.40092143416404724, "eval_runtime": 3.0926, "eval_samples_per_second": 32.336, "eval_steps_per_second": 4.204, "step": 325 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.63, "eval_loss": 0.39529332518577576, "eval_runtime": 3.0883, "eval_samples_per_second": 32.381, "eval_steps_per_second": 4.209, "step": 350 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.61, "eval_loss": 0.39530593156814575, "eval_runtime": 3.0882, "eval_samples_per_second": 32.382, "eval_steps_per_second": 4.21, "step": 375 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.64, "eval_loss": 0.4115433990955353, "eval_runtime": 3.085, "eval_samples_per_second": 32.415, "eval_steps_per_second": 4.214, "step": 400 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.6, "eval_loss": 0.38952964544296265, "eval_runtime": 3.0886, "eval_samples_per_second": 32.377, "eval_steps_per_second": 4.209, "step": 425 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.63, "eval_loss": 0.427374929189682, "eval_runtime": 3.0847, "eval_samples_per_second": 32.418, "eval_steps_per_second": 4.214, "step": 450 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.64, "eval_loss": 0.39974284172058105, "eval_runtime": 3.085, "eval_samples_per_second": 32.415, "eval_steps_per_second": 4.214, "step": 475 }, { "best_epoch": 3, "best_eval_accuracy": 0.64, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.0075, "loss": 0.6183, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.66, "eval_loss": 0.3965441584587097, "eval_runtime": 3.085, "eval_samples_per_second": 32.415, "eval_steps_per_second": 4.214, "step": 500 }, { "best_epoch": 19, "best_eval_accuracy": 0.66, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.68, "eval_loss": 0.43516531586647034, "eval_runtime": 3.0853, "eval_samples_per_second": 32.412, "eval_steps_per_second": 4.214, "step": 525 }, { "best_epoch": 20, "best_eval_accuracy": 0.68, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.69, "eval_loss": 0.4253265857696533, "eval_runtime": 3.0836, "eval_samples_per_second": 32.43, "eval_steps_per_second": 4.216, "step": 550 }, { "best_epoch": 21, "best_eval_accuracy": 0.69, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.66, "eval_loss": 0.3890969753265381, "eval_runtime": 3.0862, "eval_samples_per_second": 32.402, "eval_steps_per_second": 4.212, "step": 575 }, { "best_epoch": 21, "best_eval_accuracy": 0.69, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.69, "eval_loss": 0.4323919415473938, "eval_runtime": 3.0876, "eval_samples_per_second": 32.387, "eval_steps_per_second": 4.21, "step": 600 }, { "best_epoch": 21, "best_eval_accuracy": 0.69, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.73, "eval_loss": 0.4395662248134613, "eval_runtime": 3.0865, "eval_samples_per_second": 32.399, "eval_steps_per_second": 4.212, "step": 625 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.68, "eval_loss": 0.43163925409317017, "eval_runtime": 3.0856, "eval_samples_per_second": 32.408, "eval_steps_per_second": 4.213, "step": 650 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.67, "eval_loss": 0.39514267444610596, "eval_runtime": 3.0856, "eval_samples_per_second": 32.408, "eval_steps_per_second": 4.213, "step": 675 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.68, "eval_loss": 0.4021581709384918, "eval_runtime": 3.0862, "eval_samples_per_second": 32.402, "eval_steps_per_second": 4.212, "step": 700 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.68, "eval_loss": 0.42091983556747437, "eval_runtime": 3.0882, "eval_samples_per_second": 32.382, "eval_steps_per_second": 4.21, "step": 725 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.7, "eval_loss": 0.4499932527542114, "eval_runtime": 3.0879, "eval_samples_per_second": 32.385, "eval_steps_per_second": 4.21, "step": 750 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.71, "eval_loss": 0.4072466194629669, "eval_runtime": 3.0878, "eval_samples_per_second": 32.385, "eval_steps_per_second": 4.21, "step": 775 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.7, "eval_loss": 0.40178433060646057, "eval_runtime": 3.0877, "eval_samples_per_second": 32.387, "eval_steps_per_second": 4.21, "step": 800 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.7, "eval_loss": 0.41905662417411804, "eval_runtime": 3.089, "eval_samples_per_second": 32.373, "eval_steps_per_second": 4.208, "step": 825 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.71, "eval_loss": 0.3970845937728882, "eval_runtime": 3.0867, "eval_samples_per_second": 32.397, "eval_steps_per_second": 4.212, "step": 850 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.7, "eval_loss": 0.39993271231651306, "eval_runtime": 3.088, "eval_samples_per_second": 32.383, "eval_steps_per_second": 4.21, "step": 875 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.71, "eval_loss": 0.40247154235839844, "eval_runtime": 3.0865, "eval_samples_per_second": 32.399, "eval_steps_per_second": 4.212, "step": 900 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.71, "eval_loss": 0.4090607762336731, "eval_runtime": 3.0867, "eval_samples_per_second": 32.397, "eval_steps_per_second": 4.212, "step": 925 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.72, "eval_loss": 0.4060271084308624, "eval_runtime": 3.0865, "eval_samples_per_second": 32.399, "eval_steps_per_second": 4.212, "step": 950 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.71, "eval_loss": 0.4415541887283325, "eval_runtime": 3.089, "eval_samples_per_second": 32.373, "eval_steps_per_second": 4.208, "step": 975 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.005, "loss": 0.4716, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.71, "eval_loss": 0.404102623462677, "eval_runtime": 3.0888, "eval_samples_per_second": 32.375, "eval_steps_per_second": 4.209, "step": 1000 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.72, "eval_loss": 0.4100326895713806, "eval_runtime": 3.0875, "eval_samples_per_second": 32.389, "eval_steps_per_second": 4.211, "step": 1025 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.73, "eval_loss": 0.40424102544784546, "eval_runtime": 3.0901, "eval_samples_per_second": 32.362, "eval_steps_per_second": 4.207, "step": 1050 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.71, "eval_loss": 0.3743560314178467, "eval_runtime": 3.0886, "eval_samples_per_second": 32.377, "eval_steps_per_second": 4.209, "step": 1075 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.71, "eval_loss": 0.3826563358306885, "eval_runtime": 3.089, "eval_samples_per_second": 32.373, "eval_steps_per_second": 4.209, "step": 1100 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.71, "eval_loss": 0.39410629868507385, "eval_runtime": 3.0915, "eval_samples_per_second": 32.346, "eval_steps_per_second": 4.205, "step": 1125 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.73, "eval_loss": 0.430500864982605, "eval_runtime": 3.0889, "eval_samples_per_second": 32.373, "eval_steps_per_second": 4.209, "step": 1150 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.73, "eval_loss": 0.40077340602874756, "eval_runtime": 3.0873, "eval_samples_per_second": 32.39, "eval_steps_per_second": 4.211, "step": 1175 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.73, "eval_loss": 0.40273621678352356, "eval_runtime": 3.0892, "eval_samples_per_second": 32.371, "eval_steps_per_second": 4.208, "step": 1200 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.72, "eval_loss": 0.4023691415786743, "eval_runtime": 3.0893, "eval_samples_per_second": 32.369, "eval_steps_per_second": 4.208, "step": 1225 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.72, "eval_loss": 0.3937583267688751, "eval_runtime": 3.0877, "eval_samples_per_second": 32.387, "eval_steps_per_second": 4.21, "step": 1250 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.73, "eval_loss": 0.38425591588020325, "eval_runtime": 3.0873, "eval_samples_per_second": 32.391, "eval_steps_per_second": 4.211, "step": 1275 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.73, "eval_loss": 0.3911306858062744, "eval_runtime": 3.0857, "eval_samples_per_second": 32.407, "eval_steps_per_second": 4.213, "step": 1300 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.73, "eval_loss": 0.3854801654815674, "eval_runtime": 3.0878, "eval_samples_per_second": 32.385, "eval_steps_per_second": 4.21, "step": 1325 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.72, "eval_loss": 0.3934061527252197, "eval_runtime": 3.0865, "eval_samples_per_second": 32.399, "eval_steps_per_second": 4.212, "step": 1350 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.73, "eval_loss": 0.40291014313697815, "eval_runtime": 3.0864, "eval_samples_per_second": 32.401, "eval_steps_per_second": 4.212, "step": 1375 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.73, "eval_loss": 0.38784506916999817, "eval_runtime": 3.0835, "eval_samples_per_second": 32.43, "eval_steps_per_second": 4.216, "step": 1400 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.72, "eval_loss": 0.3839341104030609, "eval_runtime": 3.0855, "eval_samples_per_second": 32.41, "eval_steps_per_second": 4.213, "step": 1425 }, { "best_epoch": 24, "best_eval_accuracy": 0.73, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.75, "eval_loss": 0.3942648768424988, "eval_runtime": 3.0854, "eval_samples_per_second": 32.411, "eval_steps_per_second": 4.213, "step": 1450 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.74, "eval_loss": 0.3984474539756775, "eval_runtime": 3.0848, "eval_samples_per_second": 32.417, "eval_steps_per_second": 4.214, "step": 1475 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.0025, "loss": 0.4121, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.71, "eval_loss": 0.4064193069934845, "eval_runtime": 3.0855, "eval_samples_per_second": 32.41, "eval_steps_per_second": 4.213, "step": 1500 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.72, "eval_loss": 0.3871249258518219, "eval_runtime": 3.0854, "eval_samples_per_second": 32.411, "eval_steps_per_second": 4.213, "step": 1525 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.73, "eval_loss": 0.41410523653030396, "eval_runtime": 3.0839, "eval_samples_per_second": 32.427, "eval_steps_per_second": 4.215, "step": 1550 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.72, "eval_loss": 0.38500529527664185, "eval_runtime": 3.0874, "eval_samples_per_second": 32.39, "eval_steps_per_second": 4.211, "step": 1575 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.73, "eval_loss": 0.3933054804801941, "eval_runtime": 3.0858, "eval_samples_per_second": 32.406, "eval_steps_per_second": 4.213, "step": 1600 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.72, "eval_loss": 0.40550005435943604, "eval_runtime": 3.0847, "eval_samples_per_second": 32.418, "eval_steps_per_second": 4.214, "step": 1625 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.72, "eval_loss": 0.38515716791152954, "eval_runtime": 3.084, "eval_samples_per_second": 32.425, "eval_steps_per_second": 4.215, "step": 1650 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.73, "eval_loss": 0.39516574144363403, "eval_runtime": 3.0876, "eval_samples_per_second": 32.387, "eval_steps_per_second": 4.21, "step": 1675 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.72, "eval_loss": 0.38739094138145447, "eval_runtime": 3.0845, "eval_samples_per_second": 32.42, "eval_steps_per_second": 4.215, "step": 1700 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.72, "eval_loss": 0.3999227285385132, "eval_runtime": 3.0852, "eval_samples_per_second": 32.413, "eval_steps_per_second": 4.214, "step": 1725 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.72, "eval_loss": 0.39561355113983154, "eval_runtime": 3.0845, "eval_samples_per_second": 32.42, "eval_steps_per_second": 4.215, "step": 1750 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.72, "eval_loss": 0.39178183674812317, "eval_runtime": 3.0849, "eval_samples_per_second": 32.416, "eval_steps_per_second": 4.214, "step": 1775 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.72, "eval_loss": 0.38587287068367004, "eval_runtime": 3.0854, "eval_samples_per_second": 32.411, "eval_steps_per_second": 4.213, "step": 1800 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.72, "eval_loss": 0.39257708191871643, "eval_runtime": 3.0824, "eval_samples_per_second": 32.443, "eval_steps_per_second": 4.218, "step": 1825 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.72, "eval_loss": 0.38968443870544434, "eval_runtime": 3.084, "eval_samples_per_second": 32.425, "eval_steps_per_second": 4.215, "step": 1850 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.72, "eval_loss": 0.3859497010707855, "eval_runtime": 3.0819, "eval_samples_per_second": 32.447, "eval_steps_per_second": 4.218, "step": 1875 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.72, "eval_loss": 0.38490238785743713, "eval_runtime": 3.083, "eval_samples_per_second": 32.436, "eval_steps_per_second": 4.217, "step": 1900 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.72, "eval_loss": 0.3855893313884735, "eval_runtime": 3.0824, "eval_samples_per_second": 32.442, "eval_steps_per_second": 4.217, "step": 1925 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.72, "eval_loss": 0.3901687562465668, "eval_runtime": 3.0824, "eval_samples_per_second": 32.442, "eval_steps_per_second": 4.217, "step": 1950 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.72, "eval_loss": 0.3903580904006958, "eval_runtime": 3.0842, "eval_samples_per_second": 32.423, "eval_steps_per_second": 4.215, "step": 1975 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.3881, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.72, "eval_loss": 0.3895449936389923, "eval_runtime": 3.0918, "eval_samples_per_second": 32.344, "eval_steps_per_second": 4.205, "step": 2000 }, { "best_epoch": 57, "best_eval_accuracy": 0.75, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.47253459167480466, "train_runtime": 1645.8876, "train_samples_per_second": 19.442, "train_steps_per_second": 1.215 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }