{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.5077166557312012, "eval_runtime": 4.2532, "eval_samples_per_second": 65.127, "eval_steps_per_second": 8.229, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.5306859205776173, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.0019465811965811966, "loss": 0.4439, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.3970734775066376, "eval_runtime": 4.3287, "eval_samples_per_second": 63.992, "eval_steps_per_second": 8.086, "step": 624 }, { "best_epoch": 0, "best_eval_accuracy": 0.5306859205776173, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.5379061371841155, "eval_loss": 0.35738521814346313, "eval_runtime": 4.3459, "eval_samples_per_second": 63.739, "eval_steps_per_second": 8.054, "step": 936 }, { "best_epoch": 2, "best_eval_accuracy": 0.5379061371841155, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.0018931623931623931, "loss": 0.4231, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5776173285198556, "eval_loss": 0.362491250038147, "eval_runtime": 4.3598, "eval_samples_per_second": 63.535, "eval_steps_per_second": 8.028, "step": 1248 }, { "best_epoch": 3, "best_eval_accuracy": 0.5776173285198556, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.0018397435897435897, "loss": 0.4071, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.49368584156036377, "eval_runtime": 4.3603, "eval_samples_per_second": 63.527, "eval_steps_per_second": 8.027, "step": 1560 }, { "best_epoch": 3, "best_eval_accuracy": 0.5776173285198556, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.5667870036101083, "eval_loss": 0.3738144636154175, "eval_runtime": 4.3477, "eval_samples_per_second": 63.712, "eval_steps_per_second": 8.05, "step": 1872 }, { "best_epoch": 3, "best_eval_accuracy": 0.5776173285198556, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.0017863247863247865, "loss": 0.3956, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.4080715775489807, "eval_runtime": 4.3468, "eval_samples_per_second": 63.725, "eval_steps_per_second": 8.052, "step": 2184 }, { "best_epoch": 3, "best_eval_accuracy": 0.5776173285198556, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.3385550379753113, "eval_runtime": 4.3506, "eval_samples_per_second": 63.67, "eval_steps_per_second": 8.045, "step": 2496 }, { "best_epoch": 7, "best_eval_accuracy": 0.6209386281588448, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.001732905982905983, "loss": 0.3905, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.4146560728549957, "eval_runtime": 4.3523, "eval_samples_per_second": 63.644, "eval_steps_per_second": 8.042, "step": 2808 }, { "best_epoch": 7, "best_eval_accuracy": 0.6209386281588448, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.0016794871794871796, "loss": 0.3888, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.6353790613718412, "eval_loss": 0.33526456356048584, "eval_runtime": 4.353, "eval_samples_per_second": 63.634, "eval_steps_per_second": 8.04, "step": 3120 }, { "best_epoch": 9, "best_eval_accuracy": 0.6353790613718412, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.628158844765343, "eval_loss": 0.3539533317089081, "eval_runtime": 4.3528, "eval_samples_per_second": 63.637, "eval_steps_per_second": 8.041, "step": 3432 }, { "best_epoch": 9, "best_eval_accuracy": 0.6353790613718412, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.0016260683760683761, "loss": 0.3992, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.5848375451263538, "eval_loss": 0.3453006446361542, "eval_runtime": 4.3589, "eval_samples_per_second": 63.547, "eval_steps_per_second": 8.029, "step": 3744 }, { "best_epoch": 9, "best_eval_accuracy": 0.6353790613718412, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.0015726495726495727, "loss": 0.372, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.32649099826812744, "eval_runtime": 4.3537, "eval_samples_per_second": 63.623, "eval_steps_per_second": 8.039, "step": 4056 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.6425992779783394, "eval_loss": 0.35752376914024353, "eval_runtime": 4.3547, "eval_samples_per_second": 63.609, "eval_steps_per_second": 8.037, "step": 4368 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.0015192307692307692, "loss": 0.3643, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.6498194945848376, "eval_loss": 0.3304148018360138, "eval_runtime": 4.3605, "eval_samples_per_second": 63.525, "eval_steps_per_second": 8.027, "step": 4680 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.6714801444043321, "eval_loss": 0.3632947504520416, "eval_runtime": 4.3555, "eval_samples_per_second": 63.598, "eval_steps_per_second": 8.036, "step": 4992 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.0014658119658119658, "loss": 0.3666, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.5230220556259155, "eval_runtime": 4.3594, "eval_samples_per_second": 63.54, "eval_steps_per_second": 8.029, "step": 5304 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.0014123931623931626, "loss": 0.3517, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.33842334151268005, "eval_runtime": 4.3558, "eval_samples_per_second": 63.593, "eval_steps_per_second": 8.035, "step": 5616 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.6823104693140795, "eval_loss": 0.32931071519851685, "eval_runtime": 4.3567, "eval_samples_per_second": 63.58, "eval_steps_per_second": 8.034, "step": 5928 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.0013589743589743591, "loss": 0.3519, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.6823104693140795, "eval_loss": 0.36133021116256714, "eval_runtime": 4.354, "eval_samples_per_second": 63.62, "eval_steps_per_second": 8.039, "step": 6240 }, { "best_epoch": 12, "best_eval_accuracy": 0.6895306859205776, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.0013055555555555557, "loss": 0.338, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.3241924047470093, "eval_runtime": 4.3569, "eval_samples_per_second": 63.577, "eval_steps_per_second": 8.033, "step": 6552 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.7184115523465704, "eval_loss": 0.3399159610271454, "eval_runtime": 4.3574, "eval_samples_per_second": 63.57, "eval_steps_per_second": 8.032, "step": 6864 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0012521367521367522, "loss": 0.3316, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.33920276165008545, "eval_runtime": 4.3578, "eval_samples_per_second": 63.565, "eval_steps_per_second": 8.032, "step": 7176 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.6534296028880866, "eval_loss": 0.33432814478874207, "eval_runtime": 4.3588, "eval_samples_per_second": 63.55, "eval_steps_per_second": 8.03, "step": 7488 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.0011987179487179488, "loss": 0.3266, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.3467292785644531, "eval_runtime": 4.3605, "eval_samples_per_second": 63.525, "eval_steps_per_second": 8.027, "step": 7800 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.0011452991452991453, "loss": 0.3213, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3418598473072052, "eval_runtime": 4.3581, "eval_samples_per_second": 63.559, "eval_steps_per_second": 8.031, "step": 8112 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.3190039396286011, "eval_runtime": 4.3552, "eval_samples_per_second": 63.601, "eval_steps_per_second": 8.036, "step": 8424 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.001091880341880342, "loss": 0.3177, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.32053741812705994, "eval_runtime": 4.3604, "eval_samples_per_second": 63.526, "eval_steps_per_second": 8.027, "step": 8736 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.0010384615384615387, "loss": 0.3187, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3302537798881531, "eval_runtime": 4.3616, "eval_samples_per_second": 63.509, "eval_steps_per_second": 8.025, "step": 9048 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.32684004306793213, "eval_runtime": 4.3594, "eval_samples_per_second": 63.541, "eval_steps_per_second": 8.029, "step": 9360 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.0009850427350427352, "loss": 0.3162, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3273889720439911, "eval_runtime": 4.3611, "eval_samples_per_second": 63.516, "eval_steps_per_second": 8.026, "step": 9672 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.33114469051361084, "eval_runtime": 4.3586, "eval_samples_per_second": 63.552, "eval_steps_per_second": 8.03, "step": 9984 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.0009316239316239317, "loss": 0.3132, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.345443457365036, "eval_runtime": 4.3573, "eval_samples_per_second": 63.572, "eval_steps_per_second": 8.033, "step": 10296 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.0008782051282051282, "loss": 0.3087, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.32495415210723877, "eval_runtime": 4.3587, "eval_samples_per_second": 63.551, "eval_steps_per_second": 8.03, "step": 10608 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3265839219093323, "eval_runtime": 4.3556, "eval_samples_per_second": 63.597, "eval_steps_per_second": 8.036, "step": 10920 }, { "best_epoch": 20, "best_eval_accuracy": 0.7256317689530686, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.0008247863247863248, "loss": 0.3076, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.3346775770187378, "eval_runtime": 4.3524, "eval_samples_per_second": 63.643, "eval_steps_per_second": 8.042, "step": 11232 }, { "best_epoch": 35, "best_eval_accuracy": 0.7292418772563177, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.0007713675213675214, "loss": 0.3071, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.3307958245277405, "eval_runtime": 4.3598, "eval_samples_per_second": 63.535, "eval_steps_per_second": 8.028, "step": 11544 }, { "best_epoch": 35, "best_eval_accuracy": 0.7292418772563177, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.3272268772125244, "eval_runtime": 4.3546, "eval_samples_per_second": 63.611, "eval_steps_per_second": 8.038, "step": 11856 }, { "best_epoch": 35, "best_eval_accuracy": 0.7292418772563177, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.000717948717948718, "loss": 0.3061, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3301219046115875, "eval_runtime": 4.3561, "eval_samples_per_second": 63.589, "eval_steps_per_second": 8.035, "step": 12168 }, { "best_epoch": 35, "best_eval_accuracy": 0.7292418772563177, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.32260364294052124, "eval_runtime": 4.356, "eval_samples_per_second": 63.59, "eval_steps_per_second": 8.035, "step": 12480 }, { "best_epoch": 35, "best_eval_accuracy": 0.7292418772563177, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.0006645299145299145, "loss": 0.3006, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.7364620938628159, "eval_loss": 0.32847943902015686, "eval_runtime": 4.3592, "eval_samples_per_second": 63.544, "eval_steps_per_second": 8.029, "step": 12792 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.0006111111111111112, "loss": 0.3016, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3225715160369873, "eval_runtime": 4.3622, "eval_samples_per_second": 63.5, "eval_steps_per_second": 8.023, "step": 13104 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.3290694057941437, "eval_runtime": 4.3571, "eval_samples_per_second": 63.575, "eval_steps_per_second": 8.033, "step": 13416 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.0005576923076923078, "loss": 0.2984, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.33770960569381714, "eval_runtime": 4.3531, "eval_samples_per_second": 63.633, "eval_steps_per_second": 8.04, "step": 13728 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.0005042735042735043, "loss": 0.2976, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.3326423168182373, "eval_runtime": 4.3579, "eval_samples_per_second": 63.563, "eval_steps_per_second": 8.031, "step": 14040 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.3340679407119751, "eval_runtime": 4.3576, "eval_samples_per_second": 63.568, "eval_steps_per_second": 8.032, "step": 14352 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.00045085470085470087, "loss": 0.2967, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.7184115523465704, "eval_loss": 0.3186676800251007, "eval_runtime": 4.3574, "eval_samples_per_second": 63.57, "eval_steps_per_second": 8.032, "step": 14664 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3322432041168213, "eval_runtime": 4.3552, "eval_samples_per_second": 63.602, "eval_steps_per_second": 8.036, "step": 14976 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.0003974358974358974, "loss": 0.2953, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.7364620938628159, "eval_loss": 0.32692331075668335, "eval_runtime": 4.3589, "eval_samples_per_second": 63.548, "eval_steps_per_second": 8.03, "step": 15288 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.00034401709401709403, "loss": 0.2911, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.7364620938628159, "eval_loss": 0.3256283104419708, "eval_runtime": 4.3551, "eval_samples_per_second": 63.604, "eval_steps_per_second": 8.037, "step": 15600 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.3252301812171936, "eval_runtime": 4.3543, "eval_samples_per_second": 63.615, "eval_steps_per_second": 8.038, "step": 15912 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.00029059829059829064, "loss": 0.2929, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.32512250542640686, "eval_runtime": 4.3638, "eval_samples_per_second": 63.476, "eval_steps_per_second": 8.02, "step": 16224 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.00023717948717948717, "loss": 0.2904, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.32575854659080505, "eval_runtime": 4.3574, "eval_samples_per_second": 63.57, "eval_steps_per_second": 8.032, "step": 16536 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.3357574939727783, "eval_runtime": 4.3583, "eval_samples_per_second": 63.556, "eval_steps_per_second": 8.031, "step": 16848 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 0.00018376068376068378, "loss": 0.2895, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.7328519855595668, "eval_loss": 0.32194650173187256, "eval_runtime": 4.3566, "eval_samples_per_second": 63.581, "eval_steps_per_second": 8.034, "step": 17160 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.7328519855595668, "eval_loss": 0.3322417438030243, "eval_runtime": 4.356, "eval_samples_per_second": 63.591, "eval_steps_per_second": 8.035, "step": 17472 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 0.00013034188034188036, "loss": 0.2887, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.7364620938628159, "eval_loss": 0.325890451669693, "eval_runtime": 4.3537, "eval_samples_per_second": 63.624, "eval_steps_per_second": 8.039, "step": 17784 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 7.692307692307693e-05, "loss": 0.2883, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.32602423429489136, "eval_runtime": 4.3569, "eval_samples_per_second": 63.578, "eval_steps_per_second": 8.033, "step": 18096 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.7364620938628159, "eval_loss": 0.3276124596595764, "eval_runtime": 4.3463, "eval_samples_per_second": 63.732, "eval_steps_per_second": 8.053, "step": 18408 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 2.3504273504273504e-05, "loss": 0.2874, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.7328519855595668, "eval_loss": 0.3288937509059906, "eval_runtime": 4.351, "eval_samples_per_second": 63.664, "eval_steps_per_second": 8.044, "step": 18720 }, { "best_epoch": 40, "best_eval_accuracy": 0.7364620938628159, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 6.96152728406016e+16, "train_loss": 0.33155403911557974, "train_runtime": 3997.9333, "train_samples_per_second": 37.369, "train_steps_per_second": 4.682 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 6.96152728406016e+16, "trial_name": null, "trial_params": null }