{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5234657039711191, "eval_loss": 0.5883553624153137, "eval_runtime": 8.7854, "eval_samples_per_second": 31.53, "eval_steps_per_second": 3.984, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.004866452991452991, "loss": 0.6001, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.4145338237285614, "eval_runtime": 8.8107, "eval_samples_per_second": 31.439, "eval_steps_per_second": 3.972, "step": 624 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6337469816207886, "eval_runtime": 8.7248, "eval_samples_per_second": 31.748, "eval_steps_per_second": 4.012, "step": 936 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.004732905982905983, "loss": 0.5343, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.3934288024902344, "eval_runtime": 8.7605, "eval_samples_per_second": 31.619, "eval_steps_per_second": 3.995, "step": 1248 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.004599358974358974, "loss": 0.5255, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.566152811050415, "eval_runtime": 8.8523, "eval_samples_per_second": 31.291, "eval_steps_per_second": 3.954, "step": 1560 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.5157924890518188, "eval_runtime": 8.9667, "eval_samples_per_second": 30.892, "eval_steps_per_second": 3.903, "step": 1872 }, { "best_epoch": 5, "best_eval_accuracy": 0.5270758122743683, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.004465811965811966, "loss": 0.504, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.3480044901371002, "eval_runtime": 8.8419, "eval_samples_per_second": 31.328, "eval_steps_per_second": 3.958, "step": 2184 }, { "best_epoch": 6, "best_eval_accuracy": 0.5342960288808665, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.5379061371841155, "eval_loss": 0.38455167412757874, "eval_runtime": 8.9247, "eval_samples_per_second": 31.037, "eval_steps_per_second": 3.922, "step": 2496 }, { "best_epoch": 7, "best_eval_accuracy": 0.5379061371841155, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.004332264957264957, "loss": 0.4941, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.5111421942710876, "eval_runtime": 9.2356, "eval_samples_per_second": 29.993, "eval_steps_per_second": 3.79, "step": 2808 }, { "best_epoch": 7, "best_eval_accuracy": 0.5379061371841155, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.004198717948717949, "loss": 0.5022, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.4620998203754425, "eval_runtime": 8.9286, "eval_samples_per_second": 31.024, "eval_steps_per_second": 3.92, "step": 3120 }, { "best_epoch": 7, "best_eval_accuracy": 0.5379061371841155, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.6425992779783394, "eval_loss": 0.3418288826942444, "eval_runtime": 8.7679, "eval_samples_per_second": 31.592, "eval_steps_per_second": 3.992, "step": 3432 }, { "best_epoch": 10, "best_eval_accuracy": 0.6425992779783394, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.00406517094017094, "loss": 0.453, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.5631768953068592, "eval_loss": 0.3651691675186157, "eval_runtime": 8.7849, "eval_samples_per_second": 31.532, "eval_steps_per_second": 3.984, "step": 3744 }, { "best_epoch": 10, "best_eval_accuracy": 0.6425992779783394, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.003931623931623931, "loss": 0.3879, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.5595667870036101, "eval_loss": 0.3450925946235657, "eval_runtime": 8.7727, "eval_samples_per_second": 31.575, "eval_steps_per_second": 3.99, "step": 4056 }, { "best_epoch": 10, "best_eval_accuracy": 0.6425992779783394, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.6425992779783394, "eval_loss": 0.33118629455566406, "eval_runtime": 8.7741, "eval_samples_per_second": 31.57, "eval_steps_per_second": 3.989, "step": 4368 }, { "best_epoch": 10, "best_eval_accuracy": 0.6425992779783394, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.003798076923076923, "loss": 0.3698, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.3599488437175751, "eval_runtime": 8.7843, "eval_samples_per_second": 31.533, "eval_steps_per_second": 3.984, "step": 4680 }, { "best_epoch": 14, "best_eval_accuracy": 0.6462093862815884, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.5992779783393501, "eval_loss": 0.39469993114471436, "eval_runtime": 8.7768, "eval_samples_per_second": 31.561, "eval_steps_per_second": 3.988, "step": 4992 }, { "best_epoch": 14, "best_eval_accuracy": 0.6462093862815884, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.003664529914529914, "loss": 0.3705, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.6173285198555957, "eval_loss": 0.38333019614219666, "eval_runtime": 8.7807, "eval_samples_per_second": 31.547, "eval_steps_per_second": 3.986, "step": 5304 }, { "best_epoch": 14, "best_eval_accuracy": 0.6462093862815884, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.003530982905982906, "loss": 0.3598, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.33536940813064575, "eval_runtime": 8.765, "eval_samples_per_second": 31.603, "eval_steps_per_second": 3.993, "step": 5616 }, { "best_epoch": 14, "best_eval_accuracy": 0.6462093862815884, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.6714801444043321, "eval_loss": 0.33954861760139465, "eval_runtime": 8.7463, "eval_samples_per_second": 31.67, "eval_steps_per_second": 4.002, "step": 5928 }, { "best_epoch": 18, "best_eval_accuracy": 0.6714801444043321, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.0033974358974358976, "loss": 0.3631, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.6389891696750902, "eval_loss": 0.36644089221954346, "eval_runtime": 8.72, "eval_samples_per_second": 31.766, "eval_steps_per_second": 4.014, "step": 6240 }, { "best_epoch": 18, "best_eval_accuracy": 0.6714801444043321, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.003263888888888889, "loss": 0.3515, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.34204551577568054, "eval_runtime": 8.8321, "eval_samples_per_second": 31.363, "eval_steps_per_second": 3.963, "step": 6552 }, { "best_epoch": 20, "best_eval_accuracy": 0.6787003610108303, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.6137184115523465, "eval_loss": 0.34830158948898315, "eval_runtime": 8.8779, "eval_samples_per_second": 31.201, "eval_steps_per_second": 3.942, "step": 6864 }, { "best_epoch": 20, "best_eval_accuracy": 0.6787003610108303, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0031303418803418806, "loss": 0.3486, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.6498194945848376, "eval_loss": 0.3820384442806244, "eval_runtime": 8.8616, "eval_samples_per_second": 31.258, "eval_steps_per_second": 3.95, "step": 7176 }, { "best_epoch": 20, "best_eval_accuracy": 0.6787003610108303, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.32402852177619934, "eval_runtime": 8.8647, "eval_samples_per_second": 31.248, "eval_steps_per_second": 3.948, "step": 7488 }, { "best_epoch": 23, "best_eval_accuracy": 0.7003610108303249, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.0029967948717948716, "loss": 0.3437, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3299575448036194, "eval_runtime": 8.8718, "eval_samples_per_second": 31.223, "eval_steps_per_second": 3.945, "step": 7800 }, { "best_epoch": 24, "best_eval_accuracy": 0.7148014440433214, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.002863247863247863, "loss": 0.3389, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.3404531478881836, "eval_runtime": 8.8761, "eval_samples_per_second": 31.208, "eval_steps_per_second": 3.943, "step": 8112 }, { "best_epoch": 24, "best_eval_accuracy": 0.7148014440433214, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.32906994223594666, "eval_runtime": 8.8771, "eval_samples_per_second": 31.204, "eval_steps_per_second": 3.943, "step": 8424 }, { "best_epoch": 24, "best_eval_accuracy": 0.7148014440433214, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.0027297008547008546, "loss": 0.3363, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.333825945854187, "eval_runtime": 8.8635, "eval_samples_per_second": 31.252, "eval_steps_per_second": 3.949, "step": 8736 }, { "best_epoch": 24, "best_eval_accuracy": 0.7148014440433214, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.0025961538461538466, "loss": 0.3381, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.33664965629577637, "eval_runtime": 9.0918, "eval_samples_per_second": 30.467, "eval_steps_per_second": 3.85, "step": 9048 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.6606498194945848, "eval_loss": 0.3830634355545044, "eval_runtime": 10.2562, "eval_samples_per_second": 27.008, "eval_steps_per_second": 3.413, "step": 9360 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.0024626068376068376, "loss": 0.3302, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3299681842327118, "eval_runtime": 10.119, "eval_samples_per_second": 27.374, "eval_steps_per_second": 3.459, "step": 9672 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3223809599876404, "eval_runtime": 10.1698, "eval_samples_per_second": 27.238, "eval_steps_per_second": 3.442, "step": 9984 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.002329059829059829, "loss": 0.33, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.3332250118255615, "eval_runtime": 10.1039, "eval_samples_per_second": 27.415, "eval_steps_per_second": 3.464, "step": 10296 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.0021955128205128206, "loss": 0.3271, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.3412158787250519, "eval_runtime": 10.1195, "eval_samples_per_second": 27.373, "eval_steps_per_second": 3.459, "step": 10608 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3197210431098938, "eval_runtime": 10.0745, "eval_samples_per_second": 27.495, "eval_steps_per_second": 3.474, "step": 10920 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.002061965811965812, "loss": 0.3266, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3236064612865448, "eval_runtime": 10.0538, "eval_samples_per_second": 27.552, "eval_steps_per_second": 3.481, "step": 11232 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.0019284188034188036, "loss": 0.3248, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.6750902527075813, "eval_loss": 0.3621442914009094, "eval_runtime": 10.1983, "eval_samples_per_second": 27.161, "eval_steps_per_second": 3.432, "step": 11544 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.33299773931503296, "eval_runtime": 10.136, "eval_samples_per_second": 27.328, "eval_steps_per_second": 3.453, "step": 11856 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.0017948717948717949, "loss": 0.3223, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.6823104693140795, "eval_loss": 0.3635832667350769, "eval_runtime": 10.2689, "eval_samples_per_second": 26.975, "eval_steps_per_second": 3.408, "step": 12168 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3298203647136688, "eval_runtime": 10.1829, "eval_samples_per_second": 27.202, "eval_steps_per_second": 3.437, "step": 12480 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.0016613247863247864, "loss": 0.3205, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.32243612408638, "eval_runtime": 10.719, "eval_samples_per_second": 25.842, "eval_steps_per_second": 3.265, "step": 12792 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.0015277777777777779, "loss": 0.3177, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.3287702202796936, "eval_runtime": 10.6219, "eval_samples_per_second": 26.078, "eval_steps_per_second": 3.295, "step": 13104 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.6823104693140795, "eval_loss": 0.3464113175868988, "eval_runtime": 9.1644, "eval_samples_per_second": 30.226, "eval_steps_per_second": 3.819, "step": 13416 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.0013942307692307694, "loss": 0.3167, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.35672882199287415, "eval_runtime": 9.0166, "eval_samples_per_second": 30.721, "eval_steps_per_second": 3.882, "step": 13728 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.0012606837606837606, "loss": 0.3159, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.355069100856781, "eval_runtime": 9.0565, "eval_samples_per_second": 30.586, "eval_steps_per_second": 3.865, "step": 14040 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.33131033182144165, "eval_runtime": 9.1597, "eval_samples_per_second": 30.241, "eval_steps_per_second": 3.821, "step": 14352 }, { "best_epoch": 33, "best_eval_accuracy": 0.7256317689530686, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.0011271367521367521, "loss": 0.3131, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.3233274519443512, "eval_runtime": 9.0766, "eval_samples_per_second": 30.518, "eval_steps_per_second": 3.856, "step": 14664 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.6750902527075813, "eval_loss": 0.35078391432762146, "eval_runtime": 9.0843, "eval_samples_per_second": 30.492, "eval_steps_per_second": 3.853, "step": 14976 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.0009935897435897436, "loss": 0.3118, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.34203651547431946, "eval_runtime": 9.1363, "eval_samples_per_second": 30.319, "eval_steps_per_second": 3.831, "step": 15288 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.0008600427350427351, "loss": 0.3088, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.34102147817611694, "eval_runtime": 9.0781, "eval_samples_per_second": 30.513, "eval_steps_per_second": 3.855, "step": 15600 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3420613706111908, "eval_runtime": 8.9032, "eval_samples_per_second": 31.113, "eval_steps_per_second": 3.931, "step": 15912 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.0007264957264957266, "loss": 0.3082, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3411448895931244, "eval_runtime": 8.9332, "eval_samples_per_second": 31.008, "eval_steps_per_second": 3.918, "step": 16224 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.000592948717948718, "loss": 0.3068, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.6823104693140795, "eval_loss": 0.3616185486316681, "eval_runtime": 8.921, "eval_samples_per_second": 31.05, "eval_steps_per_second": 3.923, "step": 16536 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.6714801444043321, "eval_loss": 0.3554804027080536, "eval_runtime": 8.9621, "eval_samples_per_second": 30.908, "eval_steps_per_second": 3.905, "step": 16848 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 0.00045940170940170943, "loss": 0.3031, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.34177201986312866, "eval_runtime": 9.1926, "eval_samples_per_second": 30.133, "eval_steps_per_second": 3.807, "step": 17160 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.6859205776173285, "eval_loss": 0.34597525000572205, "eval_runtime": 9.1395, "eval_samples_per_second": 30.308, "eval_steps_per_second": 3.83, "step": 17472 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 0.00032585470085470087, "loss": 0.3039, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.335258424282074, "eval_runtime": 9.0559, "eval_samples_per_second": 30.588, "eval_steps_per_second": 3.865, "step": 17784 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 0.00019230769230769233, "loss": 0.3025, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.34501808881759644, "eval_runtime": 3.4803, "eval_samples_per_second": 79.591, "eval_steps_per_second": 10.057, "step": 18096 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.34273090958595276, "eval_runtime": 8.911, "eval_samples_per_second": 31.085, "eval_steps_per_second": 3.928, "step": 18408 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 5.876068376068376e-05, "loss": 0.3034, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.3430791199207306, "eval_runtime": 8.959, "eval_samples_per_second": 30.919, "eval_steps_per_second": 3.907, "step": 18720 }, { "best_epoch": 46, "best_eval_accuracy": 0.7292418772563177, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 6.96152728406016e+16, "train_loss": 0.36451038132365954, "train_runtime": 8268.8296, "train_samples_per_second": 18.068, "train_steps_per_second": 2.264 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 6.96152728406016e+16, "trial_name": null, "trial_params": null }