{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.3733972907066345, "eval_runtime": 4.2135, "eval_samples_per_second": 65.74, "eval_steps_per_second": 8.307, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.5306859205776173, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.0009732905982905983, "loss": 0.4216, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.38019850850105286, "eval_runtime": 4.2887, "eval_samples_per_second": 64.589, "eval_steps_per_second": 8.161, "step": 624 }, { "best_epoch": 0, "best_eval_accuracy": 0.5306859205776173, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.4298796057701111, "eval_runtime": 4.2965, "eval_samples_per_second": 64.471, "eval_steps_per_second": 8.146, "step": 936 }, { "best_epoch": 0, "best_eval_accuracy": 0.5306859205776173, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.0009465811965811966, "loss": 0.3883, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5451263537906137, "eval_loss": 0.34903544187545776, "eval_runtime": 4.308, "eval_samples_per_second": 64.299, "eval_steps_per_second": 8.124, "step": 1248 }, { "best_epoch": 3, "best_eval_accuracy": 0.5451263537906137, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.0009198717948717949, "loss": 0.3918, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.5884476534296029, "eval_loss": 0.3460931181907654, "eval_runtime": 4.301, "eval_samples_per_second": 64.404, "eval_steps_per_second": 8.138, "step": 1560 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.5523465703971119, "eval_loss": 0.3598935902118683, "eval_runtime": 4.2993, "eval_samples_per_second": 64.429, "eval_steps_per_second": 8.141, "step": 1872 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.0008931623931623932, "loss": 0.3764, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.5451263537906137, "eval_loss": 0.356489896774292, "eval_runtime": 4.3043, "eval_samples_per_second": 64.355, "eval_steps_per_second": 8.131, "step": 2184 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.5018050541516246, "eval_loss": 0.3611055910587311, "eval_runtime": 4.3049, "eval_samples_per_second": 64.345, "eval_steps_per_second": 8.13, "step": 2496 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.0008664529914529915, "loss": 0.3794, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.5415162454873647, "eval_loss": 0.40397846698760986, "eval_runtime": 4.3052, "eval_samples_per_second": 64.341, "eval_steps_per_second": 8.13, "step": 2808 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.0008397435897435898, "loss": 0.3778, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.3622380495071411, "eval_runtime": 4.3031, "eval_samples_per_second": 64.372, "eval_steps_per_second": 8.134, "step": 3120 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.4693140794223827, "eval_loss": 0.4954076111316681, "eval_runtime": 4.3057, "eval_samples_per_second": 64.333, "eval_steps_per_second": 8.129, "step": 3432 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.0008130341880341881, "loss": 0.3813, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.3602141737937927, "eval_runtime": 4.3078, "eval_samples_per_second": 64.302, "eval_steps_per_second": 8.125, "step": 3744 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.0007863247863247863, "loss": 0.3718, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.5415162454873647, "eval_loss": 0.345300555229187, "eval_runtime": 4.2997, "eval_samples_per_second": 64.422, "eval_steps_per_second": 8.14, "step": 4056 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.36396491527557373, "eval_runtime": 4.3002, "eval_samples_per_second": 64.415, "eval_steps_per_second": 8.139, "step": 4368 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.0007596153846153846, "loss": 0.3701, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.358859658241272, "eval_runtime": 4.3059, "eval_samples_per_second": 64.33, "eval_steps_per_second": 8.128, "step": 4680 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.5631768953068592, "eval_loss": 0.36997947096824646, "eval_runtime": 4.3027, "eval_samples_per_second": 64.378, "eval_steps_per_second": 8.134, "step": 4992 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.0007329059829059829, "loss": 0.371, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.4146815538406372, "eval_runtime": 4.3009, "eval_samples_per_second": 64.405, "eval_steps_per_second": 8.138, "step": 5304 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.0007061965811965813, "loss": 0.3644, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.5740072202166066, "eval_loss": 0.3504742681980133, "eval_runtime": 4.3002, "eval_samples_per_second": 64.416, "eval_steps_per_second": 8.139, "step": 5616 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.37356558442115784, "eval_runtime": 4.2986, "eval_samples_per_second": 64.439, "eval_steps_per_second": 8.142, "step": 5928 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.0006794871794871796, "loss": 0.3667, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.5703971119133574, "eval_loss": 0.3636634945869446, "eval_runtime": 4.2999, "eval_samples_per_second": 64.42, "eval_steps_per_second": 8.14, "step": 6240 }, { "best_epoch": 4, "best_eval_accuracy": 0.5884476534296029, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.0006527777777777778, "loss": 0.3629, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.3412002921104431, "eval_runtime": 4.2976, "eval_samples_per_second": 64.455, "eval_steps_per_second": 8.144, "step": 6552 }, { "best_epoch": 20, "best_eval_accuracy": 0.6209386281588448, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.628158844765343, "eval_loss": 0.34508734941482544, "eval_runtime": 4.3013, "eval_samples_per_second": 64.399, "eval_steps_per_second": 8.137, "step": 6864 }, { "best_epoch": 21, "best_eval_accuracy": 0.628158844765343, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0006260683760683761, "loss": 0.3574, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.6064981949458483, "eval_loss": 0.3625960350036621, "eval_runtime": 4.301, "eval_samples_per_second": 64.404, "eval_steps_per_second": 8.138, "step": 7176 }, { "best_epoch": 21, "best_eval_accuracy": 0.628158844765343, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.3732198178768158, "eval_runtime": 4.2992, "eval_samples_per_second": 64.431, "eval_steps_per_second": 8.141, "step": 7488 }, { "best_epoch": 21, "best_eval_accuracy": 0.628158844765343, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.0005993589743589744, "loss": 0.3565, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.6173285198555957, "eval_loss": 0.342726469039917, "eval_runtime": 4.3024, "eval_samples_per_second": 64.382, "eval_steps_per_second": 8.135, "step": 7800 }, { "best_epoch": 21, "best_eval_accuracy": 0.628158844765343, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.0005726495726495727, "loss": 0.3525, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.5812274368231047, "eval_loss": 0.38550591468811035, "eval_runtime": 4.3005, "eval_samples_per_second": 64.41, "eval_steps_per_second": 8.139, "step": 8112 }, { "best_epoch": 21, "best_eval_accuracy": 0.628158844765343, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.6498194945848376, "eval_loss": 0.3384442925453186, "eval_runtime": 4.2994, "eval_samples_per_second": 64.428, "eval_steps_per_second": 8.141, "step": 8424 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.000545940170940171, "loss": 0.3523, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.628158844765343, "eval_loss": 0.3408096730709076, "eval_runtime": 4.3005, "eval_samples_per_second": 64.412, "eval_steps_per_second": 8.139, "step": 8736 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.0005192307692307693, "loss": 0.3505, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.6101083032490975, "eval_loss": 0.354794442653656, "eval_runtime": 4.3016, "eval_samples_per_second": 64.395, "eval_steps_per_second": 8.137, "step": 9048 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.592057761732852, "eval_loss": 0.3860756754875183, "eval_runtime": 4.2988, "eval_samples_per_second": 64.436, "eval_steps_per_second": 8.142, "step": 9360 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.0004925213675213676, "loss": 0.3509, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.5992779783393501, "eval_loss": 0.370971143245697, "eval_runtime": 4.2976, "eval_samples_per_second": 64.454, "eval_steps_per_second": 8.144, "step": 9672 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.5992779783393501, "eval_loss": 0.38974320888519287, "eval_runtime": 4.303, "eval_samples_per_second": 64.374, "eval_steps_per_second": 8.134, "step": 9984 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.00046581196581196583, "loss": 0.3494, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.6353790613718412, "eval_loss": 0.3535420596599579, "eval_runtime": 4.299, "eval_samples_per_second": 64.433, "eval_steps_per_second": 8.141, "step": 10296 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.0004391025641025641, "loss": 0.3459, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.628158844765343, "eval_loss": 0.33893653750419617, "eval_runtime": 4.3011, "eval_samples_per_second": 64.401, "eval_steps_per_second": 8.137, "step": 10608 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.3396986126899719, "eval_runtime": 4.2983, "eval_samples_per_second": 64.445, "eval_steps_per_second": 8.143, "step": 10920 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.0004123931623931624, "loss": 0.3429, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.6101083032490975, "eval_loss": 0.34503623843193054, "eval_runtime": 4.2992, "eval_samples_per_second": 64.431, "eval_steps_per_second": 8.141, "step": 11232 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.0003856837606837607, "loss": 0.3432, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.6064981949458483, "eval_loss": 0.392516553401947, "eval_runtime": 4.2992, "eval_samples_per_second": 64.431, "eval_steps_per_second": 8.141, "step": 11544 }, { "best_epoch": 26, "best_eval_accuracy": 0.6498194945848376, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.6714801444043321, "eval_loss": 0.32944434881210327, "eval_runtime": 4.2991, "eval_samples_per_second": 64.432, "eval_steps_per_second": 8.141, "step": 11856 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.000358974358974359, "loss": 0.341, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.6389891696750902, "eval_loss": 0.34416595101356506, "eval_runtime": 4.2999, "eval_samples_per_second": 64.419, "eval_steps_per_second": 8.14, "step": 12168 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.342062771320343, "eval_runtime": 4.3022, "eval_samples_per_second": 64.386, "eval_steps_per_second": 8.135, "step": 12480 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.00033226495726495727, "loss": 0.3392, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.6389891696750902, "eval_loss": 0.33708664774894714, "eval_runtime": 4.2992, "eval_samples_per_second": 64.431, "eval_steps_per_second": 8.141, "step": 12792 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.0003055555555555556, "loss": 0.3392, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.6534296028880866, "eval_loss": 0.33264684677124023, "eval_runtime": 4.3016, "eval_samples_per_second": 64.395, "eval_steps_per_second": 8.136, "step": 13104 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.628158844765343, "eval_loss": 0.3713812530040741, "eval_runtime": 4.2963, "eval_samples_per_second": 64.474, "eval_steps_per_second": 8.146, "step": 13416 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.0002788461538461539, "loss": 0.337, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.6245487364620939, "eval_loss": 0.3535134494304657, "eval_runtime": 4.2972, "eval_samples_per_second": 64.461, "eval_steps_per_second": 8.145, "step": 13728 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.00025213675213675216, "loss": 0.3352, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.6245487364620939, "eval_loss": 0.35477742552757263, "eval_runtime": 4.2996, "eval_samples_per_second": 64.424, "eval_steps_per_second": 8.14, "step": 14040 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.6570397111913358, "eval_loss": 0.33611994981765747, "eval_runtime": 4.2993, "eval_samples_per_second": 64.429, "eval_steps_per_second": 8.141, "step": 14352 }, { "best_epoch": 37, "best_eval_accuracy": 0.6714801444043321, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.00022542735042735044, "loss": 0.3335, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.6859205776173285, "eval_loss": 0.33294087648391724, "eval_runtime": 4.3015, "eval_samples_per_second": 64.396, "eval_steps_per_second": 8.137, "step": 14664 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.3423037827014923, "eval_runtime": 4.297, "eval_samples_per_second": 64.464, "eval_steps_per_second": 8.145, "step": 14976 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.0001987179487179487, "loss": 0.3329, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.6534296028880866, "eval_loss": 0.33556535840034485, "eval_runtime": 4.2948, "eval_samples_per_second": 64.497, "eval_steps_per_second": 8.149, "step": 15288 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.00017200854700854702, "loss": 0.3308, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.6642599277978339, "eval_loss": 0.33979371190071106, "eval_runtime": 4.2975, "eval_samples_per_second": 64.456, "eval_steps_per_second": 8.144, "step": 15600 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.6678700361010831, "eval_loss": 0.3374033272266388, "eval_runtime": 4.2988, "eval_samples_per_second": 64.437, "eval_steps_per_second": 8.142, "step": 15912 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.00014529914529914532, "loss": 0.3291, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.33146607875823975, "eval_runtime": 4.3011, "eval_samples_per_second": 64.402, "eval_steps_per_second": 8.137, "step": 16224 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.00011858974358974358, "loss": 0.3284, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.631768953068592, "eval_loss": 0.36504465341567993, "eval_runtime": 4.3007, "eval_samples_per_second": 64.409, "eval_steps_per_second": 8.138, "step": 16536 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.628158844765343, "eval_loss": 0.35373085737228394, "eval_runtime": 4.2986, "eval_samples_per_second": 64.44, "eval_steps_per_second": 8.142, "step": 16848 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 9.188034188034189e-05, "loss": 0.3257, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.6425992779783394, "eval_loss": 0.34801429510116577, "eval_runtime": 4.3013, "eval_samples_per_second": 64.399, "eval_steps_per_second": 8.137, "step": 17160 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.6570397111913358, "eval_loss": 0.34239715337753296, "eval_runtime": 4.2962, "eval_samples_per_second": 64.476, "eval_steps_per_second": 8.147, "step": 17472 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 6.517094017094018e-05, "loss": 0.3274, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.6678700361010831, "eval_loss": 0.3412990868091583, "eval_runtime": 4.2933, "eval_samples_per_second": 64.519, "eval_steps_per_second": 8.152, "step": 17784 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 3.846153846153846e-05, "loss": 0.3265, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.6389891696750902, "eval_loss": 0.34415990114212036, "eval_runtime": 4.3058, "eval_samples_per_second": 64.332, "eval_steps_per_second": 8.129, "step": 18096 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.6534296028880866, "eval_loss": 0.34172776341438293, "eval_runtime": 4.2978, "eval_samples_per_second": 64.452, "eval_steps_per_second": 8.144, "step": 18408 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 1.1752136752136752e-05, "loss": 0.326, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.6570397111913358, "eval_loss": 0.3407086431980133, "eval_runtime": 4.2954, "eval_samples_per_second": 64.488, "eval_steps_per_second": 8.148, "step": 18720 }, { "best_epoch": 46, "best_eval_accuracy": 0.6859205776173285, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 6.96152728406016e+16, "train_loss": 0.3531108416043795, "train_runtime": 3953.5033, "train_samples_per_second": 37.789, "train_steps_per_second": 4.735 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 6.96152728406016e+16, "trial_name": null, "trial_params": null }