{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5234657039711191, "eval_loss": 0.5125718116760254, "eval_runtime": 8.8736, "eval_samples_per_second": 31.216, "eval_steps_per_second": 3.944, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.002919871794871795, "loss": 0.5126, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.38242238759994507, "eval_runtime": 8.7725, "eval_samples_per_second": 31.576, "eval_steps_per_second": 3.99, "step": 624 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.49097472924187724, "eval_loss": 0.3692016005516052, "eval_runtime": 8.7845, "eval_samples_per_second": 31.533, "eval_steps_per_second": 3.984, "step": 936 }, { "best_epoch": 0, "best_eval_accuracy": 0.5234657039711191, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.0028397435897435895, "loss": 0.4613, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.3940806984901428, "eval_runtime": 8.8274, "eval_samples_per_second": 31.38, "eval_steps_per_second": 3.965, "step": 1248 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.0027596153846153847, "loss": 0.446, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6773150563240051, "eval_runtime": 8.9686, "eval_samples_per_second": 30.885, "eval_steps_per_second": 3.902, "step": 1560 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.5515874028205872, "eval_runtime": 9.1762, "eval_samples_per_second": 30.187, "eval_steps_per_second": 3.814, "step": 1872 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.00267948717948718, "loss": 0.4477, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.51985559566787, "eval_loss": 0.3517453372478485, "eval_runtime": 9.1384, "eval_samples_per_second": 30.312, "eval_steps_per_second": 3.83, "step": 2184 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.49097472924187724, "eval_loss": 0.37720802426338196, "eval_runtime": 9.0573, "eval_samples_per_second": 30.583, "eval_steps_per_second": 3.864, "step": 2496 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.0025993589743589745, "loss": 0.4263, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.36901482939720154, "eval_runtime": 9.0499, "eval_samples_per_second": 30.608, "eval_steps_per_second": 3.867, "step": 2808 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.0025192307692307693, "loss": 0.4397, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.35119369626045227, "eval_runtime": 9.0328, "eval_samples_per_second": 30.666, "eval_steps_per_second": 3.875, "step": 3120 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.5379061371841155, "eval_loss": 0.47159671783447266, "eval_runtime": 9.0616, "eval_samples_per_second": 30.569, "eval_steps_per_second": 3.862, "step": 3432 }, { "best_epoch": 10, "best_eval_accuracy": 0.5379061371841155, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.002439102564102564, "loss": 0.4425, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.6570397111913358, "eval_loss": 0.36053842306137085, "eval_runtime": 9.0613, "eval_samples_per_second": 30.57, "eval_steps_per_second": 3.863, "step": 3744 }, { "best_epoch": 11, "best_eval_accuracy": 0.6570397111913358, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.002358974358974359, "loss": 0.4269, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.5379061371841155, "eval_loss": 0.3570541739463806, "eval_runtime": 9.0666, "eval_samples_per_second": 30.552, "eval_steps_per_second": 3.86, "step": 4056 }, { "best_epoch": 11, "best_eval_accuracy": 0.6570397111913358, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.3544716238975525, "eval_runtime": 9.1496, "eval_samples_per_second": 30.274, "eval_steps_per_second": 3.825, "step": 4368 }, { "best_epoch": 11, "best_eval_accuracy": 0.6570397111913358, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.002278846153846154, "loss": 0.3975, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.6498194945848376, "eval_loss": 0.37444454431533813, "eval_runtime": 9.0245, "eval_samples_per_second": 30.694, "eval_steps_per_second": 3.878, "step": 4680 }, { "best_epoch": 11, "best_eval_accuracy": 0.6570397111913358, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.6606498194945848, "eval_loss": 0.35777392983436584, "eval_runtime": 9.0439, "eval_samples_per_second": 30.628, "eval_steps_per_second": 3.87, "step": 4992 }, { "best_epoch": 15, "best_eval_accuracy": 0.6606498194945848, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.0021987179487179486, "loss": 0.3906, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.37044015526771545, "eval_runtime": 9.0352, "eval_samples_per_second": 30.658, "eval_steps_per_second": 3.874, "step": 5304 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.0021185897435897437, "loss": 0.3633, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.6064981949458483, "eval_loss": 0.3355711102485657, "eval_runtime": 9.076, "eval_samples_per_second": 30.52, "eval_steps_per_second": 3.856, "step": 5616 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.6064981949458483, "eval_loss": 0.3397271931171417, "eval_runtime": 9.0579, "eval_samples_per_second": 30.581, "eval_steps_per_second": 3.864, "step": 5928 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.0020384615384615385, "loss": 0.3604, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.3809119760990143, "eval_runtime": 9.0061, "eval_samples_per_second": 30.757, "eval_steps_per_second": 3.886, "step": 6240 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.0019583333333333336, "loss": 0.3565, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.33565953373908997, "eval_runtime": 9.0589, "eval_samples_per_second": 30.578, "eval_steps_per_second": 3.864, "step": 6552 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.38026997447013855, "eval_runtime": 9.0356, "eval_samples_per_second": 30.656, "eval_steps_per_second": 3.874, "step": 6864 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0018782051282051281, "loss": 0.3533, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.6750902527075813, "eval_loss": 0.37539029121398926, "eval_runtime": 9.0595, "eval_samples_per_second": 30.576, "eval_steps_per_second": 3.863, "step": 7176 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.6353790613718412, "eval_loss": 0.3304370641708374, "eval_runtime": 9.2561, "eval_samples_per_second": 29.926, "eval_steps_per_second": 3.781, "step": 7488 }, { "best_epoch": 16, "best_eval_accuracy": 0.6931407942238267, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.001798076923076923, "loss": 0.3462, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.36999332904815674, "eval_runtime": 9.2621, "eval_samples_per_second": 29.907, "eval_steps_per_second": 3.779, "step": 7800 }, { "best_epoch": 24, "best_eval_accuracy": 0.6967509025270758, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.0017179487179487178, "loss": 0.3432, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.333748996257782, "eval_runtime": 9.203, "eval_samples_per_second": 30.099, "eval_steps_per_second": 3.803, "step": 8112 }, { "best_epoch": 25, "best_eval_accuracy": 0.7148014440433214, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.3289393484592438, "eval_runtime": 9.2259, "eval_samples_per_second": 30.024, "eval_steps_per_second": 3.794, "step": 8424 }, { "best_epoch": 25, "best_eval_accuracy": 0.7148014440433214, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.0016378205128205127, "loss": 0.3409, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3340049684047699, "eval_runtime": 9.2073, "eval_samples_per_second": 30.085, "eval_steps_per_second": 3.801, "step": 8736 }, { "best_epoch": 25, "best_eval_accuracy": 0.7148014440433214, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.0015576923076923079, "loss": 0.3381, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.3466947674751282, "eval_runtime": 9.2066, "eval_samples_per_second": 30.087, "eval_steps_per_second": 3.802, "step": 9048 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.6823104693140795, "eval_loss": 0.3859948515892029, "eval_runtime": 9.1838, "eval_samples_per_second": 30.162, "eval_steps_per_second": 3.811, "step": 9360 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.0014775641025641026, "loss": 0.337, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.3795461356639862, "eval_runtime": 9.2077, "eval_samples_per_second": 30.083, "eval_steps_per_second": 3.801, "step": 9672 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.7184115523465704, "eval_loss": 0.3755357563495636, "eval_runtime": 9.142, "eval_samples_per_second": 30.3, "eval_steps_per_second": 3.828, "step": 9984 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.0013974358974358976, "loss": 0.334, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.3529473543167114, "eval_runtime": 9.0973, "eval_samples_per_second": 30.449, "eval_steps_per_second": 3.847, "step": 10296 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.0013173076923076923, "loss": 0.3321, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.33890479803085327, "eval_runtime": 9.1599, "eval_samples_per_second": 30.24, "eval_steps_per_second": 3.821, "step": 10608 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3260202407836914, "eval_runtime": 9.3959, "eval_samples_per_second": 29.481, "eval_steps_per_second": 3.725, "step": 10920 }, { "best_epoch": 28, "best_eval_accuracy": 0.7220216606498195, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.0012371794871794872, "loss": 0.3315, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.7328519855595668, "eval_loss": 0.35185953974723816, "eval_runtime": 9.0108, "eval_samples_per_second": 30.741, "eval_steps_per_second": 3.884, "step": 11232 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.0011570512820512822, "loss": 0.3317, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.3741394877433777, "eval_runtime": 8.9889, "eval_samples_per_second": 30.816, "eval_steps_per_second": 3.894, "step": 11544 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.33644017577171326, "eval_runtime": 9.1679, "eval_samples_per_second": 30.214, "eval_steps_per_second": 3.818, "step": 11856 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.0010769230769230769, "loss": 0.325, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.34382882714271545, "eval_runtime": 9.0452, "eval_samples_per_second": 30.624, "eval_steps_per_second": 3.869, "step": 12168 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3461564779281616, "eval_runtime": 8.9087, "eval_samples_per_second": 31.093, "eval_steps_per_second": 3.929, "step": 12480 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.0009967948717948718, "loss": 0.3282, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.33443841338157654, "eval_runtime": 9.0016, "eval_samples_per_second": 30.772, "eval_steps_per_second": 3.888, "step": 12792 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.0009166666666666668, "loss": 0.3251, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.328012079000473, "eval_runtime": 8.8448, "eval_samples_per_second": 31.318, "eval_steps_per_second": 3.957, "step": 13104 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.35438141226768494, "eval_runtime": 8.8382, "eval_samples_per_second": 31.341, "eval_steps_per_second": 3.96, "step": 13416 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.0008365384615384616, "loss": 0.3223, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.34875577688217163, "eval_runtime": 8.7814, "eval_samples_per_second": 31.544, "eval_steps_per_second": 3.986, "step": 13728 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.0007564102564102564, "loss": 0.3215, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.3436548113822937, "eval_runtime": 8.7944, "eval_samples_per_second": 31.497, "eval_steps_per_second": 3.98, "step": 14040 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.34295418858528137, "eval_runtime": 8.7788, "eval_samples_per_second": 31.553, "eval_steps_per_second": 3.987, "step": 14352 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.0006762820512820514, "loss": 0.3205, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.33936554193496704, "eval_runtime": 8.8644, "eval_samples_per_second": 31.249, "eval_steps_per_second": 3.948, "step": 14664 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3676346242427826, "eval_runtime": 8.8385, "eval_samples_per_second": 31.34, "eval_steps_per_second": 3.96, "step": 14976 }, { "best_epoch": 35, "best_eval_accuracy": 0.7328519855595668, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.0005961538461538461, "loss": 0.3163, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.7364620938628159, "eval_loss": 0.3486822545528412, "eval_runtime": 8.8462, "eval_samples_per_second": 31.313, "eval_steps_per_second": 3.956, "step": 15288 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.000516025641025641, "loss": 0.3154, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.33869412541389465, "eval_runtime": 8.9668, "eval_samples_per_second": 30.892, "eval_steps_per_second": 3.903, "step": 15600 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3448237478733063, "eval_runtime": 8.8551, "eval_samples_per_second": 31.281, "eval_steps_per_second": 3.953, "step": 15912 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.00043589743589743596, "loss": 0.3164, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.33612075448036194, "eval_runtime": 8.8349, "eval_samples_per_second": 31.353, "eval_steps_per_second": 3.962, "step": 16224 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.00035576923076923074, "loss": 0.3153, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.367554634809494, "eval_runtime": 8.8354, "eval_samples_per_second": 31.351, "eval_steps_per_second": 3.961, "step": 16536 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.3463309407234192, "eval_runtime": 8.8416, "eval_samples_per_second": 31.329, "eval_steps_per_second": 3.959, "step": 16848 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 0.0002756410256410257, "loss": 0.3145, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.7328519855595668, "eval_loss": 0.34912317991256714, "eval_runtime": 8.8368, "eval_samples_per_second": 31.346, "eval_steps_per_second": 3.961, "step": 17160 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3599321246147156, "eval_runtime": 8.8319, "eval_samples_per_second": 31.363, "eval_steps_per_second": 3.963, "step": 17472 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 0.00019551282051282054, "loss": 0.3151, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.3457310199737549, "eval_runtime": 8.9008, "eval_samples_per_second": 31.121, "eval_steps_per_second": 3.932, "step": 17784 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 0.0001153846153846154, "loss": 0.3103, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.7220216606498195, "eval_loss": 0.34887006878852844, "eval_runtime": 8.8994, "eval_samples_per_second": 31.126, "eval_steps_per_second": 3.933, "step": 18096 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.348056823015213, "eval_runtime": 8.9031, "eval_samples_per_second": 31.113, "eval_steps_per_second": 3.931, "step": 18408 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 3.5256410256410254e-05, "loss": 0.314, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.35035568475723267, "eval_runtime": 8.8809, "eval_samples_per_second": 31.191, "eval_steps_per_second": 3.941, "step": 18720 }, { "best_epoch": 48, "best_eval_accuracy": 0.7364620938628159, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 6.96152728406016e+16, "train_loss": 0.3594475073692126, "train_runtime": 8160.6004, "train_samples_per_second": 18.307, "train_steps_per_second": 2.294 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 6.96152728406016e+16, "trial_name": null, "trial_params": null }