{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.07883936911821365, "eval_runtime": 4.3827, "eval_samples_per_second": 63.203, "eval_steps_per_second": 7.986, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.5270758122743683, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.003893162393162393, "loss": 0.1715, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.2058744877576828, "eval_runtime": 4.4761, "eval_samples_per_second": 61.884, "eval_steps_per_second": 7.819, "step": 624 }, { "best_epoch": 0, "best_eval_accuracy": 0.5270758122743683, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.11342689394950867, "eval_runtime": 4.4913, "eval_samples_per_second": 61.675, "eval_steps_per_second": 7.793, "step": 936 }, { "best_epoch": 0, "best_eval_accuracy": 0.5270758122743683, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.0037863247863247863, "loss": 0.1222, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5342960288808665, "eval_loss": 0.08309175819158554, "eval_runtime": 4.5034, "eval_samples_per_second": 61.509, "eval_steps_per_second": 7.772, "step": 1248 }, { "best_epoch": 3, "best_eval_accuracy": 0.5342960288808665, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.0036794871794871794, "loss": 0.1187, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.5703971119133574, "eval_loss": 0.07322770357131958, "eval_runtime": 4.5054, "eval_samples_per_second": 61.482, "eval_steps_per_second": 7.769, "step": 1560 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07959606498479843, "eval_runtime": 4.4974, "eval_samples_per_second": 61.591, "eval_steps_per_second": 7.782, "step": 1872 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.003572649572649573, "loss": 0.0991, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.0745367482304573, "eval_runtime": 4.4876, "eval_samples_per_second": 61.725, "eval_steps_per_second": 7.799, "step": 2184 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.4981949458483754, "eval_loss": 0.0715838074684143, "eval_runtime": 4.4817, "eval_samples_per_second": 61.807, "eval_steps_per_second": 7.81, "step": 2496 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.003465811965811966, "loss": 0.0819, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.49097472924187724, "eval_loss": 0.07054454833269119, "eval_runtime": 4.4827, "eval_samples_per_second": 61.793, "eval_steps_per_second": 7.808, "step": 2808 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.003358974358974359, "loss": 0.0807, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07019203901290894, "eval_runtime": 4.4883, "eval_samples_per_second": 61.716, "eval_steps_per_second": 7.798, "step": 3120 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07131501287221909, "eval_runtime": 4.4864, "eval_samples_per_second": 61.742, "eval_steps_per_second": 7.801, "step": 3432 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.0032521367521367523, "loss": 0.0802, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07025401294231415, "eval_runtime": 4.4874, "eval_samples_per_second": 61.728, "eval_steps_per_second": 7.8, "step": 3744 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.0031452991452991454, "loss": 0.0795, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07835417985916138, "eval_runtime": 4.4839, "eval_samples_per_second": 61.776, "eval_steps_per_second": 7.806, "step": 4056 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.07064678519964218, "eval_runtime": 4.4919, "eval_samples_per_second": 61.666, "eval_steps_per_second": 7.792, "step": 4368 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.0030384615384615385, "loss": 0.0794, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07303398102521896, "eval_runtime": 4.4936, "eval_samples_per_second": 61.643, "eval_steps_per_second": 7.789, "step": 4680 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.48014440433212996, "eval_loss": 0.0706457644701004, "eval_runtime": 4.489, "eval_samples_per_second": 61.706, "eval_steps_per_second": 7.797, "step": 4992 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.0029316239316239316, "loss": 0.0806, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.5595667870036101, "eval_loss": 0.07106263935565948, "eval_runtime": 4.4937, "eval_samples_per_second": 61.642, "eval_steps_per_second": 7.789, "step": 5304 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.002824786324786325, "loss": 0.0811, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.4693140794223827, "eval_loss": 0.0703640729188919, "eval_runtime": 4.4948, "eval_samples_per_second": 61.626, "eval_steps_per_second": 7.787, "step": 5616 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.07006794959306717, "eval_runtime": 4.4898, "eval_samples_per_second": 61.695, "eval_steps_per_second": 7.795, "step": 5928 }, { "best_epoch": 4, "best_eval_accuracy": 0.5703971119133574, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.0027179487179487182, "loss": 0.0798, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.6101083032490975, "eval_loss": 0.07190611958503723, "eval_runtime": 4.493, "eval_samples_per_second": 61.651, "eval_steps_per_second": 7.79, "step": 6240 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.0026111111111111114, "loss": 0.0793, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.4693140794223827, "eval_loss": 0.07052430510520935, "eval_runtime": 4.4908, "eval_samples_per_second": 61.682, "eval_steps_per_second": 7.794, "step": 6552 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.5884476534296029, "eval_loss": 0.07068859040737152, "eval_runtime": 4.4944, "eval_samples_per_second": 61.633, "eval_steps_per_second": 7.788, "step": 6864 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0025042735042735045, "loss": 0.0795, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07123356312513351, "eval_runtime": 4.4895, "eval_samples_per_second": 61.7, "eval_steps_per_second": 7.796, "step": 7176 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07049828767776489, "eval_runtime": 4.4902, "eval_samples_per_second": 61.69, "eval_steps_per_second": 7.795, "step": 7488 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.0023974358974358976, "loss": 0.0796, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.07892809063196182, "eval_runtime": 4.4907, "eval_samples_per_second": 61.682, "eval_steps_per_second": 7.794, "step": 7800 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.0022905982905982907, "loss": 0.0796, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.48014440433212996, "eval_loss": 0.07052510976791382, "eval_runtime": 4.4886, "eval_samples_per_second": 61.712, "eval_steps_per_second": 7.798, "step": 8112 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07029584050178528, "eval_runtime": 4.4833, "eval_samples_per_second": 61.785, "eval_steps_per_second": 7.807, "step": 8424 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.002183760683760684, "loss": 0.0787, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.07033976167440414, "eval_runtime": 4.4877, "eval_samples_per_second": 61.725, "eval_steps_per_second": 7.799, "step": 8736 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.0020769230769230773, "loss": 0.079, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07161471992731094, "eval_runtime": 4.4853, "eval_samples_per_second": 61.758, "eval_steps_per_second": 7.803, "step": 9048 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.5703971119133574, "eval_loss": 0.07392293959856033, "eval_runtime": 4.4851, "eval_samples_per_second": 61.76, "eval_steps_per_second": 7.804, "step": 9360 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.0019700854700854704, "loss": 0.0788, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.5631768953068592, "eval_loss": 0.07494190335273743, "eval_runtime": 4.4843, "eval_samples_per_second": 61.771, "eval_steps_per_second": 7.805, "step": 9672 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.0710810124874115, "eval_runtime": 4.4859, "eval_samples_per_second": 61.749, "eval_steps_per_second": 7.802, "step": 9984 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.0018632478632478633, "loss": 0.0789, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.07050064951181412, "eval_runtime": 4.4845, "eval_samples_per_second": 61.768, "eval_steps_per_second": 7.805, "step": 10296 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.0017564102564102564, "loss": 0.0786, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.51985559566787, "eval_loss": 0.06995219737291336, "eval_runtime": 4.476, "eval_samples_per_second": 61.886, "eval_steps_per_second": 7.82, "step": 10608 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.06990911066532135, "eval_runtime": 4.4873, "eval_samples_per_second": 61.73, "eval_steps_per_second": 7.8, "step": 10920 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.0016495726495726495, "loss": 0.0785, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07145760208368301, "eval_runtime": 4.4889, "eval_samples_per_second": 61.707, "eval_steps_per_second": 7.797, "step": 11232 }, { "best_epoch": 19, "best_eval_accuracy": 0.6101083032490975, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.0015427350427350429, "loss": 0.0784, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.6353790613718412, "eval_loss": 0.07159464806318283, "eval_runtime": 4.4862, "eval_samples_per_second": 61.745, "eval_steps_per_second": 7.802, "step": 11544 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07186750322580338, "eval_runtime": 4.4867, "eval_samples_per_second": 61.737, "eval_steps_per_second": 7.801, "step": 11856 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.001435897435897436, "loss": 0.0781, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.5487364620938628, "eval_loss": 0.07000464200973511, "eval_runtime": 4.4864, "eval_samples_per_second": 61.742, "eval_steps_per_second": 7.801, "step": 12168 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.5848375451263538, "eval_loss": 0.06997690349817276, "eval_runtime": 4.4896, "eval_samples_per_second": 61.699, "eval_steps_per_second": 7.796, "step": 12480 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.001329059829059829, "loss": 0.0778, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.6173285198555957, "eval_loss": 0.07039925456047058, "eval_runtime": 4.4877, "eval_samples_per_second": 61.725, "eval_steps_per_second": 7.799, "step": 12792 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.0012222222222222224, "loss": 0.0778, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.5848375451263538, "eval_loss": 0.07050343602895737, "eval_runtime": 4.4896, "eval_samples_per_second": 61.699, "eval_steps_per_second": 7.796, "step": 13104 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.07049660384654999, "eval_runtime": 4.487, "eval_samples_per_second": 61.734, "eval_steps_per_second": 7.8, "step": 13416 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.0011153846153846155, "loss": 0.078, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.51985559566787, "eval_loss": 0.07012051343917847, "eval_runtime": 4.4899, "eval_samples_per_second": 61.694, "eval_steps_per_second": 7.795, "step": 13728 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.0010085470085470086, "loss": 0.0776, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.5956678700361011, "eval_loss": 0.07043617963790894, "eval_runtime": 4.4893, "eval_samples_per_second": 61.702, "eval_steps_per_second": 7.796, "step": 14040 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.5848375451263538, "eval_loss": 0.07017319649457932, "eval_runtime": 4.4885, "eval_samples_per_second": 61.714, "eval_steps_per_second": 7.798, "step": 14352 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.0009017094017094017, "loss": 0.0772, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07025384157896042, "eval_runtime": 4.4917, "eval_samples_per_second": 61.669, "eval_steps_per_second": 7.792, "step": 14664 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.5379061371841155, "eval_loss": 0.0697227343916893, "eval_runtime": 4.4921, "eval_samples_per_second": 61.664, "eval_steps_per_second": 7.791, "step": 14976 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.0007948717948717948, "loss": 0.0773, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.5595667870036101, "eval_loss": 0.06964733451604843, "eval_runtime": 4.5022, "eval_samples_per_second": 61.526, "eval_steps_per_second": 7.774, "step": 15288 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.0006880341880341881, "loss": 0.0772, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07015614211559296, "eval_runtime": 4.4706, "eval_samples_per_second": 61.96, "eval_steps_per_second": 7.829, "step": 15600 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.48014440433212996, "eval_loss": 0.07005967199802399, "eval_runtime": 4.4652, "eval_samples_per_second": 62.036, "eval_steps_per_second": 7.838, "step": 15912 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.0005811965811965813, "loss": 0.0776, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07058187574148178, "eval_runtime": 4.465, "eval_samples_per_second": 62.038, "eval_steps_per_second": 7.839, "step": 16224 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.00047435897435897434, "loss": 0.0772, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.5054151624548736, "eval_loss": 0.06982703506946564, "eval_runtime": 4.4547, "eval_samples_per_second": 62.182, "eval_steps_per_second": 7.857, "step": 16536 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.631768953068592, "eval_loss": 0.07056145370006561, "eval_runtime": 4.4549, "eval_samples_per_second": 62.179, "eval_steps_per_second": 7.857, "step": 16848 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 0.00036752136752136755, "loss": 0.0766, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07077504694461823, "eval_runtime": 4.4385, "eval_samples_per_second": 62.409, "eval_steps_per_second": 7.886, "step": 17160 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.07004080712795258, "eval_runtime": 4.4393, "eval_samples_per_second": 62.397, "eval_steps_per_second": 7.884, "step": 17472 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 0.0002606837606837607, "loss": 0.0766, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.06971540302038193, "eval_runtime": 4.4341, "eval_samples_per_second": 62.47, "eval_steps_per_second": 7.893, "step": 17784 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 0.00015384615384615385, "loss": 0.0767, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.48014440433212996, "eval_loss": 0.07002807408571243, "eval_runtime": 4.4375, "eval_samples_per_second": 62.422, "eval_steps_per_second": 7.887, "step": 18096 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.5234657039711191, "eval_loss": 0.06969156861305237, "eval_runtime": 4.4353, "eval_samples_per_second": 62.454, "eval_steps_per_second": 7.891, "step": 18408 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 4.700854700854701e-05, "loss": 0.0767, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.06970320641994476, "eval_runtime": 4.4366, "eval_samples_per_second": 62.435, "eval_steps_per_second": 7.889, "step": 18720 }, { "best_epoch": 36, "best_eval_accuracy": 0.6353790613718412, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 6.96152728406016e+16, "train_loss": 0.0838659094949054, "train_runtime": 4072.213, "train_samples_per_second": 36.688, "train_steps_per_second": 4.597 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 6.96152728406016e+16, "trial_name": null, "trial_params": null }