{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.15293638408184052, "eval_runtime": 4.3856, "eval_samples_per_second": 63.161, "eval_steps_per_second": 7.981, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.4729241877256318, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.004866452991452991, "loss": 0.2201, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.10219389200210571, "eval_runtime": 4.4747, "eval_samples_per_second": 61.904, "eval_steps_per_second": 7.822, "step": 624 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.26191186904907227, "eval_runtime": 4.4885, "eval_samples_per_second": 61.714, "eval_steps_per_second": 7.798, "step": 936 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.004732905982905983, "loss": 0.1563, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.51985559566787, "eval_loss": 0.07380817085504532, "eval_runtime": 4.4993, "eval_samples_per_second": 61.565, "eval_steps_per_second": 7.779, "step": 1248 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.004599358974358974, "loss": 0.0889, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.4981949458483754, "eval_loss": 0.07089700549840927, "eval_runtime": 4.4954, "eval_samples_per_second": 61.619, "eval_steps_per_second": 7.786, "step": 1560 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07577553391456604, "eval_runtime": 4.4933, "eval_samples_per_second": 61.647, "eval_steps_per_second": 7.789, "step": 1872 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.004465811965811966, "loss": 0.0808, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07322199642658234, "eval_runtime": 4.4858, "eval_samples_per_second": 61.751, "eval_steps_per_second": 7.802, "step": 2184 }, { "best_epoch": 1, "best_eval_accuracy": 0.5270758122743683, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.5595667870036101, "eval_loss": 0.07163483649492264, "eval_runtime": 4.4829, "eval_samples_per_second": 61.79, "eval_steps_per_second": 7.807, "step": 2496 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.004332264957264957, "loss": 0.0802, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.07071372121572495, "eval_runtime": 4.4841, "eval_samples_per_second": 61.774, "eval_steps_per_second": 7.805, "step": 2808 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.004198717948717949, "loss": 0.0819, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07121473550796509, "eval_runtime": 4.4844, "eval_samples_per_second": 61.77, "eval_steps_per_second": 7.805, "step": 3120 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07059402763843536, "eval_runtime": 4.4875, "eval_samples_per_second": 61.727, "eval_steps_per_second": 7.799, "step": 3432 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.00406517094017094, "loss": 0.0818, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.4620938628158845, "eval_loss": 0.07027973979711533, "eval_runtime": 4.4847, "eval_samples_per_second": 61.766, "eval_steps_per_second": 7.804, "step": 3744 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.003931623931623931, "loss": 0.08, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07372691482305527, "eval_runtime": 4.4841, "eval_samples_per_second": 61.774, "eval_steps_per_second": 7.805, "step": 4056 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.07121168822050095, "eval_runtime": 4.4841, "eval_samples_per_second": 61.774, "eval_steps_per_second": 7.805, "step": 4368 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.003798076923076923, "loss": 0.0803, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07377327233552933, "eval_runtime": 4.4908, "eval_samples_per_second": 61.682, "eval_steps_per_second": 7.794, "step": 4680 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07084672898054123, "eval_runtime": 4.4838, "eval_samples_per_second": 61.778, "eval_steps_per_second": 7.806, "step": 4992 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.003664529914529914, "loss": 0.0807, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.5487364620938628, "eval_loss": 0.0708812028169632, "eval_runtime": 4.485, "eval_samples_per_second": 61.761, "eval_steps_per_second": 7.804, "step": 5304 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.003530982905982906, "loss": 0.082, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.5523465703971119, "eval_loss": 0.0720214918255806, "eval_runtime": 4.4868, "eval_samples_per_second": 61.736, "eval_steps_per_second": 7.801, "step": 5616 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07122407853603363, "eval_runtime": 4.49, "eval_samples_per_second": 61.693, "eval_steps_per_second": 7.795, "step": 5928 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.0033974358974358976, "loss": 0.0806, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.5090252707581228, "eval_loss": 0.07032328844070435, "eval_runtime": 4.4893, "eval_samples_per_second": 61.703, "eval_steps_per_second": 7.796, "step": 6240 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.003263888888888889, "loss": 0.0801, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07104188203811646, "eval_runtime": 4.488, "eval_samples_per_second": 61.72, "eval_steps_per_second": 7.799, "step": 6552 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.07005982100963593, "eval_runtime": 4.4962, "eval_samples_per_second": 61.608, "eval_steps_per_second": 7.784, "step": 6864 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0031303418803418806, "loss": 0.0798, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.07032880932092667, "eval_runtime": 4.4887, "eval_samples_per_second": 61.711, "eval_steps_per_second": 7.797, "step": 7176 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07051917910575867, "eval_runtime": 4.4932, "eval_samples_per_second": 61.649, "eval_steps_per_second": 7.79, "step": 7488 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.0029967948717948716, "loss": 0.0854, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.5523465703971119, "eval_loss": 0.07042864710092545, "eval_runtime": 4.4874, "eval_samples_per_second": 61.728, "eval_steps_per_second": 7.8, "step": 7800 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.002863247863247863, "loss": 0.0793, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.49097472924187724, "eval_loss": 0.07020144909620285, "eval_runtime": 4.4906, "eval_samples_per_second": 61.684, "eval_steps_per_second": 7.794, "step": 8112 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07208646833896637, "eval_runtime": 4.4856, "eval_samples_per_second": 61.753, "eval_steps_per_second": 7.803, "step": 8424 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.0027297008547008546, "loss": 0.0792, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07201223820447922, "eval_runtime": 4.4887, "eval_samples_per_second": 61.711, "eval_steps_per_second": 7.797, "step": 8736 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.0025961538461538466, "loss": 0.0794, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.0713350772857666, "eval_runtime": 4.4877, "eval_samples_per_second": 61.724, "eval_steps_per_second": 7.799, "step": 9048 }, { "best_epoch": 7, "best_eval_accuracy": 0.5595667870036101, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.5631768953068592, "eval_loss": 0.07010631263256073, "eval_runtime": 4.4831, "eval_samples_per_second": 61.787, "eval_steps_per_second": 7.807, "step": 9360 }, { "best_epoch": 29, "best_eval_accuracy": 0.5631768953068592, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.0024626068376068376, "loss": 0.0785, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.6101083032490975, "eval_loss": 0.07099565863609314, "eval_runtime": 4.4893, "eval_samples_per_second": 61.703, "eval_steps_per_second": 7.796, "step": 9672 }, { "best_epoch": 30, "best_eval_accuracy": 0.6101083032490975, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.48014440433212996, "eval_loss": 0.07032310217618942, "eval_runtime": 4.4879, "eval_samples_per_second": 61.721, "eval_steps_per_second": 7.799, "step": 9984 }, { "best_epoch": 30, "best_eval_accuracy": 0.6101083032490975, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.002329059829059829, "loss": 0.0786, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07281779497861862, "eval_runtime": 4.4847, "eval_samples_per_second": 61.766, "eval_steps_per_second": 7.804, "step": 10296 }, { "best_epoch": 30, "best_eval_accuracy": 0.6101083032490975, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.0021955128205128206, "loss": 0.0791, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.5054151624548736, "eval_loss": 0.07028384506702423, "eval_runtime": 4.4824, "eval_samples_per_second": 61.798, "eval_steps_per_second": 7.808, "step": 10608 }, { "best_epoch": 30, "best_eval_accuracy": 0.6101083032490975, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.6173285198555957, "eval_loss": 0.07163991034030914, "eval_runtime": 4.4867, "eval_samples_per_second": 61.738, "eval_steps_per_second": 7.801, "step": 10920 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.002061965811965812, "loss": 0.0789, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.47653429602888087, "eval_loss": 0.07081528007984161, "eval_runtime": 4.4866, "eval_samples_per_second": 61.739, "eval_steps_per_second": 7.801, "step": 11232 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.0019284188034188036, "loss": 0.0786, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07695722579956055, "eval_runtime": 4.4827, "eval_samples_per_second": 61.793, "eval_steps_per_second": 7.808, "step": 11544 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.07178593426942825, "eval_runtime": 4.4814, "eval_samples_per_second": 61.811, "eval_steps_per_second": 7.81, "step": 11856 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.0017948717948717949, "loss": 0.0784, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.48375451263537905, "eval_loss": 0.06998021900653839, "eval_runtime": 4.4856, "eval_samples_per_second": 61.753, "eval_steps_per_second": 7.803, "step": 12168 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.5234657039711191, "eval_loss": 0.06990227848291397, "eval_runtime": 4.4914, "eval_samples_per_second": 61.673, "eval_steps_per_second": 7.793, "step": 12480 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.0016613247863247864, "loss": 0.0775, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.6137184115523465, "eval_loss": 0.06979037076234818, "eval_runtime": 4.4875, "eval_samples_per_second": 61.727, "eval_steps_per_second": 7.799, "step": 12792 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.0015277777777777779, "loss": 0.0779, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.51985559566787, "eval_loss": 0.06967325508594513, "eval_runtime": 4.4905, "eval_samples_per_second": 61.686, "eval_steps_per_second": 7.794, "step": 13104 }, { "best_epoch": 34, "best_eval_accuracy": 0.6173285198555957, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.6534296028880866, "eval_loss": 0.06980358064174652, "eval_runtime": 4.4879, "eval_samples_per_second": 61.721, "eval_steps_per_second": 7.799, "step": 13416 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.0013942307692307694, "loss": 0.0777, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.5848375451263538, "eval_loss": 0.06972871720790863, "eval_runtime": 4.5198, "eval_samples_per_second": 61.286, "eval_steps_per_second": 7.744, "step": 13728 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.0012606837606837606, "loss": 0.0776, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.6425992779783394, "eval_loss": 0.06987190246582031, "eval_runtime": 4.501, "eval_samples_per_second": 61.542, "eval_steps_per_second": 7.776, "step": 14040 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.6028880866425993, "eval_loss": 0.06966899335384369, "eval_runtime": 4.4681, "eval_samples_per_second": 61.995, "eval_steps_per_second": 7.833, "step": 14352 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.0011271367521367521, "loss": 0.0769, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.07046782970428467, "eval_runtime": 4.4607, "eval_samples_per_second": 62.098, "eval_steps_per_second": 7.846, "step": 14664 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.6209386281588448, "eval_loss": 0.06947591155767441, "eval_runtime": 4.4615, "eval_samples_per_second": 62.087, "eval_steps_per_second": 7.845, "step": 14976 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.0009935897435897436, "loss": 0.077, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.5667870036101083, "eval_loss": 0.06946070492267609, "eval_runtime": 4.4574, "eval_samples_per_second": 62.144, "eval_steps_per_second": 7.852, "step": 15288 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.0008600427350427351, "loss": 0.077, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.5018050541516246, "eval_loss": 0.06961851567029953, "eval_runtime": 4.4447, "eval_samples_per_second": 62.321, "eval_steps_per_second": 7.874, "step": 15600 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.49458483754512633, "eval_loss": 0.07000398635864258, "eval_runtime": 4.4397, "eval_samples_per_second": 62.391, "eval_steps_per_second": 7.883, "step": 15912 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.0007264957264957266, "loss": 0.0774, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.4981949458483754, "eval_loss": 0.0700514167547226, "eval_runtime": 4.4359, "eval_samples_per_second": 62.445, "eval_steps_per_second": 7.89, "step": 16224 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.000592948717948718, "loss": 0.0767, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.5812274368231047, "eval_loss": 0.06938585638999939, "eval_runtime": 4.4418, "eval_samples_per_second": 62.363, "eval_steps_per_second": 7.88, "step": 16536 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.07005595415830612, "eval_runtime": 4.4342, "eval_samples_per_second": 62.469, "eval_steps_per_second": 7.893, "step": 16848 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 0.00045940170940170943, "loss": 0.0761, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.48736462093862815, "eval_loss": 0.07062944769859314, "eval_runtime": 4.4382, "eval_samples_per_second": 62.412, "eval_steps_per_second": 7.886, "step": 17160 }, { "best_epoch": 42, "best_eval_accuracy": 0.6534296028880866, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.06952185928821564, "eval_runtime": 4.4379, "eval_samples_per_second": 62.417, "eval_steps_per_second": 7.887, "step": 17472 }, { "best_epoch": 55, "best_eval_accuracy": 0.6787003610108303, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 0.00032585470085470087, "loss": 0.0762, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.6028880866425993, "eval_loss": 0.0693010687828064, "eval_runtime": 4.4361, "eval_samples_per_second": 62.442, "eval_steps_per_second": 7.89, "step": 17784 }, { "best_epoch": 55, "best_eval_accuracy": 0.6787003610108303, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 0.00019230769230769233, "loss": 0.0763, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.51985559566787, "eval_loss": 0.06962151825428009, "eval_runtime": 4.4368, "eval_samples_per_second": 62.432, "eval_steps_per_second": 7.889, "step": 18096 }, { "best_epoch": 55, "best_eval_accuracy": 0.6787003610108303, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.5740072202166066, "eval_loss": 0.06931070238351822, "eval_runtime": 4.4366, "eval_samples_per_second": 62.435, "eval_steps_per_second": 7.889, "step": 18408 }, { "best_epoch": 55, "best_eval_accuracy": 0.6787003610108303, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 5.876068376068376e-05, "loss": 0.0763, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.5812274368231047, "eval_loss": 0.06930926442146301, "eval_runtime": 4.4417, "eval_samples_per_second": 62.364, "eval_steps_per_second": 7.88, "step": 18720 }, { "best_epoch": 55, "best_eval_accuracy": 0.6787003610108303, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 6.96152728406016e+16, "train_loss": 0.08502325043719039, "train_runtime": 4069.7527, "train_samples_per_second": 36.71, "train_steps_per_second": 4.6 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 6.96152728406016e+16, "trial_name": null, "trial_params": null }