{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.53, "eval_loss": 0.6182658672332764, "eval_runtime": 2.964, "eval_samples_per_second": 33.738, "eval_steps_per_second": 4.386, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.53, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.62, "eval_loss": 0.41887640953063965, "eval_runtime": 3.0102, "eval_samples_per_second": 33.22, "eval_steps_per_second": 4.319, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.62, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.6, "eval_loss": 0.43512362241744995, "eval_runtime": 3.0581, "eval_samples_per_second": 32.7, "eval_steps_per_second": 4.251, "step": 75 }, { "best_epoch": 1, "best_eval_accuracy": 0.62, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.6, "eval_loss": 0.41805171966552734, "eval_runtime": 3.0736, "eval_samples_per_second": 32.536, "eval_steps_per_second": 4.23, "step": 100 }, { "best_epoch": 1, "best_eval_accuracy": 0.62, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.62, "eval_loss": 0.41053706407546997, "eval_runtime": 3.0847, "eval_samples_per_second": 32.418, "eval_steps_per_second": 4.214, "step": 125 }, { "best_epoch": 1, "best_eval_accuracy": 0.62, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.63, "eval_loss": 0.41397789120674133, "eval_runtime": 3.0887, "eval_samples_per_second": 32.376, "eval_steps_per_second": 4.209, "step": 150 }, { "best_epoch": 5, "best_eval_accuracy": 0.63, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.66, "eval_loss": 0.40520820021629333, "eval_runtime": 3.0892, "eval_samples_per_second": 32.371, "eval_steps_per_second": 4.208, "step": 175 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.66, "eval_loss": 0.43216773867607117, "eval_runtime": 3.0856, "eval_samples_per_second": 32.408, "eval_steps_per_second": 4.213, "step": 200 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.41, "eval_loss": 0.4364350438117981, "eval_runtime": 3.0898, "eval_samples_per_second": 32.365, "eval_steps_per_second": 4.207, "step": 225 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.55, "eval_loss": 0.42465832829475403, "eval_runtime": 3.0783, "eval_samples_per_second": 32.485, "eval_steps_per_second": 4.223, "step": 250 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.53, "eval_loss": 0.42610007524490356, "eval_runtime": 3.0769, "eval_samples_per_second": 32.5, "eval_steps_per_second": 4.225, "step": 275 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.6, "eval_loss": 0.41756904125213623, "eval_runtime": 3.0756, "eval_samples_per_second": 32.514, "eval_steps_per_second": 4.227, "step": 300 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.58, "eval_loss": 0.4107968211174011, "eval_runtime": 3.0727, "eval_samples_per_second": 32.544, "eval_steps_per_second": 4.231, "step": 325 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.51, "eval_loss": 0.43049681186676025, "eval_runtime": 3.0737, "eval_samples_per_second": 32.534, "eval_steps_per_second": 4.229, "step": 350 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.61, "eval_loss": 0.40635767579078674, "eval_runtime": 3.0804, "eval_samples_per_second": 32.463, "eval_steps_per_second": 4.22, "step": 375 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.59, "eval_loss": 0.40317821502685547, "eval_runtime": 3.0757, "eval_samples_per_second": 32.513, "eval_steps_per_second": 4.227, "step": 400 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.63, "eval_loss": 0.40984559059143066, "eval_runtime": 3.0757, "eval_samples_per_second": 32.513, "eval_steps_per_second": 4.227, "step": 425 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.61, "eval_loss": 0.41323602199554443, "eval_runtime": 3.0748, "eval_samples_per_second": 32.522, "eval_steps_per_second": 4.228, "step": 450 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.65, "eval_loss": 0.3924804627895355, "eval_runtime": 3.0886, "eval_samples_per_second": 32.377, "eval_steps_per_second": 4.209, "step": 475 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.015, "loss": 0.7171, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.69, "eval_loss": 0.3957255482673645, "eval_runtime": 3.0914, "eval_samples_per_second": 32.348, "eval_steps_per_second": 4.205, "step": 500 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.64, "eval_loss": 0.42917293310165405, "eval_runtime": 3.0689, "eval_samples_per_second": 32.585, "eval_steps_per_second": 4.236, "step": 525 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.63, "eval_loss": 0.40250298380851746, "eval_runtime": 3.0667, "eval_samples_per_second": 32.608, "eval_steps_per_second": 4.239, "step": 550 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.69, "eval_loss": 0.3997068703174591, "eval_runtime": 3.0617, "eval_samples_per_second": 32.662, "eval_steps_per_second": 4.246, "step": 575 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.62, "eval_loss": 0.4115046560764313, "eval_runtime": 3.0625, "eval_samples_per_second": 32.653, "eval_steps_per_second": 4.245, "step": 600 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.67, "eval_loss": 0.40437012910842896, "eval_runtime": 3.062, "eval_samples_per_second": 32.659, "eval_steps_per_second": 4.246, "step": 625 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.69, "eval_loss": 0.409763902425766, "eval_runtime": 3.0623, "eval_samples_per_second": 32.655, "eval_steps_per_second": 4.245, "step": 650 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.65, "eval_loss": 0.40510663390159607, "eval_runtime": 3.0617, "eval_samples_per_second": 32.661, "eval_steps_per_second": 4.246, "step": 675 }, { "best_epoch": 19, "best_eval_accuracy": 0.69, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.72, "eval_loss": 0.42438602447509766, "eval_runtime": 3.0608, "eval_samples_per_second": 32.671, "eval_steps_per_second": 4.247, "step": 700 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.64, "eval_loss": 0.4031755328178406, "eval_runtime": 3.0609, "eval_samples_per_second": 32.67, "eval_steps_per_second": 4.247, "step": 725 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.7, "eval_loss": 0.413577675819397, "eval_runtime": 3.0625, "eval_samples_per_second": 32.653, "eval_steps_per_second": 4.245, "step": 750 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.68, "eval_loss": 0.3992563486099243, "eval_runtime": 3.064, "eval_samples_per_second": 32.637, "eval_steps_per_second": 4.243, "step": 775 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.72, "eval_loss": 0.417043000459671, "eval_runtime": 3.0628, "eval_samples_per_second": 32.65, "eval_steps_per_second": 4.245, "step": 800 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.71, "eval_loss": 0.40380868315696716, "eval_runtime": 3.0665, "eval_samples_per_second": 32.611, "eval_steps_per_second": 4.239, "step": 825 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.72, "eval_loss": 0.42510226368904114, "eval_runtime": 3.0667, "eval_samples_per_second": 32.608, "eval_steps_per_second": 4.239, "step": 850 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.66, "eval_loss": 0.4078834056854248, "eval_runtime": 3.0638, "eval_samples_per_second": 32.639, "eval_steps_per_second": 4.243, "step": 875 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.71, "eval_loss": 0.4119352102279663, "eval_runtime": 3.0725, "eval_samples_per_second": 32.547, "eval_steps_per_second": 4.231, "step": 900 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.67, "eval_loss": 0.4074689745903015, "eval_runtime": 3.0606, "eval_samples_per_second": 32.674, "eval_steps_per_second": 4.248, "step": 925 }, { "best_epoch": 27, "best_eval_accuracy": 0.72, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.73, "eval_loss": 0.44055667519569397, "eval_runtime": 3.0609, "eval_samples_per_second": 32.67, "eval_steps_per_second": 4.247, "step": 950 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.72, "eval_loss": 0.4081181287765503, "eval_runtime": 3.061, "eval_samples_per_second": 32.669, "eval_steps_per_second": 4.247, "step": 975 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.01, "loss": 0.4731, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.67, "eval_loss": 0.4190601110458374, "eval_runtime": 3.0627, "eval_samples_per_second": 32.651, "eval_steps_per_second": 4.245, "step": 1000 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.68, "eval_loss": 0.4217339754104614, "eval_runtime": 3.0615, "eval_samples_per_second": 32.663, "eval_steps_per_second": 4.246, "step": 1025 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.73, "eval_loss": 0.39827075600624084, "eval_runtime": 3.0673, "eval_samples_per_second": 32.602, "eval_steps_per_second": 4.238, "step": 1050 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.66, "eval_loss": 0.40923720598220825, "eval_runtime": 3.0598, "eval_samples_per_second": 32.682, "eval_steps_per_second": 4.249, "step": 1075 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.69, "eval_loss": 0.42479878664016724, "eval_runtime": 3.0596, "eval_samples_per_second": 32.684, "eval_steps_per_second": 4.249, "step": 1100 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.68, "eval_loss": 0.4218236804008484, "eval_runtime": 3.059, "eval_samples_per_second": 32.691, "eval_steps_per_second": 4.25, "step": 1125 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.7, "eval_loss": 0.4371417164802551, "eval_runtime": 3.0613, "eval_samples_per_second": 32.666, "eval_steps_per_second": 4.247, "step": 1150 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.69, "eval_loss": 0.4098566472530365, "eval_runtime": 3.0607, "eval_samples_per_second": 32.672, "eval_steps_per_second": 4.247, "step": 1175 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.69, "eval_loss": 0.4299997091293335, "eval_runtime": 3.0624, "eval_samples_per_second": 32.654, "eval_steps_per_second": 4.245, "step": 1200 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.72, "eval_loss": 0.4093553125858307, "eval_runtime": 3.0616, "eval_samples_per_second": 32.662, "eval_steps_per_second": 4.246, "step": 1225 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.71, "eval_loss": 0.4205920100212097, "eval_runtime": 3.062, "eval_samples_per_second": 32.658, "eval_steps_per_second": 4.246, "step": 1250 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.72, "eval_loss": 0.4240824580192566, "eval_runtime": 3.0617, "eval_samples_per_second": 32.662, "eval_steps_per_second": 4.246, "step": 1275 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.66, "eval_loss": 0.4252602756023407, "eval_runtime": 3.0627, "eval_samples_per_second": 32.651, "eval_steps_per_second": 4.245, "step": 1300 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.66, "eval_loss": 0.4116606116294861, "eval_runtime": 3.0605, "eval_samples_per_second": 32.674, "eval_steps_per_second": 4.248, "step": 1325 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.67, "eval_loss": 0.417370080947876, "eval_runtime": 3.0649, "eval_samples_per_second": 32.628, "eval_steps_per_second": 4.242, "step": 1350 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.67, "eval_loss": 0.41312336921691895, "eval_runtime": 3.0627, "eval_samples_per_second": 32.651, "eval_steps_per_second": 4.245, "step": 1375 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.67, "eval_loss": 0.42308780550956726, "eval_runtime": 3.0629, "eval_samples_per_second": 32.648, "eval_steps_per_second": 4.244, "step": 1400 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.7, "eval_loss": 0.4059382379055023, "eval_runtime": 3.0606, "eval_samples_per_second": 32.674, "eval_steps_per_second": 4.248, "step": 1425 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.72, "eval_loss": 0.4168393313884735, "eval_runtime": 3.0634, "eval_samples_per_second": 32.643, "eval_steps_per_second": 4.244, "step": 1450 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.68, "eval_loss": 0.42363443970680237, "eval_runtime": 3.0615, "eval_samples_per_second": 32.664, "eval_steps_per_second": 4.246, "step": 1475 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.005, "loss": 0.4204, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.68, "eval_loss": 0.400055855512619, "eval_runtime": 3.0593, "eval_samples_per_second": 32.687, "eval_steps_per_second": 4.249, "step": 1500 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.71, "eval_loss": 0.41580745577812195, "eval_runtime": 3.0632, "eval_samples_per_second": 32.645, "eval_steps_per_second": 4.244, "step": 1525 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.68, "eval_loss": 0.43029120564460754, "eval_runtime": 3.0605, "eval_samples_per_second": 32.675, "eval_steps_per_second": 4.248, "step": 1550 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.65, "eval_loss": 0.41548973321914673, "eval_runtime": 3.0606, "eval_samples_per_second": 32.673, "eval_steps_per_second": 4.247, "step": 1575 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.66, "eval_loss": 0.41950705647468567, "eval_runtime": 3.0588, "eval_samples_per_second": 32.692, "eval_steps_per_second": 4.25, "step": 1600 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.67, "eval_loss": 0.43146055936813354, "eval_runtime": 3.0608, "eval_samples_per_second": 32.671, "eval_steps_per_second": 4.247, "step": 1625 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.71, "eval_loss": 0.4239749610424042, "eval_runtime": 3.0598, "eval_samples_per_second": 32.682, "eval_steps_per_second": 4.249, "step": 1650 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.68, "eval_loss": 0.4191063344478607, "eval_runtime": 3.0607, "eval_samples_per_second": 32.672, "eval_steps_per_second": 4.247, "step": 1675 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.71, "eval_loss": 0.42141807079315186, "eval_runtime": 3.0592, "eval_samples_per_second": 32.688, "eval_steps_per_second": 4.249, "step": 1700 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.71, "eval_loss": 0.41697102785110474, "eval_runtime": 3.0626, "eval_samples_per_second": 32.652, "eval_steps_per_second": 4.245, "step": 1725 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.68, "eval_loss": 0.41583549976348877, "eval_runtime": 3.0593, "eval_samples_per_second": 32.687, "eval_steps_per_second": 4.249, "step": 1750 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.69, "eval_loss": 0.42303329706192017, "eval_runtime": 3.0591, "eval_samples_per_second": 32.69, "eval_steps_per_second": 4.25, "step": 1775 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.69, "eval_loss": 0.41061896085739136, "eval_runtime": 3.0576, "eval_samples_per_second": 32.706, "eval_steps_per_second": 4.252, "step": 1800 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.68, "eval_loss": 0.42553210258483887, "eval_runtime": 3.056, "eval_samples_per_second": 32.723, "eval_steps_per_second": 4.254, "step": 1825 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.67, "eval_loss": 0.4223473072052002, "eval_runtime": 3.057, "eval_samples_per_second": 32.712, "eval_steps_per_second": 4.253, "step": 1850 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.7, "eval_loss": 0.41237103939056396, "eval_runtime": 3.0557, "eval_samples_per_second": 32.725, "eval_steps_per_second": 4.254, "step": 1875 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.7, "eval_loss": 0.4114343225955963, "eval_runtime": 3.0557, "eval_samples_per_second": 32.726, "eval_steps_per_second": 4.254, "step": 1900 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.71, "eval_loss": 0.4114573001861572, "eval_runtime": 3.0539, "eval_samples_per_second": 32.745, "eval_steps_per_second": 4.257, "step": 1925 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.71, "eval_loss": 0.4135931432247162, "eval_runtime": 3.0553, "eval_samples_per_second": 32.73, "eval_steps_per_second": 4.255, "step": 1950 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.71, "eval_loss": 0.41504940390586853, "eval_runtime": 3.0541, "eval_samples_per_second": 32.743, "eval_steps_per_second": 4.257, "step": 1975 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.3939, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.71, "eval_loss": 0.41364365816116333, "eval_runtime": 3.0573, "eval_samples_per_second": 32.709, "eval_steps_per_second": 4.252, "step": 2000 }, { "best_epoch": 37, "best_eval_accuracy": 0.73, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.5011474990844726, "train_runtime": 1641.5222, "train_samples_per_second": 19.494, "train_steps_per_second": 1.218 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }