{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.66, "eval_loss": 0.41458651423454285, "eval_runtime": 2.9544, "eval_samples_per_second": 33.848, "eval_steps_per_second": 4.4, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.66, "eval_loss": 0.4116404056549072, "eval_runtime": 2.9946, "eval_samples_per_second": 33.394, "eval_steps_per_second": 4.341, "step": 50 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.66, "eval_loss": 0.41392984986305237, "eval_runtime": 3.022, "eval_samples_per_second": 33.091, "eval_steps_per_second": 4.302, "step": 75 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.64, "eval_loss": 0.416966050863266, "eval_runtime": 3.0401, "eval_samples_per_second": 32.893, "eval_steps_per_second": 4.276, "step": 100 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.65, "eval_loss": 0.4182185232639313, "eval_runtime": 3.0632, "eval_samples_per_second": 32.646, "eval_steps_per_second": 4.244, "step": 125 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.57, "eval_loss": 0.42081978917121887, "eval_runtime": 3.0619, "eval_samples_per_second": 32.66, "eval_steps_per_second": 4.246, "step": 150 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.66, "eval_loss": 0.41149312257766724, "eval_runtime": 3.069, "eval_samples_per_second": 32.584, "eval_steps_per_second": 4.236, "step": 175 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.66, "eval_loss": 0.41568344831466675, "eval_runtime": 3.0682, "eval_samples_per_second": 32.592, "eval_steps_per_second": 4.237, "step": 200 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.64, "eval_loss": 0.4229449927806854, "eval_runtime": 3.0693, "eval_samples_per_second": 32.581, "eval_steps_per_second": 4.235, "step": 225 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.65, "eval_loss": 0.4205465018749237, "eval_runtime": 3.0704, "eval_samples_per_second": 32.569, "eval_steps_per_second": 4.234, "step": 250 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.64, "eval_loss": 0.41778337955474854, "eval_runtime": 3.0755, "eval_samples_per_second": 32.515, "eval_steps_per_second": 4.227, "step": 275 }, { "best_epoch": 0, "best_eval_accuracy": 0.66, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.67, "eval_loss": 0.41307488083839417, "eval_runtime": 3.0694, "eval_samples_per_second": 32.58, "eval_steps_per_second": 4.235, "step": 300 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.65, "eval_loss": 0.4146430492401123, "eval_runtime": 3.0703, "eval_samples_per_second": 32.57, "eval_steps_per_second": 4.234, "step": 325 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.63, "eval_loss": 0.42016124725341797, "eval_runtime": 3.0704, "eval_samples_per_second": 32.569, "eval_steps_per_second": 4.234, "step": 350 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.62, "eval_loss": 0.4330647587776184, "eval_runtime": 3.0703, "eval_samples_per_second": 32.57, "eval_steps_per_second": 4.234, "step": 375 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.66, "eval_loss": 0.4120154082775116, "eval_runtime": 3.0708, "eval_samples_per_second": 32.564, "eval_steps_per_second": 4.233, "step": 400 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.63, "eval_loss": 0.4144454896450043, "eval_runtime": 3.0689, "eval_samples_per_second": 32.585, "eval_steps_per_second": 4.236, "step": 425 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.64, "eval_loss": 0.4182257354259491, "eval_runtime": 3.066, "eval_samples_per_second": 32.616, "eval_steps_per_second": 4.24, "step": 450 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.59, "eval_loss": 0.41842299699783325, "eval_runtime": 3.0648, "eval_samples_per_second": 32.629, "eval_steps_per_second": 4.242, "step": 475 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.00075, "loss": 0.5392, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.65, "eval_loss": 0.41611525416374207, "eval_runtime": 3.0695, "eval_samples_per_second": 32.578, "eval_steps_per_second": 4.235, "step": 500 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.64, "eval_loss": 0.41845789551734924, "eval_runtime": 3.0612, "eval_samples_per_second": 32.667, "eval_steps_per_second": 4.247, "step": 525 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.59, "eval_loss": 0.4186709225177765, "eval_runtime": 3.0611, "eval_samples_per_second": 32.668, "eval_steps_per_second": 4.247, "step": 550 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.62, "eval_loss": 0.4185837209224701, "eval_runtime": 3.0617, "eval_samples_per_second": 32.662, "eval_steps_per_second": 4.246, "step": 575 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.65, "eval_loss": 0.41592276096343994, "eval_runtime": 3.0573, "eval_samples_per_second": 32.709, "eval_steps_per_second": 4.252, "step": 600 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.64, "eval_loss": 0.4151555895805359, "eval_runtime": 3.0541, "eval_samples_per_second": 32.743, "eval_steps_per_second": 4.257, "step": 625 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.62, "eval_loss": 0.41507554054260254, "eval_runtime": 3.0542, "eval_samples_per_second": 32.742, "eval_steps_per_second": 4.256, "step": 650 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.63, "eval_loss": 0.41364148259162903, "eval_runtime": 3.0522, "eval_samples_per_second": 32.763, "eval_steps_per_second": 4.259, "step": 675 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.65, "eval_loss": 0.41904377937316895, "eval_runtime": 3.0528, "eval_samples_per_second": 32.757, "eval_steps_per_second": 4.258, "step": 700 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.61, "eval_loss": 0.4224679470062256, "eval_runtime": 3.0526, "eval_samples_per_second": 32.759, "eval_steps_per_second": 4.259, "step": 725 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.57, "eval_loss": 0.42088595032691956, "eval_runtime": 3.0523, "eval_samples_per_second": 32.762, "eval_steps_per_second": 4.259, "step": 750 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.63, "eval_loss": 0.416681706905365, "eval_runtime": 3.0478, "eval_samples_per_second": 32.81, "eval_steps_per_second": 4.265, "step": 775 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.62, "eval_loss": 0.4152999818325043, "eval_runtime": 3.049, "eval_samples_per_second": 32.797, "eval_steps_per_second": 4.264, "step": 800 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.6, "eval_loss": 0.4235653281211853, "eval_runtime": 3.0473, "eval_samples_per_second": 32.816, "eval_steps_per_second": 4.266, "step": 825 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.58, "eval_loss": 0.41910892724990845, "eval_runtime": 3.0486, "eval_samples_per_second": 32.801, "eval_steps_per_second": 4.264, "step": 850 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.61, "eval_loss": 0.41600170731544495, "eval_runtime": 3.0482, "eval_samples_per_second": 32.806, "eval_steps_per_second": 4.265, "step": 875 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.62, "eval_loss": 0.41634392738342285, "eval_runtime": 3.0496, "eval_samples_per_second": 32.791, "eval_steps_per_second": 4.263, "step": 900 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.59, "eval_loss": 0.419331818819046, "eval_runtime": 3.0513, "eval_samples_per_second": 32.773, "eval_steps_per_second": 4.261, "step": 925 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.62, "eval_loss": 0.4208333194255829, "eval_runtime": 3.0517, "eval_samples_per_second": 32.768, "eval_steps_per_second": 4.26, "step": 950 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.6, "eval_loss": 0.416349321603775, "eval_runtime": 3.0475, "eval_samples_per_second": 32.814, "eval_steps_per_second": 4.266, "step": 975 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.0005, "loss": 0.5359, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.6, "eval_loss": 0.4158593416213989, "eval_runtime": 3.0471, "eval_samples_per_second": 32.818, "eval_steps_per_second": 4.266, "step": 1000 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.62, "eval_loss": 0.41456928849220276, "eval_runtime": 3.0494, "eval_samples_per_second": 32.793, "eval_steps_per_second": 4.263, "step": 1025 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.6, "eval_loss": 0.41578975319862366, "eval_runtime": 3.0512, "eval_samples_per_second": 32.774, "eval_steps_per_second": 4.261, "step": 1050 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.59, "eval_loss": 0.42108017206192017, "eval_runtime": 3.0504, "eval_samples_per_second": 32.783, "eval_steps_per_second": 4.262, "step": 1075 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.59, "eval_loss": 0.4202888607978821, "eval_runtime": 3.049, "eval_samples_per_second": 32.797, "eval_steps_per_second": 4.264, "step": 1100 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.57, "eval_loss": 0.42174988985061646, "eval_runtime": 3.0504, "eval_samples_per_second": 32.782, "eval_steps_per_second": 4.262, "step": 1125 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.6, "eval_loss": 0.41829144954681396, "eval_runtime": 3.0521, "eval_samples_per_second": 32.764, "eval_steps_per_second": 4.259, "step": 1150 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.63, "eval_loss": 0.413789838552475, "eval_runtime": 3.0518, "eval_samples_per_second": 32.767, "eval_steps_per_second": 4.26, "step": 1175 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.63, "eval_loss": 0.4123985171318054, "eval_runtime": 3.0523, "eval_samples_per_second": 32.763, "eval_steps_per_second": 4.259, "step": 1200 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.63, "eval_loss": 0.41400572657585144, "eval_runtime": 3.0522, "eval_samples_per_second": 32.763, "eval_steps_per_second": 4.259, "step": 1225 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.64, "eval_loss": 0.4117923378944397, "eval_runtime": 3.0586, "eval_samples_per_second": 32.694, "eval_steps_per_second": 4.25, "step": 1250 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.62, "eval_loss": 0.4136669933795929, "eval_runtime": 3.062, "eval_samples_per_second": 32.658, "eval_steps_per_second": 4.246, "step": 1275 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.63, "eval_loss": 0.4112585783004761, "eval_runtime": 3.0617, "eval_samples_per_second": 32.662, "eval_steps_per_second": 4.246, "step": 1300 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.62, "eval_loss": 0.411198228597641, "eval_runtime": 3.0569, "eval_samples_per_second": 32.713, "eval_steps_per_second": 4.253, "step": 1325 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.63, "eval_loss": 0.41399067640304565, "eval_runtime": 3.0552, "eval_samples_per_second": 32.731, "eval_steps_per_second": 4.255, "step": 1350 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.64, "eval_loss": 0.4128565490245819, "eval_runtime": 3.0536, "eval_samples_per_second": 32.748, "eval_steps_per_second": 4.257, "step": 1375 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.64, "eval_loss": 0.41508427262306213, "eval_runtime": 3.0516, "eval_samples_per_second": 32.77, "eval_steps_per_second": 4.26, "step": 1400 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.63, "eval_loss": 0.4154996871948242, "eval_runtime": 3.0553, "eval_samples_per_second": 32.73, "eval_steps_per_second": 4.255, "step": 1425 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.63, "eval_loss": 0.41395488381385803, "eval_runtime": 3.0559, "eval_samples_per_second": 32.724, "eval_steps_per_second": 4.254, "step": 1450 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.64, "eval_loss": 0.414520800113678, "eval_runtime": 3.0565, "eval_samples_per_second": 32.717, "eval_steps_per_second": 4.253, "step": 1475 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.00025, "loss": 0.5347, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.63, "eval_loss": 0.41578900814056396, "eval_runtime": 3.0637, "eval_samples_per_second": 32.641, "eval_steps_per_second": 4.243, "step": 1500 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.62, "eval_loss": 0.41476428508758545, "eval_runtime": 3.0578, "eval_samples_per_second": 32.703, "eval_steps_per_second": 4.251, "step": 1525 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.6, "eval_loss": 0.41465869545936584, "eval_runtime": 3.0637, "eval_samples_per_second": 32.64, "eval_steps_per_second": 4.243, "step": 1550 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.64, "eval_loss": 0.41527068614959717, "eval_runtime": 3.0607, "eval_samples_per_second": 32.672, "eval_steps_per_second": 4.247, "step": 1575 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.63, "eval_loss": 0.41556769609451294, "eval_runtime": 3.0628, "eval_samples_per_second": 32.65, "eval_steps_per_second": 4.244, "step": 1600 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.64, "eval_loss": 0.4151536226272583, "eval_runtime": 3.0628, "eval_samples_per_second": 32.65, "eval_steps_per_second": 4.245, "step": 1625 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.64, "eval_loss": 0.4146164655685425, "eval_runtime": 3.0648, "eval_samples_per_second": 32.629, "eval_steps_per_second": 4.242, "step": 1650 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.64, "eval_loss": 0.4151190221309662, "eval_runtime": 3.0643, "eval_samples_per_second": 32.634, "eval_steps_per_second": 4.242, "step": 1675 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.61, "eval_loss": 0.41448092460632324, "eval_runtime": 3.0668, "eval_samples_per_second": 32.607, "eval_steps_per_second": 4.239, "step": 1700 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.61, "eval_loss": 0.41525405645370483, "eval_runtime": 3.0657, "eval_samples_per_second": 32.618, "eval_steps_per_second": 4.24, "step": 1725 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.64, "eval_loss": 0.4146501123905182, "eval_runtime": 3.0643, "eval_samples_per_second": 32.633, "eval_steps_per_second": 4.242, "step": 1750 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.64, "eval_loss": 0.4146224558353424, "eval_runtime": 3.0657, "eval_samples_per_second": 32.619, "eval_steps_per_second": 4.24, "step": 1775 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.62, "eval_loss": 0.41336506605148315, "eval_runtime": 3.068, "eval_samples_per_second": 32.595, "eval_steps_per_second": 4.237, "step": 1800 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.63, "eval_loss": 0.41397038102149963, "eval_runtime": 3.0653, "eval_samples_per_second": 32.624, "eval_steps_per_second": 4.241, "step": 1825 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.64, "eval_loss": 0.4141198694705963, "eval_runtime": 3.0676, "eval_samples_per_second": 32.599, "eval_steps_per_second": 4.238, "step": 1850 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.63, "eval_loss": 0.41505852341651917, "eval_runtime": 3.0673, "eval_samples_per_second": 32.602, "eval_steps_per_second": 4.238, "step": 1875 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.62, "eval_loss": 0.4149923622608185, "eval_runtime": 3.0651, "eval_samples_per_second": 32.625, "eval_steps_per_second": 4.241, "step": 1900 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.61, "eval_loss": 0.41483086347579956, "eval_runtime": 3.0669, "eval_samples_per_second": 32.606, "eval_steps_per_second": 4.239, "step": 1925 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.62, "eval_loss": 0.41491347551345825, "eval_runtime": 3.069, "eval_samples_per_second": 32.584, "eval_steps_per_second": 4.236, "step": 1950 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.63, "eval_loss": 0.4150215983390808, "eval_runtime": 3.0645, "eval_samples_per_second": 32.632, "eval_steps_per_second": 4.242, "step": 1975 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.5285, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.63, "eval_loss": 0.4149799048900604, "eval_runtime": 3.0687, "eval_samples_per_second": 32.587, "eval_steps_per_second": 4.236, "step": 2000 }, { "best_epoch": 11, "best_eval_accuracy": 0.67, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.5345863189697265, "train_runtime": 1637.4299, "train_samples_per_second": 19.543, "train_steps_per_second": 1.221 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }