{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.45, "eval_loss": 0.46248623728752136, "eval_runtime": 2.8961, "eval_samples_per_second": 34.53, "eval_steps_per_second": 4.489, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.45, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.61, "eval_loss": 0.48594656586647034, "eval_runtime": 2.9335, "eval_samples_per_second": 34.089, "eval_steps_per_second": 4.432, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.61, "eval_loss": 0.4227062165737152, "eval_runtime": 2.9668, "eval_samples_per_second": 33.707, "eval_steps_per_second": 4.382, "step": 75 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.53, "eval_loss": 0.4247153103351593, "eval_runtime": 2.9923, "eval_samples_per_second": 33.419, "eval_steps_per_second": 4.344, "step": 100 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.43, "eval_loss": 0.44806617498397827, "eval_runtime": 3.0015, "eval_samples_per_second": 33.317, "eval_steps_per_second": 4.331, "step": 125 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.57, "eval_loss": 0.4309893846511841, "eval_runtime": 3.023, "eval_samples_per_second": 33.079, "eval_steps_per_second": 4.3, "step": 150 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.47, "eval_loss": 0.4267328679561615, "eval_runtime": 3.0288, "eval_samples_per_second": 33.017, "eval_steps_per_second": 4.292, "step": 175 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.5, "eval_loss": 0.42460113763809204, "eval_runtime": 3.0312, "eval_samples_per_second": 32.99, "eval_steps_per_second": 4.289, "step": 200 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.44, "eval_loss": 0.426717609167099, "eval_runtime": 3.0248, "eval_samples_per_second": 33.06, "eval_steps_per_second": 4.298, "step": 225 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.51, "eval_loss": 0.4260082542896271, "eval_runtime": 3.0266, "eval_samples_per_second": 33.04, "eval_steps_per_second": 4.295, "step": 250 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.52, "eval_loss": 0.42262589931488037, "eval_runtime": 3.027, "eval_samples_per_second": 33.037, "eval_steps_per_second": 4.295, "step": 275 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.44, "eval_loss": 0.427063524723053, "eval_runtime": 3.0293, "eval_samples_per_second": 33.011, "eval_steps_per_second": 4.291, "step": 300 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.49, "eval_loss": 0.42662951350212097, "eval_runtime": 3.0315, "eval_samples_per_second": 32.987, "eval_steps_per_second": 4.288, "step": 325 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.58, "eval_loss": 0.42443209886550903, "eval_runtime": 3.0363, "eval_samples_per_second": 32.935, "eval_steps_per_second": 4.282, "step": 350 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.55, "eval_loss": 0.42530936002731323, "eval_runtime": 3.0344, "eval_samples_per_second": 32.956, "eval_steps_per_second": 4.284, "step": 375 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.51, "eval_loss": 0.4255802631378174, "eval_runtime": 3.0396, "eval_samples_per_second": 32.9, "eval_steps_per_second": 4.277, "step": 400 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.44, "eval_loss": 0.4265216588973999, "eval_runtime": 3.0416, "eval_samples_per_second": 32.877, "eval_steps_per_second": 4.274, "step": 425 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.42, "eval_loss": 0.42613503336906433, "eval_runtime": 3.0532, "eval_samples_per_second": 32.753, "eval_steps_per_second": 4.258, "step": 450 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.46, "eval_loss": 0.4262290298938751, "eval_runtime": 3.0418, "eval_samples_per_second": 32.875, "eval_steps_per_second": 4.274, "step": 475 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.037500000000000006, "loss": 1.4009, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.47, "eval_loss": 0.4260247051715851, "eval_runtime": 3.0426, "eval_samples_per_second": 32.867, "eval_steps_per_second": 4.273, "step": 500 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.42, "eval_loss": 0.4285390377044678, "eval_runtime": 3.0444, "eval_samples_per_second": 32.848, "eval_steps_per_second": 4.27, "step": 525 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.5, "eval_loss": 0.4259931147098541, "eval_runtime": 3.0428, "eval_samples_per_second": 32.865, "eval_steps_per_second": 4.272, "step": 550 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.54, "eval_loss": 0.42451244592666626, "eval_runtime": 3.0442, "eval_samples_per_second": 32.849, "eval_steps_per_second": 4.27, "step": 575 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.54, "eval_loss": 0.42513129115104675, "eval_runtime": 3.0467, "eval_samples_per_second": 32.823, "eval_steps_per_second": 4.267, "step": 600 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.46, "eval_loss": 0.42705675959587097, "eval_runtime": 3.0463, "eval_samples_per_second": 32.827, "eval_steps_per_second": 4.268, "step": 625 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.46, "eval_loss": 0.4261193871498108, "eval_runtime": 3.0479, "eval_samples_per_second": 32.81, "eval_steps_per_second": 4.265, "step": 650 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.49, "eval_loss": 0.42566367983818054, "eval_runtime": 3.0456, "eval_samples_per_second": 32.834, "eval_steps_per_second": 4.268, "step": 675 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.55, "eval_loss": 0.42552897334098816, "eval_runtime": 3.0459, "eval_samples_per_second": 32.831, "eval_steps_per_second": 4.268, "step": 700 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.52, "eval_loss": 0.42543452978134155, "eval_runtime": 3.0492, "eval_samples_per_second": 32.796, "eval_steps_per_second": 4.263, "step": 725 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.52, "eval_loss": 0.4259891211986542, "eval_runtime": 3.0481, "eval_samples_per_second": 32.808, "eval_steps_per_second": 4.265, "step": 750 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.49, "eval_loss": 0.4255729913711548, "eval_runtime": 3.0503, "eval_samples_per_second": 32.784, "eval_steps_per_second": 4.262, "step": 775 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.55, "eval_loss": 0.4256853759288788, "eval_runtime": 3.0494, "eval_samples_per_second": 32.793, "eval_steps_per_second": 4.263, "step": 800 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.53, "eval_loss": 0.42549124360084534, "eval_runtime": 3.0483, "eval_samples_per_second": 32.806, "eval_steps_per_second": 4.265, "step": 825 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.54, "eval_loss": 0.4255514144897461, "eval_runtime": 3.0467, "eval_samples_per_second": 32.823, "eval_steps_per_second": 4.267, "step": 850 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.44, "eval_loss": 0.4261658191680908, "eval_runtime": 3.0508, "eval_samples_per_second": 32.778, "eval_steps_per_second": 4.261, "step": 875 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.51, "eval_loss": 0.4256916344165802, "eval_runtime": 3.0492, "eval_samples_per_second": 32.795, "eval_steps_per_second": 4.263, "step": 900 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.4, "eval_loss": 0.42672908306121826, "eval_runtime": 3.0502, "eval_samples_per_second": 32.785, "eval_steps_per_second": 4.262, "step": 925 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.48, "eval_loss": 0.4258723556995392, "eval_runtime": 3.0474, "eval_samples_per_second": 32.815, "eval_steps_per_second": 4.266, "step": 950 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.55, "eval_loss": 0.4254632890224457, "eval_runtime": 3.0495, "eval_samples_per_second": 32.792, "eval_steps_per_second": 4.263, "step": 975 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.025, "loss": 0.9833, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.49, "eval_loss": 0.42543506622314453, "eval_runtime": 3.0505, "eval_samples_per_second": 32.782, "eval_steps_per_second": 4.262, "step": 1000 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.49, "eval_loss": 0.4257011413574219, "eval_runtime": 3.0503, "eval_samples_per_second": 32.783, "eval_steps_per_second": 4.262, "step": 1025 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.58, "eval_loss": 0.42543143033981323, "eval_runtime": 3.0485, "eval_samples_per_second": 32.803, "eval_steps_per_second": 4.264, "step": 1050 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.48, "eval_loss": 0.42607200145721436, "eval_runtime": 3.0551, "eval_samples_per_second": 32.732, "eval_steps_per_second": 4.255, "step": 1075 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.5, "eval_loss": 0.4259500503540039, "eval_runtime": 3.0508, "eval_samples_per_second": 32.779, "eval_steps_per_second": 4.261, "step": 1100 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.51, "eval_loss": 0.42568397521972656, "eval_runtime": 3.0487, "eval_samples_per_second": 32.801, "eval_steps_per_second": 4.264, "step": 1125 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.52, "eval_loss": 0.4254191517829895, "eval_runtime": 3.0504, "eval_samples_per_second": 32.782, "eval_steps_per_second": 4.262, "step": 1150 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.5, "eval_loss": 0.42553916573524475, "eval_runtime": 3.0517, "eval_samples_per_second": 32.769, "eval_steps_per_second": 4.26, "step": 1175 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.48, "eval_loss": 0.42568346858024597, "eval_runtime": 3.05, "eval_samples_per_second": 32.787, "eval_steps_per_second": 4.262, "step": 1200 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.41, "eval_loss": 0.42611098289489746, "eval_runtime": 3.0501, "eval_samples_per_second": 32.786, "eval_steps_per_second": 4.262, "step": 1225 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.57, "eval_loss": 0.42508599162101746, "eval_runtime": 3.0503, "eval_samples_per_second": 32.784, "eval_steps_per_second": 4.262, "step": 1250 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.47, "eval_loss": 0.4258117377758026, "eval_runtime": 3.0511, "eval_samples_per_second": 32.775, "eval_steps_per_second": 4.261, "step": 1275 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.52, "eval_loss": 0.42553529143333435, "eval_runtime": 3.0496, "eval_samples_per_second": 32.792, "eval_steps_per_second": 4.263, "step": 1300 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.53, "eval_loss": 0.42570650577545166, "eval_runtime": 3.0501, "eval_samples_per_second": 32.786, "eval_steps_per_second": 4.262, "step": 1325 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.52, "eval_loss": 0.4255848228931427, "eval_runtime": 3.0507, "eval_samples_per_second": 32.78, "eval_steps_per_second": 4.261, "step": 1350 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.51, "eval_loss": 0.4257044196128845, "eval_runtime": 3.0575, "eval_samples_per_second": 32.706, "eval_steps_per_second": 4.252, "step": 1375 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.5, "eval_loss": 0.4256804585456848, "eval_runtime": 3.0497, "eval_samples_per_second": 32.79, "eval_steps_per_second": 4.263, "step": 1400 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.49, "eval_loss": 0.4257115423679352, "eval_runtime": 3.0519, "eval_samples_per_second": 32.766, "eval_steps_per_second": 4.26, "step": 1425 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.51, "eval_loss": 0.4257067143917084, "eval_runtime": 3.0511, "eval_samples_per_second": 32.775, "eval_steps_per_second": 4.261, "step": 1450 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.57, "eval_loss": 0.42552703619003296, "eval_runtime": 3.052, "eval_samples_per_second": 32.765, "eval_steps_per_second": 4.259, "step": 1475 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.0125, "loss": 0.7428, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.46, "eval_loss": 0.42590823769569397, "eval_runtime": 3.0534, "eval_samples_per_second": 32.75, "eval_steps_per_second": 4.258, "step": 1500 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.51, "eval_loss": 0.4257360100746155, "eval_runtime": 3.0514, "eval_samples_per_second": 32.772, "eval_steps_per_second": 4.26, "step": 1525 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.55, "eval_loss": 0.4255290627479553, "eval_runtime": 3.0557, "eval_samples_per_second": 32.725, "eval_steps_per_second": 4.254, "step": 1550 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.55, "eval_loss": 0.42556220293045044, "eval_runtime": 3.05, "eval_samples_per_second": 32.787, "eval_steps_per_second": 4.262, "step": 1575 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.4, "eval_loss": 0.4258395731449127, "eval_runtime": 3.0496, "eval_samples_per_second": 32.791, "eval_steps_per_second": 4.263, "step": 1600 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.44, "eval_loss": 0.4258385896682739, "eval_runtime": 3.0514, "eval_samples_per_second": 32.771, "eval_steps_per_second": 4.26, "step": 1625 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.41, "eval_loss": 0.4258986711502075, "eval_runtime": 3.0504, "eval_samples_per_second": 32.782, "eval_steps_per_second": 4.262, "step": 1650 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.38, "eval_loss": 0.4259600341320038, "eval_runtime": 3.0516, "eval_samples_per_second": 32.77, "eval_steps_per_second": 4.26, "step": 1675 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.52, "eval_loss": 0.4256523549556732, "eval_runtime": 3.0526, "eval_samples_per_second": 32.759, "eval_steps_per_second": 4.259, "step": 1700 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.35, "eval_loss": 0.42585432529449463, "eval_runtime": 3.0509, "eval_samples_per_second": 32.777, "eval_steps_per_second": 4.261, "step": 1725 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.38, "eval_loss": 0.4259151518344879, "eval_runtime": 3.0508, "eval_samples_per_second": 32.779, "eval_steps_per_second": 4.261, "step": 1750 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.44, "eval_loss": 0.4259404242038727, "eval_runtime": 3.052, "eval_samples_per_second": 32.766, "eval_steps_per_second": 4.26, "step": 1775 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.41, "eval_loss": 0.42599546909332275, "eval_runtime": 3.0519, "eval_samples_per_second": 32.766, "eval_steps_per_second": 4.26, "step": 1800 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.45, "eval_loss": 0.42574265599250793, "eval_runtime": 3.0522, "eval_samples_per_second": 32.764, "eval_steps_per_second": 4.259, "step": 1825 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.42, "eval_loss": 0.4257790446281433, "eval_runtime": 3.0506, "eval_samples_per_second": 32.781, "eval_steps_per_second": 4.261, "step": 1850 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.41, "eval_loss": 0.4257981479167938, "eval_runtime": 3.0529, "eval_samples_per_second": 32.755, "eval_steps_per_second": 4.258, "step": 1875 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.4, "eval_loss": 0.4257836639881134, "eval_runtime": 3.0536, "eval_samples_per_second": 32.748, "eval_steps_per_second": 4.257, "step": 1900 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.45, "eval_loss": 0.42581596970558167, "eval_runtime": 3.0529, "eval_samples_per_second": 32.756, "eval_steps_per_second": 4.258, "step": 1925 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.43, "eval_loss": 0.4258066415786743, "eval_runtime": 3.0527, "eval_samples_per_second": 32.758, "eval_steps_per_second": 4.258, "step": 1950 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.44, "eval_loss": 0.4258076846599579, "eval_runtime": 3.0537, "eval_samples_per_second": 32.747, "eval_steps_per_second": 4.257, "step": 1975 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.6138, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.4, "eval_loss": 0.4258350431919098, "eval_runtime": 3.0537, "eval_samples_per_second": 32.747, "eval_steps_per_second": 4.257, "step": 2000 }, { "best_epoch": 1, "best_eval_accuracy": 0.61, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.9352073516845704, "train_runtime": 1633.4708, "train_samples_per_second": 19.59, "train_steps_per_second": 1.224 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }