{ "best_metric": null, "best_model_checkpoint": null, "epoch": 80.0, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.34, "eval_loss": 0.9872172474861145, "eval_runtime": 2.9066, "eval_samples_per_second": 34.405, "eval_steps_per_second": 4.473, "step": 25 }, { "best_epoch": 0, "best_eval_accuracy": 0.34, "epoch": 1.0, "step": 25 }, { "epoch": 2.0, "eval_accuracy": 0.59, "eval_loss": 0.854668378829956, "eval_runtime": 2.922, "eval_samples_per_second": 34.224, "eval_steps_per_second": 4.449, "step": 50 }, { "best_epoch": 1, "best_eval_accuracy": 0.59, "epoch": 2.0, "step": 50 }, { "epoch": 3.0, "eval_accuracy": 0.64, "eval_loss": 0.6061574816703796, "eval_runtime": 2.9748, "eval_samples_per_second": 33.615, "eval_steps_per_second": 4.37, "step": 75 }, { "best_epoch": 2, "best_eval_accuracy": 0.64, "epoch": 3.0, "step": 75 }, { "epoch": 4.0, "eval_accuracy": 0.61, "eval_loss": 0.609697163105011, "eval_runtime": 3.0021, "eval_samples_per_second": 33.31, "eval_steps_per_second": 4.33, "step": 100 }, { "best_epoch": 2, "best_eval_accuracy": 0.64, "epoch": 4.0, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.62, "eval_loss": 0.6063556671142578, "eval_runtime": 3.0192, "eval_samples_per_second": 33.122, "eval_steps_per_second": 4.306, "step": 125 }, { "best_epoch": 2, "best_eval_accuracy": 0.64, "epoch": 5.0, "step": 125 }, { "epoch": 6.0, "eval_accuracy": 0.63, "eval_loss": 0.5974360704421997, "eval_runtime": 3.0236, "eval_samples_per_second": 33.073, "eval_steps_per_second": 4.3, "step": 150 }, { "best_epoch": 2, "best_eval_accuracy": 0.64, "epoch": 6.0, "step": 150 }, { "epoch": 7.0, "eval_accuracy": 0.66, "eval_loss": 0.5722998976707458, "eval_runtime": 3.0321, "eval_samples_per_second": 32.98, "eval_steps_per_second": 4.287, "step": 175 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 7.0, "step": 175 }, { "epoch": 8.0, "eval_accuracy": 0.63, "eval_loss": 0.6179049015045166, "eval_runtime": 3.0319, "eval_samples_per_second": 32.983, "eval_steps_per_second": 4.288, "step": 200 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 8.0, "step": 200 }, { "epoch": 9.0, "eval_accuracy": 0.62, "eval_loss": 0.5842037796974182, "eval_runtime": 3.0474, "eval_samples_per_second": 32.815, "eval_steps_per_second": 4.266, "step": 225 }, { "best_epoch": 6, "best_eval_accuracy": 0.66, "epoch": 9.0, "step": 225 }, { "epoch": 10.0, "eval_accuracy": 0.68, "eval_loss": 0.6117396354675293, "eval_runtime": 3.0455, "eval_samples_per_second": 32.835, "eval_steps_per_second": 4.269, "step": 250 }, { "best_epoch": 9, "best_eval_accuracy": 0.68, "epoch": 10.0, "step": 250 }, { "epoch": 11.0, "eval_accuracy": 0.64, "eval_loss": 0.5443994402885437, "eval_runtime": 3.0418, "eval_samples_per_second": 32.875, "eval_steps_per_second": 4.274, "step": 275 }, { "best_epoch": 9, "best_eval_accuracy": 0.68, "epoch": 11.0, "step": 275 }, { "epoch": 12.0, "eval_accuracy": 0.68, "eval_loss": 0.7898240089416504, "eval_runtime": 3.0445, "eval_samples_per_second": 32.846, "eval_steps_per_second": 4.27, "step": 300 }, { "best_epoch": 9, "best_eval_accuracy": 0.68, "epoch": 12.0, "step": 300 }, { "epoch": 13.0, "eval_accuracy": 0.68, "eval_loss": 0.6850593686103821, "eval_runtime": 3.0447, "eval_samples_per_second": 32.844, "eval_steps_per_second": 4.27, "step": 325 }, { "best_epoch": 9, "best_eval_accuracy": 0.68, "epoch": 13.0, "step": 325 }, { "epoch": 14.0, "eval_accuracy": 0.69, "eval_loss": 0.7715756893157959, "eval_runtime": 3.0444, "eval_samples_per_second": 32.847, "eval_steps_per_second": 4.27, "step": 350 }, { "best_epoch": 13, "best_eval_accuracy": 0.69, "epoch": 14.0, "step": 350 }, { "epoch": 15.0, "eval_accuracy": 0.71, "eval_loss": 0.6750221252441406, "eval_runtime": 3.0491, "eval_samples_per_second": 32.796, "eval_steps_per_second": 4.264, "step": 375 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 15.0, "step": 375 }, { "epoch": 16.0, "eval_accuracy": 0.7, "eval_loss": 0.7645028829574585, "eval_runtime": 3.051, "eval_samples_per_second": 32.776, "eval_steps_per_second": 4.261, "step": 400 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 16.0, "step": 400 }, { "epoch": 17.0, "eval_accuracy": 0.7, "eval_loss": 0.7337567806243896, "eval_runtime": 3.0492, "eval_samples_per_second": 32.795, "eval_steps_per_second": 4.263, "step": 425 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 17.0, "step": 425 }, { "epoch": 18.0, "eval_accuracy": 0.66, "eval_loss": 0.8156324625015259, "eval_runtime": 3.0486, "eval_samples_per_second": 32.801, "eval_steps_per_second": 4.264, "step": 450 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 18.0, "step": 450 }, { "epoch": 19.0, "eval_accuracy": 0.68, "eval_loss": 0.7524186968803406, "eval_runtime": 3.0469, "eval_samples_per_second": 32.821, "eval_steps_per_second": 4.267, "step": 475 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 19.0, "step": 475 }, { "epoch": 20.0, "learning_rate": 0.015, "loss": 0.7431, "step": 500 }, { "epoch": 20.0, "eval_accuracy": 0.65, "eval_loss": 0.8516017198562622, "eval_runtime": 3.0478, "eval_samples_per_second": 32.811, "eval_steps_per_second": 4.265, "step": 500 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 20.0, "step": 500 }, { "epoch": 21.0, "eval_accuracy": 0.65, "eval_loss": 0.8224111199378967, "eval_runtime": 3.0473, "eval_samples_per_second": 32.816, "eval_steps_per_second": 4.266, "step": 525 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 21.0, "step": 525 }, { "epoch": 22.0, "eval_accuracy": 0.67, "eval_loss": 1.0606846809387207, "eval_runtime": 3.0487, "eval_samples_per_second": 32.801, "eval_steps_per_second": 4.264, "step": 550 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 22.0, "step": 550 }, { "epoch": 23.0, "eval_accuracy": 0.66, "eval_loss": 0.8977242112159729, "eval_runtime": 3.0478, "eval_samples_per_second": 32.81, "eval_steps_per_second": 4.265, "step": 575 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 23.0, "step": 575 }, { "epoch": 24.0, "eval_accuracy": 0.66, "eval_loss": 0.7860437631607056, "eval_runtime": 3.0465, "eval_samples_per_second": 32.825, "eval_steps_per_second": 4.267, "step": 600 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 24.0, "step": 600 }, { "epoch": 25.0, "eval_accuracy": 0.66, "eval_loss": 0.7284977436065674, "eval_runtime": 3.0473, "eval_samples_per_second": 32.816, "eval_steps_per_second": 4.266, "step": 625 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 25.0, "step": 625 }, { "epoch": 26.0, "eval_accuracy": 0.64, "eval_loss": 0.7096824049949646, "eval_runtime": 3.0452, "eval_samples_per_second": 32.839, "eval_steps_per_second": 4.269, "step": 650 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 26.0, "step": 650 }, { "epoch": 27.0, "eval_accuracy": 0.64, "eval_loss": 0.7291831374168396, "eval_runtime": 3.0439, "eval_samples_per_second": 32.852, "eval_steps_per_second": 4.271, "step": 675 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 27.0, "step": 675 }, { "epoch": 28.0, "eval_accuracy": 0.65, "eval_loss": 0.7131230235099792, "eval_runtime": 3.0452, "eval_samples_per_second": 32.839, "eval_steps_per_second": 4.269, "step": 700 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 28.0, "step": 700 }, { "epoch": 29.0, "eval_accuracy": 0.65, "eval_loss": 0.8038756847381592, "eval_runtime": 3.0474, "eval_samples_per_second": 32.815, "eval_steps_per_second": 4.266, "step": 725 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 29.0, "step": 725 }, { "epoch": 30.0, "eval_accuracy": 0.65, "eval_loss": 0.798814058303833, "eval_runtime": 3.0483, "eval_samples_per_second": 32.806, "eval_steps_per_second": 4.265, "step": 750 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 30.0, "step": 750 }, { "epoch": 31.0, "eval_accuracy": 0.64, "eval_loss": 0.7809301614761353, "eval_runtime": 3.0445, "eval_samples_per_second": 32.846, "eval_steps_per_second": 4.27, "step": 775 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 31.0, "step": 775 }, { "epoch": 32.0, "eval_accuracy": 0.64, "eval_loss": 0.7544003129005432, "eval_runtime": 3.0471, "eval_samples_per_second": 32.818, "eval_steps_per_second": 4.266, "step": 800 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 32.0, "step": 800 }, { "epoch": 33.0, "eval_accuracy": 0.62, "eval_loss": 0.7492163181304932, "eval_runtime": 3.0436, "eval_samples_per_second": 32.856, "eval_steps_per_second": 4.271, "step": 825 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 33.0, "step": 825 }, { "epoch": 34.0, "eval_accuracy": 0.64, "eval_loss": 0.8205734491348267, "eval_runtime": 3.0415, "eval_samples_per_second": 32.878, "eval_steps_per_second": 4.274, "step": 850 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 34.0, "step": 850 }, { "epoch": 35.0, "eval_accuracy": 0.66, "eval_loss": 0.640932559967041, "eval_runtime": 3.0376, "eval_samples_per_second": 32.921, "eval_steps_per_second": 4.28, "step": 875 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 35.0, "step": 875 }, { "epoch": 36.0, "eval_accuracy": 0.63, "eval_loss": 0.7143794894218445, "eval_runtime": 3.0421, "eval_samples_per_second": 32.872, "eval_steps_per_second": 4.273, "step": 900 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 36.0, "step": 900 }, { "epoch": 37.0, "eval_accuracy": 0.63, "eval_loss": 0.74139004945755, "eval_runtime": 3.037, "eval_samples_per_second": 32.927, "eval_steps_per_second": 4.281, "step": 925 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 37.0, "step": 925 }, { "epoch": 38.0, "eval_accuracy": 0.65, "eval_loss": 0.7422806620597839, "eval_runtime": 3.0339, "eval_samples_per_second": 32.961, "eval_steps_per_second": 4.285, "step": 950 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 38.0, "step": 950 }, { "epoch": 39.0, "eval_accuracy": 0.65, "eval_loss": 0.7765854001045227, "eval_runtime": 3.0422, "eval_samples_per_second": 32.871, "eval_steps_per_second": 4.273, "step": 975 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 39.0, "step": 975 }, { "epoch": 40.0, "learning_rate": 0.01, "loss": 0.3363, "step": 1000 }, { "epoch": 40.0, "eval_accuracy": 0.67, "eval_loss": 0.7181565761566162, "eval_runtime": 3.0356, "eval_samples_per_second": 32.942, "eval_steps_per_second": 4.282, "step": 1000 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 40.0, "step": 1000 }, { "epoch": 41.0, "eval_accuracy": 0.67, "eval_loss": 0.7375120520591736, "eval_runtime": 3.0388, "eval_samples_per_second": 32.908, "eval_steps_per_second": 4.278, "step": 1025 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 41.0, "step": 1025 }, { "epoch": 42.0, "eval_accuracy": 0.67, "eval_loss": 0.7235824465751648, "eval_runtime": 3.033, "eval_samples_per_second": 32.971, "eval_steps_per_second": 4.286, "step": 1050 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 42.0, "step": 1050 }, { "epoch": 43.0, "eval_accuracy": 0.66, "eval_loss": 0.7218219041824341, "eval_runtime": 3.039, "eval_samples_per_second": 32.906, "eval_steps_per_second": 4.278, "step": 1075 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 43.0, "step": 1075 }, { "epoch": 44.0, "eval_accuracy": 0.67, "eval_loss": 0.732373833656311, "eval_runtime": 3.0415, "eval_samples_per_second": 32.879, "eval_steps_per_second": 4.274, "step": 1100 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 44.0, "step": 1100 }, { "epoch": 45.0, "eval_accuracy": 0.67, "eval_loss": 0.7290899157524109, "eval_runtime": 3.0411, "eval_samples_per_second": 32.883, "eval_steps_per_second": 4.275, "step": 1125 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 45.0, "step": 1125 }, { "epoch": 46.0, "eval_accuracy": 0.67, "eval_loss": 0.6803033947944641, "eval_runtime": 3.0409, "eval_samples_per_second": 32.885, "eval_steps_per_second": 4.275, "step": 1150 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 46.0, "step": 1150 }, { "epoch": 47.0, "eval_accuracy": 0.67, "eval_loss": 0.6636987924575806, "eval_runtime": 3.0397, "eval_samples_per_second": 32.898, "eval_steps_per_second": 4.277, "step": 1175 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 47.0, "step": 1175 }, { "epoch": 48.0, "eval_accuracy": 0.65, "eval_loss": 0.7063630819320679, "eval_runtime": 3.0394, "eval_samples_per_second": 32.901, "eval_steps_per_second": 4.277, "step": 1200 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 48.0, "step": 1200 }, { "epoch": 49.0, "eval_accuracy": 0.65, "eval_loss": 0.6534314751625061, "eval_runtime": 3.0351, "eval_samples_per_second": 32.948, "eval_steps_per_second": 4.283, "step": 1225 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 49.0, "step": 1225 }, { "epoch": 50.0, "eval_accuracy": 0.67, "eval_loss": 0.7229902148246765, "eval_runtime": 3.0356, "eval_samples_per_second": 32.942, "eval_steps_per_second": 4.282, "step": 1250 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 50.0, "step": 1250 }, { "epoch": 51.0, "eval_accuracy": 0.65, "eval_loss": 0.733811616897583, "eval_runtime": 3.0331, "eval_samples_per_second": 32.97, "eval_steps_per_second": 4.286, "step": 1275 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 51.0, "step": 1275 }, { "epoch": 52.0, "eval_accuracy": 0.62, "eval_loss": 0.6494699716567993, "eval_runtime": 3.036, "eval_samples_per_second": 32.938, "eval_steps_per_second": 4.282, "step": 1300 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 52.0, "step": 1300 }, { "epoch": 53.0, "eval_accuracy": 0.63, "eval_loss": 0.653989851474762, "eval_runtime": 3.0366, "eval_samples_per_second": 32.932, "eval_steps_per_second": 4.281, "step": 1325 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 53.0, "step": 1325 }, { "epoch": 54.0, "eval_accuracy": 0.62, "eval_loss": 0.6994405388832092, "eval_runtime": 3.0342, "eval_samples_per_second": 32.958, "eval_steps_per_second": 4.285, "step": 1350 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 54.0, "step": 1350 }, { "epoch": 55.0, "eval_accuracy": 0.63, "eval_loss": 0.7039541006088257, "eval_runtime": 3.0349, "eval_samples_per_second": 32.95, "eval_steps_per_second": 4.284, "step": 1375 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 55.0, "step": 1375 }, { "epoch": 56.0, "eval_accuracy": 0.63, "eval_loss": 0.6774616837501526, "eval_runtime": 3.0335, "eval_samples_per_second": 32.965, "eval_steps_per_second": 4.285, "step": 1400 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 56.0, "step": 1400 }, { "epoch": 57.0, "eval_accuracy": 0.65, "eval_loss": 0.6425368785858154, "eval_runtime": 3.0318, "eval_samples_per_second": 32.984, "eval_steps_per_second": 4.288, "step": 1425 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 57.0, "step": 1425 }, { "epoch": 58.0, "eval_accuracy": 0.66, "eval_loss": 0.6424228549003601, "eval_runtime": 3.038, "eval_samples_per_second": 32.917, "eval_steps_per_second": 4.279, "step": 1450 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 58.0, "step": 1450 }, { "epoch": 59.0, "eval_accuracy": 0.66, "eval_loss": 0.6781678795814514, "eval_runtime": 3.0394, "eval_samples_per_second": 32.901, "eval_steps_per_second": 4.277, "step": 1475 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 59.0, "step": 1475 }, { "epoch": 60.0, "learning_rate": 0.005, "loss": 0.2375, "step": 1500 }, { "epoch": 60.0, "eval_accuracy": 0.68, "eval_loss": 0.6769825220108032, "eval_runtime": 3.0319, "eval_samples_per_second": 32.983, "eval_steps_per_second": 4.288, "step": 1500 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 60.0, "step": 1500 }, { "epoch": 61.0, "eval_accuracy": 0.68, "eval_loss": 0.7028858661651611, "eval_runtime": 3.041, "eval_samples_per_second": 32.884, "eval_steps_per_second": 4.275, "step": 1525 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 61.0, "step": 1525 }, { "epoch": 62.0, "eval_accuracy": 0.68, "eval_loss": 0.6823871731758118, "eval_runtime": 3.036, "eval_samples_per_second": 32.938, "eval_steps_per_second": 4.282, "step": 1550 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 62.0, "step": 1550 }, { "epoch": 63.0, "eval_accuracy": 0.68, "eval_loss": 0.6846780180931091, "eval_runtime": 3.0396, "eval_samples_per_second": 32.899, "eval_steps_per_second": 4.277, "step": 1575 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 63.0, "step": 1575 }, { "epoch": 64.0, "eval_accuracy": 0.68, "eval_loss": 0.6766635775566101, "eval_runtime": 3.0413, "eval_samples_per_second": 32.88, "eval_steps_per_second": 4.274, "step": 1600 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 64.0, "step": 1600 }, { "epoch": 65.0, "eval_accuracy": 0.67, "eval_loss": 0.6361604928970337, "eval_runtime": 3.033, "eval_samples_per_second": 32.971, "eval_steps_per_second": 4.286, "step": 1625 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 65.0, "step": 1625 }, { "epoch": 66.0, "eval_accuracy": 0.67, "eval_loss": 0.6292469501495361, "eval_runtime": 3.0278, "eval_samples_per_second": 33.027, "eval_steps_per_second": 4.294, "step": 1650 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 66.0, "step": 1650 }, { "epoch": 67.0, "eval_accuracy": 0.67, "eval_loss": 0.6470186710357666, "eval_runtime": 3.0287, "eval_samples_per_second": 33.018, "eval_steps_per_second": 4.292, "step": 1675 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 67.0, "step": 1675 }, { "epoch": 68.0, "eval_accuracy": 0.67, "eval_loss": 0.6660796403884888, "eval_runtime": 3.0283, "eval_samples_per_second": 33.022, "eval_steps_per_second": 4.293, "step": 1700 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 68.0, "step": 1700 }, { "epoch": 69.0, "eval_accuracy": 0.67, "eval_loss": 0.6304946541786194, "eval_runtime": 3.0292, "eval_samples_per_second": 33.012, "eval_steps_per_second": 4.292, "step": 1725 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 69.0, "step": 1725 }, { "epoch": 70.0, "eval_accuracy": 0.67, "eval_loss": 0.6491644978523254, "eval_runtime": 3.0286, "eval_samples_per_second": 33.019, "eval_steps_per_second": 4.292, "step": 1750 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 70.0, "step": 1750 }, { "epoch": 71.0, "eval_accuracy": 0.67, "eval_loss": 0.652509331703186, "eval_runtime": 3.0249, "eval_samples_per_second": 33.058, "eval_steps_per_second": 4.298, "step": 1775 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 71.0, "step": 1775 }, { "epoch": 72.0, "eval_accuracy": 0.67, "eval_loss": 0.6339465379714966, "eval_runtime": 3.0269, "eval_samples_per_second": 33.037, "eval_steps_per_second": 4.295, "step": 1800 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 72.0, "step": 1800 }, { "epoch": 73.0, "eval_accuracy": 0.67, "eval_loss": 0.6621412634849548, "eval_runtime": 3.0262, "eval_samples_per_second": 33.045, "eval_steps_per_second": 4.296, "step": 1825 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 73.0, "step": 1825 }, { "epoch": 74.0, "eval_accuracy": 0.67, "eval_loss": 0.6562237739562988, "eval_runtime": 3.0239, "eval_samples_per_second": 33.07, "eval_steps_per_second": 4.299, "step": 1850 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 74.0, "step": 1850 }, { "epoch": 75.0, "eval_accuracy": 0.67, "eval_loss": 0.6396531462669373, "eval_runtime": 3.0227, "eval_samples_per_second": 33.083, "eval_steps_per_second": 4.301, "step": 1875 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 75.0, "step": 1875 }, { "epoch": 76.0, "eval_accuracy": 0.67, "eval_loss": 0.6496105194091797, "eval_runtime": 3.0233, "eval_samples_per_second": 33.077, "eval_steps_per_second": 4.3, "step": 1900 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 76.0, "step": 1900 }, { "epoch": 77.0, "eval_accuracy": 0.67, "eval_loss": 0.6401783227920532, "eval_runtime": 3.0238, "eval_samples_per_second": 33.072, "eval_steps_per_second": 4.299, "step": 1925 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 77.0, "step": 1925 }, { "epoch": 78.0, "eval_accuracy": 0.67, "eval_loss": 0.6382293105125427, "eval_runtime": 3.0235, "eval_samples_per_second": 33.074, "eval_steps_per_second": 4.3, "step": 1950 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 78.0, "step": 1950 }, { "epoch": 79.0, "eval_accuracy": 0.67, "eval_loss": 0.6407208442687988, "eval_runtime": 3.0231, "eval_samples_per_second": 33.079, "eval_steps_per_second": 4.3, "step": 1975 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 79.0, "step": 1975 }, { "epoch": 80.0, "learning_rate": 0.0, "loss": 0.2102, "step": 2000 }, { "epoch": 80.0, "eval_accuracy": 0.67, "eval_loss": 0.6390834450721741, "eval_runtime": 3.0233, "eval_samples_per_second": 33.076, "eval_steps_per_second": 4.3, "step": 2000 }, { "best_epoch": 14, "best_eval_accuracy": 0.71, "epoch": 80.0, "step": 2000 }, { "epoch": 80.0, "step": 2000, "total_flos": 2.9821702864896e+16, "train_loss": 0.38176900482177734, "train_runtime": 1625.2113, "train_samples_per_second": 19.69, "train_steps_per_second": 1.231 } ], "max_steps": 2000, "num_train_epochs": 80, "total_flos": 2.9821702864896e+16, "trial_name": null, "trial_params": null }