{ "best_metric": 0.8169014084507042, "best_model_checkpoint": "deit-base-distilled-patch16-224-65-fold4/checkpoint-91", "epoch": 92.3076923076923, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.4647887323943662, "eval_loss": 0.7266488075256348, "eval_runtime": 0.8434, "eval_samples_per_second": 84.184, "eval_steps_per_second": 3.557, "step": 3 }, { "epoch": 1.8461538461538463, "eval_accuracy": 0.5211267605633803, "eval_loss": 0.8115941286087036, "eval_runtime": 0.8919, "eval_samples_per_second": 79.608, "eval_steps_per_second": 3.364, "step": 6 }, { "epoch": 2.769230769230769, "eval_accuracy": 0.4647887323943662, "eval_loss": 0.7081143856048584, "eval_runtime": 0.8712, "eval_samples_per_second": 81.496, "eval_steps_per_second": 3.443, "step": 9 }, { "epoch": 3.076923076923077, "grad_norm": 2.1958322525024414, "learning_rate": 1.6666666666666667e-05, "loss": 0.7173, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.5633802816901409, "eval_loss": 0.6644501090049744, "eval_runtime": 0.8954, "eval_samples_per_second": 79.298, "eval_steps_per_second": 3.351, "step": 13 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.5915492957746479, "eval_loss": 0.6440630555152893, "eval_runtime": 0.8941, "eval_samples_per_second": 79.413, "eval_steps_per_second": 3.355, "step": 16 }, { "epoch": 5.846153846153846, "eval_accuracy": 0.676056338028169, "eval_loss": 0.6400186419487, "eval_runtime": 0.933, "eval_samples_per_second": 76.096, "eval_steps_per_second": 3.215, "step": 19 }, { "epoch": 6.153846153846154, "grad_norm": 2.7608325481414795, "learning_rate": 3.3333333333333335e-05, "loss": 0.6351, "step": 20 }, { "epoch": 6.769230769230769, "eval_accuracy": 0.6619718309859155, "eval_loss": 0.6054678559303284, "eval_runtime": 0.9387, "eval_samples_per_second": 75.638, "eval_steps_per_second": 3.196, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.5352112676056338, "eval_loss": 0.7769902944564819, "eval_runtime": 0.9329, "eval_samples_per_second": 76.108, "eval_steps_per_second": 3.216, "step": 26 }, { "epoch": 8.923076923076923, "eval_accuracy": 0.6901408450704225, "eval_loss": 0.6259447932243347, "eval_runtime": 0.924, "eval_samples_per_second": 76.842, "eval_steps_per_second": 3.247, "step": 29 }, { "epoch": 9.23076923076923, "grad_norm": 5.699825286865234, "learning_rate": 5e-05, "loss": 0.5434, "step": 30 }, { "epoch": 9.846153846153847, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.5889422297477722, "eval_runtime": 0.9148, "eval_samples_per_second": 77.612, "eval_steps_per_second": 3.279, "step": 32 }, { "epoch": 10.76923076923077, "eval_accuracy": 0.647887323943662, "eval_loss": 0.7283326387405396, "eval_runtime": 0.9062, "eval_samples_per_second": 78.347, "eval_steps_per_second": 3.31, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.647887323943662, "eval_loss": 0.6897829174995422, "eval_runtime": 0.9341, "eval_samples_per_second": 76.013, "eval_steps_per_second": 3.212, "step": 39 }, { "epoch": 12.307692307692308, "grad_norm": 2.502607583999634, "learning_rate": 4.814814814814815e-05, "loss": 0.4861, "step": 40 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.6428806781768799, "eval_runtime": 0.9382, "eval_samples_per_second": 75.679, "eval_steps_per_second": 3.198, "step": 42 }, { "epoch": 13.846153846153847, "eval_accuracy": 0.6619718309859155, "eval_loss": 0.691542387008667, "eval_runtime": 0.932, "eval_samples_per_second": 76.179, "eval_steps_per_second": 3.219, "step": 45 }, { "epoch": 14.76923076923077, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.5702247619628906, "eval_runtime": 0.9242, "eval_samples_per_second": 76.822, "eval_steps_per_second": 3.246, "step": 48 }, { "epoch": 15.384615384615385, "grad_norm": 5.8594255447387695, "learning_rate": 4.62962962962963e-05, "loss": 0.4285, "step": 50 }, { "epoch": 16.0, "eval_accuracy": 0.704225352112676, "eval_loss": 0.6356058120727539, "eval_runtime": 0.919, "eval_samples_per_second": 77.258, "eval_steps_per_second": 3.264, "step": 52 }, { "epoch": 16.923076923076923, "eval_accuracy": 0.676056338028169, "eval_loss": 0.6981013417243958, "eval_runtime": 0.9247, "eval_samples_per_second": 76.785, "eval_steps_per_second": 3.244, "step": 55 }, { "epoch": 17.846153846153847, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.5218324661254883, "eval_runtime": 0.917, "eval_samples_per_second": 77.425, "eval_steps_per_second": 3.271, "step": 58 }, { "epoch": 18.46153846153846, "grad_norm": 6.547220706939697, "learning_rate": 4.4444444444444447e-05, "loss": 0.3781, "step": 60 }, { "epoch": 18.76923076923077, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.534016489982605, "eval_runtime": 0.914, "eval_samples_per_second": 77.678, "eval_steps_per_second": 3.282, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.676056338028169, "eval_loss": 0.7611135244369507, "eval_runtime": 0.9206, "eval_samples_per_second": 77.126, "eval_steps_per_second": 3.259, "step": 65 }, { "epoch": 20.923076923076923, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.5939193964004517, "eval_runtime": 0.9415, "eval_samples_per_second": 75.408, "eval_steps_per_second": 3.186, "step": 68 }, { "epoch": 21.53846153846154, "grad_norm": 4.307927131652832, "learning_rate": 4.259259259259259e-05, "loss": 0.3516, "step": 70 }, { "epoch": 21.846153846153847, "eval_accuracy": 0.7887323943661971, "eval_loss": 0.61859130859375, "eval_runtime": 0.9317, "eval_samples_per_second": 76.201, "eval_steps_per_second": 3.22, "step": 71 }, { "epoch": 22.76923076923077, "eval_accuracy": 0.704225352112676, "eval_loss": 0.712211549282074, "eval_runtime": 0.9272, "eval_samples_per_second": 76.571, "eval_steps_per_second": 3.235, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.7887323943661971, "eval_loss": 0.5930981040000916, "eval_runtime": 0.9231, "eval_samples_per_second": 76.916, "eval_steps_per_second": 3.25, "step": 78 }, { "epoch": 24.615384615384617, "grad_norm": 5.136722087860107, "learning_rate": 4.074074074074074e-05, "loss": 0.296, "step": 80 }, { "epoch": 24.923076923076923, "eval_accuracy": 0.6901408450704225, "eval_loss": 0.6304548978805542, "eval_runtime": 0.9177, "eval_samples_per_second": 77.366, "eval_steps_per_second": 3.269, "step": 81 }, { "epoch": 25.846153846153847, "eval_accuracy": 0.704225352112676, "eval_loss": 0.8947206735610962, "eval_runtime": 0.9322, "eval_samples_per_second": 76.164, "eval_steps_per_second": 3.218, "step": 84 }, { "epoch": 26.76923076923077, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.6216529011726379, "eval_runtime": 0.9208, "eval_samples_per_second": 77.108, "eval_steps_per_second": 3.258, "step": 87 }, { "epoch": 27.692307692307693, "grad_norm": 2.791210651397705, "learning_rate": 3.888888888888889e-05, "loss": 0.2741, "step": 90 }, { "epoch": 28.0, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.7217584848403931, "eval_runtime": 0.9386, "eval_samples_per_second": 75.642, "eval_steps_per_second": 3.196, "step": 91 }, { "epoch": 28.923076923076923, "eval_accuracy": 0.7887323943661971, "eval_loss": 0.6687091588973999, "eval_runtime": 0.9304, "eval_samples_per_second": 76.314, "eval_steps_per_second": 3.225, "step": 94 }, { "epoch": 29.846153846153847, "eval_accuracy": 0.8028169014084507, "eval_loss": 0.6647565960884094, "eval_runtime": 0.9206, "eval_samples_per_second": 77.123, "eval_steps_per_second": 3.259, "step": 97 }, { "epoch": 30.76923076923077, "grad_norm": 4.038685321807861, "learning_rate": 3.7037037037037037e-05, "loss": 0.2559, "step": 100 }, { "epoch": 30.76923076923077, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.64328533411026, "eval_runtime": 0.9253, "eval_samples_per_second": 76.729, "eval_steps_per_second": 3.242, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.6673524975776672, "eval_runtime": 0.9299, "eval_samples_per_second": 76.356, "eval_steps_per_second": 3.226, "step": 104 }, { "epoch": 32.92307692307692, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.6642715334892273, "eval_runtime": 0.9221, "eval_samples_per_second": 76.995, "eval_steps_per_second": 3.253, "step": 107 }, { "epoch": 33.84615384615385, "grad_norm": 2.9196064472198486, "learning_rate": 3.518518518518519e-05, "loss": 0.2001, "step": 110 }, { "epoch": 33.84615384615385, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.6247330904006958, "eval_runtime": 0.923, "eval_samples_per_second": 76.922, "eval_steps_per_second": 3.25, "step": 110 }, { "epoch": 34.76923076923077, "eval_accuracy": 0.6901408450704225, "eval_loss": 0.634434163570404, "eval_runtime": 0.9207, "eval_samples_per_second": 77.116, "eval_steps_per_second": 3.258, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7071972489356995, "eval_runtime": 0.9162, "eval_samples_per_second": 77.491, "eval_steps_per_second": 3.274, "step": 117 }, { "epoch": 36.92307692307692, "grad_norm": 2.7139010429382324, "learning_rate": 3.3333333333333335e-05, "loss": 0.1728, "step": 120 }, { "epoch": 36.92307692307692, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.7145668864250183, "eval_runtime": 0.9546, "eval_samples_per_second": 74.38, "eval_steps_per_second": 3.143, "step": 120 }, { "epoch": 37.84615384615385, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.8212233781814575, "eval_runtime": 0.9206, "eval_samples_per_second": 77.122, "eval_steps_per_second": 3.259, "step": 123 }, { "epoch": 38.76923076923077, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.7901431322097778, "eval_runtime": 0.9408, "eval_samples_per_second": 75.465, "eval_steps_per_second": 3.189, "step": 126 }, { "epoch": 40.0, "grad_norm": 3.7214014530181885, "learning_rate": 3.148148148148148e-05, "loss": 0.2109, "step": 130 }, { "epoch": 40.0, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.8235028982162476, "eval_runtime": 0.9237, "eval_samples_per_second": 76.863, "eval_steps_per_second": 3.248, "step": 130 }, { "epoch": 40.92307692307692, "eval_accuracy": 0.6901408450704225, "eval_loss": 0.9196304678916931, "eval_runtime": 0.9201, "eval_samples_per_second": 77.162, "eval_steps_per_second": 3.26, "step": 133 }, { "epoch": 41.84615384615385, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7758485078811646, "eval_runtime": 0.918, "eval_samples_per_second": 77.345, "eval_steps_per_second": 3.268, "step": 136 }, { "epoch": 42.76923076923077, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.7692318558692932, "eval_runtime": 0.9271, "eval_samples_per_second": 76.58, "eval_steps_per_second": 3.236, "step": 139 }, { "epoch": 43.07692307692308, "grad_norm": 2.981480121612549, "learning_rate": 2.962962962962963e-05, "loss": 0.1634, "step": 140 }, { "epoch": 44.0, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.830990731716156, "eval_runtime": 0.9143, "eval_samples_per_second": 77.651, "eval_steps_per_second": 3.281, "step": 143 }, { "epoch": 44.92307692307692, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.7550302147865295, "eval_runtime": 0.9175, "eval_samples_per_second": 77.387, "eval_steps_per_second": 3.27, "step": 146 }, { "epoch": 45.84615384615385, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.7645807862281799, "eval_runtime": 0.9201, "eval_samples_per_second": 77.165, "eval_steps_per_second": 3.26, "step": 149 }, { "epoch": 46.15384615384615, "grad_norm": 2.219327926635742, "learning_rate": 2.777777777777778e-05, "loss": 0.148, "step": 150 }, { "epoch": 46.76923076923077, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7207580804824829, "eval_runtime": 0.9248, "eval_samples_per_second": 76.77, "eval_steps_per_second": 3.244, "step": 152 }, { "epoch": 48.0, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7324273586273193, "eval_runtime": 0.9287, "eval_samples_per_second": 76.454, "eval_steps_per_second": 3.23, "step": 156 }, { "epoch": 48.92307692307692, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7855945229530334, "eval_runtime": 0.923, "eval_samples_per_second": 76.922, "eval_steps_per_second": 3.25, "step": 159 }, { "epoch": 49.23076923076923, "grad_norm": 3.3428738117218018, "learning_rate": 2.5925925925925925e-05, "loss": 0.1568, "step": 160 }, { "epoch": 49.84615384615385, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.8032997250556946, "eval_runtime": 0.9319, "eval_samples_per_second": 76.191, "eval_steps_per_second": 3.219, "step": 162 }, { "epoch": 50.76923076923077, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.9006530046463013, "eval_runtime": 0.9271, "eval_samples_per_second": 76.582, "eval_steps_per_second": 3.236, "step": 165 }, { "epoch": 52.0, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.817884624004364, "eval_runtime": 0.926, "eval_samples_per_second": 76.671, "eval_steps_per_second": 3.24, "step": 169 }, { "epoch": 52.30769230769231, "grad_norm": 2.3081116676330566, "learning_rate": 2.4074074074074074e-05, "loss": 0.1659, "step": 170 }, { "epoch": 52.92307692307692, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7775102853775024, "eval_runtime": 0.9343, "eval_samples_per_second": 75.993, "eval_steps_per_second": 3.211, "step": 172 }, { "epoch": 53.84615384615385, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7214329838752747, "eval_runtime": 0.9316, "eval_samples_per_second": 76.215, "eval_steps_per_second": 3.22, "step": 175 }, { "epoch": 54.76923076923077, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.7385321259498596, "eval_runtime": 0.9261, "eval_samples_per_second": 76.67, "eval_steps_per_second": 3.24, "step": 178 }, { "epoch": 55.38461538461539, "grad_norm": 2.9334683418273926, "learning_rate": 2.2222222222222223e-05, "loss": 0.1352, "step": 180 }, { "epoch": 56.0, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.7433763742446899, "eval_runtime": 0.9297, "eval_samples_per_second": 76.371, "eval_steps_per_second": 3.227, "step": 182 }, { "epoch": 56.92307692307692, "eval_accuracy": 0.704225352112676, "eval_loss": 0.8971463441848755, "eval_runtime": 0.9262, "eval_samples_per_second": 76.656, "eval_steps_per_second": 3.239, "step": 185 }, { "epoch": 57.84615384615385, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.7821467518806458, "eval_runtime": 0.928, "eval_samples_per_second": 76.509, "eval_steps_per_second": 3.233, "step": 188 }, { "epoch": 58.46153846153846, "grad_norm": 4.111489295959473, "learning_rate": 2.037037037037037e-05, "loss": 0.1309, "step": 190 }, { "epoch": 58.76923076923077, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.7896379828453064, "eval_runtime": 0.9233, "eval_samples_per_second": 76.899, "eval_steps_per_second": 3.249, "step": 191 }, { "epoch": 60.0, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.8339643478393555, "eval_runtime": 0.933, "eval_samples_per_second": 76.101, "eval_steps_per_second": 3.216, "step": 195 }, { "epoch": 60.92307692307692, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.8154428005218506, "eval_runtime": 0.9292, "eval_samples_per_second": 76.406, "eval_steps_per_second": 3.228, "step": 198 }, { "epoch": 61.53846153846154, "grad_norm": 4.092026233673096, "learning_rate": 1.8518518518518518e-05, "loss": 0.1201, "step": 200 }, { "epoch": 61.84615384615385, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.8184639811515808, "eval_runtime": 0.9373, "eval_samples_per_second": 75.748, "eval_steps_per_second": 3.201, "step": 201 }, { "epoch": 62.76923076923077, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.9639940857887268, "eval_runtime": 0.9235, "eval_samples_per_second": 76.878, "eval_steps_per_second": 3.248, "step": 204 }, { "epoch": 64.0, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.8484686613082886, "eval_runtime": 0.9218, "eval_samples_per_second": 77.024, "eval_steps_per_second": 3.255, "step": 208 }, { "epoch": 64.61538461538461, "grad_norm": 2.0266196727752686, "learning_rate": 1.6666666666666667e-05, "loss": 0.1291, "step": 210 }, { "epoch": 64.92307692307692, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.8807466626167297, "eval_runtime": 0.9258, "eval_samples_per_second": 76.694, "eval_steps_per_second": 3.241, "step": 211 }, { "epoch": 65.84615384615384, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.8652527332305908, "eval_runtime": 0.9568, "eval_samples_per_second": 74.203, "eval_steps_per_second": 3.135, "step": 214 }, { "epoch": 66.76923076923077, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.8744374513626099, "eval_runtime": 0.9208, "eval_samples_per_second": 77.107, "eval_steps_per_second": 3.258, "step": 217 }, { "epoch": 67.6923076923077, "grad_norm": 2.0726959705352783, "learning_rate": 1.4814814814814815e-05, "loss": 0.124, "step": 220 }, { "epoch": 68.0, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.8723464012145996, "eval_runtime": 0.9161, "eval_samples_per_second": 77.501, "eval_steps_per_second": 3.275, "step": 221 }, { "epoch": 68.92307692307692, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.8948094248771667, "eval_runtime": 0.93, "eval_samples_per_second": 76.342, "eval_steps_per_second": 3.226, "step": 224 }, { "epoch": 69.84615384615384, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.9777162671089172, "eval_runtime": 0.9269, "eval_samples_per_second": 76.603, "eval_steps_per_second": 3.237, "step": 227 }, { "epoch": 70.76923076923077, "grad_norm": 3.592405319213867, "learning_rate": 1.2962962962962962e-05, "loss": 0.1262, "step": 230 }, { "epoch": 70.76923076923077, "eval_accuracy": 0.7746478873239436, "eval_loss": 0.940915048122406, "eval_runtime": 0.9252, "eval_samples_per_second": 76.743, "eval_steps_per_second": 3.243, "step": 230 }, { "epoch": 72.0, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.9617937207221985, "eval_runtime": 0.9316, "eval_samples_per_second": 76.211, "eval_steps_per_second": 3.22, "step": 234 }, { "epoch": 72.92307692307692, "eval_accuracy": 0.7605633802816901, "eval_loss": 0.9641876220703125, "eval_runtime": 0.9327, "eval_samples_per_second": 76.122, "eval_steps_per_second": 3.216, "step": 237 }, { "epoch": 73.84615384615384, "grad_norm": 1.9709769487380981, "learning_rate": 1.1111111111111112e-05, "loss": 0.1036, "step": 240 }, { "epoch": 73.84615384615384, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.9737982153892517, "eval_runtime": 0.9281, "eval_samples_per_second": 76.5, "eval_steps_per_second": 3.232, "step": 240 }, { "epoch": 74.76923076923077, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.9787779450416565, "eval_runtime": 0.9363, "eval_samples_per_second": 75.831, "eval_steps_per_second": 3.204, "step": 243 }, { "epoch": 76.0, "eval_accuracy": 0.7464788732394366, "eval_loss": 1.0114330053329468, "eval_runtime": 0.9271, "eval_samples_per_second": 76.582, "eval_steps_per_second": 3.236, "step": 247 }, { "epoch": 76.92307692307692, "grad_norm": 3.0993545055389404, "learning_rate": 9.259259259259259e-06, "loss": 0.1183, "step": 250 }, { "epoch": 76.92307692307692, "eval_accuracy": 0.7464788732394366, "eval_loss": 1.0004260540008545, "eval_runtime": 0.9321, "eval_samples_per_second": 76.171, "eval_steps_per_second": 3.218, "step": 250 }, { "epoch": 77.84615384615384, "eval_accuracy": 0.7464788732394366, "eval_loss": 1.0407198667526245, "eval_runtime": 0.9175, "eval_samples_per_second": 77.387, "eval_steps_per_second": 3.27, "step": 253 }, { "epoch": 78.76923076923077, "eval_accuracy": 0.7323943661971831, "eval_loss": 1.1509737968444824, "eval_runtime": 0.9303, "eval_samples_per_second": 76.319, "eval_steps_per_second": 3.225, "step": 256 }, { "epoch": 80.0, "grad_norm": 5.949638843536377, "learning_rate": 7.4074074074074075e-06, "loss": 0.0981, "step": 260 }, { "epoch": 80.0, "eval_accuracy": 0.7464788732394366, "eval_loss": 1.0718269348144531, "eval_runtime": 0.9185, "eval_samples_per_second": 77.304, "eval_steps_per_second": 3.266, "step": 260 }, { "epoch": 80.92307692307692, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.9988247752189636, "eval_runtime": 0.933, "eval_samples_per_second": 76.098, "eval_steps_per_second": 3.215, "step": 263 }, { "epoch": 81.84615384615384, "eval_accuracy": 0.704225352112676, "eval_loss": 1.0053763389587402, "eval_runtime": 0.9278, "eval_samples_per_second": 76.524, "eval_steps_per_second": 3.233, "step": 266 }, { "epoch": 82.76923076923077, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.9896395206451416, "eval_runtime": 0.9224, "eval_samples_per_second": 76.975, "eval_steps_per_second": 3.252, "step": 269 }, { "epoch": 83.07692307692308, "grad_norm": 2.5808982849121094, "learning_rate": 5.555555555555556e-06, "loss": 0.106, "step": 270 }, { "epoch": 84.0, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.9850640892982483, "eval_runtime": 0.9326, "eval_samples_per_second": 76.134, "eval_steps_per_second": 3.217, "step": 273 }, { "epoch": 84.92307692307692, "eval_accuracy": 0.7464788732394366, "eval_loss": 0.9769949913024902, "eval_runtime": 0.9194, "eval_samples_per_second": 77.228, "eval_steps_per_second": 3.263, "step": 276 }, { "epoch": 85.84615384615384, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.9622512459754944, "eval_runtime": 0.9188, "eval_samples_per_second": 77.276, "eval_steps_per_second": 3.265, "step": 279 }, { "epoch": 86.15384615384616, "grad_norm": 3.007988214492798, "learning_rate": 3.7037037037037037e-06, "loss": 0.114, "step": 280 }, { "epoch": 86.76923076923077, "eval_accuracy": 0.704225352112676, "eval_loss": 0.9664495587348938, "eval_runtime": 0.9429, "eval_samples_per_second": 75.301, "eval_steps_per_second": 3.182, "step": 282 }, { "epoch": 88.0, "eval_accuracy": 0.704225352112676, "eval_loss": 0.9780421853065491, "eval_runtime": 0.9309, "eval_samples_per_second": 76.269, "eval_steps_per_second": 3.223, "step": 286 }, { "epoch": 88.92307692307692, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.9670152068138123, "eval_runtime": 0.927, "eval_samples_per_second": 76.593, "eval_steps_per_second": 3.236, "step": 289 }, { "epoch": 89.23076923076923, "grad_norm": 2.9891533851623535, "learning_rate": 1.8518518518518519e-06, "loss": 0.1157, "step": 290 }, { "epoch": 89.84615384615384, "eval_accuracy": 0.7323943661971831, "eval_loss": 0.9586439728736877, "eval_runtime": 0.9232, "eval_samples_per_second": 76.909, "eval_steps_per_second": 3.25, "step": 292 }, { "epoch": 90.76923076923077, "eval_accuracy": 0.7183098591549296, "eval_loss": 0.9586858749389648, "eval_runtime": 0.9402, "eval_samples_per_second": 75.515, "eval_steps_per_second": 3.191, "step": 295 }, { "epoch": 92.0, "eval_accuracy": 0.704225352112676, "eval_loss": 0.9610524773597717, "eval_runtime": 0.9383, "eval_samples_per_second": 75.669, "eval_steps_per_second": 3.197, "step": 299 }, { "epoch": 92.3076923076923, "grad_norm": 2.5594146251678467, "learning_rate": 0.0, "loss": 0.0834, "step": 300 }, { "epoch": 92.3076923076923, "eval_accuracy": 0.704225352112676, "eval_loss": 0.9612475037574768, "eval_runtime": 0.9284, "eval_samples_per_second": 76.472, "eval_steps_per_second": 3.231, "step": 300 }, { "epoch": 92.3076923076923, "step": 300, "total_flos": 2.8402872494292173e+18, "train_loss": 0.23628523468971252, "train_runtime": 1593.1864, "train_samples_per_second": 24.919, "train_steps_per_second": 0.188 }, { "epoch": 92.3076923076923, "eval_accuracy": 0.8169014084507042, "eval_loss": 0.7217584848403931, "eval_runtime": 0.9525, "eval_samples_per_second": 74.537, "eval_steps_per_second": 3.149, "step": 300 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8402872494292173e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }