{ "best_metric": 0.8227848101265823, "best_model_checkpoint": "deit-base-distilled-patch16-224-55-fold5/checkpoint-150", "epoch": 85.71428571428571, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8571428571428571, "eval_accuracy": 0.5189873417721519, "eval_loss": 0.7318889498710632, "eval_runtime": 1.1213, "eval_samples_per_second": 70.455, "eval_steps_per_second": 2.676, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.569620253164557, "eval_loss": 0.675592303276062, "eval_runtime": 1.1562, "eval_samples_per_second": 68.326, "eval_steps_per_second": 2.595, "step": 7 }, { "epoch": 2.857142857142857, "grad_norm": 1.9025880098342896, "learning_rate": 1.6666666666666667e-05, "loss": 0.6544, "step": 10 }, { "epoch": 2.857142857142857, "eval_accuracy": 0.6455696202531646, "eval_loss": 0.619909405708313, "eval_runtime": 1.2043, "eval_samples_per_second": 65.598, "eval_steps_per_second": 2.491, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.620253164556962, "eval_loss": 0.5987372994422913, "eval_runtime": 1.1134, "eval_samples_per_second": 70.952, "eval_steps_per_second": 2.694, "step": 14 }, { "epoch": 4.857142857142857, "eval_accuracy": 0.6708860759493671, "eval_loss": 0.5676239728927612, "eval_runtime": 1.1193, "eval_samples_per_second": 70.581, "eval_steps_per_second": 2.68, "step": 17 }, { "epoch": 5.714285714285714, "grad_norm": 2.5250775814056396, "learning_rate": 3.3333333333333335e-05, "loss": 0.6173, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.5822784810126582, "eval_loss": 0.6542880535125732, "eval_runtime": 1.1319, "eval_samples_per_second": 69.791, "eval_steps_per_second": 2.65, "step": 21 }, { "epoch": 6.857142857142857, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.5309708118438721, "eval_runtime": 1.1635, "eval_samples_per_second": 67.9, "eval_steps_per_second": 2.578, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.6075949367088608, "eval_loss": 0.6724244356155396, "eval_runtime": 1.1519, "eval_samples_per_second": 68.58, "eval_steps_per_second": 2.604, "step": 28 }, { "epoch": 8.571428571428571, "grad_norm": 3.0099055767059326, "learning_rate": 5e-05, "loss": 0.5245, "step": 30 }, { "epoch": 8.857142857142858, "eval_accuracy": 0.6582278481012658, "eval_loss": 0.6443866491317749, "eval_runtime": 1.1308, "eval_samples_per_second": 69.861, "eval_steps_per_second": 2.653, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.5026643872261047, "eval_runtime": 1.1348, "eval_samples_per_second": 69.614, "eval_steps_per_second": 2.644, "step": 35 }, { "epoch": 10.857142857142858, "eval_accuracy": 0.6582278481012658, "eval_loss": 0.6327927708625793, "eval_runtime": 1.1376, "eval_samples_per_second": 69.443, "eval_steps_per_second": 2.637, "step": 38 }, { "epoch": 11.428571428571429, "grad_norm": 3.429919719696045, "learning_rate": 4.814814814814815e-05, "loss": 0.4554, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.48831707239151, "eval_runtime": 1.1619, "eval_samples_per_second": 67.99, "eval_steps_per_second": 2.582, "step": 42 }, { "epoch": 12.857142857142858, "eval_accuracy": 0.6582278481012658, "eval_loss": 0.6736446619033813, "eval_runtime": 1.1393, "eval_samples_per_second": 69.338, "eval_steps_per_second": 2.633, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.4583931863307953, "eval_runtime": 1.1376, "eval_samples_per_second": 69.444, "eval_steps_per_second": 2.637, "step": 49 }, { "epoch": 14.285714285714286, "grad_norm": 3.318418264389038, "learning_rate": 4.62962962962963e-05, "loss": 0.4575, "step": 50 }, { "epoch": 14.857142857142858, "eval_accuracy": 0.6455696202531646, "eval_loss": 0.8099328875541687, "eval_runtime": 1.1512, "eval_samples_per_second": 68.622, "eval_steps_per_second": 2.606, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.47672876715660095, "eval_runtime": 1.1261, "eval_samples_per_second": 70.154, "eval_steps_per_second": 2.664, "step": 56 }, { "epoch": 16.857142857142858, "eval_accuracy": 0.6835443037974683, "eval_loss": 0.6058729290962219, "eval_runtime": 1.1442, "eval_samples_per_second": 69.041, "eval_steps_per_second": 2.622, "step": 59 }, { "epoch": 17.142857142857142, "grad_norm": 4.997932434082031, "learning_rate": 4.4444444444444447e-05, "loss": 0.3798, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.4863268733024597, "eval_runtime": 1.1309, "eval_samples_per_second": 69.854, "eval_steps_per_second": 2.653, "step": 63 }, { "epoch": 18.857142857142858, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.5635764598846436, "eval_runtime": 1.1455, "eval_samples_per_second": 68.967, "eval_steps_per_second": 2.619, "step": 66 }, { "epoch": 20.0, "grad_norm": 3.959798574447632, "learning_rate": 4.259259259259259e-05, "loss": 0.3419, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.4677181839942932, "eval_runtime": 1.1321, "eval_samples_per_second": 69.782, "eval_steps_per_second": 2.65, "step": 70 }, { "epoch": 20.857142857142858, "eval_accuracy": 0.7088607594936709, "eval_loss": 0.4883308410644531, "eval_runtime": 1.1375, "eval_samples_per_second": 69.453, "eval_steps_per_second": 2.637, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.7215189873417721, "eval_loss": 0.5549487471580505, "eval_runtime": 1.1347, "eval_samples_per_second": 69.623, "eval_steps_per_second": 2.644, "step": 77 }, { "epoch": 22.857142857142858, "grad_norm": 6.16960334777832, "learning_rate": 4.074074074074074e-05, "loss": 0.3079, "step": 80 }, { "epoch": 22.857142857142858, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.4324057400226593, "eval_runtime": 1.1312, "eval_samples_per_second": 69.84, "eval_steps_per_second": 2.652, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.6708860759493671, "eval_loss": 0.6183649897575378, "eval_runtime": 1.1713, "eval_samples_per_second": 67.446, "eval_steps_per_second": 2.561, "step": 84 }, { "epoch": 24.857142857142858, "eval_accuracy": 0.7088607594936709, "eval_loss": 0.6148533821105957, "eval_runtime": 1.147, "eval_samples_per_second": 68.878, "eval_steps_per_second": 2.616, "step": 87 }, { "epoch": 25.714285714285715, "grad_norm": 3.098267078399658, "learning_rate": 3.888888888888889e-05, "loss": 0.2616, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.44876629114151, "eval_runtime": 1.1303, "eval_samples_per_second": 69.895, "eval_steps_per_second": 2.654, "step": 91 }, { "epoch": 26.857142857142858, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.43682238459587097, "eval_runtime": 1.1557, "eval_samples_per_second": 68.354, "eval_steps_per_second": 2.596, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.45659732818603516, "eval_runtime": 1.1429, "eval_samples_per_second": 69.124, "eval_steps_per_second": 2.625, "step": 98 }, { "epoch": 28.571428571428573, "grad_norm": 4.5599517822265625, "learning_rate": 3.7037037037037037e-05, "loss": 0.2157, "step": 100 }, { "epoch": 28.857142857142858, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.4657270312309265, "eval_runtime": 1.1397, "eval_samples_per_second": 69.319, "eval_steps_per_second": 2.632, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.4514474868774414, "eval_runtime": 1.1483, "eval_samples_per_second": 68.795, "eval_steps_per_second": 2.612, "step": 105 }, { "epoch": 30.857142857142858, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.5082857608795166, "eval_runtime": 1.1218, "eval_samples_per_second": 70.425, "eval_steps_per_second": 2.674, "step": 108 }, { "epoch": 31.428571428571427, "grad_norm": 2.26061749458313, "learning_rate": 3.518518518518519e-05, "loss": 0.2258, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.5260893106460571, "eval_runtime": 1.1489, "eval_samples_per_second": 68.759, "eval_steps_per_second": 2.611, "step": 112 }, { "epoch": 32.857142857142854, "eval_accuracy": 0.759493670886076, "eval_loss": 0.556735634803772, "eval_runtime": 1.1778, "eval_samples_per_second": 67.076, "eval_steps_per_second": 2.547, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.5566364526748657, "eval_runtime": 1.1382, "eval_samples_per_second": 69.406, "eval_steps_per_second": 2.636, "step": 119 }, { "epoch": 34.285714285714285, "grad_norm": 2.7529988288879395, "learning_rate": 3.3333333333333335e-05, "loss": 0.1972, "step": 120 }, { "epoch": 34.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.5494687557220459, "eval_runtime": 1.1486, "eval_samples_per_second": 68.78, "eval_steps_per_second": 2.612, "step": 122 }, { "epoch": 36.0, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.49917009472846985, "eval_runtime": 1.1413, "eval_samples_per_second": 69.221, "eval_steps_per_second": 2.629, "step": 126 }, { "epoch": 36.857142857142854, "eval_accuracy": 0.759493670886076, "eval_loss": 0.566094696521759, "eval_runtime": 1.1393, "eval_samples_per_second": 69.341, "eval_steps_per_second": 2.633, "step": 129 }, { "epoch": 37.142857142857146, "grad_norm": 3.6283090114593506, "learning_rate": 3.148148148148148e-05, "loss": 0.1709, "step": 130 }, { "epoch": 38.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.7325516939163208, "eval_runtime": 1.1565, "eval_samples_per_second": 68.311, "eval_steps_per_second": 2.594, "step": 133 }, { "epoch": 38.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.5634633898735046, "eval_runtime": 1.1297, "eval_samples_per_second": 69.93, "eval_steps_per_second": 2.656, "step": 136 }, { "epoch": 40.0, "grad_norm": 2.0195701122283936, "learning_rate": 2.962962962962963e-05, "loss": 0.1537, "step": 140 }, { "epoch": 40.0, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.8130123019218445, "eval_runtime": 1.1502, "eval_samples_per_second": 68.685, "eval_steps_per_second": 2.608, "step": 140 }, { "epoch": 40.857142857142854, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.6983793377876282, "eval_runtime": 1.1474, "eval_samples_per_second": 68.849, "eval_steps_per_second": 2.615, "step": 143 }, { "epoch": 42.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.7777484655380249, "eval_runtime": 1.1447, "eval_samples_per_second": 69.011, "eval_steps_per_second": 2.621, "step": 147 }, { "epoch": 42.857142857142854, "grad_norm": 3.92864727973938, "learning_rate": 2.777777777777778e-05, "loss": 0.1687, "step": 150 }, { "epoch": 42.857142857142854, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.645160436630249, "eval_runtime": 1.1266, "eval_samples_per_second": 70.123, "eval_steps_per_second": 2.663, "step": 150 }, { "epoch": 44.0, "eval_accuracy": 0.7215189873417721, "eval_loss": 0.8527082800865173, "eval_runtime": 1.162, "eval_samples_per_second": 67.988, "eval_steps_per_second": 2.582, "step": 154 }, { "epoch": 44.857142857142854, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.6483196020126343, "eval_runtime": 1.1395, "eval_samples_per_second": 69.329, "eval_steps_per_second": 2.633, "step": 157 }, { "epoch": 45.714285714285715, "grad_norm": 2.570295810699463, "learning_rate": 2.5925925925925925e-05, "loss": 0.1588, "step": 160 }, { "epoch": 46.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.8185186982154846, "eval_runtime": 1.1438, "eval_samples_per_second": 69.068, "eval_steps_per_second": 2.623, "step": 161 }, { "epoch": 46.857142857142854, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.6821473836898804, "eval_runtime": 1.174, "eval_samples_per_second": 67.29, "eval_steps_per_second": 2.555, "step": 164 }, { "epoch": 48.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.7593724131584167, "eval_runtime": 1.1514, "eval_samples_per_second": 68.611, "eval_steps_per_second": 2.605, "step": 168 }, { "epoch": 48.57142857142857, "grad_norm": 2.7910444736480713, "learning_rate": 2.4074074074074074e-05, "loss": 0.144, "step": 170 }, { "epoch": 48.857142857142854, "eval_accuracy": 0.759493670886076, "eval_loss": 1.0232322216033936, "eval_runtime": 1.1447, "eval_samples_per_second": 69.012, "eval_steps_per_second": 2.621, "step": 171 }, { "epoch": 50.0, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.6178007125854492, "eval_runtime": 1.1535, "eval_samples_per_second": 68.485, "eval_steps_per_second": 2.601, "step": 175 }, { "epoch": 50.857142857142854, "eval_accuracy": 0.759493670886076, "eval_loss": 0.6243430376052856, "eval_runtime": 1.149, "eval_samples_per_second": 68.756, "eval_steps_per_second": 2.611, "step": 178 }, { "epoch": 51.42857142857143, "grad_norm": 2.441843271255493, "learning_rate": 2.2222222222222223e-05, "loss": 0.1449, "step": 180 }, { "epoch": 52.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.8159094452857971, "eval_runtime": 1.1388, "eval_samples_per_second": 69.372, "eval_steps_per_second": 2.634, "step": 182 }, { "epoch": 52.857142857142854, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.6664446592330933, "eval_runtime": 1.1472, "eval_samples_per_second": 68.865, "eval_steps_per_second": 2.615, "step": 185 }, { "epoch": 54.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.7069984078407288, "eval_runtime": 1.1371, "eval_samples_per_second": 69.475, "eval_steps_per_second": 2.638, "step": 189 }, { "epoch": 54.285714285714285, "grad_norm": 1.9345264434814453, "learning_rate": 2.037037037037037e-05, "loss": 0.144, "step": 190 }, { "epoch": 54.857142857142854, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.7360506057739258, "eval_runtime": 1.1264, "eval_samples_per_second": 70.132, "eval_steps_per_second": 2.663, "step": 192 }, { "epoch": 56.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.6655969023704529, "eval_runtime": 1.1372, "eval_samples_per_second": 69.471, "eval_steps_per_second": 2.638, "step": 196 }, { "epoch": 56.857142857142854, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.7487228512763977, "eval_runtime": 1.158, "eval_samples_per_second": 68.222, "eval_steps_per_second": 2.591, "step": 199 }, { "epoch": 57.142857142857146, "grad_norm": 2.869379758834839, "learning_rate": 1.8518518518518518e-05, "loss": 0.1199, "step": 200 }, { "epoch": 58.0, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.7992926239967346, "eval_runtime": 1.1371, "eval_samples_per_second": 69.472, "eval_steps_per_second": 2.638, "step": 203 }, { "epoch": 58.857142857142854, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.7426438927650452, "eval_runtime": 1.1515, "eval_samples_per_second": 68.608, "eval_steps_per_second": 2.605, "step": 206 }, { "epoch": 60.0, "grad_norm": 3.277022123336792, "learning_rate": 1.6666666666666667e-05, "loss": 0.1258, "step": 210 }, { "epoch": 60.0, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.7530637979507446, "eval_runtime": 1.1459, "eval_samples_per_second": 68.943, "eval_steps_per_second": 2.618, "step": 210 }, { "epoch": 60.857142857142854, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.7388187646865845, "eval_runtime": 1.1538, "eval_samples_per_second": 68.468, "eval_steps_per_second": 2.6, "step": 213 }, { "epoch": 62.0, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.7394750714302063, "eval_runtime": 1.1338, "eval_samples_per_second": 69.677, "eval_steps_per_second": 2.646, "step": 217 }, { "epoch": 62.857142857142854, "grad_norm": 2.4016706943511963, "learning_rate": 1.4814814814814815e-05, "loss": 0.1392, "step": 220 }, { "epoch": 62.857142857142854, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.8237771391868591, "eval_runtime": 1.1382, "eval_samples_per_second": 69.41, "eval_steps_per_second": 2.636, "step": 220 }, { "epoch": 64.0, "eval_accuracy": 0.7215189873417721, "eval_loss": 0.9302475452423096, "eval_runtime": 1.1601, "eval_samples_per_second": 68.098, "eval_steps_per_second": 2.586, "step": 224 }, { "epoch": 64.85714285714286, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.7539066076278687, "eval_runtime": 1.142, "eval_samples_per_second": 69.178, "eval_steps_per_second": 2.627, "step": 227 }, { "epoch": 65.71428571428571, "grad_norm": 2.6353225708007812, "learning_rate": 1.2962962962962962e-05, "loss": 0.1303, "step": 230 }, { "epoch": 66.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6738837361335754, "eval_runtime": 1.127, "eval_samples_per_second": 70.095, "eval_steps_per_second": 2.662, "step": 231 }, { "epoch": 66.85714285714286, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.662686824798584, "eval_runtime": 1.1663, "eval_samples_per_second": 67.734, "eval_steps_per_second": 2.572, "step": 234 }, { "epoch": 68.0, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.640326201915741, "eval_runtime": 1.133, "eval_samples_per_second": 69.724, "eval_steps_per_second": 2.648, "step": 238 }, { "epoch": 68.57142857142857, "grad_norm": 3.1281940937042236, "learning_rate": 1.1111111111111112e-05, "loss": 0.1423, "step": 240 }, { "epoch": 68.85714285714286, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.6378774642944336, "eval_runtime": 1.1326, "eval_samples_per_second": 69.753, "eval_steps_per_second": 2.649, "step": 241 }, { "epoch": 70.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.7658242583274841, "eval_runtime": 1.1438, "eval_samples_per_second": 69.068, "eval_steps_per_second": 2.623, "step": 245 }, { "epoch": 70.85714285714286, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.9195311069488525, "eval_runtime": 1.1256, "eval_samples_per_second": 70.185, "eval_steps_per_second": 2.665, "step": 248 }, { "epoch": 71.42857142857143, "grad_norm": 2.4990944862365723, "learning_rate": 9.259259259259259e-06, "loss": 0.1019, "step": 250 }, { "epoch": 72.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.7286852598190308, "eval_runtime": 1.1291, "eval_samples_per_second": 69.969, "eval_steps_per_second": 2.657, "step": 252 }, { "epoch": 72.85714285714286, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.6548206210136414, "eval_runtime": 1.127, "eval_samples_per_second": 70.098, "eval_steps_per_second": 2.662, "step": 255 }, { "epoch": 74.0, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.653405487537384, "eval_runtime": 1.1305, "eval_samples_per_second": 69.88, "eval_steps_per_second": 2.654, "step": 259 }, { "epoch": 74.28571428571429, "grad_norm": 2.0475404262542725, "learning_rate": 7.4074074074074075e-06, "loss": 0.1286, "step": 260 }, { "epoch": 74.85714285714286, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.7330575585365295, "eval_runtime": 1.1366, "eval_samples_per_second": 69.508, "eval_steps_per_second": 2.64, "step": 262 }, { "epoch": 76.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.7845055460929871, "eval_runtime": 1.1356, "eval_samples_per_second": 69.566, "eval_steps_per_second": 2.642, "step": 266 }, { "epoch": 76.85714285714286, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.7187694311141968, "eval_runtime": 1.1547, "eval_samples_per_second": 68.416, "eval_steps_per_second": 2.598, "step": 269 }, { "epoch": 77.14285714285714, "grad_norm": 3.98937726020813, "learning_rate": 5.555555555555556e-06, "loss": 0.1054, "step": 270 }, { "epoch": 78.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.6595200896263123, "eval_runtime": 1.1479, "eval_samples_per_second": 68.823, "eval_steps_per_second": 2.614, "step": 273 }, { "epoch": 78.85714285714286, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.662256121635437, "eval_runtime": 1.1304, "eval_samples_per_second": 69.885, "eval_steps_per_second": 2.654, "step": 276 }, { "epoch": 80.0, "grad_norm": 1.8091683387756348, "learning_rate": 3.7037037037037037e-06, "loss": 0.1053, "step": 280 }, { "epoch": 80.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.7336876392364502, "eval_runtime": 1.1418, "eval_samples_per_second": 69.186, "eval_steps_per_second": 2.627, "step": 280 }, { "epoch": 80.85714285714286, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.8085371851921082, "eval_runtime": 1.1388, "eval_samples_per_second": 69.372, "eval_steps_per_second": 2.634, "step": 283 }, { "epoch": 82.0, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.8200588226318359, "eval_runtime": 1.1478, "eval_samples_per_second": 68.825, "eval_steps_per_second": 2.614, "step": 287 }, { "epoch": 82.85714285714286, "grad_norm": 3.0363099575042725, "learning_rate": 1.8518518518518519e-06, "loss": 0.1086, "step": 290 }, { "epoch": 82.85714285714286, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.7947056293487549, "eval_runtime": 1.1218, "eval_samples_per_second": 70.419, "eval_steps_per_second": 2.674, "step": 290 }, { "epoch": 84.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.7668984532356262, "eval_runtime": 1.1428, "eval_samples_per_second": 69.129, "eval_steps_per_second": 2.625, "step": 294 }, { "epoch": 84.85714285714286, "eval_accuracy": 0.759493670886076, "eval_loss": 0.7581708431243896, "eval_runtime": 1.1285, "eval_samples_per_second": 70.006, "eval_steps_per_second": 2.658, "step": 297 }, { "epoch": 85.71428571428571, "grad_norm": 2.2897379398345947, "learning_rate": 0.0, "loss": 0.1186, "step": 300 }, { "epoch": 85.71428571428571, "eval_accuracy": 0.759493670886076, "eval_loss": 0.7541030645370483, "eval_runtime": 1.1594, "eval_samples_per_second": 68.138, "eval_steps_per_second": 2.588, "step": 300 }, { "epoch": 85.71428571428571, "step": 300, "total_flos": 2.9362240500074496e+18, "train_loss": 0.23499208807945252, "train_runtime": 1665.1689, "train_samples_per_second": 26.544, "train_steps_per_second": 0.18 }, { "epoch": 85.71428571428571, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.645160436630249, "eval_runtime": 1.1931, "eval_samples_per_second": 66.215, "eval_steps_per_second": 2.514, "step": 300 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.9362240500074496e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }