{ "best_metric": 0.8481012658227848, "best_model_checkpoint": "beit-base-patch16-224-fold1/checkpoint-248", "epoch": 85.71428571428571, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8571428571428571, "eval_accuracy": 0.45569620253164556, "eval_loss": 0.8050068020820618, "eval_runtime": 20.5223, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.146, "step": 3 }, { "epoch": 2.0, "eval_accuracy": 0.569620253164557, "eval_loss": 0.7151382565498352, "eval_runtime": 1.3733, "eval_samples_per_second": 57.526, "eval_steps_per_second": 2.185, "step": 7 }, { "epoch": 2.857142857142857, "grad_norm": 4.684664249420166, "learning_rate": 1.6666666666666667e-05, "loss": 0.8103, "step": 10 }, { "epoch": 2.857142857142857, "eval_accuracy": 0.5569620253164557, "eval_loss": 0.6821601390838623, "eval_runtime": 1.4197, "eval_samples_per_second": 55.644, "eval_steps_per_second": 2.113, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.569620253164557, "eval_loss": 0.640774667263031, "eval_runtime": 1.4465, "eval_samples_per_second": 54.616, "eval_steps_per_second": 2.074, "step": 14 }, { "epoch": 4.857142857142857, "eval_accuracy": 0.6708860759493671, "eval_loss": 0.6244170069694519, "eval_runtime": 1.5149, "eval_samples_per_second": 52.147, "eval_steps_per_second": 1.98, "step": 17 }, { "epoch": 5.714285714285714, "grad_norm": 2.6553750038146973, "learning_rate": 3.3333333333333335e-05, "loss": 0.6583, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.6708860759493671, "eval_loss": 0.5892533659934998, "eval_runtime": 1.448, "eval_samples_per_second": 54.56, "eval_steps_per_second": 2.072, "step": 21 }, { "epoch": 6.857142857142857, "eval_accuracy": 0.6329113924050633, "eval_loss": 0.5876858234405518, "eval_runtime": 1.4555, "eval_samples_per_second": 54.277, "eval_steps_per_second": 2.061, "step": 24 }, { "epoch": 8.0, "eval_accuracy": 0.6835443037974683, "eval_loss": 0.5752159953117371, "eval_runtime": 1.4455, "eval_samples_per_second": 54.653, "eval_steps_per_second": 2.075, "step": 28 }, { "epoch": 8.571428571428571, "grad_norm": 8.61998176574707, "learning_rate": 5e-05, "loss": 0.5912, "step": 30 }, { "epoch": 8.857142857142858, "eval_accuracy": 0.6455696202531646, "eval_loss": 0.5825986862182617, "eval_runtime": 1.4574, "eval_samples_per_second": 54.204, "eval_steps_per_second": 2.058, "step": 31 }, { "epoch": 10.0, "eval_accuracy": 0.6835443037974683, "eval_loss": 0.5469183325767517, "eval_runtime": 1.4759, "eval_samples_per_second": 53.528, "eval_steps_per_second": 2.033, "step": 35 }, { "epoch": 10.857142857142858, "eval_accuracy": 0.6582278481012658, "eval_loss": 0.6173216700553894, "eval_runtime": 1.4368, "eval_samples_per_second": 54.985, "eval_steps_per_second": 2.088, "step": 38 }, { "epoch": 11.428571428571429, "grad_norm": 4.089001655578613, "learning_rate": 4.814814814814815e-05, "loss": 0.5301, "step": 40 }, { "epoch": 12.0, "eval_accuracy": 0.6962025316455697, "eval_loss": 0.51507169008255, "eval_runtime": 1.4391, "eval_samples_per_second": 54.894, "eval_steps_per_second": 2.085, "step": 42 }, { "epoch": 12.857142857142858, "eval_accuracy": 0.6962025316455697, "eval_loss": 0.5105239152908325, "eval_runtime": 1.5017, "eval_samples_per_second": 52.608, "eval_steps_per_second": 1.998, "step": 45 }, { "epoch": 14.0, "eval_accuracy": 0.7088607594936709, "eval_loss": 0.5488570928573608, "eval_runtime": 1.4299, "eval_samples_per_second": 55.25, "eval_steps_per_second": 2.098, "step": 49 }, { "epoch": 14.285714285714286, "grad_norm": 5.854975700378418, "learning_rate": 4.62962962962963e-05, "loss": 0.4703, "step": 50 }, { "epoch": 14.857142857142858, "eval_accuracy": 0.6835443037974683, "eval_loss": 0.5724519491195679, "eval_runtime": 1.4269, "eval_samples_per_second": 55.364, "eval_steps_per_second": 2.102, "step": 52 }, { "epoch": 16.0, "eval_accuracy": 0.6962025316455697, "eval_loss": 0.5559752583503723, "eval_runtime": 1.4698, "eval_samples_per_second": 53.75, "eval_steps_per_second": 2.041, "step": 56 }, { "epoch": 16.857142857142858, "eval_accuracy": 0.6708860759493671, "eval_loss": 0.5824136137962341, "eval_runtime": 1.4546, "eval_samples_per_second": 54.312, "eval_steps_per_second": 2.062, "step": 59 }, { "epoch": 17.142857142857142, "grad_norm": 6.086174964904785, "learning_rate": 4.4444444444444447e-05, "loss": 0.4189, "step": 60 }, { "epoch": 18.0, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.540145754814148, "eval_runtime": 1.4352, "eval_samples_per_second": 55.046, "eval_steps_per_second": 2.09, "step": 63 }, { "epoch": 18.857142857142858, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.514731764793396, "eval_runtime": 1.4503, "eval_samples_per_second": 54.473, "eval_steps_per_second": 2.069, "step": 66 }, { "epoch": 20.0, "grad_norm": 8.862787246704102, "learning_rate": 4.259259259259259e-05, "loss": 0.3741, "step": 70 }, { "epoch": 20.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.48641237616539, "eval_runtime": 1.4392, "eval_samples_per_second": 54.891, "eval_steps_per_second": 2.084, "step": 70 }, { "epoch": 20.857142857142858, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.5272199511528015, "eval_runtime": 1.461, "eval_samples_per_second": 54.072, "eval_steps_per_second": 2.053, "step": 73 }, { "epoch": 22.0, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.49136921763420105, "eval_runtime": 1.4904, "eval_samples_per_second": 53.005, "eval_steps_per_second": 2.013, "step": 77 }, { "epoch": 22.857142857142858, "grad_norm": 8.650327682495117, "learning_rate": 4.074074074074074e-05, "loss": 0.387, "step": 80 }, { "epoch": 22.857142857142858, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.5658156275749207, "eval_runtime": 1.498, "eval_samples_per_second": 52.738, "eval_steps_per_second": 2.003, "step": 80 }, { "epoch": 24.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.4662097096443176, "eval_runtime": 1.512, "eval_samples_per_second": 52.249, "eval_steps_per_second": 1.984, "step": 84 }, { "epoch": 24.857142857142858, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.4376372694969177, "eval_runtime": 1.5044, "eval_samples_per_second": 52.514, "eval_steps_per_second": 1.994, "step": 87 }, { "epoch": 25.714285714285715, "grad_norm": 6.057330131530762, "learning_rate": 3.888888888888889e-05, "loss": 0.3502, "step": 90 }, { "epoch": 26.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.5366873145103455, "eval_runtime": 1.5039, "eval_samples_per_second": 52.529, "eval_steps_per_second": 1.995, "step": 91 }, { "epoch": 26.857142857142858, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.5490015745162964, "eval_runtime": 1.4224, "eval_samples_per_second": 55.541, "eval_steps_per_second": 2.109, "step": 94 }, { "epoch": 28.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.7162956595420837, "eval_runtime": 1.4548, "eval_samples_per_second": 54.303, "eval_steps_per_second": 2.062, "step": 98 }, { "epoch": 28.571428571428573, "grad_norm": 6.062076568603516, "learning_rate": 3.7037037037037037e-05, "loss": 0.3148, "step": 100 }, { "epoch": 28.857142857142858, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.6004660129547119, "eval_runtime": 1.4277, "eval_samples_per_second": 55.333, "eval_steps_per_second": 2.101, "step": 101 }, { "epoch": 30.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.6500609517097473, "eval_runtime": 1.4701, "eval_samples_per_second": 53.739, "eval_steps_per_second": 2.041, "step": 105 }, { "epoch": 30.857142857142858, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.5312591791152954, "eval_runtime": 1.4996, "eval_samples_per_second": 52.68, "eval_steps_per_second": 2.001, "step": 108 }, { "epoch": 31.428571428571427, "grad_norm": 5.8153252601623535, "learning_rate": 3.518518518518519e-05, "loss": 0.2973, "step": 110 }, { "epoch": 32.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.5466110706329346, "eval_runtime": 1.5101, "eval_samples_per_second": 52.314, "eval_steps_per_second": 1.987, "step": 112 }, { "epoch": 32.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.5730607509613037, "eval_runtime": 1.4879, "eval_samples_per_second": 53.094, "eval_steps_per_second": 2.016, "step": 115 }, { "epoch": 34.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6543712615966797, "eval_runtime": 1.4649, "eval_samples_per_second": 53.927, "eval_steps_per_second": 2.048, "step": 119 }, { "epoch": 34.285714285714285, "grad_norm": 5.931222438812256, "learning_rate": 3.3333333333333335e-05, "loss": 0.2474, "step": 120 }, { "epoch": 34.857142857142854, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.6060739159584045, "eval_runtime": 1.4417, "eval_samples_per_second": 54.798, "eval_steps_per_second": 2.081, "step": 122 }, { "epoch": 36.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.5815550684928894, "eval_runtime": 1.4713, "eval_samples_per_second": 53.693, "eval_steps_per_second": 2.039, "step": 126 }, { "epoch": 36.857142857142854, "eval_accuracy": 0.759493670886076, "eval_loss": 0.7160954475402832, "eval_runtime": 1.5016, "eval_samples_per_second": 52.612, "eval_steps_per_second": 1.998, "step": 129 }, { "epoch": 37.142857142857146, "grad_norm": 5.137592792510986, "learning_rate": 3.148148148148148e-05, "loss": 0.2033, "step": 130 }, { "epoch": 38.0, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.6234713196754456, "eval_runtime": 1.4848, "eval_samples_per_second": 53.205, "eval_steps_per_second": 2.02, "step": 133 }, { "epoch": 38.857142857142854, "eval_accuracy": 0.759493670886076, "eval_loss": 0.7888889312744141, "eval_runtime": 1.4207, "eval_samples_per_second": 55.607, "eval_steps_per_second": 2.112, "step": 136 }, { "epoch": 40.0, "grad_norm": 5.441008567810059, "learning_rate": 2.962962962962963e-05, "loss": 0.2338, "step": 140 }, { "epoch": 40.0, "eval_accuracy": 0.759493670886076, "eval_loss": 0.5943406224250793, "eval_runtime": 1.4387, "eval_samples_per_second": 54.911, "eval_steps_per_second": 2.085, "step": 140 }, { "epoch": 40.857142857142854, "eval_accuracy": 0.7341772151898734, "eval_loss": 0.6169915795326233, "eval_runtime": 1.4387, "eval_samples_per_second": 54.909, "eval_steps_per_second": 2.085, "step": 143 }, { "epoch": 42.0, "eval_accuracy": 0.6962025316455697, "eval_loss": 0.6963752508163452, "eval_runtime": 1.4482, "eval_samples_per_second": 54.55, "eval_steps_per_second": 2.072, "step": 147 }, { "epoch": 42.857142857142854, "grad_norm": 6.6485161781311035, "learning_rate": 2.777777777777778e-05, "loss": 0.2067, "step": 150 }, { "epoch": 42.857142857142854, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.7153680324554443, "eval_runtime": 1.574, "eval_samples_per_second": 50.19, "eval_steps_per_second": 1.906, "step": 150 }, { "epoch": 44.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.767503559589386, "eval_runtime": 1.4899, "eval_samples_per_second": 53.025, "eval_steps_per_second": 2.014, "step": 154 }, { "epoch": 44.857142857142854, "eval_accuracy": 0.7468354430379747, "eval_loss": 0.7765600681304932, "eval_runtime": 1.4794, "eval_samples_per_second": 53.4, "eval_steps_per_second": 2.028, "step": 157 }, { "epoch": 45.714285714285715, "grad_norm": 6.1349005699157715, "learning_rate": 2.5925925925925925e-05, "loss": 0.2133, "step": 160 }, { "epoch": 46.0, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.932968258857727, "eval_runtime": 1.4465, "eval_samples_per_second": 54.616, "eval_steps_per_second": 2.074, "step": 161 }, { "epoch": 46.857142857142854, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.64939284324646, "eval_runtime": 1.4334, "eval_samples_per_second": 55.113, "eval_steps_per_second": 2.093, "step": 164 }, { "epoch": 48.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.5709493160247803, "eval_runtime": 1.4722, "eval_samples_per_second": 53.662, "eval_steps_per_second": 2.038, "step": 168 }, { "epoch": 48.57142857142857, "grad_norm": 3.4344000816345215, "learning_rate": 2.4074074074074074e-05, "loss": 0.2004, "step": 170 }, { "epoch": 48.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6462149620056152, "eval_runtime": 1.5036, "eval_samples_per_second": 52.54, "eval_steps_per_second": 1.995, "step": 171 }, { "epoch": 50.0, "eval_accuracy": 0.7721518987341772, "eval_loss": 0.6667977571487427, "eval_runtime": 1.5326, "eval_samples_per_second": 51.547, "eval_steps_per_second": 1.957, "step": 175 }, { "epoch": 50.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6305052638053894, "eval_runtime": 1.4376, "eval_samples_per_second": 54.953, "eval_steps_per_second": 2.087, "step": 178 }, { "epoch": 51.42857142857143, "grad_norm": 5.206828594207764, "learning_rate": 2.2222222222222223e-05, "loss": 0.188, "step": 180 }, { "epoch": 52.0, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.7189355492591858, "eval_runtime": 1.4518, "eval_samples_per_second": 54.415, "eval_steps_per_second": 2.066, "step": 182 }, { "epoch": 52.857142857142854, "eval_accuracy": 0.7848101265822784, "eval_loss": 0.6853471398353577, "eval_runtime": 1.49, "eval_samples_per_second": 53.02, "eval_steps_per_second": 2.013, "step": 185 }, { "epoch": 54.0, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.8039994835853577, "eval_runtime": 1.4908, "eval_samples_per_second": 52.991, "eval_steps_per_second": 2.012, "step": 189 }, { "epoch": 54.285714285714285, "grad_norm": 5.863402843475342, "learning_rate": 2.037037037037037e-05, "loss": 0.1623, "step": 190 }, { "epoch": 54.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.69575035572052, "eval_runtime": 1.5387, "eval_samples_per_second": 51.343, "eval_steps_per_second": 1.95, "step": 192 }, { "epoch": 56.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6906704902648926, "eval_runtime": 1.576, "eval_samples_per_second": 50.126, "eval_steps_per_second": 1.904, "step": 196 }, { "epoch": 56.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6821295619010925, "eval_runtime": 1.4415, "eval_samples_per_second": 54.804, "eval_steps_per_second": 2.081, "step": 199 }, { "epoch": 57.142857142857146, "grad_norm": 4.665853500366211, "learning_rate": 1.8518518518518518e-05, "loss": 0.1588, "step": 200 }, { "epoch": 58.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6534023880958557, "eval_runtime": 1.429, "eval_samples_per_second": 55.283, "eval_steps_per_second": 2.099, "step": 203 }, { "epoch": 58.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.7192457318305969, "eval_runtime": 1.4185, "eval_samples_per_second": 55.694, "eval_steps_per_second": 2.115, "step": 206 }, { "epoch": 60.0, "grad_norm": 6.225094318389893, "learning_rate": 1.6666666666666667e-05, "loss": 0.1607, "step": 210 }, { "epoch": 60.0, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.7752671837806702, "eval_runtime": 1.4284, "eval_samples_per_second": 55.308, "eval_steps_per_second": 2.1, "step": 210 }, { "epoch": 60.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.8949642181396484, "eval_runtime": 1.5505, "eval_samples_per_second": 50.951, "eval_steps_per_second": 1.935, "step": 213 }, { "epoch": 62.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.7903599739074707, "eval_runtime": 1.5102, "eval_samples_per_second": 52.311, "eval_steps_per_second": 1.986, "step": 217 }, { "epoch": 62.857142857142854, "grad_norm": 4.583127498626709, "learning_rate": 1.4814814814814815e-05, "loss": 0.1767, "step": 220 }, { "epoch": 62.857142857142854, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6972522735595703, "eval_runtime": 1.4628, "eval_samples_per_second": 54.005, "eval_steps_per_second": 2.051, "step": 220 }, { "epoch": 64.0, "eval_accuracy": 0.7974683544303798, "eval_loss": 0.6694443225860596, "eval_runtime": 1.4975, "eval_samples_per_second": 52.754, "eval_steps_per_second": 2.003, "step": 224 }, { "epoch": 64.85714285714286, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6338869333267212, "eval_runtime": 1.4504, "eval_samples_per_second": 54.468, "eval_steps_per_second": 2.068, "step": 227 }, { "epoch": 65.71428571428571, "grad_norm": 3.7681446075439453, "learning_rate": 1.2962962962962962e-05, "loss": 0.1463, "step": 230 }, { "epoch": 66.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6530351042747498, "eval_runtime": 1.4628, "eval_samples_per_second": 54.005, "eval_steps_per_second": 2.051, "step": 231 }, { "epoch": 66.85714285714286, "eval_accuracy": 0.810126582278481, "eval_loss": 0.6141919493675232, "eval_runtime": 1.5198, "eval_samples_per_second": 51.98, "eval_steps_per_second": 1.974, "step": 234 }, { "epoch": 68.0, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.628998339176178, "eval_runtime": 1.4603, "eval_samples_per_second": 54.1, "eval_steps_per_second": 2.054, "step": 238 }, { "epoch": 68.57142857142857, "grad_norm": 5.3702874183654785, "learning_rate": 1.1111111111111112e-05, "loss": 0.1287, "step": 240 }, { "epoch": 68.85714285714286, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.6333932280540466, "eval_runtime": 1.437, "eval_samples_per_second": 54.977, "eval_steps_per_second": 2.088, "step": 241 }, { "epoch": 70.0, "eval_accuracy": 0.810126582278481, "eval_loss": 0.8058773279190063, "eval_runtime": 1.437, "eval_samples_per_second": 54.974, "eval_steps_per_second": 2.088, "step": 245 }, { "epoch": 70.85714285714286, "eval_accuracy": 0.8481012658227848, "eval_loss": 0.7241003513336182, "eval_runtime": 1.4148, "eval_samples_per_second": 55.837, "eval_steps_per_second": 2.12, "step": 248 }, { "epoch": 71.42857142857143, "grad_norm": 4.752800941467285, "learning_rate": 9.259259259259259e-06, "loss": 0.1323, "step": 250 }, { "epoch": 72.0, "eval_accuracy": 0.8481012658227848, "eval_loss": 0.6835869550704956, "eval_runtime": 1.4344, "eval_samples_per_second": 55.075, "eval_steps_per_second": 2.091, "step": 252 }, { "epoch": 72.85714285714286, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.6587881445884705, "eval_runtime": 1.559, "eval_samples_per_second": 50.672, "eval_steps_per_second": 1.924, "step": 255 }, { "epoch": 74.0, "eval_accuracy": 0.8481012658227848, "eval_loss": 0.6597732901573181, "eval_runtime": 1.5278, "eval_samples_per_second": 51.709, "eval_steps_per_second": 1.964, "step": 259 }, { "epoch": 74.28571428571429, "grad_norm": 3.2891921997070312, "learning_rate": 7.4074074074074075e-06, "loss": 0.1042, "step": 260 }, { "epoch": 74.85714285714286, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.713896632194519, "eval_runtime": 1.4654, "eval_samples_per_second": 53.909, "eval_steps_per_second": 2.047, "step": 262 }, { "epoch": 76.0, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.72358638048172, "eval_runtime": 1.4884, "eval_samples_per_second": 53.077, "eval_steps_per_second": 2.016, "step": 266 }, { "epoch": 76.85714285714286, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.6918818950653076, "eval_runtime": 1.4316, "eval_samples_per_second": 55.184, "eval_steps_per_second": 2.096, "step": 269 }, { "epoch": 77.14285714285714, "grad_norm": 4.013108730316162, "learning_rate": 5.555555555555556e-06, "loss": 0.1106, "step": 270 }, { "epoch": 78.0, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.6568043828010559, "eval_runtime": 1.5155, "eval_samples_per_second": 52.128, "eval_steps_per_second": 1.98, "step": 273 }, { "epoch": 78.85714285714286, "eval_accuracy": 0.8481012658227848, "eval_loss": 0.6556110382080078, "eval_runtime": 1.5408, "eval_samples_per_second": 51.272, "eval_steps_per_second": 1.947, "step": 276 }, { "epoch": 80.0, "grad_norm": 6.208752632141113, "learning_rate": 3.7037037037037037e-06, "loss": 0.1348, "step": 280 }, { "epoch": 80.0, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.6612224578857422, "eval_runtime": 1.4365, "eval_samples_per_second": 54.993, "eval_steps_per_second": 2.088, "step": 280 }, { "epoch": 80.85714285714286, "eval_accuracy": 0.8227848101265823, "eval_loss": 0.6686135530471802, "eval_runtime": 1.4579, "eval_samples_per_second": 54.186, "eval_steps_per_second": 2.058, "step": 283 }, { "epoch": 82.0, "eval_accuracy": 0.8481012658227848, "eval_loss": 0.6705390214920044, "eval_runtime": 1.4513, "eval_samples_per_second": 54.434, "eval_steps_per_second": 2.067, "step": 287 }, { "epoch": 82.85714285714286, "grad_norm": 4.1432647705078125, "learning_rate": 1.8518518518518519e-06, "loss": 0.1352, "step": 290 }, { "epoch": 82.85714285714286, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.677626371383667, "eval_runtime": 1.4762, "eval_samples_per_second": 53.516, "eval_steps_per_second": 2.032, "step": 290 }, { "epoch": 84.0, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.6872657537460327, "eval_runtime": 1.5716, "eval_samples_per_second": 50.268, "eval_steps_per_second": 1.909, "step": 294 }, { "epoch": 84.85714285714286, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.6887751817703247, "eval_runtime": 1.5031, "eval_samples_per_second": 52.557, "eval_steps_per_second": 1.996, "step": 297 }, { "epoch": 85.71428571428571, "grad_norm": 3.4207639694213867, "learning_rate": 0.0, "loss": 0.1226, "step": 300 }, { "epoch": 85.71428571428571, "eval_accuracy": 0.8354430379746836, "eval_loss": 0.688024640083313, "eval_runtime": 1.4114, "eval_samples_per_second": 55.972, "eval_steps_per_second": 2.126, "step": 300 }, { "epoch": 85.71428571428571, "step": 300, "total_flos": 2.9349165326823014e+18, "train_loss": 0.2789517060915629, "train_runtime": 2373.8808, "train_samples_per_second": 18.619, "train_steps_per_second": 0.126 }, { "epoch": 85.71428571428571, "eval_accuracy": 0.8481012658227848, "eval_loss": 0.7241003513336182, "eval_runtime": 1.4361, "eval_samples_per_second": 55.011, "eval_steps_per_second": 2.089, "step": 300 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 2.9349165326823014e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }