{ "best_metric": 3.643535614013672, "best_model_checkpoint": "./drive/MyDrive/repositories/torch_example_image-classification/outputs_letter3/checkpoint-700", "epoch": 100.0, "eval_steps": 500, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.030612244897959183, "eval_loss": 3.945223808288574, "eval_runtime": 35.9416, "eval_samples_per_second": 2.727, "eval_steps_per_second": 0.056, "step": 7 }, { "epoch": 1.43, "learning_rate": 1.9714285714285718e-05, "loss": 3.9498, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.05102040816326531, "eval_loss": 3.943814277648926, "eval_runtime": 1.6003, "eval_samples_per_second": 61.237, "eval_steps_per_second": 1.25, "step": 14 }, { "epoch": 2.86, "learning_rate": 1.942857142857143e-05, "loss": 3.9413, "step": 20 }, { "epoch": 3.0, "eval_accuracy": 0.04081632653061224, "eval_loss": 3.9436657428741455, "eval_runtime": 1.5584, "eval_samples_per_second": 62.886, "eval_steps_per_second": 1.283, "step": 21 }, { "epoch": 4.0, "eval_accuracy": 0.04081632653061224, "eval_loss": 3.9431352615356445, "eval_runtime": 1.6475, "eval_samples_per_second": 59.483, "eval_steps_per_second": 1.214, "step": 28 }, { "epoch": 4.29, "learning_rate": 1.9142857142857146e-05, "loss": 3.9255, "step": 30 }, { "epoch": 5.0, "eval_accuracy": 0.04081632653061224, "eval_loss": 3.9424338340759277, "eval_runtime": 1.5527, "eval_samples_per_second": 63.114, "eval_steps_per_second": 1.288, "step": 35 }, { "epoch": 5.71, "learning_rate": 1.885714285714286e-05, "loss": 3.9132, "step": 40 }, { "epoch": 6.0, "eval_accuracy": 0.030612244897959183, "eval_loss": 3.9400839805603027, "eval_runtime": 1.6728, "eval_samples_per_second": 58.585, "eval_steps_per_second": 1.196, "step": 42 }, { "epoch": 7.0, "eval_accuracy": 0.030612244897959183, "eval_loss": 3.937299966812134, "eval_runtime": 1.6864, "eval_samples_per_second": 58.111, "eval_steps_per_second": 1.186, "step": 49 }, { "epoch": 7.14, "learning_rate": 1.8571428571428575e-05, "loss": 3.8913, "step": 50 }, { "epoch": 8.0, "eval_accuracy": 0.02040816326530612, "eval_loss": 3.9351377487182617, "eval_runtime": 1.7475, "eval_samples_per_second": 56.08, "eval_steps_per_second": 1.144, "step": 56 }, { "epoch": 8.57, "learning_rate": 1.8285714285714288e-05, "loss": 3.8685, "step": 60 }, { "epoch": 9.0, "eval_accuracy": 0.02040816326530612, "eval_loss": 3.931184768676758, "eval_runtime": 1.7165, "eval_samples_per_second": 57.092, "eval_steps_per_second": 1.165, "step": 63 }, { "epoch": 10.0, "learning_rate": 1.8e-05, "loss": 3.8413, "step": 70 }, { "epoch": 10.0, "eval_accuracy": 0.030612244897959183, "eval_loss": 3.9259071350097656, "eval_runtime": 1.7086, "eval_samples_per_second": 57.357, "eval_steps_per_second": 1.171, "step": 70 }, { "epoch": 11.0, "eval_accuracy": 0.030612244897959183, "eval_loss": 3.9218833446502686, "eval_runtime": 1.6006, "eval_samples_per_second": 61.228, "eval_steps_per_second": 1.25, "step": 77 }, { "epoch": 11.43, "learning_rate": 1.7714285714285717e-05, "loss": 3.8163, "step": 80 }, { "epoch": 12.0, "eval_accuracy": 0.02040816326530612, "eval_loss": 3.9182682037353516, "eval_runtime": 1.8155, "eval_samples_per_second": 53.98, "eval_steps_per_second": 1.102, "step": 84 }, { "epoch": 12.86, "learning_rate": 1.742857142857143e-05, "loss": 3.7912, "step": 90 }, { "epoch": 13.0, "eval_accuracy": 0.04081632653061224, "eval_loss": 3.9151253700256348, "eval_runtime": 1.704, "eval_samples_per_second": 57.511, "eval_steps_per_second": 1.174, "step": 91 }, { "epoch": 14.0, "eval_accuracy": 0.030612244897959183, "eval_loss": 3.911611795425415, "eval_runtime": 1.7037, "eval_samples_per_second": 57.521, "eval_steps_per_second": 1.174, "step": 98 }, { "epoch": 14.29, "learning_rate": 1.7142857142857142e-05, "loss": 3.7616, "step": 100 }, { "epoch": 15.0, "eval_accuracy": 0.04081632653061224, "eval_loss": 3.9074392318725586, "eval_runtime": 1.6801, "eval_samples_per_second": 58.331, "eval_steps_per_second": 1.19, "step": 105 }, { "epoch": 15.71, "learning_rate": 1.6857142857142858e-05, "loss": 3.734, "step": 110 }, { "epoch": 16.0, "eval_accuracy": 0.04081632653061224, "eval_loss": 3.9029488563537598, "eval_runtime": 1.7659, "eval_samples_per_second": 55.496, "eval_steps_per_second": 1.133, "step": 112 }, { "epoch": 17.0, "eval_accuracy": 0.061224489795918366, "eval_loss": 3.8968818187713623, "eval_runtime": 1.6451, "eval_samples_per_second": 59.57, "eval_steps_per_second": 1.216, "step": 119 }, { "epoch": 17.14, "learning_rate": 1.6571428571428574e-05, "loss": 3.7014, "step": 120 }, { "epoch": 18.0, "eval_accuracy": 0.07142857142857142, "eval_loss": 3.890695095062256, "eval_runtime": 1.6901, "eval_samples_per_second": 57.983, "eval_steps_per_second": 1.183, "step": 126 }, { "epoch": 18.57, "learning_rate": 1.6285714285714287e-05, "loss": 3.6707, "step": 130 }, { "epoch": 19.0, "eval_accuracy": 0.07142857142857142, "eval_loss": 3.8844714164733887, "eval_runtime": 1.6505, "eval_samples_per_second": 59.374, "eval_steps_per_second": 1.212, "step": 133 }, { "epoch": 20.0, "learning_rate": 1.6000000000000003e-05, "loss": 3.6307, "step": 140 }, { "epoch": 20.0, "eval_accuracy": 0.08163265306122448, "eval_loss": 3.877913475036621, "eval_runtime": 1.8041, "eval_samples_per_second": 54.322, "eval_steps_per_second": 1.109, "step": 140 }, { "epoch": 21.0, "eval_accuracy": 0.08163265306122448, "eval_loss": 3.8703930377960205, "eval_runtime": 1.7575, "eval_samples_per_second": 55.762, "eval_steps_per_second": 1.138, "step": 147 }, { "epoch": 21.43, "learning_rate": 1.5714285714285715e-05, "loss": 3.596, "step": 150 }, { "epoch": 22.0, "eval_accuracy": 0.09183673469387756, "eval_loss": 3.8646252155303955, "eval_runtime": 1.6598, "eval_samples_per_second": 59.042, "eval_steps_per_second": 1.205, "step": 154 }, { "epoch": 22.86, "learning_rate": 1.542857142857143e-05, "loss": 3.5875, "step": 160 }, { "epoch": 23.0, "eval_accuracy": 0.09183673469387756, "eval_loss": 3.8603618144989014, "eval_runtime": 1.6878, "eval_samples_per_second": 58.065, "eval_steps_per_second": 1.185, "step": 161 }, { "epoch": 24.0, "eval_accuracy": 0.09183673469387756, "eval_loss": 3.8561482429504395, "eval_runtime": 1.6677, "eval_samples_per_second": 58.764, "eval_steps_per_second": 1.199, "step": 168 }, { "epoch": 24.29, "learning_rate": 1.5142857142857144e-05, "loss": 3.5532, "step": 170 }, { "epoch": 25.0, "eval_accuracy": 0.09183673469387756, "eval_loss": 3.8509910106658936, "eval_runtime": 1.7363, "eval_samples_per_second": 56.441, "eval_steps_per_second": 1.152, "step": 175 }, { "epoch": 25.71, "learning_rate": 1.4857142857142858e-05, "loss": 3.5374, "step": 180 }, { "epoch": 26.0, "eval_accuracy": 0.09183673469387756, "eval_loss": 3.844221353530884, "eval_runtime": 1.7776, "eval_samples_per_second": 55.131, "eval_steps_per_second": 1.125, "step": 182 }, { "epoch": 27.0, "eval_accuracy": 0.10204081632653061, "eval_loss": 3.83986496925354, "eval_runtime": 1.6514, "eval_samples_per_second": 59.342, "eval_steps_per_second": 1.211, "step": 189 }, { "epoch": 27.14, "learning_rate": 1.4571428571428573e-05, "loss": 3.51, "step": 190 }, { "epoch": 28.0, "eval_accuracy": 0.11224489795918367, "eval_loss": 3.8350086212158203, "eval_runtime": 1.6793, "eval_samples_per_second": 58.357, "eval_steps_per_second": 1.191, "step": 196 }, { "epoch": 28.57, "learning_rate": 1.4285714285714287e-05, "loss": 3.4842, "step": 200 }, { "epoch": 29.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.8296377658843994, "eval_runtime": 1.7517, "eval_samples_per_second": 55.946, "eval_steps_per_second": 1.142, "step": 203 }, { "epoch": 30.0, "learning_rate": 1.4e-05, "loss": 3.4495, "step": 210 }, { "epoch": 30.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.8243255615234375, "eval_runtime": 1.7837, "eval_samples_per_second": 54.942, "eval_steps_per_second": 1.121, "step": 210 }, { "epoch": 31.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.8213043212890625, "eval_runtime": 1.6545, "eval_samples_per_second": 59.233, "eval_steps_per_second": 1.209, "step": 217 }, { "epoch": 31.43, "learning_rate": 1.3714285714285716e-05, "loss": 3.4155, "step": 220 }, { "epoch": 32.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.815812110900879, "eval_runtime": 1.6721, "eval_samples_per_second": 58.61, "eval_steps_per_second": 1.196, "step": 224 }, { "epoch": 32.86, "learning_rate": 1.3428571428571429e-05, "loss": 3.4257, "step": 230 }, { "epoch": 33.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.8117594718933105, "eval_runtime": 1.6581, "eval_samples_per_second": 59.102, "eval_steps_per_second": 1.206, "step": 231 }, { "epoch": 34.0, "eval_accuracy": 0.1326530612244898, "eval_loss": 3.8061439990997314, "eval_runtime": 1.6272, "eval_samples_per_second": 60.227, "eval_steps_per_second": 1.229, "step": 238 }, { "epoch": 34.29, "learning_rate": 1.3142857142857145e-05, "loss": 3.395, "step": 240 }, { "epoch": 35.0, "eval_accuracy": 0.1326530612244898, "eval_loss": 3.8029837608337402, "eval_runtime": 1.6449, "eval_samples_per_second": 59.577, "eval_steps_per_second": 1.216, "step": 245 }, { "epoch": 35.71, "learning_rate": 1.2857142857142859e-05, "loss": 3.3693, "step": 250 }, { "epoch": 36.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.795672655105591, "eval_runtime": 1.6624, "eval_samples_per_second": 58.953, "eval_steps_per_second": 1.203, "step": 252 }, { "epoch": 37.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.790419578552246, "eval_runtime": 1.6455, "eval_samples_per_second": 59.556, "eval_steps_per_second": 1.215, "step": 259 }, { "epoch": 37.14, "learning_rate": 1.2571428571428572e-05, "loss": 3.35, "step": 260 }, { "epoch": 38.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.783411741256714, "eval_runtime": 1.6562, "eval_samples_per_second": 59.172, "eval_steps_per_second": 1.208, "step": 266 }, { "epoch": 38.57, "learning_rate": 1.2285714285714288e-05, "loss": 3.3453, "step": 270 }, { "epoch": 39.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.778721332550049, "eval_runtime": 1.7932, "eval_samples_per_second": 54.651, "eval_steps_per_second": 1.115, "step": 273 }, { "epoch": 40.0, "learning_rate": 1.2e-05, "loss": 3.2977, "step": 280 }, { "epoch": 40.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.772735357284546, "eval_runtime": 1.6336, "eval_samples_per_second": 59.991, "eval_steps_per_second": 1.224, "step": 280 }, { "epoch": 41.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.7680680751800537, "eval_runtime": 1.7171, "eval_samples_per_second": 57.072, "eval_steps_per_second": 1.165, "step": 287 }, { "epoch": 41.43, "learning_rate": 1.1714285714285716e-05, "loss": 3.2875, "step": 290 }, { "epoch": 42.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.762765407562256, "eval_runtime": 1.6284, "eval_samples_per_second": 60.181, "eval_steps_per_second": 1.228, "step": 294 }, { "epoch": 42.86, "learning_rate": 1.1428571428571429e-05, "loss": 3.2504, "step": 300 }, { "epoch": 43.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.758150100708008, "eval_runtime": 1.6731, "eval_samples_per_second": 58.575, "eval_steps_per_second": 1.195, "step": 301 }, { "epoch": 44.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.7527263164520264, "eval_runtime": 1.7657, "eval_samples_per_second": 55.502, "eval_steps_per_second": 1.133, "step": 308 }, { "epoch": 44.29, "learning_rate": 1.1142857142857143e-05, "loss": 3.2772, "step": 310 }, { "epoch": 45.0, "eval_accuracy": 0.12244897959183673, "eval_loss": 3.749258041381836, "eval_runtime": 1.6842, "eval_samples_per_second": 58.188, "eval_steps_per_second": 1.188, "step": 315 }, { "epoch": 45.71, "learning_rate": 1.0857142857142858e-05, "loss": 3.2353, "step": 320 }, { "epoch": 46.0, "eval_accuracy": 0.11224489795918367, "eval_loss": 3.7462167739868164, "eval_runtime": 1.6536, "eval_samples_per_second": 59.264, "eval_steps_per_second": 1.209, "step": 322 }, { "epoch": 47.0, "eval_accuracy": 0.1326530612244898, "eval_loss": 3.743089437484741, "eval_runtime": 1.639, "eval_samples_per_second": 59.793, "eval_steps_per_second": 1.22, "step": 329 }, { "epoch": 47.14, "learning_rate": 1.0571428571428572e-05, "loss": 3.2198, "step": 330 }, { "epoch": 48.0, "eval_accuracy": 0.1326530612244898, "eval_loss": 3.7392406463623047, "eval_runtime": 1.6659, "eval_samples_per_second": 58.827, "eval_steps_per_second": 1.201, "step": 336 }, { "epoch": 48.57, "learning_rate": 1.0285714285714285e-05, "loss": 3.204, "step": 340 }, { "epoch": 49.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.73702073097229, "eval_runtime": 1.7576, "eval_samples_per_second": 55.758, "eval_steps_per_second": 1.138, "step": 343 }, { "epoch": 50.0, "learning_rate": 1e-05, "loss": 3.1762, "step": 350 }, { "epoch": 50.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.733855962753296, "eval_runtime": 1.6472, "eval_samples_per_second": 59.494, "eval_steps_per_second": 1.214, "step": 350 }, { "epoch": 51.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.730581283569336, "eval_runtime": 1.6737, "eval_samples_per_second": 58.554, "eval_steps_per_second": 1.195, "step": 357 }, { "epoch": 51.43, "learning_rate": 9.714285714285715e-06, "loss": 3.1741, "step": 360 }, { "epoch": 52.0, "eval_accuracy": 0.16326530612244897, "eval_loss": 3.7267162799835205, "eval_runtime": 1.669, "eval_samples_per_second": 58.718, "eval_steps_per_second": 1.198, "step": 364 }, { "epoch": 52.86, "learning_rate": 9.42857142857143e-06, "loss": 3.1757, "step": 370 }, { "epoch": 53.0, "eval_accuracy": 0.16326530612244897, "eval_loss": 3.7221927642822266, "eval_runtime": 1.6595, "eval_samples_per_second": 59.054, "eval_steps_per_second": 1.205, "step": 371 }, { "epoch": 54.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.717994451522827, "eval_runtime": 1.6603, "eval_samples_per_second": 59.027, "eval_steps_per_second": 1.205, "step": 378 }, { "epoch": 54.29, "learning_rate": 9.142857142857144e-06, "loss": 3.1492, "step": 380 }, { "epoch": 55.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.7148733139038086, "eval_runtime": 1.6452, "eval_samples_per_second": 59.566, "eval_steps_per_second": 1.216, "step": 385 }, { "epoch": 55.71, "learning_rate": 8.857142857142858e-06, "loss": 3.1442, "step": 390 }, { "epoch": 56.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.7107248306274414, "eval_runtime": 1.6947, "eval_samples_per_second": 57.827, "eval_steps_per_second": 1.18, "step": 392 }, { "epoch": 57.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.7084951400756836, "eval_runtime": 1.6087, "eval_samples_per_second": 60.919, "eval_steps_per_second": 1.243, "step": 399 }, { "epoch": 57.14, "learning_rate": 8.571428571428571e-06, "loss": 3.1174, "step": 400 }, { "epoch": 58.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.705909013748169, "eval_runtime": 1.7081, "eval_samples_per_second": 57.372, "eval_steps_per_second": 1.171, "step": 406 }, { "epoch": 58.57, "learning_rate": 8.285714285714287e-06, "loss": 3.0962, "step": 410 }, { "epoch": 59.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.7031126022338867, "eval_runtime": 1.5815, "eval_samples_per_second": 61.967, "eval_steps_per_second": 1.265, "step": 413 }, { "epoch": 60.0, "learning_rate": 8.000000000000001e-06, "loss": 3.1237, "step": 420 }, { "epoch": 60.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.701946258544922, "eval_runtime": 1.7026, "eval_samples_per_second": 57.559, "eval_steps_per_second": 1.175, "step": 420 }, { "epoch": 61.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6996471881866455, "eval_runtime": 1.6975, "eval_samples_per_second": 57.732, "eval_steps_per_second": 1.178, "step": 427 }, { "epoch": 61.43, "learning_rate": 7.714285714285716e-06, "loss": 3.1229, "step": 430 }, { "epoch": 62.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6955974102020264, "eval_runtime": 1.6826, "eval_samples_per_second": 58.242, "eval_steps_per_second": 1.189, "step": 434 }, { "epoch": 62.86, "learning_rate": 7.428571428571429e-06, "loss": 3.0946, "step": 440 }, { "epoch": 63.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.692981719970703, "eval_runtime": 1.7683, "eval_samples_per_second": 55.42, "eval_steps_per_second": 1.131, "step": 441 }, { "epoch": 64.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6915957927703857, "eval_runtime": 1.6752, "eval_samples_per_second": 58.501, "eval_steps_per_second": 1.194, "step": 448 }, { "epoch": 64.29, "learning_rate": 7.1428571428571436e-06, "loss": 3.0861, "step": 450 }, { "epoch": 65.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6893138885498047, "eval_runtime": 1.6711, "eval_samples_per_second": 58.644, "eval_steps_per_second": 1.197, "step": 455 }, { "epoch": 65.71, "learning_rate": 6.857142857142858e-06, "loss": 3.0406, "step": 460 }, { "epoch": 66.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6859352588653564, "eval_runtime": 1.7238, "eval_samples_per_second": 56.85, "eval_steps_per_second": 1.16, "step": 462 }, { "epoch": 67.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6839077472686768, "eval_runtime": 1.6549, "eval_samples_per_second": 59.218, "eval_steps_per_second": 1.209, "step": 469 }, { "epoch": 67.14, "learning_rate": 6.571428571428572e-06, "loss": 3.077, "step": 470 }, { "epoch": 68.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6815552711486816, "eval_runtime": 1.673, "eval_samples_per_second": 58.578, "eval_steps_per_second": 1.195, "step": 476 }, { "epoch": 68.57, "learning_rate": 6.285714285714286e-06, "loss": 3.0555, "step": 480 }, { "epoch": 69.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.678163766860962, "eval_runtime": 1.6518, "eval_samples_per_second": 59.329, "eval_steps_per_second": 1.211, "step": 483 }, { "epoch": 70.0, "learning_rate": 6e-06, "loss": 3.035, "step": 490 }, { "epoch": 70.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6762583255767822, "eval_runtime": 1.6591, "eval_samples_per_second": 59.069, "eval_steps_per_second": 1.205, "step": 490 }, { "epoch": 71.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.672853469848633, "eval_runtime": 1.6495, "eval_samples_per_second": 59.412, "eval_steps_per_second": 1.212, "step": 497 }, { "epoch": 71.43, "learning_rate": 5.7142857142857145e-06, "loss": 3.0246, "step": 500 }, { "epoch": 72.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6719117164611816, "eval_runtime": 1.6901, "eval_samples_per_second": 57.986, "eval_steps_per_second": 1.183, "step": 504 }, { "epoch": 72.86, "learning_rate": 5.428571428571429e-06, "loss": 3.0282, "step": 510 }, { "epoch": 73.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.670848846435547, "eval_runtime": 1.7293, "eval_samples_per_second": 56.669, "eval_steps_per_second": 1.157, "step": 511 }, { "epoch": 74.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.6683461666107178, "eval_runtime": 1.6959, "eval_samples_per_second": 57.785, "eval_steps_per_second": 1.179, "step": 518 }, { "epoch": 74.29, "learning_rate": 5.142857142857142e-06, "loss": 3.0293, "step": 520 }, { "epoch": 75.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.665170669555664, "eval_runtime": 1.6438, "eval_samples_per_second": 59.617, "eval_steps_per_second": 1.217, "step": 525 }, { "epoch": 75.71, "learning_rate": 4.857142857142858e-06, "loss": 2.9893, "step": 530 }, { "epoch": 76.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.66402268409729, "eval_runtime": 1.6852, "eval_samples_per_second": 58.155, "eval_steps_per_second": 1.187, "step": 532 }, { "epoch": 77.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.6634998321533203, "eval_runtime": 1.7537, "eval_samples_per_second": 55.882, "eval_steps_per_second": 1.14, "step": 539 }, { "epoch": 77.14, "learning_rate": 4.571428571428572e-06, "loss": 2.9888, "step": 540 }, { "epoch": 78.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.6618170738220215, "eval_runtime": 1.7774, "eval_samples_per_second": 55.135, "eval_steps_per_second": 1.125, "step": 546 }, { "epoch": 78.57, "learning_rate": 4.2857142857142855e-06, "loss": 2.9833, "step": 550 }, { "epoch": 79.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.659451484680176, "eval_runtime": 1.6474, "eval_samples_per_second": 59.487, "eval_steps_per_second": 1.214, "step": 553 }, { "epoch": 80.0, "learning_rate": 4.000000000000001e-06, "loss": 2.9739, "step": 560 }, { "epoch": 80.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.6578376293182373, "eval_runtime": 1.6531, "eval_samples_per_second": 59.283, "eval_steps_per_second": 1.21, "step": 560 }, { "epoch": 81.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.656160593032837, "eval_runtime": 1.7409, "eval_samples_per_second": 56.294, "eval_steps_per_second": 1.149, "step": 567 }, { "epoch": 81.43, "learning_rate": 3.7142857142857146e-06, "loss": 2.9513, "step": 570 }, { "epoch": 82.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.655242681503296, "eval_runtime": 1.6811, "eval_samples_per_second": 58.294, "eval_steps_per_second": 1.19, "step": 574 }, { "epoch": 82.86, "learning_rate": 3.428571428571429e-06, "loss": 2.9503, "step": 580 }, { "epoch": 83.0, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.6539218425750732, "eval_runtime": 1.7574, "eval_samples_per_second": 55.764, "eval_steps_per_second": 1.138, "step": 581 }, { "epoch": 84.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.653193950653076, "eval_runtime": 1.6302, "eval_samples_per_second": 60.114, "eval_steps_per_second": 1.227, "step": 588 }, { "epoch": 84.29, "learning_rate": 3.142857142857143e-06, "loss": 2.9792, "step": 590 }, { "epoch": 85.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.651691436767578, "eval_runtime": 1.7277, "eval_samples_per_second": 56.724, "eval_steps_per_second": 1.158, "step": 595 }, { "epoch": 85.71, "learning_rate": 2.8571428571428573e-06, "loss": 2.9561, "step": 600 }, { "epoch": 86.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6496691703796387, "eval_runtime": 1.6772, "eval_samples_per_second": 58.432, "eval_steps_per_second": 1.192, "step": 602 }, { "epoch": 87.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6485908031463623, "eval_runtime": 1.664, "eval_samples_per_second": 58.893, "eval_steps_per_second": 1.202, "step": 609 }, { "epoch": 87.14, "learning_rate": 2.571428571428571e-06, "loss": 2.964, "step": 610 }, { "epoch": 88.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.647573709487915, "eval_runtime": 1.7778, "eval_samples_per_second": 55.124, "eval_steps_per_second": 1.125, "step": 616 }, { "epoch": 88.57, "learning_rate": 2.285714285714286e-06, "loss": 2.9665, "step": 620 }, { "epoch": 89.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6470184326171875, "eval_runtime": 1.6689, "eval_samples_per_second": 58.722, "eval_steps_per_second": 1.198, "step": 623 }, { "epoch": 90.0, "learning_rate": 2.0000000000000003e-06, "loss": 2.9439, "step": 630 }, { "epoch": 90.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.646164894104004, "eval_runtime": 2.2025, "eval_samples_per_second": 44.495, "eval_steps_per_second": 0.908, "step": 630 }, { "epoch": 91.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6452953815460205, "eval_runtime": 1.6826, "eval_samples_per_second": 58.242, "eval_steps_per_second": 1.189, "step": 637 }, { "epoch": 91.43, "learning_rate": 1.7142857142857145e-06, "loss": 2.9369, "step": 640 }, { "epoch": 92.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.645128011703491, "eval_runtime": 1.646, "eval_samples_per_second": 59.539, "eval_steps_per_second": 1.215, "step": 644 }, { "epoch": 92.86, "learning_rate": 1.4285714285714286e-06, "loss": 2.9619, "step": 650 }, { "epoch": 93.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.64505934715271, "eval_runtime": 1.7663, "eval_samples_per_second": 55.483, "eval_steps_per_second": 1.132, "step": 651 }, { "epoch": 94.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6448814868927, "eval_runtime": 1.6466, "eval_samples_per_second": 59.516, "eval_steps_per_second": 1.215, "step": 658 }, { "epoch": 94.29, "learning_rate": 1.142857142857143e-06, "loss": 2.955, "step": 660 }, { "epoch": 95.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6444060802459717, "eval_runtime": 1.6863, "eval_samples_per_second": 58.115, "eval_steps_per_second": 1.186, "step": 665 }, { "epoch": 95.71, "learning_rate": 8.571428571428572e-07, "loss": 2.9323, "step": 670 }, { "epoch": 96.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6440815925598145, "eval_runtime": 1.626, "eval_samples_per_second": 60.271, "eval_steps_per_second": 1.23, "step": 672 }, { "epoch": 97.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6438138484954834, "eval_runtime": 1.7724, "eval_samples_per_second": 55.292, "eval_steps_per_second": 1.128, "step": 679 }, { "epoch": 97.14, "learning_rate": 5.714285714285715e-07, "loss": 2.9466, "step": 680 }, { "epoch": 98.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.643672466278076, "eval_runtime": 1.7638, "eval_samples_per_second": 55.56, "eval_steps_per_second": 1.134, "step": 686 }, { "epoch": 98.57, "learning_rate": 2.8571428571428575e-07, "loss": 2.945, "step": 690 }, { "epoch": 99.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.6435706615448, "eval_runtime": 1.6937, "eval_samples_per_second": 57.862, "eval_steps_per_second": 1.181, "step": 693 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 2.9665, "step": 700 }, { "epoch": 100.0, "eval_accuracy": 0.15306122448979592, "eval_loss": 3.643535614013672, "eval_runtime": 1.66, "eval_samples_per_second": 59.037, "eval_steps_per_second": 1.205, "step": 700 }, { "epoch": 100.0, "step": 700, "total_flos": 4.2794747466153984e+18, "train_loss": 3.289549721309117, "train_runtime": 4059.8356, "train_samples_per_second": 13.597, "train_steps_per_second": 0.172 } ], "logging_steps": 10, "max_steps": 700, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 4.2794747466153984e+18, "train_batch_size": 80, "trial_name": null, "trial_params": null }